File size: 3,477 Bytes
46df0b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import gradio as gr

from widgets.widget_base import Widget
from data_measurements.dataset_statistics import DatasetStatisticsCacheClass as dmt_cls
import utils

# Module-level logging handle produced by the project's logging helper for this
# file's path (presumably a configured logger -- confirm in
# utils.prepare_logging).
logs = utils.prepare_logging(__file__)
## Possibly looking for update() --> return output

class TextLengths(Widget):
    """Widget for the "Text Lengths" tab.

    Shows the text-length distribution plot, a short summary of the average
    length and its standard deviation, and a dropdown that selects one
    length whose matching rows are listed in a dataframe.
    """

    def __init__(self) -> None:
        # Components are created with render=False so that they can be
        # placed inside the tab later, when render() is called.
        self.text_length_distribution_plot = gr.Image(render=False)
        self.text_length_explainer = gr.Markdown(render=False)
        self.text_length_drop_down = gr.Dropdown(render=False)
        self.text_length_df = gr.DataFrame(render=False)

    def update_text_length_df(self, length, dstats):
        """Return the rows of the lengths table whose "length" equals *length*.

        Args:
            length: The length value selected in the dropdown.
            dstats: Object exposing ``length_obj.lengths_df``, a dataframe
                with a "length" column.

        Returns:
            The matching rows, indexed by the "length" column.
        """
        lengths_df = dstats.length_obj.lengths_df
        return lengths_df[lengths_df["length"] == length].set_index("length")

    def render(self) -> None:
        """Lay out the tab: the explanatory text followed by the components."""
        with gr.TabItem("Text Lengths"):
            gr.Markdown(
                "Use this widget to identify outliers, particularly suspiciously long outliers."
            )
            gr.Markdown(
                "Below, you can see how the lengths of the text instances in your "
                "dataset are distributed."
            )
            gr.Markdown(
                "Any unexpected peaks or valleys in the distribution may help to "
                "identify instances you want to remove or augment."
            )
            gr.Markdown(
                "### Here is the count of different text lengths in your dataset:"
            )
            # NOTE: per the original author, the plot starts out as a
            # matplotlib Figure but is an ndarray once it has been saved and
            # read back in, which is why it is shown through an image
            # component.
            self.text_length_distribution_plot.render()
            self.text_length_explainer.render()
            self.text_length_drop_down.render()
            self.text_length_df.render()

    def update(self, dstats: dmt_cls):
        """Build the new values for every output component.

        Args:
            dstats: Dataset statistics; ``length_obj`` supplies
                ``avg_length``, ``std_length``, ``fig_lengths`` and
                ``lengths_df``.

        Returns:
            A dict mapping each output component to its new value or update.
            The dropdown and dataframe are hidden when there is no length
            table (``lengths_df`` is None or has no rows) and are explicitly
            made visible again when there is one.
        """
        length_obj = dstats.length_obj
        avg_length = round(length_obj.avg_length, 2)
        std_length = round(length_obj.std_length, 2)
        explainer_text = (
            f"The average length of text instances is **{avg_length} words**, "
            f"with a standard deviation of **{std_length}**."
        )
        # TODO: Add text on choosing the length you want to the dropdown.
        output = {
            self.text_length_distribution_plot: length_obj.fig_lengths,
            self.text_length_explainer: explainer_text,
        }

        choices = []
        if length_obj.lengths_df is not None:
            import numpy as np

            # Distinct lengths, longest first, as plain Python values.
            choices = np.sort(length_obj.lengths_df["length"].unique())[
                ::-1
            ].tolist()

        if choices:
            # visible=True undoes a previous "hide" from a dataset that had
            # no length table; without it the components would stay hidden.
            output[self.text_length_drop_down] = gr.update(
                choices=choices, value=choices[0], visible=True
            )
            output[self.text_length_df] = gr.update(
                value=self.update_text_length_df(choices[0], dstats),
                visible=True,
            )
        else:
            # No table, or an empty one: nothing to select, so hide both
            # instead of indexing into an empty choice list.
            output[self.text_length_df] = gr.update(visible=False)
            output[self.text_length_drop_down] = gr.update(visible=False)
        return output

    @property
    def output_components(self):
        """The components that update() provides values for."""
        return [
            self.text_length_distribution_plot,
            self.text_length_explainer,
            self.text_length_drop_down,
            self.text_length_df,
        ]

    def add_events(self, state: gr.State) -> None:
        """Refresh the dataframe whenever the dropdown selection changes.

        Args:
            state: Gradio state passed as the second input to
                update_text_length_df (its ``dstats`` argument).
        """
        self.text_length_drop_down.change(
            self.update_text_length_df,
            inputs=[self.text_length_drop_down, state],
            outputs=[self.text_length_df],
        )