jsulz HF staff committed on
Commit
6c9d6d5
1 Parent(s): b831af7

minor cleanup

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -14,22 +14,22 @@ def load_transform_dataset():
14
  # Load the dataset and convert it to a Pandas dataframe
15
  sotu_dataset = "jsulz/state-of-the-union-addresses"
16
  dataset = load_dataset(sotu_dataset)
17
- df = dataset["train"].to_pandas()
18
  # Do some on-the-fly calculations
19
  # calculate the number of words in each address
20
- df["word_count"] = df["speech_html"].apply(lambda x: len(x.split()))
21
  # calculate the automated readability index reading ease score for each address
22
  # automated readability index = 4.71 * (characters/words) + 0.5 * (words/sentences) - 21.43
23
- df["ari"] = df["no-contractions"].apply(
24
  lambda x: (4.71 * (len(x.replace(" ", "")) / len(x.split())))
25
  + (0.5 * (len(x.split()) / len(x.split("."))))
26
  - 21.43
27
  )
28
  # Sort the dataframe by date because Plotly doesn't do any of this automatically
29
- df = df.sort_values(by="date")
30
- written = df[df["categories"] == "Written"]
31
- spoken = df[df["categories"] == "Spoken"]
32
- return df, written, spoken
33
 
34
 
35
  """
@@ -234,6 +234,7 @@ with gr.Blocks() as demo:
234
  minimum=1, maximum=4, step=1, label="N-grams", interactive=True, value=1
235
  )
236
 
 
237
  df_state = gr.State(df)
238
 
239
  # show a bar chart of the top n-grams for a selected president
 
14
  # Load the dataset and convert it to a Pandas dataframe
15
  sotu_dataset = "jsulz/state-of-the-union-addresses"
16
  dataset = load_dataset(sotu_dataset)
17
+ _df = dataset["train"].to_pandas()
18
  # Do some on-the-fly calculations
19
  # calculate the number of words in each address
20
+ _df["word_count"] = _df["speech_html"].apply(lambda x: len(x.split()))
21
  # calculate the automated readability index reading ease score for each address
22
  # automated readability index = 4.71 * (characters/words) + 0.5 * (words/sentences) - 21.43
23
+ _df["ari"] = _df["no-contractions"].apply(
24
  lambda x: (4.71 * (len(x.replace(" ", "")) / len(x.split())))
25
  + (0.5 * (len(x.split()) / len(x.split("."))))
26
  - 21.43
27
  )
28
  # Sort the dataframe by date because Plotly doesn't do any of this automatically
29
+ _df = _df.sort_values(by="date")
30
+ _written = _df[_df["categories"] == "Written"]
31
+ _spoken = _df[_df["categories"] == "Spoken"]
32
+ return _df, _written, _spoken
33
 
34
 
35
  """
 
234
  minimum=1, maximum=4, step=1, label="N-grams", interactive=True, value=1
235
  )
236
 
237
+ # store the dataframe in a state object before passing to plots
238
  df_state = gr.State(df)
239
 
240
  # show a bar chart of the top n-grams for a selected president