victormiller committed on
Commit
7cc1892
1 Parent(s): 6e60fe2

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +7 -7
curated.py CHANGED
@@ -440,7 +440,7 @@ table_div_phil = Div(NotStr(table_html_phil), style="margin: 40px;")
440
 
441
  filtering_process = Div(
442
  Section(
443
- P("This section contains the specific steps taken to filter all 14 curated source datasets.")
444
  ),
445
  Section(
446
  Div(
@@ -965,7 +965,7 @@ for dataset in df.columns[1:]:
965
  # Update the layout
966
  fig.update_layout(
967
  barmode='stack',
968
- title='Stacked Bar Chart of Line Reductions by Filter for Each Dataset',
969
  xaxis_title='Filter',
970
  yaxis_title='Number of Lines',
971
  legend_title='Dataset',
@@ -1123,14 +1123,14 @@ def curated(request):
1123
  copyright_disclaimer,
1124
  plotly2fasthtml(treemap_chart),
1125
  data_preprocessing_div,
1126
- plotly2fasthtml(diff2_stacked_bar),
1127
  H2("Curated Sources Processing"),
1128
- H3("TALK ABOUT THE DIFFERENT FILTERS BEFORE HAND"),
 
1129
  filtering_process,
1130
  data_preparation_div,
1131
- H2("Local Deduplication"),
1132
- local_dedup_text,
1133
- table_div_data_pipe,
1134
  id="inner-text",
1135
  )
1136
 
 
440
 
441
  filtering_process = Div(
442
  Section(
443
+ H3("This section contains the specific filtering steps taken for all 14 curated datasets."),
444
  ),
445
  Section(
446
  Div(
 
965
  # Update the layout
966
  fig.update_layout(
967
  barmode='stack',
968
+ title='Document Reduction by Filter for Each Dataset',
969
  xaxis_title='Filter',
970
  yaxis_title='Number of Lines',
971
  legend_title='Dataset',
 
1123
  copyright_disclaimer,
1124
  plotly2fasthtml(treemap_chart),
1125
  data_preprocessing_div,
 
1126
  H2("Curated Sources Processing"),
1127
+ plotly2fasthtml(diff2_stacked_bar),
1128
+ P("The figure above provides a global view of the document filtering results. ~8% of documents were removed during these three steps."),
1129
  filtering_process,
1130
  data_preparation_div,
1131
+ #H2("Local Deduplication"), are these numbers even right?
1132
+ #local_dedup_text,
1133
+ #table_div_data_pipe,
1134
  id="inner-text",
1135
  )
1136