victormiller committed on
Commit
775d041
1 Parent(s): 666337a

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +0 -1
curated.py CHANGED
@@ -450,7 +450,6 @@ filtering_process = Div(
450
  P("The Wikimedia dataset was downloaded from the official snapshot on Huggingface: ", A("https://huggingface.co/datasets/wikimedia/wikipedia/tree/main", href="https://huggingface.co/datasets/wikimedia/wikipedia/tree/main"), ". The", D_code("huggingface dataset.to_json", language="python"), " function was used to convert the original parqet format to the jsonl format."),
451
  H4("Filtering"),
452
  P("Manual inspection of the dataset demostrated high quality content. Only one filter was used to remove articles with few words. Based normal sentence constructs, the article was kept if it contained 10 or more words. Any article with fewer than 10 words was removed."),
453
- ),
454
  table_div_wikipedia,
455
  ),
456
  ),
 
450
  P("The Wikimedia dataset was downloaded from the official snapshot on Huggingface: ", A("https://huggingface.co/datasets/wikimedia/wikipedia/tree/main", href="https://huggingface.co/datasets/wikimedia/wikipedia/tree/main"), ". The", D_code("huggingface dataset.to_json", language="python"), " function was used to convert the original parqet format to the jsonl format."),
451
  H4("Filtering"),
452
  P("Manual inspection of the dataset demostrated high quality content. Only one filter was used to remove articles with few words. Based normal sentence constructs, the article was kept if it contained 10 or more words. Any article with fewer than 10 words was removed."),
 
453
  table_div_wikipedia,
454
  ),
455
  ),