victormiller committed on
Commit
61e28b6
1 Parent(s): 0dbc84f

Update curated.py

Browse files
Files changed (1) hide show
  1. curated.py +0 -1
curated.py CHANGED
@@ -445,7 +445,6 @@ filtering_process = Div(
445
  Section(
446
  Div(
447
  H3("Wikipedia"),
448
- H4("What is Wikipedia:")
449
  P("Wikipedia is an encyclopedia form of high-quality text data used for language modeling. We have included filtered and deduplicated versions of complete Wikipedia data directly provided by the Wikipedia Foundation for more than 350 languages."),
450
  H4("Download and Extraction"),
451
  P("The Wikimedia dataset was downloaded from the official snapshot on Huggingface: ", A("https://huggingface.co/datasets/wikimedia/wikipedia/tree/main", href="https://huggingface.co/datasets/wikimedia/wikipedia/tree/main"), ". The", D_code("huggingface dataset.to_json", language="python"), " function was used to convert the original parqet format to the jsonl format."),
 
445
  Section(
446
  Div(
447
  H3("Wikipedia"),
 
448
  P("Wikipedia is an encyclopedia form of high-quality text data used for language modeling. We have included filtered and deduplicated versions of complete Wikipedia data directly provided by the Wikipedia Foundation for more than 350 languages."),
449
  H4("Download and Extraction"),
450
  P("The Wikimedia dataset was downloaded from the official snapshot on Huggingface: ", A("https://huggingface.co/datasets/wikimedia/wikipedia/tree/main", href="https://huggingface.co/datasets/wikimedia/wikipedia/tree/main"), ". The", D_code("huggingface dataset.to_json", language="python"), " function was used to convert the original parqet format to the jsonl format."),