victormiller
committed on
Commit
•
8c7dda2
1
Parent(s):
b16daa1
Update web.py
Browse files
web.py
CHANGED
@@ -388,6 +388,7 @@ def web_data():
|
|
388 |
Li("Each section is complete with code and comparisons to Dolma,", D_cite(bibtex_key="soldaini2024dolma"),
|
389 |
"DataTrove,", D_cite(bibtex_key="penedo2024datatrove"),
|
390 |
"and/or RedPajama-V-2", D_cite(bibtex_key="redpajama-v2"), style = "margin-bottom: 5px"),
|
|
|
391 |
),
|
392 |
P("To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Below is a comprehensive list of datasets we reviewed the comparison of filters we have applied."),
|
393 |
),
|
|
|
388 |
Li("Each section is complete with code and comparisons to Dolma,", D_cite(bibtex_key="soldaini2024dolma"),
|
389 |
"DataTrove,", D_cite(bibtex_key="penedo2024datatrove"),
|
390 |
"and/or RedPajama-V-2", D_cite(bibtex_key="redpajama-v2"), style = "margin-bottom: 5px"),
|
391 |
+
Li(B("Estimated Reading Time: 31 minutes"), style = "margin-bottom: 5px"),
|
392 |
),
|
393 |
P("To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Below is a comprehensive list of datasets we reviewed the comparison of filters we have applied."),
|
394 |
),
|