Spaces:
Running
Running
victormiller
committed on
Commit
•
f4f88cc
1
Parent(s):
7b420f4
Update web.py
Browse files
web.py
CHANGED
@@ -364,9 +364,9 @@ def web_data():
|
|
364 |
Li("Each section is complete with code and comparisons to Dolma, DataTrove, and/or RedPajama-V-2", style = "margin-bottom: 5px"),
|
365 |
),
|
366 |
),
|
367 |
-
|
368 |
-
H2("Common Crawl Data Processing Summary"),
|
369 |
Div(
|
|
|
370 |
P(
|
371 |
"To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Starting from ",
|
372 |
A("Common Crawl", href="https://commoncrawl.org/"),
|
|
|
364 |
Li("Each section is complete with code and comparisons to Dolma, DataTrove, and/or RedPajama-V-2", style = "margin-bottom: 5px"),
|
365 |
),
|
366 |
),
|
367 |
+
|
|
|
368 |
Div(
|
369 |
+
H2("Common Crawl Data Processing Summary"),
|
370 |
P(
|
371 |
"To generate a high-quality dataset from large-scale webpages, we have investigated the processing steps used by the community and made our choices based on careful manual inspection. Starting from ",
|
372 |
A("Common Crawl", href="https://commoncrawl.org/"),
|