victormiller
committed on
Commit
•
6d51d72
1
Parent(s):
1228e08
Update overview.py
Browse files - overview.py +0 -4
overview.py
CHANGED
@@ -269,10 +269,6 @@ def get_curated_chart():
|
|
269 |
|
270 |
overview_div = Div(
|
271 |
Section(
|
272 |
-
Details(
|
273 |
-
Summary("Open Me"),
|
274 |
-
"maybe this will work"
|
275 |
-
),
|
276 |
H2("Combining the Best of Web and Curated Sources"),
|
277 |
P("""The quality and size of a pre-training dataset play a crucial role in the performance of large language models (LLMs).
|
278 |
The community has introduced a variety of datasets for this purpose, including purely web-based datasets like RefinedWeb{citation_obj.display_citation("refinedweb")}, RedPajama-Data-V2{citation_obj.display_citation("redpajama-v2")}, DCLM{citation_obj.display_citation("dclm")}, and FineWeb{citation_obj.display_citation("fineweb")},
|
|
|
269 |
|
270 |
overview_div = Div(
|
271 |
Section(
|
|
|
|
|
|
|
|
|
272 |
H2("Combining the Best of Web and Curated Sources"),
|
273 |
P("""The quality and size of a pre-training dataset play a crucial role in the performance of large language models (LLMs).
|
274 |
The community has introduced a variety of datasets for this purpose, including purely web-based datasets like RefinedWeb{citation_obj.display_citation("refinedweb")}, RedPajama-Data-V2{citation_obj.display_citation("redpajama-v2")}, DCLM{citation_obj.display_citation("dclm")}, and FineWeb{citation_obj.display_citation("fineweb")},
|