victormiller committed on
Commit
8bb7e3f
1 Parent(s): 5b83110

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +5 -5
main.py CHANGED
@@ -133,11 +133,11 @@ intro_text = P("Pretraining performant large language models (LLMs) requires tri
133
  intro_list = P("We present TxT360, the Trillion eXtracted Text corpus, a 5.7T token dataset for pretraining projects that:")
134
 
135
  intro_list1 = Ol(
136
- Li("Curates commonly used pretraining datasets, including all CommonCrawl"),
137
- Li("Employs carefully selected filters designed for each data source"),
138
- Li("Provides only unique data elements via globally deduplicated across all datasets"),
139
- Li("Retains all deduplication metadata for custom upweighting"),
140
- Li("Is Production ready! Download here [link to HF repo]")
141
  )
142
 
143
  previous_intro = P("""We are excited to introduce TxT360, a
 
133
  intro_list = P("We present TxT360, the Trillion eXtracted Text corpus, a 5.7T token dataset for pretraining projects that:")
134
 
135
  intro_list1 = Ol(
136
+ Li("Curates commonly used pretraining datasets, including all CommonCrawl", style = "margin-bottom: 5px"),
137
+ Li("Employs carefully selected filters designed for each data source", style = "margin-bottom: 5px"),
138
+ Li("Provides only unique data elements via globally deduplicated across all datasets", style = "margin-bottom: 5px"),
139
+ Li("Retains all deduplication metadata for custom upweighting", style = "margin-bottom: 5px"),
140
+ Li("Is Production ready! Download here [link to HF repo]", style = "margin-bottom: 5px")
141
  )
142
 
143
  previous_intro = P("""We are excited to introduce TxT360, a