hunterhector committed on
Commit
d098e08
1 Parent(s): eb884e6

fix common crawl stats

Browse files
Files changed (1) hide show
  1. overview.py +3 -3
overview.py CHANGED
@@ -172,7 +172,7 @@ table_div_2 = Div(NotStr(table_html2), style="margin: 40px;")
172
  dataset_sources = pd.DataFrame(
173
  {
174
  "Data Source": [
175
- "CommonCrawl",
176
  "Papers",
177
  "Wikipedia",
178
  "Freelaw",
@@ -185,7 +185,7 @@ dataset_sources = pd.DataFrame(
185
  "StackExchange",
186
  ],
187
  "Raw Data Size": [
188
- "11 TB",
189
  "712 GB",
190
  "210 GB",
191
  "23 GB",
@@ -198,7 +198,7 @@ dataset_sources = pd.DataFrame(
198
  "45 GB",
199
  ],
200
  "Token Count": [
201
- "5.71T",
202
  "154.96B",
203
  "4.75B",
204
  "7.34B",
 
172
  dataset_sources = pd.DataFrame(
173
  {
174
  "Data Source": [
175
+ "Common Crawl",
176
  "Papers",
177
  "Wikipedia",
178
  "Freelaw",
 
185
  "StackExchange",
186
  ],
187
  "Raw Data Size": [
188
+ "9.2 TB",
189
  "712 GB",
190
  "210 GB",
191
  "23 GB",
 
198
  "45 GB",
199
  ],
200
  "Token Count": [
201
+ "4.83T",
202
  "154.96B",
203
  "4.75B",
204
  "7.34B",