pretrain dataset
Browse files
scripts/prepare_pretrain_dataset.py
CHANGED
@@ -263,8 +263,8 @@ def batch_iterator(name=None):
|
|
263 |
|
264 |
# math
|
265 |
if name in (None, 'fblgit/simple-math'):
|
266 |
-
for split in ['
|
267 |
-
dataset = load_dataset(name, split=split)
|
268 |
|
269 |
for row in dataset:
|
270 |
yield (
|
|
|
263 |
|
264 |
# math
|
265 |
if name in (None, 'fblgit/simple-math'):
|
266 |
+
for split in ['train', 'test']:
|
267 |
+
dataset = load_dataset(name, revision='refs/convert/parquet', split=split)
|
268 |
|
269 |
for row in dataset:
|
270 |
yield (
|