mtasic85 committed on
Commit
012c999
1 Parent(s): d8a12fc

pretrain dataset

Browse files
scripts/prepare_pretrain_dataset.py CHANGED
@@ -263,8 +263,8 @@ def batch_iterator(name=None):
263
 
264
  # math
265
  if name in (None, 'fblgit/simple-math'):
266
- for split in ['arithmetic.float3_valid', 'arithmetic.float34_train', 'arithmetic.float2_train', 'arithmetic.float3_train', 'arithmetic.float34_valid', 'arithmetic.float4_train', 'arithmetic.float4_valid', 'arithmetic.float2_valid']:
267
- dataset = load_dataset(name, split=split)
268
 
269
  for row in dataset:
270
  yield (
 
263
 
264
  # math
265
  if name in (None, 'fblgit/simple-math'):
266
+ for split in ['train', 'test']:
267
+ dataset = load_dataset(name, revision='refs/convert/parquet', split=split)
268
 
269
  for row in dataset:
270
  yield (