mtasic85 committed on
Commit
a4a75cd
1 Parent(s): 48be84a

pretrain dataset

Browse files
scripts/prepare_pretrain_dataset.py CHANGED
@@ -421,5 +421,5 @@ outputs = optimize(
421
  # Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
422
  chunk_size=(2049 * 8012),
423
  num_workers=32,
424
- compression='zstd',
425
  )
 
421
  # Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
422
  chunk_size=(2049 * 8012),
423
  num_workers=32,
424
+ # compression='zstd',
425
  )