pretrain dataset
Browse files
scripts/prepare_pretrain_dataset.py
CHANGED
@@ -421,5 +421,5 @@ outputs = optimize(
|
|
421 |
# Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
|
422 |
chunk_size=(2049 * 8012),
|
423 |
num_workers=32,
|
424 |
-
compression='zstd',
|
425 |
)
|
|
|
421 |
# Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
|
422 |
chunk_size=(2049 * 8012),
|
423 |
num_workers=32,
|
424 |
+
# compression='zstd',
|
425 |
)
|