pretrain model
Browse files
scripts/prepare_pretrain_dataset.py
CHANGED
@@ -222,7 +222,7 @@ datasets_names = [
|
|
222 |
'm-a-p/CodeFeedback-Filtered-Instruction',
|
223 |
'jtatman/python-code-dataset-500k',
|
224 |
'iamtarun/python_code_instructions_18k_alpaca',
|
225 |
-
'',
|
226 |
'gair-prox/open-web-math-pro',
|
227 |
'ajibawa-2023/Maths-College',
|
228 |
'microsoft/orca-math-word-problems-200k',
|
|
|
222 |
'm-a-p/CodeFeedback-Filtered-Instruction',
|
223 |
'jtatman/python-code-dataset-500k',
|
224 |
'iamtarun/python_code_instructions_18k_alpaca',
|
225 |
+
'HuggingFaceH4/CodeAlpaca_20K',
|
226 |
'gair-prox/open-web-math-pro',
|
227 |
'ajibawa-2023/Maths-College',
|
228 |
'microsoft/orca-math-word-problems-200k',
|