pretrain mode
Browse files
scripts/pretrain-model.yaml
CHANGED
@@ -19,7 +19,7 @@ model_config:
|
|
19 |
norm_class_name: "RMSNorm"
|
20 |
norm_eps: 1e-05
|
21 |
mlp_class_name: "LLaMAMLP"
|
22 |
- intermediate_size:
|
23 |
rope_base: 500000
|
24 |
rope_adjustments:
|
25 |
factor: 32.0
|
@@ -76,7 +76,7 @@ train:
|
|
76 |
|
77 |
# Total number of tokens to train on (type: Optional[int], default: 3000000000000)
|
78 |
# max_tokens: 3000000000000
|
79 |
- max_tokens:
|
80 |
|
81 |
# Limits the number of optimizer steps to run. (type: Optional[int], default: null)
|
82 |
max_steps:
|
|
|
19 |
norm_class_name: "RMSNorm"
|
20 |
norm_eps: 1e-05
|
21 |
mlp_class_name: "LLaMAMLP"
|
22 |
+ intermediate_size: 4096
|
23 |
rope_base: 500000
|
24 |
rope_adjustments:
|
25 |
factor: 32.0
|
|
|
76 |
|
77 |
# Total number of tokens to train on (type: Optional[int], default: 3000000000000)
|
78 |
# max_tokens: 3000000000000
|
79 |
+ max_tokens: 8159107755 # 796399 * 2049 * 5
|
80 |
|
81 |
# Limits the number of optimizer steps to run. (type: Optional[int], default: null)
|
82 |
max_steps:
|