train model: tuned training
Browse files
- scripts/model.yaml +7 -4
scripts/model.yaml
CHANGED
@@ -17,7 +17,7 @@ model_config:
|
|
17 |
parallel_residual: false
|
18 |
bias: false
|
19 |
norm_class_name: "RMSNorm"
|
20 |
-
norm_eps:
|
21 |
mlp_class_name: "LLaMAMLP"
|
22 |
intermediate_size: 2048
|
23 |
rope_base: 500000
|
@@ -88,7 +88,8 @@ train:
|
|
88 |
max_norm: 1.0
|
89 |
|
90 |
# (type: float, default: 4e-05)
|
91 |
-
min_lr: 4.0e-05
|
|
|
92 |
|
93 |
# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
|
94 |
eval:
|
@@ -116,10 +117,12 @@ optimizer:
|
|
116 |
|
117 |
init_args:
|
118 |
# (type: float, default: 0.001)
|
119 |
-
lr: 5e-5
|
|
|
120 |
|
121 |
# (type: float, default: 0.01)
|
122 |
-
weight_decay: 0.1
|
|
|
123 |
|
124 |
# (type: tuple, default: (0.9,0.999))
|
125 |
betas:
|
|
|
17 |
parallel_residual: false
|
18 |
bias: false
|
19 |
norm_class_name: "RMSNorm"
|
20 |
+
norm_eps: 1.0e-05
|
21 |
mlp_class_name: "LLaMAMLP"
|
22 |
intermediate_size: 2048
|
23 |
rope_base: 500000
|
|
|
88 |
max_norm: 1.0
|
89 |
|
90 |
# (type: float, default: 4e-05)
|
91 |
+
# min_lr: 4.0e-05
|
92 |
+
min_lr: 1.0e-3
|
93 |
|
94 |
# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
|
95 |
eval:
|
|
|
117 |
|
118 |
init_args:
|
119 |
# (type: float, default: 0.001)
|
120 |
+
# lr: 5e-5
|
121 |
+
lr: 1.0e-3
|
122 |
|
123 |
# (type: float, default: 0.01)
|
124 |
+
# weight_decay: 0.1
|
125 |
+
weight_decay: 0.01
|
126 |
|
127 |
# (type: tuple, default: (0.9,0.999))
|
128 |
betas:
|