mtasic85 committed on
Commit
590516a
1 Parent(s): 7a40fb6

train model: tuned training

Browse files
Files changed (1) hide show
  1. scripts/model.yaml +7 -4
scripts/model.yaml CHANGED
@@ -17,7 +17,7 @@ model_config:
17
  parallel_residual: false
18
  bias: false
19
  norm_class_name: "RMSNorm"
20
- norm_eps: 1e-05
21
  mlp_class_name: "LLaMAMLP"
22
  intermediate_size: 2048
23
  rope_base: 500000
@@ -88,7 +88,8 @@ train:
88
  max_norm: 1.0
89
 
90
  # (type: float, default: 4e-05)
91
- min_lr: 4.0e-05
 
92
 
93
  # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
94
  eval:
@@ -116,10 +117,12 @@ optimizer:
116
 
117
  init_args:
118
  # (type: float, default: 0.001)
119
- lr: 5e-5
 
120
 
121
  # (type: float, default: 0.01)
122
- weight_decay: 0.1
 
123
 
124
  # (type: tuple, default: (0.9,0.999))
125
  betas:
 
17
  parallel_residual: false
18
  bias: false
19
  norm_class_name: "RMSNorm"
20
+ norm_eps: 1.0e-05
21
  mlp_class_name: "LLaMAMLP"
22
  intermediate_size: 2048
23
  rope_base: 500000
 
88
  max_norm: 1.0
89
 
90
  # (type: float, default: 4e-05)
91
+ # min_lr: 4.0e-05
92
+ min_lr: 1.0e-3
93
 
94
  # Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
95
  eval:
 
117
 
118
  init_args:
119
  # (type: float, default: 0.001)
120
+ # lr: 5e-5
121
+ lr: 1.0e-3
122
 
123
  # (type: float, default: 0.01)
124
+ # weight_decay: 0.1
125
+ weight_decay: 0.01
126
 
127
  # (type: tuple, default: (0.9,0.999))
128
  betas: