Training in progress, step 410000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995605445
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c55164556dd25d6a582fef22bc2c651808a168730ec26b5c6008062fd0f7cc5d
|
3 |
size 995605445
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d13f3b70adc2f936ec547de1ec36f77495ca229446b2ed5ee40a8227e58a819b
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc51395fb5526d062f2906017676468fbf5f119fecfd463d76e3f9fc8a940a31
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5132,11 +5132,139 @@
|
|
5132 |
"eval_samples_per_second": 166.431,
|
5133 |
"eval_steps_per_second": 20.808,
|
5134 |
"step": 400000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5135 |
}
|
5136 |
],
|
5137 |
"max_steps": 633540,
|
5138 |
"num_train_epochs": 15,
|
5139 |
-
"total_flos": 8.
|
5140 |
"trial_name": null,
|
5141 |
"trial_params": null
|
5142 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.707358651387443,
|
5 |
+
"global_step": 410000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5132 |
"eval_samples_per_second": 166.431,
|
5133 |
"eval_steps_per_second": 20.808,
|
5134 |
"step": 400000
|
5135 |
+
},
|
5136 |
+
{
|
5137 |
+
"epoch": 9.48,
|
5138 |
+
"learning_rate": 0.0,
|
5139 |
+
"loss": 2.413,
|
5140 |
+
"step": 400500
|
5141 |
+
},
|
5142 |
+
{
|
5143 |
+
"epoch": 9.49,
|
5144 |
+
"learning_rate": 0.0,
|
5145 |
+
"loss": 2.4149,
|
5146 |
+
"step": 401000
|
5147 |
+
},
|
5148 |
+
{
|
5149 |
+
"epoch": 9.51,
|
5150 |
+
"learning_rate": 0.0,
|
5151 |
+
"loss": 2.4278,
|
5152 |
+
"step": 401500
|
5153 |
+
},
|
5154 |
+
{
|
5155 |
+
"epoch": 9.52,
|
5156 |
+
"learning_rate": 0.0,
|
5157 |
+
"loss": 2.4337,
|
5158 |
+
"step": 402000
|
5159 |
+
},
|
5160 |
+
{
|
5161 |
+
"epoch": 9.53,
|
5162 |
+
"learning_rate": 0.0,
|
5163 |
+
"loss": 2.4553,
|
5164 |
+
"step": 402500
|
5165 |
+
},
|
5166 |
+
{
|
5167 |
+
"epoch": 9.54,
|
5168 |
+
"learning_rate": 0.0,
|
5169 |
+
"loss": 2.4216,
|
5170 |
+
"step": 403000
|
5171 |
+
},
|
5172 |
+
{
|
5173 |
+
"epoch": 9.55,
|
5174 |
+
"learning_rate": 0.0,
|
5175 |
+
"loss": 2.4092,
|
5176 |
+
"step": 403500
|
5177 |
+
},
|
5178 |
+
{
|
5179 |
+
"epoch": 9.57,
|
5180 |
+
"learning_rate": 0.0,
|
5181 |
+
"loss": 2.4274,
|
5182 |
+
"step": 404000
|
5183 |
+
},
|
5184 |
+
{
|
5185 |
+
"epoch": 9.58,
|
5186 |
+
"learning_rate": 0.0,
|
5187 |
+
"loss": 2.426,
|
5188 |
+
"step": 404500
|
5189 |
+
},
|
5190 |
+
{
|
5191 |
+
"epoch": 9.59,
|
5192 |
+
"learning_rate": 0.0,
|
5193 |
+
"loss": 2.4348,
|
5194 |
+
"step": 405000
|
5195 |
+
},
|
5196 |
+
{
|
5197 |
+
"epoch": 9.6,
|
5198 |
+
"learning_rate": 0.0,
|
5199 |
+
"loss": 2.4308,
|
5200 |
+
"step": 405500
|
5201 |
+
},
|
5202 |
+
{
|
5203 |
+
"epoch": 9.61,
|
5204 |
+
"learning_rate": 0.0,
|
5205 |
+
"loss": 2.4369,
|
5206 |
+
"step": 406000
|
5207 |
+
},
|
5208 |
+
{
|
5209 |
+
"epoch": 9.62,
|
5210 |
+
"learning_rate": 0.0,
|
5211 |
+
"loss": 2.4375,
|
5212 |
+
"step": 406500
|
5213 |
+
},
|
5214 |
+
{
|
5215 |
+
"epoch": 9.64,
|
5216 |
+
"learning_rate": 0.0,
|
5217 |
+
"loss": 2.4243,
|
5218 |
+
"step": 407000
|
5219 |
+
},
|
5220 |
+
{
|
5221 |
+
"epoch": 9.65,
|
5222 |
+
"learning_rate": 0.0,
|
5223 |
+
"loss": 2.4209,
|
5224 |
+
"step": 407500
|
5225 |
+
},
|
5226 |
+
{
|
5227 |
+
"epoch": 9.66,
|
5228 |
+
"learning_rate": 0.0,
|
5229 |
+
"loss": 2.4399,
|
5230 |
+
"step": 408000
|
5231 |
+
},
|
5232 |
+
{
|
5233 |
+
"epoch": 9.67,
|
5234 |
+
"learning_rate": 0.0,
|
5235 |
+
"loss": 2.4302,
|
5236 |
+
"step": 408500
|
5237 |
+
},
|
5238 |
+
{
|
5239 |
+
"epoch": 9.68,
|
5240 |
+
"learning_rate": 0.0,
|
5241 |
+
"loss": 2.4611,
|
5242 |
+
"step": 409000
|
5243 |
+
},
|
5244 |
+
{
|
5245 |
+
"epoch": 9.7,
|
5246 |
+
"learning_rate": 0.0,
|
5247 |
+
"loss": 2.4166,
|
5248 |
+
"step": 409500
|
5249 |
+
},
|
5250 |
+
{
|
5251 |
+
"epoch": 9.71,
|
5252 |
+
"learning_rate": 0.0,
|
5253 |
+
"loss": 2.4281,
|
5254 |
+
"step": 410000
|
5255 |
+
},
|
5256 |
+
{
|
5257 |
+
"epoch": 9.71,
|
5258 |
+
"eval_loss": 3.1522228717803955,
|
5259 |
+
"eval_runtime": 112.8711,
|
5260 |
+
"eval_samples_per_second": 166.314,
|
5261 |
+
"eval_steps_per_second": 20.794,
|
5262 |
+
"step": 410000
|
5263 |
}
|
5264 |
],
|
5265 |
"max_steps": 633540,
|
5266 |
"num_train_epochs": 15,
|
5267 |
+
"total_flos": 8.6548090457088e+16,
|
5268 |
"trial_name": null,
|
5269 |
"trial_params": null
|
5270 |
}
|