triki07 commited on
Commit
5554f7a
1 Parent(s): 6ef0fe2

ocr version 1

Browse files
Files changed (1) hide show
  1. trainer_state.json +152 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 936,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -224,6 +224,155 @@
224
  "learning_rate": 2.0192307692307694e-05,
225
  "loss": 0.2934,
226
  "step": 930
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  }
228
  ],
229
  "logging_steps": 31,
@@ -231,7 +380,7 @@
231
  "num_input_tokens_seen": 0,
232
  "num_train_epochs": 5,
233
  "save_steps": 500,
234
- "total_flos": 1.1075205215224332e+19,
235
  "train_batch_size": 8,
236
  "trial_name": null,
237
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 1560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
224
  "learning_rate": 2.0192307692307694e-05,
225
  "loss": 0.2934,
226
  "step": 930
227
+ },
228
+ {
229
+ "epoch": 3.08,
230
+ "grad_norm": 15.149397850036621,
231
+ "learning_rate": 1.919871794871795e-05,
232
+ "loss": 0.2682,
233
+ "step": 961
234
+ },
235
+ {
236
+ "epoch": 3.18,
237
+ "grad_norm": 3.612698793411255,
238
+ "learning_rate": 1.8205128205128204e-05,
239
+ "loss": 0.264,
240
+ "step": 992
241
+ },
242
+ {
243
+ "epoch": 3.28,
244
+ "grad_norm": 1.7986979484558105,
245
+ "learning_rate": 1.721153846153846e-05,
246
+ "loss": 0.2085,
247
+ "step": 1023
248
+ },
249
+ {
250
+ "epoch": 3.38,
251
+ "grad_norm": 3.6060192584991455,
252
+ "learning_rate": 1.6217948717948718e-05,
253
+ "loss": 0.247,
254
+ "step": 1054
255
+ },
256
+ {
257
+ "epoch": 3.48,
258
+ "grad_norm": 2.7116451263427734,
259
+ "learning_rate": 1.5224358974358973e-05,
260
+ "loss": 0.2104,
261
+ "step": 1085
262
+ },
263
+ {
264
+ "epoch": 3.58,
265
+ "grad_norm": 4.838766574859619,
266
+ "learning_rate": 1.423076923076923e-05,
267
+ "loss": 0.234,
268
+ "step": 1116
269
+ },
270
+ {
271
+ "epoch": 3.68,
272
+ "grad_norm": 2.237657070159912,
273
+ "learning_rate": 1.3237179487179489e-05,
274
+ "loss": 0.2033,
275
+ "step": 1147
276
+ },
277
+ {
278
+ "epoch": 3.78,
279
+ "grad_norm": 1.6461944580078125,
280
+ "learning_rate": 1.2243589743589744e-05,
281
+ "loss": 0.2098,
282
+ "step": 1178
283
+ },
284
+ {
285
+ "epoch": 3.88,
286
+ "grad_norm": 6.327276229858398,
287
+ "learning_rate": 1.125e-05,
288
+ "loss": 0.2073,
289
+ "step": 1209
290
+ },
291
+ {
292
+ "epoch": 3.97,
293
+ "grad_norm": 2.9778146743774414,
294
+ "learning_rate": 1.0256410256410256e-05,
295
+ "loss": 0.1988,
296
+ "step": 1240
297
+ },
298
+ {
299
+ "epoch": 4.07,
300
+ "grad_norm": 1.4347281455993652,
301
+ "learning_rate": 9.262820512820514e-06,
302
+ "loss": 0.1664,
303
+ "step": 1271
304
+ },
305
+ {
306
+ "epoch": 4.17,
307
+ "grad_norm": 2.844505786895752,
308
+ "learning_rate": 8.26923076923077e-06,
309
+ "loss": 0.1529,
310
+ "step": 1302
311
+ },
312
+ {
313
+ "epoch": 4.27,
314
+ "grad_norm": 1.985013723373413,
315
+ "learning_rate": 7.275641025641026e-06,
316
+ "loss": 0.1447,
317
+ "step": 1333
318
+ },
319
+ {
320
+ "epoch": 4.37,
321
+ "grad_norm": 2.9127843379974365,
322
+ "learning_rate": 6.282051282051282e-06,
323
+ "loss": 0.1375,
324
+ "step": 1364
325
+ },
326
+ {
327
+ "epoch": 4.47,
328
+ "grad_norm": 2.6174566745758057,
329
+ "learning_rate": 5.288461538461538e-06,
330
+ "loss": 0.1515,
331
+ "step": 1395
332
+ },
333
+ {
334
+ "epoch": 4.57,
335
+ "grad_norm": 1.2411088943481445,
336
+ "learning_rate": 4.294871794871795e-06,
337
+ "loss": 0.1408,
338
+ "step": 1426
339
+ },
340
+ {
341
+ "epoch": 4.67,
342
+ "grad_norm": 1.8333454132080078,
343
+ "learning_rate": 3.3012820512820517e-06,
344
+ "loss": 0.1372,
345
+ "step": 1457
346
+ },
347
+ {
348
+ "epoch": 4.77,
349
+ "grad_norm": 1.785672903060913,
350
+ "learning_rate": 2.307692307692308e-06,
351
+ "loss": 0.1409,
352
+ "step": 1488
353
+ },
354
+ {
355
+ "epoch": 4.87,
356
+ "grad_norm": 3.533236026763916,
357
+ "learning_rate": 1.3141025641025643e-06,
358
+ "loss": 0.1276,
359
+ "step": 1519
360
+ },
361
+ {
362
+ "epoch": 4.97,
363
+ "grad_norm": 1.3145009279251099,
364
+ "learning_rate": 3.205128205128205e-07,
365
+ "loss": 0.1329,
366
+ "step": 1550
367
+ },
368
+ {
369
+ "epoch": 5.0,
370
+ "step": 1560,
371
+ "total_flos": 1.845867535870722e+19,
372
+ "train_loss": 0.4759287901413746,
373
+ "train_runtime": 3971.7695,
374
+ "train_samples_per_second": 3.14,
375
+ "train_steps_per_second": 0.393
376
  }
377
  ],
378
  "logging_steps": 31,
 
380
  "num_input_tokens_seen": 0,
381
  "num_train_epochs": 5,
382
  "save_steps": 500,
383
+ "total_flos": 1.845867535870722e+19,
384
  "train_batch_size": 8,
385
  "trial_name": null,
386
  "trial_params": null