MarkelFe committed on
Commit
f86303f
1 Parent(s): 2a24b9e

Training in progress, step 40000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43f9fd3bc16beea82ad987b4bff078f2c2c3f4fcad8460dbb8eb220175985152
3
  size 995605189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dafd1a597d541396efb5c638af9a54bbc8ac18c68cd48a20f76d0ca2bc1e01ef
3
  size 995605189
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5447a2edf6b2eb946d8d40d9f690fbbabccb89792a44d56a897ebdbe771a9946
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529a26013828a3643211b2475be16389f662dc8c3c06d5719b6a70819da68c8a
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3021a61927d9bb43a2f41d0611bdb8462cf969c0d98440929f2484f2963b1a1
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3004509848150ee75a25be67dfe66552d50a705fc14ae18f62d4bd5e0795e8c
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:160cfc175cb0a133b8c88162ad6a65e1d4d7974bec1e5017230d638bf8bdc3ea
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91a499652f62153d33f4dd0503b07f247980d8d57628f7715c2fef0cb3d0b038
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7102945354673738,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -396,11 +396,139 @@
396
  "eval_samples_per_second": 165.982,
397
  "eval_steps_per_second": 20.752,
398
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  }
400
  ],
401
  "max_steps": 633540,
402
  "num_train_epochs": 15,
403
- "total_flos": 6497673481728000.0,
404
  "trial_name": null,
405
  "trial_params": null
406
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9470593806231651,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
396
  "eval_samples_per_second": 165.982,
397
  "eval_steps_per_second": 20.752,
398
  "step": 30000
399
+ },
400
+ {
401
+ "epoch": 0.72,
402
+ "learning_rate": 3.528276036240806e-05,
403
+ "loss": 3.2187,
404
+ "step": 30500
405
+ },
406
+ {
407
+ "epoch": 0.73,
408
+ "learning_rate": 3.521962307036651e-05,
409
+ "loss": 3.2015,
410
+ "step": 31000
411
+ },
412
+ {
413
+ "epoch": 0.75,
414
+ "learning_rate": 3.515648577832497e-05,
415
+ "loss": 3.2153,
416
+ "step": 31500
417
+ },
418
+ {
419
+ "epoch": 0.76,
420
+ "learning_rate": 3.509334848628343e-05,
421
+ "loss": 3.2242,
422
+ "step": 32000
423
+ },
424
+ {
425
+ "epoch": 0.77,
426
+ "learning_rate": 3.503021119424188e-05,
427
+ "loss": 3.2025,
428
+ "step": 32500
429
+ },
430
+ {
431
+ "epoch": 0.78,
432
+ "learning_rate": 3.496707390220034e-05,
433
+ "loss": 3.2164,
434
+ "step": 33000
435
+ },
436
+ {
437
+ "epoch": 0.79,
438
+ "learning_rate": 3.4903936610158796e-05,
439
+ "loss": 3.1862,
440
+ "step": 33500
441
+ },
442
+ {
443
+ "epoch": 0.81,
444
+ "learning_rate": 3.484079931811725e-05,
445
+ "loss": 3.207,
446
+ "step": 34000
447
+ },
448
+ {
449
+ "epoch": 0.82,
450
+ "learning_rate": 3.4777662026075706e-05,
451
+ "loss": 3.1953,
452
+ "step": 34500
453
+ },
454
+ {
455
+ "epoch": 0.83,
456
+ "learning_rate": 3.471452473403416e-05,
457
+ "loss": 3.1788,
458
+ "step": 35000
459
+ },
460
+ {
461
+ "epoch": 0.84,
462
+ "learning_rate": 3.4651387441992616e-05,
463
+ "loss": 3.1987,
464
+ "step": 35500
465
+ },
466
+ {
467
+ "epoch": 0.85,
468
+ "learning_rate": 3.4588250149951075e-05,
469
+ "loss": 3.1914,
470
+ "step": 36000
471
+ },
472
+ {
473
+ "epoch": 0.86,
474
+ "learning_rate": 3.4525112857909527e-05,
475
+ "loss": 3.2037,
476
+ "step": 36500
477
+ },
478
+ {
479
+ "epoch": 0.88,
480
+ "learning_rate": 3.4461975565867985e-05,
481
+ "loss": 3.1951,
482
+ "step": 37000
483
+ },
484
+ {
485
+ "epoch": 0.89,
486
+ "learning_rate": 3.4398838273826444e-05,
487
+ "loss": 3.1909,
488
+ "step": 37500
489
+ },
490
+ {
491
+ "epoch": 0.9,
492
+ "learning_rate": 3.433570098178489e-05,
493
+ "loss": 3.2012,
494
+ "step": 38000
495
+ },
496
+ {
497
+ "epoch": 0.91,
498
+ "learning_rate": 3.427256368974335e-05,
499
+ "loss": 3.2092,
500
+ "step": 38500
501
+ },
502
+ {
503
+ "epoch": 0.92,
504
+ "learning_rate": 3.4209426397701805e-05,
505
+ "loss": 3.2108,
506
+ "step": 39000
507
+ },
508
+ {
509
+ "epoch": 0.94,
510
+ "learning_rate": 3.414628910566026e-05,
511
+ "loss": 3.1818,
512
+ "step": 39500
513
+ },
514
+ {
515
+ "epoch": 0.95,
516
+ "learning_rate": 3.4083151813618716e-05,
517
+ "loss": 3.2072,
518
+ "step": 40000
519
+ },
520
+ {
521
+ "epoch": 0.95,
522
+ "eval_loss": 3.1675474643707275,
523
+ "eval_runtime": 113.0729,
524
+ "eval_samples_per_second": 166.017,
525
+ "eval_steps_per_second": 20.757,
526
+ "step": 40000
527
  }
528
  ],
529
  "max_steps": 633540,
530
  "num_train_epochs": 15,
531
+ "total_flos": 8586525680640000.0,
532
  "trial_name": null,
533
  "trial_params": null
534
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5447a2edf6b2eb946d8d40d9f690fbbabccb89792a44d56a897ebdbe771a9946
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529a26013828a3643211b2475be16389f662dc8c3c06d5719b6a70819da68c8a
3
  size 510398013