iamnguyen committed on
Commit 614c764 · 1 Parent(s): 3cc68ef

Training in progress, step 5376, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1b30a18281b22182316747cceddd8d6c37353221bc3f8fb5d02c882c5629422d
+ oid sha256:ff920e5ccc2d2a405fd728410318b5d77a58a46344ae8542bb3adc83fa0b5aa5
 size 903834408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:cb114aa5f0332140b4eec1de73af442aa382ec1878556383d86000d2acf516d8
+ oid sha256:dc711544bbc463382370e087fb56a499145af6b3d8d135d3d2ad1e1612e329ff
 size 1807824186
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:56b9ce523794195d6fe6eaf298487c89289b615fed50c7eecdaaf4b487304fb9
+ oid sha256:efc2f5216a7387c189c9349463d3b40a111802d247e9267a1c2d9b8f7b01f222
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:9637751fdfaaf8a45d6222ffb7baf0bbdacedfccacc03a39d139d6c493b9ec1d
+ oid sha256:6a2cc71334cc40bd202de7bb17936da773f888ad3b66367935ef5b0e0f47c791
 size 1064
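Each of the four diffs above only swaps a Git LFS pointer file: the pointer records the spec version, the blob's SHA-256 (`oid`), and its size, while the actual weights live in LFS storage. As a minimal sketch (the local path is illustrative and assumes the checkpoint has already been pulled), one could check that a downloaded file matches the `oid` recorded in the new pointer:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file in chunks and return its hex SHA-256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid taken from the model.safetensors pointer diff above; path is hypothetical.
expected = "ff920e5ccc2d2a405fd728410318b5d77a58a46344ae8542bb3adc83fa0b5aa5"
actual = sha256_of("last-checkpoint/model.safetensors")
print("match" if actual == expected else f"mismatch: {actual}")
```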
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 0.3465050505050505,
+ "epoch": 0.3475393939393939,
 "eval_steps": 16,
- "global_step": 5360,
+ "global_step": 5376,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -40542,6 +40542,127 @@
 "eval_samples_per_second": 11.555,
 "eval_steps_per_second": 1.444,
 "step": 5360
+ },
+ {
+ "epoch": 0.34656969696969697,
+ "grad_norm": 0.0666361004114151,
+ "learning_rate": 0.00019445821927260975,
+ "loss": 0.0985,
+ "step": 5361
+ },
+ {
+ "epoch": 0.34663434343434346,
+ "grad_norm": 0.0619664341211319,
+ "learning_rate": 0.0001944559741044043,
+ "loss": 0.0795,
+ "step": 5362
+ },
+ {
+ "epoch": 0.3466989898989899,
+ "grad_norm": 0.07481850683689117,
+ "learning_rate": 0.0001944537284944595,
+ "loss": 0.0889,
+ "step": 5363
+ },
+ {
+ "epoch": 0.3467636363636364,
+ "grad_norm": 0.10630080848932266,
+ "learning_rate": 0.00019445148244278586,
+ "loss": 0.1363,
+ "step": 5364
+ },
+ {
+ "epoch": 0.3468282828282828,
+ "grad_norm": 0.06847912818193436,
+ "learning_rate": 0.00019444923594939386,
+ "loss": 0.0961,
+ "step": 5365
+ },
+ {
+ "epoch": 0.3468929292929293,
+ "grad_norm": 0.06266526877880096,
+ "learning_rate": 0.00019444698901429397,
+ "loss": 0.0847,
+ "step": 5366
+ },
+ {
+ "epoch": 0.34695757575757574,
+ "grad_norm": 0.058274924755096436,
+ "learning_rate": 0.00019444474163749677,
+ "loss": 0.0782,
+ "step": 5367
+ },
+ {
+ "epoch": 0.34702222222222223,
+ "grad_norm": 0.06570993363857269,
+ "learning_rate": 0.00019444249381901272,
+ "loss": 0.093,
+ "step": 5368
+ },
+ {
+ "epoch": 0.34708686868686867,
+ "grad_norm": 0.06814686208963394,
+ "learning_rate": 0.00019444024555885237,
+ "loss": 0.0896,
+ "step": 5369
+ },
+ {
+ "epoch": 0.34715151515151516,
+ "grad_norm": 0.06851497292518616,
+ "learning_rate": 0.0001944379968570262,
+ "loss": 0.1058,
+ "step": 5370
+ },
+ {
+ "epoch": 0.3472161616161616,
+ "grad_norm": 0.058960504829883575,
+ "learning_rate": 0.00019443574771354474,
+ "loss": 0.0793,
+ "step": 5371
+ },
+ {
+ "epoch": 0.3472808080808081,
+ "grad_norm": 0.06058274954557419,
+ "learning_rate": 0.0001944334981284185,
+ "loss": 0.0858,
+ "step": 5372
+ },
+ {
+ "epoch": 0.34734545454545457,
+ "grad_norm": 0.06411627680063248,
+ "learning_rate": 0.00019443124810165802,
+ "loss": 0.0831,
+ "step": 5373
+ },
+ {
+ "epoch": 0.347410101010101,
+ "grad_norm": 0.06605079025030136,
+ "learning_rate": 0.00019442899763327378,
+ "loss": 0.0885,
+ "step": 5374
+ },
+ {
+ "epoch": 0.3474747474747475,
+ "grad_norm": 0.058634303510189056,
+ "learning_rate": 0.00019442674672327638,
+ "loss": 0.0788,
+ "step": 5375
+ },
+ {
+ "epoch": 0.3475393939393939,
+ "grad_norm": 0.06674228608608246,
+ "learning_rate": 0.00019442449537167628,
+ "loss": 0.1054,
+ "step": 5376
+ },
+ {
+ "epoch": 0.3475393939393939,
+ "eval_bleu": 15.692248886076214,
+ "eval_loss": 0.09231525659561157,
+ "eval_runtime": 2.8228,
+ "eval_samples_per_second": 11.336,
+ "eval_steps_per_second": 1.417,
+ "step": 5376
 }
 ],
 "logging_steps": 1,
@@ -40561,7 +40682,7 @@
 "attributes": {}
 }
 },
- "total_flos": 1.044484573888512e+17,
+ "total_flos": 1.0476024382881792e+17,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null