MarkelFe commited on
Commit
cc088a9
1 Parent(s): e400c65

Training in progress, step 210000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:622ffd3a7b017059677381c4957b8684c94feda5c36dd09d4c9eb745833a1cf3
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd19e30a2a650450011c0e6b074763121dae91837a5de2ae7a75d902c60dc2d
3
  size 995605445
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df5e243d791ce4f65dc68bf0f3a46091fb97d6aafd2414ee9a81131404021ee0
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920d137fdcb173efb8049387289fab5904ebed3270ec2f582ab357e4385f15ac
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9c10092a1e7977cf34645ee895c471f8c4d2d3bd2aa8edd2570bdd9e3a7e77a
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99009f31f62eb4abfe3090a9bbfa89c4ba84d5d9f26ec160f92e0423b3ef657b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cdc4afee694df5e30232d1b113751895a7bfe4c051ca10278bb6f9244c467f9
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2632d3faec0736fab7590967000d1b66b3c018c0af2551fd0387b0a028e120fe
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.735296903115826,
5
- "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2572,11 +2572,139 @@
2572
  "eval_samples_per_second": 166.155,
2573
  "eval_steps_per_second": 20.774,
2574
  "step": 200000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2575
  }
2576
  ],
2577
  "max_steps": 633540,
2578
  "num_train_epochs": 15,
2579
- "total_flos": 4.2369106205952e+16,
2580
  "trial_name": null,
2581
  "trial_params": null
2582
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.972061748271616,
5
+ "global_step": 210000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2572
  "eval_samples_per_second": 166.155,
2573
  "eval_steps_per_second": 20.774,
2574
  "step": 200000
2575
+ },
2576
+ {
2577
+ "epoch": 4.75,
2578
+ "learning_rate": 1.3816081068282983e-05,
2579
+ "loss": 2.6777,
2580
+ "step": 200500
2581
+ },
2582
+ {
2583
+ "epoch": 4.76,
2584
+ "learning_rate": 1.3752943776241438e-05,
2585
+ "loss": 2.6712,
2586
+ "step": 201000
2587
+ },
2588
+ {
2589
+ "epoch": 4.77,
2590
+ "learning_rate": 1.3689806484199895e-05,
2591
+ "loss": 2.6751,
2592
+ "step": 201500
2593
+ },
2594
+ {
2595
+ "epoch": 4.78,
2596
+ "learning_rate": 1.362666919215835e-05,
2597
+ "loss": 2.677,
2598
+ "step": 202000
2599
+ },
2600
+ {
2601
+ "epoch": 4.79,
2602
+ "learning_rate": 1.3563531900116805e-05,
2603
+ "loss": 2.6732,
2604
+ "step": 202500
2605
+ },
2606
+ {
2607
+ "epoch": 4.81,
2608
+ "learning_rate": 1.350039460807526e-05,
2609
+ "loss": 2.6634,
2610
+ "step": 203000
2611
+ },
2612
+ {
2613
+ "epoch": 4.82,
2614
+ "learning_rate": 1.3437257316033717e-05,
2615
+ "loss": 2.6684,
2616
+ "step": 203500
2617
+ },
2618
+ {
2619
+ "epoch": 4.83,
2620
+ "learning_rate": 1.3374120023992172e-05,
2621
+ "loss": 2.6892,
2622
+ "step": 204000
2623
+ },
2624
+ {
2625
+ "epoch": 4.84,
2626
+ "learning_rate": 1.3310982731950629e-05,
2627
+ "loss": 2.676,
2628
+ "step": 204500
2629
+ },
2630
+ {
2631
+ "epoch": 4.85,
2632
+ "learning_rate": 1.3247845439909084e-05,
2633
+ "loss": 2.6605,
2634
+ "step": 205000
2635
+ },
2636
+ {
2637
+ "epoch": 4.87,
2638
+ "learning_rate": 1.318470814786754e-05,
2639
+ "loss": 2.6773,
2640
+ "step": 205500
2641
+ },
2642
+ {
2643
+ "epoch": 4.88,
2644
+ "learning_rate": 1.3121570855825996e-05,
2645
+ "loss": 2.6702,
2646
+ "step": 206000
2647
+ },
2648
+ {
2649
+ "epoch": 4.89,
2650
+ "learning_rate": 1.305843356378445e-05,
2651
+ "loss": 2.6662,
2652
+ "step": 206500
2653
+ },
2654
+ {
2655
+ "epoch": 4.9,
2656
+ "learning_rate": 1.2995296271742906e-05,
2657
+ "loss": 2.6775,
2658
+ "step": 207000
2659
+ },
2660
+ {
2661
+ "epoch": 4.91,
2662
+ "learning_rate": 1.2932158979701361e-05,
2663
+ "loss": 2.6829,
2664
+ "step": 207500
2665
+ },
2666
+ {
2667
+ "epoch": 4.92,
2668
+ "learning_rate": 1.2869021687659818e-05,
2669
+ "loss": 2.6679,
2670
+ "step": 208000
2671
+ },
2672
+ {
2673
+ "epoch": 4.94,
2674
+ "learning_rate": 1.2805884395618273e-05,
2675
+ "loss": 2.6671,
2676
+ "step": 208500
2677
+ },
2678
+ {
2679
+ "epoch": 4.95,
2680
+ "learning_rate": 1.274274710357673e-05,
2681
+ "loss": 2.6746,
2682
+ "step": 209000
2683
+ },
2684
+ {
2685
+ "epoch": 4.96,
2686
+ "learning_rate": 1.2679609811535185e-05,
2687
+ "loss": 2.6955,
2688
+ "step": 209500
2689
+ },
2690
+ {
2691
+ "epoch": 4.97,
2692
+ "learning_rate": 1.2616472519493642e-05,
2693
+ "loss": 2.6812,
2694
+ "step": 210000
2695
+ },
2696
+ {
2697
+ "epoch": 4.97,
2698
+ "eval_loss": 3.1202192306518555,
2699
+ "eval_runtime": 112.9566,
2700
+ "eval_samples_per_second": 166.188,
2701
+ "eval_steps_per_second": 20.778,
2702
+ "step": 210000
2703
  }
2704
  ],
2705
  "max_steps": 633540,
2706
  "num_train_epochs": 15,
2707
+ "total_flos": 4.4469447088896e+16,
2708
  "trial_name": null,
2709
  "trial_params": null
2710
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df5e243d791ce4f65dc68bf0f3a46091fb97d6aafd2414ee9a81131404021ee0
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920d137fdcb173efb8049387289fab5904ebed3270ec2f582ab357e4385f15ac
3
  size 510398013