MarkelFe commited on
Commit
83bdbc2
1 Parent(s): 25c44fc

Training in progress, step 440000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f901372af6bb5b8376fb0e8c20402718b44a70119bbfac36dcede9e02013f02
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35d81de4abe2a01fcba75c50c9cab620ae3a65b0f95ffcc3a534ae9481509bf9
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68bd329f4168f6dda7d79fde6f824423ece665c87015507338729a29734782dc
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58855d5f2b4933bc7ddfa06dc9f27291b7fece9d81c961cb18c4fbd189f9d2b6
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b2d1c20de5c0b926a7cd672fd77f7fbe7fbc7338cfbe0b012185d341668909e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afb4822acf65cc3a55faa6756d51c5357c415e3967748bd0784d6c7fc546fb6f
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.180888341699024,
5
- "global_step": 430000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5516,11 +5516,139 @@
5516
  "eval_samples_per_second": 166.474,
5517
  "eval_steps_per_second": 20.814,
5518
  "step": 430000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5519
  }
5520
  ],
5521
  "max_steps": 633540,
5522
  "num_train_epochs": 15,
5523
- "total_flos": 9.0761179511808e+16,
5524
  "trial_name": null,
5525
  "trial_params": null
5526
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.417653186854816,
5
+ "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5516
  "eval_samples_per_second": 166.474,
5517
  "eval_steps_per_second": 20.814,
5518
  "step": 430000
5519
+ },
5520
+ {
5521
+ "epoch": 10.19,
5522
+ "learning_rate": 0.0,
5523
+ "loss": 2.414,
5524
+ "step": 430500
5525
+ },
5526
+ {
5527
+ "epoch": 10.2,
5528
+ "learning_rate": 0.0,
5529
+ "loss": 2.4156,
5530
+ "step": 431000
5531
+ },
5532
+ {
5533
+ "epoch": 10.22,
5534
+ "learning_rate": 0.0,
5535
+ "loss": 2.4344,
5536
+ "step": 431500
5537
+ },
5538
+ {
5539
+ "epoch": 10.23,
5540
+ "learning_rate": 0.0,
5541
+ "loss": 2.4329,
5542
+ "step": 432000
5543
+ },
5544
+ {
5545
+ "epoch": 10.24,
5546
+ "learning_rate": 0.0,
5547
+ "loss": 2.43,
5548
+ "step": 432500
5549
+ },
5550
+ {
5551
+ "epoch": 10.25,
5552
+ "learning_rate": 0.0,
5553
+ "loss": 2.421,
5554
+ "step": 433000
5555
+ },
5556
+ {
5557
+ "epoch": 10.26,
5558
+ "learning_rate": 0.0,
5559
+ "loss": 2.445,
5560
+ "step": 433500
5561
+ },
5562
+ {
5563
+ "epoch": 10.28,
5564
+ "learning_rate": 0.0,
5565
+ "loss": 2.4289,
5566
+ "step": 434000
5567
+ },
5568
+ {
5569
+ "epoch": 10.29,
5570
+ "learning_rate": 0.0,
5571
+ "loss": 2.4156,
5572
+ "step": 434500
5573
+ },
5574
+ {
5575
+ "epoch": 10.3,
5576
+ "learning_rate": 0.0,
5577
+ "loss": 2.4156,
5578
+ "step": 435000
5579
+ },
5580
+ {
5581
+ "epoch": 10.31,
5582
+ "learning_rate": 0.0,
5583
+ "loss": 2.4323,
5584
+ "step": 435500
5585
+ },
5586
+ {
5587
+ "epoch": 10.32,
5588
+ "learning_rate": 0.0,
5589
+ "loss": 2.4255,
5590
+ "step": 436000
5591
+ },
5592
+ {
5593
+ "epoch": 10.33,
5594
+ "learning_rate": 0.0,
5595
+ "loss": 2.4298,
5596
+ "step": 436500
5597
+ },
5598
+ {
5599
+ "epoch": 10.35,
5600
+ "learning_rate": 0.0,
5601
+ "loss": 2.4263,
5602
+ "step": 437000
5603
+ },
5604
+ {
5605
+ "epoch": 10.36,
5606
+ "learning_rate": 0.0,
5607
+ "loss": 2.4381,
5608
+ "step": 437500
5609
+ },
5610
+ {
5611
+ "epoch": 10.37,
5612
+ "learning_rate": 0.0,
5613
+ "loss": 2.4327,
5614
+ "step": 438000
5615
+ },
5616
+ {
5617
+ "epoch": 10.38,
5618
+ "learning_rate": 0.0,
5619
+ "loss": 2.4352,
5620
+ "step": 438500
5621
+ },
5622
+ {
5623
+ "epoch": 10.39,
5624
+ "learning_rate": 0.0,
5625
+ "loss": 2.4329,
5626
+ "step": 439000
5627
+ },
5628
+ {
5629
+ "epoch": 10.41,
5630
+ "learning_rate": 0.0,
5631
+ "loss": 2.4273,
5632
+ "step": 439500
5633
+ },
5634
+ {
5635
+ "epoch": 10.42,
5636
+ "learning_rate": 0.0,
5637
+ "loss": 2.4106,
5638
+ "step": 440000
5639
+ },
5640
+ {
5641
+ "epoch": 10.42,
5642
+ "eval_loss": 3.1522228717803955,
5643
+ "eval_runtime": 112.8716,
5644
+ "eval_samples_per_second": 166.313,
5645
+ "eval_steps_per_second": 20.794,
5646
+ "step": 440000
5647
  }
5648
  ],
5649
  "max_steps": 633540,
5650
  "num_train_epochs": 15,
5651
+ "total_flos": 9.2865123366912e+16,
5652
  "trial_name": null,
5653
  "trial_params": null
5654
  }