cwaud commited on
Commit
2f3ebd0
1 Parent(s): 8a45eab

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17a52caa1e20e38dfe8eb07879a087d9dcd992146f6063103b794d9b3086a966
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9632e1bff5f17086ad1def3c234561955e256311bd9f2a969b68fe2f4ddf2998
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7836da12d7ca83dc950ebc87ef7fc115575098f1c3336207e44368f32620c412
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f439b78259c1c2fc57babea1b73c33d1273fe60dbc47df07b0b26b74a24f20
3
  size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51e5ba31674e094f8dc7f8773808a529a42a0e24ba3f7bdab7bd92f5c1ee0fc0
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3a4ac9a9f9986597647c8a0e867c4f72eeab3c3d1e43c8f960b44a326903986
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e22fe3d8075184be8a174d539009c2395967dc63a8330ca6156ee33f4ca2c44c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9497c8a8d24f41cf5c0069eb540cebc255b08ff9535263160a28759922eb3e3
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c9a297f4567a0c49fdad083d7daf079fe6bba9f11d7261f1d03aa77e099ed78
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a425f1c819a05abc0c03ce33851974f4dbcd20cfb9efd9dad629120a2b5f92
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b17a256f3cc972905c791170bba62e8bf48140bfcb668be8b9ca74a89159a7d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81cc0f4203a4456d3477f2e743f020a5c58989c2a1cd33891ca202c49ac579ac
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e8ef33d61a22f8317ddd5200f4e1dabd39f9e47b2da21b95b4059442d67ba66
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25aa2f483294991f80a6c969eac63941b33052740b29e7eb2c606cbf255fbb72
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7638214826583862,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 1.839080459770115,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,372 @@
381
  "eval_samples_per_second": 15.294,
382
  "eval_steps_per_second": 3.976,
383
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -395,7 +761,7 @@
395
  "early_stopping_threshold": 0.0
396
  },
397
  "attributes": {
398
- "early_stopping_patience_counter": 0
399
  }
400
  },
401
  "TrainerControl": {
@@ -404,12 +770,12 @@
404
  "should_evaluate": false,
405
  "should_log": false,
406
  "should_save": true,
407
- "should_training_stop": false
408
  },
409
  "attributes": {}
410
  }
411
  },
412
- "total_flos": 5.983936330868982e+17,
413
  "train_batch_size": 1,
414
  "trial_name": null,
415
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7638214826583862,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 3.67816091954023,
5
  "eval_steps": 25,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 15.294,
382
  "eval_steps_per_second": 3.976,
383
  "step": 50
384
+ },
385
+ {
386
+ "epoch": 1.8758620689655172,
387
+ "grad_norm": 0.21353751420974731,
388
+ "learning_rate": 6.311147670162576e-05,
389
+ "loss": 0.663,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 1.9126436781609195,
394
+ "grad_norm": 0.2514137327671051,
395
+ "learning_rate": 6.177090264736525e-05,
396
+ "loss": 0.7005,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 1.9494252873563218,
401
+ "grad_norm": 0.222330704331398,
402
+ "learning_rate": 6.042415061148954e-05,
403
+ "loss": 0.7893,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 1.986206896551724,
408
+ "grad_norm": 0.2652716040611267,
409
+ "learning_rate": 5.907244941233371e-05,
410
+ "loss": 0.7701,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 2.0229885057471266,
415
+ "grad_norm": 0.20406022667884827,
416
+ "learning_rate": 5.771703238400288e-05,
417
+ "loss": 0.557,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 2.0597701149425287,
422
+ "grad_norm": 0.1386067122220993,
423
+ "learning_rate": 5.635913625104e-05,
424
+ "loss": 0.5773,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 2.0965517241379312,
429
+ "grad_norm": 0.18351519107818604,
430
+ "learning_rate": 5.500000000000001e-05,
431
+ "loss": 0.6024,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 2.1333333333333333,
436
+ "grad_norm": 0.19990648329257965,
437
+ "learning_rate": 5.364086374896001e-05,
438
+ "loss": 0.6177,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 2.170114942528736,
443
+ "grad_norm": 0.19572705030441284,
444
+ "learning_rate": 5.2282967615997125e-05,
445
+ "loss": 0.6475,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 2.206896551724138,
450
+ "grad_norm": 0.23045289516448975,
451
+ "learning_rate": 5.092755058766631e-05,
452
+ "loss": 0.7089,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 2.2436781609195404,
457
+ "grad_norm": 0.2674247622489929,
458
+ "learning_rate": 4.9575849388510473e-05,
459
+ "loss": 0.6667,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 2.2804597701149425,
464
+ "grad_norm": 0.2119389772415161,
465
+ "learning_rate": 4.8229097352634765e-05,
466
+ "loss": 0.5402,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 2.317241379310345,
471
+ "grad_norm": 0.19508974254131317,
472
+ "learning_rate": 4.688852329837424e-05,
473
+ "loss": 0.6063,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 2.354022988505747,
478
+ "grad_norm": 0.19634996354579926,
479
+ "learning_rate": 4.5555350407081863e-05,
480
+ "loss": 0.6404,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 2.3908045977011496,
485
+ "grad_norm": 0.21487970650196075,
486
+ "learning_rate": 4.423079510705992e-05,
487
+ "loss": 0.6663,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 2.4275862068965517,
492
+ "grad_norm": 0.2401036024093628,
493
+ "learning_rate": 4.291606596365304e-05,
494
+ "loss": 0.6801,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 2.464367816091954,
499
+ "grad_norm": 0.2552790343761444,
500
+ "learning_rate": 4.161236257651587e-05,
501
+ "loss": 0.7218,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 2.5011494252873563,
506
+ "grad_norm": 0.4108894467353821,
507
+ "learning_rate": 4.032087448506089e-05,
508
+ "loss": 0.6251,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 2.5379310344827584,
513
+ "grad_norm": 0.1864066869020462,
514
+ "learning_rate": 3.904278008308589e-05,
515
+ "loss": 0.5174,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 2.574712643678161,
520
+ "grad_norm": 0.20834487676620483,
521
+ "learning_rate": 3.777924554357096e-05,
522
+ "loss": 0.5929,
523
+ "step": 70
524
+ },
525
+ {
526
+ "epoch": 2.6114942528735634,
527
+ "grad_norm": 0.2088489681482315,
528
+ "learning_rate": 3.653142375462596e-05,
529
+ "loss": 0.6374,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 2.6482758620689655,
534
+ "grad_norm": 0.25101637840270996,
535
+ "learning_rate": 3.530045326755967e-05,
536
+ "loss": 0.6594,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 2.6850574712643676,
541
+ "grad_norm": 0.27176716923713684,
542
+ "learning_rate": 3.408745725803042e-05,
543
+ "loss": 0.6788,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 2.72183908045977,
548
+ "grad_norm": 0.3027721047401428,
549
+ "learning_rate": 3.2893542501225534e-05,
550
+ "loss": 0.7124,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 2.7586206896551726,
555
+ "grad_norm": 0.27195319533348083,
556
+ "learning_rate": 3.1719798362005444e-05,
557
+ "loss": 0.5897,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 2.7586206896551726,
562
+ "eval_loss": 0.7599511742591858,
563
+ "eval_runtime": 3.275,
564
+ "eval_samples_per_second": 15.267,
565
+ "eval_steps_per_second": 3.969,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 2.7954022988505747,
570
+ "grad_norm": 0.17247594892978668,
571
+ "learning_rate": 3.056729580093346e-05,
572
+ "loss": 0.5506,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 2.8321839080459768,
577
+ "grad_norm": 0.2535310685634613,
578
+ "learning_rate": 2.9437086397097995e-05,
579
+ "loss": 0.6236,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 2.8689655172413793,
584
+ "grad_norm": 0.2153378278017044,
585
+ "learning_rate": 2.8330201388619253e-05,
586
+ "loss": 0.6671,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 2.905747126436782,
591
+ "grad_norm": 0.23758837580680847,
592
+ "learning_rate": 2.7247650731715564e-05,
593
+ "loss": 0.6473,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 2.942528735632184,
598
+ "grad_norm": 0.25426074862480164,
599
+ "learning_rate": 2.6190422179188044e-05,
600
+ "loss": 0.6725,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 2.979310344827586,
605
+ "grad_norm": 0.3190159499645233,
606
+ "learning_rate": 2.515948037916423e-05,
607
+ "loss": 0.6748,
608
+ "step": 81
609
+ },
610
+ {
611
+ "epoch": 3.0160919540229885,
612
+ "grad_norm": 0.26219987869262695,
613
+ "learning_rate": 2.415576599492321e-05,
614
+ "loss": 0.5995,
615
+ "step": 82
616
+ },
617
+ {
618
+ "epoch": 3.052873563218391,
619
+ "grad_norm": 0.16341930627822876,
620
+ "learning_rate": 2.3180194846605367e-05,
621
+ "loss": 0.5468,
622
+ "step": 83
623
+ },
624
+ {
625
+ "epoch": 3.089655172413793,
626
+ "grad_norm": 0.18180637061595917,
627
+ "learning_rate": 2.223365707558953e-05,
628
+ "loss": 0.5735,
629
+ "step": 84
630
+ },
631
+ {
632
+ "epoch": 3.1264367816091956,
633
+ "grad_norm": 0.21310141682624817,
634
+ "learning_rate": 2.1317016332300447e-05,
635
+ "loss": 0.636,
636
+ "step": 85
637
+ },
638
+ {
639
+ "epoch": 3.1632183908045977,
640
+ "grad_norm": 0.2241068184375763,
641
+ "learning_rate": 2.043110898818738e-05,
642
+ "loss": 0.6183,
643
+ "step": 86
644
+ },
645
+ {
646
+ "epoch": 3.2,
647
+ "grad_norm": 0.2601270079612732,
648
+ "learning_rate": 1.9576743372592747e-05,
649
+ "loss": 0.6213,
650
+ "step": 87
651
+ },
652
+ {
653
+ "epoch": 3.2367816091954023,
654
+ "grad_norm": 0.3257927894592285,
655
+ "learning_rate": 1.875469903520743e-05,
656
+ "loss": 0.6393,
657
+ "step": 88
658
+ },
659
+ {
660
+ "epoch": 3.2735632183908048,
661
+ "grad_norm": 0.24459469318389893,
662
+ "learning_rate": 1.7965726034785466e-05,
663
+ "loss": 0.5094,
664
+ "step": 89
665
+ },
666
+ {
667
+ "epoch": 3.310344827586207,
668
+ "grad_norm": 0.19675396382808685,
669
+ "learning_rate": 1.7210544254767098e-05,
670
+ "loss": 0.5474,
671
+ "step": 90
672
+ },
673
+ {
674
+ "epoch": 3.3471264367816094,
675
+ "grad_norm": 0.2235768735408783,
676
+ "learning_rate": 1.648984274643487e-05,
677
+ "loss": 0.5492,
678
+ "step": 91
679
+ },
680
+ {
681
+ "epoch": 3.3839080459770114,
682
+ "grad_norm": 0.24168157577514648,
683
+ "learning_rate": 1.58042791002018e-05,
684
+ "loss": 0.6109,
685
+ "step": 92
686
+ },
687
+ {
688
+ "epoch": 3.420689655172414,
689
+ "grad_norm": 0.2579849064350128,
690
+ "learning_rate": 1.515447884560556e-05,
691
+ "loss": 0.6455,
692
+ "step": 93
693
+ },
694
+ {
695
+ "epoch": 3.457471264367816,
696
+ "grad_norm": 0.3024696111679077,
697
+ "learning_rate": 1.4541034880555838e-05,
698
+ "loss": 0.6716,
699
+ "step": 94
700
+ },
701
+ {
702
+ "epoch": 3.4942528735632186,
703
+ "grad_norm": 0.3916833698749542,
704
+ "learning_rate": 1.3964506930355947e-05,
705
+ "loss": 0.6166,
706
+ "step": 95
707
+ },
708
+ {
709
+ "epoch": 3.5310344827586206,
710
+ "grad_norm": 0.23013140261173248,
711
+ "learning_rate": 1.3425421036992098e-05,
712
+ "loss": 0.5145,
713
+ "step": 96
714
+ },
715
+ {
716
+ "epoch": 3.5678160919540227,
717
+ "grad_norm": 0.2350182831287384,
718
+ "learning_rate": 1.292426907915634e-05,
719
+ "loss": 0.5648,
720
+ "step": 97
721
+ },
722
+ {
723
+ "epoch": 3.6045977011494252,
724
+ "grad_norm": 0.24683219194412231,
725
+ "learning_rate": 1.2461508323441185e-05,
726
+ "loss": 0.6,
727
+ "step": 98
728
+ },
729
+ {
730
+ "epoch": 3.6413793103448278,
731
+ "grad_norm": 0.2576511800289154,
732
+ "learning_rate": 1.203756100711545e-05,
733
+ "loss": 0.6375,
734
+ "step": 99
735
+ },
736
+ {
737
+ "epoch": 3.67816091954023,
738
+ "grad_norm": 0.31485050916671753,
739
+ "learning_rate": 1.1652813952861769e-05,
740
+ "loss": 0.6974,
741
+ "step": 100
742
+ },
743
+ {
744
+ "epoch": 3.67816091954023,
745
+ "eval_loss": 0.764268159866333,
746
+ "eval_runtime": 3.2754,
747
+ "eval_samples_per_second": 15.265,
748
+ "eval_steps_per_second": 3.969,
749
+ "step": 100
750
  }
751
  ],
752
  "logging_steps": 1,
 
761
  "early_stopping_threshold": 0.0
762
  },
763
  "attributes": {
764
+ "early_stopping_patience_counter": 1
765
  }
766
  },
767
  "TrainerControl": {
 
770
  "should_evaluate": false,
771
  "should_log": false,
772
  "should_save": true,
773
+ "should_training_stop": true
774
  },
775
  "attributes": {}
776
  }
777
  },
778
+ "total_flos": 1.1966940149176074e+18,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null