jflotz commited on
Commit
139feeb
1 Parent(s): e79159e

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b2aa3ac438aafc200ae70f8085f90242dc992cf907ac3f0294370fe40a0df68
3
  size 50044241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44519022516715e3b4aa9fbebd26a8f16d69cc1037c43939223444845c039b2a
3
  size 50044241
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d18c8e7e9886f3271a24bd5849f3f89c80dbc60739565e2d86ced010c5eeb20
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c31ddb9e289da6dc236cf93f3881df915ba1b345fb3bc4108ca78271c87c7807
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ede58e3004a4327d493f9d870219b307a5ef56e0c4fc6821301c4f24058b122
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95236c288314cf7e95c084d5233e1be1e022395a42503d56c52a71f4cff32b4
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7750ccd53e61fd7fcec6ad8e54086c4abb8aa56c6a0781b4ecadd56cbb87c42
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9511edec0a698219a4fec94630f2bf1ec60055a31bda0393e6aadb5a36db13d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7844396859386151,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -806,11 +806,211 @@
806
  "eval_samples_per_second": 1057.979,
807
  "eval_steps_per_second": 16.581,
808
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  }
810
  ],
811
  "max_steps": 250000,
812
  "num_train_epochs": 12,
813
- "total_flos": 6.406648794810428e+20,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.230549607423269,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
806
  "eval_samples_per_second": 1057.979,
807
  "eval_steps_per_second": 16.581,
808
  "step": 40000
809
+ },
810
+ {
811
+ "epoch": 1.81,
812
+ "learning_rate": 0.0005799963048115559,
813
+ "loss": 0.6757,
814
+ "step": 40500
815
+ },
816
+ {
817
+ "epoch": 1.83,
818
+ "learning_rate": 0.0005792840633370341,
819
+ "loss": 0.6575,
820
+ "step": 41000
821
+ },
822
+ {
823
+ "epoch": 1.83,
824
+ "eval_loss": 0.6465174555778503,
825
+ "eval_runtime": 2.2243,
826
+ "eval_samples_per_second": 1032.694,
827
+ "eval_steps_per_second": 16.185,
828
+ "step": 41000
829
+ },
830
+ {
831
+ "epoch": 1.85,
832
+ "learning_rate": 0.0005785598237890247,
833
+ "loss": 0.6461,
834
+ "step": 41500
835
+ },
836
+ {
837
+ "epoch": 1.87,
838
+ "learning_rate": 0.0005778236178481119,
839
+ "loss": 0.6373,
840
+ "step": 42000
841
+ },
842
+ {
843
+ "epoch": 1.87,
844
+ "eval_loss": 0.631793737411499,
845
+ "eval_runtime": 2.22,
846
+ "eval_samples_per_second": 1034.694,
847
+ "eval_steps_per_second": 16.216,
848
+ "step": 42000
849
+ },
850
+ {
851
+ "epoch": 1.9,
852
+ "learning_rate": 0.0005770754777183285,
853
+ "loss": 0.6309,
854
+ "step": 42500
855
+ },
856
+ {
857
+ "epoch": 1.92,
858
+ "learning_rate": 0.0005763154361257473,
859
+ "loss": 0.6257,
860
+ "step": 43000
861
+ },
862
+ {
863
+ "epoch": 1.92,
864
+ "eval_loss": 0.6184197664260864,
865
+ "eval_runtime": 2.2848,
866
+ "eval_samples_per_second": 1005.32,
867
+ "eval_steps_per_second": 15.756,
868
+ "step": 43000
869
+ },
870
+ {
871
+ "epoch": 1.94,
872
+ "learning_rate": 0.0005755435263170498,
873
+ "loss": 0.623,
874
+ "step": 43500
875
+ },
876
+ {
877
+ "epoch": 1.96,
878
+ "learning_rate": 0.0005747597820580717,
879
+ "loss": 0.621,
880
+ "step": 44000
881
+ },
882
+ {
883
+ "epoch": 1.96,
884
+ "eval_loss": 0.6136025786399841,
885
+ "eval_runtime": 2.2706,
886
+ "eval_samples_per_second": 1011.621,
887
+ "eval_steps_per_second": 15.855,
888
+ "step": 44000
889
+ },
890
+ {
891
+ "epoch": 1.99,
892
+ "learning_rate": 0.000573964237632326,
893
+ "loss": 0.6195,
894
+ "step": 44500
895
+ },
896
+ {
897
+ "epoch": 2.01,
898
+ "learning_rate": 0.0005731569278395029,
899
+ "loss": 0.6183,
900
+ "step": 45000
901
+ },
902
+ {
903
+ "epoch": 2.01,
904
+ "eval_loss": 0.6127315163612366,
905
+ "eval_runtime": 2.1317,
906
+ "eval_samples_per_second": 1077.566,
907
+ "eval_steps_per_second": 16.888,
908
+ "step": 45000
909
+ },
910
+ {
911
+ "epoch": 2.03,
912
+ "learning_rate": 0.0005723378879939481,
913
+ "loss": 0.6172,
914
+ "step": 45500
915
+ },
916
+ {
917
+ "epoch": 2.05,
918
+ "learning_rate": 0.0005715071539231178,
919
+ "loss": 0.6165,
920
+ "step": 46000
921
+ },
922
+ {
923
+ "epoch": 2.05,
924
+ "eval_loss": 0.6103396415710449,
925
+ "eval_runtime": 2.1687,
926
+ "eval_samples_per_second": 1059.145,
927
+ "eval_steps_per_second": 16.6,
928
+ "step": 46000
929
+ },
930
+ {
931
+ "epoch": 2.07,
932
+ "learning_rate": 0.0005706647619660116,
933
+ "loss": 0.6155,
934
+ "step": 46500
935
+ },
936
+ {
937
+ "epoch": 2.1,
938
+ "learning_rate": 0.0005698107489715823,
939
+ "loss": 0.612,
940
+ "step": 47000
941
+ },
942
+ {
943
+ "epoch": 2.1,
944
+ "eval_loss": 0.6013069748878479,
945
+ "eval_runtime": 2.2612,
946
+ "eval_samples_per_second": 1015.821,
947
+ "eval_steps_per_second": 15.921,
948
+ "step": 47000
949
+ },
950
+ {
951
+ "epoch": 2.12,
952
+ "learning_rate": 0.0005689451522971252,
953
+ "loss": 0.6068,
954
+ "step": 47500
955
+ },
956
+ {
957
+ "epoch": 2.14,
958
+ "learning_rate": 0.0005680680098066429,
959
+ "loss": 0.6037,
960
+ "step": 48000
961
+ },
962
+ {
963
+ "epoch": 2.14,
964
+ "eval_loss": 0.5942632555961609,
965
+ "eval_runtime": 2.2238,
966
+ "eval_samples_per_second": 1032.912,
967
+ "eval_steps_per_second": 16.188,
968
+ "step": 48000
969
+ },
970
+ {
971
+ "epoch": 2.16,
972
+ "learning_rate": 0.0005671793598691895,
973
+ "loss": 0.6015,
974
+ "step": 48500
975
+ },
976
+ {
977
+ "epoch": 2.19,
978
+ "learning_rate": 0.0005662792413571921,
979
+ "loss": 0.6,
980
+ "step": 49000
981
+ },
982
+ {
983
+ "epoch": 2.19,
984
+ "eval_loss": 0.5915176868438721,
985
+ "eval_runtime": 2.2389,
986
+ "eval_samples_per_second": 1025.951,
987
+ "eval_steps_per_second": 16.079,
988
+ "step": 49000
989
+ },
990
+ {
991
+ "epoch": 2.21,
992
+ "learning_rate": 0.0005653676936447504,
993
+ "loss": 0.5986,
994
+ "step": 49500
995
+ },
996
+ {
997
+ "epoch": 2.23,
998
+ "learning_rate": 0.0005644447566059142,
999
+ "loss": 0.5973,
1000
+ "step": 50000
1001
+ },
1002
+ {
1003
+ "epoch": 2.23,
1004
+ "eval_loss": 0.5880685448646545,
1005
+ "eval_runtime": 2.2056,
1006
+ "eval_samples_per_second": 1041.459,
1007
+ "eval_steps_per_second": 16.322,
1008
+ "step": 50000
1009
  }
1010
  ],
1011
  "max_steps": 250000,
1012
  "num_train_epochs": 12,
1013
+ "total_flos": 8.008257503646733e+20,
1014
  "trial_name": null,
1015
  "trial_params": null
1016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d18c8e7e9886f3271a24bd5849f3f89c80dbc60739565e2d86ced010c5eeb20
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c31ddb9e289da6dc236cf93f3881df915ba1b345fb3bc4108ca78271c87c7807
3
  size 25761253