masatochi committed on
Commit
be0a068
1 Parent(s): b262227

Training in progress, step 125, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e7950327da01392443cc2b7f37b2c125ed93ce974ba68872e001c1e4864c342
3
  size 59827904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912c2125854beec112043bed7df3a59b52ffafb2c8e37e37de6b7d6428738e36
3
  size 59827904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce3bb0ac6988a7ccfed473206a359b6472da41742b949fe87b1a58da8fa50e05
3
  size 30875540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0c11c7845b668b903082c2011f56571536b84f2bdfd426b174fcd31b01a682d
3
  size 30875540
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc20ee7bacf5c95c2142e506581a4d197785697bff9c378cd497db51cba9e5a9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057ecbb0fa4569828a6baa3b23b2b8db44c4a71010246fbd0848ee61061b4c1a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba16893b2c4735e8eaf86592331a8dda9b3bcccecd302e828000513277487239
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b577939e8ae09a93269bdd1ffbcc4ef41ec4027476aa914ab19034c5a6ebf492
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05868329359985329,
5
  "eval_steps": 34,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -879,6 +879,41 @@
879
  "learning_rate": 9.077316405366981e-05,
880
  "loss": 9.4713,
881
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
882
  }
883
  ],
884
  "logging_steps": 1,
@@ -898,7 +933,7 @@
898
  "attributes": {}
899
  }
900
  },
901
- "total_flos": 2.6453756111486976e+17,
902
  "train_batch_size": 3,
903
  "trial_name": null,
904
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.061128430833180515,
5
  "eval_steps": 34,
6
+ "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
879
  "learning_rate": 9.077316405366981e-05,
880
  "loss": 9.4713,
881
  "step": 120
882
+ },
883
+ {
884
+ "epoch": 0.05917232104651873,
885
+ "grad_norm": Infinity,
886
+ "learning_rate": 8.893473181084994e-05,
887
+ "loss": 8.8276,
888
+ "step": 121
889
+ },
890
+ {
891
+ "epoch": 0.05966134849318418,
892
+ "grad_norm": Infinity,
893
+ "learning_rate": 8.710007834697969e-05,
894
+ "loss": 8.9863,
895
+ "step": 122
896
+ },
897
+ {
898
+ "epoch": 0.06015037593984962,
899
+ "grad_norm": Infinity,
900
+ "learning_rate": 8.526983019453623e-05,
901
+ "loss": 9.3632,
902
+ "step": 123
903
+ },
904
+ {
905
+ "epoch": 0.06063940338651507,
906
+ "grad_norm": Infinity,
907
+ "learning_rate": 8.344461238158699e-05,
908
+ "loss": 9.7675,
909
+ "step": 124
910
+ },
911
+ {
912
+ "epoch": 0.061128430833180515,
913
+ "grad_norm": 2.9368804950629417e+18,
914
+ "learning_rate": 8.162504821834295e-05,
915
+ "loss": 8.2794,
916
+ "step": 125
917
  }
918
  ],
919
  "logging_steps": 1,
 
933
  "attributes": {}
934
  }
935
  },
936
+ "total_flos": 2.75559959494656e+17,
937
  "train_batch_size": 3,
938
  "trial_name": null,
939
  "trial_params": null