masatochi committed
Commit dd14124
1 Parent(s): 26dd814

Training in progress, step 110, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac183d4e3bfb289b72fde08aa4baa18edb218bf3a85ffc21baef8c9e8c0e1162
+oid sha256:261d77ce7ab15ec1632ae754a5bf3877455f864e510ce3b388967e414c46403d
 size 22573704
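Each checkpoint binary in this commit is stored as a Git LFS pointer (version, oid sha256, size). If the LFS blobs have been fetched locally, a pointer can be sanity-checked against the file on disk. The sketch below is a minimal example using the new adapter_model.safetensors pointer above; verify_lfs_pointer is a hypothetical helper name, not part of any library.

import hashlib
import os

def verify_lfs_pointer(local_path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a downloaded LFS object against the oid/size recorded in its pointer file."""
    if os.path.getsize(local_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(local_path, "rb") as f:
        # Stream the file in 1 MiB chunks to avoid loading it all into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new pointer for last-checkpoint/adapter_model.safetensors.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "261d77ce7ab15ec1632ae754a5bf3877455f864e510ce3b388967e414c46403d",
    22573704,
))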
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f925744c6fae1afd5a9a43ee550e41eb8783773a2f9ae74177e5221304945938
+oid sha256:fc4f4b7834272c51f494774a035366f28694ccb4e7c34efb5c050c467ac11a74
 size 11710970
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dedf35ba75ba42e0333d411cfe9a97e5addfe3e2f5fe02a25acdf6a6edac029
+oid sha256:59e17e0dfc13384541ffecf7da21d366d7aa79296745bb1827f3c372bace18c9
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8c855c846898181ed358c1ef65b19ad1435172d9025fde7f25f4580bfc48faa
+oid sha256:802e09b6cc63e64e726d0b68ba37b81d6a6fcf54cdf00e4821b3e38426a8a5c4
 size 1064
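optimizer.pt, scheduler.pt, and rng_state.pth are the torch-serialized state the Trainer writes alongside the adapter weights. A rough sketch for peeking at them locally, assuming the LFS objects have been pulled and the files are trusted (they are ordinary pickles, so weights_only=False is needed):

import torch

# Map everything to CPU so no GPU is required just to inspect the checkpoint.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

# Optimizer state is a state_dict: hyperparameter groups plus per-parameter buffers.
print(list(optimizer_state.keys()))
# Scheduler state records, e.g., the last step count and learning rate(s).
print(scheduler_state)
# RNG state bundles the Python/NumPy/torch generators so a resume is reproducible.
print(rng_state.keys() if isinstance(rng_state, dict) else type(rng_state))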
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.051347881899871634,
+  "epoch": 0.05379301913319885,
   "eval_steps": 34,
-  "global_step": 105,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -774,6 +774,41 @@
       "learning_rate": 0.00011837495178165706,
       "loss": 0.9059,
       "step": 105
+    },
+    {
+      "epoch": 0.05183690934653708,
+      "grad_norm": 1.8266162872314453,
+      "learning_rate": 0.000116555387618413,
+      "loss": 1.1925,
+      "step": 106
+    },
+    {
+      "epoch": 0.05232593679320252,
+      "grad_norm": 1.780070424079895,
+      "learning_rate": 0.00011473016980546377,
+      "loss": 1.248,
+      "step": 107
+    },
+    {
+      "epoch": 0.052814964239867965,
+      "grad_norm": 2.0775444507598877,
+      "learning_rate": 0.00011289992165302035,
+      "loss": 1.3379,
+      "step": 108
+    },
+    {
+      "epoch": 0.05330399168653341,
+      "grad_norm": 1.8501131534576416,
+      "learning_rate": 0.00011106526818915008,
+      "loss": 1.3934,
+      "step": 109
+    },
+    {
+      "epoch": 0.05379301913319885,
+      "grad_norm": 1.610377311706543,
+      "learning_rate": 0.00010922683594633021,
+      "loss": 1.2629,
+      "step": 110
     }
   ],
   "logging_steps": 1,
@@ -793,7 +828,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.061746989039616e+16,
+  "total_flos": 6.350401607565312e+16,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null