Training in progress, step 125, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e7950327da01392443cc2b7f37b2c125ed93ce974ba68872e001c1e4864c342
 size 59827904

 version https://git-lfs.github.com/spec/v1
+oid sha256:912c2125854beec112043bed7df3a59b52ffafb2c8e37e37de6b7d6428738e36
 size 59827904

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce3bb0ac6988a7ccfed473206a359b6472da41742b949fe87b1a58da8fa50e05
 size 30875540

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0c11c7845b668b903082c2011f56571536b84f2bdfd426b174fcd31b01a682d
 size 30875540

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc20ee7bacf5c95c2142e506581a4d197785697bff9c378cd497db51cba9e5a9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:057ecbb0fa4569828a6baa3b23b2b8db44c4a71010246fbd0848ee61061b4c1a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba16893b2c4735e8eaf86592331a8dda9b3bcccecd302e828000513277487239
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b577939e8ae09a93269bdd1ffbcc4ef41ec4027476aa914ab19034c5a6ebf492
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.05868329359985329,
   "eval_steps": 34,
-  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -879,6 +879,41 @@
       "learning_rate": 9.077316405366981e-05,
       "loss": 9.4713,
       "step": 120
     }
   ],
   "logging_steps": 1,
@@ -898,7 +933,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.6453756111486976e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.061128430833180515,
   "eval_steps": 34,
+  "global_step": 125,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.077316405366981e-05,
       "loss": 9.4713,
       "step": 120
+    },
+    {
+      "epoch": 0.05917232104651873,
+      "grad_norm": Infinity,
+      "learning_rate": 8.893473181084994e-05,
+      "loss": 8.8276,
+      "step": 121
+    },
+    {
+      "epoch": 0.05966134849318418,
+      "grad_norm": Infinity,
+      "learning_rate": 8.710007834697969e-05,
+      "loss": 8.9863,
+      "step": 122
+    },
+    {
+      "epoch": 0.06015037593984962,
+      "grad_norm": Infinity,
+      "learning_rate": 8.526983019453623e-05,
+      "loss": 9.3632,
+      "step": 123
+    },
+    {
+      "epoch": 0.06063940338651507,
+      "grad_norm": Infinity,
+      "learning_rate": 8.344461238158699e-05,
+      "loss": 9.7675,
+      "step": 124
+    },
+    {
+      "epoch": 0.061128430833180515,
+      "grad_norm": 2.9368804950629417e+18,
+      "learning_rate": 8.162504821834295e-05,
+      "loss": 8.2794,
+      "step": 125
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.75559959494656e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null