Training in progress, step 240000

Browse files

Files changed (6) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +131 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c9d709a3c4448fe60ea415f10ee91a070b24f9162901c412c4e21dcfd54ab7b
 size 995605445

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6513746a4d26d5fd8e8b1510e4afbe1bf070c07834ed6053beed680b7ca90b2
 size 995605445

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbc9c398625e51025e3deb582f2b959048d2b72dfb099c42bbf32395fdf91c5a
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:eefbf39ee5b2d619291bd576add06f62ce7d41ddd117d09ee485a01c6448e0e6
 size 510398013

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75d2ca6ab96f3202129b4649f78a5046017c32ffaaa3dc647813c032cd6852a6
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:35f8148796f898b2b4fcf5c05df1927f2a493f35953ae82aca4269bacc8abba0
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d94ea35adb5eefa4698c691c0c27296b7c47b8b7685eea584b57c3f9813cff98
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff1a502467cfb3dd9e512dddfc344981425abb5014f29f7395635782b358b80b
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.445591438583199,
-  "global_step": 230000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2956,11 +2956,139 @@
       "eval_samples_per_second": 166.094,
       "eval_steps_per_second": 20.766,
       "step": 230000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
-  "total_flos": 4.8674597867136e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.68235628373899,
+  "global_step": 240000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 166.094,
       "eval_steps_per_second": 20.766,
       "step": 230000
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 1.0027843545790321e-05,
+      "loss": 2.56,
+      "step": 230500
+    },
+    {
+      "epoch": 5.47,
+      "learning_rate": 9.964706253748776e-06,
+      "loss": 2.5605,
+      "step": 231000
+    },
+    {
+      "epoch": 5.48,
+      "learning_rate": 9.901568961707233e-06,
+      "loss": 2.5527,
+      "step": 231500
+    },
+    {
+      "epoch": 5.49,
+      "learning_rate": 9.838431669665688e-06,
+      "loss": 2.5613,
+      "step": 232000
+    },
+    {
+      "epoch": 5.5,
+      "learning_rate": 9.775294377624145e-06,
+      "loss": 2.5698,
+      "step": 232500
+    },
+    {
+      "epoch": 5.52,
+      "learning_rate": 9.7121570855826e-06,
+      "loss": 2.5681,
+      "step": 233000
+    },
+    {
+      "epoch": 5.53,
+      "learning_rate": 9.649019793541055e-06,
+      "loss": 2.5763,
+      "step": 233500
+    },
+    {
+      "epoch": 5.54,
+      "learning_rate": 9.585882501499512e-06,
+      "loss": 2.5602,
+      "step": 234000
+    },
+    {
+      "epoch": 5.55,
+      "learning_rate": 9.522745209457967e-06,
+      "loss": 2.5559,
+      "step": 234500
+    },
+    {
+      "epoch": 5.56,
+      "learning_rate": 9.459607917416422e-06,
+      "loss": 2.5877,
+      "step": 235000
+    },
+    {
+      "epoch": 5.58,
+      "learning_rate": 9.396470625374878e-06,
+      "loss": 2.5779,
+      "step": 235500
+    },
+    {
+      "epoch": 5.59,
+      "learning_rate": 9.333333333333334e-06,
+      "loss": 2.5723,
+      "step": 236000
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 9.27019604129179e-06,
+      "loss": 2.5695,
+      "step": 236500
+    },
+    {
+      "epoch": 5.61,
+      "learning_rate": 9.207058749250245e-06,
+      "loss": 2.576,
+      "step": 237000
+    },
+    {
+      "epoch": 5.62,
+      "learning_rate": 9.143921457208701e-06,
+      "loss": 2.5758,
+      "step": 237500
+    },
+    {
+      "epoch": 5.64,
+      "learning_rate": 9.080784165167156e-06,
+      "loss": 2.5674,
+      "step": 238000
+    },
+    {
+      "epoch": 5.65,
+      "learning_rate": 9.017646873125613e-06,
+      "loss": 2.5822,
+      "step": 238500
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 8.954509581084068e-06,
+      "loss": 2.5668,
+      "step": 239000
+    },
+    {
+      "epoch": 5.67,
+      "learning_rate": 8.891372289042523e-06,
+      "loss": 2.5873,
+      "step": 239500
+    },
+    {
+      "epoch": 5.68,
+      "learning_rate": 8.82823499700098e-06,
+      "loss": 2.5795,
+      "step": 240000
+    },
+    {
+      "epoch": 5.68,
+      "eval_loss": 3.1380364894866943,
+      "eval_runtime": 112.9332,
+      "eval_samples_per_second": 166.222,
+      "eval_steps_per_second": 20.782,
+      "step": 240000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
+  "total_flos": 5.078657441088e+16,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbc9c398625e51025e3deb582f2b959048d2b72dfb099c42bbf32395fdf91c5a
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:eefbf39ee5b2d619291bd576add06f62ce7d41ddd117d09ee485a01c6448e0e6
 size 510398013