Training in progress, step 40000

Browse files

Files changed (6) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +131 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43f9fd3bc16beea82ad987b4bff078f2c2c3f4fcad8460dbb8eb220175985152
 size 995605189

 version https://git-lfs.github.com/spec/v1
+oid sha256:dafd1a597d541396efb5c638af9a54bbc8ac18c68cd48a20f76d0ca2bc1e01ef
 size 995605189

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5447a2edf6b2eb946d8d40d9f690fbbabccb89792a44d56a897ebdbe771a9946
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:529a26013828a3643211b2475be16389f662dc8c3c06d5719b6a70819da68c8a
 size 510398013

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3021a61927d9bb43a2f41d0611bdb8462cf969c0d98440929f2484f2963b1a1
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3004509848150ee75a25be67dfe66552d50a705fc14ae18f62d4bd5e0795e8c
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:160cfc175cb0a133b8c88162ad6a65e1d4d7974bec1e5017230d638bf8bdc3ea
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:91a499652f62153d33f4dd0503b07f247980d8d57628f7715c2fef0cb3d0b038
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7102945354673738,
-  "global_step": 30000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -396,11 +396,139 @@
       "eval_samples_per_second": 165.982,
       "eval_steps_per_second": 20.752,
       "step": 30000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
-  "total_flos": 6497673481728000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9470593806231651,
+  "global_step": 40000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 165.982,
       "eval_steps_per_second": 20.752,
       "step": 30000
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 3.528276036240806e-05,
+      "loss": 3.2187,
+      "step": 30500
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 3.521962307036651e-05,
+      "loss": 3.2015,
+      "step": 31000
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 3.515648577832497e-05,
+      "loss": 3.2153,
+      "step": 31500
+    },
+    {
+      "epoch": 0.76,
+      "learning_rate": 3.509334848628343e-05,
+      "loss": 3.2242,
+      "step": 32000
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 3.503021119424188e-05,
+      "loss": 3.2025,
+      "step": 32500
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 3.496707390220034e-05,
+      "loss": 3.2164,
+      "step": 33000
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 3.4903936610158796e-05,
+      "loss": 3.1862,
+      "step": 33500
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 3.484079931811725e-05,
+      "loss": 3.207,
+      "step": 34000
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 3.4777662026075706e-05,
+      "loss": 3.1953,
+      "step": 34500
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 3.471452473403416e-05,
+      "loss": 3.1788,
+      "step": 35000
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 3.4651387441992616e-05,
+      "loss": 3.1987,
+      "step": 35500
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 3.4588250149951075e-05,
+      "loss": 3.1914,
+      "step": 36000
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 3.4525112857909527e-05,
+      "loss": 3.2037,
+      "step": 36500
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 3.4461975565867985e-05,
+      "loss": 3.1951,
+      "step": 37000
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 3.4398838273826444e-05,
+      "loss": 3.1909,
+      "step": 37500
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 3.433570098178489e-05,
+      "loss": 3.2012,
+      "step": 38000
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 3.427256368974335e-05,
+      "loss": 3.2092,
+      "step": 38500
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 3.4209426397701805e-05,
+      "loss": 3.2108,
+      "step": 39000
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 3.414628910566026e-05,
+      "loss": 3.1818,
+      "step": 39500
+    },
+    {
+      "epoch": 0.95,
+      "learning_rate": 3.4083151813618716e-05,
+      "loss": 3.2072,
+      "step": 40000
+    },
+    {
+      "epoch": 0.95,
+      "eval_loss": 3.1675474643707275,
+      "eval_runtime": 113.0729,
+      "eval_samples_per_second": 166.017,
+      "eval_steps_per_second": 20.757,
+      "step": 40000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
+  "total_flos": 8586525680640000.0,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5447a2edf6b2eb946d8d40d9f690fbbabccb89792a44d56a897ebdbe771a9946
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:529a26013828a3643211b2475be16389f662dc8c3c06d5719b6a70819da68c8a
 size 510398013