Training in progress, step 100000

Browse files

Files changed (6) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +131 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cb35c5ce4af04108203f479dae670d8d2387bc995e569ddd217192af4464df3
 size 995605445

 version https://git-lfs.github.com/spec/v1
+oid sha256:62a1421f8e6cb5bb67425311aef9dda9661c917aab4a357a23ec44b2e3daab85
 size 995605445

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea5b57c1afcfb613703cab27e0ecdc612f9fb54011d8038bc39bf00d2f9efda7
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f16473c3f1ed9f0581444acbd15664c381713c51843177e152cb72b1bd63766
 size 510398013

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c773d9b5ba19d40a8728dbc67546e0b3665d859a9b09fe4a7a38284b71fcc39
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:412df950fe881812d4df452ea4ec2fcd612c8449348c26dda4f3e5ddd7d9a49a
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c47f9fb86395a323bb61e82417c7ea6cc95d7fdffcd278133fc4fb35c1af521
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:e61191d318bf68e1245e4fc98bbc55f1b8337c3d4b3f28a8adc2b1ca718b159b
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.1308836064021213,
-  "global_step": 90000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1164,11 +1164,139 @@
       "eval_samples_per_second": 166.312,
       "eval_steps_per_second": 20.793,
       "step": 90000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
-  "total_flos": 1.9109545299072e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.367648451557913,
+  "global_step": 100000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 166.312,
       "eval_steps_per_second": 20.793,
       "step": 90000
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 2.770628531742274e-05,
+      "loss": 2.8933,
+      "step": 90500
+    },
+    {
+      "epoch": 2.15,
+      "learning_rate": 2.7643148025381194e-05,
+      "loss": 2.8842,
+      "step": 91000
+    },
+    {
+      "epoch": 2.17,
+      "learning_rate": 2.758001073333965e-05,
+      "loss": 2.8859,
+      "step": 91500
+    },
+    {
+      "epoch": 2.18,
+      "learning_rate": 2.7516873441298108e-05,
+      "loss": 2.8765,
+      "step": 92000
+    },
+    {
+      "epoch": 2.19,
+      "learning_rate": 2.7453736149256563e-05,
+      "loss": 2.8814,
+      "step": 92500
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 2.7390598857215018e-05,
+      "loss": 2.8964,
+      "step": 93000
+    },
+    {
+      "epoch": 2.21,
+      "learning_rate": 2.732746156517347e-05,
+      "loss": 2.9037,
+      "step": 93500
+    },
+    {
+      "epoch": 2.23,
+      "learning_rate": 2.7264324273131925e-05,
+      "loss": 2.8856,
+      "step": 94000
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 2.7201186981090383e-05,
+      "loss": 2.8979,
+      "step": 94500
+    },
+    {
+      "epoch": 2.25,
+      "learning_rate": 2.713804968904884e-05,
+      "loss": 2.8945,
+      "step": 95000
+    },
+    {
+      "epoch": 2.26,
+      "learning_rate": 2.7074912397007294e-05,
+      "loss": 2.9037,
+      "step": 95500
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 2.701177510496575e-05,
+      "loss": 2.9087,
+      "step": 96000
+    },
+    {
+      "epoch": 2.28,
+      "learning_rate": 2.6948637812924207e-05,
+      "loss": 2.905,
+      "step": 96500
+    },
+    {
+      "epoch": 2.3,
+      "learning_rate": 2.6885500520882662e-05,
+      "loss": 2.9209,
+      "step": 97000
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 2.6822363228841117e-05,
+      "loss": 2.895,
+      "step": 97500
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 2.6759225936799572e-05,
+      "loss": 2.8885,
+      "step": 98000
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 2.6696088644758028e-05,
+      "loss": 2.9038,
+      "step": 98500
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 2.6632951352716486e-05,
+      "loss": 2.9143,
+      "step": 99000
+    },
+    {
+      "epoch": 2.36,
+      "learning_rate": 2.656981406067494e-05,
+      "loss": 2.9099,
+      "step": 99500
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 2.6506676768633396e-05,
+      "loss": 2.9092,
+      "step": 100000
+    },
+    {
+      "epoch": 2.37,
+      "eval_loss": 3.122859477996826,
+      "eval_runtime": 113.0124,
+      "eval_samples_per_second": 166.106,
+      "eval_steps_per_second": 20.768,
+      "step": 100000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
+  "total_flos": 2.120856134976e+16,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea5b57c1afcfb613703cab27e0ecdc612f9fb54011d8038bc39bf00d2f9efda7
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f16473c3f1ed9f0581444acbd15664c381713c51843177e152cb72b1bd63766
 size 510398013