Training in progress, step 210000

Browse files

Files changed (6) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +131 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:622ffd3a7b017059677381c4957b8684c94feda5c36dd09d4c9eb745833a1cf3
 size 995605445

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfd19e30a2a650450011c0e6b074763121dae91837a5de2ae7a75d902c60dc2d
 size 995605445

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df5e243d791ce4f65dc68bf0f3a46091fb97d6aafd2414ee9a81131404021ee0
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:920d137fdcb173efb8049387289fab5904ebed3270ec2f582ab357e4385f15ac
 size 510398013

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9c10092a1e7977cf34645ee895c471f8c4d2d3bd2aa8edd2570bdd9e3a7e77a
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:99009f31f62eb4abfe3090a9bbfa89c4ba84d5d9f26ec160f92e0423b3ef657b
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cdc4afee694df5e30232d1b113751895a7bfe4c051ca10278bb6f9244c467f9
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:2632d3faec0736fab7590967000d1b66b3c018c0af2551fd0387b0a028e120fe
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.735296903115826,
-  "global_step": 200000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2572,11 +2572,139 @@
       "eval_samples_per_second": 166.155,
       "eval_steps_per_second": 20.774,
       "step": 200000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
-  "total_flos": 4.2369106205952e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.972061748271616,
+  "global_step": 210000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 166.155,
       "eval_steps_per_second": 20.774,
       "step": 200000
+    },
+    {
+      "epoch": 4.75,
+      "learning_rate": 1.3816081068282983e-05,
+      "loss": 2.6777,
+      "step": 200500
+    },
+    {
+      "epoch": 4.76,
+      "learning_rate": 1.3752943776241438e-05,
+      "loss": 2.6712,
+      "step": 201000
+    },
+    {
+      "epoch": 4.77,
+      "learning_rate": 1.3689806484199895e-05,
+      "loss": 2.6751,
+      "step": 201500
+    },
+    {
+      "epoch": 4.78,
+      "learning_rate": 1.362666919215835e-05,
+      "loss": 2.677,
+      "step": 202000
+    },
+    {
+      "epoch": 4.79,
+      "learning_rate": 1.3563531900116805e-05,
+      "loss": 2.6732,
+      "step": 202500
+    },
+    {
+      "epoch": 4.81,
+      "learning_rate": 1.350039460807526e-05,
+      "loss": 2.6634,
+      "step": 203000
+    },
+    {
+      "epoch": 4.82,
+      "learning_rate": 1.3437257316033717e-05,
+      "loss": 2.6684,
+      "step": 203500
+    },
+    {
+      "epoch": 4.83,
+      "learning_rate": 1.3374120023992172e-05,
+      "loss": 2.6892,
+      "step": 204000
+    },
+    {
+      "epoch": 4.84,
+      "learning_rate": 1.3310982731950629e-05,
+      "loss": 2.676,
+      "step": 204500
+    },
+    {
+      "epoch": 4.85,
+      "learning_rate": 1.3247845439909084e-05,
+      "loss": 2.6605,
+      "step": 205000
+    },
+    {
+      "epoch": 4.87,
+      "learning_rate": 1.318470814786754e-05,
+      "loss": 2.6773,
+      "step": 205500
+    },
+    {
+      "epoch": 4.88,
+      "learning_rate": 1.3121570855825996e-05,
+      "loss": 2.6702,
+      "step": 206000
+    },
+    {
+      "epoch": 4.89,
+      "learning_rate": 1.305843356378445e-05,
+      "loss": 2.6662,
+      "step": 206500
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 1.2995296271742906e-05,
+      "loss": 2.6775,
+      "step": 207000
+    },
+    {
+      "epoch": 4.91,
+      "learning_rate": 1.2932158979701361e-05,
+      "loss": 2.6829,
+      "step": 207500
+    },
+    {
+      "epoch": 4.92,
+      "learning_rate": 1.2869021687659818e-05,
+      "loss": 2.6679,
+      "step": 208000
+    },
+    {
+      "epoch": 4.94,
+      "learning_rate": 1.2805884395618273e-05,
+      "loss": 2.6671,
+      "step": 208500
+    },
+    {
+      "epoch": 4.95,
+      "learning_rate": 1.274274710357673e-05,
+      "loss": 2.6746,
+      "step": 209000
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 1.2679609811535185e-05,
+      "loss": 2.6955,
+      "step": 209500
+    },
+    {
+      "epoch": 4.97,
+      "learning_rate": 1.2616472519493642e-05,
+      "loss": 2.6812,
+      "step": 210000
+    },
+    {
+      "epoch": 4.97,
+      "eval_loss": 3.1202192306518555,
+      "eval_runtime": 112.9566,
+      "eval_samples_per_second": 166.188,
+      "eval_steps_per_second": 20.778,
+      "step": 210000
     }
   ],
   "max_steps": 633540,
   "num_train_epochs": 15,
+  "total_flos": 4.4469447088896e+16,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df5e243d791ce4f65dc68bf0f3a46091fb97d6aafd2414ee9a81131404021ee0
 size 510398013

 version https://git-lfs.github.com/spec/v1
+oid sha256:920d137fdcb173efb8049387289fab5904ebed3270ec2f582ab357e4385f15ac
 size 510398013