Training in progress, step 100, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17a52caa1e20e38dfe8eb07879a087d9dcd992146f6063103b794d9b3086a966
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:9632e1bff5f17086ad1def3c234561955e256311bd9f2a969b68fe2f4ddf2998
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7836da12d7ca83dc950ebc87ef7fc115575098f1c3336207e44368f32620c412
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1f439b78259c1c2fc57babea1b73c33d1273fe60dbc47df07b0b26b74a24f20
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51e5ba31674e094f8dc7f8773808a529a42a0e24ba3f7bdab7bd92f5c1ee0fc0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3a4ac9a9f9986597647c8a0e867c4f72eeab3c3d1e43c8f960b44a326903986
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e22fe3d8075184be8a174d539009c2395967dc63a8330ca6156ee33f4ca2c44c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9497c8a8d24f41cf5c0069eb540cebc255b08ff9535263160a28759922eb3e3
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c9a297f4567a0c49fdad083d7daf079fe6bba9f11d7261f1d03aa77e099ed78
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:19a425f1c819a05abc0c03ce33851974f4dbcd20cfb9efd9dad629120a2b5f92
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b17a256f3cc972905c791170bba62e8bf48140bfcb668be8b9ca74a89159a7d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:81cc0f4203a4456d3477f2e743f020a5c58989c2a1cd33891ca202c49ac579ac
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e8ef33d61a22f8317ddd5200f4e1dabd39f9e47b2da21b95b4059442d67ba66
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:25aa2f483294991f80a6c969eac63941b33052740b29e7eb2c606cbf255fbb72
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7638214826583862,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 1.839080459770115,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 15.294,
       "eval_steps_per_second": 3.976,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -395,7 +761,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -404,12 +770,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.983936330868982e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7638214826583862,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.67816091954023,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 15.294,
       "eval_steps_per_second": 3.976,
       "step": 50
+    },
+    {
+      "epoch": 1.8758620689655172,
+      "grad_norm": 0.21353751420974731,
+      "learning_rate": 6.311147670162576e-05,
+      "loss": 0.663,
+      "step": 51
+    },
+    {
+      "epoch": 1.9126436781609195,
+      "grad_norm": 0.2514137327671051,
+      "learning_rate": 6.177090264736525e-05,
+      "loss": 0.7005,
+      "step": 52
+    },
+    {
+      "epoch": 1.9494252873563218,
+      "grad_norm": 0.222330704331398,
+      "learning_rate": 6.042415061148954e-05,
+      "loss": 0.7893,
+      "step": 53
+    },
+    {
+      "epoch": 1.986206896551724,
+      "grad_norm": 0.2652716040611267,
+      "learning_rate": 5.907244941233371e-05,
+      "loss": 0.7701,
+      "step": 54
+    },
+    {
+      "epoch": 2.0229885057471266,
+      "grad_norm": 0.20406022667884827,
+      "learning_rate": 5.771703238400288e-05,
+      "loss": 0.557,
+      "step": 55
+    },
+    {
+      "epoch": 2.0597701149425287,
+      "grad_norm": 0.1386067122220993,
+      "learning_rate": 5.635913625104e-05,
+      "loss": 0.5773,
+      "step": 56
+    },
+    {
+      "epoch": 2.0965517241379312,
+      "grad_norm": 0.18351519107818604,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.6024,
+      "step": 57
+    },
+    {
+      "epoch": 2.1333333333333333,
+      "grad_norm": 0.19990648329257965,
+      "learning_rate": 5.364086374896001e-05,
+      "loss": 0.6177,
+      "step": 58
+    },
+    {
+      "epoch": 2.170114942528736,
+      "grad_norm": 0.19572705030441284,
+      "learning_rate": 5.2282967615997125e-05,
+      "loss": 0.6475,
+      "step": 59
+    },
+    {
+      "epoch": 2.206896551724138,
+      "grad_norm": 0.23045289516448975,
+      "learning_rate": 5.092755058766631e-05,
+      "loss": 0.7089,
+      "step": 60
+    },
+    {
+      "epoch": 2.2436781609195404,
+      "grad_norm": 0.2674247622489929,
+      "learning_rate": 4.9575849388510473e-05,
+      "loss": 0.6667,
+      "step": 61
+    },
+    {
+      "epoch": 2.2804597701149425,
+      "grad_norm": 0.2119389772415161,
+      "learning_rate": 4.8229097352634765e-05,
+      "loss": 0.5402,
+      "step": 62
+    },
+    {
+      "epoch": 2.317241379310345,
+      "grad_norm": 0.19508974254131317,
+      "learning_rate": 4.688852329837424e-05,
+      "loss": 0.6063,
+      "step": 63
+    },
+    {
+      "epoch": 2.354022988505747,
+      "grad_norm": 0.19634996354579926,
+      "learning_rate": 4.5555350407081863e-05,
+      "loss": 0.6404,
+      "step": 64
+    },
+    {
+      "epoch": 2.3908045977011496,
+      "grad_norm": 0.21487970650196075,
+      "learning_rate": 4.423079510705992e-05,
+      "loss": 0.6663,
+      "step": 65
+    },
+    {
+      "epoch": 2.4275862068965517,
+      "grad_norm": 0.2401036024093628,
+      "learning_rate": 4.291606596365304e-05,
+      "loss": 0.6801,
+      "step": 66
+    },
+    {
+      "epoch": 2.464367816091954,
+      "grad_norm": 0.2552790343761444,
+      "learning_rate": 4.161236257651587e-05,
+      "loss": 0.7218,
+      "step": 67
+    },
+    {
+      "epoch": 2.5011494252873563,
+      "grad_norm": 0.4108894467353821,
+      "learning_rate": 4.032087448506089e-05,
+      "loss": 0.6251,
+      "step": 68
+    },
+    {
+      "epoch": 2.5379310344827584,
+      "grad_norm": 0.1864066869020462,
+      "learning_rate": 3.904278008308589e-05,
+      "loss": 0.5174,
+      "step": 69
+    },
+    {
+      "epoch": 2.574712643678161,
+      "grad_norm": 0.20834487676620483,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 0.5929,
+      "step": 70
+    },
+    {
+      "epoch": 2.6114942528735634,
+      "grad_norm": 0.2088489681482315,
+      "learning_rate": 3.653142375462596e-05,
+      "loss": 0.6374,
+      "step": 71
+    },
+    {
+      "epoch": 2.6482758620689655,
+      "grad_norm": 0.25101637840270996,
+      "learning_rate": 3.530045326755967e-05,
+      "loss": 0.6594,
+      "step": 72
+    },
+    {
+      "epoch": 2.6850574712643676,
+      "grad_norm": 0.27176716923713684,
+      "learning_rate": 3.408745725803042e-05,
+      "loss": 0.6788,
+      "step": 73
+    },
+    {
+      "epoch": 2.72183908045977,
+      "grad_norm": 0.3027721047401428,
+      "learning_rate": 3.2893542501225534e-05,
+      "loss": 0.7124,
+      "step": 74
+    },
+    {
+      "epoch": 2.7586206896551726,
+      "grad_norm": 0.27195319533348083,
+      "learning_rate": 3.1719798362005444e-05,
+      "loss": 0.5897,
+      "step": 75
+    },
+    {
+      "epoch": 2.7586206896551726,
+      "eval_loss": 0.7599511742591858,
+      "eval_runtime": 3.275,
+      "eval_samples_per_second": 15.267,
+      "eval_steps_per_second": 3.969,
+      "step": 75
+    },
+    {
+      "epoch": 2.7954022988505747,
+      "grad_norm": 0.17247594892978668,
+      "learning_rate": 3.056729580093346e-05,
+      "loss": 0.5506,
+      "step": 76
+    },
+    {
+      "epoch": 2.8321839080459768,
+      "grad_norm": 0.2535310685634613,
+      "learning_rate": 2.9437086397097995e-05,
+      "loss": 0.6236,
+      "step": 77
+    },
+    {
+      "epoch": 2.8689655172413793,
+      "grad_norm": 0.2153378278017044,
+      "learning_rate": 2.8330201388619253e-05,
+      "loss": 0.6671,
+      "step": 78
+    },
+    {
+      "epoch": 2.905747126436782,
+      "grad_norm": 0.23758837580680847,
+      "learning_rate": 2.7247650731715564e-05,
+      "loss": 0.6473,
+      "step": 79
+    },
+    {
+      "epoch": 2.942528735632184,
+      "grad_norm": 0.25426074862480164,
+      "learning_rate": 2.6190422179188044e-05,
+      "loss": 0.6725,
+      "step": 80
+    },
+    {
+      "epoch": 2.979310344827586,
+      "grad_norm": 0.3190159499645233,
+      "learning_rate": 2.515948037916423e-05,
+      "loss": 0.6748,
+      "step": 81
+    },
+    {
+      "epoch": 3.0160919540229885,
+      "grad_norm": 0.26219987869262695,
+      "learning_rate": 2.415576599492321e-05,
+      "loss": 0.5995,
+      "step": 82
+    },
+    {
+      "epoch": 3.052873563218391,
+      "grad_norm": 0.16341930627822876,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 0.5468,
+      "step": 83
+    },
+    {
+      "epoch": 3.089655172413793,
+      "grad_norm": 0.18180637061595917,
+      "learning_rate": 2.223365707558953e-05,
+      "loss": 0.5735,
+      "step": 84
+    },
+    {
+      "epoch": 3.1264367816091956,
+      "grad_norm": 0.21310141682624817,
+      "learning_rate": 2.1317016332300447e-05,
+      "loss": 0.636,
+      "step": 85
+    },
+    {
+      "epoch": 3.1632183908045977,
+      "grad_norm": 0.2241068184375763,
+      "learning_rate": 2.043110898818738e-05,
+      "loss": 0.6183,
+      "step": 86
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.2601270079612732,
+      "learning_rate": 1.9576743372592747e-05,
+      "loss": 0.6213,
+      "step": 87
+    },
+    {
+      "epoch": 3.2367816091954023,
+      "grad_norm": 0.3257927894592285,
+      "learning_rate": 1.875469903520743e-05,
+      "loss": 0.6393,
+      "step": 88
+    },
+    {
+      "epoch": 3.2735632183908048,
+      "grad_norm": 0.24459469318389893,
+      "learning_rate": 1.7965726034785466e-05,
+      "loss": 0.5094,
+      "step": 89
+    },
+    {
+      "epoch": 3.310344827586207,
+      "grad_norm": 0.19675396382808685,
+      "learning_rate": 1.7210544254767098e-05,
+      "loss": 0.5474,
+      "step": 90
+    },
+    {
+      "epoch": 3.3471264367816094,
+      "grad_norm": 0.2235768735408783,
+      "learning_rate": 1.648984274643487e-05,
+      "loss": 0.5492,
+      "step": 91
+    },
+    {
+      "epoch": 3.3839080459770114,
+      "grad_norm": 0.24168157577514648,
+      "learning_rate": 1.58042791002018e-05,
+      "loss": 0.6109,
+      "step": 92
+    },
+    {
+      "epoch": 3.420689655172414,
+      "grad_norm": 0.2579849064350128,
+      "learning_rate": 1.515447884560556e-05,
+      "loss": 0.6455,
+      "step": 93
+    },
+    {
+      "epoch": 3.457471264367816,
+      "grad_norm": 0.3024696111679077,
+      "learning_rate": 1.4541034880555838e-05,
+      "loss": 0.6716,
+      "step": 94
+    },
+    {
+      "epoch": 3.4942528735632186,
+      "grad_norm": 0.3916833698749542,
+      "learning_rate": 1.3964506930355947e-05,
+      "loss": 0.6166,
+      "step": 95
+    },
+    {
+      "epoch": 3.5310344827586206,
+      "grad_norm": 0.23013140261173248,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 0.5145,
+      "step": 96
+    },
+    {
+      "epoch": 3.5678160919540227,
+      "grad_norm": 0.2350182831287384,
+      "learning_rate": 1.292426907915634e-05,
+      "loss": 0.5648,
+      "step": 97
+    },
+    {
+      "epoch": 3.6045977011494252,
+      "grad_norm": 0.24683219194412231,
+      "learning_rate": 1.2461508323441185e-05,
+      "loss": 0.6,
+      "step": 98
+    },
+    {
+      "epoch": 3.6413793103448278,
+      "grad_norm": 0.2576511800289154,
+      "learning_rate": 1.203756100711545e-05,
+      "loss": 0.6375,
+      "step": 99
+    },
+    {
+      "epoch": 3.67816091954023,
+      "grad_norm": 0.31485050916671753,
+      "learning_rate": 1.1652813952861769e-05,
+      "loss": 0.6974,
+      "step": 100
+    },
+    {
+      "epoch": 3.67816091954023,
+      "eval_loss": 0.764268159866333,
+      "eval_runtime": 3.2754,
+      "eval_samples_per_second": 15.265,
+      "eval_steps_per_second": 3.969,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1966940149176074e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null