mkopecki
/

chess-no-cot-lora-adapter-4b-llama-3.1-8b

Generated from Trainer

Model card · Files and versions · Community

mkopecki committed on Aug 12

Commit

1fb1140

•

1 Parent(s): 0187f02

End of training

Files changed (1) hide show

trainer_state.json +16 -9

trainer_state.json CHANGED Viewed

@@ -3,23 +3,30 @@
   "best_model_checkpoint": null,
   "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 192,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 3.0,
-      "step": 192,
-      "total_flos": 2.031101718065971e+16,
-      "train_loss": 0.5815114180246989,
-      "train_runtime": 555.1073,
-      "train_samples_per_second": 5.502,
-      "train_steps_per_second": 0.346
     }
   ],
   "logging_steps": 500,
-  "max_steps": 192,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
@@ -35,7 +42,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.031101718065971e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_model_checkpoint": null,
   "epoch": 3.0,
   "eval_steps": 500,
+  "global_step": 654,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 2.293577981651376,
+      "grad_norm": 0.8083828687667847,
+      "learning_rate": 1.1773700305810397e-05,
+      "loss": 0.5038,
+      "step": 500
+    },
     {
       "epoch": 3.0,
+      "step": 654,
+      "total_flos": 6.243879793095475e+16,
+      "train_loss": 0.462136259866417,
+      "train_runtime": 1671.2602,
+      "train_samples_per_second": 6.24,
+      "train_steps_per_second": 0.391
     }
   ],
   "logging_steps": 500,
+  "max_steps": 654,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 6.243879793095475e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null