mkopecki
/

chess-lora-adapter-shortened-llama-3.1-8b

Generated from Trainer

Model card · Files and versions · Training metrics · Community

mkopecki committed on Aug 5

Commit

1714448

•

1 Parent(s): 3fe4c9b

End of training

Files changed (2) hide show

README.md +2 -2
trainer_state.json +12 -12

README.md CHANGED Viewed

@@ -7,14 +7,14 @@ tags:
 - sft
 - generated_from_trainer
 model-index:
-- name: chess-lora-adapter-llama-3.1-8b
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# chess-lora-adapter-llama-3.1-8b
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the None dataset.

 - sft
 - generated_from_trainer
 model-index:
+- name: chess-lora-adapter-shortened-llama-3.1-8b
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# chess-lora-adapter-shortened-llama-3.1-8b
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the None dataset.

trainer_state.json CHANGED Viewed

@@ -10,33 +10,33 @@
   "log_history": [
     {
       "epoch": 0.922509225092251,
-      "grad_norm": 0.6193047165870667,
       "learning_rate": 3.4624846248462485e-05,
-      "loss": 0.5669,
       "step": 500
     },
     {
       "epoch": 1.8450184501845017,
-      "grad_norm": 0.5879950523376465,
       "learning_rate": 1.924969249692497e-05,
-      "loss": 0.4066,
       "step": 1000
     },
     {
       "epoch": 2.767527675276753,
-      "grad_norm": 0.9034110307693481,
       "learning_rate": 3.874538745387454e-06,
-      "loss": 0.3173,
       "step": 1500
     },
     {
       "epoch": 3.0,
       "step": 1626,
-      "total_flos": 2.068561910815949e+16,
-      "train_loss": 0.41841167248189814,
-      "train_runtime": 3228.8428,
-      "train_samples_per_second": 0.504,
-      "train_steps_per_second": 0.504
     }
   ],
   "logging_steps": 500,
@@ -56,7 +56,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.068561910815949e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "log_history": [
     {
       "epoch": 0.922509225092251,
+      "grad_norm": 0.7217928171157837,
       "learning_rate": 3.4624846248462485e-05,
+      "loss": 0.6626,
       "step": 500
     },
     {
       "epoch": 1.8450184501845017,
+      "grad_norm": 0.7293610572814941,
       "learning_rate": 1.924969249692497e-05,
+      "loss": 0.4814,
       "step": 1000
     },
     {
       "epoch": 2.767527675276753,
+      "grad_norm": 1.1111749410629272,
       "learning_rate": 3.874538745387454e-06,
+      "loss": 0.3739,
       "step": 1500
     },
     {
       "epoch": 3.0,
       "step": 1626,
+      "total_flos": 1.7396479642263552e+16,
+      "train_loss": 0.49204357611737126,
+      "train_runtime": 2840.3977,
+      "train_samples_per_second": 0.572,
+      "train_steps_per_second": 0.572
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.7396479642263552e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null