mkopecki committed on
Commit
1714448
1 Parent(s): 3fe4c9b

End of training

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. trainer_state.json +12 -12
README.md CHANGED
@@ -7,14 +7,14 @@ tags:
7
  - sft
8
  - generated_from_trainer
9
  model-index:
10
- - name: chess-lora-adapter-llama-3.1-8b
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
- # chess-lora-adapter-llama-3.1-8b
18
 
19
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the None dataset.
20
 
 
7
  - sft
8
  - generated_from_trainer
9
  model-index:
10
+ - name: chess-lora-adapter-shortened-llama-3.1-8b
11
  results: []
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
  should probably proofread and complete it, then remove this comment. -->
16
 
17
+ # chess-lora-adapter-shortened-llama-3.1-8b
18
 
19
  This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct) on the None dataset.
20
 
trainer_state.json CHANGED
@@ -10,33 +10,33 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.922509225092251,
13
- "grad_norm": 0.6193047165870667,
14
  "learning_rate": 3.4624846248462485e-05,
15
- "loss": 0.5669,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.8450184501845017,
20
- "grad_norm": 0.5879950523376465,
21
  "learning_rate": 1.924969249692497e-05,
22
- "loss": 0.4066,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 2.767527675276753,
27
- "grad_norm": 0.9034110307693481,
28
  "learning_rate": 3.874538745387454e-06,
29
- "loss": 0.3173,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 3.0,
34
  "step": 1626,
35
- "total_flos": 2.068561910815949e+16,
36
- "train_loss": 0.41841167248189814,
37
- "train_runtime": 3228.8428,
38
- "train_samples_per_second": 0.504,
39
- "train_steps_per_second": 0.504
40
  }
41
  ],
42
  "logging_steps": 500,
@@ -56,7 +56,7 @@
56
  "attributes": {}
57
  }
58
  },
59
- "total_flos": 2.068561910815949e+16,
60
  "train_batch_size": 1,
61
  "trial_name": null,
62
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.922509225092251,
13
+ "grad_norm": 0.7217928171157837,
14
  "learning_rate": 3.4624846248462485e-05,
15
+ "loss": 0.6626,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 1.8450184501845017,
20
+ "grad_norm": 0.7293610572814941,
21
  "learning_rate": 1.924969249692497e-05,
22
+ "loss": 0.4814,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 2.767527675276753,
27
+ "grad_norm": 1.1111749410629272,
28
  "learning_rate": 3.874538745387454e-06,
29
+ "loss": 0.3739,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 3.0,
34
  "step": 1626,
35
+ "total_flos": 1.7396479642263552e+16,
36
+ "train_loss": 0.49204357611737126,
37
+ "train_runtime": 2840.3977,
38
+ "train_samples_per_second": 0.572,
39
+ "train_steps_per_second": 0.572
40
  }
41
  ],
42
  "logging_steps": 500,
 
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 1.7396479642263552e+16,
60
  "train_batch_size": 1,
61
  "trial_name": null,
62
  "trial_params": null