Ap98 committed on
Commit
f2fbbd5
1 Parent(s): 102934f

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,13 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - sft
8
  - generated_from_trainer
 
9
  base_model: mistralai/Mistral-7B-v0.1
10
  datasets:
11
- - HuggingFaceH4/ultrachat_200k
12
  model-index:
13
  - name: zephyr-7b-sft-qlora
14
  results: []
@@ -19,9 +19,9 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # zephyr-7b-sft-qlora
21
 
22
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.0181
25
 
26
  ## Model description
27
 
@@ -55,7 +55,7 @@ The following hyperparameters were used during training:
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
- | 0.9758 | 1.0 | 42 | 1.0181 |
59
 
60
 
61
  ### Framework versions
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
8
+ - alignment-handbook
9
  base_model: mistralai/Mistral-7B-v0.1
10
  datasets:
11
+ - generator
12
  model-index:
13
  - name: zephyr-7b-sft-qlora
14
  results: []
 
19
 
20
  # zephyr-7b-sft-qlora
21
 
22
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 1.0182
25
 
26
  ## Model description
27
 
 
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 0.9758 | 1.0 | 42 | 1.0182 |
59
 
60
 
61
  ### Framework versions
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41ccb51498f8845b621d5d36eea3d8eb2c81e08fc49e8f99acb12ab4b3a743f0
3
  size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4087837c1fbb724c2f8523b904cf61c2411363f6773675bf9f2afaf3af12896
3
  size 83946192
all_results.json CHANGED
@@ -6,9 +6,9 @@
6
  "eval_samples_per_second": 3.701,
7
  "eval_steps_per_second": 0.47,
8
  "total_flos": 2.935565656994611e+16,
9
- "train_loss": 1.0142703553040822,
10
- "train_runtime": 423.6882,
11
  "train_samples": 500,
12
- "train_samples_per_second": 0.788,
13
- "train_steps_per_second": 0.099
14
  }
 
6
  "eval_samples_per_second": 3.701,
7
  "eval_steps_per_second": 0.47,
8
  "total_flos": 2.935565656994611e+16,
9
+ "train_loss": 1.0143198739914667,
10
+ "train_runtime": 408.5529,
11
  "train_samples": 500,
12
+ "train_samples_per_second": 0.818,
13
+ "train_steps_per_second": 0.103
14
  }
runs/May14_15-05-13_541c0b3e5c99/events.out.tfevents.1715699125.541c0b3e5c99.8018.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a705a70bcfbf43744e0f054e3d4756c8f4b06f15e6ae33317543204662fca9de
3
- size 7122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97c3d02c11c4b0f8a9ce754c39ece2c39a137b2b200e60f6ebce352cd5bce6a3
3
+ size 7736
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 2.935565656994611e+16,
4
- "train_loss": 1.0142703553040822,
5
- "train_runtime": 423.6882,
6
  "train_samples": 500,
7
- "train_samples_per_second": 0.788,
8
- "train_steps_per_second": 0.099
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 2.935565656994611e+16,
4
+ "train_loss": 1.0143198739914667,
5
+ "train_runtime": 408.5529,
6
  "train_samples": 500,
7
+ "train_samples_per_second": 0.818,
8
+ "train_steps_per_second": 0.103
9
  }
trainer_state.json CHANGED
@@ -12,88 +12,88 @@
12
  "epoch": 0.023809523809523808,
13
  "grad_norm": 0.35546875,
14
  "learning_rate": 4e-05,
15
- "loss": 0.995,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.11904761904761904,
20
  "grad_norm": 0.31640625,
21
  "learning_rate": 0.0002,
22
- "loss": 1.11,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.23809523809523808,
27
- "grad_norm": 0.318359375,
28
  "learning_rate": 0.0001911228490388136,
29
- "loss": 1.0479,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.35714285714285715,
34
  "grad_norm": 0.267578125,
35
  "learning_rate": 0.00016606747233900815,
36
- "loss": 0.9913,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.47619047619047616,
41
- "grad_norm": 0.26953125,
42
  "learning_rate": 0.00012928227712765504,
43
  "loss": 0.9396,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.5952380952380952,
48
- "grad_norm": 0.2314453125,
49
  "learning_rate": 8.729821802531212e-05,
50
- "loss": 0.9968,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.7142857142857143,
55
- "grad_norm": 0.267578125,
56
  "learning_rate": 4.756927164427685e-05,
57
- "loss": 1.0416,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.8333333333333334,
62
- "grad_norm": 0.251953125,
63
  "learning_rate": 1.7149035075615794e-05,
64
  "loss": 1.0231,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.9523809523809523,
69
- "grad_norm": 0.2099609375,
70
  "learning_rate": 1.4384089652291543e-06,
71
  "loss": 0.9758,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 1.0,
76
- "eval_loss": 1.0180646181106567,
77
- "eval_runtime": 89.6858,
78
- "eval_samples_per_second": 3.691,
79
- "eval_steps_per_second": 0.468,
80
  "step": 42
81
  },
82
  {
83
  "epoch": 1.0,
84
  "step": 42,
85
  "total_flos": 2.935565656994611e+16,
86
- "train_loss": 1.0142703553040822,
87
- "train_runtime": 423.6882,
88
- "train_samples_per_second": 0.788,
89
- "train_steps_per_second": 0.099
90
  }
91
  ],
92
  "logging_steps": 5,
93
  "max_steps": 42,
94
  "num_input_tokens_seen": 0,
95
  "num_train_epochs": 1,
96
- "save_steps": 100,
97
  "total_flos": 2.935565656994611e+16,
98
  "train_batch_size": 4,
99
  "trial_name": null,
 
12
  "epoch": 0.023809523809523808,
13
  "grad_norm": 0.35546875,
14
  "learning_rate": 4e-05,
15
+ "loss": 0.9948,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.11904761904761904,
20
  "grad_norm": 0.31640625,
21
  "learning_rate": 0.0002,
22
+ "loss": 1.1099,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.23809523809523808,
27
+ "grad_norm": 0.31640625,
28
  "learning_rate": 0.0001911228490388136,
29
+ "loss": 1.048,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.35714285714285715,
34
  "grad_norm": 0.267578125,
35
  "learning_rate": 0.00016606747233900815,
36
+ "loss": 0.9912,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.47619047619047616,
41
+ "grad_norm": 0.2578125,
42
  "learning_rate": 0.00012928227712765504,
43
  "loss": 0.9396,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.5952380952380952,
48
+ "grad_norm": 0.2294921875,
49
  "learning_rate": 8.729821802531212e-05,
50
+ "loss": 0.997,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.7142857142857143,
55
+ "grad_norm": 0.2421875,
56
  "learning_rate": 4.756927164427685e-05,
57
+ "loss": 1.0419,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.8333333333333334,
62
+ "grad_norm": 0.248046875,
63
  "learning_rate": 1.7149035075615794e-05,
64
  "loss": 1.0231,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.9523809523809523,
69
+ "grad_norm": 0.208984375,
70
  "learning_rate": 1.4384089652291543e-06,
71
  "loss": 0.9758,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 1.0,
76
+ "eval_loss": 1.0181750059127808,
77
+ "eval_runtime": 88.1603,
78
+ "eval_samples_per_second": 3.755,
79
+ "eval_steps_per_second": 0.476,
80
  "step": 42
81
  },
82
  {
83
  "epoch": 1.0,
84
  "step": 42,
85
  "total_flos": 2.935565656994611e+16,
86
+ "train_loss": 1.0143198739914667,
87
+ "train_runtime": 408.5529,
88
+ "train_samples_per_second": 0.818,
89
+ "train_steps_per_second": 0.103
90
  }
91
  ],
92
  "logging_steps": 5,
93
  "max_steps": 42,
94
  "num_input_tokens_seen": 0,
95
  "num_train_epochs": 1,
96
+ "save_steps": 10,
97
  "total_flos": 2.935565656994611e+16,
98
  "train_batch_size": 4,
99
  "trial_name": null,