weijie210 committed on
Commit
5cb3d2f
1 Parent(s): ae8e6ee

Model save

Browse files
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.2915
21
 
22
  ## Model description
23
 
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
- | 0.2698 | 1.0 | 124 | 0.2915 |
56
 
57
 
58
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3246
21
 
22
  ## Model description
23
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
+ | 0.3042 | 1.0 | 101 | 0.3246 |
56
 
57
 
58
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.2914734482765198,
4
- "eval_runtime": 9.1903,
5
- "eval_samples": 220,
6
- "eval_samples_per_second": 23.938,
7
- "eval_steps_per_second": 0.435,
8
- "train_loss": 0.4545349882495019,
9
- "train_runtime": 740.5857,
10
- "train_samples": 1973,
11
- "train_samples_per_second": 2.664,
12
- "train_steps_per_second": 0.167
13
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.32460731267929077,
4
+ "eval_runtime": 7.1069,
5
+ "eval_samples": 180,
6
+ "eval_samples_per_second": 25.327,
7
+ "eval_steps_per_second": 0.422,
8
+ "train_loss": 0.38626466322653363,
9
+ "train_runtime": 734.6882,
10
+ "train_samples": 1604,
11
+ "train_samples_per_second": 2.183,
12
+ "train_steps_per_second": 0.137
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 0.2914734482765198,
4
- "eval_runtime": 9.1903,
5
- "eval_samples": 220,
6
- "eval_samples_per_second": 23.938,
7
- "eval_steps_per_second": 0.435
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 0.32460731267929077,
4
+ "eval_runtime": 7.1069,
5
+ "eval_samples": 180,
6
+ "eval_samples_per_second": 25.327,
7
+ "eval_steps_per_second": 0.422
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52728496fbfea53bbc11b6356ba8397dde6064309ce912f13c76a43f4abef491
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22380803012d497a3f200e23a07484869365dfb82529b70d138fbd4e99c3bcaf
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca10a93d50dcd47d5a06611c6213ed546bc2e842e0541852ca492ff221766327
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ab7da49019d1928eb38ac90ea51f2a2b66b90fb628c104cfdddd85c4a2507e
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:139ecc7ff75421346a0ffb2851a474c2fbc465c4d2393bba613f9b55bdea335b
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23432c8a06703eafc006e23d0dd7e5845c3cd6da82a76282f2200df1d288b876
3
  size 4540516344
runs/Feb08_21-56-24_node01/events.out.tfevents.1707400703.node01.374598.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d2eeb9d25c52cd2c2f58151d8cabe38ae2f09155602e146a2d79befaa0814ac
3
+ size 8258
runs/Feb08_21-56-24_node01/events.out.tfevents.1707401445.node01.374598.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcb20be6e11151d5a0298c60b0da76a5ac9e997cd86f670d80d1b5d5a4ea26df
3
+ size 354
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 2048,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.4545349882495019,
4
- "train_runtime": 740.5857,
5
- "train_samples": 1973,
6
- "train_samples_per_second": 2.664,
7
- "train_steps_per_second": 0.167
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.38626466322653363,
4
+ "train_runtime": 734.6882,
5
+ "train_samples": 1604,
6
+ "train_samples_per_second": 2.183,
7
+ "train_steps_per_second": 0.137
8
  }
trainer_state.json CHANGED
@@ -3,185 +3,161 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 124,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
- "learning_rate": 1.9996790752964305e-05,
14
- "loss": 0.7636,
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.04,
19
- "learning_rate": 1.991987177050743e-05,
20
- "loss": 2.6172,
21
  "step": 5
22
  },
23
  {
24
- "epoch": 0.08,
25
- "learning_rate": 1.9680771188662044e-05,
26
- "loss": 1.5854,
27
  "step": 10
28
  },
29
  {
30
- "epoch": 0.12,
31
- "learning_rate": 1.9286529995722624e-05,
32
- "loss": 0.6342,
33
  "step": 15
34
  },
35
  {
36
- "epoch": 0.16,
37
- "learning_rate": 1.8743466161445823e-05,
38
- "loss": 0.4285,
39
  "step": 20
40
  },
41
  {
42
- "epoch": 0.2,
43
- "learning_rate": 1.8060282634540053e-05,
44
- "loss": 0.4147,
45
  "step": 25
46
  },
47
  {
48
- "epoch": 0.24,
49
- "learning_rate": 1.72479278722912e-05,
50
- "loss": 0.3739,
51
  "step": 30
52
  },
53
  {
54
- "epoch": 0.28,
55
- "learning_rate": 1.631942038446304e-05,
56
- "loss": 0.3636,
57
  "step": 35
58
  },
59
  {
60
- "epoch": 0.32,
61
- "learning_rate": 1.5289640103269626e-05,
62
- "loss": 0.2994,
63
  "step": 40
64
  },
65
  {
66
- "epoch": 0.36,
67
- "learning_rate": 1.4175089922850633e-05,
68
- "loss": 0.2938,
69
  "step": 45
70
  },
71
  {
72
- "epoch": 0.4,
73
- "learning_rate": 1.2993631229733584e-05,
74
- "loss": 0.3058,
75
  "step": 50
76
  },
77
  {
78
- "epoch": 0.44,
79
- "learning_rate": 1.1764197662578087e-05,
80
- "loss": 0.3086,
81
  "step": 55
82
  },
83
  {
84
- "epoch": 0.48,
85
- "learning_rate": 1.0506491688387128e-05,
86
- "loss": 0.3109,
87
  "step": 60
88
  },
89
  {
90
- "epoch": 0.52,
91
- "learning_rate": 9.24066885774754e-06,
92
- "loss": 0.3275,
93
  "step": 65
94
  },
95
  {
96
- "epoch": 0.56,
97
- "learning_rate": 7.987014799113398e-06,
98
- "loss": 0.3047,
99
  "step": 70
100
  },
101
  {
102
- "epoch": 0.6,
103
- "learning_rate": 6.7656201285076195e-06,
104
- "loss": 0.2778,
105
  "step": 75
106
  },
107
  {
108
- "epoch": 0.65,
109
- "learning_rate": 5.5960584844236565e-06,
110
- "loss": 0.269,
111
  "step": 80
112
  },
113
  {
114
- "epoch": 0.69,
115
- "learning_rate": 4.497072847626087e-06,
116
- "loss": 0.3077,
117
  "step": 85
118
  },
119
  {
120
- "epoch": 0.73,
121
- "learning_rate": 3.48627517277778e-06,
122
- "loss": 0.3078,
123
  "step": 90
124
  },
125
  {
126
- "epoch": 0.77,
127
- "learning_rate": 2.5798641454908945e-06,
128
- "loss": 0.2905,
129
  "step": 95
130
  },
131
  {
132
- "epoch": 0.81,
133
- "learning_rate": 1.7923655879272395e-06,
134
- "loss": 0.2927,
135
  "step": 100
136
  },
137
- {
138
- "epoch": 0.85,
139
- "learning_rate": 1.1363996731159188e-06,
140
- "loss": 0.3442,
141
- "step": 105
142
- },
143
- {
144
- "epoch": 0.89,
145
- "learning_rate": 6.22478678529197e-07,
146
- "loss": 0.2968,
147
- "step": 110
148
- },
149
- {
150
- "epoch": 0.93,
151
- "learning_rate": 2.588385200461307e-07,
152
- "loss": 0.2628,
153
- "step": 115
154
- },
155
- {
156
- "epoch": 0.97,
157
- "learning_rate": 5.1306766081048456e-08,
158
- "loss": 0.2698,
159
- "step": 120
160
- },
161
  {
162
  "epoch": 1.0,
163
- "eval_loss": 0.2914734482765198,
164
- "eval_runtime": 11.0655,
165
- "eval_samples_per_second": 19.882,
166
- "eval_steps_per_second": 0.361,
167
- "step": 124
168
  },
169
  {
170
  "epoch": 1.0,
171
- "step": 124,
172
- "total_flos": 880074424320.0,
173
- "train_loss": 0.4545349882495019,
174
- "train_runtime": 740.5857,
175
- "train_samples_per_second": 2.664,
176
- "train_steps_per_second": 0.167
177
  }
178
  ],
179
  "logging_steps": 5,
180
- "max_steps": 124,
181
  "num_input_tokens_seen": 0,
182
  "num_train_epochs": 1,
183
  "save_steps": 500,
184
- "total_flos": 880074424320.0,
185
  "train_batch_size": 4,
186
  "trial_name": null,
187
  "trial_params": null
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 101,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
+ "learning_rate": 1.999516282291988e-05,
14
+ "loss": 0.8295,
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 0.05,
19
+ "learning_rate": 1.987930439740757e-05,
20
+ "loss": 1.3536,
21
  "step": 5
22
  },
23
  {
24
+ "epoch": 0.1,
25
+ "learning_rate": 1.95201310753273e-05,
26
+ "loss": 0.4367,
27
  "step": 10
28
  },
29
  {
30
+ "epoch": 0.15,
31
+ "learning_rate": 1.8931150161867917e-05,
32
+ "loss": 0.3533,
33
  "step": 15
34
  },
35
  {
36
+ "epoch": 0.2,
37
+ "learning_rate": 1.8126579138282502e-05,
38
+ "loss": 0.3654,
39
  "step": 20
40
  },
41
  {
42
+ "epoch": 0.25,
43
+ "learning_rate": 1.7125839641475074e-05,
44
+ "loss": 0.3764,
45
  "step": 25
46
  },
47
  {
48
+ "epoch": 0.3,
49
+ "learning_rate": 1.595308864276666e-05,
50
+ "loss": 0.3401,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 0.35,
55
+ "learning_rate": 1.4636635319853274e-05,
56
+ "loss": 0.3539,
57
  "step": 35
58
  },
59
  {
60
+ "epoch": 0.4,
61
+ "learning_rate": 1.3208257698153677e-05,
62
+ "loss": 0.3999,
63
  "step": 40
64
  },
65
  {
66
+ "epoch": 0.45,
67
+ "learning_rate": 1.1702435557223988e-05,
68
+ "loss": 0.3299,
69
  "step": 45
70
  },
71
  {
72
+ "epoch": 0.5,
73
+ "learning_rate": 1.0155518119203511e-05,
74
+ "loss": 0.2854,
75
  "step": 50
76
  },
77
  {
78
+ "epoch": 0.54,
79
+ "learning_rate": 8.604846610560771e-06,
80
+ "loss": 0.3799,
81
  "step": 55
82
  },
83
  {
84
+ "epoch": 0.59,
85
+ "learning_rate": 7.0878528777274814e-06,
86
+ "loss": 0.3321,
87
  "step": 60
88
  },
89
  {
90
+ "epoch": 0.64,
91
+ "learning_rate": 5.64115581524629e-06,
92
+ "loss": 0.369,
93
  "step": 65
94
  },
95
  {
96
+ "epoch": 0.69,
97
+ "learning_rate": 4.299677417862174e-06,
98
+ "loss": 0.3302,
99
  "step": 70
100
  },
101
  {
102
+ "epoch": 0.74,
103
+ "learning_rate": 3.0957997942825337e-06,
104
+ "loss": 0.3035,
105
  "step": 75
106
  },
107
  {
108
+ "epoch": 0.79,
109
+ "learning_rate": 2.058583491552465e-06,
110
+ "loss": 0.3097,
111
  "step": 80
112
  },
113
  {
114
+ "epoch": 0.84,
115
+ "learning_rate": 1.2130659990073146e-06,
116
+ "loss": 0.3248,
117
  "step": 85
118
  },
119
  {
120
+ "epoch": 0.89,
121
+ "learning_rate": 5.796573653001091e-07,
122
+ "loss": 0.2946,
123
  "step": 90
124
  },
125
  {
126
+ "epoch": 0.94,
127
+ "learning_rate": 1.7364751777736334e-07,
128
+ "loss": 0.2796,
129
  "step": 95
130
  },
131
  {
132
+ "epoch": 0.99,
133
+ "learning_rate": 4.837177080119215e-09,
134
+ "loss": 0.3042,
135
  "step": 100
136
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  {
138
  "epoch": 1.0,
139
+ "eval_loss": 0.32460731267929077,
140
+ "eval_runtime": 8.9558,
141
+ "eval_samples_per_second": 20.099,
142
+ "eval_steps_per_second": 0.335,
143
+ "step": 101
144
  },
145
  {
146
  "epoch": 1.0,
147
+ "step": 101,
148
+ "total_flos": 709723422720.0,
149
+ "train_loss": 0.38626466322653363,
150
+ "train_runtime": 734.6882,
151
+ "train_samples_per_second": 2.183,
152
+ "train_steps_per_second": 0.137
153
  }
154
  ],
155
  "logging_steps": 5,
156
+ "max_steps": 101,
157
  "num_input_tokens_seen": 0,
158
  "num_train_epochs": 1,
159
  "save_steps": 500,
160
+ "total_flos": 709723422720.0,
161
  "train_batch_size": 4,
162
  "trial_name": null,
163
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cadd1667db1395f170b945e2fac1f97fd899a914caea8b710ecb93ea39668cc3
3
  size 5307
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e159432ad0f082807d9e226e6b6d861f799fdcd0937de4ef57b98a9aeffb0073
3
  size 5307