quyanh committed on
Commit
7c50a3c
1 Parent(s): 0c07ad8

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -18,6 +18,7 @@ base_model: mistralai/Mistral-7B-v0.1
18
 
19
 
20
  - **Developed by:** [More Information Needed]
 
21
  - **Shared by [optional]:** [More Information Needed]
22
  - **Model type:** [More Information Needed]
23
  - **Language(s) (NLP):** [More Information Needed]
@@ -76,7 +77,7 @@ Use the code below to get started with the model.
76
 
77
  ### Training Data
78
 
79
- <!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
80
 
81
  [More Information Needed]
82
 
@@ -107,7 +108,7 @@ Use the code below to get started with the model.
107
 
108
  #### Testing Data
109
 
110
- <!-- This should link to a Data Card if possible. -->
111
 
112
  [More Information Needed]
113
 
 
18
 
19
 
20
  - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
  - **Shared by [optional]:** [More Information Needed]
23
  - **Model type:** [More Information Needed]
24
  - **Language(s) (NLP):** [More Information Needed]
 
77
 
78
  ### Training Data
79
 
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
 
82
  [More Information Needed]
83
 
 
108
 
109
  #### Testing Data
110
 
111
+ <!-- This should link to a Dataset Card if possible. -->
112
 
113
  [More Information Needed]
114
 
adapter_config.json CHANGED
@@ -16,14 +16,14 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "lm_head",
20
- "down_proj",
21
  "q_proj",
22
  "up_proj",
 
23
  "k_proj",
24
  "v_proj",
25
- "gate_proj",
26
- "o_proj"
27
  ],
28
  "task_type": "CAUSAL_LM"
29
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "o_proj",
 
20
  "q_proj",
21
  "up_proj",
22
+ "down_proj",
23
  "k_proj",
24
  "v_proj",
25
+ "lm_head",
26
+ "gate_proj"
27
  ],
28
  "task_type": "CAUSAL_LM"
29
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6efe7d5921665fda917c906f1427a1880a0089b02cd0f7eb51b99a6d43957ac
3
  size 85100592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c9908eaf5b57e25f51feb62dce86a0ca6495fd6cffe0db54f46a8d0e35c2ee
3
  size 85100592
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5600b3956d0427774a54d7f3bf3b7938a23d41b7b69dc207ab3e78e2479d7f37
3
  size 43127132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b0f3b68162daa429f96d6924f555c99d0a088ac6eff7504061c06826bdc554
3
  size 43127132
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad9dc56dfc01b966639cac8cd8f049d1b8d912aa8e90419adc5b16ff7e57382f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffed6d4f513eefea1bbd2e6d94b0c2ee7dba23cd38e7ff8e5de5c5fa5fdae612
3
  size 14244
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8506113769271664,
5
  "eval_steps": 20,
6
  "global_step": 400,
7
  "is_hyper_param_search": false,
@@ -11,281 +11,281 @@
11
  {
12
  "epoch": 0.04,
13
  "learning_rate": 1.9325842696629215e-05,
14
- "loss": 1.8391,
15
  "step": 20
16
  },
17
  {
18
  "epoch": 0.04,
19
- "eval_loss": 1.7458518743515015,
20
- "eval_runtime": 218.3179,
21
- "eval_samples_per_second": 1.928,
22
- "eval_steps_per_second": 0.197,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.09,
27
  "learning_rate": 1.8426966292134835e-05,
28
- "loss": 1.7536,
29
  "step": 40
30
  },
31
  {
32
  "epoch": 0.09,
33
- "eval_loss": 1.693428635597229,
34
- "eval_runtime": 218.0486,
35
- "eval_samples_per_second": 1.931,
36
- "eval_steps_per_second": 0.197,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.13,
41
  "learning_rate": 1.752808988764045e-05,
42
- "loss": 1.7346,
43
  "step": 60
44
  },
45
  {
46
  "epoch": 0.13,
47
- "eval_loss": 1.6598807573318481,
48
- "eval_runtime": 218.036,
49
- "eval_samples_per_second": 1.931,
50
- "eval_steps_per_second": 0.197,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.17,
55
  "learning_rate": 1.662921348314607e-05,
56
- "loss": 1.6951,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.17,
61
- "eval_loss": 1.639635443687439,
62
- "eval_runtime": 218.0597,
63
- "eval_samples_per_second": 1.931,
64
- "eval_steps_per_second": 0.197,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.21,
69
  "learning_rate": 1.5730337078651687e-05,
70
- "loss": 1.6508,
71
  "step": 100
72
  },
73
  {
74
  "epoch": 0.21,
75
- "eval_loss": 1.626449465751648,
76
- "eval_runtime": 218.0237,
77
- "eval_samples_per_second": 1.931,
78
- "eval_steps_per_second": 0.197,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.26,
83
  "learning_rate": 1.4831460674157305e-05,
84
- "loss": 1.6245,
85
  "step": 120
86
  },
87
  {
88
  "epoch": 0.26,
89
- "eval_loss": 1.616517186164856,
90
- "eval_runtime": 218.0987,
91
- "eval_samples_per_second": 1.93,
92
- "eval_steps_per_second": 0.197,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.3,
97
  "learning_rate": 1.3932584269662923e-05,
98
- "loss": 1.6023,
99
  "step": 140
100
  },
101
  {
102
  "epoch": 0.3,
103
- "eval_loss": 1.6096081733703613,
104
- "eval_runtime": 218.0759,
105
- "eval_samples_per_second": 1.931,
106
- "eval_steps_per_second": 0.197,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.34,
111
  "learning_rate": 1.303370786516854e-05,
112
- "loss": 1.6259,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 0.34,
117
- "eval_loss": 1.6037150621414185,
118
- "eval_runtime": 218.0228,
119
- "eval_samples_per_second": 1.931,
120
- "eval_steps_per_second": 0.197,
121
  "step": 160
122
  },
123
  {
124
- "epoch": 0.38,
125
  "learning_rate": 1.213483146067416e-05,
126
- "loss": 1.6116,
127
  "step": 180
128
  },
129
  {
130
- "epoch": 0.38,
131
- "eval_loss": 1.599488377571106,
132
- "eval_runtime": 218.0297,
133
- "eval_samples_per_second": 1.931,
134
- "eval_steps_per_second": 0.197,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.43,
139
  "learning_rate": 1.1235955056179778e-05,
140
- "loss": 1.6288,
141
  "step": 200
142
  },
143
  {
144
  "epoch": 0.43,
145
- "eval_loss": 1.595850944519043,
146
- "eval_runtime": 218.1168,
147
- "eval_samples_per_second": 1.93,
148
- "eval_steps_per_second": 0.197,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.47,
153
  "learning_rate": 1.0337078651685396e-05,
154
- "loss": 1.6328,
155
  "step": 220
156
  },
157
  {
158
  "epoch": 0.47,
159
- "eval_loss": 1.5929853916168213,
160
- "eval_runtime": 218.0481,
161
- "eval_samples_per_second": 1.931,
162
- "eval_steps_per_second": 0.197,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.51,
167
  "learning_rate": 9.438202247191012e-06,
168
- "loss": 1.622,
169
  "step": 240
170
  },
171
  {
172
  "epoch": 0.51,
173
- "eval_loss": 1.590191125869751,
174
- "eval_runtime": 218.0232,
175
- "eval_samples_per_second": 1.931,
176
- "eval_steps_per_second": 0.197,
177
  "step": 240
178
  },
179
  {
180
- "epoch": 0.55,
181
  "learning_rate": 8.53932584269663e-06,
182
- "loss": 1.5966,
183
  "step": 260
184
  },
185
  {
186
- "epoch": 0.55,
187
- "eval_loss": 1.5878028869628906,
188
- "eval_runtime": 218.0617,
189
- "eval_samples_per_second": 1.931,
190
- "eval_steps_per_second": 0.197,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.6,
195
  "learning_rate": 7.640449438202247e-06,
196
- "loss": 1.6094,
197
  "step": 280
198
  },
199
  {
200
  "epoch": 0.6,
201
- "eval_loss": 1.5858081579208374,
202
- "eval_runtime": 218.1219,
203
- "eval_samples_per_second": 1.93,
204
- "eval_steps_per_second": 0.197,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.64,
209
  "learning_rate": 6.741573033707865e-06,
210
- "loss": 1.5934,
211
  "step": 300
212
  },
213
  {
214
  "epoch": 0.64,
215
- "eval_loss": 1.584080696105957,
216
- "eval_runtime": 218.0609,
217
- "eval_samples_per_second": 1.931,
218
- "eval_steps_per_second": 0.197,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.68,
223
  "learning_rate": 5.842696629213483e-06,
224
- "loss": 1.59,
225
  "step": 320
226
  },
227
  {
228
  "epoch": 0.68,
229
- "eval_loss": 1.5824154615402222,
230
- "eval_runtime": 218.0845,
231
- "eval_samples_per_second": 1.93,
232
- "eval_steps_per_second": 0.197,
233
  "step": 320
234
  },
235
  {
236
- "epoch": 0.72,
237
  "learning_rate": 4.943820224719101e-06,
238
- "loss": 1.6134,
239
  "step": 340
240
  },
241
  {
242
- "epoch": 0.72,
243
- "eval_loss": 1.5810182094573975,
244
- "eval_runtime": 218.1022,
245
- "eval_samples_per_second": 1.93,
246
- "eval_steps_per_second": 0.197,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.77,
251
  "learning_rate": 4.04494382022472e-06,
252
- "loss": 1.5954,
253
  "step": 360
254
  },
255
  {
256
  "epoch": 0.77,
257
- "eval_loss": 1.5799251794815063,
258
- "eval_runtime": 218.1537,
259
- "eval_samples_per_second": 1.93,
260
- "eval_steps_per_second": 0.197,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.81,
265
  "learning_rate": 3.146067415730337e-06,
266
- "loss": 1.5973,
267
  "step": 380
268
  },
269
  {
270
  "epoch": 0.81,
271
- "eval_loss": 1.579064130783081,
272
- "eval_runtime": 218.0366,
273
- "eval_samples_per_second": 1.931,
274
- "eval_steps_per_second": 0.197,
275
  "step": 380
276
  },
277
  {
278
- "epoch": 0.85,
279
  "learning_rate": 2.2471910112359554e-06,
280
- "loss": 1.5468,
281
  "step": 400
282
  },
283
  {
284
- "epoch": 0.85,
285
- "eval_loss": 1.5782713890075684,
286
- "eval_runtime": 218.0535,
287
- "eval_samples_per_second": 1.931,
288
- "eval_steps_per_second": 0.197,
289
  "step": 400
290
  }
291
  ],
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8560727661851257,
5
  "eval_steps": 20,
6
  "global_step": 400,
7
  "is_hyper_param_search": false,
 
11
  {
12
  "epoch": 0.04,
13
  "learning_rate": 1.9325842696629215e-05,
14
+ "loss": 1.8283,
15
  "step": 20
16
  },
17
  {
18
  "epoch": 0.04,
19
+ "eval_loss": 1.7597882747650146,
20
+ "eval_runtime": 167.9254,
21
+ "eval_samples_per_second": 2.65,
22
+ "eval_steps_per_second": 0.268,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.09,
27
  "learning_rate": 1.8426966292134835e-05,
28
+ "loss": 1.7181,
29
  "step": 40
30
  },
31
  {
32
  "epoch": 0.09,
33
+ "eval_loss": 1.7029485702514648,
34
+ "eval_runtime": 168.0408,
35
+ "eval_samples_per_second": 2.648,
36
+ "eval_steps_per_second": 0.268,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.13,
41
  "learning_rate": 1.752808988764045e-05,
42
+ "loss": 1.6777,
43
  "step": 60
44
  },
45
  {
46
  "epoch": 0.13,
47
+ "eval_loss": 1.6699800491333008,
48
+ "eval_runtime": 167.9993,
49
+ "eval_samples_per_second": 2.649,
50
+ "eval_steps_per_second": 0.268,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.17,
55
  "learning_rate": 1.662921348314607e-05,
56
+ "loss": 1.6555,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.17,
61
+ "eval_loss": 1.6510975360870361,
62
+ "eval_runtime": 168.0236,
63
+ "eval_samples_per_second": 2.648,
64
+ "eval_steps_per_second": 0.268,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.21,
69
  "learning_rate": 1.5730337078651687e-05,
70
+ "loss": 1.6371,
71
  "step": 100
72
  },
73
  {
74
  "epoch": 0.21,
75
+ "eval_loss": 1.6391140222549438,
76
+ "eval_runtime": 167.9874,
77
+ "eval_samples_per_second": 2.649,
78
+ "eval_steps_per_second": 0.268,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.26,
83
  "learning_rate": 1.4831460674157305e-05,
84
+ "loss": 1.638,
85
  "step": 120
86
  },
87
  {
88
  "epoch": 0.26,
89
+ "eval_loss": 1.6307817697525024,
90
+ "eval_runtime": 167.9676,
91
+ "eval_samples_per_second": 2.649,
92
+ "eval_steps_per_second": 0.268,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.3,
97
  "learning_rate": 1.3932584269662923e-05,
98
+ "loss": 1.6334,
99
  "step": 140
100
  },
101
  {
102
  "epoch": 0.3,
103
+ "eval_loss": 1.6237455606460571,
104
+ "eval_runtime": 167.9795,
105
+ "eval_samples_per_second": 2.649,
106
+ "eval_steps_per_second": 0.268,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.34,
111
  "learning_rate": 1.303370786516854e-05,
112
+ "loss": 1.6568,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 0.34,
117
+ "eval_loss": 1.6185747385025024,
118
+ "eval_runtime": 167.9491,
119
+ "eval_samples_per_second": 2.65,
120
+ "eval_steps_per_second": 0.268,
121
  "step": 160
122
  },
123
  {
124
+ "epoch": 0.39,
125
  "learning_rate": 1.213483146067416e-05,
126
+ "loss": 1.6095,
127
  "step": 180
128
  },
129
  {
130
+ "epoch": 0.39,
131
+ "eval_loss": 1.6145987510681152,
132
+ "eval_runtime": 167.8883,
133
+ "eval_samples_per_second": 2.651,
134
+ "eval_steps_per_second": 0.268,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.43,
139
  "learning_rate": 1.1235955056179778e-05,
140
+ "loss": 1.6347,
141
  "step": 200
142
  },
143
  {
144
  "epoch": 0.43,
145
+ "eval_loss": 1.6116222143173218,
146
+ "eval_runtime": 167.9012,
147
+ "eval_samples_per_second": 2.65,
148
+ "eval_steps_per_second": 0.268,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.47,
153
  "learning_rate": 1.0337078651685396e-05,
154
+ "loss": 1.6201,
155
  "step": 220
156
  },
157
  {
158
  "epoch": 0.47,
159
+ "eval_loss": 1.608642339706421,
160
+ "eval_runtime": 168.0086,
161
+ "eval_samples_per_second": 2.649,
162
+ "eval_steps_per_second": 0.268,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.51,
167
  "learning_rate": 9.438202247191012e-06,
168
+ "loss": 1.5781,
169
  "step": 240
170
  },
171
  {
172
  "epoch": 0.51,
173
+ "eval_loss": 1.6063767671585083,
174
+ "eval_runtime": 167.9056,
175
+ "eval_samples_per_second": 2.65,
176
+ "eval_steps_per_second": 0.268,
177
  "step": 240
178
  },
179
  {
180
+ "epoch": 0.56,
181
  "learning_rate": 8.53932584269663e-06,
182
+ "loss": 1.6158,
183
  "step": 260
184
  },
185
  {
186
+ "epoch": 0.56,
187
+ "eval_loss": 1.6039100885391235,
188
+ "eval_runtime": 286.9359,
189
+ "eval_samples_per_second": 1.551,
190
+ "eval_steps_per_second": 0.157,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.6,
195
  "learning_rate": 7.640449438202247e-06,
196
+ "loss": 1.6098,
197
  "step": 280
198
  },
199
  {
200
  "epoch": 0.6,
201
+ "eval_loss": 1.602189540863037,
202
+ "eval_runtime": 336.0332,
203
+ "eval_samples_per_second": 1.324,
204
+ "eval_steps_per_second": 0.134,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.64,
209
  "learning_rate": 6.741573033707865e-06,
210
+ "loss": 1.6083,
211
  "step": 300
212
  },
213
  {
214
  "epoch": 0.64,
215
+ "eval_loss": 1.6008193492889404,
216
+ "eval_runtime": 333.4484,
217
+ "eval_samples_per_second": 1.335,
218
+ "eval_steps_per_second": 0.135,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.68,
223
  "learning_rate": 5.842696629213483e-06,
224
+ "loss": 1.596,
225
  "step": 320
226
  },
227
  {
228
  "epoch": 0.68,
229
+ "eval_loss": 1.5991967916488647,
230
+ "eval_runtime": 333.495,
231
+ "eval_samples_per_second": 1.334,
232
+ "eval_steps_per_second": 0.135,
233
  "step": 320
234
  },
235
  {
236
+ "epoch": 0.73,
237
  "learning_rate": 4.943820224719101e-06,
238
+ "loss": 1.6007,
239
  "step": 340
240
  },
241
  {
242
+ "epoch": 0.73,
243
+ "eval_loss": 1.598080039024353,
244
+ "eval_runtime": 333.0437,
245
+ "eval_samples_per_second": 1.336,
246
+ "eval_steps_per_second": 0.135,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.77,
251
  "learning_rate": 4.04494382022472e-06,
252
+ "loss": 1.6234,
253
  "step": 360
254
  },
255
  {
256
  "epoch": 0.77,
257
+ "eval_loss": 1.5969579219818115,
258
+ "eval_runtime": 336.627,
259
+ "eval_samples_per_second": 1.322,
260
+ "eval_steps_per_second": 0.134,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.81,
265
  "learning_rate": 3.146067415730337e-06,
266
+ "loss": 1.6078,
267
  "step": 380
268
  },
269
  {
270
  "epoch": 0.81,
271
+ "eval_loss": 1.5960769653320312,
272
+ "eval_runtime": 333.84,
273
+ "eval_samples_per_second": 1.333,
274
+ "eval_steps_per_second": 0.135,
275
  "step": 380
276
  },
277
  {
278
+ "epoch": 0.86,
279
  "learning_rate": 2.2471910112359554e-06,
280
+ "loss": 1.5983,
281
  "step": 400
282
  },
283
  {
284
+ "epoch": 0.86,
285
+ "eval_loss": 1.5955034494400024,
286
+ "eval_runtime": 335.6186,
287
+ "eval_samples_per_second": 1.326,
288
+ "eval_steps_per_second": 0.134,
289
  "step": 400
290
  }
291
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18f22170b63f557ce8ed503ea5362cfa3b2bf6a88a049f98dbc25116e2a6df00
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75dba7c5f891acdcd82aeb8bc8677541948c16cf3e42df5cb5a12220cc740514
3
  size 4664