TinyPixel committed on
Commit
54bac18
1 Parent(s): 27c9fc4

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. adapter_model.bin +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +133 -145
  6. training_args.bin +1 -1
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:677027e4409b490a593295b65f19efa300a5a1e9aba0cff392ca786175193366
3
  size 160069389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a914e52682550f8564955bcd75cde39fb1d446260e8c8c363d1a5c6a787b2423
3
  size 160069389
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a4c4dc4fc3b82c7e414e47ad4783fba119068cc7afd7fea70f21bd361051e0c
3
  size 320193565
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91674d4a52910440bb5bb312e287f4513fc044ee6d514529f25330adf4e70a4b
3
  size 320193565
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d78143d9be5aecff6d15eb9fff604018f5c22d556656dce9ab94663022a8f0ae
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fdbed67c3829622dfa48a2bbf084356503a2a890373c8bf0c98c2412e580c5
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c8c6aaf7abfed00c96e02dd330ed2bd374bffe362c92f8d5b77049e400c3168
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:094fe83ba6b00779a606e1c315d5190ccde76611e3c53c464d9f758dd4968766
3
  size 627
trainer_state.json CHANGED
@@ -1,289 +1,277 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 91,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.04,
13
- "learning_rate": 2.857142857142857e-05,
14
- "loss": 1.8496,
15
  "step": 2
16
  },
17
  {
18
- "epoch": 0.09,
19
- "learning_rate": 5.714285714285714e-05,
20
- "loss": 1.9673,
21
  "step": 4
22
  },
23
  {
24
- "epoch": 0.13,
25
- "learning_rate": 8.571428571428571e-05,
26
- "loss": 1.8491,
27
  "step": 6
28
  },
29
  {
30
- "epoch": 0.18,
31
- "learning_rate": 0.00011428571428571428,
32
- "loss": 1.9002,
33
  "step": 8
34
  },
35
  {
36
- "epoch": 0.22,
37
- "learning_rate": 0.00014285714285714287,
38
- "loss": 1.873,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.26,
43
- "learning_rate": 0.00017142857142857143,
44
- "loss": 1.9039,
45
  "step": 12
46
  },
47
  {
48
- "epoch": 0.31,
49
- "learning_rate": 0.0002,
50
- "loss": 1.8956,
51
  "step": 14
52
  },
53
  {
54
- "epoch": 0.35,
55
- "learning_rate": 0.0001966942148760331,
56
- "loss": 1.8618,
57
  "step": 16
58
  },
59
  {
60
- "epoch": 0.4,
61
- "learning_rate": 0.0001933884297520661,
62
- "loss": 1.7929,
63
  "step": 18
64
  },
65
  {
66
- "epoch": 0.44,
67
- "learning_rate": 0.0001900826446280992,
68
- "loss": 1.852,
69
  "step": 20
70
  },
71
  {
72
- "epoch": 0.48,
73
- "learning_rate": 0.00018677685950413224,
74
- "loss": 1.8218,
75
  "step": 22
76
  },
77
  {
78
- "epoch": 0.53,
79
- "learning_rate": 0.00018347107438016532,
80
- "loss": 1.8421,
81
  "step": 24
82
  },
83
  {
84
- "epoch": 0.57,
85
- "learning_rate": 0.00018016528925619835,
86
- "loss": 1.7531,
87
  "step": 26
88
  },
89
  {
90
- "epoch": 0.62,
91
- "learning_rate": 0.00017685950413223143,
92
- "loss": 1.7133,
93
  "step": 28
94
  },
95
  {
96
- "epoch": 0.66,
97
- "learning_rate": 0.00017520661157024794,
98
- "loss": 2.2706,
99
  "step": 30
100
  },
101
  {
102
- "epoch": 0.7,
103
- "learning_rate": 0.00017190082644628102,
104
- "loss": 1.7739,
105
  "step": 32
106
  },
107
  {
108
- "epoch": 0.75,
109
- "learning_rate": 0.00016859504132231404,
110
- "loss": 1.7082,
111
  "step": 34
112
  },
113
  {
114
- "epoch": 0.79,
115
- "learning_rate": 0.00016528925619834712,
116
- "loss": 1.7831,
117
  "step": 36
118
  },
119
  {
120
- "epoch": 0.84,
121
- "learning_rate": 0.00016198347107438017,
122
- "loss": 1.778,
123
  "step": 38
124
  },
125
  {
126
- "epoch": 0.88,
127
- "learning_rate": 0.00015867768595041322,
128
- "loss": 1.7482,
129
  "step": 40
130
  },
131
  {
132
- "epoch": 0.92,
133
- "learning_rate": 0.00015537190082644627,
134
- "loss": 1.8466,
135
  "step": 42
136
  },
137
  {
138
- "epoch": 0.97,
139
- "learning_rate": 0.00015206611570247935,
140
- "loss": 1.7918,
141
  "step": 44
142
  },
143
  {
144
- "epoch": 1.01,
145
- "learning_rate": 0.0001487603305785124,
146
- "loss": 1.7058,
147
  "step": 46
148
  },
149
  {
150
- "epoch": 1.05,
151
- "learning_rate": 0.00014545454545454546,
152
- "loss": 1.7294,
153
  "step": 48
154
  },
155
  {
156
- "epoch": 1.1,
157
- "learning_rate": 0.0001421487603305785,
158
- "loss": 1.7285,
159
  "step": 50
160
  },
161
  {
162
- "epoch": 1.14,
163
- "learning_rate": 0.0001388429752066116,
164
- "loss": 1.6392,
165
  "step": 52
166
  },
167
  {
168
- "epoch": 1.19,
169
- "learning_rate": 0.00013553719008264464,
170
- "loss": 1.712,
171
  "step": 54
172
  },
173
  {
174
- "epoch": 1.23,
175
- "learning_rate": 0.0001322314049586777,
176
- "loss": 1.6992,
177
  "step": 56
178
  },
179
  {
180
- "epoch": 1.27,
181
- "learning_rate": 0.00012892561983471074,
182
- "loss": 1.7677,
183
  "step": 58
184
  },
185
  {
186
- "epoch": 1.32,
187
- "learning_rate": 0.00012561983471074382,
188
- "loss": 1.7096,
189
  "step": 60
190
  },
191
  {
192
- "epoch": 1.36,
193
- "learning_rate": 0.00012231404958677685,
194
- "loss": 1.7602,
195
  "step": 62
196
  },
197
  {
198
- "epoch": 1.41,
199
- "learning_rate": 0.00011900826446280992,
200
- "loss": 1.7128,
201
  "step": 64
202
  },
203
  {
204
- "epoch": 1.45,
205
- "learning_rate": 0.00011570247933884298,
206
- "loss": 1.6605,
207
  "step": 66
208
  },
209
  {
210
- "epoch": 1.49,
211
- "learning_rate": 0.00011239669421487604,
212
- "loss": 1.6559,
213
  "step": 68
214
  },
215
  {
216
- "epoch": 1.54,
217
- "learning_rate": 0.00010909090909090909,
218
- "loss": 1.6775,
219
  "step": 70
220
  },
221
  {
222
- "epoch": 1.58,
223
- "learning_rate": 0.00010578512396694216,
224
- "loss": 1.8318,
225
  "step": 72
226
  },
227
  {
228
- "epoch": 1.63,
229
- "learning_rate": 0.00010247933884297521,
230
- "loss": 1.7568,
231
  "step": 74
232
  },
233
  {
234
- "epoch": 1.67,
235
- "learning_rate": 9.917355371900827e-05,
236
- "loss": 1.7349,
237
  "step": 76
238
  },
239
  {
240
- "epoch": 1.71,
241
- "learning_rate": 9.586776859504133e-05,
242
- "loss": 1.7956,
243
  "step": 78
244
  },
245
  {
246
- "epoch": 1.76,
247
- "learning_rate": 9.256198347107439e-05,
248
- "loss": 1.684,
249
  "step": 80
250
  },
251
  {
252
- "epoch": 1.8,
253
- "learning_rate": 8.925619834710744e-05,
254
- "loss": 1.6579,
255
  "step": 82
256
  },
257
  {
258
- "epoch": 1.85,
259
- "learning_rate": 8.595041322314051e-05,
260
- "loss": 1.7188,
261
  "step": 84
262
  },
263
  {
264
- "epoch": 1.89,
265
- "learning_rate": 8.264462809917356e-05,
266
- "loss": 1.6533,
267
  "step": 86
268
- },
269
- {
270
- "epoch": 1.93,
271
- "learning_rate": 8.099173553719009e-05,
272
- "loss": 1.9246,
273
- "step": 88
274
- },
275
- {
276
- "epoch": 1.98,
277
- "learning_rate": 7.768595041322314e-05,
278
- "loss": 1.7969,
279
- "step": 90
280
  }
281
  ],
282
  "logging_steps": 2,
283
- "max_steps": 135,
284
  "num_train_epochs": 3,
285
  "save_steps": 500,
286
- "total_flos": 5.946469637318246e+16,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9935483870967743,
5
  "eval_steps": 500,
6
+ "global_step": 87,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.07,
13
+ "learning_rate": 4.4444444444444447e-05,
14
+ "loss": 1.912,
15
  "step": 2
16
  },
17
  {
18
+ "epoch": 0.14,
19
+ "learning_rate": 8.888888888888889e-05,
20
+ "loss": 2.0591,
21
  "step": 4
22
  },
23
  {
24
+ "epoch": 0.21,
25
+ "learning_rate": 0.00013333333333333334,
26
+ "loss": 1.9454,
27
  "step": 6
28
  },
29
  {
30
+ "epoch": 0.28,
31
+ "learning_rate": 0.00017777777777777779,
32
+ "loss": 1.9692,
33
  "step": 8
34
  },
35
  {
36
+ "epoch": 0.34,
37
+ "learning_rate": 0.00019743589743589744,
38
+ "loss": 1.849,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.41,
43
+ "learning_rate": 0.00019230769230769233,
44
+ "loss": 1.7833,
45
  "step": 12
46
  },
47
  {
48
+ "epoch": 0.48,
49
+ "learning_rate": 0.0001871794871794872,
50
+ "loss": 1.8451,
51
  "step": 14
52
  },
53
  {
54
+ "epoch": 0.55,
55
+ "learning_rate": 0.00018205128205128207,
56
+ "loss": 1.8017,
57
  "step": 16
58
  },
59
  {
60
+ "epoch": 0.62,
61
+ "learning_rate": 0.00017692307692307693,
62
+ "loss": 1.7176,
63
  "step": 18
64
  },
65
  {
66
+ "epoch": 0.69,
67
+ "learning_rate": 0.0001717948717948718,
68
+ "loss": 1.8041,
69
  "step": 20
70
  },
71
  {
72
+ "epoch": 0.76,
73
+ "learning_rate": 0.0001666666666666667,
74
+ "loss": 1.6708,
75
  "step": 22
76
  },
77
  {
78
+ "epoch": 0.83,
79
+ "learning_rate": 0.00016153846153846155,
80
+ "loss": 1.7398,
81
  "step": 24
82
  },
83
  {
84
+ "epoch": 0.89,
85
+ "learning_rate": 0.00015641025641025642,
86
+ "loss": 1.7176,
87
  "step": 26
88
  },
89
  {
90
+ "epoch": 0.96,
91
+ "learning_rate": 0.00015128205128205128,
92
+ "loss": 1.7141,
93
  "step": 28
94
  },
95
  {
96
+ "epoch": 1.03,
97
+ "learning_rate": 0.00014615384615384615,
98
+ "loss": 1.7503,
99
  "step": 30
100
  },
101
  {
102
+ "epoch": 1.1,
103
+ "learning_rate": 0.00014102564102564104,
104
+ "loss": 1.6686,
105
  "step": 32
106
  },
107
  {
108
+ "epoch": 1.17,
109
+ "learning_rate": 0.0001358974358974359,
110
+ "loss": 1.723,
111
  "step": 34
112
  },
113
  {
114
+ "epoch": 1.24,
115
+ "learning_rate": 0.00013076923076923077,
116
+ "loss": 1.7153,
117
  "step": 36
118
  },
119
  {
120
+ "epoch": 1.31,
121
+ "learning_rate": 0.00012564102564102564,
122
+ "loss": 1.6379,
123
  "step": 38
124
  },
125
  {
126
+ "epoch": 1.38,
127
+ "learning_rate": 0.00012051282051282052,
128
+ "loss": 1.8047,
129
  "step": 40
130
  },
131
  {
132
+ "epoch": 1.45,
133
+ "learning_rate": 0.00011538461538461538,
134
+ "loss": 1.719,
135
  "step": 42
136
  },
137
  {
138
+ "epoch": 1.51,
139
+ "learning_rate": 0.00011025641025641027,
140
+ "loss": 1.6785,
141
  "step": 44
142
  },
143
  {
144
+ "epoch": 1.58,
145
+ "learning_rate": 0.00010512820512820514,
146
+ "loss": 1.6407,
147
  "step": 46
148
  },
149
  {
150
+ "epoch": 1.65,
151
+ "learning_rate": 0.0001,
152
+ "loss": 1.6414,
153
  "step": 48
154
  },
155
  {
156
+ "epoch": 1.72,
157
+ "learning_rate": 9.487179487179487e-05,
158
+ "loss": 1.6422,
159
  "step": 50
160
  },
161
  {
162
+ "epoch": 1.79,
163
+ "learning_rate": 8.974358974358975e-05,
164
+ "loss": 1.7277,
165
  "step": 52
166
  },
167
  {
168
+ "epoch": 1.86,
169
+ "learning_rate": 8.461538461538461e-05,
170
+ "loss": 1.6827,
171
  "step": 54
172
  },
173
  {
174
+ "epoch": 1.93,
175
+ "learning_rate": 7.948717948717948e-05,
176
+ "loss": 1.721,
177
  "step": 56
178
  },
179
  {
180
+ "epoch": 2.0,
181
+ "learning_rate": 7.435897435897436e-05,
182
+ "loss": 1.7153,
183
  "step": 58
184
  },
185
  {
186
+ "epoch": 2.06,
187
+ "learning_rate": 6.923076923076924e-05,
188
+ "loss": 1.5865,
189
  "step": 60
190
  },
191
  {
192
+ "epoch": 2.13,
193
+ "learning_rate": 6.410256410256412e-05,
194
+ "loss": 1.7077,
195
  "step": 62
196
  },
197
  {
198
+ "epoch": 2.2,
199
+ "learning_rate": 5.897435897435898e-05,
200
+ "loss": 1.664,
201
  "step": 64
202
  },
203
  {
204
+ "epoch": 2.27,
205
+ "learning_rate": 5.384615384615385e-05,
206
+ "loss": 1.7481,
207
  "step": 66
208
  },
209
  {
210
+ "epoch": 2.34,
211
+ "learning_rate": 4.871794871794872e-05,
212
+ "loss": 1.7459,
213
  "step": 68
214
  },
215
  {
216
+ "epoch": 2.41,
217
+ "learning_rate": 4.358974358974359e-05,
218
+ "loss": 1.7067,
219
  "step": 70
220
  },
221
  {
222
+ "epoch": 2.48,
223
+ "learning_rate": 3.846153846153846e-05,
224
+ "loss": 1.6086,
225
  "step": 72
226
  },
227
  {
228
+ "epoch": 2.55,
229
+ "learning_rate": 3.3333333333333335e-05,
230
+ "loss": 1.5464,
231
  "step": 74
232
  },
233
  {
234
+ "epoch": 2.62,
235
+ "learning_rate": 2.8205128205128207e-05,
236
+ "loss": 1.5403,
237
  "step": 76
238
  },
239
  {
240
+ "epoch": 2.68,
241
+ "learning_rate": 2.307692307692308e-05,
242
+ "loss": 1.5363,
243
  "step": 78
244
  },
245
  {
246
+ "epoch": 2.75,
247
+ "learning_rate": 1.794871794871795e-05,
248
+ "loss": 1.6695,
249
  "step": 80
250
  },
251
  {
252
+ "epoch": 2.82,
253
+ "learning_rate": 1.282051282051282e-05,
254
+ "loss": 1.5652,
255
  "step": 82
256
  },
257
  {
258
+ "epoch": 2.89,
259
+ "learning_rate": 7.692307692307694e-06,
260
+ "loss": 1.6139,
261
  "step": 84
262
  },
263
  {
264
+ "epoch": 2.96,
265
+ "learning_rate": 2.564102564102564e-06,
266
+ "loss": 1.6482,
267
  "step": 86
268
  }
269
  ],
270
  "logging_steps": 2,
271
+ "max_steps": 87,
272
  "num_train_epochs": 3,
273
  "save_steps": 500,
274
+ "total_flos": 5.685086356557005e+16,
275
  "trial_name": null,
276
  "trial_params": null
277
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6660384e2521611c02ca201838bbcb32971b9104a07ca83aeebb90ae8d66cba1
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2cc6016d4f33d3436bd7d0abd53631d8fb77e5aa03ef5a3f588886acfcc626
3
  size 4027