TinyPixel committed on
Commit
2eb8fa7
1 Parent(s): fce6e26

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d2f9e238aa5a49d602e835a5896672d7a3190bfefa9629bb3bba1569670bfe3
3
- size 100300298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98bd65f0aad14bf8362d393a7ffc29de49dfbf711f8e7b01f3a61e31614a1b4b
3
+ size 100299853
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90e39b978dcdf99edabb64ecd69bdf0397ca86edf1ff11e8f3608e417a9d55bb
3
- size 200654930
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d3dad79229c5f41a5efd216c5e8c0d3d05ac696ceb194d9af6b051dc4721ec7
3
+ size 200654493
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:049c26b844b79121ddd8379f7f69194e63f6fbf6aa007eeac0c66f17eebb8893
3
- size 888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c152074a486243089e4fc0fdee0a373a30fb0e0a6e40eb5fd0d36fdafc97a155
3
+ size 443
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0d90839c1d2b909c1e5054d9a87b1c188f1ce2eeb0eec32bf435ffe51732a6
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7518176d5848fbf2c861708ada9e33d1db0810678c0719528a70505e1263858b
3
+ size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55453edc7ae4b6242ad6bf3479593988d77a7dd0d971dd86170c4caeeca3a0e8
3
- size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa261fcaa1099f210643b90fd45cdba8d6a304a680cda31e9c953c42c2ba5be6
3
+ size 627
trainer_state.json CHANGED
@@ -1,325 +1,385 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.978102189781022,
5
  "eval_steps": 500,
6
- "global_step": 102,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.06,
13
- "learning_rate": 3.6363636363636364e-05,
14
- "loss": 1.8122,
15
  "step": 2
16
  },
17
  {
18
- "epoch": 0.12,
19
- "learning_rate": 7.272727272727273e-05,
20
- "loss": 1.6058,
21
  "step": 4
22
  },
23
  {
24
- "epoch": 0.18,
25
- "learning_rate": 0.00010909090909090909,
26
- "loss": 1.5108,
27
  "step": 6
28
  },
29
  {
30
- "epoch": 0.23,
31
- "learning_rate": 0.00014545454545454546,
32
- "loss": 1.9854,
33
  "step": 8
34
  },
35
  {
36
- "epoch": 0.29,
37
- "learning_rate": 0.00018181818181818183,
38
- "loss": 1.4263,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.35,
43
- "learning_rate": 0.0001978021978021978,
44
- "loss": 1.6944,
45
  "step": 12
46
  },
47
  {
48
- "epoch": 0.41,
49
- "learning_rate": 0.00019340659340659342,
50
- "loss": 1.5822,
51
  "step": 14
52
  },
53
  {
54
- "epoch": 0.47,
55
- "learning_rate": 0.00018901098901098903,
56
- "loss": 1.5416,
57
  "step": 16
58
  },
59
  {
60
- "epoch": 0.53,
61
- "learning_rate": 0.00018461538461538463,
62
- "loss": 1.7547,
63
  "step": 18
64
  },
65
  {
66
- "epoch": 0.58,
67
- "learning_rate": 0.00018021978021978024,
68
- "loss": 1.6478,
69
  "step": 20
70
  },
71
  {
72
- "epoch": 0.64,
73
- "learning_rate": 0.00017582417582417582,
74
- "loss": 1.7881,
75
  "step": 22
76
  },
77
  {
78
- "epoch": 0.7,
79
- "learning_rate": 0.00017142857142857143,
80
- "loss": 1.5457,
81
  "step": 24
82
  },
83
  {
84
- "epoch": 0.76,
85
- "learning_rate": 0.00016703296703296706,
86
- "loss": 1.6142,
87
  "step": 26
88
  },
89
  {
90
- "epoch": 0.82,
91
- "learning_rate": 0.00016263736263736264,
92
- "loss": 1.4016,
93
  "step": 28
94
  },
95
  {
96
- "epoch": 0.88,
97
- "learning_rate": 0.00015824175824175824,
98
- "loss": 1.6815,
99
  "step": 30
100
  },
101
  {
102
- "epoch": 0.93,
103
- "learning_rate": 0.00015384615384615385,
104
- "loss": 1.6923,
105
  "step": 32
106
  },
107
  {
108
- "epoch": 0.99,
109
- "learning_rate": 0.00014945054945054946,
110
- "loss": 1.4493,
111
  "step": 34
112
  },
113
  {
114
- "epoch": 1.05,
115
- "learning_rate": 0.00014505494505494506,
116
- "loss": 1.3942,
117
  "step": 36
118
  },
119
  {
120
- "epoch": 1.11,
121
- "learning_rate": 0.00014065934065934067,
122
- "loss": 1.9206,
123
  "step": 38
124
  },
125
  {
126
- "epoch": 1.17,
127
- "learning_rate": 0.00013626373626373628,
128
- "loss": 1.5305,
129
  "step": 40
130
  },
131
  {
132
- "epoch": 1.23,
133
- "learning_rate": 0.00013186813186813188,
134
- "loss": 1.4763,
135
  "step": 42
136
  },
137
  {
138
- "epoch": 1.28,
139
- "learning_rate": 0.00012747252747252746,
140
- "loss": 1.452,
141
  "step": 44
142
  },
143
  {
144
- "epoch": 1.34,
145
- "learning_rate": 0.0001230769230769231,
146
- "loss": 1.322,
147
  "step": 46
148
  },
149
  {
150
- "epoch": 1.4,
151
- "learning_rate": 0.00011868131868131869,
152
- "loss": 1.5994,
153
  "step": 48
154
  },
155
  {
156
- "epoch": 1.46,
157
- "learning_rate": 0.00011428571428571428,
158
- "loss": 1.3866,
159
  "step": 50
160
  },
161
  {
162
- "epoch": 1.52,
163
- "learning_rate": 0.0001098901098901099,
164
- "loss": 1.1438,
165
  "step": 52
166
  },
167
  {
168
- "epoch": 1.58,
169
- "learning_rate": 0.0001054945054945055,
170
- "loss": 1.0993,
171
  "step": 54
172
  },
173
  {
174
- "epoch": 1.64,
175
- "learning_rate": 0.0001010989010989011,
176
- "loss": 1.6039,
177
  "step": 56
178
  },
179
  {
180
- "epoch": 1.69,
181
- "learning_rate": 9.670329670329671e-05,
182
- "loss": 1.3193,
183
  "step": 58
184
  },
185
  {
186
- "epoch": 1.75,
187
- "learning_rate": 9.230769230769232e-05,
188
- "loss": 1.3037,
189
  "step": 60
190
  },
191
  {
192
- "epoch": 1.81,
193
- "learning_rate": 8.791208791208791e-05,
194
- "loss": 1.4728,
195
  "step": 62
196
  },
197
  {
198
- "epoch": 1.87,
199
- "learning_rate": 8.351648351648353e-05,
200
- "loss": 1.3756,
201
  "step": 64
202
  },
203
  {
204
- "epoch": 1.93,
205
- "learning_rate": 7.912087912087912e-05,
206
- "loss": 1.3881,
207
  "step": 66
208
  },
209
  {
210
- "epoch": 1.99,
211
- "learning_rate": 7.472527472527473e-05,
212
- "loss": 1.1797,
213
  "step": 68
214
  },
215
  {
216
- "epoch": 2.04,
217
- "learning_rate": 7.032967032967034e-05,
218
- "loss": 1.5189,
219
  "step": 70
220
  },
221
  {
222
- "epoch": 2.1,
223
- "learning_rate": 6.593406593406594e-05,
224
- "loss": 1.4004,
225
  "step": 72
226
  },
227
  {
228
- "epoch": 2.16,
229
- "learning_rate": 6.153846153846155e-05,
230
- "loss": 1.1743,
231
  "step": 74
232
  },
233
  {
234
- "epoch": 2.22,
235
- "learning_rate": 5.714285714285714e-05,
236
- "loss": 1.1663,
237
  "step": 76
238
  },
239
  {
240
- "epoch": 2.28,
241
- "learning_rate": 5.274725274725275e-05,
242
- "loss": 1.1875,
243
  "step": 78
244
  },
245
  {
246
- "epoch": 2.34,
247
- "learning_rate": 4.8351648351648355e-05,
248
- "loss": 1.7227,
249
  "step": 80
250
  },
251
  {
252
- "epoch": 2.39,
253
- "learning_rate": 4.3956043956043955e-05,
254
- "loss": 1.2398,
255
  "step": 82
256
  },
257
  {
258
- "epoch": 2.45,
259
- "learning_rate": 3.956043956043956e-05,
260
- "loss": 1.3196,
261
  "step": 84
262
  },
263
  {
264
- "epoch": 2.51,
265
- "learning_rate": 3.516483516483517e-05,
266
- "loss": 1.2791,
267
  "step": 86
268
  },
269
  {
270
- "epoch": 2.57,
271
- "learning_rate": 3.0769230769230774e-05,
272
- "loss": 1.3619,
273
  "step": 88
274
  },
275
  {
276
- "epoch": 2.63,
277
- "learning_rate": 2.6373626373626374e-05,
278
- "loss": 1.0073,
279
  "step": 90
280
  },
281
  {
282
- "epoch": 2.69,
283
- "learning_rate": 2.1978021978021977e-05,
284
- "loss": 1.1867,
285
  "step": 92
286
  },
287
  {
288
- "epoch": 2.74,
289
- "learning_rate": 1.7582417582417584e-05,
290
- "loss": 1.605,
291
  "step": 94
292
  },
293
  {
294
- "epoch": 2.8,
295
- "learning_rate": 1.3186813186813187e-05,
296
- "loss": 1.1101,
297
  "step": 96
298
  },
299
  {
300
- "epoch": 2.86,
301
- "learning_rate": 8.791208791208792e-06,
302
- "loss": 1.2586,
303
  "step": 98
304
  },
305
  {
306
- "epoch": 2.92,
307
- "learning_rate": 4.395604395604396e-06,
308
- "loss": 1.4676,
309
  "step": 100
310
  },
311
  {
312
- "epoch": 2.98,
313
- "learning_rate": 0.0,
314
- "loss": 1.4705,
315
  "step": 102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  }
317
  ],
318
  "logging_steps": 2,
319
- "max_steps": 102,
320
  "num_train_epochs": 3,
321
  "save_steps": 500,
322
- "total_flos": 2.6989001479028736e+16,
323
  "trial_name": null,
324
  "trial_params": null
325
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.963855421686747,
5
  "eval_steps": 500,
6
+ "global_step": 123,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.05,
13
+ "learning_rate": 3.0769230769230774e-05,
14
+ "loss": 2.2564,
15
  "step": 2
16
  },
17
  {
18
+ "epoch": 0.1,
19
+ "learning_rate": 6.153846153846155e-05,
20
+ "loss": 2.1528,
21
  "step": 4
22
  },
23
  {
24
+ "epoch": 0.14,
25
+ "learning_rate": 9.230769230769232e-05,
26
+ "loss": 2.1326,
27
  "step": 6
28
  },
29
  {
30
+ "epoch": 0.19,
31
+ "learning_rate": 0.0001230769230769231,
32
+ "loss": 2.0429,
33
  "step": 8
34
  },
35
  {
36
+ "epoch": 0.24,
37
+ "learning_rate": 0.00015384615384615385,
38
+ "loss": 2.0747,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.29,
43
+ "learning_rate": 0.00018461538461538463,
44
+ "loss": 2.0012,
45
  "step": 12
46
  },
47
  {
48
+ "epoch": 0.34,
49
+ "learning_rate": 0.00019818181818181821,
50
+ "loss": 2.0527,
51
  "step": 14
52
  },
53
  {
54
+ "epoch": 0.39,
55
+ "learning_rate": 0.00019454545454545457,
56
+ "loss": 2.0473,
57
  "step": 16
58
  },
59
  {
60
+ "epoch": 0.43,
61
+ "learning_rate": 0.00019090909090909092,
62
+ "loss": 1.9741,
63
  "step": 18
64
  },
65
  {
66
+ "epoch": 0.48,
67
+ "learning_rate": 0.00018727272727272728,
68
+ "loss": 1.979,
69
  "step": 20
70
  },
71
  {
72
+ "epoch": 0.53,
73
+ "learning_rate": 0.00018363636363636366,
74
+ "loss": 2.1499,
75
  "step": 22
76
  },
77
  {
78
+ "epoch": 0.58,
79
+ "learning_rate": 0.00018,
80
+ "loss": 2.0575,
81
  "step": 24
82
  },
83
  {
84
+ "epoch": 0.63,
85
+ "learning_rate": 0.00017636363636363637,
86
+ "loss": 1.9974,
87
  "step": 26
88
  },
89
  {
90
+ "epoch": 0.67,
91
+ "learning_rate": 0.00017272727272727275,
92
+ "loss": 1.8976,
93
  "step": 28
94
  },
95
  {
96
+ "epoch": 0.72,
97
+ "learning_rate": 0.0001690909090909091,
98
+ "loss": 1.9667,
99
  "step": 30
100
  },
101
  {
102
+ "epoch": 0.77,
103
+ "learning_rate": 0.00016545454545454545,
104
+ "loss": 1.9728,
105
  "step": 32
106
  },
107
  {
108
+ "epoch": 0.82,
109
+ "learning_rate": 0.00016181818181818184,
110
+ "loss": 2.0311,
111
  "step": 34
112
  },
113
  {
114
+ "epoch": 0.87,
115
+ "learning_rate": 0.0001581818181818182,
116
+ "loss": 1.9161,
117
  "step": 36
118
  },
119
  {
120
+ "epoch": 0.92,
121
+ "learning_rate": 0.00015454545454545454,
122
+ "loss": 2.0139,
123
  "step": 38
124
  },
125
  {
126
+ "epoch": 0.96,
127
+ "learning_rate": 0.0001509090909090909,
128
+ "loss": 2.0425,
129
  "step": 40
130
  },
131
  {
132
+ "epoch": 1.01,
133
+ "learning_rate": 0.00014727272727272728,
134
+ "loss": 2.0341,
135
  "step": 42
136
  },
137
  {
138
+ "epoch": 1.06,
139
+ "learning_rate": 0.00014363636363636363,
140
+ "loss": 1.9073,
141
  "step": 44
142
  },
143
  {
144
+ "epoch": 1.11,
145
+ "learning_rate": 0.00014,
146
+ "loss": 1.8796,
147
  "step": 46
148
  },
149
  {
150
+ "epoch": 1.16,
151
+ "learning_rate": 0.00013636363636363637,
152
+ "loss": 1.8176,
153
  "step": 48
154
  },
155
  {
156
+ "epoch": 1.2,
157
+ "learning_rate": 0.00013272727272727275,
158
+ "loss": 1.8576,
159
  "step": 50
160
  },
161
  {
162
+ "epoch": 1.25,
163
+ "learning_rate": 0.0001290909090909091,
164
+ "loss": 1.9112,
165
  "step": 52
166
  },
167
  {
168
+ "epoch": 1.3,
169
+ "learning_rate": 0.00012545454545454546,
170
+ "loss": 1.9554,
171
  "step": 54
172
  },
173
  {
174
+ "epoch": 1.35,
175
+ "learning_rate": 0.00012181818181818183,
176
+ "loss": 1.7846,
177
  "step": 56
178
  },
179
  {
180
+ "epoch": 1.4,
181
+ "learning_rate": 0.0001181818181818182,
182
+ "loss": 1.9487,
183
  "step": 58
184
  },
185
  {
186
+ "epoch": 1.45,
187
+ "learning_rate": 0.00011454545454545456,
188
+ "loss": 1.8499,
189
  "step": 60
190
  },
191
  {
192
+ "epoch": 1.49,
193
+ "learning_rate": 0.00011090909090909092,
194
+ "loss": 1.9938,
195
  "step": 62
196
  },
197
  {
198
+ "epoch": 1.54,
199
+ "learning_rate": 0.00010727272727272728,
200
+ "loss": 1.9252,
201
  "step": 64
202
  },
203
  {
204
+ "epoch": 1.59,
205
+ "learning_rate": 0.00010363636363636364,
206
+ "loss": 1.8742,
207
  "step": 66
208
  },
209
  {
210
+ "epoch": 1.64,
211
+ "learning_rate": 0.0001,
212
+ "loss": 1.9286,
213
  "step": 68
214
  },
215
  {
216
+ "epoch": 1.69,
217
+ "learning_rate": 9.636363636363637e-05,
218
+ "loss": 1.8691,
219
  "step": 70
220
  },
221
  {
222
+ "epoch": 1.73,
223
+ "learning_rate": 9.272727272727273e-05,
224
+ "loss": 1.8683,
225
  "step": 72
226
  },
227
  {
228
+ "epoch": 1.78,
229
+ "learning_rate": 8.90909090909091e-05,
230
+ "loss": 1.9512,
231
  "step": 74
232
  },
233
  {
234
+ "epoch": 1.83,
235
+ "learning_rate": 8.545454545454545e-05,
236
+ "loss": 1.9497,
237
  "step": 76
238
  },
239
  {
240
+ "epoch": 1.88,
241
+ "learning_rate": 8.181818181818183e-05,
242
+ "loss": 1.8344,
243
  "step": 78
244
  },
245
  {
246
+ "epoch": 1.93,
247
+ "learning_rate": 7.818181818181818e-05,
248
+ "loss": 1.8214,
249
  "step": 80
250
  },
251
  {
252
+ "epoch": 1.98,
253
+ "learning_rate": 7.454545454545455e-05,
254
+ "loss": 1.8507,
255
  "step": 82
256
  },
257
  {
258
+ "epoch": 2.02,
259
+ "learning_rate": 7.090909090909092e-05,
260
+ "loss": 1.8836,
261
  "step": 84
262
  },
263
  {
264
+ "epoch": 2.07,
265
+ "learning_rate": 6.727272727272727e-05,
266
+ "loss": 1.76,
267
  "step": 86
268
  },
269
  {
270
+ "epoch": 2.12,
271
+ "learning_rate": 6.363636363636364e-05,
272
+ "loss": 1.7621,
273
  "step": 88
274
  },
275
  {
276
+ "epoch": 2.17,
277
+ "learning_rate": 6e-05,
278
+ "loss": 1.8145,
279
  "step": 90
280
  },
281
  {
282
+ "epoch": 2.22,
283
+ "learning_rate": 5.636363636363636e-05,
284
+ "loss": 1.8005,
285
  "step": 92
286
  },
287
  {
288
+ "epoch": 2.27,
289
+ "learning_rate": 5.272727272727272e-05,
290
+ "loss": 1.9014,
291
  "step": 94
292
  },
293
  {
294
+ "epoch": 2.31,
295
+ "learning_rate": 4.909090909090909e-05,
296
+ "loss": 1.8024,
297
  "step": 96
298
  },
299
  {
300
+ "epoch": 2.36,
301
+ "learning_rate": 4.545454545454546e-05,
302
+ "loss": 1.7389,
303
  "step": 98
304
  },
305
  {
306
+ "epoch": 2.41,
307
+ "learning_rate": 4.181818181818182e-05,
308
+ "loss": 1.8094,
309
  "step": 100
310
  },
311
  {
312
+ "epoch": 2.46,
313
+ "learning_rate": 3.818181818181819e-05,
314
+ "loss": 1.7297,
315
  "step": 102
316
+ },
317
+ {
318
+ "epoch": 2.51,
319
+ "learning_rate": 3.454545454545455e-05,
320
+ "loss": 1.8403,
321
+ "step": 104
322
+ },
323
+ {
324
+ "epoch": 2.55,
325
+ "learning_rate": 3.090909090909091e-05,
326
+ "loss": 1.7573,
327
+ "step": 106
328
+ },
329
+ {
330
+ "epoch": 2.6,
331
+ "learning_rate": 2.7272727272727273e-05,
332
+ "loss": 1.758,
333
+ "step": 108
334
+ },
335
+ {
336
+ "epoch": 2.65,
337
+ "learning_rate": 2.3636363636363637e-05,
338
+ "loss": 1.7608,
339
+ "step": 110
340
+ },
341
+ {
342
+ "epoch": 2.7,
343
+ "learning_rate": 2e-05,
344
+ "loss": 1.8246,
345
+ "step": 112
346
+ },
347
+ {
348
+ "epoch": 2.75,
349
+ "learning_rate": 1.6363636363636366e-05,
350
+ "loss": 1.813,
351
+ "step": 114
352
+ },
353
+ {
354
+ "epoch": 2.8,
355
+ "learning_rate": 1.2727272727272727e-05,
356
+ "loss": 1.786,
357
+ "step": 116
358
+ },
359
+ {
360
+ "epoch": 2.84,
361
+ "learning_rate": 9.090909090909091e-06,
362
+ "loss": 1.8059,
363
+ "step": 118
364
+ },
365
+ {
366
+ "epoch": 2.89,
367
+ "learning_rate": 5.4545454545454545e-06,
368
+ "loss": 1.7401,
369
+ "step": 120
370
+ },
371
+ {
372
+ "epoch": 2.94,
373
+ "learning_rate": 1.818181818181818e-06,
374
+ "loss": 1.8273,
375
+ "step": 122
376
  }
377
  ],
378
  "logging_steps": 2,
379
+ "max_steps": 123,
380
  "num_train_epochs": 3,
381
  "save_steps": 500,
382
+ "total_flos": 3.2545560607064064e+16,
383
  "trial_name": null,
384
  "trial_params": null
385
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e35e5509b7c25ac17c40a1eee45fbfbb580135d1a9777e8e43b88eb21c75dbea
3
- size 4472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54465407d7f86d0a02eabf5537d57b7d4b300528a41a2625cbac0c7dbe56a591
3
+ size 4027