DunnBC22 commited on
Commit
80d463c
1 Parent(s): 4211bd3

All Dunn!!!

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +1090 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 1.1316755048115336e+16,
4
+ "train_loss": 0.19905866315323814,
5
+ "train_runtime": 6850.4631,
6
+ "train_samples_per_second": 40.041,
7
+ "train_steps_per_second": 5.006
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "total_flos": 1.1316755048115336e+16,
4
+ "train_loss": 0.19905866315323814,
5
+ "train_runtime": 6850.4631,
6
+ "train_samples_per_second": 40.041,
7
+ "train_steps_per_second": 5.006
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1090 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3088998794555664,
3
+ "best_model_checkpoint": "bert-base-uncased-Regression-Edmunds_Car_Reviews/checkpoint-34290",
4
+ "epoch": 3.0,
5
+ "global_step": 34290,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 1.999941673957422e-05,
13
+ "loss": 16.5207,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.02,
18
+ "learning_rate": 1.988334791484398e-05,
19
+ "loss": 1.4237,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.03,
24
+ "learning_rate": 1.9766695829687957e-05,
25
+ "loss": 0.3652,
26
+ "step": 400
27
+ },
28
+ {
29
+ "epoch": 0.05,
30
+ "learning_rate": 1.9650043744531936e-05,
31
+ "loss": 0.3355,
32
+ "step": 600
33
+ },
34
+ {
35
+ "epoch": 0.07,
36
+ "learning_rate": 1.9533391659375912e-05,
37
+ "loss": 0.2945,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.09,
42
+ "learning_rate": 1.941673957421989e-05,
43
+ "loss": 0.2863,
44
+ "step": 1000
45
+ },
46
+ {
47
+ "epoch": 0.1,
48
+ "learning_rate": 1.9300087489063867e-05,
49
+ "loss": 0.3029,
50
+ "step": 1200
51
+ },
52
+ {
53
+ "epoch": 0.12,
54
+ "learning_rate": 1.9183435403907846e-05,
55
+ "loss": 0.3111,
56
+ "step": 1400
57
+ },
58
+ {
59
+ "epoch": 0.14,
60
+ "learning_rate": 1.9066783318751826e-05,
61
+ "loss": 0.2634,
62
+ "step": 1600
63
+ },
64
+ {
65
+ "epoch": 0.16,
66
+ "learning_rate": 1.89501312335958e-05,
67
+ "loss": 0.278,
68
+ "step": 1800
69
+ },
70
+ {
71
+ "epoch": 0.17,
72
+ "learning_rate": 1.883347914843978e-05,
73
+ "loss": 0.2729,
74
+ "step": 2000
75
+ },
76
+ {
77
+ "epoch": 0.19,
78
+ "learning_rate": 1.8716827063283757e-05,
79
+ "loss": 0.2829,
80
+ "step": 2200
81
+ },
82
+ {
83
+ "epoch": 0.21,
84
+ "learning_rate": 1.8600174978127736e-05,
85
+ "loss": 0.2461,
86
+ "step": 2400
87
+ },
88
+ {
89
+ "epoch": 0.23,
90
+ "learning_rate": 1.8483522892971712e-05,
91
+ "loss": 0.2497,
92
+ "step": 2600
93
+ },
94
+ {
95
+ "epoch": 0.24,
96
+ "learning_rate": 1.836687080781569e-05,
97
+ "loss": 0.2592,
98
+ "step": 2800
99
+ },
100
+ {
101
+ "epoch": 0.26,
102
+ "learning_rate": 1.825021872265967e-05,
103
+ "loss": 0.2701,
104
+ "step": 3000
105
+ },
106
+ {
107
+ "epoch": 0.28,
108
+ "learning_rate": 1.813356663750365e-05,
109
+ "loss": 0.2699,
110
+ "step": 3200
111
+ },
112
+ {
113
+ "epoch": 0.3,
114
+ "learning_rate": 1.8016914552347625e-05,
115
+ "loss": 0.2656,
116
+ "step": 3400
117
+ },
118
+ {
119
+ "epoch": 0.31,
120
+ "learning_rate": 1.79002624671916e-05,
121
+ "loss": 0.2521,
122
+ "step": 3600
123
+ },
124
+ {
125
+ "epoch": 0.33,
126
+ "learning_rate": 1.778361038203558e-05,
127
+ "loss": 0.2776,
128
+ "step": 3800
129
+ },
130
+ {
131
+ "epoch": 0.35,
132
+ "learning_rate": 1.7666958296879556e-05,
133
+ "loss": 0.2753,
134
+ "step": 4000
135
+ },
136
+ {
137
+ "epoch": 0.37,
138
+ "learning_rate": 1.7550306211723536e-05,
139
+ "loss": 0.2268,
140
+ "step": 4200
141
+ },
142
+ {
143
+ "epoch": 0.38,
144
+ "learning_rate": 1.7433654126567515e-05,
145
+ "loss": 0.2617,
146
+ "step": 4400
147
+ },
148
+ {
149
+ "epoch": 0.4,
150
+ "learning_rate": 1.7317002041411494e-05,
151
+ "loss": 0.2425,
152
+ "step": 4600
153
+ },
154
+ {
155
+ "epoch": 0.42,
156
+ "learning_rate": 1.720034995625547e-05,
157
+ "loss": 0.2474,
158
+ "step": 4800
159
+ },
160
+ {
161
+ "epoch": 0.44,
162
+ "learning_rate": 1.7083697871099446e-05,
163
+ "loss": 0.2589,
164
+ "step": 5000
165
+ },
166
+ {
167
+ "epoch": 0.45,
168
+ "learning_rate": 1.6967045785943425e-05,
169
+ "loss": 0.2389,
170
+ "step": 5200
171
+ },
172
+ {
173
+ "epoch": 0.47,
174
+ "learning_rate": 1.68503937007874e-05,
175
+ "loss": 0.2494,
176
+ "step": 5400
177
+ },
178
+ {
179
+ "epoch": 0.49,
180
+ "learning_rate": 1.673374161563138e-05,
181
+ "loss": 0.2558,
182
+ "step": 5600
183
+ },
184
+ {
185
+ "epoch": 0.51,
186
+ "learning_rate": 1.661708953047536e-05,
187
+ "loss": 0.2524,
188
+ "step": 5800
189
+ },
190
+ {
191
+ "epoch": 0.52,
192
+ "learning_rate": 1.650043744531934e-05,
193
+ "loss": 0.2509,
194
+ "step": 6000
195
+ },
196
+ {
197
+ "epoch": 0.54,
198
+ "learning_rate": 1.6383785360163315e-05,
199
+ "loss": 0.2605,
200
+ "step": 6200
201
+ },
202
+ {
203
+ "epoch": 0.56,
204
+ "learning_rate": 1.626713327500729e-05,
205
+ "loss": 0.2354,
206
+ "step": 6400
207
+ },
208
+ {
209
+ "epoch": 0.58,
210
+ "learning_rate": 1.615048118985127e-05,
211
+ "loss": 0.27,
212
+ "step": 6600
213
+ },
214
+ {
215
+ "epoch": 0.59,
216
+ "learning_rate": 1.603382910469525e-05,
217
+ "loss": 0.2453,
218
+ "step": 6800
219
+ },
220
+ {
221
+ "epoch": 0.61,
222
+ "learning_rate": 1.5917177019539225e-05,
223
+ "loss": 0.2589,
224
+ "step": 7000
225
+ },
226
+ {
227
+ "epoch": 0.63,
228
+ "learning_rate": 1.5800524934383204e-05,
229
+ "loss": 0.2513,
230
+ "step": 7200
231
+ },
232
+ {
233
+ "epoch": 0.65,
234
+ "learning_rate": 1.5683872849227184e-05,
235
+ "loss": 0.2583,
236
+ "step": 7400
237
+ },
238
+ {
239
+ "epoch": 0.66,
240
+ "learning_rate": 1.556722076407116e-05,
241
+ "loss": 0.257,
242
+ "step": 7600
243
+ },
244
+ {
245
+ "epoch": 0.68,
246
+ "learning_rate": 1.5450568678915135e-05,
247
+ "loss": 0.2363,
248
+ "step": 7800
249
+ },
250
+ {
251
+ "epoch": 0.7,
252
+ "learning_rate": 1.5333916593759115e-05,
253
+ "loss": 0.2378,
254
+ "step": 8000
255
+ },
256
+ {
257
+ "epoch": 0.72,
258
+ "learning_rate": 1.5217264508603094e-05,
259
+ "loss": 0.2438,
260
+ "step": 8200
261
+ },
262
+ {
263
+ "epoch": 0.73,
264
+ "learning_rate": 1.510061242344707e-05,
265
+ "loss": 0.2489,
266
+ "step": 8400
267
+ },
268
+ {
269
+ "epoch": 0.75,
270
+ "learning_rate": 1.4983960338291049e-05,
271
+ "loss": 0.2618,
272
+ "step": 8600
273
+ },
274
+ {
275
+ "epoch": 0.77,
276
+ "learning_rate": 1.4867308253135026e-05,
277
+ "loss": 0.2264,
278
+ "step": 8800
279
+ },
280
+ {
281
+ "epoch": 0.79,
282
+ "learning_rate": 1.4750656167979002e-05,
283
+ "loss": 0.2595,
284
+ "step": 9000
285
+ },
286
+ {
287
+ "epoch": 0.8,
288
+ "learning_rate": 1.4634004082822982e-05,
289
+ "loss": 0.2246,
290
+ "step": 9200
291
+ },
292
+ {
293
+ "epoch": 0.82,
294
+ "learning_rate": 1.451735199766696e-05,
295
+ "loss": 0.2281,
296
+ "step": 9400
297
+ },
298
+ {
299
+ "epoch": 0.84,
300
+ "learning_rate": 1.4400699912510938e-05,
301
+ "loss": 0.217,
302
+ "step": 9600
303
+ },
304
+ {
305
+ "epoch": 0.86,
306
+ "learning_rate": 1.4284047827354914e-05,
307
+ "loss": 0.2359,
308
+ "step": 9800
309
+ },
310
+ {
311
+ "epoch": 0.87,
312
+ "learning_rate": 1.4167395742198894e-05,
313
+ "loss": 0.2322,
314
+ "step": 10000
315
+ },
316
+ {
317
+ "epoch": 0.89,
318
+ "learning_rate": 1.4050743657042871e-05,
319
+ "loss": 0.2055,
320
+ "step": 10200
321
+ },
322
+ {
323
+ "epoch": 0.91,
324
+ "learning_rate": 1.393409157188685e-05,
325
+ "loss": 0.2374,
326
+ "step": 10400
327
+ },
328
+ {
329
+ "epoch": 0.93,
330
+ "learning_rate": 1.3817439486730826e-05,
331
+ "loss": 0.2386,
332
+ "step": 10600
333
+ },
334
+ {
335
+ "epoch": 0.94,
336
+ "learning_rate": 1.3700787401574804e-05,
337
+ "loss": 0.2323,
338
+ "step": 10800
339
+ },
340
+ {
341
+ "epoch": 0.96,
342
+ "learning_rate": 1.3584135316418783e-05,
343
+ "loss": 0.2362,
344
+ "step": 11000
345
+ },
346
+ {
347
+ "epoch": 0.98,
348
+ "learning_rate": 1.3467483231262759e-05,
349
+ "loss": 0.2211,
350
+ "step": 11200
351
+ },
352
+ {
353
+ "epoch": 1.0,
354
+ "learning_rate": 1.3350831146106738e-05,
355
+ "loss": 0.2142,
356
+ "step": 11400
357
+ },
358
+ {
359
+ "epoch": 1.0,
360
+ "eval_loss": 0.24207346141338348,
361
+ "eval_mae": 0.3126428425312042,
362
+ "eval_mse": 0.24207346141338348,
363
+ "eval_rmse": 0.49200960993766785,
364
+ "eval_runtime": 434.9139,
365
+ "eval_samples_per_second": 113.204,
366
+ "eval_steps_per_second": 14.152,
367
+ "step": 11430
368
+ },
369
+ {
370
+ "epoch": 1.01,
371
+ "learning_rate": 1.3234179060950716e-05,
372
+ "loss": 0.194,
373
+ "step": 11600
374
+ },
375
+ {
376
+ "epoch": 1.03,
377
+ "learning_rate": 1.3117526975794695e-05,
378
+ "loss": 0.1782,
379
+ "step": 11800
380
+ },
381
+ {
382
+ "epoch": 1.05,
383
+ "learning_rate": 1.3000874890638671e-05,
384
+ "loss": 0.1992,
385
+ "step": 12000
386
+ },
387
+ {
388
+ "epoch": 1.07,
389
+ "learning_rate": 1.2884222805482649e-05,
390
+ "loss": 0.1657,
391
+ "step": 12200
392
+ },
393
+ {
394
+ "epoch": 1.08,
395
+ "learning_rate": 1.2767570720326628e-05,
396
+ "loss": 0.2156,
397
+ "step": 12400
398
+ },
399
+ {
400
+ "epoch": 1.1,
401
+ "learning_rate": 1.2650918635170604e-05,
402
+ "loss": 0.1742,
403
+ "step": 12600
404
+ },
405
+ {
406
+ "epoch": 1.12,
407
+ "learning_rate": 1.2534266550014583e-05,
408
+ "loss": 0.156,
409
+ "step": 12800
410
+ },
411
+ {
412
+ "epoch": 1.14,
413
+ "learning_rate": 1.241761446485856e-05,
414
+ "loss": 0.2079,
415
+ "step": 13000
416
+ },
417
+ {
418
+ "epoch": 1.15,
419
+ "learning_rate": 1.230096237970254e-05,
420
+ "loss": 0.1635,
421
+ "step": 13200
422
+ },
423
+ {
424
+ "epoch": 1.17,
425
+ "learning_rate": 1.2184310294546516e-05,
426
+ "loss": 0.1987,
427
+ "step": 13400
428
+ },
429
+ {
430
+ "epoch": 1.19,
431
+ "learning_rate": 1.2067658209390493e-05,
432
+ "loss": 0.1683,
433
+ "step": 13600
434
+ },
435
+ {
436
+ "epoch": 1.21,
437
+ "learning_rate": 1.1951006124234472e-05,
438
+ "loss": 0.1735,
439
+ "step": 13800
440
+ },
441
+ {
442
+ "epoch": 1.22,
443
+ "learning_rate": 1.183435403907845e-05,
444
+ "loss": 0.1971,
445
+ "step": 14000
446
+ },
447
+ {
448
+ "epoch": 1.24,
449
+ "learning_rate": 1.1717701953922428e-05,
450
+ "loss": 0.2014,
451
+ "step": 14200
452
+ },
453
+ {
454
+ "epoch": 1.26,
455
+ "learning_rate": 1.1601049868766405e-05,
456
+ "loss": 0.2114,
457
+ "step": 14400
458
+ },
459
+ {
460
+ "epoch": 1.28,
461
+ "learning_rate": 1.1484397783610384e-05,
462
+ "loss": 0.1707,
463
+ "step": 14600
464
+ },
465
+ {
466
+ "epoch": 1.29,
467
+ "learning_rate": 1.136774569845436e-05,
468
+ "loss": 0.194,
469
+ "step": 14800
470
+ },
471
+ {
472
+ "epoch": 1.31,
473
+ "learning_rate": 1.1251093613298338e-05,
474
+ "loss": 0.1714,
475
+ "step": 15000
476
+ },
477
+ {
478
+ "epoch": 1.33,
479
+ "learning_rate": 1.1134441528142317e-05,
480
+ "loss": 0.1973,
481
+ "step": 15200
482
+ },
483
+ {
484
+ "epoch": 1.35,
485
+ "learning_rate": 1.1017789442986295e-05,
486
+ "loss": 0.1705,
487
+ "step": 15400
488
+ },
489
+ {
490
+ "epoch": 1.36,
491
+ "learning_rate": 1.0901137357830272e-05,
492
+ "loss": 0.1842,
493
+ "step": 15600
494
+ },
495
+ {
496
+ "epoch": 1.38,
497
+ "learning_rate": 1.078448527267425e-05,
498
+ "loss": 0.1578,
499
+ "step": 15800
500
+ },
501
+ {
502
+ "epoch": 1.4,
503
+ "learning_rate": 1.0667833187518229e-05,
504
+ "loss": 0.1598,
505
+ "step": 16000
506
+ },
507
+ {
508
+ "epoch": 1.42,
509
+ "learning_rate": 1.0551181102362205e-05,
510
+ "loss": 0.1895,
511
+ "step": 16200
512
+ },
513
+ {
514
+ "epoch": 1.43,
515
+ "learning_rate": 1.0434529017206183e-05,
516
+ "loss": 0.1846,
517
+ "step": 16400
518
+ },
519
+ {
520
+ "epoch": 1.45,
521
+ "learning_rate": 1.0317876932050162e-05,
522
+ "loss": 0.1875,
523
+ "step": 16600
524
+ },
525
+ {
526
+ "epoch": 1.47,
527
+ "learning_rate": 1.020122484689414e-05,
528
+ "loss": 0.1748,
529
+ "step": 16800
530
+ },
531
+ {
532
+ "epoch": 1.49,
533
+ "learning_rate": 1.0084572761738117e-05,
534
+ "loss": 0.1837,
535
+ "step": 17000
536
+ },
537
+ {
538
+ "epoch": 1.5,
539
+ "learning_rate": 9.967920676582095e-06,
540
+ "loss": 0.1888,
541
+ "step": 17200
542
+ },
543
+ {
544
+ "epoch": 1.52,
545
+ "learning_rate": 9.851268591426074e-06,
546
+ "loss": 0.2038,
547
+ "step": 17400
548
+ },
549
+ {
550
+ "epoch": 1.54,
551
+ "learning_rate": 9.73461650627005e-06,
552
+ "loss": 0.1736,
553
+ "step": 17600
554
+ },
555
+ {
556
+ "epoch": 1.56,
557
+ "learning_rate": 9.617964421114027e-06,
558
+ "loss": 0.1853,
559
+ "step": 17800
560
+ },
561
+ {
562
+ "epoch": 1.57,
563
+ "learning_rate": 9.501312335958006e-06,
564
+ "loss": 0.1762,
565
+ "step": 18000
566
+ },
567
+ {
568
+ "epoch": 1.59,
569
+ "learning_rate": 9.384660250801984e-06,
570
+ "loss": 0.1979,
571
+ "step": 18200
572
+ },
573
+ {
574
+ "epoch": 1.61,
575
+ "learning_rate": 9.268008165645962e-06,
576
+ "loss": 0.1747,
577
+ "step": 18400
578
+ },
579
+ {
580
+ "epoch": 1.63,
581
+ "learning_rate": 9.15135608048994e-06,
582
+ "loss": 0.1793,
583
+ "step": 18600
584
+ },
585
+ {
586
+ "epoch": 1.64,
587
+ "learning_rate": 9.034703995333918e-06,
588
+ "loss": 0.1788,
589
+ "step": 18800
590
+ },
591
+ {
592
+ "epoch": 1.66,
593
+ "learning_rate": 8.918051910177894e-06,
594
+ "loss": 0.1901,
595
+ "step": 19000
596
+ },
597
+ {
598
+ "epoch": 1.68,
599
+ "learning_rate": 8.801399825021874e-06,
600
+ "loss": 0.1755,
601
+ "step": 19200
602
+ },
603
+ {
604
+ "epoch": 1.7,
605
+ "learning_rate": 8.684747739865851e-06,
606
+ "loss": 0.1782,
607
+ "step": 19400
608
+ },
609
+ {
610
+ "epoch": 1.71,
611
+ "learning_rate": 8.568095654709829e-06,
612
+ "loss": 0.1928,
613
+ "step": 19600
614
+ },
615
+ {
616
+ "epoch": 1.73,
617
+ "learning_rate": 8.451443569553806e-06,
618
+ "loss": 0.1821,
619
+ "step": 19800
620
+ },
621
+ {
622
+ "epoch": 1.75,
623
+ "learning_rate": 8.334791484397784e-06,
624
+ "loss": 0.1836,
625
+ "step": 20000
626
+ },
627
+ {
628
+ "epoch": 1.77,
629
+ "learning_rate": 8.218139399241763e-06,
630
+ "loss": 0.1803,
631
+ "step": 20200
632
+ },
633
+ {
634
+ "epoch": 1.78,
635
+ "learning_rate": 8.101487314085739e-06,
636
+ "loss": 0.176,
637
+ "step": 20400
638
+ },
639
+ {
640
+ "epoch": 1.8,
641
+ "learning_rate": 7.984835228929718e-06,
642
+ "loss": 0.2068,
643
+ "step": 20600
644
+ },
645
+ {
646
+ "epoch": 1.82,
647
+ "learning_rate": 7.868183143773696e-06,
648
+ "loss": 0.2005,
649
+ "step": 20800
650
+ },
651
+ {
652
+ "epoch": 1.84,
653
+ "learning_rate": 7.751531058617673e-06,
654
+ "loss": 0.1706,
655
+ "step": 21000
656
+ },
657
+ {
658
+ "epoch": 1.85,
659
+ "learning_rate": 7.634878973461651e-06,
660
+ "loss": 0.1821,
661
+ "step": 21200
662
+ },
663
+ {
664
+ "epoch": 1.87,
665
+ "learning_rate": 7.5182268883056285e-06,
666
+ "loss": 0.2003,
667
+ "step": 21400
668
+ },
669
+ {
670
+ "epoch": 1.89,
671
+ "learning_rate": 7.401574803149607e-06,
672
+ "loss": 0.1839,
673
+ "step": 21600
674
+ },
675
+ {
676
+ "epoch": 1.91,
677
+ "learning_rate": 7.2849227179935845e-06,
678
+ "loss": 0.191,
679
+ "step": 21800
680
+ },
681
+ {
682
+ "epoch": 1.92,
683
+ "learning_rate": 7.168270632837563e-06,
684
+ "loss": 0.2031,
685
+ "step": 22000
686
+ },
687
+ {
688
+ "epoch": 1.94,
689
+ "learning_rate": 7.0516185476815405e-06,
690
+ "loss": 0.2054,
691
+ "step": 22200
692
+ },
693
+ {
694
+ "epoch": 1.96,
695
+ "learning_rate": 6.934966462525519e-06,
696
+ "loss": 0.1687,
697
+ "step": 22400
698
+ },
699
+ {
700
+ "epoch": 1.98,
701
+ "learning_rate": 6.818314377369496e-06,
702
+ "loss": 0.176,
703
+ "step": 22600
704
+ },
705
+ {
706
+ "epoch": 1.99,
707
+ "learning_rate": 6.701662292213474e-06,
708
+ "loss": 0.1931,
709
+ "step": 22800
710
+ },
711
+ {
712
+ "epoch": 2.0,
713
+ "eval_loss": 0.2529958486557007,
714
+ "eval_mae": 0.3336344063282013,
715
+ "eval_mse": 0.2529958486557007,
716
+ "eval_rmse": 0.5029869079589844,
717
+ "eval_runtime": 433.6838,
718
+ "eval_samples_per_second": 113.525,
719
+ "eval_steps_per_second": 14.192,
720
+ "step": 22860
721
+ },
722
+ {
723
+ "epoch": 2.01,
724
+ "learning_rate": 6.585010207057452e-06,
725
+ "loss": 0.1951,
726
+ "step": 23000
727
+ },
728
+ {
729
+ "epoch": 2.03,
730
+ "learning_rate": 6.468358121901429e-06,
731
+ "loss": 0.127,
732
+ "step": 23200
733
+ },
734
+ {
735
+ "epoch": 2.05,
736
+ "learning_rate": 6.351706036745408e-06,
737
+ "loss": 0.1387,
738
+ "step": 23400
739
+ },
740
+ {
741
+ "epoch": 2.06,
742
+ "learning_rate": 6.235053951589385e-06,
743
+ "loss": 0.1436,
744
+ "step": 23600
745
+ },
746
+ {
747
+ "epoch": 2.08,
748
+ "learning_rate": 6.118401866433364e-06,
749
+ "loss": 0.1446,
750
+ "step": 23800
751
+ },
752
+ {
753
+ "epoch": 2.1,
754
+ "learning_rate": 6.00174978127734e-06,
755
+ "loss": 0.1315,
756
+ "step": 24000
757
+ },
758
+ {
759
+ "epoch": 2.12,
760
+ "learning_rate": 5.885097696121319e-06,
761
+ "loss": 0.1489,
762
+ "step": 24200
763
+ },
764
+ {
765
+ "epoch": 2.13,
766
+ "learning_rate": 5.768445610965296e-06,
767
+ "loss": 0.1527,
768
+ "step": 24400
769
+ },
770
+ {
771
+ "epoch": 2.15,
772
+ "learning_rate": 5.651793525809274e-06,
773
+ "loss": 0.1516,
774
+ "step": 24600
775
+ },
776
+ {
777
+ "epoch": 2.17,
778
+ "learning_rate": 5.535141440653252e-06,
779
+ "loss": 0.1203,
780
+ "step": 24800
781
+ },
782
+ {
783
+ "epoch": 2.19,
784
+ "learning_rate": 5.41848935549723e-06,
785
+ "loss": 0.1204,
786
+ "step": 25000
787
+ },
788
+ {
789
+ "epoch": 2.2,
790
+ "learning_rate": 5.301837270341208e-06,
791
+ "loss": 0.15,
792
+ "step": 25200
793
+ },
794
+ {
795
+ "epoch": 2.22,
796
+ "learning_rate": 5.185185185185185e-06,
797
+ "loss": 0.1453,
798
+ "step": 25400
799
+ },
800
+ {
801
+ "epoch": 2.24,
802
+ "learning_rate": 5.068533100029163e-06,
803
+ "loss": 0.1406,
804
+ "step": 25600
805
+ },
806
+ {
807
+ "epoch": 2.26,
808
+ "learning_rate": 4.951881014873141e-06,
809
+ "loss": 0.1263,
810
+ "step": 25800
811
+ },
812
+ {
813
+ "epoch": 2.27,
814
+ "learning_rate": 4.835228929717119e-06,
815
+ "loss": 0.1187,
816
+ "step": 26000
817
+ },
818
+ {
819
+ "epoch": 2.29,
820
+ "learning_rate": 4.718576844561097e-06,
821
+ "loss": 0.1363,
822
+ "step": 26200
823
+ },
824
+ {
825
+ "epoch": 2.31,
826
+ "learning_rate": 4.6019247594050745e-06,
827
+ "loss": 0.1284,
828
+ "step": 26400
829
+ },
830
+ {
831
+ "epoch": 2.33,
832
+ "learning_rate": 4.485272674249053e-06,
833
+ "loss": 0.1371,
834
+ "step": 26600
835
+ },
836
+ {
837
+ "epoch": 2.34,
838
+ "learning_rate": 4.3686205890930305e-06,
839
+ "loss": 0.128,
840
+ "step": 26800
841
+ },
842
+ {
843
+ "epoch": 2.36,
844
+ "learning_rate": 4.251968503937008e-06,
845
+ "loss": 0.1246,
846
+ "step": 27000
847
+ },
848
+ {
849
+ "epoch": 2.38,
850
+ "learning_rate": 4.135316418780986e-06,
851
+ "loss": 0.1283,
852
+ "step": 27200
853
+ },
854
+ {
855
+ "epoch": 2.4,
856
+ "learning_rate": 4.018664333624964e-06,
857
+ "loss": 0.1382,
858
+ "step": 27400
859
+ },
860
+ {
861
+ "epoch": 2.41,
862
+ "learning_rate": 3.902012248468942e-06,
863
+ "loss": 0.1324,
864
+ "step": 27600
865
+ },
866
+ {
867
+ "epoch": 2.43,
868
+ "learning_rate": 3.7853601633129196e-06,
869
+ "loss": 0.1305,
870
+ "step": 27800
871
+ },
872
+ {
873
+ "epoch": 2.45,
874
+ "learning_rate": 3.668708078156897e-06,
875
+ "loss": 0.1319,
876
+ "step": 28000
877
+ },
878
+ {
879
+ "epoch": 2.47,
880
+ "learning_rate": 3.552055993000875e-06,
881
+ "loss": 0.1322,
882
+ "step": 28200
883
+ },
884
+ {
885
+ "epoch": 2.48,
886
+ "learning_rate": 3.435403907844853e-06,
887
+ "loss": 0.1424,
888
+ "step": 28400
889
+ },
890
+ {
891
+ "epoch": 2.5,
892
+ "learning_rate": 3.318751822688831e-06,
893
+ "loss": 0.1473,
894
+ "step": 28600
895
+ },
896
+ {
897
+ "epoch": 2.52,
898
+ "learning_rate": 3.2020997375328087e-06,
899
+ "loss": 0.1361,
900
+ "step": 28800
901
+ },
902
+ {
903
+ "epoch": 2.54,
904
+ "learning_rate": 3.0854476523767863e-06,
905
+ "loss": 0.128,
906
+ "step": 29000
907
+ },
908
+ {
909
+ "epoch": 2.55,
910
+ "learning_rate": 2.9687955672207643e-06,
911
+ "loss": 0.1212,
912
+ "step": 29200
913
+ },
914
+ {
915
+ "epoch": 2.57,
916
+ "learning_rate": 2.852143482064742e-06,
917
+ "loss": 0.13,
918
+ "step": 29400
919
+ },
920
+ {
921
+ "epoch": 2.59,
922
+ "learning_rate": 2.73549139690872e-06,
923
+ "loss": 0.1401,
924
+ "step": 29600
925
+ },
926
+ {
927
+ "epoch": 2.61,
928
+ "learning_rate": 2.618839311752698e-06,
929
+ "loss": 0.1412,
930
+ "step": 29800
931
+ },
932
+ {
933
+ "epoch": 2.62,
934
+ "learning_rate": 2.502187226596676e-06,
935
+ "loss": 0.1262,
936
+ "step": 30000
937
+ },
938
+ {
939
+ "epoch": 2.64,
940
+ "learning_rate": 2.3855351414406534e-06,
941
+ "loss": 0.1412,
942
+ "step": 30200
943
+ },
944
+ {
945
+ "epoch": 2.66,
946
+ "learning_rate": 2.2688830562846314e-06,
947
+ "loss": 0.1441,
948
+ "step": 30400
949
+ },
950
+ {
951
+ "epoch": 2.68,
952
+ "learning_rate": 2.152230971128609e-06,
953
+ "loss": 0.1443,
954
+ "step": 30600
955
+ },
956
+ {
957
+ "epoch": 2.69,
958
+ "learning_rate": 2.035578885972587e-06,
959
+ "loss": 0.1541,
960
+ "step": 30800
961
+ },
962
+ {
963
+ "epoch": 2.71,
964
+ "learning_rate": 1.918926800816565e-06,
965
+ "loss": 0.1187,
966
+ "step": 31000
967
+ },
968
+ {
969
+ "epoch": 2.73,
970
+ "learning_rate": 1.8022747156605425e-06,
971
+ "loss": 0.1254,
972
+ "step": 31200
973
+ },
974
+ {
975
+ "epoch": 2.75,
976
+ "learning_rate": 1.6856226305045203e-06,
977
+ "loss": 0.1403,
978
+ "step": 31400
979
+ },
980
+ {
981
+ "epoch": 2.76,
982
+ "learning_rate": 1.5689705453484983e-06,
983
+ "loss": 0.1272,
984
+ "step": 31600
985
+ },
986
+ {
987
+ "epoch": 2.78,
988
+ "learning_rate": 1.452318460192476e-06,
989
+ "loss": 0.1223,
990
+ "step": 31800
991
+ },
992
+ {
993
+ "epoch": 2.8,
994
+ "learning_rate": 1.335666375036454e-06,
995
+ "loss": 0.1448,
996
+ "step": 32000
997
+ },
998
+ {
999
+ "epoch": 2.82,
1000
+ "learning_rate": 1.2190142898804318e-06,
1001
+ "loss": 0.1376,
1002
+ "step": 32200
1003
+ },
1004
+ {
1005
+ "epoch": 2.83,
1006
+ "learning_rate": 1.1023622047244096e-06,
1007
+ "loss": 0.1246,
1008
+ "step": 32400
1009
+ },
1010
+ {
1011
+ "epoch": 2.85,
1012
+ "learning_rate": 9.857101195683874e-07,
1013
+ "loss": 0.1548,
1014
+ "step": 32600
1015
+ },
1016
+ {
1017
+ "epoch": 2.87,
1018
+ "learning_rate": 8.690580344123652e-07,
1019
+ "loss": 0.1408,
1020
+ "step": 32800
1021
+ },
1022
+ {
1023
+ "epoch": 2.89,
1024
+ "learning_rate": 7.524059492563431e-07,
1025
+ "loss": 0.1282,
1026
+ "step": 33000
1027
+ },
1028
+ {
1029
+ "epoch": 2.9,
1030
+ "learning_rate": 6.357538641003208e-07,
1031
+ "loss": 0.1552,
1032
+ "step": 33200
1033
+ },
1034
+ {
1035
+ "epoch": 2.92,
1036
+ "learning_rate": 5.191017789442987e-07,
1037
+ "loss": 0.1272,
1038
+ "step": 33400
1039
+ },
1040
+ {
1041
+ "epoch": 2.94,
1042
+ "learning_rate": 4.024496937882765e-07,
1043
+ "loss": 0.1166,
1044
+ "step": 33600
1045
+ },
1046
+ {
1047
+ "epoch": 2.96,
1048
+ "learning_rate": 2.8579760863225433e-07,
1049
+ "loss": 0.1245,
1050
+ "step": 33800
1051
+ },
1052
+ {
1053
+ "epoch": 2.97,
1054
+ "learning_rate": 1.6914552347623217e-07,
1055
+ "loss": 0.1233,
1056
+ "step": 34000
1057
+ },
1058
+ {
1059
+ "epoch": 2.99,
1060
+ "learning_rate": 5.249343832020998e-08,
1061
+ "loss": 0.1192,
1062
+ "step": 34200
1063
+ },
1064
+ {
1065
+ "epoch": 3.0,
1066
+ "eval_loss": 0.23235057294368744,
1067
+ "eval_mae": 0.3088998794555664,
1068
+ "eval_mse": 0.23235058784484863,
1069
+ "eval_rmse": 0.4820275902748108,
1070
+ "eval_runtime": 434.9142,
1071
+ "eval_samples_per_second": 113.204,
1072
+ "eval_steps_per_second": 14.152,
1073
+ "step": 34290
1074
+ },
1075
+ {
1076
+ "epoch": 3.0,
1077
+ "step": 34290,
1078
+ "total_flos": 1.1316755048115336e+16,
1079
+ "train_loss": 0.19905866315323814,
1080
+ "train_runtime": 6850.4631,
1081
+ "train_samples_per_second": 40.041,
1082
+ "train_steps_per_second": 5.006
1083
+ }
1084
+ ],
1085
+ "max_steps": 34290,
1086
+ "num_train_epochs": 3,
1087
+ "total_flos": 1.1316755048115336e+16,
1088
+ "trial_name": null,
1089
+ "trial_params": null
1090
+ }