pszemraj commited on
Commit
56f8449
1 Parent(s): 3a5a162

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "eval_loss": 0.46361204981803894,
4
+ "eval_matthews_correlation": 0.5057060886900621,
5
+ "eval_runtime": 1.1268,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 925.6,
8
+ "eval_steps_per_second": 58.571,
9
+ "train_loss": 0.18462801172133925,
10
+ "train_runtime": 361.7714,
11
+ "train_samples": 8551,
12
+ "train_samples_per_second": 378.184,
13
+ "train_steps_per_second": 2.963
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "eval_loss": 0.46361204981803894,
4
+ "eval_matthews_correlation": 0.5057060886900621,
5
+ "eval_runtime": 1.1268,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 925.6,
8
+ "eval_steps_per_second": 58.571
9
+ }
predict_results_cola.txt ADDED
@@ -0,0 +1,1064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 acceptable
3
+ 1 acceptable
4
+ 2 acceptable
5
+ 3 acceptable
6
+ 4 unacceptable
7
+ 5 acceptable
8
+ 6 acceptable
9
+ 7 unacceptable
10
+ 8 unacceptable
11
+ 9 acceptable
12
+ 10 acceptable
13
+ 11 unacceptable
14
+ 12 unacceptable
15
+ 13 acceptable
16
+ 14 acceptable
17
+ 15 acceptable
18
+ 16 unacceptable
19
+ 17 acceptable
20
+ 18 acceptable
21
+ 19 unacceptable
22
+ 20 acceptable
23
+ 21 acceptable
24
+ 22 acceptable
25
+ 23 acceptable
26
+ 24 acceptable
27
+ 25 acceptable
28
+ 26 acceptable
29
+ 27 unacceptable
30
+ 28 unacceptable
31
+ 29 acceptable
32
+ 30 unacceptable
33
+ 31 unacceptable
34
+ 32 acceptable
35
+ 33 acceptable
36
+ 34 unacceptable
37
+ 35 acceptable
38
+ 36 acceptable
39
+ 37 unacceptable
40
+ 38 unacceptable
41
+ 39 unacceptable
42
+ 40 unacceptable
43
+ 41 unacceptable
44
+ 42 unacceptable
45
+ 43 acceptable
46
+ 44 unacceptable
47
+ 45 acceptable
48
+ 46 unacceptable
49
+ 47 unacceptable
50
+ 48 acceptable
51
+ 49 acceptable
52
+ 50 unacceptable
53
+ 51 unacceptable
54
+ 52 acceptable
55
+ 53 acceptable
56
+ 54 acceptable
57
+ 55 acceptable
58
+ 56 acceptable
59
+ 57 unacceptable
60
+ 58 acceptable
61
+ 59 acceptable
62
+ 60 unacceptable
63
+ 61 acceptable
64
+ 62 acceptable
65
+ 63 acceptable
66
+ 64 acceptable
67
+ 65 acceptable
68
+ 66 acceptable
69
+ 67 acceptable
70
+ 68 acceptable
71
+ 69 acceptable
72
+ 70 acceptable
73
+ 71 unacceptable
74
+ 72 unacceptable
75
+ 73 unacceptable
76
+ 74 acceptable
77
+ 75 acceptable
78
+ 76 acceptable
79
+ 77 acceptable
80
+ 78 acceptable
81
+ 79 acceptable
82
+ 80 acceptable
83
+ 81 unacceptable
84
+ 82 unacceptable
85
+ 83 acceptable
86
+ 84 unacceptable
87
+ 85 acceptable
88
+ 86 acceptable
89
+ 87 unacceptable
90
+ 88 acceptable
91
+ 89 acceptable
92
+ 90 unacceptable
93
+ 91 acceptable
94
+ 92 acceptable
95
+ 93 acceptable
96
+ 94 acceptable
97
+ 95 acceptable
98
+ 96 acceptable
99
+ 97 acceptable
100
+ 98 acceptable
101
+ 99 unacceptable
102
+ 100 unacceptable
103
+ 101 unacceptable
104
+ 102 acceptable
105
+ 103 acceptable
106
+ 104 acceptable
107
+ 105 acceptable
108
+ 106 acceptable
109
+ 107 acceptable
110
+ 108 acceptable
111
+ 109 unacceptable
112
+ 110 acceptable
113
+ 111 acceptable
114
+ 112 acceptable
115
+ 113 acceptable
116
+ 114 acceptable
117
+ 115 unacceptable
118
+ 116 acceptable
119
+ 117 unacceptable
120
+ 118 unacceptable
121
+ 119 acceptable
122
+ 120 unacceptable
123
+ 121 acceptable
124
+ 122 unacceptable
125
+ 123 unacceptable
126
+ 124 acceptable
127
+ 125 acceptable
128
+ 126 unacceptable
129
+ 127 acceptable
130
+ 128 acceptable
131
+ 129 acceptable
132
+ 130 unacceptable
133
+ 131 acceptable
134
+ 132 acceptable
135
+ 133 acceptable
136
+ 134 acceptable
137
+ 135 acceptable
138
+ 136 acceptable
139
+ 137 acceptable
140
+ 138 unacceptable
141
+ 139 unacceptable
142
+ 140 unacceptable
143
+ 141 unacceptable
144
+ 142 acceptable
145
+ 143 acceptable
146
+ 144 acceptable
147
+ 145 acceptable
148
+ 146 acceptable
149
+ 147 unacceptable
150
+ 148 acceptable
151
+ 149 acceptable
152
+ 150 acceptable
153
+ 151 acceptable
154
+ 152 unacceptable
155
+ 153 acceptable
156
+ 154 acceptable
157
+ 155 unacceptable
158
+ 156 unacceptable
159
+ 157 unacceptable
160
+ 158 acceptable
161
+ 159 acceptable
162
+ 160 acceptable
163
+ 161 acceptable
164
+ 162 acceptable
165
+ 163 unacceptable
166
+ 164 acceptable
167
+ 165 unacceptable
168
+ 166 unacceptable
169
+ 167 unacceptable
170
+ 168 acceptable
171
+ 169 acceptable
172
+ 170 acceptable
173
+ 171 acceptable
174
+ 172 acceptable
175
+ 173 unacceptable
176
+ 174 acceptable
177
+ 175 acceptable
178
+ 176 acceptable
179
+ 177 acceptable
180
+ 178 acceptable
181
+ 179 acceptable
182
+ 180 acceptable
183
+ 181 acceptable
184
+ 182 unacceptable
185
+ 183 acceptable
186
+ 184 unacceptable
187
+ 185 acceptable
188
+ 186 acceptable
189
+ 187 acceptable
190
+ 188 unacceptable
191
+ 189 acceptable
192
+ 190 acceptable
193
+ 191 acceptable
194
+ 192 acceptable
195
+ 193 acceptable
196
+ 194 unacceptable
197
+ 195 acceptable
198
+ 196 unacceptable
199
+ 197 unacceptable
200
+ 198 acceptable
201
+ 199 acceptable
202
+ 200 acceptable
203
+ 201 unacceptable
204
+ 202 unacceptable
205
+ 203 acceptable
206
+ 204 acceptable
207
+ 205 acceptable
208
+ 206 acceptable
209
+ 207 acceptable
210
+ 208 acceptable
211
+ 209 acceptable
212
+ 210 acceptable
213
+ 211 acceptable
214
+ 212 acceptable
215
+ 213 unacceptable
216
+ 214 acceptable
217
+ 215 acceptable
218
+ 216 acceptable
219
+ 217 acceptable
220
+ 218 acceptable
221
+ 219 acceptable
222
+ 220 acceptable
223
+ 221 unacceptable
224
+ 222 acceptable
225
+ 223 acceptable
226
+ 224 unacceptable
227
+ 225 acceptable
228
+ 226 unacceptable
229
+ 227 acceptable
230
+ 228 unacceptable
231
+ 229 acceptable
232
+ 230 acceptable
233
+ 231 acceptable
234
+ 232 acceptable
235
+ 233 acceptable
236
+ 234 acceptable
237
+ 235 acceptable
238
+ 236 unacceptable
239
+ 237 acceptable
240
+ 238 acceptable
241
+ 239 unacceptable
242
+ 240 acceptable
243
+ 241 acceptable
244
+ 242 acceptable
245
+ 243 acceptable
246
+ 244 acceptable
247
+ 245 acceptable
248
+ 246 acceptable
249
+ 247 acceptable
250
+ 248 acceptable
251
+ 249 acceptable
252
+ 250 acceptable
253
+ 251 acceptable
254
+ 252 acceptable
255
+ 253 acceptable
256
+ 254 acceptable
257
+ 255 acceptable
258
+ 256 acceptable
259
+ 257 acceptable
260
+ 258 unacceptable
261
+ 259 acceptable
262
+ 260 unacceptable
263
+ 261 acceptable
264
+ 262 acceptable
265
+ 263 acceptable
266
+ 264 acceptable
267
+ 265 acceptable
268
+ 266 acceptable
269
+ 267 acceptable
270
+ 268 acceptable
271
+ 269 acceptable
272
+ 270 acceptable
273
+ 271 acceptable
274
+ 272 unacceptable
275
+ 273 acceptable
276
+ 274 acceptable
277
+ 275 acceptable
278
+ 276 acceptable
279
+ 277 unacceptable
280
+ 278 acceptable
281
+ 279 unacceptable
282
+ 280 acceptable
283
+ 281 acceptable
284
+ 282 acceptable
285
+ 283 acceptable
286
+ 284 acceptable
287
+ 285 unacceptable
288
+ 286 unacceptable
289
+ 287 acceptable
290
+ 288 acceptable
291
+ 289 acceptable
292
+ 290 acceptable
293
+ 291 acceptable
294
+ 292 acceptable
295
+ 293 unacceptable
296
+ 294 unacceptable
297
+ 295 acceptable
298
+ 296 acceptable
299
+ 297 acceptable
300
+ 298 acceptable
301
+ 299 acceptable
302
+ 300 acceptable
303
+ 301 unacceptable
304
+ 302 acceptable
305
+ 303 acceptable
306
+ 304 acceptable
307
+ 305 acceptable
308
+ 306 unacceptable
309
+ 307 acceptable
310
+ 308 acceptable
311
+ 309 acceptable
312
+ 310 acceptable
313
+ 311 acceptable
314
+ 312 acceptable
315
+ 313 acceptable
316
+ 314 unacceptable
317
+ 315 unacceptable
318
+ 316 acceptable
319
+ 317 unacceptable
320
+ 318 acceptable
321
+ 319 unacceptable
322
+ 320 unacceptable
323
+ 321 acceptable
324
+ 322 unacceptable
325
+ 323 acceptable
326
+ 324 acceptable
327
+ 325 acceptable
328
+ 326 acceptable
329
+ 327 acceptable
330
+ 328 acceptable
331
+ 329 acceptable
332
+ 330 acceptable
333
+ 331 acceptable
334
+ 332 acceptable
335
+ 333 acceptable
336
+ 334 acceptable
337
+ 335 acceptable
338
+ 336 acceptable
339
+ 337 acceptable
340
+ 338 acceptable
341
+ 339 acceptable
342
+ 340 acceptable
343
+ 341 acceptable
344
+ 342 unacceptable
345
+ 343 unacceptable
346
+ 344 unacceptable
347
+ 345 acceptable
348
+ 346 acceptable
349
+ 347 acceptable
350
+ 348 unacceptable
351
+ 349 acceptable
352
+ 350 acceptable
353
+ 351 acceptable
354
+ 352 acceptable
355
+ 353 acceptable
356
+ 354 acceptable
357
+ 355 acceptable
358
+ 356 unacceptable
359
+ 357 acceptable
360
+ 358 acceptable
361
+ 359 acceptable
362
+ 360 acceptable
363
+ 361 unacceptable
364
+ 362 acceptable
365
+ 363 acceptable
366
+ 364 acceptable
367
+ 365 unacceptable
368
+ 366 acceptable
369
+ 367 acceptable
370
+ 368 unacceptable
371
+ 369 acceptable
372
+ 370 acceptable
373
+ 371 acceptable
374
+ 372 acceptable
375
+ 373 acceptable
376
+ 374 acceptable
377
+ 375 acceptable
378
+ 376 acceptable
379
+ 377 acceptable
380
+ 378 acceptable
381
+ 379 acceptable
382
+ 380 acceptable
383
+ 381 acceptable
384
+ 382 acceptable
385
+ 383 acceptable
386
+ 384 acceptable
387
+ 385 acceptable
388
+ 386 acceptable
389
+ 387 acceptable
390
+ 388 acceptable
391
+ 389 unacceptable
392
+ 390 acceptable
393
+ 391 unacceptable
394
+ 392 unacceptable
395
+ 393 acceptable
396
+ 394 acceptable
397
+ 395 acceptable
398
+ 396 acceptable
399
+ 397 acceptable
400
+ 398 acceptable
401
+ 399 acceptable
402
+ 400 acceptable
403
+ 401 acceptable
404
+ 402 unacceptable
405
+ 403 acceptable
406
+ 404 acceptable
407
+ 405 acceptable
408
+ 406 acceptable
409
+ 407 acceptable
410
+ 408 acceptable
411
+ 409 acceptable
412
+ 410 acceptable
413
+ 411 acceptable
414
+ 412 acceptable
415
+ 413 acceptable
416
+ 414 acceptable
417
+ 415 acceptable
418
+ 416 acceptable
419
+ 417 unacceptable
420
+ 418 unacceptable
421
+ 419 unacceptable
422
+ 420 acceptable
423
+ 421 acceptable
424
+ 422 acceptable
425
+ 423 unacceptable
426
+ 424 acceptable
427
+ 425 acceptable
428
+ 426 acceptable
429
+ 427 acceptable
430
+ 428 acceptable
431
+ 429 unacceptable
432
+ 430 acceptable
433
+ 431 unacceptable
434
+ 432 acceptable
435
+ 433 acceptable
436
+ 434 acceptable
437
+ 435 unacceptable
438
+ 436 acceptable
439
+ 437 acceptable
440
+ 438 acceptable
441
+ 439 acceptable
442
+ 440 acceptable
443
+ 441 acceptable
444
+ 442 acceptable
445
+ 443 acceptable
446
+ 444 unacceptable
447
+ 445 unacceptable
448
+ 446 unacceptable
449
+ 447 acceptable
450
+ 448 unacceptable
451
+ 449 acceptable
452
+ 450 acceptable
453
+ 451 acceptable
454
+ 452 acceptable
455
+ 453 acceptable
456
+ 454 acceptable
457
+ 455 unacceptable
458
+ 456 acceptable
459
+ 457 acceptable
460
+ 458 acceptable
461
+ 459 unacceptable
462
+ 460 acceptable
463
+ 461 acceptable
464
+ 462 acceptable
465
+ 463 acceptable
466
+ 464 acceptable
467
+ 465 acceptable
468
+ 466 acceptable
469
+ 467 acceptable
470
+ 468 acceptable
471
+ 469 acceptable
472
+ 470 acceptable
473
+ 471 acceptable
474
+ 472 acceptable
475
+ 473 unacceptable
476
+ 474 acceptable
477
+ 475 unacceptable
478
+ 476 unacceptable
479
+ 477 acceptable
480
+ 478 acceptable
481
+ 479 unacceptable
482
+ 480 acceptable
483
+ 481 unacceptable
484
+ 482 acceptable
485
+ 483 unacceptable
486
+ 484 acceptable
487
+ 485 acceptable
488
+ 486 acceptable
489
+ 487 acceptable
490
+ 488 unacceptable
491
+ 489 unacceptable
492
+ 490 acceptable
493
+ 491 unacceptable
494
+ 492 acceptable
495
+ 493 acceptable
496
+ 494 acceptable
497
+ 495 unacceptable
498
+ 496 unacceptable
499
+ 497 acceptable
500
+ 498 acceptable
501
+ 499 acceptable
502
+ 500 acceptable
503
+ 501 acceptable
504
+ 502 acceptable
505
+ 503 acceptable
506
+ 504 unacceptable
507
+ 505 acceptable
508
+ 506 acceptable
509
+ 507 acceptable
510
+ 508 unacceptable
511
+ 509 acceptable
512
+ 510 acceptable
513
+ 511 unacceptable
514
+ 512 acceptable
515
+ 513 acceptable
516
+ 514 acceptable
517
+ 515 acceptable
518
+ 516 acceptable
519
+ 517 acceptable
520
+ 518 acceptable
521
+ 519 acceptable
522
+ 520 acceptable
523
+ 521 unacceptable
524
+ 522 acceptable
525
+ 523 acceptable
526
+ 524 unacceptable
527
+ 525 unacceptable
528
+ 526 acceptable
529
+ 527 acceptable
530
+ 528 acceptable
531
+ 529 acceptable
532
+ 530 acceptable
533
+ 531 unacceptable
534
+ 532 acceptable
535
+ 533 acceptable
536
+ 534 acceptable
537
+ 535 acceptable
538
+ 536 acceptable
539
+ 537 acceptable
540
+ 538 acceptable
541
+ 539 acceptable
542
+ 540 acceptable
543
+ 541 acceptable
544
+ 542 acceptable
545
+ 543 acceptable
546
+ 544 unacceptable
547
+ 545 acceptable
548
+ 546 acceptable
549
+ 547 acceptable
550
+ 548 acceptable
551
+ 549 acceptable
552
+ 550 acceptable
553
+ 551 acceptable
554
+ 552 acceptable
555
+ 553 acceptable
556
+ 554 acceptable
557
+ 555 acceptable
558
+ 556 acceptable
559
+ 557 acceptable
560
+ 558 acceptable
561
+ 559 acceptable
562
+ 560 acceptable
563
+ 561 acceptable
564
+ 562 acceptable
565
+ 563 acceptable
566
+ 564 unacceptable
567
+ 565 acceptable
568
+ 566 acceptable
569
+ 567 acceptable
570
+ 568 acceptable
571
+ 569 acceptable
572
+ 570 unacceptable
573
+ 571 unacceptable
574
+ 572 acceptable
575
+ 573 acceptable
576
+ 574 acceptable
577
+ 575 unacceptable
578
+ 576 acceptable
579
+ 577 acceptable
580
+ 578 acceptable
581
+ 579 acceptable
582
+ 580 unacceptable
583
+ 581 unacceptable
584
+ 582 unacceptable
585
+ 583 acceptable
586
+ 584 acceptable
587
+ 585 acceptable
588
+ 586 unacceptable
589
+ 587 acceptable
590
+ 588 acceptable
591
+ 589 acceptable
592
+ 590 acceptable
593
+ 591 acceptable
594
+ 592 unacceptable
595
+ 593 acceptable
596
+ 594 acceptable
597
+ 595 unacceptable
598
+ 596 acceptable
599
+ 597 acceptable
600
+ 598 unacceptable
601
+ 599 acceptable
602
+ 600 acceptable
603
+ 601 unacceptable
604
+ 602 acceptable
605
+ 603 acceptable
606
+ 604 unacceptable
607
+ 605 acceptable
608
+ 606 acceptable
609
+ 607 acceptable
610
+ 608 acceptable
611
+ 609 acceptable
612
+ 610 unacceptable
613
+ 611 unacceptable
614
+ 612 acceptable
615
+ 613 acceptable
616
+ 614 acceptable
617
+ 615 acceptable
618
+ 616 acceptable
619
+ 617 acceptable
620
+ 618 acceptable
621
+ 619 acceptable
622
+ 620 acceptable
623
+ 621 acceptable
624
+ 622 acceptable
625
+ 623 acceptable
626
+ 624 acceptable
627
+ 625 acceptable
628
+ 626 acceptable
629
+ 627 acceptable
630
+ 628 acceptable
631
+ 629 acceptable
632
+ 630 unacceptable
633
+ 631 acceptable
634
+ 632 acceptable
635
+ 633 unacceptable
636
+ 634 acceptable
637
+ 635 unacceptable
638
+ 636 unacceptable
639
+ 637 unacceptable
640
+ 638 unacceptable
641
+ 639 acceptable
642
+ 640 unacceptable
643
+ 641 acceptable
644
+ 642 unacceptable
645
+ 643 acceptable
646
+ 644 acceptable
647
+ 645 unacceptable
648
+ 646 unacceptable
649
+ 647 acceptable
650
+ 648 unacceptable
651
+ 649 unacceptable
652
+ 650 acceptable
653
+ 651 acceptable
654
+ 652 acceptable
655
+ 653 acceptable
656
+ 654 acceptable
657
+ 655 unacceptable
658
+ 656 unacceptable
659
+ 657 unacceptable
660
+ 658 acceptable
661
+ 659 unacceptable
662
+ 660 acceptable
663
+ 661 acceptable
664
+ 662 acceptable
665
+ 663 acceptable
666
+ 664 acceptable
667
+ 665 acceptable
668
+ 666 acceptable
669
+ 667 acceptable
670
+ 668 unacceptable
671
+ 669 acceptable
672
+ 670 acceptable
673
+ 671 acceptable
674
+ 672 acceptable
675
+ 673 acceptable
676
+ 674 unacceptable
677
+ 675 acceptable
678
+ 676 acceptable
679
+ 677 unacceptable
680
+ 678 acceptable
681
+ 679 unacceptable
682
+ 680 acceptable
683
+ 681 acceptable
684
+ 682 acceptable
685
+ 683 acceptable
686
+ 684 acceptable
687
+ 685 acceptable
688
+ 686 acceptable
689
+ 687 acceptable
690
+ 688 unacceptable
691
+ 689 unacceptable
692
+ 690 unacceptable
693
+ 691 acceptable
694
+ 692 unacceptable
695
+ 693 acceptable
696
+ 694 unacceptable
697
+ 695 unacceptable
698
+ 696 acceptable
699
+ 697 acceptable
700
+ 698 acceptable
701
+ 699 acceptable
702
+ 700 acceptable
703
+ 701 unacceptable
704
+ 702 unacceptable
705
+ 703 unacceptable
706
+ 704 unacceptable
707
+ 705 unacceptable
708
+ 706 unacceptable
709
+ 707 acceptable
710
+ 708 acceptable
711
+ 709 acceptable
712
+ 710 unacceptable
713
+ 711 acceptable
714
+ 712 acceptable
715
+ 713 acceptable
716
+ 714 acceptable
717
+ 715 unacceptable
718
+ 716 unacceptable
719
+ 717 unacceptable
720
+ 718 unacceptable
721
+ 719 unacceptable
722
+ 720 acceptable
723
+ 721 unacceptable
724
+ 722 unacceptable
725
+ 723 unacceptable
726
+ 724 unacceptable
727
+ 725 unacceptable
728
+ 726 acceptable
729
+ 727 unacceptable
730
+ 728 acceptable
731
+ 729 acceptable
732
+ 730 acceptable
733
+ 731 acceptable
734
+ 732 acceptable
735
+ 733 acceptable
736
+ 734 unacceptable
737
+ 735 unacceptable
738
+ 736 unacceptable
739
+ 737 acceptable
740
+ 738 acceptable
741
+ 739 acceptable
742
+ 740 acceptable
743
+ 741 unacceptable
744
+ 742 acceptable
745
+ 743 acceptable
746
+ 744 unacceptable
747
+ 745 acceptable
748
+ 746 acceptable
749
+ 747 unacceptable
750
+ 748 acceptable
751
+ 749 acceptable
752
+ 750 acceptable
753
+ 751 acceptable
754
+ 752 acceptable
755
+ 753 acceptable
756
+ 754 acceptable
757
+ 755 unacceptable
758
+ 756 unacceptable
759
+ 757 acceptable
760
+ 758 acceptable
761
+ 759 acceptable
762
+ 760 acceptable
763
+ 761 acceptable
764
+ 762 acceptable
765
+ 763 acceptable
766
+ 764 acceptable
767
+ 765 acceptable
768
+ 766 acceptable
769
+ 767 acceptable
770
+ 768 acceptable
771
+ 769 acceptable
772
+ 770 acceptable
773
+ 771 acceptable
774
+ 772 acceptable
775
+ 773 acceptable
776
+ 774 acceptable
777
+ 775 unacceptable
778
+ 776 acceptable
779
+ 777 acceptable
780
+ 778 acceptable
781
+ 779 acceptable
782
+ 780 acceptable
783
+ 781 acceptable
784
+ 782 acceptable
785
+ 783 acceptable
786
+ 784 acceptable
787
+ 785 acceptable
788
+ 786 acceptable
789
+ 787 acceptable
790
+ 788 acceptable
791
+ 789 acceptable
792
+ 790 acceptable
793
+ 791 acceptable
794
+ 792 acceptable
795
+ 793 acceptable
796
+ 794 unacceptable
797
+ 795 acceptable
798
+ 796 unacceptable
799
+ 797 acceptable
800
+ 798 acceptable
801
+ 799 unacceptable
802
+ 800 acceptable
803
+ 801 acceptable
804
+ 802 acceptable
805
+ 803 acceptable
806
+ 804 acceptable
807
+ 805 acceptable
808
+ 806 acceptable
809
+ 807 acceptable
810
+ 808 unacceptable
811
+ 809 acceptable
812
+ 810 acceptable
813
+ 811 acceptable
814
+ 812 acceptable
815
+ 813 acceptable
816
+ 814 acceptable
817
+ 815 unacceptable
818
+ 816 acceptable
819
+ 817 acceptable
820
+ 818 acceptable
821
+ 819 acceptable
822
+ 820 acceptable
823
+ 821 acceptable
824
+ 822 unacceptable
825
+ 823 unacceptable
826
+ 824 acceptable
827
+ 825 unacceptable
828
+ 826 acceptable
829
+ 827 acceptable
830
+ 828 acceptable
831
+ 829 acceptable
832
+ 830 unacceptable
833
+ 831 unacceptable
834
+ 832 unacceptable
835
+ 833 acceptable
836
+ 834 acceptable
837
+ 835 acceptable
838
+ 836 unacceptable
839
+ 837 acceptable
840
+ 838 acceptable
841
+ 839 acceptable
842
+ 840 acceptable
843
+ 841 unacceptable
844
+ 842 acceptable
845
+ 843 acceptable
846
+ 844 unacceptable
847
+ 845 acceptable
848
+ 846 unacceptable
849
+ 847 acceptable
850
+ 848 acceptable
851
+ 849 acceptable
852
+ 850 acceptable
853
+ 851 acceptable
854
+ 852 acceptable
855
+ 853 acceptable
856
+ 854 acceptable
857
+ 855 acceptable
858
+ 856 acceptable
859
+ 857 acceptable
860
+ 858 unacceptable
861
+ 859 acceptable
862
+ 860 acceptable
863
+ 861 acceptable
864
+ 862 acceptable
865
+ 863 acceptable
866
+ 864 acceptable
867
+ 865 acceptable
868
+ 866 acceptable
869
+ 867 acceptable
870
+ 868 acceptable
871
+ 869 acceptable
872
+ 870 unacceptable
873
+ 871 acceptable
874
+ 872 unacceptable
875
+ 873 unacceptable
876
+ 874 acceptable
877
+ 875 acceptable
878
+ 876 acceptable
879
+ 877 acceptable
880
+ 878 acceptable
881
+ 879 acceptable
882
+ 880 acceptable
883
+ 881 acceptable
884
+ 882 acceptable
885
+ 883 acceptable
886
+ 884 acceptable
887
+ 885 acceptable
888
+ 886 unacceptable
889
+ 887 unacceptable
890
+ 888 unacceptable
891
+ 889 acceptable
892
+ 890 unacceptable
893
+ 891 unacceptable
894
+ 892 unacceptable
895
+ 893 acceptable
896
+ 894 acceptable
897
+ 895 acceptable
898
+ 896 acceptable
899
+ 897 unacceptable
900
+ 898 acceptable
901
+ 899 acceptable
902
+ 900 acceptable
903
+ 901 acceptable
904
+ 902 acceptable
905
+ 903 acceptable
906
+ 904 unacceptable
907
+ 905 acceptable
908
+ 906 acceptable
909
+ 907 acceptable
910
+ 908 unacceptable
911
+ 909 acceptable
912
+ 910 acceptable
913
+ 911 acceptable
914
+ 912 acceptable
915
+ 913 acceptable
916
+ 914 acceptable
917
+ 915 acceptable
918
+ 916 unacceptable
919
+ 917 acceptable
920
+ 918 acceptable
921
+ 919 acceptable
922
+ 920 acceptable
923
+ 921 acceptable
924
+ 922 acceptable
925
+ 923 acceptable
926
+ 924 acceptable
927
+ 925 acceptable
928
+ 926 acceptable
929
+ 927 acceptable
930
+ 928 acceptable
931
+ 929 unacceptable
932
+ 930 unacceptable
933
+ 931 unacceptable
934
+ 932 acceptable
935
+ 933 unacceptable
936
+ 934 acceptable
937
+ 935 acceptable
938
+ 936 acceptable
939
+ 937 unacceptable
940
+ 938 acceptable
941
+ 939 unacceptable
942
+ 940 acceptable
943
+ 941 acceptable
944
+ 942 acceptable
945
+ 943 acceptable
946
+ 944 acceptable
947
+ 945 acceptable
948
+ 946 acceptable
949
+ 947 acceptable
950
+ 948 unacceptable
951
+ 949 unacceptable
952
+ 950 acceptable
953
+ 951 acceptable
954
+ 952 unacceptable
955
+ 953 acceptable
956
+ 954 acceptable
957
+ 955 unacceptable
958
+ 956 acceptable
959
+ 957 acceptable
960
+ 958 acceptable
961
+ 959 acceptable
962
+ 960 acceptable
963
+ 961 acceptable
964
+ 962 acceptable
965
+ 963 acceptable
966
+ 964 acceptable
967
+ 965 acceptable
968
+ 966 acceptable
969
+ 967 acceptable
970
+ 968 acceptable
971
+ 969 acceptable
972
+ 970 acceptable
973
+ 971 acceptable
974
+ 972 acceptable
975
+ 973 acceptable
976
+ 974 acceptable
977
+ 975 acceptable
978
+ 976 acceptable
979
+ 977 acceptable
980
+ 978 acceptable
981
+ 979 acceptable
982
+ 980 acceptable
983
+ 981 unacceptable
984
+ 982 unacceptable
985
+ 983 acceptable
986
+ 984 acceptable
987
+ 985 unacceptable
988
+ 986 unacceptable
989
+ 987 acceptable
990
+ 988 unacceptable
991
+ 989 acceptable
992
+ 990 acceptable
993
+ 991 unacceptable
994
+ 992 acceptable
995
+ 993 unacceptable
996
+ 994 acceptable
997
+ 995 acceptable
998
+ 996 acceptable
999
+ 997 acceptable
1000
+ 998 acceptable
1001
+ 999 unacceptable
1002
+ 1000 acceptable
1003
+ 1001 acceptable
1004
+ 1002 acceptable
1005
+ 1003 acceptable
1006
+ 1004 unacceptable
1007
+ 1005 unacceptable
1008
+ 1006 acceptable
1009
+ 1007 unacceptable
1010
+ 1008 acceptable
1011
+ 1009 acceptable
1012
+ 1010 acceptable
1013
+ 1011 acceptable
1014
+ 1012 unacceptable
1015
+ 1013 acceptable
1016
+ 1014 acceptable
1017
+ 1015 acceptable
1018
+ 1016 acceptable
1019
+ 1017 acceptable
1020
+ 1018 acceptable
1021
+ 1019 acceptable
1022
+ 1020 acceptable
1023
+ 1021 acceptable
1024
+ 1022 acceptable
1025
+ 1023 unacceptable
1026
+ 1024 acceptable
1027
+ 1025 acceptable
1028
+ 1026 acceptable
1029
+ 1027 unacceptable
1030
+ 1028 acceptable
1031
+ 1029 acceptable
1032
+ 1030 acceptable
1033
+ 1031 acceptable
1034
+ 1032 unacceptable
1035
+ 1033 acceptable
1036
+ 1034 acceptable
1037
+ 1035 acceptable
1038
+ 1036 unacceptable
1039
+ 1037 acceptable
1040
+ 1038 acceptable
1041
+ 1039 acceptable
1042
+ 1040 unacceptable
1043
+ 1041 acceptable
1044
+ 1042 acceptable
1045
+ 1043 acceptable
1046
+ 1044 unacceptable
1047
+ 1045 unacceptable
1048
+ 1046 unacceptable
1049
+ 1047 acceptable
1050
+ 1048 acceptable
1051
+ 1049 unacceptable
1052
+ 1050 unacceptable
1053
+ 1051 unacceptable
1054
+ 1052 unacceptable
1055
+ 1053 unacceptable
1056
+ 1054 unacceptable
1057
+ 1055 unacceptable
1058
+ 1056 acceptable
1059
+ 1057 acceptable
1060
+ 1058 acceptable
1061
+ 1059 acceptable
1062
+ 1060 acceptable
1063
+ 1061 unacceptable
1064
+ 1062 acceptable
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "train_loss": 0.18462801172133925,
4
+ "train_runtime": 361.7714,
5
+ "train_samples": 8551,
6
+ "train_samples_per_second": 378.184,
7
+ "train_steps_per_second": 2.963
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5057060886900621,
3
+ "best_model_checkpoint": "./runtime-text-classification/paraphrase-MiniLM-L12-v2-CoLA/checkpoint-201",
4
+ "epoch": 16.0,
5
+ "global_step": 1072,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.07,
12
+ "learning_rate": 1.2121212121212122e-05,
13
+ "loss": 0.691,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.15,
18
+ "learning_rate": 2.4242424242424244e-05,
19
+ "loss": 0.6789,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.22,
24
+ "learning_rate": 3.6363636363636364e-05,
25
+ "loss": 0.6487,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.3,
30
+ "learning_rate": 4.848484848484849e-05,
31
+ "loss": 0.6241,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.37,
36
+ "learning_rate": 6.060606060606061e-05,
37
+ "loss": 0.6337,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.45,
42
+ "learning_rate": 7.272727272727273e-05,
43
+ "loss": 0.5772,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.52,
48
+ "learning_rate": 7.99992685960293e-05,
49
+ "loss": 0.6126,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.6,
54
+ "learning_rate": 7.99910406085351e-05,
55
+ "loss": 0.6145,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.67,
60
+ "learning_rate": 7.997367226544184e-05,
61
+ "loss": 0.5957,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.75,
66
+ "learning_rate": 7.994716753645944e-05,
67
+ "loss": 0.6062,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.82,
72
+ "learning_rate": 7.991153247951099e-05,
73
+ "loss": 0.6065,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.9,
78
+ "learning_rate": 7.986677523934811e-05,
79
+ "loss": 0.5945,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.97,
84
+ "learning_rate": 7.981290604568935e-05,
85
+ "loss": 0.5747,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 1.0,
90
+ "eval_loss": 0.539438784122467,
91
+ "eval_matthews_correlation": 0.34550903174440106,
92
+ "eval_runtime": 1.0831,
93
+ "eval_samples_per_second": 962.985,
94
+ "eval_steps_per_second": 60.937,
95
+ "step": 67
96
+ },
97
+ {
98
+ "epoch": 1.04,
99
+ "learning_rate": 7.974993721088216e-05,
100
+ "loss": 0.5494,
101
+ "step": 70
102
+ },
103
+ {
104
+ "epoch": 1.12,
105
+ "learning_rate": 7.96778831270887e-05,
106
+ "loss": 0.5121,
107
+ "step": 75
108
+ },
109
+ {
110
+ "epoch": 1.19,
111
+ "learning_rate": 7.95967602629964e-05,
112
+ "loss": 0.5236,
113
+ "step": 80
114
+ },
115
+ {
116
+ "epoch": 1.27,
117
+ "learning_rate": 7.95065871600539e-05,
118
+ "loss": 0.5188,
119
+ "step": 85
120
+ },
121
+ {
122
+ "epoch": 1.34,
123
+ "learning_rate": 7.940738442823318e-05,
124
+ "loss": 0.5408,
125
+ "step": 90
126
+ },
127
+ {
128
+ "epoch": 1.42,
129
+ "learning_rate": 7.929917474131895e-05,
130
+ "loss": 0.5426,
131
+ "step": 95
132
+ },
133
+ {
134
+ "epoch": 1.49,
135
+ "learning_rate": 7.918198283172634e-05,
136
+ "loss": 0.5215,
137
+ "step": 100
138
+ },
139
+ {
140
+ "epoch": 1.57,
141
+ "learning_rate": 7.905583548484803e-05,
142
+ "loss": 0.5201,
143
+ "step": 105
144
+ },
145
+ {
146
+ "epoch": 1.64,
147
+ "learning_rate": 7.892076153293224e-05,
148
+ "loss": 0.5009,
149
+ "step": 110
150
+ },
151
+ {
152
+ "epoch": 1.72,
153
+ "learning_rate": 7.877679184849275e-05,
154
+ "loss": 0.485,
155
+ "step": 115
156
+ },
157
+ {
158
+ "epoch": 1.79,
159
+ "learning_rate": 7.86239593372527e-05,
160
+ "loss": 0.5062,
161
+ "step": 120
162
+ },
163
+ {
164
+ "epoch": 1.87,
165
+ "learning_rate": 7.846229893062373e-05,
166
+ "loss": 0.5096,
167
+ "step": 125
168
+ },
169
+ {
170
+ "epoch": 1.94,
171
+ "learning_rate": 7.829184757772192e-05,
172
+ "loss": 0.5025,
173
+ "step": 130
174
+ },
175
+ {
176
+ "epoch": 2.0,
177
+ "eval_loss": 0.4999140501022339,
178
+ "eval_matthews_correlation": 0.42696977338948733,
179
+ "eval_runtime": 1.0932,
180
+ "eval_samples_per_second": 954.042,
181
+ "eval_steps_per_second": 60.371,
182
+ "step": 134
183
+ },
184
+ {
185
+ "epoch": 2.01,
186
+ "learning_rate": 7.811264423692282e-05,
187
+ "loss": 0.4893,
188
+ "step": 135
189
+ },
190
+ {
191
+ "epoch": 2.09,
192
+ "learning_rate": 7.792472986695704e-05,
193
+ "loss": 0.3651,
194
+ "step": 140
195
+ },
196
+ {
197
+ "epoch": 2.16,
198
+ "learning_rate": 7.772814741754875e-05,
199
+ "loss": 0.4004,
200
+ "step": 145
201
+ },
202
+ {
203
+ "epoch": 2.24,
204
+ "learning_rate": 7.752294181959911e-05,
205
+ "loss": 0.3846,
206
+ "step": 150
207
+ },
208
+ {
209
+ "epoch": 2.31,
210
+ "learning_rate": 7.730915997491679e-05,
211
+ "loss": 0.3769,
212
+ "step": 155
213
+ },
214
+ {
215
+ "epoch": 2.39,
216
+ "learning_rate": 7.708685074549819e-05,
217
+ "loss": 0.3736,
218
+ "step": 160
219
+ },
220
+ {
221
+ "epoch": 2.46,
222
+ "learning_rate": 7.685606494235943e-05,
223
+ "loss": 0.3752,
224
+ "step": 165
225
+ },
226
+ {
227
+ "epoch": 2.54,
228
+ "learning_rate": 7.661685531392307e-05,
229
+ "loss": 0.4007,
230
+ "step": 170
231
+ },
232
+ {
233
+ "epoch": 2.61,
234
+ "learning_rate": 7.636927653396184e-05,
235
+ "loss": 0.3887,
236
+ "step": 175
237
+ },
238
+ {
239
+ "epoch": 2.69,
240
+ "learning_rate": 7.611338518910249e-05,
241
+ "loss": 0.3895,
242
+ "step": 180
243
+ },
244
+ {
245
+ "epoch": 2.76,
246
+ "learning_rate": 7.584923976589229e-05,
247
+ "loss": 0.3789,
248
+ "step": 185
249
+ },
250
+ {
251
+ "epoch": 2.84,
252
+ "learning_rate": 7.557690063743136e-05,
253
+ "loss": 0.3876,
254
+ "step": 190
255
+ },
256
+ {
257
+ "epoch": 2.91,
258
+ "learning_rate": 7.529643004957372e-05,
259
+ "loss": 0.3893,
260
+ "step": 195
261
+ },
262
+ {
263
+ "epoch": 2.99,
264
+ "learning_rate": 7.500789210670052e-05,
265
+ "loss": 0.3698,
266
+ "step": 200
267
+ },
268
+ {
269
+ "epoch": 3.0,
270
+ "eval_loss": 0.46361204981803894,
271
+ "eval_matthews_correlation": 0.5057060886900621,
272
+ "eval_runtime": 1.0755,
273
+ "eval_samples_per_second": 969.746,
274
+ "eval_steps_per_second": 61.365,
275
+ "step": 201
276
+ },
277
+ {
278
+ "epoch": 3.06,
279
+ "learning_rate": 7.47113527570682e-05,
280
+ "loss": 0.3288,
281
+ "step": 205
282
+ },
283
+ {
284
+ "epoch": 3.13,
285
+ "learning_rate": 7.44068797777354e-05,
286
+ "loss": 0.283,
287
+ "step": 210
288
+ },
289
+ {
290
+ "epoch": 3.21,
291
+ "learning_rate": 7.409454275907184e-05,
292
+ "loss": 0.2778,
293
+ "step": 215
294
+ },
295
+ {
296
+ "epoch": 3.28,
297
+ "learning_rate": 7.377441308885273e-05,
298
+ "loss": 0.2891,
299
+ "step": 220
300
+ },
301
+ {
302
+ "epoch": 3.36,
303
+ "learning_rate": 7.344656393594239e-05,
304
+ "loss": 0.2637,
305
+ "step": 225
306
+ },
307
+ {
308
+ "epoch": 3.43,
309
+ "learning_rate": 7.311107023357071e-05,
310
+ "loss": 0.2877,
311
+ "step": 230
312
+ },
313
+ {
314
+ "epoch": 3.51,
315
+ "learning_rate": 7.27680086622065e-05,
316
+ "loss": 0.2744,
317
+ "step": 235
318
+ },
319
+ {
320
+ "epoch": 3.58,
321
+ "learning_rate": 7.241745763203128e-05,
322
+ "loss": 0.2728,
323
+ "step": 240
324
+ },
325
+ {
326
+ "epoch": 3.66,
327
+ "learning_rate": 7.205949726501796e-05,
328
+ "loss": 0.2808,
329
+ "step": 245
330
+ },
331
+ {
332
+ "epoch": 3.73,
333
+ "learning_rate": 7.169420937661808e-05,
334
+ "loss": 0.3497,
335
+ "step": 250
336
+ },
337
+ {
338
+ "epoch": 3.81,
339
+ "learning_rate": 7.132167745706208e-05,
340
+ "loss": 0.3107,
341
+ "step": 255
342
+ },
343
+ {
344
+ "epoch": 3.88,
345
+ "learning_rate": 7.094198665227675e-05,
346
+ "loss": 0.2813,
347
+ "step": 260
348
+ },
349
+ {
350
+ "epoch": 3.96,
351
+ "learning_rate": 7.055522374442433e-05,
352
+ "loss": 0.2969,
353
+ "step": 265
354
+ },
355
+ {
356
+ "epoch": 4.0,
357
+ "eval_loss": 0.5308544039726257,
358
+ "eval_matthews_correlation": 0.4751366513569292,
359
+ "eval_runtime": 1.4776,
360
+ "eval_samples_per_second": 705.87,
361
+ "eval_steps_per_second": 44.667,
362
+ "step": 268
363
+ },
364
+ {
365
+ "epoch": 4.03,
366
+ "learning_rate": 7.01614771320675e-05,
367
+ "loss": 0.3118,
368
+ "step": 270
369
+ },
370
+ {
371
+ "epoch": 4.1,
372
+ "learning_rate": 6.976083680996507e-05,
373
+ "loss": 0.2078,
374
+ "step": 275
375
+ },
376
+ {
377
+ "epoch": 4.18,
378
+ "learning_rate": 6.935339434850266e-05,
379
+ "loss": 0.1871,
380
+ "step": 280
381
+ },
382
+ {
383
+ "epoch": 4.25,
384
+ "learning_rate": 6.89392428727635e-05,
385
+ "loss": 0.2217,
386
+ "step": 285
387
+ },
388
+ {
389
+ "epoch": 4.33,
390
+ "learning_rate": 6.851847704124361e-05,
391
+ "loss": 0.2802,
392
+ "step": 290
393
+ },
394
+ {
395
+ "epoch": 4.4,
396
+ "learning_rate": 6.809119302421682e-05,
397
+ "loss": 0.2718,
398
+ "step": 295
399
+ },
400
+ {
401
+ "epoch": 4.48,
402
+ "learning_rate": 6.765748848175391e-05,
403
+ "loss": 0.2333,
404
+ "step": 300
405
+ },
406
+ {
407
+ "epoch": 4.55,
408
+ "learning_rate": 6.721746254140152e-05,
409
+ "loss": 0.2585,
410
+ "step": 305
411
+ },
412
+ {
413
+ "epoch": 4.63,
414
+ "learning_rate": 6.677121577552554e-05,
415
+ "loss": 0.2538,
416
+ "step": 310
417
+ },
418
+ {
419
+ "epoch": 4.7,
420
+ "learning_rate": 6.631885017832421e-05,
421
+ "loss": 0.2754,
422
+ "step": 315
423
+ },
424
+ {
425
+ "epoch": 4.78,
426
+ "learning_rate": 6.586046914251644e-05,
427
+ "loss": 0.2585,
428
+ "step": 320
429
+ },
430
+ {
431
+ "epoch": 4.85,
432
+ "learning_rate": 6.539617743571023e-05,
433
+ "loss": 0.2141,
434
+ "step": 325
435
+ },
436
+ {
437
+ "epoch": 4.93,
438
+ "learning_rate": 6.492608117645711e-05,
439
+ "loss": 0.2563,
440
+ "step": 330
441
+ },
442
+ {
443
+ "epoch": 5.0,
444
+ "learning_rate": 6.445028780999751e-05,
445
+ "loss": 0.2275,
446
+ "step": 335
447
+ },
448
+ {
449
+ "epoch": 5.0,
450
+ "eval_loss": 0.623831033706665,
451
+ "eval_matthews_correlation": 0.4774618517292431,
452
+ "eval_runtime": 1.0964,
453
+ "eval_samples_per_second": 951.284,
454
+ "eval_steps_per_second": 60.196,
455
+ "step": 335
456
+ },
457
+ {
458
+ "epoch": 5.07,
459
+ "learning_rate": 6.396890608370326e-05,
460
+ "loss": 0.1801,
461
+ "step": 340
462
+ },
463
+ {
464
+ "epoch": 5.15,
465
+ "learning_rate": 6.348204602222213e-05,
466
+ "loss": 0.1687,
467
+ "step": 345
468
+ },
469
+ {
470
+ "epoch": 5.22,
471
+ "learning_rate": 6.298981890233062e-05,
472
+ "loss": 0.1548,
473
+ "step": 350
474
+ },
475
+ {
476
+ "epoch": 5.3,
477
+ "learning_rate": 6.24923372275006e-05,
478
+ "loss": 0.1502,
479
+ "step": 355
480
+ },
481
+ {
482
+ "epoch": 5.37,
483
+ "learning_rate": 6.198971470218543e-05,
484
+ "loss": 0.1816,
485
+ "step": 360
486
+ },
487
+ {
488
+ "epoch": 5.45,
489
+ "learning_rate": 6.148206620583172e-05,
490
+ "loss": 0.1695,
491
+ "step": 365
492
+ },
493
+ {
494
+ "epoch": 5.52,
495
+ "learning_rate": 6.096950776662239e-05,
496
+ "loss": 0.1828,
497
+ "step": 370
498
+ },
499
+ {
500
+ "epoch": 5.6,
501
+ "learning_rate": 6.0452156534957336e-05,
502
+ "loss": 0.1768,
503
+ "step": 375
504
+ },
505
+ {
506
+ "epoch": 5.67,
507
+ "learning_rate": 5.993013075667753e-05,
508
+ "loss": 0.2368,
509
+ "step": 380
510
+ },
511
+ {
512
+ "epoch": 5.75,
513
+ "learning_rate": 5.940354974603868e-05,
514
+ "loss": 0.1782,
515
+ "step": 385
516
+ },
517
+ {
518
+ "epoch": 5.82,
519
+ "learning_rate": 5.887253385844076e-05,
520
+ "loss": 0.1508,
521
+ "step": 390
522
+ },
523
+ {
524
+ "epoch": 5.9,
525
+ "learning_rate": 5.8337204462919645e-05,
526
+ "loss": 0.1778,
527
+ "step": 395
528
+ },
529
+ {
530
+ "epoch": 5.97,
531
+ "learning_rate": 5.779768391440693e-05,
532
+ "loss": 0.1859,
533
+ "step": 400
534
+ },
535
+ {
536
+ "epoch": 6.0,
537
+ "eval_loss": 0.6314846873283386,
538
+ "eval_matthews_correlation": 0.4867092923459376,
539
+ "eval_runtime": 1.0756,
540
+ "eval_samples_per_second": 969.736,
541
+ "eval_steps_per_second": 61.364,
542
+ "step": 402
543
+ },
544
+ {
545
+ "epoch": 6.04,
546
+ "learning_rate": 5.725409552576445e-05,
547
+ "loss": 0.1603,
548
+ "step": 405
549
+ },
550
+ {
551
+ "epoch": 6.12,
552
+ "learning_rate": 5.670656353959997e-05,
553
+ "loss": 0.1204,
554
+ "step": 410
555
+ },
556
+ {
557
+ "epoch": 6.19,
558
+ "learning_rate": 5.615521309987022e-05,
559
+ "loss": 0.1699,
560
+ "step": 415
561
+ },
562
+ {
563
+ "epoch": 6.27,
564
+ "learning_rate": 5.5600170223278e-05,
565
+ "loss": 0.1502,
566
+ "step": 420
567
+ },
568
+ {
569
+ "epoch": 6.34,
570
+ "learning_rate": 5.5041561770469787e-05,
571
+ "loss": 0.1389,
572
+ "step": 425
573
+ },
574
+ {
575
+ "epoch": 6.42,
576
+ "learning_rate": 5.4479515417040426e-05,
577
+ "loss": 0.1567,
578
+ "step": 430
579
+ },
580
+ {
581
+ "epoch": 6.49,
582
+ "learning_rate": 5.391415962435167e-05,
583
+ "loss": 0.1644,
584
+ "step": 435
585
+ },
586
+ {
587
+ "epoch": 6.57,
588
+ "learning_rate": 5.334562361017097e-05,
589
+ "loss": 0.1748,
590
+ "step": 440
591
+ },
592
+ {
593
+ "epoch": 6.64,
594
+ "learning_rate": 5.277403731913743e-05,
595
+ "loss": 0.1308,
596
+ "step": 445
597
+ },
598
+ {
599
+ "epoch": 6.72,
600
+ "learning_rate": 5.2199531393061755e-05,
601
+ "loss": 0.1034,
602
+ "step": 450
603
+ },
604
+ {
605
+ "epoch": 6.79,
606
+ "learning_rate": 5.162223714106661e-05,
607
+ "loss": 0.1266,
608
+ "step": 455
609
+ },
610
+ {
611
+ "epoch": 6.87,
612
+ "learning_rate": 5.104228650957472e-05,
613
+ "loss": 0.0935,
614
+ "step": 460
615
+ },
616
+ {
617
+ "epoch": 6.94,
618
+ "learning_rate": 5.0459812052151114e-05,
619
+ "loss": 0.1517,
620
+ "step": 465
621
+ },
622
+ {
623
+ "epoch": 7.0,
624
+ "eval_loss": 0.7783187627792358,
625
+ "eval_matthews_correlation": 0.4695101101903524,
626
+ "eval_runtime": 1.0697,
627
+ "eval_samples_per_second": 975.006,
628
+ "eval_steps_per_second": 61.697,
629
+ "step": 469
630
+ },
631
+ {
632
+ "epoch": 7.01,
633
+ "learning_rate": 4.9874946899206665e-05,
634
+ "loss": 0.1364,
635
+ "step": 470
636
+ },
637
+ {
638
+ "epoch": 7.09,
639
+ "learning_rate": 4.928782472756985e-05,
640
+ "loss": 0.1386,
641
+ "step": 475
642
+ },
643
+ {
644
+ "epoch": 7.16,
645
+ "learning_rate": 4.869857972993352e-05,
646
+ "loss": 0.0828,
647
+ "step": 480
648
+ },
649
+ {
650
+ "epoch": 7.24,
651
+ "learning_rate": 4.810734658418377e-05,
652
+ "loss": 0.123,
653
+ "step": 485
654
+ },
655
+ {
656
+ "epoch": 7.31,
657
+ "learning_rate": 4.751426042261806e-05,
658
+ "loss": 0.0893,
659
+ "step": 490
660
+ },
661
+ {
662
+ "epoch": 7.39,
663
+ "learning_rate": 4.691945680105929e-05,
664
+ "loss": 0.108,
665
+ "step": 495
666
+ },
667
+ {
668
+ "epoch": 7.46,
669
+ "learning_rate": 4.63230716678731e-05,
670
+ "loss": 0.0797,
671
+ "step": 500
672
+ },
673
+ {
674
+ "epoch": 7.54,
675
+ "learning_rate": 4.5725241332895643e-05,
676
+ "loss": 0.1078,
677
+ "step": 505
678
+ },
679
+ {
680
+ "epoch": 7.61,
681
+ "learning_rate": 4.512610243627845e-05,
682
+ "loss": 0.1057,
683
+ "step": 510
684
+ },
685
+ {
686
+ "epoch": 7.69,
687
+ "learning_rate": 4.4525791917258025e-05,
688
+ "loss": 0.0968,
689
+ "step": 515
690
+ },
691
+ {
692
+ "epoch": 7.76,
693
+ "learning_rate": 4.3924446982857027e-05,
694
+ "loss": 0.1196,
695
+ "step": 520
696
+ },
697
+ {
698
+ "epoch": 7.84,
699
+ "learning_rate": 4.3322205076524145e-05,
700
+ "loss": 0.1423,
701
+ "step": 525
702
+ },
703
+ {
704
+ "epoch": 7.91,
705
+ "learning_rate": 4.271920384672002e-05,
706
+ "loss": 0.1203,
707
+ "step": 530
708
+ },
709
+ {
710
+ "epoch": 7.99,
711
+ "learning_rate": 4.21155811154564e-05,
712
+ "loss": 0.1016,
713
+ "step": 535
714
+ },
715
+ {
716
+ "epoch": 8.0,
717
+ "eval_loss": 0.6762046813964844,
718
+ "eval_matthews_correlation": 0.49007739162498226,
719
+ "eval_runtime": 1.0748,
720
+ "eval_samples_per_second": 970.453,
721
+ "eval_steps_per_second": 61.409,
722
+ "step": 536
723
+ },
724
+ {
725
+ "epoch": 8.06,
726
+ "learning_rate": 4.151147484679541e-05,
727
+ "loss": 0.0866,
728
+ "step": 540
729
+ },
730
+ {
731
+ "epoch": 8.13,
732
+ "learning_rate": 4.0907023115316525e-05,
733
+ "loss": 0.0947,
734
+ "step": 545
735
+ },
736
+ {
737
+ "epoch": 8.21,
738
+ "learning_rate": 4.030236407455827e-05,
739
+ "loss": 0.0718,
740
+ "step": 550
741
+ },
742
+ {
743
+ "epoch": 8.28,
744
+ "learning_rate": 3.969763592544173e-05,
745
+ "loss": 0.0729,
746
+ "step": 555
747
+ },
748
+ {
749
+ "epoch": 8.36,
750
+ "learning_rate": 3.909297688468347e-05,
751
+ "loss": 0.1148,
752
+ "step": 560
753
+ },
754
+ {
755
+ "epoch": 8.43,
756
+ "learning_rate": 3.84885251532046e-05,
757
+ "loss": 0.1083,
758
+ "step": 565
759
+ },
760
+ {
761
+ "epoch": 8.51,
762
+ "learning_rate": 3.7884418884543614e-05,
763
+ "loss": 0.1135,
764
+ "step": 570
765
+ },
766
+ {
767
+ "epoch": 8.58,
768
+ "learning_rate": 3.728079615327999e-05,
769
+ "loss": 0.0581,
770
+ "step": 575
771
+ },
772
+ {
773
+ "epoch": 8.66,
774
+ "learning_rate": 3.667779492347588e-05,
775
+ "loss": 0.0943,
776
+ "step": 580
777
+ },
778
+ {
779
+ "epoch": 8.73,
780
+ "learning_rate": 3.607555301714297e-05,
781
+ "loss": 0.095,
782
+ "step": 585
783
+ },
784
+ {
785
+ "epoch": 8.81,
786
+ "learning_rate": 3.5474208082741975e-05,
787
+ "loss": 0.1263,
788
+ "step": 590
789
+ },
790
+ {
791
+ "epoch": 8.88,
792
+ "learning_rate": 3.4873897563721565e-05,
793
+ "loss": 0.1371,
794
+ "step": 595
795
+ },
796
+ {
797
+ "epoch": 8.96,
798
+ "learning_rate": 3.427475866710437e-05,
799
+ "loss": 0.1017,
800
+ "step": 600
801
+ },
802
+ {
803
+ "epoch": 9.0,
804
+ "eval_loss": 0.741180419921875,
805
+ "eval_matthews_correlation": 0.5046330570516372,
806
+ "eval_runtime": 1.0744,
807
+ "eval_samples_per_second": 970.819,
808
+ "eval_steps_per_second": 61.432,
809
+ "step": 603
810
+ },
811
+ {
812
+ "epoch": 9.03,
813
+ "learning_rate": 3.367692833212691e-05,
814
+ "loss": 0.0929,
815
+ "step": 605
816
+ },
817
+ {
818
+ "epoch": 9.1,
819
+ "learning_rate": 3.308054319894074e-05,
820
+ "loss": 0.0638,
821
+ "step": 610
822
+ },
823
+ {
824
+ "epoch": 9.18,
825
+ "learning_rate": 3.2485739577381946e-05,
826
+ "loss": 0.1338,
827
+ "step": 615
828
+ },
829
+ {
830
+ "epoch": 9.25,
831
+ "learning_rate": 3.1892653415816245e-05,
832
+ "loss": 0.0663,
833
+ "step": 620
834
+ },
835
+ {
836
+ "epoch": 9.33,
837
+ "learning_rate": 3.13014202700665e-05,
838
+ "loss": 0.0767,
839
+ "step": 625
840
+ },
841
+ {
842
+ "epoch": 9.4,
843
+ "learning_rate": 3.071217527243016e-05,
844
+ "loss": 0.1068,
845
+ "step": 630
846
+ },
847
+ {
848
+ "epoch": 9.48,
849
+ "learning_rate": 3.012505310079335e-05,
850
+ "loss": 0.0818,
851
+ "step": 635
852
+ },
853
+ {
854
+ "epoch": 9.55,
855
+ "learning_rate": 2.9540187947848906e-05,
856
+ "loss": 0.0527,
857
+ "step": 640
858
+ },
859
+ {
860
+ "epoch": 9.63,
861
+ "learning_rate": 2.8957713490425298e-05,
862
+ "loss": 0.0837,
863
+ "step": 645
864
+ },
865
+ {
866
+ "epoch": 9.7,
867
+ "learning_rate": 2.837776285893339e-05,
868
+ "loss": 0.0804,
869
+ "step": 650
870
+ },
871
+ {
872
+ "epoch": 9.78,
873
+ "learning_rate": 2.780046860693826e-05,
874
+ "loss": 0.1138,
875
+ "step": 655
876
+ },
877
+ {
878
+ "epoch": 9.85,
879
+ "learning_rate": 2.7225962680862575e-05,
880
+ "loss": 0.0705,
881
+ "step": 660
882
+ },
883
+ {
884
+ "epoch": 9.93,
885
+ "learning_rate": 2.6654376389829043e-05,
886
+ "loss": 0.0562,
887
+ "step": 665
888
+ },
889
+ {
890
+ "epoch": 10.0,
891
+ "learning_rate": 2.6085840375648337e-05,
892
+ "loss": 0.0898,
893
+ "step": 670
894
+ },
895
+ {
896
+ "epoch": 10.0,
897
+ "eval_loss": 0.7719386219978333,
898
+ "eval_matthews_correlation": 0.48769484136865787,
899
+ "eval_runtime": 1.1003,
900
+ "eval_samples_per_second": 947.882,
901
+ "eval_steps_per_second": 59.981,
902
+ "step": 670
903
+ },
904
+ {
905
+ "epoch": 10.07,
906
+ "learning_rate": 2.5520484582959588e-05,
907
+ "loss": 0.0739,
908
+ "step": 675
909
+ },
910
+ {
911
+ "epoch": 10.15,
912
+ "learning_rate": 2.4958438229530223e-05,
913
+ "loss": 0.0927,
914
+ "step": 680
915
+ },
916
+ {
917
+ "epoch": 10.22,
918
+ "learning_rate": 2.4399829776722005e-05,
919
+ "loss": 0.0731,
920
+ "step": 685
921
+ },
922
+ {
923
+ "epoch": 10.3,
924
+ "learning_rate": 2.384478690012978e-05,
925
+ "loss": 0.051,
926
+ "step": 690
927
+ },
928
+ {
929
+ "epoch": 10.37,
930
+ "learning_rate": 2.3293436460400044e-05,
931
+ "loss": 0.0647,
932
+ "step": 695
933
+ },
934
+ {
935
+ "epoch": 10.45,
936
+ "learning_rate": 2.2745904474235573e-05,
937
+ "loss": 0.0704,
938
+ "step": 700
939
+ },
940
+ {
941
+ "epoch": 10.52,
942
+ "learning_rate": 2.2202316085593106e-05,
943
+ "loss": 0.0479,
944
+ "step": 705
945
+ },
946
+ {
947
+ "epoch": 10.6,
948
+ "learning_rate": 2.1662795537080365e-05,
949
+ "loss": 0.076,
950
+ "step": 710
951
+ },
952
+ {
953
+ "epoch": 10.67,
954
+ "learning_rate": 2.1127466141559257e-05,
955
+ "loss": 0.0709,
956
+ "step": 715
957
+ },
958
+ {
959
+ "epoch": 10.75,
960
+ "learning_rate": 2.059645025396135e-05,
961
+ "loss": 0.0698,
962
+ "step": 720
963
+ },
964
+ {
965
+ "epoch": 10.82,
966
+ "learning_rate": 2.006986924332247e-05,
967
+ "loss": 0.0606,
968
+ "step": 725
969
+ },
970
+ {
971
+ "epoch": 10.9,
972
+ "learning_rate": 1.9547843465042664e-05,
973
+ "loss": 0.096,
974
+ "step": 730
975
+ },
976
+ {
977
+ "epoch": 10.97,
978
+ "learning_rate": 1.9030492233377623e-05,
979
+ "loss": 0.0527,
980
+ "step": 735
981
+ },
982
+ {
983
+ "epoch": 11.0,
984
+ "eval_loss": 0.8626514673233032,
985
+ "eval_matthews_correlation": 0.49547617383910947,
986
+ "eval_runtime": 1.0828,
987
+ "eval_samples_per_second": 963.257,
988
+ "eval_steps_per_second": 60.954,
989
+ "step": 737
990
+ },
991
+ {
992
+ "epoch": 11.04,
993
+ "learning_rate": 1.8517933794168294e-05,
994
+ "loss": 0.0582,
995
+ "step": 740
996
+ },
997
+ {
998
+ "epoch": 11.12,
999
+ "learning_rate": 1.8010285297814563e-05,
1000
+ "loss": 0.0731,
1001
+ "step": 745
1002
+ },
1003
+ {
1004
+ "epoch": 11.19,
1005
+ "learning_rate": 1.7507662772499406e-05,
1006
+ "loss": 0.0478,
1007
+ "step": 750
1008
+ },
1009
+ {
1010
+ "epoch": 11.27,
1011
+ "learning_rate": 1.701018109766939e-05,
1012
+ "loss": 0.0564,
1013
+ "step": 755
1014
+ },
1015
+ {
1016
+ "epoch": 11.34,
1017
+ "learning_rate": 1.6517953977777888e-05,
1018
+ "loss": 0.0413,
1019
+ "step": 760
1020
+ },
1021
+ {
1022
+ "epoch": 11.42,
1023
+ "learning_rate": 1.603109391629675e-05,
1024
+ "loss": 0.0478,
1025
+ "step": 765
1026
+ },
1027
+ {
1028
+ "epoch": 11.49,
1029
+ "learning_rate": 1.5549712190002498e-05,
1030
+ "loss": 0.0844,
1031
+ "step": 770
1032
+ },
1033
+ {
1034
+ "epoch": 11.57,
1035
+ "learning_rate": 1.50739188235429e-05,
1036
+ "loss": 0.065,
1037
+ "step": 775
1038
+ },
1039
+ {
1040
+ "epoch": 11.64,
1041
+ "learning_rate": 1.4603822564289764e-05,
1042
+ "loss": 0.068,
1043
+ "step": 780
1044
+ },
1045
+ {
1046
+ "epoch": 11.72,
1047
+ "learning_rate": 1.4139530857483568e-05,
1048
+ "loss": 0.0324,
1049
+ "step": 785
1050
+ },
1051
+ {
1052
+ "epoch": 11.79,
1053
+ "learning_rate": 1.3681149821675797e-05,
1054
+ "loss": 0.0436,
1055
+ "step": 790
1056
+ },
1057
+ {
1058
+ "epoch": 11.87,
1059
+ "learning_rate": 1.322878422447448e-05,
1060
+ "loss": 0.0566,
1061
+ "step": 795
1062
+ },
1063
+ {
1064
+ "epoch": 11.94,
1065
+ "learning_rate": 1.2782537458598489e-05,
1066
+ "loss": 0.0582,
1067
+ "step": 800
1068
+ },
1069
+ {
1070
+ "epoch": 12.0,
1071
+ "eval_loss": 0.8985734581947327,
1072
+ "eval_matthews_correlation": 0.47375271850377065,
1073
+ "eval_runtime": 1.0831,
1074
+ "eval_samples_per_second": 962.944,
1075
+ "eval_steps_per_second": 60.934,
1076
+ "step": 804
1077
+ },
1078
+ {
1079
+ "epoch": 12.01,
1080
+ "learning_rate": 1.2342511518246095e-05,
1081
+ "loss": 0.0521,
1082
+ "step": 805
1083
+ },
1084
+ {
1085
+ "epoch": 12.09,
1086
+ "learning_rate": 1.1908806975783192e-05,
1087
+ "loss": 0.0565,
1088
+ "step": 810
1089
+ },
1090
+ {
1091
+ "epoch": 12.16,
1092
+ "learning_rate": 1.1481522958756383e-05,
1093
+ "loss": 0.0358,
1094
+ "step": 815
1095
+ },
1096
+ {
1097
+ "epoch": 12.24,
1098
+ "learning_rate": 1.1060757127236514e-05,
1099
+ "loss": 0.0436,
1100
+ "step": 820
1101
+ },
1102
+ {
1103
+ "epoch": 12.31,
1104
+ "learning_rate": 1.0646605651497342e-05,
1105
+ "loss": 0.06,
1106
+ "step": 825
1107
+ },
1108
+ {
1109
+ "epoch": 12.39,
1110
+ "learning_rate": 1.0239163190034937e-05,
1111
+ "loss": 0.0439,
1112
+ "step": 830
1113
+ },
1114
+ {
1115
+ "epoch": 12.46,
1116
+ "learning_rate": 9.838522867932499e-06,
1117
+ "loss": 0.0436,
1118
+ "step": 835
1119
+ },
1120
+ {
1121
+ "epoch": 12.54,
1122
+ "learning_rate": 9.444776255575672e-06,
1123
+ "loss": 0.032,
1124
+ "step": 840
1125
+ },
1126
+ {
1127
+ "epoch": 12.61,
1128
+ "learning_rate": 9.058013347723258e-06,
1129
+ "loss": 0.0623,
1130
+ "step": 845
1131
+ },
1132
+ {
1133
+ "epoch": 12.69,
1134
+ "learning_rate": 8.678322542937941e-06,
1135
+ "loss": 0.0526,
1136
+ "step": 850
1137
+ },
1138
+ {
1139
+ "epoch": 12.76,
1140
+ "learning_rate": 8.305790623381934e-06,
1141
+ "loss": 0.0231,
1142
+ "step": 855
1143
+ },
1144
+ {
1145
+ "epoch": 12.84,
1146
+ "learning_rate": 7.94050273498205e-06,
1147
+ "loss": 0.035,
1148
+ "step": 860
1149
+ },
1150
+ {
1151
+ "epoch": 12.91,
1152
+ "learning_rate": 7.582542367968733e-06,
1153
+ "loss": 0.0612,
1154
+ "step": 865
1155
+ },
1156
+ {
1157
+ "epoch": 12.99,
1158
+ "learning_rate": 7.2319913377935096e-06,
1159
+ "loss": 0.074,
1160
+ "step": 870
1161
+ },
1162
+ {
1163
+ "epoch": 13.0,
1164
+ "eval_loss": 0.9468681812286377,
1165
+ "eval_matthews_correlation": 0.49419533040588925,
1166
+ "eval_runtime": 1.0698,
1167
+ "eval_samples_per_second": 974.98,
1168
+ "eval_steps_per_second": 61.696,
1169
+ "step": 871
1170
+ },
1171
+ {
1172
+ "epoch": 13.06,
1173
+ "learning_rate": 6.888929766429293e-06,
1174
+ "loss": 0.0266,
1175
+ "step": 875
1176
+ },
1177
+ {
1178
+ "epoch": 13.13,
1179
+ "learning_rate": 6.553436064057619e-06,
1180
+ "loss": 0.0631,
1181
+ "step": 880
1182
+ },
1183
+ {
1184
+ "epoch": 13.21,
1185
+ "learning_rate": 6.225586911147274e-06,
1186
+ "loss": 0.0549,
1187
+ "step": 885
1188
+ },
1189
+ {
1190
+ "epoch": 13.28,
1191
+ "learning_rate": 5.9054572409281655e-06,
1192
+ "loss": 0.0212,
1193
+ "step": 890
1194
+ },
1195
+ {
1196
+ "epoch": 13.36,
1197
+ "learning_rate": 5.5931202222646145e-06,
1198
+ "loss": 0.0404,
1199
+ "step": 895
1200
+ },
1201
+ {
1202
+ "epoch": 13.43,
1203
+ "learning_rate": 5.288647242931815e-06,
1204
+ "loss": 0.0606,
1205
+ "step": 900
1206
+ },
1207
+ {
1208
+ "epoch": 13.51,
1209
+ "learning_rate": 4.992107893299483e-06,
1210
+ "loss": 0.0523,
1211
+ "step": 905
1212
+ },
1213
+ {
1214
+ "epoch": 13.58,
1215
+ "learning_rate": 4.703569950426285e-06,
1216
+ "loss": 0.0283,
1217
+ "step": 910
1218
+ },
1219
+ {
1220
+ "epoch": 13.66,
1221
+ "learning_rate": 4.4230993625686616e-06,
1222
+ "loss": 0.0515,
1223
+ "step": 915
1224
+ },
1225
+ {
1226
+ "epoch": 13.73,
1227
+ "learning_rate": 4.150760234107716e-06,
1228
+ "loss": 0.0333,
1229
+ "step": 920
1230
+ },
1231
+ {
1232
+ "epoch": 13.81,
1233
+ "learning_rate": 3.88661481089752e-06,
1234
+ "loss": 0.0338,
1235
+ "step": 925
1236
+ },
1237
+ {
1238
+ "epoch": 13.88,
1239
+ "learning_rate": 3.630723466038175e-06,
1240
+ "loss": 0.0344,
1241
+ "step": 930
1242
+ },
1243
+ {
1244
+ "epoch": 13.96,
1245
+ "learning_rate": 3.3831446860769444e-06,
1246
+ "loss": 0.0508,
1247
+ "step": 935
1248
+ },
1249
+ {
1250
+ "epoch": 14.0,
1251
+ "eval_loss": 0.9435966610908508,
1252
+ "eval_matthews_correlation": 0.49177365217771035,
1253
+ "eval_runtime": 1.082,
1254
+ "eval_samples_per_second": 963.975,
1255
+ "eval_steps_per_second": 60.999,
1256
+ "step": 938
1257
+ },
1258
+ {
1259
+ "epoch": 14.03,
1260
+ "learning_rate": 3.1439350576405726e-06,
1261
+ "loss": 0.0513,
1262
+ "step": 940
1263
+ },
1264
+ {
1265
+ "epoch": 14.1,
1266
+ "learning_rate": 2.913149254501817e-06,
1267
+ "loss": 0.0477,
1268
+ "step": 945
1269
+ },
1270
+ {
1271
+ "epoch": 14.18,
1272
+ "learning_rate": 2.69084002508321e-06,
1273
+ "loss": 0.0403,
1274
+ "step": 950
1275
+ },
1276
+ {
1277
+ "epoch": 14.25,
1278
+ "learning_rate": 2.477058180400902e-06,
1279
+ "loss": 0.0657,
1280
+ "step": 955
1281
+ },
1282
+ {
1283
+ "epoch": 14.33,
1284
+ "learning_rate": 2.2718525824512614e-06,
1285
+ "loss": 0.0454,
1286
+ "step": 960
1287
+ },
1288
+ {
1289
+ "epoch": 14.4,
1290
+ "learning_rate": 2.0752701330429702e-06,
1291
+ "loss": 0.0314,
1292
+ "step": 965
1293
+ },
1294
+ {
1295
+ "epoch": 14.48,
1296
+ "learning_rate": 1.887355763077192e-06,
1297
+ "loss": 0.0261,
1298
+ "step": 970
1299
+ },
1300
+ {
1301
+ "epoch": 14.55,
1302
+ "learning_rate": 1.7081524222780864e-06,
1303
+ "loss": 0.0366,
1304
+ "step": 975
1305
+ },
1306
+ {
1307
+ "epoch": 14.63,
1308
+ "learning_rate": 1.5377010693762784e-06,
1309
+ "loss": 0.0307,
1310
+ "step": 980
1311
+ },
1312
+ {
1313
+ "epoch": 14.7,
1314
+ "learning_rate": 1.3760406627473022e-06,
1315
+ "loss": 0.0604,
1316
+ "step": 985
1317
+ },
1318
+ {
1319
+ "epoch": 14.78,
1320
+ "learning_rate": 1.2232081515072624e-06,
1321
+ "loss": 0.0477,
1322
+ "step": 990
1323
+ },
1324
+ {
1325
+ "epoch": 14.85,
1326
+ "learning_rate": 1.0792384670677625e-06,
1327
+ "loss": 0.0303,
1328
+ "step": 995
1329
+ },
1330
+ {
1331
+ "epoch": 14.93,
1332
+ "learning_rate": 9.441645151519708e-07,
1333
+ "loss": 0.032,
1334
+ "step": 1000
1335
+ },
1336
+ {
1337
+ "epoch": 15.0,
1338
+ "learning_rate": 8.180171682736682e-07,
1339
+ "loss": 0.024,
1340
+ "step": 1005
1341
+ },
1342
+ {
1343
+ "epoch": 15.0,
1344
+ "eval_loss": 0.9390689730644226,
1345
+ "eval_matthews_correlation": 0.49194159281920924,
1346
+ "eval_runtime": 1.0846,
1347
+ "eval_samples_per_second": 961.661,
1348
+ "eval_steps_per_second": 60.853,
1349
+ "step": 1005
1350
+ },
1351
+ {
1352
+ "epoch": 15.07,
1353
+ "learning_rate": 7.008252586810571e-07,
1354
+ "loss": 0.0518,
1355
+ "step": 1010
1356
+ },
1357
+ {
1358
+ "epoch": 15.15,
1359
+ "learning_rate": 5.92615571766828e-07,
1360
+ "loss": 0.0266,
1361
+ "step": 1015
1362
+ },
1363
+ {
1364
+ "epoch": 15.22,
1365
+ "learning_rate": 4.934128399461057e-07,
1366
+ "loss": 0.0424,
1367
+ "step": 1020
1368
+ },
1369
+ {
1370
+ "epoch": 15.3,
1371
+ "learning_rate": 4.032397370036067e-07,
1372
+ "loss": 0.0441,
1373
+ "step": 1025
1374
+ },
1375
+ {
1376
+ "epoch": 15.37,
1377
+ "learning_rate": 3.221168729113089e-07,
1378
+ "loss": 0.0388,
1379
+ "step": 1030
1380
+ },
1381
+ {
1382
+ "epoch": 15.45,
1383
+ "learning_rate": 2.5006278911784643e-07,
1384
+ "loss": 0.0486,
1385
+ "step": 1035
1386
+ },
1387
+ {
1388
+ "epoch": 15.52,
1389
+ "learning_rate": 1.8709395431065535e-07,
1390
+ "loss": 0.0311,
1391
+ "step": 1040
1392
+ },
1393
+ {
1394
+ "epoch": 15.6,
1395
+ "learning_rate": 1.3322476065190438e-07,
1396
+ "loss": 0.023,
1397
+ "step": 1045
1398
+ },
1399
+ {
1400
+ "epoch": 15.67,
1401
+ "learning_rate": 8.846752048901952e-08,
1402
+ "loss": 0.0396,
1403
+ "step": 1050
1404
+ },
1405
+ {
1406
+ "epoch": 15.75,
1407
+ "learning_rate": 5.2832463540575254e-08,
1408
+ "loss": 0.0288,
1409
+ "step": 1055
1410
+ },
1411
+ {
1412
+ "epoch": 15.82,
1413
+ "learning_rate": 2.6327734558173613e-08,
1414
+ "loss": 0.028,
1415
+ "step": 1060
1416
+ },
1417
+ {
1418
+ "epoch": 15.9,
1419
+ "learning_rate": 8.95939146489777e-09,
1420
+ "loss": 0.0409,
1421
+ "step": 1065
1422
+ },
1423
+ {
1424
+ "epoch": 15.97,
1425
+ "learning_rate": 7.314039707040366e-10,
1426
+ "loss": 0.0458,
1427
+ "step": 1070
1428
+ },
1429
+ {
1430
+ "epoch": 16.0,
1431
+ "eval_loss": 0.9374607801437378,
1432
+ "eval_matthews_correlation": 0.49464326454019025,
1433
+ "eval_runtime": 1.0777,
1434
+ "eval_samples_per_second": 967.817,
1435
+ "eval_steps_per_second": 61.243,
1436
+ "step": 1072
1437
+ },
1438
+ {
1439
+ "epoch": 16.0,
1440
+ "step": 1072,
1441
+ "total_flos": 2253110874603520.0,
1442
+ "train_loss": 0.18462801172133925,
1443
+ "train_runtime": 361.7714,
1444
+ "train_samples_per_second": 378.184,
1445
+ "train_steps_per_second": 2.963
1446
+ }
1447
+ ],
1448
+ "max_steps": 1072,
1449
+ "num_train_epochs": 16,
1450
+ "total_flos": 2253110874603520.0,
1451
+ "trial_name": null,
1452
+ "trial_params": null
1453
+ }