DiTo97 committed on
Commit
5c68ca2
1 Parent(s): 514770f

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: microsoft/swinv2-small-patch4-window16-256
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - generator
@@ -15,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # swinv2-small-panorama-IQA
17
 
18
- This model is a fine-tuned version of [microsoft/swinv2-small-patch4-window16-256](https://huggingface.co/microsoft/swinv2-small-patch4-window16-256) on the generator dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.0223
21
  - Srocc: 0.1291
 
2
  license: apache-2.0
3
  base_model: microsoft/swinv2-small-patch4-window16-256
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - generator
 
17
 
18
  # swinv2-small-panorama-IQA
19
 
20
+ This model is a fine-tuned version of [microsoft/swinv2-small-patch4-window16-256](https://huggingface.co/microsoft/swinv2-small-patch4-window16-256) on the isiqa-2019-hf dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0223
23
  - Srocc: 0.1291
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 42.857142857142854,
3
+ "eval_LCC": 0.12712191056512775,
4
+ "eval_SROCC": 0.1291236494597839,
5
+ "eval_loss": 0.022257013246417046,
6
+ "eval_runtime": 35.7972,
7
+ "eval_samples_per_second": 1.397,
8
+ "eval_steps_per_second": 0.056,
9
+ "total_flos": 2.1207924866757427e+18,
10
+ "train_loss": 0.02509542241692543,
11
+ "train_runtime": 7697.1048,
12
+ "train_samples_per_second": 1.39,
13
+ "train_steps_per_second": 0.019
14
+ }
runs/Aug03_12-51-27_a0d6f1d07aac/events.out.tfevents.1722697230.a0d6f1d07aac.54.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc74e7b2446669c436b9de44e6f5c596b772e4bf251c8d28f80963143bad7598
3
+ size 455
test_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 42.857142857142854,
3
+ "eval_LCC": 0.12712191056512775,
4
+ "eval_SROCC": 0.1291236494597839,
5
+ "eval_loss": 0.022257013246417046,
6
+ "eval_runtime": 35.7972,
7
+ "eval_samples_per_second": 1.397,
8
+ "eval_steps_per_second": 0.056
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,586 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.022257013246417046,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-150",
4
+ "epoch": 42.857142857142854,
5
+ "eval_steps": 500,
6
+ "global_step": 150,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8571428571428571,
13
+ "eval_LCC": -0.38242422405954946,
14
+ "eval_SROCC": -0.3890036014405762,
15
+ "eval_loss": 0.2948198914527893,
16
+ "eval_runtime": 37.5549,
17
+ "eval_samples_per_second": 1.331,
18
+ "eval_steps_per_second": 0.053,
19
+ "step": 3
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_LCC": -0.3732136136633078,
24
+ "eval_SROCC": -0.36653061224489797,
25
+ "eval_loss": 0.11433681845664978,
26
+ "eval_runtime": 35.6391,
27
+ "eval_samples_per_second": 1.403,
28
+ "eval_steps_per_second": 0.056,
29
+ "step": 7
30
+ },
31
+ {
32
+ "epoch": 2.857142857142857,
33
+ "grad_norm": 4.934920310974121,
34
+ "learning_rate": 6.666666666666667e-06,
35
+ "loss": 0.1552,
36
+ "step": 10
37
+ },
38
+ {
39
+ "epoch": 2.857142857142857,
40
+ "eval_LCC": -0.3657486170203663,
41
+ "eval_SROCC": -0.3477070828331333,
42
+ "eval_loss": 0.07677865773439407,
43
+ "eval_runtime": 35.6897,
44
+ "eval_samples_per_second": 1.401,
45
+ "eval_steps_per_second": 0.056,
46
+ "step": 10
47
+ },
48
+ {
49
+ "epoch": 4.0,
50
+ "eval_LCC": -0.35044377450614,
51
+ "eval_SROCC": -0.3395438175270108,
52
+ "eval_loss": 0.07478620857000351,
53
+ "eval_runtime": 35.8511,
54
+ "eval_samples_per_second": 1.395,
55
+ "eval_steps_per_second": 0.056,
56
+ "step": 14
57
+ },
58
+ {
59
+ "epoch": 4.857142857142857,
60
+ "eval_LCC": -0.33219308626687694,
61
+ "eval_SROCC": -0.34981992797118844,
62
+ "eval_loss": 0.05174265429377556,
63
+ "eval_runtime": 35.8384,
64
+ "eval_samples_per_second": 1.395,
65
+ "eval_steps_per_second": 0.056,
66
+ "step": 17
67
+ },
68
+ {
69
+ "epoch": 5.714285714285714,
70
+ "grad_norm": 2.3777573108673096,
71
+ "learning_rate": 9.966191788709716e-06,
72
+ "loss": 0.0657,
73
+ "step": 20
74
+ },
75
+ {
76
+ "epoch": 6.0,
77
+ "eval_LCC": -0.30599681663050493,
78
+ "eval_SROCC": -0.33368547418967587,
79
+ "eval_loss": 0.05527381971478462,
80
+ "eval_runtime": 35.8215,
81
+ "eval_samples_per_second": 1.396,
82
+ "eval_steps_per_second": 0.056,
83
+ "step": 21
84
+ },
85
+ {
86
+ "epoch": 6.857142857142857,
87
+ "eval_LCC": -0.28100937429201095,
88
+ "eval_SROCC": -0.2921008403361345,
89
+ "eval_loss": 0.043372660875320435,
90
+ "eval_runtime": 35.795,
91
+ "eval_samples_per_second": 1.397,
92
+ "eval_steps_per_second": 0.056,
93
+ "step": 24
94
+ },
95
+ {
96
+ "epoch": 8.0,
97
+ "eval_LCC": -0.25699939917329884,
98
+ "eval_SROCC": -0.24811524609843938,
99
+ "eval_loss": 0.040563274174928665,
100
+ "eval_runtime": 35.9877,
101
+ "eval_samples_per_second": 1.389,
102
+ "eval_steps_per_second": 0.056,
103
+ "step": 28
104
+ },
105
+ {
106
+ "epoch": 8.571428571428571,
107
+ "grad_norm": 1.0520217418670654,
108
+ "learning_rate": 9.698463103929542e-06,
109
+ "loss": 0.0249,
110
+ "step": 30
111
+ },
112
+ {
113
+ "epoch": 8.857142857142858,
114
+ "eval_LCC": -0.2477874480763097,
115
+ "eval_SROCC": -0.23457382953181274,
116
+ "eval_loss": 0.040173906832933426,
117
+ "eval_runtime": 35.9073,
118
+ "eval_samples_per_second": 1.392,
119
+ "eval_steps_per_second": 0.056,
120
+ "step": 31
121
+ },
122
+ {
123
+ "epoch": 10.0,
124
+ "eval_LCC": -0.21817464996358082,
125
+ "eval_SROCC": -0.20758703481392557,
126
+ "eval_loss": 0.0384274497628212,
127
+ "eval_runtime": 35.6916,
128
+ "eval_samples_per_second": 1.401,
129
+ "eval_steps_per_second": 0.056,
130
+ "step": 35
131
+ },
132
+ {
133
+ "epoch": 10.857142857142858,
134
+ "eval_LCC": -0.19225659084922117,
135
+ "eval_SROCC": -0.19193277310924367,
136
+ "eval_loss": 0.03174906224012375,
137
+ "eval_runtime": 35.716,
138
+ "eval_samples_per_second": 1.4,
139
+ "eval_steps_per_second": 0.056,
140
+ "step": 38
141
+ },
142
+ {
143
+ "epoch": 11.428571428571429,
144
+ "grad_norm": 1.398345947265625,
145
+ "learning_rate": 9.177439057064684e-06,
146
+ "loss": 0.0215,
147
+ "step": 40
148
+ },
149
+ {
150
+ "epoch": 12.0,
151
+ "eval_LCC": -0.16355167219890276,
152
+ "eval_SROCC": -0.15178871548619446,
153
+ "eval_loss": 0.031038017943501472,
154
+ "eval_runtime": 35.6946,
155
+ "eval_samples_per_second": 1.401,
156
+ "eval_steps_per_second": 0.056,
157
+ "step": 42
158
+ },
159
+ {
160
+ "epoch": 12.857142857142858,
161
+ "eval_LCC": -0.1548916268794662,
162
+ "eval_SROCC": -0.1291236494597839,
163
+ "eval_loss": 0.03165186941623688,
164
+ "eval_runtime": 35.6768,
165
+ "eval_samples_per_second": 1.401,
166
+ "eval_steps_per_second": 0.056,
167
+ "step": 45
168
+ },
169
+ {
170
+ "epoch": 14.0,
171
+ "eval_LCC": -0.12920387967048147,
172
+ "eval_SROCC": -0.09752701080432173,
173
+ "eval_loss": 0.03006185218691826,
174
+ "eval_runtime": 35.8955,
175
+ "eval_samples_per_second": 1.393,
176
+ "eval_steps_per_second": 0.056,
177
+ "step": 49
178
+ },
179
+ {
180
+ "epoch": 14.285714285714286,
181
+ "grad_norm": 0.4825093746185303,
182
+ "learning_rate": 8.43120818934367e-06,
183
+ "loss": 0.0154,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 14.857142857142858,
188
+ "eval_LCC": -0.10573621549419017,
189
+ "eval_SROCC": -0.08043217286914765,
190
+ "eval_loss": 0.0284834336489439,
191
+ "eval_runtime": 35.7717,
192
+ "eval_samples_per_second": 1.398,
193
+ "eval_steps_per_second": 0.056,
194
+ "step": 52
195
+ },
196
+ {
197
+ "epoch": 16.0,
198
+ "eval_LCC": -0.07621053426257447,
199
+ "eval_SROCC": -0.04614645858343337,
200
+ "eval_loss": 0.027657881379127502,
201
+ "eval_runtime": 35.7321,
202
+ "eval_samples_per_second": 1.399,
203
+ "eval_steps_per_second": 0.056,
204
+ "step": 56
205
+ },
206
+ {
207
+ "epoch": 16.857142857142858,
208
+ "eval_LCC": -0.04854793231941595,
209
+ "eval_SROCC": -0.03567827130852341,
210
+ "eval_loss": 0.02630411647260189,
211
+ "eval_runtime": 35.6683,
212
+ "eval_samples_per_second": 1.402,
213
+ "eval_steps_per_second": 0.056,
214
+ "step": 59
215
+ },
216
+ {
217
+ "epoch": 17.142857142857142,
218
+ "grad_norm": 0.7504790425300598,
219
+ "learning_rate": 7.500000000000001e-06,
220
+ "loss": 0.0128,
221
+ "step": 60
222
+ },
223
+ {
224
+ "epoch": 18.0,
225
+ "eval_LCC": -0.03166657911247084,
226
+ "eval_SROCC": -0.01714285714285714,
227
+ "eval_loss": 0.026295064017176628,
228
+ "eval_runtime": 35.6163,
229
+ "eval_samples_per_second": 1.404,
230
+ "eval_steps_per_second": 0.056,
231
+ "step": 63
232
+ },
233
+ {
234
+ "epoch": 18.857142857142858,
235
+ "eval_LCC": -0.023641716533156595,
236
+ "eval_SROCC": -0.003985594237695078,
237
+ "eval_loss": 0.026547763496637344,
238
+ "eval_runtime": 35.7061,
239
+ "eval_samples_per_second": 1.4,
240
+ "eval_steps_per_second": 0.056,
241
+ "step": 66
242
+ },
243
+ {
244
+ "epoch": 20.0,
245
+ "grad_norm": 0.4742915630340576,
246
+ "learning_rate": 6.434016163555452e-06,
247
+ "loss": 0.0113,
248
+ "step": 70
249
+ },
250
+ {
251
+ "epoch": 20.0,
252
+ "eval_LCC": -0.008938391820749465,
253
+ "eval_SROCC": 0.022713085234093634,
254
+ "eval_loss": 0.026299767196178436,
255
+ "eval_runtime": 35.6413,
256
+ "eval_samples_per_second": 1.403,
257
+ "eval_steps_per_second": 0.056,
258
+ "step": 70
259
+ },
260
+ {
261
+ "epoch": 20.857142857142858,
262
+ "eval_LCC": 0.008109004168265264,
263
+ "eval_SROCC": 0.025402160864345734,
264
+ "eval_loss": 0.025580281391739845,
265
+ "eval_runtime": 35.724,
266
+ "eval_samples_per_second": 1.4,
267
+ "eval_steps_per_second": 0.056,
268
+ "step": 73
269
+ },
270
+ {
271
+ "epoch": 22.0,
272
+ "eval_LCC": 0.023345863441218237,
273
+ "eval_SROCC": 0.049315726290516206,
274
+ "eval_loss": 0.02492944523692131,
275
+ "eval_runtime": 35.624,
276
+ "eval_samples_per_second": 1.404,
277
+ "eval_steps_per_second": 0.056,
278
+ "step": 77
279
+ },
280
+ {
281
+ "epoch": 22.857142857142858,
282
+ "grad_norm": 0.43411004543304443,
283
+ "learning_rate": 5.290724144552379e-06,
284
+ "loss": 0.0104,
285
+ "step": 80
286
+ },
287
+ {
288
+ "epoch": 22.857142857142858,
289
+ "eval_LCC": 0.033023044716641976,
290
+ "eval_SROCC": 0.06160864345738295,
291
+ "eval_loss": 0.024564068764448166,
292
+ "eval_runtime": 35.6925,
293
+ "eval_samples_per_second": 1.401,
294
+ "eval_steps_per_second": 0.056,
295
+ "step": 80
296
+ },
297
+ {
298
+ "epoch": 24.0,
299
+ "eval_LCC": 0.04347957438577821,
300
+ "eval_SROCC": 0.06909963985594238,
301
+ "eval_loss": 0.02423253282904625,
302
+ "eval_runtime": 35.9004,
303
+ "eval_samples_per_second": 1.393,
304
+ "eval_steps_per_second": 0.056,
305
+ "step": 84
306
+ },
307
+ {
308
+ "epoch": 24.857142857142858,
309
+ "eval_LCC": 0.051818984749542364,
310
+ "eval_SROCC": 0.07956782713085234,
311
+ "eval_loss": 0.024022720754146576,
312
+ "eval_runtime": 35.8318,
313
+ "eval_samples_per_second": 1.395,
314
+ "eval_steps_per_second": 0.056,
315
+ "step": 87
316
+ },
317
+ {
318
+ "epoch": 25.714285714285715,
319
+ "grad_norm": 0.443155437707901,
320
+ "learning_rate": 4.131759111665349e-06,
321
+ "loss": 0.0095,
322
+ "step": 90
323
+ },
324
+ {
325
+ "epoch": 26.0,
326
+ "eval_LCC": 0.0679416905737761,
327
+ "eval_SROCC": 0.0830252100840336,
328
+ "eval_loss": 0.02380475588142872,
329
+ "eval_runtime": 35.6776,
330
+ "eval_samples_per_second": 1.401,
331
+ "eval_steps_per_second": 0.056,
332
+ "step": 91
333
+ },
334
+ {
335
+ "epoch": 26.857142857142858,
336
+ "eval_LCC": 0.0746956496870511,
337
+ "eval_SROCC": 0.09291716686674668,
338
+ "eval_loss": 0.023518024012446404,
339
+ "eval_runtime": 35.6658,
340
+ "eval_samples_per_second": 1.402,
341
+ "eval_steps_per_second": 0.056,
342
+ "step": 94
343
+ },
344
+ {
345
+ "epoch": 28.0,
346
+ "eval_LCC": 0.08622063268749575,
347
+ "eval_SROCC": 0.10031212484993997,
348
+ "eval_loss": 0.02321736514568329,
349
+ "eval_runtime": 35.7144,
350
+ "eval_samples_per_second": 1.4,
351
+ "eval_steps_per_second": 0.056,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 28.571428571428573,
356
+ "grad_norm": 0.6047067642211914,
357
+ "learning_rate": 3.019601169804216e-06,
358
+ "loss": 0.009,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 28.857142857142858,
363
+ "eval_LCC": 0.09545687497338319,
364
+ "eval_SROCC": 0.10501800720288115,
365
+ "eval_loss": 0.022862296551465988,
366
+ "eval_runtime": 35.7409,
367
+ "eval_samples_per_second": 1.399,
368
+ "eval_steps_per_second": 0.056,
369
+ "step": 101
370
+ },
371
+ {
372
+ "epoch": 30.0,
373
+ "eval_LCC": 0.10522076553867485,
374
+ "eval_SROCC": 0.10722689075630251,
375
+ "eval_loss": 0.02260303497314453,
376
+ "eval_runtime": 35.8268,
377
+ "eval_samples_per_second": 1.396,
378
+ "eval_steps_per_second": 0.056,
379
+ "step": 105
380
+ },
381
+ {
382
+ "epoch": 30.857142857142858,
383
+ "eval_LCC": 0.11099832718077456,
384
+ "eval_SROCC": 0.11769507803121249,
385
+ "eval_loss": 0.022564733400940895,
386
+ "eval_runtime": 35.6873,
387
+ "eval_samples_per_second": 1.401,
388
+ "eval_steps_per_second": 0.056,
389
+ "step": 108
390
+ },
391
+ {
392
+ "epoch": 31.428571428571427,
393
+ "grad_norm": 0.3605582118034363,
394
+ "learning_rate": 2.0142070414860704e-06,
395
+ "loss": 0.0084,
396
+ "step": 110
397
+ },
398
+ {
399
+ "epoch": 32.0,
400
+ "eval_LCC": 0.11518191832516576,
401
+ "eval_SROCC": 0.12864345738295319,
402
+ "eval_loss": 0.022482411935925484,
403
+ "eval_runtime": 35.5295,
404
+ "eval_samples_per_second": 1.407,
405
+ "eval_steps_per_second": 0.056,
406
+ "step": 112
407
+ },
408
+ {
409
+ "epoch": 32.857142857142854,
410
+ "eval_LCC": 0.11672118384335717,
411
+ "eval_SROCC": 0.12960384153661464,
412
+ "eval_loss": 0.02244633622467518,
413
+ "eval_runtime": 35.6625,
414
+ "eval_samples_per_second": 1.402,
415
+ "eval_steps_per_second": 0.056,
416
+ "step": 115
417
+ },
418
+ {
419
+ "epoch": 34.0,
420
+ "eval_LCC": 0.11853509722750845,
421
+ "eval_SROCC": 0.12960384153661464,
422
+ "eval_loss": 0.022433871403336525,
423
+ "eval_runtime": 35.775,
424
+ "eval_samples_per_second": 1.398,
425
+ "eval_steps_per_second": 0.056,
426
+ "step": 119
427
+ },
428
+ {
429
+ "epoch": 34.285714285714285,
430
+ "grad_norm": 0.37335312366485596,
431
+ "learning_rate": 1.1697777844051105e-06,
432
+ "loss": 0.0085,
433
+ "step": 120
434
+ },
435
+ {
436
+ "epoch": 34.857142857142854,
437
+ "eval_LCC": 0.11996998016040515,
438
+ "eval_SROCC": 0.13104441776710685,
439
+ "eval_loss": 0.022421473637223244,
440
+ "eval_runtime": 35.8151,
441
+ "eval_samples_per_second": 1.396,
442
+ "eval_steps_per_second": 0.056,
443
+ "step": 122
444
+ },
445
+ {
446
+ "epoch": 36.0,
447
+ "eval_LCC": 0.12212764115994604,
448
+ "eval_SROCC": 0.12633853541416568,
449
+ "eval_loss": 0.02237151563167572,
450
+ "eval_runtime": 35.6186,
451
+ "eval_samples_per_second": 1.404,
452
+ "eval_steps_per_second": 0.056,
453
+ "step": 126
454
+ },
455
+ {
456
+ "epoch": 36.857142857142854,
457
+ "eval_LCC": 0.12332180896765454,
458
+ "eval_SROCC": 0.12489795918367347,
459
+ "eval_loss": 0.022355427965521812,
460
+ "eval_runtime": 35.9066,
461
+ "eval_samples_per_second": 1.393,
462
+ "eval_steps_per_second": 0.056,
463
+ "step": 129
464
+ },
465
+ {
466
+ "epoch": 37.142857142857146,
467
+ "grad_norm": 0.5681003332138062,
468
+ "learning_rate": 5.318367983829393e-07,
469
+ "loss": 0.0082,
470
+ "step": 130
471
+ },
472
+ {
473
+ "epoch": 38.0,
474
+ "eval_LCC": 0.12470767155506596,
475
+ "eval_SROCC": 0.12720288115246098,
476
+ "eval_loss": 0.022325601428747177,
477
+ "eval_runtime": 35.6817,
478
+ "eval_samples_per_second": 1.401,
479
+ "eval_steps_per_second": 0.056,
480
+ "step": 133
481
+ },
482
+ {
483
+ "epoch": 38.857142857142854,
484
+ "eval_LCC": 0.1255198227036719,
485
+ "eval_SROCC": 0.12720288115246098,
486
+ "eval_loss": 0.02230682410299778,
487
+ "eval_runtime": 35.758,
488
+ "eval_samples_per_second": 1.398,
489
+ "eval_steps_per_second": 0.056,
490
+ "step": 136
491
+ },
492
+ {
493
+ "epoch": 40.0,
494
+ "grad_norm": 0.49957460165023804,
495
+ "learning_rate": 1.3477564710088097e-07,
496
+ "loss": 0.008,
497
+ "step": 140
498
+ },
499
+ {
500
+ "epoch": 40.0,
501
+ "eval_LCC": 0.12651835944328998,
502
+ "eval_SROCC": 0.1291236494597839,
503
+ "eval_loss": 0.022274091839790344,
504
+ "eval_runtime": 35.7033,
505
+ "eval_samples_per_second": 1.4,
506
+ "eval_steps_per_second": 0.056,
507
+ "step": 140
508
+ },
509
+ {
510
+ "epoch": 40.857142857142854,
511
+ "eval_LCC": 0.1269013775632479,
512
+ "eval_SROCC": 0.1291236494597839,
513
+ "eval_loss": 0.022262830287218094,
514
+ "eval_runtime": 35.6563,
515
+ "eval_samples_per_second": 1.402,
516
+ "eval_steps_per_second": 0.056,
517
+ "step": 143
518
+ },
519
+ {
520
+ "epoch": 42.0,
521
+ "eval_LCC": 0.12710778753633592,
522
+ "eval_SROCC": 0.1291236494597839,
523
+ "eval_loss": 0.022257346659898758,
524
+ "eval_runtime": 35.6941,
525
+ "eval_samples_per_second": 1.401,
526
+ "eval_steps_per_second": 0.056,
527
+ "step": 147
528
+ },
529
+ {
530
+ "epoch": 42.857142857142854,
531
+ "grad_norm": 0.33458444476127625,
532
+ "learning_rate": 0.0,
533
+ "loss": 0.0078,
534
+ "step": 150
535
+ },
536
+ {
537
+ "epoch": 42.857142857142854,
538
+ "eval_LCC": 0.12712191056512775,
539
+ "eval_SROCC": 0.1291236494597839,
540
+ "eval_loss": 0.022257013246417046,
541
+ "eval_runtime": 35.8013,
542
+ "eval_samples_per_second": 1.397,
543
+ "eval_steps_per_second": 0.056,
544
+ "step": 150
545
+ },
546
+ {
547
+ "epoch": 42.857142857142854,
548
+ "step": 150,
549
+ "total_flos": 2.1207924866757427e+18,
550
+ "train_loss": 0.02509542241692543,
551
+ "train_runtime": 7697.1048,
552
+ "train_samples_per_second": 1.39,
553
+ "train_steps_per_second": 0.019
554
+ }
555
+ ],
556
+ "logging_steps": 10,
557
+ "max_steps": 150,
558
+ "num_input_tokens_seen": 0,
559
+ "num_train_epochs": 50,
560
+ "save_steps": 500,
561
+ "stateful_callbacks": {
562
+ "EarlyStoppingCallback": {
563
+ "args": {
564
+ "early_stopping_patience": 5,
565
+ "early_stopping_threshold": 0.0
566
+ },
567
+ "attributes": {
568
+ "early_stopping_patience_counter": 0
569
+ }
570
+ },
571
+ "TrainerControl": {
572
+ "args": {
573
+ "should_epoch_stop": false,
574
+ "should_evaluate": false,
575
+ "should_log": false,
576
+ "should_save": true,
577
+ "should_training_stop": true
578
+ },
579
+ "attributes": {}
580
+ }
581
+ },
582
+ "total_flos": 2.1207924866757427e+18,
583
+ "train_batch_size": 16,
584
+ "trial_name": null,
585
+ "trial_params": null
586
+ }
training_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 42.857142857142854,
3
+ "total_flos": 2.1207924866757427e+18,
4
+ "train_loss": 0.02509542241692543,
5
+ "train_runtime": 7697.1048,
6
+ "train_samples_per_second": 1.39,
7
+ "train_steps_per_second": 0.019
8
+ }