itsLeen committed on
Commit
1450f88
1 Parent(s): e6c99c5

🍻 cheers

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,10 +17,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # finetuned-fake-food
18
 
19
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.3455
22
- - Accuracy: 0.8541
23
 
24
  ## Model description
25
 
 
3
  license: apache-2.0
4
  base_model: google/vit-base-patch16-224-in21k
5
  tags:
6
+ - image-classification
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
17
 
18
  # finetuned-fake-food
19
 
20
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the indian_food_images dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.3199
23
+ - Accuracy: 0.8720
24
 
25
  ## Model description
26
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.6164383561643836,
4
- "eval_loss": 0.6574238538742065,
5
- "eval_runtime": 6.2004,
6
- "eval_samples_per_second": 23.547,
7
- "eval_steps_per_second": 3.064,
8
- "total_flos": 6.400838342165299e+17,
9
- "train_loss": 0.6831480086282725,
10
- "train_runtime": 1157.4221,
11
- "train_samples_per_second": 7.137,
12
- "train_steps_per_second": 3.568
13
  }
 
1
  {
2
+ "epoch": 2.5284450063211126,
3
+ "eval_accuracy": 0.8719785138764548,
4
+ "eval_loss": 0.3198860287666321,
5
+ "eval_runtime": 79.7356,
6
+ "eval_samples_per_second": 14.009,
7
+ "eval_steps_per_second": 1.756,
8
+ "total_flos": 1.2397168498542428e+18,
9
+ "train_loss": 0.49920871353149415,
10
+ "train_runtime": 3192.0436,
11
+ "train_samples_per_second": 5.012,
12
+ "train_steps_per_second": 0.627
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.6164383561643836,
4
- "eval_loss": 0.6574238538742065,
5
- "eval_runtime": 6.2004,
6
- "eval_samples_per_second": 23.547,
7
- "eval_steps_per_second": 3.064
8
  }
 
1
  {
2
+ "epoch": 2.5284450063211126,
3
+ "eval_accuracy": 0.8719785138764548,
4
+ "eval_loss": 0.3198860287666321,
5
+ "eval_runtime": 79.7356,
6
+ "eval_samples_per_second": 14.009,
7
+ "eval_steps_per_second": 1.756
8
  }
runs/Oct01_12-13-49_a59caee1d103/events.out.tfevents.1727788202.a59caee1d103.4704.13 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be6c5de00393ad2a2b627973c4c86df3c1d4e27dc2028aff98dfeb9a620f001
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 6.400838342165299e+17,
4
- "train_loss": 0.6831480086282725,
5
- "train_runtime": 1157.4221,
6
- "train_samples_per_second": 7.137,
7
- "train_steps_per_second": 3.568
8
  }
 
1
  {
2
+ "epoch": 2.5284450063211126,
3
+ "total_flos": 1.2397168498542428e+18,
4
+ "train_loss": 0.49920871353149415,
5
+ "train_runtime": 3192.0436,
6
+ "train_samples_per_second": 5.012,
7
+ "train_steps_per_second": 0.627
8
  }
trainer_state.json CHANGED
@@ -1,683 +1,347 @@
1
  {
2
- "best_metric": 0.6574238538742065,
3
- "best_model_checkpoint": "finetuned-fake-food/checkpoint-4000",
4
- "epoch": 10.0,
5
  "eval_steps": 100,
6
- "global_step": 4130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.24213075060532688,
13
- "grad_norm": 0.15737777948379517,
14
- "learning_rate": 0.00019515738498789345,
15
- "loss": 0.6977,
16
  "step": 100
17
  },
18
  {
19
- "epoch": 0.24213075060532688,
20
- "eval_accuracy": 0.5821917808219178,
21
- "eval_loss": 0.6804767847061157,
22
- "eval_runtime": 5.525,
23
- "eval_samples_per_second": 26.425,
24
- "eval_steps_per_second": 3.439,
25
  "step": 100
26
  },
27
  {
28
- "epoch": 0.48426150121065376,
29
- "grad_norm": 0.03558634594082832,
30
- "learning_rate": 0.00019031476997578695,
31
- "loss": 0.6956,
32
  "step": 200
33
  },
34
  {
35
- "epoch": 0.48426150121065376,
36
- "eval_accuracy": 0.4178082191780822,
37
- "eval_loss": 0.6935968399047852,
38
- "eval_runtime": 5.2465,
39
- "eval_samples_per_second": 27.828,
40
- "eval_steps_per_second": 3.621,
41
  "step": 200
42
  },
43
  {
44
- "epoch": 0.7263922518159807,
45
- "grad_norm": 1.5009288787841797,
46
- "learning_rate": 0.0001854721549636804,
47
- "loss": 0.6795,
48
  "step": 300
49
  },
50
  {
51
- "epoch": 0.7263922518159807,
52
- "eval_accuracy": 0.6506849315068494,
53
- "eval_loss": 0.6734184622764587,
54
- "eval_runtime": 5.9372,
55
- "eval_samples_per_second": 24.591,
56
- "eval_steps_per_second": 3.2,
57
  "step": 300
58
  },
59
  {
60
- "epoch": 0.9685230024213075,
61
- "grad_norm": 0.23967677354812622,
62
- "learning_rate": 0.00018062953995157384,
63
- "loss": 0.7061,
64
  "step": 400
65
  },
66
  {
67
- "epoch": 0.9685230024213075,
68
- "eval_accuracy": 0.5821917808219178,
69
- "eval_loss": 0.6760488152503967,
70
- "eval_runtime": 5.7531,
71
- "eval_samples_per_second": 25.378,
72
- "eval_steps_per_second": 3.303,
73
  "step": 400
74
  },
75
  {
76
- "epoch": 1.2106537530266344,
77
- "grad_norm": 2.081388235092163,
78
- "learning_rate": 0.00017578692493946732,
79
- "loss": 0.6941,
80
  "step": 500
81
  },
82
  {
83
- "epoch": 1.2106537530266344,
84
- "eval_accuracy": 0.5821917808219178,
85
- "eval_loss": 0.6746240854263306,
86
- "eval_runtime": 5.812,
87
- "eval_samples_per_second": 25.121,
88
- "eval_steps_per_second": 3.269,
89
  "step": 500
90
  },
91
  {
92
- "epoch": 1.4527845036319613,
93
- "grad_norm": 0.4285804331302643,
94
- "learning_rate": 0.0001709443099273608,
95
- "loss": 0.6898,
96
  "step": 600
97
  },
98
  {
99
- "epoch": 1.4527845036319613,
100
- "eval_accuracy": 0.6027397260273972,
101
- "eval_loss": 0.6674954891204834,
102
- "eval_runtime": 6.1547,
103
- "eval_samples_per_second": 23.722,
104
- "eval_steps_per_second": 3.087,
105
  "step": 600
106
  },
107
  {
108
- "epoch": 1.694915254237288,
109
- "grad_norm": 0.12052281200885773,
110
- "learning_rate": 0.00016610169491525423,
111
- "loss": 0.6956,
112
  "step": 700
113
  },
114
  {
115
- "epoch": 1.694915254237288,
116
- "eval_accuracy": 0.5753424657534246,
117
- "eval_loss": 0.684603750705719,
118
- "eval_runtime": 6.0144,
119
- "eval_samples_per_second": 24.275,
120
- "eval_steps_per_second": 3.159,
121
  "step": 700
122
  },
123
  {
124
- "epoch": 1.937046004842615,
125
- "grad_norm": 0.3585425913333893,
126
- "learning_rate": 0.0001612590799031477,
127
- "loss": 0.6847,
128
  "step": 800
129
  },
130
  {
131
- "epoch": 1.937046004842615,
132
- "eval_accuracy": 0.5821917808219178,
133
- "eval_loss": 0.6745873093605042,
134
- "eval_runtime": 5.8427,
135
- "eval_samples_per_second": 24.988,
136
- "eval_steps_per_second": 3.252,
137
  "step": 800
138
  },
139
  {
140
- "epoch": 2.179176755447942,
141
- "grad_norm": 1.2445541620254517,
142
- "learning_rate": 0.00015641646489104115,
143
- "loss": 0.6949,
144
  "step": 900
145
  },
146
  {
147
- "epoch": 2.179176755447942,
148
- "eval_accuracy": 0.589041095890411,
149
- "eval_loss": 0.6779718399047852,
150
- "eval_runtime": 4.8307,
151
- "eval_samples_per_second": 30.223,
152
- "eval_steps_per_second": 3.933,
153
  "step": 900
154
  },
155
  {
156
- "epoch": 2.4213075060532687,
157
- "grad_norm": 1.429865837097168,
158
- "learning_rate": 0.00015157384987893465,
159
- "loss": 0.703,
160
  "step": 1000
161
  },
162
  {
163
- "epoch": 2.4213075060532687,
164
- "eval_accuracy": 0.5753424657534246,
165
- "eval_loss": 0.6894732117652893,
166
- "eval_runtime": 5.0834,
167
- "eval_samples_per_second": 28.721,
168
- "eval_steps_per_second": 3.738,
169
  "step": 1000
170
  },
171
  {
172
- "epoch": 2.663438256658596,
173
- "grad_norm": 1.2485073804855347,
174
- "learning_rate": 0.0001467312348668281,
175
- "loss": 0.6851,
176
  "step": 1100
177
  },
178
  {
179
- "epoch": 2.663438256658596,
180
- "eval_accuracy": 0.5821917808219178,
181
- "eval_loss": 0.6741558909416199,
182
- "eval_runtime": 6.0858,
183
- "eval_samples_per_second": 23.99,
184
- "eval_steps_per_second": 3.122,
185
  "step": 1100
186
  },
187
  {
188
- "epoch": 2.9055690072639226,
189
- "grad_norm": 0.1997382789850235,
190
- "learning_rate": 0.00014188861985472154,
191
- "loss": 0.6878,
192
  "step": 1200
193
  },
194
  {
195
- "epoch": 2.9055690072639226,
196
- "eval_accuracy": 0.6301369863013698,
197
- "eval_loss": 0.674239456653595,
198
- "eval_runtime": 6.1499,
199
- "eval_samples_per_second": 23.74,
200
- "eval_steps_per_second": 3.089,
201
  "step": 1200
202
  },
203
  {
204
- "epoch": 3.1476997578692494,
205
- "grad_norm": 0.4324168860912323,
206
- "learning_rate": 0.00013704600484261504,
207
- "loss": 0.68,
208
  "step": 1300
209
  },
210
  {
211
- "epoch": 3.1476997578692494,
212
- "eval_accuracy": 0.5821917808219178,
213
- "eval_loss": 0.6712663173675537,
214
- "eval_runtime": 6.0266,
215
- "eval_samples_per_second": 24.226,
216
- "eval_steps_per_second": 3.153,
217
  "step": 1300
218
  },
219
  {
220
- "epoch": 3.389830508474576,
221
- "grad_norm": 0.9948041439056396,
222
- "learning_rate": 0.00013220338983050849,
223
- "loss": 0.6728,
224
  "step": 1400
225
  },
226
  {
227
- "epoch": 3.389830508474576,
228
- "eval_accuracy": 0.5958904109589042,
229
- "eval_loss": 0.6838211417198181,
230
- "eval_runtime": 6.1631,
231
- "eval_samples_per_second": 23.689,
232
- "eval_steps_per_second": 3.083,
233
  "step": 1400
234
  },
235
  {
236
- "epoch": 3.6319612590799033,
237
- "grad_norm": 1.2490299940109253,
238
- "learning_rate": 0.00012736077481840193,
239
- "loss": 0.698,
240
  "step": 1500
241
  },
242
  {
243
- "epoch": 3.6319612590799033,
244
- "eval_accuracy": 0.5821917808219178,
245
- "eval_loss": 0.6774668097496033,
246
- "eval_runtime": 6.0857,
247
- "eval_samples_per_second": 23.991,
248
- "eval_steps_per_second": 3.122,
249
  "step": 1500
250
  },
251
  {
252
- "epoch": 3.87409200968523,
253
- "grad_norm": 0.2908919155597687,
254
- "learning_rate": 0.0001225181598062954,
255
- "loss": 0.7033,
256
  "step": 1600
257
  },
258
  {
259
- "epoch": 3.87409200968523,
260
- "eval_accuracy": 0.5821917808219178,
261
- "eval_loss": 0.6734635829925537,
262
- "eval_runtime": 5.783,
263
- "eval_samples_per_second": 25.247,
264
- "eval_steps_per_second": 3.286,
265
  "step": 1600
266
  },
267
  {
268
- "epoch": 4.116222760290557,
269
- "grad_norm": 0.21098549664020538,
270
- "learning_rate": 0.00011767554479418887,
271
- "loss": 0.6973,
272
  "step": 1700
273
  },
274
  {
275
- "epoch": 4.116222760290557,
276
- "eval_accuracy": 0.6232876712328768,
277
- "eval_loss": 0.6803831458091736,
278
- "eval_runtime": 4.6265,
279
- "eval_samples_per_second": 31.557,
280
- "eval_steps_per_second": 4.107,
281
  "step": 1700
282
  },
283
  {
284
- "epoch": 4.358353510895884,
285
- "grad_norm": 0.03869936615228653,
286
- "learning_rate": 0.00011283292978208233,
287
- "loss": 0.6822,
288
  "step": 1800
289
  },
290
  {
291
- "epoch": 4.358353510895884,
292
- "eval_accuracy": 0.6027397260273972,
293
- "eval_loss": 0.6847726702690125,
294
- "eval_runtime": 4.6717,
295
- "eval_samples_per_second": 31.252,
296
- "eval_steps_per_second": 4.067,
297
  "step": 1800
298
  },
299
  {
300
- "epoch": 4.600484261501211,
301
- "grad_norm": 0.13196176290512085,
302
- "learning_rate": 0.00010799031476997579,
303
- "loss": 0.6896,
304
  "step": 1900
305
  },
306
  {
307
- "epoch": 4.600484261501211,
308
- "eval_accuracy": 0.541095890410959,
309
- "eval_loss": 0.6835151314735413,
310
- "eval_runtime": 5.191,
311
- "eval_samples_per_second": 28.126,
312
- "eval_steps_per_second": 3.66,
313
  "step": 1900
314
  },
315
  {
316
- "epoch": 4.842615012106537,
317
- "grad_norm": 0.5055987238883972,
318
- "learning_rate": 0.00010314769975786926,
319
- "loss": 0.6772,
320
  "step": 2000
321
  },
322
  {
323
- "epoch": 4.842615012106537,
324
- "eval_accuracy": 0.6095890410958904,
325
- "eval_loss": 0.6753013134002686,
326
- "eval_runtime": 6.1231,
327
- "eval_samples_per_second": 23.844,
328
- "eval_steps_per_second": 3.103,
329
  "step": 2000
330
  },
331
  {
332
- "epoch": 5.084745762711864,
333
- "grad_norm": 0.4209335148334503,
334
- "learning_rate": 9.835351089588378e-05,
335
- "loss": 0.6843,
336
- "step": 2100
337
- },
338
- {
339
- "epoch": 5.084745762711864,
340
- "eval_accuracy": 0.589041095890411,
341
- "eval_loss": 0.6667279601097107,
342
- "eval_runtime": 4.6413,
343
- "eval_samples_per_second": 31.457,
344
- "eval_steps_per_second": 4.094,
345
- "step": 2100
346
- },
347
- {
348
- "epoch": 5.326876513317191,
349
- "grad_norm": 1.4965670108795166,
350
- "learning_rate": 9.351089588377724e-05,
351
- "loss": 0.6898,
352
- "step": 2200
353
- },
354
- {
355
- "epoch": 5.326876513317191,
356
- "eval_accuracy": 0.5821917808219178,
357
- "eval_loss": 0.6725605726242065,
358
- "eval_runtime": 6.034,
359
- "eval_samples_per_second": 24.196,
360
- "eval_steps_per_second": 3.149,
361
- "step": 2200
362
- },
363
- {
364
- "epoch": 5.5690072639225185,
365
- "grad_norm": 0.4163062572479248,
366
- "learning_rate": 8.86682808716707e-05,
367
- "loss": 0.6868,
368
- "step": 2300
369
- },
370
- {
371
- "epoch": 5.5690072639225185,
372
- "eval_accuracy": 0.5616438356164384,
373
- "eval_loss": 0.6784049272537231,
374
- "eval_runtime": 6.0533,
375
- "eval_samples_per_second": 24.119,
376
- "eval_steps_per_second": 3.139,
377
- "step": 2300
378
- },
379
- {
380
- "epoch": 5.811138014527845,
381
- "grad_norm": 1.2287280559539795,
382
- "learning_rate": 8.382566585956417e-05,
383
- "loss": 0.6636,
384
- "step": 2400
385
- },
386
- {
387
- "epoch": 5.811138014527845,
388
- "eval_accuracy": 0.6301369863013698,
389
- "eval_loss": 0.6639688611030579,
390
- "eval_runtime": 5.3104,
391
- "eval_samples_per_second": 27.493,
392
- "eval_steps_per_second": 3.578,
393
- "step": 2400
394
- },
395
- {
396
- "epoch": 6.053268765133172,
397
- "grad_norm": 0.8932170867919922,
398
- "learning_rate": 7.898305084745763e-05,
399
- "loss": 0.6833,
400
- "step": 2500
401
- },
402
- {
403
- "epoch": 6.053268765133172,
404
- "eval_accuracy": 0.5136986301369864,
405
- "eval_loss": 0.676824688911438,
406
- "eval_runtime": 4.6631,
407
- "eval_samples_per_second": 31.309,
408
- "eval_steps_per_second": 4.074,
409
- "step": 2500
410
- },
411
- {
412
- "epoch": 6.295399515738499,
413
- "grad_norm": 1.1837154626846313,
414
- "learning_rate": 7.414043583535109e-05,
415
- "loss": 0.678,
416
- "step": 2600
417
- },
418
- {
419
- "epoch": 6.295399515738499,
420
- "eval_accuracy": 0.6232876712328768,
421
- "eval_loss": 0.6652230024337769,
422
- "eval_runtime": 4.933,
423
- "eval_samples_per_second": 29.597,
424
- "eval_steps_per_second": 3.852,
425
- "step": 2600
426
- },
427
- {
428
- "epoch": 6.5375302663438255,
429
- "grad_norm": 1.4030615091323853,
430
- "learning_rate": 6.929782082324455e-05,
431
- "loss": 0.6672,
432
- "step": 2700
433
- },
434
- {
435
- "epoch": 6.5375302663438255,
436
- "eval_accuracy": 0.547945205479452,
437
- "eval_loss": 0.6735221147537231,
438
- "eval_runtime": 6.0106,
439
- "eval_samples_per_second": 24.29,
440
- "eval_steps_per_second": 3.161,
441
- "step": 2700
442
- },
443
- {
444
- "epoch": 6.779661016949152,
445
- "grad_norm": 0.6782599687576294,
446
- "learning_rate": 6.445520581113802e-05,
447
- "loss": 0.6975,
448
- "step": 2800
449
- },
450
- {
451
- "epoch": 6.779661016949152,
452
- "eval_accuracy": 0.589041095890411,
453
- "eval_loss": 0.6686810851097107,
454
- "eval_runtime": 4.7582,
455
- "eval_samples_per_second": 30.684,
456
- "eval_steps_per_second": 3.993,
457
- "step": 2800
458
- },
459
- {
460
- "epoch": 7.021791767554479,
461
- "grad_norm": 0.4288092255592346,
462
- "learning_rate": 5.961259079903147e-05,
463
- "loss": 0.6858,
464
- "step": 2900
465
- },
466
- {
467
- "epoch": 7.021791767554479,
468
- "eval_accuracy": 0.6027397260273972,
469
- "eval_loss": 0.6672346591949463,
470
- "eval_runtime": 4.7612,
471
- "eval_samples_per_second": 30.665,
472
- "eval_steps_per_second": 3.991,
473
- "step": 2900
474
- },
475
- {
476
- "epoch": 7.263922518159807,
477
- "grad_norm": 1.373633861541748,
478
- "learning_rate": 5.4769975786924946e-05,
479
- "loss": 0.6687,
480
- "step": 3000
481
- },
482
- {
483
- "epoch": 7.263922518159807,
484
- "eval_accuracy": 0.5753424657534246,
485
- "eval_loss": 0.6647915840148926,
486
- "eval_runtime": 4.681,
487
- "eval_samples_per_second": 31.19,
488
- "eval_steps_per_second": 4.059,
489
- "step": 3000
490
- },
491
- {
492
- "epoch": 7.506053268765133,
493
- "grad_norm": 0.4883480668067932,
494
- "learning_rate": 4.9927360774818404e-05,
495
- "loss": 0.6636,
496
- "step": 3100
497
- },
498
- {
499
- "epoch": 7.506053268765133,
500
- "eval_accuracy": 0.5684931506849316,
501
- "eval_loss": 0.6673935055732727,
502
- "eval_runtime": 4.6832,
503
- "eval_samples_per_second": 31.175,
504
- "eval_steps_per_second": 4.057,
505
- "step": 3100
506
- },
507
- {
508
- "epoch": 7.74818401937046,
509
- "grad_norm": 0.2553524672985077,
510
- "learning_rate": 4.508474576271187e-05,
511
- "loss": 0.6904,
512
- "step": 3200
513
- },
514
- {
515
- "epoch": 7.74818401937046,
516
- "eval_accuracy": 0.5342465753424658,
517
- "eval_loss": 0.6751775741577148,
518
- "eval_runtime": 4.7128,
519
- "eval_samples_per_second": 30.979,
520
- "eval_steps_per_second": 4.032,
521
- "step": 3200
522
- },
523
- {
524
- "epoch": 7.990314769975787,
525
- "grad_norm": 0.5203524827957153,
526
- "learning_rate": 4.024213075060533e-05,
527
- "loss": 0.6585,
528
- "step": 3300
529
- },
530
- {
531
- "epoch": 7.990314769975787,
532
- "eval_accuracy": 0.5958904109589042,
533
- "eval_loss": 0.7023173570632935,
534
- "eval_runtime": 5.9291,
535
- "eval_samples_per_second": 24.624,
536
- "eval_steps_per_second": 3.205,
537
- "step": 3300
538
- },
539
- {
540
- "epoch": 8.232445520581114,
541
- "grad_norm": 1.3221914768218994,
542
- "learning_rate": 3.539951573849879e-05,
543
- "loss": 0.6874,
544
- "step": 3400
545
- },
546
- {
547
- "epoch": 8.232445520581114,
548
- "eval_accuracy": 0.5753424657534246,
549
- "eval_loss": 0.6615224480628967,
550
- "eval_runtime": 5.9877,
551
- "eval_samples_per_second": 24.383,
552
- "eval_steps_per_second": 3.173,
553
- "step": 3400
554
- },
555
- {
556
- "epoch": 8.474576271186441,
557
- "grad_norm": 0.6332941651344299,
558
- "learning_rate": 3.055690072639225e-05,
559
- "loss": 0.6444,
560
- "step": 3500
561
- },
562
- {
563
- "epoch": 8.474576271186441,
564
- "eval_accuracy": 0.5205479452054794,
565
- "eval_loss": 0.772119402885437,
566
- "eval_runtime": 5.8976,
567
- "eval_samples_per_second": 24.756,
568
- "eval_steps_per_second": 3.222,
569
- "step": 3500
570
- },
571
- {
572
- "epoch": 8.716707021791768,
573
- "grad_norm": 1.8709771633148193,
574
- "learning_rate": 2.5714285714285714e-05,
575
- "loss": 0.6803,
576
- "step": 3600
577
- },
578
- {
579
- "epoch": 8.716707021791768,
580
- "eval_accuracy": 0.5821917808219178,
581
- "eval_loss": 0.6808822751045227,
582
- "eval_runtime": 4.743,
583
- "eval_samples_per_second": 30.782,
584
- "eval_steps_per_second": 4.006,
585
- "step": 3600
586
- },
587
- {
588
- "epoch": 8.958837772397095,
589
- "grad_norm": 1.1251460313796997,
590
- "learning_rate": 2.0871670702179177e-05,
591
- "loss": 0.6782,
592
- "step": 3700
593
- },
594
- {
595
- "epoch": 8.958837772397095,
596
- "eval_accuracy": 0.5821917808219178,
597
- "eval_loss": 0.6638409495353699,
598
- "eval_runtime": 5.3184,
599
- "eval_samples_per_second": 27.452,
600
- "eval_steps_per_second": 3.573,
601
- "step": 3700
602
- },
603
- {
604
- "epoch": 9.200968523002421,
605
- "grad_norm": 0.5918538570404053,
606
- "learning_rate": 1.602905569007264e-05,
607
- "loss": 0.6536,
608
- "step": 3800
609
- },
610
- {
611
- "epoch": 9.200968523002421,
612
- "eval_accuracy": 0.6232876712328768,
613
- "eval_loss": 0.6606671214103699,
614
- "eval_runtime": 4.6821,
615
- "eval_samples_per_second": 31.183,
616
- "eval_steps_per_second": 4.058,
617
- "step": 3800
618
- },
619
- {
620
- "epoch": 9.443099273607748,
621
- "grad_norm": 1.1931524276733398,
622
- "learning_rate": 1.1186440677966102e-05,
623
- "loss": 0.6188,
624
- "step": 3900
625
- },
626
- {
627
- "epoch": 9.443099273607748,
628
- "eval_accuracy": 0.5684931506849316,
629
- "eval_loss": 0.7090215682983398,
630
- "eval_runtime": 4.6915,
631
- "eval_samples_per_second": 31.12,
632
- "eval_steps_per_second": 4.05,
633
- "step": 3900
634
- },
635
- {
636
- "epoch": 9.685230024213075,
637
- "grad_norm": 2.177264928817749,
638
- "learning_rate": 6.3438256658595635e-06,
639
- "loss": 0.7026,
640
- "step": 4000
641
- },
642
- {
643
- "epoch": 9.685230024213075,
644
- "eval_accuracy": 0.6164383561643836,
645
- "eval_loss": 0.6574238538742065,
646
- "eval_runtime": 4.7502,
647
- "eval_samples_per_second": 30.736,
648
- "eval_steps_per_second": 4.0,
649
- "step": 4000
650
- },
651
- {
652
- "epoch": 9.927360774818402,
653
- "grad_norm": 0.5541784167289734,
654
- "learning_rate": 1.549636803874092e-06,
655
- "loss": 0.7008,
656
- "step": 4100
657
- },
658
- {
659
- "epoch": 9.927360774818402,
660
- "eval_accuracy": 0.6095890410958904,
661
- "eval_loss": 0.6576805710792542,
662
- "eval_runtime": 4.7337,
663
- "eval_samples_per_second": 30.843,
664
- "eval_steps_per_second": 4.014,
665
- "step": 4100
666
- },
667
- {
668
- "epoch": 10.0,
669
- "step": 4130,
670
- "total_flos": 6.400838342165299e+17,
671
- "train_loss": 0.6831480086282725,
672
- "train_runtime": 1157.4221,
673
- "train_samples_per_second": 7.137,
674
- "train_steps_per_second": 3.568
675
  }
676
  ],
677
  "logging_steps": 100,
678
- "max_steps": 4130,
679
  "num_input_tokens_seen": 0,
680
- "num_train_epochs": 10,
681
  "save_steps": 100,
682
  "stateful_callbacks": {
683
  "TrainerControl": {
@@ -691,8 +355,8 @@
691
  "attributes": {}
692
  }
693
  },
694
- "total_flos": 6.400838342165299e+17,
695
- "train_batch_size": 2,
696
  "trial_name": null,
697
  "trial_params": null
698
  }
 
1
  {
2
+ "best_metric": 0.3198860287666321,
3
+ "best_model_checkpoint": "finetuned-fake-food/checkpoint-1800",
4
+ "epoch": 2.5284450063211126,
5
  "eval_steps": 100,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.1264222503160556,
13
+ "grad_norm": 5.020864009857178,
14
+ "learning_rate": 0.00019,
15
+ "loss": 0.5416,
16
  "step": 100
17
  },
18
  {
19
+ "epoch": 0.1264222503160556,
20
+ "eval_accuracy": 0.7081468218442256,
21
+ "eval_loss": 0.5592844486236572,
22
+ "eval_runtime": 75.3793,
23
+ "eval_samples_per_second": 14.818,
24
+ "eval_steps_per_second": 1.857,
25
  "step": 100
26
  },
27
  {
28
+ "epoch": 0.2528445006321112,
29
+ "grad_norm": 3.241377115249634,
30
+ "learning_rate": 0.00018,
31
+ "loss": 0.5299,
32
  "step": 200
33
  },
34
  {
35
+ "epoch": 0.2528445006321112,
36
+ "eval_accuracy": 0.7421665174574754,
37
+ "eval_loss": 0.5342246294021606,
38
+ "eval_runtime": 75.6464,
39
+ "eval_samples_per_second": 14.766,
40
+ "eval_steps_per_second": 1.851,
41
  "step": 200
42
  },
43
  {
44
+ "epoch": 0.37926675094816686,
45
+ "grad_norm": 3.0553033351898193,
46
+ "learning_rate": 0.00017,
47
+ "loss": 0.5503,
48
  "step": 300
49
  },
50
  {
51
+ "epoch": 0.37926675094816686,
52
+ "eval_accuracy": 0.7717099373321397,
53
+ "eval_loss": 0.48751309514045715,
54
+ "eval_runtime": 75.98,
55
+ "eval_samples_per_second": 14.701,
56
+ "eval_steps_per_second": 1.843,
57
  "step": 300
58
  },
59
  {
60
+ "epoch": 0.5056890012642224,
61
+ "grad_norm": 2.0104384422302246,
62
+ "learning_rate": 0.00016,
63
+ "loss": 0.5561,
64
  "step": 400
65
  },
66
  {
67
+ "epoch": 0.5056890012642224,
68
+ "eval_accuracy": 0.7940913160250671,
69
+ "eval_loss": 0.4621775448322296,
70
+ "eval_runtime": 76.6055,
71
+ "eval_samples_per_second": 14.581,
72
+ "eval_steps_per_second": 1.828,
73
  "step": 400
74
  },
75
  {
76
+ "epoch": 0.6321112515802781,
77
+ "grad_norm": 3.4203433990478516,
78
+ "learning_rate": 0.00015000000000000001,
79
+ "loss": 0.5581,
80
  "step": 500
81
  },
82
  {
83
+ "epoch": 0.6321112515802781,
84
+ "eval_accuracy": 0.7457475380483438,
85
+ "eval_loss": 0.5501323342323303,
86
+ "eval_runtime": 75.3664,
87
+ "eval_samples_per_second": 14.821,
88
+ "eval_steps_per_second": 1.858,
89
  "step": 500
90
  },
91
  {
92
+ "epoch": 0.7585335018963337,
93
+ "grad_norm": 1.4952611923217773,
94
+ "learning_rate": 0.00014,
95
+ "loss": 0.5845,
96
  "step": 600
97
  },
98
  {
99
+ "epoch": 0.7585335018963337,
100
+ "eval_accuracy": 0.747538048343778,
101
+ "eval_loss": 0.5088097453117371,
102
+ "eval_runtime": 76.5056,
103
+ "eval_samples_per_second": 14.6,
104
+ "eval_steps_per_second": 1.83,
105
  "step": 600
106
  },
107
  {
108
+ "epoch": 0.8849557522123894,
109
+ "grad_norm": 1.8074195384979248,
110
+ "learning_rate": 0.00013000000000000002,
111
+ "loss": 0.5695,
112
  "step": 700
113
  },
114
  {
115
+ "epoch": 0.8849557522123894,
116
+ "eval_accuracy": 0.7860340196956133,
117
+ "eval_loss": 0.4740249812602997,
118
+ "eval_runtime": 76.9715,
119
+ "eval_samples_per_second": 14.512,
120
+ "eval_steps_per_second": 1.819,
121
  "step": 700
122
  },
123
  {
124
+ "epoch": 1.011378002528445,
125
+ "grad_norm": 1.2785142660140991,
126
+ "learning_rate": 0.00012,
127
+ "loss": 0.5406,
128
  "step": 800
129
  },
130
  {
131
+ "epoch": 1.011378002528445,
132
+ "eval_accuracy": 0.7815577439570277,
133
+ "eval_loss": 0.4855746030807495,
134
+ "eval_runtime": 76.7685,
135
+ "eval_samples_per_second": 14.55,
136
+ "eval_steps_per_second": 1.824,
137
  "step": 800
138
  },
139
  {
140
+ "epoch": 1.1378002528445006,
141
+ "grad_norm": 1.3373093605041504,
142
+ "learning_rate": 0.00011000000000000002,
143
+ "loss": 0.5353,
144
  "step": 900
145
  },
146
  {
147
+ "epoch": 1.1378002528445006,
148
+ "eval_accuracy": 0.8155774395702775,
149
+ "eval_loss": 0.4251798987388611,
150
+ "eval_runtime": 75.415,
151
+ "eval_samples_per_second": 14.811,
152
+ "eval_steps_per_second": 1.856,
153
  "step": 900
154
  },
155
  {
156
+ "epoch": 1.2642225031605563,
157
+ "grad_norm": 2.4060959815979004,
158
+ "learning_rate": 0.0001,
159
+ "loss": 0.5345,
160
  "step": 1000
161
  },
162
  {
163
+ "epoch": 1.2642225031605563,
164
+ "eval_accuracy": 0.7761862130707251,
165
+ "eval_loss": 0.50136399269104,
166
+ "eval_runtime": 75.9241,
167
+ "eval_samples_per_second": 14.712,
168
+ "eval_steps_per_second": 1.844,
169
  "step": 1000
170
  },
171
  {
172
+ "epoch": 1.3906447534766118,
173
+ "grad_norm": 1.6286314725875854,
174
+ "learning_rate": 9e-05,
175
+ "loss": 0.5105,
176
  "step": 1100
177
  },
178
  {
179
+ "epoch": 1.3906447534766118,
180
+ "eval_accuracy": 0.7860340196956133,
181
+ "eval_loss": 0.48000478744506836,
182
+ "eval_runtime": 75.3515,
183
+ "eval_samples_per_second": 14.824,
184
+ "eval_steps_per_second": 1.858,
185
  "step": 1100
186
  },
187
  {
188
+ "epoch": 1.5170670037926675,
189
+ "grad_norm": 2.462752103805542,
190
+ "learning_rate": 8e-05,
191
+ "loss": 0.5266,
192
  "step": 1200
193
  },
194
  {
195
+ "epoch": 1.5170670037926675,
196
+ "eval_accuracy": 0.7958818263205013,
197
+ "eval_loss": 0.4617547392845154,
198
+ "eval_runtime": 75.1188,
199
+ "eval_samples_per_second": 14.87,
200
+ "eval_steps_per_second": 1.864,
201
  "step": 1200
202
  },
203
  {
204
+ "epoch": 1.6434892541087232,
205
+ "grad_norm": 2.6984634399414062,
206
+ "learning_rate": 7e-05,
207
+ "loss": 0.4709,
208
  "step": 1300
209
  },
210
  {
211
+ "epoch": 1.6434892541087232,
212
+ "eval_accuracy": 0.8281110116383169,
213
+ "eval_loss": 0.39056020975112915,
214
+ "eval_runtime": 74.801,
215
+ "eval_samples_per_second": 14.933,
216
+ "eval_steps_per_second": 1.872,
217
  "step": 1300
218
  },
219
  {
220
+ "epoch": 1.7699115044247788,
221
+ "grad_norm": 2.939568281173706,
222
+ "learning_rate": 6e-05,
223
+ "loss": 0.4624,
224
  "step": 1400
225
  },
226
  {
227
+ "epoch": 1.7699115044247788,
228
+ "eval_accuracy": 0.8128916741271263,
229
+ "eval_loss": 0.4208226203918457,
230
+ "eval_runtime": 77.4109,
231
+ "eval_samples_per_second": 14.429,
232
+ "eval_steps_per_second": 1.809,
233
  "step": 1400
234
  },
235
  {
236
+ "epoch": 1.8963337547408345,
237
+ "grad_norm": 1.791272759437561,
238
+ "learning_rate": 5e-05,
239
+ "loss": 0.4677,
240
  "step": 1500
241
  },
242
  {
243
+ "epoch": 1.8963337547408345,
244
+ "eval_accuracy": 0.8173679498657117,
245
+ "eval_loss": 0.4207296073436737,
246
+ "eval_runtime": 76.4178,
247
+ "eval_samples_per_second": 14.617,
248
+ "eval_steps_per_second": 1.832,
249
  "step": 1500
250
  },
251
  {
252
+ "epoch": 2.02275600505689,
253
+ "grad_norm": 1.7240327596664429,
254
+ "learning_rate": 4e-05,
255
+ "loss": 0.4478,
256
  "step": 1600
257
  },
258
  {
259
+ "epoch": 2.02275600505689,
260
+ "eval_accuracy": 0.8478066248880931,
261
+ "eval_loss": 0.35574597120285034,
262
+ "eval_runtime": 75.4802,
263
+ "eval_samples_per_second": 14.799,
264
+ "eval_steps_per_second": 1.855,
265
  "step": 1600
266
  },
267
  {
268
+ "epoch": 2.1491782553729455,
269
+ "grad_norm": 3.029090642929077,
270
+ "learning_rate": 3e-05,
271
+ "loss": 0.4451,
272
  "step": 1700
273
  },
274
  {
275
+ "epoch": 2.1491782553729455,
276
+ "eval_accuracy": 0.8442256042972247,
277
+ "eval_loss": 0.3545984923839569,
278
+ "eval_runtime": 75.7957,
279
+ "eval_samples_per_second": 14.737,
280
+ "eval_steps_per_second": 1.847,
281
  "step": 1700
282
  },
283
  {
284
+ "epoch": 2.275600505689001,
285
+ "grad_norm": 2.259213447570801,
286
+ "learning_rate": 2e-05,
287
+ "loss": 0.3796,
288
  "step": 1800
289
  },
290
  {
291
+ "epoch": 2.275600505689001,
292
+ "eval_accuracy": 0.8719785138764548,
293
+ "eval_loss": 0.3198860287666321,
294
+ "eval_runtime": 74.5384,
295
+ "eval_samples_per_second": 14.986,
296
+ "eval_steps_per_second": 1.878,
297
  "step": 1800
298
  },
299
  {
300
+ "epoch": 2.402022756005057,
301
+ "grad_norm": 2.9328560829162598,
302
+ "learning_rate": 1e-05,
303
+ "loss": 0.4358,
304
  "step": 1900
305
  },
306
  {
307
+ "epoch": 2.402022756005057,
308
+ "eval_accuracy": 0.8603401969561325,
309
+ "eval_loss": 0.33084791898727417,
310
+ "eval_runtime": 76.0815,
311
+ "eval_samples_per_second": 14.682,
312
+ "eval_steps_per_second": 1.84,
313
  "step": 1900
314
  },
315
  {
316
+ "epoch": 2.5284450063211126,
317
+ "grad_norm": 1.4755433797836304,
318
+ "learning_rate": 0.0,
319
+ "loss": 0.3373,
320
  "step": 2000
321
  },
322
  {
323
+ "epoch": 2.5284450063211126,
324
+ "eval_accuracy": 0.8540734109221128,
325
+ "eval_loss": 0.34551766514778137,
326
+ "eval_runtime": 75.3964,
327
+ "eval_samples_per_second": 14.815,
328
+ "eval_steps_per_second": 1.857,
329
  "step": 2000
330
  },
331
  {
332
+ "epoch": 2.5284450063211126,
333
+ "step": 2000,
334
+ "total_flos": 1.2397168498542428e+18,
335
+ "train_loss": 0.49920871353149415,
336
+ "train_runtime": 3192.0436,
337
+ "train_samples_per_second": 5.012,
338
+ "train_steps_per_second": 0.627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  }
340
  ],
341
  "logging_steps": 100,
342
+ "max_steps": 2000,
343
  "num_input_tokens_seen": 0,
344
+ "num_train_epochs": 3,
345
  "save_steps": 100,
346
  "stateful_callbacks": {
347
  "TrainerControl": {
 
355
  "attributes": {}
356
  }
357
  },
358
+ "total_flos": 1.2397168498542428e+18,
359
+ "train_batch_size": 8,
360
  "trial_name": null,
361
  "trial_params": null
362
  }