{
  "best_metric": 0.45239612460136414,
  "best_model_checkpoint": "vit-base-food-items-v1/checkpoint-300",
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 608,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06578947368421052,
      "grad_norm": 0.04839174449443817,
      "learning_rate": 0.00019671052631578949,
      "loss": 0.0259,
      "step": 10
    },
    {
      "epoch": 0.13157894736842105,
      "grad_norm": 5.086187362670898,
      "learning_rate": 0.00019342105263157894,
      "loss": 0.0743,
      "step": 20
    },
    {
      "epoch": 0.19736842105263158,
      "grad_norm": 8.687716484069824,
      "learning_rate": 0.00019013157894736844,
      "loss": 0.0621,
      "step": 30
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.0554538369178772,
      "learning_rate": 0.00018684210526315792,
      "loss": 0.1584,
      "step": 40
    },
    {
      "epoch": 0.32894736842105265,
      "grad_norm": 7.25691556930542,
      "learning_rate": 0.00018355263157894736,
      "loss": 0.0284,
      "step": 50
    },
    {
      "epoch": 0.39473684210526316,
      "grad_norm": 0.0355791412293911,
      "learning_rate": 0.00018026315789473684,
      "loss": 0.1607,
      "step": 60
    },
    {
      "epoch": 0.4605263157894737,
      "grad_norm": 6.474045276641846,
      "learning_rate": 0.00017697368421052632,
      "loss": 0.2034,
      "step": 70
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 0.047177255153656006,
      "learning_rate": 0.0001736842105263158,
      "loss": 0.1755,
      "step": 80
    },
    {
      "epoch": 0.5921052631578947,
      "grad_norm": 7.999953269958496,
      "learning_rate": 0.00017039473684210527,
      "loss": 0.078,
      "step": 90
    },
    {
      "epoch": 0.6578947368421053,
      "grad_norm": 0.2906012237071991,
      "learning_rate": 0.00016710526315789475,
      "loss": 0.1773,
      "step": 100
    },
    {
      "epoch": 0.6578947368421053,
      "eval_accuracy": 0.8472727272727273,
      "eval_loss": 0.7279737591743469,
      "eval_runtime": 6.7097,
      "eval_samples_per_second": 81.971,
      "eval_steps_per_second": 10.284,
      "step": 100
    },
    {
      "epoch": 0.7236842105263158,
      "grad_norm": 0.038031741976737976,
      "learning_rate": 0.00016381578947368422,
      "loss": 0.1011,
      "step": 110
    },
    {
      "epoch": 0.7894736842105263,
      "grad_norm": 0.8751915097236633,
      "learning_rate": 0.0001605263157894737,
      "loss": 0.1059,
      "step": 120
    },
    {
      "epoch": 0.8552631578947368,
      "grad_norm": 0.08943302929401398,
      "learning_rate": 0.00015723684210526318,
      "loss": 0.0334,
      "step": 130
    },
    {
      "epoch": 0.9210526315789473,
      "grad_norm": 0.17175784707069397,
      "learning_rate": 0.00015394736842105265,
      "loss": 0.1515,
      "step": 140
    },
    {
      "epoch": 0.9868421052631579,
      "grad_norm": 0.053591687232255936,
      "learning_rate": 0.0001506578947368421,
      "loss": 0.1301,
      "step": 150
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 0.026137366890907288,
      "learning_rate": 0.00014736842105263158,
      "loss": 0.0102,
      "step": 160
    },
    {
      "epoch": 1.118421052631579,
      "grad_norm": 0.09105370193719864,
      "learning_rate": 0.00014407894736842106,
      "loss": 0.0066,
      "step": 170
    },
    {
      "epoch": 1.1842105263157894,
      "grad_norm": 0.050408605486154556,
      "learning_rate": 0.00014078947368421053,
      "loss": 0.0679,
      "step": 180
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.051493316888809204,
      "learning_rate": 0.0001375,
      "loss": 0.007,
      "step": 190
    },
    {
      "epoch": 1.3157894736842106,
      "grad_norm": 0.023582015186548233,
      "learning_rate": 0.00013421052631578948,
      "loss": 0.0589,
      "step": 200
    },
    {
      "epoch": 1.3157894736842106,
      "eval_accuracy": 0.8872727272727273,
      "eval_loss": 0.5529205203056335,
      "eval_runtime": 5.9487,
      "eval_samples_per_second": 92.458,
      "eval_steps_per_second": 11.599,
      "step": 200
    },
    {
      "epoch": 1.381578947368421,
      "grad_norm": 0.0221235528588295,
      "learning_rate": 0.00013092105263157893,
      "loss": 0.0046,
      "step": 210
    },
    {
      "epoch": 1.4473684210526316,
      "grad_norm": 6.497156620025635,
      "learning_rate": 0.00012763157894736844,
      "loss": 0.0086,
      "step": 220
    },
    {
      "epoch": 1.513157894736842,
      "grad_norm": 0.013416736386716366,
      "learning_rate": 0.00012434210526315791,
      "loss": 0.0042,
      "step": 230
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 0.012088390998542309,
      "learning_rate": 0.00012105263157894738,
      "loss": 0.1094,
      "step": 240
    },
    {
      "epoch": 1.6447368421052633,
      "grad_norm": 7.198599338531494,
      "learning_rate": 0.00011776315789473684,
      "loss": 0.045,
      "step": 250
    },
    {
      "epoch": 1.7105263157894737,
      "grad_norm": 0.031135905534029007,
      "learning_rate": 0.00011447368421052632,
      "loss": 0.0331,
      "step": 260
    },
    {
      "epoch": 1.776315789473684,
      "grad_norm": 0.07299932837486267,
      "learning_rate": 0.0001111842105263158,
      "loss": 0.0535,
      "step": 270
    },
    {
      "epoch": 1.8421052631578947,
      "grad_norm": 0.02018345519900322,
      "learning_rate": 0.00010789473684210527,
      "loss": 0.0241,
      "step": 280
    },
    {
      "epoch": 1.9078947368421053,
      "grad_norm": 0.012926718220114708,
      "learning_rate": 0.00010460526315789475,
      "loss": 0.033,
      "step": 290
    },
    {
      "epoch": 1.973684210526316,
      "grad_norm": 8.804197311401367,
      "learning_rate": 0.00010131578947368421,
      "loss": 0.043,
      "step": 300
    },
    {
      "epoch": 1.973684210526316,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.45239612460136414,
      "eval_runtime": 5.7174,
      "eval_samples_per_second": 96.197,
      "eval_steps_per_second": 12.068,
      "step": 300
    },
    {
      "epoch": 2.039473684210526,
      "grad_norm": 21.062307357788086,
      "learning_rate": 9.802631578947369e-05,
      "loss": 0.0327,
      "step": 310
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 0.009257642552256584,
      "learning_rate": 9.473684210526316e-05,
      "loss": 0.0151,
      "step": 320
    },
    {
      "epoch": 2.1710526315789473,
      "grad_norm": 0.014151917770504951,
      "learning_rate": 9.144736842105264e-05,
      "loss": 0.0248,
      "step": 330
    },
    {
      "epoch": 2.236842105263158,
      "grad_norm": 0.013802828267216682,
      "learning_rate": 8.81578947368421e-05,
      "loss": 0.003,
      "step": 340
    },
    {
      "epoch": 2.3026315789473686,
      "grad_norm": 0.014456182718276978,
      "learning_rate": 8.486842105263159e-05,
      "loss": 0.0035,
      "step": 350
    },
    {
      "epoch": 2.3684210526315788,
      "grad_norm": 0.006758903618901968,
      "learning_rate": 8.157894736842105e-05,
      "loss": 0.0024,
      "step": 360
    },
    {
      "epoch": 2.4342105263157894,
      "grad_norm": 0.009314753115177155,
      "learning_rate": 7.828947368421053e-05,
      "loss": 0.0024,
      "step": 370
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.006471664644777775,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.0022,
      "step": 380
    },
    {
      "epoch": 2.5657894736842106,
      "grad_norm": 0.013896413147449493,
      "learning_rate": 7.171052631578947e-05,
      "loss": 0.0023,
      "step": 390
    },
    {
      "epoch": 2.6315789473684212,
      "grad_norm": 0.009549788199365139,
      "learning_rate": 6.842105263157895e-05,
      "loss": 0.0022,
      "step": 400
    },
    {
      "epoch": 2.6315789473684212,
      "eval_accuracy": 0.8909090909090909,
      "eval_loss": 0.5150398015975952,
      "eval_runtime": 6.2356,
      "eval_samples_per_second": 88.203,
      "eval_steps_per_second": 11.065,
      "step": 400
    },
    {
      "epoch": 2.6973684210526314,
      "grad_norm": 0.00833881739526987,
      "learning_rate": 6.513157894736842e-05,
      "loss": 0.0024,
      "step": 410
    },
    {
      "epoch": 2.763157894736842,
      "grad_norm": 0.006957135163247585,
      "learning_rate": 6.18421052631579e-05,
      "loss": 0.0021,
      "step": 420
    },
    {
      "epoch": 2.8289473684210527,
      "grad_norm": 0.006556599400937557,
      "learning_rate": 5.855263157894737e-05,
      "loss": 0.0021,
      "step": 430
    },
    {
      "epoch": 2.8947368421052633,
      "grad_norm": 0.007122657261788845,
      "learning_rate": 5.526315789473685e-05,
      "loss": 0.0021,
      "step": 440
    },
    {
      "epoch": 2.9605263157894735,
      "grad_norm": 0.0069893728941679,
      "learning_rate": 5.197368421052632e-05,
      "loss": 0.002,
      "step": 450
    },
    {
      "epoch": 3.026315789473684,
      "grad_norm": 0.006159682292491198,
      "learning_rate": 4.868421052631579e-05,
      "loss": 0.002,
      "step": 460
    },
    {
      "epoch": 3.0921052631578947,
      "grad_norm": 0.0069947754964232445,
      "learning_rate": 4.539473684210527e-05,
      "loss": 0.0019,
      "step": 470
    },
    {
      "epoch": 3.1578947368421053,
      "grad_norm": 0.007419601548463106,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.0018,
      "step": 480
    },
    {
      "epoch": 3.223684210526316,
      "grad_norm": 0.006330096162855625,
      "learning_rate": 3.8815789473684214e-05,
      "loss": 0.0018,
      "step": 490
    },
    {
      "epoch": 3.2894736842105265,
      "grad_norm": 0.006105512380599976,
      "learning_rate": 3.5526315789473684e-05,
      "loss": 0.0018,
      "step": 500
    },
    {
      "epoch": 3.2894736842105265,
      "eval_accuracy": 0.9018181818181819,
      "eval_loss": 0.49247637391090393,
      "eval_runtime": 6.5136,
      "eval_samples_per_second": 84.439,
      "eval_steps_per_second": 10.593,
      "step": 500
    },
    {
      "epoch": 3.3552631578947367,
      "grad_norm": 0.006337973289191723,
      "learning_rate": 3.223684210526316e-05,
      "loss": 0.0018,
      "step": 510
    },
    {
      "epoch": 3.4210526315789473,
      "grad_norm": 0.005863433238118887,
      "learning_rate": 2.8947368421052634e-05,
      "loss": 0.0018,
      "step": 520
    },
    {
      "epoch": 3.486842105263158,
      "grad_norm": 0.0057103936560451984,
      "learning_rate": 2.565789473684211e-05,
      "loss": 0.0017,
      "step": 530
    },
    {
      "epoch": 3.5526315789473686,
      "grad_norm": 0.004713858477771282,
      "learning_rate": 2.236842105263158e-05,
      "loss": 0.0018,
      "step": 540
    },
    {
      "epoch": 3.6184210526315788,
      "grad_norm": 0.007430619560182095,
      "learning_rate": 1.9078947368421056e-05,
      "loss": 0.0017,
      "step": 550
    },
    {
      "epoch": 3.6842105263157894,
      "grad_norm": 0.0051925876177847385,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.0018,
      "step": 560
    },
    {
      "epoch": 3.75,
      "grad_norm": 0.0064788335002958775,
      "learning_rate": 1.25e-05,
      "loss": 0.0017,
      "step": 570
    },
    {
      "epoch": 3.8157894736842106,
      "grad_norm": 0.006365407258272171,
      "learning_rate": 9.210526315789474e-06,
      "loss": 0.0017,
      "step": 580
    },
    {
      "epoch": 3.8815789473684212,
      "grad_norm": 0.005164624657481909,
      "learning_rate": 5.921052631578948e-06,
      "loss": 0.0018,
      "step": 590
    },
    {
      "epoch": 3.9473684210526314,
      "grad_norm": 0.006292811129242182,
      "learning_rate": 2.631578947368421e-06,
      "loss": 0.0017,
      "step": 600
    },
    {
      "epoch": 3.9473684210526314,
      "eval_accuracy": 0.9018181818181819,
      "eval_loss": 0.4941176474094391,
      "eval_runtime": 6.4553,
      "eval_samples_per_second": 85.201,
      "eval_steps_per_second": 10.689,
      "step": 600
    },
    {
      "epoch": 4.0,
      "step": 608,
      "total_flos": 7.501829674622976e+17,
      "train_loss": 0.03790271527280933,
      "train_runtime": 250.8529,
      "train_samples_per_second": 38.588,
      "train_steps_per_second": 2.424
    }
  ],
  "logging_steps": 10,
  "max_steps": 608,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.501829674622976e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|