|
{ |
|
"best_metric": 0.9302325581395349, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-da-40B/checkpoint-107", |
|
"epoch": 36.92307692307692, |
|
"eval_steps": 500, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.32558139534883723, |
|
"eval_loss": 1.4669076204299927, |
|
"eval_runtime": 0.6877, |
|
"eval_samples_per_second": 62.528, |
|
"eval_steps_per_second": 2.908, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.4418604651162791, |
|
"eval_loss": 1.26887047290802, |
|
"eval_runtime": 0.661, |
|
"eval_samples_per_second": 65.049, |
|
"eval_steps_per_second": 3.026, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.46511627906976744, |
|
"eval_loss": 1.1591116189956665, |
|
"eval_runtime": 0.629, |
|
"eval_samples_per_second": 68.363, |
|
"eval_steps_per_second": 3.18, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.3901, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5813953488372093, |
|
"eval_loss": 0.977810800075531, |
|
"eval_runtime": 0.6189, |
|
"eval_samples_per_second": 69.481, |
|
"eval_steps_per_second": 3.232, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_accuracy": 0.6511627906976745, |
|
"eval_loss": 0.8885109424591064, |
|
"eval_runtime": 0.5951, |
|
"eval_samples_per_second": 72.259, |
|
"eval_steps_per_second": 3.361, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_accuracy": 0.6511627906976745, |
|
"eval_loss": 0.7884993553161621, |
|
"eval_runtime": 0.6522, |
|
"eval_samples_per_second": 65.934, |
|
"eval_steps_per_second": 3.067, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.9794, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.6853659152984619, |
|
"eval_runtime": 0.675, |
|
"eval_samples_per_second": 63.705, |
|
"eval_steps_per_second": 2.963, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7674418604651163, |
|
"eval_loss": 0.5821540951728821, |
|
"eval_runtime": 0.6249, |
|
"eval_samples_per_second": 68.812, |
|
"eval_steps_per_second": 3.201, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.492933064699173, |
|
"eval_runtime": 0.6176, |
|
"eval_samples_per_second": 69.619, |
|
"eval_steps_per_second": 3.238, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.6573, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.4822414219379425, |
|
"eval_runtime": 0.6106, |
|
"eval_samples_per_second": 70.422, |
|
"eval_steps_per_second": 3.275, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"eval_accuracy": 0.8372093023255814, |
|
"eval_loss": 0.45293128490448, |
|
"eval_runtime": 0.6231, |
|
"eval_samples_per_second": 69.013, |
|
"eval_steps_per_second": 3.21, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7906976744186046, |
|
"eval_loss": 0.42034050822257996, |
|
"eval_runtime": 0.6472, |
|
"eval_samples_per_second": 66.442, |
|
"eval_steps_per_second": 3.09, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.4166, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.38893207907676697, |
|
"eval_runtime": 0.6646, |
|
"eval_samples_per_second": 64.703, |
|
"eval_steps_per_second": 3.009, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.36970528960227966, |
|
"eval_runtime": 0.6738, |
|
"eval_samples_per_second": 63.818, |
|
"eval_steps_per_second": 2.968, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"eval_accuracy": 0.813953488372093, |
|
"eval_loss": 0.3991001546382904, |
|
"eval_runtime": 0.6109, |
|
"eval_samples_per_second": 70.384, |
|
"eval_steps_per_second": 3.274, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.3376, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.3038073778152466, |
|
"eval_runtime": 0.6212, |
|
"eval_samples_per_second": 69.217, |
|
"eval_steps_per_second": 3.219, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.3138751685619354, |
|
"eval_runtime": 0.6788, |
|
"eval_samples_per_second": 63.343, |
|
"eval_steps_per_second": 2.946, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2821277976036072, |
|
"eval_runtime": 0.6103, |
|
"eval_samples_per_second": 70.454, |
|
"eval_steps_per_second": 3.277, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.191, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2904522120952606, |
|
"eval_runtime": 0.6114, |
|
"eval_samples_per_second": 70.334, |
|
"eval_steps_per_second": 3.271, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.2616073787212372, |
|
"eval_runtime": 0.6712, |
|
"eval_samples_per_second": 64.062, |
|
"eval_steps_per_second": 2.98, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2636496126651764, |
|
"eval_runtime": 0.6805, |
|
"eval_samples_per_second": 63.193, |
|
"eval_steps_per_second": 2.939, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.2065, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.286418080329895, |
|
"eval_runtime": 0.611, |
|
"eval_samples_per_second": 70.371, |
|
"eval_steps_per_second": 3.273, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.77, |
|
"eval_accuracy": 0.8604651162790697, |
|
"eval_loss": 0.2832680940628052, |
|
"eval_runtime": 0.6085, |
|
"eval_samples_per_second": 70.668, |
|
"eval_steps_per_second": 3.287, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.2507215142250061, |
|
"eval_runtime": 0.6793, |
|
"eval_samples_per_second": 63.297, |
|
"eval_steps_per_second": 2.944, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.62, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1328, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.92, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2889547646045685, |
|
"eval_runtime": 0.6175, |
|
"eval_samples_per_second": 69.635, |
|
"eval_steps_per_second": 3.239, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.30645591020584106, |
|
"eval_runtime": 0.607, |
|
"eval_samples_per_second": 70.842, |
|
"eval_steps_per_second": 3.295, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.77, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.28910166025161743, |
|
"eval_runtime": 0.6138, |
|
"eval_samples_per_second": 70.052, |
|
"eval_steps_per_second": 3.258, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 27.69, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.1065, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2815239727497101, |
|
"eval_runtime": 0.7441, |
|
"eval_samples_per_second": 57.787, |
|
"eval_steps_per_second": 2.688, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.27533814311027527, |
|
"eval_runtime": 0.6118, |
|
"eval_samples_per_second": 70.283, |
|
"eval_steps_per_second": 3.269, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2767714560031891, |
|
"eval_runtime": 0.6146, |
|
"eval_samples_per_second": 69.968, |
|
"eval_steps_per_second": 3.254, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1122, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2863713502883911, |
|
"eval_runtime": 0.6702, |
|
"eval_samples_per_second": 64.162, |
|
"eval_steps_per_second": 2.984, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.25629687309265137, |
|
"eval_runtime": 0.6095, |
|
"eval_samples_per_second": 70.554, |
|
"eval_steps_per_second": 3.282, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92, |
|
"eval_accuracy": 0.9302325581395349, |
|
"eval_loss": 0.2420913577079773, |
|
"eval_runtime": 0.6092, |
|
"eval_samples_per_second": 70.587, |
|
"eval_steps_per_second": 3.283, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.0879, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 33.85, |
|
"eval_accuracy": 0.9069767441860465, |
|
"eval_loss": 0.24532586336135864, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 70.15, |
|
"eval_steps_per_second": 3.263, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.77, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.24344570934772491, |
|
"eval_runtime": 0.6673, |
|
"eval_samples_per_second": 64.436, |
|
"eval_steps_per_second": 2.997, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.24062730371952057, |
|
"eval_runtime": 0.6294, |
|
"eval_samples_per_second": 68.316, |
|
"eval_steps_per_second": 3.177, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"learning_rate": 0.0, |
|
"loss": 0.1082, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"eval_accuracy": 0.8837209302325582, |
|
"eval_loss": 0.2406519055366516, |
|
"eval_runtime": 0.6219, |
|
"eval_samples_per_second": 69.138, |
|
"eval_steps_per_second": 3.216, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"step": 120, |
|
"total_flos": 1.1038156860172861e+18, |
|
"train_loss": 0.39384828855594, |
|
"train_runtime": 762.8266, |
|
"train_samples_per_second": 20.188, |
|
"train_steps_per_second": 0.157 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 120, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 1.1038156860172861e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|