{ "best_metric": 0.9302325581395349, "best_model_checkpoint": "vit-base-patch16-224-dmae-va-da-40B/checkpoint-107", "epoch": 36.92307692307692, "eval_steps": 500, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.92, "eval_accuracy": 0.32558139534883723, "eval_loss": 1.4669076204299927, "eval_runtime": 0.6877, "eval_samples_per_second": 62.528, "eval_steps_per_second": 2.908, "step": 3 }, { "epoch": 1.85, "eval_accuracy": 0.4418604651162791, "eval_loss": 1.26887047290802, "eval_runtime": 0.661, "eval_samples_per_second": 65.049, "eval_steps_per_second": 3.026, "step": 6 }, { "epoch": 2.77, "eval_accuracy": 0.46511627906976744, "eval_loss": 1.1591116189956665, "eval_runtime": 0.629, "eval_samples_per_second": 68.363, "eval_steps_per_second": 3.18, "step": 9 }, { "epoch": 3.08, "learning_rate": 4.166666666666667e-05, "loss": 1.3901, "step": 10 }, { "epoch": 4.0, "eval_accuracy": 0.5813953488372093, "eval_loss": 0.977810800075531, "eval_runtime": 0.6189, "eval_samples_per_second": 69.481, "eval_steps_per_second": 3.232, "step": 13 }, { "epoch": 4.92, "eval_accuracy": 0.6511627906976745, "eval_loss": 0.8885109424591064, "eval_runtime": 0.5951, "eval_samples_per_second": 72.259, "eval_steps_per_second": 3.361, "step": 16 }, { "epoch": 5.85, "eval_accuracy": 0.6511627906976745, "eval_loss": 0.7884993553161621, "eval_runtime": 0.6522, "eval_samples_per_second": 65.934, "eval_steps_per_second": 3.067, "step": 19 }, { "epoch": 6.15, "learning_rate": 4.62962962962963e-05, "loss": 0.9794, "step": 20 }, { "epoch": 6.77, "eval_accuracy": 0.7441860465116279, "eval_loss": 0.6853659152984619, "eval_runtime": 0.675, "eval_samples_per_second": 63.705, "eval_steps_per_second": 2.963, "step": 22 }, { "epoch": 8.0, "eval_accuracy": 0.7674418604651163, "eval_loss": 0.5821540951728821, "eval_runtime": 0.6249, "eval_samples_per_second": 68.812, "eval_steps_per_second": 3.201, "step": 26 }, { "epoch": 8.92, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.492933064699173, "eval_runtime": 0.6176, "eval_samples_per_second": 69.619, "eval_steps_per_second": 3.238, "step": 29 }, { "epoch": 9.23, "learning_rate": 4.166666666666667e-05, "loss": 0.6573, "step": 30 }, { "epoch": 9.85, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.4822414219379425, "eval_runtime": 0.6106, "eval_samples_per_second": 70.422, "eval_steps_per_second": 3.275, "step": 32 }, { "epoch": 10.77, "eval_accuracy": 0.8372093023255814, "eval_loss": 0.45293128490448, "eval_runtime": 0.6231, "eval_samples_per_second": 69.013, "eval_steps_per_second": 3.21, "step": 35 }, { "epoch": 12.0, "eval_accuracy": 0.7906976744186046, "eval_loss": 0.42034050822257996, "eval_runtime": 0.6472, "eval_samples_per_second": 66.442, "eval_steps_per_second": 3.09, "step": 39 }, { "epoch": 12.31, "learning_rate": 3.7037037037037037e-05, "loss": 0.4166, "step": 40 }, { "epoch": 12.92, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.38893207907676697, "eval_runtime": 0.6646, "eval_samples_per_second": 64.703, "eval_steps_per_second": 3.009, "step": 42 }, { "epoch": 13.85, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.36970528960227966, "eval_runtime": 0.6738, "eval_samples_per_second": 63.818, "eval_steps_per_second": 2.968, "step": 45 }, { "epoch": 14.77, "eval_accuracy": 0.813953488372093, "eval_loss": 0.3991001546382904, "eval_runtime": 0.6109, "eval_samples_per_second": 70.384, "eval_steps_per_second": 3.274, "step": 48 }, { "epoch": 15.38, "learning_rate": 3.240740740740741e-05, "loss": 0.3376, "step": 50 }, { "epoch": 16.0, "eval_accuracy": 0.9069767441860465, "eval_loss": 0.3038073778152466, "eval_runtime": 0.6212, "eval_samples_per_second": 69.217, "eval_steps_per_second": 3.219, "step": 52 }, { "epoch": 16.92, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.3138751685619354, "eval_runtime": 0.6788, "eval_samples_per_second": 63.343, "eval_steps_per_second": 2.946, "step": 55 }, { "epoch": 17.85, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2821277976036072, "eval_runtime": 0.6103, "eval_samples_per_second": 70.454, "eval_steps_per_second": 3.277, "step": 58 }, { "epoch": 18.46, "learning_rate": 2.777777777777778e-05, "loss": 0.191, "step": 60 }, { "epoch": 18.77, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2904522120952606, "eval_runtime": 0.6114, "eval_samples_per_second": 70.334, "eval_steps_per_second": 3.271, "step": 61 }, { "epoch": 20.0, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.2616073787212372, "eval_runtime": 0.6712, "eval_samples_per_second": 64.062, "eval_steps_per_second": 2.98, "step": 65 }, { "epoch": 20.92, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2636496126651764, "eval_runtime": 0.6805, "eval_samples_per_second": 63.193, "eval_steps_per_second": 2.939, "step": 68 }, { "epoch": 21.54, "learning_rate": 2.314814814814815e-05, "loss": 0.2065, "step": 70 }, { "epoch": 21.85, "eval_accuracy": 0.9069767441860465, "eval_loss": 0.286418080329895, "eval_runtime": 0.611, "eval_samples_per_second": 70.371, "eval_steps_per_second": 3.273, "step": 71 }, { "epoch": 22.77, "eval_accuracy": 0.8604651162790697, "eval_loss": 0.2832680940628052, "eval_runtime": 0.6085, "eval_samples_per_second": 70.668, "eval_steps_per_second": 3.287, "step": 74 }, { "epoch": 24.0, "eval_accuracy": 0.9069767441860465, "eval_loss": 0.2507215142250061, "eval_runtime": 0.6793, "eval_samples_per_second": 63.297, "eval_steps_per_second": 2.944, "step": 78 }, { "epoch": 24.62, "learning_rate": 1.8518518518518518e-05, "loss": 0.1328, "step": 80 }, { "epoch": 24.92, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2889547646045685, "eval_runtime": 0.6175, "eval_samples_per_second": 69.635, "eval_steps_per_second": 3.239, "step": 81 }, { "epoch": 25.85, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.30645591020584106, "eval_runtime": 0.607, "eval_samples_per_second": 70.842, "eval_steps_per_second": 3.295, "step": 84 }, { "epoch": 26.77, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.28910166025161743, "eval_runtime": 0.6138, "eval_samples_per_second": 70.052, "eval_steps_per_second": 3.258, "step": 87 }, { "epoch": 27.69, "learning_rate": 1.388888888888889e-05, "loss": 0.1065, "step": 90 }, { "epoch": 28.0, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2815239727497101, "eval_runtime": 0.7441, "eval_samples_per_second": 57.787, "eval_steps_per_second": 2.688, "step": 91 }, { "epoch": 28.92, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.27533814311027527, "eval_runtime": 0.6118, "eval_samples_per_second": 70.283, "eval_steps_per_second": 3.269, "step": 94 }, { "epoch": 29.85, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2767714560031891, "eval_runtime": 0.6146, "eval_samples_per_second": 69.968, "eval_steps_per_second": 3.254, "step": 97 }, { "epoch": 30.77, "learning_rate": 9.259259259259259e-06, "loss": 0.1122, "step": 100 }, { "epoch": 30.77, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2863713502883911, "eval_runtime": 0.6702, "eval_samples_per_second": 64.162, "eval_steps_per_second": 2.984, "step": 100 }, { "epoch": 32.0, "eval_accuracy": 0.9069767441860465, "eval_loss": 0.25629687309265137, "eval_runtime": 0.6095, "eval_samples_per_second": 70.554, "eval_steps_per_second": 3.282, "step": 104 }, { "epoch": 32.92, "eval_accuracy": 0.9302325581395349, "eval_loss": 0.2420913577079773, "eval_runtime": 0.6092, "eval_samples_per_second": 70.587, "eval_steps_per_second": 3.283, "step": 107 }, { "epoch": 33.85, "learning_rate": 4.6296296296296296e-06, "loss": 0.0879, "step": 110 }, { "epoch": 33.85, "eval_accuracy": 0.9069767441860465, "eval_loss": 0.24532586336135864, "eval_runtime": 0.613, "eval_samples_per_second": 70.15, "eval_steps_per_second": 3.263, "step": 110 }, { "epoch": 34.77, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.24344570934772491, "eval_runtime": 0.6673, "eval_samples_per_second": 64.436, "eval_steps_per_second": 2.997, "step": 113 }, { "epoch": 36.0, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.24062730371952057, "eval_runtime": 0.6294, "eval_samples_per_second": 68.316, "eval_steps_per_second": 3.177, "step": 117 }, { "epoch": 36.92, "learning_rate": 0.0, "loss": 0.1082, "step": 120 }, { "epoch": 36.92, "eval_accuracy": 0.8837209302325582, "eval_loss": 0.2406519055366516, "eval_runtime": 0.6219, "eval_samples_per_second": 69.138, "eval_steps_per_second": 3.216, "step": 120 }, { "epoch": 36.92, "step": 120, "total_flos": 1.1038156860172861e+18, "train_loss": 0.39384828855594, "train_runtime": 762.8266, "train_samples_per_second": 20.188, "train_steps_per_second": 0.157 } ], "logging_steps": 10, "max_steps": 120, "num_train_epochs": 40, "save_steps": 500, "total_flos": 1.1038156860172861e+18, "trial_name": null, "trial_params": null }