{ "best_metric": 0.344835102558136, "best_model_checkpoint": "./vit-base-pets/checkpoint-235", "epoch": 5.0, "eval_steps": 500, "global_step": 235, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "grad_norm": 216203.125, "learning_rate": 0.0002872340425531915, "loss": 3.3311, "step": 10 }, { "epoch": 0.43, "grad_norm": 198156.9375, "learning_rate": 0.000274468085106383, "loss": 2.5921, "step": 20 }, { "epoch": 0.64, "grad_norm": 176661.6875, "learning_rate": 0.0002617021276595745, "loss": 1.9823, "step": 30 }, { "epoch": 0.85, "grad_norm": 142275.75, "learning_rate": 0.0002489361702127659, "loss": 1.5136, "step": 40 }, { "epoch": 1.0, "eval_accuracy": 0.8430311231393776, "eval_loss": 1.1030857563018799, "eval_runtime": 8.7542, "eval_samples_per_second": 84.417, "eval_steps_per_second": 5.369, "step": 47 }, { "epoch": 1.06, "grad_norm": 124899.421875, "learning_rate": 0.00023617021276595742, "loss": 1.1858, "step": 50 }, { "epoch": 1.28, "grad_norm": 98946.875, "learning_rate": 0.0002234042553191489, "loss": 0.9313, "step": 60 }, { "epoch": 1.49, "grad_norm": 92924.6484375, "learning_rate": 0.0002106382978723404, "loss": 0.7466, "step": 70 }, { "epoch": 1.7, "grad_norm": 90032.1484375, "learning_rate": 0.00019787234042553187, "loss": 0.6475, "step": 80 }, { "epoch": 1.91, "grad_norm": 68696.1875, "learning_rate": 0.0001851063829787234, "loss": 0.5547, "step": 90 }, { "epoch": 2.0, "eval_accuracy": 0.9269282814614344, "eval_loss": 0.5232290625572205, "eval_runtime": 8.9185, "eval_samples_per_second": 82.861, "eval_steps_per_second": 5.27, "step": 94 }, { "epoch": 2.13, "grad_norm": 67699.609375, "learning_rate": 0.0001723404255319149, "loss": 0.5311, "step": 100 }, { "epoch": 2.34, "grad_norm": 72000.0234375, "learning_rate": 0.00015957446808510637, "loss": 0.4636, "step": 110 }, { "epoch": 2.55, "grad_norm": 54618.05078125, "learning_rate": 0.00014680851063829785, "loss": 0.4171, "step": 120 }, { "epoch": 2.77, "grad_norm": 57285.890625, "learning_rate": 0.00013404255319148935, "loss": 0.3946, "step": 130 }, { "epoch": 2.98, "grad_norm": 73116.6171875, "learning_rate": 0.00012127659574468084, "loss": 0.4111, "step": 140 }, { "epoch": 3.0, "eval_accuracy": 0.9309878213802436, "eval_loss": 0.39878538250923157, "eval_runtime": 9.1048, "eval_samples_per_second": 81.166, "eval_steps_per_second": 5.162, "step": 141 }, { "epoch": 3.19, "grad_norm": 64126.3828125, "learning_rate": 0.00010851063829787234, "loss": 0.3607, "step": 150 }, { "epoch": 3.4, "grad_norm": 69913.5390625, "learning_rate": 9.574468085106382e-05, "loss": 0.3387, "step": 160 }, { "epoch": 3.62, "grad_norm": 54300.03125, "learning_rate": 8.297872340425531e-05, "loss": 0.3568, "step": 170 }, { "epoch": 3.83, "grad_norm": 60929.75390625, "learning_rate": 7.02127659574468e-05, "loss": 0.3438, "step": 180 }, { "epoch": 4.0, "eval_accuracy": 0.9336941813261164, "eval_loss": 0.35527506470680237, "eval_runtime": 9.4286, "eval_samples_per_second": 78.378, "eval_steps_per_second": 4.985, "step": 188 }, { "epoch": 4.04, "grad_norm": 69279.328125, "learning_rate": 5.7446808510638294e-05, "loss": 0.3087, "step": 190 }, { "epoch": 4.26, "grad_norm": 49415.69140625, "learning_rate": 4.468085106382978e-05, "loss": 0.328, "step": 200 }, { "epoch": 4.47, "grad_norm": 62788.3359375, "learning_rate": 3.1914893617021275e-05, "loss": 0.3199, "step": 210 }, { "epoch": 4.68, "grad_norm": 61852.7421875, "learning_rate": 1.9148936170212762e-05, "loss": 0.3244, "step": 220 }, { "epoch": 4.89, "grad_norm": 64687.453125, "learning_rate": 6.382978723404255e-06, "loss": 0.298, "step": 230 }, { "epoch": 5.0, "eval_accuracy": 0.9296346414073072, "eval_loss": 0.344835102558136, "eval_runtime": 9.1923, "eval_samples_per_second": 80.393, "eval_steps_per_second": 5.113, "step": 235 }, { "epoch": 5.0, "step": 235, "total_flos": 2.2913817801515827e+18, "train_loss": 0.8009341437765892, "train_runtime": 407.3253, "train_samples_per_second": 72.571, "train_steps_per_second": 0.577 } ], "logging_steps": 10, "max_steps": 235, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 2.2913817801515827e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }