{ "best_metric": 0.775, "best_model_checkpoint": "videomae-base-finetuned-rwf2000-subset___v1/checkpoint-500", "epoch": 4.2, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 6.027842044830322, "learning_rate": 2.0000000000000003e-06, "loss": 0.6934, "step": 10 }, { "epoch": 0.04, "grad_norm": 5.1828837394714355, "learning_rate": 4.000000000000001e-06, "loss": 0.7343, "step": 20 }, { "epoch": 0.06, "grad_norm": 6.022104740142822, "learning_rate": 6e-06, "loss": 0.6867, "step": 30 }, { "epoch": 0.08, "grad_norm": 3.9819915294647217, "learning_rate": 8.000000000000001e-06, "loss": 0.6967, "step": 40 }, { "epoch": 0.1, "grad_norm": 3.980625867843628, "learning_rate": 1e-05, "loss": 0.6699, "step": 50 }, { "epoch": 0.12, "grad_norm": 6.501969337463379, "learning_rate": 9.777777777777779e-06, "loss": 0.6533, "step": 60 }, { "epoch": 0.14, "grad_norm": 3.093120813369751, "learning_rate": 9.555555555555556e-06, "loss": 0.627, "step": 70 }, { "epoch": 0.16, "grad_norm": 6.481435775756836, "learning_rate": 9.333333333333334e-06, "loss": 0.6128, "step": 80 }, { "epoch": 0.18, "grad_norm": 5.1239118576049805, "learning_rate": 9.111111111111112e-06, "loss": 0.5995, "step": 90 }, { "epoch": 0.2, "grad_norm": 3.803208351135254, "learning_rate": 8.888888888888888e-06, "loss": 0.5205, "step": 100 }, { "epoch": 0.2, "eval_accuracy": 0.68375, "eval_loss": 0.5434452891349792, "eval_runtime": 225.7874, "eval_samples_per_second": 3.543, "eval_steps_per_second": 0.221, "step": 100 }, { "epoch": 1.02, "grad_norm": 4.416550159454346, "learning_rate": 8.666666666666668e-06, "loss": 0.5088, "step": 110 }, { "epoch": 1.04, "grad_norm": 6.156940460205078, "learning_rate": 8.444444444444446e-06, "loss": 0.5568, "step": 120 }, { "epoch": 1.06, "grad_norm": 5.02749490737915, "learning_rate": 8.222222222222222e-06, "loss": 0.5033, "step": 130 }, { "epoch": 1.08, "grad_norm": 5.65871524810791, "learning_rate": 8.000000000000001e-06, "loss": 0.4887, "step": 140 }, { "epoch": 1.1, "grad_norm": 5.758727550506592, "learning_rate": 7.77777777777778e-06, "loss": 0.5106, "step": 150 }, { "epoch": 1.12, "grad_norm": 11.959117889404297, "learning_rate": 7.555555555555556e-06, "loss": 0.5293, "step": 160 }, { "epoch": 1.1400000000000001, "grad_norm": 5.07788610458374, "learning_rate": 7.333333333333333e-06, "loss": 0.4665, "step": 170 }, { "epoch": 1.16, "grad_norm": 4.521602153778076, "learning_rate": 7.111111111111112e-06, "loss": 0.4658, "step": 180 }, { "epoch": 1.18, "grad_norm": 5.800256252288818, "learning_rate": 6.88888888888889e-06, "loss": 0.5002, "step": 190 }, { "epoch": 1.2, "grad_norm": 3.6714704036712646, "learning_rate": 6.666666666666667e-06, "loss": 0.4084, "step": 200 }, { "epoch": 1.2, "eval_accuracy": 0.655, "eval_loss": 0.5904867053031921, "eval_runtime": 247.6204, "eval_samples_per_second": 3.231, "eval_steps_per_second": 0.202, "step": 200 }, { "epoch": 2.02, "grad_norm": 6.782428741455078, "learning_rate": 6.444444444444445e-06, "loss": 0.4441, "step": 210 }, { "epoch": 2.04, "grad_norm": 4.966807842254639, "learning_rate": 6.222222222222223e-06, "loss": 0.4307, "step": 220 }, { "epoch": 2.06, "grad_norm": 8.04571533203125, "learning_rate": 6e-06, "loss": 0.4022, "step": 230 }, { "epoch": 2.08, "grad_norm": 6.805935859680176, "learning_rate": 5.777777777777778e-06, "loss": 0.4298, "step": 240 }, { "epoch": 2.1, "grad_norm": 6.641063213348389, "learning_rate": 5.555555555555557e-06, "loss": 0.4494, "step": 250 }, { "epoch": 2.12, "grad_norm": 6.542612552642822, "learning_rate": 5.333333333333334e-06, "loss": 0.4735, "step": 260 }, { "epoch": 2.14, "grad_norm": 6.421093940734863, "learning_rate": 5.1111111111111115e-06, "loss": 0.3762, "step": 270 }, { "epoch": 2.16, "grad_norm": 5.869024753570557, "learning_rate": 4.888888888888889e-06, "loss": 0.4387, "step": 280 }, { "epoch": 2.18, "grad_norm": 9.899517059326172, "learning_rate": 4.666666666666667e-06, "loss": 0.4453, "step": 290 }, { "epoch": 2.2, "grad_norm": 11.099763870239258, "learning_rate": 4.444444444444444e-06, "loss": 0.4198, "step": 300 }, { "epoch": 2.2, "eval_accuracy": 0.74625, "eval_loss": 0.48142582178115845, "eval_runtime": 271.5798, "eval_samples_per_second": 2.946, "eval_steps_per_second": 0.184, "step": 300 }, { "epoch": 3.02, "grad_norm": 8.095602989196777, "learning_rate": 4.222222222222223e-06, "loss": 0.4279, "step": 310 }, { "epoch": 3.04, "grad_norm": 5.949367046356201, "learning_rate": 4.000000000000001e-06, "loss": 0.3158, "step": 320 }, { "epoch": 3.06, "grad_norm": 10.842527389526367, "learning_rate": 3.777777777777778e-06, "loss": 0.3554, "step": 330 }, { "epoch": 3.08, "grad_norm": 5.070864677429199, "learning_rate": 3.555555555555556e-06, "loss": 0.2949, "step": 340 }, { "epoch": 3.1, "grad_norm": 15.597376823425293, "learning_rate": 3.3333333333333333e-06, "loss": 0.3548, "step": 350 }, { "epoch": 3.12, "grad_norm": 4.660379409790039, "learning_rate": 3.1111111111111116e-06, "loss": 0.4349, "step": 360 }, { "epoch": 3.14, "grad_norm": 11.151165008544922, "learning_rate": 2.888888888888889e-06, "loss": 0.3602, "step": 370 }, { "epoch": 3.16, "grad_norm": 9.200762748718262, "learning_rate": 2.666666666666667e-06, "loss": 0.3891, "step": 380 }, { "epoch": 3.18, "grad_norm": 6.462916851043701, "learning_rate": 2.4444444444444447e-06, "loss": 0.2933, "step": 390 }, { "epoch": 3.2, "grad_norm": 8.92557144165039, "learning_rate": 2.222222222222222e-06, "loss": 0.3188, "step": 400 }, { "epoch": 3.2, "eval_accuracy": 0.755, "eval_loss": 0.5160250663757324, "eval_runtime": 225.8295, "eval_samples_per_second": 3.542, "eval_steps_per_second": 0.221, "step": 400 }, { "epoch": 4.02, "grad_norm": 7.164823532104492, "learning_rate": 2.0000000000000003e-06, "loss": 0.2785, "step": 410 }, { "epoch": 4.04, "grad_norm": 4.740599155426025, "learning_rate": 1.777777777777778e-06, "loss": 0.3388, "step": 420 }, { "epoch": 4.06, "grad_norm": 8.506775856018066, "learning_rate": 1.5555555555555558e-06, "loss": 0.3093, "step": 430 }, { "epoch": 4.08, "grad_norm": 4.401611804962158, "learning_rate": 1.3333333333333334e-06, "loss": 0.2399, "step": 440 }, { "epoch": 4.1, "grad_norm": 7.568962574005127, "learning_rate": 1.111111111111111e-06, "loss": 0.3434, "step": 450 }, { "epoch": 4.12, "grad_norm": 9.326196670532227, "learning_rate": 8.88888888888889e-07, "loss": 0.2818, "step": 460 }, { "epoch": 4.14, "grad_norm": 6.332729816436768, "learning_rate": 6.666666666666667e-07, "loss": 0.3154, "step": 470 }, { "epoch": 4.16, "grad_norm": 11.177745819091797, "learning_rate": 4.444444444444445e-07, "loss": 0.3217, "step": 480 }, { "epoch": 4.18, "grad_norm": 6.826053619384766, "learning_rate": 2.2222222222222224e-07, "loss": 0.3167, "step": 490 }, { "epoch": 4.2, "grad_norm": 2.244813919067383, "learning_rate": 0.0, "loss": 0.2687, "step": 500 }, { "epoch": 4.2, "eval_accuracy": 0.775, "eval_loss": 0.48075544834136963, "eval_runtime": 229.5259, "eval_samples_per_second": 3.485, "eval_steps_per_second": 0.218, "step": 500 }, { "epoch": 4.2, "step": 500, "total_flos": 9.968516715773952e+18, "train_loss": 0.4460222806930542, "train_runtime": 4184.8427, "train_samples_per_second": 1.912, "train_steps_per_second": 0.119 }, { "epoch": 4.2, "eval_accuracy": 0.775, "eval_loss": 0.48075541853904724, "eval_runtime": 225.4225, "eval_samples_per_second": 3.549, "eval_steps_per_second": 0.222, "step": 500 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.968516715773952e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }