|
{ |
|
"best_metric": 0.775, |
|
"best_model_checkpoint": "videomae-base-finetuned-rwf2000-subset___v1/checkpoint-500", |
|
"epoch": 4.2, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.027842044830322, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.6934, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.1828837394714355, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.7343, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.022104740142822, |
|
"learning_rate": 6e-06, |
|
"loss": 0.6867, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.9819915294647217, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.6967, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.980625867843628, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6699, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 6.501969337463379, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 0.6533, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.093120813369751, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 0.627, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.481435775756836, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.6128, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.1239118576049805, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 0.5995, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.803208351135254, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.5205, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.68375, |
|
"eval_loss": 0.5434452891349792, |
|
"eval_runtime": 225.7874, |
|
"eval_samples_per_second": 3.543, |
|
"eval_steps_per_second": 0.221, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 4.416550159454346, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.5088, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 6.156940460205078, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.5568, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 5.02749490737915, |
|
"learning_rate": 8.222222222222222e-06, |
|
"loss": 0.5033, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 5.65871524810791, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.4887, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 5.758727550506592, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.5106, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 11.959117889404297, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.5293, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.1400000000000001, |
|
"grad_norm": 5.07788610458374, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.4665, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 4.521602153778076, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.4658, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 5.800256252288818, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 0.5002, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 3.6714704036712646, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.4084, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.655, |
|
"eval_loss": 0.5904867053031921, |
|
"eval_runtime": 247.6204, |
|
"eval_samples_per_second": 3.231, |
|
"eval_steps_per_second": 0.202, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 6.782428741455078, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 0.4441, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 4.966807842254639, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.4307, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 8.04571533203125, |
|
"learning_rate": 6e-06, |
|
"loss": 0.4022, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 6.805935859680176, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.4298, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 6.641063213348389, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.4494, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 6.542612552642822, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.4735, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 6.421093940734863, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.3762, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 5.869024753570557, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.4387, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 9.899517059326172, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.4453, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 11.099763870239258, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.4198, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.74625, |
|
"eval_loss": 0.48142582178115845, |
|
"eval_runtime": 271.5798, |
|
"eval_samples_per_second": 2.946, |
|
"eval_steps_per_second": 0.184, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 8.095602989196777, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.4279, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 5.949367046356201, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.3158, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 10.842527389526367, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.3554, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 5.070864677429199, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.2949, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 15.597376823425293, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.3548, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 4.660379409790039, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.4349, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 11.151165008544922, |
|
"learning_rate": 2.888888888888889e-06, |
|
"loss": 0.3602, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 9.200762748718262, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.3891, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 6.462916851043701, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.2933, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 8.92557144165039, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.3188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.755, |
|
"eval_loss": 0.5160250663757324, |
|
"eval_runtime": 225.8295, |
|
"eval_samples_per_second": 3.542, |
|
"eval_steps_per_second": 0.221, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 7.164823532104492, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.2785, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 4.740599155426025, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.3388, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 8.506775856018066, |
|
"learning_rate": 1.5555555555555558e-06, |
|
"loss": 0.3093, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 4.401611804962158, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.2399, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 7.568962574005127, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.3434, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 9.326196670532227, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.2818, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 6.332729816436768, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.3154, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 11.177745819091797, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.3217, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"grad_norm": 6.826053619384766, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.3167, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 2.244813919067383, |
|
"learning_rate": 0.0, |
|
"loss": 0.2687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.775, |
|
"eval_loss": 0.48075544834136963, |
|
"eval_runtime": 229.5259, |
|
"eval_samples_per_second": 3.485, |
|
"eval_steps_per_second": 0.218, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"step": 500, |
|
"total_flos": 9.968516715773952e+18, |
|
"train_loss": 0.4460222806930542, |
|
"train_runtime": 4184.8427, |
|
"train_samples_per_second": 1.912, |
|
"train_steps_per_second": 0.119 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.775, |
|
"eval_loss": 0.48075541853904724, |
|
"eval_runtime": 225.4225, |
|
"eval_samples_per_second": 3.549, |
|
"eval_steps_per_second": 0.222, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.968516715773952e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|