|
{
  "best_metric": 0.5404761904761904,
  "best_model_checkpoint": "videomae-base-finetuned-ElderReact-anger-balanced-hp/checkpoint-400",
  "epoch": 19.010416666666668,
  "eval_steps": 500,
  "global_step": 480,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 4.783530235290527,
      "learning_rate": 0.00020833333333333335,
      "loss": 0.7372,
      "step": 10
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.4003899097442627,
      "learning_rate": 0.0004166666666666667,
      "loss": 0.7532,
      "step": 20
    },
    {
      "epoch": 0.05,
      "eval_accuracy": 0.5238095238095238,
      "eval_loss": 0.7077798247337341,
      "eval_runtime": 171.2381,
      "eval_samples_per_second": 2.453,
      "eval_steps_per_second": 0.158,
      "step": 25
    },
    {
      "epoch": 1.01,
      "grad_norm": 4.148990631103516,
      "learning_rate": 0.000625,
      "loss": 0.8004,
      "step": 30
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1322150230407715,
      "learning_rate": 0.0008333333333333334,
      "loss": 0.7903,
      "step": 40
    },
    {
      "epoch": 1.05,
      "grad_norm": 13.590558052062988,
      "learning_rate": 0.0009953703703703704,
      "loss": 0.7571,
      "step": 50
    },
    {
      "epoch": 1.05,
      "eval_accuracy": 0.47619047619047616,
      "eval_loss": 0.7033982276916504,
      "eval_runtime": 171.2144,
      "eval_samples_per_second": 2.453,
      "eval_steps_per_second": 0.158,
      "step": 50
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.682979106903076,
      "learning_rate": 0.0009722222222222222,
      "loss": 0.723,
      "step": 60
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.8970884084701538,
      "learning_rate": 0.0009490740740740741,
      "loss": 0.7357,
      "step": 70
    },
    {
      "epoch": 2.05,
      "eval_accuracy": 0.44285714285714284,
      "eval_loss": 0.7080289125442505,
      "eval_runtime": 124.8954,
      "eval_samples_per_second": 3.363,
      "eval_steps_per_second": 0.216,
      "step": 75
    },
    {
      "epoch": 3.01,
      "grad_norm": 4.713415145874023,
      "learning_rate": 0.000925925925925926,
      "loss": 0.7389,
      "step": 80
    },
    {
      "epoch": 3.03,
      "grad_norm": 2.461824893951416,
      "learning_rate": 0.0009027777777777778,
      "loss": 0.6913,
      "step": 90
    },
    {
      "epoch": 3.05,
      "grad_norm": 9.549880981445312,
      "learning_rate": 0.0008796296296296296,
      "loss": 0.6976,
      "step": 100
    },
    {
      "epoch": 3.05,
      "eval_accuracy": 0.5238095238095238,
      "eval_loss": 0.716019332408905,
      "eval_runtime": 123.304,
      "eval_samples_per_second": 3.406,
      "eval_steps_per_second": 0.219,
      "step": 100
    },
    {
      "epoch": 4.02,
      "grad_norm": 2.853116273880005,
      "learning_rate": 0.0008564814814814815,
      "loss": 0.7026,
      "step": 110
    },
    {
      "epoch": 4.04,
      "grad_norm": 5.189712047576904,
      "learning_rate": 0.0008333333333333334,
      "loss": 0.7131,
      "step": 120
    },
    {
      "epoch": 4.05,
      "eval_accuracy": 0.4714285714285714,
      "eval_loss": 0.6892691254615784,
      "eval_runtime": 124.5641,
      "eval_samples_per_second": 3.372,
      "eval_steps_per_second": 0.217,
      "step": 125
    },
    {
      "epoch": 5.01,
      "grad_norm": 9.913081169128418,
      "learning_rate": 0.0008101851851851853,
      "loss": 0.7349,
      "step": 130
    },
    {
      "epoch": 5.03,
      "grad_norm": 5.214167594909668,
      "learning_rate": 0.0007870370370370372,
      "loss": 0.7542,
      "step": 140
    },
    {
      "epoch": 5.05,
      "grad_norm": 7.558966636657715,
      "learning_rate": 0.0007638888888888888,
      "loss": 0.7275,
      "step": 150
    },
    {
      "epoch": 5.05,
      "eval_accuracy": 0.4928571428571429,
      "eval_loss": 0.8349756002426147,
      "eval_runtime": 173.5792,
      "eval_samples_per_second": 2.42,
      "eval_steps_per_second": 0.156,
      "step": 150
    },
    {
      "epoch": 6.02,
      "grad_norm": 0.9638558030128479,
      "learning_rate": 0.0007407407407407407,
      "loss": 0.8133,
      "step": 160
    },
    {
      "epoch": 6.04,
      "grad_norm": 2.0914902687072754,
      "learning_rate": 0.0007175925925925926,
      "loss": 0.7334,
      "step": 170
    },
    {
      "epoch": 6.05,
      "eval_accuracy": 0.4738095238095238,
      "eval_loss": 0.7126601338386536,
      "eval_runtime": 163.6881,
      "eval_samples_per_second": 2.566,
      "eval_steps_per_second": 0.165,
      "step": 175
    },
    {
      "epoch": 7.01,
      "grad_norm": 1.6074931621551514,
      "learning_rate": 0.0006944444444444445,
      "loss": 0.6938,
      "step": 180
    },
    {
      "epoch": 7.03,
      "grad_norm": 1.0195335149765015,
      "learning_rate": 0.0006712962962962962,
      "loss": 0.6986,
      "step": 190
    },
    {
      "epoch": 7.05,
      "grad_norm": 10.589719772338867,
      "learning_rate": 0.0006481481481481481,
      "loss": 0.7274,
      "step": 200
    },
    {
      "epoch": 7.05,
      "eval_accuracy": 0.5047619047619047,
      "eval_loss": 0.7088494896888733,
      "eval_runtime": 163.5307,
      "eval_samples_per_second": 2.568,
      "eval_steps_per_second": 0.165,
      "step": 200
    },
    {
      "epoch": 8.02,
      "grad_norm": 1.649876356124878,
      "learning_rate": 0.000625,
      "loss": 0.6904,
      "step": 210
    },
    {
      "epoch": 8.04,
      "grad_norm": 6.6575798988342285,
      "learning_rate": 0.0006018518518518519,
      "loss": 0.697,
      "step": 220
    },
    {
      "epoch": 8.05,
      "eval_accuracy": 0.5190476190476191,
      "eval_loss": 0.6910755038261414,
      "eval_runtime": 125.391,
      "eval_samples_per_second": 3.35,
      "eval_steps_per_second": 0.215,
      "step": 225
    },
    {
      "epoch": 9.01,
      "grad_norm": 3.279046058654785,
      "learning_rate": 0.0005787037037037038,
      "loss": 0.7072,
      "step": 230
    },
    {
      "epoch": 9.03,
      "grad_norm": 1.2495840787887573,
      "learning_rate": 0.0005555555555555556,
      "loss": 0.694,
      "step": 240
    },
    {
      "epoch": 9.05,
      "grad_norm": 11.012711524963379,
      "learning_rate": 0.0005324074074074074,
      "loss": 0.7605,
      "step": 250
    },
    {
      "epoch": 9.05,
      "eval_accuracy": 0.4976190476190476,
      "eval_loss": 0.7295739054679871,
      "eval_runtime": 127.4402,
      "eval_samples_per_second": 3.296,
      "eval_steps_per_second": 0.212,
      "step": 250
    },
    {
      "epoch": 10.02,
      "grad_norm": 1.9475804567337036,
      "learning_rate": 0.0005092592592592593,
      "loss": 0.7224,
      "step": 260
    },
    {
      "epoch": 10.04,
      "grad_norm": 2.378416061401367,
      "learning_rate": 0.0004861111111111111,
      "loss": 0.7105,
      "step": 270
    },
    {
      "epoch": 10.05,
      "eval_accuracy": 0.48333333333333334,
      "eval_loss": 0.710009753704071,
      "eval_runtime": 122.4996,
      "eval_samples_per_second": 3.429,
      "eval_steps_per_second": 0.22,
      "step": 275
    },
    {
      "epoch": 11.01,
      "grad_norm": 0.702026903629303,
      "learning_rate": 0.000462962962962963,
      "loss": 0.695,
      "step": 280
    },
    {
      "epoch": 11.03,
      "grad_norm": 2.676254987716675,
      "learning_rate": 0.0004398148148148148,
      "loss": 0.6865,
      "step": 290
    },
    {
      "epoch": 11.05,
      "grad_norm": 7.562650680541992,
      "learning_rate": 0.0004166666666666667,
      "loss": 0.6745,
      "step": 300
    },
    {
      "epoch": 11.05,
      "eval_accuracy": 0.45476190476190476,
      "eval_loss": 0.7270693778991699,
      "eval_runtime": 175.5484,
      "eval_samples_per_second": 2.393,
      "eval_steps_per_second": 0.154,
      "step": 300
    },
    {
      "epoch": 12.02,
      "grad_norm": 4.304387092590332,
      "learning_rate": 0.0003935185185185186,
      "loss": 0.6889,
      "step": 310
    },
    {
      "epoch": 12.04,
      "grad_norm": 0.35395267605781555,
      "learning_rate": 0.00037037037037037035,
      "loss": 0.7166,
      "step": 320
    },
    {
      "epoch": 12.05,
      "eval_accuracy": 0.5285714285714286,
      "eval_loss": 0.6954912543296814,
      "eval_runtime": 177.5444,
      "eval_samples_per_second": 2.366,
      "eval_steps_per_second": 0.152,
      "step": 325
    },
    {
      "epoch": 13.01,
      "grad_norm": 0.8164138793945312,
      "learning_rate": 0.00034722222222222224,
      "loss": 0.7363,
      "step": 330
    },
    {
      "epoch": 13.03,
      "grad_norm": 1.0167158842086792,
      "learning_rate": 0.00032407407407407406,
      "loss": 0.6993,
      "step": 340
    },
    {
      "epoch": 13.05,
      "grad_norm": 7.524409294128418,
      "learning_rate": 0.00030092592592592595,
      "loss": 0.6849,
      "step": 350
    },
    {
      "epoch": 13.05,
      "eval_accuracy": 0.4976190476190476,
      "eval_loss": 0.698103666305542,
      "eval_runtime": 161.7269,
      "eval_samples_per_second": 2.597,
      "eval_steps_per_second": 0.167,
      "step": 350
    },
    {
      "epoch": 14.02,
      "grad_norm": 1.1957156658172607,
      "learning_rate": 0.0002777777777777778,
      "loss": 0.6888,
      "step": 360
    },
    {
      "epoch": 14.04,
      "grad_norm": 1.1630483865737915,
      "learning_rate": 0.00025462962962962966,
      "loss": 0.6978,
      "step": 370
    },
    {
      "epoch": 14.05,
      "eval_accuracy": 0.49523809523809526,
      "eval_loss": 0.697565495967865,
      "eval_runtime": 162.8303,
      "eval_samples_per_second": 2.579,
      "eval_steps_per_second": 0.166,
      "step": 375
    },
    {
      "epoch": 15.01,
      "grad_norm": 2.8283958435058594,
      "learning_rate": 0.0002314814814814815,
      "loss": 0.6931,
      "step": 380
    },
    {
      "epoch": 15.03,
      "grad_norm": 1.4520641565322876,
      "learning_rate": 0.00020833333333333335,
      "loss": 0.6966,
      "step": 390
    },
    {
      "epoch": 15.05,
      "grad_norm": 9.017864227294922,
      "learning_rate": 0.00018518518518518518,
      "loss": 0.6928,
      "step": 400
    },
    {
      "epoch": 15.05,
      "eval_accuracy": 0.5404761904761904,
      "eval_loss": 0.6940857768058777,
      "eval_runtime": 165.3836,
      "eval_samples_per_second": 2.54,
      "eval_steps_per_second": 0.163,
      "step": 400
    },
    {
      "epoch": 16.02,
      "grad_norm": 2.541412591934204,
      "learning_rate": 0.00016203703703703703,
      "loss": 0.6795,
      "step": 410
    },
    {
      "epoch": 16.04,
      "grad_norm": 0.8520733118057251,
      "learning_rate": 0.0001388888888888889,
      "loss": 0.7057,
      "step": 420
    },
    {
      "epoch": 16.05,
      "eval_accuracy": 0.5,
      "eval_loss": 0.7022379636764526,
      "eval_runtime": 164.8925,
      "eval_samples_per_second": 2.547,
      "eval_steps_per_second": 0.164,
      "step": 425
    },
    {
      "epoch": 17.01,
      "grad_norm": 2.4603936672210693,
      "learning_rate": 0.00011574074074074075,
      "loss": 0.733,
      "step": 430
    },
    {
      "epoch": 17.03,
      "grad_norm": 2.6278584003448486,
      "learning_rate": 9.259259259259259e-05,
      "loss": 0.6902,
      "step": 440
    },
    {
      "epoch": 17.05,
      "grad_norm": 8.034465789794922,
      "learning_rate": 6.944444444444444e-05,
      "loss": 0.6842,
      "step": 450
    },
    {
      "epoch": 17.05,
      "eval_accuracy": 0.4738095238095238,
      "eval_loss": 0.6942616701126099,
      "eval_runtime": 172.9688,
      "eval_samples_per_second": 2.428,
      "eval_steps_per_second": 0.156,
      "step": 450
    },
    {
      "epoch": 18.02,
      "grad_norm": 1.0102304220199585,
      "learning_rate": 4.6296296296296294e-05,
      "loss": 0.6921,
      "step": 460
    },
    {
      "epoch": 18.04,
      "grad_norm": 1.547524094581604,
      "learning_rate": 2.3148148148148147e-05,
      "loss": 0.6824,
      "step": 470
    },
    {
      "epoch": 18.05,
      "eval_accuracy": 0.5166666666666667,
      "eval_loss": 0.694518506526947,
      "eval_runtime": 164.6584,
      "eval_samples_per_second": 2.551,
      "eval_steps_per_second": 0.164,
      "step": 475
    },
    {
      "epoch": 19.01,
      "grad_norm": 0.8265247941017151,
      "learning_rate": 0.0,
      "loss": 0.7065,
      "step": 480
    },
    {
      "epoch": 19.01,
      "eval_accuracy": 0.5142857142857142,
      "eval_loss": 0.6948422193527222,
      "eval_runtime": 157.477,
      "eval_samples_per_second": 2.667,
      "eval_steps_per_second": 0.171,
      "step": 480
    },
    {
      "epoch": 19.01,
      "step": 480,
      "total_flos": 9.214647639143547e+18,
      "train_loss": 0.7152137100696564,
      "train_runtime": 6018.7779,
      "train_samples_per_second": 1.276,
      "train_steps_per_second": 0.08
    },
    {
      "epoch": 19.01,
      "eval_accuracy": 0.4671916010498688,
      "eval_loss": 0.6937770247459412,
      "eval_runtime": 144.3495,
      "eval_samples_per_second": 2.639,
      "eval_steps_per_second": 0.166,
      "step": 480
    },
    {
      "epoch": 19.01,
      "eval_accuracy": 0.4671916010498688,
      "eval_loss": 0.6937723755836487,
      "eval_runtime": 152.0578,
      "eval_samples_per_second": 2.506,
      "eval_steps_per_second": 0.158,
      "step": 480
    }
  ],
  "logging_steps": 10,
  "max_steps": 480,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 9.214647639143547e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|