|
{ |
|
"best_metric": 0.9412656309208033, |
|
"best_model_checkpoint": "hubert-large-ls960-ft/checkpoint-12500", |
|
"epoch": 31.998484082870135, |
|
"global_step": 15808, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.487666034155598e-06, |
|
"loss": 4.7142, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.0, |
|
"eval_f1": 0.0, |
|
"eval_loss": 5.276514053344727, |
|
"eval_precision": 0.0, |
|
"eval_runtime": 207.9912, |
|
"eval_samples_per_second": 12.688, |
|
"eval_steps_per_second": 1.058, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8975332068311197e-05, |
|
"loss": 4.396, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.0, |
|
"eval_f1": 0.0, |
|
"eval_loss": 5.414546012878418, |
|
"eval_precision": 0.0, |
|
"eval_runtime": 211.3332, |
|
"eval_samples_per_second": 12.487, |
|
"eval_steps_per_second": 1.041, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.846299810246679e-05, |
|
"loss": 3.8883, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.0473664266767715, |
|
"eval_f1": 0.010399469379098707, |
|
"eval_loss": 4.433555603027344, |
|
"eval_precision": 0.04084585694141838, |
|
"eval_runtime": 206.03, |
|
"eval_samples_per_second": 12.809, |
|
"eval_steps_per_second": 1.068, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.9116468686300697e-05, |
|
"loss": 2.7848, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_accuracy": 0.129973474801061, |
|
"eval_f1": 0.09643252305766986, |
|
"eval_loss": 3.9772207736968994, |
|
"eval_precision": 0.1280919938791079, |
|
"eval_runtime": 212.3184, |
|
"eval_samples_per_second": 12.429, |
|
"eval_steps_per_second": 1.036, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.8062135376396992e-05, |
|
"loss": 1.8649, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_accuracy": 0.15763546798029557, |
|
"eval_f1": 0.15466013987384877, |
|
"eval_loss": 3.44816255569458, |
|
"eval_precision": 0.33391187818919965, |
|
"eval_runtime": 206.6675, |
|
"eval_samples_per_second": 12.769, |
|
"eval_steps_per_second": 1.065, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 2.700780206649329e-05, |
|
"loss": 1.3084, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_accuracy": 0.3080712391057219, |
|
"eval_f1": 0.3402390216080642, |
|
"eval_loss": 2.9702537059783936, |
|
"eval_precision": 0.5296031035295795, |
|
"eval_runtime": 212.8576, |
|
"eval_samples_per_second": 12.398, |
|
"eval_steps_per_second": 1.034, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 2.5953468756589585e-05, |
|
"loss": 0.9868, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"eval_accuracy": 0.46873815839333083, |
|
"eval_f1": 0.5353322629682942, |
|
"eval_loss": 2.3984930515289307, |
|
"eval_precision": 0.8031566212997642, |
|
"eval_runtime": 207.1643, |
|
"eval_samples_per_second": 12.739, |
|
"eval_steps_per_second": 1.062, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 2.489913544668588e-05, |
|
"loss": 0.7679, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.6521409624857901, |
|
"eval_f1": 0.7095065774545717, |
|
"eval_loss": 1.7936781644821167, |
|
"eval_precision": 0.8388581448735327, |
|
"eval_runtime": 207.3219, |
|
"eval_samples_per_second": 12.729, |
|
"eval_steps_per_second": 1.061, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 2.3844802136782175e-05, |
|
"loss": 0.6232, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"eval_accuracy": 0.7389162561576355, |
|
"eval_f1": 0.784666736405717, |
|
"eval_loss": 1.4767512083053589, |
|
"eval_precision": 0.8697758947640245, |
|
"eval_runtime": 211.7813, |
|
"eval_samples_per_second": 12.461, |
|
"eval_steps_per_second": 1.039, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 2.279046882687847e-05, |
|
"loss": 0.5126, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"eval_accuracy": 0.8287230011367942, |
|
"eval_f1": 0.8762583534303519, |
|
"eval_loss": 1.054182529449463, |
|
"eval_precision": 0.9442564310504037, |
|
"eval_runtime": 210.7585, |
|
"eval_samples_per_second": 12.521, |
|
"eval_steps_per_second": 1.044, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 2.1736135516974768e-05, |
|
"loss": 0.4453, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"eval_accuracy": 0.8518378173550587, |
|
"eval_f1": 0.8959895568662314, |
|
"eval_loss": 0.9049583673477173, |
|
"eval_precision": 0.9511477433978877, |
|
"eval_runtime": 205.4589, |
|
"eval_samples_per_second": 12.844, |
|
"eval_steps_per_second": 1.071, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 2.0681802207071063e-05, |
|
"loss": 0.3775, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"eval_accuracy": 0.8927624100037893, |
|
"eval_f1": 0.9265578308615539, |
|
"eval_loss": 0.699573278427124, |
|
"eval_precision": 0.9662252548898577, |
|
"eval_runtime": 212.4773, |
|
"eval_samples_per_second": 12.42, |
|
"eval_steps_per_second": 1.035, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 1.9627468897167357e-05, |
|
"loss": 0.3568, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"eval_accuracy": 0.8957938613111027, |
|
"eval_f1": 0.9284579551378114, |
|
"eval_loss": 0.6156648993492126, |
|
"eval_precision": 0.9743169949637361, |
|
"eval_runtime": 210.2311, |
|
"eval_samples_per_second": 12.553, |
|
"eval_steps_per_second": 1.046, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 1.8573135587263652e-05, |
|
"loss": 0.3165, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"eval_accuracy": 0.9151193633952255, |
|
"eval_f1": 0.943623023847651, |
|
"eval_loss": 0.4924512505531311, |
|
"eval_precision": 0.9763736094436646, |
|
"eval_runtime": 206.0776, |
|
"eval_samples_per_second": 12.806, |
|
"eval_steps_per_second": 1.068, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 1.751880227735995e-05, |
|
"loss": 0.2951, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"eval_accuracy": 0.9037514209928003, |
|
"eval_f1": 0.9368584519497015, |
|
"eval_loss": 0.49918055534362793, |
|
"eval_precision": 0.9772932630240506, |
|
"eval_runtime": 205.4956, |
|
"eval_samples_per_second": 12.842, |
|
"eval_steps_per_second": 1.071, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 1.6464468967456245e-05, |
|
"loss": 0.2763, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"eval_accuracy": 0.9071618037135278, |
|
"eval_f1": 0.9403855453166055, |
|
"eval_loss": 0.5212343335151672, |
|
"eval_precision": 0.9820562398022753, |
|
"eval_runtime": 211.5185, |
|
"eval_samples_per_second": 12.476, |
|
"eval_steps_per_second": 1.04, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 1.541013565755254e-05, |
|
"loss": 0.2634, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"eval_accuracy": 0.9086775293671845, |
|
"eval_f1": 0.9417778927796274, |
|
"eval_loss": 0.5201326012611389, |
|
"eval_precision": 0.9816838665228488, |
|
"eval_runtime": 205.8774, |
|
"eval_samples_per_second": 12.818, |
|
"eval_steps_per_second": 1.069, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 1.4355802347648837e-05, |
|
"loss": 0.2422, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"eval_accuracy": 0.9234558544903373, |
|
"eval_f1": 0.9514314684994079, |
|
"eval_loss": 0.45036178827285767, |
|
"eval_precision": 0.9839863223736393, |
|
"eval_runtime": 211.5273, |
|
"eval_samples_per_second": 12.476, |
|
"eval_steps_per_second": 1.04, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 1.3301469037745133e-05, |
|
"loss": 0.236, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"eval_accuracy": 0.9257294429708223, |
|
"eval_f1": 0.9518288569523149, |
|
"eval_loss": 0.3829096853733063, |
|
"eval_precision": 0.9824861532841103, |
|
"eval_runtime": 210.8744, |
|
"eval_samples_per_second": 12.515, |
|
"eval_steps_per_second": 1.043, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"learning_rate": 1.2247135727841428e-05, |
|
"loss": 0.2272, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 20.24, |
|
"eval_accuracy": 0.9154982948086396, |
|
"eval_f1": 0.9451193658066668, |
|
"eval_loss": 0.4632132947444916, |
|
"eval_precision": 0.9822249030180286, |
|
"eval_runtime": 207.0475, |
|
"eval_samples_per_second": 12.746, |
|
"eval_steps_per_second": 1.063, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"learning_rate": 1.1192802417937724e-05, |
|
"loss": 0.226, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"eval_accuracy": 0.9158772262220538, |
|
"eval_f1": 0.947028332160647, |
|
"eval_loss": 0.47312408685684204, |
|
"eval_precision": 0.9837188228053231, |
|
"eval_runtime": 210.5001, |
|
"eval_samples_per_second": 12.537, |
|
"eval_steps_per_second": 1.045, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"learning_rate": 1.013846910803402e-05, |
|
"loss": 0.2129, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 22.27, |
|
"eval_accuracy": 0.9298976885183782, |
|
"eval_f1": 0.9548846481808738, |
|
"eval_loss": 0.38141778111457825, |
|
"eval_precision": 0.983160805693422, |
|
"eval_runtime": 210.7476, |
|
"eval_samples_per_second": 12.522, |
|
"eval_steps_per_second": 1.044, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 23.28, |
|
"learning_rate": 9.084135798130316e-06, |
|
"loss": 0.2009, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 23.28, |
|
"eval_accuracy": 0.9257294429708223, |
|
"eval_f1": 0.9514840419100091, |
|
"eval_loss": 0.4119464159011841, |
|
"eval_precision": 0.9814366336318854, |
|
"eval_runtime": 206.3284, |
|
"eval_samples_per_second": 12.79, |
|
"eval_steps_per_second": 1.066, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 8.029802488226612e-06, |
|
"loss": 0.1973, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"eval_accuracy": 0.9215611974232664, |
|
"eval_f1": 0.9492716683321308, |
|
"eval_loss": 0.43100807070732117, |
|
"eval_precision": 0.98427363081955, |
|
"eval_runtime": 210.5932, |
|
"eval_samples_per_second": 12.531, |
|
"eval_steps_per_second": 1.045, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"learning_rate": 6.975469178322908e-06, |
|
"loss": 0.1965, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 25.3, |
|
"eval_accuracy": 0.9412656309208033, |
|
"eval_f1": 0.9627712250405202, |
|
"eval_loss": 0.327240914106369, |
|
"eval_precision": 0.9865248931670977, |
|
"eval_runtime": 207.1296, |
|
"eval_samples_per_second": 12.741, |
|
"eval_steps_per_second": 1.062, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 5.9211358684192026e-06, |
|
"loss": 0.1989, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"eval_accuracy": 0.9242137173171656, |
|
"eval_f1": 0.9527761591577618, |
|
"eval_loss": 0.4231082797050476, |
|
"eval_precision": 0.9877979282330233, |
|
"eval_runtime": 212.06, |
|
"eval_samples_per_second": 12.445, |
|
"eval_steps_per_second": 1.037, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 4.866802558515498e-06, |
|
"loss": 0.1916, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"eval_accuracy": 0.9283819628647215, |
|
"eval_f1": 0.9559474626706007, |
|
"eval_loss": 0.3977676033973694, |
|
"eval_precision": 0.9875814057989544, |
|
"eval_runtime": 205.5092, |
|
"eval_samples_per_second": 12.841, |
|
"eval_steps_per_second": 1.071, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 28.34, |
|
"learning_rate": 3.8124692486117947e-06, |
|
"loss": 0.1849, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 28.34, |
|
"eval_accuracy": 0.9215611974232664, |
|
"eval_f1": 0.9506673563440831, |
|
"eval_loss": 0.4528682827949524, |
|
"eval_precision": 0.9865112585967588, |
|
"eval_runtime": 212.4387, |
|
"eval_samples_per_second": 12.422, |
|
"eval_steps_per_second": 1.036, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"learning_rate": 2.7581359387080904e-06, |
|
"loss": 0.1844, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"eval_accuracy": 0.9314134141720348, |
|
"eval_f1": 0.9566213429287339, |
|
"eval_loss": 0.3853737413883209, |
|
"eval_precision": 0.9863541882706378, |
|
"eval_runtime": 205.8351, |
|
"eval_samples_per_second": 12.821, |
|
"eval_steps_per_second": 1.069, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 30.36, |
|
"learning_rate": 1.7038026288043862e-06, |
|
"loss": 0.1831, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 30.36, |
|
"eval_accuracy": 0.9257294429708223, |
|
"eval_f1": 0.9527879636596958, |
|
"eval_loss": 0.41776272654533386, |
|
"eval_precision": 0.9853006909522924, |
|
"eval_runtime": 210.65, |
|
"eval_samples_per_second": 12.528, |
|
"eval_steps_per_second": 1.044, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 31.38, |
|
"learning_rate": 6.494693189006819e-07, |
|
"loss": 0.1778, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 31.38, |
|
"eval_accuracy": 0.9359605911330049, |
|
"eval_f1": 0.9606362253618668, |
|
"eval_loss": 0.37370702624320984, |
|
"eval_precision": 0.9883638648463977, |
|
"eval_runtime": 211.0911, |
|
"eval_samples_per_second": 12.502, |
|
"eval_steps_per_second": 1.042, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"step": 15808, |
|
"total_flos": 1.2696857331997786e+20, |
|
"train_loss": 0.858633176759187, |
|
"train_runtime": 80385.1912, |
|
"train_samples_per_second": 9.454, |
|
"train_steps_per_second": 0.197 |
|
} |
|
], |
|
"max_steps": 15808, |
|
"num_train_epochs": 32, |
|
"total_flos": 1.2696857331997786e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|