Spaces:
Sleeping
Sleeping
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 2.0, | |
"eval_steps": 2000, | |
"global_step": 21994, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0, | |
"eval_HasAns_exact": 0.06747638326585695, | |
"eval_HasAns_f1": 7.18502154989456, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 0.0336417157275021, | |
"eval_NoAns_f1": 0.0336417157275021, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 50.07159100480081, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 50.075846569172874, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 0.050534826918217805, | |
"eval_f1": 3.6042118881306284, | |
"eval_runtime": -14.2699, | |
"eval_samples_per_second": -852.912, | |
"eval_steps_per_second": -13.385, | |
"eval_total": 11873, | |
"step": 0 | |
}, | |
{ | |
"epoch": 0.18186778212239701, | |
"grad_norm": 40.6475715637207, | |
"learning_rate": 2.7271983268164044e-05, | |
"loss": 1.4132, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.18186778212239701, | |
"eval_HasAns_exact": 67.25708502024291, | |
"eval_HasAns_f1": 74.98296116605809, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 70.81581160639193, | |
"eval_NoAns_f1": 70.81581160639193, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 69.03899604143855, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 72.89640308198375, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 69.03899604143855, | |
"eval_f1": 72.89640308198369, | |
"eval_runtime": 136.0827, | |
"eval_samples_per_second": 89.438, | |
"eval_steps_per_second": 1.404, | |
"eval_total": 11873, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.36373556424479403, | |
"grad_norm": 41.061927795410156, | |
"learning_rate": 2.4543966536328087e-05, | |
"loss": 1.2638, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.36373556424479403, | |
"eval_HasAns_exact": 73.59986504723346, | |
"eval_HasAns_f1": 81.00757791997825, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 65.78637510513036, | |
"eval_NoAns_f1": 65.78637510513036, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 69.68752632022235, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 73.38607950051664, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 69.68752632022235, | |
"eval_f1": 73.38607950051639, | |
"eval_runtime": 136.2286, | |
"eval_samples_per_second": 89.342, | |
"eval_steps_per_second": 1.402, | |
"eval_total": 11873, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 0.5456033463671911, | |
"grad_norm": 52.56464767456055, | |
"learning_rate": 2.1815949804492137e-05, | |
"loss": 1.3977, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 0.5456033463671911, | |
"eval_HasAns_exact": 71.94669365721997, | |
"eval_HasAns_f1": 79.2064004599959, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 72.17830109335576, | |
"eval_NoAns_f1": 72.17830109335576, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 72.07108565653162, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 75.69574176087386, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 72.0626631853786, | |
"eval_f1": 75.68731928972085, | |
"eval_runtime": 136.1854, | |
"eval_samples_per_second": 89.371, | |
"eval_steps_per_second": 1.403, | |
"eval_total": 11873, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 0.7274711284895881, | |
"grad_norm": 49.72623825073242, | |
"learning_rate": 1.908793307265618e-05, | |
"loss": 1.3384, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 0.7274711284895881, | |
"eval_HasAns_exact": 72.0310391363023, | |
"eval_HasAns_f1": 78.95689148837117, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 75.00420521446594, | |
"eval_NoAns_f1": 75.00420521446594, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 73.51975069485387, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 76.9777185835984, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 73.51975069485387, | |
"eval_f1": 76.97771858359845, | |
"eval_runtime": 135.7796, | |
"eval_samples_per_second": 89.638, | |
"eval_steps_per_second": 1.407, | |
"eval_total": 11873, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 0.9093389106119851, | |
"grad_norm": 39.458927154541016, | |
"learning_rate": 1.6359916340820223e-05, | |
"loss": 1.2258, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 0.9093389106119851, | |
"eval_HasAns_exact": 73.54925775978407, | |
"eval_HasAns_f1": 80.29283351227244, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 76.23212783851976, | |
"eval_NoAns_f1": 76.23212783851976, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 74.88419102164575, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 78.25115110424915, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 74.89261349279879, | |
"eval_f1": 78.25957357540227, | |
"eval_runtime": 136.1576, | |
"eval_samples_per_second": 89.389, | |
"eval_steps_per_second": 1.403, | |
"eval_total": 11873, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 1.0912066927343822, | |
"grad_norm": 41.23106002807617, | |
"learning_rate": 1.363189960898427e-05, | |
"loss": 0.7822, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 1.0912066927343822, | |
"eval_HasAns_exact": 77.26045883940621, | |
"eval_HasAns_f1": 84.1124672080756, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 69.97476871320437, | |
"eval_NoAns_f1": 69.97476871320437, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 73.6208203486903, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 77.04191911138483, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 73.61239787753728, | |
"eval_f1": 77.0334966402318, | |
"eval_runtime": 135.9367, | |
"eval_samples_per_second": 89.534, | |
"eval_steps_per_second": 1.405, | |
"eval_total": 11873, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 1.273074474856779, | |
"grad_norm": 21.682905197143555, | |
"learning_rate": 1.0903882877148312e-05, | |
"loss": 0.6771, | |
"step": 14000 | |
}, | |
{ | |
"epoch": 1.273074474856779, | |
"eval_HasAns_exact": 73.78542510121457, | |
"eval_HasAns_f1": 80.35769680092089, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 80.7569386038688, | |
"eval_NoAns_f1": 80.7569386038688, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 77.27617282910806, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 80.55760352361308, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 77.27617282910806, | |
"eval_f1": 80.55760352361317, | |
"eval_runtime": 136.5688, | |
"eval_samples_per_second": 89.12, | |
"eval_steps_per_second": 1.399, | |
"eval_total": 11873, | |
"step": 14000 | |
}, | |
{ | |
"epoch": 1.4549422569791761, | |
"grad_norm": 56.2855339050293, | |
"learning_rate": 8.175866145312359e-06, | |
"loss": 0.6521, | |
"step": 16000 | |
}, | |
{ | |
"epoch": 1.4549422569791761, | |
"eval_HasAns_exact": 71.82860998650472, | |
"eval_HasAns_f1": 77.91336965584932, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 85.18082422203533, | |
"eval_NoAns_f1": 85.18082422203533, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 78.52269855975743, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 81.56072225384261, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 78.51427608860439, | |
"eval_f1": 81.55229978268969, | |
"eval_runtime": 136.3366, | |
"eval_samples_per_second": 89.272, | |
"eval_steps_per_second": 1.401, | |
"eval_total": 11873, | |
"step": 16000 | |
}, | |
{ | |
"epoch": 1.6368100391015732, | |
"grad_norm": 24.467727661132812, | |
"learning_rate": 5.447849413476403e-06, | |
"loss": 0.6455, | |
"step": 18000 | |
}, | |
{ | |
"epoch": 1.6368100391015732, | |
"eval_HasAns_exact": 75.57354925775978, | |
"eval_HasAns_f1": 82.12994203860852, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 79.0412111017662, | |
"eval_NoAns_f1": 79.0412111017662, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 77.31828518487325, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 80.59178778782702, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 77.30986271372021, | |
"eval_f1": 80.58336531667406, | |
"eval_runtime": 136.1893, | |
"eval_samples_per_second": 89.368, | |
"eval_steps_per_second": 1.402, | |
"eval_total": 11873, | |
"step": 18000 | |
}, | |
{ | |
"epoch": 1.8186778212239703, | |
"grad_norm": 36.233985900878906, | |
"learning_rate": 2.7198326816404476e-06, | |
"loss": 0.6175, | |
"step": 20000 | |
}, | |
{ | |
"epoch": 1.8186778212239703, | |
"eval_HasAns_exact": 74.19028340080972, | |
"eval_HasAns_f1": 80.57880057678263, | |
"eval_HasAns_total": 5928, | |
"eval_NoAns_exact": 83.46509671993272, | |
"eval_NoAns_f1": 83.46509671993272, | |
"eval_NoAns_total": 5945, | |
"eval_best_exact": 78.83432999241978, | |
"eval_best_exact_thresh": 0.0, | |
"eval_best_f1": 82.02401497676792, | |
"eval_best_f1_thresh": 0.0, | |
"eval_exact": 78.83432999241978, | |
"eval_f1": 82.02401497676809, | |
"eval_runtime": 136.3043, | |
"eval_samples_per_second": 89.293, | |
"eval_steps_per_second": 1.401, | |
"eval_total": 11873, | |
"step": 20000 | |
}, | |
{ | |
"epoch": 2.0, | |
"step": 21994, | |
"total_flos": 4371201741330432.0, | |
"train_loss": 0.9652459149015072, | |
"train_runtime": 5408.776, | |
"train_samples_per_second": 48.794, | |
"train_steps_per_second": 4.066 | |
} | |
], | |
"logging_steps": 2000, | |
"max_steps": 21994, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 2, | |
"save_steps": 2000, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": true | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 4371201741330432.0, | |
"train_batch_size": 12, | |
"trial_name": null, | |
"trial_params": null | |
} | |