|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0239344681940357, |
|
"eval_steps": 500, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004957330602491893, |
|
"loss": 2.0441, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004914661204983786, |
|
"loss": 1.713, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00048719918074756785, |
|
"loss": 1.6007, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00048293224099675715, |
|
"loss": 1.5181, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00047866530124594645, |
|
"loss": 1.4493, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 3.9997520454971136, |
|
"eval_loss": 1.3540712594985962, |
|
"eval_runtime": 10.3322, |
|
"eval_samples_per_second": 193.569, |
|
"eval_steps_per_second": 1.549, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004743983614951357, |
|
"loss": 1.4096, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000470131421744325, |
|
"loss": 1.3729, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004658644819935143, |
|
"loss": 1.3389, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00046159754224270354, |
|
"loss": 1.3166, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00045733060249189284, |
|
"loss": 1.2915, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_bleu": 5.393565315342523, |
|
"eval_loss": 1.2113043069839478, |
|
"eval_runtime": 8.5631, |
|
"eval_samples_per_second": 233.562, |
|
"eval_steps_per_second": 1.868, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00045306366274108214, |
|
"loss": 1.2669, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004487967229902714, |
|
"loss": 1.2492, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004445297832394607, |
|
"loss": 1.2371, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00044026284348864993, |
|
"loss": 1.2231, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00043599590373783923, |
|
"loss": 1.2059, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_bleu": 5.838067918496751, |
|
"eval_loss": 1.1367273330688477, |
|
"eval_runtime": 9.2404, |
|
"eval_samples_per_second": 216.442, |
|
"eval_steps_per_second": 1.732, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004317289639870285, |
|
"loss": 1.1931, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0004274620242362178, |
|
"loss": 1.1761, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0004231950844854071, |
|
"loss": 1.1792, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004189281447345963, |
|
"loss": 1.1614, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004146612049837856, |
|
"loss": 1.1573, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_bleu": 6.242227124133382, |
|
"eval_loss": 1.0900604724884033, |
|
"eval_runtime": 8.3953, |
|
"eval_samples_per_second": 238.228, |
|
"eval_steps_per_second": 1.906, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004103942652329749, |
|
"loss": 1.1449, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00040612732548216417, |
|
"loss": 1.1379, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00040186038573135347, |
|
"loss": 1.1281, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00039759344598054277, |
|
"loss": 1.1226, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00039332650622973206, |
|
"loss": 1.1121, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bleu": 6.627148578471305, |
|
"eval_loss": 1.0542418956756592, |
|
"eval_runtime": 9.2835, |
|
"eval_samples_per_second": 215.436, |
|
"eval_steps_per_second": 1.723, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0003890595664789213, |
|
"loss": 1.1076, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0003847926267281106, |
|
"loss": 1.1001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0003805256869772999, |
|
"loss": 1.0996, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00037625874722648915, |
|
"loss": 1.088, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00037199180747567845, |
|
"loss": 1.0867, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_bleu": 6.879580310573366, |
|
"eval_loss": 1.025155782699585, |
|
"eval_runtime": 8.5706, |
|
"eval_samples_per_second": 233.355, |
|
"eval_steps_per_second": 1.867, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00036772486772486775, |
|
"loss": 1.0784, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.000363457927974057, |
|
"loss": 1.0794, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0003591909882232463, |
|
"loss": 1.0736, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0003549240484724356, |
|
"loss": 1.0684, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003506571087216249, |
|
"loss": 1.0623, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_bleu": 7.039305128716119, |
|
"eval_loss": 1.0067821741104126, |
|
"eval_runtime": 9.3223, |
|
"eval_samples_per_second": 214.539, |
|
"eval_steps_per_second": 1.716, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00034639016897081414, |
|
"loss": 1.0609, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00034212322922000344, |
|
"loss": 1.0579, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00033785628946919274, |
|
"loss": 1.0517, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000333589349718382, |
|
"loss": 1.054, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003293224099675713, |
|
"loss": 1.0408, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_bleu": 7.2660174031875915, |
|
"eval_loss": 0.9882246255874634, |
|
"eval_runtime": 8.4135, |
|
"eval_samples_per_second": 237.713, |
|
"eval_steps_per_second": 1.902, |
|
"step": 4000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 11718, |
|
"num_train_epochs": 3, |
|
"save_steps": 2000, |
|
"total_flos": 1.1691349260632064e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|