|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.22919483853223627, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4084967333570947e-06, |
|
"loss": 2.2507, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.0507482022971233e-06, |
|
"loss": 1.9542, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.385606273598312e-06, |
|
"loss": 1.8446, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6136695401116585e-06, |
|
"loss": 1.831, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7868297632261957e-06, |
|
"loss": 1.8121, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.926458092787486e-06, |
|
"loss": 1.7884, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.0434580045013773e-06, |
|
"loss": 1.755, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.1441512086208035e-06, |
|
"loss": 1.7662, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.232532087697698e-06, |
|
"loss": 1.7246, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.3112862237770753e-06, |
|
"loss": 1.7563, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.3823062961420163e-06, |
|
"loss": 1.7531, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.446976436243603e-06, |
|
"loss": 1.7334, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.506339534926595e-06, |
|
"loss": 1.7231, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.5612009452606784e-06, |
|
"loss": 1.7151, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.612195557913627e-06, |
|
"loss": 1.7218, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.65983275401539e-06, |
|
"loss": 1.7144, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.7045274519126395e-06, |
|
"loss": 1.7195, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.7466221106030114e-06, |
|
"loss": 1.6989, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.786402677560832e-06, |
|
"loss": 1.7034, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.824110376935989e-06, |
|
"loss": 1.7049, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.8599505757615295e-06, |
|
"loss": 1.7457, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.894099556414216e-06, |
|
"loss": 1.7092, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.9267097619885385e-06, |
|
"loss": 1.7283, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.95791391001684e-06, |
|
"loss": 1.6915, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.987828255432777e-06, |
|
"loss": 1.6902, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.016555205552159e-06, |
|
"loss": 1.7059, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.044185435607626e-06, |
|
"loss": 1.7044, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.070799615107415e-06, |
|
"loss": 1.6984, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.096469827889988e-06, |
|
"loss": 1.7203, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.121260748862021e-06, |
|
"loss": 1.7046, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.145230625795312e-06, |
|
"loss": 1.6732, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.1684321036962525e-06, |
|
"loss": 1.6948, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.190912921100477e-06, |
|
"loss": 1.656, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.212716501452232e-06, |
|
"loss": 1.6729, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.233882457984791e-06, |
|
"loss": 1.7092, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.2544470268536555e-06, |
|
"loss": 1.6883, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.27444344042015e-06, |
|
"loss": 1.6877, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.293902250342989e-06, |
|
"loss": 1.6774, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.312851608364853e-06, |
|
"loss": 1.6957, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3313175112718595e-06, |
|
"loss": 1.6848, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3493240153753665e-06, |
|
"loss": 1.682, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.366893424956263e-06, |
|
"loss": 1.6724, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.38404645837504e-06, |
|
"loss": 1.7079, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.400802394950703e-06, |
|
"loss": 1.6605, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4171792052198945e-06, |
|
"loss": 1.6822, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.433193666783084e-06, |
|
"loss": 1.6731, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.448861467610187e-06, |
|
"loss": 1.6648, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4641972984001906e-06, |
|
"loss": 1.6781, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.479214935357724e-06, |
|
"loss": 1.6752, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.493927314555554e-06, |
|
"loss": 1.6754, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_webgpt_accuracy": 0.5055517960817275, |
|
"eval_webgpt_loss": 2.15625, |
|
"eval_webgpt_runtime": 39.0916, |
|
"eval_webgpt_samples_per_second": 100.175, |
|
"eval_webgpt_steps_per_second": 1.253, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_prompt_dialogue_accuracy": 0.6254543673617606, |
|
"eval_prompt_dialogue_loss": 1.357421875, |
|
"eval_prompt_dialogue_runtime": 71.3081, |
|
"eval_prompt_dialogue_samples_per_second": 144.57, |
|
"eval_prompt_dialogue_steps_per_second": 1.809, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_adversarial_qa_accuracy": 0.8029728725380899, |
|
"eval_adversarial_qa_loss": 0.70654296875, |
|
"eval_adversarial_qa_runtime": 20.7874, |
|
"eval_adversarial_qa_samples_per_second": 144.318, |
|
"eval_adversarial_qa_steps_per_second": 1.828, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_xsum_accuracy": 0.632906181388279, |
|
"eval_xsum_loss": 1.3935546875, |
|
"eval_xsum_runtime": 122.5752, |
|
"eval_xsum_samples_per_second": 92.449, |
|
"eval_xsum_steps_per_second": 1.158, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_cnn_dailymail_accuracy": 0.7001129736496595, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 144.5118, |
|
"eval_cnn_dailymail_samples_per_second": 92.505, |
|
"eval_cnn_dailymail_steps_per_second": 1.163, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_multi_news_accuracy": 0.5801641857474902, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 62.7124, |
|
"eval_multi_news_samples_per_second": 89.647, |
|
"eval_multi_news_steps_per_second": 1.132, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_scitldr_accuracy": 0.4978125, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 8.0438, |
|
"eval_scitldr_samples_per_second": 76.954, |
|
"eval_scitldr_steps_per_second": 0.995, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_joke_accuracy": 0.5093821076573162, |
|
"eval_joke_loss": 2.1171875, |
|
"eval_joke_runtime": 0.9767, |
|
"eval_joke_samples_per_second": 77.813, |
|
"eval_joke_steps_per_second": 1.024, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_gsm8k_accuracy": 0.7808137739345274, |
|
"eval_gsm8k_loss": 0.8134765625, |
|
"eval_gsm8k_runtime": 9.4148, |
|
"eval_gsm8k_samples_per_second": 140.099, |
|
"eval_gsm8k_steps_per_second": 1.806, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_dive_mt_accuracy": 0.7367253212240054, |
|
"eval_dive_mt_loss": 1.04296875, |
|
"eval_dive_mt_runtime": 10.0465, |
|
"eval_dive_mt_samples_per_second": 128.403, |
|
"eval_dive_mt_steps_per_second": 1.692, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_math_qa_accuracy": 0.608876541257212, |
|
"eval_math_qa_loss": 1.6689453125, |
|
"eval_math_qa_runtime": 30.5447, |
|
"eval_math_qa_samples_per_second": 146.507, |
|
"eval_math_qa_steps_per_second": 1.833, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_essay_instruction_accuracy": 0.6053833226455565, |
|
"eval_essay_instruction_loss": 1.8876953125, |
|
"eval_essay_instruction_runtime": 8.3301, |
|
"eval_essay_instruction_samples_per_second": 49.579, |
|
"eval_essay_instruction_steps_per_second": 0.72, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_tldr_news_accuracy": 0.6061431123968348, |
|
"eval_tldr_news_loss": 1.697265625, |
|
"eval_tldr_news_runtime": 5.1098, |
|
"eval_tldr_news_samples_per_second": 155.389, |
|
"eval_tldr_news_steps_per_second": 1.957, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_reddit_eli5_accuracy": 0.46120351563185963, |
|
"eval_reddit_eli5_loss": 2.423828125, |
|
"eval_reddit_eli5_runtime": 107.5158, |
|
"eval_reddit_eli5_samples_per_second": 91.261, |
|
"eval_reddit_eli5_steps_per_second": 1.144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_reddit_asks_accuracy": 0.4690436591507088, |
|
"eval_reddit_asks_loss": 2.412109375, |
|
"eval_reddit_asks_runtime": 32.2161, |
|
"eval_reddit_asks_samples_per_second": 70.803, |
|
"eval_reddit_asks_steps_per_second": 0.9, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_reddit_askh_accuracy": 0.466922516495131, |
|
"eval_reddit_askh_loss": 2.513671875, |
|
"eval_reddit_askh_runtime": 61.0062, |
|
"eval_reddit_askh_samples_per_second": 80.336, |
|
"eval_reddit_askh_steps_per_second": 1.016, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_wmt2019_zh-en_accuracy": 0.6711313468964325, |
|
"eval_wmt2019_zh-en_loss": 1.44140625, |
|
"eval_wmt2019_zh-en_runtime": 27.1705, |
|
"eval_wmt2019_zh-en_samples_per_second": 146.519, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.84, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_wmt2019_fr-de_accuracy": 0.751335577309082, |
|
"eval_wmt2019_fr-de_loss": 0.9892578125, |
|
"eval_wmt2019_fr-de_runtime": 9.8591, |
|
"eval_wmt2019_fr-de_samples_per_second": 153.361, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.927, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_wmt2019_ru-en_accuracy": 0.7610682787220373, |
|
"eval_wmt2019_ru-en_loss": 0.92138671875, |
|
"eval_wmt2019_ru-en_runtime": 21.983, |
|
"eval_wmt2019_ru-en_samples_per_second": 136.469, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.729, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_wmt2019_de-en_accuracy": 0.7658361423127319, |
|
"eval_wmt2019_de-en_loss": 0.92041015625, |
|
"eval_wmt2019_de-en_runtime": 17.0498, |
|
"eval_wmt2019_de-en_samples_per_second": 175.838, |
|
"eval_wmt2019_de-en_steps_per_second": 2.229, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_ted_trans_de-ja_accuracy": 0.6635957565605807, |
|
"eval_ted_trans_de-ja_loss": 1.4384765625, |
|
"eval_ted_trans_de-ja_runtime": 7.9688, |
|
"eval_ted_trans_de-ja_samples_per_second": 90.101, |
|
"eval_ted_trans_de-ja_steps_per_second": 1.129, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_ted_trans_en-ja_accuracy": 0.6737063575554276, |
|
"eval_ted_trans_en-ja_loss": 1.3544921875, |
|
"eval_ted_trans_en-ja_runtime": 9.6629, |
|
"eval_ted_trans_en-ja_samples_per_second": 82.894, |
|
"eval_ted_trans_en-ja_steps_per_second": 1.138, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_ted_trans_en-hi_accuracy": 0.6986381322957198, |
|
"eval_ted_trans_en-hi_loss": 1.1357421875, |
|
"eval_ted_trans_en-hi_runtime": 2.3375, |
|
"eval_ted_trans_en-hi_samples_per_second": 44.064, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.856, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_ted_trans_en-es_accuracy": 0.7880831502109065, |
|
"eval_ted_trans_en-es_loss": 0.87353515625, |
|
"eval_ted_trans_en-es_runtime": 8.2834, |
|
"eval_ted_trans_en-es_samples_per_second": 99.718, |
|
"eval_ted_trans_en-es_steps_per_second": 1.328, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_private_tuning_accuracy": 0.6889973407198902, |
|
"eval_private_tuning_loss": 1.130859375, |
|
"eval_private_tuning_runtime": 142.1785, |
|
"eval_private_tuning_samples_per_second": 148.954, |
|
"eval_private_tuning_steps_per_second": 1.864, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_samsum_accuracy": 0.6474302924317498, |
|
"eval_samsum_loss": 1.27734375, |
|
"eval_samsum_runtime": 12.2877, |
|
"eval_samsum_samples_per_second": 66.571, |
|
"eval_samsum_steps_per_second": 0.895, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_prosocial_dialogue_accuracy": 0.5408795463448586, |
|
"eval_prosocial_dialogue_loss": 1.7060546875, |
|
"eval_prosocial_dialogue_runtime": 48.1618, |
|
"eval_prosocial_dialogue_samples_per_second": 560.257, |
|
"eval_prosocial_dialogue_steps_per_second": 7.018, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_oa_translated_accuracy": 0.719222779150248, |
|
"eval_oa_translated_loss": 1.1240234375, |
|
"eval_oa_translated_runtime": 59.7453, |
|
"eval_oa_translated_samples_per_second": 86.484, |
|
"eval_oa_translated_steps_per_second": 1.088, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_wikihow_accuracy": 0.622957980862571, |
|
"eval_wikihow_loss": 1.7578125, |
|
"eval_wikihow_runtime": 15.5342, |
|
"eval_wikihow_samples_per_second": 147.61, |
|
"eval_wikihow_steps_per_second": 1.867, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_explain_prosocial_accuracy": 0.6863205647867285, |
|
"eval_explain_prosocial_loss": 1.310546875, |
|
"eval_explain_prosocial_runtime": 111.3962, |
|
"eval_explain_prosocial_samples_per_second": 549.821, |
|
"eval_explain_prosocial_steps_per_second": 6.876, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5083465988888945e-06, |
|
"loss": 1.6702, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5224842384899045e-06, |
|
"loss": 1.6841, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5363510253542444e-06, |
|
"loss": 1.6574, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.549957142832593e-06, |
|
"loss": 1.673, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.563312210555719e-06, |
|
"loss": 1.6541, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.576425325289549e-06, |
|
"loss": 1.6516, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.589305098154845e-06, |
|
"loss": 1.6717, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.601959688592886e-06, |
|
"loss": 1.6191, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.614396835412691e-06, |
|
"loss": 1.6685, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.626623885215616e-06, |
|
"loss": 1.6424, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.638647818458763e-06, |
|
"loss": 1.6391, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.650475273388737e-06, |
|
"loss": 1.6604, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.662112568051194e-06, |
|
"loss": 1.6693, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.673565720558918e-06, |
|
"loss": 1.6437, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.6848404677811685e-06, |
|
"loss": 1.6688, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.695942282599635e-06, |
|
"loss": 1.6521, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.706876389860915e-06, |
|
"loss": 1.6568, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.717647781141908e-06, |
|
"loss": 1.6462, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7282612284325845e-06, |
|
"loss": 1.6463, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.738721296830016e-06, |
|
"loss": 1.6495, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.749032356328167e-06, |
|
"loss": 1.6536, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.759198592779668e-06, |
|
"loss": 1.6366, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.769224018098397e-06, |
|
"loss": 1.6626, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7791124797650865e-06, |
|
"loss": 1.616, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.788867669692332e-06, |
|
"loss": 1.6401, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_webgpt_accuracy": 0.5064634455053698, |
|
"eval_webgpt_loss": 2.14453125, |
|
"eval_webgpt_runtime": 38.6915, |
|
"eval_webgpt_samples_per_second": 101.211, |
|
"eval_webgpt_steps_per_second": 1.266, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_prompt_dialogue_accuracy": 0.6281575769303819, |
|
"eval_prompt_dialogue_loss": 1.3408203125, |
|
"eval_prompt_dialogue_runtime": 73.877, |
|
"eval_prompt_dialogue_samples_per_second": 139.543, |
|
"eval_prompt_dialogue_steps_per_second": 1.746, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_adversarial_qa_accuracy": 0.8144184318097362, |
|
"eval_adversarial_qa_loss": 0.67626953125, |
|
"eval_adversarial_qa_runtime": 20.0332, |
|
"eval_adversarial_qa_samples_per_second": 149.751, |
|
"eval_adversarial_qa_steps_per_second": 1.897, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_xsum_accuracy": 0.6340356358723188, |
|
"eval_xsum_loss": 1.3828125, |
|
"eval_xsum_runtime": 121.341, |
|
"eval_xsum_samples_per_second": 93.39, |
|
"eval_xsum_steps_per_second": 1.17, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_cnn_dailymail_accuracy": 0.7028426612927849, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 144.2535, |
|
"eval_cnn_dailymail_samples_per_second": 92.67, |
|
"eval_cnn_dailymail_steps_per_second": 1.165, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_multi_news_accuracy": 0.5819939666683152, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 61.526, |
|
"eval_multi_news_samples_per_second": 91.376, |
|
"eval_multi_news_steps_per_second": 1.154, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_scitldr_accuracy": 0.491875, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 8.1765, |
|
"eval_scitldr_samples_per_second": 75.704, |
|
"eval_scitldr_steps_per_second": 0.978, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_joke_accuracy": 0.5185746777862017, |
|
"eval_joke_loss": 2.07421875, |
|
"eval_joke_runtime": 0.7033, |
|
"eval_joke_samples_per_second": 108.069, |
|
"eval_joke_steps_per_second": 1.422, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_gsm8k_accuracy": 0.7870444718962323, |
|
"eval_gsm8k_loss": 0.78466796875, |
|
"eval_gsm8k_runtime": 10.9372, |
|
"eval_gsm8k_samples_per_second": 120.597, |
|
"eval_gsm8k_steps_per_second": 1.554, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_math_qa_accuracy": 0.619624940687815, |
|
"eval_math_qa_loss": 1.6083984375, |
|
"eval_math_qa_runtime": 31.4027, |
|
"eval_math_qa_samples_per_second": 142.504, |
|
"eval_math_qa_steps_per_second": 1.783, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_essay_instruction_accuracy": 0.6070694115826017, |
|
"eval_essay_instruction_loss": 1.8740234375, |
|
"eval_essay_instruction_runtime": 8.0521, |
|
"eval_essay_instruction_samples_per_second": 51.291, |
|
"eval_essay_instruction_steps_per_second": 0.745, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_tldr_news_accuracy": 0.615757678890496, |
|
"eval_tldr_news_loss": 1.66015625, |
|
"eval_tldr_news_runtime": 4.1264, |
|
"eval_tldr_news_samples_per_second": 192.418, |
|
"eval_tldr_news_steps_per_second": 2.423, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_reddit_eli5_accuracy": 0.461742772350252, |
|
"eval_reddit_eli5_loss": 2.421875, |
|
"eval_reddit_eli5_runtime": 108.3649, |
|
"eval_reddit_eli5_samples_per_second": 90.546, |
|
"eval_reddit_eli5_steps_per_second": 1.135, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_reddit_asks_accuracy": 0.4700219866226591, |
|
"eval_reddit_asks_loss": 2.41015625, |
|
"eval_reddit_asks_runtime": 31.416, |
|
"eval_reddit_asks_samples_per_second": 72.606, |
|
"eval_reddit_asks_steps_per_second": 0.923, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_reddit_askh_accuracy": 0.46774579304106356, |
|
"eval_reddit_askh_loss": 2.5078125, |
|
"eval_reddit_askh_runtime": 61.093, |
|
"eval_reddit_askh_samples_per_second": 80.222, |
|
"eval_reddit_askh_steps_per_second": 1.015, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_wmt2019_zh-en_accuracy": 0.6671902987021268, |
|
"eval_wmt2019_zh-en_loss": 1.4541015625, |
|
"eval_wmt2019_zh-en_runtime": 27.3556, |
|
"eval_wmt2019_zh-en_samples_per_second": 145.528, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.828, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_wmt2019_fr-de_accuracy": 0.7487164373574896, |
|
"eval_wmt2019_fr-de_loss": 0.9892578125, |
|
"eval_wmt2019_fr-de_runtime": 11.3417, |
|
"eval_wmt2019_fr-de_samples_per_second": 133.314, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.675, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_wmt2019_ru-en_accuracy": 0.7546621422248451, |
|
"eval_wmt2019_ru-en_loss": 0.94970703125, |
|
"eval_wmt2019_ru-en_runtime": 22.6465, |
|
"eval_wmt2019_ru-en_samples_per_second": 132.471, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.678, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_wmt2019_de-en_accuracy": 0.7651105551969012, |
|
"eval_wmt2019_de-en_loss": 0.92236328125, |
|
"eval_wmt2019_de-en_runtime": 16.3647, |
|
"eval_wmt2019_de-en_samples_per_second": 183.199, |
|
"eval_wmt2019_de-en_steps_per_second": 2.322, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_ted_trans_de-ja_accuracy": 0.6670448957978744, |
|
"eval_ted_trans_de-ja_loss": 1.4306640625, |
|
"eval_ted_trans_de-ja_runtime": 8.4143, |
|
"eval_ted_trans_de-ja_samples_per_second": 85.331, |
|
"eval_ted_trans_de-ja_steps_per_second": 1.07, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_ted_trans_en-ja_accuracy": 0.6718075628588398, |
|
"eval_ted_trans_en-ja_loss": 1.33203125, |
|
"eval_ted_trans_en-ja_runtime": 10.3712, |
|
"eval_ted_trans_en-ja_samples_per_second": 77.233, |
|
"eval_ted_trans_en-ja_steps_per_second": 1.061, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_ted_trans_en-hi_accuracy": 0.6781445982723938, |
|
"eval_ted_trans_en-hi_loss": 1.2021484375, |
|
"eval_ted_trans_en-hi_runtime": 1.747, |
|
"eval_ted_trans_en-hi_samples_per_second": 58.959, |
|
"eval_ted_trans_en-hi_steps_per_second": 1.145, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_ted_trans_en-es_accuracy": 0.787559638615369, |
|
"eval_ted_trans_en-es_loss": 0.88525390625, |
|
"eval_ted_trans_en-es_runtime": 9.0268, |
|
"eval_ted_trans_en-es_samples_per_second": 91.505, |
|
"eval_ted_trans_en-es_steps_per_second": 1.219, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_private_tuning_accuracy": 0.693209861771278, |
|
"eval_private_tuning_loss": 1.1103515625, |
|
"eval_private_tuning_runtime": 144.1209, |
|
"eval_private_tuning_samples_per_second": 146.946, |
|
"eval_private_tuning_steps_per_second": 1.839, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_samsum_accuracy": 0.6467502185951618, |
|
"eval_samsum_loss": 1.259765625, |
|
"eval_samsum_runtime": 9.1622, |
|
"eval_samsum_samples_per_second": 89.28, |
|
"eval_samsum_steps_per_second": 1.201, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_prosocial_dialogue_accuracy": 0.5496461048716204, |
|
"eval_prosocial_dialogue_loss": 1.6904296875, |
|
"eval_prosocial_dialogue_runtime": 49.4898, |
|
"eval_prosocial_dialogue_samples_per_second": 545.224, |
|
"eval_prosocial_dialogue_steps_per_second": 6.83, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_oa_translated_accuracy": 0.7254537658996713, |
|
"eval_oa_translated_loss": 1.0947265625, |
|
"eval_oa_translated_runtime": 57.4991, |
|
"eval_oa_translated_samples_per_second": 89.862, |
|
"eval_oa_translated_steps_per_second": 1.13, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_wikihow_accuracy": 0.6223200665649702, |
|
"eval_wikihow_loss": 1.744140625, |
|
"eval_wikihow_runtime": 16.9927, |
|
"eval_wikihow_samples_per_second": 134.94, |
|
"eval_wikihow_steps_per_second": 1.707, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_explain_prosocial_accuracy": 0.6895944881927522, |
|
"eval_explain_prosocial_loss": 1.2900390625, |
|
"eval_explain_prosocial_runtime": 109.8962, |
|
"eval_explain_prosocial_samples_per_second": 557.326, |
|
"eval_explain_prosocial_steps_per_second": 6.97, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.798493132500121e-06, |
|
"loss": 1.6331, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.8079922732483016e-06, |
|
"loss": 1.6242, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.817368364668191e-06, |
|
"loss": 1.6471, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.8266245539317745e-06, |
|
"loss": 1.6592, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.835763868993521e-06, |
|
"loss": 1.6586, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.844789224536785e-06, |
|
"loss": 1.6354, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.853703427554027e-06, |
|
"loss": 1.6602, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.862509182587578e-06, |
|
"loss": 1.652, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.871209096655434e-06, |
|
"loss": 1.6451, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.879805683884512e-06, |
|
"loss": 1.6404, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.888301369871998e-06, |
|
"loss": 1.6411, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8966984957936845e-06, |
|
"loss": 1.6314, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.904999322276735e-06, |
|
"loss": 1.6189, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.913206033052878e-06, |
|
"loss": 1.6514, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.921320738406821e-06, |
|
"loss": 1.6363, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.929345478433492e-06, |
|
"loss": 1.6398, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.937282226116702e-06, |
|
"loss": 1.6277, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.945132890240829e-06, |
|
"loss": 1.6236, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.952899318146298e-06, |
|
"loss": 1.6353, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.96058329833879e-06, |
|
"loss": 1.6394, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.968186562961406e-06, |
|
"loss": 1.6293, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.975710790138337e-06, |
|
"loss": 1.6259, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9831576061979556e-06, |
|
"loss": 1.6124, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.990528587782728e-06, |
|
"loss": 1.6569, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.99782526385276e-06, |
|
"loss": 1.638, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_webgpt_accuracy": 0.5073254588442403, |
|
"eval_webgpt_loss": 2.140625, |
|
"eval_webgpt_runtime": 38.7594, |
|
"eval_webgpt_samples_per_second": 101.034, |
|
"eval_webgpt_steps_per_second": 1.264, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_prompt_dialogue_accuracy": 0.6310662447605156, |
|
"eval_prompt_dialogue_loss": 1.3232421875, |
|
"eval_prompt_dialogue_runtime": 75.506, |
|
"eval_prompt_dialogue_samples_per_second": 136.532, |
|
"eval_prompt_dialogue_steps_per_second": 1.708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_adversarial_qa_accuracy": 0.8161278335191379, |
|
"eval_adversarial_qa_loss": 0.65185546875, |
|
"eval_adversarial_qa_runtime": 17.8135, |
|
"eval_adversarial_qa_samples_per_second": 168.411, |
|
"eval_adversarial_qa_steps_per_second": 2.133, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_xsum_accuracy": 0.6358828745144401, |
|
"eval_xsum_loss": 1.3759765625, |
|
"eval_xsum_runtime": 120.9036, |
|
"eval_xsum_samples_per_second": 93.728, |
|
"eval_xsum_steps_per_second": 1.174, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_cnn_dailymail_accuracy": 0.7034533296693231, |
|
"eval_cnn_dailymail_loss": NaN, |
|
"eval_cnn_dailymail_runtime": 143.0011, |
|
"eval_cnn_dailymail_samples_per_second": 93.482, |
|
"eval_cnn_dailymail_steps_per_second": 1.175, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_multi_news_accuracy": 0.58382374758914, |
|
"eval_multi_news_loss": NaN, |
|
"eval_multi_news_runtime": 62.8321, |
|
"eval_multi_news_samples_per_second": 89.477, |
|
"eval_multi_news_steps_per_second": 1.13, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_scitldr_accuracy": 0.495625, |
|
"eval_scitldr_loss": NaN, |
|
"eval_scitldr_runtime": 7.4344, |
|
"eval_scitldr_samples_per_second": 83.262, |
|
"eval_scitldr_steps_per_second": 1.076, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_joke_accuracy": 0.5290940106141016, |
|
"eval_joke_loss": 2.01953125, |
|
"eval_joke_runtime": 0.7811, |
|
"eval_joke_samples_per_second": 97.299, |
|
"eval_joke_steps_per_second": 1.28, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_gsm8k_accuracy": 0.7911056207535516, |
|
"eval_gsm8k_loss": 0.76318359375, |
|
"eval_gsm8k_runtime": 9.9142, |
|
"eval_gsm8k_samples_per_second": 133.041, |
|
"eval_gsm8k_steps_per_second": 1.715, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_math_qa_accuracy": 0.6306243424083842, |
|
"eval_math_qa_loss": 1.548828125, |
|
"eval_math_qa_runtime": 31.7722, |
|
"eval_math_qa_samples_per_second": 140.847, |
|
"eval_math_qa_steps_per_second": 1.763, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_essay_instruction_accuracy": 0.6082469097583089, |
|
"eval_essay_instruction_loss": 1.8671875, |
|
"eval_essay_instruction_runtime": 8.3983, |
|
"eval_essay_instruction_samples_per_second": 49.176, |
|
"eval_essay_instruction_steps_per_second": 0.714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_tldr_news_accuracy": 0.6160980175274398, |
|
"eval_tldr_news_loss": 1.6416015625, |
|
"eval_tldr_news_runtime": 3.6256, |
|
"eval_tldr_news_samples_per_second": 218.998, |
|
"eval_tldr_news_steps_per_second": 2.758, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_reddit_eli5_accuracy": 0.4611581507518863, |
|
"eval_reddit_eli5_loss": 2.419921875, |
|
"eval_reddit_eli5_runtime": 107.6335, |
|
"eval_reddit_eli5_samples_per_second": 91.161, |
|
"eval_reddit_eli5_steps_per_second": 1.143, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_reddit_asks_accuracy": 0.4688479936563187, |
|
"eval_reddit_asks_loss": 2.408203125, |
|
"eval_reddit_asks_runtime": 31.0014, |
|
"eval_reddit_asks_samples_per_second": 73.577, |
|
"eval_reddit_asks_steps_per_second": 0.935, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_reddit_askh_accuracy": 0.4678089398518734, |
|
"eval_reddit_askh_loss": 2.505859375, |
|
"eval_reddit_askh_runtime": 58.2699, |
|
"eval_reddit_askh_samples_per_second": 84.109, |
|
"eval_reddit_askh_steps_per_second": 1.064, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_wmt2019_zh-en_accuracy": 0.6663175800777634, |
|
"eval_wmt2019_zh-en_loss": 1.4599609375, |
|
"eval_wmt2019_zh-en_runtime": 29.9768, |
|
"eval_wmt2019_zh-en_samples_per_second": 132.803, |
|
"eval_wmt2019_zh-en_steps_per_second": 1.668, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_wmt2019_fr-de_accuracy": 0.7535103098381767, |
|
"eval_wmt2019_fr-de_loss": 0.9736328125, |
|
"eval_wmt2019_fr-de_runtime": 10.3533, |
|
"eval_wmt2019_fr-de_samples_per_second": 146.04, |
|
"eval_wmt2019_fr-de_steps_per_second": 1.835, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_wmt2019_ru-en_accuracy": 0.7577544408610879, |
|
"eval_wmt2019_ru-en_loss": 0.93603515625, |
|
"eval_wmt2019_ru-en_runtime": 22.7659, |
|
"eval_wmt2019_ru-en_samples_per_second": 131.776, |
|
"eval_wmt2019_ru-en_steps_per_second": 1.669, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_wmt2019_de-en_accuracy": 0.7657056972240155, |
|
"eval_wmt2019_de-en_loss": 0.9150390625, |
|
"eval_wmt2019_de-en_runtime": 15.5528, |
|
"eval_wmt2019_de-en_samples_per_second": 192.762, |
|
"eval_wmt2019_de-en_steps_per_second": 2.443, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_ted_trans_de-ja_accuracy": 0.6747348791651755, |
|
"eval_ted_trans_de-ja_loss": 1.3994140625, |
|
"eval_ted_trans_de-ja_runtime": 8.3691, |
|
"eval_ted_trans_de-ja_samples_per_second": 85.792, |
|
"eval_ted_trans_de-ja_steps_per_second": 1.075, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_ted_trans_en-ja_accuracy": 0.6812527237431164, |
|
"eval_ted_trans_en-ja_loss": 1.3115234375, |
|
"eval_ted_trans_en-ja_runtime": 9.3233, |
|
"eval_ted_trans_en-ja_samples_per_second": 85.914, |
|
"eval_ted_trans_en-ja_steps_per_second": 1.18, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_ted_trans_en-hi_accuracy": 0.6843067779174763, |
|
"eval_ted_trans_en-hi_loss": 1.19921875, |
|
"eval_ted_trans_en-hi_runtime": 2.8696, |
|
"eval_ted_trans_en-hi_samples_per_second": 35.893, |
|
"eval_ted_trans_en-hi_steps_per_second": 0.697, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_ted_trans_en-es_accuracy": 0.7943555499559027, |
|
"eval_ted_trans_en-es_loss": 0.85009765625, |
|
"eval_ted_trans_en-es_runtime": 8.4213, |
|
"eval_ted_trans_en-es_samples_per_second": 98.085, |
|
"eval_ted_trans_en-es_steps_per_second": 1.306, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_private_tuning_accuracy": 0.6955408449271764, |
|
"eval_private_tuning_loss": 1.0966796875, |
|
"eval_private_tuning_runtime": 144.1155, |
|
"eval_private_tuning_samples_per_second": 146.952, |
|
"eval_private_tuning_steps_per_second": 1.839, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_samsum_accuracy": 0.651219275235597, |
|
"eval_samsum_loss": 1.2451171875, |
|
"eval_samsum_runtime": 10.3624, |
|
"eval_samsum_samples_per_second": 78.939, |
|
"eval_samsum_steps_per_second": 1.062, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_prosocial_dialogue_accuracy": 0.5541225644750905, |
|
"eval_prosocial_dialogue_loss": 1.671875, |
|
"eval_prosocial_dialogue_runtime": 49.1807, |
|
"eval_prosocial_dialogue_samples_per_second": 548.65, |
|
"eval_prosocial_dialogue_steps_per_second": 6.873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_oa_translated_accuracy": 0.7297566303926173, |
|
"eval_oa_translated_loss": 1.0712890625, |
|
"eval_oa_translated_runtime": 57.6497, |
|
"eval_oa_translated_samples_per_second": 89.628, |
|
"eval_oa_translated_steps_per_second": 1.127, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_wikihow_accuracy": 0.6223200665649702, |
|
"eval_wikihow_loss": 1.73828125, |
|
"eval_wikihow_runtime": 16.8591, |
|
"eval_wikihow_samples_per_second": 136.009, |
|
"eval_wikihow_steps_per_second": 1.72, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_explain_prosocial_accuracy": 0.6887323092680878, |
|
"eval_explain_prosocial_loss": 1.2744140625, |
|
"eval_explain_prosocial_runtime": 111.2104, |
|
"eval_explain_prosocial_samples_per_second": 550.74, |
|
"eval_explain_prosocial_steps_per_second": 6.888, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 17452, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.6761689779124306e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|