|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9927797833935017, |
|
"global_step": 11040, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 4.599947213172912, |
|
"distil_loss": 0.0, |
|
"epoch": 0.05, |
|
"learning_rate": 0.001, |
|
"loss": 4.5999, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.8339186582565308, |
|
"distil_loss": 0.0, |
|
"epoch": 0.09, |
|
"learning_rate": 0.002, |
|
"loss": 1.8339, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.3567713406085968, |
|
"distil_loss": 0.0, |
|
"epoch": 0.14, |
|
"learning_rate": 0.003, |
|
"loss": 1.3568, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.2095605379343033, |
|
"distil_loss": 0.0, |
|
"epoch": 0.18, |
|
"learning_rate": 0.004, |
|
"loss": 1.2096, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 1000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.1451576855182648, |
|
"distil_loss": 0.0, |
|
"epoch": 0.23, |
|
"learning_rate": 0.005, |
|
"loss": 1.1452, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 1250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 1.1159179366551912, |
|
"distil_loss": 0.0, |
|
"epoch": 0.25, |
|
"eval_ampere_temperature": 0.0, |
|
"eval_exact_match": 77.360454115421, |
|
"eval_f1": 86.34721419771964, |
|
"eval_progress": 0.0, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 1380 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.0835382461547851, |
|
"distil_loss": 0.0, |
|
"epoch": 0.27, |
|
"learning_rate": 0.006, |
|
"loss": 1.1004, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 1500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.1237352261543274, |
|
"distil_loss": 0.0, |
|
"epoch": 0.32, |
|
"learning_rate": 0.006999999999999999, |
|
"loss": 1.1237, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 1750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.116382148861885, |
|
"distil_loss": 0.0, |
|
"epoch": 0.36, |
|
"learning_rate": 0.008, |
|
"loss": 1.1164, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 2000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.0670130407810212, |
|
"distil_loss": 0.0, |
|
"epoch": 0.41, |
|
"learning_rate": 0.009000000000000001, |
|
"loss": 1.067, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 2250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 0.0, |
|
"ce_loss": 1.0639437032938004, |
|
"distil_loss": 0.0, |
|
"epoch": 0.45, |
|
"learning_rate": 0.01, |
|
"loss": 1.0639, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 2500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 1.6912145472259645, |
|
"ce_loss": 1.0629408322572709, |
|
"distil_loss": 0.0, |
|
"epoch": 0.5, |
|
"learning_rate": 0.00970862470862471, |
|
"loss": 1.0629, |
|
"nnz_perc": 1.0, |
|
"progress": 0.029020979020979, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 2750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 1.0985989689826965, |
|
"distil_loss": 0.0, |
|
"epoch": 0.5, |
|
"eval_ampere_temperature": 1.7570655286803998, |
|
"eval_exact_match": 75.37369914853359, |
|
"eval_f1": 85.4846023509551, |
|
"eval_progress": 0.03018648018648018, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 2760 |
|
}, |
|
{ |
|
"ampere_temperature": 3.2905000860378912, |
|
"ce_loss": 1.0230497049788634, |
|
"distil_loss": 0.0, |
|
"epoch": 0.54, |
|
"learning_rate": 0.009417249417249416, |
|
"loss": 1.0261, |
|
"nnz_perc": 1.0, |
|
"progress": 0.058158508158508204, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 3000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 4.793831310474058, |
|
"ce_loss": 0.9981409941911698, |
|
"distil_loss": 0.0, |
|
"epoch": 0.59, |
|
"learning_rate": 0.009125874125874126, |
|
"loss": 0.9981, |
|
"nnz_perc": 1.0, |
|
"progress": 0.0872960372960373, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 3250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 6.204176736633212, |
|
"ce_loss": 1.0074045011997224, |
|
"distil_loss": 0.0, |
|
"epoch": 0.63, |
|
"learning_rate": 0.008834498834498834, |
|
"loss": 1.0074, |
|
"nnz_perc": 1.0, |
|
"progress": 0.1164335664335664, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 3500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 7.524504880614105, |
|
"ce_loss": 0.9891920503377915, |
|
"distil_loss": 0.0, |
|
"epoch": 0.68, |
|
"learning_rate": 0.008543123543123544, |
|
"loss": 0.9892, |
|
"nnz_perc": 1.0, |
|
"progress": 0.1455710955710956, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 3750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 8.757784258515466, |
|
"ce_loss": 1.0083434996008873, |
|
"distil_loss": 0.0, |
|
"epoch": 0.72, |
|
"learning_rate": 0.008251748251748252, |
|
"loss": 1.0083, |
|
"nnz_perc": 1.0, |
|
"progress": 0.1747086247086247, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 4000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 0.9699458577803203, |
|
"distil_loss": 0.0, |
|
"epoch": 0.75, |
|
"eval_ampere_temperature": 9.411504281933276, |
|
"eval_exact_match": 79.94323557237465, |
|
"eval_f1": 88.17033886272301, |
|
"eval_progress": 0.191025641025641, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 4140 |
|
}, |
|
{ |
|
"ampere_temperature": 9.906983386436048, |
|
"ce_loss": 0.9698418254202062, |
|
"distil_loss": 0.0, |
|
"epoch": 0.77, |
|
"learning_rate": 0.00796037296037296, |
|
"loss": 0.9699, |
|
"nnz_perc": 1.0, |
|
"progress": 0.2038461538461538, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 4250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 10.97507078047459, |
|
"ce_loss": 0.9425091907978058, |
|
"distil_loss": 0.0, |
|
"epoch": 0.81, |
|
"learning_rate": 0.007668997668997669, |
|
"loss": 0.9425, |
|
"nnz_perc": 1.0, |
|
"progress": 0.232983682983683, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 4500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 11.965014956729835, |
|
"ce_loss": 0.9731772248744964, |
|
"distil_loss": 0.0, |
|
"epoch": 0.86, |
|
"learning_rate": 0.007377622377622378, |
|
"loss": 0.9732, |
|
"nnz_perc": 1.0, |
|
"progress": 0.2621212121212122, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 4750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 12.879784431300521, |
|
"ce_loss": 0.9197172073125839, |
|
"distil_loss": 0.0, |
|
"epoch": 0.9, |
|
"learning_rate": 0.007086247086247086, |
|
"loss": 0.9197, |
|
"nnz_perc": 1.0, |
|
"progress": 0.2912587412587413, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 5000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 13.722347720285395, |
|
"ce_loss": 0.9390108388662338, |
|
"distil_loss": 0.0, |
|
"epoch": 0.95, |
|
"learning_rate": 0.006794871794871795, |
|
"loss": 0.939, |
|
"nnz_perc": 1.0, |
|
"progress": 0.3203962703962704, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 5250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 14.495673339783197, |
|
"ce_loss": 0.9188237161636352, |
|
"distil_loss": 0.0, |
|
"epoch": 0.99, |
|
"learning_rate": 0.006503496503496503, |
|
"loss": 0.9188, |
|
"nnz_perc": 1.0, |
|
"progress": 0.3495337995337995, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 5500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 0.9402093678712845, |
|
"distil_loss": 0.0, |
|
"epoch": 1.0, |
|
"eval_ampere_temperature": 14.55463723501537, |
|
"eval_exact_match": 81.63670766319773, |
|
"eval_f1": 89.21446798933258, |
|
"eval_progress": 0.35186480186480185, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 5520 |
|
}, |
|
{ |
|
"ampere_temperature": 15.202729805892675, |
|
"ce_loss": 0.7292252867118172, |
|
"distil_loss": 0.0, |
|
"epoch": 1.04, |
|
"learning_rate": 0.006212121212121212, |
|
"loss": 0.7461, |
|
"nnz_perc": 1.0, |
|
"progress": 0.3786713286713287, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 5750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 15.846485634712565, |
|
"ce_loss": 0.7380791381597519, |
|
"distil_loss": 0.0, |
|
"epoch": 1.08, |
|
"learning_rate": 0.005920745920745921, |
|
"loss": 0.7381, |
|
"nnz_perc": 1.0, |
|
"progress": 0.4078088578088578, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 6000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 16.429909342341613, |
|
"ce_loss": 0.7548821606636047, |
|
"distil_loss": 0.0, |
|
"epoch": 1.13, |
|
"learning_rate": 0.005629370629370629, |
|
"loss": 0.7549, |
|
"nnz_perc": 1.0, |
|
"progress": 0.436946386946387, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 6250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 16.955969444878562, |
|
"ce_loss": 0.7157313173413277, |
|
"distil_loss": 0.0, |
|
"epoch": 1.17, |
|
"learning_rate": 0.005337995337995338, |
|
"loss": 0.7157, |
|
"nnz_perc": 1.0, |
|
"progress": 0.4660839160839161, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 6500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 17.427634458422148, |
|
"ce_loss": 0.7611533465385437, |
|
"distil_loss": 0.0, |
|
"epoch": 1.22, |
|
"learning_rate": 0.005046620046620046, |
|
"loss": 0.7612, |
|
"nnz_perc": 1.0, |
|
"progress": 0.4952214452214452, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 6750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 0.7508984424670537, |
|
"distil_loss": 0.0, |
|
"epoch": 1.25, |
|
"eval_ampere_temperature": 17.68575872652857, |
|
"eval_exact_match": 81.51371807000946, |
|
"eval_f1": 88.80037767793473, |
|
"eval_progress": 0.5127039627039627, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"ampere_temperature": 17.847872899071124, |
|
"ce_loss": 0.6947148644924164, |
|
"distil_loss": 0.0, |
|
"epoch": 1.26, |
|
"learning_rate": 0.004755244755244755, |
|
"loss": 0.7284, |
|
"nnz_perc": 1.0, |
|
"progress": 0.5243589743589744, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 7000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 18.219653282924224, |
|
"ce_loss": 0.7663285417556762, |
|
"distil_loss": 0.0, |
|
"epoch": 1.31, |
|
"learning_rate": 0.004463869463869464, |
|
"loss": 0.7663, |
|
"nnz_perc": 1.0, |
|
"progress": 0.5534965034965035, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 7250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 18.545944126080197, |
|
"ce_loss": 0.691897637873888, |
|
"distil_loss": 0.0, |
|
"epoch": 1.35, |
|
"learning_rate": 0.004172494172494173, |
|
"loss": 0.6919, |
|
"nnz_perc": 1.0, |
|
"progress": 0.5826340326340327, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 7500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 18.82971394463778, |
|
"ce_loss": 0.7088325002193451, |
|
"distil_loss": 0.0, |
|
"epoch": 1.4, |
|
"learning_rate": 0.0038811188811188812, |
|
"loss": 0.7088, |
|
"nnz_perc": 1.0, |
|
"progress": 0.6117715617715618, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 7750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.07393125469572, |
|
"ce_loss": 0.7107383124232293, |
|
"distil_loss": 0.0, |
|
"epoch": 1.44, |
|
"learning_rate": 0.0035897435897435897, |
|
"loss": 0.7107, |
|
"nnz_perc": 1.0, |
|
"progress": 0.6409090909090909, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 8000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.281564572352753, |
|
"ce_loss": 0.7073436776399612, |
|
"distil_loss": 0.0, |
|
"epoch": 1.49, |
|
"learning_rate": 0.0032983682983682983, |
|
"loss": 0.7073, |
|
"nnz_perc": 1.0, |
|
"progress": 0.6700466200466201, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 8250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 0.7176821072896321, |
|
"distil_loss": 0.0, |
|
"epoch": 1.49, |
|
"eval_ampere_temperature": 19.304163095074752, |
|
"eval_exact_match": 82.71523178807946, |
|
"eval_f1": 89.82467226075393, |
|
"eval_progress": 0.6735431235431235, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"ampere_temperature": 19.455582413707628, |
|
"ce_loss": 0.7027889224615964, |
|
"distil_loss": 0.0, |
|
"epoch": 1.53, |
|
"learning_rate": 0.0030069930069930068, |
|
"loss": 0.7046, |
|
"nnz_perc": 1.0, |
|
"progress": 0.6991841491841492, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 8500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.598953294859086, |
|
"ce_loss": 0.6954642720222474, |
|
"distil_loss": 0.0, |
|
"epoch": 1.58, |
|
"learning_rate": 0.0027156177156177157, |
|
"loss": 0.6955, |
|
"nnz_perc": 1.0, |
|
"progress": 0.7283216783216783, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 8750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.71464573190587, |
|
"ce_loss": 0.7050508892536164, |
|
"distil_loss": 0.0, |
|
"epoch": 1.62, |
|
"learning_rate": 0.0024242424242424242, |
|
"loss": 0.7051, |
|
"nnz_perc": 1.0, |
|
"progress": 0.7574592074592075, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 9000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.805628240946717, |
|
"ce_loss": 0.6534205512404442, |
|
"distil_loss": 0.0, |
|
"epoch": 1.67, |
|
"learning_rate": 0.0021328671328671328, |
|
"loss": 0.6534, |
|
"nnz_perc": 1.0, |
|
"progress": 0.7865967365967366, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 9250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.874869338080376, |
|
"ce_loss": 0.6931327093839645, |
|
"distil_loss": 0.0, |
|
"epoch": 1.71, |
|
"learning_rate": 0.0018414918414918417, |
|
"loss": 0.6931, |
|
"nnz_perc": 1.0, |
|
"progress": 0.8157342657342658, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 9500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 0.6803905916400254, |
|
"distil_loss": 0.0, |
|
"epoch": 1.74, |
|
"eval_ampere_temperature": 19.90914467925581, |
|
"eval_exact_match": 83.3112582781457, |
|
"eval_f1": 90.48253679391624, |
|
"eval_progress": 0.8343822843822843, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"ampere_temperature": 19.925337539405586, |
|
"ce_loss": 0.6604658047358195, |
|
"distil_loss": 0.0, |
|
"epoch": 1.76, |
|
"learning_rate": 0.0015501165501165502, |
|
"loss": 0.6732, |
|
"nnz_perc": 1.0, |
|
"progress": 0.8448717948717949, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 9750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.960001361021092, |
|
"ce_loss": 0.6589477426409721, |
|
"distil_loss": 0.0, |
|
"epoch": 1.81, |
|
"learning_rate": 0.001258741258741259, |
|
"loss": 0.6589, |
|
"nnz_perc": 1.0, |
|
"progress": 0.874009324009324, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 10000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.981829319025636, |
|
"ce_loss": 0.6645486508607864, |
|
"distil_loss": 0.0, |
|
"epoch": 1.85, |
|
"learning_rate": 0.0009673659673659674, |
|
"loss": 0.6645, |
|
"nnz_perc": 1.0, |
|
"progress": 0.9031468531468532, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 10250, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.99378992951796, |
|
"ce_loss": 0.6627120378017426, |
|
"distil_loss": 0.0, |
|
"epoch": 1.9, |
|
"learning_rate": 0.000675990675990676, |
|
"loss": 0.6627, |
|
"nnz_perc": 1.0, |
|
"progress": 0.9322843822843823, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 10500, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.998851708596806, |
|
"ce_loss": 0.6525639802217483, |
|
"distil_loss": 0.0, |
|
"epoch": 1.94, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 0.6526, |
|
"nnz_perc": 1.0, |
|
"progress": 0.9614219114219115, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 10750, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ampere_temperature": 19.999983172360917, |
|
"ce_loss": 0.630506355702877, |
|
"distil_loss": 0.0, |
|
"epoch": 1.99, |
|
"learning_rate": 9.324009324009324e-05, |
|
"loss": 0.6305, |
|
"nnz_perc": 1.0, |
|
"progress": 0.9905594405594406, |
|
"regu_lambda": 0.0, |
|
"regu_loss": 0.0, |
|
"step": 11000, |
|
"threshold": 1.0 |
|
}, |
|
{ |
|
"ce_loss": 0.6976410485804081, |
|
"distil_loss": 0.0, |
|
"epoch": 1.99, |
|
"eval_ampere_temperature": 19.99999781767362, |
|
"eval_exact_match": 83.74645222327341, |
|
"eval_f1": 90.78776054621733, |
|
"eval_progress": 0.9952214452214452, |
|
"eval_regu_lambda": 0.0, |
|
"eval_threshold": 1.0, |
|
"nnz_perc": 1.0, |
|
"regu_loss": 0.0, |
|
"step": 11040 |
|
} |
|
], |
|
"max_steps": 11080, |
|
"num_train_epochs": 2, |
|
"total_flos": 0, |
|
"trial_name": "hp_mnop-albert-base-v2_tn-albert-base-v2_od-__data_2to__devel_data__nn_pruning__output_sequence__squad_test_teacher___es-steps_pdebs128_nte2_ws2500_ls250_ss1380_stl50_est1380_rn-__da--3c944a736efd9cf3", |
|
"trial_params": {} |
|
} |
|
|