{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6936817157061101,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.0185, "step": 20 },
    { "epoch": 0.01, "learning_rate": 8e-05, "loss": 0.8649, "step": 40 },
    { "epoch": 0.01, "learning_rate": 0.00012, "loss": 0.7784, "step": 60 },
    { "epoch": 0.02, "learning_rate": 0.00016, "loss": 0.7386, "step": 80 },
    { "epoch": 0.02, "learning_rate": 0.0002, "loss": 0.7037, "step": 100 },
    { "epoch": 0.03, "learning_rate": 0.0001990530303030303, "loss": 0.7019, "step": 120 },
    { "epoch": 0.03, "learning_rate": 0.0001981060606060606, "loss": 0.7117, "step": 140 },
    { "epoch": 0.04, "learning_rate": 0.00019715909090909094, "loss": 0.672, "step": 160 },
    { "epoch": 0.04, "learning_rate": 0.00019621212121212123, "loss": 0.664, "step": 180 },
    { "epoch": 0.05, "learning_rate": 0.00019526515151515152, "loss": 0.6666, "step": 200 },
    { "epoch": 0.05, "learning_rate": 0.0001943181818181818, "loss": 0.6685, "step": 220 },
    { "epoch": 0.06, "learning_rate": 0.00019337121212121213, "loss": 0.6788, "step": 240 },
    { "epoch": 0.06, "learning_rate": 0.00019242424242424245, "loss": 0.6673, "step": 260 },
    { "epoch": 0.06, "learning_rate": 0.00019147727272727274, "loss": 0.6628, "step": 280 },
    { "epoch": 0.07, "learning_rate": 0.00019053030303030303, "loss": 0.6643, "step": 300 },
    { "epoch": 0.07, "learning_rate": 0.00018958333333333332, "loss": 0.6607, "step": 320 },
    { "epoch": 0.08, "learning_rate": 0.00018863636363636364, "loss": 0.6706, "step": 340 },
    { "epoch": 0.08, "learning_rate": 0.00018768939393939396, "loss": 0.6709, "step": 360 },
    { "epoch": 0.09, "learning_rate": 0.00018674242424242425, "loss": 0.6616, "step": 380 },
    { "epoch": 0.09, "learning_rate": 0.00018579545454545454, "loss": 0.6566, "step": 400 },
    { "epoch": 0.1, "learning_rate": 0.00018484848484848484, "loss": 0.6513, "step": 420 },
    { "epoch": 0.1, "learning_rate": 0.00018390151515151518, "loss": 0.6797, "step": 440 },
    { "epoch": 0.11, "learning_rate": 0.00018295454545454547, "loss": 0.6599, "step": 460 },
    { "epoch": 0.11, "learning_rate": 0.00018200757575757577, "loss": 0.6561, "step": 480 },
    { "epoch": 0.12, "learning_rate": 0.00018106060606060606, "loss": 0.662, "step": 500 },
    { "epoch": 0.12, "learning_rate": 0.00018011363636363638, "loss": 0.6629, "step": 520 },
    { "epoch": 0.12, "learning_rate": 0.0001791666666666667, "loss": 0.6475, "step": 540 },
    { "epoch": 0.13, "learning_rate": 0.00017821969696969699, "loss": 0.6607, "step": 560 },
    { "epoch": 0.13, "learning_rate": 0.00017727272727272728, "loss": 0.6512, "step": 580 },
    { "epoch": 0.14, "learning_rate": 0.00017632575757575757, "loss": 0.6484, "step": 600 },
    { "epoch": 0.14, "learning_rate": 0.0001753787878787879, "loss": 0.6403, "step": 620 },
    { "epoch": 0.15, "learning_rate": 0.0001744318181818182, "loss": 0.6537, "step": 640 },
    { "epoch": 0.15, "learning_rate": 0.0001734848484848485, "loss": 0.6516, "step": 660 },
    { "epoch": 0.16, "learning_rate": 0.0001725378787878788, "loss": 0.6577, "step": 680 },
    { "epoch": 0.16, "learning_rate": 0.00017159090909090908, "loss": 0.6374, "step": 700 },
    { "epoch": 0.17, "learning_rate": 0.0001706439393939394, "loss": 0.6551, "step": 720 },
    { "epoch": 0.17, "learning_rate": 0.00016969696969696972, "loss": 0.6388, "step": 740 },
    { "epoch": 0.18, "learning_rate": 0.00016875, "loss": 0.64, "step": 760 },
    { "epoch": 0.18, "learning_rate": 0.0001678030303030303, "loss": 0.6579, "step": 780 },
    { "epoch": 0.18, "learning_rate": 0.0001668560606060606, "loss": 0.6525, "step": 800 },
    { "epoch": 0.19, "learning_rate": 0.00016590909090909094, "loss": 0.6261, "step": 820 },
    { "epoch": 0.19, "learning_rate": 0.00016496212121212123, "loss": 0.6351, "step": 840 },
    { "epoch": 0.2, "learning_rate": 0.00016401515151515152, "loss": 0.6537, "step": 860 },
    { "epoch": 0.2, "learning_rate": 0.0001630681818181818, "loss": 0.6448, "step": 880 },
    { "epoch": 0.21, "learning_rate": 0.00016212121212121213, "loss": 0.638, "step": 900 },
    { "epoch": 0.21, "learning_rate": 0.00016117424242424245, "loss": 0.6503, "step": 920 },
    { "epoch": 0.22, "learning_rate": 0.00016022727272727274, "loss": 0.6378, "step": 940 },
    { "epoch": 0.22, "learning_rate": 0.00015928030303030303, "loss": 0.643, "step": 960 },
    { "epoch": 0.23, "learning_rate": 0.00015833333333333332, "loss": 0.6235, "step": 980 },
    { "epoch": 0.23, "learning_rate": 0.00015738636363636364, "loss": 0.647, "step": 1000 },
    { "epoch": 0.24, "learning_rate": 0.00015643939393939396, "loss": 0.6408, "step": 1020 },
    { "epoch": 0.24, "learning_rate": 0.00015549242424242425, "loss": 0.6391, "step": 1040 },
    { "epoch": 0.25, "learning_rate": 0.00015454545454545454, "loss": 0.6356, "step": 1060 },
    { "epoch": 0.25, "learning_rate": 0.00015359848484848484, "loss": 0.6317, "step": 1080 },
    { "epoch": 0.25, "learning_rate": 0.00015265151515151515, "loss": 0.6413, "step": 1100 },
    { "epoch": 0.26, "learning_rate": 0.00015170454545454547, "loss": 0.6338, "step": 1120 },
    { "epoch": 0.26, "learning_rate": 0.00015075757575757576, "loss": 0.6422, "step": 1140 },
    { "epoch": 0.27, "learning_rate": 0.00014981060606060606, "loss": 0.6442, "step": 1160 },
    { "epoch": 0.27, "learning_rate": 0.00014886363636363635, "loss": 0.6523, "step": 1180 },
    { "epoch": 0.28, "learning_rate": 0.0001479166666666667, "loss": 0.6349, "step": 1200 },
    { "epoch": 0.28, "learning_rate": 0.00014696969696969698, "loss": 0.6389, "step": 1220 },
    { "epoch": 0.29, "learning_rate": 0.00014602272727272728, "loss": 0.6468, "step": 1240 },
    { "epoch": 0.29, "learning_rate": 0.00014507575757575757, "loss": 0.6431, "step": 1260 },
    { "epoch": 0.3, "learning_rate": 0.00014412878787878789, "loss": 0.6287, "step": 1280 },
    { "epoch": 0.3, "learning_rate": 0.0001431818181818182, "loss": 0.6438, "step": 1300 },
    { "epoch": 0.31, "learning_rate": 0.0001422348484848485, "loss": 0.6274, "step": 1320 },
    { "epoch": 0.31, "learning_rate": 0.0001412878787878788, "loss": 0.6286, "step": 1340 },
    { "epoch": 0.31, "learning_rate": 0.00014034090909090908, "loss": 0.6401, "step": 1360 },
    { "epoch": 0.32, "learning_rate": 0.0001393939393939394, "loss": 0.6472, "step": 1380 },
    { "epoch": 0.32, "learning_rate": 0.00013844696969696972, "loss": 0.6458, "step": 1400 },
    { "epoch": 0.33, "learning_rate": 0.0001375, "loss": 0.6117, "step": 1420 },
    { "epoch": 0.33, "learning_rate": 0.0001365530303030303, "loss": 0.6271, "step": 1440 },
    { "epoch": 0.34, "learning_rate": 0.0001356060606060606, "loss": 0.6287, "step": 1460 },
    { "epoch": 0.34, "learning_rate": 0.00013465909090909094, "loss": 0.6336, "step": 1480 },
    { "epoch": 0.35, "learning_rate": 0.00013371212121212123, "loss": 0.6404, "step": 1500 },
    { "epoch": 0.35, "learning_rate": 0.00013276515151515152, "loss": 0.6312, "step": 1520 },
    { "epoch": 0.36, "learning_rate": 0.0001318181818181818, "loss": 0.6168, "step": 1540 },
    { "epoch": 0.36, "learning_rate": 0.00013087121212121213, "loss": 0.6272, "step": 1560 },
    { "epoch": 0.37, "learning_rate": 0.00012992424242424245, "loss": 0.6477, "step": 1580 },
    { "epoch": 0.37, "learning_rate": 0.00012897727272727274, "loss": 0.6477, "step": 1600 },
    { "epoch": 0.37, "learning_rate": 0.00012803030303030303, "loss": 0.6227, "step": 1620 },
    { "epoch": 0.38, "learning_rate": 0.00012708333333333332, "loss": 0.6224, "step": 1640 },
    { "epoch": 0.38, "learning_rate": 0.00012613636363636364, "loss": 0.6315, "step": 1660 },
    { "epoch": 0.39, "learning_rate": 0.00012518939393939396, "loss": 0.631, "step": 1680 },
    { "epoch": 0.39, "learning_rate": 0.00012424242424242425, "loss": 0.6285, "step": 1700 },
    { "epoch": 0.4, "learning_rate": 0.00012329545454545454, "loss": 0.6359, "step": 1720 },
    { "epoch": 0.4, "learning_rate": 0.00012234848484848484, "loss": 0.6282, "step": 1740 },
    { "epoch": 0.41, "learning_rate": 0.00012140151515151517, "loss": 0.6196, "step": 1760 },
    { "epoch": 0.41, "learning_rate": 0.00012045454545454546, "loss": 0.6346, "step": 1780 },
    { "epoch": 0.42, "learning_rate": 0.00011950757575757576, "loss": 0.6323, "step": 1800 },
    { "epoch": 0.42, "learning_rate": 0.00011856060606060606, "loss": 0.6108, "step": 1820 },
    { "epoch": 0.43, "learning_rate": 0.00011761363636363636, "loss": 0.6324, "step": 1840 },
    { "epoch": 0.43, "learning_rate": 0.00011666666666666668, "loss": 0.618, "step": 1860 },
    { "epoch": 0.43, "learning_rate": 0.00011571969696969698, "loss": 0.6099, "step": 1880 },
    { "epoch": 0.44, "learning_rate": 0.00011477272727272728, "loss": 0.6251, "step": 1900 },
    { "epoch": 0.44, "learning_rate": 0.00011382575757575758, "loss": 0.6209, "step": 1920 },
    { "epoch": 0.45, "learning_rate": 0.0001128787878787879, "loss": 0.6218, "step": 1940 },
    { "epoch": 0.45, "learning_rate": 0.00011193181818181819, "loss": 0.6299, "step": 1960 },
    { "epoch": 0.46, "learning_rate": 0.0001109848484848485, "loss": 0.6211, "step": 1980 },
    { "epoch": 0.46, "learning_rate": 0.00011003787878787879, "loss": 0.6072, "step": 2000 },
    { "epoch": 0.47, "learning_rate": 0.00010909090909090909, "loss": 0.6264, "step": 2020 },
    { "epoch": 0.47, "learning_rate": 0.00010814393939393941, "loss": 0.6248, "step": 2040 },
    { "epoch": 0.48, "learning_rate": 0.0001071969696969697, "loss": 0.6125, "step": 2060 },
    { "epoch": 0.48, "learning_rate": 0.00010625000000000001, "loss": 0.6294, "step": 2080 },
    { "epoch": 0.49, "learning_rate": 0.0001053030303030303, "loss": 0.6193, "step": 2100 },
    { "epoch": 0.49, "learning_rate": 0.0001043560606060606, "loss": 0.6293, "step": 2120 },
    { "epoch": 0.49, "learning_rate": 0.00010340909090909092, "loss": 0.629, "step": 2140 },
    { "epoch": 0.5, "learning_rate": 0.00010246212121212121, "loss": 0.6353, "step": 2160 },
    { "epoch": 0.5, "learning_rate": 0.00010151515151515152, "loss": 0.6268, "step": 2180 },
    { "epoch": 0.51, "learning_rate": 0.00010056818181818181, "loss": 0.6256, "step": 2200 },
    { "epoch": 0.51, "learning_rate": 9.962121212121213e-05, "loss": 0.6404, "step": 2220 },
    { "epoch": 0.52, "learning_rate": 9.867424242424242e-05, "loss": 0.6347, "step": 2240 },
    { "epoch": 0.52, "learning_rate": 9.772727272727274e-05, "loss": 0.6317, "step": 2260 },
    { "epoch": 0.53, "learning_rate": 9.678030303030303e-05, "loss": 0.6335, "step": 2280 },
    { "epoch": 0.53, "learning_rate": 9.583333333333334e-05, "loss": 0.6239, "step": 2300 },
    { "epoch": 0.54, "learning_rate": 9.488636363636364e-05, "loss": 0.6302, "step": 2320 },
    { "epoch": 0.54, "learning_rate": 9.393939393939395e-05, "loss": 0.6182, "step": 2340 },
    { "epoch": 0.55, "learning_rate": 9.299242424242425e-05, "loss": 0.6219, "step": 2360 },
    { "epoch": 0.55, "learning_rate": 9.204545454545454e-05, "loss": 0.6229, "step": 2380 },
    { "epoch": 0.55, "learning_rate": 9.109848484848486e-05, "loss": 0.6413, "step": 2400 },
    { "epoch": 0.56, "learning_rate": 9.015151515151515e-05, "loss": 0.6237, "step": 2420 },
    { "epoch": 0.56, "learning_rate": 8.920454545454546e-05, "loss": 0.6397, "step": 2440 },
    { "epoch": 0.57, "learning_rate": 8.825757575757576e-05, "loss": 0.6259, "step": 2460 },
    { "epoch": 0.57, "learning_rate": 8.731060606060605e-05, "loss": 0.634, "step": 2480 },
    { "epoch": 0.58, "learning_rate": 8.636363636363637e-05, "loss": 0.622, "step": 2500 },
    { "epoch": 0.58, "learning_rate": 8.541666666666666e-05, "loss": 0.6279, "step": 2520 },
    { "epoch": 0.59, "learning_rate": 8.446969696969697e-05, "loss": 0.6306, "step": 2540 },
    { "epoch": 0.59, "learning_rate": 8.352272727272727e-05, "loss": 0.6288, "step": 2560 },
    { "epoch": 0.6, "learning_rate": 8.257575757575758e-05, "loss": 0.6297, "step": 2580 },
    { "epoch": 0.6, "learning_rate": 8.162878787878789e-05, "loss": 0.6119, "step": 2600 },
    { "epoch": 0.61, "learning_rate": 8.068181818181818e-05, "loss": 0.6227, "step": 2620 },
    { "epoch": 0.61, "learning_rate": 7.97348484848485e-05, "loss": 0.6317, "step": 2640 },
    { "epoch": 0.62, "learning_rate": 7.878787878787879e-05, "loss": 0.619, "step": 2660 },
    { "epoch": 0.62, "learning_rate": 7.784090909090909e-05, "loss": 0.6156, "step": 2680 },
    { "epoch": 0.62, "learning_rate": 7.68939393939394e-05, "loss": 0.6116, "step": 2700 },
    { "epoch": 0.63, "learning_rate": 7.59469696969697e-05, "loss": 0.6009, "step": 2720 },
    { "epoch": 0.63, "learning_rate": 7.500000000000001e-05, "loss": 0.6226, "step": 2740 },
    { "epoch": 0.64, "learning_rate": 7.40530303030303e-05, "loss": 0.6246, "step": 2760 },
    { "epoch": 0.64, "learning_rate": 7.310606060606062e-05, "loss": 0.6035, "step": 2780 },
    { "epoch": 0.65, "learning_rate": 7.215909090909091e-05, "loss": 0.6059, "step": 2800 },
    { "epoch": 0.65, "learning_rate": 7.121212121212121e-05, "loss": 0.6387, "step": 2820 },
    { "epoch": 0.66, "learning_rate": 7.026515151515152e-05, "loss": 0.6314, "step": 2840 },
    { "epoch": 0.66, "learning_rate": 6.931818181818182e-05, "loss": 0.6246, "step": 2860 },
    { "epoch": 0.67, "learning_rate": 6.837121212121213e-05, "loss": 0.6448, "step": 2880 },
    { "epoch": 0.67, "learning_rate": 6.742424242424242e-05, "loss": 0.5997, "step": 2900 },
    { "epoch": 0.68, "learning_rate": 6.647727272727274e-05, "loss": 0.6238, "step": 2920 },
    { "epoch": 0.68, "learning_rate": 6.553030303030303e-05, "loss": 0.614, "step": 2940 },
    { "epoch": 0.68, "learning_rate": 6.458333333333334e-05, "loss": 0.6162, "step": 2960 },
    { "epoch": 0.69, "learning_rate": 6.363636363636364e-05, "loss": 0.6087, "step": 2980 },
    { "epoch": 0.69, "learning_rate": 6.268939393939395e-05, "loss": 0.6215, "step": 3000 }
  ],
  "logging_steps": 20,
  "max_steps": 4324,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 4.765794561611612e+17,
  "trial_name": null,
  "trial_params": null
}