|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9951749095295535, |
|
"eval_steps": 500, |
|
"global_step": 2484, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.606425702811245e-05, |
|
"loss": 1.1216, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.132530120481928e-05, |
|
"loss": 0.9435, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.738955823293173e-05, |
|
"loss": 0.7337, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 6.345381526104418e-05, |
|
"loss": 0.4553, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.951807228915663e-05, |
|
"loss": 0.2854, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.558232931726909e-05, |
|
"loss": 0.2014, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00011164658634538152, |
|
"loss": 0.1777, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00012771084337349396, |
|
"loss": 0.164, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014377510040160642, |
|
"loss": 0.1561, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00015983935742971888, |
|
"loss": 0.1513, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00017590361445783134, |
|
"loss": 0.1481, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019196787148594377, |
|
"loss": 0.1468, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019910514541387027, |
|
"loss": 0.1437, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019731543624161075, |
|
"loss": 0.1408, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019552572706935123, |
|
"loss": 0.1414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019373601789709173, |
|
"loss": 0.137, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001919463087248322, |
|
"loss": 0.1371, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001901565995525727, |
|
"loss": 0.1367, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001883668903803132, |
|
"loss": 0.1358, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001865771812080537, |
|
"loss": 0.1362, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001847874720357942, |
|
"loss": 0.1339, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001829977628635347, |
|
"loss": 0.1345, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00018120805369127517, |
|
"loss": 0.1309, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017941834451901567, |
|
"loss": 0.1315, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017762863534675615, |
|
"loss": 0.1353, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00017583892617449665, |
|
"loss": 0.1339, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00017404921700223716, |
|
"loss": 0.1319, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00017225950782997763, |
|
"loss": 0.1328, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00017046979865771814, |
|
"loss": 0.1332, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001686800894854586, |
|
"loss": 0.131, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00016689038031319912, |
|
"loss": 0.1278, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00016510067114093962, |
|
"loss": 0.1331, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001633109619686801, |
|
"loss": 0.1286, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001615212527964206, |
|
"loss": 0.1276, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00015973154362416107, |
|
"loss": 0.1287, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00015794183445190158, |
|
"loss": 0.1312, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00015615212527964208, |
|
"loss": 0.1298, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00015436241610738256, |
|
"loss": 0.1288, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00015257270693512303, |
|
"loss": 0.1297, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00015078299776286354, |
|
"loss": 0.129, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00014899328859060404, |
|
"loss": 0.1286, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00014720357941834454, |
|
"loss": 0.1289, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00014541387024608502, |
|
"loss": 0.1285, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001436241610738255, |
|
"loss": 0.1283, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00014183445190156602, |
|
"loss": 0.1247, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001400447427293065, |
|
"loss": 0.1279, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00013825503355704698, |
|
"loss": 0.1249, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00013646532438478748, |
|
"loss": 0.1244, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00013467561521252796, |
|
"loss": 0.1259, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013288590604026846, |
|
"loss": 0.1257, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013109619686800896, |
|
"loss": 0.1249, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00012930648769574944, |
|
"loss": 0.1244, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00012751677852348994, |
|
"loss": 0.125, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012572706935123044, |
|
"loss": 0.1213, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012393736017897092, |
|
"loss": 0.1235, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012214765100671142, |
|
"loss": 0.1265, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001203579418344519, |
|
"loss": 0.1245, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011856823266219239, |
|
"loss": 0.1245, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001167785234899329, |
|
"loss": 0.1249, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00011498881431767338, |
|
"loss": 0.1252, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00011319910514541387, |
|
"loss": 0.1236, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00011140939597315436, |
|
"loss": 0.1213, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00010961968680089485, |
|
"loss": 0.1254, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00010782997762863535, |
|
"loss": 0.1221, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00010604026845637584, |
|
"loss": 0.1207, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00010425055928411633, |
|
"loss": 0.1218, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00010246085011185682, |
|
"loss": 0.1244, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00010067114093959733, |
|
"loss": 0.1218, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.888143176733782e-05, |
|
"loss": 0.1245, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 9.70917225950783e-05, |
|
"loss": 0.1229, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 9.53020134228188e-05, |
|
"loss": 0.123, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 9.351230425055928e-05, |
|
"loss": 0.1214, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 9.172259507829977e-05, |
|
"loss": 0.1231, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.993288590604028e-05, |
|
"loss": 0.123, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 8.814317673378077e-05, |
|
"loss": 0.123, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 8.635346756152126e-05, |
|
"loss": 0.1179, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.456375838926175e-05, |
|
"loss": 0.1233, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.277404921700224e-05, |
|
"loss": 0.1195, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.098434004474274e-05, |
|
"loss": 0.1195, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.919463087248322e-05, |
|
"loss": 0.1204, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.740492170022372e-05, |
|
"loss": 0.1197, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.561521252796421e-05, |
|
"loss": 0.1195, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.382550335570471e-05, |
|
"loss": 0.1205, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 7.203579418344519e-05, |
|
"loss": 0.1205, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 7.024608501118568e-05, |
|
"loss": 0.116, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.845637583892618e-05, |
|
"loss": 0.1184, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.1225, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 6.487695749440716e-05, |
|
"loss": 0.1195, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 6.308724832214765e-05, |
|
"loss": 0.1189, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 6.129753914988815e-05, |
|
"loss": 0.1206, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.9507829977628635e-05, |
|
"loss": 0.1199, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.771812080536914e-05, |
|
"loss": 0.1189, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.592841163310962e-05, |
|
"loss": 0.1174, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 5.413870246085011e-05, |
|
"loss": 0.1197, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 5.234899328859061e-05, |
|
"loss": 0.119, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 5.05592841163311e-05, |
|
"loss": 0.1161, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.8769574944071586e-05, |
|
"loss": 0.1171, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.1201, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.519015659955257e-05, |
|
"loss": 0.1191, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.340044742729307e-05, |
|
"loss": 0.1193, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.161073825503356e-05, |
|
"loss": 0.119, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.9821029082774055e-05, |
|
"loss": 0.1188, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.8031319910514545e-05, |
|
"loss": 0.1165, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.6241610738255034e-05, |
|
"loss": 0.1204, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.4451901565995524e-05, |
|
"loss": 0.1196, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.266219239373602e-05, |
|
"loss": 0.1186, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.087248322147651e-05, |
|
"loss": 0.1135, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.9082774049217003e-05, |
|
"loss": 0.1196, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.7293064876957496e-05, |
|
"loss": 0.1161, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.550335570469799e-05, |
|
"loss": 0.1162, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.371364653243848e-05, |
|
"loss": 0.1165, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.192393736017897e-05, |
|
"loss": 0.1164, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.013422818791946e-05, |
|
"loss": 0.116, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.8344519015659954e-05, |
|
"loss": 0.1172, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.6554809843400447e-05, |
|
"loss": 0.117, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.4765100671140942e-05, |
|
"loss": 0.1127, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.2975391498881432e-05, |
|
"loss": 0.1157, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.1185682326621925e-05, |
|
"loss": 0.1193, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 9.395973154362418e-06, |
|
"loss": 0.1153, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 7.606263982102908e-06, |
|
"loss": 0.1164, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 5.8165548098434e-06, |
|
"loss": 0.1181, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.026845637583892e-06, |
|
"loss": 0.1166, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.237136465324385e-06, |
|
"loss": 0.1156, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.4742729306487696e-07, |
|
"loss": 0.1148, |
|
"step": 2480 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 2484, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 1.2566003090639028e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|