|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 199.0662251655629, |
|
"eval_steps": 500, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.933774834437087e-07, |
|
"loss": 2.1111, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.9867549668874175e-06, |
|
"loss": 2.0911, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 2.9801324503311258e-06, |
|
"loss": 2.0488, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 3.973509933774835e-06, |
|
"loss": 1.9825, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 4.966887417218543e-06, |
|
"loss": 1.8997, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 5.9602649006622515e-06, |
|
"loss": 1.811, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 20.07, |
|
"learning_rate": 6.95364238410596e-06, |
|
"loss": 1.7265, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 23.07, |
|
"learning_rate": 7.94701986754967e-06, |
|
"loss": 1.6527, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 26.07, |
|
"learning_rate": 8.940397350993377e-06, |
|
"loss": 1.5955, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 29.07, |
|
"learning_rate": 9.933774834437086e-06, |
|
"loss": 1.5492, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 32.07, |
|
"learning_rate": 1.0927152317880796e-05, |
|
"loss": 1.508, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 35.07, |
|
"learning_rate": 1.1920529801324503e-05, |
|
"loss": 1.4678, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 38.07, |
|
"learning_rate": 1.2913907284768212e-05, |
|
"loss": 1.4278, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 1.390728476821192e-05, |
|
"loss": 1.3864, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 44.07, |
|
"learning_rate": 1.490066225165563e-05, |
|
"loss": 1.3418, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 47.07, |
|
"learning_rate": 1.589403973509934e-05, |
|
"loss": 1.2955, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 50.07, |
|
"learning_rate": 1.688741721854305e-05, |
|
"loss": 1.2506, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 53.07, |
|
"learning_rate": 1.7880794701986755e-05, |
|
"loss": 1.2051, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 56.07, |
|
"learning_rate": 1.8874172185430464e-05, |
|
"loss": 1.1617, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 59.07, |
|
"learning_rate": 1.9867549668874173e-05, |
|
"loss": 1.1198, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 62.07, |
|
"learning_rate": 2.0860927152317882e-05, |
|
"loss": 1.0815, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 65.07, |
|
"learning_rate": 2.185430463576159e-05, |
|
"loss": 1.0462, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 68.07, |
|
"learning_rate": 2.28476821192053e-05, |
|
"loss": 1.0112, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 71.07, |
|
"learning_rate": 2.3841059602649006e-05, |
|
"loss": 0.9762, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 74.07, |
|
"learning_rate": 2.4834437086092715e-05, |
|
"loss": 0.941, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 77.07, |
|
"learning_rate": 2.5827814569536424e-05, |
|
"loss": 0.9048, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 80.07, |
|
"learning_rate": 2.6821192052980134e-05, |
|
"loss": 0.8728, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 83.07, |
|
"learning_rate": 2.781456953642384e-05, |
|
"loss": 0.839, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 86.07, |
|
"learning_rate": 2.880794701986755e-05, |
|
"loss": 0.8041, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 89.07, |
|
"learning_rate": 2.980132450331126e-05, |
|
"loss": 0.7714, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 92.07, |
|
"learning_rate": 3.079470198675497e-05, |
|
"loss": 0.7396, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 95.07, |
|
"learning_rate": 3.178807947019868e-05, |
|
"loss": 0.7062, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 98.07, |
|
"learning_rate": 3.278145695364239e-05, |
|
"loss": 0.6731, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 101.07, |
|
"learning_rate": 3.37748344370861e-05, |
|
"loss": 0.6376, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 104.07, |
|
"learning_rate": 3.47682119205298e-05, |
|
"loss": 0.6083, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 107.07, |
|
"learning_rate": 3.576158940397351e-05, |
|
"loss": 0.5789, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 110.07, |
|
"learning_rate": 3.675496688741722e-05, |
|
"loss": 0.5487, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 113.07, |
|
"learning_rate": 3.774834437086093e-05, |
|
"loss": 0.5202, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 116.07, |
|
"learning_rate": 3.8741721854304637e-05, |
|
"loss": 0.4941, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 119.07, |
|
"learning_rate": 3.9735099337748346e-05, |
|
"loss": 0.4675, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 122.07, |
|
"learning_rate": 4.0728476821192055e-05, |
|
"loss": 0.4421, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 125.07, |
|
"learning_rate": 4.1721854304635764e-05, |
|
"loss": 0.4175, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 128.07, |
|
"learning_rate": 4.271523178807947e-05, |
|
"loss": 0.3933, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 131.07, |
|
"learning_rate": 4.370860927152318e-05, |
|
"loss": 0.3664, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 134.07, |
|
"learning_rate": 4.470198675496689e-05, |
|
"loss": 0.3446, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 137.07, |
|
"learning_rate": 4.56953642384106e-05, |
|
"loss": 0.32, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 140.07, |
|
"learning_rate": 4.668874172185431e-05, |
|
"loss": 0.305, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 143.07, |
|
"learning_rate": 4.768211920529801e-05, |
|
"loss": 0.2827, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 146.07, |
|
"learning_rate": 4.867549668874172e-05, |
|
"loss": 0.2646, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 149.07, |
|
"learning_rate": 4.966887417218543e-05, |
|
"loss": 0.2455, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 152.07, |
|
"learning_rate": 5.0662251655629146e-05, |
|
"loss": 0.2318, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 155.07, |
|
"learning_rate": 5.165562913907285e-05, |
|
"loss": 0.2141, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 158.07, |
|
"learning_rate": 5.264900662251656e-05, |
|
"loss": 0.1944, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 161.07, |
|
"learning_rate": 5.364238410596027e-05, |
|
"loss": 0.1807, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 164.07, |
|
"learning_rate": 5.4635761589403976e-05, |
|
"loss": 0.1731, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 167.07, |
|
"learning_rate": 5.562913907284768e-05, |
|
"loss": 0.1582, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 170.07, |
|
"learning_rate": 5.6622516556291394e-05, |
|
"loss": 0.1514, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 173.07, |
|
"learning_rate": 5.76158940397351e-05, |
|
"loss": 0.1379, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 176.07, |
|
"learning_rate": 5.860927152317881e-05, |
|
"loss": 0.1324, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 179.07, |
|
"learning_rate": 5.960264900662252e-05, |
|
"loss": 0.1194, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 182.07, |
|
"learning_rate": 6.0596026490066224e-05, |
|
"loss": 0.1099, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 185.07, |
|
"learning_rate": 6.158940397350994e-05, |
|
"loss": 0.1051, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 188.07, |
|
"learning_rate": 6.258278145695365e-05, |
|
"loss": 0.0955, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 191.07, |
|
"learning_rate": 6.357615894039736e-05, |
|
"loss": 0.094, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 194.07, |
|
"learning_rate": 6.456953642384105e-05, |
|
"loss": 0.0853, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 197.07, |
|
"learning_rate": 6.556291390728478e-05, |
|
"loss": 0.0786, |
|
"step": 1980 |
|
} |
|
], |
|
"logging_steps": 30, |
|
"max_steps": 30200, |
|
"num_train_epochs": 200, |
|
"save_steps": 500, |
|
"total_flos": 3.2310451765248e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|