|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 8823, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.5471698113207555e-06, |
|
"loss": 6.6425, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.5094339622641511e-05, |
|
"loss": 4.8785, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9999174617418052e-05, |
|
"loss": 2.0576, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9987722672633802e-05, |
|
"loss": 1.7927, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9962812964571567e-05, |
|
"loss": 1.7317, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9924479057334537e-05, |
|
"loss": 1.7022, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9872772603202818e-05, |
|
"loss": 1.6741, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9807763273035574e-05, |
|
"loss": 1.6608, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9729538662394363e-05, |
|
"loss": 1.6597, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.9638204173514217e-05, |
|
"loss": 1.6598, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.953388287328142e-05, |
|
"loss": 1.6501, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9416715327409453e-05, |
|
"loss": 1.637, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.9286859411036396e-05, |
|
"loss": 1.6317, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.914449009599919e-05, |
|
"loss": 1.6292, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.898979921507119e-05, |
|
"loss": 1.6279, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8822995203480823e-05, |
|
"loss": 1.6225, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.86443028180596e-05, |
|
"loss": 1.6234, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.8453962834397847e-05, |
|
"loss": 1.6073, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.8252231722416328e-05, |
|
"loss": 1.6119, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.8039381300790812e-05, |
|
"loss": 1.5936, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.781569837069528e-05, |
|
"loss": 1.6175, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.758148432935723e-05, |
|
"loss": 1.6093, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.7337054763945823e-05, |
|
"loss": 1.6081, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.7082739026340097e-05, |
|
"loss": 1.6064, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6818879789350134e-05, |
|
"loss": 1.6016, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6545832584989235e-05, |
|
"loss": 1.6062, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6263965325419206e-05, |
|
"loss": 1.6042, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5973657807214245e-05, |
|
"loss": 1.5967, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.5675301199611413e-05, |
|
"loss": 1.6095, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.536929751743723e-05, |
|
"loss": 1.5958, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5056059079420575e-05, |
|
"loss": 1.6012, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.4736007952621852e-05, |
|
"loss": 1.5872, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4409575383726852e-05, |
|
"loss": 1.5967, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.4077201217971817e-05, |
|
"loss": 1.5913, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.3739333306482481e-05, |
|
"loss": 1.5902, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3396426902825753e-05, |
|
"loss": 1.5908, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.3048944049587138e-05, |
|
"loss": 1.5883, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.2697352955800396e-05, |
|
"loss": 1.5907, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.2342127366068364e-05, |
|
"loss": 1.5864, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.1983745922224985e-05, |
|
"loss": 1.5912, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1622691518398636e-05, |
|
"loss": 1.59, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.1259450650345798e-05, |
|
"loss": 1.5837, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0894512759931785e-05, |
|
"loss": 1.5805, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0528369575641793e-05, |
|
"loss": 1.5916, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0161514450010882e-05, |
|
"loss": 1.5975, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.794441694865673e-06, |
|
"loss": 1.5885, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.427645915273446e-06, |
|
"loss": 1.5849, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.061621343096156e-06, |
|
"loss": 1.5754, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.696861171047268e-06, |
|
"loss": 1.5906, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.33385688814881e-06, |
|
"loss": 1.5837, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.97309761748402e-06, |
|
"loss": 1.5807, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.615069457137927e-06, |
|
"loss": 1.5907, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.260254825213902e-06, |
|
"loss": 1.5826, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.909131809808755e-06, |
|
"loss": 1.5796, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 6.562173524822188e-06, |
|
"loss": 1.5814, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.219847472468641e-06, |
|
"loss": 1.5861, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.882614913350499e-06, |
|
"loss": 1.5744, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.550930244941448e-06, |
|
"loss": 1.5797, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.2252403893173835e-06, |
|
"loss": 1.5857, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.9059841909599456e-06, |
|
"loss": 1.5728, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.593591825444028e-06, |
|
"loss": 1.5701, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.288484219806016e-06, |
|
"loss": 1.5846, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.991072485373858e-06, |
|
"loss": 1.5716, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.7017573638230296e-06, |
|
"loss": 1.5798, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.420928687204965e-06, |
|
"loss": 1.5739, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.1489648526753913e-06, |
|
"loss": 1.5845, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.8862323126304427e-06, |
|
"loss": 1.5808, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.6330850809374685e-06, |
|
"loss": 1.5704, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.389864255925913e-06, |
|
"loss": 1.5769, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.1568975607809895e-06, |
|
"loss": 1.5765, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.934498901959424e-06, |
|
"loss": 1.5665, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.722967946222277e-06, |
|
"loss": 1.5822, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.5225897168548032e-06, |
|
"loss": 1.5931, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.3336342096173239e-06, |
|
"loss": 1.5855, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.1563560289446819e-06, |
|
"loss": 1.5736, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.909940448844412e-07, |
|
"loss": 1.5942, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 8.377710712360631e-07, |
|
"loss": 1.5792, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.968935653247766e-07, |
|
"loss": 1.5715, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.685513498146533e-07, |
|
"loss": 1.5777, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.529173569357459e-07, |
|
"loss": 1.5795, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.5014739546990087e-07, |
|
"loss": 1.5799, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.603799408092389e-07, |
|
"loss": 1.5743, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8373594837017505e-07, |
|
"loss": 1.5779, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.2031869061438494e-07, |
|
"loss": 1.5689, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.02136178963242e-08, |
|
"loss": 1.5854, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.3488243324814044e-08, |
|
"loss": 1.5654, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.0192051793809221e-08, |
|
"loss": 1.5818, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.5643330492995953e-10, |
|
"loss": 1.5853, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 8823, |
|
"total_flos": 6.175868485067866e+18, |
|
"train_loss": 1.6989395520347432, |
|
"train_runtime": 13309.6743, |
|
"train_samples_per_second": 21.208, |
|
"train_steps_per_second": 0.663 |
|
} |
|
], |
|
"max_steps": 8823, |
|
"num_train_epochs": 3, |
|
"total_flos": 6.175868485067866e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|