|
{ |
|
"best_metric": 0.8609164953231812, |
|
"best_model_checkpoint": "/home/coder/qwen2/checkpoint-15900", |
|
"epoch": 0.9999803783832573, |
|
"eval_steps": 300, |
|
"global_step": 15926, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018836752073023807, |
|
"grad_norm": 0.2832883298397064, |
|
"learning_rate": 0.00019629420262546324, |
|
"loss": 1.1403, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.018836752073023807, |
|
"eval_loss": 1.0671226978302002, |
|
"eval_runtime": 300.1436, |
|
"eval_samples_per_second": 17.155, |
|
"eval_steps_per_second": 2.146, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.037673504146047615, |
|
"grad_norm": 0.33913707733154297, |
|
"learning_rate": 0.00019252559512593432, |
|
"loss": 1.0551, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.037673504146047615, |
|
"eval_loss": 1.0309056043624878, |
|
"eval_runtime": 300.0852, |
|
"eval_samples_per_second": 17.158, |
|
"eval_steps_per_second": 2.146, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05651025621907143, |
|
"grad_norm": 0.30800801515579224, |
|
"learning_rate": 0.0001887569876264054, |
|
"loss": 1.0185, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05651025621907143, |
|
"eval_loss": 1.0096280574798584, |
|
"eval_runtime": 299.931, |
|
"eval_samples_per_second": 17.167, |
|
"eval_steps_per_second": 2.147, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07534700829209523, |
|
"grad_norm": 0.317748099565506, |
|
"learning_rate": 0.00018498838012687648, |
|
"loss": 0.9952, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.07534700829209523, |
|
"eval_loss": 0.9937332272529602, |
|
"eval_runtime": 299.9997, |
|
"eval_samples_per_second": 17.163, |
|
"eval_steps_per_second": 2.147, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09418376036511904, |
|
"grad_norm": 0.36215266585350037, |
|
"learning_rate": 0.00018121977262734753, |
|
"loss": 0.9931, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09418376036511904, |
|
"eval_loss": 0.9828254580497742, |
|
"eval_runtime": 299.9242, |
|
"eval_samples_per_second": 17.168, |
|
"eval_steps_per_second": 2.147, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11302051243814286, |
|
"grad_norm": 0.3347627520561218, |
|
"learning_rate": 0.0001774511651278186, |
|
"loss": 0.9763, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.11302051243814286, |
|
"eval_loss": 0.972442626953125, |
|
"eval_runtime": 299.8563, |
|
"eval_samples_per_second": 17.172, |
|
"eval_steps_per_second": 2.148, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.13185726451116667, |
|
"grad_norm": 0.319148451089859, |
|
"learning_rate": 0.0001736825576282897, |
|
"loss": 0.9727, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.13185726451116667, |
|
"eval_loss": 0.965033769607544, |
|
"eval_runtime": 299.797, |
|
"eval_samples_per_second": 17.175, |
|
"eval_steps_per_second": 2.148, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15069401658419046, |
|
"grad_norm": 0.2830144166946411, |
|
"learning_rate": 0.00016991395012876077, |
|
"loss": 0.9674, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.15069401658419046, |
|
"eval_loss": 0.9570498466491699, |
|
"eval_runtime": 300.0087, |
|
"eval_samples_per_second": 17.163, |
|
"eval_steps_per_second": 2.147, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.16953076865721428, |
|
"grad_norm": 0.30889859795570374, |
|
"learning_rate": 0.00016614534262923185, |
|
"loss": 0.9601, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.16953076865721428, |
|
"eval_loss": 0.9515209197998047, |
|
"eval_runtime": 299.9801, |
|
"eval_samples_per_second": 17.164, |
|
"eval_steps_per_second": 2.147, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.18836752073023807, |
|
"grad_norm": 0.33287131786346436, |
|
"learning_rate": 0.0001623767351297029, |
|
"loss": 0.944, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18836752073023807, |
|
"eval_loss": 0.9454107284545898, |
|
"eval_runtime": 299.9554, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.147, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2072042728032619, |
|
"grad_norm": 0.31519943475723267, |
|
"learning_rate": 0.00015860812763017398, |
|
"loss": 0.9462, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2072042728032619, |
|
"eval_loss": 0.9404099583625793, |
|
"eval_runtime": 299.9447, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.147, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.22604102487628572, |
|
"grad_norm": 0.315909743309021, |
|
"learning_rate": 0.00015483952013064506, |
|
"loss": 0.9326, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.22604102487628572, |
|
"eval_loss": 0.9352145195007324, |
|
"eval_runtime": 299.9991, |
|
"eval_samples_per_second": 17.163, |
|
"eval_steps_per_second": 2.147, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.2448777769493095, |
|
"grad_norm": 0.2997918128967285, |
|
"learning_rate": 0.00015107091263111613, |
|
"loss": 0.9298, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.2448777769493095, |
|
"eval_loss": 0.9315630793571472, |
|
"eval_runtime": 299.95, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.147, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.26371452902233333, |
|
"grad_norm": 0.2730711102485657, |
|
"learning_rate": 0.0001473023051315872, |
|
"loss": 0.9345, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.26371452902233333, |
|
"eval_loss": 0.9274590015411377, |
|
"eval_runtime": 300.1027, |
|
"eval_samples_per_second": 17.157, |
|
"eval_steps_per_second": 2.146, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.28255128109535715, |
|
"grad_norm": 0.27338674664497375, |
|
"learning_rate": 0.0001435336976320583, |
|
"loss": 0.9378, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.28255128109535715, |
|
"eval_loss": 0.9236754775047302, |
|
"eval_runtime": 299.988, |
|
"eval_samples_per_second": 17.164, |
|
"eval_steps_per_second": 2.147, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3013880331683809, |
|
"grad_norm": 0.3145460784435272, |
|
"learning_rate": 0.00013976509013252937, |
|
"loss": 0.9235, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3013880331683809, |
|
"eval_loss": 0.9199886322021484, |
|
"eval_runtime": 299.9923, |
|
"eval_samples_per_second": 17.164, |
|
"eval_steps_per_second": 2.147, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.32022478524140474, |
|
"grad_norm": 0.2656671702861786, |
|
"learning_rate": 0.00013599648263300045, |
|
"loss": 0.9207, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.32022478524140474, |
|
"eval_loss": 0.9165565371513367, |
|
"eval_runtime": 299.8304, |
|
"eval_samples_per_second": 17.173, |
|
"eval_steps_per_second": 2.148, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.33906153731442856, |
|
"grad_norm": 0.2907351851463318, |
|
"learning_rate": 0.00013222787513347153, |
|
"loss": 0.9149, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.33906153731442856, |
|
"eval_loss": 0.9132035374641418, |
|
"eval_runtime": 299.8597, |
|
"eval_samples_per_second": 17.171, |
|
"eval_steps_per_second": 2.148, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.3578982893874524, |
|
"grad_norm": 0.39790818095207214, |
|
"learning_rate": 0.0001284592676339426, |
|
"loss": 0.9063, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.3578982893874524, |
|
"eval_loss": 0.9105966687202454, |
|
"eval_runtime": 300.0175, |
|
"eval_samples_per_second": 17.162, |
|
"eval_steps_per_second": 2.147, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.37673504146047615, |
|
"grad_norm": 0.3338871896266937, |
|
"learning_rate": 0.00012469066013441369, |
|
"loss": 0.9046, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.37673504146047615, |
|
"eval_loss": 0.9074862003326416, |
|
"eval_runtime": 299.8956, |
|
"eval_samples_per_second": 17.169, |
|
"eval_steps_per_second": 2.147, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.39557179353349997, |
|
"grad_norm": 0.2925800383090973, |
|
"learning_rate": 0.00012092205263488474, |
|
"loss": 0.907, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.39557179353349997, |
|
"eval_loss": 0.9044873118400574, |
|
"eval_runtime": 300.0697, |
|
"eval_samples_per_second": 17.159, |
|
"eval_steps_per_second": 2.146, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.4144085456065238, |
|
"grad_norm": 0.34801357984542847, |
|
"learning_rate": 0.00011715344513535582, |
|
"loss": 0.9042, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4144085456065238, |
|
"eval_loss": 0.9019830822944641, |
|
"eval_runtime": 299.9421, |
|
"eval_samples_per_second": 17.167, |
|
"eval_steps_per_second": 2.147, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4332452976795476, |
|
"grad_norm": 0.3444356918334961, |
|
"learning_rate": 0.0001133848376358269, |
|
"loss": 0.9019, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.4332452976795476, |
|
"eval_loss": 0.8995754718780518, |
|
"eval_runtime": 299.921, |
|
"eval_samples_per_second": 17.168, |
|
"eval_steps_per_second": 2.147, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.45208204975257144, |
|
"grad_norm": 0.3366526961326599, |
|
"learning_rate": 0.00010961623013629799, |
|
"loss": 0.9041, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.45208204975257144, |
|
"eval_loss": 0.8974488973617554, |
|
"eval_runtime": 299.9169, |
|
"eval_samples_per_second": 17.168, |
|
"eval_steps_per_second": 2.147, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.4709188018255952, |
|
"grad_norm": 0.34138697385787964, |
|
"learning_rate": 0.00010584762263676907, |
|
"loss": 0.9001, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4709188018255952, |
|
"eval_loss": 0.8951303958892822, |
|
"eval_runtime": 299.8867, |
|
"eval_samples_per_second": 17.17, |
|
"eval_steps_per_second": 2.147, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.489755553898619, |
|
"grad_norm": 0.35338446497917175, |
|
"learning_rate": 0.00010207901513724012, |
|
"loss": 0.8962, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.489755553898619, |
|
"eval_loss": 0.8931267261505127, |
|
"eval_runtime": 300.0707, |
|
"eval_samples_per_second": 17.159, |
|
"eval_steps_per_second": 2.146, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.5085923059716428, |
|
"grad_norm": 0.33024904131889343, |
|
"learning_rate": 9.83104076377112e-05, |
|
"loss": 0.901, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5085923059716428, |
|
"eval_loss": 0.8908406496047974, |
|
"eval_runtime": 300.0908, |
|
"eval_samples_per_second": 17.158, |
|
"eval_steps_per_second": 2.146, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5274290580446667, |
|
"grad_norm": 0.30269181728363037, |
|
"learning_rate": 9.454180013818228e-05, |
|
"loss": 0.8886, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.5274290580446667, |
|
"eval_loss": 0.8892831802368164, |
|
"eval_runtime": 299.9819, |
|
"eval_samples_per_second": 17.164, |
|
"eval_steps_per_second": 2.147, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.5462658101176905, |
|
"grad_norm": 0.32455185055732727, |
|
"learning_rate": 9.077319263865335e-05, |
|
"loss": 0.8823, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.5462658101176905, |
|
"eval_loss": 0.887444019317627, |
|
"eval_runtime": 300.1485, |
|
"eval_samples_per_second": 17.155, |
|
"eval_steps_per_second": 2.146, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.5651025621907143, |
|
"grad_norm": 0.32726097106933594, |
|
"learning_rate": 8.700458513912443e-05, |
|
"loss": 0.8773, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5651025621907143, |
|
"eval_loss": 0.885347306728363, |
|
"eval_runtime": 300.0218, |
|
"eval_samples_per_second": 17.162, |
|
"eval_steps_per_second": 2.147, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.5839393142637381, |
|
"grad_norm": 0.3211737275123596, |
|
"learning_rate": 8.323597763959551e-05, |
|
"loss": 0.8876, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.5839393142637381, |
|
"eval_loss": 0.8837311267852783, |
|
"eval_runtime": 299.8508, |
|
"eval_samples_per_second": 17.172, |
|
"eval_steps_per_second": 2.148, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.6027760663367618, |
|
"grad_norm": 0.3470586836338043, |
|
"learning_rate": 7.946737014006658e-05, |
|
"loss": 0.888, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.6027760663367618, |
|
"eval_loss": 0.8818086981773376, |
|
"eval_runtime": 299.9724, |
|
"eval_samples_per_second": 17.165, |
|
"eval_steps_per_second": 2.147, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.6216128184097857, |
|
"grad_norm": 0.3012458384037018, |
|
"learning_rate": 7.569876264053766e-05, |
|
"loss": 0.8833, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.6216128184097857, |
|
"eval_loss": 0.8803924322128296, |
|
"eval_runtime": 300.0969, |
|
"eval_samples_per_second": 17.158, |
|
"eval_steps_per_second": 2.146, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.6404495704828095, |
|
"grad_norm": 0.32445794343948364, |
|
"learning_rate": 7.194271716600717e-05, |
|
"loss": 0.8841, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.6404495704828095, |
|
"eval_loss": 0.8785931468009949, |
|
"eval_runtime": 299.8302, |
|
"eval_samples_per_second": 17.173, |
|
"eval_steps_per_second": 2.148, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.6592863225558333, |
|
"grad_norm": 0.33264926075935364, |
|
"learning_rate": 6.817410966647824e-05, |
|
"loss": 0.8852, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6592863225558333, |
|
"eval_loss": 0.8771566152572632, |
|
"eval_runtime": 300.0665, |
|
"eval_samples_per_second": 17.16, |
|
"eval_steps_per_second": 2.146, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6781230746288571, |
|
"grad_norm": 0.3084549307823181, |
|
"learning_rate": 6.440550216694932e-05, |
|
"loss": 0.8793, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.6781230746288571, |
|
"eval_loss": 0.8756351470947266, |
|
"eval_runtime": 300.155, |
|
"eval_samples_per_second": 17.154, |
|
"eval_steps_per_second": 2.146, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.696959826701881, |
|
"grad_norm": 0.3315499722957611, |
|
"learning_rate": 6.0636894667420396e-05, |
|
"loss": 0.8687, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.696959826701881, |
|
"eval_loss": 0.8744714260101318, |
|
"eval_runtime": 300.5183, |
|
"eval_samples_per_second": 17.134, |
|
"eval_steps_per_second": 2.143, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.7157965787749048, |
|
"grad_norm": 0.35962772369384766, |
|
"learning_rate": 5.686828716789147e-05, |
|
"loss": 0.8631, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.7157965787749048, |
|
"eval_loss": 0.8730462789535522, |
|
"eval_runtime": 299.7837, |
|
"eval_samples_per_second": 17.176, |
|
"eval_steps_per_second": 2.148, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.7346333308479286, |
|
"grad_norm": 0.33538639545440674, |
|
"learning_rate": 5.3099679668362547e-05, |
|
"loss": 0.879, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.7346333308479286, |
|
"eval_loss": 0.8714411854743958, |
|
"eval_runtime": 299.9502, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.147, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.7534700829209523, |
|
"grad_norm": 0.3434339165687561, |
|
"learning_rate": 4.933107216883362e-05, |
|
"loss": 0.8616, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7534700829209523, |
|
"eval_loss": 0.8703322410583496, |
|
"eval_runtime": 300.0139, |
|
"eval_samples_per_second": 17.163, |
|
"eval_steps_per_second": 2.147, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.7723068349939761, |
|
"grad_norm": 0.34114760160446167, |
|
"learning_rate": 4.55624646693047e-05, |
|
"loss": 0.8708, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.7723068349939761, |
|
"eval_loss": 0.8692737817764282, |
|
"eval_runtime": 299.9274, |
|
"eval_samples_per_second": 17.167, |
|
"eval_steps_per_second": 2.147, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.7911435870669999, |
|
"grad_norm": 0.40352341532707214, |
|
"learning_rate": 4.18064191947742e-05, |
|
"loss": 0.8724, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.7911435870669999, |
|
"eval_loss": 0.8681650161743164, |
|
"eval_runtime": 300.4231, |
|
"eval_samples_per_second": 17.139, |
|
"eval_steps_per_second": 2.144, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.8099803391400238, |
|
"grad_norm": 0.36962220072746277, |
|
"learning_rate": 3.8037811695245274e-05, |
|
"loss": 0.8672, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.8099803391400238, |
|
"eval_loss": 0.8672531247138977, |
|
"eval_runtime": 299.8101, |
|
"eval_samples_per_second": 17.174, |
|
"eval_steps_per_second": 2.148, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.8288170912130476, |
|
"grad_norm": 0.4042891561985016, |
|
"learning_rate": 3.426920419571635e-05, |
|
"loss": 0.8643, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.8288170912130476, |
|
"eval_loss": 0.8664665818214417, |
|
"eval_runtime": 299.8585, |
|
"eval_samples_per_second": 17.171, |
|
"eval_steps_per_second": 2.148, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.8476538432860714, |
|
"grad_norm": 0.2992730140686035, |
|
"learning_rate": 3.0500596696187428e-05, |
|
"loss": 0.8632, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8476538432860714, |
|
"eval_loss": 0.8653113842010498, |
|
"eval_runtime": 299.946, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.147, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.8664905953590952, |
|
"grad_norm": 0.31725963950157166, |
|
"learning_rate": 2.6731989196658503e-05, |
|
"loss": 0.8564, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.8664905953590952, |
|
"eval_loss": 0.8644812107086182, |
|
"eval_runtime": 299.8536, |
|
"eval_samples_per_second": 17.172, |
|
"eval_steps_per_second": 2.148, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.885327347432119, |
|
"grad_norm": 0.3101350963115692, |
|
"learning_rate": 2.296338169712958e-05, |
|
"loss": 0.861, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.885327347432119, |
|
"eval_loss": 0.8637036085128784, |
|
"eval_runtime": 300.356, |
|
"eval_samples_per_second": 17.143, |
|
"eval_steps_per_second": 2.144, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.9041640995051429, |
|
"grad_norm": 0.33058223128318787, |
|
"learning_rate": 1.9194774197600654e-05, |
|
"loss": 0.8543, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.9041640995051429, |
|
"eval_loss": 0.8630216121673584, |
|
"eval_runtime": 299.9507, |
|
"eval_samples_per_second": 17.166, |
|
"eval_steps_per_second": 2.147, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.9230008515781666, |
|
"grad_norm": 0.35784465074539185, |
|
"learning_rate": 1.5438728723070158e-05, |
|
"loss": 0.868, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.9230008515781666, |
|
"eval_loss": 0.8623820543289185, |
|
"eval_runtime": 299.933, |
|
"eval_samples_per_second": 17.167, |
|
"eval_steps_per_second": 2.147, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.9418376036511904, |
|
"grad_norm": 0.3938862383365631, |
|
"learning_rate": 1.1670121223541235e-05, |
|
"loss": 0.8607, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9418376036511904, |
|
"eval_loss": 0.861863911151886, |
|
"eval_runtime": 299.8746, |
|
"eval_samples_per_second": 17.171, |
|
"eval_steps_per_second": 2.148, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.9606743557242142, |
|
"grad_norm": 0.3867338001728058, |
|
"learning_rate": 7.90151372401231e-06, |
|
"loss": 0.8491, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.9606743557242142, |
|
"eval_loss": 0.8613755106925964, |
|
"eval_runtime": 299.919, |
|
"eval_samples_per_second": 17.168, |
|
"eval_steps_per_second": 2.147, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.979511107797238, |
|
"grad_norm": 0.3372841477394104, |
|
"learning_rate": 4.132906224483387e-06, |
|
"loss": 0.8643, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.979511107797238, |
|
"eval_loss": 0.8610811829566956, |
|
"eval_runtime": 299.8287, |
|
"eval_samples_per_second": 17.173, |
|
"eval_steps_per_second": 2.148, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.9983478598702619, |
|
"grad_norm": 0.3431134819984436, |
|
"learning_rate": 3.642987249544627e-07, |
|
"loss": 0.8633, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.9983478598702619, |
|
"eval_loss": 0.8609164953231812, |
|
"eval_runtime": 299.9008, |
|
"eval_samples_per_second": 17.169, |
|
"eval_steps_per_second": 2.147, |
|
"step": 15900 |
|
} |
|
], |
|
"logging_steps": 300, |
|
"max_steps": 15926, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.1684162719241994e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|