|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.1006036217303823, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002012072434607646, |
|
"grad_norm": 46.70955276489258, |
|
"learning_rate": 6.5e-06, |
|
"loss": 1.4376, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004024144869215292, |
|
"grad_norm": 25.710079193115234, |
|
"learning_rate": 1.3e-05, |
|
"loss": 1.2619, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006036217303822937, |
|
"grad_norm": 26.04344940185547, |
|
"learning_rate": 1.9499999999999996e-05, |
|
"loss": 1.2208, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008048289738430584, |
|
"grad_norm": 8.251016616821289, |
|
"learning_rate": 2.6e-05, |
|
"loss": 1.0171, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01006036217303823, |
|
"grad_norm": 4.199310302734375, |
|
"learning_rate": 3.25e-05, |
|
"loss": 0.9699, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012072434607645875, |
|
"grad_norm": 6.134476661682129, |
|
"learning_rate": 3.899999999999999e-05, |
|
"loss": 0.9899, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014084507042253521, |
|
"grad_norm": 4.348274230957031, |
|
"learning_rate": 4.5499999999999995e-05, |
|
"loss": 0.9959, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01609657947686117, |
|
"grad_norm": 3.9475340843200684, |
|
"learning_rate": 5.2e-05, |
|
"loss": 0.8857, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.018108651911468814, |
|
"grad_norm": 3.1918065547943115, |
|
"learning_rate": 5.85e-05, |
|
"loss": 0.9008, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02012072434607646, |
|
"grad_norm": 2.7590606212615967, |
|
"learning_rate": 6.5e-05, |
|
"loss": 0.8703, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022132796780684104, |
|
"grad_norm": 3.0539116859436035, |
|
"learning_rate": 6.499932377117017e-05, |
|
"loss": 0.8258, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02414486921529175, |
|
"grad_norm": 2.821988344192505, |
|
"learning_rate": 6.499729511282135e-05, |
|
"loss": 0.9071, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.026156941649899398, |
|
"grad_norm": 2.776759147644043, |
|
"learning_rate": 6.499391410937425e-05, |
|
"loss": 0.8496, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.028169014084507043, |
|
"grad_norm": 2.897883176803589, |
|
"learning_rate": 6.498918090152627e-05, |
|
"loss": 0.8339, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.030181086519114688, |
|
"grad_norm": 2.511650562286377, |
|
"learning_rate": 6.498309568624549e-05, |
|
"loss": 0.8398, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03219315895372234, |
|
"grad_norm": 2.69217848777771, |
|
"learning_rate": 6.497565871676255e-05, |
|
"loss": 0.8373, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.03420523138832998, |
|
"grad_norm": 2.5202300548553467, |
|
"learning_rate": 6.496687030256012e-05, |
|
"loss": 0.8418, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03621730382293763, |
|
"grad_norm": 2.4313321113586426, |
|
"learning_rate": 6.495673080935999e-05, |
|
"loss": 0.8482, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03822937625754527, |
|
"grad_norm": 2.278684139251709, |
|
"learning_rate": 6.494524065910784e-05, |
|
"loss": 0.8059, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04024144869215292, |
|
"grad_norm": 2.2294907569885254, |
|
"learning_rate": 6.493240032995573e-05, |
|
"loss": 0.8085, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04225352112676056, |
|
"grad_norm": 2.446382999420166, |
|
"learning_rate": 6.491821035624218e-05, |
|
"loss": 0.8298, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04426559356136821, |
|
"grad_norm": 2.4150474071502686, |
|
"learning_rate": 6.490267132846992e-05, |
|
"loss": 0.9265, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04627766599597585, |
|
"grad_norm": 2.170037269592285, |
|
"learning_rate": 6.488578389328129e-05, |
|
"loss": 0.8323, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0482897384305835, |
|
"grad_norm": 2.010956287384033, |
|
"learning_rate": 6.486754875343145e-05, |
|
"loss": 0.8646, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05030181086519115, |
|
"grad_norm": 1.8752851486206055, |
|
"learning_rate": 6.484796666775895e-05, |
|
"loss": 0.8402, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.052313883299798795, |
|
"grad_norm": 1.8610150814056396, |
|
"learning_rate": 6.482703845115434e-05, |
|
"loss": 0.8256, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05432595573440644, |
|
"grad_norm": 1.8047019243240356, |
|
"learning_rate": 6.480476497452613e-05, |
|
"loss": 0.819, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.056338028169014086, |
|
"grad_norm": 1.6434446573257446, |
|
"learning_rate": 6.47811471647646e-05, |
|
"loss": 0.8671, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.05835010060362173, |
|
"grad_norm": 1.5787513256072998, |
|
"learning_rate": 6.475618600470322e-05, |
|
"loss": 0.7992, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.060362173038229376, |
|
"grad_norm": 1.4350961446762085, |
|
"learning_rate": 6.472988253307773e-05, |
|
"loss": 0.7524, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06237424547283702, |
|
"grad_norm": 1.391878604888916, |
|
"learning_rate": 6.470223784448298e-05, |
|
"loss": 0.7541, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06438631790744467, |
|
"grad_norm": 1.4117164611816406, |
|
"learning_rate": 6.467325308932728e-05, |
|
"loss": 0.7899, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06639839034205232, |
|
"grad_norm": 1.3123915195465088, |
|
"learning_rate": 6.464292947378462e-05, |
|
"loss": 0.7402, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06841046277665996, |
|
"grad_norm": 1.4149091243743896, |
|
"learning_rate": 6.46112682597444e-05, |
|
"loss": 0.8195, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.07042253521126761, |
|
"grad_norm": 1.2951886653900146, |
|
"learning_rate": 6.457827076475899e-05, |
|
"loss": 0.7179, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07243460764587525, |
|
"grad_norm": 1.3390341997146606, |
|
"learning_rate": 6.454393836198883e-05, |
|
"loss": 0.7973, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0744466800804829, |
|
"grad_norm": 1.307084560394287, |
|
"learning_rate": 6.450827248014535e-05, |
|
"loss": 0.7591, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07645875251509054, |
|
"grad_norm": 1.3605775833129883, |
|
"learning_rate": 6.447127460343148e-05, |
|
"loss": 0.8085, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07847082494969819, |
|
"grad_norm": 1.4956183433532715, |
|
"learning_rate": 6.443294627147988e-05, |
|
"loss": 0.735, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08048289738430583, |
|
"grad_norm": 1.371940016746521, |
|
"learning_rate": 6.439328907928889e-05, |
|
"loss": 0.8025, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08249496981891348, |
|
"grad_norm": 1.4716073274612427, |
|
"learning_rate": 6.435230467715615e-05, |
|
"loss": 0.7381, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.08450704225352113, |
|
"grad_norm": 1.3676444292068481, |
|
"learning_rate": 6.430999477060994e-05, |
|
"loss": 0.7601, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.08651911468812877, |
|
"grad_norm": 1.5072672367095947, |
|
"learning_rate": 6.426636112033818e-05, |
|
"loss": 0.8209, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08853118712273642, |
|
"grad_norm": 1.3186477422714233, |
|
"learning_rate": 6.422140554211511e-05, |
|
"loss": 0.777, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09054325955734406, |
|
"grad_norm": 1.4689198732376099, |
|
"learning_rate": 6.417512990672591e-05, |
|
"loss": 0.871, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0925553319919517, |
|
"grad_norm": 1.287394642829895, |
|
"learning_rate": 6.412753613988862e-05, |
|
"loss": 0.7281, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09456740442655935, |
|
"grad_norm": 1.1629823446273804, |
|
"learning_rate": 6.407862622217414e-05, |
|
"loss": 0.715, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.096579476861167, |
|
"grad_norm": 1.2146556377410889, |
|
"learning_rate": 6.402840218892381e-05, |
|
"loss": 0.6985, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.09859154929577464, |
|
"grad_norm": 1.3280084133148193, |
|
"learning_rate": 6.397686613016466e-05, |
|
"loss": 0.8195, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.1006036217303823, |
|
"grad_norm": 1.157867193222046, |
|
"learning_rate": 6.392402019052244e-05, |
|
"loss": 0.7529, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 497, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.25186799555584e+17, |
|
"train_batch_size": 15, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|