{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.007187350263536177, "eval_steps": 500, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011978917105893628, "grad_norm": 0.5244863033294678, "learning_rate": 4e-05, "loss": 1.8048, "step": 1 }, { "epoch": 0.00023957834211787255, "grad_norm": 0.48653438687324524, "learning_rate": 8e-05, "loss": 1.7626, "step": 2 }, { "epoch": 0.0003593675131768088, "grad_norm": 0.5601047277450562, "learning_rate": 0.00012, "loss": 1.7426, "step": 3 }, { "epoch": 0.0004791566842357451, "grad_norm": 0.44419175386428833, "learning_rate": 0.00016, "loss": 1.7164, "step": 4 }, { "epoch": 0.0005989458552946814, "grad_norm": 0.4116540849208832, "learning_rate": 0.0002, "loss": 1.5584, "step": 5 }, { "epoch": 0.0007187350263536176, "grad_norm": 0.4420616328716278, "learning_rate": 0.00019636363636363636, "loss": 1.7818, "step": 6 }, { "epoch": 0.0008385241974125539, "grad_norm": 0.32181981205940247, "learning_rate": 0.00019272727272727274, "loss": 1.5868, "step": 7 }, { "epoch": 0.0009583133684714902, "grad_norm": 0.42711836099624634, "learning_rate": 0.0001890909090909091, "loss": 1.7927, "step": 8 }, { "epoch": 0.0010781025395304265, "grad_norm": 0.38062113523483276, "learning_rate": 0.00018545454545454545, "loss": 1.538, "step": 9 }, { "epoch": 0.0011978917105893627, "grad_norm": 0.42514052987098694, "learning_rate": 0.00018181818181818183, "loss": 1.7095, "step": 10 }, { "epoch": 0.001317680881648299, "grad_norm": 0.46944913268089294, "learning_rate": 0.0001781818181818182, "loss": 1.5404, "step": 11 }, { "epoch": 0.0014374700527072352, "grad_norm": 0.38414081931114197, "learning_rate": 0.00017454545454545454, "loss": 1.4967, "step": 12 }, { "epoch": 0.0015572592237661715, "grad_norm": 0.3784991502761841, "learning_rate": 0.0001709090909090909, "loss": 1.6268, "step": 13 }, { "epoch": 0.0016770483948251077, "grad_norm": 0.4482596516609192, "learning_rate": 0.00016727272727272728, "loss": 1.7873, "step": 14 }, { "epoch": 0.0017968375658840442, "grad_norm": 0.39585763216018677, "learning_rate": 0.00016363636363636366, "loss": 1.4746, "step": 15 }, { "epoch": 0.0019166267369429804, "grad_norm": 0.3013208508491516, "learning_rate": 0.00016, "loss": 1.5157, "step": 16 }, { "epoch": 0.0020364159080019165, "grad_norm": 0.3526206612586975, "learning_rate": 0.00015636363636363637, "loss": 1.5365, "step": 17 }, { "epoch": 0.002156205079060853, "grad_norm": 0.3260374069213867, "learning_rate": 0.00015272727272727275, "loss": 1.5285, "step": 18 }, { "epoch": 0.002275994250119789, "grad_norm": 0.3397838771343231, "learning_rate": 0.0001490909090909091, "loss": 1.5258, "step": 19 }, { "epoch": 0.0023957834211787254, "grad_norm": 0.3514011800289154, "learning_rate": 0.00014545454545454546, "loss": 1.5212, "step": 20 }, { "epoch": 0.002515572592237662, "grad_norm": 0.2991046607494354, "learning_rate": 0.00014181818181818184, "loss": 1.4937, "step": 21 }, { "epoch": 0.002635361763296598, "grad_norm": 0.40670260787010193, "learning_rate": 0.0001381818181818182, "loss": 1.5486, "step": 22 }, { "epoch": 0.0027551509343555344, "grad_norm": 0.44339653849601746, "learning_rate": 0.00013454545454545455, "loss": 1.7084, "step": 23 }, { "epoch": 0.0028749401054144704, "grad_norm": 0.42197489738464355, "learning_rate": 0.00013090909090909093, "loss": 1.6023, "step": 24 }, { "epoch": 0.002994729276473407, "grad_norm": 0.2680957019329071, "learning_rate": 0.00012727272727272728, "loss": 1.5387, "step": 25 }, { "epoch": 0.003114518447532343, "grad_norm": 0.3399680554866791, "learning_rate": 0.00012363636363636364, "loss": 1.4657, "step": 26 }, { "epoch": 0.0032343076185912794, "grad_norm": 0.3926292061805725, "learning_rate": 0.00012, "loss": 1.4645, "step": 27 }, { "epoch": 0.0033540967896502154, "grad_norm": 0.35624024271965027, "learning_rate": 0.00011636363636363636, "loss": 1.6289, "step": 28 }, { "epoch": 0.003473885960709152, "grad_norm": 0.3598816990852356, "learning_rate": 0.00011272727272727272, "loss": 1.5246, "step": 29 }, { "epoch": 0.0035936751317680884, "grad_norm": 0.32090121507644653, "learning_rate": 0.00010909090909090909, "loss": 1.5073, "step": 30 }, { "epoch": 0.0037134643028270244, "grad_norm": 0.41118231415748596, "learning_rate": 0.00010545454545454545, "loss": 1.658, "step": 31 }, { "epoch": 0.003833253473885961, "grad_norm": 0.3706068694591522, "learning_rate": 0.00010181818181818181, "loss": 1.5052, "step": 32 }, { "epoch": 0.003953042644944897, "grad_norm": 0.29132771492004395, "learning_rate": 9.818181818181818e-05, "loss": 1.4859, "step": 33 }, { "epoch": 0.004072831816003833, "grad_norm": 0.4085879921913147, "learning_rate": 9.454545454545455e-05, "loss": 1.7738, "step": 34 }, { "epoch": 0.004192620987062769, "grad_norm": 0.2954878509044647, "learning_rate": 9.090909090909092e-05, "loss": 1.3454, "step": 35 }, { "epoch": 0.004312410158121706, "grad_norm": 0.38627880811691284, "learning_rate": 8.727272727272727e-05, "loss": 1.5715, "step": 36 }, { "epoch": 0.004432199329180642, "grad_norm": 0.3559410274028778, "learning_rate": 8.363636363636364e-05, "loss": 1.5292, "step": 37 }, { "epoch": 0.004551988500239578, "grad_norm": 0.367080420255661, "learning_rate": 8e-05, "loss": 1.5069, "step": 38 }, { "epoch": 0.004671777671298514, "grad_norm": 0.39985454082489014, "learning_rate": 7.636363636363637e-05, "loss": 1.6255, "step": 39 }, { "epoch": 0.004791566842357451, "grad_norm": 0.35325756669044495, "learning_rate": 7.272727272727273e-05, "loss": 1.4723, "step": 40 }, { "epoch": 0.004911356013416387, "grad_norm": 0.40606868267059326, "learning_rate": 6.90909090909091e-05, "loss": 1.5223, "step": 41 }, { "epoch": 0.005031145184475324, "grad_norm": 0.3160095810890198, "learning_rate": 6.545454545454546e-05, "loss": 1.5648, "step": 42 }, { "epoch": 0.005150934355534259, "grad_norm": 0.32667338848114014, "learning_rate": 6.181818181818182e-05, "loss": 1.4576, "step": 43 }, { "epoch": 0.005270723526593196, "grad_norm": 0.36070936918258667, "learning_rate": 5.818181818181818e-05, "loss": 1.4395, "step": 44 }, { "epoch": 0.005390512697652132, "grad_norm": 0.31305474042892456, "learning_rate": 5.4545454545454546e-05, "loss": 1.465, "step": 45 }, { "epoch": 0.005510301868711069, "grad_norm": 0.3811737298965454, "learning_rate": 5.090909090909091e-05, "loss": 1.4937, "step": 46 }, { "epoch": 0.005630091039770004, "grad_norm": 0.37481462955474854, "learning_rate": 4.7272727272727275e-05, "loss": 1.4718, "step": 47 }, { "epoch": 0.005749880210828941, "grad_norm": 0.35345253348350525, "learning_rate": 4.3636363636363636e-05, "loss": 1.4519, "step": 48 }, { "epoch": 0.005869669381887877, "grad_norm": 0.377297580242157, "learning_rate": 4e-05, "loss": 1.326, "step": 49 }, { "epoch": 0.005989458552946814, "grad_norm": 0.32999831438064575, "learning_rate": 3.6363636363636364e-05, "loss": 1.5174, "step": 50 }, { "epoch": 0.00610924772400575, "grad_norm": 0.39304623007774353, "learning_rate": 3.272727272727273e-05, "loss": 1.8864, "step": 51 }, { "epoch": 0.006229036895064686, "grad_norm": 0.3829090893268585, "learning_rate": 2.909090909090909e-05, "loss": 1.6814, "step": 52 }, { "epoch": 0.006348826066123622, "grad_norm": 0.408879816532135, "learning_rate": 2.5454545454545454e-05, "loss": 1.5488, "step": 53 }, { "epoch": 0.006468615237182559, "grad_norm": 0.35209059715270996, "learning_rate": 2.1818181818181818e-05, "loss": 1.5671, "step": 54 }, { "epoch": 0.006588404408241495, "grad_norm": 0.37863975763320923, "learning_rate": 1.8181818181818182e-05, "loss": 1.5382, "step": 55 }, { "epoch": 0.006708193579300431, "grad_norm": 0.3059345483779907, "learning_rate": 1.4545454545454545e-05, "loss": 1.4568, "step": 56 }, { "epoch": 0.006827982750359367, "grad_norm": 0.38418012857437134, "learning_rate": 1.0909090909090909e-05, "loss": 1.5161, "step": 57 }, { "epoch": 0.006947771921418304, "grad_norm": 0.3427809774875641, "learning_rate": 7.272727272727272e-06, "loss": 1.3619, "step": 58 }, { "epoch": 0.00706756109247724, "grad_norm": 0.3328557014465332, "learning_rate": 3.636363636363636e-06, "loss": 1.4661, "step": 59 }, { "epoch": 0.007187350263536177, "grad_norm": 0.3424610197544098, "learning_rate": 0.0, "loss": 1.3327, "step": 60 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8160664240988160.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }