|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9833024118738405, |
|
"eval_steps": 500, |
|
"global_step": 201, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.523809523809525e-07, |
|
"loss": 1.5933, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 1.4782, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 1.5256, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 1.4709, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.904761904761905e-05, |
|
"loss": 1.5224, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.9975640502598243e-05, |
|
"loss": 1.5039, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9876883405951378e-05, |
|
"loss": 1.4679, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9702957262759964e-05, |
|
"loss": 1.4341, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.945518575599317e-05, |
|
"loss": 1.4291, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.913545457642601e-05, |
|
"loss": 1.4145, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.874619707139396e-05, |
|
"loss": 1.4082, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8290375725550417e-05, |
|
"loss": 1.3682, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.777145961456971e-05, |
|
"loss": 1.3838, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.7193398003386514e-05, |
|
"loss": 1.3204, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.3371310234069824, |
|
"eval_runtime": 60.5562, |
|
"eval_samples_per_second": 8.207, |
|
"eval_steps_per_second": 2.064, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.6560590289905074e-05, |
|
"loss": 1.3366, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.5877852522924733e-05, |
|
"loss": 1.357, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.5150380749100545e-05, |
|
"loss": 1.3083, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4383711467890776e-05, |
|
"loss": 1.3313, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.3583679495453e-05, |
|
"loss": 1.3263, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.2756373558169992e-05, |
|
"loss": 1.343, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.190808995376545e-05, |
|
"loss": 1.3439, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.1045284632676535e-05, |
|
"loss": 1.2764, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.0174524064372837e-05, |
|
"loss": 1.3138, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.302435262558748e-06, |
|
"loss": 1.3108, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.43565534959769e-06, |
|
"loss": 1.2986, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.580781044003324e-06, |
|
"loss": 1.2803, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.744318455428436e-06, |
|
"loss": 1.3083, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.2949973344802856, |
|
"eval_runtime": 60.6302, |
|
"eval_samples_per_second": 8.197, |
|
"eval_steps_per_second": 2.062, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.932633569242e-06, |
|
"loss": 1.3041, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 5.151903797536631e-06, |
|
"loss": 1.2992, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.408070965292534e-06, |
|
"loss": 1.2984, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.7067960895016277e-06, |
|
"loss": 1.284, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.0534162954100264e-06, |
|
"loss": 1.3143, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.45290419777228e-06, |
|
"loss": 1.3138, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.9098300562505266e-06, |
|
"loss": 1.3192, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.4283269929788779e-06, |
|
"loss": 1.3079, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.012059537008332e-06, |
|
"loss": 1.3073, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.641957350279838e-07, |
|
"loss": 1.3209, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.8738304061681107e-07, |
|
"loss": 1.3137, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8372816552336025e-07, |
|
"loss": 1.2787, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.4781046317267103e-08, |
|
"loss": 1.2959, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.5230484360873043e-09, |
|
"loss": 1.2921, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 1.2931873798370361, |
|
"eval_runtime": 60.4523, |
|
"eval_samples_per_second": 8.221, |
|
"eval_steps_per_second": 2.068, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"step": 201, |
|
"total_flos": 1.0527084092262973e+18, |
|
"train_loss": 1.3530203068434303, |
|
"train_runtime": 5442.6995, |
|
"train_samples_per_second": 2.376, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 201, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.0527084092262973e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|