komodo-7b-chat-adapter / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9833024118738405,
"eval_steps": 500,
"global_step": 201,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 9.523809523809525e-07,
"loss": 1.5933,
"step": 1
},
{
"epoch": 0.07,
"learning_rate": 4.761904761904762e-06,
"loss": 1.4782,
"step": 5
},
{
"epoch": 0.15,
"learning_rate": 9.523809523809525e-06,
"loss": 1.5256,
"step": 10
},
{
"epoch": 0.22,
"learning_rate": 1.4285714285714287e-05,
"loss": 1.4709,
"step": 15
},
{
"epoch": 0.3,
"learning_rate": 1.904761904761905e-05,
"loss": 1.5224,
"step": 20
},
{
"epoch": 0.37,
"learning_rate": 1.9975640502598243e-05,
"loss": 1.5039,
"step": 25
},
{
"epoch": 0.45,
"learning_rate": 1.9876883405951378e-05,
"loss": 1.4679,
"step": 30
},
{
"epoch": 0.52,
"learning_rate": 1.9702957262759964e-05,
"loss": 1.4341,
"step": 35
},
{
"epoch": 0.59,
"learning_rate": 1.945518575599317e-05,
"loss": 1.4291,
"step": 40
},
{
"epoch": 0.67,
"learning_rate": 1.913545457642601e-05,
"loss": 1.4145,
"step": 45
},
{
"epoch": 0.74,
"learning_rate": 1.874619707139396e-05,
"loss": 1.4082,
"step": 50
},
{
"epoch": 0.82,
"learning_rate": 1.8290375725550417e-05,
"loss": 1.3682,
"step": 55
},
{
"epoch": 0.89,
"learning_rate": 1.777145961456971e-05,
"loss": 1.3838,
"step": 60
},
{
"epoch": 0.96,
"learning_rate": 1.7193398003386514e-05,
"loss": 1.3204,
"step": 65
},
{
"epoch": 0.99,
"eval_loss": 1.3371310234069824,
"eval_runtime": 60.5562,
"eval_samples_per_second": 8.207,
"eval_steps_per_second": 2.064,
"step": 67
},
{
"epoch": 1.04,
"learning_rate": 1.6560590289905074e-05,
"loss": 1.3366,
"step": 70
},
{
"epoch": 1.11,
"learning_rate": 1.5877852522924733e-05,
"loss": 1.357,
"step": 75
},
{
"epoch": 1.19,
"learning_rate": 1.5150380749100545e-05,
"loss": 1.3083,
"step": 80
},
{
"epoch": 1.26,
"learning_rate": 1.4383711467890776e-05,
"loss": 1.3313,
"step": 85
},
{
"epoch": 1.34,
"learning_rate": 1.3583679495453e-05,
"loss": 1.3263,
"step": 90
},
{
"epoch": 1.41,
"learning_rate": 1.2756373558169992e-05,
"loss": 1.343,
"step": 95
},
{
"epoch": 1.48,
"learning_rate": 1.190808995376545e-05,
"loss": 1.3439,
"step": 100
},
{
"epoch": 1.56,
"learning_rate": 1.1045284632676535e-05,
"loss": 1.2764,
"step": 105
},
{
"epoch": 1.63,
"learning_rate": 1.0174524064372837e-05,
"loss": 1.3138,
"step": 110
},
{
"epoch": 1.71,
"learning_rate": 9.302435262558748e-06,
"loss": 1.3108,
"step": 115
},
{
"epoch": 1.78,
"learning_rate": 8.43565534959769e-06,
"loss": 1.2986,
"step": 120
},
{
"epoch": 1.86,
"learning_rate": 7.580781044003324e-06,
"loss": 1.2803,
"step": 125
},
{
"epoch": 1.93,
"learning_rate": 6.744318455428436e-06,
"loss": 1.3083,
"step": 130
},
{
"epoch": 1.99,
"eval_loss": 1.2949973344802856,
"eval_runtime": 60.6302,
"eval_samples_per_second": 8.197,
"eval_steps_per_second": 2.062,
"step": 134
},
{
"epoch": 2.0,
"learning_rate": 5.932633569242e-06,
"loss": 1.3041,
"step": 135
},
{
"epoch": 2.08,
"learning_rate": 5.151903797536631e-06,
"loss": 1.2992,
"step": 140
},
{
"epoch": 2.15,
"learning_rate": 4.408070965292534e-06,
"loss": 1.2984,
"step": 145
},
{
"epoch": 2.23,
"learning_rate": 3.7067960895016277e-06,
"loss": 1.284,
"step": 150
},
{
"epoch": 2.3,
"learning_rate": 3.0534162954100264e-06,
"loss": 1.3143,
"step": 155
},
{
"epoch": 2.37,
"learning_rate": 2.45290419777228e-06,
"loss": 1.3138,
"step": 160
},
{
"epoch": 2.45,
"learning_rate": 1.9098300562505266e-06,
"loss": 1.3192,
"step": 165
},
{
"epoch": 2.52,
"learning_rate": 1.4283269929788779e-06,
"loss": 1.3079,
"step": 170
},
{
"epoch": 2.6,
"learning_rate": 1.012059537008332e-06,
"loss": 1.3073,
"step": 175
},
{
"epoch": 2.67,
"learning_rate": 6.641957350279838e-07,
"loss": 1.3209,
"step": 180
},
{
"epoch": 2.75,
"learning_rate": 3.8738304061681107e-07,
"loss": 1.3137,
"step": 185
},
{
"epoch": 2.82,
"learning_rate": 1.8372816552336025e-07,
"loss": 1.2787,
"step": 190
},
{
"epoch": 2.89,
"learning_rate": 5.4781046317267103e-08,
"loss": 1.2959,
"step": 195
},
{
"epoch": 2.97,
"learning_rate": 1.5230484360873043e-09,
"loss": 1.2921,
"step": 200
},
{
"epoch": 2.98,
"eval_loss": 1.2931873798370361,
"eval_runtime": 60.4523,
"eval_samples_per_second": 8.221,
"eval_steps_per_second": 2.068,
"step": 201
},
{
"epoch": 2.98,
"step": 201,
"total_flos": 1.0527084092262973e+18,
"train_loss": 1.3530203068434303,
"train_runtime": 5442.6995,
"train_samples_per_second": 2.376,
"train_steps_per_second": 0.037
}
],
"logging_steps": 5,
"max_steps": 201,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 1.0527084092262973e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
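
The log above ends at global step 201 after roughly three epochs, with eval_loss dropping from 1.337 to 1.293. As a minimal sketch (not part of the original file), this is one way to inspect such a trainer_state.json using only the Python standard library; the file path is an assumption and should point at the saved checkpoint directory:

```python
import json

# Assumed path: adjust to wherever the Trainer wrote its checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training-loss entries and eval entries;
# split them by which keys each record carries.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"global_step={state['global_step']}, epoch={state['epoch']:.2f}")
for e in eval_log:
    print(f"step {e['step']:>4}: eval_loss={e['eval_loss']:.4f}")
```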