|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 14403, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.479275151010207e-05, |
|
"loss": 0.8616, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9585503020204125e-05, |
|
"loss": 0.7086, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.437825453030619e-05, |
|
"loss": 0.6876, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.917100604040825e-05, |
|
"loss": 0.6658, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.396375755051031e-05, |
|
"loss": 0.6513, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8756509060612374e-05, |
|
"loss": 0.6115, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3549260570714433e-05, |
|
"loss": 0.5986, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.342012080816496e-06, |
|
"loss": 0.6065, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.134763590918559e-06, |
|
"loss": 0.5997, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 20.0, |
|
"eval_loss": 0.7166666984558105, |
|
"eval_rouge1": 15.1597, |
|
"eval_rouge2": 11.4975, |
|
"eval_rougeL": 14.0709, |
|
"eval_rougeLsum": 14.9737, |
|
"eval_runtime": 13.0755, |
|
"eval_samples_per_second": 6.501, |
|
"eval_steps_per_second": 3.289, |
|
"step": 4801 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.396375755051031e-05, |
|
"loss": 0.535, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.136013330556134e-05, |
|
"loss": 0.5328, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.8756509060612374e-05, |
|
"loss": 0.555, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6152884815663406e-05, |
|
"loss": 0.5381, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.3549260570714433e-05, |
|
"loss": 0.5375, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.0945636325765466e-05, |
|
"loss": 0.5333, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.342012080816496e-06, |
|
"loss": 0.5291, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 5.738387835867528e-06, |
|
"loss": 0.5196, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.134763590918559e-06, |
|
"loss": 0.4995, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.311393459695897e-07, |
|
"loss": 0.5057, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 20.0, |
|
"eval_loss": 0.6984732747077942, |
|
"eval_rouge1": 15.2746, |
|
"eval_rouge2": 11.537, |
|
"eval_rougeL": 14.1481, |
|
"eval_rougeLsum": 15.1267, |
|
"eval_runtime": 13.3919, |
|
"eval_samples_per_second": 6.347, |
|
"eval_steps_per_second": 3.211, |
|
"step": 9602 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.528501006734708e-05, |
|
"loss": 0.4867, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3549260570714433e-05, |
|
"loss": 0.4811, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.181351107408179e-05, |
|
"loss": 0.4842, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0077761577449144e-05, |
|
"loss": 0.4909, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.342012080816496e-06, |
|
"loss": 0.4659, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.606262584183851e-06, |
|
"loss": 0.4747, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.870513087551205e-06, |
|
"loss": 0.473, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.134763590918559e-06, |
|
"loss": 0.4484, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3990140942859125e-06, |
|
"loss": 0.4804, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 20.0, |
|
"eval_loss": 0.6930598020553589, |
|
"eval_rouge1": 15.2658, |
|
"eval_rouge2": 11.5104, |
|
"eval_rougeL": 14.117, |
|
"eval_rougeLsum": 15.1103, |
|
"eval_runtime": 13.632, |
|
"eval_samples_per_second": 6.235, |
|
"eval_steps_per_second": 3.154, |
|
"step": 14403 |
|
} |
|
], |
|
"max_steps": 14403, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.113119515662336e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|