|
{ |
|
"best_metric": 28.8883, |
|
"best_model_checkpoint": "bin/liputan6-lora-8/checkpoint-252", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1928575038909912, |
|
"learning_rate": 0.0008, |
|
"loss": 3.1146, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 28.445, |
|
"eval_loss": 2.77751088142395, |
|
"eval_rouge1": 25.1926, |
|
"eval_rouge2": 10.831, |
|
"eval_rougeL": 21.8588, |
|
"eval_rougeLsum": 23.0603, |
|
"eval_runtime": 282.5418, |
|
"eval_samples_per_second": 3.539, |
|
"eval_steps_per_second": 0.113, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.15657377243042, |
|
"learning_rate": 0.0006, |
|
"loss": 2.3694, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 35.297, |
|
"eval_loss": 2.6682474613189697, |
|
"eval_rouge1": 27.3169, |
|
"eval_rouge2": 12.0479, |
|
"eval_rougeL": 23.5983, |
|
"eval_rougeLsum": 25.0305, |
|
"eval_runtime": 263.6368, |
|
"eval_samples_per_second": 3.793, |
|
"eval_steps_per_second": 0.121, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.2961732149124146, |
|
"learning_rate": 0.0004, |
|
"loss": 2.2462, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 31.547, |
|
"eval_loss": 2.610564708709717, |
|
"eval_rouge1": 27.8939, |
|
"eval_rouge2": 12.0631, |
|
"eval_rougeL": 23.6765, |
|
"eval_rougeLsum": 25.5954, |
|
"eval_runtime": 173.0596, |
|
"eval_samples_per_second": 5.778, |
|
"eval_steps_per_second": 0.185, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.227543830871582, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1582, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 31.688, |
|
"eval_loss": 2.6337075233459473, |
|
"eval_rouge1": 28.8883, |
|
"eval_rouge2": 12.7056, |
|
"eval_rougeL": 24.6102, |
|
"eval_rougeLsum": 26.4945, |
|
"eval_runtime": 153.1276, |
|
"eval_samples_per_second": 6.53, |
|
"eval_steps_per_second": 0.209, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.2467632293701172, |
|
"learning_rate": 0.0, |
|
"loss": 2.1188, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 32.655, |
|
"eval_loss": 2.638486385345459, |
|
"eval_rouge1": 28.3508, |
|
"eval_rouge2": 12.5031, |
|
"eval_rougeL": 24.1992, |
|
"eval_rougeLsum": 26.092, |
|
"eval_runtime": 177.4741, |
|
"eval_samples_per_second": 5.635, |
|
"eval_steps_per_second": 0.18, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 315, |
|
"total_flos": 3437376307200000.0, |
|
"train_loss": 2.4014356340680805, |
|
"train_runtime": 1409.9107, |
|
"train_samples_per_second": 3.546, |
|
"train_steps_per_second": 0.223 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 3437376307200000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|