|
{ |
|
"best_metric": 0.9361702127659575, |
|
"best_model_checkpoint": "portrait_cosu_exp3/checkpoint-13", |
|
"epoch": 3.6923076923076925, |
|
"eval_steps": 500, |
|
"global_step": 24, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.851063829787234, |
|
"eval_confusion_matrix": [ |
|
[ |
|
17, |
|
3 |
|
], |
|
[ |
|
4, |
|
23 |
|
] |
|
], |
|
"eval_f1": 0.8514750663363719, |
|
"eval_loss": 0.2920527458190918, |
|
"eval_precision": 0.8526615228742889, |
|
"eval_recall": 0.851063829787234, |
|
"eval_runtime": 3.0689, |
|
"eval_samples_per_second": 15.315, |
|
"eval_steps_per_second": 0.978, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 7.153378486633301, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.5415, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9361702127659575, |
|
"eval_confusion_matrix": [ |
|
[ |
|
17, |
|
3 |
|
], |
|
[ |
|
0, |
|
27 |
|
] |
|
], |
|
"eval_f1": 0.9352622499319029, |
|
"eval_loss": 0.25635045766830444, |
|
"eval_precision": 0.9425531914893617, |
|
"eval_recall": 0.9361702127659575, |
|
"eval_runtime": 3.2937, |
|
"eval_samples_per_second": 14.27, |
|
"eval_steps_per_second": 0.911, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 2.9230769230769234, |
|
"eval_accuracy": 0.8723404255319149, |
|
"eval_confusion_matrix": [ |
|
[ |
|
19, |
|
1 |
|
], |
|
[ |
|
5, |
|
22 |
|
] |
|
], |
|
"eval_f1": 0.873036750483559, |
|
"eval_loss": 0.3604692220687866, |
|
"eval_precision": 0.8863706444650015, |
|
"eval_recall": 0.8723404255319149, |
|
"eval_runtime": 3.1924, |
|
"eval_samples_per_second": 14.722, |
|
"eval_steps_per_second": 0.94, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 9.893902778625488, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.378, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.6923076923076925, |
|
"eval_accuracy": 0.9148936170212766, |
|
"eval_confusion_matrix": [ |
|
[ |
|
19, |
|
1 |
|
], |
|
[ |
|
3, |
|
24 |
|
] |
|
], |
|
"eval_f1": 0.9152832982620216, |
|
"eval_loss": 0.25239235162734985, |
|
"eval_precision": 0.9189941972920695, |
|
"eval_recall": 0.9148936170212766, |
|
"eval_runtime": 2.9295, |
|
"eval_samples_per_second": 16.044, |
|
"eval_steps_per_second": 1.024, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 3.6923076923076925, |
|
"step": 24, |
|
"total_flos": 3.505150325906473e+17, |
|
"train_loss": 0.428266316652298, |
|
"train_runtime": 301.1045, |
|
"train_samples_per_second": 5.526, |
|
"train_steps_per_second": 0.08 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 24, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 3.505150325906473e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|