|
{ |
|
"best_metric": 0.5975975975975976, |
|
"best_model_checkpoint": "outputs/xlm-roberta-base-reddit-indonesia-sarcastic/checkpoint-1854", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 2472, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.997611994132918e-06, |
|
"loss": 0.5174, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7725017717930546, |
|
"eval_f1": 0.46410684474123537, |
|
"eval_loss": 0.4617661237716675, |
|
"eval_precision": 0.5650406504065041, |
|
"eval_recall": 0.3937677053824363, |
|
"eval_runtime": 5.4985, |
|
"eval_samples_per_second": 256.614, |
|
"eval_steps_per_second": 4.183, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.990324230393262e-06, |
|
"loss": 0.4462, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7994330262225372, |
|
"eval_f1": 0.5428109854604201, |
|
"eval_loss": 0.44074001908302307, |
|
"eval_precision": 0.631578947368421, |
|
"eval_recall": 0.47592067988668557, |
|
"eval_runtime": 5.4944, |
|
"eval_samples_per_second": 256.808, |
|
"eval_steps_per_second": 4.186, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.97814344098319e-06, |
|
"loss": 0.3952, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8036853295535081, |
|
"eval_f1": 0.4990958408679928, |
|
"eval_loss": 0.46895381808280945, |
|
"eval_precision": 0.69, |
|
"eval_recall": 0.3909348441926346, |
|
"eval_runtime": 5.4972, |
|
"eval_samples_per_second": 256.677, |
|
"eval_steps_per_second": 4.184, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.961018240847407e-06, |
|
"loss": 0.3525, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8079376328844791, |
|
"eval_f1": 0.5152057245080501, |
|
"eval_loss": 0.49054116010665894, |
|
"eval_precision": 0.6990291262135923, |
|
"eval_recall": 0.40793201133144474, |
|
"eval_runtime": 5.492, |
|
"eval_samples_per_second": 256.917, |
|
"eval_steps_per_second": 4.188, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.938997114658899e-06, |
|
"loss": 0.3102, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.81218993621545, |
|
"eval_f1": 0.5916795069337443, |
|
"eval_loss": 0.47409912943840027, |
|
"eval_precision": 0.6486486486486487, |
|
"eval_recall": 0.5439093484419264, |
|
"eval_runtime": 5.495, |
|
"eval_samples_per_second": 256.78, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.912196668799343e-06, |
|
"loss": 0.2645, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8100637845499645, |
|
"eval_f1": 0.5975975975975976, |
|
"eval_loss": 0.4963834285736084, |
|
"eval_precision": 0.6357827476038339, |
|
"eval_recall": 0.5637393767705382, |
|
"eval_runtime": 5.4944, |
|
"eval_samples_per_second": 256.807, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.880469337563785e-06, |
|
"loss": 0.2168, |
|
"step": 2163 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8079376328844791, |
|
"eval_f1": 0.5824345146379045, |
|
"eval_loss": 0.5215579271316528, |
|
"eval_precision": 0.6385135135135135, |
|
"eval_recall": 0.5354107648725213, |
|
"eval_runtime": 5.501, |
|
"eval_samples_per_second": 256.501, |
|
"eval_steps_per_second": 4.181, |
|
"step": 2163 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.843925572318254e-06, |
|
"loss": 0.1759, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8043940467753367, |
|
"eval_f1": 0.5818181818181818, |
|
"eval_loss": 0.6826359033584595, |
|
"eval_precision": 0.6254071661237784, |
|
"eval_recall": 0.5439093484419264, |
|
"eval_runtime": 5.4973, |
|
"eval_samples_per_second": 256.673, |
|
"eval_steps_per_second": 4.184, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2472, |
|
"total_flos": 5199600676024320.0, |
|
"train_loss": 0.3348430090351784, |
|
"train_runtime": 1124.687, |
|
"train_samples_per_second": 878.556, |
|
"train_steps_per_second": 27.474 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 5199600676024320.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|