w11wo's picture
End of training
aea591b
{
"best_metric": 0.7894736842105264,
"best_model_checkpoint": "outputs/xlm-roberta-base-twitter-indonesia-sarcastic/checkpoint-590",
"epoch": 13.0,
"eval_steps": 500,
"global_step": 767,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 9.997697221833061e-06,
"loss": 0.5641,
"step": 59
},
{
"epoch": 1.0,
"eval_accuracy": 0.75,
"eval_f1": 0.0,
"eval_loss": 0.5259643793106079,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.0571,
"eval_samples_per_second": 253.522,
"eval_steps_per_second": 4.73,
"step": 59
},
{
"epoch": 2.0,
"learning_rate": 9.990465155165683e-06,
"loss": 0.5317,
"step": 118
},
{
"epoch": 2.0,
"eval_accuracy": 0.75,
"eval_f1": 0.0,
"eval_loss": 0.5029988288879395,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.053,
"eval_samples_per_second": 254.519,
"eval_steps_per_second": 4.748,
"step": 118
},
{
"epoch": 3.0,
"learning_rate": 9.978555124138569e-06,
"loss": 0.4995,
"step": 177
},
{
"epoch": 3.0,
"eval_accuracy": 0.75,
"eval_f1": 0.0,
"eval_loss": 0.4655507504940033,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 1.0523,
"eval_samples_per_second": 254.669,
"eval_steps_per_second": 4.751,
"step": 177
},
{
"epoch": 4.0,
"learning_rate": 9.961568226140335e-06,
"loss": 0.4599,
"step": 236
},
{
"epoch": 4.0,
"eval_accuracy": 0.7686567164179104,
"eval_f1": 0.6025641025641025,
"eval_loss": 0.4502911865711212,
"eval_precision": 0.5280898876404494,
"eval_recall": 0.7014925373134329,
"eval_runtime": 1.057,
"eval_samples_per_second": 253.552,
"eval_steps_per_second": 4.73,
"step": 236
},
{
"epoch": 5.0,
"learning_rate": 9.93968485932029e-06,
"loss": 0.4082,
"step": 295
},
{
"epoch": 5.0,
"eval_accuracy": 0.8470149253731343,
"eval_f1": 0.6434782608695653,
"eval_loss": 0.378521591424942,
"eval_precision": 0.7708333333333334,
"eval_recall": 0.5522388059701493,
"eval_runtime": 1.0544,
"eval_samples_per_second": 254.183,
"eval_steps_per_second": 4.742,
"step": 295
},
{
"epoch": 6.0,
"learning_rate": 9.912926619919478e-06,
"loss": 0.3274,
"step": 354
},
{
"epoch": 6.0,
"eval_accuracy": 0.8619402985074627,
"eval_f1": 0.6991869918699187,
"eval_loss": 0.3604646623134613,
"eval_precision": 0.7678571428571429,
"eval_recall": 0.6417910447761194,
"eval_runtime": 1.0555,
"eval_samples_per_second": 253.908,
"eval_steps_per_second": 4.737,
"step": 354
},
{
"epoch": 7.0,
"learning_rate": 9.881895849126432e-06,
"loss": 0.2621,
"step": 413
},
{
"epoch": 7.0,
"eval_accuracy": 0.8619402985074627,
"eval_f1": 0.6837606837606838,
"eval_loss": 0.3764938712120056,
"eval_precision": 0.8,
"eval_recall": 0.5970149253731343,
"eval_runtime": 1.0562,
"eval_samples_per_second": 253.732,
"eval_steps_per_second": 4.734,
"step": 413
},
{
"epoch": 8.0,
"learning_rate": 9.845553233485602e-06,
"loss": 0.2332,
"step": 472
},
{
"epoch": 8.0,
"eval_accuracy": 0.8768656716417911,
"eval_f1": 0.759124087591241,
"eval_loss": 0.34082189202308655,
"eval_precision": 0.7428571428571429,
"eval_recall": 0.7761194029850746,
"eval_runtime": 1.0525,
"eval_samples_per_second": 254.642,
"eval_steps_per_second": 4.751,
"step": 472
},
{
"epoch": 9.0,
"learning_rate": 9.804428641814051e-06,
"loss": 0.1579,
"step": 531
},
{
"epoch": 9.0,
"eval_accuracy": 0.8731343283582089,
"eval_f1": 0.7213114754098361,
"eval_loss": 0.43820273876190186,
"eval_precision": 0.8,
"eval_recall": 0.6567164179104478,
"eval_runtime": 1.055,
"eval_samples_per_second": 254.04,
"eval_steps_per_second": 4.74,
"step": 531
},
{
"epoch": 10.0,
"learning_rate": 9.75856265911873e-06,
"loss": 0.1467,
"step": 590
},
{
"epoch": 10.0,
"eval_accuracy": 0.8805970149253731,
"eval_f1": 0.7894736842105264,
"eval_loss": 0.38548171520233154,
"eval_precision": 0.7058823529411765,
"eval_recall": 0.8955223880597015,
"eval_runtime": 1.053,
"eval_samples_per_second": 254.501,
"eval_steps_per_second": 4.748,
"step": 590
},
{
"epoch": 11.0,
"learning_rate": 9.708000549587091e-06,
"loss": 0.098,
"step": 649
},
{
"epoch": 11.0,
"eval_accuracy": 0.8805970149253731,
"eval_f1": 0.7500000000000001,
"eval_loss": 0.46932676434516907,
"eval_precision": 0.7868852459016393,
"eval_recall": 0.7164179104477612,
"eval_runtime": 1.058,
"eval_samples_per_second": 253.305,
"eval_steps_per_second": 4.726,
"step": 649
},
{
"epoch": 12.0,
"learning_rate": 9.653766360237254e-06,
"loss": 0.0929,
"step": 708
},
{
"epoch": 12.0,
"eval_accuracy": 0.8805970149253731,
"eval_f1": 0.7333333333333334,
"eval_loss": 0.6206381916999817,
"eval_precision": 0.8301886792452831,
"eval_recall": 0.6567164179104478,
"eval_runtime": 1.0526,
"eval_samples_per_second": 254.603,
"eval_steps_per_second": 4.75,
"step": 708
},
{
"epoch": 13.0,
"learning_rate": 9.594043625628571e-06,
"loss": 0.0555,
"step": 767
},
{
"epoch": 13.0,
"eval_accuracy": 0.8843283582089553,
"eval_f1": 0.7633587786259542,
"eval_loss": 0.7134280800819397,
"eval_precision": 0.78125,
"eval_recall": 0.746268656716418,
"eval_runtime": 1.054,
"eval_samples_per_second": 254.279,
"eval_steps_per_second": 4.744,
"step": 767
},
{
"epoch": 13.0,
"step": 767,
"total_flos": 1605898326389760.0,
"train_loss": 0.2951739639471221,
"train_runtime": 432.0494,
"train_samples_per_second": 434.673,
"train_steps_per_second": 13.656
}
],
"logging_steps": 500,
"max_steps": 5900,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 1605898326389760.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}