mgfrantz's picture
commit files to HF hub
18cc8c4
{
"best_metric": 0.7537589073181152,
"best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000",
"epoch": 0.9667673716012085,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 2e-07,
"loss": 1.0361,
"step": 100
},
{
"epoch": 0.1,
"learning_rate": 3.98e-07,
"loss": 0.9751,
"step": 200
},
{
"epoch": 0.15,
"learning_rate": 5.979999999999999e-07,
"loss": 0.9807,
"step": 300
},
{
"epoch": 0.19,
"learning_rate": 7.94e-07,
"loss": 0.9513,
"step": 400
},
{
"epoch": 0.24,
"learning_rate": 9.94e-07,
"loss": 0.9237,
"step": 500
},
{
"epoch": 0.24,
"eval_loss": 0.9464540481567383,
"eval_runtime": 37.5769,
"eval_samples_per_second": 97.587,
"eval_steps_per_second": 24.403,
"step": 500
},
{
"epoch": 0.29,
"learning_rate": 9.955915919781764e-07,
"loss": 0.9367,
"step": 600
},
{
"epoch": 0.34,
"learning_rate": 9.81900265076038e-07,
"loss": 0.8993,
"step": 700
},
{
"epoch": 0.39,
"learning_rate": 9.594490241150311e-07,
"loss": 0.8586,
"step": 800
},
{
"epoch": 0.44,
"learning_rate": 9.282057505552949e-07,
"loss": 0.854,
"step": 900
},
{
"epoch": 0.48,
"learning_rate": 8.889381125453379e-07,
"loss": 0.8642,
"step": 1000
},
{
"epoch": 0.48,
"eval_loss": 0.8270628452301025,
"eval_runtime": 28.2214,
"eval_samples_per_second": 129.937,
"eval_steps_per_second": 32.493,
"step": 1000
},
{
"epoch": 0.53,
"learning_rate": 8.423819662432867e-07,
"loss": 0.8314,
"step": 1100
},
{
"epoch": 0.58,
"learning_rate": 7.894097508558568e-07,
"loss": 0.8317,
"step": 1200
},
{
"epoch": 0.63,
"learning_rate": 7.310141395581585e-07,
"loss": 0.7939,
"step": 1300
},
{
"epoch": 0.68,
"learning_rate": 6.682894372882701e-07,
"loss": 0.803,
"step": 1400
},
{
"epoch": 0.73,
"learning_rate": 6.024110740127264e-07,
"loss": 0.8029,
"step": 1500
},
{
"epoch": 0.73,
"eval_loss": 0.7635419368743896,
"eval_runtime": 28.2809,
"eval_samples_per_second": 129.663,
"eval_steps_per_second": 32.425,
"step": 1500
},
{
"epoch": 0.77,
"learning_rate": 5.346135777490083e-07,
"loss": 0.7959,
"step": 1600
},
{
"epoch": 0.82,
"learning_rate": 4.6616744011972247e-07,
"loss": 0.7708,
"step": 1700
},
{
"epoch": 0.87,
"learning_rate": 3.9835530796656867e-07,
"loss": 0.7636,
"step": 1800
},
{
"epoch": 0.92,
"learning_rate": 3.3244794718149894e-07,
"loss": 0.7864,
"step": 1900
},
{
"epoch": 0.97,
"learning_rate": 2.696804291810131e-07,
"loss": 0.8005,
"step": 2000
},
{
"epoch": 0.97,
"eval_loss": 0.7537589073181152,
"eval_runtime": 28.4034,
"eval_samples_per_second": 129.104,
"eval_steps_per_second": 32.285,
"step": 2000
}
],
"max_steps": 16544,
"num_train_epochs": 8,
"total_flos": 1828884830909760.0,
"trial_name": null,
"trial_params": null
}