File size: 2,063 Bytes
c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 a7a7444 c109064 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
{
"best_metric": 1.1194497346878052,
"best_model_checkpoint": "/kaggle/output/checkpoint-4000",
"epoch": 0.16297262059973924,
"eval_steps": 1000,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.7777777777777777e-11,
"loss": 1.029,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 2.7638888888888893e-08,
"loss": 1.181,
"step": 1000
},
{
"epoch": 0.04,
"eval_accuracy": 0.3273453093812375,
"eval_loss": 1.1529844999313354,
"eval_runtime": 54.2837,
"eval_samples_per_second": 92.293,
"eval_steps_per_second": 11.55,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 5.541666666666667e-08,
"loss": 1.1527,
"step": 2000
},
{
"epoch": 0.08,
"eval_accuracy": 0.33013972055888224,
"eval_loss": 1.1351025104522705,
"eval_runtime": 54.0518,
"eval_samples_per_second": 92.689,
"eval_steps_per_second": 11.6,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 8.316666666666666e-08,
"loss": 1.142,
"step": 3000
},
{
"epoch": 0.12,
"eval_accuracy": 0.3317365269461078,
"eval_loss": 1.127414345741272,
"eval_runtime": 54.0871,
"eval_samples_per_second": 92.628,
"eval_steps_per_second": 11.592,
"step": 3000
},
{
"epoch": 0.16,
"learning_rate": 1.1091666666666668e-07,
"loss": 1.1371,
"step": 4000
},
{
"epoch": 0.16,
"eval_accuracy": 0.331936127744511,
"eval_loss": 1.1194497346878052,
"eval_runtime": 54.3907,
"eval_samples_per_second": 92.111,
"eval_steps_per_second": 11.528,
"step": 4000
}
],
"logging_steps": 1000,
"max_steps": 10000000,
"num_train_epochs": 408,
"save_steps": 1000,
"total_flos": 8361420521472000.0,
"trial_name": null,
"trial_params": null
}
|