|
{ |
|
"best_metric": 1.1438716650009155, |
|
"best_model_checkpoint": "/data1/attanasiog/safetune/models/checkpoint-750", |
|
"epoch": 0.9996955859969558, |
|
"global_step": 821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2e-05, |
|
"loss": 3.1406, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1055, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.762616515159607, |
|
"eval_mse": 1.7626165039521147, |
|
"eval_runtime": 183.7464, |
|
"eval_samples_per_second": 15.891, |
|
"eval_steps_per_second": 3.973, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7607, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6818, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.4659080505371094, |
|
"eval_mse": 1.4659080196895005, |
|
"eval_runtime": 183.1621, |
|
"eval_samples_per_second": 15.942, |
|
"eval_steps_per_second": 3.986, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5873, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4062, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.6322834491729736, |
|
"eval_mse": 1.632283383560956, |
|
"eval_runtime": 183.308, |
|
"eval_samples_per_second": 15.929, |
|
"eval_steps_per_second": 3.982, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5638, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4777, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.3775594234466553, |
|
"eval_mse": 1.3775594118336603, |
|
"eval_runtime": 183.7408, |
|
"eval_samples_per_second": 15.892, |
|
"eval_steps_per_second": 3.973, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4016, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4106, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.304379940032959, |
|
"eval_mse": 1.3043799442360644, |
|
"eval_runtime": 183.2767, |
|
"eval_samples_per_second": 15.932, |
|
"eval_steps_per_second": 3.983, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3077, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2702, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.3791648149490356, |
|
"eval_mse": 1.3791648080316543, |
|
"eval_runtime": 183.7482, |
|
"eval_samples_per_second": 15.891, |
|
"eval_steps_per_second": 3.973, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3216, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4448, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.3047657012939453, |
|
"eval_mse": 1.3047656766562488, |
|
"eval_runtime": 183.5141, |
|
"eval_samples_per_second": 15.912, |
|
"eval_steps_per_second": 3.978, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3344, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3582, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.248475432395935, |
|
"eval_mse": 1.248475439073647, |
|
"eval_runtime": 183.4404, |
|
"eval_samples_per_second": 15.918, |
|
"eval_steps_per_second": 3.979, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1767, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2357, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.3708600997924805, |
|
"eval_mse": 1.3708599920592506, |
|
"eval_runtime": 183.3578, |
|
"eval_samples_per_second": 15.925, |
|
"eval_steps_per_second": 3.981, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2e-05, |
|
"loss": 1.3204, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1075, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.2537685632705688, |
|
"eval_mse": 1.2537685594483436, |
|
"eval_runtime": 183.4476, |
|
"eval_samples_per_second": 15.917, |
|
"eval_steps_per_second": 3.979, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4399, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2406, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.19956636428833, |
|
"eval_mse": 1.1995664079472905, |
|
"eval_runtime": 183.4122, |
|
"eval_samples_per_second": 15.92, |
|
"eval_steps_per_second": 3.98, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1667, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2309, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.2671302556991577, |
|
"eval_mse": 1.2671302648346725, |
|
"eval_runtime": 183.2517, |
|
"eval_samples_per_second": 15.934, |
|
"eval_steps_per_second": 3.984, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1541, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2e-05, |
|
"loss": 1.194, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.4325885772705078, |
|
"eval_mse": 1.4325886574488846, |
|
"eval_runtime": 183.2552, |
|
"eval_samples_per_second": 15.934, |
|
"eval_steps_per_second": 3.984, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1926, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2135, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.176251769065857, |
|
"eval_mse": 1.1762518139201403, |
|
"eval_runtime": 183.5032, |
|
"eval_samples_per_second": 15.913, |
|
"eval_steps_per_second": 3.978, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2e-05, |
|
"loss": 1.111, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2196, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.1438716650009155, |
|
"eval_mse": 1.1438715909390142, |
|
"eval_runtime": 183.3716, |
|
"eval_samples_per_second": 15.924, |
|
"eval_steps_per_second": 3.981, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1891, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1983, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.1710615158081055, |
|
"eval_mse": 1.1710615362599224, |
|
"eval_runtime": 183.5541, |
|
"eval_samples_per_second": 15.908, |
|
"eval_steps_per_second": 3.977, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 821, |
|
"total_flos": 2.448378334121165e+16, |
|
"train_loss": 1.4025647422428107, |
|
"train_runtime": 6674.4502, |
|
"train_samples_per_second": 3.937, |
|
"train_steps_per_second": 0.123 |
|
} |
|
], |
|
"max_steps": 821, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.448378334121165e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|