|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 7299, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.20550760378133992, |
|
"grad_norm": 0.022213317453861237, |
|
"learning_rate": 1.8629949308124403e-05, |
|
"loss": 0.1893, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.20550760378133992, |
|
"eval_accuracy": 0.9274409044193217, |
|
"eval_loss": 0.384206622838974, |
|
"eval_runtime": 66.4904, |
|
"eval_samples_per_second": 73.168, |
|
"eval_steps_per_second": 9.159, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41101520756267984, |
|
"grad_norm": 0.016410009935498238, |
|
"learning_rate": 1.7259898616248804e-05, |
|
"loss": 0.0675, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.41101520756267984, |
|
"eval_accuracy": 0.994655704008222, |
|
"eval_loss": 0.028500495478510857, |
|
"eval_runtime": 66.4366, |
|
"eval_samples_per_second": 73.228, |
|
"eval_steps_per_second": 9.167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6165228113440198, |
|
"grad_norm": 0.013172637671232224, |
|
"learning_rate": 1.58898479243732e-05, |
|
"loss": 0.0519, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6165228113440198, |
|
"eval_accuracy": 0.9852004110996917, |
|
"eval_loss": 0.06652244180440903, |
|
"eval_runtime": 37.1596, |
|
"eval_samples_per_second": 130.922, |
|
"eval_steps_per_second": 16.389, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8220304151253597, |
|
"grad_norm": 93.64683532714844, |
|
"learning_rate": 1.4519797232497603e-05, |
|
"loss": 0.0287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8220304151253597, |
|
"eval_accuracy": 0.9891058581706064, |
|
"eval_loss": 0.05017192289233208, |
|
"eval_runtime": 31.6255, |
|
"eval_samples_per_second": 153.832, |
|
"eval_steps_per_second": 19.257, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0275380189066996, |
|
"grad_norm": 0.004394204821437597, |
|
"learning_rate": 1.3149746540622004e-05, |
|
"loss": 0.0334, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0275380189066996, |
|
"eval_accuracy": 0.9905447070914697, |
|
"eval_loss": 0.04894111305475235, |
|
"eval_runtime": 47.4094, |
|
"eval_samples_per_second": 102.617, |
|
"eval_steps_per_second": 12.846, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2330456226880395, |
|
"grad_norm": 0.0032397848553955555, |
|
"learning_rate": 1.1779695848746405e-05, |
|
"loss": 0.0218, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2330456226880395, |
|
"eval_accuracy": 0.9819116135662899, |
|
"eval_loss": 0.10269968956708908, |
|
"eval_runtime": 45.531, |
|
"eval_samples_per_second": 106.85, |
|
"eval_steps_per_second": 13.375, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.4385532264693794, |
|
"grad_norm": 0.0003408812917768955, |
|
"learning_rate": 1.0409645156870804e-05, |
|
"loss": 0.0102, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.4385532264693794, |
|
"eval_accuracy": 0.9944501541623844, |
|
"eval_loss": 0.04378344491124153, |
|
"eval_runtime": 45.7881, |
|
"eval_samples_per_second": 106.25, |
|
"eval_steps_per_second": 13.3, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.6440608302507194, |
|
"grad_norm": 0.0026256833225488663, |
|
"learning_rate": 9.039594464995205e-06, |
|
"loss": 0.0038, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.6440608302507194, |
|
"eval_accuracy": 0.9704008221993834, |
|
"eval_loss": 0.19674982130527496, |
|
"eval_runtime": 45.5104, |
|
"eval_samples_per_second": 106.899, |
|
"eval_steps_per_second": 13.382, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.8495684340320593, |
|
"grad_norm": 0.0002484402502886951, |
|
"learning_rate": 7.669543773119606e-06, |
|
"loss": 0.012, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.8495684340320593, |
|
"eval_accuracy": 0.9852004110996917, |
|
"eval_loss": 0.07537884265184402, |
|
"eval_runtime": 45.1736, |
|
"eval_samples_per_second": 107.696, |
|
"eval_steps_per_second": 13.481, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.055076037813399, |
|
"grad_norm": 0.0006908943178132176, |
|
"learning_rate": 6.299493081244007e-06, |
|
"loss": 0.0061, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.055076037813399, |
|
"eval_accuracy": 0.9730729701952724, |
|
"eval_loss": 0.166794553399086, |
|
"eval_runtime": 45.1304, |
|
"eval_samples_per_second": 107.799, |
|
"eval_steps_per_second": 13.494, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.260583641594739, |
|
"grad_norm": 0.00021314685000106692, |
|
"learning_rate": 4.929442389368407e-06, |
|
"loss": 0.003, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.260583641594739, |
|
"eval_accuracy": 0.9954779033915725, |
|
"eval_loss": 0.03057803027331829, |
|
"eval_runtime": 45.2961, |
|
"eval_samples_per_second": 107.404, |
|
"eval_steps_per_second": 13.445, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.466091245376079, |
|
"grad_norm": 0.00010584539995761588, |
|
"learning_rate": 3.5593916974928076e-06, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.466091245376079, |
|
"eval_accuracy": 0.9926002055498458, |
|
"eval_loss": 0.05110383406281471, |
|
"eval_runtime": 45.54, |
|
"eval_samples_per_second": 106.829, |
|
"eval_steps_per_second": 13.373, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.671598849157419, |
|
"grad_norm": 9.755617793416604e-05, |
|
"learning_rate": 2.189341005617208e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.671598849157419, |
|
"eval_accuracy": 0.9921891058581707, |
|
"eval_loss": 0.05986848846077919, |
|
"eval_runtime": 45.0163, |
|
"eval_samples_per_second": 108.072, |
|
"eval_steps_per_second": 13.528, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.877106452938759, |
|
"grad_norm": 0.00011223769251955673, |
|
"learning_rate": 8.192903137416085e-07, |
|
"loss": 0.0003, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.877106452938759, |
|
"eval_accuracy": 0.9868448098663926, |
|
"eval_loss": 0.09568490833044052, |
|
"eval_runtime": 45.1871, |
|
"eval_samples_per_second": 107.663, |
|
"eval_steps_per_second": 13.477, |
|
"step": 7000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7299, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.195019939840466e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|