|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.981366459627329, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.6218, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.7417102966841187, |
|
"eval_loss": 0.573567271232605, |
|
"eval_runtime": 9.2285, |
|
"eval_samples_per_second": 62.09, |
|
"eval_steps_per_second": 1.95, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.9326121764495596e-05, |
|
"loss": 0.6103, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.7399650959860384, |
|
"eval_loss": 0.5730276703834534, |
|
"eval_runtime": 9.2303, |
|
"eval_samples_per_second": 62.078, |
|
"eval_steps_per_second": 1.95, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.665063509461097e-05, |
|
"loss": 0.6105, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.7277486910994765, |
|
"eval_loss": 0.5863298177719116, |
|
"eval_runtime": 9.2823, |
|
"eval_samples_per_second": 61.73, |
|
"eval_steps_per_second": 1.939, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.215604094671835e-05, |
|
"loss": 0.6261, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.7068062827225131, |
|
"eval_loss": 0.5964760184288025, |
|
"eval_runtime": 9.2516, |
|
"eval_samples_per_second": 61.935, |
|
"eval_steps_per_second": 1.946, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.621997950501156e-05, |
|
"loss": 0.5684, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.7521815008726004, |
|
"eval_loss": 0.567564845085144, |
|
"eval_runtime": 10.2074, |
|
"eval_samples_per_second": 56.136, |
|
"eval_steps_per_second": 1.763, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.9341204441673266e-05, |
|
"loss": 0.5878, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.6561954624781849, |
|
"eval_loss": 0.6583427786827087, |
|
"eval_runtime": 9.2811, |
|
"eval_samples_per_second": 61.739, |
|
"eval_steps_per_second": 1.939, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.2097677146869242e-05, |
|
"loss": 0.5274, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.7521815008726004, |
|
"eval_loss": 0.5735621452331543, |
|
"eval_runtime": 9.3011, |
|
"eval_samples_per_second": 61.605, |
|
"eval_steps_per_second": 1.935, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.509800584902108e-05, |
|
"loss": 0.581, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.7399650959860384, |
|
"eval_loss": 0.5575574636459351, |
|
"eval_runtime": 9.3052, |
|
"eval_samples_per_second": 61.579, |
|
"eval_steps_per_second": 1.934, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.930309757836517e-06, |
|
"loss": 0.527, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.7539267015706806, |
|
"eval_loss": 0.5575008988380432, |
|
"eval_runtime": 9.355, |
|
"eval_samples_per_second": 61.251, |
|
"eval_steps_per_second": 1.924, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.112804714676594e-06, |
|
"loss": 0.5228, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.7521815008726004, |
|
"eval_loss": 0.5663809776306152, |
|
"eval_runtime": 9.3904, |
|
"eval_samples_per_second": 61.02, |
|
"eval_steps_per_second": 1.917, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0502621921127776e-06, |
|
"loss": 0.4978, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.7539267015706806, |
|
"eval_loss": 0.5707576274871826, |
|
"eval_runtime": 9.7621, |
|
"eval_samples_per_second": 58.697, |
|
"eval_steps_per_second": 1.844, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0, |
|
"loss": 0.5051, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.7469458987783595, |
|
"eval_loss": 0.5715910792350769, |
|
"eval_runtime": 9.7356, |
|
"eval_samples_per_second": 58.856, |
|
"eval_steps_per_second": 1.849, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"step": 120, |
|
"total_flos": 3.5051816769866957e+18, |
|
"train_loss": 0.5655017614364624, |
|
"train_runtime": 483.5766, |
|
"train_samples_per_second": 31.962, |
|
"train_steps_per_second": 0.248 |
|
} |
|
], |
|
"max_steps": 120, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.5051816769866957e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|