|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "resnet-18/checkpoint-22", |
|
"epoch": 9.090909090909092, |
|
"eval_steps": 500, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"eval_accuracy": 0.4155844155844156, |
|
"eval_loss": 1.0317904949188232, |
|
"eval_runtime": 0.3039, |
|
"eval_samples_per_second": 253.336, |
|
"eval_steps_per_second": 9.87, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 2.841965913772583, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.0893, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 0.9519639611244202, |
|
"eval_runtime": 0.2748, |
|
"eval_samples_per_second": 280.217, |
|
"eval_steps_per_second": 10.918, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"eval_accuracy": 0.8441558441558441, |
|
"eval_loss": 0.9016602039337158, |
|
"eval_runtime": 0.3011, |
|
"eval_samples_per_second": 255.709, |
|
"eval_steps_per_second": 9.963, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 3.1325621604919434, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.9912, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.8444482088088989, |
|
"eval_runtime": 0.2042, |
|
"eval_samples_per_second": 377.122, |
|
"eval_steps_per_second": 14.693, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 4.909090909090909, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.802738606929779, |
|
"eval_runtime": 0.221, |
|
"eval_samples_per_second": 348.461, |
|
"eval_steps_per_second": 13.576, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 5.454545454545454, |
|
"grad_norm": 4.148845195770264, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.9248, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.763073205947876, |
|
"eval_runtime": 0.2945, |
|
"eval_samples_per_second": 261.469, |
|
"eval_steps_per_second": 10.187, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 6.909090909090909, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.736884593963623, |
|
"eval_runtime": 0.2212, |
|
"eval_samples_per_second": 348.098, |
|
"eval_steps_per_second": 13.562, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"grad_norm": 3.343644142150879, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.8716, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.7155924439430237, |
|
"eval_runtime": 0.261, |
|
"eval_samples_per_second": 295.058, |
|
"eval_steps_per_second": 11.496, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 8.909090909090908, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.7137082815170288, |
|
"eval_runtime": 0.2787, |
|
"eval_samples_per_second": 276.309, |
|
"eval_steps_per_second": 10.765, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"grad_norm": 3.096590757369995, |
|
"learning_rate": 0.0, |
|
"loss": 0.8517, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.7116669416427612, |
|
"eval_runtime": 0.4991, |
|
"eval_samples_per_second": 154.284, |
|
"eval_steps_per_second": 6.011, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"step": 50, |
|
"total_flos": 6.343101899766374e+16, |
|
"train_loss": 0.9457284736633301, |
|
"train_runtime": 45.6784, |
|
"train_samples_per_second": 151.275, |
|
"train_steps_per_second": 1.095 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.343101899766374e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|