|
{ |
|
"best_metric": 0.004768455401062965, |
|
"best_model_checkpoint": "/mnt/ml_drive/kcardenas/limbxy_hands/checkpoint-1020", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 1020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 556763.0, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2302, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 1358784.625, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1563, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.14388924837112427, |
|
"eval_rmse": 0.37932735681533813, |
|
"eval_runtime": 4.9465, |
|
"eval_samples_per_second": 114.83, |
|
"eval_steps_per_second": 1.819, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 4929205.5, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1396, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.9607843137254903, |
|
"grad_norm": 3301384.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.066, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.06285522878170013, |
|
"eval_rmse": 0.2507094442844391, |
|
"eval_runtime": 5.0622, |
|
"eval_samples_per_second": 112.205, |
|
"eval_steps_per_second": 1.778, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.450980392156863, |
|
"grad_norm": 1424321.875, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0443, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 677306.75, |
|
"learning_rate": 3e-05, |
|
"loss": 0.031, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.024024562910199165, |
|
"eval_rmse": 0.15499858558177948, |
|
"eval_runtime": 5.0919, |
|
"eval_samples_per_second": 111.551, |
|
"eval_steps_per_second": 1.768, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 3.431372549019608, |
|
"grad_norm": 1847882.875, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0227, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.9215686274509802, |
|
"grad_norm": 2230610.75, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0189, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.022217219695448875, |
|
"eval_rmse": 0.1490544229745865, |
|
"eval_runtime": 5.1067, |
|
"eval_samples_per_second": 111.227, |
|
"eval_steps_per_second": 1.762, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 4.411764705882353, |
|
"grad_norm": 2538623.25, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0294, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.901960784313726, |
|
"grad_norm": 1391400.75, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0362, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.07003403455018997, |
|
"eval_rmse": 0.2646394371986389, |
|
"eval_runtime": 5.0039, |
|
"eval_samples_per_second": 113.512, |
|
"eval_steps_per_second": 1.799, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 5.392156862745098, |
|
"grad_norm": 1861511.0, |
|
"learning_rate": 4.8376623376623384e-05, |
|
"loss": 0.0243, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"grad_norm": 591873.25, |
|
"learning_rate": 4.675324675324675e-05, |
|
"loss": 0.0106, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.011829150840640068, |
|
"eval_rmse": 0.10876189917325974, |
|
"eval_runtime": 5.0793, |
|
"eval_samples_per_second": 111.825, |
|
"eval_steps_per_second": 1.772, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 6.372549019607844, |
|
"grad_norm": 717852.0625, |
|
"learning_rate": 4.5129870129870135e-05, |
|
"loss": 0.011, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 6.862745098039216, |
|
"grad_norm": 5022134.0, |
|
"learning_rate": 4.3506493506493503e-05, |
|
"loss": 0.0217, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.04193717613816261, |
|
"eval_rmse": 0.20478568971157074, |
|
"eval_runtime": 5.0411, |
|
"eval_samples_per_second": 112.673, |
|
"eval_steps_per_second": 1.785, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 7.352941176470588, |
|
"grad_norm": 4874459.0, |
|
"learning_rate": 4.1883116883116886e-05, |
|
"loss": 0.0272, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 7.8431372549019605, |
|
"grad_norm": 2519686.0, |
|
"learning_rate": 4.025974025974026e-05, |
|
"loss": 0.0167, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.01697073131799698, |
|
"eval_rmse": 0.1302717626094818, |
|
"eval_runtime": 5.0791, |
|
"eval_samples_per_second": 111.83, |
|
"eval_steps_per_second": 1.772, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 1922916.0, |
|
"learning_rate": 3.8636363636363636e-05, |
|
"loss": 0.0125, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 8.823529411764707, |
|
"grad_norm": 1355185.75, |
|
"learning_rate": 3.701298701298702e-05, |
|
"loss": 0.0078, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.011065785773098469, |
|
"eval_rmse": 0.10519403964281082, |
|
"eval_runtime": 5.0743, |
|
"eval_samples_per_second": 111.938, |
|
"eval_steps_per_second": 1.774, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 9.313725490196079, |
|
"grad_norm": 3503622.25, |
|
"learning_rate": 3.5389610389610387e-05, |
|
"loss": 0.0128, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 9.803921568627452, |
|
"grad_norm": 2042047.625, |
|
"learning_rate": 3.376623376623377e-05, |
|
"loss": 0.0102, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.01578633300960064, |
|
"eval_rmse": 0.12564367055892944, |
|
"eval_runtime": 5.0736, |
|
"eval_samples_per_second": 111.953, |
|
"eval_steps_per_second": 1.774, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 10.294117647058824, |
|
"grad_norm": 1890998.625, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 0.0067, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 10.784313725490197, |
|
"grad_norm": 1245511.75, |
|
"learning_rate": 3.051948051948052e-05, |
|
"loss": 0.009, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.006434537936002016, |
|
"eval_rmse": 0.08021557331085205, |
|
"eval_runtime": 4.9055, |
|
"eval_samples_per_second": 115.79, |
|
"eval_steps_per_second": 1.835, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 11.27450980392157, |
|
"grad_norm": 1712539.625, |
|
"learning_rate": 2.8896103896103898e-05, |
|
"loss": 0.0044, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 11.764705882352942, |
|
"grad_norm": 3221122.25, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.0067, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.00743119465187192, |
|
"eval_rmse": 0.08620437234640121, |
|
"eval_runtime": 5.1361, |
|
"eval_samples_per_second": 110.589, |
|
"eval_steps_per_second": 1.752, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 12.254901960784313, |
|
"grad_norm": 862932.25, |
|
"learning_rate": 2.5649350649350652e-05, |
|
"loss": 0.0058, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 12.745098039215687, |
|
"grad_norm": 1713839.5, |
|
"learning_rate": 2.4025974025974027e-05, |
|
"loss": 0.0047, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.0054638562723994255, |
|
"eval_rmse": 0.07391790300607681, |
|
"eval_runtime": 5.1487, |
|
"eval_samples_per_second": 110.319, |
|
"eval_steps_per_second": 1.748, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 13.235294117647058, |
|
"grad_norm": 1322453.625, |
|
"learning_rate": 2.2402597402597402e-05, |
|
"loss": 0.0043, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 13.72549019607843, |
|
"grad_norm": 941169.0, |
|
"learning_rate": 2.077922077922078e-05, |
|
"loss": 0.0036, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.006842494942247868, |
|
"eval_rmse": 0.08271937817335129, |
|
"eval_runtime": 4.825, |
|
"eval_samples_per_second": 117.719, |
|
"eval_steps_per_second": 1.865, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 14.215686274509803, |
|
"grad_norm": 2176495.5, |
|
"learning_rate": 1.9155844155844156e-05, |
|
"loss": 0.0036, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 14.705882352941176, |
|
"grad_norm": 411098.53125, |
|
"learning_rate": 1.7532467532467535e-05, |
|
"loss": 0.0072, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.0075486451387405396, |
|
"eval_rmse": 0.08688294142484665, |
|
"eval_runtime": 5.1462, |
|
"eval_samples_per_second": 110.372, |
|
"eval_steps_per_second": 1.749, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 15.196078431372548, |
|
"grad_norm": 2002094.375, |
|
"learning_rate": 1.590909090909091e-05, |
|
"loss": 0.0037, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 15.686274509803921, |
|
"grad_norm": 2112328.75, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.006, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.005468303803354502, |
|
"eval_rmse": 0.073947973549366, |
|
"eval_runtime": 5.0395, |
|
"eval_samples_per_second": 112.709, |
|
"eval_steps_per_second": 1.786, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 16.176470588235293, |
|
"grad_norm": 1033486.875, |
|
"learning_rate": 1.2662337662337662e-05, |
|
"loss": 0.0036, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 321803.625, |
|
"learning_rate": 1.103896103896104e-05, |
|
"loss": 0.0022, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.005201099440455437, |
|
"eval_rmse": 0.07211864739656448, |
|
"eval_runtime": 5.018, |
|
"eval_samples_per_second": 113.192, |
|
"eval_steps_per_second": 1.794, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 17.15686274509804, |
|
"grad_norm": 10877.533203125, |
|
"learning_rate": 9.415584415584416e-06, |
|
"loss": 0.0016, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 17.647058823529413, |
|
"grad_norm": 1244264.875, |
|
"learning_rate": 7.792207792207792e-06, |
|
"loss": 0.0051, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.005439546890556812, |
|
"eval_rmse": 0.07375328242778778, |
|
"eval_runtime": 5.1056, |
|
"eval_samples_per_second": 111.251, |
|
"eval_steps_per_second": 1.763, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 18.137254901960784, |
|
"grad_norm": 395613.0, |
|
"learning_rate": 6.168831168831169e-06, |
|
"loss": 0.0033, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 18.627450980392158, |
|
"grad_norm": 1329285.375, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.0023, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.0050988770090043545, |
|
"eval_rmse": 0.07140642404556274, |
|
"eval_runtime": 5.0748, |
|
"eval_samples_per_second": 111.926, |
|
"eval_steps_per_second": 1.773, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 19.11764705882353, |
|
"grad_norm": 406520.3125, |
|
"learning_rate": 2.922077922077922e-06, |
|
"loss": 0.0019, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 19.607843137254903, |
|
"grad_norm": 275064.03125, |
|
"learning_rate": 1.2987012987012988e-06, |
|
"loss": 0.0016, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.004768455401062965, |
|
"eval_rmse": 0.0690540000796318, |
|
"eval_runtime": 5.0305, |
|
"eval_samples_per_second": 112.912, |
|
"eval_steps_per_second": 1.789, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1020, |
|
"total_flos": 0.0, |
|
"train_loss": 0.02494959205973382, |
|
"train_runtime": 1869.8943, |
|
"train_samples_per_second": 34.398, |
|
"train_steps_per_second": 0.545 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|