{ "best_metric": 0.004768455401062965, "best_model_checkpoint": "/mnt/ml_drive/kcardenas/limbxy_hands/checkpoint-1020", "epoch": 20.0, "eval_steps": 500, "global_step": 1020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.49019607843137253, "grad_norm": 556763.0, "learning_rate": 5e-06, "loss": 0.2302, "step": 25 }, { "epoch": 0.9803921568627451, "grad_norm": 1358784.625, "learning_rate": 1e-05, "loss": 0.1563, "step": 50 }, { "epoch": 1.0, "eval_loss": 0.14388924837112427, "eval_rmse": 0.37932735681533813, "eval_runtime": 4.9465, "eval_samples_per_second": 114.83, "eval_steps_per_second": 1.819, "step": 51 }, { "epoch": 1.4705882352941178, "grad_norm": 4929205.5, "learning_rate": 1.5e-05, "loss": 0.1396, "step": 75 }, { "epoch": 1.9607843137254903, "grad_norm": 3301384.0, "learning_rate": 2e-05, "loss": 0.066, "step": 100 }, { "epoch": 2.0, "eval_loss": 0.06285522878170013, "eval_rmse": 0.2507094442844391, "eval_runtime": 5.0622, "eval_samples_per_second": 112.205, "eval_steps_per_second": 1.778, "step": 102 }, { "epoch": 2.450980392156863, "grad_norm": 1424321.875, "learning_rate": 2.5e-05, "loss": 0.0443, "step": 125 }, { "epoch": 2.9411764705882355, "grad_norm": 677306.75, "learning_rate": 3e-05, "loss": 0.031, "step": 150 }, { "epoch": 3.0, "eval_loss": 0.024024562910199165, "eval_rmse": 0.15499858558177948, "eval_runtime": 5.0919, "eval_samples_per_second": 111.551, "eval_steps_per_second": 1.768, "step": 153 }, { "epoch": 3.431372549019608, "grad_norm": 1847882.875, "learning_rate": 3.5e-05, "loss": 0.0227, "step": 175 }, { "epoch": 3.9215686274509802, "grad_norm": 2230610.75, "learning_rate": 4e-05, "loss": 0.0189, "step": 200 }, { "epoch": 4.0, "eval_loss": 0.022217219695448875, "eval_rmse": 0.1490544229745865, "eval_runtime": 5.1067, "eval_samples_per_second": 111.227, "eval_steps_per_second": 1.762, "step": 204 }, { "epoch": 4.411764705882353, "grad_norm": 2538623.25, "learning_rate": 4.5e-05, "loss": 0.0294, "step": 225 }, { "epoch": 4.901960784313726, "grad_norm": 1391400.75, "learning_rate": 5e-05, "loss": 0.0362, "step": 250 }, { "epoch": 5.0, "eval_loss": 0.07003403455018997, "eval_rmse": 0.2646394371986389, "eval_runtime": 5.0039, "eval_samples_per_second": 113.512, "eval_steps_per_second": 1.799, "step": 255 }, { "epoch": 5.392156862745098, "grad_norm": 1861511.0, "learning_rate": 4.8376623376623384e-05, "loss": 0.0243, "step": 275 }, { "epoch": 5.882352941176471, "grad_norm": 591873.25, "learning_rate": 4.675324675324675e-05, "loss": 0.0106, "step": 300 }, { "epoch": 6.0, "eval_loss": 0.011829150840640068, "eval_rmse": 0.10876189917325974, "eval_runtime": 5.0793, "eval_samples_per_second": 111.825, "eval_steps_per_second": 1.772, "step": 306 }, { "epoch": 6.372549019607844, "grad_norm": 717852.0625, "learning_rate": 4.5129870129870135e-05, "loss": 0.011, "step": 325 }, { "epoch": 6.862745098039216, "grad_norm": 5022134.0, "learning_rate": 4.3506493506493503e-05, "loss": 0.0217, "step": 350 }, { "epoch": 7.0, "eval_loss": 0.04193717613816261, "eval_rmse": 0.20478568971157074, "eval_runtime": 5.0411, "eval_samples_per_second": 112.673, "eval_steps_per_second": 1.785, "step": 357 }, { "epoch": 7.352941176470588, "grad_norm": 4874459.0, "learning_rate": 4.1883116883116886e-05, "loss": 0.0272, "step": 375 }, { "epoch": 7.8431372549019605, "grad_norm": 2519686.0, "learning_rate": 4.025974025974026e-05, "loss": 0.0167, "step": 400 }, { "epoch": 8.0, "eval_loss": 0.01697073131799698, "eval_rmse": 
0.1302717626094818, "eval_runtime": 5.0791, "eval_samples_per_second": 111.83, "eval_steps_per_second": 1.772, "step": 408 }, { "epoch": 8.333333333333334, "grad_norm": 1922916.0, "learning_rate": 3.8636363636363636e-05, "loss": 0.0125, "step": 425 }, { "epoch": 8.823529411764707, "grad_norm": 1355185.75, "learning_rate": 3.701298701298702e-05, "loss": 0.0078, "step": 450 }, { "epoch": 9.0, "eval_loss": 0.011065785773098469, "eval_rmse": 0.10519403964281082, "eval_runtime": 5.0743, "eval_samples_per_second": 111.938, "eval_steps_per_second": 1.774, "step": 459 }, { "epoch": 9.313725490196079, "grad_norm": 3503622.25, "learning_rate": 3.5389610389610387e-05, "loss": 0.0128, "step": 475 }, { "epoch": 9.803921568627452, "grad_norm": 2042047.625, "learning_rate": 3.376623376623377e-05, "loss": 0.0102, "step": 500 }, { "epoch": 10.0, "eval_loss": 0.01578633300960064, "eval_rmse": 0.12564367055892944, "eval_runtime": 5.0736, "eval_samples_per_second": 111.953, "eval_steps_per_second": 1.774, "step": 510 }, { "epoch": 10.294117647058824, "grad_norm": 1890998.625, "learning_rate": 3.2142857142857144e-05, "loss": 0.0067, "step": 525 }, { "epoch": 10.784313725490197, "grad_norm": 1245511.75, "learning_rate": 3.051948051948052e-05, "loss": 0.009, "step": 550 }, { "epoch": 11.0, "eval_loss": 0.006434537936002016, "eval_rmse": 0.08021557331085205, "eval_runtime": 4.9055, "eval_samples_per_second": 115.79, "eval_steps_per_second": 1.835, "step": 561 }, { "epoch": 11.27450980392157, "grad_norm": 1712539.625, "learning_rate": 2.8896103896103898e-05, "loss": 0.0044, "step": 575 }, { "epoch": 11.764705882352942, "grad_norm": 3221122.25, "learning_rate": 2.7272727272727273e-05, "loss": 0.0067, "step": 600 }, { "epoch": 12.0, "eval_loss": 0.00743119465187192, "eval_rmse": 0.08620437234640121, "eval_runtime": 5.1361, "eval_samples_per_second": 110.589, "eval_steps_per_second": 1.752, "step": 612 }, { "epoch": 12.254901960784313, "grad_norm": 862932.25, "learning_rate": 2.5649350649350652e-05, "loss": 0.0058, "step": 625 }, { "epoch": 12.745098039215687, "grad_norm": 1713839.5, "learning_rate": 2.4025974025974027e-05, "loss": 0.0047, "step": 650 }, { "epoch": 13.0, "eval_loss": 0.0054638562723994255, "eval_rmse": 0.07391790300607681, "eval_runtime": 5.1487, "eval_samples_per_second": 110.319, "eval_steps_per_second": 1.748, "step": 663 }, { "epoch": 13.235294117647058, "grad_norm": 1322453.625, "learning_rate": 2.2402597402597402e-05, "loss": 0.0043, "step": 675 }, { "epoch": 13.72549019607843, "grad_norm": 941169.0, "learning_rate": 2.077922077922078e-05, "loss": 0.0036, "step": 700 }, { "epoch": 14.0, "eval_loss": 0.006842494942247868, "eval_rmse": 0.08271937817335129, "eval_runtime": 4.825, "eval_samples_per_second": 117.719, "eval_steps_per_second": 1.865, "step": 714 }, { "epoch": 14.215686274509803, "grad_norm": 2176495.5, "learning_rate": 1.9155844155844156e-05, "loss": 0.0036, "step": 725 }, { "epoch": 14.705882352941176, "grad_norm": 411098.53125, "learning_rate": 1.7532467532467535e-05, "loss": 0.0072, "step": 750 }, { "epoch": 15.0, "eval_loss": 0.0075486451387405396, "eval_rmse": 0.08688294142484665, "eval_runtime": 5.1462, "eval_samples_per_second": 110.372, "eval_steps_per_second": 1.749, "step": 765 }, { "epoch": 15.196078431372548, "grad_norm": 2002094.375, "learning_rate": 1.590909090909091e-05, "loss": 0.0037, "step": 775 }, { "epoch": 15.686274509803921, "grad_norm": 2112328.75, "learning_rate": 1.4285714285714285e-05, "loss": 0.006, "step": 800 }, { "epoch": 16.0, "eval_loss": 
0.005468303803354502, "eval_rmse": 0.073947973549366, "eval_runtime": 5.0395, "eval_samples_per_second": 112.709, "eval_steps_per_second": 1.786, "step": 816 }, { "epoch": 16.176470588235293, "grad_norm": 1033486.875, "learning_rate": 1.2662337662337662e-05, "loss": 0.0036, "step": 825 }, { "epoch": 16.666666666666668, "grad_norm": 321803.625, "learning_rate": 1.103896103896104e-05, "loss": 0.0022, "step": 850 }, { "epoch": 17.0, "eval_loss": 0.005201099440455437, "eval_rmse": 0.07211864739656448, "eval_runtime": 5.018, "eval_samples_per_second": 113.192, "eval_steps_per_second": 1.794, "step": 867 }, { "epoch": 17.15686274509804, "grad_norm": 10877.533203125, "learning_rate": 9.415584415584416e-06, "loss": 0.0016, "step": 875 }, { "epoch": 17.647058823529413, "grad_norm": 1244264.875, "learning_rate": 7.792207792207792e-06, "loss": 0.0051, "step": 900 }, { "epoch": 18.0, "eval_loss": 0.005439546890556812, "eval_rmse": 0.07375328242778778, "eval_runtime": 5.1056, "eval_samples_per_second": 111.251, "eval_steps_per_second": 1.763, "step": 918 }, { "epoch": 18.137254901960784, "grad_norm": 395613.0, "learning_rate": 6.168831168831169e-06, "loss": 0.0033, "step": 925 }, { "epoch": 18.627450980392158, "grad_norm": 1329285.375, "learning_rate": 4.5454545454545455e-06, "loss": 0.0023, "step": 950 }, { "epoch": 19.0, "eval_loss": 0.0050988770090043545, "eval_rmse": 0.07140642404556274, "eval_runtime": 5.0748, "eval_samples_per_second": 111.926, "eval_steps_per_second": 1.773, "step": 969 }, { "epoch": 19.11764705882353, "grad_norm": 406520.3125, "learning_rate": 2.922077922077922e-06, "loss": 0.0019, "step": 975 }, { "epoch": 19.607843137254903, "grad_norm": 275064.03125, "learning_rate": 1.2987012987012988e-06, "loss": 0.0016, "step": 1000 }, { "epoch": 20.0, "eval_loss": 0.004768455401062965, "eval_rmse": 0.0690540000796318, "eval_runtime": 5.0305, "eval_samples_per_second": 112.912, "eval_steps_per_second": 1.789, "step": 1020 }, { "epoch": 20.0, "step": 1020, "total_flos": 0.0, "train_loss": 0.02494959205973382, "train_runtime": 1869.8943, "train_samples_per_second": 34.398, "train_steps_per_second": 0.545 } ], "logging_steps": 25, "max_steps": 1020, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }