|
{ |
|
"best_metric": 0.8548387096774194, |
|
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-Ocular-Toxoplasmosis-DA/checkpoint-256", |
|
"epoch": 38.51851851851852, |
|
"eval_steps": 500, |
|
"global_step": 520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 4.370074272155762, |
|
"learning_rate": 9.615384615384616e-06, |
|
"loss": 1.3402, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.9629629629629629, |
|
"eval_accuracy": 0.5483870967741935, |
|
"eval_loss": 1.1682088375091553, |
|
"eval_runtime": 2.3908, |
|
"eval_samples_per_second": 25.932, |
|
"eval_steps_per_second": 0.837, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"grad_norm": 7.86944580078125, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 1.1725, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6290322580645161, |
|
"eval_loss": 1.0024793148040771, |
|
"eval_runtime": 3.324, |
|
"eval_samples_per_second": 18.652, |
|
"eval_steps_per_second": 0.602, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 4.94896936416626, |
|
"learning_rate": 2.8846153846153845e-05, |
|
"loss": 1.0671, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"grad_norm": 4.811951160430908, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.8824, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"eval_accuracy": 0.6612903225806451, |
|
"eval_loss": 0.7644360065460205, |
|
"eval_runtime": 3.388, |
|
"eval_samples_per_second": 18.3, |
|
"eval_steps_per_second": 0.59, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"grad_norm": 7.974093914031982, |
|
"learning_rate": 4.8076923076923084e-05, |
|
"loss": 0.7342, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7258064516129032, |
|
"eval_loss": 0.5839676260948181, |
|
"eval_runtime": 3.0543, |
|
"eval_samples_per_second": 20.299, |
|
"eval_steps_per_second": 0.655, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 8.472794532775879, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.6734, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.962962962962963, |
|
"eval_accuracy": 0.6451612903225806, |
|
"eval_loss": 0.6753666400909424, |
|
"eval_runtime": 2.3642, |
|
"eval_samples_per_second": 26.225, |
|
"eval_steps_per_second": 0.846, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 5.185185185185185, |
|
"grad_norm": 9.15774917602539, |
|
"learning_rate": 4.8076923076923084e-05, |
|
"loss": 0.6373, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.925925925925926, |
|
"grad_norm": 12.02450942993164, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.5167, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6935483870967742, |
|
"eval_loss": 0.5904402136802673, |
|
"eval_runtime": 2.3866, |
|
"eval_samples_per_second": 25.979, |
|
"eval_steps_per_second": 0.838, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 7.348090648651123, |
|
"learning_rate": 4.594017094017094e-05, |
|
"loss": 0.5009, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.962962962962963, |
|
"eval_accuracy": 0.6935483870967742, |
|
"eval_loss": 0.5549384355545044, |
|
"eval_runtime": 2.9982, |
|
"eval_samples_per_second": 20.679, |
|
"eval_steps_per_second": 0.667, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 7.407407407407407, |
|
"grad_norm": 5.642479419708252, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.4988, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6774193548387096, |
|
"eval_loss": 0.620449423789978, |
|
"eval_runtime": 2.4283, |
|
"eval_samples_per_second": 25.532, |
|
"eval_steps_per_second": 0.824, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 8.148148148148149, |
|
"grad_norm": 6.128896713256836, |
|
"learning_rate": 4.3803418803418805e-05, |
|
"loss": 0.4619, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 7.555347919464111, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 0.3856, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.962962962962964, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_loss": 0.44631102681159973, |
|
"eval_runtime": 2.3506, |
|
"eval_samples_per_second": 26.376, |
|
"eval_steps_per_second": 0.851, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 9.62962962962963, |
|
"grad_norm": 9.627432823181152, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4057, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7903225806451613, |
|
"eval_loss": 0.5231879353523254, |
|
"eval_runtime": 3.1544, |
|
"eval_samples_per_second": 19.655, |
|
"eval_steps_per_second": 0.634, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 10.37037037037037, |
|
"grad_norm": 8.669109344482422, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.3929, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.962962962962964, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.45801177620887756, |
|
"eval_runtime": 2.3878, |
|
"eval_samples_per_second": 25.965, |
|
"eval_steps_per_second": 0.838, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 11.11111111111111, |
|
"grad_norm": 6.289756774902344, |
|
"learning_rate": 3.952991452991453e-05, |
|
"loss": 0.3673, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 11.851851851851851, |
|
"grad_norm": 12.90579605102539, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.3638, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7741935483870968, |
|
"eval_loss": 0.5114619135856628, |
|
"eval_runtime": 3.3569, |
|
"eval_samples_per_second": 18.47, |
|
"eval_steps_per_second": 0.596, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 12.592592592592592, |
|
"grad_norm": 10.698553085327148, |
|
"learning_rate": 3.739316239316239e-05, |
|
"loss": 0.3248, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 12.962962962962964, |
|
"eval_accuracy": 0.7741935483870968, |
|
"eval_loss": 0.5312773585319519, |
|
"eval_runtime": 2.4335, |
|
"eval_samples_per_second": 25.478, |
|
"eval_steps_per_second": 0.822, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 6.529489994049072, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.2673, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7903225806451613, |
|
"eval_loss": 0.5203306674957275, |
|
"eval_runtime": 3.4828, |
|
"eval_samples_per_second": 17.802, |
|
"eval_steps_per_second": 0.574, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 14.074074074074074, |
|
"grad_norm": 6.994911193847656, |
|
"learning_rate": 3.525641025641026e-05, |
|
"loss": 0.3216, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 14.814814814814815, |
|
"grad_norm": 9.194233894348145, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.2922, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 14.962962962962964, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.4315454959869385, |
|
"eval_runtime": 2.3822, |
|
"eval_samples_per_second": 26.026, |
|
"eval_steps_per_second": 0.84, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 15.555555555555555, |
|
"grad_norm": 6.076256275177002, |
|
"learning_rate": 3.311965811965812e-05, |
|
"loss": 0.2803, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.4577220380306244, |
|
"eval_runtime": 2.9439, |
|
"eval_samples_per_second": 21.06, |
|
"eval_steps_per_second": 0.679, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 16.296296296296298, |
|
"grad_norm": 12.038761138916016, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.2735, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 16.962962962962962, |
|
"eval_accuracy": 0.8064516129032258, |
|
"eval_loss": 0.5466907024383545, |
|
"eval_runtime": 2.3229, |
|
"eval_samples_per_second": 26.691, |
|
"eval_steps_per_second": 0.861, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 17.037037037037038, |
|
"grad_norm": 8.897506713867188, |
|
"learning_rate": 3.098290598290599e-05, |
|
"loss": 0.2776, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 17.77777777777778, |
|
"grad_norm": 9.66178035736084, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.2586, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.5236416459083557, |
|
"eval_runtime": 3.4253, |
|
"eval_samples_per_second": 18.101, |
|
"eval_steps_per_second": 0.584, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 18.51851851851852, |
|
"grad_norm": 7.729655742645264, |
|
"learning_rate": 2.8846153846153845e-05, |
|
"loss": 0.2366, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 18.962962962962962, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_loss": 0.5075119137763977, |
|
"eval_runtime": 2.3943, |
|
"eval_samples_per_second": 25.895, |
|
"eval_steps_per_second": 0.835, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 19.25925925925926, |
|
"grad_norm": 11.543585777282715, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.252, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.877120494842529, |
|
"learning_rate": 2.670940170940171e-05, |
|
"loss": 0.2347, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.5178562998771667, |
|
"eval_runtime": 2.4124, |
|
"eval_samples_per_second": 25.701, |
|
"eval_steps_per_second": 0.829, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 20.74074074074074, |
|
"grad_norm": 7.83768892288208, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.2046, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 20.962962962962962, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.5427502393722534, |
|
"eval_runtime": 3.4728, |
|
"eval_samples_per_second": 17.853, |
|
"eval_steps_per_second": 0.576, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 21.48148148148148, |
|
"grad_norm": 7.919957637786865, |
|
"learning_rate": 2.4572649572649573e-05, |
|
"loss": 0.2289, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.57480788230896, |
|
"eval_runtime": 2.4021, |
|
"eval_samples_per_second": 25.811, |
|
"eval_steps_per_second": 0.833, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 8.665252685546875, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.2394, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 22.962962962962962, |
|
"grad_norm": 7.902819633483887, |
|
"learning_rate": 2.2435897435897437e-05, |
|
"loss": 0.2195, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 22.962962962962962, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_loss": 0.5968937277793884, |
|
"eval_runtime": 3.4133, |
|
"eval_samples_per_second": 18.164, |
|
"eval_steps_per_second": 0.586, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 23.703703703703702, |
|
"grad_norm": 9.844597816467285, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.2224, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_loss": 0.6092303991317749, |
|
"eval_runtime": 2.3949, |
|
"eval_samples_per_second": 25.888, |
|
"eval_steps_per_second": 0.835, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 24.444444444444443, |
|
"grad_norm": 6.439063549041748, |
|
"learning_rate": 2.02991452991453e-05, |
|
"loss": 0.2167, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 24.962962962962962, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_loss": 0.6333113312721252, |
|
"eval_runtime": 2.4482, |
|
"eval_samples_per_second": 25.325, |
|
"eval_steps_per_second": 0.817, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 25.185185185185187, |
|
"grad_norm": 8.865224838256836, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.2323, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 25.925925925925927, |
|
"grad_norm": 6.462991237640381, |
|
"learning_rate": 1.8162393162393162e-05, |
|
"loss": 0.1956, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_loss": 0.5993022322654724, |
|
"eval_runtime": 2.3358, |
|
"eval_samples_per_second": 26.543, |
|
"eval_steps_per_second": 0.856, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 6.978143692016602, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.2174, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 26.962962962962962, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_loss": 0.6063364744186401, |
|
"eval_runtime": 2.3579, |
|
"eval_samples_per_second": 26.295, |
|
"eval_steps_per_second": 0.848, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 27.40740740740741, |
|
"grad_norm": 8.283989906311035, |
|
"learning_rate": 1.602564102564103e-05, |
|
"loss": 0.1999, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.6413679718971252, |
|
"eval_runtime": 3.4435, |
|
"eval_samples_per_second": 18.005, |
|
"eval_steps_per_second": 0.581, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 28.14814814814815, |
|
"grad_norm": 5.77383279800415, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.1783, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 28.88888888888889, |
|
"grad_norm": 7.4615654945373535, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.1667, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 28.962962962962962, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.6296666860580444, |
|
"eval_runtime": 2.3485, |
|
"eval_samples_per_second": 26.4, |
|
"eval_steps_per_second": 0.852, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 29.62962962962963, |
|
"grad_norm": 9.373270034790039, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.1835, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8225806451612904, |
|
"eval_loss": 0.6148854494094849, |
|
"eval_runtime": 3.1829, |
|
"eval_samples_per_second": 19.479, |
|
"eval_steps_per_second": 0.628, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 30.37037037037037, |
|
"grad_norm": 8.87562370300293, |
|
"learning_rate": 1.1752136752136752e-05, |
|
"loss": 0.186, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 30.962962962962962, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.6429581642150879, |
|
"eval_runtime": 2.4503, |
|
"eval_samples_per_second": 25.303, |
|
"eval_steps_per_second": 0.816, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 31.11111111111111, |
|
"grad_norm": 5.281705856323242, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.1706, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 31.85185185185185, |
|
"grad_norm": 4.753020286560059, |
|
"learning_rate": 9.615384615384616e-06, |
|
"loss": 0.1749, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.6677759885787964, |
|
"eval_runtime": 2.3885, |
|
"eval_samples_per_second": 25.957, |
|
"eval_steps_per_second": 0.837, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 32.592592592592595, |
|
"grad_norm": 7.2512526512146, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.1663, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 32.96296296296296, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.6828835010528564, |
|
"eval_runtime": 2.3483, |
|
"eval_samples_per_second": 26.402, |
|
"eval_steps_per_second": 0.852, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 33.333333333333336, |
|
"grad_norm": 9.678658485412598, |
|
"learning_rate": 7.478632478632479e-06, |
|
"loss": 0.1557, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.655702531337738, |
|
"eval_runtime": 3.1084, |
|
"eval_samples_per_second": 19.946, |
|
"eval_steps_per_second": 0.643, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 34.074074074074076, |
|
"grad_norm": 5.886323928833008, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 0.2095, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 34.81481481481482, |
|
"grad_norm": 5.312963485717773, |
|
"learning_rate": 5.341880341880342e-06, |
|
"loss": 0.1913, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 34.96296296296296, |
|
"eval_accuracy": 0.8387096774193549, |
|
"eval_loss": 0.6274862885475159, |
|
"eval_runtime": 3.2878, |
|
"eval_samples_per_second": 18.858, |
|
"eval_steps_per_second": 0.608, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 35.55555555555556, |
|
"grad_norm": 7.064798355102539, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.1775, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_loss": 0.6554756760597229, |
|
"eval_runtime": 2.4759, |
|
"eval_samples_per_second": 25.041, |
|
"eval_steps_per_second": 0.808, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 36.2962962962963, |
|
"grad_norm": 5.463845729827881, |
|
"learning_rate": 3.205128205128205e-06, |
|
"loss": 0.152, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 36.96296296296296, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_loss": 0.6653042435646057, |
|
"eval_runtime": 3.3751, |
|
"eval_samples_per_second": 18.37, |
|
"eval_steps_per_second": 0.593, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 37.03703703703704, |
|
"grad_norm": 5.512512683868408, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.1681, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 37.77777777777778, |
|
"grad_norm": 6.535687446594238, |
|
"learning_rate": 1.0683760683760685e-06, |
|
"loss": 0.1897, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_loss": 0.6681959629058838, |
|
"eval_runtime": 2.3421, |
|
"eval_samples_per_second": 26.472, |
|
"eval_steps_per_second": 0.854, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 38.51851851851852, |
|
"grad_norm": 8.290581703186035, |
|
"learning_rate": 0.0, |
|
"loss": 0.1589, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 38.51851851851852, |
|
"eval_accuracy": 0.8548387096774194, |
|
"eval_loss": 0.6678970456123352, |
|
"eval_runtime": 2.3455, |
|
"eval_samples_per_second": 26.434, |
|
"eval_steps_per_second": 0.853, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 38.51851851851852, |
|
"step": 520, |
|
"total_flos": 2.140878196703232e+18, |
|
"train_loss": 0.35049390150950505, |
|
"train_runtime": 3356.7171, |
|
"train_samples_per_second": 20.353, |
|
"train_steps_per_second": 0.155 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 520, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.140878196703232e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|