{ "best_metric": 0.1180819422006607, "best_model_checkpoint": "CXR-Classifier/checkpoint-1224", "epoch": 3.0, "eval_steps": 500, "global_step": 1224, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.592426061630249, "learning_rate": 8.130081300813009e-06, "loss": 0.5972, "step": 20 }, { "epoch": 0.1, "grad_norm": 2.819566249847412, "learning_rate": 1.6260162601626018e-05, "loss": 0.4976, "step": 40 }, { "epoch": 0.15, "grad_norm": 5.789632320404053, "learning_rate": 2.4390243902439026e-05, "loss": 0.3321, "step": 60 }, { "epoch": 0.2, "grad_norm": 5.508607864379883, "learning_rate": 3.2520325203252037e-05, "loss": 0.415, "step": 80 }, { "epoch": 0.25, "grad_norm": 7.562315464019775, "learning_rate": 4.065040650406504e-05, "loss": 0.2412, "step": 100 }, { "epoch": 0.29, "grad_norm": 4.414723873138428, "learning_rate": 4.878048780487805e-05, "loss": 0.3456, "step": 120 }, { "epoch": 0.34, "grad_norm": 2.0423896312713623, "learning_rate": 4.922797456857402e-05, "loss": 0.2415, "step": 140 }, { "epoch": 0.39, "grad_norm": 2.041806221008301, "learning_rate": 4.83197093551317e-05, "loss": 0.3546, "step": 160 }, { "epoch": 0.44, "grad_norm": 1.6937503814697266, "learning_rate": 4.741144414168938e-05, "loss": 0.3947, "step": 180 }, { "epoch": 0.49, "grad_norm": 6.492763996124268, "learning_rate": 4.650317892824705e-05, "loss": 0.3063, "step": 200 }, { "epoch": 0.54, "grad_norm": 1.9708950519561768, "learning_rate": 4.559491371480473e-05, "loss": 0.3115, "step": 220 }, { "epoch": 0.59, "grad_norm": 12.533012390136719, "learning_rate": 4.46866485013624e-05, "loss": 0.5087, "step": 240 }, { "epoch": 0.64, "grad_norm": 8.02456283569336, "learning_rate": 4.377838328792008e-05, "loss": 0.2745, "step": 260 }, { "epoch": 0.69, "grad_norm": 1.0878229141235352, "learning_rate": 4.287011807447775e-05, "loss": 0.1905, "step": 280 }, { "epoch": 0.74, "grad_norm": 7.465769290924072, "learning_rate": 4.196185286103542e-05, "loss": 0.2509, "step": 300 }, { "epoch": 0.78, "grad_norm": 15.646003723144531, "learning_rate": 4.10535876475931e-05, "loss": 0.4353, "step": 320 }, { "epoch": 0.83, "grad_norm": 3.2481565475463867, "learning_rate": 4.014532243415077e-05, "loss": 0.3478, "step": 340 }, { "epoch": 0.88, "grad_norm": 2.395519733428955, "learning_rate": 3.923705722070845e-05, "loss": 0.2199, "step": 360 }, { "epoch": 0.93, "grad_norm": 8.089118003845215, "learning_rate": 3.832879200726612e-05, "loss": 0.2715, "step": 380 }, { "epoch": 0.98, "grad_norm": 8.150867462158203, "learning_rate": 3.74205267938238e-05, "loss": 0.2074, "step": 400 }, { "epoch": 1.0, "eval_accuracy": 0.9387254901960784, "eval_auc": 0.9766835240883684, "eval_f1": 0.957841483979764, "eval_loss": 0.2350389063358307, "eval_precision": 0.961082910321489, "eval_recall": 0.9546218487394958, "eval_runtime": 246.6561, "eval_samples_per_second": 3.308, "eval_steps_per_second": 0.207, "step": 408 }, { "epoch": 1.03, "grad_norm": 6.791078090667725, "learning_rate": 3.651226158038147e-05, "loss": 0.1235, "step": 420 }, { "epoch": 1.08, "grad_norm": 5.592333793640137, "learning_rate": 3.560399636693915e-05, "loss": 0.4199, "step": 440 }, { "epoch": 1.13, "grad_norm": 0.2713923752307892, "learning_rate": 3.469573115349682e-05, "loss": 0.3119, "step": 460 }, { "epoch": 1.18, "grad_norm": 5.907072067260742, "learning_rate": 3.37874659400545e-05, "loss": 0.2118, "step": 480 }, { "epoch": 1.23, "grad_norm": 0.9097113013267517, "learning_rate": 3.287920072661217e-05, "loss": 0.2174, "step": 500 }, { "epoch": 1.27, "grad_norm": 6.9212141036987305, "learning_rate": 3.197093551316985e-05, "loss": 0.2448, "step": 520 }, { "epoch": 1.32, "grad_norm": 6.113616466522217, "learning_rate": 3.106267029972752e-05, "loss": 0.1619, "step": 540 }, { "epoch": 1.37, "grad_norm": 0.9741531014442444, "learning_rate": 3.0154405086285197e-05, "loss": 0.3296, "step": 560 }, { "epoch": 1.42, "grad_norm": 1.604313611984253, "learning_rate": 2.924613987284287e-05, "loss": 0.1598, "step": 580 }, { "epoch": 1.47, "grad_norm": 5.160298824310303, "learning_rate": 2.8337874659400547e-05, "loss": 0.2605, "step": 600 }, { "epoch": 1.52, "grad_norm": 7.961933135986328, "learning_rate": 2.7429609445958222e-05, "loss": 0.295, "step": 620 }, { "epoch": 1.57, "grad_norm": 3.545825719833374, "learning_rate": 2.6521344232515894e-05, "loss": 0.2613, "step": 640 }, { "epoch": 1.62, "grad_norm": 0.7656643390655518, "learning_rate": 2.5613079019073572e-05, "loss": 0.1684, "step": 660 }, { "epoch": 1.67, "grad_norm": 14.269344329833984, "learning_rate": 2.4704813805631247e-05, "loss": 0.3285, "step": 680 }, { "epoch": 1.72, "grad_norm": 0.21142134070396423, "learning_rate": 2.379654859218892e-05, "loss": 0.2071, "step": 700 }, { "epoch": 1.76, "grad_norm": 1.0282666683197021, "learning_rate": 2.2888283378746594e-05, "loss": 0.2701, "step": 720 }, { "epoch": 1.81, "grad_norm": 12.365777969360352, "learning_rate": 2.198001816530427e-05, "loss": 0.1753, "step": 740 }, { "epoch": 1.86, "grad_norm": 6.909509181976318, "learning_rate": 2.1071752951861944e-05, "loss": 0.185, "step": 760 }, { "epoch": 1.91, "grad_norm": 10.059576034545898, "learning_rate": 2.016348773841962e-05, "loss": 0.1403, "step": 780 }, { "epoch": 1.96, "grad_norm": 13.194554328918457, "learning_rate": 1.9255222524977297e-05, "loss": 0.177, "step": 800 }, { "epoch": 2.0, "eval_accuracy": 0.9522058823529411, "eval_auc": 0.9864329442184113, "eval_f1": 0.967418546365915, "eval_loss": 0.15405645966529846, "eval_precision": 0.9617940199335548, "eval_recall": 0.973109243697479, "eval_runtime": 257.0506, "eval_samples_per_second": 3.174, "eval_steps_per_second": 0.198, "step": 816 }, { "epoch": 2.01, "grad_norm": 0.45505988597869873, "learning_rate": 1.834695731153497e-05, "loss": 0.1334, "step": 820 }, { "epoch": 2.06, "grad_norm": 0.5608593821525574, "learning_rate": 1.7438692098092644e-05, "loss": 0.1801, "step": 840 }, { "epoch": 2.11, "grad_norm": 1.9215396642684937, "learning_rate": 1.653042688465032e-05, "loss": 0.1397, "step": 860 }, { "epoch": 2.16, "grad_norm": 0.03459596261382103, "learning_rate": 1.5622161671207994e-05, "loss": 0.0797, "step": 880 }, { "epoch": 2.21, "grad_norm": 4.931589603424072, "learning_rate": 1.4713896457765669e-05, "loss": 0.1547, "step": 900 }, { "epoch": 2.25, "grad_norm": 12.403867721557617, "learning_rate": 1.3805631244323344e-05, "loss": 0.1008, "step": 920 }, { "epoch": 2.3, "grad_norm": 6.834578514099121, "learning_rate": 1.2897366030881017e-05, "loss": 0.3086, "step": 940 }, { "epoch": 2.35, "grad_norm": 0.12356822937726974, "learning_rate": 1.1989100817438692e-05, "loss": 0.1367, "step": 960 }, { "epoch": 2.4, "grad_norm": 0.23836758732795715, "learning_rate": 1.1080835603996367e-05, "loss": 0.1204, "step": 980 }, { "epoch": 2.45, "grad_norm": 0.645460307598114, "learning_rate": 1.0172570390554042e-05, "loss": 0.2857, "step": 1000 }, { "epoch": 2.5, "grad_norm": 6.155028820037842, "learning_rate": 9.264305177111717e-06, "loss": 0.1514, "step": 1020 }, { "epoch": 2.55, "grad_norm": 6.625197410583496, "learning_rate": 8.356039963669392e-06, "loss": 0.1973, "step": 1040 }, { "epoch": 2.6, "grad_norm": 0.4476400911808014, "learning_rate": 7.447774750227067e-06, "loss": 0.1153, "step": 1060 }, { "epoch": 2.65, "grad_norm": 11.432110786437988, "learning_rate": 6.539509536784741e-06, "loss": 0.1943, "step": 1080 }, { "epoch": 2.7, "grad_norm": 6.038093090057373, "learning_rate": 5.631244323342416e-06, "loss": 0.0998, "step": 1100 }, { "epoch": 2.75, "grad_norm": 0.24591827392578125, "learning_rate": 4.722979109900091e-06, "loss": 0.1767, "step": 1120 }, { "epoch": 2.79, "grad_norm": 3.9476640224456787, "learning_rate": 3.814713896457766e-06, "loss": 0.1798, "step": 1140 }, { "epoch": 2.84, "grad_norm": 9.382974624633789, "learning_rate": 2.9064486830154405e-06, "loss": 0.1707, "step": 1160 }, { "epoch": 2.89, "grad_norm": 0.10719335079193115, "learning_rate": 1.9981834695731155e-06, "loss": 0.2662, "step": 1180 }, { "epoch": 2.94, "grad_norm": 10.07032299041748, "learning_rate": 1.0899182561307902e-06, "loss": 0.218, "step": 1200 }, { "epoch": 2.99, "grad_norm": 17.199472427368164, "learning_rate": 1.8165304268846503e-07, "loss": 0.1692, "step": 1220 }, { "epoch": 3.0, "eval_accuracy": 0.9644607843137255, "eval_auc": 0.9916270580630442, "eval_f1": 0.9755686604886269, "eval_loss": 0.1180819422006607, "eval_precision": 0.9780405405405406, "eval_recall": 0.973109243697479, "eval_runtime": 252.4161, "eval_samples_per_second": 3.233, "eval_steps_per_second": 0.202, "step": 1224 } ], "logging_steps": 20, "max_steps": 1224, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7.581041343995535e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }