{ "best_metric": null, "best_model_checkpoint": null, "epoch": 69.94276094276094, "global_step": 980, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "eval_accuracy": 0.4674227882409832, "eval_loss": 2.8955132961273193, "eval_runtime": 18.7933, "eval_samples_per_second": 105.729, "eval_steps_per_second": 0.692, "step": 14 }, { "epoch": 1.94, "eval_accuracy": 0.46653443074701806, "eval_loss": 0.8548561334609985, "eval_runtime": 19.7558, "eval_samples_per_second": 100.578, "eval_steps_per_second": 0.658, "step": 28 }, { "epoch": 2.94, "eval_accuracy": 0.4693885992808441, "eval_loss": 0.4985657036304474, "eval_runtime": 17.6713, "eval_samples_per_second": 112.442, "eval_steps_per_second": 0.736, "step": 42 }, { "epoch": 3.94, "eval_accuracy": 0.47050840246337, "eval_loss": 0.4260375499725342, "eval_runtime": 18.6, "eval_samples_per_second": 106.828, "eval_steps_per_second": 0.699, "step": 56 }, { "epoch": 4.94, "eval_accuracy": 0.4770233523775381, "eval_loss": 0.28108683228492737, "eval_runtime": 20.1989, "eval_samples_per_second": 98.372, "eval_steps_per_second": 0.644, "step": 70 }, { "epoch": 5.94, "eval_accuracy": 0.4891314089527132, "eval_loss": 0.14415042102336884, "eval_runtime": 17.7108, "eval_samples_per_second": 112.191, "eval_steps_per_second": 0.734, "step": 84 }, { "epoch": 6.94, "eval_accuracy": 0.4952228625005786, "eval_loss": 0.07607654482126236, "eval_runtime": 20.5293, "eval_samples_per_second": 96.788, "eval_steps_per_second": 0.633, "step": 98 }, { "epoch": 7.94, "eval_accuracy": 0.4971059440177199, "eval_loss": 0.044076837599277496, "eval_runtime": 19.5255, "eval_samples_per_second": 101.764, "eval_steps_per_second": 0.666, "step": 112 }, { "epoch": 8.94, "eval_accuracy": 0.49830059772080165, "eval_loss": 0.028873631730675697, "eval_runtime": 18.1929, "eval_samples_per_second": 109.218, "eval_steps_per_second": 0.715, "step": 126 }, { "epoch": 9.94, "eval_accuracy": 0.49892894814336236, "eval_loss": 0.02155212126672268, "eval_runtime": 21.2728, "eval_samples_per_second": 93.406, "eval_steps_per_second": 0.611, "step": 140 }, { "epoch": 10.94, "eval_accuracy": 0.4993455503827718, "eval_loss": 0.01726018451154232, "eval_runtime": 18.282, "eval_samples_per_second": 108.686, "eval_steps_per_second": 0.711, "step": 154 }, { "epoch": 11.94, "eval_accuracy": 0.49957699607133255, "eval_loss": 0.01476956345140934, "eval_runtime": 21.5099, "eval_samples_per_second": 92.376, "eval_steps_per_second": 0.604, "step": 168 }, { "epoch": 12.94, "eval_accuracy": 0.49980450225881146, "eval_loss": 0.012352370657026768, "eval_runtime": 20.1169, "eval_samples_per_second": 98.773, "eval_steps_per_second": 0.646, "step": 182 }, { "epoch": 13.94, "eval_accuracy": 0.4999542032999231, "eval_loss": 0.010818206705152988, "eval_runtime": 19.2177, "eval_samples_per_second": 103.395, "eval_steps_per_second": 0.676, "step": 196 }, { "epoch": 14.94, "eval_accuracy": 0.5000457967000769, "eval_loss": 0.009764532558619976, "eval_runtime": 20.2058, "eval_samples_per_second": 98.338, "eval_steps_per_second": 0.643, "step": 210 }, { "epoch": 15.94, "eval_accuracy": 0.5001354203496898, "eval_loss": 0.009084771387279034, "eval_runtime": 18.5782, "eval_samples_per_second": 106.953, "eval_steps_per_second": 0.7, "step": 224 }, { "epoch": 16.94, "eval_accuracy": 0.5001925431153772, "eval_loss": 0.008187664672732353, "eval_runtime": 19.5572, "eval_samples_per_second": 101.599, "eval_steps_per_second": 0.665, "step": 238 }, { "epoch": 17.94, "eval_accuracy": 0.500231938126196, "eval_loss": 0.008109861984848976, "eval_runtime": 18.7388, "eval_samples_per_second": 106.037, "eval_steps_per_second": 0.694, "step": 252 }, { "epoch": 18.94, "eval_accuracy": 0.500286106266072, "eval_loss": 0.00722927413880825, "eval_runtime": 18.6903, "eval_samples_per_second": 106.312, "eval_steps_per_second": 0.696, "step": 266 }, { "epoch": 19.94, "eval_accuracy": 0.5003156525241861, "eval_loss": 0.00708524277433753, "eval_runtime": 22.4973, "eval_samples_per_second": 88.322, "eval_steps_per_second": 0.578, "step": 280 }, { "epoch": 20.94, "eval_accuracy": 0.5003461836575707, "eval_loss": 0.006836502812802792, "eval_runtime": 18.9858, "eval_samples_per_second": 104.657, "eval_steps_per_second": 0.685, "step": 294 }, { "epoch": 21.94, "eval_accuracy": 0.5003905030447419, "eval_loss": 0.006470560096204281, "eval_runtime": 21.4277, "eval_samples_per_second": 92.73, "eval_steps_per_second": 0.607, "step": 308 }, { "epoch": 22.94, "eval_accuracy": 0.5004102005501513, "eval_loss": 0.00611697556450963, "eval_runtime": 18.7742, "eval_samples_per_second": 105.837, "eval_steps_per_second": 0.692, "step": 322 }, { "epoch": 23.94, "eval_accuracy": 0.5004239888039379, "eval_loss": 0.006002925336360931, "eval_runtime": 17.6523, "eval_samples_per_second": 112.563, "eval_steps_per_second": 0.736, "step": 336 }, { "epoch": 24.94, "eval_accuracy": 0.5004466409351588, "eval_loss": 0.0059402757324278355, "eval_runtime": 20.2385, "eval_samples_per_second": 98.179, "eval_steps_per_second": 0.642, "step": 350 }, { "epoch": 25.94, "eval_accuracy": 0.5004584594384044, "eval_loss": 0.005674673244357109, "eval_runtime": 18.6707, "eval_samples_per_second": 106.423, "eval_steps_per_second": 0.696, "step": 364 }, { "epoch": 26.94, "eval_accuracy": 0.5004683081911091, "eval_loss": 0.0056230453774333, "eval_runtime": 19.1333, "eval_samples_per_second": 103.85, "eval_steps_per_second": 0.679, "step": 378 }, { "epoch": 27.94, "eval_accuracy": 0.5004850510707072, "eval_loss": 0.005449134390801191, "eval_runtime": 19.2945, "eval_samples_per_second": 102.983, "eval_steps_per_second": 0.674, "step": 392 }, { "epoch": 28.94, "eval_accuracy": 0.5004929300728709, "eval_loss": 0.005320119671523571, "eval_runtime": 20.0519, "eval_samples_per_second": 99.093, "eval_steps_per_second": 0.648, "step": 406 }, { "epoch": 29.94, "eval_accuracy": 0.5005008090750347, "eval_loss": 0.005209068767726421, "eval_runtime": 19.9363, "eval_samples_per_second": 99.667, "eval_steps_per_second": 0.652, "step": 420 }, { "epoch": 30.94, "eval_accuracy": 0.5005067183266575, "eval_loss": 0.005184635519981384, "eval_runtime": 19.4285, "eval_samples_per_second": 102.272, "eval_steps_per_second": 0.669, "step": 434 }, { "epoch": 31.94, "eval_accuracy": 0.5005254309567965, "eval_loss": 0.004937352146953344, "eval_runtime": 21.1515, "eval_samples_per_second": 93.941, "eval_steps_per_second": 0.615, "step": 448 }, { "epoch": 32.94, "eval_accuracy": 0.5005382343353126, "eval_loss": 0.004831444472074509, "eval_runtime": 19.1176, "eval_samples_per_second": 103.936, "eval_steps_per_second": 0.68, "step": 462 }, { "epoch": 33.94, "eval_accuracy": 0.500543158711665, "eval_loss": 0.004661811515688896, "eval_runtime": 21.9936, "eval_samples_per_second": 90.345, "eval_steps_per_second": 0.591, "step": 476 }, { "epoch": 34.94, "eval_accuracy": 0.5005480830880173, "eval_loss": 0.00474146893247962, "eval_runtime": 18.9478, "eval_samples_per_second": 104.867, "eval_steps_per_second": 0.686, "step": 490 }, { "epoch": 35.67, "learning_rate": 2.448979591836735e-05, "loss": 2.3265, "step": 500 }, { "epoch": 35.94, "eval_accuracy": 0.5005569469654516, "eval_loss": 0.004643740598112345, "eval_runtime": 18.6646, "eval_samples_per_second": 106.458, "eval_steps_per_second": 0.697, "step": 504 }, { "epoch": 36.94, "eval_accuracy": 0.5005628562170744, "eval_loss": 0.00456605339422822, "eval_runtime": 18.4246, "eval_samples_per_second": 107.845, "eval_steps_per_second": 0.706, "step": 518 }, { "epoch": 37.94, "eval_accuracy": 0.5005569469654516, "eval_loss": 0.0045891194604337215, "eval_runtime": 19.9985, "eval_samples_per_second": 99.358, "eval_steps_per_second": 0.65, "step": 532 }, { "epoch": 38.94, "eval_accuracy": 0.5005677805934268, "eval_loss": 0.004413667134940624, "eval_runtime": 20.9756, "eval_samples_per_second": 94.729, "eval_steps_per_second": 0.62, "step": 546 }, { "epoch": 39.94, "eval_accuracy": 0.5005717200945087, "eval_loss": 0.004356020595878363, "eval_runtime": 18.9086, "eval_samples_per_second": 105.084, "eval_steps_per_second": 0.688, "step": 560 }, { "epoch": 40.94, "eval_accuracy": 0.5005707352192381, "eval_loss": 0.004351349081844091, "eval_runtime": 21.2375, "eval_samples_per_second": 93.561, "eval_steps_per_second": 0.612, "step": 574 }, { "epoch": 41.94, "eval_accuracy": 0.5005727049697791, "eval_loss": 0.004294094629585743, "eval_runtime": 20.2798, "eval_samples_per_second": 97.979, "eval_steps_per_second": 0.641, "step": 588 }, { "epoch": 42.94, "eval_accuracy": 0.5005795990966724, "eval_loss": 0.004292026627808809, "eval_runtime": 19.0207, "eval_samples_per_second": 104.465, "eval_steps_per_second": 0.683, "step": 602 }, { "epoch": 43.94, "eval_accuracy": 0.5005815688472134, "eval_loss": 0.004189325030893087, "eval_runtime": 21.9849, "eval_samples_per_second": 90.38, "eval_steps_per_second": 0.591, "step": 616 }, { "epoch": 44.94, "eval_accuracy": 0.5005815688472134, "eval_loss": 0.0041327630169689655, "eval_runtime": 19.2534, "eval_samples_per_second": 103.203, "eval_steps_per_second": 0.675, "step": 630 }, { "epoch": 45.94, "eval_accuracy": 0.5005855083482952, "eval_loss": 0.004192625638097525, "eval_runtime": 21.5055, "eval_samples_per_second": 92.395, "eval_steps_per_second": 0.604, "step": 644 }, { "epoch": 46.94, "eval_accuracy": 0.5005904327246475, "eval_loss": 0.004125718027353287, "eval_runtime": 19.7387, "eval_samples_per_second": 100.665, "eval_steps_per_second": 0.659, "step": 658 }, { "epoch": 47.94, "eval_accuracy": 0.5005953571009999, "eval_loss": 0.0040009464137256145, "eval_runtime": 21.0892, "eval_samples_per_second": 94.219, "eval_steps_per_second": 0.616, "step": 672 }, { "epoch": 48.94, "eval_accuracy": 0.5006002814773522, "eval_loss": 0.00396856851875782, "eval_runtime": 19.1717, "eval_samples_per_second": 103.643, "eval_steps_per_second": 0.678, "step": 686 }, { "epoch": 49.94, "eval_accuracy": 0.5006032361031637, "eval_loss": 0.0039261928759515285, "eval_runtime": 19.9925, "eval_samples_per_second": 99.387, "eval_steps_per_second": 0.65, "step": 700 }, { "epoch": 50.94, "eval_accuracy": 0.500610130230057, "eval_loss": 0.0038781561888754368, "eval_runtime": 22.1008, "eval_samples_per_second": 89.906, "eval_steps_per_second": 0.588, "step": 714 }, { "epoch": 51.94, "eval_accuracy": 0.5006071756042456, "eval_loss": 0.003933804575353861, "eval_runtime": 17.351, "eval_samples_per_second": 114.518, "eval_steps_per_second": 0.749, "step": 728 }, { "epoch": 52.94, "eval_accuracy": 0.500610130230057, "eval_loss": 0.003865364473313093, "eval_runtime": 20.5119, "eval_samples_per_second": 96.871, "eval_steps_per_second": 0.634, "step": 742 }, { "epoch": 53.94, "eval_accuracy": 0.500612099980598, "eval_loss": 0.0038321653846651316, "eval_runtime": 17.3408, "eval_samples_per_second": 114.585, "eval_steps_per_second": 0.75, "step": 756 }, { "epoch": 54.94, "eval_accuracy": 0.5006081604795161, "eval_loss": 0.003891468746587634, "eval_runtime": 19.3846, "eval_samples_per_second": 102.504, "eval_steps_per_second": 0.671, "step": 770 }, { "epoch": 55.94, "eval_accuracy": 0.5006130848558684, "eval_loss": 0.0038119996897876263, "eval_runtime": 19.846, "eval_samples_per_second": 100.121, "eval_steps_per_second": 0.655, "step": 784 }, { "epoch": 56.94, "eval_accuracy": 0.5006130848558684, "eval_loss": 0.003837888827547431, "eval_runtime": 18.0294, "eval_samples_per_second": 110.209, "eval_steps_per_second": 0.721, "step": 798 }, { "epoch": 57.94, "eval_accuracy": 0.5006150546064094, "eval_loss": 0.0037844169419258833, "eval_runtime": 18.0119, "eval_samples_per_second": 110.316, "eval_steps_per_second": 0.722, "step": 812 }, { "epoch": 58.94, "eval_accuracy": 0.5006160394816799, "eval_loss": 0.003779872553423047, "eval_runtime": 19.4256, "eval_samples_per_second": 102.288, "eval_steps_per_second": 0.669, "step": 826 }, { "epoch": 59.94, "eval_accuracy": 0.5006170243569503, "eval_loss": 0.0038144837599247694, "eval_runtime": 18.4675, "eval_samples_per_second": 107.595, "eval_steps_per_second": 0.704, "step": 840 }, { "epoch": 60.94, "eval_accuracy": 0.5006180092322208, "eval_loss": 0.0037365842144936323, "eval_runtime": 20.6911, "eval_samples_per_second": 96.031, "eval_steps_per_second": 0.628, "step": 854 }, { "epoch": 61.94, "eval_accuracy": 0.5006209638580322, "eval_loss": 0.0037281711120158434, "eval_runtime": 17.8768, "eval_samples_per_second": 111.149, "eval_steps_per_second": 0.727, "step": 868 }, { "epoch": 62.94, "eval_accuracy": 0.5006209638580322, "eval_loss": 0.003779030404984951, "eval_runtime": 18.2703, "eval_samples_per_second": 108.756, "eval_steps_per_second": 0.712, "step": 882 }, { "epoch": 63.94, "eval_accuracy": 0.5006239184838436, "eval_loss": 0.0037230353336781263, "eval_runtime": 20.4452, "eval_samples_per_second": 97.187, "eval_steps_per_second": 0.636, "step": 896 }, { "epoch": 64.94, "eval_accuracy": 0.5006249033591141, "eval_loss": 0.003699967870488763, "eval_runtime": 18.5245, "eval_samples_per_second": 107.264, "eval_steps_per_second": 0.702, "step": 910 }, { "epoch": 65.94, "eval_accuracy": 0.5006249033591141, "eval_loss": 0.0036831670440733433, "eval_runtime": 17.7233, "eval_samples_per_second": 112.112, "eval_steps_per_second": 0.733, "step": 924 }, { "epoch": 66.94, "eval_accuracy": 0.5006229336085731, "eval_loss": 0.0037006225902587175, "eval_runtime": 19.5885, "eval_samples_per_second": 101.437, "eval_steps_per_second": 0.664, "step": 938 }, { "epoch": 67.94, "eval_accuracy": 0.5006258882343846, "eval_loss": 0.0036684926599264145, "eval_runtime": 19.3473, "eval_samples_per_second": 102.702, "eval_steps_per_second": 0.672, "step": 952 }, { "epoch": 68.94, "eval_accuracy": 0.5006249033591141, "eval_loss": 0.003660534741356969, "eval_runtime": 19.8083, "eval_samples_per_second": 100.312, "eval_steps_per_second": 0.656, "step": 966 }, { "epoch": 69.94, "eval_accuracy": 0.5006249033591141, "eval_loss": 0.0036585668567568064, "eval_runtime": 17.5658, "eval_samples_per_second": 113.118, "eval_steps_per_second": 0.74, "step": 980 }, { "epoch": 69.94, "step": 980, "total_flos": 1.73221462278144e+17, "train_loss": 1.1901442605621961, "train_runtime": 21048.4328, "train_samples_per_second": 31.521, "train_steps_per_second": 0.047 } ], "max_steps": 980, "num_train_epochs": 70, "total_flos": 1.73221462278144e+17, "trial_name": null, "trial_params": null }