diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,27605 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.896551724137931, + "eval_steps": 250, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.004597701149425287, + "grad_norm": 37.01336935001283, + "learning_rate": 5e-06, + "loss": 0.8637, + "num_input_tokens_seen": 90336, + "step": 1 + }, + { + "epoch": 0.004597701149425287, + "loss": 1.0519328117370605, + "loss_ce": 0.5021281838417053, + "loss_iou": 0.400390625, + "loss_num": 0.10986328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 90336, + "step": 1 + }, + { + "epoch": 0.009195402298850575, + "grad_norm": 40.354572183137385, + "learning_rate": 5e-06, + "loss": 0.8717, + "num_input_tokens_seen": 180800, + "step": 2 + }, + { + "epoch": 0.009195402298850575, + "loss": 0.7949165105819702, + "loss_ce": 0.3349556028842926, + "loss_iou": 0.341796875, + "loss_num": 0.091796875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 180800, + "step": 2 + }, + { + "epoch": 0.013793103448275862, + "grad_norm": 39.06584845163415, + "learning_rate": 5e-06, + "loss": 0.606, + "num_input_tokens_seen": 271252, + "step": 3 + }, + { + "epoch": 0.013793103448275862, + "loss": 0.5930109620094299, + "loss_ce": 0.20678049325942993, + "loss_iou": 0.478515625, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 271252, + "step": 3 + }, + { + "epoch": 0.01839080459770115, + "grad_norm": 57.83141578006947, + "learning_rate": 5e-06, + "loss": 0.6455, + "num_input_tokens_seen": 361480, + "step": 4 + }, + { + "epoch": 0.01839080459770115, + "loss": 0.7589341402053833, + "loss_ce": 0.3553696274757385, + "loss_iou": 0.51953125, + "loss_num": 0.08056640625, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 361480, + "step": 4 + }, + { + "epoch": 0.022988505747126436, + "grad_norm": 42.90588851722674, + "learning_rate": 5e-06, + "loss": 1.0614, + "num_input_tokens_seen": 451876, + "step": 5 + }, + { + "epoch": 0.022988505747126436, + "loss": 1.2513668537139893, + "loss_ce": 0.703759491443634, + "loss_iou": 0.421875, + "loss_num": 0.109375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 451876, + "step": 5 + }, + { + "epoch": 0.027586206896551724, + "grad_norm": 22.460688537328316, + "learning_rate": 5e-06, + "loss": 0.6396, + "num_input_tokens_seen": 542192, + "step": 6 + }, + { + "epoch": 0.027586206896551724, + "loss": 0.504480242729187, + "loss_ce": 0.05452907085418701, + "loss_iou": 0.4140625, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 542192, + "step": 6 + }, + { + "epoch": 0.03218390804597701, + "grad_norm": 23.415625594005842, + "learning_rate": 5e-06, + "loss": 0.874, + "num_input_tokens_seen": 632568, + "step": 7 + }, + { + "epoch": 0.03218390804597701, + "loss": 1.0449306964874268, + "loss_ce": 0.5966885685920715, + "loss_iou": 0.357421875, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 632568, + "step": 7 + }, + { + "epoch": 0.0367816091954023, + "grad_norm": 31.16740618162066, + "learning_rate": 5e-06, + "loss": 0.7589, + "num_input_tokens_seen": 723052, + "step": 8 + }, + { + "epoch": 0.0367816091954023, + "loss": 0.6076769828796387, + "loss_ce": 0.25794553756713867, + "loss_iou": 0.494140625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 723052, + "step": 8 + }, + { + "epoch": 0.041379310344827586, + "grad_norm": 79.07479774439705, + "learning_rate": 5e-06, + "loss": 0.687, + "num_input_tokens_seen": 813440, + "step": 9 + }, + { + "epoch": 0.041379310344827586, + "loss": 0.6652740240097046, + "loss_ce": 0.1359771490097046, + "loss_iou": 0.392578125, + "loss_num": 0.10595703125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 813440, + "step": 9 + }, + { + "epoch": 0.04597701149425287, + "grad_norm": 44.77421252011234, + "learning_rate": 5e-06, + "loss": 0.6675, + "num_input_tokens_seen": 903664, + "step": 10 + }, + { + "epoch": 0.04597701149425287, + "loss": 0.6931918859481812, + "loss_ce": 0.2824862599372864, + "loss_iou": 0.388671875, + "loss_num": 0.08203125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 903664, + "step": 10 + }, + { + "epoch": 0.05057471264367816, + "grad_norm": 80.61281743018304, + "learning_rate": 5e-06, + "loss": 0.8424, + "num_input_tokens_seen": 994196, + "step": 11 + }, + { + "epoch": 0.05057471264367816, + "loss": 0.9065788984298706, + "loss_ce": 0.4577263593673706, + "loss_iou": 0.416015625, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 994196, + "step": 11 + }, + { + "epoch": 0.05517241379310345, + "grad_norm": 21.38207584365465, + "learning_rate": 5e-06, + "loss": 0.6596, + "num_input_tokens_seen": 1084712, + "step": 12 + }, + { + "epoch": 0.05517241379310345, + "loss": 0.5851885080337524, + "loss_ce": 0.21836715936660767, + "loss_iou": 0.43359375, + "loss_num": 0.0732421875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 1084712, + "step": 12 + }, + { + "epoch": 0.059770114942528735, + "grad_norm": 27.964832638511165, + "learning_rate": 5e-06, + "loss": 0.5867, + "num_input_tokens_seen": 1175040, + "step": 13 + }, + { + "epoch": 0.059770114942528735, + "loss": 0.5128318071365356, + "loss_ce": 0.13587866723537445, + "loss_iou": 0.421875, + "loss_num": 0.0751953125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 1175040, + "step": 13 + }, + { + "epoch": 0.06436781609195402, + "grad_norm": 15.47526861039646, + "learning_rate": 5e-06, + "loss": 0.6604, + "num_input_tokens_seen": 1265560, + "step": 14 + }, + { + "epoch": 0.06436781609195402, + "loss": 0.6039849519729614, + "loss_ce": 0.20707331597805023, + "loss_iou": 0.466796875, + "loss_num": 0.07958984375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 1265560, + "step": 14 + }, + { + "epoch": 0.06896551724137931, + "grad_norm": 8.807051044677067, + "learning_rate": 5e-06, + "loss": 0.6509, + "num_input_tokens_seen": 1355984, + "step": 15 + }, + { + "epoch": 0.06896551724137931, + "loss": 0.6505950689315796, + "loss_ce": 0.1728118658065796, + "loss_iou": 0.376953125, + "loss_num": 0.095703125, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 1355984, + "step": 15 + }, + { + "epoch": 0.0735632183908046, + "grad_norm": 16.39684002573081, + "learning_rate": 5e-06, + "loss": 0.5639, + "num_input_tokens_seen": 1446400, + "step": 16 + }, + { + "epoch": 0.0735632183908046, + "loss": 0.6162420511245728, + "loss_ce": 0.24746768176555634, + "loss_iou": 0.490234375, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 1446400, + "step": 16 + }, + { + "epoch": 0.07816091954022988, + "grad_norm": 19.846643198159168, + "learning_rate": 5e-06, + "loss": 0.6321, + "num_input_tokens_seen": 1536776, + "step": 17 + }, + { + "epoch": 0.07816091954022988, + "loss": 0.6417566537857056, + "loss_ce": 0.18911993503570557, + "loss_iou": 0.44140625, + "loss_num": 0.0908203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 1536776, + "step": 17 + }, + { + "epoch": 0.08275862068965517, + "grad_norm": 28.522543896242876, + "learning_rate": 5e-06, + "loss": 0.6082, + "num_input_tokens_seen": 1627260, + "step": 18 + }, + { + "epoch": 0.08275862068965517, + "loss": 0.5867501497268677, + "loss_ce": 0.10921109467744827, + "loss_iou": 0.35546875, + "loss_num": 0.095703125, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 1627260, + "step": 18 + }, + { + "epoch": 0.08735632183908046, + "grad_norm": 37.06692415264521, + "learning_rate": 5e-06, + "loss": 0.6748, + "num_input_tokens_seen": 1717724, + "step": 19 + }, + { + "epoch": 0.08735632183908046, + "loss": 0.691688060760498, + "loss_ce": 0.22208356857299805, + "loss_iou": 0.400390625, + "loss_num": 0.09375, + "loss_xval": 0.46875, + "num_input_tokens_seen": 1717724, + "step": 19 + }, + { + "epoch": 0.09195402298850575, + "grad_norm": 113.58143371326585, + "learning_rate": 5e-06, + "loss": 0.5325, + "num_input_tokens_seen": 1808100, + "step": 20 + }, + { + "epoch": 0.09195402298850575, + "loss": 0.5246105194091797, + "loss_ce": 0.1267833560705185, + "loss_iou": 0.37890625, + "loss_num": 0.07958984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 1808100, + "step": 20 + }, + { + "epoch": 0.09655172413793103, + "grad_norm": 45.45186371478247, + "learning_rate": 5e-06, + "loss": 0.672, + "num_input_tokens_seen": 1898532, + "step": 21 + }, + { + "epoch": 0.09655172413793103, + "loss": 0.6351579427719116, + "loss_ce": 0.06411299854516983, + "loss_iou": 0.50390625, + "loss_num": 0.1142578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 1898532, + "step": 21 + }, + { + "epoch": 0.10114942528735632, + "grad_norm": 23.89941149750651, + "learning_rate": 5e-06, + "loss": 0.5832, + "num_input_tokens_seen": 1988852, + "step": 22 + }, + { + "epoch": 0.10114942528735632, + "loss": 0.5167936682701111, + "loss_ce": 0.1544889509677887, + "loss_iou": 0.51953125, + "loss_num": 0.07275390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 1988852, + "step": 22 + }, + { + "epoch": 0.10574712643678161, + "grad_norm": 10.318774820543899, + "learning_rate": 5e-06, + "loss": 0.6361, + "num_input_tokens_seen": 2079304, + "step": 23 + }, + { + "epoch": 0.10574712643678161, + "loss": 0.5845872759819031, + "loss_ce": 0.07311265170574188, + "loss_iou": 0.423828125, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 2079304, + "step": 23 + }, + { + "epoch": 0.1103448275862069, + "grad_norm": 25.73676580542981, + "learning_rate": 5e-06, + "loss": 0.568, + "num_input_tokens_seen": 2169628, + "step": 24 + }, + { + "epoch": 0.1103448275862069, + "loss": 0.5678774118423462, + "loss_ce": 0.06470361351966858, + "loss_iou": 0.52734375, + "loss_num": 0.1005859375, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 2169628, + "step": 24 + }, + { + "epoch": 0.11494252873563218, + "grad_norm": 81.51890029791515, + "learning_rate": 5e-06, + "loss": 0.5453, + "num_input_tokens_seen": 2259984, + "step": 25 + }, + { + "epoch": 0.11494252873563218, + "loss": 0.5036299824714661, + "loss_ce": 0.05953817814588547, + "loss_iou": 0.435546875, + "loss_num": 0.0888671875, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 2259984, + "step": 25 + }, + { + "epoch": 0.11954022988505747, + "grad_norm": 27.04495599462074, + "learning_rate": 5e-06, + "loss": 0.5524, + "num_input_tokens_seen": 2350292, + "step": 26 + }, + { + "epoch": 0.11954022988505747, + "loss": 0.6328690052032471, + "loss_ce": 0.08086705207824707, + "loss_iou": 0.380859375, + "loss_num": 0.1103515625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 2350292, + "step": 26 + }, + { + "epoch": 0.12413793103448276, + "grad_norm": 10.193343220359463, + "learning_rate": 5e-06, + "loss": 0.5916, + "num_input_tokens_seen": 2440640, + "step": 27 + }, + { + "epoch": 0.12413793103448276, + "loss": 0.5718714594841003, + "loss_ce": 0.14609020948410034, + "loss_iou": 0.388671875, + "loss_num": 0.08544921875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 2440640, + "step": 27 + }, + { + "epoch": 0.12873563218390804, + "grad_norm": 19.9427592340637, + "learning_rate": 5e-06, + "loss": 0.581, + "num_input_tokens_seen": 2530228, + "step": 28 + }, + { + "epoch": 0.12873563218390804, + "loss": 0.6976370811462402, + "loss_ce": 0.15210485458374023, + "loss_iou": 0.267578125, + "loss_num": 0.109375, + "loss_xval": 0.546875, + "num_input_tokens_seen": 2530228, + "step": 28 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 22.33962406561534, + "learning_rate": 5e-06, + "loss": 0.5247, + "num_input_tokens_seen": 2619848, + "step": 29 + }, + { + "epoch": 0.13333333333333333, + "loss": 0.570274829864502, + "loss_ce": 0.09835098683834076, + "loss_iou": 0.44921875, + "loss_num": 0.09423828125, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 2619848, + "step": 29 + }, + { + "epoch": 0.13793103448275862, + "grad_norm": 14.599136978515288, + "learning_rate": 5e-06, + "loss": 0.5767, + "num_input_tokens_seen": 2710140, + "step": 30 + }, + { + "epoch": 0.13793103448275862, + "loss": 0.603084146976471, + "loss_ce": 0.09063299000263214, + "loss_iou": 0.427734375, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 2710140, + "step": 30 + }, + { + "epoch": 0.1425287356321839, + "grad_norm": 18.74413636198144, + "learning_rate": 5e-06, + "loss": 0.5167, + "num_input_tokens_seen": 2800536, + "step": 31 + }, + { + "epoch": 0.1425287356321839, + "loss": 0.547565758228302, + "loss_ce": 0.1607249677181244, + "loss_iou": 0.36328125, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 2800536, + "step": 31 + }, + { + "epoch": 0.1471264367816092, + "grad_norm": 30.495448668876183, + "learning_rate": 5e-06, + "loss": 0.4526, + "num_input_tokens_seen": 2890960, + "step": 32 + }, + { + "epoch": 0.1471264367816092, + "loss": 0.42909184098243713, + "loss_ce": 0.10731448233127594, + "loss_iou": 0.45703125, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 2890960, + "step": 32 + }, + { + "epoch": 0.15172413793103448, + "grad_norm": 82.31686845472512, + "learning_rate": 5e-06, + "loss": 0.5327, + "num_input_tokens_seen": 2979808, + "step": 33 + }, + { + "epoch": 0.15172413793103448, + "loss": 0.4499969482421875, + "loss_ce": 0.008224454708397388, + "loss_iou": 0.408203125, + "loss_num": 0.08837890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 2979808, + "step": 33 + }, + { + "epoch": 0.15632183908045977, + "grad_norm": 10.971908242941907, + "learning_rate": 5e-06, + "loss": 0.4713, + "num_input_tokens_seen": 3070312, + "step": 34 + }, + { + "epoch": 0.15632183908045977, + "loss": 0.3486025929450989, + "loss_ce": 0.06405669450759888, + "loss_iou": 0.466796875, + "loss_num": 0.056884765625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 3070312, + "step": 34 + }, + { + "epoch": 0.16091954022988506, + "grad_norm": 59.48550166875302, + "learning_rate": 5e-06, + "loss": 0.4692, + "num_input_tokens_seen": 3159280, + "step": 35 + }, + { + "epoch": 0.16091954022988506, + "loss": 0.5126940011978149, + "loss_ce": 0.08544795215129852, + "loss_iou": 0.337890625, + "loss_num": 0.08544921875, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 3159280, + "step": 35 + }, + { + "epoch": 0.16551724137931034, + "grad_norm": 60.760015794484936, + "learning_rate": 5e-06, + "loss": 0.4513, + "num_input_tokens_seen": 3249672, + "step": 36 + }, + { + "epoch": 0.16551724137931034, + "loss": 0.4749954044818878, + "loss_ce": 0.11220243573188782, + "loss_iou": 0.490234375, + "loss_num": 0.072265625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 3249672, + "step": 36 + }, + { + "epoch": 0.17011494252873563, + "grad_norm": 24.0106917483809, + "learning_rate": 5e-06, + "loss": 0.481, + "num_input_tokens_seen": 3339988, + "step": 37 + }, + { + "epoch": 0.17011494252873563, + "loss": 0.5111528635025024, + "loss_ce": 0.13212455809116364, + "loss_iou": 0.5078125, + "loss_num": 0.07568359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 3339988, + "step": 37 + }, + { + "epoch": 0.17471264367816092, + "grad_norm": 30.5271253034538, + "learning_rate": 5e-06, + "loss": 0.5096, + "num_input_tokens_seen": 3430444, + "step": 38 + }, + { + "epoch": 0.17471264367816092, + "loss": 0.4569355845451355, + "loss_ce": 0.03676954656839371, + "loss_iou": 0.5234375, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 3430444, + "step": 38 + }, + { + "epoch": 0.1793103448275862, + "grad_norm": 8.31708311536423, + "learning_rate": 5e-06, + "loss": 0.417, + "num_input_tokens_seen": 3519396, + "step": 39 + }, + { + "epoch": 0.1793103448275862, + "loss": 0.4267890453338623, + "loss_ce": 0.0754707008600235, + "loss_iou": 0.337890625, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 3519396, + "step": 39 + }, + { + "epoch": 0.1839080459770115, + "grad_norm": 20.02750098475025, + "learning_rate": 5e-06, + "loss": 0.3488, + "num_input_tokens_seen": 3609736, + "step": 40 + }, + { + "epoch": 0.1839080459770115, + "loss": 0.3892139196395874, + "loss_ce": 0.047050852328538895, + "loss_iou": 0.46484375, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 3609736, + "step": 40 + }, + { + "epoch": 0.18850574712643678, + "grad_norm": 32.78061699301825, + "learning_rate": 5e-06, + "loss": 0.4256, + "num_input_tokens_seen": 3700148, + "step": 41 + }, + { + "epoch": 0.18850574712643678, + "loss": 0.3672151267528534, + "loss_ce": 0.013943652622401714, + "loss_iou": 0.44921875, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 3700148, + "step": 41 + }, + { + "epoch": 0.19310344827586207, + "grad_norm": 9.890488159206065, + "learning_rate": 5e-06, + "loss": 0.4023, + "num_input_tokens_seen": 3790612, + "step": 42 + }, + { + "epoch": 0.19310344827586207, + "loss": 0.42205893993377686, + "loss_ce": 0.034973956644535065, + "loss_iou": 0.5625, + "loss_num": 0.07763671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 3790612, + "step": 42 + }, + { + "epoch": 0.19770114942528735, + "grad_norm": 24.69833771237315, + "learning_rate": 5e-06, + "loss": 0.5508, + "num_input_tokens_seen": 3880836, + "step": 43 + }, + { + "epoch": 0.19770114942528735, + "loss": 0.5896004438400269, + "loss_ce": 0.20044030249118805, + "loss_iou": 0.431640625, + "loss_num": 0.078125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 3880836, + "step": 43 + }, + { + "epoch": 0.20229885057471264, + "grad_norm": 16.669851454492647, + "learning_rate": 5e-06, + "loss": 0.4632, + "num_input_tokens_seen": 3971176, + "step": 44 + }, + { + "epoch": 0.20229885057471264, + "loss": 0.4887722134590149, + "loss_ce": 0.0513942651450634, + "loss_iou": 0.474609375, + "loss_num": 0.08740234375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 3971176, + "step": 44 + }, + { + "epoch": 0.20689655172413793, + "grad_norm": 18.6471842377231, + "learning_rate": 5e-06, + "loss": 0.4398, + "num_input_tokens_seen": 4061524, + "step": 45 + }, + { + "epoch": 0.20689655172413793, + "loss": 0.3761554956436157, + "loss_ce": 0.028621304780244827, + "loss_iou": 0.384765625, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 4061524, + "step": 45 + }, + { + "epoch": 0.21149425287356322, + "grad_norm": 28.00486282610174, + "learning_rate": 5e-06, + "loss": 0.3932, + "num_input_tokens_seen": 4151828, + "step": 46 + }, + { + "epoch": 0.21149425287356322, + "loss": 0.3925134539604187, + "loss_ce": 0.04986211284995079, + "loss_iou": 0.48828125, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 4151828, + "step": 46 + }, + { + "epoch": 0.2160919540229885, + "grad_norm": 24.306371497861868, + "learning_rate": 5e-06, + "loss": 0.4599, + "num_input_tokens_seen": 4241372, + "step": 47 + }, + { + "epoch": 0.2160919540229885, + "loss": 0.49177491664886475, + "loss_ce": 0.07002198696136475, + "loss_iou": 0.3671875, + "loss_num": 0.08447265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 4241372, + "step": 47 + }, + { + "epoch": 0.2206896551724138, + "grad_norm": 6.774260279559146, + "learning_rate": 5e-06, + "loss": 0.414, + "num_input_tokens_seen": 4331848, + "step": 48 + }, + { + "epoch": 0.2206896551724138, + "loss": 0.3134571313858032, + "loss_ce": 0.009929286316037178, + "loss_iou": 0.439453125, + "loss_num": 0.060791015625, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 4331848, + "step": 48 + }, + { + "epoch": 0.22528735632183908, + "grad_norm": 7.185186363794483, + "learning_rate": 5e-06, + "loss": 0.3401, + "num_input_tokens_seen": 4422192, + "step": 49 + }, + { + "epoch": 0.22528735632183908, + "loss": 0.34919464588165283, + "loss_ce": 0.04316438362002373, + "loss_iou": 0.4140625, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 4422192, + "step": 49 + }, + { + "epoch": 0.22988505747126436, + "grad_norm": 13.427495770074591, + "learning_rate": 5e-06, + "loss": 0.3785, + "num_input_tokens_seen": 4512568, + "step": 50 + }, + { + "epoch": 0.22988505747126436, + "loss": 0.3789505362510681, + "loss_ce": 0.001997418701648712, + "loss_iou": 0.3984375, + "loss_num": 0.0751953125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 4512568, + "step": 50 + }, + { + "epoch": 0.23448275862068965, + "grad_norm": 13.227669876358155, + "learning_rate": 5e-06, + "loss": 0.4474, + "num_input_tokens_seen": 4602984, + "step": 51 + }, + { + "epoch": 0.23448275862068965, + "loss": 0.5125839710235596, + "loss_ce": 0.09559179842472076, + "loss_iou": 0.314453125, + "loss_num": 0.08349609375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 4602984, + "step": 51 + }, + { + "epoch": 0.23908045977011494, + "grad_norm": 23.649689887557532, + "learning_rate": 5e-06, + "loss": 0.3993, + "num_input_tokens_seen": 4693356, + "step": 52 + }, + { + "epoch": 0.23908045977011494, + "loss": 0.3423450291156769, + "loss_ce": 0.011290331371128559, + "loss_iou": 0.4609375, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 4693356, + "step": 52 + }, + { + "epoch": 0.24367816091954023, + "grad_norm": 29.218941891807887, + "learning_rate": 5e-06, + "loss": 0.4166, + "num_input_tokens_seen": 4783796, + "step": 53 + }, + { + "epoch": 0.24367816091954023, + "loss": 0.4327765107154846, + "loss_ce": 0.06705383956432343, + "loss_iou": 0.37890625, + "loss_num": 0.0732421875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 4783796, + "step": 53 + }, + { + "epoch": 0.2482758620689655, + "grad_norm": 12.83340723694157, + "learning_rate": 5e-06, + "loss": 0.4025, + "num_input_tokens_seen": 4874232, + "step": 54 + }, + { + "epoch": 0.2482758620689655, + "loss": 0.3716029226779938, + "loss_ce": 0.03688611835241318, + "loss_iou": 0.50390625, + "loss_num": 0.06689453125, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 4874232, + "step": 54 + }, + { + "epoch": 0.25287356321839083, + "grad_norm": 8.220941129491107, + "learning_rate": 5e-06, + "loss": 0.4638, + "num_input_tokens_seen": 4963396, + "step": 55 + }, + { + "epoch": 0.25287356321839083, + "loss": 0.5566645860671997, + "loss_ce": 0.040917523205280304, + "loss_iou": 0.421875, + "loss_num": 0.10302734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 4963396, + "step": 55 + }, + { + "epoch": 0.2574712643678161, + "grad_norm": 13.288705604735872, + "learning_rate": 5e-06, + "loss": 0.3137, + "num_input_tokens_seen": 5053628, + "step": 56 + }, + { + "epoch": 0.2574712643678161, + "loss": 0.3751257061958313, + "loss_ce": 0.020999712869524956, + "loss_iou": 0.462890625, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 5053628, + "step": 56 + }, + { + "epoch": 0.2620689655172414, + "grad_norm": 39.13223161382933, + "learning_rate": 5e-06, + "loss": 0.4695, + "num_input_tokens_seen": 5144036, + "step": 57 + }, + { + "epoch": 0.2620689655172414, + "loss": 0.5425558686256409, + "loss_ce": 0.023757044225931168, + "loss_iou": 0.34765625, + "loss_num": 0.103515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 5144036, + "step": 57 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 15.33841968913398, + "learning_rate": 5e-06, + "loss": 0.3458, + "num_input_tokens_seen": 5234456, + "step": 58 + }, + { + "epoch": 0.26666666666666666, + "loss": 0.38235118985176086, + "loss_ce": 0.03310802951455116, + "loss_iou": 0.412109375, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 5234456, + "step": 58 + }, + { + "epoch": 0.271264367816092, + "grad_norm": 26.180906675160497, + "learning_rate": 5e-06, + "loss": 0.3917, + "num_input_tokens_seen": 5324744, + "step": 59 + }, + { + "epoch": 0.271264367816092, + "loss": 0.43390488624572754, + "loss_ce": 0.039861951023340225, + "loss_iou": 0.47265625, + "loss_num": 0.07861328125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 5324744, + "step": 59 + }, + { + "epoch": 0.27586206896551724, + "grad_norm": 9.816205473208605, + "learning_rate": 5e-06, + "loss": 0.3208, + "num_input_tokens_seen": 5415040, + "step": 60 + }, + { + "epoch": 0.27586206896551724, + "loss": 0.244655042886734, + "loss_ce": 0.01015797071158886, + "loss_iou": 0.423828125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 5415040, + "step": 60 + }, + { + "epoch": 0.28045977011494255, + "grad_norm": 15.328830837517142, + "learning_rate": 5e-06, + "loss": 0.4713, + "num_input_tokens_seen": 5505400, + "step": 61 + }, + { + "epoch": 0.28045977011494255, + "loss": 0.3520286977291107, + "loss_ce": 0.0024193418212234974, + "loss_iou": 0.37109375, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 5505400, + "step": 61 + }, + { + "epoch": 0.2850574712643678, + "grad_norm": 25.32547400452274, + "learning_rate": 5e-06, + "loss": 0.367, + "num_input_tokens_seen": 5595868, + "step": 62 + }, + { + "epoch": 0.2850574712643678, + "loss": 0.39265066385269165, + "loss_ce": 0.026805955916643143, + "loss_iou": 0.322265625, + "loss_num": 0.0732421875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 5595868, + "step": 62 + }, + { + "epoch": 0.2896551724137931, + "grad_norm": 8.379792746028329, + "learning_rate": 5e-06, + "loss": 0.3734, + "num_input_tokens_seen": 5686176, + "step": 63 + }, + { + "epoch": 0.2896551724137931, + "loss": 0.413876473903656, + "loss_ce": 0.0571870356798172, + "loss_iou": 0.42578125, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 5686176, + "step": 63 + }, + { + "epoch": 0.2942528735632184, + "grad_norm": 9.587938754950969, + "learning_rate": 5e-06, + "loss": 0.431, + "num_input_tokens_seen": 5776472, + "step": 64 + }, + { + "epoch": 0.2942528735632184, + "loss": 0.45100903511047363, + "loss_ce": 0.017171159386634827, + "loss_iou": 0.408203125, + "loss_num": 0.0869140625, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 5776472, + "step": 64 + }, + { + "epoch": 0.2988505747126437, + "grad_norm": 6.157907021347152, + "learning_rate": 5e-06, + "loss": 0.3847, + "num_input_tokens_seen": 5866900, + "step": 65 + }, + { + "epoch": 0.2988505747126437, + "loss": 0.3870934844017029, + "loss_ce": 0.006722383201122284, + "loss_iou": 0.3984375, + "loss_num": 0.076171875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 5866900, + "step": 65 + }, + { + "epoch": 0.30344827586206896, + "grad_norm": 9.170883238290122, + "learning_rate": 5e-06, + "loss": 0.3444, + "num_input_tokens_seen": 5957224, + "step": 66 + }, + { + "epoch": 0.30344827586206896, + "loss": 0.3455989360809326, + "loss_ce": 0.0012385793961584568, + "loss_iou": 0.48828125, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 5957224, + "step": 66 + }, + { + "epoch": 0.3080459770114943, + "grad_norm": 27.234249102852328, + "learning_rate": 5e-06, + "loss": 0.3401, + "num_input_tokens_seen": 6047500, + "step": 67 + }, + { + "epoch": 0.3080459770114943, + "loss": 0.3533991575241089, + "loss_ce": 0.007085674442350864, + "loss_iou": 0.423828125, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 6047500, + "step": 67 + }, + { + "epoch": 0.31264367816091954, + "grad_norm": 47.75785601894446, + "learning_rate": 5e-06, + "loss": 0.3646, + "num_input_tokens_seen": 6137912, + "step": 68 + }, + { + "epoch": 0.31264367816091954, + "loss": 0.3522525429725647, + "loss_ce": 0.019244728609919548, + "loss_iou": 0.45703125, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 6137912, + "step": 68 + }, + { + "epoch": 0.31724137931034485, + "grad_norm": 6.987555924094217, + "learning_rate": 5e-06, + "loss": 0.3722, + "num_input_tokens_seen": 6228264, + "step": 69 + }, + { + "epoch": 0.31724137931034485, + "loss": 0.3856116533279419, + "loss_ce": 0.011588208377361298, + "loss_iou": 0.53125, + "loss_num": 0.07470703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 6228264, + "step": 69 + }, + { + "epoch": 0.3218390804597701, + "grad_norm": 5.498036338797732, + "learning_rate": 5e-06, + "loss": 0.3389, + "num_input_tokens_seen": 6318588, + "step": 70 + }, + { + "epoch": 0.3218390804597701, + "loss": 0.31558507680892944, + "loss_ce": 0.0037564674858003855, + "loss_iou": 0.33984375, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 6318588, + "step": 70 + }, + { + "epoch": 0.3264367816091954, + "grad_norm": 8.031546032564327, + "learning_rate": 5e-06, + "loss": 0.3435, + "num_input_tokens_seen": 6409052, + "step": 71 + }, + { + "epoch": 0.3264367816091954, + "loss": 0.25542575120925903, + "loss_ce": 0.016717255115509033, + "loss_iou": 0.423828125, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 6409052, + "step": 71 + }, + { + "epoch": 0.3310344827586207, + "grad_norm": 6.547993935141339, + "learning_rate": 5e-06, + "loss": 0.4004, + "num_input_tokens_seen": 6499328, + "step": 72 + }, + { + "epoch": 0.3310344827586207, + "loss": 0.5037937164306641, + "loss_ce": 0.02991676703095436, + "loss_iou": 0.375, + "loss_num": 0.0947265625, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 6499328, + "step": 72 + }, + { + "epoch": 0.335632183908046, + "grad_norm": 68.22686102549073, + "learning_rate": 5e-06, + "loss": 0.4303, + "num_input_tokens_seen": 6589628, + "step": 73 + }, + { + "epoch": 0.335632183908046, + "loss": 0.39546552300453186, + "loss_ce": 0.03560223802924156, + "loss_iou": 0.40625, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 6589628, + "step": 73 + }, + { + "epoch": 0.34022988505747126, + "grad_norm": 13.503622709112387, + "learning_rate": 5e-06, + "loss": 0.345, + "num_input_tokens_seen": 6680112, + "step": 74 + }, + { + "epoch": 0.34022988505747126, + "loss": 0.3707137703895569, + "loss_ce": 0.03233487531542778, + "loss_iou": 0.37109375, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 6680112, + "step": 74 + }, + { + "epoch": 0.3448275862068966, + "grad_norm": 13.855537520348836, + "learning_rate": 5e-06, + "loss": 0.3428, + "num_input_tokens_seen": 6770428, + "step": 75 + }, + { + "epoch": 0.3448275862068966, + "loss": 0.34209129214286804, + "loss_ce": 0.017384245991706848, + "loss_iou": 0.4765625, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 6770428, + "step": 75 + }, + { + "epoch": 0.34942528735632183, + "grad_norm": 23.91656100675138, + "learning_rate": 5e-06, + "loss": 0.395, + "num_input_tokens_seen": 6860876, + "step": 76 + }, + { + "epoch": 0.34942528735632183, + "loss": 0.3706984221935272, + "loss_ce": 0.04159684479236603, + "loss_iou": 0.44140625, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 6860876, + "step": 76 + }, + { + "epoch": 0.35402298850574715, + "grad_norm": 12.200721545193472, + "learning_rate": 5e-06, + "loss": 0.3466, + "num_input_tokens_seen": 6950396, + "step": 77 + }, + { + "epoch": 0.35402298850574715, + "loss": 0.4722903072834015, + "loss_ce": 0.05505400151014328, + "loss_iou": 0.384765625, + "loss_num": 0.08349609375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 6950396, + "step": 77 + }, + { + "epoch": 0.3586206896551724, + "grad_norm": 8.19258423406812, + "learning_rate": 5e-06, + "loss": 0.3716, + "num_input_tokens_seen": 7040716, + "step": 78 + }, + { + "epoch": 0.3586206896551724, + "loss": 0.37182319164276123, + "loss_ce": 0.010861298069357872, + "loss_iou": 0.3046875, + "loss_num": 0.072265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 7040716, + "step": 78 + }, + { + "epoch": 0.3632183908045977, + "grad_norm": 18.895525833487042, + "learning_rate": 5e-06, + "loss": 0.3798, + "num_input_tokens_seen": 7131028, + "step": 79 + }, + { + "epoch": 0.3632183908045977, + "loss": 0.2786239981651306, + "loss_ce": 0.0010361107997596264, + "loss_iou": 0.46875, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 7131028, + "step": 79 + }, + { + "epoch": 0.367816091954023, + "grad_norm": 14.693361209406406, + "learning_rate": 5e-06, + "loss": 0.33, + "num_input_tokens_seen": 7221608, + "step": 80 + }, + { + "epoch": 0.367816091954023, + "loss": 0.33968040347099304, + "loss_ce": 0.019367896020412445, + "loss_iou": 0.44921875, + "loss_num": 0.06396484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 7221608, + "step": 80 + }, + { + "epoch": 0.3724137931034483, + "grad_norm": 20.35099683349881, + "learning_rate": 5e-06, + "loss": 0.391, + "num_input_tokens_seen": 7311916, + "step": 81 + }, + { + "epoch": 0.3724137931034483, + "loss": 0.451404333114624, + "loss_ce": 0.04014945402741432, + "loss_iou": 0.4765625, + "loss_num": 0.08203125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 7311916, + "step": 81 + }, + { + "epoch": 0.37701149425287356, + "grad_norm": 9.233297779847412, + "learning_rate": 5e-06, + "loss": 0.3335, + "num_input_tokens_seen": 7402256, + "step": 82 + }, + { + "epoch": 0.37701149425287356, + "loss": 0.3448345959186554, + "loss_ce": 0.0010846068616956472, + "loss_iou": 0.46484375, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 7402256, + "step": 82 + }, + { + "epoch": 0.3816091954022989, + "grad_norm": 2.170529749870518, + "learning_rate": 5e-06, + "loss": 0.206, + "num_input_tokens_seen": 7492744, + "step": 83 + }, + { + "epoch": 0.3816091954022989, + "loss": 0.19931316375732422, + "loss_ce": 0.002306930720806122, + "loss_iou": 0.44140625, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 7492744, + "step": 83 + }, + { + "epoch": 0.38620689655172413, + "grad_norm": 5.8890959300403, + "learning_rate": 5e-06, + "loss": 0.3027, + "num_input_tokens_seen": 7583004, + "step": 84 + }, + { + "epoch": 0.38620689655172413, + "loss": 0.3211175799369812, + "loss_ce": 0.01050965953618288, + "loss_iou": 0.4765625, + "loss_num": 0.062255859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 7583004, + "step": 84 + }, + { + "epoch": 0.39080459770114945, + "grad_norm": 13.034484868845816, + "learning_rate": 5e-06, + "loss": 0.3487, + "num_input_tokens_seen": 7673256, + "step": 85 + }, + { + "epoch": 0.39080459770114945, + "loss": 0.36519187688827515, + "loss_ce": 0.003131319535896182, + "loss_iou": 0.404296875, + "loss_num": 0.072265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 7673256, + "step": 85 + }, + { + "epoch": 0.3954022988505747, + "grad_norm": 13.756069652716036, + "learning_rate": 5e-06, + "loss": 0.335, + "num_input_tokens_seen": 7763604, + "step": 86 + }, + { + "epoch": 0.3954022988505747, + "loss": 0.37784039974212646, + "loss_ce": 0.018831633031368256, + "loss_iou": 0.39453125, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 7763604, + "step": 86 + }, + { + "epoch": 0.4, + "grad_norm": 15.575683221755641, + "learning_rate": 5e-06, + "loss": 0.3935, + "num_input_tokens_seen": 7853108, + "step": 87 + }, + { + "epoch": 0.4, + "loss": 0.36000126600265503, + "loss_ce": 0.04499881714582443, + "loss_iou": 0.435546875, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 7853108, + "step": 87 + }, + { + "epoch": 0.4045977011494253, + "grad_norm": 25.9232530151757, + "learning_rate": 5e-06, + "loss": 0.3719, + "num_input_tokens_seen": 7943488, + "step": 88 + }, + { + "epoch": 0.4045977011494253, + "loss": 0.42258501052856445, + "loss_ce": 0.03049515187740326, + "loss_iou": 0.5078125, + "loss_num": 0.07861328125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 7943488, + "step": 88 + }, + { + "epoch": 0.4091954022988506, + "grad_norm": 13.656470567398008, + "learning_rate": 5e-06, + "loss": 0.3132, + "num_input_tokens_seen": 8033868, + "step": 89 + }, + { + "epoch": 0.4091954022988506, + "loss": 0.2531309425830841, + "loss_ce": 0.0003233220777474344, + "loss_iou": 0.484375, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 8033868, + "step": 89 + }, + { + "epoch": 0.41379310344827586, + "grad_norm": 16.547729090720072, + "learning_rate": 5e-06, + "loss": 0.318, + "num_input_tokens_seen": 8124172, + "step": 90 + }, + { + "epoch": 0.41379310344827586, + "loss": 0.30939018726348877, + "loss_ce": 0.001162638422101736, + "loss_iou": 0.515625, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 8124172, + "step": 90 + }, + { + "epoch": 0.41839080459770117, + "grad_norm": 15.99038307829354, + "learning_rate": 5e-06, + "loss": 0.3983, + "num_input_tokens_seen": 8213772, + "step": 91 + }, + { + "epoch": 0.41839080459770117, + "loss": 0.5176781415939331, + "loss_ce": 0.1472862958908081, + "loss_iou": 0.3046875, + "loss_num": 0.07421875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 8213772, + "step": 91 + }, + { + "epoch": 0.42298850574712643, + "grad_norm": 17.254415632547147, + "learning_rate": 5e-06, + "loss": 0.4036, + "num_input_tokens_seen": 8304136, + "step": 92 + }, + { + "epoch": 0.42298850574712643, + "loss": 0.44501999020576477, + "loss_ce": 0.025098130106925964, + "loss_iou": 0.39453125, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 8304136, + "step": 92 + }, + { + "epoch": 0.42758620689655175, + "grad_norm": 22.369663390664186, + "learning_rate": 5e-06, + "loss": 0.3815, + "num_input_tokens_seen": 8394520, + "step": 93 + }, + { + "epoch": 0.42758620689655175, + "loss": 0.3709450364112854, + "loss_ce": 0.0024147892836481333, + "loss_iou": 0.470703125, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 8394520, + "step": 93 + }, + { + "epoch": 0.432183908045977, + "grad_norm": 7.546128838220428, + "learning_rate": 5e-06, + "loss": 0.3913, + "num_input_tokens_seen": 8484876, + "step": 94 + }, + { + "epoch": 0.432183908045977, + "loss": 0.4254246652126312, + "loss_ce": 0.01648910902440548, + "loss_iou": 0.48828125, + "loss_num": 0.08154296875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 8484876, + "step": 94 + }, + { + "epoch": 0.4367816091954023, + "grad_norm": 11.464537758049973, + "learning_rate": 5e-06, + "loss": 0.3445, + "num_input_tokens_seen": 8575288, + "step": 95 + }, + { + "epoch": 0.4367816091954023, + "loss": 0.4096546173095703, + "loss_ce": 0.011278166435658932, + "loss_iou": 0.396484375, + "loss_num": 0.07958984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 8575288, + "step": 95 + }, + { + "epoch": 0.4413793103448276, + "grad_norm": 14.34926027066774, + "learning_rate": 5e-06, + "loss": 0.3164, + "num_input_tokens_seen": 8665812, + "step": 96 + }, + { + "epoch": 0.4413793103448276, + "loss": 0.3848586082458496, + "loss_ce": 0.016572486609220505, + "loss_iou": 0.359375, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 8665812, + "step": 96 + }, + { + "epoch": 0.4459770114942529, + "grad_norm": 9.735611757167003, + "learning_rate": 5e-06, + "loss": 0.3679, + "num_input_tokens_seen": 8756136, + "step": 97 + }, + { + "epoch": 0.4459770114942529, + "loss": 0.39260411262512207, + "loss_ce": 0.046046510338783264, + "loss_iou": 0.44921875, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 8756136, + "step": 97 + }, + { + "epoch": 0.45057471264367815, + "grad_norm": 8.954384165667761, + "learning_rate": 5e-06, + "loss": 0.2693, + "num_input_tokens_seen": 8846560, + "step": 98 + }, + { + "epoch": 0.45057471264367815, + "loss": 0.2520179748535156, + "loss_ce": 0.0018959222361445427, + "loss_iou": 0.5078125, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 8846560, + "step": 98 + }, + { + "epoch": 0.45517241379310347, + "grad_norm": 4.271408735001314, + "learning_rate": 5e-06, + "loss": 0.2796, + "num_input_tokens_seen": 8936096, + "step": 99 + }, + { + "epoch": 0.45517241379310347, + "loss": 0.2937536835670471, + "loss_ce": 0.01689821295440197, + "loss_iou": 0.376953125, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 8936096, + "step": 99 + }, + { + "epoch": 0.45977011494252873, + "grad_norm": 45.09395035520587, + "learning_rate": 5e-06, + "loss": 0.341, + "num_input_tokens_seen": 9026468, + "step": 100 + }, + { + "epoch": 0.45977011494252873, + "loss": 0.3114800751209259, + "loss_ce": 0.009722266346216202, + "loss_iou": 0.396484375, + "loss_num": 0.060302734375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 9026468, + "step": 100 + }, + { + "epoch": 0.46436781609195404, + "grad_norm": 19.359219772759506, + "learning_rate": 5e-06, + "loss": 0.341, + "num_input_tokens_seen": 9116736, + "step": 101 + }, + { + "epoch": 0.46436781609195404, + "loss": 0.3214173913002014, + "loss_ce": 0.003912519197911024, + "loss_iou": 0.474609375, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 9116736, + "step": 101 + }, + { + "epoch": 0.4689655172413793, + "grad_norm": 6.534059918097954, + "learning_rate": 5e-06, + "loss": 0.3168, + "num_input_tokens_seen": 9207040, + "step": 102 + }, + { + "epoch": 0.4689655172413793, + "loss": 0.3264657258987427, + "loss_ce": 0.02580653503537178, + "loss_iou": 0.451171875, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 9207040, + "step": 102 + }, + { + "epoch": 0.4735632183908046, + "grad_norm": 7.393393813109138, + "learning_rate": 5e-06, + "loss": 0.359, + "num_input_tokens_seen": 9297316, + "step": 103 + }, + { + "epoch": 0.4735632183908046, + "loss": 0.3772013187408447, + "loss_ce": 0.01831459254026413, + "loss_iou": 0.5078125, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 9297316, + "step": 103 + }, + { + "epoch": 0.4781609195402299, + "grad_norm": 41.31290787104306, + "learning_rate": 5e-06, + "loss": 0.3541, + "num_input_tokens_seen": 9387640, + "step": 104 + }, + { + "epoch": 0.4781609195402299, + "loss": 0.32786232233047485, + "loss_ce": 0.0027890922501683235, + "loss_iou": 0.44140625, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 9387640, + "step": 104 + }, + { + "epoch": 0.4827586206896552, + "grad_norm": 5.6085929794757, + "learning_rate": 5e-06, + "loss": 0.3398, + "num_input_tokens_seen": 9477976, + "step": 105 + }, + { + "epoch": 0.4827586206896552, + "loss": 0.3234696090221405, + "loss_ce": 0.02805946208536625, + "loss_iou": 0.41796875, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 9477976, + "step": 105 + }, + { + "epoch": 0.48735632183908045, + "grad_norm": 20.069977336539267, + "learning_rate": 5e-06, + "loss": 0.4092, + "num_input_tokens_seen": 9568252, + "step": 106 + }, + { + "epoch": 0.48735632183908045, + "loss": 0.3579648733139038, + "loss_ce": 0.017388703301548958, + "loss_iou": 0.4296875, + "loss_num": 0.068359375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 9568252, + "step": 106 + }, + { + "epoch": 0.49195402298850577, + "grad_norm": 69.89835804812493, + "learning_rate": 5e-06, + "loss": 0.4106, + "num_input_tokens_seen": 9657112, + "step": 107 + }, + { + "epoch": 0.49195402298850577, + "loss": 0.38876211643218994, + "loss_ce": 0.019133206456899643, + "loss_iou": 0.451171875, + "loss_num": 0.07421875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 9657112, + "step": 107 + }, + { + "epoch": 0.496551724137931, + "grad_norm": 6.848175278978215, + "learning_rate": 5e-06, + "loss": 0.2565, + "num_input_tokens_seen": 9747548, + "step": 108 + }, + { + "epoch": 0.496551724137931, + "loss": 0.21308478713035583, + "loss_ce": 0.00849493220448494, + "loss_iou": 0.423828125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 9747548, + "step": 108 + }, + { + "epoch": 0.5011494252873563, + "grad_norm": 11.091512135924047, + "learning_rate": 5e-06, + "loss": 0.3075, + "num_input_tokens_seen": 9837824, + "step": 109 + }, + { + "epoch": 0.5011494252873563, + "loss": 0.3376855254173279, + "loss_ce": 0.01969236694276333, + "loss_iou": 0.455078125, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 9837824, + "step": 109 + }, + { + "epoch": 0.5057471264367817, + "grad_norm": 8.760189132358677, + "learning_rate": 5e-06, + "loss": 0.2375, + "num_input_tokens_seen": 9928152, + "step": 110 + }, + { + "epoch": 0.5057471264367817, + "loss": 0.26674026250839233, + "loss_ce": 0.002091821748763323, + "loss_iou": 0.443359375, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 9928152, + "step": 110 + }, + { + "epoch": 0.5103448275862069, + "grad_norm": 8.592645738220657, + "learning_rate": 5e-06, + "loss": 0.3073, + "num_input_tokens_seen": 10017708, + "step": 111 + }, + { + "epoch": 0.5103448275862069, + "loss": 0.29403021931648254, + "loss_ce": 0.005700131878256798, + "loss_iou": 0.435546875, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 10017708, + "step": 111 + }, + { + "epoch": 0.5149425287356322, + "grad_norm": 13.883237322149405, + "learning_rate": 5e-06, + "loss": 0.2906, + "num_input_tokens_seen": 10108008, + "step": 112 + }, + { + "epoch": 0.5149425287356322, + "loss": 0.22113177180290222, + "loss_ce": 0.011414967477321625, + "loss_iou": 0.44921875, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 10108008, + "step": 112 + }, + { + "epoch": 0.5195402298850574, + "grad_norm": 44.804966100254894, + "learning_rate": 5e-06, + "loss": 0.3082, + "num_input_tokens_seen": 10198460, + "step": 113 + }, + { + "epoch": 0.5195402298850574, + "loss": 0.26152580976486206, + "loss_ce": 0.0007836385047994554, + "loss_iou": 0.44140625, + "loss_num": 0.052001953125, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 10198460, + "step": 113 + }, + { + "epoch": 0.5241379310344828, + "grad_norm": 11.857004672937288, + "learning_rate": 5e-06, + "loss": 0.3335, + "num_input_tokens_seen": 10288856, + "step": 114 + }, + { + "epoch": 0.5241379310344828, + "loss": 0.3588264584541321, + "loss_ce": 0.039612580090761185, + "loss_iou": 0.4609375, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 10288856, + "step": 114 + }, + { + "epoch": 0.5287356321839081, + "grad_norm": 12.190324307933235, + "learning_rate": 5e-06, + "loss": 0.2532, + "num_input_tokens_seen": 10379180, + "step": 115 + }, + { + "epoch": 0.5287356321839081, + "loss": 0.2677502930164337, + "loss_ce": 0.008900204673409462, + "loss_iou": 0.44140625, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 10379180, + "step": 115 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 5.20934661237588, + "learning_rate": 5e-06, + "loss": 0.3062, + "num_input_tokens_seen": 10468788, + "step": 116 + }, + { + "epoch": 0.5333333333333333, + "loss": 0.3348138928413391, + "loss_ce": 0.0026605918537825346, + "loss_iou": 0.40625, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 10468788, + "step": 116 + }, + { + "epoch": 0.5379310344827586, + "grad_norm": 102.83862228180482, + "learning_rate": 5e-06, + "loss": 0.2783, + "num_input_tokens_seen": 10559244, + "step": 117 + }, + { + "epoch": 0.5379310344827586, + "loss": 0.2607054114341736, + "loss_ce": 0.005578459706157446, + "loss_iou": 0.453125, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 10559244, + "step": 117 + }, + { + "epoch": 0.542528735632184, + "grad_norm": 11.560087419415964, + "learning_rate": 5e-06, + "loss": 0.2611, + "num_input_tokens_seen": 10649640, + "step": 118 + }, + { + "epoch": 0.542528735632184, + "loss": 0.2891330122947693, + "loss_ce": 0.017282411456108093, + "loss_iou": 0.458984375, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 10649640, + "step": 118 + }, + { + "epoch": 0.5471264367816092, + "grad_norm": 6.8172804181644775, + "learning_rate": 5e-06, + "loss": 0.281, + "num_input_tokens_seen": 10740080, + "step": 119 + }, + { + "epoch": 0.5471264367816092, + "loss": 0.2734673321247101, + "loss_ce": 0.002349165268242359, + "loss_iou": 0.51171875, + "loss_num": 0.05419921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 10740080, + "step": 119 + }, + { + "epoch": 0.5517241379310345, + "grad_norm": 15.572102183682414, + "learning_rate": 5e-06, + "loss": 0.3282, + "num_input_tokens_seen": 10830516, + "step": 120 + }, + { + "epoch": 0.5517241379310345, + "loss": 0.37165123224258423, + "loss_ce": 0.0017782035283744335, + "loss_iou": 0.478515625, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 10830516, + "step": 120 + }, + { + "epoch": 0.5563218390804597, + "grad_norm": 9.896381717450371, + "learning_rate": 5e-06, + "loss": 0.2514, + "num_input_tokens_seen": 10920860, + "step": 121 + }, + { + "epoch": 0.5563218390804597, + "loss": 0.24367234110832214, + "loss_ce": 0.0051469625905156136, + "loss_iou": 0.4765625, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 10920860, + "step": 121 + }, + { + "epoch": 0.5609195402298851, + "grad_norm": 8.57733919240062, + "learning_rate": 5e-06, + "loss": 0.2907, + "num_input_tokens_seen": 11011300, + "step": 122 + }, + { + "epoch": 0.5609195402298851, + "loss": 0.30107301473617554, + "loss_ce": 0.005418718792498112, + "loss_iou": 0.51953125, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 11011300, + "step": 122 + }, + { + "epoch": 0.5655172413793104, + "grad_norm": 19.20663757306324, + "learning_rate": 5e-06, + "loss": 0.208, + "num_input_tokens_seen": 11101728, + "step": 123 + }, + { + "epoch": 0.5655172413793104, + "loss": 0.1830858439207077, + "loss_ce": 0.006968907080590725, + "loss_iou": 0.515625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 11101728, + "step": 123 + }, + { + "epoch": 0.5701149425287356, + "grad_norm": 15.412744082634788, + "learning_rate": 5e-06, + "loss": 0.3385, + "num_input_tokens_seen": 11192036, + "step": 124 + }, + { + "epoch": 0.5701149425287356, + "loss": 0.36701270937919617, + "loss_ce": 0.0038535220082849264, + "loss_iou": 0.435546875, + "loss_num": 0.07275390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 11192036, + "step": 124 + }, + { + "epoch": 0.5747126436781609, + "grad_norm": 56.494382183302136, + "learning_rate": 5e-06, + "loss": 0.3616, + "num_input_tokens_seen": 11282432, + "step": 125 + }, + { + "epoch": 0.5747126436781609, + "loss": 0.4529553949832916, + "loss_ce": 0.010572599247097969, + "loss_iou": 0.349609375, + "loss_num": 0.08837890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 11282432, + "step": 125 + }, + { + "epoch": 0.5793103448275863, + "grad_norm": 10.883196608124821, + "learning_rate": 5e-06, + "loss": 0.2749, + "num_input_tokens_seen": 11372852, + "step": 126 + }, + { + "epoch": 0.5793103448275863, + "loss": 0.21984298527240753, + "loss_ce": 0.0012150609400123358, + "loss_iou": 0.421875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 11372852, + "step": 126 + }, + { + "epoch": 0.5839080459770115, + "grad_norm": 13.409455091165006, + "learning_rate": 5e-06, + "loss": 0.2527, + "num_input_tokens_seen": 11463312, + "step": 127 + }, + { + "epoch": 0.5839080459770115, + "loss": 0.21884769201278687, + "loss_ce": 0.008154332637786865, + "loss_iou": 0.453125, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 11463312, + "step": 127 + }, + { + "epoch": 0.5885057471264368, + "grad_norm": 66.84081956875737, + "learning_rate": 5e-06, + "loss": 0.3699, + "num_input_tokens_seen": 11553556, + "step": 128 + }, + { + "epoch": 0.5885057471264368, + "loss": 0.3832904100418091, + "loss_ce": 0.004384158179163933, + "loss_iou": 0.451171875, + "loss_num": 0.07568359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 11553556, + "step": 128 + }, + { + "epoch": 0.593103448275862, + "grad_norm": 12.161700433268052, + "learning_rate": 5e-06, + "loss": 0.3168, + "num_input_tokens_seen": 11643936, + "step": 129 + }, + { + "epoch": 0.593103448275862, + "loss": 0.29100292921066284, + "loss_ce": 0.004259749781340361, + "loss_iou": 0.419921875, + "loss_num": 0.057373046875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 11643936, + "step": 129 + }, + { + "epoch": 0.5977011494252874, + "grad_norm": 29.82460546961223, + "learning_rate": 5e-06, + "loss": 0.4414, + "num_input_tokens_seen": 11734172, + "step": 130 + }, + { + "epoch": 0.5977011494252874, + "loss": 0.43849754333496094, + "loss_ce": 0.007955562323331833, + "loss_iou": 0.302734375, + "loss_num": 0.0859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 11734172, + "step": 130 + }, + { + "epoch": 0.6022988505747127, + "grad_norm": 9.962930837362645, + "learning_rate": 5e-06, + "loss": 0.3169, + "num_input_tokens_seen": 11824480, + "step": 131 + }, + { + "epoch": 0.6022988505747127, + "loss": 0.28843504190444946, + "loss_ce": 0.01182370726019144, + "loss_iou": 0.384765625, + "loss_num": 0.05517578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 11824480, + "step": 131 + }, + { + "epoch": 0.6068965517241379, + "grad_norm": 19.583672279804937, + "learning_rate": 5e-06, + "loss": 0.33, + "num_input_tokens_seen": 11914836, + "step": 132 + }, + { + "epoch": 0.6068965517241379, + "loss": 0.43384528160095215, + "loss_ce": 0.0053784484043717384, + "loss_iou": 0.43359375, + "loss_num": 0.08544921875, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 11914836, + "step": 132 + }, + { + "epoch": 0.6114942528735632, + "grad_norm": 25.89247440808492, + "learning_rate": 5e-06, + "loss": 0.2403, + "num_input_tokens_seen": 12005236, + "step": 133 + }, + { + "epoch": 0.6114942528735632, + "loss": 0.23137599229812622, + "loss_ce": 0.003348652273416519, + "loss_iou": 0.427734375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 12005236, + "step": 133 + }, + { + "epoch": 0.6160919540229886, + "grad_norm": 11.009266151622725, + "learning_rate": 5e-06, + "loss": 0.3161, + "num_input_tokens_seen": 12095572, + "step": 134 + }, + { + "epoch": 0.6160919540229886, + "loss": 0.32933273911476135, + "loss_ce": 0.003038788214325905, + "loss_iou": 0.392578125, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 12095572, + "step": 134 + }, + { + "epoch": 0.6206896551724138, + "grad_norm": 4.89493638128289, + "learning_rate": 5e-06, + "loss": 0.2481, + "num_input_tokens_seen": 12185852, + "step": 135 + }, + { + "epoch": 0.6206896551724138, + "loss": 0.17523905634880066, + "loss_ce": 0.0022654307540506124, + "loss_iou": 0.46484375, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 12185852, + "step": 135 + }, + { + "epoch": 0.6252873563218391, + "grad_norm": 9.499104731899472, + "learning_rate": 5e-06, + "loss": 0.2763, + "num_input_tokens_seen": 12276216, + "step": 136 + }, + { + "epoch": 0.6252873563218391, + "loss": 0.24068671464920044, + "loss_ce": 0.010278991423547268, + "loss_iou": 0.45703125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 12276216, + "step": 136 + }, + { + "epoch": 0.6298850574712643, + "grad_norm": 16.393863320752942, + "learning_rate": 5e-06, + "loss": 0.3345, + "num_input_tokens_seen": 12366584, + "step": 137 + }, + { + "epoch": 0.6298850574712643, + "loss": 0.3390456438064575, + "loss_ce": 0.004023653920739889, + "loss_iou": 0.4140625, + "loss_num": 0.06689453125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 12366584, + "step": 137 + }, + { + "epoch": 0.6344827586206897, + "grad_norm": 12.427779653501139, + "learning_rate": 5e-06, + "loss": 0.2663, + "num_input_tokens_seen": 12457056, + "step": 138 + }, + { + "epoch": 0.6344827586206897, + "loss": 0.2509799599647522, + "loss_ce": 0.004947238601744175, + "loss_iou": 0.53515625, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 12457056, + "step": 138 + }, + { + "epoch": 0.639080459770115, + "grad_norm": 4.879940316834736, + "learning_rate": 5e-06, + "loss": 0.291, + "num_input_tokens_seen": 12547580, + "step": 139 + }, + { + "epoch": 0.639080459770115, + "loss": 0.28111234307289124, + "loss_ce": 0.0024258154444396496, + "loss_iou": 0.44921875, + "loss_num": 0.0556640625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 12547580, + "step": 139 + }, + { + "epoch": 0.6436781609195402, + "grad_norm": 19.612477583005116, + "learning_rate": 5e-06, + "loss": 0.2142, + "num_input_tokens_seen": 12637988, + "step": 140 + }, + { + "epoch": 0.6436781609195402, + "loss": 0.21896348893642426, + "loss_ce": 0.0008238445734605193, + "loss_iou": 0.408203125, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 12637988, + "step": 140 + }, + { + "epoch": 0.6482758620689655, + "grad_norm": 8.925670199142369, + "learning_rate": 5e-06, + "loss": 0.3375, + "num_input_tokens_seen": 12727624, + "step": 141 + }, + { + "epoch": 0.6482758620689655, + "loss": 0.3529576063156128, + "loss_ce": 0.010306272655725479, + "loss_iou": 0.32421875, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 12727624, + "step": 141 + }, + { + "epoch": 0.6528735632183909, + "grad_norm": 12.46478278407722, + "learning_rate": 5e-06, + "loss": 0.2862, + "num_input_tokens_seen": 12818080, + "step": 142 + }, + { + "epoch": 0.6528735632183909, + "loss": 0.2979605793952942, + "loss_ce": 0.011522573418915272, + "loss_iou": 0.41015625, + "loss_num": 0.05712890625, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 12818080, + "step": 142 + }, + { + "epoch": 0.6574712643678161, + "grad_norm": 6.157855338602633, + "learning_rate": 5e-06, + "loss": 0.2823, + "num_input_tokens_seen": 12908420, + "step": 143 + }, + { + "epoch": 0.6574712643678161, + "loss": 0.2000827193260193, + "loss_ce": 0.015390344895422459, + "loss_iou": 0.5, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 12908420, + "step": 143 + }, + { + "epoch": 0.6620689655172414, + "grad_norm": 7.342547102611114, + "learning_rate": 5e-06, + "loss": 0.3044, + "num_input_tokens_seen": 12998800, + "step": 144 + }, + { + "epoch": 0.6620689655172414, + "loss": 0.3009682297706604, + "loss_ce": 0.0017739273607730865, + "loss_iou": 0.470703125, + "loss_num": 0.059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 12998800, + "step": 144 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 4.580788750753373, + "learning_rate": 5e-06, + "loss": 0.251, + "num_input_tokens_seen": 13088988, + "step": 145 + }, + { + "epoch": 0.6666666666666666, + "loss": 0.24675819277763367, + "loss_ce": 0.0006034071557223797, + "loss_iou": 0.42578125, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 13088988, + "step": 145 + }, + { + "epoch": 0.671264367816092, + "grad_norm": 36.43993056681854, + "learning_rate": 5e-06, + "loss": 0.3002, + "num_input_tokens_seen": 13179296, + "step": 146 + }, + { + "epoch": 0.671264367816092, + "loss": 0.29325583577156067, + "loss_ce": 0.003033676417544484, + "loss_iou": 0.400390625, + "loss_num": 0.057861328125, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 13179296, + "step": 146 + }, + { + "epoch": 0.6758620689655173, + "grad_norm": 19.476664971027393, + "learning_rate": 5e-06, + "loss": 0.301, + "num_input_tokens_seen": 13269688, + "step": 147 + }, + { + "epoch": 0.6758620689655173, + "loss": 0.27462151646614075, + "loss_ce": 0.014977962709963322, + "loss_iou": 0.390625, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 13269688, + "step": 147 + }, + { + "epoch": 0.6804597701149425, + "grad_norm": 6.680497018215309, + "learning_rate": 5e-06, + "loss": 0.2906, + "num_input_tokens_seen": 13360096, + "step": 148 + }, + { + "epoch": 0.6804597701149425, + "loss": 0.31177955865859985, + "loss_ce": 0.007702387869358063, + "loss_iou": 0.3515625, + "loss_num": 0.060791015625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 13360096, + "step": 148 + }, + { + "epoch": 0.6850574712643678, + "grad_norm": 42.907574919392374, + "learning_rate": 5e-06, + "loss": 0.2479, + "num_input_tokens_seen": 13450548, + "step": 149 + }, + { + "epoch": 0.6850574712643678, + "loss": 0.23805175721645355, + "loss_ce": 0.0016015599248930812, + "loss_iou": 0.4765625, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 13450548, + "step": 149 + }, + { + "epoch": 0.6896551724137931, + "grad_norm": 26.91726277344863, + "learning_rate": 5e-06, + "loss": 0.2976, + "num_input_tokens_seen": 13540784, + "step": 150 + }, + { + "epoch": 0.6896551724137931, + "loss": 0.315399706363678, + "loss_ce": 0.0002141495351679623, + "loss_iou": 0.416015625, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 13540784, + "step": 150 + }, + { + "epoch": 0.6942528735632184, + "grad_norm": 14.145343351845199, + "learning_rate": 5e-06, + "loss": 0.3234, + "num_input_tokens_seen": 13630388, + "step": 151 + }, + { + "epoch": 0.6942528735632184, + "loss": 0.29768356680870056, + "loss_ce": 0.0025175553746521473, + "loss_iou": 0.376953125, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 13630388, + "step": 151 + }, + { + "epoch": 0.6988505747126437, + "grad_norm": 6.305485293216934, + "learning_rate": 5e-06, + "loss": 0.34, + "num_input_tokens_seen": 13720696, + "step": 152 + }, + { + "epoch": 0.6988505747126437, + "loss": 0.23166516423225403, + "loss_ce": 0.00021984206978231668, + "loss_iou": 0.375, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 13720696, + "step": 152 + }, + { + "epoch": 0.7034482758620689, + "grad_norm": 7.601656082559478, + "learning_rate": 5e-06, + "loss": 0.2815, + "num_input_tokens_seen": 13811092, + "step": 153 + }, + { + "epoch": 0.7034482758620689, + "loss": 0.32397621870040894, + "loss_ce": 0.010408090427517891, + "loss_iou": 0.5078125, + "loss_num": 0.0625, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 13811092, + "step": 153 + }, + { + "epoch": 0.7080459770114943, + "grad_norm": 9.200935279622126, + "learning_rate": 5e-06, + "loss": 0.2512, + "num_input_tokens_seen": 13900680, + "step": 154 + }, + { + "epoch": 0.7080459770114943, + "loss": 0.1441739946603775, + "loss_ce": 0.0004972339374944568, + "loss_iou": 0.490234375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 13900680, + "step": 154 + }, + { + "epoch": 0.7126436781609196, + "grad_norm": 15.640269655312782, + "learning_rate": 5e-06, + "loss": 0.2777, + "num_input_tokens_seen": 13991164, + "step": 155 + }, + { + "epoch": 0.7126436781609196, + "loss": 0.26036322116851807, + "loss_ce": 0.004870051983743906, + "loss_iou": 0.490234375, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 13991164, + "step": 155 + }, + { + "epoch": 0.7172413793103448, + "grad_norm": 8.162614855844033, + "learning_rate": 5e-06, + "loss": 0.2297, + "num_input_tokens_seen": 14081500, + "step": 156 + }, + { + "epoch": 0.7172413793103448, + "loss": 0.2218717634677887, + "loss_ce": 0.000680366822052747, + "loss_iou": 0.423828125, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 14081500, + "step": 156 + }, + { + "epoch": 0.7218390804597701, + "grad_norm": 3.320193610069822, + "learning_rate": 5e-06, + "loss": 0.2703, + "num_input_tokens_seen": 14171952, + "step": 157 + }, + { + "epoch": 0.7218390804597701, + "loss": 0.2780265808105469, + "loss_ce": 0.00043868436478078365, + "loss_iou": 0.412109375, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 14171952, + "step": 157 + }, + { + "epoch": 0.7264367816091954, + "grad_norm": 35.54312110471689, + "learning_rate": 5e-06, + "loss": 0.2552, + "num_input_tokens_seen": 14262436, + "step": 158 + }, + { + "epoch": 0.7264367816091954, + "loss": 0.2401730865240097, + "loss_ce": 0.0017697698203846812, + "loss_iou": 0.421875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 14262436, + "step": 158 + }, + { + "epoch": 0.7310344827586207, + "grad_norm": 7.06971142407152, + "learning_rate": 5e-06, + "loss": 0.2596, + "num_input_tokens_seen": 14352756, + "step": 159 + }, + { + "epoch": 0.7310344827586207, + "loss": 0.20814064145088196, + "loss_ce": 0.00019387324573472142, + "loss_iou": 0.392578125, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 14352756, + "step": 159 + }, + { + "epoch": 0.735632183908046, + "grad_norm": 3.0564053075210946, + "learning_rate": 5e-06, + "loss": 0.2989, + "num_input_tokens_seen": 14443136, + "step": 160 + }, + { + "epoch": 0.735632183908046, + "loss": 0.3017638325691223, + "loss_ce": 0.007879569195210934, + "loss_iou": 0.47265625, + "loss_num": 0.058837890625, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 14443136, + "step": 160 + }, + { + "epoch": 0.7402298850574712, + "grad_norm": 12.412697167878472, + "learning_rate": 5e-06, + "loss": 0.2787, + "num_input_tokens_seen": 14533508, + "step": 161 + }, + { + "epoch": 0.7402298850574712, + "loss": 0.2811751663684845, + "loss_ce": 0.006394892930984497, + "loss_iou": 0.322265625, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 14533508, + "step": 161 + }, + { + "epoch": 0.7448275862068966, + "grad_norm": 7.776759221836541, + "learning_rate": 5e-06, + "loss": 0.2709, + "num_input_tokens_seen": 14623944, + "step": 162 + }, + { + "epoch": 0.7448275862068966, + "loss": 0.27529919147491455, + "loss_ce": 0.004486183635890484, + "loss_iou": 0.46484375, + "loss_num": 0.05419921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 14623944, + "step": 162 + }, + { + "epoch": 0.7494252873563219, + "grad_norm": 4.690047668781669, + "learning_rate": 5e-06, + "loss": 0.2994, + "num_input_tokens_seen": 14714176, + "step": 163 + }, + { + "epoch": 0.7494252873563219, + "loss": 0.3100579082965851, + "loss_ce": 0.01446463167667389, + "loss_iou": 0.41796875, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 14714176, + "step": 163 + }, + { + "epoch": 0.7540229885057471, + "grad_norm": 14.854640525604244, + "learning_rate": 5e-06, + "loss": 0.3539, + "num_input_tokens_seen": 14804540, + "step": 164 + }, + { + "epoch": 0.7540229885057471, + "loss": 0.44931861758232117, + "loss_ce": 0.01682349294424057, + "loss_iou": 0.390625, + "loss_num": 0.08642578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 14804540, + "step": 164 + }, + { + "epoch": 0.7586206896551724, + "grad_norm": 17.524154236618525, + "learning_rate": 5e-06, + "loss": 0.3198, + "num_input_tokens_seen": 14895024, + "step": 165 + }, + { + "epoch": 0.7586206896551724, + "loss": 0.27517974376678467, + "loss_ce": 0.0019253486534580588, + "loss_iou": 0.37109375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 14895024, + "step": 165 + }, + { + "epoch": 0.7632183908045977, + "grad_norm": 14.028282838728552, + "learning_rate": 5e-06, + "loss": 0.2588, + "num_input_tokens_seen": 14985468, + "step": 166 + }, + { + "epoch": 0.7632183908045977, + "loss": 0.26632797718048096, + "loss_ce": 0.021088741719722748, + "loss_iou": 0.37890625, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 14985468, + "step": 166 + }, + { + "epoch": 0.767816091954023, + "grad_norm": 13.184310261988879, + "learning_rate": 5e-06, + "loss": 0.3099, + "num_input_tokens_seen": 15075900, + "step": 167 + }, + { + "epoch": 0.767816091954023, + "loss": 0.2652779519557953, + "loss_ce": 0.001728140632621944, + "loss_iou": 0.4375, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 15075900, + "step": 167 + }, + { + "epoch": 0.7724137931034483, + "grad_norm": 14.801504481488628, + "learning_rate": 5e-06, + "loss": 0.2864, + "num_input_tokens_seen": 15166300, + "step": 168 + }, + { + "epoch": 0.7724137931034483, + "loss": 0.2605416774749756, + "loss_ce": 0.012616850435733795, + "loss_iou": 0.408203125, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 15166300, + "step": 168 + }, + { + "epoch": 0.7770114942528735, + "grad_norm": 7.804434978620636, + "learning_rate": 5e-06, + "loss": 0.2893, + "num_input_tokens_seen": 15256556, + "step": 169 + }, + { + "epoch": 0.7770114942528735, + "loss": 0.17921343445777893, + "loss_ce": 0.003676328808069229, + "loss_iou": 0.5390625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 15256556, + "step": 169 + }, + { + "epoch": 0.7816091954022989, + "grad_norm": 7.905968772421947, + "learning_rate": 5e-06, + "loss": 0.2628, + "num_input_tokens_seen": 15346920, + "step": 170 + }, + { + "epoch": 0.7816091954022989, + "loss": 0.26719164848327637, + "loss_ce": 0.0010783508187159896, + "loss_iou": 0.453125, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 15346920, + "step": 170 + }, + { + "epoch": 0.7862068965517242, + "grad_norm": 5.672105961071837, + "learning_rate": 5e-06, + "loss": 0.2997, + "num_input_tokens_seen": 15437236, + "step": 171 + }, + { + "epoch": 0.7862068965517242, + "loss": 0.30997809767723083, + "loss_ce": 0.0011402069358155131, + "loss_iou": 0.462890625, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 15437236, + "step": 171 + }, + { + "epoch": 0.7908045977011494, + "grad_norm": 31.647759717380147, + "learning_rate": 5e-06, + "loss": 0.3455, + "num_input_tokens_seen": 15527680, + "step": 172 + }, + { + "epoch": 0.7908045977011494, + "loss": 0.4378126859664917, + "loss_ce": 0.006202578078955412, + "loss_iou": 0.408203125, + "loss_num": 0.08642578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 15527680, + "step": 172 + }, + { + "epoch": 0.7954022988505747, + "grad_norm": 9.751745462208614, + "learning_rate": 5e-06, + "loss": 0.3265, + "num_input_tokens_seen": 15618124, + "step": 173 + }, + { + "epoch": 0.7954022988505747, + "loss": 0.4681752622127533, + "loss_ce": 0.1417897641658783, + "loss_iou": 0.482421875, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 15618124, + "step": 173 + }, + { + "epoch": 0.8, + "grad_norm": 11.98242525805576, + "learning_rate": 5e-06, + "loss": 0.303, + "num_input_tokens_seen": 15708484, + "step": 174 + }, + { + "epoch": 0.8, + "loss": 0.30590367317199707, + "loss_ce": 0.0006058230064809322, + "loss_iou": 0.427734375, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 15708484, + "step": 174 + }, + { + "epoch": 0.8045977011494253, + "grad_norm": 10.897026680920138, + "learning_rate": 5e-06, + "loss": 0.2461, + "num_input_tokens_seen": 15798884, + "step": 175 + }, + { + "epoch": 0.8045977011494253, + "loss": 0.20932422578334808, + "loss_ce": 0.0005229614907875657, + "loss_iou": 0.515625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 15798884, + "step": 175 + }, + { + "epoch": 0.8091954022988506, + "grad_norm": 8.597962137572907, + "learning_rate": 5e-06, + "loss": 0.2656, + "num_input_tokens_seen": 15889316, + "step": 176 + }, + { + "epoch": 0.8091954022988506, + "loss": 0.2625455856323242, + "loss_ce": 0.0009489042568020523, + "loss_iou": 0.453125, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 15889316, + "step": 176 + }, + { + "epoch": 0.8137931034482758, + "grad_norm": 11.519278966897533, + "learning_rate": 5e-06, + "loss": 0.2704, + "num_input_tokens_seen": 15979592, + "step": 177 + }, + { + "epoch": 0.8137931034482758, + "loss": 0.2799646854400635, + "loss_ce": 0.000789885874837637, + "loss_iou": 0.50390625, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 15979592, + "step": 177 + }, + { + "epoch": 0.8183908045977012, + "grad_norm": 53.5897696229877, + "learning_rate": 5e-06, + "loss": 0.3061, + "num_input_tokens_seen": 16066948, + "step": 178 + }, + { + "epoch": 0.8183908045977012, + "loss": 0.3006019592285156, + "loss_ce": 0.00037002595490776, + "loss_iou": 0.447265625, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 16066948, + "step": 178 + }, + { + "epoch": 0.8229885057471265, + "grad_norm": 8.142699994347275, + "learning_rate": 5e-06, + "loss": 0.2444, + "num_input_tokens_seen": 16157256, + "step": 179 + }, + { + "epoch": 0.8229885057471265, + "loss": 0.21453739702701569, + "loss_ce": 0.00024295142793562263, + "loss_iou": 0.484375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 16157256, + "step": 179 + }, + { + "epoch": 0.8275862068965517, + "grad_norm": 8.723664846573774, + "learning_rate": 5e-06, + "loss": 0.2936, + "num_input_tokens_seen": 16247552, + "step": 180 + }, + { + "epoch": 0.8275862068965517, + "loss": 0.30998989939689636, + "loss_ce": 0.008781395852565765, + "loss_iou": 0.44140625, + "loss_num": 0.060302734375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 16247552, + "step": 180 + }, + { + "epoch": 0.832183908045977, + "grad_norm": 31.640092388654665, + "learning_rate": 5e-06, + "loss": 0.2839, + "num_input_tokens_seen": 16337132, + "step": 181 + }, + { + "epoch": 0.832183908045977, + "loss": 0.2720521092414856, + "loss_ce": 0.006427087355405092, + "loss_iou": 0.3671875, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 16337132, + "step": 181 + }, + { + "epoch": 0.8367816091954023, + "grad_norm": 15.726684290210118, + "learning_rate": 5e-06, + "loss": 0.2782, + "num_input_tokens_seen": 16427532, + "step": 182 + }, + { + "epoch": 0.8367816091954023, + "loss": 0.22582070529460907, + "loss_ce": 0.0006009868229739368, + "loss_iou": 0.400390625, + "loss_num": 0.044921875, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 16427532, + "step": 182 + }, + { + "epoch": 0.8413793103448276, + "grad_norm": 9.413293029313524, + "learning_rate": 5e-06, + "loss": 0.2648, + "num_input_tokens_seen": 16517996, + "step": 183 + }, + { + "epoch": 0.8413793103448276, + "loss": 0.2550521492958069, + "loss_ce": 0.0031600736547261477, + "loss_iou": 0.48046875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 16517996, + "step": 183 + }, + { + "epoch": 0.8459770114942529, + "grad_norm": 17.034560829091234, + "learning_rate": 5e-06, + "loss": 0.2503, + "num_input_tokens_seen": 16608392, + "step": 184 + }, + { + "epoch": 0.8459770114942529, + "loss": 0.24292418360710144, + "loss_ce": 0.006596065126359463, + "loss_iou": 0.46875, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 16608392, + "step": 184 + }, + { + "epoch": 0.8505747126436781, + "grad_norm": 24.671310704425558, + "learning_rate": 5e-06, + "loss": 0.2523, + "num_input_tokens_seen": 16698852, + "step": 185 + }, + { + "epoch": 0.8505747126436781, + "loss": 0.2351730614900589, + "loss_ce": 0.002690147841349244, + "loss_iou": 0.447265625, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 16698852, + "step": 185 + }, + { + "epoch": 0.8551724137931035, + "grad_norm": 8.258454597087393, + "learning_rate": 5e-06, + "loss": 0.2756, + "num_input_tokens_seen": 16789340, + "step": 186 + }, + { + "epoch": 0.8551724137931035, + "loss": 0.3019639849662781, + "loss_ce": 0.01021592691540718, + "loss_iou": 0.498046875, + "loss_num": 0.058349609375, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 16789340, + "step": 186 + }, + { + "epoch": 0.8597701149425288, + "grad_norm": 4.472141198961523, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 16879736, + "step": 187 + }, + { + "epoch": 0.8597701149425288, + "loss": 0.19520393013954163, + "loss_ce": 0.003004225669428706, + "loss_iou": 0.474609375, + "loss_num": 0.038330078125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 16879736, + "step": 187 + }, + { + "epoch": 0.864367816091954, + "grad_norm": 4.244846244353357, + "learning_rate": 5e-06, + "loss": 0.2684, + "num_input_tokens_seen": 16968620, + "step": 188 + }, + { + "epoch": 0.864367816091954, + "loss": 0.29818519949913025, + "loss_ce": 0.007596845738589764, + "loss_iou": 0.365234375, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 16968620, + "step": 188 + }, + { + "epoch": 0.8689655172413793, + "grad_norm": 17.287226125768598, + "learning_rate": 5e-06, + "loss": 0.2103, + "num_input_tokens_seen": 17058968, + "step": 189 + }, + { + "epoch": 0.8689655172413793, + "loss": 0.23691387474536896, + "loss_ce": 0.00015851360512897372, + "loss_iou": 0.4765625, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 17058968, + "step": 189 + }, + { + "epoch": 0.8735632183908046, + "grad_norm": 11.788400904878815, + "learning_rate": 5e-06, + "loss": 0.2889, + "num_input_tokens_seen": 17149248, + "step": 190 + }, + { + "epoch": 0.8735632183908046, + "loss": 0.24897444248199463, + "loss_ce": 0.00013410599785856903, + "loss_iou": 0.4453125, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 17149248, + "step": 190 + }, + { + "epoch": 0.8781609195402299, + "grad_norm": 13.617723461222461, + "learning_rate": 5e-06, + "loss": 0.2228, + "num_input_tokens_seen": 17239628, + "step": 191 + }, + { + "epoch": 0.8781609195402299, + "loss": 0.23315443098545074, + "loss_ce": 0.005981584079563618, + "loss_iou": 0.388671875, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 17239628, + "step": 191 + }, + { + "epoch": 0.8827586206896552, + "grad_norm": 22.65066129559751, + "learning_rate": 5e-06, + "loss": 0.218, + "num_input_tokens_seen": 17329208, + "step": 192 + }, + { + "epoch": 0.8827586206896552, + "loss": 0.23583698272705078, + "loss_ce": 0.004879951477050781, + "loss_iou": 0.3984375, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 17329208, + "step": 192 + }, + { + "epoch": 0.8873563218390804, + "grad_norm": 21.965810935500244, + "learning_rate": 5e-06, + "loss": 0.2832, + "num_input_tokens_seen": 17419616, + "step": 193 + }, + { + "epoch": 0.8873563218390804, + "loss": 0.2855183780193329, + "loss_ce": 0.004756668582558632, + "loss_iou": 0.458984375, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 17419616, + "step": 193 + }, + { + "epoch": 0.8919540229885058, + "grad_norm": 8.272351331856266, + "learning_rate": 5e-06, + "loss": 0.2343, + "num_input_tokens_seen": 17507752, + "step": 194 + }, + { + "epoch": 0.8919540229885058, + "loss": 0.2399004101753235, + "loss_ce": 0.0027177799493074417, + "loss_iou": 0.451171875, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 17507752, + "step": 194 + }, + { + "epoch": 0.896551724137931, + "grad_norm": 5.04648132108404, + "learning_rate": 5e-06, + "loss": 0.247, + "num_input_tokens_seen": 17595748, + "step": 195 + }, + { + "epoch": 0.896551724137931, + "loss": 0.2049858421087265, + "loss_ce": 0.02108692191541195, + "loss_iou": 0.421875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 17595748, + "step": 195 + }, + { + "epoch": 0.9011494252873563, + "grad_norm": 11.7279486833154, + "learning_rate": 5e-06, + "loss": 0.3365, + "num_input_tokens_seen": 17686040, + "step": 196 + }, + { + "epoch": 0.9011494252873563, + "loss": 0.37410539388656616, + "loss_ce": 0.002889568218961358, + "loss_iou": 0.375, + "loss_num": 0.07421875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 17686040, + "step": 196 + }, + { + "epoch": 0.9057471264367816, + "grad_norm": 9.565742677061259, + "learning_rate": 5e-06, + "loss": 0.293, + "num_input_tokens_seen": 17776348, + "step": 197 + }, + { + "epoch": 0.9057471264367816, + "loss": 0.3795655369758606, + "loss_ce": 0.0005372293526306748, + "loss_iou": 0.361328125, + "loss_num": 0.07568359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 17776348, + "step": 197 + }, + { + "epoch": 0.9103448275862069, + "grad_norm": 5.3718887593573275, + "learning_rate": 5e-06, + "loss": 0.245, + "num_input_tokens_seen": 17866780, + "step": 198 + }, + { + "epoch": 0.9103448275862069, + "loss": 0.23624387383460999, + "loss_ce": 0.004615448880940676, + "loss_iou": 0.515625, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 17866780, + "step": 198 + }, + { + "epoch": 0.9149425287356322, + "grad_norm": 10.445265942368462, + "learning_rate": 5e-06, + "loss": 0.3073, + "num_input_tokens_seen": 17957092, + "step": 199 + }, + { + "epoch": 0.9149425287356322, + "loss": 0.4080125093460083, + "loss_ce": 0.0026475111953914165, + "loss_iou": 0.3984375, + "loss_num": 0.0810546875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 17957092, + "step": 199 + }, + { + "epoch": 0.9195402298850575, + "grad_norm": 13.36367583066565, + "learning_rate": 5e-06, + "loss": 0.2221, + "num_input_tokens_seen": 18047500, + "step": 200 + }, + { + "epoch": 0.9195402298850575, + "loss": 0.18272346258163452, + "loss_ce": 0.0006555922445841134, + "loss_iou": 0.421875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 18047500, + "step": 200 + }, + { + "epoch": 0.9241379310344827, + "grad_norm": 7.033010833292816, + "learning_rate": 5e-06, + "loss": 0.3052, + "num_input_tokens_seen": 18137988, + "step": 201 + }, + { + "epoch": 0.9241379310344827, + "loss": 0.37599673867225647, + "loss_ce": 0.007222320418804884, + "loss_iou": 0.39453125, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 18137988, + "step": 201 + }, + { + "epoch": 0.9287356321839081, + "grad_norm": 3.214939505030853, + "learning_rate": 5e-06, + "loss": 0.2099, + "num_input_tokens_seen": 18228316, + "step": 202 + }, + { + "epoch": 0.9287356321839081, + "loss": 0.2917129397392273, + "loss_ce": 0.0024673594161868095, + "loss_iou": 0.447265625, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 18228316, + "step": 202 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 47.44674206476791, + "learning_rate": 5e-06, + "loss": 0.2881, + "num_input_tokens_seen": 18318712, + "step": 203 + }, + { + "epoch": 0.9333333333333333, + "loss": 0.27268558740615845, + "loss_ce": 0.0027881115674972534, + "loss_iou": 0.369140625, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 18318712, + "step": 203 + }, + { + "epoch": 0.9379310344827586, + "grad_norm": 15.635747866161521, + "learning_rate": 5e-06, + "loss": 0.2178, + "num_input_tokens_seen": 18409128, + "step": 204 + }, + { + "epoch": 0.9379310344827586, + "loss": 0.21751438081264496, + "loss_ce": 0.009933818131685257, + "loss_iou": 0.3828125, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 18409128, + "step": 204 + }, + { + "epoch": 0.9425287356321839, + "grad_norm": 14.842085686479773, + "learning_rate": 5e-06, + "loss": 0.2294, + "num_input_tokens_seen": 18499512, + "step": 205 + }, + { + "epoch": 0.9425287356321839, + "loss": 0.2684970498085022, + "loss_ce": 0.005435529164969921, + "loss_iou": 0.41015625, + "loss_num": 0.052490234375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 18499512, + "step": 205 + }, + { + "epoch": 0.9471264367816092, + "grad_norm": 10.75647435799341, + "learning_rate": 5e-06, + "loss": 0.2631, + "num_input_tokens_seen": 18589756, + "step": 206 + }, + { + "epoch": 0.9471264367816092, + "loss": 0.3199033737182617, + "loss_ce": 0.0008725962834432721, + "loss_iou": 0.4296875, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 18589756, + "step": 206 + }, + { + "epoch": 0.9517241379310345, + "grad_norm": 10.014451793263762, + "learning_rate": 5e-06, + "loss": 0.3506, + "num_input_tokens_seen": 18680168, + "step": 207 + }, + { + "epoch": 0.9517241379310345, + "loss": 0.41780880093574524, + "loss_ce": 0.004295613616704941, + "loss_iou": 0.5078125, + "loss_num": 0.0830078125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 18680168, + "step": 207 + }, + { + "epoch": 0.9563218390804598, + "grad_norm": 81.22703034283288, + "learning_rate": 5e-06, + "loss": 0.2683, + "num_input_tokens_seen": 18768984, + "step": 208 + }, + { + "epoch": 0.9563218390804598, + "loss": 0.22123399376869202, + "loss_ce": 0.00040880130836740136, + "loss_iou": 0.451171875, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 18768984, + "step": 208 + }, + { + "epoch": 0.960919540229885, + "grad_norm": 18.16487091628093, + "learning_rate": 5e-06, + "loss": 0.2017, + "num_input_tokens_seen": 18859384, + "step": 209 + }, + { + "epoch": 0.960919540229885, + "loss": 0.1849987506866455, + "loss_ce": 0.00650144275277853, + "loss_iou": 0.419921875, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 18859384, + "step": 209 + }, + { + "epoch": 0.9655172413793104, + "grad_norm": 6.997216906550849, + "learning_rate": 5e-06, + "loss": 0.3129, + "num_input_tokens_seen": 18949688, + "step": 210 + }, + { + "epoch": 0.9655172413793104, + "loss": 0.38702672719955444, + "loss_ce": 0.0003079898888245225, + "loss_iou": 0.49609375, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 18949688, + "step": 210 + }, + { + "epoch": 0.9701149425287356, + "grad_norm": 7.51470392722959, + "learning_rate": 5e-06, + "loss": 0.2452, + "num_input_tokens_seen": 19039948, + "step": 211 + }, + { + "epoch": 0.9701149425287356, + "loss": 0.30642956495285034, + "loss_ce": 0.0001551464811200276, + "loss_iou": 0.408203125, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 19039948, + "step": 211 + }, + { + "epoch": 0.9747126436781609, + "grad_norm": 5.659981606083217, + "learning_rate": 5e-06, + "loss": 0.2912, + "num_input_tokens_seen": 19130216, + "step": 212 + }, + { + "epoch": 0.9747126436781609, + "loss": 0.28307363390922546, + "loss_ce": 0.0021898397244513035, + "loss_iou": 0.458984375, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 19130216, + "step": 212 + }, + { + "epoch": 0.9793103448275862, + "grad_norm": 15.905977611839273, + "learning_rate": 5e-06, + "loss": 0.2863, + "num_input_tokens_seen": 19220568, + "step": 213 + }, + { + "epoch": 0.9793103448275862, + "loss": 0.39371466636657715, + "loss_ce": 0.0007703077862970531, + "loss_iou": 0.44140625, + "loss_num": 0.07861328125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 19220568, + "step": 213 + }, + { + "epoch": 0.9839080459770115, + "grad_norm": 6.117701022668606, + "learning_rate": 5e-06, + "loss": 0.2468, + "num_input_tokens_seen": 19311004, + "step": 214 + }, + { + "epoch": 0.9839080459770115, + "loss": 0.2368617057800293, + "loss_ce": 0.001754299970343709, + "loss_iou": 0.3984375, + "loss_num": 0.046875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 19311004, + "step": 214 + }, + { + "epoch": 0.9885057471264368, + "grad_norm": 13.736398962240743, + "learning_rate": 5e-06, + "loss": 0.2513, + "num_input_tokens_seen": 19401384, + "step": 215 + }, + { + "epoch": 0.9885057471264368, + "loss": 0.24910268187522888, + "loss_ce": 0.004153335001319647, + "loss_iou": 0.44140625, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 19401384, + "step": 215 + }, + { + "epoch": 0.993103448275862, + "grad_norm": 5.219663028608165, + "learning_rate": 5e-06, + "loss": 0.2163, + "num_input_tokens_seen": 19491860, + "step": 216 + }, + { + "epoch": 0.993103448275862, + "loss": 0.2344357669353485, + "loss_ce": 0.0007016360759735107, + "loss_iou": 0.486328125, + "loss_num": 0.046875, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 19491860, + "step": 216 + }, + { + "epoch": 0.9977011494252873, + "grad_norm": 9.37488702763018, + "learning_rate": 5e-06, + "loss": 0.1707, + "num_input_tokens_seen": 19582284, + "step": 217 + }, + { + "epoch": 0.9977011494252873, + "loss": 0.14838698506355286, + "loss_ce": 0.00046829067287035286, + "loss_iou": 0.51171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 19582284, + "step": 217 + }, + { + "epoch": 0.9977011494252873, + "loss": 0.24481603503227234, + "loss_ce": 0.001377313630655408, + "loss_iou": 0.41015625, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 19627492, + "step": 217 + }, + { + "epoch": 1.0022988505747126, + "grad_norm": 13.179380791300144, + "learning_rate": 5e-06, + "loss": 0.237, + "num_input_tokens_seen": 19672672, + "step": 218 + }, + { + "epoch": 1.0022988505747126, + "loss": 0.22918623685836792, + "loss_ce": 0.0003043994656763971, + "loss_iou": 0.4140625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 19672672, + "step": 218 + }, + { + "epoch": 1.006896551724138, + "grad_norm": 11.575549644295782, + "learning_rate": 5e-06, + "loss": 0.1865, + "num_input_tokens_seen": 19762928, + "step": 219 + }, + { + "epoch": 1.006896551724138, + "loss": 0.20134694874286652, + "loss_ce": 5.299979602568783e-05, + "loss_iou": 0.52734375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 19762928, + "step": 219 + }, + { + "epoch": 1.0114942528735633, + "grad_norm": 7.770217563711411, + "learning_rate": 5e-06, + "loss": 0.2716, + "num_input_tokens_seen": 19853264, + "step": 220 + }, + { + "epoch": 1.0114942528735633, + "loss": 0.23982341587543488, + "loss_ce": 0.0010538852075114846, + "loss_iou": 0.484375, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 19853264, + "step": 220 + }, + { + "epoch": 1.0160919540229885, + "grad_norm": 13.318468212320983, + "learning_rate": 5e-06, + "loss": 0.2188, + "num_input_tokens_seen": 19942828, + "step": 221 + }, + { + "epoch": 1.0160919540229885, + "loss": 0.19837090373039246, + "loss_ce": 0.000189754442544654, + "loss_iou": 0.412109375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 19942828, + "step": 221 + }, + { + "epoch": 1.0206896551724138, + "grad_norm": 4.7677670902469735, + "learning_rate": 5e-06, + "loss": 0.1753, + "num_input_tokens_seen": 20033268, + "step": 222 + }, + { + "epoch": 1.0206896551724138, + "loss": 0.23340031504631042, + "loss_ce": 0.01013370230793953, + "loss_iou": 0.46484375, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 20033268, + "step": 222 + }, + { + "epoch": 1.025287356321839, + "grad_norm": 7.294150569078434, + "learning_rate": 5e-06, + "loss": 0.2063, + "num_input_tokens_seen": 20123744, + "step": 223 + }, + { + "epoch": 1.025287356321839, + "loss": 0.16170556843280792, + "loss_ce": 0.00222071073949337, + "loss_iou": 0.44921875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 20123744, + "step": 223 + }, + { + "epoch": 1.0298850574712644, + "grad_norm": 13.473703879708541, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 20214216, + "step": 224 + }, + { + "epoch": 1.0298850574712644, + "loss": 0.12107338011264801, + "loss_ce": 0.0005899769021198153, + "loss_iou": 0.45703125, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 20214216, + "step": 224 + }, + { + "epoch": 1.0344827586206897, + "grad_norm": 16.323294537818267, + "learning_rate": 5e-06, + "loss": 0.2242, + "num_input_tokens_seen": 20304620, + "step": 225 + }, + { + "epoch": 1.0344827586206897, + "loss": 0.2116052806377411, + "loss_ce": 0.002529359422624111, + "loss_iou": 0.361328125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 20304620, + "step": 225 + }, + { + "epoch": 1.0390804597701149, + "grad_norm": 5.045052986814076, + "learning_rate": 5e-06, + "loss": 0.2254, + "num_input_tokens_seen": 20394932, + "step": 226 + }, + { + "epoch": 1.0390804597701149, + "loss": 0.19622498750686646, + "loss_ce": 0.0004242146678734571, + "loss_iou": 0.494140625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 20394932, + "step": 226 + }, + { + "epoch": 1.0436781609195402, + "grad_norm": 10.85002200148262, + "learning_rate": 5e-06, + "loss": 0.2326, + "num_input_tokens_seen": 20485364, + "step": 227 + }, + { + "epoch": 1.0436781609195402, + "loss": 0.16513219475746155, + "loss_ce": 0.0007034791633486748, + "loss_iou": 0.4453125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 20485364, + "step": 227 + }, + { + "epoch": 1.0482758620689656, + "grad_norm": 10.36785476657075, + "learning_rate": 5e-06, + "loss": 0.189, + "num_input_tokens_seen": 20575780, + "step": 228 + }, + { + "epoch": 1.0482758620689656, + "loss": 0.1746249496936798, + "loss_ce": 0.0005526923341676593, + "loss_iou": 0.3828125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 20575780, + "step": 228 + }, + { + "epoch": 1.0528735632183908, + "grad_norm": 11.993359880118176, + "learning_rate": 5e-06, + "loss": 0.2183, + "num_input_tokens_seen": 20666132, + "step": 229 + }, + { + "epoch": 1.0528735632183908, + "loss": 0.24332331120967865, + "loss_ce": 0.001074782107025385, + "loss_iou": 0.498046875, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 20666132, + "step": 229 + }, + { + "epoch": 1.0574712643678161, + "grad_norm": 21.11605854531589, + "learning_rate": 5e-06, + "loss": 0.2293, + "num_input_tokens_seen": 20756592, + "step": 230 + }, + { + "epoch": 1.0574712643678161, + "loss": 0.21172873675823212, + "loss_ce": 0.005735091865062714, + "loss_iou": 0.51953125, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 20756592, + "step": 230 + }, + { + "epoch": 1.0620689655172413, + "grad_norm": 13.639736851512211, + "learning_rate": 5e-06, + "loss": 0.1664, + "num_input_tokens_seen": 20846888, + "step": 231 + }, + { + "epoch": 1.0620689655172413, + "loss": 0.1343887746334076, + "loss_ce": 0.010151727125048637, + "loss_iou": 0.53515625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 20846888, + "step": 231 + }, + { + "epoch": 1.0666666666666667, + "grad_norm": 4.8416225246295195, + "learning_rate": 5e-06, + "loss": 0.2231, + "num_input_tokens_seen": 20937220, + "step": 232 + }, + { + "epoch": 1.0666666666666667, + "loss": 0.19790634512901306, + "loss_ce": 0.00024397407833021134, + "loss_iou": 0.341796875, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 20937220, + "step": 232 + }, + { + "epoch": 1.071264367816092, + "grad_norm": 8.735510518442098, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 21027540, + "step": 233 + }, + { + "epoch": 1.071264367816092, + "loss": 0.2129209041595459, + "loss_ce": 0.014556641690433025, + "loss_iou": 0.5234375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 21027540, + "step": 233 + }, + { + "epoch": 1.0758620689655172, + "grad_norm": 64.22232050337796, + "learning_rate": 5e-06, + "loss": 0.243, + "num_input_tokens_seen": 21117908, + "step": 234 + }, + { + "epoch": 1.0758620689655172, + "loss": 0.19178733229637146, + "loss_ce": 0.000259012304013595, + "loss_iou": 0.482421875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 21117908, + "step": 234 + }, + { + "epoch": 1.0804597701149425, + "grad_norm": 29.01468538309071, + "learning_rate": 5e-06, + "loss": 0.2216, + "num_input_tokens_seen": 21208340, + "step": 235 + }, + { + "epoch": 1.0804597701149425, + "loss": 0.22727806866168976, + "loss_ce": 0.0060866596177220345, + "loss_iou": 0.400390625, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 21208340, + "step": 235 + }, + { + "epoch": 1.085057471264368, + "grad_norm": 11.877399551682112, + "learning_rate": 5e-06, + "loss": 0.193, + "num_input_tokens_seen": 21298700, + "step": 236 + }, + { + "epoch": 1.085057471264368, + "loss": 0.2526742219924927, + "loss_ce": 0.00011075517249992117, + "loss_iou": 0.396484375, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 21298700, + "step": 236 + }, + { + "epoch": 1.089655172413793, + "grad_norm": 11.06433974753488, + "learning_rate": 5e-06, + "loss": 0.2513, + "num_input_tokens_seen": 21389112, + "step": 237 + }, + { + "epoch": 1.089655172413793, + "loss": 0.2866198718547821, + "loss_ce": 0.0012194736627861857, + "loss_iou": 0.443359375, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 21389112, + "step": 237 + }, + { + "epoch": 1.0942528735632184, + "grad_norm": 5.522127581505815, + "learning_rate": 5e-06, + "loss": 0.2329, + "num_input_tokens_seen": 21479544, + "step": 238 + }, + { + "epoch": 1.0942528735632184, + "loss": 0.24488475918769836, + "loss_ce": 0.0002558681007940322, + "loss_iou": 0.474609375, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 21479544, + "step": 238 + }, + { + "epoch": 1.0988505747126436, + "grad_norm": 20.5377108131709, + "learning_rate": 5e-06, + "loss": 0.183, + "num_input_tokens_seen": 21569880, + "step": 239 + }, + { + "epoch": 1.0988505747126436, + "loss": 0.17812752723693848, + "loss_ce": 2.6934067136608064e-05, + "loss_iou": 0.482421875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 21569880, + "step": 239 + }, + { + "epoch": 1.103448275862069, + "grad_norm": 12.413266667265427, + "learning_rate": 5e-06, + "loss": 0.2067, + "num_input_tokens_seen": 21660268, + "step": 240 + }, + { + "epoch": 1.103448275862069, + "loss": 0.21825896203517914, + "loss_ce": 0.03881560266017914, + "loss_iou": 0.48828125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 21660268, + "step": 240 + }, + { + "epoch": 1.1080459770114943, + "grad_norm": 3.800385893974569, + "learning_rate": 5e-06, + "loss": 0.2301, + "num_input_tokens_seen": 21750640, + "step": 241 + }, + { + "epoch": 1.1080459770114943, + "loss": 0.2575361132621765, + "loss_ce": 0.000211891092476435, + "loss_iou": 0.482421875, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 21750640, + "step": 241 + }, + { + "epoch": 1.1126436781609195, + "grad_norm": 8.560187437534255, + "learning_rate": 5e-06, + "loss": 0.2571, + "num_input_tokens_seen": 21841068, + "step": 242 + }, + { + "epoch": 1.1126436781609195, + "loss": 0.2956390976905823, + "loss_ce": 0.0056000337935984135, + "loss_iou": 0.38671875, + "loss_num": 0.05810546875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 21841068, + "step": 242 + }, + { + "epoch": 1.1172413793103448, + "grad_norm": 20.63729856829291, + "learning_rate": 5e-06, + "loss": 0.2309, + "num_input_tokens_seen": 21931304, + "step": 243 + }, + { + "epoch": 1.1172413793103448, + "loss": 0.21533158421516418, + "loss_ce": 0.00045730240526609123, + "loss_iou": 0.50390625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 21931304, + "step": 243 + }, + { + "epoch": 1.1218390804597702, + "grad_norm": 25.579280798369926, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 22021568, + "step": 244 + }, + { + "epoch": 1.1218390804597702, + "loss": 0.25474822521209717, + "loss_ce": 0.0018490497022867203, + "loss_iou": 0.4140625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 22021568, + "step": 244 + }, + { + "epoch": 1.1264367816091954, + "grad_norm": 32.89831549609003, + "learning_rate": 5e-06, + "loss": 0.2793, + "num_input_tokens_seen": 22111816, + "step": 245 + }, + { + "epoch": 1.1264367816091954, + "loss": 0.2946988344192505, + "loss_ce": 0.0027066715992987156, + "loss_iou": 0.3984375, + "loss_num": 0.058349609375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 22111816, + "step": 245 + }, + { + "epoch": 1.1310344827586207, + "grad_norm": 11.404530774167966, + "learning_rate": 5e-06, + "loss": 0.2252, + "num_input_tokens_seen": 22202224, + "step": 246 + }, + { + "epoch": 1.1310344827586207, + "loss": 0.17997342348098755, + "loss_ce": 0.0004079932114109397, + "loss_iou": 0.5, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 22202224, + "step": 246 + }, + { + "epoch": 1.1356321839080459, + "grad_norm": 24.132647296050727, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 22292492, + "step": 247 + }, + { + "epoch": 1.1356321839080459, + "loss": 0.17757512629032135, + "loss_ce": 0.000573178636841476, + "loss_iou": 0.38671875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 22292492, + "step": 247 + }, + { + "epoch": 1.1402298850574712, + "grad_norm": 7.434676789770062, + "learning_rate": 5e-06, + "loss": 0.2615, + "num_input_tokens_seen": 22382928, + "step": 248 + }, + { + "epoch": 1.1402298850574712, + "loss": 0.27751120924949646, + "loss_ce": 0.010482416488230228, + "loss_iou": 0.35546875, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 22382928, + "step": 248 + }, + { + "epoch": 1.1448275862068966, + "grad_norm": 10.379288916413433, + "learning_rate": 5e-06, + "loss": 0.2081, + "num_input_tokens_seen": 22473304, + "step": 249 + }, + { + "epoch": 1.1448275862068966, + "loss": 0.14225530624389648, + "loss_ce": 0.0012641068315133452, + "loss_iou": 0.46484375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 22473304, + "step": 249 + }, + { + "epoch": 1.1494252873563218, + "grad_norm": 7.839282118755005, + "learning_rate": 5e-06, + "loss": 0.2665, + "num_input_tokens_seen": 22563700, + "step": 250 + }, + { + "epoch": 1.1494252873563218, + "eval_seeclick_CIoU": 0.4888755977153778, + "eval_seeclick_GIoU": 0.47952449321746826, + "eval_seeclick_IoU": 0.5278463363647461, + "eval_seeclick_MAE_all": 0.056948138400912285, + "eval_seeclick_MAE_h": 0.04946732707321644, + "eval_seeclick_MAE_w": 0.09706718474626541, + "eval_seeclick_MAE_x_boxes": 0.09927782043814659, + "eval_seeclick_MAE_y_boxes": 0.046883879229426384, + "eval_seeclick_NUM_probability": 0.9999977946281433, + "eval_seeclick_inside_bbox": 0.8451704680919647, + "eval_seeclick_loss": 0.3242720365524292, + "eval_seeclick_loss_ce": 0.040716785937547684, + "eval_seeclick_loss_iou": 0.467529296875, + "eval_seeclick_loss_num": 0.0596923828125, + "eval_seeclick_loss_xval": 0.29864501953125, + "eval_seeclick_runtime": 75.8262, + "eval_seeclick_samples_per_second": 0.567, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 22563700, + "step": 250 + }, + { + "epoch": 1.1494252873563218, + "eval_icons_CIoU": 0.5218705832958221, + "eval_icons_GIoU": 0.5266680121421814, + "eval_icons_IoU": 0.5595913529396057, + "eval_icons_MAE_all": 0.043722933158278465, + "eval_icons_MAE_h": 0.08452420309185982, + "eval_icons_MAE_w": 0.06406798399984837, + "eval_icons_MAE_x_boxes": 0.05667761527001858, + "eval_icons_MAE_y_boxes": 0.08344150707125664, + "eval_icons_NUM_probability": 0.9999994039535522, + "eval_icons_inside_bbox": 0.7239583432674408, + "eval_icons_loss": 0.2156449854373932, + "eval_icons_loss_ce": 4.674579088259634e-07, + "eval_icons_loss_iou": 0.43145751953125, + "eval_icons_loss_num": 0.04747772216796875, + "eval_icons_loss_xval": 0.237518310546875, + "eval_icons_runtime": 84.5354, + "eval_icons_samples_per_second": 0.591, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 22563700, + "step": 250 + }, + { + "epoch": 1.1494252873563218, + "eval_screenspot_CIoU": 0.30689453333616257, + "eval_screenspot_GIoU": 0.2762495552500089, + "eval_screenspot_IoU": 0.3886234064896901, + "eval_screenspot_MAE_all": 0.10496337960163753, + "eval_screenspot_MAE_h": 0.11666570603847504, + "eval_screenspot_MAE_w": 0.208540049691995, + "eval_screenspot_MAE_x_boxes": 0.18961978455384573, + "eval_screenspot_MAE_y_boxes": 0.1133890226483345, + "eval_screenspot_NUM_probability": 0.9999755620956421, + "eval_screenspot_inside_bbox": 0.6433333357175192, + "eval_screenspot_loss": 0.5237547755241394, + "eval_screenspot_loss_ce": 0.0012614075288486977, + "eval_screenspot_loss_iou": 0.3485921223958333, + "eval_screenspot_loss_num": 0.10777791341145833, + "eval_screenspot_loss_xval": 0.5385335286458334, + "eval_screenspot_runtime": 158.8594, + "eval_screenspot_samples_per_second": 0.56, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 22563700, + "step": 250 + }, + { + "epoch": 1.1494252873563218, + "eval_compot_CIoU": 0.44808267056941986, + "eval_compot_GIoU": 0.4227418601512909, + "eval_compot_IoU": 0.5081988573074341, + "eval_compot_MAE_all": 0.05795424245297909, + "eval_compot_MAE_h": 0.09416088834404945, + "eval_compot_MAE_w": 0.09847152419388294, + "eval_compot_MAE_x_boxes": 0.08727088011801243, + "eval_compot_MAE_y_boxes": 0.09673519805073738, + "eval_compot_NUM_probability": 0.9999892115592957, + "eval_compot_inside_bbox": 0.7604166567325592, + "eval_compot_loss": 0.32618048787117004, + "eval_compot_loss_ce": 0.013961461605504155, + "eval_compot_loss_iou": 0.4866943359375, + "eval_compot_loss_num": 0.0546875, + "eval_compot_loss_xval": 0.273345947265625, + "eval_compot_runtime": 88.276, + "eval_compot_samples_per_second": 0.566, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 22563700, + "step": 250 + }, + { + "epoch": 1.1494252873563218, + "loss": 0.19636714458465576, + "loss_ce": 0.007402303162962198, + "loss_iou": 0.546875, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 22563700, + "step": 250 + }, + { + "epoch": 1.1540229885057471, + "grad_norm": 7.43698227348526, + "learning_rate": 5e-06, + "loss": 0.1928, + "num_input_tokens_seen": 22654036, + "step": 251 + }, + { + "epoch": 1.1540229885057471, + "loss": 0.19022439420223236, + "loss_ce": 0.00022195614292286336, + "loss_iou": 0.5, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 22654036, + "step": 251 + }, + { + "epoch": 1.1586206896551725, + "grad_norm": 7.724863758957877, + "learning_rate": 5e-06, + "loss": 0.235, + "num_input_tokens_seen": 22744428, + "step": 252 + }, + { + "epoch": 1.1586206896551725, + "loss": 0.28669023513793945, + "loss_ce": 0.0006184765952639282, + "loss_iou": 0.375, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 22744428, + "step": 252 + }, + { + "epoch": 1.1632183908045977, + "grad_norm": 52.54527766863086, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 22834692, + "step": 253 + }, + { + "epoch": 1.1632183908045977, + "loss": 0.1744190752506256, + "loss_ce": 0.0014149189228191972, + "loss_iou": 0.451171875, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 22834692, + "step": 253 + }, + { + "epoch": 1.167816091954023, + "grad_norm": 4.696366017744483, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 22924280, + "step": 254 + }, + { + "epoch": 1.167816091954023, + "loss": 0.12924961745738983, + "loss_ce": 0.0024185676593333483, + "loss_iou": 0.44140625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 22924280, + "step": 254 + }, + { + "epoch": 1.1724137931034484, + "grad_norm": 6.684225949401091, + "learning_rate": 5e-06, + "loss": 0.1772, + "num_input_tokens_seen": 23014664, + "step": 255 + }, + { + "epoch": 1.1724137931034484, + "loss": 0.1868850439786911, + "loss_ce": 0.00048367734416387975, + "loss_iou": 0.5078125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 23014664, + "step": 255 + }, + { + "epoch": 1.1770114942528735, + "grad_norm": 10.115688964897386, + "learning_rate": 5e-06, + "loss": 0.2414, + "num_input_tokens_seen": 23104224, + "step": 256 + }, + { + "epoch": 1.1770114942528735, + "loss": 0.2677534222602844, + "loss_ce": 0.00048048628377728164, + "loss_iou": 0.435546875, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 23104224, + "step": 256 + }, + { + "epoch": 1.181609195402299, + "grad_norm": 5.045379741734736, + "learning_rate": 5e-06, + "loss": 0.2122, + "num_input_tokens_seen": 23194388, + "step": 257 + }, + { + "epoch": 1.181609195402299, + "loss": 0.24545620381832123, + "loss_ce": 9.487089118920267e-05, + "loss_iou": 0.3671875, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 23194388, + "step": 257 + }, + { + "epoch": 1.186206896551724, + "grad_norm": 26.131469875864838, + "learning_rate": 5e-06, + "loss": 0.2115, + "num_input_tokens_seen": 23284736, + "step": 258 + }, + { + "epoch": 1.186206896551724, + "loss": 0.21708114445209503, + "loss_ce": 0.000528403848875314, + "loss_iou": 0.482421875, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 23284736, + "step": 258 + }, + { + "epoch": 1.1908045977011494, + "grad_norm": 19.645438145213465, + "learning_rate": 5e-06, + "loss": 0.2313, + "num_input_tokens_seen": 23375184, + "step": 259 + }, + { + "epoch": 1.1908045977011494, + "loss": 0.268694132566452, + "loss_ce": 0.0001394439022988081, + "loss_iou": 0.41015625, + "loss_num": 0.0537109375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 23375184, + "step": 259 + }, + { + "epoch": 1.1954022988505748, + "grad_norm": 16.21307669612568, + "learning_rate": 5e-06, + "loss": 0.2792, + "num_input_tokens_seen": 23465472, + "step": 260 + }, + { + "epoch": 1.1954022988505748, + "loss": 0.29360634088516235, + "loss_ce": 0.0001798336743377149, + "loss_iou": 0.314453125, + "loss_num": 0.05859375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 23465472, + "step": 260 + }, + { + "epoch": 1.2, + "grad_norm": 12.81808028387753, + "learning_rate": 5e-06, + "loss": 0.2153, + "num_input_tokens_seen": 23555724, + "step": 261 + }, + { + "epoch": 1.2, + "loss": 0.23135723173618317, + "loss_ce": 0.009524943307042122, + "loss_iou": 0.53125, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 23555724, + "step": 261 + }, + { + "epoch": 1.2045977011494253, + "grad_norm": 4.1999709843832616, + "learning_rate": 5e-06, + "loss": 0.2374, + "num_input_tokens_seen": 23645988, + "step": 262 + }, + { + "epoch": 1.2045977011494253, + "loss": 0.22801579535007477, + "loss_ce": 0.0014532884815707803, + "loss_iou": 0.44140625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 23645988, + "step": 262 + }, + { + "epoch": 1.2091954022988505, + "grad_norm": 6.414285711751042, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 23736268, + "step": 263 + }, + { + "epoch": 1.2091954022988505, + "loss": 0.15405645966529846, + "loss_ce": 0.0007971944869495928, + "loss_iou": 0.53125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 23736268, + "step": 263 + }, + { + "epoch": 1.2137931034482758, + "grad_norm": 4.480437290662561, + "learning_rate": 5e-06, + "loss": 0.2126, + "num_input_tokens_seen": 23826792, + "step": 264 + }, + { + "epoch": 1.2137931034482758, + "loss": 0.20262828469276428, + "loss_ce": 0.000479854759760201, + "loss_iou": 0.435546875, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 23826792, + "step": 264 + }, + { + "epoch": 1.2183908045977012, + "grad_norm": 15.731375012073059, + "learning_rate": 5e-06, + "loss": 0.1946, + "num_input_tokens_seen": 23916428, + "step": 265 + }, + { + "epoch": 1.2183908045977012, + "loss": 0.15348592400550842, + "loss_ce": 0.00010457600728841498, + "loss_iou": 0.427734375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 23916428, + "step": 265 + }, + { + "epoch": 1.2229885057471264, + "grad_norm": 7.176014460557662, + "learning_rate": 5e-06, + "loss": 0.304, + "num_input_tokens_seen": 24006752, + "step": 266 + }, + { + "epoch": 1.2229885057471264, + "loss": 0.3059394955635071, + "loss_ce": 0.00015336027718149126, + "loss_iou": 0.51953125, + "loss_num": 0.06103515625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 24006752, + "step": 266 + }, + { + "epoch": 1.2275862068965517, + "grad_norm": 6.940435918405699, + "learning_rate": 5e-06, + "loss": 0.1946, + "num_input_tokens_seen": 24097012, + "step": 267 + }, + { + "epoch": 1.2275862068965517, + "loss": 0.18653042614459991, + "loss_ce": 0.0003121576155535877, + "loss_iou": 0.50390625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 24097012, + "step": 267 + }, + { + "epoch": 1.232183908045977, + "grad_norm": 3.7469836271055623, + "learning_rate": 5e-06, + "loss": 0.3164, + "num_input_tokens_seen": 24187256, + "step": 268 + }, + { + "epoch": 1.232183908045977, + "loss": 0.27794623374938965, + "loss_ce": 0.00569892255589366, + "loss_iou": 0.35546875, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 24187256, + "step": 268 + }, + { + "epoch": 1.2367816091954023, + "grad_norm": 7.413553829750902, + "learning_rate": 5e-06, + "loss": 0.2141, + "num_input_tokens_seen": 24277688, + "step": 269 + }, + { + "epoch": 1.2367816091954023, + "loss": 0.19044940173625946, + "loss_ce": 8.075028017628938e-05, + "loss_iou": 0.45703125, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 24277688, + "step": 269 + }, + { + "epoch": 1.2413793103448276, + "grad_norm": 7.193196745882768, + "learning_rate": 5e-06, + "loss": 0.2068, + "num_input_tokens_seen": 24368024, + "step": 270 + }, + { + "epoch": 1.2413793103448276, + "loss": 0.24729135632514954, + "loss_ce": 3.7946036172797903e-05, + "loss_iou": 0.3515625, + "loss_num": 0.04931640625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 24368024, + "step": 270 + }, + { + "epoch": 1.245977011494253, + "grad_norm": 23.36412717520471, + "learning_rate": 5e-06, + "loss": 0.1889, + "num_input_tokens_seen": 24458400, + "step": 271 + }, + { + "epoch": 1.245977011494253, + "loss": 0.18987199664115906, + "loss_ce": 0.00023576414969284087, + "loss_iou": 0.421875, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 24458400, + "step": 271 + }, + { + "epoch": 1.2505747126436781, + "grad_norm": 6.27939746197536, + "learning_rate": 5e-06, + "loss": 0.1642, + "num_input_tokens_seen": 24548712, + "step": 272 + }, + { + "epoch": 1.2505747126436781, + "loss": 0.193972647190094, + "loss_ce": 0.0008268996607512236, + "loss_iou": 0.484375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 24548712, + "step": 272 + }, + { + "epoch": 1.2551724137931035, + "grad_norm": 25.45105413808657, + "learning_rate": 5e-06, + "loss": 0.2115, + "num_input_tokens_seen": 24639064, + "step": 273 + }, + { + "epoch": 1.2551724137931035, + "loss": 0.2186853289604187, + "loss_ce": 0.0026818893384188414, + "loss_iou": 0.416015625, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 24639064, + "step": 273 + }, + { + "epoch": 1.2597701149425287, + "grad_norm": 37.080183589133725, + "learning_rate": 5e-06, + "loss": 0.1826, + "num_input_tokens_seen": 24729388, + "step": 274 + }, + { + "epoch": 1.2597701149425287, + "loss": 0.19386309385299683, + "loss_ce": 0.0006868370110169053, + "loss_iou": 0.447265625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 24729388, + "step": 274 + }, + { + "epoch": 1.264367816091954, + "grad_norm": 16.018753076692136, + "learning_rate": 5e-06, + "loss": 0.2302, + "num_input_tokens_seen": 24819812, + "step": 275 + }, + { + "epoch": 1.264367816091954, + "loss": 0.2251913845539093, + "loss_ce": 0.0006735554779879749, + "loss_iou": 0.5, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 24819812, + "step": 275 + }, + { + "epoch": 1.2689655172413792, + "grad_norm": 6.147405946334659, + "learning_rate": 5e-06, + "loss": 0.1915, + "num_input_tokens_seen": 24910200, + "step": 276 + }, + { + "epoch": 1.2689655172413792, + "loss": 0.14948949217796326, + "loss_ce": 0.00031957216560840607, + "loss_iou": 0.408203125, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 24910200, + "step": 276 + }, + { + "epoch": 1.2735632183908046, + "grad_norm": 6.051687547447403, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 25000476, + "step": 277 + }, + { + "epoch": 1.2735632183908046, + "loss": 0.2223796248435974, + "loss_ce": 0.008512439206242561, + "loss_iou": 0.474609375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 25000476, + "step": 277 + }, + { + "epoch": 1.27816091954023, + "grad_norm": 8.095544751091358, + "learning_rate": 5e-06, + "loss": 0.2494, + "num_input_tokens_seen": 25090868, + "step": 278 + }, + { + "epoch": 1.27816091954023, + "loss": 0.21112553775310516, + "loss_ce": 0.00047794863348826766, + "loss_iou": 0.44140625, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 25090868, + "step": 278 + }, + { + "epoch": 1.282758620689655, + "grad_norm": 29.745999578899646, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 25181160, + "step": 279 + }, + { + "epoch": 1.282758620689655, + "loss": 0.16160300374031067, + "loss_ce": 0.00013449997641146183, + "loss_iou": 0.421875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 25181160, + "step": 279 + }, + { + "epoch": 1.2873563218390804, + "grad_norm": 9.770076744230986, + "learning_rate": 5e-06, + "loss": 0.2388, + "num_input_tokens_seen": 25270696, + "step": 280 + }, + { + "epoch": 1.2873563218390804, + "loss": 0.2805434465408325, + "loss_ce": 0.00020895421039313078, + "loss_iou": 0.44921875, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 25270696, + "step": 280 + }, + { + "epoch": 1.2919540229885058, + "grad_norm": 6.866286661032349, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 25361132, + "step": 281 + }, + { + "epoch": 1.2919540229885058, + "loss": 0.21573258936405182, + "loss_ce": 9.537945152260363e-05, + "loss_iou": 0.44140625, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 25361132, + "step": 281 + }, + { + "epoch": 1.296551724137931, + "grad_norm": 14.632378384320322, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 25451508, + "step": 282 + }, + { + "epoch": 1.296551724137931, + "loss": 0.14539626240730286, + "loss_ce": 0.002390899695456028, + "loss_iou": 0.443359375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 25451508, + "step": 282 + }, + { + "epoch": 1.3011494252873563, + "grad_norm": 10.586676404677785, + "learning_rate": 5e-06, + "loss": 0.2193, + "num_input_tokens_seen": 25541988, + "step": 283 + }, + { + "epoch": 1.3011494252873563, + "loss": 0.18449945747852325, + "loss_ce": 0.0038964273408055305, + "loss_iou": 0.470703125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 25541988, + "step": 283 + }, + { + "epoch": 1.3057471264367817, + "grad_norm": 20.999614027385967, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 25632416, + "step": 284 + }, + { + "epoch": 1.3057471264367817, + "loss": 0.2276066094636917, + "loss_ce": 0.00018961683963425457, + "loss_iou": 0.361328125, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 25632416, + "step": 284 + }, + { + "epoch": 1.3103448275862069, + "grad_norm": 5.543128202063141, + "learning_rate": 5e-06, + "loss": 0.1622, + "num_input_tokens_seen": 25722744, + "step": 285 + }, + { + "epoch": 1.3103448275862069, + "loss": 0.16011224687099457, + "loss_ce": 0.0001390949182678014, + "loss_iou": 0.4140625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 25722744, + "step": 285 + }, + { + "epoch": 1.3149425287356322, + "grad_norm": 11.112725830839327, + "learning_rate": 5e-06, + "loss": 0.2203, + "num_input_tokens_seen": 25813124, + "step": 286 + }, + { + "epoch": 1.3149425287356322, + "loss": 0.3086729943752289, + "loss_ce": 0.0008421677048318088, + "loss_iou": 0.41015625, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 25813124, + "step": 286 + }, + { + "epoch": 1.3195402298850576, + "grad_norm": 9.783123070255048, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 25903612, + "step": 287 + }, + { + "epoch": 1.3195402298850576, + "loss": 0.17264115810394287, + "loss_ce": 0.0005983194569125772, + "loss_iou": 0.40625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 25903612, + "step": 287 + }, + { + "epoch": 1.3241379310344827, + "grad_norm": 4.636122741394509, + "learning_rate": 5e-06, + "loss": 0.2552, + "num_input_tokens_seen": 25994072, + "step": 288 + }, + { + "epoch": 1.3241379310344827, + "loss": 0.22548282146453857, + "loss_ce": 0.00018680733046494424, + "loss_iou": 0.43359375, + "loss_num": 0.045166015625, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 25994072, + "step": 288 + }, + { + "epoch": 1.328735632183908, + "grad_norm": 3.861529643068919, + "learning_rate": 5e-06, + "loss": 0.1707, + "num_input_tokens_seen": 26084532, + "step": 289 + }, + { + "epoch": 1.328735632183908, + "loss": 0.16129527986049652, + "loss_ce": 0.00016246250015683472, + "loss_iou": 0.515625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 26084532, + "step": 289 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 57.38016500276973, + "learning_rate": 5e-06, + "loss": 0.2326, + "num_input_tokens_seen": 26174984, + "step": 290 + }, + { + "epoch": 1.3333333333333333, + "loss": 0.15951338410377502, + "loss_ce": 0.00039472500793635845, + "loss_iou": 0.443359375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 26174984, + "step": 290 + }, + { + "epoch": 1.3379310344827586, + "grad_norm": 8.111819603516635, + "learning_rate": 5e-06, + "loss": 0.1983, + "num_input_tokens_seen": 26265348, + "step": 291 + }, + { + "epoch": 1.3379310344827586, + "loss": 0.1832209974527359, + "loss_ce": 0.0004817442095372826, + "loss_iou": 0.4765625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 26265348, + "step": 291 + }, + { + "epoch": 1.3425287356321838, + "grad_norm": 18.16139378563598, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 26355688, + "step": 292 + }, + { + "epoch": 1.3425287356321838, + "loss": 0.17206496000289917, + "loss_ce": 6.788483005948365e-05, + "loss_iou": 0.5078125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 26355688, + "step": 292 + }, + { + "epoch": 1.3471264367816091, + "grad_norm": 20.460245063855925, + "learning_rate": 5e-06, + "loss": 0.237, + "num_input_tokens_seen": 26446172, + "step": 293 + }, + { + "epoch": 1.3471264367816091, + "loss": 0.23456808924674988, + "loss_ce": 0.005442116409540176, + "loss_iou": 0.388671875, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 26446172, + "step": 293 + }, + { + "epoch": 1.3517241379310345, + "grad_norm": 12.205499115713149, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 26536628, + "step": 294 + }, + { + "epoch": 1.3517241379310345, + "loss": 0.2167300283908844, + "loss_ce": 0.00017729138198774308, + "loss_iou": 0.494140625, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 26536628, + "step": 294 + }, + { + "epoch": 1.3563218390804597, + "grad_norm": 7.426284095641791, + "learning_rate": 5e-06, + "loss": 0.1613, + "num_input_tokens_seen": 26627064, + "step": 295 + }, + { + "epoch": 1.3563218390804597, + "loss": 0.19740846753120422, + "loss_ce": 0.0002038878737948835, + "loss_iou": 0.38671875, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 26627064, + "step": 295 + }, + { + "epoch": 1.360919540229885, + "grad_norm": 39.074194628927266, + "learning_rate": 5e-06, + "loss": 0.2518, + "num_input_tokens_seen": 26717480, + "step": 296 + }, + { + "epoch": 1.360919540229885, + "loss": 0.3336067795753479, + "loss_ce": 0.0002937709796242416, + "loss_iou": 0.44921875, + "loss_num": 0.06640625, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 26717480, + "step": 296 + }, + { + "epoch": 1.3655172413793104, + "grad_norm": 20.276024886618437, + "learning_rate": 5e-06, + "loss": 0.209, + "num_input_tokens_seen": 26807844, + "step": 297 + }, + { + "epoch": 1.3655172413793104, + "loss": 0.3174262046813965, + "loss_ce": 0.0001654599909670651, + "loss_iou": 0.451171875, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 26807844, + "step": 297 + }, + { + "epoch": 1.3701149425287356, + "grad_norm": 4.705748915237839, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 26898224, + "step": 298 + }, + { + "epoch": 1.3701149425287356, + "loss": 0.24118977785110474, + "loss_ce": 0.00010090330033563077, + "loss_iou": 0.4296875, + "loss_num": 0.048095703125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 26898224, + "step": 298 + }, + { + "epoch": 1.374712643678161, + "grad_norm": 25.22660122284678, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 26988588, + "step": 299 + }, + { + "epoch": 1.374712643678161, + "loss": 0.18158341944217682, + "loss_ce": 6.486591883003712e-05, + "loss_iou": 0.455078125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 26988588, + "step": 299 + }, + { + "epoch": 1.3793103448275863, + "grad_norm": 8.272745007832095, + "learning_rate": 5e-06, + "loss": 0.229, + "num_input_tokens_seen": 27078844, + "step": 300 + }, + { + "epoch": 1.3793103448275863, + "loss": 0.19429120421409607, + "loss_ce": 0.00010784986079670489, + "loss_iou": 0.4140625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 27078844, + "step": 300 + }, + { + "epoch": 1.3839080459770114, + "grad_norm": 11.158840002018794, + "learning_rate": 5e-06, + "loss": 0.1863, + "num_input_tokens_seen": 27169132, + "step": 301 + }, + { + "epoch": 1.3839080459770114, + "loss": 0.24286043643951416, + "loss_ce": 6.258698704186827e-05, + "loss_iou": 0.443359375, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 27169132, + "step": 301 + }, + { + "epoch": 1.3885057471264368, + "grad_norm": 11.626395028754814, + "learning_rate": 5e-06, + "loss": 0.1938, + "num_input_tokens_seen": 27259360, + "step": 302 + }, + { + "epoch": 1.3885057471264368, + "loss": 0.22401869297027588, + "loss_ce": 0.0002638171426951885, + "loss_iou": 0.412109375, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 27259360, + "step": 302 + }, + { + "epoch": 1.3931034482758622, + "grad_norm": 8.347578373209931, + "learning_rate": 5e-06, + "loss": 0.2332, + "num_input_tokens_seen": 27349756, + "step": 303 + }, + { + "epoch": 1.3931034482758622, + "loss": 0.24985189735889435, + "loss_ce": 0.00015708088176324964, + "loss_iou": 0.421875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 27349756, + "step": 303 + }, + { + "epoch": 1.3977011494252873, + "grad_norm": 9.382990132866077, + "learning_rate": 5e-06, + "loss": 0.2427, + "num_input_tokens_seen": 27440160, + "step": 304 + }, + { + "epoch": 1.3977011494252873, + "loss": 0.23031748831272125, + "loss_ce": 0.002442737342789769, + "loss_iou": 0.390625, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 27440160, + "step": 304 + }, + { + "epoch": 1.4022988505747127, + "grad_norm": 13.016846770254826, + "learning_rate": 5e-06, + "loss": 0.2354, + "num_input_tokens_seen": 27530484, + "step": 305 + }, + { + "epoch": 1.4022988505747127, + "loss": 0.2382659912109375, + "loss_ce": 0.0012664890382438898, + "loss_iou": 0.384765625, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 27530484, + "step": 305 + }, + { + "epoch": 1.4068965517241379, + "grad_norm": 13.820143768808174, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 27620888, + "step": 306 + }, + { + "epoch": 1.4068965517241379, + "loss": 0.1968221664428711, + "loss_ce": 0.00025843450566753745, + "loss_iou": 0.455078125, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 27620888, + "step": 306 + }, + { + "epoch": 1.4114942528735632, + "grad_norm": 3.5475816109452896, + "learning_rate": 5e-06, + "loss": 0.1937, + "num_input_tokens_seen": 27711272, + "step": 307 + }, + { + "epoch": 1.4114942528735632, + "loss": 0.1471329778432846, + "loss_ce": 0.00022135992185212672, + "loss_iou": 0.46875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 27711272, + "step": 307 + }, + { + "epoch": 1.4160919540229884, + "grad_norm": 10.22489016272008, + "learning_rate": 5e-06, + "loss": 0.2147, + "num_input_tokens_seen": 27801544, + "step": 308 + }, + { + "epoch": 1.4160919540229884, + "loss": 0.19165240228176117, + "loss_ce": 0.008699517697095871, + "loss_iou": 0.47265625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 27801544, + "step": 308 + }, + { + "epoch": 1.4206896551724137, + "grad_norm": 9.134110088295508, + "learning_rate": 5e-06, + "loss": 0.2337, + "num_input_tokens_seen": 27891884, + "step": 309 + }, + { + "epoch": 1.4206896551724137, + "loss": 0.2654152512550354, + "loss_ce": 0.005771718919277191, + "loss_iou": 0.451171875, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 27891884, + "step": 309 + }, + { + "epoch": 1.4252873563218391, + "grad_norm": 10.161788829631169, + "learning_rate": 5e-06, + "loss": 0.2401, + "num_input_tokens_seen": 27982204, + "step": 310 + }, + { + "epoch": 1.4252873563218391, + "loss": 0.2308727651834488, + "loss_ce": 0.003730440977960825, + "loss_iou": 0.4296875, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 27982204, + "step": 310 + }, + { + "epoch": 1.4298850574712643, + "grad_norm": 8.186672197144189, + "learning_rate": 5e-06, + "loss": 0.1798, + "num_input_tokens_seen": 28072556, + "step": 311 + }, + { + "epoch": 1.4298850574712643, + "loss": 0.19206635653972626, + "loss_ce": 0.0002023405977524817, + "loss_iou": 0.435546875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 28072556, + "step": 311 + }, + { + "epoch": 1.4344827586206896, + "grad_norm": 20.204462885292834, + "learning_rate": 5e-06, + "loss": 0.1739, + "num_input_tokens_seen": 28162972, + "step": 312 + }, + { + "epoch": 1.4344827586206896, + "loss": 0.15177220106124878, + "loss_ce": 9.983143536373973e-05, + "loss_iou": 0.353515625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 28162972, + "step": 312 + }, + { + "epoch": 1.439080459770115, + "grad_norm": 10.349632633496206, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 28252592, + "step": 313 + }, + { + "epoch": 1.439080459770115, + "loss": 0.23639342188835144, + "loss_ce": 0.0029949769377708435, + "loss_iou": 0.45703125, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 28252592, + "step": 313 + }, + { + "epoch": 1.4436781609195402, + "grad_norm": 4.037411009142002, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 28342884, + "step": 314 + }, + { + "epoch": 1.4436781609195402, + "loss": 0.14021849632263184, + "loss_ce": 0.00011228647781535983, + "loss_iou": 0.431640625, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 28342884, + "step": 314 + }, + { + "epoch": 1.4482758620689655, + "grad_norm": 10.393150532292086, + "learning_rate": 5e-06, + "loss": 0.118, + "num_input_tokens_seen": 28433376, + "step": 315 + }, + { + "epoch": 1.4482758620689655, + "loss": 0.12029355764389038, + "loss_ce": 0.00014585554890800267, + "loss_iou": 0.48046875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 28433376, + "step": 315 + }, + { + "epoch": 1.452873563218391, + "grad_norm": 3.824936350612978, + "learning_rate": 5e-06, + "loss": 0.2174, + "num_input_tokens_seen": 28523648, + "step": 316 + }, + { + "epoch": 1.452873563218391, + "loss": 0.24938887357711792, + "loss_ce": 0.0014030258171260357, + "loss_iou": 0.484375, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 28523648, + "step": 316 + }, + { + "epoch": 1.457471264367816, + "grad_norm": 11.988045018065144, + "learning_rate": 5e-06, + "loss": 0.1848, + "num_input_tokens_seen": 28613988, + "step": 317 + }, + { + "epoch": 1.457471264367816, + "loss": 0.17838840186595917, + "loss_ce": 0.009259981103241444, + "loss_iou": 0.515625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 28613988, + "step": 317 + }, + { + "epoch": 1.4620689655172414, + "grad_norm": 4.034511511486509, + "learning_rate": 5e-06, + "loss": 0.1713, + "num_input_tokens_seen": 28704372, + "step": 318 + }, + { + "epoch": 1.4620689655172414, + "loss": 0.1520136594772339, + "loss_ce": 0.0004633643548004329, + "loss_iou": 0.384765625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 28704372, + "step": 318 + }, + { + "epoch": 1.4666666666666668, + "grad_norm": 15.615735006728443, + "learning_rate": 5e-06, + "loss": 0.2133, + "num_input_tokens_seen": 28794788, + "step": 319 + }, + { + "epoch": 1.4666666666666668, + "loss": 0.23856469988822937, + "loss_ce": 0.00043603702215477824, + "loss_iou": 0.34375, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 28794788, + "step": 319 + }, + { + "epoch": 1.471264367816092, + "grad_norm": 22.79226873920817, + "learning_rate": 5e-06, + "loss": 0.2221, + "num_input_tokens_seen": 28885240, + "step": 320 + }, + { + "epoch": 1.471264367816092, + "loss": 0.1695161610841751, + "loss_ce": 5.20510075148195e-05, + "loss_iou": 0.396484375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 28885240, + "step": 320 + }, + { + "epoch": 1.4758620689655173, + "grad_norm": 12.555178366315296, + "learning_rate": 5e-06, + "loss": 0.2411, + "num_input_tokens_seen": 28975712, + "step": 321 + }, + { + "epoch": 1.4758620689655173, + "loss": 0.24558743834495544, + "loss_ce": 0.00040922165499068797, + "loss_iou": 0.419921875, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 28975712, + "step": 321 + }, + { + "epoch": 1.4804597701149425, + "grad_norm": 6.643673356109838, + "learning_rate": 5e-06, + "loss": 0.2199, + "num_input_tokens_seen": 29066048, + "step": 322 + }, + { + "epoch": 1.4804597701149425, + "loss": 0.18256893754005432, + "loss_ce": 0.001660746755078435, + "loss_iou": 0.35546875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 29066048, + "step": 322 + }, + { + "epoch": 1.4850574712643678, + "grad_norm": 5.317104474065933, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 29156544, + "step": 323 + }, + { + "epoch": 1.4850574712643678, + "loss": 0.19987276196479797, + "loss_ce": 0.00025728094624355435, + "loss_iou": 0.45703125, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 29156544, + "step": 323 + }, + { + "epoch": 1.489655172413793, + "grad_norm": 12.208298681282333, + "learning_rate": 5e-06, + "loss": 0.1693, + "num_input_tokens_seen": 29246832, + "step": 324 + }, + { + "epoch": 1.489655172413793, + "loss": 0.15619094669818878, + "loss_ce": 3.249588917242363e-05, + "loss_iou": 0.48828125, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 29246832, + "step": 324 + }, + { + "epoch": 1.4942528735632183, + "grad_norm": 14.551980500080187, + "learning_rate": 5e-06, + "loss": 0.2611, + "num_input_tokens_seen": 29337088, + "step": 325 + }, + { + "epoch": 1.4942528735632183, + "loss": 0.348785400390625, + "loss_ce": 0.0024414421059191227, + "loss_iou": 0.419921875, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 29337088, + "step": 325 + }, + { + "epoch": 1.4988505747126437, + "grad_norm": 11.379206303993785, + "learning_rate": 5e-06, + "loss": 0.1849, + "num_input_tokens_seen": 29427352, + "step": 326 + }, + { + "epoch": 1.4988505747126437, + "loss": 0.22499850392341614, + "loss_ce": 0.0005417080246843398, + "loss_iou": 0.435546875, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 29427352, + "step": 326 + }, + { + "epoch": 1.5034482758620689, + "grad_norm": 90.0439810100679, + "learning_rate": 5e-06, + "loss": 0.2161, + "num_input_tokens_seen": 29517724, + "step": 327 + }, + { + "epoch": 1.5034482758620689, + "loss": 0.18729592859745026, + "loss_ce": 0.0005893875495530665, + "loss_iou": 0.478515625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 29517724, + "step": 327 + }, + { + "epoch": 1.5080459770114942, + "grad_norm": 19.33964708457954, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 29608140, + "step": 328 + }, + { + "epoch": 1.5080459770114942, + "loss": 0.24188189208507538, + "loss_ce": 0.0005641538882628083, + "loss_iou": 0.44921875, + "loss_num": 0.048095703125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 29608140, + "step": 328 + }, + { + "epoch": 1.5126436781609196, + "grad_norm": 6.833812883588884, + "learning_rate": 5e-06, + "loss": 0.2319, + "num_input_tokens_seen": 29697696, + "step": 329 + }, + { + "epoch": 1.5126436781609196, + "loss": 0.2876734733581543, + "loss_ce": 0.0008998108096420765, + "loss_iou": 0.353515625, + "loss_num": 0.057373046875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 29697696, + "step": 329 + }, + { + "epoch": 1.5172413793103448, + "grad_norm": 10.08968474441476, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 29787960, + "step": 330 + }, + { + "epoch": 1.5172413793103448, + "loss": 0.15798088908195496, + "loss_ce": 0.00012871227227151394, + "loss_iou": 0.474609375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 29787960, + "step": 330 + }, + { + "epoch": 1.5218390804597701, + "grad_norm": 10.48777960826385, + "learning_rate": 5e-06, + "loss": 0.2303, + "num_input_tokens_seen": 29878288, + "step": 331 + }, + { + "epoch": 1.5218390804597701, + "loss": 0.27822285890579224, + "loss_ce": 0.00014670401287730783, + "loss_iou": 0.42578125, + "loss_num": 0.0556640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 29878288, + "step": 331 + }, + { + "epoch": 1.5264367816091955, + "grad_norm": 15.669568573881612, + "learning_rate": 5e-06, + "loss": 0.1935, + "num_input_tokens_seen": 29968616, + "step": 332 + }, + { + "epoch": 1.5264367816091955, + "loss": 0.1327584981918335, + "loss_ce": 0.00019014105782844126, + "loss_iou": 0.3984375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 29968616, + "step": 332 + }, + { + "epoch": 1.5310344827586206, + "grad_norm": 10.895228295518322, + "learning_rate": 5e-06, + "loss": 0.1781, + "num_input_tokens_seen": 30058964, + "step": 333 + }, + { + "epoch": 1.5310344827586206, + "loss": 0.13389872014522552, + "loss_ce": 0.00044535251799970865, + "loss_iou": 0.4453125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 30058964, + "step": 333 + }, + { + "epoch": 1.535632183908046, + "grad_norm": 12.555340714374447, + "learning_rate": 5e-06, + "loss": 0.2026, + "num_input_tokens_seen": 30149276, + "step": 334 + }, + { + "epoch": 1.535632183908046, + "loss": 0.22426463663578033, + "loss_ce": 0.0006928644143044949, + "loss_iou": 0.390625, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 30149276, + "step": 334 + }, + { + "epoch": 1.5402298850574714, + "grad_norm": 6.653750036819891, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 30239660, + "step": 335 + }, + { + "epoch": 1.5402298850574714, + "loss": 0.2286517322063446, + "loss_ce": 0.009657589718699455, + "loss_iou": 0.47265625, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 30239660, + "step": 335 + }, + { + "epoch": 1.5448275862068965, + "grad_norm": 7.196935162615249, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 30328584, + "step": 336 + }, + { + "epoch": 1.5448275862068965, + "loss": 0.2146240770816803, + "loss_ce": 0.00045170937664806843, + "loss_iou": 0.34375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 30328584, + "step": 336 + }, + { + "epoch": 1.5494252873563217, + "grad_norm": 15.439783012231938, + "learning_rate": 5e-06, + "loss": 0.2042, + "num_input_tokens_seen": 30418984, + "step": 337 + }, + { + "epoch": 1.5494252873563217, + "loss": 0.2664060592651367, + "loss_ce": 0.000842072709929198, + "loss_iou": 0.46484375, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 30418984, + "step": 337 + }, + { + "epoch": 1.5540229885057473, + "grad_norm": 9.972362426319057, + "learning_rate": 5e-06, + "loss": 0.1914, + "num_input_tokens_seen": 30509544, + "step": 338 + }, + { + "epoch": 1.5540229885057473, + "loss": 0.20261165499687195, + "loss_ce": 0.0002190732047893107, + "loss_iou": 0.443359375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 30509544, + "step": 338 + }, + { + "epoch": 1.5586206896551724, + "grad_norm": 15.96705640498333, + "learning_rate": 5e-06, + "loss": 0.1748, + "num_input_tokens_seen": 30599924, + "step": 339 + }, + { + "epoch": 1.5586206896551724, + "loss": 0.14146339893341064, + "loss_ce": 0.0009604630176909268, + "loss_iou": 0.46484375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 30599924, + "step": 339 + }, + { + "epoch": 1.5632183908045976, + "grad_norm": 24.02833413826726, + "learning_rate": 5e-06, + "loss": 0.1581, + "num_input_tokens_seen": 30690300, + "step": 340 + }, + { + "epoch": 1.5632183908045976, + "loss": 0.14789816737174988, + "loss_ce": 0.0007423943607136607, + "loss_iou": 0.451171875, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 30690300, + "step": 340 + }, + { + "epoch": 1.567816091954023, + "grad_norm": 11.132936405519033, + "learning_rate": 5e-06, + "loss": 0.1858, + "num_input_tokens_seen": 30780564, + "step": 341 + }, + { + "epoch": 1.567816091954023, + "loss": 0.18376675248146057, + "loss_ce": 0.00038663047598674893, + "loss_iou": 0.455078125, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 30780564, + "step": 341 + }, + { + "epoch": 1.5724137931034483, + "grad_norm": 11.301299582007728, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 30870936, + "step": 342 + }, + { + "epoch": 1.5724137931034483, + "loss": 0.15917402505874634, + "loss_ce": 0.00011640776210697368, + "loss_iou": 0.51953125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 30870936, + "step": 342 + }, + { + "epoch": 1.5770114942528735, + "grad_norm": 4.3022352764349225, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 30961368, + "step": 343 + }, + { + "epoch": 1.5770114942528735, + "loss": 0.13939973711967468, + "loss_ce": 0.0004837117448914796, + "loss_iou": 0.4765625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 30961368, + "step": 343 + }, + { + "epoch": 1.5816091954022988, + "grad_norm": 21.124622330389396, + "learning_rate": 5e-06, + "loss": 0.1911, + "num_input_tokens_seen": 31051716, + "step": 344 + }, + { + "epoch": 1.5816091954022988, + "loss": 0.2080269455909729, + "loss_ce": 0.00011068060121033341, + "loss_iou": 0.412109375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 31051716, + "step": 344 + }, + { + "epoch": 1.5862068965517242, + "grad_norm": 6.603147217700077, + "learning_rate": 5e-06, + "loss": 0.2052, + "num_input_tokens_seen": 31142164, + "step": 345 + }, + { + "epoch": 1.5862068965517242, + "loss": 0.19146263599395752, + "loss_ce": 5.637338836095296e-05, + "loss_iou": 0.359375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 31142164, + "step": 345 + }, + { + "epoch": 1.5908045977011493, + "grad_norm": 18.413342940258104, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 31232568, + "step": 346 + }, + { + "epoch": 1.5908045977011493, + "loss": 0.21431368589401245, + "loss_ce": 8.028043521335348e-05, + "loss_iou": 0.4296875, + "loss_num": 0.04296875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 31232568, + "step": 346 + }, + { + "epoch": 1.5954022988505747, + "grad_norm": 3.750494996396292, + "learning_rate": 5e-06, + "loss": 0.2055, + "num_input_tokens_seen": 31322940, + "step": 347 + }, + { + "epoch": 1.5954022988505747, + "loss": 0.2030225545167923, + "loss_ce": 0.00014168729830998927, + "loss_iou": 0.470703125, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 31322940, + "step": 347 + }, + { + "epoch": 1.6, + "grad_norm": 19.608744946070097, + "learning_rate": 5e-06, + "loss": 0.2093, + "num_input_tokens_seen": 31412484, + "step": 348 + }, + { + "epoch": 1.6, + "loss": 0.21284618973731995, + "loss_ce": 0.00016918416076805443, + "loss_iou": 0.4375, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 31412484, + "step": 348 + }, + { + "epoch": 1.6045977011494252, + "grad_norm": 16.926454810756088, + "learning_rate": 5e-06, + "loss": 0.1911, + "num_input_tokens_seen": 31502796, + "step": 349 + }, + { + "epoch": 1.6045977011494252, + "loss": 0.236657053232193, + "loss_ce": 0.002068430185317993, + "loss_iou": 0.3828125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 31502796, + "step": 349 + }, + { + "epoch": 1.6091954022988506, + "grad_norm": 8.440280678490312, + "learning_rate": 5e-06, + "loss": 0.2148, + "num_input_tokens_seen": 31593140, + "step": 350 + }, + { + "epoch": 1.6091954022988506, + "loss": 0.1520470678806305, + "loss_ce": 3.901964009855874e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 31593140, + "step": 350 + }, + { + "epoch": 1.613793103448276, + "grad_norm": 12.531015324402237, + "learning_rate": 5e-06, + "loss": 0.2053, + "num_input_tokens_seen": 31683560, + "step": 351 + }, + { + "epoch": 1.613793103448276, + "loss": 0.28503406047821045, + "loss_ce": 0.00012196854368085042, + "loss_iou": 0.515625, + "loss_num": 0.056884765625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 31683560, + "step": 351 + }, + { + "epoch": 1.6183908045977011, + "grad_norm": 13.56199610824294, + "learning_rate": 5e-06, + "loss": 0.2573, + "num_input_tokens_seen": 31774020, + "step": 352 + }, + { + "epoch": 1.6183908045977011, + "loss": 0.2630327045917511, + "loss_ce": 0.0005510285845957696, + "loss_iou": 0.39453125, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 31774020, + "step": 352 + }, + { + "epoch": 1.6229885057471263, + "grad_norm": 16.539672672228185, + "learning_rate": 5e-06, + "loss": 0.218, + "num_input_tokens_seen": 31864284, + "step": 353 + }, + { + "epoch": 1.6229885057471263, + "loss": 0.2166557013988495, + "loss_ce": 0.004497497342526913, + "loss_iou": 0.4140625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 31864284, + "step": 353 + }, + { + "epoch": 1.6275862068965519, + "grad_norm": 6.7039057056478555, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 31954736, + "step": 354 + }, + { + "epoch": 1.6275862068965519, + "loss": 0.12781822681427002, + "loss_ce": 0.00013267630129121244, + "loss_iou": 0.421875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 31954736, + "step": 354 + }, + { + "epoch": 1.632183908045977, + "grad_norm": 24.22876257944078, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 32045092, + "step": 355 + }, + { + "epoch": 1.632183908045977, + "loss": 0.21440809965133667, + "loss_ce": 5.264465289656073e-05, + "loss_iou": 0.439453125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 32045092, + "step": 355 + }, + { + "epoch": 1.6367816091954022, + "grad_norm": 6.350667267782482, + "learning_rate": 5e-06, + "loss": 0.1784, + "num_input_tokens_seen": 32135436, + "step": 356 + }, + { + "epoch": 1.6367816091954022, + "loss": 0.20722705125808716, + "loss_ce": 0.005749986041337252, + "loss_iou": 0.39453125, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 32135436, + "step": 356 + }, + { + "epoch": 1.6413793103448275, + "grad_norm": 24.037632779184456, + "learning_rate": 5e-06, + "loss": 0.2292, + "num_input_tokens_seen": 32225760, + "step": 357 + }, + { + "epoch": 1.6413793103448275, + "loss": 0.1975146234035492, + "loss_ce": 0.0004473668523132801, + "loss_iou": 0.546875, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 32225760, + "step": 357 + }, + { + "epoch": 1.645977011494253, + "grad_norm": 10.97286648357755, + "learning_rate": 5e-06, + "loss": 0.2958, + "num_input_tokens_seen": 32314628, + "step": 358 + }, + { + "epoch": 1.645977011494253, + "loss": 0.30159226059913635, + "loss_ce": 7.8596654930152e-05, + "loss_iou": 0.322265625, + "loss_num": 0.060302734375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 32314628, + "step": 358 + }, + { + "epoch": 1.650574712643678, + "grad_norm": 14.188317923001321, + "learning_rate": 5e-06, + "loss": 0.1884, + "num_input_tokens_seen": 32405028, + "step": 359 + }, + { + "epoch": 1.650574712643678, + "loss": 0.1701948344707489, + "loss_ce": 0.0005018344381824136, + "loss_iou": 0.46875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 32405028, + "step": 359 + }, + { + "epoch": 1.6551724137931034, + "grad_norm": 15.866397059455883, + "learning_rate": 5e-06, + "loss": 0.2404, + "num_input_tokens_seen": 32495460, + "step": 360 + }, + { + "epoch": 1.6551724137931034, + "loss": 0.21389362215995789, + "loss_ce": 0.00789997074753046, + "loss_iou": 0.447265625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 32495460, + "step": 360 + }, + { + "epoch": 1.6597701149425288, + "grad_norm": 22.63781592648334, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 32585812, + "step": 361 + }, + { + "epoch": 1.6597701149425288, + "loss": 0.1410699486732483, + "loss_ce": 0.0007196052465587854, + "loss_iou": 0.37890625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 32585812, + "step": 361 + }, + { + "epoch": 1.664367816091954, + "grad_norm": 24.104025272898923, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 32676080, + "step": 362 + }, + { + "epoch": 1.664367816091954, + "loss": 0.18728001415729523, + "loss_ce": 2.41522429860197e-05, + "loss_iou": 0.478515625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 32676080, + "step": 362 + }, + { + "epoch": 1.6689655172413793, + "grad_norm": 4.251795484895364, + "learning_rate": 5e-06, + "loss": 0.2331, + "num_input_tokens_seen": 32766524, + "step": 363 + }, + { + "epoch": 1.6689655172413793, + "loss": 0.26753073930740356, + "loss_ce": 0.00013572408352047205, + "loss_iou": 0.408203125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 32766524, + "step": 363 + }, + { + "epoch": 1.6735632183908047, + "grad_norm": 16.06020227141594, + "learning_rate": 5e-06, + "loss": 0.2075, + "num_input_tokens_seen": 32856880, + "step": 364 + }, + { + "epoch": 1.6735632183908047, + "loss": 0.20815755426883698, + "loss_ce": 8.871030149748549e-05, + "loss_iou": 0.392578125, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 32856880, + "step": 364 + }, + { + "epoch": 1.6781609195402298, + "grad_norm": 4.038583152340203, + "learning_rate": 5e-06, + "loss": 0.199, + "num_input_tokens_seen": 32947228, + "step": 365 + }, + { + "epoch": 1.6781609195402298, + "loss": 0.1920887678861618, + "loss_ce": 0.00013320505968295038, + "loss_iou": 0.4765625, + "loss_num": 0.038330078125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 32947228, + "step": 365 + }, + { + "epoch": 1.6827586206896552, + "grad_norm": 10.849275070130034, + "learning_rate": 5e-06, + "loss": 0.2501, + "num_input_tokens_seen": 33037680, + "step": 366 + }, + { + "epoch": 1.6827586206896552, + "loss": 0.28570684790611267, + "loss_ce": 0.00018438987899571657, + "loss_iou": 0.37109375, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 33037680, + "step": 366 + }, + { + "epoch": 1.6873563218390806, + "grad_norm": 15.71736414697125, + "learning_rate": 5e-06, + "loss": 0.2526, + "num_input_tokens_seen": 33127980, + "step": 367 + }, + { + "epoch": 1.6873563218390806, + "loss": 0.19320005178451538, + "loss_ce": 0.00048154895193874836, + "loss_iou": 0.4296875, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 33127980, + "step": 367 + }, + { + "epoch": 1.6919540229885057, + "grad_norm": 11.7136055813051, + "learning_rate": 5e-06, + "loss": 0.214, + "num_input_tokens_seen": 33218308, + "step": 368 + }, + { + "epoch": 1.6919540229885057, + "loss": 0.21528667211532593, + "loss_ce": 0.003677786560729146, + "loss_iou": 0.48046875, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 33218308, + "step": 368 + }, + { + "epoch": 1.6965517241379309, + "grad_norm": 9.03276554803375, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 33308680, + "step": 369 + }, + { + "epoch": 1.6965517241379309, + "loss": 0.20362484455108643, + "loss_ce": 0.00010311185906175524, + "loss_iou": 0.44140625, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 33308680, + "step": 369 + }, + { + "epoch": 1.7011494252873565, + "grad_norm": 4.661137477640627, + "learning_rate": 5e-06, + "loss": 0.2119, + "num_input_tokens_seen": 33399264, + "step": 370 + }, + { + "epoch": 1.7011494252873565, + "loss": 0.13792553544044495, + "loss_ce": 0.00016917982429731637, + "loss_iou": 0.439453125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 33399264, + "step": 370 + }, + { + "epoch": 1.7057471264367816, + "grad_norm": 3.2251449715057787, + "learning_rate": 5e-06, + "loss": 0.1771, + "num_input_tokens_seen": 33489564, + "step": 371 + }, + { + "epoch": 1.7057471264367816, + "loss": 0.14613063633441925, + "loss_ce": 0.00024134977138601243, + "loss_iou": 0.3984375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 33489564, + "step": 371 + }, + { + "epoch": 1.7103448275862068, + "grad_norm": 8.964171049100557, + "learning_rate": 5e-06, + "loss": 0.2087, + "num_input_tokens_seen": 33579112, + "step": 372 + }, + { + "epoch": 1.7103448275862068, + "loss": 0.22618785500526428, + "loss_ce": 0.006644395180046558, + "loss_iou": 0.578125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 33579112, + "step": 372 + }, + { + "epoch": 1.7149425287356321, + "grad_norm": 5.141600835893525, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 33669472, + "step": 373 + }, + { + "epoch": 1.7149425287356321, + "loss": 0.24420268833637238, + "loss_ce": 0.0002451670588925481, + "loss_iou": 0.37890625, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 33669472, + "step": 373 + }, + { + "epoch": 1.7195402298850575, + "grad_norm": 8.029154993860924, + "learning_rate": 5e-06, + "loss": 0.2167, + "num_input_tokens_seen": 33759812, + "step": 374 + }, + { + "epoch": 1.7195402298850575, + "loss": 0.21689020097255707, + "loss_ce": 0.005037169903516769, + "loss_iou": 0.40234375, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 33759812, + "step": 374 + }, + { + "epoch": 1.7241379310344827, + "grad_norm": 8.857319520624422, + "learning_rate": 5e-06, + "loss": 0.2094, + "num_input_tokens_seen": 33848696, + "step": 375 + }, + { + "epoch": 1.7241379310344827, + "loss": 0.24398654699325562, + "loss_ce": 5.9540048823691905e-05, + "loss_iou": 0.388671875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 33848696, + "step": 375 + }, + { + "epoch": 1.728735632183908, + "grad_norm": 29.84409998795184, + "learning_rate": 5e-06, + "loss": 0.1964, + "num_input_tokens_seen": 33939052, + "step": 376 + }, + { + "epoch": 1.728735632183908, + "loss": 0.2298891395330429, + "loss_ce": 3.074748383369297e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 33939052, + "step": 376 + }, + { + "epoch": 1.7333333333333334, + "grad_norm": 16.908297420855593, + "learning_rate": 5e-06, + "loss": 0.1799, + "num_input_tokens_seen": 34029428, + "step": 377 + }, + { + "epoch": 1.7333333333333334, + "loss": 0.17225658893585205, + "loss_ce": 0.00032055945484898984, + "loss_iou": 0.486328125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 34029428, + "step": 377 + }, + { + "epoch": 1.7379310344827585, + "grad_norm": 5.924418877591673, + "learning_rate": 5e-06, + "loss": 0.1529, + "num_input_tokens_seen": 34119072, + "step": 378 + }, + { + "epoch": 1.7379310344827585, + "loss": 0.1289907693862915, + "loss_ce": 0.00023710176174063236, + "loss_iou": 0.43359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 34119072, + "step": 378 + }, + { + "epoch": 1.742528735632184, + "grad_norm": 4.126805999853441, + "learning_rate": 5e-06, + "loss": 0.216, + "num_input_tokens_seen": 34209448, + "step": 379 + }, + { + "epoch": 1.742528735632184, + "loss": 0.21730422973632812, + "loss_ce": 0.00011062939302064478, + "loss_iou": 0.5625, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 34209448, + "step": 379 + }, + { + "epoch": 1.7471264367816093, + "grad_norm": 6.583943856738861, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 34299756, + "step": 380 + }, + { + "epoch": 1.7471264367816093, + "loss": 0.23842403292655945, + "loss_ce": 8.174288086593151e-05, + "loss_iou": 0.416015625, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 34299756, + "step": 380 + }, + { + "epoch": 1.7517241379310344, + "grad_norm": 6.121020487954506, + "learning_rate": 5e-06, + "loss": 0.1894, + "num_input_tokens_seen": 34390068, + "step": 381 + }, + { + "epoch": 1.7517241379310344, + "loss": 0.2400810569524765, + "loss_ce": 0.00012134698044974357, + "loss_iou": 0.365234375, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 34390068, + "step": 381 + }, + { + "epoch": 1.7563218390804598, + "grad_norm": 14.275086476453184, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 34480520, + "step": 382 + }, + { + "epoch": 1.7563218390804598, + "loss": 0.08874941617250443, + "loss_ce": 9.585011866874993e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 34480520, + "step": 382 + }, + { + "epoch": 1.7609195402298852, + "grad_norm": 10.514767544788414, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 34570948, + "step": 383 + }, + { + "epoch": 1.7609195402298852, + "loss": 0.1786903589963913, + "loss_ce": 0.001032268744893372, + "loss_iou": 0.4921875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 34570948, + "step": 383 + }, + { + "epoch": 1.7655172413793103, + "grad_norm": 61.44054500522531, + "learning_rate": 5e-06, + "loss": 0.1866, + "num_input_tokens_seen": 34661360, + "step": 384 + }, + { + "epoch": 1.7655172413793103, + "loss": 0.2136577069759369, + "loss_ce": 0.0018352140905335546, + "loss_iou": 0.3203125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 34661360, + "step": 384 + }, + { + "epoch": 1.7701149425287355, + "grad_norm": 20.341892049512285, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 34751736, + "step": 385 + }, + { + "epoch": 1.7701149425287355, + "loss": 0.29879891872406006, + "loss_ce": 0.00033700710628181696, + "loss_iou": 0.42578125, + "loss_num": 0.0595703125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 34751736, + "step": 385 + }, + { + "epoch": 1.774712643678161, + "grad_norm": 10.713533548556878, + "learning_rate": 5e-06, + "loss": 0.2004, + "num_input_tokens_seen": 34841944, + "step": 386 + }, + { + "epoch": 1.774712643678161, + "loss": 0.2071501910686493, + "loss_ce": 0.00011893494956893846, + "loss_iou": 0.421875, + "loss_num": 0.041259765625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 34841944, + "step": 386 + }, + { + "epoch": 1.7793103448275862, + "grad_norm": 6.168349303523133, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 34932320, + "step": 387 + }, + { + "epoch": 1.7793103448275862, + "loss": 0.18104185163974762, + "loss_ce": 7.260293932631612e-05, + "loss_iou": 0.46484375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 34932320, + "step": 387 + }, + { + "epoch": 1.7839080459770114, + "grad_norm": 10.622510358830738, + "learning_rate": 5e-06, + "loss": 0.2128, + "num_input_tokens_seen": 35022772, + "step": 388 + }, + { + "epoch": 1.7839080459770114, + "loss": 0.18907198309898376, + "loss_ce": 0.00041230578790418804, + "loss_iou": 0.42578125, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 35022772, + "step": 388 + }, + { + "epoch": 1.7885057471264367, + "grad_norm": 11.951897075599353, + "learning_rate": 5e-06, + "loss": 0.1647, + "num_input_tokens_seen": 35113260, + "step": 389 + }, + { + "epoch": 1.7885057471264367, + "loss": 0.16620348393917084, + "loss_ce": 0.0003404413000680506, + "loss_iou": 0.357421875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 35113260, + "step": 389 + }, + { + "epoch": 1.793103448275862, + "grad_norm": 4.828903526280168, + "learning_rate": 5e-06, + "loss": 0.2304, + "num_input_tokens_seen": 35203688, + "step": 390 + }, + { + "epoch": 1.793103448275862, + "loss": 0.29129573702812195, + "loss_ce": 0.0006463380996137857, + "loss_iou": 0.4921875, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 35203688, + "step": 390 + }, + { + "epoch": 1.7977011494252872, + "grad_norm": 6.198908160148479, + "learning_rate": 5e-06, + "loss": 0.1866, + "num_input_tokens_seen": 35294020, + "step": 391 + }, + { + "epoch": 1.7977011494252872, + "loss": 0.18983140587806702, + "loss_ce": 0.00046984368236735463, + "loss_iou": 0.435546875, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 35294020, + "step": 391 + }, + { + "epoch": 1.8022988505747126, + "grad_norm": 11.14648078385914, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 35384488, + "step": 392 + }, + { + "epoch": 1.8022988505747126, + "loss": 0.1650119572877884, + "loss_ce": 0.0010715241078287363, + "loss_iou": 0.439453125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 35384488, + "step": 392 + }, + { + "epoch": 1.806896551724138, + "grad_norm": 18.559559905574613, + "learning_rate": 5e-06, + "loss": 0.1779, + "num_input_tokens_seen": 35474708, + "step": 393 + }, + { + "epoch": 1.806896551724138, + "loss": 0.19601336121559143, + "loss_ce": 0.000517760228831321, + "loss_iou": 0.4609375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 35474708, + "step": 393 + }, + { + "epoch": 1.8114942528735631, + "grad_norm": 5.146339340993819, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 35564984, + "step": 394 + }, + { + "epoch": 1.8114942528735631, + "loss": 0.1250748336315155, + "loss_ce": 9.00992818060331e-05, + "loss_iou": 0.416015625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 35564984, + "step": 394 + }, + { + "epoch": 1.8160919540229885, + "grad_norm": 13.805918822000008, + "learning_rate": 5e-06, + "loss": 0.2097, + "num_input_tokens_seen": 35655304, + "step": 395 + }, + { + "epoch": 1.8160919540229885, + "loss": 0.1994381844997406, + "loss_ce": 6.685496191494167e-05, + "loss_iou": 0.43359375, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 35655304, + "step": 395 + }, + { + "epoch": 1.8206896551724139, + "grad_norm": 17.32882624388584, + "learning_rate": 5e-06, + "loss": 0.2631, + "num_input_tokens_seen": 35744700, + "step": 396 + }, + { + "epoch": 1.8206896551724139, + "loss": 0.3162878155708313, + "loss_ce": 0.006351289339363575, + "loss_iou": 0.3671875, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 35744700, + "step": 396 + }, + { + "epoch": 1.825287356321839, + "grad_norm": 26.732334449078184, + "learning_rate": 5e-06, + "loss": 0.2161, + "num_input_tokens_seen": 35835144, + "step": 397 + }, + { + "epoch": 1.825287356321839, + "loss": 0.2433795928955078, + "loss_ce": 0.0004596640937961638, + "loss_iou": 0.408203125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 35835144, + "step": 397 + }, + { + "epoch": 1.8298850574712644, + "grad_norm": 5.47457435472519, + "learning_rate": 5e-06, + "loss": 0.1831, + "num_input_tokens_seen": 35925388, + "step": 398 + }, + { + "epoch": 1.8298850574712644, + "loss": 0.1931520402431488, + "loss_ce": 0.00018938624998554587, + "loss_iou": 0.4296875, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 35925388, + "step": 398 + }, + { + "epoch": 1.8344827586206898, + "grad_norm": 6.840227842822447, + "learning_rate": 5e-06, + "loss": 0.1699, + "num_input_tokens_seen": 36015004, + "step": 399 + }, + { + "epoch": 1.8344827586206898, + "loss": 0.142633855342865, + "loss_ce": 2.5207998987752944e-05, + "loss_iou": 0.46484375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 36015004, + "step": 399 + }, + { + "epoch": 1.839080459770115, + "grad_norm": 8.746845766431045, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 36105580, + "step": 400 + }, + { + "epoch": 1.839080459770115, + "loss": 0.15033434331417084, + "loss_ce": 0.000493036350235343, + "loss_iou": 0.443359375, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 36105580, + "step": 400 + }, + { + "epoch": 1.84367816091954, + "grad_norm": 12.05362200433303, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 36196032, + "step": 401 + }, + { + "epoch": 1.84367816091954, + "loss": 0.10613197833299637, + "loss_ce": 0.00014442511019296944, + "loss_iou": 0.4609375, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 36196032, + "step": 401 + }, + { + "epoch": 1.8482758620689657, + "grad_norm": 12.871135990489064, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 36286396, + "step": 402 + }, + { + "epoch": 1.8482758620689657, + "loss": 0.1379774510860443, + "loss_ce": 3.800613194471225e-05, + "loss_iou": 0.373046875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 36286396, + "step": 402 + }, + { + "epoch": 1.8528735632183908, + "grad_norm": 14.529023016496748, + "learning_rate": 5e-06, + "loss": 0.193, + "num_input_tokens_seen": 36376740, + "step": 403 + }, + { + "epoch": 1.8528735632183908, + "loss": 0.23211072385311127, + "loss_ce": 0.003534059040248394, + "loss_iou": 0.455078125, + "loss_num": 0.0458984375, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 36376740, + "step": 403 + }, + { + "epoch": 1.857471264367816, + "grad_norm": 21.924185342651707, + "learning_rate": 5e-06, + "loss": 0.1735, + "num_input_tokens_seen": 36466984, + "step": 404 + }, + { + "epoch": 1.857471264367816, + "loss": 0.15595406293869019, + "loss_ce": 0.00013130568549968302, + "loss_iou": 0.470703125, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 36466984, + "step": 404 + }, + { + "epoch": 1.8620689655172413, + "grad_norm": 13.574410791488862, + "learning_rate": 5e-06, + "loss": 0.1739, + "num_input_tokens_seen": 36557356, + "step": 405 + }, + { + "epoch": 1.8620689655172413, + "loss": 0.17355313897132874, + "loss_ce": 3.0175322535797022e-05, + "loss_iou": 0.478515625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 36557356, + "step": 405 + }, + { + "epoch": 1.8666666666666667, + "grad_norm": 7.0386023660360335, + "learning_rate": 5e-06, + "loss": 0.2389, + "num_input_tokens_seen": 36647736, + "step": 406 + }, + { + "epoch": 1.8666666666666667, + "loss": 0.25896725058555603, + "loss_ce": 0.0009106049546971917, + "loss_iou": 0.404296875, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 36647736, + "step": 406 + }, + { + "epoch": 1.8712643678160918, + "grad_norm": 22.160625349053383, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 36738104, + "step": 407 + }, + { + "epoch": 1.8712643678160918, + "loss": 0.1394781917333603, + "loss_ce": 7.390179962385446e-05, + "loss_iou": 0.474609375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 36738104, + "step": 407 + }, + { + "epoch": 1.8758620689655172, + "grad_norm": 17.35116305455773, + "learning_rate": 5e-06, + "loss": 0.2099, + "num_input_tokens_seen": 36828500, + "step": 408 + }, + { + "epoch": 1.8758620689655172, + "loss": 0.1852053552865982, + "loss_ce": 0.00032986659789457917, + "loss_iou": 0.380859375, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 36828500, + "step": 408 + }, + { + "epoch": 1.8804597701149426, + "grad_norm": 28.24772029390849, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 36918796, + "step": 409 + }, + { + "epoch": 1.8804597701149426, + "loss": 0.2876579761505127, + "loss_ce": 0.00016715031233616173, + "loss_iou": 0.388671875, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 36918796, + "step": 409 + }, + { + "epoch": 1.8850574712643677, + "grad_norm": 11.025181667958632, + "learning_rate": 5e-06, + "loss": 0.1974, + "num_input_tokens_seen": 37008328, + "step": 410 + }, + { + "epoch": 1.8850574712643677, + "loss": 0.19846147298812866, + "loss_ce": 0.0004329003859311342, + "loss_iou": 0.40625, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 37008328, + "step": 410 + }, + { + "epoch": 1.889655172413793, + "grad_norm": 9.446092449238963, + "learning_rate": 5e-06, + "loss": 0.2362, + "num_input_tokens_seen": 37098728, + "step": 411 + }, + { + "epoch": 1.889655172413793, + "loss": 0.2067907452583313, + "loss_ce": 0.00021725523401983082, + "loss_iou": 0.453125, + "loss_num": 0.041259765625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 37098728, + "step": 411 + }, + { + "epoch": 1.8942528735632185, + "grad_norm": 5.183189456998249, + "learning_rate": 5e-06, + "loss": 0.159, + "num_input_tokens_seen": 37189052, + "step": 412 + }, + { + "epoch": 1.8942528735632185, + "loss": 0.13279348611831665, + "loss_ce": 4.201898264000192e-05, + "loss_iou": 0.4453125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 37189052, + "step": 412 + }, + { + "epoch": 1.8988505747126436, + "grad_norm": 9.650000103120663, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 37279516, + "step": 413 + }, + { + "epoch": 1.8988505747126436, + "loss": 0.16115236282348633, + "loss_ce": 8.058699313551188e-05, + "loss_iou": 0.421875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 37279516, + "step": 413 + }, + { + "epoch": 1.903448275862069, + "grad_norm": 4.0534506430610495, + "learning_rate": 5e-06, + "loss": 0.2039, + "num_input_tokens_seen": 37369960, + "step": 414 + }, + { + "epoch": 1.903448275862069, + "loss": 0.19549965858459473, + "loss_ce": 0.0015299279475584626, + "loss_iou": 0.50390625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 37369960, + "step": 414 + }, + { + "epoch": 1.9080459770114944, + "grad_norm": 10.593561048071034, + "learning_rate": 5e-06, + "loss": 0.1738, + "num_input_tokens_seen": 37460424, + "step": 415 + }, + { + "epoch": 1.9080459770114944, + "loss": 0.16938260197639465, + "loss_ce": 0.00025418028235435486, + "loss_iou": 0.419921875, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 37460424, + "step": 415 + }, + { + "epoch": 1.9126436781609195, + "grad_norm": 2.9328584393134003, + "learning_rate": 5e-06, + "loss": 0.1799, + "num_input_tokens_seen": 37550816, + "step": 416 + }, + { + "epoch": 1.9126436781609195, + "loss": 0.17176763713359833, + "loss_ce": 4.522378003457561e-05, + "loss_iou": 0.41015625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 37550816, + "step": 416 + }, + { + "epoch": 1.9172413793103447, + "grad_norm": 6.966385105625737, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 37640308, + "step": 417 + }, + { + "epoch": 1.9172413793103447, + "loss": 0.18929776549339294, + "loss_ce": 8.878001244738698e-05, + "loss_iou": 0.3359375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 37640308, + "step": 417 + }, + { + "epoch": 1.9218390804597703, + "grad_norm": 28.585591957284574, + "learning_rate": 5e-06, + "loss": 0.1666, + "num_input_tokens_seen": 37730788, + "step": 418 + }, + { + "epoch": 1.9218390804597703, + "loss": 0.20673710107803345, + "loss_ce": 0.005260062403976917, + "loss_iou": 0.373046875, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 37730788, + "step": 418 + }, + { + "epoch": 1.9264367816091954, + "grad_norm": 14.331276777730162, + "learning_rate": 5e-06, + "loss": 0.1901, + "num_input_tokens_seen": 37821240, + "step": 419 + }, + { + "epoch": 1.9264367816091954, + "loss": 0.17201045155525208, + "loss_ce": 0.0006237286143004894, + "loss_iou": 0.474609375, + "loss_num": 0.0341796875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 37821240, + "step": 419 + }, + { + "epoch": 1.9310344827586206, + "grad_norm": 51.49279529680463, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 37911548, + "step": 420 + }, + { + "epoch": 1.9310344827586206, + "loss": 0.10174712538719177, + "loss_ce": 0.0005813572788611054, + "loss_iou": 0.5, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 37911548, + "step": 420 + }, + { + "epoch": 1.935632183908046, + "grad_norm": 20.70402147732612, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 38001920, + "step": 421 + }, + { + "epoch": 1.935632183908046, + "loss": 0.1458110809326172, + "loss_ce": 0.00012014327512588352, + "loss_iou": 0.396484375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 38001920, + "step": 421 + }, + { + "epoch": 1.9402298850574713, + "grad_norm": 15.948122093541322, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 38092224, + "step": 422 + }, + { + "epoch": 1.9402298850574713, + "loss": 0.23001374304294586, + "loss_ce": 0.00039948339690454304, + "loss_iou": 0.4140625, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 38092224, + "step": 422 + }, + { + "epoch": 1.9448275862068964, + "grad_norm": 4.692813126881015, + "learning_rate": 5e-06, + "loss": 0.19, + "num_input_tokens_seen": 38181780, + "step": 423 + }, + { + "epoch": 1.9448275862068964, + "loss": 0.2313835322856903, + "loss_ce": 0.00016710199997760355, + "loss_iou": 0.41015625, + "loss_num": 0.046142578125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 38181780, + "step": 423 + }, + { + "epoch": 1.9494252873563218, + "grad_norm": 10.416976166382188, + "learning_rate": 5e-06, + "loss": 0.1808, + "num_input_tokens_seen": 38272044, + "step": 424 + }, + { + "epoch": 1.9494252873563218, + "loss": 0.20144467055797577, + "loss_ce": 0.0002117574622388929, + "loss_iou": 0.39453125, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 38272044, + "step": 424 + }, + { + "epoch": 1.9540229885057472, + "grad_norm": 19.032893916715878, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 38362384, + "step": 425 + }, + { + "epoch": 1.9540229885057472, + "loss": 0.14751462638378143, + "loss_ce": 0.00011472392361611128, + "loss_iou": 0.302734375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 38362384, + "step": 425 + }, + { + "epoch": 1.9586206896551723, + "grad_norm": 4.116116262537347, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 38452876, + "step": 426 + }, + { + "epoch": 1.9586206896551723, + "loss": 0.20857056975364685, + "loss_ce": 0.00031861409661360085, + "loss_iou": 0.375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 38452876, + "step": 426 + }, + { + "epoch": 1.9632183908045977, + "grad_norm": 12.769621257575608, + "learning_rate": 5e-06, + "loss": 0.2186, + "num_input_tokens_seen": 38543180, + "step": 427 + }, + { + "epoch": 1.9632183908045977, + "loss": 0.21470102667808533, + "loss_ce": 0.0008338369661942124, + "loss_iou": 0.42578125, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 38543180, + "step": 427 + }, + { + "epoch": 1.967816091954023, + "grad_norm": 4.963161851563267, + "learning_rate": 5e-06, + "loss": 0.1961, + "num_input_tokens_seen": 38633460, + "step": 428 + }, + { + "epoch": 1.967816091954023, + "loss": 0.26360833644866943, + "loss_ce": 0.00018057512352243066, + "loss_iou": 0.4140625, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 38633460, + "step": 428 + }, + { + "epoch": 1.9724137931034482, + "grad_norm": 9.041750000087218, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 38723828, + "step": 429 + }, + { + "epoch": 1.9724137931034482, + "loss": 0.12721416354179382, + "loss_ce": 7.794749399181455e-05, + "loss_iou": 0.39453125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 38723828, + "step": 429 + }, + { + "epoch": 1.9770114942528736, + "grad_norm": 4.689071794501053, + "learning_rate": 5e-06, + "loss": 0.1893, + "num_input_tokens_seen": 38814248, + "step": 430 + }, + { + "epoch": 1.9770114942528736, + "loss": 0.1737414002418518, + "loss_ce": 0.0002489687467459589, + "loss_iou": 0.458984375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 38814248, + "step": 430 + }, + { + "epoch": 1.981609195402299, + "grad_norm": 27.154301658955358, + "learning_rate": 5e-06, + "loss": 0.1834, + "num_input_tokens_seen": 38904564, + "step": 431 + }, + { + "epoch": 1.981609195402299, + "loss": 0.1887126863002777, + "loss_ce": 5.3021052735857666e-05, + "loss_iou": 0.375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 38904564, + "step": 431 + }, + { + "epoch": 1.986206896551724, + "grad_norm": 15.753748795396127, + "learning_rate": 5e-06, + "loss": 0.2578, + "num_input_tokens_seen": 38993440, + "step": 432 + }, + { + "epoch": 1.986206896551724, + "loss": 0.17184701561927795, + "loss_ce": 0.00027720603975467384, + "loss_iou": 0.357421875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 38993440, + "step": 432 + }, + { + "epoch": 1.9908045977011493, + "grad_norm": 16.878556629349514, + "learning_rate": 5e-06, + "loss": 0.1694, + "num_input_tokens_seen": 39083816, + "step": 433 + }, + { + "epoch": 1.9908045977011493, + "loss": 0.18325263261795044, + "loss_ce": 2.5101442588493228e-05, + "loss_iou": 0.40234375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 39083816, + "step": 433 + }, + { + "epoch": 1.9954022988505749, + "grad_norm": 12.650814063184473, + "learning_rate": 5e-06, + "loss": 0.1683, + "num_input_tokens_seen": 39174208, + "step": 434 + }, + { + "epoch": 1.9954022988505749, + "loss": 0.2204468846321106, + "loss_ce": 0.000476180954137817, + "loss_iou": 0.46875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 39174208, + "step": 434 + }, + { + "epoch": 2.0, + "grad_norm": 10.19888475335077, + "learning_rate": 5e-06, + "loss": 0.2245, + "num_input_tokens_seen": 39264536, + "step": 435 + }, + { + "epoch": 2.0, + "loss": 0.16308581829071045, + "loss_ce": 0.00015246294788084924, + "loss_iou": 0.333984375, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 39264536, + "step": 435 + }, + { + "epoch": 2.004597701149425, + "grad_norm": 5.446086415432793, + "learning_rate": 5e-06, + "loss": 0.1492, + "num_input_tokens_seen": 39355008, + "step": 436 + }, + { + "epoch": 2.004597701149425, + "loss": 0.11079922318458557, + "loss_ce": 0.0006002452573738992, + "loss_iou": 0.388671875, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 39355008, + "step": 436 + }, + { + "epoch": 2.0091954022988507, + "grad_norm": 11.525128525925364, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 39445364, + "step": 437 + }, + { + "epoch": 2.0091954022988507, + "loss": 0.1240125373005867, + "loss_ce": 0.0001722091546980664, + "loss_iou": 0.3515625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 39445364, + "step": 437 + }, + { + "epoch": 2.013793103448276, + "grad_norm": 5.953661546303923, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 39535692, + "step": 438 + }, + { + "epoch": 2.013793103448276, + "loss": 0.14717864990234375, + "loss_ce": 5.339417839422822e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 39535692, + "step": 438 + }, + { + "epoch": 2.018390804597701, + "grad_norm": 9.622272916635373, + "learning_rate": 5e-06, + "loss": 0.2175, + "num_input_tokens_seen": 39625276, + "step": 439 + }, + { + "epoch": 2.018390804597701, + "loss": 0.19843432307243347, + "loss_ce": 0.00040575824095867574, + "loss_iou": 0.3671875, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 39625276, + "step": 439 + }, + { + "epoch": 2.0229885057471266, + "grad_norm": 11.722688812201046, + "learning_rate": 5e-06, + "loss": 0.1435, + "num_input_tokens_seen": 39715664, + "step": 440 + }, + { + "epoch": 2.0229885057471266, + "loss": 0.13606533408164978, + "loss_ce": 1.7956510419026017e-05, + "loss_iou": 0.45703125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 39715664, + "step": 440 + }, + { + "epoch": 2.027586206896552, + "grad_norm": 9.734047142024144, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 39806120, + "step": 441 + }, + { + "epoch": 2.027586206896552, + "loss": 0.15544164180755615, + "loss_ce": 0.00019872028497047722, + "loss_iou": 0.330078125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 39806120, + "step": 441 + }, + { + "epoch": 2.032183908045977, + "grad_norm": 4.317712136329391, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 39896512, + "step": 442 + }, + { + "epoch": 2.032183908045977, + "loss": 0.19860993325710297, + "loss_ce": 0.004579176660627127, + "loss_iou": 0.390625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 39896512, + "step": 442 + }, + { + "epoch": 2.036781609195402, + "grad_norm": 20.253389515904008, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 39987032, + "step": 443 + }, + { + "epoch": 2.036781609195402, + "loss": 0.19383695721626282, + "loss_ce": 0.0006912024109624326, + "loss_iou": 0.3984375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 39987032, + "step": 443 + }, + { + "epoch": 2.0413793103448277, + "grad_norm": 4.97212014387009, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 40077476, + "step": 444 + }, + { + "epoch": 2.0413793103448277, + "loss": 0.12523597478866577, + "loss_ce": 0.00011389970313757658, + "loss_iou": 0.427734375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 40077476, + "step": 444 + }, + { + "epoch": 2.045977011494253, + "grad_norm": 20.311552823682124, + "learning_rate": 5e-06, + "loss": 0.1806, + "num_input_tokens_seen": 40168036, + "step": 445 + }, + { + "epoch": 2.045977011494253, + "loss": 0.18241354823112488, + "loss_ce": 0.00022360245930030942, + "loss_iou": 0.3828125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 40168036, + "step": 445 + }, + { + "epoch": 2.050574712643678, + "grad_norm": 20.574323382128274, + "learning_rate": 5e-06, + "loss": 0.2583, + "num_input_tokens_seen": 40258332, + "step": 446 + }, + { + "epoch": 2.050574712643678, + "loss": 0.2653703987598419, + "loss_ce": 0.002186804311349988, + "loss_iou": 0.474609375, + "loss_num": 0.052490234375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 40258332, + "step": 446 + }, + { + "epoch": 2.0551724137931036, + "grad_norm": 7.1556843698299, + "learning_rate": 5e-06, + "loss": 0.2328, + "num_input_tokens_seen": 40348728, + "step": 447 + }, + { + "epoch": 2.0551724137931036, + "loss": 0.2722781300544739, + "loss_ce": 0.00015285555855371058, + "loss_iou": 0.384765625, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 40348728, + "step": 447 + }, + { + "epoch": 2.0597701149425287, + "grad_norm": 2.328436671702478, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 40439056, + "step": 448 + }, + { + "epoch": 2.0597701149425287, + "loss": 0.1083475798368454, + "loss_ce": 2.544172093621455e-05, + "loss_iou": 0.4375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 40439056, + "step": 448 + }, + { + "epoch": 2.064367816091954, + "grad_norm": 7.50649292907234, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 40529452, + "step": 449 + }, + { + "epoch": 2.064367816091954, + "loss": 0.08954399824142456, + "loss_ce": 6.646315159741789e-05, + "loss_iou": 0.326171875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 40529452, + "step": 449 + }, + { + "epoch": 2.0689655172413794, + "grad_norm": 10.363487196000653, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 40619852, + "step": 450 + }, + { + "epoch": 2.0689655172413794, + "loss": 0.19379128515720367, + "loss_ce": 3.5169647162547335e-05, + "loss_iou": 0.34765625, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 40619852, + "step": 450 + }, + { + "epoch": 2.0735632183908046, + "grad_norm": 8.83788070087072, + "learning_rate": 5e-06, + "loss": 0.1502, + "num_input_tokens_seen": 40710184, + "step": 451 + }, + { + "epoch": 2.0735632183908046, + "loss": 0.1824960559606552, + "loss_ce": 3.146529707009904e-05, + "loss_iou": 0.453125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 40710184, + "step": 451 + }, + { + "epoch": 2.0781609195402297, + "grad_norm": 22.605606537183156, + "learning_rate": 5e-06, + "loss": 0.2029, + "num_input_tokens_seen": 40799700, + "step": 452 + }, + { + "epoch": 2.0781609195402297, + "loss": 0.24570101499557495, + "loss_ce": 0.00021761911921203136, + "loss_iou": 0.458984375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 40799700, + "step": 452 + }, + { + "epoch": 2.0827586206896553, + "grad_norm": 8.39747671515735, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 40890160, + "step": 453 + }, + { + "epoch": 2.0827586206896553, + "loss": 0.11327225714921951, + "loss_ce": 0.00023515010252594948, + "loss_iou": 0.34765625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 40890160, + "step": 453 + }, + { + "epoch": 2.0873563218390805, + "grad_norm": 12.581825524675565, + "learning_rate": 5e-06, + "loss": 0.161, + "num_input_tokens_seen": 40980460, + "step": 454 + }, + { + "epoch": 2.0873563218390805, + "loss": 0.19595982134342194, + "loss_ce": 0.0002200792368967086, + "loss_iou": 0.4609375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 40980460, + "step": 454 + }, + { + "epoch": 2.0919540229885056, + "grad_norm": 4.810948555506714, + "learning_rate": 5e-06, + "loss": 0.1872, + "num_input_tokens_seen": 41070800, + "step": 455 + }, + { + "epoch": 2.0919540229885056, + "loss": 0.195834219455719, + "loss_ce": 0.0006132656708359718, + "loss_iou": 0.443359375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 41070800, + "step": 455 + }, + { + "epoch": 2.0965517241379312, + "grad_norm": 16.611126353595154, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 41161136, + "step": 456 + }, + { + "epoch": 2.0965517241379312, + "loss": 0.18046724796295166, + "loss_ce": 4.7336572606582195e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 41161136, + "step": 456 + }, + { + "epoch": 2.1011494252873564, + "grad_norm": 4.0448675422540346, + "learning_rate": 5e-06, + "loss": 0.1711, + "num_input_tokens_seen": 41251520, + "step": 457 + }, + { + "epoch": 2.1011494252873564, + "loss": 0.17239469289779663, + "loss_ce": 0.0003060669405385852, + "loss_iou": 0.44921875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 41251520, + "step": 457 + }, + { + "epoch": 2.1057471264367815, + "grad_norm": 6.408813044691335, + "learning_rate": 5e-06, + "loss": 0.1917, + "num_input_tokens_seen": 41341836, + "step": 458 + }, + { + "epoch": 2.1057471264367815, + "loss": 0.22308608889579773, + "loss_ce": 0.034731604158878326, + "loss_iou": 0.3515625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 41341836, + "step": 458 + }, + { + "epoch": 2.110344827586207, + "grad_norm": 13.658825889797921, + "learning_rate": 5e-06, + "loss": 0.2127, + "num_input_tokens_seen": 41432320, + "step": 459 + }, + { + "epoch": 2.110344827586207, + "loss": 0.27129611372947693, + "loss_ce": 5.588199564954266e-05, + "loss_iou": 0.40625, + "loss_num": 0.05419921875, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 41432320, + "step": 459 + }, + { + "epoch": 2.1149425287356323, + "grad_norm": 48.461654169692885, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 41522668, + "step": 460 + }, + { + "epoch": 2.1149425287356323, + "loss": 0.15583747625350952, + "loss_ce": 0.0002588740608189255, + "loss_iou": 0.42578125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 41522668, + "step": 460 + }, + { + "epoch": 2.1195402298850574, + "grad_norm": 20.517893200082028, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 41613056, + "step": 461 + }, + { + "epoch": 2.1195402298850574, + "loss": 0.16513849794864655, + "loss_ce": 0.00016046430391725153, + "loss_iou": 0.451171875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 41613056, + "step": 461 + }, + { + "epoch": 2.1241379310344826, + "grad_norm": 5.0252606370642505, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 41703492, + "step": 462 + }, + { + "epoch": 2.1241379310344826, + "loss": 0.16197888553142548, + "loss_ce": 0.00014416445628739893, + "loss_iou": 0.455078125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 41703492, + "step": 462 + }, + { + "epoch": 2.128735632183908, + "grad_norm": 35.99843113408324, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 41792988, + "step": 463 + }, + { + "epoch": 2.128735632183908, + "loss": 0.11322343349456787, + "loss_ce": 3.226540684408974e-06, + "loss_iou": 0.423828125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 41792988, + "step": 463 + }, + { + "epoch": 2.1333333333333333, + "grad_norm": 19.56124215547498, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 41883320, + "step": 464 + }, + { + "epoch": 2.1333333333333333, + "loss": 0.10433776676654816, + "loss_ce": 1.3423067684925627e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 41883320, + "step": 464 + }, + { + "epoch": 2.1379310344827585, + "grad_norm": 5.283946883842529, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 41972864, + "step": 465 + }, + { + "epoch": 2.1379310344827585, + "loss": 0.18815144896507263, + "loss_ce": 0.00022419335437007248, + "loss_iou": 0.5, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 41972864, + "step": 465 + }, + { + "epoch": 2.142528735632184, + "grad_norm": 12.855612844319221, + "learning_rate": 5e-06, + "loss": 0.1953, + "num_input_tokens_seen": 42063268, + "step": 466 + }, + { + "epoch": 2.142528735632184, + "loss": 0.1868751049041748, + "loss_ce": 4.649303446058184e-05, + "loss_iou": 0.375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 42063268, + "step": 466 + }, + { + "epoch": 2.147126436781609, + "grad_norm": 9.976883452360386, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 42152812, + "step": 467 + }, + { + "epoch": 2.147126436781609, + "loss": 0.18522751331329346, + "loss_ce": 0.0004130652523599565, + "loss_iou": 0.341796875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 42152812, + "step": 467 + }, + { + "epoch": 2.1517241379310343, + "grad_norm": 16.988301795489004, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 42243232, + "step": 468 + }, + { + "epoch": 2.1517241379310343, + "loss": 0.16275331377983093, + "loss_ce": 0.0045196665450930595, + "loss_iou": 0.447265625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 42243232, + "step": 468 + }, + { + "epoch": 2.15632183908046, + "grad_norm": 18.341882617271445, + "learning_rate": 5e-06, + "loss": 0.1953, + "num_input_tokens_seen": 42333576, + "step": 469 + }, + { + "epoch": 2.15632183908046, + "loss": 0.2384186089038849, + "loss_ce": 0.0006256342749111354, + "loss_iou": 0.36328125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 42333576, + "step": 469 + }, + { + "epoch": 2.160919540229885, + "grad_norm": 23.406338359813535, + "learning_rate": 5e-06, + "loss": 0.1333, + "num_input_tokens_seen": 42423888, + "step": 470 + }, + { + "epoch": 2.160919540229885, + "loss": 0.09170129895210266, + "loss_ce": 1.1235326383030042e-05, + "loss_iou": 0.482421875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 42423888, + "step": 470 + }, + { + "epoch": 2.1655172413793102, + "grad_norm": 5.704299050854984, + "learning_rate": 5e-06, + "loss": 0.1674, + "num_input_tokens_seen": 42512692, + "step": 471 + }, + { + "epoch": 2.1655172413793102, + "loss": 0.1880715936422348, + "loss_ce": 5.279548349790275e-05, + "loss_iou": 0.287109375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 42512692, + "step": 471 + }, + { + "epoch": 2.170114942528736, + "grad_norm": 7.3341829258742095, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 42602996, + "step": 472 + }, + { + "epoch": 2.170114942528736, + "loss": 0.13333070278167725, + "loss_ce": 0.00010622564150253311, + "loss_iou": 0.4375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 42602996, + "step": 472 + }, + { + "epoch": 2.174712643678161, + "grad_norm": 7.267255134108421, + "learning_rate": 5e-06, + "loss": 0.2403, + "num_input_tokens_seen": 42693276, + "step": 473 + }, + { + "epoch": 2.174712643678161, + "loss": 0.16120409965515137, + "loss_ce": 4.077212361153215e-05, + "loss_iou": 0.404296875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 42693276, + "step": 473 + }, + { + "epoch": 2.179310344827586, + "grad_norm": 128.39041769732546, + "learning_rate": 5e-06, + "loss": 0.2092, + "num_input_tokens_seen": 42782804, + "step": 474 + }, + { + "epoch": 2.179310344827586, + "loss": 0.18164223432540894, + "loss_ce": 3.213401942048222e-05, + "loss_iou": 0.33984375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 42782804, + "step": 474 + }, + { + "epoch": 2.1839080459770113, + "grad_norm": 21.574370414672206, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 42873080, + "step": 475 + }, + { + "epoch": 2.1839080459770113, + "loss": 0.14893415570259094, + "loss_ce": 8.381912266486324e-06, + "loss_iou": 0.416015625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 42873080, + "step": 475 + }, + { + "epoch": 2.188505747126437, + "grad_norm": 4.41738665455178, + "learning_rate": 5e-06, + "loss": 0.2296, + "num_input_tokens_seen": 42962612, + "step": 476 + }, + { + "epoch": 2.188505747126437, + "loss": 0.2648059129714966, + "loss_ce": 0.001317158224992454, + "loss_iou": 0.2734375, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 42962612, + "step": 476 + }, + { + "epoch": 2.193103448275862, + "grad_norm": 6.382558470057758, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 43052192, + "step": 477 + }, + { + "epoch": 2.193103448275862, + "loss": 0.18961870670318604, + "loss_ce": 4.350763629190624e-05, + "loss_iou": 0.490234375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 43052192, + "step": 477 + }, + { + "epoch": 2.197701149425287, + "grad_norm": 9.913316189631466, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 43142436, + "step": 478 + }, + { + "epoch": 2.197701149425287, + "loss": 0.20609432458877563, + "loss_ce": 0.0001006659513222985, + "loss_iou": 0.416015625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 43142436, + "step": 478 + }, + { + "epoch": 2.2022988505747128, + "grad_norm": 3.649045492362426, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 43232740, + "step": 479 + }, + { + "epoch": 2.2022988505747128, + "loss": 0.15660002827644348, + "loss_ce": 0.00016692971985321492, + "loss_iou": 0.34375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 43232740, + "step": 479 + }, + { + "epoch": 2.206896551724138, + "grad_norm": 16.113831960073153, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 43323116, + "step": 480 + }, + { + "epoch": 2.206896551724138, + "loss": 0.14228999614715576, + "loss_ce": 0.00010859394387807697, + "loss_iou": 0.396484375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 43323116, + "step": 480 + }, + { + "epoch": 2.211494252873563, + "grad_norm": 13.400014157011977, + "learning_rate": 5e-06, + "loss": 0.2149, + "num_input_tokens_seen": 43413536, + "step": 481 + }, + { + "epoch": 2.211494252873563, + "loss": 0.181840181350708, + "loss_ce": 0.00013852809206582606, + "loss_iou": 0.337890625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 43413536, + "step": 481 + }, + { + "epoch": 2.2160919540229886, + "grad_norm": 4.019097019730536, + "learning_rate": 5e-06, + "loss": 0.1358, + "num_input_tokens_seen": 43503912, + "step": 482 + }, + { + "epoch": 2.2160919540229886, + "loss": 0.11794019490480423, + "loss_ce": 2.0275372662581503e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 43503912, + "step": 482 + }, + { + "epoch": 2.220689655172414, + "grad_norm": 10.618191683361344, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 43594288, + "step": 483 + }, + { + "epoch": 2.220689655172414, + "loss": 0.11793670058250427, + "loss_ce": 9.307506843470037e-05, + "loss_iou": 0.490234375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 43594288, + "step": 483 + }, + { + "epoch": 2.225287356321839, + "grad_norm": 3.8979874019779968, + "learning_rate": 5e-06, + "loss": 0.1525, + "num_input_tokens_seen": 43684704, + "step": 484 + }, + { + "epoch": 2.225287356321839, + "loss": 0.16006925702095032, + "loss_ce": 0.0007064603269100189, + "loss_iou": 0.404296875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 43684704, + "step": 484 + }, + { + "epoch": 2.2298850574712645, + "grad_norm": 6.611052035871228, + "learning_rate": 5e-06, + "loss": 0.19, + "num_input_tokens_seen": 43775036, + "step": 485 + }, + { + "epoch": 2.2298850574712645, + "loss": 0.1399611234664917, + "loss_ce": 6.85493359924294e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 43775036, + "step": 485 + }, + { + "epoch": 2.2344827586206897, + "grad_norm": 4.314197235501585, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 43865412, + "step": 486 + }, + { + "epoch": 2.2344827586206897, + "loss": 0.2288285195827484, + "loss_ce": 0.00016030190454330295, + "loss_iou": 0.3828125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 43865412, + "step": 486 + }, + { + "epoch": 2.239080459770115, + "grad_norm": 9.468032363551144, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 43955720, + "step": 487 + }, + { + "epoch": 2.239080459770115, + "loss": 0.152041494846344, + "loss_ce": 9.446687181480229e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 43955720, + "step": 487 + }, + { + "epoch": 2.2436781609195404, + "grad_norm": 13.431440995922976, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 44046112, + "step": 488 + }, + { + "epoch": 2.2436781609195404, + "loss": 0.1250520944595337, + "loss_ce": 0.00011312306742183864, + "loss_iou": 0.421875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 44046112, + "step": 488 + }, + { + "epoch": 2.2482758620689656, + "grad_norm": 3.5222867230041612, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 44136408, + "step": 489 + }, + { + "epoch": 2.2482758620689656, + "loss": 0.10490469634532928, + "loss_ce": 4.629993418348022e-05, + "loss_iou": 0.443359375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 44136408, + "step": 489 + }, + { + "epoch": 2.2528735632183907, + "grad_norm": 11.11111499247831, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 44226780, + "step": 490 + }, + { + "epoch": 2.2528735632183907, + "loss": 0.196798175573349, + "loss_ce": 5.1357543270569295e-05, + "loss_iou": 0.43359375, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 44226780, + "step": 490 + }, + { + "epoch": 2.2574712643678163, + "grad_norm": 5.957507308754348, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 44317112, + "step": 491 + }, + { + "epoch": 2.2574712643678163, + "loss": 0.18355637788772583, + "loss_ce": 0.004021458327770233, + "loss_iou": 0.44921875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 44317112, + "step": 491 + }, + { + "epoch": 2.2620689655172415, + "grad_norm": 6.742726892392994, + "learning_rate": 5e-06, + "loss": 0.1891, + "num_input_tokens_seen": 44407476, + "step": 492 + }, + { + "epoch": 2.2620689655172415, + "loss": 0.1890466809272766, + "loss_ce": 0.0002039193786913529, + "loss_iou": 0.41015625, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 44407476, + "step": 492 + }, + { + "epoch": 2.2666666666666666, + "grad_norm": 11.363198722775557, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 44497844, + "step": 493 + }, + { + "epoch": 2.2666666666666666, + "loss": 0.15371349453926086, + "loss_ce": 0.00021006805764045566, + "loss_iou": 0.384765625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 44497844, + "step": 493 + }, + { + "epoch": 2.2712643678160918, + "grad_norm": 20.995205197621296, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 44588252, + "step": 494 + }, + { + "epoch": 2.2712643678160918, + "loss": 0.14532320201396942, + "loss_ce": 5.9530953876674175e-05, + "loss_iou": 0.42578125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 44588252, + "step": 494 + }, + { + "epoch": 2.2758620689655173, + "grad_norm": 13.929286977818037, + "learning_rate": 5e-06, + "loss": 0.2026, + "num_input_tokens_seen": 44678616, + "step": 495 + }, + { + "epoch": 2.2758620689655173, + "loss": 0.18880240619182587, + "loss_ce": 0.0003868812054861337, + "loss_iou": 0.30859375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 44678616, + "step": 495 + }, + { + "epoch": 2.2804597701149425, + "grad_norm": 5.471350156799274, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 44769036, + "step": 496 + }, + { + "epoch": 2.2804597701149425, + "loss": 0.09233726561069489, + "loss_ce": 9.788307215785608e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 44769036, + "step": 496 + }, + { + "epoch": 2.2850574712643676, + "grad_norm": 3.59565500880543, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 44859352, + "step": 497 + }, + { + "epoch": 2.2850574712643676, + "loss": 0.11752472817897797, + "loss_ce": 9.308746666647494e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 44859352, + "step": 497 + }, + { + "epoch": 2.2896551724137932, + "grad_norm": 9.014441044097715, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 44949672, + "step": 498 + }, + { + "epoch": 2.2896551724137932, + "loss": 0.17396917939186096, + "loss_ce": 0.00014104516594670713, + "loss_iou": 0.40234375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 44949672, + "step": 498 + }, + { + "epoch": 2.2942528735632184, + "grad_norm": 56.84361696439731, + "learning_rate": 5e-06, + "loss": 0.2118, + "num_input_tokens_seen": 45040108, + "step": 499 + }, + { + "epoch": 2.2942528735632184, + "loss": 0.16254353523254395, + "loss_ce": 0.0002510526101104915, + "loss_iou": 0.337890625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 45040108, + "step": 499 + }, + { + "epoch": 2.2988505747126435, + "grad_norm": 4.499065914254272, + "learning_rate": 5e-06, + "loss": 0.2092, + "num_input_tokens_seen": 45130652, + "step": 500 + }, + { + "epoch": 2.2988505747126435, + "eval_seeclick_CIoU": 0.4563731998205185, + "eval_seeclick_GIoU": 0.4370184391736984, + "eval_seeclick_IoU": 0.49749037623405457, + "eval_seeclick_MAE_all": 0.0592306274920702, + "eval_seeclick_MAE_h": 0.04618253372609615, + "eval_seeclick_MAE_w": 0.10425780341029167, + "eval_seeclick_MAE_x_boxes": 0.10805046185851097, + "eval_seeclick_MAE_y_boxes": 0.042274054139852524, + "eval_seeclick_NUM_probability": 0.9999992847442627, + "eval_seeclick_inside_bbox": 0.8607954680919647, + "eval_seeclick_loss": 0.3510092794895172, + "eval_seeclick_loss_ce": 0.05598363280296326, + "eval_seeclick_loss_iou": 0.519287109375, + "eval_seeclick_loss_num": 0.06208038330078125, + "eval_seeclick_loss_xval": 0.310455322265625, + "eval_seeclick_runtime": 73.6206, + "eval_seeclick_samples_per_second": 0.584, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 45130652, + "step": 500 + }, + { + "epoch": 2.2988505747126435, + "eval_icons_CIoU": 0.5248142629861832, + "eval_icons_GIoU": 0.5294373333454132, + "eval_icons_IoU": 0.5619192123413086, + "eval_icons_MAE_all": 0.04526199400424957, + "eval_icons_MAE_h": 0.08578694611787796, + "eval_icons_MAE_w": 0.06393218040466309, + "eval_icons_MAE_x_boxes": 0.05692756548523903, + "eval_icons_MAE_y_boxes": 0.08431091904640198, + "eval_icons_NUM_probability": 0.9999994337558746, + "eval_icons_inside_bbox": 0.7118055522441864, + "eval_icons_loss": 0.22472724318504333, + "eval_icons_loss_ce": 1.0411458504222537e-06, + "eval_icons_loss_iou": 0.43853759765625, + "eval_icons_loss_num": 0.049304962158203125, + "eval_icons_loss_xval": 0.246337890625, + "eval_icons_runtime": 97.2319, + "eval_icons_samples_per_second": 0.514, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 45130652, + "step": 500 + }, + { + "epoch": 2.2988505747126435, + "eval_screenspot_CIoU": 0.36121458808581036, + "eval_screenspot_GIoU": 0.3352409948905309, + "eval_screenspot_IoU": 0.4333700935045878, + "eval_screenspot_MAE_all": 0.09693960969646771, + "eval_screenspot_MAE_h": 0.09257866690556209, + "eval_screenspot_MAE_w": 0.19348373264074326, + "eval_screenspot_MAE_x_boxes": 0.18671841422716776, + "eval_screenspot_MAE_y_boxes": 0.08937582621971767, + "eval_screenspot_NUM_probability": 0.9999986489613851, + "eval_screenspot_inside_bbox": 0.7145833373069763, + "eval_screenspot_loss": 0.4828945994377136, + "eval_screenspot_loss_ce": 0.0005104693118482828, + "eval_screenspot_loss_iou": 0.3957926432291667, + "eval_screenspot_loss_num": 0.099456787109375, + "eval_screenspot_loss_xval": 0.4974365234375, + "eval_screenspot_runtime": 150.5805, + "eval_screenspot_samples_per_second": 0.591, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 45130652, + "step": 500 + }, + { + "epoch": 2.2988505747126435, + "eval_compot_CIoU": 0.4746464341878891, + "eval_compot_GIoU": 0.444294735789299, + "eval_compot_IoU": 0.5337270200252533, + "eval_compot_MAE_all": 0.05769064649939537, + "eval_compot_MAE_h": 0.08894045650959015, + "eval_compot_MAE_w": 0.10224151611328125, + "eval_compot_MAE_x_boxes": 0.08807999640703201, + "eval_compot_MAE_y_boxes": 0.08862848207354546, + "eval_compot_NUM_probability": 0.9999935030937195, + "eval_compot_inside_bbox": 0.7638888955116272, + "eval_compot_loss": 0.31889885663986206, + "eval_compot_loss_ce": 0.01438705949112773, + "eval_compot_loss_iou": 0.5205078125, + "eval_compot_loss_num": 0.05315399169921875, + "eval_compot_loss_xval": 0.265472412109375, + "eval_compot_runtime": 86.6902, + "eval_compot_samples_per_second": 0.577, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 45130652, + "step": 500 + }, + { + "epoch": 2.2988505747126435, + "loss": 0.19853872060775757, + "loss_ce": 0.008414196781814098, + "loss_iou": 0.5703125, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 45130652, + "step": 500 + }, + { + "epoch": 2.303448275862069, + "grad_norm": 25.629104321788386, + "learning_rate": 5e-06, + "loss": 0.1748, + "num_input_tokens_seen": 45221168, + "step": 501 + }, + { + "epoch": 2.303448275862069, + "loss": 0.22815854847431183, + "loss_ce": 0.00013120633957441896, + "loss_iou": 0.3671875, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 45221168, + "step": 501 + }, + { + "epoch": 2.3080459770114943, + "grad_norm": 5.708666769215059, + "learning_rate": 5e-06, + "loss": 0.1472, + "num_input_tokens_seen": 45311552, + "step": 502 + }, + { + "epoch": 2.3080459770114943, + "loss": 0.14808592200279236, + "loss_ce": 0.00019773890380747616, + "loss_iou": 0.37109375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 45311552, + "step": 502 + }, + { + "epoch": 2.3126436781609194, + "grad_norm": 5.468912782546553, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 45401820, + "step": 503 + }, + { + "epoch": 2.3126436781609194, + "loss": 0.17252948880195618, + "loss_ce": 0.00022724125301465392, + "loss_iou": 0.4375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 45401820, + "step": 503 + }, + { + "epoch": 2.317241379310345, + "grad_norm": 3.4368379842335006, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 45491352, + "step": 504 + }, + { + "epoch": 2.317241379310345, + "loss": 0.17304499447345734, + "loss_ce": 0.004771072883158922, + "loss_iou": 0.3046875, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 45491352, + "step": 504 + }, + { + "epoch": 2.32183908045977, + "grad_norm": 7.438752813468672, + "learning_rate": 5e-06, + "loss": 0.1578, + "num_input_tokens_seen": 45581828, + "step": 505 + }, + { + "epoch": 2.32183908045977, + "loss": 0.12591499090194702, + "loss_ce": 6.0488393501145765e-05, + "loss_iou": 0.40625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 45581828, + "step": 505 + }, + { + "epoch": 2.3264367816091953, + "grad_norm": 2.55941882010794, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 45672168, + "step": 506 + }, + { + "epoch": 2.3264367816091953, + "loss": 0.15659506618976593, + "loss_ce": 9.366869562654756e-06, + "loss_iou": 0.408203125, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 45672168, + "step": 506 + }, + { + "epoch": 2.3310344827586205, + "grad_norm": 3.8697008798484838, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 45761816, + "step": 507 + }, + { + "epoch": 2.3310344827586205, + "loss": 0.1356024444103241, + "loss_ce": 0.00010439224570291117, + "loss_iou": 0.42578125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 45761816, + "step": 507 + }, + { + "epoch": 2.335632183908046, + "grad_norm": 9.154165977355914, + "learning_rate": 5e-06, + "loss": 0.165, + "num_input_tokens_seen": 45851328, + "step": 508 + }, + { + "epoch": 2.335632183908046, + "loss": 0.17103195190429688, + "loss_ce": 1.1456304491730407e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 45851328, + "step": 508 + }, + { + "epoch": 2.340229885057471, + "grad_norm": 6.692072817800835, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 45941700, + "step": 509 + }, + { + "epoch": 2.340229885057471, + "loss": 0.149922713637352, + "loss_ce": 0.00017295028374064714, + "loss_iou": 0.38671875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 45941700, + "step": 509 + }, + { + "epoch": 2.344827586206897, + "grad_norm": 6.611698710045344, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 46032036, + "step": 510 + }, + { + "epoch": 2.344827586206897, + "loss": 0.22002628445625305, + "loss_ce": 2.505266820662655e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 46032036, + "step": 510 + }, + { + "epoch": 2.349425287356322, + "grad_norm": 11.300552233375646, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 46122476, + "step": 511 + }, + { + "epoch": 2.349425287356322, + "loss": 0.1223573088645935, + "loss_ce": 4.286051989765838e-05, + "loss_iou": 0.4296875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 46122476, + "step": 511 + }, + { + "epoch": 2.354022988505747, + "grad_norm": 4.419461671248112, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 46213036, + "step": 512 + }, + { + "epoch": 2.354022988505747, + "loss": 0.21517296135425568, + "loss_ce": 8.506829908583313e-05, + "loss_iou": 0.3984375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 46213036, + "step": 512 + }, + { + "epoch": 2.3586206896551722, + "grad_norm": 4.8376770471091755, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 46303360, + "step": 513 + }, + { + "epoch": 2.3586206896551722, + "loss": 0.13146010041236877, + "loss_ce": 5.1406939746811986e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 46303360, + "step": 513 + }, + { + "epoch": 2.363218390804598, + "grad_norm": 14.293011444773292, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 46393732, + "step": 514 + }, + { + "epoch": 2.363218390804598, + "loss": 0.1853524148464203, + "loss_ce": 0.000568474642932415, + "loss_iou": 0.255859375, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 46393732, + "step": 514 + }, + { + "epoch": 2.367816091954023, + "grad_norm": 5.9219449761184135, + "learning_rate": 5e-06, + "loss": 0.1525, + "num_input_tokens_seen": 46484084, + "step": 515 + }, + { + "epoch": 2.367816091954023, + "loss": 0.13725048303604126, + "loss_ce": 0.005231434479355812, + "loss_iou": 0.4140625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 46484084, + "step": 515 + }, + { + "epoch": 2.372413793103448, + "grad_norm": 11.485628452257725, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 46574492, + "step": 516 + }, + { + "epoch": 2.372413793103448, + "loss": 0.10670308768749237, + "loss_ce": 4.415482544573024e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 46574492, + "step": 516 + }, + { + "epoch": 2.3770114942528737, + "grad_norm": 3.642033089699345, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 46664972, + "step": 517 + }, + { + "epoch": 2.3770114942528737, + "loss": 0.11098849028348923, + "loss_ce": 5.709293327527121e-05, + "loss_iou": 0.318359375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 46664972, + "step": 517 + }, + { + "epoch": 2.381609195402299, + "grad_norm": 5.135666742103272, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 46755488, + "step": 518 + }, + { + "epoch": 2.381609195402299, + "loss": 0.10365065187215805, + "loss_ce": 1.295450420002453e-05, + "loss_iou": 0.3671875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 46755488, + "step": 518 + }, + { + "epoch": 2.386206896551724, + "grad_norm": 14.48664391569232, + "learning_rate": 5e-06, + "loss": 0.1674, + "num_input_tokens_seen": 46845788, + "step": 519 + }, + { + "epoch": 2.386206896551724, + "loss": 0.1562279760837555, + "loss_ce": 3.902369280694984e-05, + "loss_iou": 0.41015625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 46845788, + "step": 519 + }, + { + "epoch": 2.3908045977011496, + "grad_norm": 5.121624515541806, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 46936132, + "step": 520 + }, + { + "epoch": 2.3908045977011496, + "loss": 0.15609031915664673, + "loss_ce": 0.002129129832610488, + "loss_iou": 0.400390625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 46936132, + "step": 520 + }, + { + "epoch": 2.3954022988505748, + "grad_norm": 12.984930412372549, + "learning_rate": 5e-06, + "loss": 0.1588, + "num_input_tokens_seen": 47026436, + "step": 521 + }, + { + "epoch": 2.3954022988505748, + "loss": 0.13674747943878174, + "loss_ce": 5.9245572629151866e-05, + "loss_iou": 0.333984375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 47026436, + "step": 521 + }, + { + "epoch": 2.4, + "grad_norm": 4.283346201311201, + "learning_rate": 5e-06, + "loss": 0.2249, + "num_input_tokens_seen": 47116864, + "step": 522 + }, + { + "epoch": 2.4, + "loss": 0.1752316951751709, + "loss_ce": 0.00030492368387058377, + "loss_iou": 0.421875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 47116864, + "step": 522 + }, + { + "epoch": 2.4045977011494255, + "grad_norm": 5.233495198593664, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 47207228, + "step": 523 + }, + { + "epoch": 2.4045977011494255, + "loss": 0.18661803007125854, + "loss_ce": 3.356490924488753e-05, + "loss_iou": 0.341796875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 47207228, + "step": 523 + }, + { + "epoch": 2.4091954022988507, + "grad_norm": 5.2220413793779175, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 47297444, + "step": 524 + }, + { + "epoch": 2.4091954022988507, + "loss": 0.1606762409210205, + "loss_ce": 6.222333468031138e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 47297444, + "step": 524 + }, + { + "epoch": 2.413793103448276, + "grad_norm": 18.74625653768912, + "learning_rate": 5e-06, + "loss": 0.132, + "num_input_tokens_seen": 47387760, + "step": 525 + }, + { + "epoch": 2.413793103448276, + "loss": 0.15789783000946045, + "loss_ce": 0.00012196188617963344, + "loss_iou": 0.361328125, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 47387760, + "step": 525 + }, + { + "epoch": 2.418390804597701, + "grad_norm": 5.191217680993693, + "learning_rate": 5e-06, + "loss": 0.1316, + "num_input_tokens_seen": 47478124, + "step": 526 + }, + { + "epoch": 2.418390804597701, + "loss": 0.13596881926059723, + "loss_ce": 4.352313044364564e-05, + "loss_iou": 0.396484375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 47478124, + "step": 526 + }, + { + "epoch": 2.4229885057471265, + "grad_norm": 4.706845583881529, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 47568600, + "step": 527 + }, + { + "epoch": 2.4229885057471265, + "loss": 0.15037593245506287, + "loss_ce": 4.634057768271305e-05, + "loss_iou": 0.369140625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 47568600, + "step": 527 + }, + { + "epoch": 2.4275862068965517, + "grad_norm": 7.095455232957096, + "learning_rate": 5e-06, + "loss": 0.1918, + "num_input_tokens_seen": 47658948, + "step": 528 + }, + { + "epoch": 2.4275862068965517, + "loss": 0.25855281949043274, + "loss_ce": 0.00025203393306583166, + "loss_iou": 0.34375, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 47658948, + "step": 528 + }, + { + "epoch": 2.432183908045977, + "grad_norm": 6.0875597343514265, + "learning_rate": 5e-06, + "loss": 0.1973, + "num_input_tokens_seen": 47749252, + "step": 529 + }, + { + "epoch": 2.432183908045977, + "loss": 0.21008528769016266, + "loss_ce": 0.001833330374211073, + "loss_iou": 0.330078125, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 47749252, + "step": 529 + }, + { + "epoch": 2.4367816091954024, + "grad_norm": 43.1300948642533, + "learning_rate": 5e-06, + "loss": 0.1807, + "num_input_tokens_seen": 47838180, + "step": 530 + }, + { + "epoch": 2.4367816091954024, + "loss": 0.1620747148990631, + "loss_ce": 1.1108362741651945e-05, + "loss_iou": 0.4453125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 47838180, + "step": 530 + }, + { + "epoch": 2.4413793103448276, + "grad_norm": 34.04511460331946, + "learning_rate": 5e-06, + "loss": 0.1328, + "num_input_tokens_seen": 47928576, + "step": 531 + }, + { + "epoch": 2.4413793103448276, + "loss": 0.14182758331298828, + "loss_ce": 4.29103929491248e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 47928576, + "step": 531 + }, + { + "epoch": 2.4459770114942527, + "grad_norm": 14.495961279987304, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 48019012, + "step": 532 + }, + { + "epoch": 2.4459770114942527, + "loss": 0.205206036567688, + "loss_ce": 0.00012791430344805121, + "loss_iou": 0.421875, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 48019012, + "step": 532 + }, + { + "epoch": 2.4505747126436783, + "grad_norm": 47.008419334105845, + "learning_rate": 5e-06, + "loss": 0.1181, + "num_input_tokens_seen": 48109540, + "step": 533 + }, + { + "epoch": 2.4505747126436783, + "loss": 0.09603258222341537, + "loss_ce": 8.53195961099118e-05, + "loss_iou": 0.416015625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 48109540, + "step": 533 + }, + { + "epoch": 2.4551724137931035, + "grad_norm": 22.246765917362193, + "learning_rate": 5e-06, + "loss": 0.1418, + "num_input_tokens_seen": 48200004, + "step": 534 + }, + { + "epoch": 2.4551724137931035, + "loss": 0.15302670001983643, + "loss_ce": 7.259696576511487e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 48200004, + "step": 534 + }, + { + "epoch": 2.4597701149425286, + "grad_norm": 10.021203723893532, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 48290512, + "step": 535 + }, + { + "epoch": 2.4597701149425286, + "loss": 0.11590239405632019, + "loss_ce": 0.00024077783746179193, + "loss_iou": 0.3984375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 48290512, + "step": 535 + }, + { + "epoch": 2.464367816091954, + "grad_norm": 11.067971491143098, + "learning_rate": 5e-06, + "loss": 0.1704, + "num_input_tokens_seen": 48380860, + "step": 536 + }, + { + "epoch": 2.464367816091954, + "loss": 0.19783857464790344, + "loss_ce": 0.00023726305516902357, + "loss_iou": 0.44921875, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 48380860, + "step": 536 + }, + { + "epoch": 2.4689655172413794, + "grad_norm": 5.653254921318319, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 48471104, + "step": 537 + }, + { + "epoch": 2.4689655172413794, + "loss": 0.1712353229522705, + "loss_ce": 6.221771764103323e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 48471104, + "step": 537 + }, + { + "epoch": 2.4735632183908045, + "grad_norm": 2.9377901589977085, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 48561516, + "step": 538 + }, + { + "epoch": 2.4735632183908045, + "loss": 0.12568363547325134, + "loss_ce": 1.2256616173544899e-05, + "loss_iou": 0.396484375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 48561516, + "step": 538 + }, + { + "epoch": 2.4781609195402297, + "grad_norm": 90.38237762494742, + "learning_rate": 5e-06, + "loss": 0.1775, + "num_input_tokens_seen": 48651772, + "step": 539 + }, + { + "epoch": 2.4781609195402297, + "loss": 0.20320890843868256, + "loss_ce": 8.390971197513863e-05, + "loss_iou": 0.46484375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 48651772, + "step": 539 + }, + { + "epoch": 2.4827586206896552, + "grad_norm": 32.249804456709384, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 48742128, + "step": 540 + }, + { + "epoch": 2.4827586206896552, + "loss": 0.13325834274291992, + "loss_ce": 7.96284293755889e-05, + "loss_iou": 0.451171875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 48742128, + "step": 540 + }, + { + "epoch": 2.4873563218390804, + "grad_norm": 4.3167432311394505, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 48832520, + "step": 541 + }, + { + "epoch": 2.4873563218390804, + "loss": 0.1864563375711441, + "loss_ce": 5.4961776186246425e-05, + "loss_iou": 0.376953125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 48832520, + "step": 541 + }, + { + "epoch": 2.491954022988506, + "grad_norm": 9.206979571070748, + "learning_rate": 5e-06, + "loss": 0.1701, + "num_input_tokens_seen": 48922880, + "step": 542 + }, + { + "epoch": 2.491954022988506, + "loss": 0.14663755893707275, + "loss_ce": 0.00021421856945380569, + "loss_iou": 0.44140625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 48922880, + "step": 542 + }, + { + "epoch": 2.496551724137931, + "grad_norm": 9.250882650158083, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 49013144, + "step": 543 + }, + { + "epoch": 2.496551724137931, + "loss": 0.1472419798374176, + "loss_ce": 9.938361472450197e-06, + "loss_iou": 0.412109375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 49013144, + "step": 543 + }, + { + "epoch": 2.5011494252873563, + "grad_norm": 4.218071296940112, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 49103612, + "step": 544 + }, + { + "epoch": 2.5011494252873563, + "loss": 0.13012462854385376, + "loss_ce": 0.00018077107961289585, + "loss_iou": 0.45703125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 49103612, + "step": 544 + }, + { + "epoch": 2.5057471264367814, + "grad_norm": 5.7187751949461285, + "learning_rate": 5e-06, + "loss": 0.1346, + "num_input_tokens_seen": 49194032, + "step": 545 + }, + { + "epoch": 2.5057471264367814, + "loss": 0.1249690055847168, + "loss_ce": 0.00015211128629744053, + "loss_iou": 0.357421875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 49194032, + "step": 545 + }, + { + "epoch": 2.510344827586207, + "grad_norm": 4.115869504252711, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 49284388, + "step": 546 + }, + { + "epoch": 2.510344827586207, + "loss": 0.13758164644241333, + "loss_ce": 6.943976768525317e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 49284388, + "step": 546 + }, + { + "epoch": 2.514942528735632, + "grad_norm": 9.743639046477583, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 49374780, + "step": 547 + }, + { + "epoch": 2.514942528735632, + "loss": 0.18289527297019958, + "loss_ce": 3.3943615562748164e-05, + "loss_iou": 0.359375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 49374780, + "step": 547 + }, + { + "epoch": 2.5195402298850573, + "grad_norm": 10.069950958132628, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 49465116, + "step": 548 + }, + { + "epoch": 2.5195402298850573, + "loss": 0.1874997317790985, + "loss_ce": 3.024038960575126e-05, + "loss_iou": 0.392578125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 49465116, + "step": 548 + }, + { + "epoch": 2.524137931034483, + "grad_norm": 19.870338599096762, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 49555324, + "step": 549 + }, + { + "epoch": 2.524137931034483, + "loss": 0.1507517397403717, + "loss_ce": 5.592878733295947e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 49555324, + "step": 549 + }, + { + "epoch": 2.528735632183908, + "grad_norm": 10.407984592997497, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 49645776, + "step": 550 + }, + { + "epoch": 2.528735632183908, + "loss": 0.18005026876926422, + "loss_ce": 2.7080646759714e-05, + "loss_iou": 0.392578125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 49645776, + "step": 550 + }, + { + "epoch": 2.533333333333333, + "grad_norm": 7.734767791315441, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 49736148, + "step": 551 + }, + { + "epoch": 2.533333333333333, + "loss": 0.16183626651763916, + "loss_ce": 0.00012362068810034543, + "loss_iou": 0.392578125, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 49736148, + "step": 551 + }, + { + "epoch": 2.5379310344827584, + "grad_norm": 6.4853103024455505, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 49826532, + "step": 552 + }, + { + "epoch": 2.5379310344827584, + "loss": 0.10679163783788681, + "loss_ce": 7.166996510932222e-05, + "loss_iou": 0.43359375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 49826532, + "step": 552 + }, + { + "epoch": 2.542528735632184, + "grad_norm": 37.47119278150993, + "learning_rate": 5e-06, + "loss": 0.1699, + "num_input_tokens_seen": 49917020, + "step": 553 + }, + { + "epoch": 2.542528735632184, + "loss": 0.1629483699798584, + "loss_ce": 0.0001065782635123469, + "loss_iou": 0.4375, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 49917020, + "step": 553 + }, + { + "epoch": 2.547126436781609, + "grad_norm": 6.379696690058362, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 50007432, + "step": 554 + }, + { + "epoch": 2.547126436781609, + "loss": 0.15533311665058136, + "loss_ce": 4.4418171455617994e-05, + "loss_iou": 0.48828125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 50007432, + "step": 554 + }, + { + "epoch": 2.5517241379310347, + "grad_norm": 4.14549282032518, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 50097820, + "step": 555 + }, + { + "epoch": 2.5517241379310347, + "loss": 0.1373203992843628, + "loss_ce": 5.233901902101934e-05, + "loss_iou": 0.443359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 50097820, + "step": 555 + }, + { + "epoch": 2.55632183908046, + "grad_norm": 3.4318598421901934, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 50188248, + "step": 556 + }, + { + "epoch": 2.55632183908046, + "loss": 0.13454991579055786, + "loss_ce": 8.948062895797193e-05, + "loss_iou": 0.443359375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 50188248, + "step": 556 + }, + { + "epoch": 2.560919540229885, + "grad_norm": 3.7413568064084655, + "learning_rate": 5e-06, + "loss": 0.1643, + "num_input_tokens_seen": 50278580, + "step": 557 + }, + { + "epoch": 2.560919540229885, + "loss": 0.10955231636762619, + "loss_ce": 8.576564869144931e-05, + "loss_iou": 0.40625, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 50278580, + "step": 557 + }, + { + "epoch": 2.56551724137931, + "grad_norm": 16.960520342941066, + "learning_rate": 5e-06, + "loss": 0.1843, + "num_input_tokens_seen": 50368852, + "step": 558 + }, + { + "epoch": 2.56551724137931, + "loss": 0.19905152916908264, + "loss_ce": 1.5889523638179526e-05, + "loss_iou": 0.349609375, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 50368852, + "step": 558 + }, + { + "epoch": 2.5701149425287357, + "grad_norm": 2.4502760696851618, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 50459208, + "step": 559 + }, + { + "epoch": 2.5701149425287357, + "loss": 0.15352153778076172, + "loss_ce": 0.00010967279376927763, + "loss_iou": 0.451171875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 50459208, + "step": 559 + }, + { + "epoch": 2.574712643678161, + "grad_norm": 3.003683699720249, + "learning_rate": 5e-06, + "loss": 0.1435, + "num_input_tokens_seen": 50549548, + "step": 560 + }, + { + "epoch": 2.574712643678161, + "loss": 0.14211556315422058, + "loss_ce": 0.00011727242235792801, + "loss_iou": 0.37890625, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 50549548, + "step": 560 + }, + { + "epoch": 2.5793103448275865, + "grad_norm": 8.360372471761355, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 50639992, + "step": 561 + }, + { + "epoch": 2.5793103448275865, + "loss": 0.12777529656887054, + "loss_ce": 4.39732575614471e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 50639992, + "step": 561 + }, + { + "epoch": 2.5839080459770116, + "grad_norm": 17.063605502973996, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 50730300, + "step": 562 + }, + { + "epoch": 2.5839080459770116, + "loss": 0.1004580706357956, + "loss_ce": 2.4719898647163063e-05, + "loss_iou": 0.349609375, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 50730300, + "step": 562 + }, + { + "epoch": 2.5885057471264368, + "grad_norm": 5.4210250306263585, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 50819996, + "step": 563 + }, + { + "epoch": 2.5885057471264368, + "loss": 0.18996095657348633, + "loss_ce": 1.954480103449896e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0380859375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 50819996, + "step": 563 + }, + { + "epoch": 2.593103448275862, + "grad_norm": 7.275873200745777, + "learning_rate": 5e-06, + "loss": 0.1733, + "num_input_tokens_seen": 50910296, + "step": 564 + }, + { + "epoch": 2.593103448275862, + "loss": 0.1952449530363083, + "loss_ce": 8.50430442369543e-05, + "loss_iou": 0.484375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 50910296, + "step": 564 + }, + { + "epoch": 2.5977011494252875, + "grad_norm": 15.23898519773349, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 51000576, + "step": 565 + }, + { + "epoch": 2.5977011494252875, + "loss": 0.1579952836036682, + "loss_ce": 2.1030613424954936e-05, + "loss_iou": 0.44921875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 51000576, + "step": 565 + }, + { + "epoch": 2.6022988505747127, + "grad_norm": 4.894484835323628, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 51090992, + "step": 566 + }, + { + "epoch": 2.6022988505747127, + "loss": 0.12843066453933716, + "loss_ce": 4.321142114349641e-05, + "loss_iou": 0.42578125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 51090992, + "step": 566 + }, + { + "epoch": 2.606896551724138, + "grad_norm": 20.80440116305703, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 51181196, + "step": 567 + }, + { + "epoch": 2.606896551724138, + "loss": 0.14310401678085327, + "loss_ce": 3.761286643566564e-05, + "loss_iou": 0.3515625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 51181196, + "step": 567 + }, + { + "epoch": 2.6114942528735634, + "grad_norm": 4.997998048110718, + "learning_rate": 5e-06, + "loss": 0.1867, + "num_input_tokens_seen": 51271612, + "step": 568 + }, + { + "epoch": 2.6114942528735634, + "loss": 0.24052223563194275, + "loss_ce": 4.3717802327591926e-05, + "loss_iou": 0.357421875, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 51271612, + "step": 568 + }, + { + "epoch": 2.6160919540229886, + "grad_norm": 14.548525343911681, + "learning_rate": 5e-06, + "loss": 0.1517, + "num_input_tokens_seen": 51362012, + "step": 569 + }, + { + "epoch": 2.6160919540229886, + "loss": 0.15177559852600098, + "loss_ce": 1.1683812772389501e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 51362012, + "step": 569 + }, + { + "epoch": 2.6206896551724137, + "grad_norm": 8.538017397932672, + "learning_rate": 5e-06, + "loss": 0.138, + "num_input_tokens_seen": 51452264, + "step": 570 + }, + { + "epoch": 2.6206896551724137, + "loss": 0.16324067115783691, + "loss_ce": 3.2669748179614544e-05, + "loss_iou": 0.51171875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 51452264, + "step": 570 + }, + { + "epoch": 2.625287356321839, + "grad_norm": 5.458750598266798, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 51542688, + "step": 571 + }, + { + "epoch": 2.625287356321839, + "loss": 0.1548556387424469, + "loss_ce": 3.9948965422809124e-05, + "loss_iou": 0.458984375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 51542688, + "step": 571 + }, + { + "epoch": 2.6298850574712644, + "grad_norm": 6.9963748084758475, + "learning_rate": 5e-06, + "loss": 0.1511, + "num_input_tokens_seen": 51633028, + "step": 572 + }, + { + "epoch": 2.6298850574712644, + "loss": 0.10907353460788727, + "loss_ce": 0.00012577624875120819, + "loss_iou": 0.45703125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 51633028, + "step": 572 + }, + { + "epoch": 2.6344827586206896, + "grad_norm": 5.485383848252606, + "learning_rate": 5e-06, + "loss": 0.2015, + "num_input_tokens_seen": 51723304, + "step": 573 + }, + { + "epoch": 2.6344827586206896, + "loss": 0.20509332418441772, + "loss_ce": 0.009658743627369404, + "loss_iou": 0.39453125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 51723304, + "step": 573 + }, + { + "epoch": 2.639080459770115, + "grad_norm": 5.145735431943079, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 51813780, + "step": 574 + }, + { + "epoch": 2.639080459770115, + "loss": 0.14565084874629974, + "loss_ce": 5.1484184950822964e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 51813780, + "step": 574 + }, + { + "epoch": 2.6436781609195403, + "grad_norm": 11.268920979351746, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 51904132, + "step": 575 + }, + { + "epoch": 2.6436781609195403, + "loss": 0.11697958409786224, + "loss_ce": 6.674315227428451e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 51904132, + "step": 575 + }, + { + "epoch": 2.6482758620689655, + "grad_norm": 5.697746433151331, + "learning_rate": 5e-06, + "loss": 0.1411, + "num_input_tokens_seen": 51993764, + "step": 576 + }, + { + "epoch": 2.6482758620689655, + "loss": 0.12572216987609863, + "loss_ce": 5.002422767574899e-06, + "loss_iou": 0.431640625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 51993764, + "step": 576 + }, + { + "epoch": 2.6528735632183906, + "grad_norm": 23.221495764418574, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 52084092, + "step": 577 + }, + { + "epoch": 2.6528735632183906, + "loss": 0.10617563128471375, + "loss_ce": 3.5495380870997906e-05, + "loss_iou": 0.3671875, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 52084092, + "step": 577 + }, + { + "epoch": 2.657471264367816, + "grad_norm": 9.375076718437624, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 52174520, + "step": 578 + }, + { + "epoch": 2.657471264367816, + "loss": 0.15085318684577942, + "loss_ce": 3.5313376429257914e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 52174520, + "step": 578 + }, + { + "epoch": 2.6620689655172414, + "grad_norm": 14.09120273250363, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 52264944, + "step": 579 + }, + { + "epoch": 2.6620689655172414, + "loss": 0.13439422845840454, + "loss_ce": 2.5327437469968572e-05, + "loss_iou": 0.42578125, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 52264944, + "step": 579 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 7.150633563903932, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 52355440, + "step": 580 + }, + { + "epoch": 2.6666666666666665, + "loss": 0.12812362611293793, + "loss_ce": 0.0001328981015831232, + "loss_iou": 0.421875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 52355440, + "step": 580 + }, + { + "epoch": 2.671264367816092, + "grad_norm": 17.68235051618041, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 52445736, + "step": 581 + }, + { + "epoch": 2.671264367816092, + "loss": 0.11226825416088104, + "loss_ce": 2.46073159360094e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 52445736, + "step": 581 + }, + { + "epoch": 2.6758620689655173, + "grad_norm": 9.382663357540876, + "learning_rate": 5e-06, + "loss": 0.1607, + "num_input_tokens_seen": 52536112, + "step": 582 + }, + { + "epoch": 2.6758620689655173, + "loss": 0.12408533692359924, + "loss_ce": 0.0001839685719460249, + "loss_iou": 0.388671875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 52536112, + "step": 582 + }, + { + "epoch": 2.6804597701149424, + "grad_norm": 4.0581740149048615, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 52626460, + "step": 583 + }, + { + "epoch": 2.6804597701149424, + "loss": 0.1088830754160881, + "loss_ce": 4.213307693134993e-05, + "loss_iou": 0.375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 52626460, + "step": 583 + }, + { + "epoch": 2.6850574712643676, + "grad_norm": 21.41413357247865, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 52716824, + "step": 584 + }, + { + "epoch": 2.6850574712643676, + "loss": 0.1613665223121643, + "loss_ce": 8.111814531730488e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 52716824, + "step": 584 + }, + { + "epoch": 2.689655172413793, + "grad_norm": 4.051520688977973, + "learning_rate": 5e-06, + "loss": 0.1631, + "num_input_tokens_seen": 52807040, + "step": 585 + }, + { + "epoch": 2.689655172413793, + "loss": 0.14354471862316132, + "loss_ce": 5.1069248002022505e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 52807040, + "step": 585 + }, + { + "epoch": 2.6942528735632183, + "grad_norm": 8.143732797882166, + "learning_rate": 5e-06, + "loss": 0.2078, + "num_input_tokens_seen": 52897404, + "step": 586 + }, + { + "epoch": 2.6942528735632183, + "loss": 0.21973484754562378, + "loss_ce": 0.0002524274750612676, + "loss_iou": 0.482421875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 52897404, + "step": 586 + }, + { + "epoch": 2.698850574712644, + "grad_norm": 9.812036652439687, + "learning_rate": 5e-06, + "loss": 0.1997, + "num_input_tokens_seen": 52987688, + "step": 587 + }, + { + "epoch": 2.698850574712644, + "loss": 0.2230190932750702, + "loss_ce": 5.7667512010084465e-05, + "loss_iou": 0.3515625, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 52987688, + "step": 587 + }, + { + "epoch": 2.703448275862069, + "grad_norm": 11.85150221226106, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 53078184, + "step": 588 + }, + { + "epoch": 2.703448275862069, + "loss": 0.16252008080482483, + "loss_ce": 1.3969104657007847e-05, + "loss_iou": 0.44921875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 53078184, + "step": 588 + }, + { + "epoch": 2.708045977011494, + "grad_norm": 4.7260277066471446, + "learning_rate": 5e-06, + "loss": 0.1668, + "num_input_tokens_seen": 53168516, + "step": 589 + }, + { + "epoch": 2.708045977011494, + "loss": 0.21362201869487762, + "loss_ce": 0.0016469230176880956, + "loss_iou": 0.349609375, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 53168516, + "step": 589 + }, + { + "epoch": 2.7126436781609193, + "grad_norm": 18.524483984567052, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 53258748, + "step": 590 + }, + { + "epoch": 2.7126436781609193, + "loss": 0.1348852813243866, + "loss_ce": 2.8092332286178134e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 53258748, + "step": 590 + }, + { + "epoch": 2.717241379310345, + "grad_norm": 9.8553207385232, + "learning_rate": 5e-06, + "loss": 0.216, + "num_input_tokens_seen": 53349272, + "step": 591 + }, + { + "epoch": 2.717241379310345, + "loss": 0.26626330614089966, + "loss_ce": 8.898475061869249e-05, + "loss_iou": 0.5, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 53349272, + "step": 591 + }, + { + "epoch": 2.72183908045977, + "grad_norm": 10.070351523808545, + "learning_rate": 5e-06, + "loss": 0.1794, + "num_input_tokens_seen": 53438892, + "step": 592 + }, + { + "epoch": 2.72183908045977, + "loss": 0.17163211107254028, + "loss_ce": 6.230298458831385e-05, + "loss_iou": 0.48046875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 53438892, + "step": 592 + }, + { + "epoch": 2.7264367816091957, + "grad_norm": 4.7799609797973535, + "learning_rate": 5e-06, + "loss": 0.229, + "num_input_tokens_seen": 53529204, + "step": 593 + }, + { + "epoch": 2.7264367816091957, + "loss": 0.2294745147228241, + "loss_ce": 0.00010438873869134113, + "loss_iou": 0.33203125, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 53529204, + "step": 593 + }, + { + "epoch": 2.731034482758621, + "grad_norm": 10.927919604630798, + "learning_rate": 5e-06, + "loss": 0.1659, + "num_input_tokens_seen": 53619564, + "step": 594 + }, + { + "epoch": 2.731034482758621, + "loss": 0.16697654128074646, + "loss_ce": 1.487006011302583e-05, + "loss_iou": 0.38671875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 53619564, + "step": 594 + }, + { + "epoch": 2.735632183908046, + "grad_norm": 14.00357432861095, + "learning_rate": 5e-06, + "loss": 0.1539, + "num_input_tokens_seen": 53709964, + "step": 595 + }, + { + "epoch": 2.735632183908046, + "loss": 0.1489291936159134, + "loss_ce": 9.496298298472539e-05, + "loss_iou": 0.40625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 53709964, + "step": 595 + }, + { + "epoch": 2.740229885057471, + "grad_norm": 18.78198114746188, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 53799628, + "step": 596 + }, + { + "epoch": 2.740229885057471, + "loss": 0.17056533694267273, + "loss_ce": 0.006441800855100155, + "loss_iou": 0.375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 53799628, + "step": 596 + }, + { + "epoch": 2.7448275862068967, + "grad_norm": 10.48163904175623, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 53889936, + "step": 597 + }, + { + "epoch": 2.7448275862068967, + "loss": 0.13397589325904846, + "loss_ce": 1.898076334327925e-05, + "loss_iou": 0.2890625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 53889936, + "step": 597 + }, + { + "epoch": 2.749425287356322, + "grad_norm": 7.682690937441725, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 53980312, + "step": 598 + }, + { + "epoch": 2.749425287356322, + "loss": 0.12832111120224, + "loss_ce": 7.098779315128922e-05, + "loss_iou": 0.443359375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 53980312, + "step": 598 + }, + { + "epoch": 2.754022988505747, + "grad_norm": 5.73861918938428, + "learning_rate": 5e-06, + "loss": 0.1738, + "num_input_tokens_seen": 54070552, + "step": 599 + }, + { + "epoch": 2.754022988505747, + "loss": 0.12556979060173035, + "loss_ce": 8.150518988259137e-05, + "loss_iou": 0.3828125, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 54070552, + "step": 599 + }, + { + "epoch": 2.7586206896551726, + "grad_norm": 6.309494256468956, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 54161016, + "step": 600 + }, + { + "epoch": 2.7586206896551726, + "loss": 0.08496278524398804, + "loss_ce": 3.236144038964994e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 54161016, + "step": 600 + }, + { + "epoch": 2.7632183908045977, + "grad_norm": 8.032143628331568, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 54251360, + "step": 601 + }, + { + "epoch": 2.7632183908045977, + "loss": 0.14277827739715576, + "loss_ce": 1.7043152183759958e-05, + "loss_iou": 0.453125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 54251360, + "step": 601 + }, + { + "epoch": 2.767816091954023, + "grad_norm": 4.475275047608208, + "learning_rate": 5e-06, + "loss": 0.1901, + "num_input_tokens_seen": 54340280, + "step": 602 + }, + { + "epoch": 2.767816091954023, + "loss": 0.16928395628929138, + "loss_ce": 3.346416269778274e-05, + "loss_iou": 0.453125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 54340280, + "step": 602 + }, + { + "epoch": 2.772413793103448, + "grad_norm": 20.093795622997725, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 54430644, + "step": 603 + }, + { + "epoch": 2.772413793103448, + "loss": 0.24863861501216888, + "loss_ce": 4.241707210894674e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 54430644, + "step": 603 + }, + { + "epoch": 2.7770114942528736, + "grad_norm": 7.529800946044408, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 54521100, + "step": 604 + }, + { + "epoch": 2.7770114942528736, + "loss": 0.060331616550683975, + "loss_ce": 0.002012524986639619, + "loss_iou": 0.390625, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 54521100, + "step": 604 + }, + { + "epoch": 2.781609195402299, + "grad_norm": 4.796037523923617, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 54611424, + "step": 605 + }, + { + "epoch": 2.781609195402299, + "loss": 0.1368238478899002, + "loss_ce": 1.3542239685193636e-05, + "loss_iou": 0.38671875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 54611424, + "step": 605 + }, + { + "epoch": 2.7862068965517244, + "grad_norm": 6.220654742397298, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 54701824, + "step": 606 + }, + { + "epoch": 2.7862068965517244, + "loss": 0.12376809120178223, + "loss_ce": 4.983101462130435e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 54701824, + "step": 606 + }, + { + "epoch": 2.7908045977011495, + "grad_norm": 6.281261617719392, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 54792204, + "step": 607 + }, + { + "epoch": 2.7908045977011495, + "loss": 0.13932999968528748, + "loss_ce": 4.776205605594441e-05, + "loss_iou": 0.435546875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 54792204, + "step": 607 + }, + { + "epoch": 2.7954022988505747, + "grad_norm": 5.897586685245029, + "learning_rate": 5e-06, + "loss": 0.1316, + "num_input_tokens_seen": 54882476, + "step": 608 + }, + { + "epoch": 2.7954022988505747, + "loss": 0.11914113909006119, + "loss_ce": 0.00033620704198256135, + "loss_iou": 0.478515625, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 54882476, + "step": 608 + }, + { + "epoch": 2.8, + "grad_norm": 18.865031038984814, + "learning_rate": 5e-06, + "loss": 0.1713, + "num_input_tokens_seen": 54972752, + "step": 609 + }, + { + "epoch": 2.8, + "loss": 0.1230660229921341, + "loss_ce": 1.914523818413727e-05, + "loss_iou": 0.390625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 54972752, + "step": 609 + }, + { + "epoch": 2.8045977011494254, + "grad_norm": 12.75408299711661, + "learning_rate": 5e-06, + "loss": 0.1575, + "num_input_tokens_seen": 55062340, + "step": 610 + }, + { + "epoch": 2.8045977011494254, + "loss": 0.16492819786071777, + "loss_ce": 1.1197053026990034e-05, + "loss_iou": 0.330078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 55062340, + "step": 610 + }, + { + "epoch": 2.8091954022988506, + "grad_norm": 12.5815565888982, + "learning_rate": 5e-06, + "loss": 0.1924, + "num_input_tokens_seen": 55151980, + "step": 611 + }, + { + "epoch": 2.8091954022988506, + "loss": 0.19428588449954987, + "loss_ce": 0.006968984380364418, + "loss_iou": 0.44140625, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 55151980, + "step": 611 + }, + { + "epoch": 2.8137931034482757, + "grad_norm": 3.291240590141606, + "learning_rate": 5e-06, + "loss": 0.1739, + "num_input_tokens_seen": 55242284, + "step": 612 + }, + { + "epoch": 2.8137931034482757, + "loss": 0.15410566329956055, + "loss_ce": 0.00022076835739426315, + "loss_iou": 0.37109375, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 55242284, + "step": 612 + }, + { + "epoch": 2.8183908045977013, + "grad_norm": 16.71758430849643, + "learning_rate": 5e-06, + "loss": 0.1686, + "num_input_tokens_seen": 55332764, + "step": 613 + }, + { + "epoch": 2.8183908045977013, + "loss": 0.21246647834777832, + "loss_ce": 0.00018619894399307668, + "loss_iou": 0.400390625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 55332764, + "step": 613 + }, + { + "epoch": 2.8229885057471265, + "grad_norm": 9.529924266178037, + "learning_rate": 5e-06, + "loss": 0.1512, + "num_input_tokens_seen": 55423028, + "step": 614 + }, + { + "epoch": 2.8229885057471265, + "loss": 0.10561984032392502, + "loss_ce": 2.9023341994616203e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 55423028, + "step": 614 + }, + { + "epoch": 2.8275862068965516, + "grad_norm": 4.245318747599123, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 55513508, + "step": 615 + }, + { + "epoch": 2.8275862068965516, + "loss": 0.1072404682636261, + "loss_ce": 3.2217059924732894e-05, + "loss_iou": 0.33984375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 55513508, + "step": 615 + }, + { + "epoch": 2.8321839080459768, + "grad_norm": 18.179797794436265, + "learning_rate": 5e-06, + "loss": 0.1493, + "num_input_tokens_seen": 55603844, + "step": 616 + }, + { + "epoch": 2.8321839080459768, + "loss": 0.15227092802524567, + "loss_ce": 4.924844688503072e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 55603844, + "step": 616 + }, + { + "epoch": 2.8367816091954023, + "grad_norm": 5.57072118153668, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 55693488, + "step": 617 + }, + { + "epoch": 2.8367816091954023, + "loss": 0.13283629715442657, + "loss_ce": 0.00017638025747146457, + "loss_iou": 0.47265625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 55693488, + "step": 617 + }, + { + "epoch": 2.8413793103448275, + "grad_norm": 3.7001051290248053, + "learning_rate": 5e-06, + "loss": 0.23, + "num_input_tokens_seen": 55783768, + "step": 618 + }, + { + "epoch": 2.8413793103448275, + "loss": 0.18909384310245514, + "loss_ce": 3.744277637451887e-05, + "loss_iou": 0.423828125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 55783768, + "step": 618 + }, + { + "epoch": 2.845977011494253, + "grad_norm": 8.633989870151986, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 55874088, + "step": 619 + }, + { + "epoch": 2.845977011494253, + "loss": 0.16746875643730164, + "loss_ce": 4.932547381031327e-05, + "loss_iou": 0.400390625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 55874088, + "step": 619 + }, + { + "epoch": 2.8505747126436782, + "grad_norm": 7.508204824741638, + "learning_rate": 5e-06, + "loss": 0.1742, + "num_input_tokens_seen": 55964516, + "step": 620 + }, + { + "epoch": 2.8505747126436782, + "loss": 0.18894179165363312, + "loss_ce": 0.00011428301513660699, + "loss_iou": 0.3515625, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 55964516, + "step": 620 + }, + { + "epoch": 2.8551724137931034, + "grad_norm": 11.720485380185297, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 56054816, + "step": 621 + }, + { + "epoch": 2.8551724137931034, + "loss": 0.12343352288007736, + "loss_ce": 2.0439181753317825e-05, + "loss_iou": 0.333984375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 56054816, + "step": 621 + }, + { + "epoch": 2.8597701149425285, + "grad_norm": 6.132110793900122, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 56145008, + "step": 622 + }, + { + "epoch": 2.8597701149425285, + "loss": 0.11308827996253967, + "loss_ce": 2.0656258129747584e-05, + "loss_iou": 0.359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 56145008, + "step": 622 + }, + { + "epoch": 2.864367816091954, + "grad_norm": 27.370691194991768, + "learning_rate": 5e-06, + "loss": 0.2575, + "num_input_tokens_seen": 56234656, + "step": 623 + }, + { + "epoch": 2.864367816091954, + "loss": 0.2561437487602234, + "loss_ce": 0.0002538765547797084, + "loss_iou": 0.3984375, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 56234656, + "step": 623 + }, + { + "epoch": 2.8689655172413793, + "grad_norm": 4.250971216520788, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 56325056, + "step": 624 + }, + { + "epoch": 2.8689655172413793, + "loss": 0.15083430707454681, + "loss_ce": 1.643798896111548e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 56325056, + "step": 624 + }, + { + "epoch": 2.873563218390805, + "grad_norm": 23.371099875991142, + "learning_rate": 5e-06, + "loss": 0.1696, + "num_input_tokens_seen": 56415548, + "step": 625 + }, + { + "epoch": 2.873563218390805, + "loss": 0.2244938164949417, + "loss_ce": 0.00018961615569423884, + "loss_iou": 0.4453125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 56415548, + "step": 625 + }, + { + "epoch": 2.87816091954023, + "grad_norm": 19.44155033716697, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 56505852, + "step": 626 + }, + { + "epoch": 2.87816091954023, + "loss": 0.12483800947666168, + "loss_ce": 0.008840695023536682, + "loss_iou": 0.4296875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 56505852, + "step": 626 + }, + { + "epoch": 2.882758620689655, + "grad_norm": 2.7000059987309135, + "learning_rate": 5e-06, + "loss": 0.132, + "num_input_tokens_seen": 56596200, + "step": 627 + }, + { + "epoch": 2.882758620689655, + "loss": 0.16553373634815216, + "loss_ce": 3.6913388612447307e-05, + "loss_iou": 0.318359375, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 56596200, + "step": 627 + }, + { + "epoch": 2.8873563218390803, + "grad_norm": 11.738060980857469, + "learning_rate": 5e-06, + "loss": 0.1943, + "num_input_tokens_seen": 56686468, + "step": 628 + }, + { + "epoch": 2.8873563218390803, + "loss": 0.13238102197647095, + "loss_ce": 0.00013309504720382392, + "loss_iou": 0.40625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 56686468, + "step": 628 + }, + { + "epoch": 2.891954022988506, + "grad_norm": 5.374412860929261, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 56776724, + "step": 629 + }, + { + "epoch": 2.891954022988506, + "loss": 0.10836595296859741, + "loss_ce": 0.00018113740952685475, + "loss_iou": 0.388671875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 56776724, + "step": 629 + }, + { + "epoch": 2.896551724137931, + "grad_norm": 17.458934517040042, + "learning_rate": 5e-06, + "loss": 0.1121, + "num_input_tokens_seen": 56867076, + "step": 630 + }, + { + "epoch": 2.896551724137931, + "loss": 0.0917268842458725, + "loss_ce": 8.260130562121049e-05, + "loss_iou": 0.408203125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 56867076, + "step": 630 + }, + { + "epoch": 2.901149425287356, + "grad_norm": 7.144445161490093, + "learning_rate": 5e-06, + "loss": 0.1952, + "num_input_tokens_seen": 56957452, + "step": 631 + }, + { + "epoch": 2.901149425287356, + "loss": 0.17814010381698608, + "loss_ce": 3.951931284973398e-05, + "loss_iou": 0.36328125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 56957452, + "step": 631 + }, + { + "epoch": 2.905747126436782, + "grad_norm": 4.881468932307776, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 57046320, + "step": 632 + }, + { + "epoch": 2.905747126436782, + "loss": 0.158255934715271, + "loss_ce": 8.333477308042347e-05, + "loss_iou": 0.373046875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 57046320, + "step": 632 + }, + { + "epoch": 2.910344827586207, + "grad_norm": 6.020225715103822, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 57136760, + "step": 633 + }, + { + "epoch": 2.910344827586207, + "loss": 0.08915083855390549, + "loss_ce": 0.0001921012590173632, + "loss_iou": 0.373046875, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 57136760, + "step": 633 + }, + { + "epoch": 2.914942528735632, + "grad_norm": 18.893533969376247, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 57227264, + "step": 634 + }, + { + "epoch": 2.914942528735632, + "loss": 0.0713462084531784, + "loss_ce": 8.766089740674943e-05, + "loss_iou": 0.404296875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 57227264, + "step": 634 + }, + { + "epoch": 2.9195402298850572, + "grad_norm": 18.876600553359836, + "learning_rate": 5e-06, + "loss": 0.1505, + "num_input_tokens_seen": 57316096, + "step": 635 + }, + { + "epoch": 2.9195402298850572, + "loss": 0.14804166555404663, + "loss_ce": 0.0002297760802321136, + "loss_iou": 0.3828125, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 57316096, + "step": 635 + }, + { + "epoch": 2.924137931034483, + "grad_norm": 14.779947666699496, + "learning_rate": 5e-06, + "loss": 0.1474, + "num_input_tokens_seen": 57406464, + "step": 636 + }, + { + "epoch": 2.924137931034483, + "loss": 0.1926122009754181, + "loss_ce": 1.5764764611958526e-05, + "loss_iou": 0.369140625, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 57406464, + "step": 636 + }, + { + "epoch": 2.928735632183908, + "grad_norm": 33.4524141554006, + "learning_rate": 5e-06, + "loss": 0.1697, + "num_input_tokens_seen": 57496896, + "step": 637 + }, + { + "epoch": 2.928735632183908, + "loss": 0.19599446654319763, + "loss_ce": 7.161433313740417e-05, + "loss_iou": 0.431640625, + "loss_num": 0.0390625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 57496896, + "step": 637 + }, + { + "epoch": 2.9333333333333336, + "grad_norm": 8.837724303528487, + "learning_rate": 5e-06, + "loss": 0.1276, + "num_input_tokens_seen": 57587328, + "step": 638 + }, + { + "epoch": 2.9333333333333336, + "loss": 0.11598608642816544, + "loss_ce": 8.032606274355203e-05, + "loss_iou": 0.4375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 57587328, + "step": 638 + }, + { + "epoch": 2.9379310344827587, + "grad_norm": 5.458845083322578, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 57676000, + "step": 639 + }, + { + "epoch": 2.9379310344827587, + "loss": 0.1924465298652649, + "loss_ce": 6.370765186147764e-05, + "loss_iou": 0.3671875, + "loss_num": 0.038330078125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 57676000, + "step": 639 + }, + { + "epoch": 2.942528735632184, + "grad_norm": 3.502931679203287, + "learning_rate": 5e-06, + "loss": 0.1517, + "num_input_tokens_seen": 57766436, + "step": 640 + }, + { + "epoch": 2.942528735632184, + "loss": 0.22913488745689392, + "loss_ce": 0.00022254293435253203, + "loss_iou": 0.39453125, + "loss_num": 0.0458984375, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 57766436, + "step": 640 + }, + { + "epoch": 2.947126436781609, + "grad_norm": 27.45097976056178, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 57856748, + "step": 641 + }, + { + "epoch": 2.947126436781609, + "loss": 0.1526871919631958, + "loss_ce": 9.930751548381522e-05, + "loss_iou": 0.330078125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 57856748, + "step": 641 + }, + { + "epoch": 2.9517241379310346, + "grad_norm": 12.914503955582164, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 57946960, + "step": 642 + }, + { + "epoch": 2.9517241379310346, + "loss": 0.17462590336799622, + "loss_ce": 6.536281580338255e-05, + "loss_iou": 0.373046875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 57946960, + "step": 642 + }, + { + "epoch": 2.9563218390804598, + "grad_norm": 11.39446414052775, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 58035688, + "step": 643 + }, + { + "epoch": 2.9563218390804598, + "loss": 0.11201904714107513, + "loss_ce": 8.057546801865101e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 58035688, + "step": 643 + }, + { + "epoch": 2.960919540229885, + "grad_norm": 3.1615458390497326, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 58126192, + "step": 644 + }, + { + "epoch": 2.960919540229885, + "loss": 0.11491385102272034, + "loss_ce": 0.00010671426571207121, + "loss_iou": 0.3984375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 58126192, + "step": 644 + }, + { + "epoch": 2.9655172413793105, + "grad_norm": 11.115986632833536, + "learning_rate": 5e-06, + "loss": 0.1975, + "num_input_tokens_seen": 58216656, + "step": 645 + }, + { + "epoch": 2.9655172413793105, + "loss": 0.18091078102588654, + "loss_ce": 0.00015516624262090772, + "loss_iou": 0.38671875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 58216656, + "step": 645 + }, + { + "epoch": 2.9701149425287356, + "grad_norm": 10.595330403567505, + "learning_rate": 5e-06, + "loss": 0.1393, + "num_input_tokens_seen": 58306940, + "step": 646 + }, + { + "epoch": 2.9701149425287356, + "loss": 0.18637780845165253, + "loss_ce": 3.746678703464568e-05, + "loss_iou": 0.41015625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 58306940, + "step": 646 + }, + { + "epoch": 2.974712643678161, + "grad_norm": 12.224220904653137, + "learning_rate": 5e-06, + "loss": 0.1602, + "num_input_tokens_seen": 58397184, + "step": 647 + }, + { + "epoch": 2.974712643678161, + "loss": 0.1820899099111557, + "loss_ce": 2.2039628674974665e-05, + "loss_iou": 0.34765625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 58397184, + "step": 647 + }, + { + "epoch": 2.979310344827586, + "grad_norm": 12.36377678211748, + "learning_rate": 5e-06, + "loss": 0.1605, + "num_input_tokens_seen": 58487644, + "step": 648 + }, + { + "epoch": 2.979310344827586, + "loss": 0.09505946934223175, + "loss_ce": 4.29864194302354e-05, + "loss_iou": 0.384765625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 58487644, + "step": 648 + }, + { + "epoch": 2.9839080459770115, + "grad_norm": 44.77338962392459, + "learning_rate": 5e-06, + "loss": 0.1624, + "num_input_tokens_seen": 58578064, + "step": 649 + }, + { + "epoch": 2.9839080459770115, + "loss": 0.19933252036571503, + "loss_ce": 0.00029687874484807253, + "loss_iou": 0.4375, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 58578064, + "step": 649 + }, + { + "epoch": 2.9885057471264367, + "grad_norm": 10.434155317304565, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 58668520, + "step": 650 + }, + { + "epoch": 2.9885057471264367, + "loss": 0.17232680320739746, + "loss_ce": 2.456527727190405e-05, + "loss_iou": 0.349609375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 58668520, + "step": 650 + }, + { + "epoch": 2.9931034482758623, + "grad_norm": 11.343545620466347, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 58758840, + "step": 651 + }, + { + "epoch": 2.9931034482758623, + "loss": 0.17710255086421967, + "loss_ce": 2.4299104552483186e-05, + "loss_iou": 0.296875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 58758840, + "step": 651 + }, + { + "epoch": 2.9977011494252874, + "grad_norm": 4.426473466923714, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 58849212, + "step": 652 + }, + { + "epoch": 2.9977011494252874, + "loss": 0.1025293692946434, + "loss_ce": 0.00014289778482634574, + "loss_iou": 0.375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 58849212, + "step": 652 + }, + { + "epoch": 2.9977011494252874, + "loss": 0.18885542452335358, + "loss_ce": 1.2649360542127397e-05, + "loss_iou": 0.396484375, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 58894312, + "step": 652 + }, + { + "epoch": 3.0022988505747126, + "grad_norm": 5.109594045617307, + "learning_rate": 5e-06, + "loss": 0.1499, + "num_input_tokens_seen": 58939548, + "step": 653 + }, + { + "epoch": 3.0022988505747126, + "loss": 0.11101669818162918, + "loss_ce": 5.478527600644156e-05, + "loss_iou": 0.3671875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 58939548, + "step": 653 + }, + { + "epoch": 3.0068965517241377, + "grad_norm": 10.518496135733074, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 59029104, + "step": 654 + }, + { + "epoch": 3.0068965517241377, + "loss": 0.15445493161678314, + "loss_ce": 0.00015805772272869945, + "loss_iou": 0.4609375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 59029104, + "step": 654 + }, + { + "epoch": 3.0114942528735633, + "grad_norm": 14.317668018009412, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 59119440, + "step": 655 + }, + { + "epoch": 3.0114942528735633, + "loss": 0.1157938614487648, + "loss_ce": 1.0168273547606077e-05, + "loss_iou": 0.431640625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 59119440, + "step": 655 + }, + { + "epoch": 3.0160919540229885, + "grad_norm": 6.462788772652226, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 59209864, + "step": 656 + }, + { + "epoch": 3.0160919540229885, + "loss": 0.06505613029003143, + "loss_ce": 0.00011471970356069505, + "loss_iou": 0.390625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 59209864, + "step": 656 + }, + { + "epoch": 3.0206896551724136, + "grad_norm": 5.315176931861487, + "learning_rate": 5e-06, + "loss": 0.0992, + "num_input_tokens_seen": 59300316, + "step": 657 + }, + { + "epoch": 3.0206896551724136, + "loss": 0.064435213804245, + "loss_ce": 0.002515045227482915, + "loss_iou": 0.326171875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 59300316, + "step": 657 + }, + { + "epoch": 3.025287356321839, + "grad_norm": 4.182380926387102, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 59390752, + "step": 658 + }, + { + "epoch": 3.025287356321839, + "loss": 0.13093848526477814, + "loss_ce": 1.8076902051689103e-05, + "loss_iou": 0.435546875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 59390752, + "step": 658 + }, + { + "epoch": 3.0298850574712644, + "grad_norm": 14.571610617843975, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 59481036, + "step": 659 + }, + { + "epoch": 3.0298850574712644, + "loss": 0.08019035309553146, + "loss_ce": 8.17138425190933e-05, + "loss_iou": 0.359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 59481036, + "step": 659 + }, + { + "epoch": 3.0344827586206895, + "grad_norm": 16.65916183345602, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 59571308, + "step": 660 + }, + { + "epoch": 3.0344827586206895, + "loss": 0.15776613354682922, + "loss_ce": 5.127684198669158e-05, + "loss_iou": 0.44140625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 59571308, + "step": 660 + }, + { + "epoch": 3.039080459770115, + "grad_norm": 3.372941981569247, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 59661712, + "step": 661 + }, + { + "epoch": 3.039080459770115, + "loss": 0.0939127653837204, + "loss_ce": 4.0693001210456714e-05, + "loss_iou": 0.384765625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 59661712, + "step": 661 + }, + { + "epoch": 3.0436781609195402, + "grad_norm": 7.8989890189247, + "learning_rate": 5e-06, + "loss": 0.1298, + "num_input_tokens_seen": 59752088, + "step": 662 + }, + { + "epoch": 3.0436781609195402, + "loss": 0.11880500614643097, + "loss_ce": 3.058691436308436e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 59752088, + "step": 662 + }, + { + "epoch": 3.0482758620689654, + "grad_norm": 9.966522515655097, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 59842376, + "step": 663 + }, + { + "epoch": 3.0482758620689654, + "loss": 0.1415707916021347, + "loss_ce": 3.027227103302721e-05, + "loss_iou": 0.453125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 59842376, + "step": 663 + }, + { + "epoch": 3.052873563218391, + "grad_norm": 14.738658620476503, + "learning_rate": 5e-06, + "loss": 0.1078, + "num_input_tokens_seen": 59932932, + "step": 664 + }, + { + "epoch": 3.052873563218391, + "loss": 0.15776753425598145, + "loss_ce": 2.216372377006337e-05, + "loss_iou": 0.390625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 59932932, + "step": 664 + }, + { + "epoch": 3.057471264367816, + "grad_norm": 11.861563289243291, + "learning_rate": 5e-06, + "loss": 0.161, + "num_input_tokens_seen": 60023360, + "step": 665 + }, + { + "epoch": 3.057471264367816, + "loss": 0.15695565938949585, + "loss_ce": 3.427857154747471e-05, + "loss_iou": 0.373046875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 60023360, + "step": 665 + }, + { + "epoch": 3.0620689655172413, + "grad_norm": 8.820370554295812, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 60112940, + "step": 666 + }, + { + "epoch": 3.0620689655172413, + "loss": 0.13253626227378845, + "loss_ce": 8.997396798804402e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 60112940, + "step": 666 + }, + { + "epoch": 3.066666666666667, + "grad_norm": 3.1489432695249526, + "learning_rate": 5e-06, + "loss": 0.1686, + "num_input_tokens_seen": 60203192, + "step": 667 + }, + { + "epoch": 3.066666666666667, + "loss": 0.14637088775634766, + "loss_ce": 8.589012395532336e-06, + "loss_iou": 0.439453125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 60203192, + "step": 667 + }, + { + "epoch": 3.071264367816092, + "grad_norm": 5.702383945793617, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 60293508, + "step": 668 + }, + { + "epoch": 3.071264367816092, + "loss": 0.09078279137611389, + "loss_ce": 2.3513095584348775e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 60293508, + "step": 668 + }, + { + "epoch": 3.075862068965517, + "grad_norm": 21.441294074851363, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 60383820, + "step": 669 + }, + { + "epoch": 3.075862068965517, + "loss": 0.10486976057291031, + "loss_ce": 1.1363279554643668e-05, + "loss_iou": 0.33203125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 60383820, + "step": 669 + }, + { + "epoch": 3.0804597701149423, + "grad_norm": 3.914442501475495, + "learning_rate": 5e-06, + "loss": 0.193, + "num_input_tokens_seen": 60474148, + "step": 670 + }, + { + "epoch": 3.0804597701149423, + "loss": 0.16388007998466492, + "loss_ce": 1.594252353243064e-05, + "loss_iou": 0.34765625, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 60474148, + "step": 670 + }, + { + "epoch": 3.085057471264368, + "grad_norm": 4.9620660179246885, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 60564532, + "step": 671 + }, + { + "epoch": 3.085057471264368, + "loss": 0.18953979015350342, + "loss_ce": 8.66625050548464e-05, + "loss_iou": 0.359375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 60564532, + "step": 671 + }, + { + "epoch": 3.089655172413793, + "grad_norm": 16.0733034397525, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 60654828, + "step": 672 + }, + { + "epoch": 3.089655172413793, + "loss": 0.14448416233062744, + "loss_ce": 2.920163024100475e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 60654828, + "step": 672 + }, + { + "epoch": 3.094252873563218, + "grad_norm": 2.850437311995024, + "learning_rate": 5e-06, + "loss": 0.0937, + "num_input_tokens_seen": 60745316, + "step": 673 + }, + { + "epoch": 3.094252873563218, + "loss": 0.08657945692539215, + "loss_ce": 0.00012315809726715088, + "loss_iou": 0.41015625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 60745316, + "step": 673 + }, + { + "epoch": 3.098850574712644, + "grad_norm": 6.711572483738216, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 60835616, + "step": 674 + }, + { + "epoch": 3.098850574712644, + "loss": 0.11418451368808746, + "loss_ce": 1.8256445400766097e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 60835616, + "step": 674 + }, + { + "epoch": 3.103448275862069, + "grad_norm": 2.9732106658271267, + "learning_rate": 5e-06, + "loss": 0.2022, + "num_input_tokens_seen": 60926020, + "step": 675 + }, + { + "epoch": 3.103448275862069, + "loss": 0.22187387943267822, + "loss_ce": 0.0012928310316056013, + "loss_iou": 0.408203125, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 60926020, + "step": 675 + }, + { + "epoch": 3.108045977011494, + "grad_norm": 22.393897827054342, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 61016500, + "step": 676 + }, + { + "epoch": 3.108045977011494, + "loss": 0.17077524960041046, + "loss_ce": 0.0004261271096765995, + "loss_iou": 0.34375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 61016500, + "step": 676 + }, + { + "epoch": 3.1126436781609197, + "grad_norm": 4.958182554412022, + "learning_rate": 5e-06, + "loss": 0.1796, + "num_input_tokens_seen": 61106876, + "step": 677 + }, + { + "epoch": 3.1126436781609197, + "loss": 0.17057394981384277, + "loss_ce": 8.749846165301278e-05, + "loss_iou": 0.4375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 61106876, + "step": 677 + }, + { + "epoch": 3.117241379310345, + "grad_norm": 5.763483977522711, + "learning_rate": 5e-06, + "loss": 0.1333, + "num_input_tokens_seen": 61197312, + "step": 678 + }, + { + "epoch": 3.117241379310345, + "loss": 0.16354021430015564, + "loss_ce": 0.00014910995378158987, + "loss_iou": 0.408203125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 61197312, + "step": 678 + }, + { + "epoch": 3.12183908045977, + "grad_norm": 22.61759363539356, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 61287748, + "step": 679 + }, + { + "epoch": 3.12183908045977, + "loss": 0.16320019960403442, + "loss_ce": 8.373754099011421e-05, + "loss_iou": 0.39453125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 61287748, + "step": 679 + }, + { + "epoch": 3.1264367816091956, + "grad_norm": 21.37560854861523, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 61378228, + "step": 680 + }, + { + "epoch": 3.1264367816091956, + "loss": 0.1652674674987793, + "loss_ce": 4.528817953541875e-05, + "loss_iou": 0.41796875, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 61378228, + "step": 680 + }, + { + "epoch": 3.1310344827586207, + "grad_norm": 9.912277304010486, + "learning_rate": 5e-06, + "loss": 0.168, + "num_input_tokens_seen": 61468564, + "step": 681 + }, + { + "epoch": 3.1310344827586207, + "loss": 0.14338386058807373, + "loss_ce": 7.332136738114059e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 61468564, + "step": 681 + }, + { + "epoch": 3.135632183908046, + "grad_norm": 14.24831883027225, + "learning_rate": 5e-06, + "loss": 0.1808, + "num_input_tokens_seen": 61559020, + "step": 682 + }, + { + "epoch": 3.135632183908046, + "loss": 0.15874823927879333, + "loss_ce": 0.000117871692054905, + "loss_iou": 0.341796875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 61559020, + "step": 682 + }, + { + "epoch": 3.1402298850574715, + "grad_norm": 10.805661680356696, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 61649428, + "step": 683 + }, + { + "epoch": 3.1402298850574715, + "loss": 0.11267475038766861, + "loss_ce": 8.014314516913146e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 61649428, + "step": 683 + }, + { + "epoch": 3.1448275862068966, + "grad_norm": 10.580707465651063, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 61739904, + "step": 684 + }, + { + "epoch": 3.1448275862068966, + "loss": 0.11008670926094055, + "loss_ce": 2.5071400159504265e-05, + "loss_iou": 0.435546875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 61739904, + "step": 684 + }, + { + "epoch": 3.1494252873563218, + "grad_norm": 9.787018998948723, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 61830168, + "step": 685 + }, + { + "epoch": 3.1494252873563218, + "loss": 0.11129388213157654, + "loss_ce": 7.25619393051602e-05, + "loss_iou": 0.44921875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 61830168, + "step": 685 + }, + { + "epoch": 3.154022988505747, + "grad_norm": 25.016543912767162, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 61920580, + "step": 686 + }, + { + "epoch": 3.154022988505747, + "loss": 0.11655561625957489, + "loss_ce": 8.985316526377574e-06, + "loss_iou": 0.380859375, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 61920580, + "step": 686 + }, + { + "epoch": 3.1586206896551725, + "grad_norm": 27.789033510951985, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 62010984, + "step": 687 + }, + { + "epoch": 3.1586206896551725, + "loss": 0.10948251187801361, + "loss_ce": 1.596645415702369e-05, + "loss_iou": 0.421875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 62010984, + "step": 687 + }, + { + "epoch": 3.1632183908045977, + "grad_norm": 3.436728324433976, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 62101364, + "step": 688 + }, + { + "epoch": 3.1632183908045977, + "loss": 0.12181434035301208, + "loss_ce": 0.0001102387614082545, + "loss_iou": 0.3671875, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 62101364, + "step": 688 + }, + { + "epoch": 3.167816091954023, + "grad_norm": 9.993289616742318, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 62190176, + "step": 689 + }, + { + "epoch": 3.167816091954023, + "loss": 0.14744696021080017, + "loss_ce": 0.003327697515487671, + "loss_iou": 0.375, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 62190176, + "step": 689 + }, + { + "epoch": 3.1724137931034484, + "grad_norm": 12.827182492237142, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 62280480, + "step": 690 + }, + { + "epoch": 3.1724137931034484, + "loss": 0.15000127255916595, + "loss_ce": 3.789434413192794e-05, + "loss_iou": 0.42578125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 62280480, + "step": 690 + }, + { + "epoch": 3.1770114942528735, + "grad_norm": 2.7933249990009013, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 62370884, + "step": 691 + }, + { + "epoch": 3.1770114942528735, + "loss": 0.13926616311073303, + "loss_ce": 0.00022808580251876265, + "loss_iou": 0.357421875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 62370884, + "step": 691 + }, + { + "epoch": 3.1816091954022987, + "grad_norm": 3.400237110893925, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 62461280, + "step": 692 + }, + { + "epoch": 3.1816091954022987, + "loss": 0.07589490711688995, + "loss_ce": 5.872425026609562e-05, + "loss_iou": 0.392578125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 62461280, + "step": 692 + }, + { + "epoch": 3.1862068965517243, + "grad_norm": 10.582817694578983, + "learning_rate": 5e-06, + "loss": 0.137, + "num_input_tokens_seen": 62551580, + "step": 693 + }, + { + "epoch": 3.1862068965517243, + "loss": 0.14650990068912506, + "loss_ce": 2.5522436772007495e-05, + "loss_iou": 0.462890625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 62551580, + "step": 693 + }, + { + "epoch": 3.1908045977011494, + "grad_norm": 13.951673209631243, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 62641896, + "step": 694 + }, + { + "epoch": 3.1908045977011494, + "loss": 0.10015565156936646, + "loss_ce": 0.00011902584810741246, + "loss_iou": 0.380859375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 62641896, + "step": 694 + }, + { + "epoch": 3.1954022988505746, + "grad_norm": 8.105786030944435, + "learning_rate": 5e-06, + "loss": 0.171, + "num_input_tokens_seen": 62731508, + "step": 695 + }, + { + "epoch": 3.1954022988505746, + "loss": 0.2043384611606598, + "loss_ce": 0.0001148240189650096, + "loss_iou": 0.37109375, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 62731508, + "step": 695 + }, + { + "epoch": 3.2, + "grad_norm": 29.137424088774004, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 62821956, + "step": 696 + }, + { + "epoch": 3.2, + "loss": 0.13569773733615875, + "loss_ce": 7.761328015476465e-05, + "loss_iou": 0.267578125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 62821956, + "step": 696 + }, + { + "epoch": 3.2045977011494253, + "grad_norm": 15.53880122139471, + "learning_rate": 5e-06, + "loss": 0.1688, + "num_input_tokens_seen": 62912308, + "step": 697 + }, + { + "epoch": 3.2045977011494253, + "loss": 0.1253717541694641, + "loss_ce": 8.183569298125803e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 62912308, + "step": 697 + }, + { + "epoch": 3.2091954022988505, + "grad_norm": 10.527367140209382, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 63002620, + "step": 698 + }, + { + "epoch": 3.2091954022988505, + "loss": 0.1609758883714676, + "loss_ce": 5.669322854373604e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 63002620, + "step": 698 + }, + { + "epoch": 3.213793103448276, + "grad_norm": 18.955512801085465, + "learning_rate": 5e-06, + "loss": 0.0993, + "num_input_tokens_seen": 63093068, + "step": 699 + }, + { + "epoch": 3.213793103448276, + "loss": 0.09753237664699554, + "loss_ce": 2.871520700864494e-05, + "loss_iou": 0.431640625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 63093068, + "step": 699 + }, + { + "epoch": 3.218390804597701, + "grad_norm": 19.835239072673776, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 63183464, + "step": 700 + }, + { + "epoch": 3.218390804597701, + "loss": 0.14680443704128265, + "loss_ce": 0.00010644025314832106, + "loss_iou": 0.306640625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 63183464, + "step": 700 + }, + { + "epoch": 3.2229885057471264, + "grad_norm": 8.057607073965688, + "learning_rate": 5e-06, + "loss": 0.1078, + "num_input_tokens_seen": 63273704, + "step": 701 + }, + { + "epoch": 3.2229885057471264, + "loss": 0.10005685687065125, + "loss_ce": 1.2611541023943573e-05, + "loss_iou": 0.28125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 63273704, + "step": 701 + }, + { + "epoch": 3.227586206896552, + "grad_norm": 8.719404497052812, + "learning_rate": 5e-06, + "loss": 0.1474, + "num_input_tokens_seen": 63363992, + "step": 702 + }, + { + "epoch": 3.227586206896552, + "loss": 0.12116096913814545, + "loss_ce": 3.67055217793677e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 63363992, + "step": 702 + }, + { + "epoch": 3.232183908045977, + "grad_norm": 11.659344031825642, + "learning_rate": 5e-06, + "loss": 0.1215, + "num_input_tokens_seen": 63454440, + "step": 703 + }, + { + "epoch": 3.232183908045977, + "loss": 0.0965924859046936, + "loss_ce": 1.9609713490353897e-05, + "loss_iou": 0.392578125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 63454440, + "step": 703 + }, + { + "epoch": 3.2367816091954023, + "grad_norm": 8.032198591188585, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 63544912, + "step": 704 + }, + { + "epoch": 3.2367816091954023, + "loss": 0.1306506246328354, + "loss_ce": 6.59109718981199e-05, + "loss_iou": 0.427734375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 63544912, + "step": 704 + }, + { + "epoch": 3.2413793103448274, + "grad_norm": 6.297596133480101, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 63634348, + "step": 705 + }, + { + "epoch": 3.2413793103448274, + "loss": 0.19925595819950104, + "loss_ce": 9.824423614190891e-05, + "loss_iou": 0.50390625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 63634348, + "step": 705 + }, + { + "epoch": 3.245977011494253, + "grad_norm": 34.35621373849989, + "learning_rate": 5e-06, + "loss": 0.2007, + "num_input_tokens_seen": 63724764, + "step": 706 + }, + { + "epoch": 3.245977011494253, + "loss": 0.22871080040931702, + "loss_ce": 0.0001493933523306623, + "loss_iou": 0.33984375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 63724764, + "step": 706 + }, + { + "epoch": 3.250574712643678, + "grad_norm": 11.328882741656939, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 63815068, + "step": 707 + }, + { + "epoch": 3.250574712643678, + "loss": 0.12251278012990952, + "loss_ce": 4.574016566039063e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 63815068, + "step": 707 + }, + { + "epoch": 3.2551724137931033, + "grad_norm": 2.795600172154129, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 63905392, + "step": 708 + }, + { + "epoch": 3.2551724137931033, + "loss": 0.1475527286529541, + "loss_ce": 0.00013756597763858736, + "loss_iou": 0.36328125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 63905392, + "step": 708 + }, + { + "epoch": 3.259770114942529, + "grad_norm": 6.617540471617317, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 63995760, + "step": 709 + }, + { + "epoch": 3.259770114942529, + "loss": 0.1187780499458313, + "loss_ce": 0.00021725612168665975, + "loss_iou": 0.369140625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 63995760, + "step": 709 + }, + { + "epoch": 3.264367816091954, + "grad_norm": 3.758683889886017, + "learning_rate": 5e-06, + "loss": 0.0999, + "num_input_tokens_seen": 64086168, + "step": 710 + }, + { + "epoch": 3.264367816091954, + "loss": 0.12687164545059204, + "loss_ce": 0.0001016306850942783, + "loss_iou": 0.451171875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 64086168, + "step": 710 + }, + { + "epoch": 3.268965517241379, + "grad_norm": 7.276014322969384, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 64174996, + "step": 711 + }, + { + "epoch": 3.268965517241379, + "loss": 0.10789632797241211, + "loss_ce": 1.669170342211146e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 64174996, + "step": 711 + }, + { + "epoch": 3.2735632183908048, + "grad_norm": 9.531874918868848, + "learning_rate": 5e-06, + "loss": 0.1484, + "num_input_tokens_seen": 64265440, + "step": 712 + }, + { + "epoch": 3.2735632183908048, + "loss": 0.14500431716442108, + "loss_ce": 4.581706161843613e-05, + "loss_iou": 0.427734375, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 64265440, + "step": 712 + }, + { + "epoch": 3.27816091954023, + "grad_norm": 4.769697015733171, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 64355800, + "step": 713 + }, + { + "epoch": 3.27816091954023, + "loss": 0.1640397310256958, + "loss_ce": 6.877434498164803e-05, + "loss_iou": 0.421875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 64355800, + "step": 713 + }, + { + "epoch": 3.282758620689655, + "grad_norm": 10.346574150082782, + "learning_rate": 5e-06, + "loss": 0.147, + "num_input_tokens_seen": 64446020, + "step": 714 + }, + { + "epoch": 3.282758620689655, + "loss": 0.13765498995780945, + "loss_ce": 2.0713385310955346e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 64446020, + "step": 714 + }, + { + "epoch": 3.2873563218390807, + "grad_norm": 3.797921190886921, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 64536336, + "step": 715 + }, + { + "epoch": 3.2873563218390807, + "loss": 0.11108782887458801, + "loss_ce": 6.487606151495129e-05, + "loss_iou": 0.34375, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 64536336, + "step": 715 + }, + { + "epoch": 3.291954022988506, + "grad_norm": 7.680464395220625, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 64626672, + "step": 716 + }, + { + "epoch": 3.291954022988506, + "loss": 0.0874263346195221, + "loss_ce": 8.502809214405715e-05, + "loss_iou": 0.296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 64626672, + "step": 716 + }, + { + "epoch": 3.296551724137931, + "grad_norm": 29.00223362689999, + "learning_rate": 5e-06, + "loss": 0.1172, + "num_input_tokens_seen": 64717188, + "step": 717 + }, + { + "epoch": 3.296551724137931, + "loss": 0.09892787039279938, + "loss_ce": 2.0401252186275087e-05, + "loss_iou": 0.380859375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 64717188, + "step": 717 + }, + { + "epoch": 3.301149425287356, + "grad_norm": 2.963145490619517, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 64806948, + "step": 718 + }, + { + "epoch": 3.301149425287356, + "loss": 0.13925355672836304, + "loss_ce": 1.7103782738558948e-05, + "loss_iou": 0.34375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 64806948, + "step": 718 + }, + { + "epoch": 3.3057471264367817, + "grad_norm": 3.512627488510696, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 64897416, + "step": 719 + }, + { + "epoch": 3.3057471264367817, + "loss": 0.09657322615385056, + "loss_ce": 1.5609241017955355e-05, + "loss_iou": 0.34375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 64897416, + "step": 719 + }, + { + "epoch": 3.310344827586207, + "grad_norm": 9.222756441936497, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 64986932, + "step": 720 + }, + { + "epoch": 3.310344827586207, + "loss": 0.11704221367835999, + "loss_ce": 3.782210478675552e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 64986932, + "step": 720 + }, + { + "epoch": 3.314942528735632, + "grad_norm": 9.715496271101662, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 65077284, + "step": 721 + }, + { + "epoch": 3.314942528735632, + "loss": 0.09352642297744751, + "loss_ce": 9.685942495707422e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 65077284, + "step": 721 + }, + { + "epoch": 3.3195402298850576, + "grad_norm": 5.296050089954301, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 65167528, + "step": 722 + }, + { + "epoch": 3.3195402298850576, + "loss": 0.19446220993995667, + "loss_ce": 4.202499439998064e-06, + "loss_iou": 0.361328125, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 65167528, + "step": 722 + }, + { + "epoch": 3.3241379310344827, + "grad_norm": 4.298113327898983, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 65257968, + "step": 723 + }, + { + "epoch": 3.3241379310344827, + "loss": 0.12031020224094391, + "loss_ce": 9.91147771856049e-06, + "loss_iou": 0.41015625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 65257968, + "step": 723 + }, + { + "epoch": 3.328735632183908, + "grad_norm": 5.779893006341727, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 65348448, + "step": 724 + }, + { + "epoch": 3.328735632183908, + "loss": 0.19137105345726013, + "loss_ce": 5.6363111070822924e-05, + "loss_iou": 0.34375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 65348448, + "step": 724 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 22.187024011658394, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 65438876, + "step": 725 + }, + { + "epoch": 3.3333333333333335, + "loss": 0.06496837735176086, + "loss_ce": 2.6973326384904794e-05, + "loss_iou": 0.359375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 65438876, + "step": 725 + }, + { + "epoch": 3.3379310344827586, + "grad_norm": 12.062384100708867, + "learning_rate": 5e-06, + "loss": 0.1188, + "num_input_tokens_seen": 65529280, + "step": 726 + }, + { + "epoch": 3.3379310344827586, + "loss": 0.1274305284023285, + "loss_ce": 1.9637809600681067e-05, + "loss_iou": 0.359375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 65529280, + "step": 726 + }, + { + "epoch": 3.342528735632184, + "grad_norm": 3.5029991469109394, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 65619760, + "step": 727 + }, + { + "epoch": 3.342528735632184, + "loss": 0.11032354086637497, + "loss_ce": 1.7757560272002593e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 65619760, + "step": 727 + }, + { + "epoch": 3.3471264367816094, + "grad_norm": 4.52005838961759, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 65710148, + "step": 728 + }, + { + "epoch": 3.3471264367816094, + "loss": 0.2110876739025116, + "loss_ce": 0.00016542623052373528, + "loss_iou": 0.3203125, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 65710148, + "step": 728 + }, + { + "epoch": 3.3517241379310345, + "grad_norm": 6.440524482881178, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 65800676, + "step": 729 + }, + { + "epoch": 3.3517241379310345, + "loss": 0.10761566460132599, + "loss_ce": 1.0684560038498603e-05, + "loss_iou": 0.458984375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 65800676, + "step": 729 + }, + { + "epoch": 3.3563218390804597, + "grad_norm": 4.6993643427058425, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 65890984, + "step": 730 + }, + { + "epoch": 3.3563218390804597, + "loss": 0.15184611082077026, + "loss_ce": 0.003957913722842932, + "loss_iou": 0.421875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 65890984, + "step": 730 + }, + { + "epoch": 3.360919540229885, + "grad_norm": 5.235041599676305, + "learning_rate": 5e-06, + "loss": 0.0842, + "num_input_tokens_seen": 65981292, + "step": 731 + }, + { + "epoch": 3.360919540229885, + "loss": 0.08815214037895203, + "loss_ce": 6.314841448329389e-05, + "loss_iou": 0.341796875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 65981292, + "step": 731 + }, + { + "epoch": 3.3655172413793104, + "grad_norm": 2.2738505705233805, + "learning_rate": 5e-06, + "loss": 0.1447, + "num_input_tokens_seen": 66071596, + "step": 732 + }, + { + "epoch": 3.3655172413793104, + "loss": 0.10933637619018555, + "loss_ce": 2.240698267996777e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 66071596, + "step": 732 + }, + { + "epoch": 3.3701149425287356, + "grad_norm": 3.85647849045901, + "learning_rate": 5e-06, + "loss": 0.1753, + "num_input_tokens_seen": 66162068, + "step": 733 + }, + { + "epoch": 3.3701149425287356, + "loss": 0.20265939831733704, + "loss_ce": 8.371137664653361e-05, + "loss_iou": 0.4140625, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 66162068, + "step": 733 + }, + { + "epoch": 3.374712643678161, + "grad_norm": 8.57033500179484, + "learning_rate": 5e-06, + "loss": 0.1791, + "num_input_tokens_seen": 66252320, + "step": 734 + }, + { + "epoch": 3.374712643678161, + "loss": 0.14232100546360016, + "loss_ce": 1.754688855726272e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 66252320, + "step": 734 + }, + { + "epoch": 3.3793103448275863, + "grad_norm": 9.318942386130336, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 66342832, + "step": 735 + }, + { + "epoch": 3.3793103448275863, + "loss": 0.11867457628250122, + "loss_ce": 2.2227392037166283e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 66342832, + "step": 735 + }, + { + "epoch": 3.3839080459770114, + "grad_norm": 19.001081916459317, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 66433196, + "step": 736 + }, + { + "epoch": 3.3839080459770114, + "loss": 0.12269207835197449, + "loss_ce": 1.1412482308514882e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 66433196, + "step": 736 + }, + { + "epoch": 3.3885057471264366, + "grad_norm": 24.510560741814796, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 66523456, + "step": 737 + }, + { + "epoch": 3.3885057471264366, + "loss": 0.12029193341732025, + "loss_ce": 6.896796548971906e-06, + "loss_iou": 0.38671875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 66523456, + "step": 737 + }, + { + "epoch": 3.393103448275862, + "grad_norm": 5.6235077208306095, + "learning_rate": 5e-06, + "loss": 0.1803, + "num_input_tokens_seen": 66613868, + "step": 738 + }, + { + "epoch": 3.393103448275862, + "loss": 0.16011008620262146, + "loss_ce": 1.4873921827529557e-05, + "loss_iou": 0.40625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 66613868, + "step": 738 + }, + { + "epoch": 3.3977011494252873, + "grad_norm": 5.027102839955047, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 66704308, + "step": 739 + }, + { + "epoch": 3.3977011494252873, + "loss": 0.17483371496200562, + "loss_ce": 0.00015872399671934545, + "loss_iou": 0.31640625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 66704308, + "step": 739 + }, + { + "epoch": 3.4022988505747125, + "grad_norm": 7.661403465008269, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 66794608, + "step": 740 + }, + { + "epoch": 3.4022988505747125, + "loss": 0.1427173763513565, + "loss_ce": 1.7180107533931732e-05, + "loss_iou": 0.435546875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 66794608, + "step": 740 + }, + { + "epoch": 3.406896551724138, + "grad_norm": 10.312505826375784, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 66884952, + "step": 741 + }, + { + "epoch": 3.406896551724138, + "loss": 0.19520951807498932, + "loss_ce": 1.9079348930972628e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 66884952, + "step": 741 + }, + { + "epoch": 3.4114942528735632, + "grad_norm": 13.89068989533051, + "learning_rate": 5e-06, + "loss": 0.1618, + "num_input_tokens_seen": 66975348, + "step": 742 + }, + { + "epoch": 3.4114942528735632, + "loss": 0.15027475357055664, + "loss_ce": 0.00012826563033740968, + "loss_iou": 0.439453125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 66975348, + "step": 742 + }, + { + "epoch": 3.4160919540229884, + "grad_norm": 4.196181753619327, + "learning_rate": 5e-06, + "loss": 0.161, + "num_input_tokens_seen": 67065664, + "step": 743 + }, + { + "epoch": 3.4160919540229884, + "loss": 0.13950249552726746, + "loss_ce": 0.00028130554710514843, + "loss_iou": 0.390625, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 67065664, + "step": 743 + }, + { + "epoch": 3.420689655172414, + "grad_norm": 211.3346828186652, + "learning_rate": 5e-06, + "loss": 0.137, + "num_input_tokens_seen": 67155980, + "step": 744 + }, + { + "epoch": 3.420689655172414, + "loss": 0.17403674125671387, + "loss_ce": 2.5510042178211734e-05, + "loss_iou": 0.3984375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 67155980, + "step": 744 + }, + { + "epoch": 3.425287356321839, + "grad_norm": 13.754629395643422, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 67246368, + "step": 745 + }, + { + "epoch": 3.425287356321839, + "loss": 0.13543261587619781, + "loss_ce": 0.0008653545519337058, + "loss_iou": 0.38671875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 67246368, + "step": 745 + }, + { + "epoch": 3.4298850574712643, + "grad_norm": 3.5887963625227863, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 67336752, + "step": 746 + }, + { + "epoch": 3.4298850574712643, + "loss": 0.08411843329668045, + "loss_ce": 1.1989444828941487e-05, + "loss_iou": 0.33984375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 67336752, + "step": 746 + }, + { + "epoch": 3.43448275862069, + "grad_norm": 7.58822953581897, + "learning_rate": 5e-06, + "loss": 0.1642, + "num_input_tokens_seen": 67427208, + "step": 747 + }, + { + "epoch": 3.43448275862069, + "loss": 0.15234288573265076, + "loss_ce": 6.0174254031153396e-05, + "loss_iou": 0.376953125, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 67427208, + "step": 747 + }, + { + "epoch": 3.439080459770115, + "grad_norm": 11.338073894134098, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 67517560, + "step": 748 + }, + { + "epoch": 3.439080459770115, + "loss": 0.13652345538139343, + "loss_ce": 1.832299676607363e-05, + "loss_iou": 0.296875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 67517560, + "step": 748 + }, + { + "epoch": 3.44367816091954, + "grad_norm": 6.967548952942209, + "learning_rate": 5e-06, + "loss": 0.1256, + "num_input_tokens_seen": 67607920, + "step": 749 + }, + { + "epoch": 3.44367816091954, + "loss": 0.08051057159900665, + "loss_ce": 2.0457886421354488e-05, + "loss_iou": 0.40234375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 67607920, + "step": 749 + }, + { + "epoch": 3.4482758620689653, + "grad_norm": 6.965059583863805, + "learning_rate": 5e-06, + "loss": 0.1495, + "num_input_tokens_seen": 67698360, + "step": 750 + }, + { + "epoch": 3.4482758620689653, + "eval_seeclick_CIoU": 0.484468474984169, + "eval_seeclick_GIoU": 0.467425137758255, + "eval_seeclick_IoU": 0.5228271484375, + "eval_seeclick_MAE_all": 0.05892105959355831, + "eval_seeclick_MAE_h": 0.04424383118748665, + "eval_seeclick_MAE_w": 0.1074240393936634, + "eval_seeclick_MAE_x_boxes": 0.10459480062127113, + "eval_seeclick_MAE_y_boxes": 0.04638782888650894, + "eval_seeclick_NUM_probability": 0.9999992251396179, + "eval_seeclick_inside_bbox": 0.8764204680919647, + "eval_seeclick_loss": 0.3601701557636261, + "eval_seeclick_loss_ce": 0.06872005760669708, + "eval_seeclick_loss_iou": 0.46417236328125, + "eval_seeclick_loss_num": 0.061737060546875, + "eval_seeclick_loss_xval": 0.30859375, + "eval_seeclick_runtime": 76.4319, + "eval_seeclick_samples_per_second": 0.563, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 67698360, + "step": 750 + }, + { + "epoch": 3.4482758620689653, + "eval_icons_CIoU": 0.5398552417755127, + "eval_icons_GIoU": 0.5399449467658997, + "eval_icons_IoU": 0.5800551474094391, + "eval_icons_MAE_all": 0.044397372752428055, + "eval_icons_MAE_h": 0.0814935453236103, + "eval_icons_MAE_w": 0.0699278824031353, + "eval_icons_MAE_x_boxes": 0.06737468019127846, + "eval_icons_MAE_y_boxes": 0.08113038912415504, + "eval_icons_NUM_probability": 0.9999993741512299, + "eval_icons_inside_bbox": 0.7795138955116272, + "eval_icons_loss": 0.22252009809017181, + "eval_icons_loss_ce": 7.370136074769107e-07, + "eval_icons_loss_iou": 0.4132080078125, + "eval_icons_loss_num": 0.047031402587890625, + "eval_icons_loss_xval": 0.234954833984375, + "eval_icons_runtime": 96.2118, + "eval_icons_samples_per_second": 0.52, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 67698360, + "step": 750 + }, + { + "epoch": 3.4482758620689653, + "eval_screenspot_CIoU": 0.41030613084634143, + "eval_screenspot_GIoU": 0.3897818972667058, + "eval_screenspot_IoU": 0.4746982256571452, + "eval_screenspot_MAE_all": 0.08831173926591873, + "eval_screenspot_MAE_h": 0.0866956611474355, + "eval_screenspot_MAE_w": 0.17451148480176926, + "eval_screenspot_MAE_x_boxes": 0.16739005843798319, + "eval_screenspot_MAE_y_boxes": 0.08148317784070969, + "eval_screenspot_NUM_probability": 0.9999986886978149, + "eval_screenspot_inside_bbox": 0.753333330154419, + "eval_screenspot_loss": 0.44167619943618774, + "eval_screenspot_loss_ce": 0.00013617607813406116, + "eval_screenspot_loss_iou": 0.4222005208333333, + "eval_screenspot_loss_num": 0.09005228678385417, + "eval_screenspot_loss_xval": 0.4504191080729167, + "eval_screenspot_runtime": 157.7137, + "eval_screenspot_samples_per_second": 0.564, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 67698360, + "step": 750 + }, + { + "epoch": 3.4482758620689653, + "eval_compot_CIoU": 0.4846196174621582, + "eval_compot_GIoU": 0.46163466572761536, + "eval_compot_IoU": 0.5454895496368408, + "eval_compot_MAE_all": 0.05586290545761585, + "eval_compot_MAE_h": 0.07578141614794731, + "eval_compot_MAE_w": 0.11417003348469734, + "eval_compot_MAE_x_boxes": 0.09730804339051247, + "eval_compot_MAE_y_boxes": 0.07610557973384857, + "eval_compot_NUM_probability": 0.9999949038028717, + "eval_compot_inside_bbox": 0.7638888955116272, + "eval_compot_loss": 0.30647769570350647, + "eval_compot_loss_ce": 0.012538184644654393, + "eval_compot_loss_iou": 0.5220947265625, + "eval_compot_loss_num": 0.050449371337890625, + "eval_compot_loss_xval": 0.2524261474609375, + "eval_compot_runtime": 90.9711, + "eval_compot_samples_per_second": 0.55, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 67698360, + "step": 750 + }, + { + "epoch": 3.4482758620689653, + "loss": 0.18523982167243958, + "loss_ce": 0.007658033166080713, + "loss_iou": 0.55078125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 67698360, + "step": 750 + }, + { + "epoch": 3.452873563218391, + "grad_norm": 5.387146730993583, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 67788820, + "step": 751 + }, + { + "epoch": 3.452873563218391, + "loss": 0.1267109513282776, + "loss_ce": 3.247974018449895e-05, + "loss_iou": 0.373046875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 67788820, + "step": 751 + }, + { + "epoch": 3.457471264367816, + "grad_norm": 13.911926672220762, + "learning_rate": 5e-06, + "loss": 0.1078, + "num_input_tokens_seen": 67879216, + "step": 752 + }, + { + "epoch": 3.457471264367816, + "loss": 0.10455399006605148, + "loss_ce": 6.180434866109863e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 67879216, + "step": 752 + }, + { + "epoch": 3.462068965517241, + "grad_norm": 5.6856250871143486, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 67969500, + "step": 753 + }, + { + "epoch": 3.462068965517241, + "loss": 0.19283702969551086, + "loss_ce": 8.800413343124092e-05, + "loss_iou": 0.5, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 67969500, + "step": 753 + }, + { + "epoch": 3.466666666666667, + "grad_norm": 7.868167270167063, + "learning_rate": 5e-06, + "loss": 0.0733, + "num_input_tokens_seen": 68059952, + "step": 754 + }, + { + "epoch": 3.466666666666667, + "loss": 0.05673067271709442, + "loss_ce": 1.3753228813584428e-05, + "loss_iou": 0.34375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 68059952, + "step": 754 + }, + { + "epoch": 3.471264367816092, + "grad_norm": 12.024869069661348, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 68150204, + "step": 755 + }, + { + "epoch": 3.471264367816092, + "loss": 0.16925933957099915, + "loss_ce": 8.86403904587496e-06, + "loss_iou": 0.302734375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 68150204, + "step": 755 + }, + { + "epoch": 3.475862068965517, + "grad_norm": 10.067626482591868, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 68240748, + "step": 756 + }, + { + "epoch": 3.475862068965517, + "loss": 0.12415396422147751, + "loss_ce": 8.463855920126662e-06, + "loss_iou": 0.310546875, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 68240748, + "step": 756 + }, + { + "epoch": 3.4804597701149427, + "grad_norm": 13.763593458927147, + "learning_rate": 5e-06, + "loss": 0.2071, + "num_input_tokens_seen": 68331096, + "step": 757 + }, + { + "epoch": 3.4804597701149427, + "loss": 0.22060084342956543, + "loss_ce": 1.9778890418820083e-05, + "loss_iou": 0.392578125, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 68331096, + "step": 757 + }, + { + "epoch": 3.485057471264368, + "grad_norm": 37.000929048972885, + "learning_rate": 5e-06, + "loss": 0.1891, + "num_input_tokens_seen": 68421584, + "step": 758 + }, + { + "epoch": 3.485057471264368, + "loss": 0.1451530158519745, + "loss_ce": 7.243985601235181e-05, + "loss_iou": 0.40625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 68421584, + "step": 758 + }, + { + "epoch": 3.489655172413793, + "grad_norm": 12.987634056120026, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 68511940, + "step": 759 + }, + { + "epoch": 3.489655172413793, + "loss": 0.051099397242069244, + "loss_ce": 1.2974151104572229e-05, + "loss_iou": 0.37890625, + "loss_num": 0.01025390625, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 68511940, + "step": 759 + }, + { + "epoch": 3.4942528735632186, + "grad_norm": 4.05554262967813, + "learning_rate": 5e-06, + "loss": 0.1256, + "num_input_tokens_seen": 68602372, + "step": 760 + }, + { + "epoch": 3.4942528735632186, + "loss": 0.10796726495027542, + "loss_ce": 1.1330963388900273e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 68602372, + "step": 760 + }, + { + "epoch": 3.4988505747126437, + "grad_norm": 2.872197937123995, + "learning_rate": 5e-06, + "loss": 0.0834, + "num_input_tokens_seen": 68692692, + "step": 761 + }, + { + "epoch": 3.4988505747126437, + "loss": 0.10546976327896118, + "loss_ce": 1.009384050121298e-06, + "loss_iou": 0.380859375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 68692692, + "step": 761 + }, + { + "epoch": 3.503448275862069, + "grad_norm": 2.7436969971531573, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 68783112, + "step": 762 + }, + { + "epoch": 3.503448275862069, + "loss": 0.0926920622587204, + "loss_ce": 1.017252270685276e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 68783112, + "step": 762 + }, + { + "epoch": 3.508045977011494, + "grad_norm": 12.740068722129191, + "learning_rate": 5e-06, + "loss": 0.1955, + "num_input_tokens_seen": 68872560, + "step": 763 + }, + { + "epoch": 3.508045977011494, + "loss": 0.20338374376296997, + "loss_ce": 0.0019982485100626945, + "loss_iou": 0.310546875, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 68872560, + "step": 763 + }, + { + "epoch": 3.5126436781609196, + "grad_norm": 7.842380690749216, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 68962972, + "step": 764 + }, + { + "epoch": 3.5126436781609196, + "loss": 0.14544086158275604, + "loss_ce": 2.4600009055575356e-05, + "loss_iou": 0.423828125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 68962972, + "step": 764 + }, + { + "epoch": 3.5172413793103448, + "grad_norm": 5.499498755138326, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 69053244, + "step": 765 + }, + { + "epoch": 3.5172413793103448, + "loss": 0.09770219027996063, + "loss_ce": 4.593187622958794e-05, + "loss_iou": 0.373046875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 69053244, + "step": 765 + }, + { + "epoch": 3.5218390804597703, + "grad_norm": 4.548325999877122, + "learning_rate": 5e-06, + "loss": 0.0985, + "num_input_tokens_seen": 69143592, + "step": 766 + }, + { + "epoch": 3.5218390804597703, + "loss": 0.06963898241519928, + "loss_ce": 2.8384643883327954e-05, + "loss_iou": 0.298828125, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 69143592, + "step": 766 + }, + { + "epoch": 3.5264367816091955, + "grad_norm": 13.276586752236186, + "learning_rate": 5e-06, + "loss": 0.1276, + "num_input_tokens_seen": 69233884, + "step": 767 + }, + { + "epoch": 3.5264367816091955, + "loss": 0.14585262537002563, + "loss_ce": 0.0010162012185901403, + "loss_iou": 0.267578125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 69233884, + "step": 767 + }, + { + "epoch": 3.5310344827586206, + "grad_norm": 3.288042195364263, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 69324272, + "step": 768 + }, + { + "epoch": 3.5310344827586206, + "loss": 0.10515020787715912, + "loss_ce": 0.00013922779180575162, + "loss_iou": 0.470703125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 69324272, + "step": 768 + }, + { + "epoch": 3.535632183908046, + "grad_norm": 20.003220474729524, + "learning_rate": 5e-06, + "loss": 0.1411, + "num_input_tokens_seen": 69413868, + "step": 769 + }, + { + "epoch": 3.535632183908046, + "loss": 0.1137920469045639, + "loss_ce": 2.2520007405546494e-05, + "loss_iou": 0.3984375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 69413868, + "step": 769 + }, + { + "epoch": 3.5402298850574714, + "grad_norm": 14.09650460702761, + "learning_rate": 5e-06, + "loss": 0.1811, + "num_input_tokens_seen": 69504204, + "step": 770 + }, + { + "epoch": 3.5402298850574714, + "loss": 0.16392265260219574, + "loss_ce": 1.2739261364913546e-05, + "loss_iou": 0.41796875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 69504204, + "step": 770 + }, + { + "epoch": 3.5448275862068965, + "grad_norm": 5.589999305942861, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 69594556, + "step": 771 + }, + { + "epoch": 3.5448275862068965, + "loss": 0.1477949023246765, + "loss_ce": 2.8777922125300393e-05, + "loss_iou": 0.390625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 69594556, + "step": 771 + }, + { + "epoch": 3.5494252873563217, + "grad_norm": 6.623212624230002, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 69684820, + "step": 772 + }, + { + "epoch": 3.5494252873563217, + "loss": 0.06927981972694397, + "loss_ce": 4.912180884275585e-06, + "loss_iou": 0.30859375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 69684820, + "step": 772 + }, + { + "epoch": 3.5540229885057473, + "grad_norm": 6.206244364237178, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 69773688, + "step": 773 + }, + { + "epoch": 3.5540229885057473, + "loss": 0.12721136212348938, + "loss_ce": 1.4094775906414725e-05, + "loss_iou": 0.42578125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 69773688, + "step": 773 + }, + { + "epoch": 3.5586206896551724, + "grad_norm": 6.055994010471258, + "learning_rate": 5e-06, + "loss": 0.1533, + "num_input_tokens_seen": 69864012, + "step": 774 + }, + { + "epoch": 3.5586206896551724, + "loss": 0.1802106499671936, + "loss_ce": 9.590051922714338e-05, + "loss_iou": 0.375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 69864012, + "step": 774 + }, + { + "epoch": 3.5632183908045976, + "grad_norm": 4.739416821882011, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 69954324, + "step": 775 + }, + { + "epoch": 3.5632183908045976, + "loss": 0.16380281746387482, + "loss_ce": 0.0002896270889323205, + "loss_iou": 0.384765625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 69954324, + "step": 775 + }, + { + "epoch": 3.5678160919540227, + "grad_norm": 8.653175282625192, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 70044580, + "step": 776 + }, + { + "epoch": 3.5678160919540227, + "loss": 0.09520787745714188, + "loss_ce": 8.29543569125235e-06, + "loss_iou": 0.333984375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 70044580, + "step": 776 + }, + { + "epoch": 3.5724137931034483, + "grad_norm": 21.81495303272478, + "learning_rate": 5e-06, + "loss": 0.1423, + "num_input_tokens_seen": 70134884, + "step": 777 + }, + { + "epoch": 3.5724137931034483, + "loss": 0.11333857476711273, + "loss_ce": 0.000103099322586786, + "loss_iou": 0.33203125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 70134884, + "step": 777 + }, + { + "epoch": 3.5770114942528735, + "grad_norm": 15.12571431595152, + "learning_rate": 5e-06, + "loss": 0.1656, + "num_input_tokens_seen": 70225164, + "step": 778 + }, + { + "epoch": 3.5770114942528735, + "loss": 0.2089380919933319, + "loss_ce": 1.4762490536668338e-05, + "loss_iou": 0.4609375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 70225164, + "step": 778 + }, + { + "epoch": 3.581609195402299, + "grad_norm": 10.383950864076555, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 70315464, + "step": 779 + }, + { + "epoch": 3.581609195402299, + "loss": 0.11370711028575897, + "loss_ce": 2.912801573984325e-05, + "loss_iou": 0.40625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 70315464, + "step": 779 + }, + { + "epoch": 3.586206896551724, + "grad_norm": 2.1421328839083684, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 70405788, + "step": 780 + }, + { + "epoch": 3.586206896551724, + "loss": 0.1896582543849945, + "loss_ce": 2.202134237450082e-05, + "loss_iou": 0.3203125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 70405788, + "step": 780 + }, + { + "epoch": 3.5908045977011493, + "grad_norm": 9.380525583505861, + "learning_rate": 5e-06, + "loss": 0.2045, + "num_input_tokens_seen": 70496136, + "step": 781 + }, + { + "epoch": 3.5908045977011493, + "loss": 0.21958574652671814, + "loss_ce": 1.1782582078012638e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 70496136, + "step": 781 + }, + { + "epoch": 3.5954022988505745, + "grad_norm": 3.9371848580056517, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 70586496, + "step": 782 + }, + { + "epoch": 3.5954022988505745, + "loss": 0.05908702313899994, + "loss_ce": 2.025284084083978e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 70586496, + "step": 782 + }, + { + "epoch": 3.6, + "grad_norm": 10.508397334914141, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 70676784, + "step": 783 + }, + { + "epoch": 3.6, + "loss": 0.11284907907247543, + "loss_ce": 1.0333604222978465e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 70676784, + "step": 783 + }, + { + "epoch": 3.6045977011494252, + "grad_norm": 12.149659037494654, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 70767128, + "step": 784 + }, + { + "epoch": 3.6045977011494252, + "loss": 0.10474172979593277, + "loss_ce": 5.399918336479459e-06, + "loss_iou": 0.44140625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 70767128, + "step": 784 + }, + { + "epoch": 3.609195402298851, + "grad_norm": 32.31492723615795, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 70857476, + "step": 785 + }, + { + "epoch": 3.609195402298851, + "loss": 0.11612387001514435, + "loss_ce": 4.4922308006789535e-06, + "loss_iou": 0.404296875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 70857476, + "step": 785 + }, + { + "epoch": 3.613793103448276, + "grad_norm": 13.784691745076751, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 70947968, + "step": 786 + }, + { + "epoch": 3.613793103448276, + "loss": 0.09685492515563965, + "loss_ce": 2.2650790924672037e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 70947968, + "step": 786 + }, + { + "epoch": 3.618390804597701, + "grad_norm": 5.938316209673259, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 71038404, + "step": 787 + }, + { + "epoch": 3.618390804597701, + "loss": 0.13221478462219238, + "loss_ce": 1.2635980965569615e-05, + "loss_iou": 0.421875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 71038404, + "step": 787 + }, + { + "epoch": 3.6229885057471263, + "grad_norm": 13.011958974161733, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 71128788, + "step": 788 + }, + { + "epoch": 3.6229885057471263, + "loss": 0.14329373836517334, + "loss_ce": 1.3719867638428695e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 71128788, + "step": 788 + }, + { + "epoch": 3.627586206896552, + "grad_norm": 11.936048890187932, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 71219196, + "step": 789 + }, + { + "epoch": 3.627586206896552, + "loss": 0.11237190663814545, + "loss_ce": 8.247944788308814e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 71219196, + "step": 789 + }, + { + "epoch": 3.632183908045977, + "grad_norm": 6.308330166762513, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 71309520, + "step": 790 + }, + { + "epoch": 3.632183908045977, + "loss": 0.1292654573917389, + "loss_ce": 5.403875547926873e-05, + "loss_iou": 0.341796875, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 71309520, + "step": 790 + }, + { + "epoch": 3.636781609195402, + "grad_norm": 5.137471316885716, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 71399780, + "step": 791 + }, + { + "epoch": 3.636781609195402, + "loss": 0.13990908861160278, + "loss_ce": 1.6500023775734007e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 71399780, + "step": 791 + }, + { + "epoch": 3.6413793103448278, + "grad_norm": 9.275105119831409, + "learning_rate": 5e-06, + "loss": 0.0812, + "num_input_tokens_seen": 71490120, + "step": 792 + }, + { + "epoch": 3.6413793103448278, + "loss": 0.06399580091238022, + "loss_ce": 3.09558781736996e-05, + "loss_iou": 0.34375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 71490120, + "step": 792 + }, + { + "epoch": 3.645977011494253, + "grad_norm": 11.0637012290359, + "learning_rate": 5e-06, + "loss": 0.1358, + "num_input_tokens_seen": 71580584, + "step": 793 + }, + { + "epoch": 3.645977011494253, + "loss": 0.15851935744285583, + "loss_ce": 4.156233626417816e-05, + "loss_iou": 0.421875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 71580584, + "step": 793 + }, + { + "epoch": 3.650574712643678, + "grad_norm": 16.625197592400877, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 71670196, + "step": 794 + }, + { + "epoch": 3.650574712643678, + "loss": 0.15688209235668182, + "loss_ce": 2.1741370801464655e-05, + "loss_iou": 0.380859375, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 71670196, + "step": 794 + }, + { + "epoch": 3.655172413793103, + "grad_norm": 5.3921763962194005, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 71760612, + "step": 795 + }, + { + "epoch": 3.655172413793103, + "loss": 0.10780765116214752, + "loss_ce": 4.304832600610098e-06, + "loss_iou": 0.349609375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 71760612, + "step": 795 + }, + { + "epoch": 3.659770114942529, + "grad_norm": 11.704393512510705, + "learning_rate": 5e-06, + "loss": 0.1639, + "num_input_tokens_seen": 71850916, + "step": 796 + }, + { + "epoch": 3.659770114942529, + "loss": 0.1306782066822052, + "loss_ce": 1.929806785483379e-06, + "loss_iou": 0.451171875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 71850916, + "step": 796 + }, + { + "epoch": 3.664367816091954, + "grad_norm": 16.992640998013997, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 71941204, + "step": 797 + }, + { + "epoch": 3.664367816091954, + "loss": 0.11268985271453857, + "loss_ce": 1.8956783605972305e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 71941204, + "step": 797 + }, + { + "epoch": 3.6689655172413795, + "grad_norm": 11.094649026288351, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 72031516, + "step": 798 + }, + { + "epoch": 3.6689655172413795, + "loss": 0.15380123257637024, + "loss_ce": 2.316595782758668e-05, + "loss_iou": 0.4765625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 72031516, + "step": 798 + }, + { + "epoch": 3.6735632183908047, + "grad_norm": 9.218816872173734, + "learning_rate": 5e-06, + "loss": 0.0814, + "num_input_tokens_seen": 72121944, + "step": 799 + }, + { + "epoch": 3.6735632183908047, + "loss": 0.10079614073038101, + "loss_ce": 2.710282933549024e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 72121944, + "step": 799 + }, + { + "epoch": 3.67816091954023, + "grad_norm": 15.692187876785969, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 72212332, + "step": 800 + }, + { + "epoch": 3.67816091954023, + "loss": 0.08001083880662918, + "loss_ce": 9.010436770040542e-06, + "loss_iou": 0.470703125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 72212332, + "step": 800 + }, + { + "epoch": 3.682758620689655, + "grad_norm": 3.364192050864013, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 72302688, + "step": 801 + }, + { + "epoch": 3.682758620689655, + "loss": 0.060924116522073746, + "loss_ce": 0.00010258361726300791, + "loss_iou": 0.359375, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 72302688, + "step": 801 + }, + { + "epoch": 3.6873563218390806, + "grad_norm": 13.859096616510685, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 72393104, + "step": 802 + }, + { + "epoch": 3.6873563218390806, + "loss": 0.1117105633020401, + "loss_ce": 1.622791023692116e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 72393104, + "step": 802 + }, + { + "epoch": 3.6919540229885057, + "grad_norm": 13.956089000343894, + "learning_rate": 5e-06, + "loss": 0.0844, + "num_input_tokens_seen": 72483416, + "step": 803 + }, + { + "epoch": 3.6919540229885057, + "loss": 0.0864303782582283, + "loss_ce": 4.60219871456502e-06, + "loss_iou": 0.39453125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 72483416, + "step": 803 + }, + { + "epoch": 3.696551724137931, + "grad_norm": 10.769508987312188, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 72573912, + "step": 804 + }, + { + "epoch": 3.696551724137931, + "loss": 0.12728144228458405, + "loss_ce": 7.876707968534902e-06, + "loss_iou": 0.3203125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 72573912, + "step": 804 + }, + { + "epoch": 3.7011494252873565, + "grad_norm": 3.244021220688608, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 72664244, + "step": 805 + }, + { + "epoch": 3.7011494252873565, + "loss": 0.14200298488140106, + "loss_ce": 5.0472979637561366e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 72664244, + "step": 805 + }, + { + "epoch": 3.7057471264367816, + "grad_norm": 4.828124192128039, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 72753736, + "step": 806 + }, + { + "epoch": 3.7057471264367816, + "loss": 0.0866774469614029, + "loss_ce": 7.529814411100233e-06, + "loss_iou": 0.35546875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 72753736, + "step": 806 + }, + { + "epoch": 3.7103448275862068, + "grad_norm": 10.090685737843089, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 72844080, + "step": 807 + }, + { + "epoch": 3.7103448275862068, + "loss": 0.14415577054023743, + "loss_ce": 3.650196231319569e-05, + "loss_iou": 0.416015625, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 72844080, + "step": 807 + }, + { + "epoch": 3.714942528735632, + "grad_norm": 11.901790776803901, + "learning_rate": 5e-06, + "loss": 0.196, + "num_input_tokens_seen": 72934468, + "step": 808 + }, + { + "epoch": 3.714942528735632, + "loss": 0.21217146515846252, + "loss_ce": 2.8517068130895495e-05, + "loss_iou": 0.44921875, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 72934468, + "step": 808 + }, + { + "epoch": 3.7195402298850575, + "grad_norm": 24.55270500907039, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 73024692, + "step": 809 + }, + { + "epoch": 3.7195402298850575, + "loss": 0.10771137475967407, + "loss_ce": 4.536272172117606e-05, + "loss_iou": 0.427734375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 73024692, + "step": 809 + }, + { + "epoch": 3.7241379310344827, + "grad_norm": 15.553335565571802, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 73115056, + "step": 810 + }, + { + "epoch": 3.7241379310344827, + "loss": 0.14749327301979065, + "loss_ce": 3.232985909562558e-05, + "loss_iou": 0.404296875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 73115056, + "step": 810 + }, + { + "epoch": 3.7287356321839082, + "grad_norm": 21.65190443149484, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 73205556, + "step": 811 + }, + { + "epoch": 3.7287356321839082, + "loss": 0.10954000800848007, + "loss_ce": 0.0008363968227058649, + "loss_iou": 0.3671875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 73205556, + "step": 811 + }, + { + "epoch": 3.7333333333333334, + "grad_norm": 3.28808735511797, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 73296004, + "step": 812 + }, + { + "epoch": 3.7333333333333334, + "loss": 0.09380966424942017, + "loss_ce": 1.3889082765672356e-05, + "loss_iou": 0.392578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 73296004, + "step": 812 + }, + { + "epoch": 3.7379310344827585, + "grad_norm": 7.828759411600456, + "learning_rate": 5e-06, + "loss": 0.1538, + "num_input_tokens_seen": 73386480, + "step": 813 + }, + { + "epoch": 3.7379310344827585, + "loss": 0.167840838432312, + "loss_ce": 0.0001162344342446886, + "loss_iou": 0.412109375, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 73386480, + "step": 813 + }, + { + "epoch": 3.7425287356321837, + "grad_norm": 27.164938076027006, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 73476720, + "step": 814 + }, + { + "epoch": 3.7425287356321837, + "loss": 0.13790945708751678, + "loss_ce": 6.155119626782835e-05, + "loss_iou": 0.46875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 73476720, + "step": 814 + }, + { + "epoch": 3.7471264367816093, + "grad_norm": 7.036276194280488, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 73566368, + "step": 815 + }, + { + "epoch": 3.7471264367816093, + "loss": 0.11970978230237961, + "loss_ce": 3.510632086545229e-05, + "loss_iou": 0.29296875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 73566368, + "step": 815 + }, + { + "epoch": 3.7517241379310344, + "grad_norm": 6.291798562723388, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 73656748, + "step": 816 + }, + { + "epoch": 3.7517241379310344, + "loss": 0.15857738256454468, + "loss_ce": 3.092413680860773e-05, + "loss_iou": 0.255859375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 73656748, + "step": 816 + }, + { + "epoch": 3.75632183908046, + "grad_norm": 4.395310325624689, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 73747096, + "step": 817 + }, + { + "epoch": 3.75632183908046, + "loss": 0.1381591111421585, + "loss_ce": 6.042053428245708e-06, + "loss_iou": 0.314453125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 73747096, + "step": 817 + }, + { + "epoch": 3.760919540229885, + "grad_norm": 4.574856648294534, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 73837368, + "step": 818 + }, + { + "epoch": 3.760919540229885, + "loss": 0.1333150714635849, + "loss_ce": 0.00028894448769278824, + "loss_iou": 0.419921875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 73837368, + "step": 818 + }, + { + "epoch": 3.7655172413793103, + "grad_norm": 20.895031812009428, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 73927844, + "step": 819 + }, + { + "epoch": 3.7655172413793103, + "loss": 0.10864730179309845, + "loss_ce": 3.5239758290117607e-05, + "loss_iou": 0.375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 73927844, + "step": 819 + }, + { + "epoch": 3.7701149425287355, + "grad_norm": 15.134371186789252, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 74018240, + "step": 820 + }, + { + "epoch": 3.7701149425287355, + "loss": 0.09329765290021896, + "loss_ce": 0.00020377803593873978, + "loss_iou": 0.27734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 74018240, + "step": 820 + }, + { + "epoch": 3.774712643678161, + "grad_norm": 15.175525511944207, + "learning_rate": 5e-06, + "loss": 0.1241, + "num_input_tokens_seen": 74108676, + "step": 821 + }, + { + "epoch": 3.774712643678161, + "loss": 0.13372212648391724, + "loss_ce": 7.039559568511322e-05, + "loss_iou": 0.400390625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 74108676, + "step": 821 + }, + { + "epoch": 3.779310344827586, + "grad_norm": 27.19521596346353, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 74199076, + "step": 822 + }, + { + "epoch": 3.779310344827586, + "loss": 0.14935675263404846, + "loss_ce": 0.00011052708578063175, + "loss_iou": 0.3828125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 74199076, + "step": 822 + }, + { + "epoch": 3.7839080459770114, + "grad_norm": 10.150253946725481, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 74288004, + "step": 823 + }, + { + "epoch": 3.7839080459770114, + "loss": 0.1666683554649353, + "loss_ce": 0.0001491915318183601, + "loss_iou": 0.51953125, + "loss_num": 0.033203125, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 74288004, + "step": 823 + }, + { + "epoch": 3.788505747126437, + "grad_norm": 3.7134569300100755, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 74378480, + "step": 824 + }, + { + "epoch": 3.788505747126437, + "loss": 0.118888720870018, + "loss_ce": 5.327704275259748e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 74378480, + "step": 824 + }, + { + "epoch": 3.793103448275862, + "grad_norm": 5.136391279757606, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 74468904, + "step": 825 + }, + { + "epoch": 3.793103448275862, + "loss": 0.10247914493083954, + "loss_ce": 0.00015370690380223095, + "loss_iou": 0.380859375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 74468904, + "step": 825 + }, + { + "epoch": 3.7977011494252872, + "grad_norm": 3.4776597012299777, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 74559308, + "step": 826 + }, + { + "epoch": 3.7977011494252872, + "loss": 0.08007881045341492, + "loss_ce": 0.00010749474313342944, + "loss_iou": 0.328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 74559308, + "step": 826 + }, + { + "epoch": 3.8022988505747124, + "grad_norm": 13.304246207016769, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 74649636, + "step": 827 + }, + { + "epoch": 3.8022988505747124, + "loss": 0.144487202167511, + "loss_ce": 1.7336749351670733e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 74649636, + "step": 827 + }, + { + "epoch": 3.806896551724138, + "grad_norm": 8.020543075324909, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 74740060, + "step": 828 + }, + { + "epoch": 3.806896551724138, + "loss": 0.09062141180038452, + "loss_ce": 1.4721297702635638e-05, + "loss_iou": 0.416015625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 74740060, + "step": 828 + }, + { + "epoch": 3.811494252873563, + "grad_norm": 14.35279728790665, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 74830480, + "step": 829 + }, + { + "epoch": 3.811494252873563, + "loss": 0.06638382375240326, + "loss_ce": 8.095349585346412e-06, + "loss_iou": 0.37109375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 74830480, + "step": 829 + }, + { + "epoch": 3.8160919540229887, + "grad_norm": 14.243365874652318, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 74920808, + "step": 830 + }, + { + "epoch": 3.8160919540229887, + "loss": 0.1177852526307106, + "loss_ce": 2.654241143318359e-06, + "loss_iou": 0.28125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 74920808, + "step": 830 + }, + { + "epoch": 3.820689655172414, + "grad_norm": 10.427621446470681, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 75011112, + "step": 831 + }, + { + "epoch": 3.820689655172414, + "loss": 0.09321253001689911, + "loss_ce": 0.00024072162341326475, + "loss_iou": 0.482421875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 75011112, + "step": 831 + }, + { + "epoch": 3.825287356321839, + "grad_norm": 3.5623265997795768, + "learning_rate": 5e-06, + "loss": 0.1278, + "num_input_tokens_seen": 75101356, + "step": 832 + }, + { + "epoch": 3.825287356321839, + "loss": 0.1735125035047531, + "loss_ce": 5.05839052493684e-05, + "loss_iou": 0.455078125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 75101356, + "step": 832 + }, + { + "epoch": 3.829885057471264, + "grad_norm": 4.146150429296354, + "learning_rate": 5e-06, + "loss": 0.1608, + "num_input_tokens_seen": 75190144, + "step": 833 + }, + { + "epoch": 3.829885057471264, + "loss": 0.1830693781375885, + "loss_ce": 0.0004521902301348746, + "loss_iou": 0.439453125, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 75190144, + "step": 833 + }, + { + "epoch": 3.8344827586206898, + "grad_norm": 6.028978250099342, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 75280528, + "step": 834 + }, + { + "epoch": 3.8344827586206898, + "loss": 0.12013056129217148, + "loss_ce": 4.389049718156457e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 75280528, + "step": 834 + }, + { + "epoch": 3.839080459770115, + "grad_norm": 5.005804908205812, + "learning_rate": 5e-06, + "loss": 0.1431, + "num_input_tokens_seen": 75370936, + "step": 835 + }, + { + "epoch": 3.839080459770115, + "loss": 0.17678186297416687, + "loss_ce": 2.404209772066679e-05, + "loss_iou": 0.41015625, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 75370936, + "step": 835 + }, + { + "epoch": 3.84367816091954, + "grad_norm": 11.780827894531459, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 75461384, + "step": 836 + }, + { + "epoch": 3.84367816091954, + "loss": 0.0846051275730133, + "loss_ce": 8.669264207128435e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 75461384, + "step": 836 + }, + { + "epoch": 3.8482758620689657, + "grad_norm": 9.132134187365157, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 75551680, + "step": 837 + }, + { + "epoch": 3.8482758620689657, + "loss": 0.12721320986747742, + "loss_ce": 6.894965736137237e-07, + "loss_iou": 0.37109375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 75551680, + "step": 837 + }, + { + "epoch": 3.852873563218391, + "grad_norm": 26.114035834080024, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 75642112, + "step": 838 + }, + { + "epoch": 3.852873563218391, + "loss": 0.08660604059696198, + "loss_ce": 5.8186371461488307e-05, + "loss_iou": 0.333984375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 75642112, + "step": 838 + }, + { + "epoch": 3.857471264367816, + "grad_norm": 11.804335104556394, + "learning_rate": 5e-06, + "loss": 0.0909, + "num_input_tokens_seen": 75732404, + "step": 839 + }, + { + "epoch": 3.857471264367816, + "loss": 0.06891857087612152, + "loss_ce": 9.877001502900384e-06, + "loss_iou": 0.4296875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 75732404, + "step": 839 + }, + { + "epoch": 3.862068965517241, + "grad_norm": 8.148540211603276, + "learning_rate": 5e-06, + "loss": 0.1853, + "num_input_tokens_seen": 75822708, + "step": 840 + }, + { + "epoch": 3.862068965517241, + "loss": 0.1470450758934021, + "loss_ce": 1.1382823686290067e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 75822708, + "step": 840 + }, + { + "epoch": 3.8666666666666667, + "grad_norm": 2.959579079163402, + "learning_rate": 5e-06, + "loss": 0.1515, + "num_input_tokens_seen": 75912948, + "step": 841 + }, + { + "epoch": 3.8666666666666667, + "loss": 0.14687013626098633, + "loss_ce": 4.280491793906549e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 75912948, + "step": 841 + }, + { + "epoch": 3.871264367816092, + "grad_norm": 3.2788587846844477, + "learning_rate": 5e-06, + "loss": 0.1256, + "num_input_tokens_seen": 76003272, + "step": 842 + }, + { + "epoch": 3.871264367816092, + "loss": 0.1434180587530136, + "loss_ce": 1.5959709344315343e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 76003272, + "step": 842 + }, + { + "epoch": 3.8758620689655174, + "grad_norm": 6.603617127888935, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 76093704, + "step": 843 + }, + { + "epoch": 3.8758620689655174, + "loss": 0.18056106567382812, + "loss_ce": 4.960661317454651e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 76093704, + "step": 843 + }, + { + "epoch": 3.8804597701149426, + "grad_norm": 2.2614424128970096, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 76183952, + "step": 844 + }, + { + "epoch": 3.8804597701149426, + "loss": 0.11200746148824692, + "loss_ce": 7.948267921165098e-06, + "loss_iou": 0.298828125, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 76183952, + "step": 844 + }, + { + "epoch": 3.8850574712643677, + "grad_norm": 9.237162950267727, + "learning_rate": 5e-06, + "loss": 0.1355, + "num_input_tokens_seen": 76274384, + "step": 845 + }, + { + "epoch": 3.8850574712643677, + "loss": 0.13711686432361603, + "loss_ce": 3.19016762659885e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 76274384, + "step": 845 + }, + { + "epoch": 3.889655172413793, + "grad_norm": 3.4174539263582626, + "learning_rate": 5e-06, + "loss": 0.1565, + "num_input_tokens_seen": 76364656, + "step": 846 + }, + { + "epoch": 3.889655172413793, + "loss": 0.15261296927928925, + "loss_ce": 5.560090721701272e-05, + "loss_iou": 0.4609375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 76364656, + "step": 846 + }, + { + "epoch": 3.8942528735632185, + "grad_norm": 5.269387019291706, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 76455084, + "step": 847 + }, + { + "epoch": 3.8942528735632185, + "loss": 0.12015949189662933, + "loss_ce": 4.230613194522448e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 76455084, + "step": 847 + }, + { + "epoch": 3.8988505747126436, + "grad_norm": 33.611586771121246, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 76545436, + "step": 848 + }, + { + "epoch": 3.8988505747126436, + "loss": 0.11951969563961029, + "loss_ce": 1.2851842257077806e-05, + "loss_iou": 0.28125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 76545436, + "step": 848 + }, + { + "epoch": 3.903448275862069, + "grad_norm": 3.5055722837258783, + "learning_rate": 5e-06, + "loss": 0.1269, + "num_input_tokens_seen": 76634196, + "step": 849 + }, + { + "epoch": 3.903448275862069, + "loss": 0.12913568317890167, + "loss_ce": 1.5821904526092112e-05, + "loss_iou": 0.30859375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 76634196, + "step": 849 + }, + { + "epoch": 3.9080459770114944, + "grad_norm": 8.97172584427251, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 76724524, + "step": 850 + }, + { + "epoch": 3.9080459770114944, + "loss": 0.10759300738573074, + "loss_ce": 1.8539070879342034e-05, + "loss_iou": 0.41015625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 76724524, + "step": 850 + }, + { + "epoch": 3.9126436781609195, + "grad_norm": 2.3536210529744888, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 76815068, + "step": 851 + }, + { + "epoch": 3.9126436781609195, + "loss": 0.0927945151925087, + "loss_ce": 0.00011263469059485942, + "loss_iou": 0.33984375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 76815068, + "step": 851 + }, + { + "epoch": 3.9172413793103447, + "grad_norm": 6.886385253300276, + "learning_rate": 5e-06, + "loss": 0.0821, + "num_input_tokens_seen": 76905484, + "step": 852 + }, + { + "epoch": 3.9172413793103447, + "loss": 0.09015928208827972, + "loss_ce": 7.139760418795049e-05, + "loss_iou": 0.390625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 76905484, + "step": 852 + }, + { + "epoch": 3.9218390804597703, + "grad_norm": 8.891761743105253, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 76995800, + "step": 853 + }, + { + "epoch": 3.9218390804597703, + "loss": 0.15627999603748322, + "loss_ce": 3.000300239364151e-05, + "loss_iou": 0.388671875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 76995800, + "step": 853 + }, + { + "epoch": 3.9264367816091954, + "grad_norm": 5.132513236049143, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 77085280, + "step": 854 + }, + { + "epoch": 3.9264367816091954, + "loss": 0.10330615192651749, + "loss_ce": 4.150414042669581e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 77085280, + "step": 854 + }, + { + "epoch": 3.9310344827586206, + "grad_norm": 11.013320716371243, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 77174948, + "step": 855 + }, + { + "epoch": 3.9310344827586206, + "loss": 0.09394749999046326, + "loss_ce": 1.439205152564682e-05, + "loss_iou": 0.279296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 77174948, + "step": 855 + }, + { + "epoch": 3.935632183908046, + "grad_norm": 6.10534980851738, + "learning_rate": 5e-06, + "loss": 0.0767, + "num_input_tokens_seen": 77265376, + "step": 856 + }, + { + "epoch": 3.935632183908046, + "loss": 0.06424582004547119, + "loss_ce": 6.319404747046065e-06, + "loss_iou": 0.359375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 77265376, + "step": 856 + }, + { + "epoch": 3.9402298850574713, + "grad_norm": 15.651592669210814, + "learning_rate": 5e-06, + "loss": 0.0864, + "num_input_tokens_seen": 77355760, + "step": 857 + }, + { + "epoch": 3.9402298850574713, + "loss": 0.0786062479019165, + "loss_ce": 2.3480024538002908e-05, + "loss_iou": 0.322265625, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 77355760, + "step": 857 + }, + { + "epoch": 3.9448275862068964, + "grad_norm": 6.800574472479352, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 77446208, + "step": 858 + }, + { + "epoch": 3.9448275862068964, + "loss": 0.09560946375131607, + "loss_ce": 0.0001809898967621848, + "loss_iou": 0.384765625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 77446208, + "step": 858 + }, + { + "epoch": 3.9494252873563216, + "grad_norm": 14.49037795832371, + "learning_rate": 5e-06, + "loss": 0.1543, + "num_input_tokens_seen": 77536448, + "step": 859 + }, + { + "epoch": 3.9494252873563216, + "loss": 0.14268243312835693, + "loss_ce": 1.2764152415911667e-05, + "loss_iou": 0.369140625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 77536448, + "step": 859 + }, + { + "epoch": 3.954022988505747, + "grad_norm": 3.136445093906524, + "learning_rate": 5e-06, + "loss": 0.099, + "num_input_tokens_seen": 77626764, + "step": 860 + }, + { + "epoch": 3.954022988505747, + "loss": 0.09950034320354462, + "loss_ce": 1.3036394193477463e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 77626764, + "step": 860 + }, + { + "epoch": 3.9586206896551723, + "grad_norm": 5.399769066083071, + "learning_rate": 5e-06, + "loss": 0.2064, + "num_input_tokens_seen": 77717140, + "step": 861 + }, + { + "epoch": 3.9586206896551723, + "loss": 0.19695694744586945, + "loss_ce": 1.1762541362259071e-05, + "loss_iou": 0.392578125, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 77717140, + "step": 861 + }, + { + "epoch": 3.963218390804598, + "grad_norm": 18.891060577417118, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 77807488, + "step": 862 + }, + { + "epoch": 3.963218390804598, + "loss": 0.09799706935882568, + "loss_ce": 5.1299293772899546e-06, + "loss_iou": 0.4296875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 77807488, + "step": 862 + }, + { + "epoch": 3.967816091954023, + "grad_norm": 5.482893632604601, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 77897924, + "step": 863 + }, + { + "epoch": 3.967816091954023, + "loss": 0.08787819743156433, + "loss_ce": 1.808757588150911e-05, + "loss_iou": 0.341796875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 77897924, + "step": 863 + }, + { + "epoch": 3.972413793103448, + "grad_norm": 22.690601947302575, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 77988320, + "step": 864 + }, + { + "epoch": 3.972413793103448, + "loss": 0.14658576250076294, + "loss_ce": 9.828477232076693e-06, + "loss_iou": 0.337890625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 77988320, + "step": 864 + }, + { + "epoch": 3.9770114942528734, + "grad_norm": 6.60885795449724, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 78078664, + "step": 865 + }, + { + "epoch": 3.9770114942528734, + "loss": 0.14846225082874298, + "loss_ce": 4.0008031646721065e-05, + "loss_iou": 0.470703125, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 78078664, + "step": 865 + }, + { + "epoch": 3.981609195402299, + "grad_norm": 8.750514149287007, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 78168928, + "step": 866 + }, + { + "epoch": 3.981609195402299, + "loss": 0.11572016775608063, + "loss_ce": 2.8033886337652802e-05, + "loss_iou": 0.306640625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 78168928, + "step": 866 + }, + { + "epoch": 3.986206896551724, + "grad_norm": 7.203279541438859, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 78259188, + "step": 867 + }, + { + "epoch": 3.986206896551724, + "loss": 0.10868757218122482, + "loss_ce": 1.4477924196398817e-05, + "loss_iou": 0.306640625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 78259188, + "step": 867 + }, + { + "epoch": 3.9908045977011493, + "grad_norm": 3.5527677092629077, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 78349456, + "step": 868 + }, + { + "epoch": 3.9908045977011493, + "loss": 0.061215296387672424, + "loss_ce": 2.7547748686629348e-05, + "loss_iou": 0.326171875, + "loss_num": 0.01220703125, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 78349456, + "step": 868 + }, + { + "epoch": 3.995402298850575, + "grad_norm": 13.601719729469064, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 78439776, + "step": 869 + }, + { + "epoch": 3.995402298850575, + "loss": 0.06812618672847748, + "loss_ce": 2.621453495521564e-05, + "loss_iou": 0.2734375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 78439776, + "step": 869 + }, + { + "epoch": 4.0, + "grad_norm": 12.850021260799155, + "learning_rate": 5e-06, + "loss": 0.0894, + "num_input_tokens_seen": 78530044, + "step": 870 + }, + { + "epoch": 4.0, + "loss": 0.07683277875185013, + "loss_ce": 4.778397851623595e-06, + "loss_iou": 0.322265625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 78530044, + "step": 870 + }, + { + "epoch": 4.004597701149425, + "grad_norm": 12.896033435436056, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 78620484, + "step": 871 + }, + { + "epoch": 4.004597701149425, + "loss": 0.07385321706533432, + "loss_ce": 1.593406886968296e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 78620484, + "step": 871 + }, + { + "epoch": 4.00919540229885, + "grad_norm": 10.038095509850997, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 78710856, + "step": 872 + }, + { + "epoch": 4.00919540229885, + "loss": 0.12039355933666229, + "loss_ce": 0.00012378332030493766, + "loss_iou": 0.36328125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 78710856, + "step": 872 + }, + { + "epoch": 4.0137931034482754, + "grad_norm": 7.577320908621721, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 78801316, + "step": 873 + }, + { + "epoch": 4.0137931034482754, + "loss": 0.08987805247306824, + "loss_ce": 3.429901698837057e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 78801316, + "step": 873 + }, + { + "epoch": 4.0183908045977015, + "grad_norm": 8.350163864558647, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 78891520, + "step": 874 + }, + { + "epoch": 4.0183908045977015, + "loss": 0.0876561775803566, + "loss_ce": 9.694716936792247e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 78891520, + "step": 874 + }, + { + "epoch": 4.022988505747127, + "grad_norm": 8.769669621263414, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 78981876, + "step": 875 + }, + { + "epoch": 4.022988505747127, + "loss": 0.15618135035037994, + "loss_ce": 2.289763870066963e-05, + "loss_iou": 0.390625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 78981876, + "step": 875 + }, + { + "epoch": 4.027586206896552, + "grad_norm": 16.167283950116676, + "learning_rate": 5e-06, + "loss": 0.1022, + "num_input_tokens_seen": 79072252, + "step": 876 + }, + { + "epoch": 4.027586206896552, + "loss": 0.09752378612756729, + "loss_ce": 2.0128632968408056e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 79072252, + "step": 876 + }, + { + "epoch": 4.032183908045977, + "grad_norm": 8.509588312264029, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 79162600, + "step": 877 + }, + { + "epoch": 4.032183908045977, + "loss": 0.1270662546157837, + "loss_ce": 8.26121904538013e-05, + "loss_iou": 0.34765625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 79162600, + "step": 877 + }, + { + "epoch": 4.036781609195402, + "grad_norm": 10.729375299726154, + "learning_rate": 5e-06, + "loss": 0.0858, + "num_input_tokens_seen": 79253000, + "step": 878 + }, + { + "epoch": 4.036781609195402, + "loss": 0.07850313931703568, + "loss_ce": 1.1927713785553351e-05, + "loss_iou": 0.3125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 79253000, + "step": 878 + }, + { + "epoch": 4.041379310344827, + "grad_norm": 9.480711461561928, + "learning_rate": 5e-06, + "loss": 0.092, + "num_input_tokens_seen": 79343448, + "step": 879 + }, + { + "epoch": 4.041379310344827, + "loss": 0.11301976442337036, + "loss_ce": 4.369192538433708e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 79343448, + "step": 879 + }, + { + "epoch": 4.045977011494253, + "grad_norm": 20.962984913528285, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 79433948, + "step": 880 + }, + { + "epoch": 4.045977011494253, + "loss": 0.060304559767246246, + "loss_ce": 1.8244732018501963e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 79433948, + "step": 880 + }, + { + "epoch": 4.050574712643678, + "grad_norm": 27.220075486414753, + "learning_rate": 5e-06, + "loss": 0.082, + "num_input_tokens_seen": 79524228, + "step": 881 + }, + { + "epoch": 4.050574712643678, + "loss": 0.07480692863464355, + "loss_ce": 8.341698048752733e-06, + "loss_iou": 0.4375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 79524228, + "step": 881 + }, + { + "epoch": 4.055172413793104, + "grad_norm": 9.60955652874984, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 79614560, + "step": 882 + }, + { + "epoch": 4.055172413793104, + "loss": 0.2088443636894226, + "loss_ce": 0.00010411619587102905, + "loss_iou": 0.42578125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 79614560, + "step": 882 + }, + { + "epoch": 4.059770114942529, + "grad_norm": 13.229041315882611, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 79705048, + "step": 883 + }, + { + "epoch": 4.059770114942529, + "loss": 0.11971049755811691, + "loss_ce": 0.00014262790500652045, + "loss_iou": 0.38671875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 79705048, + "step": 883 + }, + { + "epoch": 4.064367816091954, + "grad_norm": 11.345287474225346, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 79795468, + "step": 884 + }, + { + "epoch": 4.064367816091954, + "loss": 0.15525750815868378, + "loss_ce": 7.562241080449894e-05, + "loss_iou": 0.4453125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 79795468, + "step": 884 + }, + { + "epoch": 4.068965517241379, + "grad_norm": 5.046401225257881, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 79885904, + "step": 885 + }, + { + "epoch": 4.068965517241379, + "loss": 0.09430328011512756, + "loss_ce": 3.965928954130504e-06, + "loss_iou": 0.3671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 79885904, + "step": 885 + }, + { + "epoch": 4.073563218390804, + "grad_norm": 7.149591467541043, + "learning_rate": 5e-06, + "loss": 0.0859, + "num_input_tokens_seen": 79976308, + "step": 886 + }, + { + "epoch": 4.073563218390804, + "loss": 0.06827942281961441, + "loss_ce": 2.6857467673835345e-05, + "loss_iou": 0.322265625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 79976308, + "step": 886 + }, + { + "epoch": 4.07816091954023, + "grad_norm": 14.125533156236072, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 80066680, + "step": 887 + }, + { + "epoch": 4.07816091954023, + "loss": 0.11485080420970917, + "loss_ce": 1.3159661648387555e-05, + "loss_iou": 0.302734375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 80066680, + "step": 887 + }, + { + "epoch": 4.082758620689655, + "grad_norm": 7.34272733165029, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 80157060, + "step": 888 + }, + { + "epoch": 4.082758620689655, + "loss": 0.13511279225349426, + "loss_ce": 7.250368071254343e-05, + "loss_iou": 0.37890625, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 80157060, + "step": 888 + }, + { + "epoch": 4.0873563218390805, + "grad_norm": 7.121447843023311, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 80247628, + "step": 889 + }, + { + "epoch": 4.0873563218390805, + "loss": 0.1320275515317917, + "loss_ce": 8.516525667801034e-06, + "loss_iou": 0.3984375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 80247628, + "step": 889 + }, + { + "epoch": 4.091954022988506, + "grad_norm": 9.329754076401972, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 80337164, + "step": 890 + }, + { + "epoch": 4.091954022988506, + "loss": 0.10165956616401672, + "loss_ce": 5.516031251318054e-06, + "loss_iou": 0.3828125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 80337164, + "step": 890 + }, + { + "epoch": 4.096551724137931, + "grad_norm": 4.7416331340636875, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 80426632, + "step": 891 + }, + { + "epoch": 4.096551724137931, + "loss": 0.14343947172164917, + "loss_ce": 3.7382742448244244e-05, + "loss_iou": 0.353515625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 80426632, + "step": 891 + }, + { + "epoch": 4.101149425287356, + "grad_norm": 23.466418685698162, + "learning_rate": 5e-06, + "loss": 0.1543, + "num_input_tokens_seen": 80516208, + "step": 892 + }, + { + "epoch": 4.101149425287356, + "loss": 0.18008266389369965, + "loss_ce": 2.8955022571608424e-05, + "loss_iou": 0.4375, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 80516208, + "step": 892 + }, + { + "epoch": 4.105747126436782, + "grad_norm": 20.195736171276238, + "learning_rate": 5e-06, + "loss": 0.103, + "num_input_tokens_seen": 80606536, + "step": 893 + }, + { + "epoch": 4.105747126436782, + "loss": 0.09322860836982727, + "loss_ce": 5.844266706844792e-05, + "loss_iou": 0.390625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 80606536, + "step": 893 + }, + { + "epoch": 4.110344827586207, + "grad_norm": 4.925478130148734, + "learning_rate": 5e-06, + "loss": 0.1001, + "num_input_tokens_seen": 80696904, + "step": 894 + }, + { + "epoch": 4.110344827586207, + "loss": 0.1146705150604248, + "loss_ce": 1.5975076166796498e-05, + "loss_iou": 0.29296875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 80696904, + "step": 894 + }, + { + "epoch": 4.114942528735632, + "grad_norm": 18.783331502764156, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 80787324, + "step": 895 + }, + { + "epoch": 4.114942528735632, + "loss": 0.10798490047454834, + "loss_ce": 0.0011123453732579947, + "loss_iou": 0.384765625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 80787324, + "step": 895 + }, + { + "epoch": 4.119540229885057, + "grad_norm": 6.031295114157052, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 80877804, + "step": 896 + }, + { + "epoch": 4.119540229885057, + "loss": 0.12141025811433792, + "loss_ce": 8.762812649365515e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 80877804, + "step": 896 + }, + { + "epoch": 4.124137931034483, + "grad_norm": 4.270748379873812, + "learning_rate": 5e-06, + "loss": 0.0957, + "num_input_tokens_seen": 80967380, + "step": 897 + }, + { + "epoch": 4.124137931034483, + "loss": 0.08645440638065338, + "loss_ce": 2.8627207939280197e-05, + "loss_iou": 0.349609375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 80967380, + "step": 897 + }, + { + "epoch": 4.128735632183908, + "grad_norm": 4.246728442816423, + "learning_rate": 5e-06, + "loss": 0.1038, + "num_input_tokens_seen": 81057784, + "step": 898 + }, + { + "epoch": 4.128735632183908, + "loss": 0.09482555836439133, + "loss_ce": 6.847563054179773e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 81057784, + "step": 898 + }, + { + "epoch": 4.133333333333334, + "grad_norm": 4.522025434777465, + "learning_rate": 5e-06, + "loss": 0.1549, + "num_input_tokens_seen": 81148164, + "step": 899 + }, + { + "epoch": 4.133333333333334, + "loss": 0.20047786831855774, + "loss_ce": 3.8414596929214895e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 81148164, + "step": 899 + }, + { + "epoch": 4.137931034482759, + "grad_norm": 12.60515016043542, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 81237064, + "step": 900 + }, + { + "epoch": 4.137931034482759, + "loss": 0.09033560007810593, + "loss_ce": 3.5681719054991845e-06, + "loss_iou": 0.326171875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 81237064, + "step": 900 + }, + { + "epoch": 4.142528735632184, + "grad_norm": 12.203513946052261, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 81327320, + "step": 901 + }, + { + "epoch": 4.142528735632184, + "loss": 0.09698610007762909, + "loss_ce": 1.236280354532937e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 81327320, + "step": 901 + }, + { + "epoch": 4.147126436781609, + "grad_norm": 5.265820346765914, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 81417700, + "step": 902 + }, + { + "epoch": 4.147126436781609, + "loss": 0.13237115740776062, + "loss_ce": 1.6427336959168315e-05, + "loss_iou": 0.412109375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 81417700, + "step": 902 + }, + { + "epoch": 4.151724137931034, + "grad_norm": 5.402431273469456, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 81508048, + "step": 903 + }, + { + "epoch": 4.151724137931034, + "loss": 0.10200782120227814, + "loss_ce": 1.8078684661304578e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 81508048, + "step": 903 + }, + { + "epoch": 4.1563218390804595, + "grad_norm": 7.802227527691613, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 81598384, + "step": 904 + }, + { + "epoch": 4.1563218390804595, + "loss": 0.0613405816257, + "loss_ce": 4.60283481515944e-05, + "loss_iou": 0.345703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 81598384, + "step": 904 + }, + { + "epoch": 4.160919540229885, + "grad_norm": 16.9420578658568, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 81688732, + "step": 905 + }, + { + "epoch": 4.160919540229885, + "loss": 0.16473174095153809, + "loss_ce": 1.3099732314003631e-05, + "loss_iou": 0.41796875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 81688732, + "step": 905 + }, + { + "epoch": 4.165517241379311, + "grad_norm": 3.5038961317329633, + "learning_rate": 5e-06, + "loss": 0.0949, + "num_input_tokens_seen": 81779124, + "step": 906 + }, + { + "epoch": 4.165517241379311, + "loss": 0.09057803452014923, + "loss_ce": 1.8635427068147692e-06, + "loss_iou": 0.345703125, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 81779124, + "step": 906 + }, + { + "epoch": 4.170114942528736, + "grad_norm": 6.320699544904618, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 81869528, + "step": 907 + }, + { + "epoch": 4.170114942528736, + "loss": 0.0921754315495491, + "loss_ce": 4.286535840947181e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 81869528, + "step": 907 + }, + { + "epoch": 4.174712643678161, + "grad_norm": 20.78428241303839, + "learning_rate": 5e-06, + "loss": 0.1552, + "num_input_tokens_seen": 81959744, + "step": 908 + }, + { + "epoch": 4.174712643678161, + "loss": 0.17669041454792023, + "loss_ce": 8.907296432880685e-06, + "loss_iou": 0.37109375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 81959744, + "step": 908 + }, + { + "epoch": 4.179310344827586, + "grad_norm": 8.303007780105153, + "learning_rate": 5e-06, + "loss": 0.0563, + "num_input_tokens_seen": 82050236, + "step": 909 + }, + { + "epoch": 4.179310344827586, + "loss": 0.04546171426773071, + "loss_ce": 2.1037711121607572e-05, + "loss_iou": 0.279296875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 82050236, + "step": 909 + }, + { + "epoch": 4.183908045977011, + "grad_norm": 11.886180061278875, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 82140600, + "step": 910 + }, + { + "epoch": 4.183908045977011, + "loss": 0.10049276053905487, + "loss_ce": 2.888959352276288e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 82140600, + "step": 910 + }, + { + "epoch": 4.188505747126436, + "grad_norm": 8.89001202277643, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 82231004, + "step": 911 + }, + { + "epoch": 4.188505747126436, + "loss": 0.156759113073349, + "loss_ce": 2.0833926100749522e-05, + "loss_iou": 0.34765625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 82231004, + "step": 911 + }, + { + "epoch": 4.1931034482758625, + "grad_norm": 12.14512131743274, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 82321456, + "step": 912 + }, + { + "epoch": 4.1931034482758625, + "loss": 0.0638713464140892, + "loss_ce": 2.8572279916261323e-05, + "loss_iou": 0.365234375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 82321456, + "step": 912 + }, + { + "epoch": 4.197701149425288, + "grad_norm": 6.273360573385607, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 82411704, + "step": 913 + }, + { + "epoch": 4.197701149425288, + "loss": 0.08992119133472443, + "loss_ce": 1.1499548691062955e-06, + "loss_iou": 0.376953125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 82411704, + "step": 913 + }, + { + "epoch": 4.202298850574713, + "grad_norm": 4.863327392449604, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 82500532, + "step": 914 + }, + { + "epoch": 4.202298850574713, + "loss": 0.15036578476428986, + "loss_ce": 5.677235094481148e-06, + "loss_iou": 0.296875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 82500532, + "step": 914 + }, + { + "epoch": 4.206896551724138, + "grad_norm": 6.0320903674371795, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 82591044, + "step": 915 + }, + { + "epoch": 4.206896551724138, + "loss": 0.10052984952926636, + "loss_ce": 3.5464396205497906e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 82591044, + "step": 915 + }, + { + "epoch": 4.211494252873563, + "grad_norm": 15.82805235612327, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 82681520, + "step": 916 + }, + { + "epoch": 4.211494252873563, + "loss": 0.09116413444280624, + "loss_ce": 2.3386701286653988e-05, + "loss_iou": 0.3125, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 82681520, + "step": 916 + }, + { + "epoch": 4.216091954022988, + "grad_norm": 2.3437945707030305, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 82771904, + "step": 917 + }, + { + "epoch": 4.216091954022988, + "loss": 0.09512968361377716, + "loss_ce": 6.396087883331347e-06, + "loss_iou": 0.392578125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 82771904, + "step": 917 + }, + { + "epoch": 4.220689655172414, + "grad_norm": 8.364208607807933, + "learning_rate": 5e-06, + "loss": 0.0796, + "num_input_tokens_seen": 82862244, + "step": 918 + }, + { + "epoch": 4.220689655172414, + "loss": 0.08399280905723572, + "loss_ce": 8.435705240117386e-06, + "loss_iou": 0.375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 82862244, + "step": 918 + }, + { + "epoch": 4.225287356321839, + "grad_norm": 3.326406479061023, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 82952644, + "step": 919 + }, + { + "epoch": 4.225287356321839, + "loss": 0.06023194640874863, + "loss_ce": 2.0768720787600614e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 82952644, + "step": 919 + }, + { + "epoch": 4.2298850574712645, + "grad_norm": 3.4665111443776944, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 83042996, + "step": 920 + }, + { + "epoch": 4.2298850574712645, + "loss": 0.19263532757759094, + "loss_ce": 8.370232535526156e-06, + "loss_iou": 0.40625, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 83042996, + "step": 920 + }, + { + "epoch": 4.23448275862069, + "grad_norm": 9.069959650004307, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 83133396, + "step": 921 + }, + { + "epoch": 4.23448275862069, + "loss": 0.11099687963724136, + "loss_ce": 3.496557837934233e-05, + "loss_iou": 0.32421875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 83133396, + "step": 921 + }, + { + "epoch": 4.239080459770115, + "grad_norm": 2.5690493136384003, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 83223872, + "step": 922 + }, + { + "epoch": 4.239080459770115, + "loss": 0.11521795392036438, + "loss_ce": 1.4101822671364062e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 83223872, + "step": 922 + }, + { + "epoch": 4.24367816091954, + "grad_norm": 7.69995972428386, + "learning_rate": 5e-06, + "loss": 0.1505, + "num_input_tokens_seen": 83314296, + "step": 923 + }, + { + "epoch": 4.24367816091954, + "loss": 0.125550776720047, + "loss_ce": 1.4667482446384383e-06, + "loss_iou": 0.30078125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 83314296, + "step": 923 + }, + { + "epoch": 4.248275862068965, + "grad_norm": 6.582981564440316, + "learning_rate": 5e-06, + "loss": 0.1091, + "num_input_tokens_seen": 83404576, + "step": 924 + }, + { + "epoch": 4.248275862068965, + "loss": 0.11165105551481247, + "loss_ce": 1.7746649973560125e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 83404576, + "step": 924 + }, + { + "epoch": 4.252873563218391, + "grad_norm": 3.1233405856262006, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 83494884, + "step": 925 + }, + { + "epoch": 4.252873563218391, + "loss": 0.13092860579490662, + "loss_ce": 8.193834219127893e-06, + "loss_iou": 0.345703125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 83494884, + "step": 925 + }, + { + "epoch": 4.257471264367816, + "grad_norm": 11.295201522679497, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 83585372, + "step": 926 + }, + { + "epoch": 4.257471264367816, + "loss": 0.09147991985082626, + "loss_ce": 3.3995776902884245e-05, + "loss_iou": 0.37109375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 83585372, + "step": 926 + }, + { + "epoch": 4.2620689655172415, + "grad_norm": 8.74331179817824, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 83675776, + "step": 927 + }, + { + "epoch": 4.2620689655172415, + "loss": 0.11818952858448029, + "loss_ce": 1.0204122190771159e-05, + "loss_iou": 0.43359375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 83675776, + "step": 927 + }, + { + "epoch": 4.266666666666667, + "grad_norm": 36.691461336314475, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 83766024, + "step": 928 + }, + { + "epoch": 4.266666666666667, + "loss": 0.11256686598062515, + "loss_ce": 2.7873388717125636e-06, + "loss_iou": 0.404296875, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 83766024, + "step": 928 + }, + { + "epoch": 4.271264367816092, + "grad_norm": 20.28534876927393, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 83856372, + "step": 929 + }, + { + "epoch": 4.271264367816092, + "loss": 0.1561427265405655, + "loss_ce": 0.0033259547781199217, + "loss_iou": 0.376953125, + "loss_num": 0.0306396484375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 83856372, + "step": 929 + }, + { + "epoch": 4.275862068965517, + "grad_norm": 21.074305399488686, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 83946688, + "step": 930 + }, + { + "epoch": 4.275862068965517, + "loss": 0.10977804660797119, + "loss_ce": 6.307951480266638e-06, + "loss_iou": 0.298828125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 83946688, + "step": 930 + }, + { + "epoch": 4.280459770114943, + "grad_norm": 18.474400113113084, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 84037100, + "step": 931 + }, + { + "epoch": 4.280459770114943, + "loss": 0.08449871093034744, + "loss_ce": 5.657064320985228e-05, + "loss_iou": 0.431640625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 84037100, + "step": 931 + }, + { + "epoch": 4.285057471264368, + "grad_norm": 6.907766753439544, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 84127468, + "step": 932 + }, + { + "epoch": 4.285057471264368, + "loss": 0.10681600868701935, + "loss_ce": 4.487107617023867e-06, + "loss_iou": 0.400390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 84127468, + "step": 932 + }, + { + "epoch": 4.289655172413793, + "grad_norm": 11.040747277806908, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 84216928, + "step": 933 + }, + { + "epoch": 4.289655172413793, + "loss": 0.1147032380104065, + "loss_ce": 4.869817348662764e-05, + "loss_iou": 0.40234375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 84216928, + "step": 933 + }, + { + "epoch": 4.294252873563218, + "grad_norm": 5.536586522856837, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 84307292, + "step": 934 + }, + { + "epoch": 4.294252873563218, + "loss": 0.14283494651317596, + "loss_ce": 0.0001957785279955715, + "loss_iou": 0.349609375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 84307292, + "step": 934 + }, + { + "epoch": 4.2988505747126435, + "grad_norm": 2.131997523716501, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 84396872, + "step": 935 + }, + { + "epoch": 4.2988505747126435, + "loss": 0.12867018580436707, + "loss_ce": 8.078463906713296e-06, + "loss_iou": 0.359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 84396872, + "step": 935 + }, + { + "epoch": 4.303448275862069, + "grad_norm": 11.27288052453101, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 84487316, + "step": 936 + }, + { + "epoch": 4.303448275862069, + "loss": 0.06220962479710579, + "loss_ce": 0.0010371371172368526, + "loss_iou": 0.349609375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 84487316, + "step": 936 + }, + { + "epoch": 4.308045977011494, + "grad_norm": 3.4892119545715974, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 84577824, + "step": 937 + }, + { + "epoch": 4.308045977011494, + "loss": 0.1438092142343521, + "loss_ce": 1.0385462701378856e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 84577824, + "step": 937 + }, + { + "epoch": 4.31264367816092, + "grad_norm": 6.088518946016204, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 84668208, + "step": 938 + }, + { + "epoch": 4.31264367816092, + "loss": 0.08154906332492828, + "loss_ce": 2.134743226633873e-05, + "loss_iou": 0.37109375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 84668208, + "step": 938 + }, + { + "epoch": 4.317241379310345, + "grad_norm": 10.741692080234259, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 84758388, + "step": 939 + }, + { + "epoch": 4.317241379310345, + "loss": 0.12125536799430847, + "loss_ce": 9.027476153278258e-06, + "loss_iou": 0.4140625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 84758388, + "step": 939 + }, + { + "epoch": 4.32183908045977, + "grad_norm": 12.104640117183845, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 84848688, + "step": 940 + }, + { + "epoch": 4.32183908045977, + "loss": 0.125244140625, + "loss_ce": 0.0001831004919949919, + "loss_iou": 0.40234375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 84848688, + "step": 940 + }, + { + "epoch": 4.326436781609195, + "grad_norm": 13.842704032927974, + "learning_rate": 5e-06, + "loss": 0.1121, + "num_input_tokens_seen": 84938896, + "step": 941 + }, + { + "epoch": 4.326436781609195, + "loss": 0.08458052575588226, + "loss_ce": 1.0556854022070183e-06, + "loss_iou": 0.345703125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 84938896, + "step": 941 + }, + { + "epoch": 4.3310344827586205, + "grad_norm": 27.64969062986627, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 85029124, + "step": 942 + }, + { + "epoch": 4.3310344827586205, + "loss": 0.11936682462692261, + "loss_ce": 0.00025671368348412216, + "loss_iou": 0.294921875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 85029124, + "step": 942 + }, + { + "epoch": 4.335632183908046, + "grad_norm": 7.051941352640006, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 85119572, + "step": 943 + }, + { + "epoch": 4.335632183908046, + "loss": 0.08214512467384338, + "loss_ce": 7.063375960569829e-06, + "loss_iou": 0.34375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 85119572, + "step": 943 + }, + { + "epoch": 4.340229885057472, + "grad_norm": 5.222428783369038, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 85209984, + "step": 944 + }, + { + "epoch": 4.340229885057472, + "loss": 0.13096053898334503, + "loss_ce": 9.61720070336014e-06, + "loss_iou": 0.32421875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 85209984, + "step": 944 + }, + { + "epoch": 4.344827586206897, + "grad_norm": 8.261994222444807, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 85300436, + "step": 945 + }, + { + "epoch": 4.344827586206897, + "loss": 0.11582186818122864, + "loss_ce": 0.0001144730849773623, + "loss_iou": 0.28515625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 85300436, + "step": 945 + }, + { + "epoch": 4.349425287356322, + "grad_norm": 4.239609844753722, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 85390688, + "step": 946 + }, + { + "epoch": 4.349425287356322, + "loss": 0.08635582774877548, + "loss_ce": 6.338353159662802e-06, + "loss_iou": 0.37109375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 85390688, + "step": 946 + }, + { + "epoch": 4.354022988505747, + "grad_norm": 15.411419922368433, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 85481212, + "step": 947 + }, + { + "epoch": 4.354022988505747, + "loss": 0.11237078905105591, + "loss_ce": 5.069013241154607e-06, + "loss_iou": 0.39453125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 85481212, + "step": 947 + }, + { + "epoch": 4.358620689655172, + "grad_norm": 3.634268474402103, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 85570804, + "step": 948 + }, + { + "epoch": 4.358620689655172, + "loss": 0.1522293984889984, + "loss_ce": 7.724566785327625e-06, + "loss_iou": 0.31640625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 85570804, + "step": 948 + }, + { + "epoch": 4.363218390804597, + "grad_norm": 8.013548594881062, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 85661312, + "step": 949 + }, + { + "epoch": 4.363218390804597, + "loss": 0.14923954010009766, + "loss_ce": 8.487531158607453e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 85661312, + "step": 949 + }, + { + "epoch": 4.3678160919540225, + "grad_norm": 2.762105405490039, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 85751636, + "step": 950 + }, + { + "epoch": 4.3678160919540225, + "loss": 0.1016370952129364, + "loss_ce": 1.3553465578297619e-05, + "loss_iou": 0.35546875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 85751636, + "step": 950 + }, + { + "epoch": 4.372413793103449, + "grad_norm": 5.105256776246609, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 85842060, + "step": 951 + }, + { + "epoch": 4.372413793103449, + "loss": 0.10755231976509094, + "loss_ce": 8.379087375942618e-06, + "loss_iou": 0.35546875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 85842060, + "step": 951 + }, + { + "epoch": 4.377011494252874, + "grad_norm": 5.570764796509669, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 85932444, + "step": 952 + }, + { + "epoch": 4.377011494252874, + "loss": 0.0883231908082962, + "loss_ce": 3.58409270120319e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 85932444, + "step": 952 + }, + { + "epoch": 4.381609195402299, + "grad_norm": 10.856984323349382, + "learning_rate": 5e-06, + "loss": 0.1382, + "num_input_tokens_seen": 86022780, + "step": 953 + }, + { + "epoch": 4.381609195402299, + "loss": 0.18059954047203064, + "loss_ce": 5.753596997237764e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 86022780, + "step": 953 + }, + { + "epoch": 4.386206896551724, + "grad_norm": 13.39000045516796, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 86113048, + "step": 954 + }, + { + "epoch": 4.386206896551724, + "loss": 0.10757958889007568, + "loss_ce": 3.564245707821101e-05, + "loss_iou": 0.37890625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 86113048, + "step": 954 + }, + { + "epoch": 4.390804597701149, + "grad_norm": 3.0587557281723505, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 86203536, + "step": 955 + }, + { + "epoch": 4.390804597701149, + "loss": 0.08970493823289871, + "loss_ce": 1.377277476422023e-05, + "loss_iou": 0.47265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 86203536, + "step": 955 + }, + { + "epoch": 4.395402298850574, + "grad_norm": 7.714066456128423, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 86293828, + "step": 956 + }, + { + "epoch": 4.395402298850574, + "loss": 0.14037680625915527, + "loss_ce": 8.751240238780156e-05, + "loss_iou": 0.40234375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 86293828, + "step": 956 + }, + { + "epoch": 4.4, + "grad_norm": 6.43769065897863, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 86384244, + "step": 957 + }, + { + "epoch": 4.4, + "loss": 0.12558409571647644, + "loss_ce": 0.0003399590204935521, + "loss_iou": 0.326171875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 86384244, + "step": 957 + }, + { + "epoch": 4.4045977011494255, + "grad_norm": 7.325715877251544, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 86474564, + "step": 958 + }, + { + "epoch": 4.4045977011494255, + "loss": 0.07348855584859848, + "loss_ce": 3.274789924034849e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 86474564, + "step": 958 + }, + { + "epoch": 4.409195402298851, + "grad_norm": 3.1430755970159763, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 86565012, + "step": 959 + }, + { + "epoch": 4.409195402298851, + "loss": 0.10845954716205597, + "loss_ce": 9.163121285382658e-05, + "loss_iou": 0.267578125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 86565012, + "step": 959 + }, + { + "epoch": 4.413793103448276, + "grad_norm": 2.479253070842102, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 86655508, + "step": 960 + }, + { + "epoch": 4.413793103448276, + "loss": 0.09556898474693298, + "loss_ce": 3.370657577761449e-05, + "loss_iou": 0.328125, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 86655508, + "step": 960 + }, + { + "epoch": 4.418390804597701, + "grad_norm": 8.037612054391559, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 86744356, + "step": 961 + }, + { + "epoch": 4.418390804597701, + "loss": 0.13194900751113892, + "loss_ce": 2.151706939912401e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 86744356, + "step": 961 + }, + { + "epoch": 4.422988505747126, + "grad_norm": 4.558636962872201, + "learning_rate": 5e-06, + "loss": 0.0655, + "num_input_tokens_seen": 86834692, + "step": 962 + }, + { + "epoch": 4.422988505747126, + "loss": 0.05887310206890106, + "loss_ce": 4.69345968667767e-06, + "loss_iou": 0.31640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 86834692, + "step": 962 + }, + { + "epoch": 4.427586206896552, + "grad_norm": 11.446145366052058, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 86925088, + "step": 963 + }, + { + "epoch": 4.427586206896552, + "loss": 0.0905739963054657, + "loss_ce": 1.3087726983940229e-05, + "loss_iou": 0.349609375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 86925088, + "step": 963 + }, + { + "epoch": 4.432183908045977, + "grad_norm": 9.136099668207656, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 87015388, + "step": 964 + }, + { + "epoch": 4.432183908045977, + "loss": 0.16606704890727997, + "loss_ce": 2.0905536075588316e-05, + "loss_iou": 0.42578125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 87015388, + "step": 964 + }, + { + "epoch": 4.436781609195402, + "grad_norm": 5.4148068009345645, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 87104260, + "step": 965 + }, + { + "epoch": 4.436781609195402, + "loss": 0.1504197120666504, + "loss_ce": 5.959868576610461e-05, + "loss_iou": 0.296875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 87104260, + "step": 965 + }, + { + "epoch": 4.441379310344828, + "grad_norm": 7.551140001822664, + "learning_rate": 5e-06, + "loss": 0.072, + "num_input_tokens_seen": 87194688, + "step": 966 + }, + { + "epoch": 4.441379310344828, + "loss": 0.0605931356549263, + "loss_ce": 0.0001225552405230701, + "loss_iou": 0.34375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 87194688, + "step": 966 + }, + { + "epoch": 4.445977011494253, + "grad_norm": 5.492004326107786, + "learning_rate": 5e-06, + "loss": 0.1128, + "num_input_tokens_seen": 87285044, + "step": 967 + }, + { + "epoch": 4.445977011494253, + "loss": 0.13285866379737854, + "loss_ce": 4.616224396158941e-05, + "loss_iou": 0.333984375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 87285044, + "step": 967 + }, + { + "epoch": 4.450574712643678, + "grad_norm": 7.815560936649822, + "learning_rate": 5e-06, + "loss": 0.0891, + "num_input_tokens_seen": 87375332, + "step": 968 + }, + { + "epoch": 4.450574712643678, + "loss": 0.12036024034023285, + "loss_ce": 2.943439358205069e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 87375332, + "step": 968 + }, + { + "epoch": 4.455172413793104, + "grad_norm": 19.127660857216004, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 87465720, + "step": 969 + }, + { + "epoch": 4.455172413793104, + "loss": 0.09645415097475052, + "loss_ce": 3.339698650961509e-06, + "loss_iou": 0.380859375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 87465720, + "step": 969 + }, + { + "epoch": 4.459770114942529, + "grad_norm": 2.8889092802079226, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 87555976, + "step": 970 + }, + { + "epoch": 4.459770114942529, + "loss": 0.07163398712873459, + "loss_ce": 9.234347089659423e-06, + "loss_iou": 0.287109375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 87555976, + "step": 970 + }, + { + "epoch": 4.464367816091954, + "grad_norm": 4.010750930380829, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 87646328, + "step": 971 + }, + { + "epoch": 4.464367816091954, + "loss": 0.11213794350624084, + "loss_ce": 1.108814331018948e-06, + "loss_iou": 0.435546875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 87646328, + "step": 971 + }, + { + "epoch": 4.468965517241379, + "grad_norm": 8.210323216736478, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 87736608, + "step": 972 + }, + { + "epoch": 4.468965517241379, + "loss": 0.18182888627052307, + "loss_ce": 5.140444045537151e-06, + "loss_iou": 0.412109375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 87736608, + "step": 972 + }, + { + "epoch": 4.4735632183908045, + "grad_norm": 9.865783278700588, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 87826276, + "step": 973 + }, + { + "epoch": 4.4735632183908045, + "loss": 0.06133665144443512, + "loss_ce": 1.1581903891055845e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 87826276, + "step": 973 + }, + { + "epoch": 4.47816091954023, + "grad_norm": 4.060783363175244, + "learning_rate": 5e-06, + "loss": 0.1517, + "num_input_tokens_seen": 87915748, + "step": 974 + }, + { + "epoch": 4.47816091954023, + "loss": 0.1866350769996643, + "loss_ce": 2.0090221369173378e-05, + "loss_iou": 0.337890625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 87915748, + "step": 974 + }, + { + "epoch": 4.482758620689655, + "grad_norm": 4.078469036538909, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 88006180, + "step": 975 + }, + { + "epoch": 4.482758620689655, + "loss": 0.16530264914035797, + "loss_ce": 1.9446521037025377e-05, + "loss_iou": 0.345703125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 88006180, + "step": 975 + }, + { + "epoch": 4.487356321839081, + "grad_norm": 14.551219328912012, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 88096576, + "step": 976 + }, + { + "epoch": 4.487356321839081, + "loss": 0.09992989897727966, + "loss_ce": 1.5359226381406188e-05, + "loss_iou": 0.3515625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 88096576, + "step": 976 + }, + { + "epoch": 4.491954022988506, + "grad_norm": 15.074496002244894, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 88186864, + "step": 977 + }, + { + "epoch": 4.491954022988506, + "loss": 0.098409004509449, + "loss_ce": 5.076651632407447e-06, + "loss_iou": 0.205078125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 88186864, + "step": 977 + }, + { + "epoch": 4.496551724137931, + "grad_norm": 8.357602751730505, + "learning_rate": 5e-06, + "loss": 0.094, + "num_input_tokens_seen": 88277108, + "step": 978 + }, + { + "epoch": 4.496551724137931, + "loss": 0.12131184339523315, + "loss_ce": 4.475433343031909e-06, + "loss_iou": 0.318359375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 88277108, + "step": 978 + }, + { + "epoch": 4.501149425287356, + "grad_norm": 4.402022300916744, + "learning_rate": 5e-06, + "loss": 0.0903, + "num_input_tokens_seen": 88367416, + "step": 979 + }, + { + "epoch": 4.501149425287356, + "loss": 0.07329022884368896, + "loss_ce": 7.855774310883135e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 88367416, + "step": 979 + }, + { + "epoch": 4.505747126436781, + "grad_norm": 14.341101022137071, + "learning_rate": 5e-06, + "loss": 0.0673, + "num_input_tokens_seen": 88457876, + "step": 980 + }, + { + "epoch": 4.505747126436781, + "loss": 0.053125277161598206, + "loss_ce": 9.428293196833692e-06, + "loss_iou": 0.296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 88457876, + "step": 980 + }, + { + "epoch": 4.510344827586207, + "grad_norm": 10.397148286106711, + "learning_rate": 5e-06, + "loss": 0.1395, + "num_input_tokens_seen": 88548152, + "step": 981 + }, + { + "epoch": 4.510344827586207, + "loss": 0.17081327736377716, + "loss_ce": 2.164947181881871e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 88548152, + "step": 981 + }, + { + "epoch": 4.514942528735633, + "grad_norm": 4.078102449108354, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 88638536, + "step": 982 + }, + { + "epoch": 4.514942528735633, + "loss": 0.1220250129699707, + "loss_ce": 7.67759484006092e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 88638536, + "step": 982 + }, + { + "epoch": 4.519540229885058, + "grad_norm": 11.283949648161077, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 88728900, + "step": 983 + }, + { + "epoch": 4.519540229885058, + "loss": 0.09133091568946838, + "loss_ce": 7.060511507006595e-06, + "loss_iou": 0.29296875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 88728900, + "step": 983 + }, + { + "epoch": 4.524137931034483, + "grad_norm": 9.40893126127556, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 88819192, + "step": 984 + }, + { + "epoch": 4.524137931034483, + "loss": 0.08349698781967163, + "loss_ce": 8.912971338759235e-07, + "loss_iou": 0.380859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 88819192, + "step": 984 + }, + { + "epoch": 4.528735632183908, + "grad_norm": 7.932363219291443, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 88909616, + "step": 985 + }, + { + "epoch": 4.528735632183908, + "loss": 0.10644952207803726, + "loss_ce": 3.472416938166134e-05, + "loss_iou": 0.396484375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 88909616, + "step": 985 + }, + { + "epoch": 4.533333333333333, + "grad_norm": 2.4934351424602568, + "learning_rate": 5e-06, + "loss": 0.0931, + "num_input_tokens_seen": 88999228, + "step": 986 + }, + { + "epoch": 4.533333333333333, + "loss": 0.09027735888957977, + "loss_ce": 6.3577076616638806e-06, + "loss_iou": 0.349609375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 88999228, + "step": 986 + }, + { + "epoch": 4.537931034482758, + "grad_norm": 4.191824583419648, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 89089628, + "step": 987 + }, + { + "epoch": 4.537931034482758, + "loss": 0.05755450576543808, + "loss_ce": 5.9386413340689614e-05, + "loss_iou": 0.36328125, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 89089628, + "step": 987 + }, + { + "epoch": 4.5425287356321835, + "grad_norm": 4.980888074280528, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 89179956, + "step": 988 + }, + { + "epoch": 4.5425287356321835, + "loss": 0.06999941170215607, + "loss_ce": 7.347244263655739e-06, + "loss_iou": 0.330078125, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 89179956, + "step": 988 + }, + { + "epoch": 4.5471264367816095, + "grad_norm": 6.574375021382182, + "learning_rate": 5e-06, + "loss": 0.1534, + "num_input_tokens_seen": 89270268, + "step": 989 + }, + { + "epoch": 4.5471264367816095, + "loss": 0.1247449517250061, + "loss_ce": 4.3468216972541995e-06, + "loss_iou": 0.359375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 89270268, + "step": 989 + }, + { + "epoch": 4.551724137931035, + "grad_norm": 5.770679845977342, + "learning_rate": 5e-06, + "loss": 0.0699, + "num_input_tokens_seen": 89360680, + "step": 990 + }, + { + "epoch": 4.551724137931035, + "loss": 0.05505823343992233, + "loss_ce": 4.520721631706692e-06, + "loss_iou": 0.38671875, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 89360680, + "step": 990 + }, + { + "epoch": 4.55632183908046, + "grad_norm": 3.4680196649545714, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 89451124, + "step": 991 + }, + { + "epoch": 4.55632183908046, + "loss": 0.1168176680803299, + "loss_ce": 2.6902296667685732e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 89451124, + "step": 991 + }, + { + "epoch": 4.560919540229885, + "grad_norm": 6.00004334718162, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 89541568, + "step": 992 + }, + { + "epoch": 4.560919540229885, + "loss": 0.11234864592552185, + "loss_ce": 1.3441143892123364e-05, + "loss_iou": 0.380859375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 89541568, + "step": 992 + }, + { + "epoch": 4.56551724137931, + "grad_norm": 6.73223448714399, + "learning_rate": 5e-06, + "loss": 0.0733, + "num_input_tokens_seen": 89632052, + "step": 993 + }, + { + "epoch": 4.56551724137931, + "loss": 0.07431768625974655, + "loss_ce": 7.383117917925119e-06, + "loss_iou": 0.32421875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 89632052, + "step": 993 + }, + { + "epoch": 4.570114942528735, + "grad_norm": 11.420047670032142, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 89722536, + "step": 994 + }, + { + "epoch": 4.570114942528735, + "loss": 0.09373007714748383, + "loss_ce": 0.008082492277026176, + "loss_iou": 0.32421875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 89722536, + "step": 994 + }, + { + "epoch": 4.574712643678161, + "grad_norm": 14.176346167444665, + "learning_rate": 5e-06, + "loss": 0.113, + "num_input_tokens_seen": 89812932, + "step": 995 + }, + { + "epoch": 4.574712643678161, + "loss": 0.12351539731025696, + "loss_ce": 1.0755041330412496e-05, + "loss_iou": 0.373046875, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 89812932, + "step": 995 + }, + { + "epoch": 4.5793103448275865, + "grad_norm": 10.667470789331928, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 89903292, + "step": 996 + }, + { + "epoch": 4.5793103448275865, + "loss": 0.11320923268795013, + "loss_ce": 1.1899982382601593e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 89903292, + "step": 996 + }, + { + "epoch": 4.583908045977012, + "grad_norm": 7.2249750864413205, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 89993576, + "step": 997 + }, + { + "epoch": 4.583908045977012, + "loss": 0.11794456839561462, + "loss_ce": 2.465084253344685e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 89993576, + "step": 997 + }, + { + "epoch": 4.588505747126437, + "grad_norm": 10.042877155900863, + "learning_rate": 5e-06, + "loss": 0.1231, + "num_input_tokens_seen": 90083884, + "step": 998 + }, + { + "epoch": 4.588505747126437, + "loss": 0.1203087866306305, + "loss_ce": 8.497871931467671e-06, + "loss_iou": 0.345703125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 90083884, + "step": 998 + }, + { + "epoch": 4.593103448275862, + "grad_norm": 4.373260926369358, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 90174300, + "step": 999 + }, + { + "epoch": 4.593103448275862, + "loss": 0.08949369937181473, + "loss_ce": 1.6161524399649352e-05, + "loss_iou": 0.3984375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 90174300, + "step": 999 + }, + { + "epoch": 4.597701149425287, + "grad_norm": 25.49877787319248, + "learning_rate": 5e-06, + "loss": 0.1146, + "num_input_tokens_seen": 90264624, + "step": 1000 + }, + { + "epoch": 4.597701149425287, + "eval_seeclick_CIoU": 0.4661611318588257, + "eval_seeclick_GIoU": 0.4498722702264786, + "eval_seeclick_IoU": 0.5080912709236145, + "eval_seeclick_MAE_all": 0.060889746993780136, + "eval_seeclick_MAE_h": 0.05166458524763584, + "eval_seeclick_MAE_w": 0.10992535948753357, + "eval_seeclick_MAE_x_boxes": 0.1068052388727665, + "eval_seeclick_MAE_y_boxes": 0.054549889639019966, + "eval_seeclick_NUM_probability": 0.9999993443489075, + "eval_seeclick_inside_bbox": 0.7698863744735718, + "eval_seeclick_loss": 0.3709671199321747, + "eval_seeclick_loss_ce": 0.07325447350740433, + "eval_seeclick_loss_iou": 0.47247314453125, + "eval_seeclick_loss_num": 0.06253814697265625, + "eval_seeclick_loss_xval": 0.3125, + "eval_seeclick_runtime": 75.293, + "eval_seeclick_samples_per_second": 0.571, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 90264624, + "step": 1000 + }, + { + "epoch": 4.597701149425287, + "eval_icons_CIoU": 0.5760022103786469, + "eval_icons_GIoU": 0.576844722032547, + "eval_icons_IoU": 0.613362729549408, + "eval_icons_MAE_all": 0.04183553345501423, + "eval_icons_MAE_h": 0.07954326272010803, + "eval_icons_MAE_w": 0.06392070464789867, + "eval_icons_MAE_x_boxes": 0.059913450852036476, + "eval_icons_MAE_y_boxes": 0.07867859303951263, + "eval_icons_NUM_probability": 0.9999994337558746, + "eval_icons_inside_bbox": 0.8107638955116272, + "eval_icons_loss": 0.20698395371437073, + "eval_icons_loss_ce": 1.277818611811199e-06, + "eval_icons_loss_iou": 0.42694091796875, + "eval_icons_loss_num": 0.04441070556640625, + "eval_icons_loss_xval": 0.2221221923828125, + "eval_icons_runtime": 88.2999, + "eval_icons_samples_per_second": 0.566, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 90264624, + "step": 1000 + }, + { + "epoch": 4.597701149425287, + "eval_screenspot_CIoU": 0.4215618173281352, + "eval_screenspot_GIoU": 0.4092308084170024, + "eval_screenspot_IoU": 0.48668718338012695, + "eval_screenspot_MAE_all": 0.08650621399283409, + "eval_screenspot_MAE_h": 0.08026436219612758, + "eval_screenspot_MAE_w": 0.17552465697129568, + "eval_screenspot_MAE_x_boxes": 0.16668692231178284, + "eval_screenspot_MAE_y_boxes": 0.07600387185811996, + "eval_screenspot_NUM_probability": 0.9999993443489075, + "eval_screenspot_inside_bbox": 0.7637499968210856, + "eval_screenspot_loss": 0.43140459060668945, + "eval_screenspot_loss_ce": 0.00010789081594945553, + "eval_screenspot_loss_iou": 0.4072265625, + "eval_screenspot_loss_num": 0.08810933430989583, + "eval_screenspot_loss_xval": 0.4403483072916667, + "eval_screenspot_runtime": 149.3508, + "eval_screenspot_samples_per_second": 0.596, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 90264624, + "step": 1000 + }, + { + "epoch": 4.597701149425287, + "eval_compot_CIoU": 0.47925493121147156, + "eval_compot_GIoU": 0.4560560882091522, + "eval_compot_IoU": 0.5442279279232025, + "eval_compot_MAE_all": 0.0558595210313797, + "eval_compot_MAE_h": 0.07545717805624008, + "eval_compot_MAE_w": 0.11159718781709671, + "eval_compot_MAE_x_boxes": 0.10114440321922302, + "eval_compot_MAE_y_boxes": 0.07544495910406113, + "eval_compot_NUM_probability": 0.9999988079071045, + "eval_compot_inside_bbox": 0.8072916567325592, + "eval_compot_loss": 0.3027111887931824, + "eval_compot_loss_ce": 0.01121709169819951, + "eval_compot_loss_iou": 0.4888916015625, + "eval_compot_loss_num": 0.05005645751953125, + "eval_compot_loss_xval": 0.250244140625, + "eval_compot_runtime": 86.8458, + "eval_compot_samples_per_second": 0.576, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 90264624, + "step": 1000 + }, + { + "epoch": 4.597701149425287, + "loss": 0.18884405493736267, + "loss_ce": 0.006959293968975544, + "loss_iou": 0.51171875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 90264624, + "step": 1000 + }, + { + "epoch": 4.602298850574712, + "grad_norm": 9.40244134173418, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 90355016, + "step": 1001 + }, + { + "epoch": 4.602298850574712, + "loss": 0.08515298366546631, + "loss_ce": 5.4719315812690184e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 90355016, + "step": 1001 + }, + { + "epoch": 4.606896551724138, + "grad_norm": 11.782183850460743, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 90445512, + "step": 1002 + }, + { + "epoch": 4.606896551724138, + "loss": 0.08575557172298431, + "loss_ce": 1.6436308214906603e-05, + "loss_iou": 0.365234375, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 90445512, + "step": 1002 + }, + { + "epoch": 4.611494252873563, + "grad_norm": 17.076982393187414, + "learning_rate": 5e-06, + "loss": 0.1481, + "num_input_tokens_seen": 90536000, + "step": 1003 + }, + { + "epoch": 4.611494252873563, + "loss": 0.10569582879543304, + "loss_ce": 4.39713076048065e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 90536000, + "step": 1003 + }, + { + "epoch": 4.6160919540229886, + "grad_norm": 16.92181833640432, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 90626368, + "step": 1004 + }, + { + "epoch": 4.6160919540229886, + "loss": 0.10335642099380493, + "loss_ce": 8.644953595648985e-06, + "loss_iou": 0.2890625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 90626368, + "step": 1004 + }, + { + "epoch": 4.620689655172414, + "grad_norm": 5.272669970898714, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 90716756, + "step": 1005 + }, + { + "epoch": 4.620689655172414, + "loss": 0.12034394592046738, + "loss_ce": 1.3136214874975849e-05, + "loss_iou": 0.3125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 90716756, + "step": 1005 + }, + { + "epoch": 4.625287356321839, + "grad_norm": 5.5450374658240476, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 90806388, + "step": 1006 + }, + { + "epoch": 4.625287356321839, + "loss": 0.12717095017433167, + "loss_ce": 4.190186700725462e-06, + "loss_iou": 0.333984375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 90806388, + "step": 1006 + }, + { + "epoch": 4.629885057471264, + "grad_norm": 10.252060177606076, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 90896652, + "step": 1007 + }, + { + "epoch": 4.629885057471264, + "loss": 0.10986798256635666, + "loss_ce": 4.700279987446265e-06, + "loss_iou": 0.306640625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 90896652, + "step": 1007 + }, + { + "epoch": 4.63448275862069, + "grad_norm": 18.179346667786145, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 90986972, + "step": 1008 + }, + { + "epoch": 4.63448275862069, + "loss": 0.09801249206066132, + "loss_ce": 5.289793534757337e-06, + "loss_iou": 0.341796875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 90986972, + "step": 1008 + }, + { + "epoch": 4.639080459770115, + "grad_norm": 7.486242267110328, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 91077320, + "step": 1009 + }, + { + "epoch": 4.639080459770115, + "loss": 0.0817301869392395, + "loss_ce": 4.111347152502276e-06, + "loss_iou": 0.396484375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 91077320, + "step": 1009 + }, + { + "epoch": 4.64367816091954, + "grad_norm": 7.378317449558524, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 91167668, + "step": 1010 + }, + { + "epoch": 4.64367816091954, + "loss": 0.11923195421695709, + "loss_ce": 1.5039631762192585e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 91167668, + "step": 1010 + }, + { + "epoch": 4.6482758620689655, + "grad_norm": 6.873925032428975, + "learning_rate": 5e-06, + "loss": 0.1342, + "num_input_tokens_seen": 91257984, + "step": 1011 + }, + { + "epoch": 4.6482758620689655, + "loss": 0.13492737710475922, + "loss_ce": 9.166550626105163e-06, + "loss_iou": 0.3828125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 91257984, + "step": 1011 + }, + { + "epoch": 4.652873563218391, + "grad_norm": 13.994177721226423, + "learning_rate": 5e-06, + "loss": 0.087, + "num_input_tokens_seen": 91348380, + "step": 1012 + }, + { + "epoch": 4.652873563218391, + "loss": 0.07507447898387909, + "loss_ce": 1.2354627187960432e-06, + "loss_iou": 0.369140625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 91348380, + "step": 1012 + }, + { + "epoch": 4.657471264367816, + "grad_norm": 3.014366999503986, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 91438616, + "step": 1013 + }, + { + "epoch": 4.657471264367816, + "loss": 0.16335362195968628, + "loss_ce": 2.354383104830049e-05, + "loss_iou": 0.37890625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 91438616, + "step": 1013 + }, + { + "epoch": 4.662068965517241, + "grad_norm": 17.72787254515268, + "learning_rate": 5e-06, + "loss": 0.1255, + "num_input_tokens_seen": 91529032, + "step": 1014 + }, + { + "epoch": 4.662068965517241, + "loss": 0.13172364234924316, + "loss_ce": 9.771740224095993e-06, + "loss_iou": 0.4140625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 91529032, + "step": 1014 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 3.6520684848160605, + "learning_rate": 5e-06, + "loss": 0.126, + "num_input_tokens_seen": 91618688, + "step": 1015 + }, + { + "epoch": 4.666666666666667, + "loss": 0.12861014902591705, + "loss_ce": 9.073386536329053e-06, + "loss_iou": 0.36328125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 91618688, + "step": 1015 + }, + { + "epoch": 4.671264367816092, + "grad_norm": 6.527601418047689, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 91709076, + "step": 1016 + }, + { + "epoch": 4.671264367816092, + "loss": 0.09489475935697556, + "loss_ce": 3.086655124207027e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 91709076, + "step": 1016 + }, + { + "epoch": 4.675862068965517, + "grad_norm": 14.192420949264795, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 91799384, + "step": 1017 + }, + { + "epoch": 4.675862068965517, + "loss": 0.08747270703315735, + "loss_ce": 9.329753083875403e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 91799384, + "step": 1017 + }, + { + "epoch": 4.680459770114942, + "grad_norm": 5.653695210589391, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 91889776, + "step": 1018 + }, + { + "epoch": 4.680459770114942, + "loss": 0.08493378758430481, + "loss_ce": 3.372960463821073e-06, + "loss_iou": 0.357421875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 91889776, + "step": 1018 + }, + { + "epoch": 4.685057471264368, + "grad_norm": 4.63777017993729, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 91980096, + "step": 1019 + }, + { + "epoch": 4.685057471264368, + "loss": 0.07615326344966888, + "loss_ce": 1.1908843589480966e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 91980096, + "step": 1019 + }, + { + "epoch": 4.689655172413794, + "grad_norm": 4.472710831706082, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 92070584, + "step": 1020 + }, + { + "epoch": 4.689655172413794, + "loss": 0.11327888071537018, + "loss_ce": 2.8146971089881845e-05, + "loss_iou": 0.369140625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 92070584, + "step": 1020 + }, + { + "epoch": 4.694252873563219, + "grad_norm": 3.4544586422872907, + "learning_rate": 5e-06, + "loss": 0.096, + "num_input_tokens_seen": 92161012, + "step": 1021 + }, + { + "epoch": 4.694252873563219, + "loss": 0.06871644407510757, + "loss_ce": 6.117389602877665e-06, + "loss_iou": 0.328125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 92161012, + "step": 1021 + }, + { + "epoch": 4.698850574712644, + "grad_norm": 7.42414792402292, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 92251412, + "step": 1022 + }, + { + "epoch": 4.698850574712644, + "loss": 0.10006897151470184, + "loss_ce": 9.33870833250694e-05, + "loss_iou": 0.26953125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 92251412, + "step": 1022 + }, + { + "epoch": 4.703448275862069, + "grad_norm": 6.69514367017083, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 92341956, + "step": 1023 + }, + { + "epoch": 4.703448275862069, + "loss": 0.14697983860969543, + "loss_ce": 6.821977149229497e-05, + "loss_iou": 0.380859375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 92341956, + "step": 1023 + }, + { + "epoch": 4.708045977011494, + "grad_norm": 14.417454722090794, + "learning_rate": 5e-06, + "loss": 0.1256, + "num_input_tokens_seen": 92432144, + "step": 1024 + }, + { + "epoch": 4.708045977011494, + "loss": 0.13804002106189728, + "loss_ce": 9.01172825251706e-06, + "loss_iou": 0.359375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 92432144, + "step": 1024 + }, + { + "epoch": 4.712643678160919, + "grad_norm": 7.595947463221086, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 92522620, + "step": 1025 + }, + { + "epoch": 4.712643678160919, + "loss": 0.10795444995164871, + "loss_ce": 2.1407777239801362e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 92522620, + "step": 1025 + }, + { + "epoch": 4.7172413793103445, + "grad_norm": 3.0300131913282713, + "learning_rate": 5e-06, + "loss": 0.1128, + "num_input_tokens_seen": 92612984, + "step": 1026 + }, + { + "epoch": 4.7172413793103445, + "loss": 0.06870204210281372, + "loss_ce": 6.9793131842743605e-06, + "loss_iou": 0.302734375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 92612984, + "step": 1026 + }, + { + "epoch": 4.72183908045977, + "grad_norm": 7.887427930687986, + "learning_rate": 5e-06, + "loss": 0.0749, + "num_input_tokens_seen": 92702504, + "step": 1027 + }, + { + "epoch": 4.72183908045977, + "loss": 0.07237892597913742, + "loss_ce": 2.1748670405941084e-05, + "loss_iou": 0.302734375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 92702504, + "step": 1027 + }, + { + "epoch": 4.726436781609196, + "grad_norm": 9.408243473561752, + "learning_rate": 5e-06, + "loss": 0.0949, + "num_input_tokens_seen": 92792888, + "step": 1028 + }, + { + "epoch": 4.726436781609196, + "loss": 0.07289677858352661, + "loss_ce": 5.5426976359740365e-06, + "loss_iou": 0.2734375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 92792888, + "step": 1028 + }, + { + "epoch": 4.731034482758621, + "grad_norm": 9.742419905000565, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 92883336, + "step": 1029 + }, + { + "epoch": 4.731034482758621, + "loss": 0.10518839955329895, + "loss_ce": 2.4827564629958943e-05, + "loss_iou": 0.326171875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 92883336, + "step": 1029 + }, + { + "epoch": 4.735632183908046, + "grad_norm": 2.7055194664846183, + "learning_rate": 5e-06, + "loss": 0.1231, + "num_input_tokens_seen": 92972936, + "step": 1030 + }, + { + "epoch": 4.735632183908046, + "loss": 0.10875724256038666, + "loss_ce": 8.414130570599809e-05, + "loss_iou": 0.373046875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 92972936, + "step": 1030 + }, + { + "epoch": 4.740229885057471, + "grad_norm": 11.304101815777114, + "learning_rate": 5e-06, + "loss": 0.1415, + "num_input_tokens_seen": 93063392, + "step": 1031 + }, + { + "epoch": 4.740229885057471, + "loss": 0.09561444818973541, + "loss_ce": 2.8712015591736417e-06, + "loss_iou": 0.3203125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 93063392, + "step": 1031 + }, + { + "epoch": 4.744827586206896, + "grad_norm": 15.258461645144374, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 93152916, + "step": 1032 + }, + { + "epoch": 4.744827586206896, + "loss": 0.11910735070705414, + "loss_ce": 4.878862000623485e-06, + "loss_iou": 0.39453125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 93152916, + "step": 1032 + }, + { + "epoch": 4.749425287356322, + "grad_norm": 2.356961429838356, + "learning_rate": 5e-06, + "loss": 0.0921, + "num_input_tokens_seen": 93243212, + "step": 1033 + }, + { + "epoch": 4.749425287356322, + "loss": 0.11762569844722748, + "loss_ce": 0.0005450131138786674, + "loss_iou": 0.375, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 93243212, + "step": 1033 + }, + { + "epoch": 4.7540229885057474, + "grad_norm": 15.1692410364505, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 93333636, + "step": 1034 + }, + { + "epoch": 4.7540229885057474, + "loss": 0.147009015083313, + "loss_ce": 5.839167897647712e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 93333636, + "step": 1034 + }, + { + "epoch": 4.758620689655173, + "grad_norm": 7.946502842662023, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 93423992, + "step": 1035 + }, + { + "epoch": 4.758620689655173, + "loss": 0.1617765724658966, + "loss_ce": 3.341192496009171e-05, + "loss_iou": 0.36328125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 93423992, + "step": 1035 + }, + { + "epoch": 4.763218390804598, + "grad_norm": 15.109850534366604, + "learning_rate": 5e-06, + "loss": 0.0785, + "num_input_tokens_seen": 93514320, + "step": 1036 + }, + { + "epoch": 4.763218390804598, + "loss": 0.0966891348361969, + "loss_ce": 9.446171134186443e-06, + "loss_iou": 0.41015625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 93514320, + "step": 1036 + }, + { + "epoch": 4.767816091954023, + "grad_norm": 4.842981275277206, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 93603144, + "step": 1037 + }, + { + "epoch": 4.767816091954023, + "loss": 0.11073972284793854, + "loss_ce": 2.194262560806237e-05, + "loss_iou": 0.46875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 93603144, + "step": 1037 + }, + { + "epoch": 4.772413793103448, + "grad_norm": 21.01549868516104, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 93693660, + "step": 1038 + }, + { + "epoch": 4.772413793103448, + "loss": 0.08537042886018753, + "loss_ce": 1.2760566278302576e-05, + "loss_iou": 0.283203125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 93693660, + "step": 1038 + }, + { + "epoch": 4.777011494252873, + "grad_norm": 10.712563478328072, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 93783296, + "step": 1039 + }, + { + "epoch": 4.777011494252873, + "loss": 0.11916904151439667, + "loss_ce": 2.8420639864634722e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 93783296, + "step": 1039 + }, + { + "epoch": 4.781609195402299, + "grad_norm": 7.345903871032101, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 93873768, + "step": 1040 + }, + { + "epoch": 4.781609195402299, + "loss": 0.05897822231054306, + "loss_ce": 3.0012429306225386e-06, + "loss_iou": 0.35546875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 93873768, + "step": 1040 + }, + { + "epoch": 4.786206896551724, + "grad_norm": 4.133426960663791, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 93964260, + "step": 1041 + }, + { + "epoch": 4.786206896551724, + "loss": 0.0858168751001358, + "loss_ce": 1.6706742826499976e-05, + "loss_iou": 0.375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 93964260, + "step": 1041 + }, + { + "epoch": 4.7908045977011495, + "grad_norm": 7.686995682812649, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 94054556, + "step": 1042 + }, + { + "epoch": 4.7908045977011495, + "loss": 0.12363208830356598, + "loss_ce": 5.381094069889514e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 94054556, + "step": 1042 + }, + { + "epoch": 4.795402298850575, + "grad_norm": 1.6904479508606451, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 94145004, + "step": 1043 + }, + { + "epoch": 4.795402298850575, + "loss": 0.06091611459851265, + "loss_ce": 3.0297642297227867e-06, + "loss_iou": 0.349609375, + "loss_num": 0.01214599609375, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 94145004, + "step": 1043 + }, + { + "epoch": 4.8, + "grad_norm": 3.669144596101602, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 94235372, + "step": 1044 + }, + { + "epoch": 4.8, + "loss": 0.13778972625732422, + "loss_ce": 2.869950094464002e-06, + "loss_iou": 0.3828125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 94235372, + "step": 1044 + }, + { + "epoch": 4.804597701149425, + "grad_norm": 6.97279172429298, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 94325696, + "step": 1045 + }, + { + "epoch": 4.804597701149425, + "loss": 0.05801108479499817, + "loss_ce": 0.0023165077436715364, + "loss_iou": 0.349609375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 94325696, + "step": 1045 + }, + { + "epoch": 4.809195402298851, + "grad_norm": 11.58305315850037, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 94416004, + "step": 1046 + }, + { + "epoch": 4.809195402298851, + "loss": 0.1216389536857605, + "loss_ce": 1.1142959920107387e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 94416004, + "step": 1046 + }, + { + "epoch": 4.813793103448276, + "grad_norm": 16.0940836142544, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 94506316, + "step": 1047 + }, + { + "epoch": 4.813793103448276, + "loss": 0.10546806454658508, + "loss_ce": 2.9831333449692465e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 94506316, + "step": 1047 + }, + { + "epoch": 4.818390804597701, + "grad_norm": 10.955820630731282, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 94596544, + "step": 1048 + }, + { + "epoch": 4.818390804597701, + "loss": 0.16584840416908264, + "loss_ce": 1.5889574569882825e-05, + "loss_iou": 0.333984375, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 94596544, + "step": 1048 + }, + { + "epoch": 4.8229885057471265, + "grad_norm": 6.034273447193758, + "learning_rate": 5e-06, + "loss": 0.0587, + "num_input_tokens_seen": 94686864, + "step": 1049 + }, + { + "epoch": 4.8229885057471265, + "loss": 0.06973493099212646, + "loss_ce": 2.2640974748355802e-06, + "loss_iou": 0.359375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 94686864, + "step": 1049 + }, + { + "epoch": 4.827586206896552, + "grad_norm": 4.252496465368457, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 94777296, + "step": 1050 + }, + { + "epoch": 4.827586206896552, + "loss": 0.10622747987508774, + "loss_ce": 7.20810130587779e-05, + "loss_iou": 0.42578125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 94777296, + "step": 1050 + }, + { + "epoch": 4.832183908045977, + "grad_norm": 3.0187179996735334, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 94867600, + "step": 1051 + }, + { + "epoch": 4.832183908045977, + "loss": 0.1049966886639595, + "loss_ce": 9.584927056494053e-07, + "loss_iou": 0.33984375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 94867600, + "step": 1051 + }, + { + "epoch": 4.836781609195402, + "grad_norm": 16.087126169900518, + "learning_rate": 5e-06, + "loss": 0.082, + "num_input_tokens_seen": 94958008, + "step": 1052 + }, + { + "epoch": 4.836781609195402, + "loss": 0.1101231724023819, + "loss_ce": 1.5750989405205473e-05, + "loss_iou": 0.4296875, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 94958008, + "step": 1052 + }, + { + "epoch": 4.841379310344828, + "grad_norm": 12.550203705796418, + "learning_rate": 5e-06, + "loss": 0.0795, + "num_input_tokens_seen": 95048356, + "step": 1053 + }, + { + "epoch": 4.841379310344828, + "loss": 0.08209192752838135, + "loss_ce": 1.4905896023265086e-05, + "loss_iou": 0.31640625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 95048356, + "step": 1053 + }, + { + "epoch": 4.845977011494253, + "grad_norm": 4.5783225747939555, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 95138768, + "step": 1054 + }, + { + "epoch": 4.845977011494253, + "loss": 0.1782418191432953, + "loss_ce": 3.904753612005152e-06, + "loss_iou": 0.35546875, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 95138768, + "step": 1054 + }, + { + "epoch": 4.850574712643678, + "grad_norm": 17.243051689458277, + "learning_rate": 5e-06, + "loss": 0.1833, + "num_input_tokens_seen": 95229088, + "step": 1055 + }, + { + "epoch": 4.850574712643678, + "loss": 0.11458714306354523, + "loss_ce": 2.4146735086105764e-05, + "loss_iou": 0.40234375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 95229088, + "step": 1055 + }, + { + "epoch": 4.855172413793103, + "grad_norm": 6.0951803316443876, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 95318604, + "step": 1056 + }, + { + "epoch": 4.855172413793103, + "loss": 0.09546582400798798, + "loss_ce": 6.840703463240061e-06, + "loss_iou": 0.36328125, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 95318604, + "step": 1056 + }, + { + "epoch": 4.8597701149425285, + "grad_norm": 2.513374225059268, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 95409032, + "step": 1057 + }, + { + "epoch": 4.8597701149425285, + "loss": 0.105290487408638, + "loss_ce": 0.00047786219511181116, + "loss_iou": 0.330078125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 95409032, + "step": 1057 + }, + { + "epoch": 4.864367816091954, + "grad_norm": 3.376425029688418, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 95499352, + "step": 1058 + }, + { + "epoch": 4.864367816091954, + "loss": 0.1294291913509369, + "loss_ce": 0.0005839776713401079, + "loss_iou": 0.369140625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 95499352, + "step": 1058 + }, + { + "epoch": 4.86896551724138, + "grad_norm": 4.718684401574836, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 95589880, + "step": 1059 + }, + { + "epoch": 4.86896551724138, + "loss": 0.15736906230449677, + "loss_ce": 2.0424929971341044e-05, + "loss_iou": 0.376953125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 95589880, + "step": 1059 + }, + { + "epoch": 4.873563218390805, + "grad_norm": 8.564949517103082, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 95680168, + "step": 1060 + }, + { + "epoch": 4.873563218390805, + "loss": 0.07507706433534622, + "loss_ce": 3.8181360650924034e-06, + "loss_iou": 0.421875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 95680168, + "step": 1060 + }, + { + "epoch": 4.87816091954023, + "grad_norm": 10.678428697285344, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 95770520, + "step": 1061 + }, + { + "epoch": 4.87816091954023, + "loss": 0.13752196729183197, + "loss_ce": 4.027618342661299e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 95770520, + "step": 1061 + }, + { + "epoch": 4.882758620689655, + "grad_norm": 5.476416486972618, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 95860856, + "step": 1062 + }, + { + "epoch": 4.882758620689655, + "loss": 0.10755479335784912, + "loss_ce": 1.0849686077563092e-05, + "loss_iou": 0.302734375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 95860856, + "step": 1062 + }, + { + "epoch": 4.88735632183908, + "grad_norm": 7.341716557148394, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 95951252, + "step": 1063 + }, + { + "epoch": 4.88735632183908, + "loss": 0.08713214844465256, + "loss_ce": 4.465159690880682e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 95951252, + "step": 1063 + }, + { + "epoch": 4.8919540229885055, + "grad_norm": 17.624799280413747, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 96041592, + "step": 1064 + }, + { + "epoch": 4.8919540229885055, + "loss": 0.1143837422132492, + "loss_ce": 3.4374650567770004e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 96041592, + "step": 1064 + }, + { + "epoch": 4.896551724137931, + "grad_norm": 9.416499244614185, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 96132008, + "step": 1065 + }, + { + "epoch": 4.896551724137931, + "loss": 0.11976835876703262, + "loss_ce": 4.7893059672787786e-05, + "loss_iou": 0.42578125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 96132008, + "step": 1065 + }, + { + "epoch": 4.901149425287357, + "grad_norm": 6.806788764833692, + "learning_rate": 5e-06, + "loss": 0.1071, + "num_input_tokens_seen": 96222480, + "step": 1066 + }, + { + "epoch": 4.901149425287357, + "loss": 0.09189382195472717, + "loss_ce": 5.3979856602381915e-06, + "loss_iou": 0.302734375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 96222480, + "step": 1066 + }, + { + "epoch": 4.905747126436782, + "grad_norm": 5.549592637186866, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 96312836, + "step": 1067 + }, + { + "epoch": 4.905747126436782, + "loss": 0.08161412179470062, + "loss_ce": 4.0634346078149974e-05, + "loss_iou": 0.349609375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 96312836, + "step": 1067 + }, + { + "epoch": 4.910344827586207, + "grad_norm": 6.49206654790828, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 96403204, + "step": 1068 + }, + { + "epoch": 4.910344827586207, + "loss": 0.10242622345685959, + "loss_ce": 9.231689546140842e-06, + "loss_iou": 0.240234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 96403204, + "step": 1068 + }, + { + "epoch": 4.914942528735632, + "grad_norm": 3.007842215921885, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 96493592, + "step": 1069 + }, + { + "epoch": 4.914942528735632, + "loss": 0.09680457413196564, + "loss_ce": 6.385076267179102e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 96493592, + "step": 1069 + }, + { + "epoch": 4.919540229885057, + "grad_norm": 10.319391804852176, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 96583944, + "step": 1070 + }, + { + "epoch": 4.919540229885057, + "loss": 0.08242271840572357, + "loss_ce": 5.577308911597356e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 96583944, + "step": 1070 + }, + { + "epoch": 4.924137931034482, + "grad_norm": 2.879146983290818, + "learning_rate": 5e-06, + "loss": 0.0895, + "num_input_tokens_seen": 96674328, + "step": 1071 + }, + { + "epoch": 4.924137931034482, + "loss": 0.10010585933923721, + "loss_ce": 8.201654054573737e-06, + "loss_iou": 0.38671875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 96674328, + "step": 1071 + }, + { + "epoch": 4.928735632183908, + "grad_norm": 15.403924468504222, + "learning_rate": 5e-06, + "loss": 0.0968, + "num_input_tokens_seen": 96764696, + "step": 1072 + }, + { + "epoch": 4.928735632183908, + "loss": 0.12627822160720825, + "loss_ce": 2.7013094950234517e-05, + "loss_iou": 0.34375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 96764696, + "step": 1072 + }, + { + "epoch": 4.933333333333334, + "grad_norm": 24.679178097861215, + "learning_rate": 5e-06, + "loss": 0.0828, + "num_input_tokens_seen": 96855144, + "step": 1073 + }, + { + "epoch": 4.933333333333334, + "loss": 0.11346793919801712, + "loss_ce": 3.5857883631251752e-06, + "loss_iou": 0.400390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 96855144, + "step": 1073 + }, + { + "epoch": 4.937931034482759, + "grad_norm": 17.443099366886536, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 96945600, + "step": 1074 + }, + { + "epoch": 4.937931034482759, + "loss": 0.09844504296779633, + "loss_ce": 5.636948117171414e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 96945600, + "step": 1074 + }, + { + "epoch": 4.942528735632184, + "grad_norm": 11.649607579048189, + "learning_rate": 5e-06, + "loss": 0.0836, + "num_input_tokens_seen": 97035976, + "step": 1075 + }, + { + "epoch": 4.942528735632184, + "loss": 0.09174925833940506, + "loss_ce": 1.3420964933175128e-05, + "loss_iou": 0.357421875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 97035976, + "step": 1075 + }, + { + "epoch": 4.947126436781609, + "grad_norm": 8.321494891514785, + "learning_rate": 5e-06, + "loss": 0.0985, + "num_input_tokens_seen": 97126184, + "step": 1076 + }, + { + "epoch": 4.947126436781609, + "loss": 0.1108294129371643, + "loss_ce": 4.831304522667779e-06, + "loss_iou": 0.3203125, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 97126184, + "step": 1076 + }, + { + "epoch": 4.951724137931034, + "grad_norm": 2.7753841701876714, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 97215208, + "step": 1077 + }, + { + "epoch": 4.951724137931034, + "loss": 0.10865768790245056, + "loss_ce": 7.61427145334892e-05, + "loss_iou": 0.265625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 97215208, + "step": 1077 + }, + { + "epoch": 4.956321839080459, + "grad_norm": 3.502619253449482, + "learning_rate": 5e-06, + "loss": 0.1241, + "num_input_tokens_seen": 97305660, + "step": 1078 + }, + { + "epoch": 4.956321839080459, + "loss": 0.07795646786689758, + "loss_ce": 1.4574313354387414e-05, + "loss_iou": 0.376953125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 97305660, + "step": 1078 + }, + { + "epoch": 4.960919540229885, + "grad_norm": 2.214432026018128, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 97395964, + "step": 1079 + }, + { + "epoch": 4.960919540229885, + "loss": 0.09012555330991745, + "loss_ce": 3.7664180126739666e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 97395964, + "step": 1079 + }, + { + "epoch": 4.9655172413793105, + "grad_norm": 6.4074181213192105, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 97486300, + "step": 1080 + }, + { + "epoch": 4.9655172413793105, + "loss": 0.07362109422683716, + "loss_ce": 1.2694945326074958e-05, + "loss_iou": 0.345703125, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 97486300, + "step": 1080 + }, + { + "epoch": 4.970114942528736, + "grad_norm": 10.501486543841219, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 97576584, + "step": 1081 + }, + { + "epoch": 4.970114942528736, + "loss": 0.07947252690792084, + "loss_ce": 4.7581602302670944e-06, + "loss_iou": 0.3515625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 97576584, + "step": 1081 + }, + { + "epoch": 4.974712643678161, + "grad_norm": 5.4733017757114615, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 97666176, + "step": 1082 + }, + { + "epoch": 4.974712643678161, + "loss": 0.2110956907272339, + "loss_ce": 5.60448916075984e-06, + "loss_iou": 0.4765625, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 97666176, + "step": 1082 + }, + { + "epoch": 4.979310344827586, + "grad_norm": 4.717726419007518, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 97756556, + "step": 1083 + }, + { + "epoch": 4.979310344827586, + "loss": 0.12327490001916885, + "loss_ce": 1.4395311154657975e-05, + "loss_iou": 0.328125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 97756556, + "step": 1083 + }, + { + "epoch": 4.983908045977012, + "grad_norm": 6.386820886831165, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 97845408, + "step": 1084 + }, + { + "epoch": 4.983908045977012, + "loss": 0.1393074244260788, + "loss_ce": 0.00013201337424106896, + "loss_iou": 0.376953125, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 97845408, + "step": 1084 + }, + { + "epoch": 4.988505747126437, + "grad_norm": 17.923854404624937, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 97935700, + "step": 1085 + }, + { + "epoch": 4.988505747126437, + "loss": 0.1198144182562828, + "loss_ce": 2.406018211331684e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0240478515625, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 97935700, + "step": 1085 + }, + { + "epoch": 4.993103448275862, + "grad_norm": 8.89371007698411, + "learning_rate": 5e-06, + "loss": 0.1061, + "num_input_tokens_seen": 98026076, + "step": 1086 + }, + { + "epoch": 4.993103448275862, + "loss": 0.08179621398448944, + "loss_ce": 3.9620990719413385e-05, + "loss_iou": 0.373046875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 98026076, + "step": 1086 + }, + { + "epoch": 4.997701149425287, + "grad_norm": 3.2738674103409777, + "learning_rate": 5e-06, + "loss": 0.1316, + "num_input_tokens_seen": 98116464, + "step": 1087 + }, + { + "epoch": 4.997701149425287, + "loss": 0.1348174810409546, + "loss_ce": 2.13292059925152e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 98116464, + "step": 1087 + }, + { + "epoch": 4.997701149425287, + "loss": 0.11705206334590912, + "loss_ce": 0.00010870777623495087, + "loss_iou": 0.447265625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 98160852, + "step": 1087 + }, + { + "epoch": 5.002298850574713, + "grad_norm": 3.097439221083733, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 98206020, + "step": 1088 + }, + { + "epoch": 5.002298850574713, + "loss": 0.09875764697790146, + "loss_ce": 6.379698606906459e-05, + "loss_iou": 0.328125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 98206020, + "step": 1088 + }, + { + "epoch": 5.006896551724138, + "grad_norm": 12.127549215108248, + "learning_rate": 5e-06, + "loss": 0.1121, + "num_input_tokens_seen": 98296448, + "step": 1089 + }, + { + "epoch": 5.006896551724138, + "loss": 0.1051742285490036, + "loss_ce": 5.6425316870445386e-05, + "loss_iou": 0.40234375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 98296448, + "step": 1089 + }, + { + "epoch": 5.011494252873563, + "grad_norm": 3.0110884831761777, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 98386704, + "step": 1090 + }, + { + "epoch": 5.011494252873563, + "loss": 0.117917500436306, + "loss_ce": 1.2841372154070996e-05, + "loss_iou": 0.390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 98386704, + "step": 1090 + }, + { + "epoch": 5.016091954022989, + "grad_norm": 8.800434556379146, + "learning_rate": 5e-06, + "loss": 0.1628, + "num_input_tokens_seen": 98475456, + "step": 1091 + }, + { + "epoch": 5.016091954022989, + "loss": 0.15691357851028442, + "loss_ce": 0.0011213450925424695, + "loss_iou": 0.390625, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 98475456, + "step": 1091 + }, + { + "epoch": 5.020689655172414, + "grad_norm": 17.875153503898837, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 98565748, + "step": 1092 + }, + { + "epoch": 5.020689655172414, + "loss": 0.08784059435129166, + "loss_ce": 2.6260444428771734e-05, + "loss_iou": 0.3671875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 98565748, + "step": 1092 + }, + { + "epoch": 5.025287356321839, + "grad_norm": 1.9310524258242443, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 98656216, + "step": 1093 + }, + { + "epoch": 5.025287356321839, + "loss": 0.10792776942253113, + "loss_ce": 1.7617947378312238e-05, + "loss_iou": 0.328125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 98656216, + "step": 1093 + }, + { + "epoch": 5.029885057471264, + "grad_norm": 5.2811306139564245, + "learning_rate": 5e-06, + "loss": 0.082, + "num_input_tokens_seen": 98746572, + "step": 1094 + }, + { + "epoch": 5.029885057471264, + "loss": 0.05932975560426712, + "loss_ce": 3.58494685315236e-06, + "loss_iou": 0.302734375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 98746572, + "step": 1094 + }, + { + "epoch": 5.0344827586206895, + "grad_norm": 14.24076067000812, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 98836948, + "step": 1095 + }, + { + "epoch": 5.0344827586206895, + "loss": 0.08336181938648224, + "loss_ce": 4.8835227062227204e-05, + "loss_iou": 0.34375, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 98836948, + "step": 1095 + }, + { + "epoch": 5.039080459770115, + "grad_norm": 7.5844239875355415, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 98927324, + "step": 1096 + }, + { + "epoch": 5.039080459770115, + "loss": 0.16165432333946228, + "loss_ce": 3.3243813959416e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 98927324, + "step": 1096 + }, + { + "epoch": 5.04367816091954, + "grad_norm": 8.539898456692784, + "learning_rate": 5e-06, + "loss": 0.0702, + "num_input_tokens_seen": 99017760, + "step": 1097 + }, + { + "epoch": 5.04367816091954, + "loss": 0.08533644676208496, + "loss_ce": 2.455884896335192e-05, + "loss_iou": 0.3828125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 99017760, + "step": 1097 + }, + { + "epoch": 5.048275862068966, + "grad_norm": 8.297830161901864, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 99108192, + "step": 1098 + }, + { + "epoch": 5.048275862068966, + "loss": 0.084136001765728, + "loss_ce": 2.95531572191976e-05, + "loss_iou": 0.314453125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 99108192, + "step": 1098 + }, + { + "epoch": 5.052873563218391, + "grad_norm": 10.472390357500974, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 99198528, + "step": 1099 + }, + { + "epoch": 5.052873563218391, + "loss": 0.10827698558568954, + "loss_ce": 3.113792627118528e-05, + "loss_iou": 0.318359375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 99198528, + "step": 1099 + }, + { + "epoch": 5.057471264367816, + "grad_norm": 9.292431953931642, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 99288908, + "step": 1100 + }, + { + "epoch": 5.057471264367816, + "loss": 0.14652323722839355, + "loss_ce": 9.990337275667116e-05, + "loss_iou": 0.306640625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 99288908, + "step": 1100 + }, + { + "epoch": 5.062068965517241, + "grad_norm": 6.538074511107595, + "learning_rate": 5e-06, + "loss": 0.0735, + "num_input_tokens_seen": 99379280, + "step": 1101 + }, + { + "epoch": 5.062068965517241, + "loss": 0.07154995948076248, + "loss_ce": 3.2016840123105794e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 99379280, + "step": 1101 + }, + { + "epoch": 5.066666666666666, + "grad_norm": 11.332646756842689, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 99469616, + "step": 1102 + }, + { + "epoch": 5.066666666666666, + "loss": 0.13791075348854065, + "loss_ce": 1.82047119778872e-06, + "loss_iou": 0.365234375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 99469616, + "step": 1102 + }, + { + "epoch": 5.071264367816092, + "grad_norm": 4.595074957103007, + "learning_rate": 5e-06, + "loss": 0.0588, + "num_input_tokens_seen": 99559192, + "step": 1103 + }, + { + "epoch": 5.071264367816092, + "loss": 0.06943254172801971, + "loss_ce": 0.0002797079214360565, + "loss_iou": 0.39453125, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 99559192, + "step": 1103 + }, + { + "epoch": 5.075862068965518, + "grad_norm": 9.00228465896505, + "learning_rate": 5e-06, + "loss": 0.0921, + "num_input_tokens_seen": 99649544, + "step": 1104 + }, + { + "epoch": 5.075862068965518, + "loss": 0.09139812737703323, + "loss_ce": 5.6064582167891786e-06, + "loss_iou": 0.3203125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 99649544, + "step": 1104 + }, + { + "epoch": 5.080459770114943, + "grad_norm": 6.434548797647199, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 99739820, + "step": 1105 + }, + { + "epoch": 5.080459770114943, + "loss": 0.07129809260368347, + "loss_ce": 2.4284672690555453e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 99739820, + "step": 1105 + }, + { + "epoch": 5.085057471264368, + "grad_norm": 4.039364108519724, + "learning_rate": 5e-06, + "loss": 0.086, + "num_input_tokens_seen": 99830196, + "step": 1106 + }, + { + "epoch": 5.085057471264368, + "loss": 0.07135014235973358, + "loss_ce": 1.5301206076401286e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 99830196, + "step": 1106 + }, + { + "epoch": 5.089655172413793, + "grad_norm": 3.655688996317076, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 99920560, + "step": 1107 + }, + { + "epoch": 5.089655172413793, + "loss": 0.10698340833187103, + "loss_ce": 1.9302660803077742e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 99920560, + "step": 1107 + }, + { + "epoch": 5.094252873563218, + "grad_norm": 2.852608379852805, + "learning_rate": 5e-06, + "loss": 0.0846, + "num_input_tokens_seen": 100010896, + "step": 1108 + }, + { + "epoch": 5.094252873563218, + "loss": 0.10475088655948639, + "loss_ce": 1.4554200788552407e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 100010896, + "step": 1108 + }, + { + "epoch": 5.098850574712643, + "grad_norm": 4.647429106957139, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 100101200, + "step": 1109 + }, + { + "epoch": 5.098850574712643, + "loss": 0.0705321729183197, + "loss_ce": 6.04609158472158e-06, + "loss_iou": 0.36328125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 100101200, + "step": 1109 + }, + { + "epoch": 5.103448275862069, + "grad_norm": 4.542358695843599, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 100191484, + "step": 1110 + }, + { + "epoch": 5.103448275862069, + "loss": 0.11061207950115204, + "loss_ce": 1.637555760680698e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 100191484, + "step": 1110 + }, + { + "epoch": 5.1080459770114945, + "grad_norm": 3.826753741184423, + "learning_rate": 5e-06, + "loss": 0.0499, + "num_input_tokens_seen": 100281800, + "step": 1111 + }, + { + "epoch": 5.1080459770114945, + "loss": 0.06089954450726509, + "loss_ce": 1.7182098872581264e-06, + "loss_iou": 0.28515625, + "loss_num": 0.01220703125, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 100281800, + "step": 1111 + }, + { + "epoch": 5.11264367816092, + "grad_norm": 6.265815731921294, + "learning_rate": 5e-06, + "loss": 0.1078, + "num_input_tokens_seen": 100371368, + "step": 1112 + }, + { + "epoch": 5.11264367816092, + "loss": 0.12157793343067169, + "loss_ce": 1.1157572771480773e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 100371368, + "step": 1112 + }, + { + "epoch": 5.117241379310345, + "grad_norm": 15.357245102652293, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 100461748, + "step": 1113 + }, + { + "epoch": 5.117241379310345, + "loss": 0.07990702241659164, + "loss_ce": 1.2005009921267629e-05, + "loss_iou": 0.234375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 100461748, + "step": 1113 + }, + { + "epoch": 5.12183908045977, + "grad_norm": 18.749188171843112, + "learning_rate": 5e-06, + "loss": 0.0803, + "num_input_tokens_seen": 100552144, + "step": 1114 + }, + { + "epoch": 5.12183908045977, + "loss": 0.08655305206775665, + "loss_ce": 5.20814774063183e-06, + "loss_iou": 0.359375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 100552144, + "step": 1114 + }, + { + "epoch": 5.126436781609195, + "grad_norm": 11.884415815340965, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 100642552, + "step": 1115 + }, + { + "epoch": 5.126436781609195, + "loss": 0.09320227056741714, + "loss_ce": 1.5868474747549044e-06, + "loss_iou": 0.337890625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 100642552, + "step": 1115 + }, + { + "epoch": 5.13103448275862, + "grad_norm": 8.417856730574274, + "learning_rate": 5e-06, + "loss": 0.1048, + "num_input_tokens_seen": 100732936, + "step": 1116 + }, + { + "epoch": 5.13103448275862, + "loss": 0.10708998143672943, + "loss_ce": 1.9059289115830325e-05, + "loss_iou": 0.328125, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 100732936, + "step": 1116 + }, + { + "epoch": 5.135632183908046, + "grad_norm": 10.398734680131996, + "learning_rate": 5e-06, + "loss": 0.062, + "num_input_tokens_seen": 100823412, + "step": 1117 + }, + { + "epoch": 5.135632183908046, + "loss": 0.0529075488448143, + "loss_ce": 3.5846733226208016e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 100823412, + "step": 1117 + }, + { + "epoch": 5.1402298850574715, + "grad_norm": 15.371538486553407, + "learning_rate": 5e-06, + "loss": 0.0618, + "num_input_tokens_seen": 100913968, + "step": 1118 + }, + { + "epoch": 5.1402298850574715, + "loss": 0.05358627811074257, + "loss_ce": 8.896154758986086e-05, + "loss_iou": 0.345703125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 100913968, + "step": 1118 + }, + { + "epoch": 5.144827586206897, + "grad_norm": 3.823708765476094, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 101004252, + "step": 1119 + }, + { + "epoch": 5.144827586206897, + "loss": 0.08534368127584457, + "loss_ce": 1.6531796063645743e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 101004252, + "step": 1119 + }, + { + "epoch": 5.149425287356322, + "grad_norm": 3.7000663900486743, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 101094664, + "step": 1120 + }, + { + "epoch": 5.149425287356322, + "loss": 0.09906667470932007, + "loss_ce": 0.0002965327585116029, + "loss_iou": 0.341796875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 101094664, + "step": 1120 + }, + { + "epoch": 5.154022988505747, + "grad_norm": 10.501783275375674, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 101185136, + "step": 1121 + }, + { + "epoch": 5.154022988505747, + "loss": 0.06211956962943077, + "loss_ce": 1.6294376109726727e-05, + "loss_iou": 0.310546875, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 101185136, + "step": 1121 + }, + { + "epoch": 5.158620689655172, + "grad_norm": 5.293184104736612, + "learning_rate": 5e-06, + "loss": 0.1864, + "num_input_tokens_seen": 101275444, + "step": 1122 + }, + { + "epoch": 5.158620689655172, + "loss": 0.13397647440433502, + "loss_ce": 4.307346898713149e-06, + "loss_iou": 0.337890625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 101275444, + "step": 1122 + }, + { + "epoch": 5.163218390804598, + "grad_norm": 7.75756649950046, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 101365108, + "step": 1123 + }, + { + "epoch": 5.163218390804598, + "loss": 0.07096749544143677, + "loss_ce": 1.4126431779004633e-05, + "loss_iou": 0.396484375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 101365108, + "step": 1123 + }, + { + "epoch": 5.167816091954023, + "grad_norm": 7.295865334996816, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 101455576, + "step": 1124 + }, + { + "epoch": 5.167816091954023, + "loss": 0.07797445356845856, + "loss_ce": 0.00015463109593838453, + "loss_iou": 0.451171875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 101455576, + "step": 1124 + }, + { + "epoch": 5.172413793103448, + "grad_norm": 13.03580556396787, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 101546000, + "step": 1125 + }, + { + "epoch": 5.172413793103448, + "loss": 0.15164653956890106, + "loss_ce": 4.69675296699279e-06, + "loss_iou": 0.37890625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 101546000, + "step": 1125 + }, + { + "epoch": 5.1770114942528735, + "grad_norm": 6.396126737755581, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 101636448, + "step": 1126 + }, + { + "epoch": 5.1770114942528735, + "loss": 0.07902251929044724, + "loss_ce": 7.354409899562597e-05, + "loss_iou": 0.27734375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 101636448, + "step": 1126 + }, + { + "epoch": 5.181609195402299, + "grad_norm": 10.026494787186246, + "learning_rate": 5e-06, + "loss": 0.1172, + "num_input_tokens_seen": 101725992, + "step": 1127 + }, + { + "epoch": 5.181609195402299, + "loss": 0.15227390825748444, + "loss_ce": 6.44809642835753e-06, + "loss_iou": 0.376953125, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 101725992, + "step": 1127 + }, + { + "epoch": 5.186206896551724, + "grad_norm": 4.0752860409330935, + "learning_rate": 5e-06, + "loss": 0.0827, + "num_input_tokens_seen": 101816380, + "step": 1128 + }, + { + "epoch": 5.186206896551724, + "loss": 0.06123349815607071, + "loss_ce": 1.5233559679472819e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 101816380, + "step": 1128 + }, + { + "epoch": 5.190804597701149, + "grad_norm": 2.938814507290908, + "learning_rate": 5e-06, + "loss": 0.0785, + "num_input_tokens_seen": 101906756, + "step": 1129 + }, + { + "epoch": 5.190804597701149, + "loss": 0.06633394211530685, + "loss_ce": 1.9246750525780953e-05, + "loss_iou": 0.310546875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 101906756, + "step": 1129 + }, + { + "epoch": 5.195402298850575, + "grad_norm": 4.357895106909357, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 101997112, + "step": 1130 + }, + { + "epoch": 5.195402298850575, + "loss": 0.062284551560878754, + "loss_ce": 1.343094845651649e-05, + "loss_iou": 0.345703125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 101997112, + "step": 1130 + }, + { + "epoch": 5.2, + "grad_norm": 3.8775406744142624, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 102087380, + "step": 1131 + }, + { + "epoch": 5.2, + "loss": 0.08111874014139175, + "loss_ce": 3.01617683362565e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 102087380, + "step": 1131 + }, + { + "epoch": 5.204597701149425, + "grad_norm": 4.692014408620994, + "learning_rate": 5e-06, + "loss": 0.087, + "num_input_tokens_seen": 102177880, + "step": 1132 + }, + { + "epoch": 5.204597701149425, + "loss": 0.11302285641431808, + "loss_ce": 3.152359931846149e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 102177880, + "step": 1132 + }, + { + "epoch": 5.2091954022988505, + "grad_norm": 3.3544978398050005, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 102267436, + "step": 1133 + }, + { + "epoch": 5.2091954022988505, + "loss": 0.15970894694328308, + "loss_ce": 1.0458235919941217e-05, + "loss_iou": 0.35546875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 102267436, + "step": 1133 + }, + { + "epoch": 5.213793103448276, + "grad_norm": 2.4937616664035356, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 102357772, + "step": 1134 + }, + { + "epoch": 5.213793103448276, + "loss": 0.07588402926921844, + "loss_ce": 4.784332122653723e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 102357772, + "step": 1134 + }, + { + "epoch": 5.218390804597701, + "grad_norm": 11.78713407810388, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 102448228, + "step": 1135 + }, + { + "epoch": 5.218390804597701, + "loss": 0.12686412036418915, + "loss_ce": 2.5471808839938603e-06, + "loss_iou": 0.333984375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 102448228, + "step": 1135 + }, + { + "epoch": 5.222988505747127, + "grad_norm": 7.345884023163392, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 102538620, + "step": 1136 + }, + { + "epoch": 5.222988505747127, + "loss": 0.07740141451358795, + "loss_ce": 2.40960180235561e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 102538620, + "step": 1136 + }, + { + "epoch": 5.227586206896552, + "grad_norm": 9.68121463931108, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 102628180, + "step": 1137 + }, + { + "epoch": 5.227586206896552, + "loss": 0.09852880239486694, + "loss_ce": 1.8055738109978847e-05, + "loss_iou": 0.412109375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 102628180, + "step": 1137 + }, + { + "epoch": 5.232183908045977, + "grad_norm": 3.7617569064718013, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 102717736, + "step": 1138 + }, + { + "epoch": 5.232183908045977, + "loss": 0.08675423264503479, + "loss_ce": 2.3268486984306946e-05, + "loss_iou": 0.32421875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 102717736, + "step": 1138 + }, + { + "epoch": 5.236781609195402, + "grad_norm": 3.9125840477144767, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 102808252, + "step": 1139 + }, + { + "epoch": 5.236781609195402, + "loss": 0.1034986600279808, + "loss_ce": 1.3551202755479608e-05, + "loss_iou": 0.328125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 102808252, + "step": 1139 + }, + { + "epoch": 5.241379310344827, + "grad_norm": 2.097538565665048, + "learning_rate": 5e-06, + "loss": 0.0827, + "num_input_tokens_seen": 102898628, + "step": 1140 + }, + { + "epoch": 5.241379310344827, + "loss": 0.06704915314912796, + "loss_ce": 2.035472562056384e-06, + "loss_iou": 0.296875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 102898628, + "step": 1140 + }, + { + "epoch": 5.2459770114942526, + "grad_norm": 10.818635533541947, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 102988948, + "step": 1141 + }, + { + "epoch": 5.2459770114942526, + "loss": 0.07962171733379364, + "loss_ce": 2.4237215257016942e-05, + "loss_iou": 0.275390625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 102988948, + "step": 1141 + }, + { + "epoch": 5.250574712643678, + "grad_norm": 7.192797833007836, + "learning_rate": 5e-06, + "loss": 0.0685, + "num_input_tokens_seen": 103079352, + "step": 1142 + }, + { + "epoch": 5.250574712643678, + "loss": 0.08450967818498611, + "loss_ce": 6.502830729004927e-06, + "loss_iou": 0.34375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 103079352, + "step": 1142 + }, + { + "epoch": 5.255172413793104, + "grad_norm": 6.327504848695889, + "learning_rate": 5e-06, + "loss": 0.0917, + "num_input_tokens_seen": 103169612, + "step": 1143 + }, + { + "epoch": 5.255172413793104, + "loss": 0.12516465783119202, + "loss_ce": 1.2068423529854044e-05, + "loss_iou": 0.28125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 103169612, + "step": 1143 + }, + { + "epoch": 5.259770114942529, + "grad_norm": 3.5430313631820884, + "learning_rate": 5e-06, + "loss": 0.0836, + "num_input_tokens_seen": 103260048, + "step": 1144 + }, + { + "epoch": 5.259770114942529, + "loss": 0.06030872091650963, + "loss_ce": 5.9857538872165605e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 103260048, + "step": 1144 + }, + { + "epoch": 5.264367816091954, + "grad_norm": 15.453671778683573, + "learning_rate": 5e-06, + "loss": 0.0821, + "num_input_tokens_seen": 103350296, + "step": 1145 + }, + { + "epoch": 5.264367816091954, + "loss": 0.08191235363483429, + "loss_ce": 3.175610572725418e-06, + "loss_iou": 0.35546875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 103350296, + "step": 1145 + }, + { + "epoch": 5.268965517241379, + "grad_norm": 6.718717304064819, + "learning_rate": 5e-06, + "loss": 0.0728, + "num_input_tokens_seen": 103440532, + "step": 1146 + }, + { + "epoch": 5.268965517241379, + "loss": 0.06786315143108368, + "loss_ce": 2.2574939066544175e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 103440532, + "step": 1146 + }, + { + "epoch": 5.273563218390804, + "grad_norm": 17.691788722797366, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 103530900, + "step": 1147 + }, + { + "epoch": 5.273563218390804, + "loss": 0.11429747939109802, + "loss_ce": 9.143472198047675e-06, + "loss_iou": 0.38671875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 103530900, + "step": 1147 + }, + { + "epoch": 5.2781609195402295, + "grad_norm": 18.892872568107364, + "learning_rate": 5e-06, + "loss": 0.0927, + "num_input_tokens_seen": 103621200, + "step": 1148 + }, + { + "epoch": 5.2781609195402295, + "loss": 0.05986202508211136, + "loss_ce": 1.7945828858501045e-06, + "loss_iou": 0.373046875, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 103621200, + "step": 1148 + }, + { + "epoch": 5.2827586206896555, + "grad_norm": 19.67550320719364, + "learning_rate": 5e-06, + "loss": 0.1304, + "num_input_tokens_seen": 103711424, + "step": 1149 + }, + { + "epoch": 5.2827586206896555, + "loss": 0.15571549534797668, + "loss_ce": 1.4810606444370933e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 103711424, + "step": 1149 + }, + { + "epoch": 5.287356321839081, + "grad_norm": 10.959254538309226, + "learning_rate": 5e-06, + "loss": 0.1328, + "num_input_tokens_seen": 103801748, + "step": 1150 + }, + { + "epoch": 5.287356321839081, + "loss": 0.08585269004106522, + "loss_ce": 6.745824521203758e-06, + "loss_iou": 0.369140625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 103801748, + "step": 1150 + }, + { + "epoch": 5.291954022988506, + "grad_norm": 6.5980592228912345, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 103892096, + "step": 1151 + }, + { + "epoch": 5.291954022988506, + "loss": 0.13581782579421997, + "loss_ce": 1.4601598195440602e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 103892096, + "step": 1151 + }, + { + "epoch": 5.296551724137931, + "grad_norm": 4.876191398306122, + "learning_rate": 5e-06, + "loss": 0.0575, + "num_input_tokens_seen": 103982468, + "step": 1152 + }, + { + "epoch": 5.296551724137931, + "loss": 0.03845709189772606, + "loss_ce": 4.945307409798261e-06, + "loss_iou": 0.283203125, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 103982468, + "step": 1152 + }, + { + "epoch": 5.301149425287356, + "grad_norm": 8.328380265990786, + "learning_rate": 5e-06, + "loss": 0.0661, + "num_input_tokens_seen": 104072856, + "step": 1153 + }, + { + "epoch": 5.301149425287356, + "loss": 0.07374840974807739, + "loss_ce": 2.674235474842135e-06, + "loss_iou": 0.296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 104072856, + "step": 1153 + }, + { + "epoch": 5.305747126436781, + "grad_norm": 8.532773650525401, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 104162432, + "step": 1154 + }, + { + "epoch": 5.305747126436781, + "loss": 0.10141049325466156, + "loss_ce": 1.5837054888834246e-05, + "loss_iou": 0.302734375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 104162432, + "step": 1154 + }, + { + "epoch": 5.310344827586207, + "grad_norm": 3.6361735465088287, + "learning_rate": 5e-06, + "loss": 0.0861, + "num_input_tokens_seen": 104252752, + "step": 1155 + }, + { + "epoch": 5.310344827586207, + "loss": 0.06680650264024734, + "loss_ce": 1.8778815501718782e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 104252752, + "step": 1155 + }, + { + "epoch": 5.314942528735632, + "grad_norm": 27.691441920507174, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 104343184, + "step": 1156 + }, + { + "epoch": 5.314942528735632, + "loss": 0.11418163776397705, + "loss_ce": 1.5387857274618e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 104343184, + "step": 1156 + }, + { + "epoch": 5.319540229885058, + "grad_norm": 8.129126905990978, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 104433600, + "step": 1157 + }, + { + "epoch": 5.319540229885058, + "loss": 0.1396959125995636, + "loss_ce": 4.747589264297858e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 104433600, + "step": 1157 + }, + { + "epoch": 5.324137931034483, + "grad_norm": 2.5246901713346284, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 104524052, + "step": 1158 + }, + { + "epoch": 5.324137931034483, + "loss": 0.08805723488330841, + "loss_ce": 1.4025717064214405e-05, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 104524052, + "step": 1158 + }, + { + "epoch": 5.328735632183908, + "grad_norm": 6.032560857969416, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 104613016, + "step": 1159 + }, + { + "epoch": 5.328735632183908, + "loss": 0.10382804274559021, + "loss_ce": 2.2498687030747533e-05, + "loss_iou": 0.28515625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 104613016, + "step": 1159 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 3.524592869861034, + "learning_rate": 5e-06, + "loss": 0.0834, + "num_input_tokens_seen": 104703380, + "step": 1160 + }, + { + "epoch": 5.333333333333333, + "loss": 0.0802207663655281, + "loss_ce": 5.31496243638685e-06, + "loss_iou": 0.279296875, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 104703380, + "step": 1160 + }, + { + "epoch": 5.337931034482759, + "grad_norm": 4.969952758545786, + "learning_rate": 5e-06, + "loss": 0.0857, + "num_input_tokens_seen": 104793836, + "step": 1161 + }, + { + "epoch": 5.337931034482759, + "loss": 0.09672747552394867, + "loss_ce": 2.0068077901669312e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 104793836, + "step": 1161 + }, + { + "epoch": 5.342528735632184, + "grad_norm": 6.83000765916347, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 104884288, + "step": 1162 + }, + { + "epoch": 5.342528735632184, + "loss": 0.061434537172317505, + "loss_ce": 1.7906917491927743e-05, + "loss_iou": 0.3203125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 104884288, + "step": 1162 + }, + { + "epoch": 5.347126436781609, + "grad_norm": 15.595761412741526, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 104974708, + "step": 1163 + }, + { + "epoch": 5.347126436781609, + "loss": 0.09743352234363556, + "loss_ce": 6.154959919513203e-06, + "loss_iou": 0.37109375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 104974708, + "step": 1163 + }, + { + "epoch": 5.3517241379310345, + "grad_norm": 9.98309693591081, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 105064256, + "step": 1164 + }, + { + "epoch": 5.3517241379310345, + "loss": 0.06634046137332916, + "loss_ce": 7.154278864618391e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 105064256, + "step": 1164 + }, + { + "epoch": 5.35632183908046, + "grad_norm": 14.072679950902984, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 105154564, + "step": 1165 + }, + { + "epoch": 5.35632183908046, + "loss": 0.07407604902982712, + "loss_ce": 9.88530428003287e-06, + "loss_iou": 0.291015625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 105154564, + "step": 1165 + }, + { + "epoch": 5.360919540229885, + "grad_norm": 18.28267664671943, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 105244844, + "step": 1166 + }, + { + "epoch": 5.360919540229885, + "loss": 0.12162074446678162, + "loss_ce": 8.195744158001617e-06, + "loss_iou": 0.42578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 105244844, + "step": 1166 + }, + { + "epoch": 5.36551724137931, + "grad_norm": 20.34315476487668, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 105335136, + "step": 1167 + }, + { + "epoch": 5.36551724137931, + "loss": 0.08798626065254211, + "loss_ce": 1.933791281771846e-05, + "loss_iou": 0.384765625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 105335136, + "step": 1167 + }, + { + "epoch": 5.370114942528736, + "grad_norm": 3.8998763710651003, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 105425568, + "step": 1168 + }, + { + "epoch": 5.370114942528736, + "loss": 0.13412779569625854, + "loss_ce": 3.038228669538512e-06, + "loss_iou": 0.2890625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 105425568, + "step": 1168 + }, + { + "epoch": 5.374712643678161, + "grad_norm": 3.9686217500508056, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 105515932, + "step": 1169 + }, + { + "epoch": 5.374712643678161, + "loss": 0.12314353883266449, + "loss_ce": 5.114484793011798e-06, + "loss_iou": 0.31640625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 105515932, + "step": 1169 + }, + { + "epoch": 5.379310344827586, + "grad_norm": 9.188483207270677, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 105606460, + "step": 1170 + }, + { + "epoch": 5.379310344827586, + "loss": 0.0674906075000763, + "loss_ce": 3.1501404009759426e-05, + "loss_iou": 0.341796875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 105606460, + "step": 1170 + }, + { + "epoch": 5.3839080459770114, + "grad_norm": 13.62728698765477, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 105696844, + "step": 1171 + }, + { + "epoch": 5.3839080459770114, + "loss": 0.05981367826461792, + "loss_ce": 1.448656985303387e-05, + "loss_iou": 0.30078125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 105696844, + "step": 1171 + }, + { + "epoch": 5.388505747126437, + "grad_norm": 7.114022345551658, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 105786460, + "step": 1172 + }, + { + "epoch": 5.388505747126437, + "loss": 0.09999861568212509, + "loss_ce": 2.3021595552563667e-05, + "loss_iou": 0.3515625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 105786460, + "step": 1172 + }, + { + "epoch": 5.393103448275862, + "grad_norm": 4.472914999467795, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 105876844, + "step": 1173 + }, + { + "epoch": 5.393103448275862, + "loss": 0.0646452009677887, + "loss_ce": 8.972834621090442e-06, + "loss_iou": 0.34375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 105876844, + "step": 1173 + }, + { + "epoch": 5.397701149425288, + "grad_norm": 10.796359253655176, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 105967272, + "step": 1174 + }, + { + "epoch": 5.397701149425288, + "loss": 0.07185741513967514, + "loss_ce": 3.7756042274850188e-06, + "loss_iou": 0.37109375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 105967272, + "step": 1174 + }, + { + "epoch": 5.402298850574713, + "grad_norm": 3.5477662420576417, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 106057604, + "step": 1175 + }, + { + "epoch": 5.402298850574713, + "loss": 0.07314296066761017, + "loss_ce": 5.335675086826086e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 106057604, + "step": 1175 + }, + { + "epoch": 5.406896551724138, + "grad_norm": 3.654190105765733, + "learning_rate": 5e-06, + "loss": 0.0883, + "num_input_tokens_seen": 106148012, + "step": 1176 + }, + { + "epoch": 5.406896551724138, + "loss": 0.0644906684756279, + "loss_ce": 7.029044809314655e-06, + "loss_iou": 0.353515625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 106148012, + "step": 1176 + }, + { + "epoch": 5.411494252873563, + "grad_norm": 2.0857683176392814, + "learning_rate": 5e-06, + "loss": 0.0829, + "num_input_tokens_seen": 106236844, + "step": 1177 + }, + { + "epoch": 5.411494252873563, + "loss": 0.10485847294330597, + "loss_ce": 1.5335266652982682e-05, + "loss_iou": 0.41796875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 106236844, + "step": 1177 + }, + { + "epoch": 5.416091954022988, + "grad_norm": 4.7865908582620245, + "learning_rate": 5e-06, + "loss": 0.058, + "num_input_tokens_seen": 106327196, + "step": 1178 + }, + { + "epoch": 5.416091954022988, + "loss": 0.07083334028720856, + "loss_ce": 2.0433958525245544e-06, + "loss_iou": 0.400390625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 106327196, + "step": 1178 + }, + { + "epoch": 5.4206896551724135, + "grad_norm": 4.381050617741422, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 106417488, + "step": 1179 + }, + { + "epoch": 5.4206896551724135, + "loss": 0.1030469685792923, + "loss_ce": 4.370722763269441e-06, + "loss_iou": 0.255859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 106417488, + "step": 1179 + }, + { + "epoch": 5.425287356321839, + "grad_norm": 13.90099727079854, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 106507752, + "step": 1180 + }, + { + "epoch": 5.425287356321839, + "loss": 0.11128388345241547, + "loss_ce": 1.5395814898511162e-06, + "loss_iou": 0.298828125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 106507752, + "step": 1180 + }, + { + "epoch": 5.429885057471265, + "grad_norm": 3.514125111179275, + "learning_rate": 5e-06, + "loss": 0.0897, + "num_input_tokens_seen": 106598104, + "step": 1181 + }, + { + "epoch": 5.429885057471265, + "loss": 0.05190886929631233, + "loss_ce": 1.372814949718304e-05, + "loss_iou": 0.2490234375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 106598104, + "step": 1181 + }, + { + "epoch": 5.43448275862069, + "grad_norm": 7.033128839020526, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 106688460, + "step": 1182 + }, + { + "epoch": 5.43448275862069, + "loss": 0.10943891108036041, + "loss_ce": 2.8767924504791154e-06, + "loss_iou": 0.296875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 106688460, + "step": 1182 + }, + { + "epoch": 5.439080459770115, + "grad_norm": 7.6220799806368005, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 106778892, + "step": 1183 + }, + { + "epoch": 5.439080459770115, + "loss": 0.13684214651584625, + "loss_ce": 3.1839987059356645e-05, + "loss_iou": 0.30078125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 106778892, + "step": 1183 + }, + { + "epoch": 5.44367816091954, + "grad_norm": 3.201233161347327, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 106869232, + "step": 1184 + }, + { + "epoch": 5.44367816091954, + "loss": 0.11746550351381302, + "loss_ce": 3.350429324200377e-06, + "loss_iou": 0.33203125, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 106869232, + "step": 1184 + }, + { + "epoch": 5.448275862068965, + "grad_norm": 27.107960993966334, + "learning_rate": 5e-06, + "loss": 0.0706, + "num_input_tokens_seen": 106959604, + "step": 1185 + }, + { + "epoch": 5.448275862068965, + "loss": 0.08980852365493774, + "loss_ce": 2.580695945653133e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 106959604, + "step": 1185 + }, + { + "epoch": 5.4528735632183905, + "grad_norm": 5.056052589774106, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 107049260, + "step": 1186 + }, + { + "epoch": 5.4528735632183905, + "loss": 0.0906330794095993, + "loss_ce": 1.1129596714454237e-05, + "loss_iou": 0.396484375, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 107049260, + "step": 1186 + }, + { + "epoch": 5.4574712643678165, + "grad_norm": 12.077013452361433, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 107139660, + "step": 1187 + }, + { + "epoch": 5.4574712643678165, + "loss": 0.08940693736076355, + "loss_ce": 2.0952680642949417e-05, + "loss_iou": 0.392578125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 107139660, + "step": 1187 + }, + { + "epoch": 5.462068965517242, + "grad_norm": 5.117358275086491, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 107229976, + "step": 1188 + }, + { + "epoch": 5.462068965517242, + "loss": 0.1243157610297203, + "loss_ce": 0.0007195670041255653, + "loss_iou": 0.36328125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 107229976, + "step": 1188 + }, + { + "epoch": 5.466666666666667, + "grad_norm": 3.84883008143984, + "learning_rate": 5e-06, + "loss": 0.063, + "num_input_tokens_seen": 107320400, + "step": 1189 + }, + { + "epoch": 5.466666666666667, + "loss": 0.04762866720557213, + "loss_ce": 5.176388003746979e-05, + "loss_iou": 0.30859375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 107320400, + "step": 1189 + }, + { + "epoch": 5.471264367816092, + "grad_norm": 11.839810250471555, + "learning_rate": 5e-06, + "loss": 0.1302, + "num_input_tokens_seen": 107410776, + "step": 1190 + }, + { + "epoch": 5.471264367816092, + "loss": 0.15773043036460876, + "loss_ce": 1.558406620461028e-05, + "loss_iou": 0.357421875, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 107410776, + "step": 1190 + }, + { + "epoch": 5.475862068965517, + "grad_norm": 3.3411639885164677, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 107500968, + "step": 1191 + }, + { + "epoch": 5.475862068965517, + "loss": 0.08212100714445114, + "loss_ce": 0.00012027601769659668, + "loss_iou": 0.310546875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 107500968, + "step": 1191 + }, + { + "epoch": 5.480459770114942, + "grad_norm": 2.668518928020011, + "learning_rate": 5e-06, + "loss": 0.0891, + "num_input_tokens_seen": 107591340, + "step": 1192 + }, + { + "epoch": 5.480459770114942, + "loss": 0.11270363628864288, + "loss_ce": 6.325638969428837e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 107591340, + "step": 1192 + }, + { + "epoch": 5.485057471264367, + "grad_norm": 3.423089746700433, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 107681688, + "step": 1193 + }, + { + "epoch": 5.485057471264367, + "loss": 0.06189355626702309, + "loss_ce": 3.9072983781807125e-06, + "loss_iou": 0.30859375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 107681688, + "step": 1193 + }, + { + "epoch": 5.489655172413793, + "grad_norm": 10.782127348669686, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 107772020, + "step": 1194 + }, + { + "epoch": 5.489655172413793, + "loss": 0.10825426876544952, + "loss_ce": 3.8937610952416435e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 107772020, + "step": 1194 + }, + { + "epoch": 5.494252873563219, + "grad_norm": 10.321202311466154, + "learning_rate": 5e-06, + "loss": 0.058, + "num_input_tokens_seen": 107862420, + "step": 1195 + }, + { + "epoch": 5.494252873563219, + "loss": 0.06930500268936157, + "loss_ce": 1.4843457392998971e-05, + "loss_iou": 0.41015625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 107862420, + "step": 1195 + }, + { + "epoch": 5.498850574712644, + "grad_norm": 1.7063662050581758, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 107952696, + "step": 1196 + }, + { + "epoch": 5.498850574712644, + "loss": 0.09334798157215118, + "loss_ce": 9.968431186280213e-06, + "loss_iou": 0.328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 107952696, + "step": 1196 + }, + { + "epoch": 5.503448275862069, + "grad_norm": 5.082277526279771, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 108043016, + "step": 1197 + }, + { + "epoch": 5.503448275862069, + "loss": 0.0524345263838768, + "loss_ce": 5.3265985116013326e-06, + "loss_iou": 0.28125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 108043016, + "step": 1197 + }, + { + "epoch": 5.508045977011494, + "grad_norm": 3.926403547610472, + "learning_rate": 5e-06, + "loss": 0.0637, + "num_input_tokens_seen": 108133392, + "step": 1198 + }, + { + "epoch": 5.508045977011494, + "loss": 0.055503346025943756, + "loss_ce": 7.1315121203952e-06, + "loss_iou": 0.345703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 108133392, + "step": 1198 + }, + { + "epoch": 5.512643678160919, + "grad_norm": 19.466891089230415, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 108223928, + "step": 1199 + }, + { + "epoch": 5.512643678160919, + "loss": 0.1550014615058899, + "loss_ce": 3.3197462471434847e-05, + "loss_iou": 0.3515625, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 108223928, + "step": 1199 + }, + { + "epoch": 5.517241379310345, + "grad_norm": 8.883212548201756, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 108314296, + "step": 1200 + }, + { + "epoch": 5.517241379310345, + "loss": 0.11142435669898987, + "loss_ce": 1.9941233404097147e-05, + "loss_iou": 0.39453125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 108314296, + "step": 1200 + }, + { + "epoch": 5.52183908045977, + "grad_norm": 4.261788928644466, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 108404724, + "step": 1201 + }, + { + "epoch": 5.52183908045977, + "loss": 0.09745465219020844, + "loss_ce": 1.2026385775243398e-05, + "loss_iou": 0.439453125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 108404724, + "step": 1201 + }, + { + "epoch": 5.5264367816091955, + "grad_norm": 2.987130600386024, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 108495024, + "step": 1202 + }, + { + "epoch": 5.5264367816091955, + "loss": 0.15418361127376556, + "loss_ce": 6.983517960179597e-05, + "loss_iou": 0.283203125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 108495024, + "step": 1202 + }, + { + "epoch": 5.531034482758621, + "grad_norm": 2.3653856974799323, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 108585416, + "step": 1203 + }, + { + "epoch": 5.531034482758621, + "loss": 0.06593763828277588, + "loss_ce": 1.96732235053787e-05, + "loss_iou": 0.318359375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 108585416, + "step": 1203 + }, + { + "epoch": 5.535632183908046, + "grad_norm": 3.330630153861718, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 108675664, + "step": 1204 + }, + { + "epoch": 5.535632183908046, + "loss": 0.11529748141765594, + "loss_ce": 2.066693014057819e-06, + "loss_iou": 0.302734375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 108675664, + "step": 1204 + }, + { + "epoch": 5.540229885057471, + "grad_norm": 6.486089157651149, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 108765884, + "step": 1205 + }, + { + "epoch": 5.540229885057471, + "loss": 0.07939766347408295, + "loss_ce": 6.185707206896041e-06, + "loss_iou": 0.302734375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 108765884, + "step": 1205 + }, + { + "epoch": 5.544827586206896, + "grad_norm": 5.30690182772674, + "learning_rate": 5e-06, + "loss": 0.0865, + "num_input_tokens_seen": 108856308, + "step": 1206 + }, + { + "epoch": 5.544827586206896, + "loss": 0.07844828069210052, + "loss_ce": 1.810871071938891e-05, + "loss_iou": 0.44921875, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 108856308, + "step": 1206 + }, + { + "epoch": 5.549425287356322, + "grad_norm": 3.5054972022904183, + "learning_rate": 5e-06, + "loss": 0.0758, + "num_input_tokens_seen": 108946648, + "step": 1207 + }, + { + "epoch": 5.549425287356322, + "loss": 0.07729589194059372, + "loss_ce": 1.0129077963938471e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 108946648, + "step": 1207 + }, + { + "epoch": 5.554022988505747, + "grad_norm": 9.78898145858432, + "learning_rate": 5e-06, + "loss": 0.0716, + "num_input_tokens_seen": 109037112, + "step": 1208 + }, + { + "epoch": 5.554022988505747, + "loss": 0.07796503603458405, + "loss_ce": 7.88130364526296e-06, + "loss_iou": 0.30859375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 109037112, + "step": 1208 + }, + { + "epoch": 5.558620689655172, + "grad_norm": 4.85707186606737, + "learning_rate": 5e-06, + "loss": 0.109, + "num_input_tokens_seen": 109126120, + "step": 1209 + }, + { + "epoch": 5.558620689655172, + "loss": 0.12425880879163742, + "loss_ce": 2.1751711756223813e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 109126120, + "step": 1209 + }, + { + "epoch": 5.563218390804598, + "grad_norm": 8.786051137164725, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 109216380, + "step": 1210 + }, + { + "epoch": 5.563218390804598, + "loss": 0.0687236338853836, + "loss_ce": 2.855926322808955e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 109216380, + "step": 1210 + }, + { + "epoch": 5.567816091954023, + "grad_norm": 3.482256798617991, + "learning_rate": 5e-06, + "loss": 0.0865, + "num_input_tokens_seen": 109306644, + "step": 1211 + }, + { + "epoch": 5.567816091954023, + "loss": 0.0845988318324089, + "loss_ce": 4.1002945181389805e-06, + "loss_iou": 0.310546875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 109306644, + "step": 1211 + }, + { + "epoch": 5.572413793103449, + "grad_norm": 4.264015760408713, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 109397144, + "step": 1212 + }, + { + "epoch": 5.572413793103449, + "loss": 0.07220568507909775, + "loss_ce": 1.6351415979443118e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 109397144, + "step": 1212 + }, + { + "epoch": 5.577011494252874, + "grad_norm": 3.1255568749212816, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 109487456, + "step": 1213 + }, + { + "epoch": 5.577011494252874, + "loss": 0.052804555743932724, + "loss_ce": 9.145278454525396e-06, + "loss_iou": 0.3046875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 109487456, + "step": 1213 + }, + { + "epoch": 5.581609195402299, + "grad_norm": 3.94664764195231, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 109577820, + "step": 1214 + }, + { + "epoch": 5.581609195402299, + "loss": 0.04463765025138855, + "loss_ce": 5.693935236195102e-06, + "loss_iou": 0.314453125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 109577820, + "step": 1214 + }, + { + "epoch": 5.586206896551724, + "grad_norm": 8.212489399334382, + "learning_rate": 5e-06, + "loss": 0.0566, + "num_input_tokens_seen": 109668260, + "step": 1215 + }, + { + "epoch": 5.586206896551724, + "loss": 0.06496123224496841, + "loss_ce": 5.034160494687967e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 109668260, + "step": 1215 + }, + { + "epoch": 5.590804597701149, + "grad_norm": 9.692768417013069, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 109758576, + "step": 1216 + }, + { + "epoch": 5.590804597701149, + "loss": 0.07001283019781113, + "loss_ce": 5.50604636373464e-06, + "loss_iou": 0.296875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 109758576, + "step": 1216 + }, + { + "epoch": 5.5954022988505745, + "grad_norm": 8.222517521217215, + "learning_rate": 5e-06, + "loss": 0.0974, + "num_input_tokens_seen": 109848992, + "step": 1217 + }, + { + "epoch": 5.5954022988505745, + "loss": 0.09305752068758011, + "loss_ce": 2.4687436962267384e-05, + "loss_iou": 0.359375, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 109848992, + "step": 1217 + }, + { + "epoch": 5.6, + "grad_norm": 14.945824448106285, + "learning_rate": 5e-06, + "loss": 0.0871, + "num_input_tokens_seen": 109939364, + "step": 1218 + }, + { + "epoch": 5.6, + "loss": 0.09159138798713684, + "loss_ce": 8.140663339872845e-06, + "loss_iou": 0.3828125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 109939364, + "step": 1218 + }, + { + "epoch": 5.604597701149425, + "grad_norm": 28.416530398349355, + "learning_rate": 5e-06, + "loss": 0.0836, + "num_input_tokens_seen": 110029792, + "step": 1219 + }, + { + "epoch": 5.604597701149425, + "loss": 0.046073831617832184, + "loss_ce": 7.547159839305095e-06, + "loss_iou": 0.3984375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 110029792, + "step": 1219 + }, + { + "epoch": 5.609195402298851, + "grad_norm": 12.008787286117224, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 110120336, + "step": 1220 + }, + { + "epoch": 5.609195402298851, + "loss": 0.11564863473176956, + "loss_ce": 4.804190029972233e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 110120336, + "step": 1220 + }, + { + "epoch": 5.613793103448276, + "grad_norm": 22.46676513633078, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 110210540, + "step": 1221 + }, + { + "epoch": 5.613793103448276, + "loss": 0.07196103781461716, + "loss_ce": 5.928850441705436e-07, + "loss_iou": 0.390625, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 110210540, + "step": 1221 + }, + { + "epoch": 5.618390804597701, + "grad_norm": 23.841340782969, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 110299420, + "step": 1222 + }, + { + "epoch": 5.618390804597701, + "loss": 0.11307486146688461, + "loss_ce": 7.235145858430769e-06, + "loss_iou": 0.37109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 110299420, + "step": 1222 + }, + { + "epoch": 5.622988505747126, + "grad_norm": 7.337714001096004, + "learning_rate": 5e-06, + "loss": 0.088, + "num_input_tokens_seen": 110389696, + "step": 1223 + }, + { + "epoch": 5.622988505747126, + "loss": 0.07472731918096542, + "loss_ce": 6.606188981095329e-05, + "loss_iou": 0.388671875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 110389696, + "step": 1223 + }, + { + "epoch": 5.627586206896551, + "grad_norm": 11.251552468198575, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 110480088, + "step": 1224 + }, + { + "epoch": 5.627586206896551, + "loss": 0.0974973812699318, + "loss_ce": 1.3505153901860467e-06, + "loss_iou": 0.404296875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 110480088, + "step": 1224 + }, + { + "epoch": 5.6321839080459775, + "grad_norm": 5.577183353924994, + "learning_rate": 5e-06, + "loss": 0.0834, + "num_input_tokens_seen": 110570532, + "step": 1225 + }, + { + "epoch": 5.6321839080459775, + "loss": 0.10643593221902847, + "loss_ce": 2.1139007003512233e-05, + "loss_iou": 0.30078125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 110570532, + "step": 1225 + }, + { + "epoch": 5.636781609195403, + "grad_norm": 6.701001289894439, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 110659360, + "step": 1226 + }, + { + "epoch": 5.636781609195403, + "loss": 0.1065259799361229, + "loss_ce": 5.0150600145570934e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 110659360, + "step": 1226 + }, + { + "epoch": 5.641379310344828, + "grad_norm": 5.386629642258452, + "learning_rate": 5e-06, + "loss": 0.0618, + "num_input_tokens_seen": 110749656, + "step": 1227 + }, + { + "epoch": 5.641379310344828, + "loss": 0.07365281879901886, + "loss_ce": 1.3907001630286686e-05, + "loss_iou": 0.27734375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 110749656, + "step": 1227 + }, + { + "epoch": 5.645977011494253, + "grad_norm": 5.0480129670553175, + "learning_rate": 5e-06, + "loss": 0.0804, + "num_input_tokens_seen": 110839976, + "step": 1228 + }, + { + "epoch": 5.645977011494253, + "loss": 0.09352147579193115, + "loss_ce": 1.5612586139468476e-05, + "loss_iou": 0.296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 110839976, + "step": 1228 + }, + { + "epoch": 5.650574712643678, + "grad_norm": 3.2142834138747904, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 110930364, + "step": 1229 + }, + { + "epoch": 5.650574712643678, + "loss": 0.1116681694984436, + "loss_ce": 4.347142748883925e-06, + "loss_iou": 0.427734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 110930364, + "step": 1229 + }, + { + "epoch": 5.655172413793103, + "grad_norm": 4.682890336305807, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 111020076, + "step": 1230 + }, + { + "epoch": 5.655172413793103, + "loss": 0.14498014748096466, + "loss_ce": 6.385195320035564e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 111020076, + "step": 1230 + }, + { + "epoch": 5.659770114942528, + "grad_norm": 7.235632320302035, + "learning_rate": 5e-06, + "loss": 0.0682, + "num_input_tokens_seen": 111110316, + "step": 1231 + }, + { + "epoch": 5.659770114942528, + "loss": 0.0729096531867981, + "loss_ce": 3.36788289132528e-05, + "loss_iou": 0.333984375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 111110316, + "step": 1231 + }, + { + "epoch": 5.664367816091954, + "grad_norm": 2.3169355950035992, + "learning_rate": 5e-06, + "loss": 0.0644, + "num_input_tokens_seen": 111200728, + "step": 1232 + }, + { + "epoch": 5.664367816091954, + "loss": 0.07670523226261139, + "loss_ce": 1.4556415408151224e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 111200728, + "step": 1232 + }, + { + "epoch": 5.6689655172413795, + "grad_norm": 9.054517929993095, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 111290364, + "step": 1233 + }, + { + "epoch": 5.6689655172413795, + "loss": 0.13384370505809784, + "loss_ce": 2.4130109522957355e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 111290364, + "step": 1233 + }, + { + "epoch": 5.673563218390805, + "grad_norm": 2.4147250325583536, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 111380760, + "step": 1234 + }, + { + "epoch": 5.673563218390805, + "loss": 0.08226744830608368, + "loss_ce": 7.31718364477274e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 111380760, + "step": 1234 + }, + { + "epoch": 5.67816091954023, + "grad_norm": 4.243893457772928, + "learning_rate": 5e-06, + "loss": 0.0735, + "num_input_tokens_seen": 111471116, + "step": 1235 + }, + { + "epoch": 5.67816091954023, + "loss": 0.07256424427032471, + "loss_ce": 8.707445886102505e-06, + "loss_iou": 0.291015625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 111471116, + "step": 1235 + }, + { + "epoch": 5.682758620689655, + "grad_norm": 1.8993669782662472, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 111561436, + "step": 1236 + }, + { + "epoch": 5.682758620689655, + "loss": 0.13686691224575043, + "loss_ce": 3.2013495001592673e-06, + "loss_iou": 0.28125, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 111561436, + "step": 1236 + }, + { + "epoch": 5.68735632183908, + "grad_norm": 4.878685087840704, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 111651816, + "step": 1237 + }, + { + "epoch": 5.68735632183908, + "loss": 0.07835354655981064, + "loss_ce": 1.4925600225978997e-05, + "loss_iou": 0.283203125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 111651816, + "step": 1237 + }, + { + "epoch": 5.691954022988506, + "grad_norm": 8.928325096054786, + "learning_rate": 5e-06, + "loss": 0.1009, + "num_input_tokens_seen": 111742112, + "step": 1238 + }, + { + "epoch": 5.691954022988506, + "loss": 0.12431655079126358, + "loss_ce": 1.8453329175827093e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 111742112, + "step": 1238 + }, + { + "epoch": 5.696551724137931, + "grad_norm": 4.857562908474219, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 111832440, + "step": 1239 + }, + { + "epoch": 5.696551724137931, + "loss": 0.10367751866579056, + "loss_ce": 9.310015229857527e-06, + "loss_iou": 0.359375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 111832440, + "step": 1239 + }, + { + "epoch": 5.7011494252873565, + "grad_norm": 2.1347164816602997, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 111922904, + "step": 1240 + }, + { + "epoch": 5.7011494252873565, + "loss": 0.06786856055259705, + "loss_ce": 1.2722593055514153e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 111922904, + "step": 1240 + }, + { + "epoch": 5.705747126436782, + "grad_norm": 8.531354475094487, + "learning_rate": 5e-06, + "loss": 0.0715, + "num_input_tokens_seen": 112013212, + "step": 1241 + }, + { + "epoch": 5.705747126436782, + "loss": 0.061662279069423676, + "loss_ce": 1.5145051293075085e-06, + "loss_iou": 0.35546875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 112013212, + "step": 1241 + }, + { + "epoch": 5.710344827586207, + "grad_norm": 18.82697007656036, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 112103552, + "step": 1242 + }, + { + "epoch": 5.710344827586207, + "loss": 0.1454518437385559, + "loss_ce": 5.055922429164639e-06, + "loss_iou": 0.267578125, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 112103552, + "step": 1242 + }, + { + "epoch": 5.714942528735632, + "grad_norm": 5.439005001756043, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 112193936, + "step": 1243 + }, + { + "epoch": 5.714942528735632, + "loss": 0.04381529614329338, + "loss_ce": 3.7829264329047874e-05, + "loss_iou": 0.236328125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 112193936, + "step": 1243 + }, + { + "epoch": 5.719540229885057, + "grad_norm": 18.879016825135775, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 112284184, + "step": 1244 + }, + { + "epoch": 5.719540229885057, + "loss": 0.10435809940099716, + "loss_ce": 1.8497697965358384e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 112284184, + "step": 1244 + }, + { + "epoch": 5.724137931034483, + "grad_norm": 4.028482212688753, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 112374556, + "step": 1245 + }, + { + "epoch": 5.724137931034483, + "loss": 0.1041383296251297, + "loss_ce": 1.2350090401014313e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 112374556, + "step": 1245 + }, + { + "epoch": 5.728735632183908, + "grad_norm": 6.123310827199504, + "learning_rate": 5e-06, + "loss": 0.0711, + "num_input_tokens_seen": 112464124, + "step": 1246 + }, + { + "epoch": 5.728735632183908, + "loss": 0.0810786560177803, + "loss_ce": 8.710509973752778e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 112464124, + "step": 1246 + }, + { + "epoch": 5.733333333333333, + "grad_norm": 6.108386978174192, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 112554496, + "step": 1247 + }, + { + "epoch": 5.733333333333333, + "loss": 0.06712520867586136, + "loss_ce": 1.7052239854820073e-05, + "loss_iou": 0.32421875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 112554496, + "step": 1247 + }, + { + "epoch": 5.7379310344827585, + "grad_norm": 6.640331304354758, + "learning_rate": 5e-06, + "loss": 0.0834, + "num_input_tokens_seen": 112644988, + "step": 1248 + }, + { + "epoch": 5.7379310344827585, + "loss": 0.10246039181947708, + "loss_ce": 2.814024264807813e-05, + "loss_iou": 0.3671875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 112644988, + "step": 1248 + }, + { + "epoch": 5.742528735632184, + "grad_norm": 14.848833163560919, + "learning_rate": 5e-06, + "loss": 0.0836, + "num_input_tokens_seen": 112735364, + "step": 1249 + }, + { + "epoch": 5.742528735632184, + "loss": 0.0761798620223999, + "loss_ce": 7.98174187366385e-06, + "loss_iou": 0.330078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 112735364, + "step": 1249 + }, + { + "epoch": 5.747126436781609, + "grad_norm": 11.72709348916867, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 112825776, + "step": 1250 + }, + { + "epoch": 5.747126436781609, + "eval_seeclick_CIoU": 0.4573807418346405, + "eval_seeclick_GIoU": 0.4410470873117447, + "eval_seeclick_IoU": 0.49894072115421295, + "eval_seeclick_MAE_all": 0.061043232679367065, + "eval_seeclick_MAE_h": 0.053627705201506615, + "eval_seeclick_MAE_w": 0.11304067447781563, + "eval_seeclick_MAE_x_boxes": 0.10517356544733047, + "eval_seeclick_MAE_y_boxes": 0.05592040531337261, + "eval_seeclick_NUM_probability": 0.9999991357326508, + "eval_seeclick_inside_bbox": 0.7542613744735718, + "eval_seeclick_loss": 0.37355467677116394, + "eval_seeclick_loss_ce": 0.07825379073619843, + "eval_seeclick_loss_iou": 0.5123291015625, + "eval_seeclick_loss_num": 0.06250762939453125, + "eval_seeclick_loss_xval": 0.31231689453125, + "eval_seeclick_runtime": 85.2206, + "eval_seeclick_samples_per_second": 0.505, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 112825776, + "step": 1250 + }, + { + "epoch": 5.747126436781609, + "eval_icons_CIoU": 0.5825834572315216, + "eval_icons_GIoU": 0.5834551155567169, + "eval_icons_IoU": 0.616531640291214, + "eval_icons_MAE_all": 0.043770069256424904, + "eval_icons_MAE_h": 0.07383041456341743, + "eval_icons_MAE_w": 0.06421982683241367, + "eval_icons_MAE_x_boxes": 0.05823229439556599, + "eval_icons_MAE_y_boxes": 0.07248594611883163, + "eval_icons_NUM_probability": 0.999999612569809, + "eval_icons_inside_bbox": 0.7673611044883728, + "eval_icons_loss": 0.21254034340381622, + "eval_icons_loss_ce": 4.21214093648814e-06, + "eval_icons_loss_iou": 0.447265625, + "eval_icons_loss_num": 0.04521942138671875, + "eval_icons_loss_xval": 0.226104736328125, + "eval_icons_runtime": 85.9172, + "eval_icons_samples_per_second": 0.582, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 112825776, + "step": 1250 + }, + { + "epoch": 5.747126436781609, + "eval_screenspot_CIoU": 0.4307680130004883, + "eval_screenspot_GIoU": 0.4167415201663971, + "eval_screenspot_IoU": 0.4970965087413788, + "eval_screenspot_MAE_all": 0.08507336676120758, + "eval_screenspot_MAE_h": 0.07819427798191707, + "eval_screenspot_MAE_w": 0.17445524781942368, + "eval_screenspot_MAE_x_boxes": 0.1695632884899775, + "eval_screenspot_MAE_y_boxes": 0.0742349624633789, + "eval_screenspot_NUM_probability": 0.9999984900156657, + "eval_screenspot_inside_bbox": 0.7637499968210856, + "eval_screenspot_loss": 0.4254446029663086, + "eval_screenspot_loss_ce": 6.250914157135412e-05, + "eval_screenspot_loss_iou": 0.3890787760416667, + "eval_screenspot_loss_num": 0.087158203125, + "eval_screenspot_loss_xval": 0.4358723958333333, + "eval_screenspot_runtime": 160.2493, + "eval_screenspot_samples_per_second": 0.555, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 112825776, + "step": 1250 + }, + { + "epoch": 5.747126436781609, + "eval_compot_CIoU": 0.48233287036418915, + "eval_compot_GIoU": 0.4589613825082779, + "eval_compot_IoU": 0.5473670959472656, + "eval_compot_MAE_all": 0.05806807801127434, + "eval_compot_MAE_h": 0.07524906471371651, + "eval_compot_MAE_w": 0.11501751467585564, + "eval_compot_MAE_x_boxes": 0.10463830083608627, + "eval_compot_MAE_y_boxes": 0.07366756349802017, + "eval_compot_NUM_probability": 0.999998927116394, + "eval_compot_inside_bbox": 0.7760416567325592, + "eval_compot_loss": 0.3192625343799591, + "eval_compot_loss_ce": 0.0109380844514817, + "eval_compot_loss_iou": 0.501953125, + "eval_compot_loss_num": 0.0531005859375, + "eval_compot_loss_xval": 0.265533447265625, + "eval_compot_runtime": 86.8999, + "eval_compot_samples_per_second": 0.575, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 112825776, + "step": 1250 + }, + { + "epoch": 5.747126436781609, + "loss": 0.1921016126871109, + "loss_ce": 0.006859918590635061, + "loss_iou": 0.53515625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 112825776, + "step": 1250 + }, + { + "epoch": 5.751724137931035, + "grad_norm": 48.39229529580768, + "learning_rate": 5e-06, + "loss": 0.1628, + "num_input_tokens_seen": 112916064, + "step": 1251 + }, + { + "epoch": 5.751724137931035, + "loss": 0.14167295396327972, + "loss_ce": 4.0873368561733514e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 112916064, + "step": 1251 + }, + { + "epoch": 5.75632183908046, + "grad_norm": 7.309880590381852, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 113006264, + "step": 1252 + }, + { + "epoch": 5.75632183908046, + "loss": 0.1379764974117279, + "loss_ce": 6.526359356939793e-06, + "loss_iou": 0.3984375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 113006264, + "step": 1252 + }, + { + "epoch": 5.760919540229885, + "grad_norm": 4.7842464607453, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 113096588, + "step": 1253 + }, + { + "epoch": 5.760919540229885, + "loss": 0.06593406945466995, + "loss_ce": 8.451766007055994e-07, + "loss_iou": 0.279296875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 113096588, + "step": 1253 + }, + { + "epoch": 5.76551724137931, + "grad_norm": 9.30963816664603, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 113186932, + "step": 1254 + }, + { + "epoch": 5.76551724137931, + "loss": 0.10627120733261108, + "loss_ce": 9.002267688629217e-06, + "loss_iou": 0.396484375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 113186932, + "step": 1254 + }, + { + "epoch": 5.7701149425287355, + "grad_norm": 5.032701026633435, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 113277316, + "step": 1255 + }, + { + "epoch": 5.7701149425287355, + "loss": 0.1125558465719223, + "loss_ce": 7.017050393187674e-06, + "loss_iou": 0.341796875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 113277316, + "step": 1255 + }, + { + "epoch": 5.774712643678161, + "grad_norm": 7.348457813587205, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 113367640, + "step": 1256 + }, + { + "epoch": 5.774712643678161, + "loss": 0.06398795545101166, + "loss_ce": 0.00011466428986750543, + "loss_iou": 0.365234375, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 113367640, + "step": 1256 + }, + { + "epoch": 5.779310344827586, + "grad_norm": 3.6430091269186304, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 113457940, + "step": 1257 + }, + { + "epoch": 5.779310344827586, + "loss": 0.07985039055347443, + "loss_ce": 1.6411700926255435e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 113457940, + "step": 1257 + }, + { + "epoch": 5.783908045977012, + "grad_norm": 3.3598636033304454, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 113548324, + "step": 1258 + }, + { + "epoch": 5.783908045977012, + "loss": 0.11753726005554199, + "loss_ce": 4.458225521375425e-05, + "loss_iou": 0.3125, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 113548324, + "step": 1258 + }, + { + "epoch": 5.788505747126437, + "grad_norm": 4.068043795707238, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 113638644, + "step": 1259 + }, + { + "epoch": 5.788505747126437, + "loss": 0.138657346367836, + "loss_ce": 7.249969939948642e-07, + "loss_iou": 0.2578125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 113638644, + "step": 1259 + }, + { + "epoch": 5.793103448275862, + "grad_norm": 2.279662032054951, + "learning_rate": 5e-06, + "loss": 0.0673, + "num_input_tokens_seen": 113729080, + "step": 1260 + }, + { + "epoch": 5.793103448275862, + "loss": 0.10126355290412903, + "loss_ce": 6.22008246864425e-06, + "loss_iou": 0.3359375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 113729080, + "step": 1260 + }, + { + "epoch": 5.797701149425287, + "grad_norm": 5.649226738355132, + "learning_rate": 5e-06, + "loss": 0.0636, + "num_input_tokens_seen": 113819484, + "step": 1261 + }, + { + "epoch": 5.797701149425287, + "loss": 0.08105180412530899, + "loss_ce": 4.743245881400071e-06, + "loss_iou": 0.275390625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 113819484, + "step": 1261 + }, + { + "epoch": 5.802298850574712, + "grad_norm": 1.5641332197845963, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 113909888, + "step": 1262 + }, + { + "epoch": 5.802298850574712, + "loss": 0.061422187834978104, + "loss_ce": 5.559354576689657e-06, + "loss_iou": 0.248046875, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 113909888, + "step": 1262 + }, + { + "epoch": 5.8068965517241375, + "grad_norm": 15.670084350710225, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 114000272, + "step": 1263 + }, + { + "epoch": 5.8068965517241375, + "loss": 0.11508636921644211, + "loss_ce": 4.585368969856063e-06, + "loss_iou": 0.326171875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 114000272, + "step": 1263 + }, + { + "epoch": 5.811494252873564, + "grad_norm": 3.835335458461329, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 114090524, + "step": 1264 + }, + { + "epoch": 5.811494252873564, + "loss": 0.08618977665901184, + "loss_ce": 8.132398761517834e-06, + "loss_iou": 0.357421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 114090524, + "step": 1264 + }, + { + "epoch": 5.816091954022989, + "grad_norm": 9.99731943967783, + "learning_rate": 5e-06, + "loss": 0.1382, + "num_input_tokens_seen": 114180868, + "step": 1265 + }, + { + "epoch": 5.816091954022989, + "loss": 0.1053665354847908, + "loss_ce": 4.596740836859681e-06, + "loss_iou": 0.30078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 114180868, + "step": 1265 + }, + { + "epoch": 5.820689655172414, + "grad_norm": 2.911511418981655, + "learning_rate": 5e-06, + "loss": 0.0754, + "num_input_tokens_seen": 114271204, + "step": 1266 + }, + { + "epoch": 5.820689655172414, + "loss": 0.07806918770074844, + "loss_ce": 5.221021638135426e-06, + "loss_iou": 0.40234375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 114271204, + "step": 1266 + }, + { + "epoch": 5.825287356321839, + "grad_norm": 6.2344029082542, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 114361536, + "step": 1267 + }, + { + "epoch": 5.825287356321839, + "loss": 0.05028009042143822, + "loss_ce": 2.380430260018329e-06, + "loss_iou": 0.388671875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 114361536, + "step": 1267 + }, + { + "epoch": 5.829885057471264, + "grad_norm": 3.7663498969437175, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 114451868, + "step": 1268 + }, + { + "epoch": 5.829885057471264, + "loss": 0.08314789086580276, + "loss_ce": 2.7451824280433357e-06, + "loss_iou": 0.3515625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 114451868, + "step": 1268 + }, + { + "epoch": 5.834482758620689, + "grad_norm": 7.1687713447544406, + "learning_rate": 5e-06, + "loss": 0.0694, + "num_input_tokens_seen": 114542224, + "step": 1269 + }, + { + "epoch": 5.834482758620689, + "loss": 0.08698830008506775, + "loss_ce": 4.3723342969315127e-05, + "loss_iou": 0.40625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 114542224, + "step": 1269 + }, + { + "epoch": 5.8390804597701145, + "grad_norm": 7.785551994387596, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 114632548, + "step": 1270 + }, + { + "epoch": 5.8390804597701145, + "loss": 0.04482455551624298, + "loss_ce": 9.493617653788533e-06, + "loss_iou": 0.302734375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 114632548, + "step": 1270 + }, + { + "epoch": 5.8436781609195405, + "grad_norm": 4.9792230982606815, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 114722960, + "step": 1271 + }, + { + "epoch": 5.8436781609195405, + "loss": 0.0937369242310524, + "loss_ce": 2.1818657387484564e-06, + "loss_iou": 0.396484375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 114722960, + "step": 1271 + }, + { + "epoch": 5.848275862068966, + "grad_norm": 7.05750958333928, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 114813360, + "step": 1272 + }, + { + "epoch": 5.848275862068966, + "loss": 0.08838266134262085, + "loss_ce": 1.9016588339582086e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 114813360, + "step": 1272 + }, + { + "epoch": 5.852873563218391, + "grad_norm": 2.2929149537355933, + "learning_rate": 5e-06, + "loss": 0.0831, + "num_input_tokens_seen": 114903664, + "step": 1273 + }, + { + "epoch": 5.852873563218391, + "loss": 0.08243682235479355, + "loss_ce": 8.84367273101816e-06, + "loss_iou": 0.375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 114903664, + "step": 1273 + }, + { + "epoch": 5.857471264367816, + "grad_norm": 11.633034929488161, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 114994008, + "step": 1274 + }, + { + "epoch": 5.857471264367816, + "loss": 0.1149655431509018, + "loss_ce": 5.821747436129954e-06, + "loss_iou": 0.267578125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 114994008, + "step": 1274 + }, + { + "epoch": 5.862068965517241, + "grad_norm": 13.85967700936422, + "learning_rate": 5e-06, + "loss": 0.0762, + "num_input_tokens_seen": 115084404, + "step": 1275 + }, + { + "epoch": 5.862068965517241, + "loss": 0.07548947632312775, + "loss_ce": 4.2469814616197255e-06, + "loss_iou": 0.2578125, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 115084404, + "step": 1275 + }, + { + "epoch": 5.866666666666667, + "grad_norm": 8.591339370910877, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 115174752, + "step": 1276 + }, + { + "epoch": 5.866666666666667, + "loss": 0.08044753968715668, + "loss_ce": 3.201591425749939e-06, + "loss_iou": 0.18359375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 115174752, + "step": 1276 + }, + { + "epoch": 5.871264367816092, + "grad_norm": 10.263498686615506, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 115265232, + "step": 1277 + }, + { + "epoch": 5.871264367816092, + "loss": 0.12569460272789001, + "loss_ce": 7.947778613015544e-06, + "loss_iou": 0.2490234375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 115265232, + "step": 1277 + }, + { + "epoch": 5.875862068965517, + "grad_norm": 11.400124255336879, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 115355708, + "step": 1278 + }, + { + "epoch": 5.875862068965517, + "loss": 0.1197689026594162, + "loss_ce": 2.66920051217312e-06, + "loss_iou": 0.3125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 115355708, + "step": 1278 + }, + { + "epoch": 5.880459770114943, + "grad_norm": 4.3793822993822715, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 115446204, + "step": 1279 + }, + { + "epoch": 5.880459770114943, + "loss": 0.1235429123044014, + "loss_ce": 7.75079388404265e-06, + "loss_iou": 0.431640625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 115446204, + "step": 1279 + }, + { + "epoch": 5.885057471264368, + "grad_norm": 8.059241246157443, + "learning_rate": 5e-06, + "loss": 0.0801, + "num_input_tokens_seen": 115536632, + "step": 1280 + }, + { + "epoch": 5.885057471264368, + "loss": 0.08218298107385635, + "loss_ce": 1.439858169760555e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 115536632, + "step": 1280 + }, + { + "epoch": 5.889655172413793, + "grad_norm": 9.48342209591454, + "learning_rate": 5e-06, + "loss": 0.0749, + "num_input_tokens_seen": 115627096, + "step": 1281 + }, + { + "epoch": 5.889655172413793, + "loss": 0.08530169725418091, + "loss_ce": 5.068644895800389e-06, + "loss_iou": 0.3203125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 115627096, + "step": 1281 + }, + { + "epoch": 5.894252873563218, + "grad_norm": 4.958627136670461, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 115717508, + "step": 1282 + }, + { + "epoch": 5.894252873563218, + "loss": 0.10856582224369049, + "loss_ce": 1.4800659300817642e-05, + "loss_iou": 0.369140625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 115717508, + "step": 1282 + }, + { + "epoch": 5.898850574712643, + "grad_norm": 2.619956686440511, + "learning_rate": 5e-06, + "loss": 0.0937, + "num_input_tokens_seen": 115807116, + "step": 1283 + }, + { + "epoch": 5.898850574712643, + "loss": 0.1267540454864502, + "loss_ce": 1.4542956705554388e-05, + "loss_iou": 0.28515625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 115807116, + "step": 1283 + }, + { + "epoch": 5.903448275862069, + "grad_norm": 1.9178286629575005, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 115897536, + "step": 1284 + }, + { + "epoch": 5.903448275862069, + "loss": 0.06888516247272491, + "loss_ce": 6.993210263317451e-06, + "loss_iou": 0.25390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 115897536, + "step": 1284 + }, + { + "epoch": 5.908045977011494, + "grad_norm": 4.687249341290416, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 115987980, + "step": 1285 + }, + { + "epoch": 5.908045977011494, + "loss": 0.05946702882647514, + "loss_ce": 1.87891364475945e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 115987980, + "step": 1285 + }, + { + "epoch": 5.9126436781609195, + "grad_norm": 9.032450106052789, + "learning_rate": 5e-06, + "loss": 0.0588, + "num_input_tokens_seen": 116078456, + "step": 1286 + }, + { + "epoch": 5.9126436781609195, + "loss": 0.051251400262117386, + "loss_ce": 1.238779896084452e-05, + "loss_iou": 0.2890625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 116078456, + "step": 1286 + }, + { + "epoch": 5.917241379310345, + "grad_norm": 6.2264189550968805, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 116168752, + "step": 1287 + }, + { + "epoch": 5.917241379310345, + "loss": 0.1135876327753067, + "loss_ce": 1.2120165138185257e-06, + "loss_iou": 0.3359375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 116168752, + "step": 1287 + }, + { + "epoch": 5.92183908045977, + "grad_norm": 4.579123122057683, + "learning_rate": 5e-06, + "loss": 0.0763, + "num_input_tokens_seen": 116259044, + "step": 1288 + }, + { + "epoch": 5.92183908045977, + "loss": 0.09734739363193512, + "loss_ce": 2.6833829906536266e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 116259044, + "step": 1288 + }, + { + "epoch": 5.926436781609196, + "grad_norm": 6.37974017894556, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 116349420, + "step": 1289 + }, + { + "epoch": 5.926436781609196, + "loss": 0.12337080389261246, + "loss_ce": 3.4989259347639745e-06, + "loss_iou": 0.37890625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 116349420, + "step": 1289 + }, + { + "epoch": 5.931034482758621, + "grad_norm": 5.236407477085609, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 116439780, + "step": 1290 + }, + { + "epoch": 5.931034482758621, + "loss": 0.11725269258022308, + "loss_ce": 4.161834567639744e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 116439780, + "step": 1290 + }, + { + "epoch": 5.935632183908046, + "grad_norm": 6.939019502168552, + "learning_rate": 5e-06, + "loss": 0.0819, + "num_input_tokens_seen": 116530192, + "step": 1291 + }, + { + "epoch": 5.935632183908046, + "loss": 0.10045474022626877, + "loss_ce": 5.191009404370561e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 116530192, + "step": 1291 + }, + { + "epoch": 5.940229885057471, + "grad_norm": 5.923858548372062, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 116620592, + "step": 1292 + }, + { + "epoch": 5.940229885057471, + "loss": 0.061371780931949615, + "loss_ce": 9.32706370804226e-07, + "loss_iou": 0.28515625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 116620592, + "step": 1292 + }, + { + "epoch": 5.944827586206896, + "grad_norm": 5.436359048849817, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 116710964, + "step": 1293 + }, + { + "epoch": 5.944827586206896, + "loss": 0.05396192520856857, + "loss_ce": 6.846322776254965e-06, + "loss_iou": 0.32421875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 116710964, + "step": 1293 + }, + { + "epoch": 5.949425287356322, + "grad_norm": 7.23604737625205, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 116800480, + "step": 1294 + }, + { + "epoch": 5.949425287356322, + "loss": 0.08210571110248566, + "loss_ce": 1.3423375094134826e-05, + "loss_iou": 0.326171875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 116800480, + "step": 1294 + }, + { + "epoch": 5.954022988505747, + "grad_norm": 2.028553713875407, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 116890808, + "step": 1295 + }, + { + "epoch": 5.954022988505747, + "loss": 0.09970621019601822, + "loss_ce": 3.580232078093104e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 116890808, + "step": 1295 + }, + { + "epoch": 5.958620689655173, + "grad_norm": 8.871343329486912, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 116981168, + "step": 1296 + }, + { + "epoch": 5.958620689655173, + "loss": 0.0988428071141243, + "loss_ce": 2.6887415515375324e-05, + "loss_iou": 0.31640625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 116981168, + "step": 1296 + }, + { + "epoch": 5.963218390804598, + "grad_norm": 6.717350736332018, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 117071416, + "step": 1297 + }, + { + "epoch": 5.963218390804598, + "loss": 0.10025615990161896, + "loss_ce": 5.9157691794098355e-06, + "loss_iou": 0.35546875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 117071416, + "step": 1297 + }, + { + "epoch": 5.967816091954023, + "grad_norm": 8.58100221704676, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 117161736, + "step": 1298 + }, + { + "epoch": 5.967816091954023, + "loss": 0.10571430623531342, + "loss_ce": 3.193915108568035e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 117161736, + "step": 1298 + }, + { + "epoch": 5.972413793103448, + "grad_norm": 11.414603962669956, + "learning_rate": 5e-06, + "loss": 0.0951, + "num_input_tokens_seen": 117252180, + "step": 1299 + }, + { + "epoch": 5.972413793103448, + "loss": 0.09330064058303833, + "loss_ce": 3.891930464305915e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 117252180, + "step": 1299 + }, + { + "epoch": 5.977011494252873, + "grad_norm": 14.302282290509867, + "learning_rate": 5e-06, + "loss": 0.1022, + "num_input_tokens_seen": 117342672, + "step": 1300 + }, + { + "epoch": 5.977011494252873, + "loss": 0.09649240970611572, + "loss_ce": 1.108789183490444e-05, + "loss_iou": 0.322265625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 117342672, + "step": 1300 + }, + { + "epoch": 5.9816091954022985, + "grad_norm": 4.2991078616550515, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 117433108, + "step": 1301 + }, + { + "epoch": 5.9816091954022985, + "loss": 0.11118273437023163, + "loss_ce": 7.203209406725364e-06, + "loss_iou": 0.3984375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 117433108, + "step": 1301 + }, + { + "epoch": 5.9862068965517246, + "grad_norm": 8.51577704791888, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 117523460, + "step": 1302 + }, + { + "epoch": 5.9862068965517246, + "loss": 0.07513649761676788, + "loss_ce": 2.222008106400608e-06, + "loss_iou": 0.32421875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 117523460, + "step": 1302 + }, + { + "epoch": 5.99080459770115, + "grad_norm": 2.677824187892606, + "learning_rate": 5e-06, + "loss": 0.0844, + "num_input_tokens_seen": 117613652, + "step": 1303 + }, + { + "epoch": 5.99080459770115, + "loss": 0.12823861837387085, + "loss_ce": 1.9021113985218108e-05, + "loss_iou": 0.359375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 117613652, + "step": 1303 + }, + { + "epoch": 5.995402298850575, + "grad_norm": 7.857525256311372, + "learning_rate": 5e-06, + "loss": 0.0986, + "num_input_tokens_seen": 117703216, + "step": 1304 + }, + { + "epoch": 5.995402298850575, + "loss": 0.12224046885967255, + "loss_ce": 3.282908437540755e-05, + "loss_iou": 0.296875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 117703216, + "step": 1304 + }, + { + "epoch": 6.0, + "grad_norm": 3.121211959039489, + "learning_rate": 5e-06, + "loss": 0.0844, + "num_input_tokens_seen": 117793536, + "step": 1305 + }, + { + "epoch": 6.0, + "loss": 0.0870618224143982, + "loss_ce": 2.569025309639983e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 117793536, + "step": 1305 + }, + { + "epoch": 6.004597701149425, + "grad_norm": 10.240533439865134, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 117883776, + "step": 1306 + }, + { + "epoch": 6.004597701149425, + "loss": 0.1209506243467331, + "loss_ce": 1.8270541204401525e-06, + "loss_iou": 0.361328125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 117883776, + "step": 1306 + }, + { + "epoch": 6.00919540229885, + "grad_norm": 9.764972826473027, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 117974056, + "step": 1307 + }, + { + "epoch": 6.00919540229885, + "loss": 0.05394062399864197, + "loss_ce": 8.061311973506236e-07, + "loss_iou": 0.318359375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 117974056, + "step": 1307 + }, + { + "epoch": 6.0137931034482754, + "grad_norm": 2.2633345565968166, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 118064428, + "step": 1308 + }, + { + "epoch": 6.0137931034482754, + "loss": 0.09164643287658691, + "loss_ce": 2.152237811969826e-06, + "loss_iou": 0.287109375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 118064428, + "step": 1308 + }, + { + "epoch": 6.0183908045977015, + "grad_norm": 16.306990357156884, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 118154820, + "step": 1309 + }, + { + "epoch": 6.0183908045977015, + "loss": 0.0783190131187439, + "loss_ce": 2.6165927920374088e-05, + "loss_iou": 0.279296875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 118154820, + "step": 1309 + }, + { + "epoch": 6.022988505747127, + "grad_norm": 5.540727316669466, + "learning_rate": 5e-06, + "loss": 0.093, + "num_input_tokens_seen": 118245100, + "step": 1310 + }, + { + "epoch": 6.022988505747127, + "loss": 0.07801572233438492, + "loss_ce": 1.2792523193638772e-05, + "loss_iou": 0.271484375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 118245100, + "step": 1310 + }, + { + "epoch": 6.027586206896552, + "grad_norm": 6.643253537988018, + "learning_rate": 5e-06, + "loss": 0.0683, + "num_input_tokens_seen": 118335420, + "step": 1311 + }, + { + "epoch": 6.027586206896552, + "loss": 0.07764066010713577, + "loss_ce": 3.9446958908229135e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 118335420, + "step": 1311 + }, + { + "epoch": 6.032183908045977, + "grad_norm": 6.788294653006372, + "learning_rate": 5e-06, + "loss": 0.0802, + "num_input_tokens_seen": 118425852, + "step": 1312 + }, + { + "epoch": 6.032183908045977, + "loss": 0.09127424657344818, + "loss_ce": 2.6687364879762754e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 118425852, + "step": 1312 + }, + { + "epoch": 6.036781609195402, + "grad_norm": 3.573469877891761, + "learning_rate": 5e-06, + "loss": 0.0963, + "num_input_tokens_seen": 118516176, + "step": 1313 + }, + { + "epoch": 6.036781609195402, + "loss": 0.10177914798259735, + "loss_ce": 3.0242531465773936e-06, + "loss_iou": 0.341796875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 118516176, + "step": 1313 + }, + { + "epoch": 6.041379310344827, + "grad_norm": 2.587490346214488, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 118606480, + "step": 1314 + }, + { + "epoch": 6.041379310344827, + "loss": 0.07138314843177795, + "loss_ce": 2.536031388444826e-06, + "loss_iou": 0.294921875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 118606480, + "step": 1314 + }, + { + "epoch": 6.045977011494253, + "grad_norm": 8.936376542422272, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 118696880, + "step": 1315 + }, + { + "epoch": 6.045977011494253, + "loss": 0.07341183722019196, + "loss_ce": 1.8055545751849422e-06, + "loss_iou": 0.263671875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 118696880, + "step": 1315 + }, + { + "epoch": 6.050574712643678, + "grad_norm": 12.767342588452228, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 118787220, + "step": 1316 + }, + { + "epoch": 6.050574712643678, + "loss": 0.062339168041944504, + "loss_ce": 0.00011382619413780048, + "loss_iou": 0.3203125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 118787220, + "step": 1316 + }, + { + "epoch": 6.055172413793104, + "grad_norm": 10.482681799720183, + "learning_rate": 5e-06, + "loss": 0.0796, + "num_input_tokens_seen": 118877572, + "step": 1317 + }, + { + "epoch": 6.055172413793104, + "loss": 0.04859533905982971, + "loss_ce": 1.1354484740877524e-05, + "loss_iou": 0.322265625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 118877572, + "step": 1317 + }, + { + "epoch": 6.059770114942529, + "grad_norm": 12.105797073827482, + "learning_rate": 5e-06, + "loss": 0.1086, + "num_input_tokens_seen": 118967880, + "step": 1318 + }, + { + "epoch": 6.059770114942529, + "loss": 0.1294127106666565, + "loss_ce": 2.920265615102835e-06, + "loss_iou": 0.3125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 118967880, + "step": 1318 + }, + { + "epoch": 6.064367816091954, + "grad_norm": 1.9154409626850923, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 119058296, + "step": 1319 + }, + { + "epoch": 6.064367816091954, + "loss": 0.09125930815935135, + "loss_ce": 2.701348603295628e-05, + "loss_iou": 0.40234375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 119058296, + "step": 1319 + }, + { + "epoch": 6.068965517241379, + "grad_norm": 1.9985880290089337, + "learning_rate": 5e-06, + "loss": 0.0933, + "num_input_tokens_seen": 119148684, + "step": 1320 + }, + { + "epoch": 6.068965517241379, + "loss": 0.1152973622083664, + "loss_ce": 1.952744241862092e-06, + "loss_iou": 0.353515625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 119148684, + "step": 1320 + }, + { + "epoch": 6.073563218390804, + "grad_norm": 6.069434025871692, + "learning_rate": 5e-06, + "loss": 0.0762, + "num_input_tokens_seen": 119239004, + "step": 1321 + }, + { + "epoch": 6.073563218390804, + "loss": 0.060495294630527496, + "loss_ce": 5.5230746511369944e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 119239004, + "step": 1321 + }, + { + "epoch": 6.07816091954023, + "grad_norm": 10.274039091733039, + "learning_rate": 5e-06, + "loss": 0.0822, + "num_input_tokens_seen": 119329324, + "step": 1322 + }, + { + "epoch": 6.07816091954023, + "loss": 0.10678257048130035, + "loss_ce": 1.5705069245086634e-06, + "loss_iou": 0.302734375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 119329324, + "step": 1322 + }, + { + "epoch": 6.082758620689655, + "grad_norm": 3.0466261855959544, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 119419688, + "step": 1323 + }, + { + "epoch": 6.082758620689655, + "loss": 0.06655121594667435, + "loss_ce": 7.638142960786354e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 119419688, + "step": 1323 + }, + { + "epoch": 6.0873563218390805, + "grad_norm": 3.872617007458599, + "learning_rate": 5e-06, + "loss": 0.0655, + "num_input_tokens_seen": 119510040, + "step": 1324 + }, + { + "epoch": 6.0873563218390805, + "loss": 0.06537115573883057, + "loss_ce": 4.8276036977767944e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 119510040, + "step": 1324 + }, + { + "epoch": 6.091954022988506, + "grad_norm": 14.262285949730552, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 119600444, + "step": 1325 + }, + { + "epoch": 6.091954022988506, + "loss": 0.06211470812559128, + "loss_ce": 1.1435970009188168e-05, + "loss_iou": 0.337890625, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 119600444, + "step": 1325 + }, + { + "epoch": 6.096551724137931, + "grad_norm": 5.582731893935351, + "learning_rate": 5e-06, + "loss": 0.0856, + "num_input_tokens_seen": 119690784, + "step": 1326 + }, + { + "epoch": 6.096551724137931, + "loss": 0.08985207229852676, + "loss_ce": 6.9361602072604e-05, + "loss_iou": 0.404296875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 119690784, + "step": 1326 + }, + { + "epoch": 6.101149425287356, + "grad_norm": 8.165861222533081, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 119781104, + "step": 1327 + }, + { + "epoch": 6.101149425287356, + "loss": 0.06276784837245941, + "loss_ce": 8.212633133553027e-07, + "loss_iou": 0.2890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 119781104, + "step": 1327 + }, + { + "epoch": 6.105747126436782, + "grad_norm": 11.7019496158795, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 119871456, + "step": 1328 + }, + { + "epoch": 6.105747126436782, + "loss": 0.06695958971977234, + "loss_ce": 4.02033947466407e-06, + "loss_iou": 0.306640625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 119871456, + "step": 1328 + }, + { + "epoch": 6.110344827586207, + "grad_norm": 11.09087965893546, + "learning_rate": 5e-06, + "loss": 0.0629, + "num_input_tokens_seen": 119961804, + "step": 1329 + }, + { + "epoch": 6.110344827586207, + "loss": 0.06232057511806488, + "loss_ce": 3.6832166188105475e-06, + "loss_iou": 0.36328125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 119961804, + "step": 1329 + }, + { + "epoch": 6.114942528735632, + "grad_norm": 1.7273225707646134, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 120052288, + "step": 1330 + }, + { + "epoch": 6.114942528735632, + "loss": 0.05487591400742531, + "loss_ce": 5.3095800467417575e-06, + "loss_iou": 0.33984375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 120052288, + "step": 1330 + }, + { + "epoch": 6.119540229885057, + "grad_norm": 1.895873348755958, + "learning_rate": 5e-06, + "loss": 0.0637, + "num_input_tokens_seen": 120142748, + "step": 1331 + }, + { + "epoch": 6.119540229885057, + "loss": 0.08318766951560974, + "loss_ce": 1.2013872037641704e-05, + "loss_iou": 0.296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 120142748, + "step": 1331 + }, + { + "epoch": 6.124137931034483, + "grad_norm": 12.365279177414369, + "learning_rate": 5e-06, + "loss": 0.0879, + "num_input_tokens_seen": 120231640, + "step": 1332 + }, + { + "epoch": 6.124137931034483, + "loss": 0.11951163411140442, + "loss_ce": 2.006320391956251e-05, + "loss_iou": 0.3671875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 120231640, + "step": 1332 + }, + { + "epoch": 6.128735632183908, + "grad_norm": 8.100321593651932, + "learning_rate": 5e-06, + "loss": 0.0759, + "num_input_tokens_seen": 120322024, + "step": 1333 + }, + { + "epoch": 6.128735632183908, + "loss": 0.07466816902160645, + "loss_ce": 6.915668109286344e-06, + "loss_iou": 0.3125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 120322024, + "step": 1333 + }, + { + "epoch": 6.133333333333334, + "grad_norm": 10.20027536647147, + "learning_rate": 5e-06, + "loss": 0.0724, + "num_input_tokens_seen": 120412328, + "step": 1334 + }, + { + "epoch": 6.133333333333334, + "loss": 0.07674149423837662, + "loss_ce": 5.042035354563268e-06, + "loss_iou": 0.36328125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 120412328, + "step": 1334 + }, + { + "epoch": 6.137931034482759, + "grad_norm": 15.927575829987184, + "learning_rate": 5e-06, + "loss": 0.0715, + "num_input_tokens_seen": 120502728, + "step": 1335 + }, + { + "epoch": 6.137931034482759, + "loss": 0.07090835273265839, + "loss_ce": 1.6013153071980923e-05, + "loss_iou": 0.296875, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 120502728, + "step": 1335 + }, + { + "epoch": 6.142528735632184, + "grad_norm": 6.003633344367404, + "learning_rate": 5e-06, + "loss": 0.093, + "num_input_tokens_seen": 120592968, + "step": 1336 + }, + { + "epoch": 6.142528735632184, + "loss": 0.08331505209207535, + "loss_ce": 2.065743046841817e-06, + "loss_iou": 0.30859375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 120592968, + "step": 1336 + }, + { + "epoch": 6.147126436781609, + "grad_norm": 16.256143484292767, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 120683224, + "step": 1337 + }, + { + "epoch": 6.147126436781609, + "loss": 0.05023811012506485, + "loss_ce": 6.1769642343278974e-06, + "loss_iou": 0.314453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 120683224, + "step": 1337 + }, + { + "epoch": 6.151724137931034, + "grad_norm": 18.84221449609671, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 120773604, + "step": 1338 + }, + { + "epoch": 6.151724137931034, + "loss": 0.1148335188627243, + "loss_ce": 1.1128912774438504e-05, + "loss_iou": 0.2890625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 120773604, + "step": 1338 + }, + { + "epoch": 6.1563218390804595, + "grad_norm": 3.6340664937584357, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 120863972, + "step": 1339 + }, + { + "epoch": 6.1563218390804595, + "loss": 0.10637908428907394, + "loss_ce": 2.5327462935820222e-05, + "loss_iou": 0.376953125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 120863972, + "step": 1339 + }, + { + "epoch": 6.160919540229885, + "grad_norm": 12.35950915110001, + "learning_rate": 5e-06, + "loss": 0.0642, + "num_input_tokens_seen": 120954292, + "step": 1340 + }, + { + "epoch": 6.160919540229885, + "loss": 0.050802938640117645, + "loss_ce": 2.1687461412511766e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 120954292, + "step": 1340 + }, + { + "epoch": 6.165517241379311, + "grad_norm": 13.05285924563968, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 121044680, + "step": 1341 + }, + { + "epoch": 6.165517241379311, + "loss": 0.09323087334632874, + "loss_ce": 1.4931548321328592e-05, + "loss_iou": 0.26171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 121044680, + "step": 1341 + }, + { + "epoch": 6.170114942528736, + "grad_norm": 3.75282762212465, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 121135088, + "step": 1342 + }, + { + "epoch": 6.170114942528736, + "loss": 0.06280642002820969, + "loss_ce": 3.1762905564391986e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 121135088, + "step": 1342 + }, + { + "epoch": 6.174712643678161, + "grad_norm": 9.367037578077188, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 121225428, + "step": 1343 + }, + { + "epoch": 6.174712643678161, + "loss": 0.1399770975112915, + "loss_ce": 8.234508641180582e-06, + "loss_iou": 0.296875, + "loss_num": 0.028076171875, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 121225428, + "step": 1343 + }, + { + "epoch": 6.179310344827586, + "grad_norm": 19.171060340321993, + "learning_rate": 5e-06, + "loss": 0.087, + "num_input_tokens_seen": 121315824, + "step": 1344 + }, + { + "epoch": 6.179310344827586, + "loss": 0.06587206572294235, + "loss_ce": 1.513374263595324e-05, + "loss_iou": 0.275390625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 121315824, + "step": 1344 + }, + { + "epoch": 6.183908045977011, + "grad_norm": 4.876284313063547, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 121406176, + "step": 1345 + }, + { + "epoch": 6.183908045977011, + "loss": 0.06390294432640076, + "loss_ce": 6.762678367522312e-06, + "loss_iou": 0.30078125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 121406176, + "step": 1345 + }, + { + "epoch": 6.188505747126436, + "grad_norm": 16.38884750049181, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 121496544, + "step": 1346 + }, + { + "epoch": 6.188505747126436, + "loss": 0.1316826045513153, + "loss_ce": 2.9771730623906478e-05, + "loss_iou": 0.29296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 121496544, + "step": 1346 + }, + { + "epoch": 6.1931034482758625, + "grad_norm": 11.544406525213137, + "learning_rate": 5e-06, + "loss": 0.0773, + "num_input_tokens_seen": 121587024, + "step": 1347 + }, + { + "epoch": 6.1931034482758625, + "loss": 0.08613596856594086, + "loss_ce": 1.535906267235987e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 121587024, + "step": 1347 + }, + { + "epoch": 6.197701149425288, + "grad_norm": 2.445337746033652, + "learning_rate": 5e-06, + "loss": 0.0589, + "num_input_tokens_seen": 121677320, + "step": 1348 + }, + { + "epoch": 6.197701149425288, + "loss": 0.04452810809016228, + "loss_ce": 2.960706069643493e-06, + "loss_iou": 0.267578125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 121677320, + "step": 1348 + }, + { + "epoch": 6.202298850574713, + "grad_norm": 6.030279032225266, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 121767660, + "step": 1349 + }, + { + "epoch": 6.202298850574713, + "loss": 0.10142619907855988, + "loss_ce": 1.0309655635865056e-06, + "loss_iou": 0.392578125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 121767660, + "step": 1349 + }, + { + "epoch": 6.206896551724138, + "grad_norm": 13.218267366174958, + "learning_rate": 5e-06, + "loss": 0.0749, + "num_input_tokens_seen": 121857952, + "step": 1350 + }, + { + "epoch": 6.206896551724138, + "loss": 0.08160283416509628, + "loss_ce": 1.4093273421167396e-05, + "loss_iou": 0.31640625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 121857952, + "step": 1350 + }, + { + "epoch": 6.211494252873563, + "grad_norm": 7.3910687621686115, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 121948348, + "step": 1351 + }, + { + "epoch": 6.211494252873563, + "loss": 0.1298474818468094, + "loss_ce": 4.096345946891233e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 121948348, + "step": 1351 + }, + { + "epoch": 6.216091954022988, + "grad_norm": 3.477775072702562, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 122038552, + "step": 1352 + }, + { + "epoch": 6.216091954022988, + "loss": 0.06851699203252792, + "loss_ce": 5.026361122872913e-06, + "loss_iou": 0.27734375, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 122038552, + "step": 1352 + }, + { + "epoch": 6.220689655172414, + "grad_norm": 4.659650631220517, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 122128880, + "step": 1353 + }, + { + "epoch": 6.220689655172414, + "loss": 0.09352925419807434, + "loss_ce": 2.3392831280943938e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 122128880, + "step": 1353 + }, + { + "epoch": 6.225287356321839, + "grad_norm": 7.501436100973512, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 122219280, + "step": 1354 + }, + { + "epoch": 6.225287356321839, + "loss": 0.11285711824893951, + "loss_ce": 1.0735830983321648e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 122219280, + "step": 1354 + }, + { + "epoch": 6.2298850574712645, + "grad_norm": 7.168147699957736, + "learning_rate": 5e-06, + "loss": 0.0745, + "num_input_tokens_seen": 122308872, + "step": 1355 + }, + { + "epoch": 6.2298850574712645, + "loss": 0.07650821655988693, + "loss_ce": 6.474406859524606e-07, + "loss_iou": 0.3515625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 122308872, + "step": 1355 + }, + { + "epoch": 6.23448275862069, + "grad_norm": 2.803107500761448, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 122399160, + "step": 1356 + }, + { + "epoch": 6.23448275862069, + "loss": 0.17111368477344513, + "loss_ce": 1.622533659428882e-06, + "loss_iou": 0.380859375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 122399160, + "step": 1356 + }, + { + "epoch": 6.239080459770115, + "grad_norm": 1.8395032972537242, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 122489652, + "step": 1357 + }, + { + "epoch": 6.239080459770115, + "loss": 0.07030150294303894, + "loss_ce": 4.265766619937494e-06, + "loss_iou": 0.353515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 122489652, + "step": 1357 + }, + { + "epoch": 6.24367816091954, + "grad_norm": 1.8235275088698975, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 122580024, + "step": 1358 + }, + { + "epoch": 6.24367816091954, + "loss": 0.05960645154118538, + "loss_ce": 5.619423973257653e-06, + "loss_iou": 0.3046875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 122580024, + "step": 1358 + }, + { + "epoch": 6.248275862068965, + "grad_norm": 2.3308796814869392, + "learning_rate": 5e-06, + "loss": 0.0665, + "num_input_tokens_seen": 122670360, + "step": 1359 + }, + { + "epoch": 6.248275862068965, + "loss": 0.08280164748430252, + "loss_ce": 7.4597710408852436e-06, + "loss_iou": 0.298828125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 122670360, + "step": 1359 + }, + { + "epoch": 6.252873563218391, + "grad_norm": 9.546289417744838, + "learning_rate": 5e-06, + "loss": 0.0745, + "num_input_tokens_seen": 122760712, + "step": 1360 + }, + { + "epoch": 6.252873563218391, + "loss": 0.06621609628200531, + "loss_ce": 8.209115549107082e-06, + "loss_iou": 0.306640625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 122760712, + "step": 1360 + }, + { + "epoch": 6.257471264367816, + "grad_norm": 21.086995926043166, + "learning_rate": 5e-06, + "loss": 0.083, + "num_input_tokens_seen": 122850908, + "step": 1361 + }, + { + "epoch": 6.257471264367816, + "loss": 0.08541100472211838, + "loss_ce": 7.5566276791505516e-06, + "loss_iou": 0.404296875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 122850908, + "step": 1361 + }, + { + "epoch": 6.2620689655172415, + "grad_norm": 9.01280263169444, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 122941356, + "step": 1362 + }, + { + "epoch": 6.2620689655172415, + "loss": 0.1276589035987854, + "loss_ce": 3.869728971039876e-06, + "loss_iou": 0.31640625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 122941356, + "step": 1362 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 20.135580773469314, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 123031748, + "step": 1363 + }, + { + "epoch": 6.266666666666667, + "loss": 0.10605251789093018, + "loss_ce": 3.935991117032245e-06, + "loss_iou": 0.431640625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 123031748, + "step": 1363 + }, + { + "epoch": 6.271264367816092, + "grad_norm": 13.434332845560562, + "learning_rate": 5e-06, + "loss": 0.0927, + "num_input_tokens_seen": 123121280, + "step": 1364 + }, + { + "epoch": 6.271264367816092, + "loss": 0.07541698217391968, + "loss_ce": 8.042437002586666e-06, + "loss_iou": 0.38671875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 123121280, + "step": 1364 + }, + { + "epoch": 6.275862068965517, + "grad_norm": 4.409333354948716, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 123211620, + "step": 1365 + }, + { + "epoch": 6.275862068965517, + "loss": 0.10695058107376099, + "loss_ce": 1.728949655444012e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 123211620, + "step": 1365 + }, + { + "epoch": 6.280459770114943, + "grad_norm": 3.154737625505015, + "learning_rate": 5e-06, + "loss": 0.075, + "num_input_tokens_seen": 123301896, + "step": 1366 + }, + { + "epoch": 6.280459770114943, + "loss": 0.07635073363780975, + "loss_ce": 1.1014501978934277e-05, + "loss_iou": 0.328125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 123301896, + "step": 1366 + }, + { + "epoch": 6.285057471264368, + "grad_norm": 2.211275058736632, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 123392472, + "step": 1367 + }, + { + "epoch": 6.285057471264368, + "loss": 0.07074186205863953, + "loss_ce": 1.7377531548845582e-05, + "loss_iou": 0.314453125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 123392472, + "step": 1367 + }, + { + "epoch": 6.289655172413793, + "grad_norm": 10.219890906499858, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 123482900, + "step": 1368 + }, + { + "epoch": 6.289655172413793, + "loss": 0.07744231820106506, + "loss_ce": 3.968204964621691e-06, + "loss_iou": 0.365234375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 123482900, + "step": 1368 + }, + { + "epoch": 6.294252873563218, + "grad_norm": 5.327344530549374, + "learning_rate": 5e-06, + "loss": 0.0759, + "num_input_tokens_seen": 123573300, + "step": 1369 + }, + { + "epoch": 6.294252873563218, + "loss": 0.09067036956548691, + "loss_ce": 9.419472189620137e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 123573300, + "step": 1369 + }, + { + "epoch": 6.2988505747126435, + "grad_norm": 9.772421156940164, + "learning_rate": 5e-06, + "loss": 0.0702, + "num_input_tokens_seen": 123663696, + "step": 1370 + }, + { + "epoch": 6.2988505747126435, + "loss": 0.0640704333782196, + "loss_ce": 2.9297052606125362e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 123663696, + "step": 1370 + }, + { + "epoch": 6.303448275862069, + "grad_norm": 18.114247114460188, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 123754076, + "step": 1371 + }, + { + "epoch": 6.303448275862069, + "loss": 0.07899777591228485, + "loss_ce": 3.0194664759619627e-06, + "loss_iou": 0.35546875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 123754076, + "step": 1371 + }, + { + "epoch": 6.308045977011494, + "grad_norm": 3.332469449720361, + "learning_rate": 5e-06, + "loss": 0.0676, + "num_input_tokens_seen": 123844508, + "step": 1372 + }, + { + "epoch": 6.308045977011494, + "loss": 0.06758559495210648, + "loss_ce": 4.417397576617077e-06, + "loss_iou": 0.275390625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 123844508, + "step": 1372 + }, + { + "epoch": 6.31264367816092, + "grad_norm": 6.634405744544336, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 123934812, + "step": 1373 + }, + { + "epoch": 6.31264367816092, + "loss": 0.08992957323789597, + "loss_ce": 9.527670954412315e-06, + "loss_iou": 0.33984375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 123934812, + "step": 1373 + }, + { + "epoch": 6.317241379310345, + "grad_norm": 10.333106801799953, + "learning_rate": 5e-06, + "loss": 0.0718, + "num_input_tokens_seen": 124025200, + "step": 1374 + }, + { + "epoch": 6.317241379310345, + "loss": 0.04593438282608986, + "loss_ce": 2.068851244985126e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 124025200, + "step": 1374 + }, + { + "epoch": 6.32183908045977, + "grad_norm": 6.513003814868987, + "learning_rate": 5e-06, + "loss": 0.0896, + "num_input_tokens_seen": 124115436, + "step": 1375 + }, + { + "epoch": 6.32183908045977, + "loss": 0.07307623326778412, + "loss_ce": 1.8943998156828457e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 124115436, + "step": 1375 + }, + { + "epoch": 6.326436781609195, + "grad_norm": 7.020049530325385, + "learning_rate": 5e-06, + "loss": 0.0601, + "num_input_tokens_seen": 124205816, + "step": 1376 + }, + { + "epoch": 6.326436781609195, + "loss": 0.062137503176927567, + "loss_ce": 3.71252508557518e-06, + "loss_iou": 0.314453125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 124205816, + "step": 1376 + }, + { + "epoch": 6.3310344827586205, + "grad_norm": 6.7984756030733005, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 124296160, + "step": 1377 + }, + { + "epoch": 6.3310344827586205, + "loss": 0.09830499440431595, + "loss_ce": 6.89133012201637e-05, + "loss_iou": 0.375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 124296160, + "step": 1377 + }, + { + "epoch": 6.335632183908046, + "grad_norm": 9.550154500204966, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 124386652, + "step": 1378 + }, + { + "epoch": 6.335632183908046, + "loss": 0.032549649477005005, + "loss_ce": 2.6519669518165756e-06, + "loss_iou": 0.29296875, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 124386652, + "step": 1378 + }, + { + "epoch": 6.340229885057472, + "grad_norm": 15.868279771186451, + "learning_rate": 5e-06, + "loss": 0.1227, + "num_input_tokens_seen": 124477136, + "step": 1379 + }, + { + "epoch": 6.340229885057472, + "loss": 0.10919815301895142, + "loss_ce": 6.263924660743214e-06, + "loss_iou": 0.244140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 124477136, + "step": 1379 + }, + { + "epoch": 6.344827586206897, + "grad_norm": 9.149745216189178, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 124567584, + "step": 1380 + }, + { + "epoch": 6.344827586206897, + "loss": 0.043360911309719086, + "loss_ce": 1.0688753718568478e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 124567584, + "step": 1380 + }, + { + "epoch": 6.349425287356322, + "grad_norm": 6.148379301187082, + "learning_rate": 5e-06, + "loss": 0.0698, + "num_input_tokens_seen": 124657880, + "step": 1381 + }, + { + "epoch": 6.349425287356322, + "loss": 0.07773515582084656, + "loss_ce": 6.885257789690513e-06, + "loss_iou": 0.3671875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 124657880, + "step": 1381 + }, + { + "epoch": 6.354022988505747, + "grad_norm": 6.06320096249193, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 124748200, + "step": 1382 + }, + { + "epoch": 6.354022988505747, + "loss": 0.08277206867933273, + "loss_ce": 8.399106263823342e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 124748200, + "step": 1382 + }, + { + "epoch": 6.358620689655172, + "grad_norm": 6.051287795431972, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 124838752, + "step": 1383 + }, + { + "epoch": 6.358620689655172, + "loss": 0.056419774889945984, + "loss_ce": 8.03236889623804e-06, + "loss_iou": 0.35546875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 124838752, + "step": 1383 + }, + { + "epoch": 6.363218390804597, + "grad_norm": 13.7747801267887, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 124929112, + "step": 1384 + }, + { + "epoch": 6.363218390804597, + "loss": 0.07443118095397949, + "loss_ce": 7.509520219173282e-05, + "loss_iou": 0.302734375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 124929112, + "step": 1384 + }, + { + "epoch": 6.3678160919540225, + "grad_norm": 2.09029899827099, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 125019604, + "step": 1385 + }, + { + "epoch": 6.3678160919540225, + "loss": 0.08999022096395493, + "loss_ce": 9.14329484658083e-06, + "loss_iou": 0.365234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 125019604, + "step": 1385 + }, + { + "epoch": 6.372413793103449, + "grad_norm": 4.966748140294823, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 125109988, + "step": 1386 + }, + { + "epoch": 6.372413793103449, + "loss": 0.08931561559438705, + "loss_ce": 5.922461241425481e-06, + "loss_iou": 0.298828125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 125109988, + "step": 1386 + }, + { + "epoch": 6.377011494252874, + "grad_norm": 1.8678820550766893, + "learning_rate": 5e-06, + "loss": 0.0808, + "num_input_tokens_seen": 125200440, + "step": 1387 + }, + { + "epoch": 6.377011494252874, + "loss": 0.0770668312907219, + "loss_ce": 5.572132067754865e-05, + "loss_iou": 0.283203125, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 125200440, + "step": 1387 + }, + { + "epoch": 6.381609195402299, + "grad_norm": 16.6653828543424, + "learning_rate": 5e-06, + "loss": 0.0891, + "num_input_tokens_seen": 125290784, + "step": 1388 + }, + { + "epoch": 6.381609195402299, + "loss": 0.09468194842338562, + "loss_ce": 1.167914092548017e-06, + "loss_iou": 0.25, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 125290784, + "step": 1388 + }, + { + "epoch": 6.386206896551724, + "grad_norm": 9.34884327902768, + "learning_rate": 5e-06, + "loss": 0.0721, + "num_input_tokens_seen": 125381168, + "step": 1389 + }, + { + "epoch": 6.386206896551724, + "loss": 0.060101285576820374, + "loss_ce": 2.7430016416474245e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 125381168, + "step": 1389 + }, + { + "epoch": 6.390804597701149, + "grad_norm": 4.121494614520851, + "learning_rate": 5e-06, + "loss": 0.0703, + "num_input_tokens_seen": 125471516, + "step": 1390 + }, + { + "epoch": 6.390804597701149, + "loss": 0.09581901133060455, + "loss_ce": 2.433588269923348e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 125471516, + "step": 1390 + }, + { + "epoch": 6.395402298850574, + "grad_norm": 14.675052033868381, + "learning_rate": 5e-06, + "loss": 0.1022, + "num_input_tokens_seen": 125561880, + "step": 1391 + }, + { + "epoch": 6.395402298850574, + "loss": 0.1025380939245224, + "loss_ce": 4.480792267713696e-05, + "loss_iou": 0.251953125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 125561880, + "step": 1391 + }, + { + "epoch": 6.4, + "grad_norm": 7.391101398441527, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 125651460, + "step": 1392 + }, + { + "epoch": 6.4, + "loss": 0.1120477020740509, + "loss_ce": 2.4182809283956885e-06, + "loss_iou": 0.345703125, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 125651460, + "step": 1392 + }, + { + "epoch": 6.4045977011494255, + "grad_norm": 4.598424061793132, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 125741836, + "step": 1393 + }, + { + "epoch": 6.4045977011494255, + "loss": 0.08984687924385071, + "loss_ce": 3.1254223813448334e-06, + "loss_iou": 0.296875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 125741836, + "step": 1393 + }, + { + "epoch": 6.409195402298851, + "grad_norm": 10.543183735469755, + "learning_rate": 5e-06, + "loss": 0.0705, + "num_input_tokens_seen": 125832188, + "step": 1394 + }, + { + "epoch": 6.409195402298851, + "loss": 0.06207028031349182, + "loss_ce": 1.2784106729668565e-05, + "loss_iou": 0.306640625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 125832188, + "step": 1394 + }, + { + "epoch": 6.413793103448276, + "grad_norm": 11.282419963710336, + "learning_rate": 5e-06, + "loss": 0.0785, + "num_input_tokens_seen": 125922616, + "step": 1395 + }, + { + "epoch": 6.413793103448276, + "loss": 0.09065688401460648, + "loss_ce": 4.416605861479184e-06, + "loss_iou": 0.359375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 125922616, + "step": 1395 + }, + { + "epoch": 6.418390804597701, + "grad_norm": 6.53067476157625, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 126012968, + "step": 1396 + }, + { + "epoch": 6.418390804597701, + "loss": 0.11000474542379379, + "loss_ce": 4.139963493798859e-06, + "loss_iou": 0.408203125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 126012968, + "step": 1396 + }, + { + "epoch": 6.422988505747126, + "grad_norm": 5.213448872545618, + "learning_rate": 5e-06, + "loss": 0.0968, + "num_input_tokens_seen": 126102532, + "step": 1397 + }, + { + "epoch": 6.422988505747126, + "loss": 0.10766983777284622, + "loss_ce": 3.8197222238522954e-06, + "loss_iou": 0.3515625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 126102532, + "step": 1397 + }, + { + "epoch": 6.427586206896552, + "grad_norm": 19.99194183396297, + "learning_rate": 5e-06, + "loss": 0.069, + "num_input_tokens_seen": 126192844, + "step": 1398 + }, + { + "epoch": 6.427586206896552, + "loss": 0.07072758674621582, + "loss_ce": 3.0986652745923493e-06, + "loss_iou": 0.271484375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 126192844, + "step": 1398 + }, + { + "epoch": 6.432183908045977, + "grad_norm": 2.972490756323674, + "learning_rate": 5e-06, + "loss": 0.0783, + "num_input_tokens_seen": 126282472, + "step": 1399 + }, + { + "epoch": 6.432183908045977, + "loss": 0.0742267370223999, + "loss_ce": 5.376310582505539e-05, + "loss_iou": 0.279296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 126282472, + "step": 1399 + }, + { + "epoch": 6.436781609195402, + "grad_norm": 3.093510331762982, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 126372972, + "step": 1400 + }, + { + "epoch": 6.436781609195402, + "loss": 0.06427352130413055, + "loss_ce": 1.876192618510686e-05, + "loss_iou": 0.322265625, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 126372972, + "step": 1400 + }, + { + "epoch": 6.441379310344828, + "grad_norm": 8.92440544924096, + "learning_rate": 5e-06, + "loss": 0.092, + "num_input_tokens_seen": 126463292, + "step": 1401 + }, + { + "epoch": 6.441379310344828, + "loss": 0.07695218920707703, + "loss_ce": 2.11300380215107e-06, + "loss_iou": 0.314453125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 126463292, + "step": 1401 + }, + { + "epoch": 6.445977011494253, + "grad_norm": 5.049167412556098, + "learning_rate": 5e-06, + "loss": 0.096, + "num_input_tokens_seen": 126553568, + "step": 1402 + }, + { + "epoch": 6.445977011494253, + "loss": 0.12945988774299622, + "loss_ce": 4.316266313253436e-06, + "loss_iou": 0.26953125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 126553568, + "step": 1402 + }, + { + "epoch": 6.450574712643678, + "grad_norm": 7.016975178744314, + "learning_rate": 5e-06, + "loss": 0.0665, + "num_input_tokens_seen": 126643956, + "step": 1403 + }, + { + "epoch": 6.450574712643678, + "loss": 0.0685601681470871, + "loss_ce": 2.4308599222422345e-06, + "loss_iou": 0.328125, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 126643956, + "step": 1403 + }, + { + "epoch": 6.455172413793104, + "grad_norm": 4.991828966505337, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 126734232, + "step": 1404 + }, + { + "epoch": 6.455172413793104, + "loss": 0.13235914707183838, + "loss_ce": 4.41625525127165e-06, + "loss_iou": 0.28515625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 126734232, + "step": 1404 + }, + { + "epoch": 6.459770114942529, + "grad_norm": 4.41915443276764, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 126824688, + "step": 1405 + }, + { + "epoch": 6.459770114942529, + "loss": 0.05354461073875427, + "loss_ce": 1.52462337155157e-06, + "loss_iou": 0.298828125, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 126824688, + "step": 1405 + }, + { + "epoch": 6.464367816091954, + "grad_norm": 27.333175354486055, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 126914992, + "step": 1406 + }, + { + "epoch": 6.464367816091954, + "loss": 0.08278225362300873, + "loss_ce": 3.324659246572992e-06, + "loss_iou": 0.25390625, + "loss_num": 0.0164794921875, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 126914992, + "step": 1406 + }, + { + "epoch": 6.468965517241379, + "grad_norm": 15.2120482775101, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 127005376, + "step": 1407 + }, + { + "epoch": 6.468965517241379, + "loss": 0.07776758074760437, + "loss_ce": 8.793870620138478e-06, + "loss_iou": 0.306640625, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 127005376, + "step": 1407 + }, + { + "epoch": 6.4735632183908045, + "grad_norm": 5.4209480783540585, + "learning_rate": 5e-06, + "loss": 0.0926, + "num_input_tokens_seen": 127094892, + "step": 1408 + }, + { + "epoch": 6.4735632183908045, + "loss": 0.11467273533344269, + "loss_ce": 2.9248494683997706e-06, + "loss_iou": 0.333984375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 127094892, + "step": 1408 + }, + { + "epoch": 6.47816091954023, + "grad_norm": 6.961408738662496, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 127184420, + "step": 1409 + }, + { + "epoch": 6.47816091954023, + "loss": 0.08804384618997574, + "loss_ce": 6.312624805104861e-07, + "loss_iou": 0.294921875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 127184420, + "step": 1409 + }, + { + "epoch": 6.482758620689655, + "grad_norm": 5.090185336665533, + "learning_rate": 5e-06, + "loss": 0.0674, + "num_input_tokens_seen": 127274716, + "step": 1410 + }, + { + "epoch": 6.482758620689655, + "loss": 0.09357266128063202, + "loss_ce": 2.103199039993342e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 127274716, + "step": 1410 + }, + { + "epoch": 6.487356321839081, + "grad_norm": 3.1966959428594626, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 127365044, + "step": 1411 + }, + { + "epoch": 6.487356321839081, + "loss": 0.04377942159771919, + "loss_ce": 1.9543890630302485e-06, + "loss_iou": 0.26953125, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 127365044, + "step": 1411 + }, + { + "epoch": 6.491954022988506, + "grad_norm": 8.572838699176348, + "learning_rate": 5e-06, + "loss": 0.0988, + "num_input_tokens_seen": 127455440, + "step": 1412 + }, + { + "epoch": 6.491954022988506, + "loss": 0.09225938469171524, + "loss_ce": 4.743013505503768e-06, + "loss_iou": 0.29296875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 127455440, + "step": 1412 + }, + { + "epoch": 6.496551724137931, + "grad_norm": 13.582941616327785, + "learning_rate": 5e-06, + "loss": 0.0462, + "num_input_tokens_seen": 127545812, + "step": 1413 + }, + { + "epoch": 6.496551724137931, + "loss": 0.044346556067466736, + "loss_ce": 4.514195552474121e-06, + "loss_iou": 0.2734375, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 127545812, + "step": 1413 + }, + { + "epoch": 6.501149425287356, + "grad_norm": 8.548817159191486, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 127636132, + "step": 1414 + }, + { + "epoch": 6.501149425287356, + "loss": 0.10972800850868225, + "loss_ce": 2.04993671104603e-06, + "loss_iou": 0.3125, + "loss_num": 0.0218505859375, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 127636132, + "step": 1414 + }, + { + "epoch": 6.505747126436781, + "grad_norm": 5.849875972289943, + "learning_rate": 5e-06, + "loss": 0.0632, + "num_input_tokens_seen": 127724944, + "step": 1415 + }, + { + "epoch": 6.505747126436781, + "loss": 0.04938147962093353, + "loss_ce": 4.03994590669754e-06, + "loss_iou": 0.3515625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 127724944, + "step": 1415 + }, + { + "epoch": 6.510344827586207, + "grad_norm": 2.6655195062146237, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 127815308, + "step": 1416 + }, + { + "epoch": 6.510344827586207, + "loss": 0.17061986029148102, + "loss_ce": 5.711633275495842e-05, + "loss_iou": 0.328125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 127815308, + "step": 1416 + }, + { + "epoch": 6.514942528735633, + "grad_norm": 11.77741222539578, + "learning_rate": 5e-06, + "loss": 0.0706, + "num_input_tokens_seen": 127905652, + "step": 1417 + }, + { + "epoch": 6.514942528735633, + "loss": 0.06571365892887115, + "loss_ce": 9.316358045907691e-06, + "loss_iou": 0.2578125, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 127905652, + "step": 1417 + }, + { + "epoch": 6.519540229885058, + "grad_norm": 7.004727703721243, + "learning_rate": 5e-06, + "loss": 0.0649, + "num_input_tokens_seen": 127996084, + "step": 1418 + }, + { + "epoch": 6.519540229885058, + "loss": 0.0499795600771904, + "loss_ce": 7.023472790024243e-06, + "loss_iou": 0.28515625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 127996084, + "step": 1418 + }, + { + "epoch": 6.524137931034483, + "grad_norm": 11.550186444604932, + "learning_rate": 5e-06, + "loss": 0.0998, + "num_input_tokens_seen": 128086452, + "step": 1419 + }, + { + "epoch": 6.524137931034483, + "loss": 0.1050664559006691, + "loss_ce": 2.4955639673862606e-05, + "loss_iou": 0.29296875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 128086452, + "step": 1419 + }, + { + "epoch": 6.528735632183908, + "grad_norm": 8.09948814876527, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 128176808, + "step": 1420 + }, + { + "epoch": 6.528735632183908, + "loss": 0.09395314753055573, + "loss_ce": 4.7787834773771465e-06, + "loss_iou": 0.314453125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 128176808, + "step": 1420 + }, + { + "epoch": 6.533333333333333, + "grad_norm": 23.81012046657335, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 128267156, + "step": 1421 + }, + { + "epoch": 6.533333333333333, + "loss": 0.1023278459906578, + "loss_ce": 2.4068172024271917e-06, + "loss_iou": 0.322265625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 128267156, + "step": 1421 + }, + { + "epoch": 6.537931034482758, + "grad_norm": 4.4144379527625315, + "learning_rate": 5e-06, + "loss": 0.0728, + "num_input_tokens_seen": 128357584, + "step": 1422 + }, + { + "epoch": 6.537931034482758, + "loss": 0.0876457542181015, + "loss_ce": 2.978431984956842e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 128357584, + "step": 1422 + }, + { + "epoch": 6.5425287356321835, + "grad_norm": 3.0109543272903045, + "learning_rate": 5e-06, + "loss": 0.0654, + "num_input_tokens_seen": 128447236, + "step": 1423 + }, + { + "epoch": 6.5425287356321835, + "loss": 0.06799499690532684, + "loss_ce": 6.287074938882142e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 128447236, + "step": 1423 + }, + { + "epoch": 6.5471264367816095, + "grad_norm": 3.6963053002280675, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 128537560, + "step": 1424 + }, + { + "epoch": 6.5471264367816095, + "loss": 0.06803764402866364, + "loss_ce": 1.3963182937004603e-05, + "loss_iou": 0.333984375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 128537560, + "step": 1424 + }, + { + "epoch": 6.551724137931035, + "grad_norm": 3.0893766848228514, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 128627956, + "step": 1425 + }, + { + "epoch": 6.551724137931035, + "loss": 0.0690060555934906, + "loss_ce": 2.1069548893137835e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 128627956, + "step": 1425 + }, + { + "epoch": 6.55632183908046, + "grad_norm": 2.559876796681711, + "learning_rate": 5e-06, + "loss": 0.0667, + "num_input_tokens_seen": 128718388, + "step": 1426 + }, + { + "epoch": 6.55632183908046, + "loss": 0.08054003119468689, + "loss_ce": 4.145399088884005e-06, + "loss_iou": 0.333984375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 128718388, + "step": 1426 + }, + { + "epoch": 6.560919540229885, + "grad_norm": 13.711469163719162, + "learning_rate": 5e-06, + "loss": 0.1533, + "num_input_tokens_seen": 128808732, + "step": 1427 + }, + { + "epoch": 6.560919540229885, + "loss": 0.1875627636909485, + "loss_ce": 3.2246465707430616e-05, + "loss_iou": 0.30078125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 128808732, + "step": 1427 + }, + { + "epoch": 6.56551724137931, + "grad_norm": 8.158037791630148, + "learning_rate": 5e-06, + "loss": 0.0782, + "num_input_tokens_seen": 128898284, + "step": 1428 + }, + { + "epoch": 6.56551724137931, + "loss": 0.08450591564178467, + "loss_ce": 2.7392322863306617e-06, + "loss_iou": 0.2578125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 128898284, + "step": 1428 + }, + { + "epoch": 6.570114942528735, + "grad_norm": 16.342416907286246, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 128988672, + "step": 1429 + }, + { + "epoch": 6.570114942528735, + "loss": 0.06996608525514603, + "loss_ce": 4.540259851637529e-06, + "loss_iou": 0.328125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 128988672, + "step": 1429 + }, + { + "epoch": 6.574712643678161, + "grad_norm": 2.9446395510222763, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 129079076, + "step": 1430 + }, + { + "epoch": 6.574712643678161, + "loss": 0.07431189715862274, + "loss_ce": 6.262610259000212e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 129079076, + "step": 1430 + }, + { + "epoch": 6.5793103448275865, + "grad_norm": 5.022862929220284, + "learning_rate": 5e-06, + "loss": 0.0586, + "num_input_tokens_seen": 129169404, + "step": 1431 + }, + { + "epoch": 6.5793103448275865, + "loss": 0.042931437492370605, + "loss_ce": 8.465913197142072e-06, + "loss_iou": 0.322265625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 129169404, + "step": 1431 + }, + { + "epoch": 6.583908045977012, + "grad_norm": 5.866439214352878, + "learning_rate": 5e-06, + "loss": 0.0915, + "num_input_tokens_seen": 129258248, + "step": 1432 + }, + { + "epoch": 6.583908045977012, + "loss": 0.11402902007102966, + "loss_ce": 1.534522198198829e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 129258248, + "step": 1432 + }, + { + "epoch": 6.588505747126437, + "grad_norm": 2.4783513803938586, + "learning_rate": 5e-06, + "loss": 0.0603, + "num_input_tokens_seen": 129348620, + "step": 1433 + }, + { + "epoch": 6.588505747126437, + "loss": 0.0649535059928894, + "loss_ce": 4.467387498152675e-06, + "loss_iou": 0.322265625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 129348620, + "step": 1433 + }, + { + "epoch": 6.593103448275862, + "grad_norm": 44.36894368647196, + "learning_rate": 5e-06, + "loss": 0.0781, + "num_input_tokens_seen": 129439104, + "step": 1434 + }, + { + "epoch": 6.593103448275862, + "loss": 0.06779413670301437, + "loss_ce": 6.966247383388691e-06, + "loss_iou": 0.25, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 129439104, + "step": 1434 + }, + { + "epoch": 6.597701149425287, + "grad_norm": 6.9878597014692225, + "learning_rate": 5e-06, + "loss": 0.0779, + "num_input_tokens_seen": 129529500, + "step": 1435 + }, + { + "epoch": 6.597701149425287, + "loss": 0.06153077632188797, + "loss_ce": 7.337920578720514e-06, + "loss_iou": 0.318359375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 129529500, + "step": 1435 + }, + { + "epoch": 6.602298850574712, + "grad_norm": 14.338290986997505, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 129619932, + "step": 1436 + }, + { + "epoch": 6.602298850574712, + "loss": 0.09894341975450516, + "loss_ce": 3.59489640686661e-05, + "loss_iou": 0.322265625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 129619932, + "step": 1436 + }, + { + "epoch": 6.606896551724138, + "grad_norm": 8.118572650309352, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 129710368, + "step": 1437 + }, + { + "epoch": 6.606896551724138, + "loss": 0.05395686253905296, + "loss_ce": 1.782671802175173e-06, + "loss_iou": 0.357421875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 129710368, + "step": 1437 + }, + { + "epoch": 6.611494252873563, + "grad_norm": 13.176137538777503, + "learning_rate": 5e-06, + "loss": 0.0747, + "num_input_tokens_seen": 129800728, + "step": 1438 + }, + { + "epoch": 6.611494252873563, + "loss": 0.10741189867258072, + "loss_ce": 2.0546713130897842e-05, + "loss_iou": 0.2734375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 129800728, + "step": 1438 + }, + { + "epoch": 6.6160919540229886, + "grad_norm": 2.9757478665850132, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 129891160, + "step": 1439 + }, + { + "epoch": 6.6160919540229886, + "loss": 0.11056976020336151, + "loss_ce": 4.582754172588466e-06, + "loss_iou": 0.35546875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 129891160, + "step": 1439 + }, + { + "epoch": 6.620689655172414, + "grad_norm": 2.0836625866660023, + "learning_rate": 5e-06, + "loss": 0.093, + "num_input_tokens_seen": 129981596, + "step": 1440 + }, + { + "epoch": 6.620689655172414, + "loss": 0.07989851385354996, + "loss_ce": 3.400920468266122e-05, + "loss_iou": 0.328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 129981596, + "step": 1440 + }, + { + "epoch": 6.625287356321839, + "grad_norm": 11.346287982435365, + "learning_rate": 5e-06, + "loss": 0.0822, + "num_input_tokens_seen": 130070368, + "step": 1441 + }, + { + "epoch": 6.625287356321839, + "loss": 0.1127094179391861, + "loss_ce": 8.00615543994354e-06, + "loss_iou": 0.359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 130070368, + "step": 1441 + }, + { + "epoch": 6.629885057471264, + "grad_norm": 11.945688660639014, + "learning_rate": 5e-06, + "loss": 0.0604, + "num_input_tokens_seen": 130160760, + "step": 1442 + }, + { + "epoch": 6.629885057471264, + "loss": 0.06225637346506119, + "loss_ce": 1.5771218386362307e-05, + "loss_iou": 0.314453125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 130160760, + "step": 1442 + }, + { + "epoch": 6.63448275862069, + "grad_norm": 5.975406754271047, + "learning_rate": 5e-06, + "loss": 0.0666, + "num_input_tokens_seen": 130251268, + "step": 1443 + }, + { + "epoch": 6.63448275862069, + "loss": 0.0756722092628479, + "loss_ce": 1.9129147403873503e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 130251268, + "step": 1443 + }, + { + "epoch": 6.639080459770115, + "grad_norm": 4.4111769643184005, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 130341548, + "step": 1444 + }, + { + "epoch": 6.639080459770115, + "loss": 0.09213773906230927, + "loss_ce": 5.1713404900510795e-06, + "loss_iou": 0.275390625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 130341548, + "step": 1444 + }, + { + "epoch": 6.64367816091954, + "grad_norm": 1.9533172759588555, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 130431868, + "step": 1445 + }, + { + "epoch": 6.64367816091954, + "loss": 0.03545193374156952, + "loss_ce": 5.765972673543729e-06, + "loss_iou": 0.2470703125, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 130431868, + "step": 1445 + }, + { + "epoch": 6.6482758620689655, + "grad_norm": 1.6643815600000895, + "learning_rate": 5e-06, + "loss": 0.0661, + "num_input_tokens_seen": 130522236, + "step": 1446 + }, + { + "epoch": 6.6482758620689655, + "loss": 0.06866565346717834, + "loss_ce": 1.09890947896929e-06, + "loss_iou": 0.287109375, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 130522236, + "step": 1446 + }, + { + "epoch": 6.652873563218391, + "grad_norm": 8.868765611159223, + "learning_rate": 5e-06, + "loss": 0.0745, + "num_input_tokens_seen": 130612552, + "step": 1447 + }, + { + "epoch": 6.652873563218391, + "loss": 0.07027009129524231, + "loss_ce": 3.366212013133918e-06, + "loss_iou": 0.3125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 130612552, + "step": 1447 + }, + { + "epoch": 6.657471264367816, + "grad_norm": 8.270525030687544, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 130702956, + "step": 1448 + }, + { + "epoch": 6.657471264367816, + "loss": 0.09026643633842468, + "loss_ce": 1.069144764187513e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 130702956, + "step": 1448 + }, + { + "epoch": 6.662068965517241, + "grad_norm": 3.3647597113536123, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 130793328, + "step": 1449 + }, + { + "epoch": 6.662068965517241, + "loss": 0.09332942962646484, + "loss_ce": 6.675434633507393e-06, + "loss_iou": 0.38671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 130793328, + "step": 1449 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 131.97107940993814, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 130883796, + "step": 1450 + }, + { + "epoch": 6.666666666666667, + "loss": 0.057534217834472656, + "loss_ce": 2.383916944381781e-05, + "loss_iou": 0.30078125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 130883796, + "step": 1450 + }, + { + "epoch": 6.671264367816092, + "grad_norm": 23.1470764725479, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 130974220, + "step": 1451 + }, + { + "epoch": 6.671264367816092, + "loss": 0.09790711104869843, + "loss_ce": 6.722117177559994e-06, + "loss_iou": 0.326171875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 130974220, + "step": 1451 + }, + { + "epoch": 6.675862068965517, + "grad_norm": 10.16468406745285, + "learning_rate": 5e-06, + "loss": 0.0803, + "num_input_tokens_seen": 131064624, + "step": 1452 + }, + { + "epoch": 6.675862068965517, + "loss": 0.061509158462285995, + "loss_ce": 3.912944157491438e-05, + "loss_iou": 0.30859375, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 131064624, + "step": 1452 + }, + { + "epoch": 6.680459770114942, + "grad_norm": 4.911042024533096, + "learning_rate": 5e-06, + "loss": 0.0926, + "num_input_tokens_seen": 131154952, + "step": 1453 + }, + { + "epoch": 6.680459770114942, + "loss": 0.09288729727268219, + "loss_ce": 2.2301146600511856e-05, + "loss_iou": 0.375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 131154952, + "step": 1453 + }, + { + "epoch": 6.685057471264368, + "grad_norm": 2.22095720475405, + "learning_rate": 5e-06, + "loss": 0.0656, + "num_input_tokens_seen": 131245408, + "step": 1454 + }, + { + "epoch": 6.685057471264368, + "loss": 0.07592545449733734, + "loss_ce": 5.346221314539434e-06, + "loss_iou": 0.361328125, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 131245408, + "step": 1454 + }, + { + "epoch": 6.689655172413794, + "grad_norm": 11.830161052137443, + "learning_rate": 5e-06, + "loss": 0.0673, + "num_input_tokens_seen": 131335812, + "step": 1455 + }, + { + "epoch": 6.689655172413794, + "loss": 0.06331153213977814, + "loss_ce": 3.3334035833831877e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 131335812, + "step": 1455 + }, + { + "epoch": 6.694252873563219, + "grad_norm": 3.0578099363890328, + "learning_rate": 5e-06, + "loss": 0.0779, + "num_input_tokens_seen": 131426204, + "step": 1456 + }, + { + "epoch": 6.694252873563219, + "loss": 0.07001975178718567, + "loss_ce": 1.2430735296220519e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 131426204, + "step": 1456 + }, + { + "epoch": 6.698850574712644, + "grad_norm": 5.750992935778732, + "learning_rate": 5e-06, + "loss": 0.0576, + "num_input_tokens_seen": 131516528, + "step": 1457 + }, + { + "epoch": 6.698850574712644, + "loss": 0.05864344537258148, + "loss_ce": 3.921055849787081e-06, + "loss_iou": 0.34375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 131516528, + "step": 1457 + }, + { + "epoch": 6.703448275862069, + "grad_norm": 10.893446380591408, + "learning_rate": 5e-06, + "loss": 0.1175, + "num_input_tokens_seen": 131606848, + "step": 1458 + }, + { + "epoch": 6.703448275862069, + "loss": 0.11121590435504913, + "loss_ce": 9.852826224232558e-06, + "loss_iou": 0.322265625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 131606848, + "step": 1458 + }, + { + "epoch": 6.708045977011494, + "grad_norm": 27.414007106230482, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 131697268, + "step": 1459 + }, + { + "epoch": 6.708045977011494, + "loss": 0.11673033237457275, + "loss_ce": 1.5854706362006254e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 131697268, + "step": 1459 + }, + { + "epoch": 6.712643678160919, + "grad_norm": 5.804991202555291, + "learning_rate": 5e-06, + "loss": 0.0791, + "num_input_tokens_seen": 131787672, + "step": 1460 + }, + { + "epoch": 6.712643678160919, + "loss": 0.09766550362110138, + "loss_ce": 9.254023098037578e-06, + "loss_iou": 0.275390625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 131787672, + "step": 1460 + }, + { + "epoch": 6.7172413793103445, + "grad_norm": 3.1386099033424135, + "learning_rate": 5e-06, + "loss": 0.0823, + "num_input_tokens_seen": 131878152, + "step": 1461 + }, + { + "epoch": 6.7172413793103445, + "loss": 0.13510626554489136, + "loss_ce": 4.960803835274419e-06, + "loss_iou": 0.28515625, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 131878152, + "step": 1461 + }, + { + "epoch": 6.72183908045977, + "grad_norm": 9.852881694264854, + "learning_rate": 5e-06, + "loss": 0.0609, + "num_input_tokens_seen": 131968480, + "step": 1462 + }, + { + "epoch": 6.72183908045977, + "loss": 0.06898908317089081, + "loss_ce": 4.097350029041991e-06, + "loss_iou": 0.328125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 131968480, + "step": 1462 + }, + { + "epoch": 6.726436781609196, + "grad_norm": 2.7071823032354185, + "learning_rate": 5e-06, + "loss": 0.0568, + "num_input_tokens_seen": 132059024, + "step": 1463 + }, + { + "epoch": 6.726436781609196, + "loss": 0.059280022978782654, + "loss_ce": 1.489080568717327e-05, + "loss_iou": 0.3125, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 132059024, + "step": 1463 + }, + { + "epoch": 6.731034482758621, + "grad_norm": 7.777141629876652, + "learning_rate": 5e-06, + "loss": 0.0897, + "num_input_tokens_seen": 132149448, + "step": 1464 + }, + { + "epoch": 6.731034482758621, + "loss": 0.07515916228294373, + "loss_ce": 2.0007742023153696e-06, + "loss_iou": 0.337890625, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 132149448, + "step": 1464 + }, + { + "epoch": 6.735632183908046, + "grad_norm": 10.178285238489115, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 132239800, + "step": 1465 + }, + { + "epoch": 6.735632183908046, + "loss": 0.08457478135824203, + "loss_ce": 7.161090616136789e-05, + "loss_iou": 0.328125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 132239800, + "step": 1465 + }, + { + "epoch": 6.740229885057471, + "grad_norm": 6.325965805360473, + "learning_rate": 5e-06, + "loss": 0.0763, + "num_input_tokens_seen": 132330264, + "step": 1466 + }, + { + "epoch": 6.740229885057471, + "loss": 0.0843062549829483, + "loss_ce": 1.4451411516347434e-06, + "loss_iou": 0.275390625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 132330264, + "step": 1466 + }, + { + "epoch": 6.744827586206896, + "grad_norm": 6.120131274499989, + "learning_rate": 5e-06, + "loss": 0.0625, + "num_input_tokens_seen": 132420728, + "step": 1467 + }, + { + "epoch": 6.744827586206896, + "loss": 0.07916628569364548, + "loss_ce": 3.6849833122687414e-06, + "loss_iou": 0.375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 132420728, + "step": 1467 + }, + { + "epoch": 6.749425287356322, + "grad_norm": 19.37810950613584, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 132511056, + "step": 1468 + }, + { + "epoch": 6.749425287356322, + "loss": 0.06741497665643692, + "loss_ce": 1.6471570916110068e-06, + "loss_iou": 0.353515625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 132511056, + "step": 1468 + }, + { + "epoch": 6.7540229885057474, + "grad_norm": 1.9425313486502551, + "learning_rate": 5e-06, + "loss": 0.0781, + "num_input_tokens_seen": 132601416, + "step": 1469 + }, + { + "epoch": 6.7540229885057474, + "loss": 0.09817390143871307, + "loss_ce": 1.4117615137365647e-05, + "loss_iou": 0.26171875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 132601416, + "step": 1469 + }, + { + "epoch": 6.758620689655173, + "grad_norm": 2.6250049604837926, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 132691676, + "step": 1470 + }, + { + "epoch": 6.758620689655173, + "loss": 0.08522357046604156, + "loss_ce": 3.2353859751310665e-06, + "loss_iou": 0.421875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 132691676, + "step": 1470 + }, + { + "epoch": 6.763218390804598, + "grad_norm": 2.724148470573844, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 132782044, + "step": 1471 + }, + { + "epoch": 6.763218390804598, + "loss": 0.06568065285682678, + "loss_ce": 6.820980161137413e-06, + "loss_iou": 0.322265625, + "loss_num": 0.01318359375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 132782044, + "step": 1471 + }, + { + "epoch": 6.767816091954023, + "grad_norm": 1.3975437384035634, + "learning_rate": 5e-06, + "loss": 0.0587, + "num_input_tokens_seen": 132872492, + "step": 1472 + }, + { + "epoch": 6.767816091954023, + "loss": 0.03942399471998215, + "loss_ce": 1.0540796210989356e-05, + "loss_iou": 0.234375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 132872492, + "step": 1472 + }, + { + "epoch": 6.772413793103448, + "grad_norm": 3.6657098912800854, + "learning_rate": 5e-06, + "loss": 0.0834, + "num_input_tokens_seen": 132962916, + "step": 1473 + }, + { + "epoch": 6.772413793103448, + "loss": 0.09079530835151672, + "loss_ce": 5.5107075240812264e-06, + "loss_iou": 0.330078125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 132962916, + "step": 1473 + }, + { + "epoch": 6.777011494252873, + "grad_norm": 6.399156287473962, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 133053228, + "step": 1474 + }, + { + "epoch": 6.777011494252873, + "loss": 0.10846640169620514, + "loss_ce": 6.926821697561536e-06, + "loss_iou": 0.30078125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 133053228, + "step": 1474 + }, + { + "epoch": 6.781609195402299, + "grad_norm": 13.807814737472036, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 133143584, + "step": 1475 + }, + { + "epoch": 6.781609195402299, + "loss": 0.10274454951286316, + "loss_ce": 9.868255438050255e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 133143584, + "step": 1475 + }, + { + "epoch": 6.786206896551724, + "grad_norm": 4.909907187391009, + "learning_rate": 5e-06, + "loss": 0.0846, + "num_input_tokens_seen": 133233944, + "step": 1476 + }, + { + "epoch": 6.786206896551724, + "loss": 0.037097539752721786, + "loss_ce": 3.4241477351315552e-06, + "loss_iou": 0.255859375, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 133233944, + "step": 1476 + }, + { + "epoch": 6.7908045977011495, + "grad_norm": 10.088133006314749, + "learning_rate": 5e-06, + "loss": 0.0748, + "num_input_tokens_seen": 133324236, + "step": 1477 + }, + { + "epoch": 6.7908045977011495, + "loss": 0.07998788356781006, + "loss_ce": 1.318014255957678e-06, + "loss_iou": 0.380859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 133324236, + "step": 1477 + }, + { + "epoch": 6.795402298850575, + "grad_norm": 23.2424717637074, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 133414528, + "step": 1478 + }, + { + "epoch": 6.795402298850575, + "loss": 0.060220953077077866, + "loss_ce": 9.768824384082109e-06, + "loss_iou": 0.32421875, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 133414528, + "step": 1478 + }, + { + "epoch": 6.8, + "grad_norm": 5.280653577103845, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 133504924, + "step": 1479 + }, + { + "epoch": 6.8, + "loss": 0.09979541599750519, + "loss_ce": 2.9406905923679005e-06, + "loss_iou": 0.38671875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 133504924, + "step": 1479 + }, + { + "epoch": 6.804597701149425, + "grad_norm": 5.817883669287115, + "learning_rate": 5e-06, + "loss": 0.078, + "num_input_tokens_seen": 133595204, + "step": 1480 + }, + { + "epoch": 6.804597701149425, + "loss": 0.05428166314959526, + "loss_ce": 6.149261480459245e-06, + "loss_iou": 0.3203125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 133595204, + "step": 1480 + }, + { + "epoch": 6.809195402298851, + "grad_norm": 23.814326518568098, + "learning_rate": 5e-06, + "loss": 0.0782, + "num_input_tokens_seen": 133685616, + "step": 1481 + }, + { + "epoch": 6.809195402298851, + "loss": 0.11151270568370819, + "loss_ce": 1.4743751535206684e-06, + "loss_iou": 0.26171875, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 133685616, + "step": 1481 + }, + { + "epoch": 6.813793103448276, + "grad_norm": 8.881491653313477, + "learning_rate": 5e-06, + "loss": 0.0439, + "num_input_tokens_seen": 133775972, + "step": 1482 + }, + { + "epoch": 6.813793103448276, + "loss": 0.04635809361934662, + "loss_ce": 9.521063475403935e-06, + "loss_iou": 0.28125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 133775972, + "step": 1482 + }, + { + "epoch": 6.818390804597701, + "grad_norm": 11.628206791707466, + "learning_rate": 5e-06, + "loss": 0.076, + "num_input_tokens_seen": 133866336, + "step": 1483 + }, + { + "epoch": 6.818390804597701, + "loss": 0.08055493235588074, + "loss_ce": 3.7818542750756023e-06, + "loss_iou": 0.291015625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 133866336, + "step": 1483 + }, + { + "epoch": 6.8229885057471265, + "grad_norm": 2.105574615398993, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 133956784, + "step": 1484 + }, + { + "epoch": 6.8229885057471265, + "loss": 0.09039635956287384, + "loss_ce": 3.2979687603074126e-06, + "loss_iou": 0.33203125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 133956784, + "step": 1484 + }, + { + "epoch": 6.827586206896552, + "grad_norm": 3.8079419854599075, + "learning_rate": 5e-06, + "loss": 0.1018, + "num_input_tokens_seen": 134047040, + "step": 1485 + }, + { + "epoch": 6.827586206896552, + "loss": 0.09102049469947815, + "loss_ce": 0.00044431857531890273, + "loss_iou": 0.34765625, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 134047040, + "step": 1485 + }, + { + "epoch": 6.832183908045977, + "grad_norm": 21.116070086676018, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 134137528, + "step": 1486 + }, + { + "epoch": 6.832183908045977, + "loss": 0.0954207181930542, + "loss_ce": 7.509706392738735e-06, + "loss_iou": 0.31640625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 134137528, + "step": 1486 + }, + { + "epoch": 6.836781609195402, + "grad_norm": 14.756310820481287, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 134227828, + "step": 1487 + }, + { + "epoch": 6.836781609195402, + "loss": 0.06289245933294296, + "loss_ce": 1.098936172638787e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 134227828, + "step": 1487 + }, + { + "epoch": 6.841379310344828, + "grad_norm": 2.276241824617552, + "learning_rate": 5e-06, + "loss": 0.073, + "num_input_tokens_seen": 134318180, + "step": 1488 + }, + { + "epoch": 6.841379310344828, + "loss": 0.07049933075904846, + "loss_ce": 3.7283075471350458e-06, + "loss_iou": 0.326171875, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 134318180, + "step": 1488 + }, + { + "epoch": 6.845977011494253, + "grad_norm": 9.670203919462505, + "learning_rate": 5e-06, + "loss": 0.0647, + "num_input_tokens_seen": 134408616, + "step": 1489 + }, + { + "epoch": 6.845977011494253, + "loss": 0.05651029944419861, + "loss_ce": 7.007898602751084e-06, + "loss_iou": 0.322265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 134408616, + "step": 1489 + }, + { + "epoch": 6.850574712643678, + "grad_norm": 4.106681680803513, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 134498968, + "step": 1490 + }, + { + "epoch": 6.850574712643678, + "loss": 0.055772751569747925, + "loss_ce": 1.8806914567903732e-06, + "loss_iou": 0.2353515625, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 134498968, + "step": 1490 + }, + { + "epoch": 6.855172413793103, + "grad_norm": 8.347874999340169, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 134589296, + "step": 1491 + }, + { + "epoch": 6.855172413793103, + "loss": 0.10776175558567047, + "loss_ce": 1.9452994820312597e-05, + "loss_iou": 0.3828125, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 134589296, + "step": 1491 + }, + { + "epoch": 6.8597701149425285, + "grad_norm": 11.405237946200163, + "learning_rate": 5e-06, + "loss": 0.0905, + "num_input_tokens_seen": 134679776, + "step": 1492 + }, + { + "epoch": 6.8597701149425285, + "loss": 0.09717725962400436, + "loss_ce": 9.286872227676213e-06, + "loss_iou": 0.361328125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 134679776, + "step": 1492 + }, + { + "epoch": 6.864367816091954, + "grad_norm": 3.675422132416597, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 134769980, + "step": 1493 + }, + { + "epoch": 6.864367816091954, + "loss": 0.0675213634967804, + "loss_ce": 1.2203744290673058e-06, + "loss_iou": 0.29296875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 134769980, + "step": 1493 + }, + { + "epoch": 6.86896551724138, + "grad_norm": 5.2386125622777415, + "learning_rate": 5e-06, + "loss": 0.0628, + "num_input_tokens_seen": 134860216, + "step": 1494 + }, + { + "epoch": 6.86896551724138, + "loss": 0.07779578119516373, + "loss_ce": 2.9361290216911584e-05, + "loss_iou": 0.27734375, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 134860216, + "step": 1494 + }, + { + "epoch": 6.873563218390805, + "grad_norm": 3.015206476190183, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 134950708, + "step": 1495 + }, + { + "epoch": 6.873563218390805, + "loss": 0.06314820051193237, + "loss_ce": 7.3293504101457074e-06, + "loss_iou": 0.255859375, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 134950708, + "step": 1495 + }, + { + "epoch": 6.87816091954023, + "grad_norm": 2.4409371869924272, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 135041104, + "step": 1496 + }, + { + "epoch": 6.87816091954023, + "loss": 0.046701833605766296, + "loss_ce": 9.940345989889465e-06, + "loss_iou": 0.271484375, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 135041104, + "step": 1496 + }, + { + "epoch": 6.882758620689655, + "grad_norm": 4.738944993160565, + "learning_rate": 5e-06, + "loss": 0.0552, + "num_input_tokens_seen": 135131460, + "step": 1497 + }, + { + "epoch": 6.882758620689655, + "loss": 0.04962436109781265, + "loss_ce": 2.7832118121295935e-06, + "loss_iou": 0.3125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 135131460, + "step": 1497 + }, + { + "epoch": 6.88735632183908, + "grad_norm": 8.49862390193789, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 135221032, + "step": 1498 + }, + { + "epoch": 6.88735632183908, + "loss": 0.041058339178562164, + "loss_ce": 4.569673365040217e-06, + "loss_iou": 0.2890625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 135221032, + "step": 1498 + }, + { + "epoch": 6.8919540229885055, + "grad_norm": 15.242863888922875, + "learning_rate": 5e-06, + "loss": 0.064, + "num_input_tokens_seen": 135311548, + "step": 1499 + }, + { + "epoch": 6.8919540229885055, + "loss": 0.06402122974395752, + "loss_ce": 1.0608761840558145e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 135311548, + "step": 1499 + }, + { + "epoch": 6.896551724137931, + "grad_norm": 2.704452207506696, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 135401836, + "step": 1500 + }, + { + "epoch": 6.896551724137931, + "eval_seeclick_CIoU": 0.46876952052116394, + "eval_seeclick_GIoU": 0.4556391090154648, + "eval_seeclick_IoU": 0.5108004212379456, + "eval_seeclick_MAE_all": 0.061132827773690224, + "eval_seeclick_MAE_h": 0.04886363446712494, + "eval_seeclick_MAE_w": 0.12054737657308578, + "eval_seeclick_MAE_x_boxes": 0.10729708895087242, + "eval_seeclick_MAE_y_boxes": 0.052372606471180916, + "eval_seeclick_NUM_probability": 0.9999991655349731, + "eval_seeclick_inside_bbox": 0.7542613744735718, + "eval_seeclick_loss": 0.37735113501548767, + "eval_seeclick_loss_ce": 0.07988087832927704, + "eval_seeclick_loss_iou": 0.4791259765625, + "eval_seeclick_loss_num": 0.06346893310546875, + "eval_seeclick_loss_xval": 0.317535400390625, + "eval_seeclick_runtime": 75.9161, + "eval_seeclick_samples_per_second": 0.566, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 135401836, + "step": 1500 + }, + { + "epoch": 6.896551724137931, + "eval_icons_CIoU": 0.6196990609169006, + "eval_icons_GIoU": 0.6126499474048615, + "eval_icons_IoU": 0.6482784748077393, + "eval_icons_MAE_all": 0.03874216973781586, + "eval_icons_MAE_h": 0.0633212048560381, + "eval_icons_MAE_w": 0.06230618245899677, + "eval_icons_MAE_x_boxes": 0.056700803339481354, + "eval_icons_MAE_y_boxes": 0.06309299729764462, + "eval_icons_NUM_probability": 0.9999997913837433, + "eval_icons_inside_bbox": 0.8107638955116272, + "eval_icons_loss": 0.1911141723394394, + "eval_icons_loss_ce": 1.2011703915959515e-06, + "eval_icons_loss_iou": 0.45794677734375, + "eval_icons_loss_num": 0.04096221923828125, + "eval_icons_loss_xval": 0.2046356201171875, + "eval_icons_runtime": 99.4274, + "eval_icons_samples_per_second": 0.503, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 135401836, + "step": 1500 + }, + { + "epoch": 6.896551724137931, + "eval_screenspot_CIoU": 0.4273842175801595, + "eval_screenspot_GIoU": 0.4152764479319255, + "eval_screenspot_IoU": 0.4939347803592682, + "eval_screenspot_MAE_all": 0.083914448817571, + "eval_screenspot_MAE_h": 0.07696965336799622, + "eval_screenspot_MAE_w": 0.17438126603762308, + "eval_screenspot_MAE_x_boxes": 0.1727352738380432, + "eval_screenspot_MAE_y_boxes": 0.0719092587629954, + "eval_screenspot_NUM_probability": 0.9999995628992716, + "eval_screenspot_inside_bbox": 0.7504166762034098, + "eval_screenspot_loss": 0.41863083839416504, + "eval_screenspot_loss_ce": 4.884489499090705e-05, + "eval_screenspot_loss_iou": 0.3917236328125, + "eval_screenspot_loss_num": 0.08594767252604167, + "eval_screenspot_loss_xval": 0.4297281901041667, + "eval_screenspot_runtime": 164.7584, + "eval_screenspot_samples_per_second": 0.54, + "eval_screenspot_steps_per_second": 0.018, + "num_input_tokens_seen": 135401836, + "step": 1500 + }, + { + "epoch": 6.896551724137931, + "eval_compot_CIoU": 0.4806292653083801, + "eval_compot_GIoU": 0.4541057199239731, + "eval_compot_IoU": 0.546671450138092, + "eval_compot_MAE_all": 0.05766832269728184, + "eval_compot_MAE_h": 0.07456954568624496, + "eval_compot_MAE_w": 0.11397556215524673, + "eval_compot_MAE_x_boxes": 0.10434515029191971, + "eval_compot_MAE_y_boxes": 0.07447323948144913, + "eval_compot_NUM_probability": 0.9999996423721313, + "eval_compot_inside_bbox": 0.7604166567325592, + "eval_compot_loss": 0.31393927335739136, + "eval_compot_loss_ce": 0.01060745446011424, + "eval_compot_loss_iou": 0.49609375, + "eval_compot_loss_num": 0.05194854736328125, + "eval_compot_loss_xval": 0.259796142578125, + "eval_compot_runtime": 92.9548, + "eval_compot_samples_per_second": 0.538, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 135401836, + "step": 1500 + } + ], + "logging_steps": 1.0, + "max_steps": 10000, + "num_input_tokens_seen": 135401836, + "num_train_epochs": 47, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 839231021613056.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}