diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,124439 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 76.60623229461757, + "eval_steps": 250, + "global_step": 6750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00026303675938712433, + "grad_norm": 174.60673820638053, + "learning_rate": 5e-06, + "loss": 6.7232, + "num_input_tokens_seen": 172192, + "step": 1 + }, + { + "epoch": 0.00026303675938712433, + "loss": 6.8635101318359375, + "loss_ce": 5.2287445068359375, + "loss_iou": 0.81640625, + "loss_num": 0.328125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 172192, + "step": 1 + }, + { + "epoch": 0.0005260735187742487, + "grad_norm": 113.61145210636417, + "learning_rate": 5e-06, + "loss": 5.2198, + "num_input_tokens_seen": 344520, + "step": 2 + }, + { + "epoch": 0.0005260735187742487, + "loss": 5.116145133972168, + "loss_ce": 3.611750364303589, + "loss_iou": 0.0, + "loss_num": 0.30078125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 344520, + "step": 2 + }, + { + "epoch": 0.000789110278161373, + "grad_norm": 181.6487958512906, + "learning_rate": 5e-06, + "loss": 3.6895, + "num_input_tokens_seen": 516868, + "step": 3 + }, + { + "epoch": 0.000789110278161373, + "loss": 3.7434756755828857, + "loss_ce": 2.4915225505828857, + "loss_iou": 0.0, + "loss_num": 0.25, + "loss_xval": 1.25, + "num_input_tokens_seen": 516868, + "step": 3 + }, + { + "epoch": 0.0010521470375484973, + "grad_norm": 136.74708656913646, + "learning_rate": 5e-06, + "loss": 3.5593, + "num_input_tokens_seen": 687240, + "step": 4 + }, + { + "epoch": 0.0010521470375484973, + "loss": 3.5931553840637207, + "loss_ce": 1.3441319465637207, + "loss_iou": 0.80859375, + "loss_num": 0.44921875, + "loss_xval": 2.25, + "num_input_tokens_seen": 687240, + "step": 4 + }, + { + "epoch": 0.0013151837969356218, + "grad_norm": 79.92211667172693, + "learning_rate": 5e-06, + "loss": 2.8187, + "num_input_tokens_seen": 859388, + "step": 5 + }, + { + "epoch": 0.0013151837969356218, + "loss": 2.8585305213928223, + "loss_ce": 1.6251322031021118, + "loss_iou": 0.2216796875, + "loss_num": 0.24609375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 859388, + "step": 5 + }, + { + "epoch": 0.001578220556322746, + "grad_norm": 51.97557929429645, + "learning_rate": 5e-06, + "loss": 2.1961, + "num_input_tokens_seen": 1029916, + "step": 6 + }, + { + "epoch": 0.001578220556322746, + "loss": 2.261239528656006, + "loss_ce": 1.1806731224060059, + "loss_iou": 0.0, + "loss_num": 0.2158203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 1029916, + "step": 6 + }, + { + "epoch": 0.0018412573157098704, + "grad_norm": 60.93429618456285, + "learning_rate": 5e-06, + "loss": 2.066, + "num_input_tokens_seen": 1202056, + "step": 7 + }, + { + "epoch": 0.0018412573157098704, + "loss": 1.9779821634292603, + "loss_ce": 1.0946813821792603, + "loss_iou": 0.0966796875, + "loss_num": 0.1767578125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 1202056, + "step": 7 + }, + { + "epoch": 0.0021042940750969946, + "grad_norm": 42.670837197481326, + "learning_rate": 5e-06, + "loss": 2.0543, + "num_input_tokens_seen": 1373904, + "step": 8 + }, + { + "epoch": 0.0021042940750969946, + "loss": 2.022350311279297, + "loss_ce": 0.9168814420700073, + "loss_iou": 0.0, + "loss_num": 0.220703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 1373904, + "step": 8 + }, + { + "epoch": 0.0023673308344841193, + "grad_norm": 48.53251052050697, + "learning_rate": 5e-06, + "loss": 2.4818, + "num_input_tokens_seen": 1546124, + "step": 9 + }, + { + "epoch": 0.0023673308344841193, + "loss": 2.4849071502685547, + "loss_ce": 0.8374460935592651, + "loss_iou": 0.01202392578125, + "loss_num": 0.330078125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 1546124, + "step": 9 + }, + { + "epoch": 0.0026303675938712436, + "grad_norm": 30.87810784426679, + "learning_rate": 5e-06, + "loss": 1.8016, + "num_input_tokens_seen": 1715836, + "step": 10 + }, + { + "epoch": 0.0026303675938712436, + "loss": 1.7729861736297607, + "loss_ce": 0.8613650798797607, + "loss_iou": 0.09326171875, + "loss_num": 0.1826171875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 1715836, + "step": 10 + }, + { + "epoch": 0.002893404353258368, + "grad_norm": 25.204935627992594, + "learning_rate": 5e-06, + "loss": 1.6242, + "num_input_tokens_seen": 1887800, + "step": 11 + }, + { + "epoch": 0.002893404353258368, + "loss": 1.8032605648040771, + "loss_ce": 0.7612683176994324, + "loss_iou": 0.1513671875, + "loss_num": 0.2080078125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 1887800, + "step": 11 + }, + { + "epoch": 0.003156441112645492, + "grad_norm": 27.719293877780373, + "learning_rate": 5e-06, + "loss": 1.5953, + "num_input_tokens_seen": 2058072, + "step": 12 + }, + { + "epoch": 0.003156441112645492, + "loss": 1.530227541923523, + "loss_ce": 0.660110354423523, + "loss_iou": 0.017822265625, + "loss_num": 0.173828125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 2058072, + "step": 12 + }, + { + "epoch": 0.0034194778720326164, + "grad_norm": 77.78462332066468, + "learning_rate": 5e-06, + "loss": 1.9432, + "num_input_tokens_seen": 2230244, + "step": 13 + }, + { + "epoch": 0.0034194778720326164, + "loss": 2.0266342163085938, + "loss_ce": 0.7092512845993042, + "loss_iou": 0.193359375, + "loss_num": 0.263671875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 2230244, + "step": 13 + }, + { + "epoch": 0.0036825146314197407, + "grad_norm": 51.375242508900634, + "learning_rate": 5e-06, + "loss": 1.9389, + "num_input_tokens_seen": 2402352, + "step": 14 + }, + { + "epoch": 0.0036825146314197407, + "loss": 1.8962193727493286, + "loss_ce": 0.9186803102493286, + "loss_iou": 0.037841796875, + "loss_num": 0.1953125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 2402352, + "step": 14 + }, + { + "epoch": 0.0039455513908068654, + "grad_norm": 29.266495242442826, + "learning_rate": 5e-06, + "loss": 1.6229, + "num_input_tokens_seen": 2574284, + "step": 15 + }, + { + "epoch": 0.0039455513908068654, + "loss": 1.7341835498809814, + "loss_ce": 0.7971718311309814, + "loss_iou": 0.052734375, + "loss_num": 0.1875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 2574284, + "step": 15 + }, + { + "epoch": 0.004208588150193989, + "grad_norm": 22.43237476170095, + "learning_rate": 5e-06, + "loss": 1.4957, + "num_input_tokens_seen": 2746500, + "step": 16 + }, + { + "epoch": 0.004208588150193989, + "loss": 1.5246176719665527, + "loss_ce": 0.6452231407165527, + "loss_iou": 0.2041015625, + "loss_num": 0.17578125, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 2746500, + "step": 16 + }, + { + "epoch": 0.004471624909581114, + "grad_norm": 20.976636929732265, + "learning_rate": 5e-06, + "loss": 1.3572, + "num_input_tokens_seen": 2918736, + "step": 17 + }, + { + "epoch": 0.004471624909581114, + "loss": 1.3845367431640625, + "loss_ce": 0.6897125244140625, + "loss_iou": 0.0140380859375, + "loss_num": 0.138671875, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 2918736, + "step": 17 + }, + { + "epoch": 0.004734661668968239, + "grad_norm": 23.059953533674786, + "learning_rate": 5e-06, + "loss": 1.3283, + "num_input_tokens_seen": 3091200, + "step": 18 + }, + { + "epoch": 0.004734661668968239, + "loss": 1.3037350177764893, + "loss_ce": 0.5959713459014893, + "loss_iou": 0.052490234375, + "loss_num": 0.1416015625, + "loss_xval": 0.70703125, + "num_input_tokens_seen": 3091200, + "step": 18 + }, + { + "epoch": 0.0049976984283553625, + "grad_norm": 28.375886109478266, + "learning_rate": 5e-06, + "loss": 1.4794, + "num_input_tokens_seen": 3263136, + "step": 19 + }, + { + "epoch": 0.0049976984283553625, + "loss": 1.406355381011963, + "loss_ce": 0.6358475685119629, + "loss_iou": 0.03173828125, + "loss_num": 0.154296875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 3263136, + "step": 19 + }, + { + "epoch": 0.005260735187742487, + "grad_norm": 25.383372657888618, + "learning_rate": 5e-06, + "loss": 1.4853, + "num_input_tokens_seen": 3435304, + "step": 20 + }, + { + "epoch": 0.005260735187742487, + "loss": 1.4931797981262207, + "loss_ce": 0.6591953635215759, + "loss_iou": 0.1748046875, + "loss_num": 0.1669921875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 3435304, + "step": 20 + }, + { + "epoch": 0.005523771947129611, + "grad_norm": 20.893788220271592, + "learning_rate": 5e-06, + "loss": 1.2467, + "num_input_tokens_seen": 3604172, + "step": 21 + }, + { + "epoch": 0.005523771947129611, + "loss": 1.2556164264678955, + "loss_ce": 0.5534679889678955, + "loss_iou": NaN, + "loss_num": 0.140625, + "loss_xval": 0.703125, + "num_input_tokens_seen": 3604172, + "step": 21 + }, + { + "epoch": 0.005786808706516736, + "grad_norm": 16.968829307639602, + "learning_rate": 5e-06, + "loss": 1.331, + "num_input_tokens_seen": 3776444, + "step": 22 + }, + { + "epoch": 0.005786808706516736, + "loss": 1.4187769889831543, + "loss_ce": 0.5535425543785095, + "loss_iou": 0.076171875, + "loss_num": 0.1728515625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 3776444, + "step": 22 + }, + { + "epoch": 0.00604984546590386, + "grad_norm": 17.03009551099335, + "learning_rate": 5e-06, + "loss": 1.2354, + "num_input_tokens_seen": 3948760, + "step": 23 + }, + { + "epoch": 0.00604984546590386, + "loss": 1.2475543022155762, + "loss_ce": 0.5319781303405762, + "loss_iou": 0.07275390625, + "loss_num": 0.142578125, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 3948760, + "step": 23 + }, + { + "epoch": 0.006312882225290984, + "grad_norm": 17.413119153715428, + "learning_rate": 5e-06, + "loss": 1.2253, + "num_input_tokens_seen": 4120896, + "step": 24 + }, + { + "epoch": 0.006312882225290984, + "loss": 1.2492257356643677, + "loss_ce": 0.6032296419143677, + "loss_iou": 0.0400390625, + "loss_num": 0.12890625, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 4120896, + "step": 24 + }, + { + "epoch": 0.006575918984678109, + "grad_norm": 15.646317268104498, + "learning_rate": 5e-06, + "loss": 1.1599, + "num_input_tokens_seen": 4293068, + "step": 25 + }, + { + "epoch": 0.006575918984678109, + "loss": 1.221449613571167, + "loss_ce": 0.590346097946167, + "loss_iou": 0.26171875, + "loss_num": 0.1259765625, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 4293068, + "step": 25 + }, + { + "epoch": 0.006838955744065233, + "grad_norm": 16.80260761287207, + "learning_rate": 5e-06, + "loss": 1.1163, + "num_input_tokens_seen": 4465316, + "step": 26 + }, + { + "epoch": 0.006838955744065233, + "loss": 1.0530353784561157, + "loss_ce": 0.5227619409561157, + "loss_iou": 0.08447265625, + "loss_num": 0.10595703125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 4465316, + "step": 26 + }, + { + "epoch": 0.007101992503452358, + "grad_norm": 15.596544427195678, + "learning_rate": 5e-06, + "loss": 1.0849, + "num_input_tokens_seen": 4636968, + "step": 27 + }, + { + "epoch": 0.007101992503452358, + "loss": 1.1465504169464111, + "loss_ce": 0.5161793231964111, + "loss_iou": 0.173828125, + "loss_num": 0.1259765625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 4636968, + "step": 27 + }, + { + "epoch": 0.0073650292628394814, + "grad_norm": 17.97152237441673, + "learning_rate": 5e-06, + "loss": 1.1255, + "num_input_tokens_seen": 4808916, + "step": 28 + }, + { + "epoch": 0.0073650292628394814, + "loss": 1.0834856033325195, + "loss_ce": 0.5703020095825195, + "loss_iou": 0.1171875, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 4808916, + "step": 28 + }, + { + "epoch": 0.007628066022226606, + "grad_norm": 26.650637151013097, + "learning_rate": 5e-06, + "loss": 1.0819, + "num_input_tokens_seen": 4979072, + "step": 29 + }, + { + "epoch": 0.007628066022226606, + "loss": 1.083469271659851, + "loss_ce": 0.49557873606681824, + "loss_iou": 0.2470703125, + "loss_num": 0.11767578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 4979072, + "step": 29 + }, + { + "epoch": 0.007891102781613731, + "grad_norm": 47.98868961106415, + "learning_rate": 5e-06, + "loss": 1.5465, + "num_input_tokens_seen": 5150996, + "step": 30 + }, + { + "epoch": 0.007891102781613731, + "loss": 1.4472854137420654, + "loss_ce": 0.4790237545967102, + "loss_iou": 0.2001953125, + "loss_num": 0.193359375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 5150996, + "step": 30 + }, + { + "epoch": 0.008154139541000855, + "grad_norm": 36.03247925799523, + "learning_rate": 5e-06, + "loss": 1.479, + "num_input_tokens_seen": 5321588, + "step": 31 + }, + { + "epoch": 0.008154139541000855, + "loss": 1.3790192604064941, + "loss_ce": 0.5357575416564941, + "loss_iou": 0.0703125, + "loss_num": 0.1689453125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 5321588, + "step": 31 + }, + { + "epoch": 0.008417176300387979, + "grad_norm": 18.14805166699206, + "learning_rate": 5e-06, + "loss": 1.1322, + "num_input_tokens_seen": 5493708, + "step": 32 + }, + { + "epoch": 0.008417176300387979, + "loss": 1.069289207458496, + "loss_ce": 0.5192403793334961, + "loss_iou": 0.0654296875, + "loss_num": 0.10986328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 5493708, + "step": 32 + }, + { + "epoch": 0.008680213059775104, + "grad_norm": 17.23835135853453, + "learning_rate": 5e-06, + "loss": 1.0735, + "num_input_tokens_seen": 5665524, + "step": 33 + }, + { + "epoch": 0.008680213059775104, + "loss": 1.063108205795288, + "loss_ce": 0.4869362711906433, + "loss_iou": 0.0556640625, + "loss_num": 0.115234375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 5665524, + "step": 33 + }, + { + "epoch": 0.008943249819162228, + "grad_norm": 15.02530944865542, + "learning_rate": 5e-06, + "loss": 1.0261, + "num_input_tokens_seen": 5837680, + "step": 34 + }, + { + "epoch": 0.008943249819162228, + "loss": 1.00763738155365, + "loss_ce": 0.4573444128036499, + "loss_iou": 0.16796875, + "loss_num": 0.1103515625, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 5837680, + "step": 34 + }, + { + "epoch": 0.009206286578549352, + "grad_norm": 16.61205610634839, + "learning_rate": 5e-06, + "loss": 1.0035, + "num_input_tokens_seen": 6009848, + "step": 35 + }, + { + "epoch": 0.009206286578549352, + "loss": 0.9831359386444092, + "loss_ce": 0.46995237469673157, + "loss_iou": 0.123046875, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 6009848, + "step": 35 + }, + { + "epoch": 0.009469323337936477, + "grad_norm": 15.512257057890888, + "learning_rate": 5e-06, + "loss": 1.0223, + "num_input_tokens_seen": 6182164, + "step": 36 + }, + { + "epoch": 0.009469323337936477, + "loss": 1.0216686725616455, + "loss_ce": 0.5036022663116455, + "loss_iou": 0.2021484375, + "loss_num": 0.103515625, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 6182164, + "step": 36 + }, + { + "epoch": 0.009732360097323601, + "grad_norm": 19.305295502162483, + "learning_rate": 5e-06, + "loss": 1.0144, + "num_input_tokens_seen": 6352672, + "step": 37 + }, + { + "epoch": 0.009732360097323601, + "loss": 1.0505774021148682, + "loss_ce": 0.4739171862602234, + "loss_iou": 0.12451171875, + "loss_num": 0.115234375, + "loss_xval": 0.578125, + "num_input_tokens_seen": 6352672, + "step": 37 + }, + { + "epoch": 0.009995396856710725, + "grad_norm": 20.69114777569909, + "learning_rate": 5e-06, + "loss": 1.0293, + "num_input_tokens_seen": 6525264, + "step": 38 + }, + { + "epoch": 0.009995396856710725, + "loss": 1.069566249847412, + "loss_ce": 0.4638533592224121, + "loss_iou": 0.02099609375, + "loss_num": 0.12109375, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 6525264, + "step": 38 + }, + { + "epoch": 0.010258433616097849, + "grad_norm": 32.793827182045035, + "learning_rate": 5e-06, + "loss": 1.0754, + "num_input_tokens_seen": 6697524, + "step": 39 + }, + { + "epoch": 0.010258433616097849, + "loss": 1.0720198154449463, + "loss_ce": 0.5224591493606567, + "loss_iou": 0.099609375, + "loss_num": 0.10986328125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 6697524, + "step": 39 + }, + { + "epoch": 0.010521470375484974, + "grad_norm": 22.436128254245773, + "learning_rate": 5e-06, + "loss": 1.125, + "num_input_tokens_seen": 6869752, + "step": 40 + }, + { + "epoch": 0.010521470375484974, + "loss": 1.1199672222137451, + "loss_ce": 0.5101040601730347, + "loss_iou": NaN, + "loss_num": 0.1220703125, + "loss_xval": 0.609375, + "num_input_tokens_seen": 6869752, + "step": 40 + }, + { + "epoch": 0.010784507134872098, + "grad_norm": 17.66019658365854, + "learning_rate": 5e-06, + "loss": 0.9975, + "num_input_tokens_seen": 7041884, + "step": 41 + }, + { + "epoch": 0.010784507134872098, + "loss": 0.9673187732696533, + "loss_ce": 0.37991636991500854, + "loss_iou": 0.166015625, + "loss_num": 0.11767578125, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 7041884, + "step": 41 + }, + { + "epoch": 0.011047543894259222, + "grad_norm": 17.977306189337394, + "learning_rate": 5e-06, + "loss": 0.9918, + "num_input_tokens_seen": 7214400, + "step": 42 + }, + { + "epoch": 0.011047543894259222, + "loss": 0.9905650615692139, + "loss_ce": 0.42928582429885864, + "loss_iou": 0.236328125, + "loss_num": 0.1123046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 7214400, + "step": 42 + }, + { + "epoch": 0.011310580653646348, + "grad_norm": 16.758234031981292, + "learning_rate": 5e-06, + "loss": 1.0186, + "num_input_tokens_seen": 7386432, + "step": 43 + }, + { + "epoch": 0.011310580653646348, + "loss": 1.0381113290786743, + "loss_ce": 0.47536715865135193, + "loss_iou": 0.1748046875, + "loss_num": 0.1123046875, + "loss_xval": 0.5625, + "num_input_tokens_seen": 7386432, + "step": 43 + }, + { + "epoch": 0.011573617413033472, + "grad_norm": 19.20192611310373, + "learning_rate": 5e-06, + "loss": 0.9474, + "num_input_tokens_seen": 7558604, + "step": 44 + }, + { + "epoch": 0.011573617413033472, + "loss": 0.8546841144561768, + "loss_ce": 0.45356106758117676, + "loss_iou": 0.0634765625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 7558604, + "step": 44 + }, + { + "epoch": 0.011836654172420595, + "grad_norm": 24.07755280606732, + "learning_rate": 5e-06, + "loss": 1.0256, + "num_input_tokens_seen": 7730896, + "step": 45 + }, + { + "epoch": 0.011836654172420595, + "loss": 0.960330605506897, + "loss_ce": 0.48523297905921936, + "loss_iou": 0.259765625, + "loss_num": 0.09521484375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 7730896, + "step": 45 + }, + { + "epoch": 0.01209969093180772, + "grad_norm": 21.918622706923347, + "learning_rate": 5e-06, + "loss": 0.9858, + "num_input_tokens_seen": 7903036, + "step": 46 + }, + { + "epoch": 0.01209969093180772, + "loss": 1.0328285694122314, + "loss_ce": 0.41710585355758667, + "loss_iou": 0.119140625, + "loss_num": 0.123046875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 7903036, + "step": 46 + }, + { + "epoch": 0.012362727691194845, + "grad_norm": 17.78591800932899, + "learning_rate": 5e-06, + "loss": 0.9217, + "num_input_tokens_seen": 8075568, + "step": 47 + }, + { + "epoch": 0.012362727691194845, + "loss": 0.9050750136375427, + "loss_ce": 0.4551238417625427, + "loss_iou": 0.21875, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 8075568, + "step": 47 + }, + { + "epoch": 0.012625764450581969, + "grad_norm": 18.144694112865135, + "learning_rate": 5e-06, + "loss": 0.8763, + "num_input_tokens_seen": 8247888, + "step": 48 + }, + { + "epoch": 0.012625764450581969, + "loss": 0.8872632384300232, + "loss_ce": 0.3972730040550232, + "loss_iou": 0.333984375, + "loss_num": 0.09765625, + "loss_xval": 0.490234375, + "num_input_tokens_seen": 8247888, + "step": 48 + }, + { + "epoch": 0.012888801209969093, + "grad_norm": 18.397929867872243, + "learning_rate": 5e-06, + "loss": 0.8479, + "num_input_tokens_seen": 8419884, + "step": 49 + }, + { + "epoch": 0.012888801209969093, + "loss": 0.8515866994857788, + "loss_ce": 0.3969968557357788, + "loss_iou": 0.109375, + "loss_num": 0.0908203125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 8419884, + "step": 49 + }, + { + "epoch": 0.013151837969356218, + "grad_norm": 16.926260406149144, + "learning_rate": 5e-06, + "loss": 0.8125, + "num_input_tokens_seen": 8592264, + "step": 50 + }, + { + "epoch": 0.013151837969356218, + "loss": 0.8142160177230835, + "loss_ce": 0.3877023756504059, + "loss_iou": 0.1123046875, + "loss_num": 0.08544921875, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 8592264, + "step": 50 + }, + { + "epoch": 0.013414874728743342, + "grad_norm": 24.314274074580883, + "learning_rate": 5e-06, + "loss": 0.8943, + "num_input_tokens_seen": 8764528, + "step": 51 + }, + { + "epoch": 0.013414874728743342, + "loss": 0.8517386317253113, + "loss_ce": 0.43084025382995605, + "loss_iou": 0.1923828125, + "loss_num": 0.083984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 8764528, + "step": 51 + }, + { + "epoch": 0.013677911488130466, + "grad_norm": 26.15955176323275, + "learning_rate": 5e-06, + "loss": 0.9129, + "num_input_tokens_seen": 8936892, + "step": 52 + }, + { + "epoch": 0.013677911488130466, + "loss": 0.9079768657684326, + "loss_ce": 0.3640315532684326, + "loss_iou": 0.0654296875, + "loss_num": 0.10888671875, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 8936892, + "step": 52 + }, + { + "epoch": 0.013940948247517591, + "grad_norm": 29.358592778676385, + "learning_rate": 5e-06, + "loss": 0.9409, + "num_input_tokens_seen": 9108972, + "step": 53 + }, + { + "epoch": 0.013940948247517591, + "loss": 0.8754456043243408, + "loss_ce": 0.4201233685016632, + "loss_iou": 0.1279296875, + "loss_num": 0.0908203125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 9108972, + "step": 53 + }, + { + "epoch": 0.014203985006904715, + "grad_norm": 20.238375239505068, + "learning_rate": 5e-06, + "loss": 0.8928, + "num_input_tokens_seen": 9281248, + "step": 54 + }, + { + "epoch": 0.014203985006904715, + "loss": 0.8624146580696106, + "loss_ce": 0.4217408299446106, + "loss_iou": 0.154296875, + "loss_num": 0.087890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 9281248, + "step": 54 + }, + { + "epoch": 0.014467021766291839, + "grad_norm": 21.380412065088443, + "learning_rate": 5e-06, + "loss": 0.8996, + "num_input_tokens_seen": 9453248, + "step": 55 + }, + { + "epoch": 0.014467021766291839, + "loss": 0.8669720888137817, + "loss_ce": 0.3911420404911041, + "loss_iou": 0.095703125, + "loss_num": 0.09521484375, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 9453248, + "step": 55 + }, + { + "epoch": 0.014730058525678963, + "grad_norm": 21.398016209346718, + "learning_rate": 5e-06, + "loss": 0.8695, + "num_input_tokens_seen": 9625308, + "step": 56 + }, + { + "epoch": 0.014730058525678963, + "loss": 0.7916536331176758, + "loss_ce": 0.3697786033153534, + "loss_iou": 0.265625, + "loss_num": 0.08447265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 9625308, + "step": 56 + }, + { + "epoch": 0.014993095285066088, + "grad_norm": 22.596856129715338, + "learning_rate": 5e-06, + "loss": 0.8385, + "num_input_tokens_seen": 9797472, + "step": 57 + }, + { + "epoch": 0.014993095285066088, + "loss": 0.8159988522529602, + "loss_ce": 0.3955886960029602, + "loss_iou": 0.173828125, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 9797472, + "step": 57 + }, + { + "epoch": 0.015256132044453212, + "grad_norm": 19.06235083584469, + "learning_rate": 5e-06, + "loss": 0.791, + "num_input_tokens_seen": 9969708, + "step": 58 + }, + { + "epoch": 0.015256132044453212, + "loss": 0.8669772148132324, + "loss_ce": 0.3650240898132324, + "loss_iou": 0.1318359375, + "loss_num": 0.1005859375, + "loss_xval": 0.5, + "num_input_tokens_seen": 9969708, + "step": 58 + }, + { + "epoch": 0.015519168803840336, + "grad_norm": 19.2496987382769, + "learning_rate": 5e-06, + "loss": 0.8251, + "num_input_tokens_seen": 10141772, + "step": 59 + }, + { + "epoch": 0.015519168803840336, + "loss": 0.8268835544586182, + "loss_ce": 0.39744019508361816, + "loss_iou": 0.3046875, + "loss_num": 0.0859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 10141772, + "step": 59 + }, + { + "epoch": 0.015782205563227462, + "grad_norm": 18.91781603668572, + "learning_rate": 5e-06, + "loss": 0.8407, + "num_input_tokens_seen": 10312188, + "step": 60 + }, + { + "epoch": 0.015782205563227462, + "loss": 0.7802078723907471, + "loss_ce": 0.35906529426574707, + "loss_iou": 0.318359375, + "loss_num": 0.083984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 10312188, + "step": 60 + }, + { + "epoch": 0.016045242322614586, + "grad_norm": 17.697618089112353, + "learning_rate": 5e-06, + "loss": 0.7755, + "num_input_tokens_seen": 10484428, + "step": 61 + }, + { + "epoch": 0.016045242322614586, + "loss": 0.8741220235824585, + "loss_ce": 0.3609383702278137, + "loss_iou": 0.1376953125, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 10484428, + "step": 61 + }, + { + "epoch": 0.01630827908200171, + "grad_norm": 19.734011329204773, + "learning_rate": 5e-06, + "loss": 0.769, + "num_input_tokens_seen": 10656840, + "step": 62 + }, + { + "epoch": 0.01630827908200171, + "loss": 0.7350368499755859, + "loss_ce": 0.36150169372558594, + "loss_iou": 0.099609375, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 10656840, + "step": 62 + }, + { + "epoch": 0.016571315841388833, + "grad_norm": 24.730627091015997, + "learning_rate": 5e-06, + "loss": 0.7931, + "num_input_tokens_seen": 10828884, + "step": 63 + }, + { + "epoch": 0.016571315841388833, + "loss": 0.7593971490859985, + "loss_ce": 0.36462172865867615, + "loss_iou": NaN, + "loss_num": 0.0791015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 10828884, + "step": 63 + }, + { + "epoch": 0.016834352600775957, + "grad_norm": 18.667625860875532, + "learning_rate": 5e-06, + "loss": 0.8089, + "num_input_tokens_seen": 11001164, + "step": 64 + }, + { + "epoch": 0.016834352600775957, + "loss": 0.8844671845436096, + "loss_ce": 0.3607855439186096, + "loss_iou": 0.376953125, + "loss_num": 0.10498046875, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 11001164, + "step": 64 + }, + { + "epoch": 0.017097389360163084, + "grad_norm": 23.265809096889907, + "learning_rate": 5e-06, + "loss": 0.7793, + "num_input_tokens_seen": 11173188, + "step": 65 + }, + { + "epoch": 0.017097389360163084, + "loss": 0.7827451229095459, + "loss_ce": 0.3672178089618683, + "loss_iou": 0.255859375, + "loss_num": 0.0830078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 11173188, + "step": 65 + }, + { + "epoch": 0.017360426119550208, + "grad_norm": 24.005431476496838, + "learning_rate": 5e-06, + "loss": 0.8061, + "num_input_tokens_seen": 11345216, + "step": 66 + }, + { + "epoch": 0.017360426119550208, + "loss": 0.7301403284072876, + "loss_ce": 0.3616100549697876, + "loss_iou": 0.28515625, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 11345216, + "step": 66 + }, + { + "epoch": 0.017623462878937332, + "grad_norm": 30.491458847817103, + "learning_rate": 5e-06, + "loss": 0.8474, + "num_input_tokens_seen": 11517388, + "step": 67 + }, + { + "epoch": 0.017623462878937332, + "loss": 0.8404921889305115, + "loss_ce": 0.3583144545555115, + "loss_iou": 0.07861328125, + "loss_num": 0.0966796875, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 11517388, + "step": 67 + }, + { + "epoch": 0.017886499638324456, + "grad_norm": 17.38452589704735, + "learning_rate": 5e-06, + "loss": 0.8004, + "num_input_tokens_seen": 11689284, + "step": 68 + }, + { + "epoch": 0.017886499638324456, + "loss": 0.8011830449104309, + "loss_ce": 0.3470814824104309, + "loss_iou": 0.095703125, + "loss_num": 0.0908203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 11689284, + "step": 68 + }, + { + "epoch": 0.01814953639771158, + "grad_norm": 22.0592753440465, + "learning_rate": 5e-06, + "loss": 0.7808, + "num_input_tokens_seen": 11861484, + "step": 69 + }, + { + "epoch": 0.01814953639771158, + "loss": 0.8063881397247314, + "loss_ce": 0.38475728034973145, + "loss_iou": 0.1826171875, + "loss_num": 0.08447265625, + "loss_xval": 0.421875, + "num_input_tokens_seen": 11861484, + "step": 69 + }, + { + "epoch": 0.018412573157098704, + "grad_norm": 18.31950796902093, + "learning_rate": 5e-06, + "loss": 0.766, + "num_input_tokens_seen": 12033752, + "step": 70 + }, + { + "epoch": 0.018412573157098704, + "loss": 0.7670217156410217, + "loss_ce": 0.31560570001602173, + "loss_iou": 0.275390625, + "loss_num": 0.09033203125, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 12033752, + "step": 70 + }, + { + "epoch": 0.018675609916485827, + "grad_norm": 18.14951438740657, + "learning_rate": 5e-06, + "loss": 0.6971, + "num_input_tokens_seen": 12205604, + "step": 71 + }, + { + "epoch": 0.018675609916485827, + "loss": 0.6512900590896606, + "loss_ce": 0.31205666065216064, + "loss_iou": 0.169921875, + "loss_num": 0.06787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 12205604, + "step": 71 + }, + { + "epoch": 0.018938646675872955, + "grad_norm": 18.910816353150064, + "learning_rate": 5e-06, + "loss": 0.7748, + "num_input_tokens_seen": 12377968, + "step": 72 + }, + { + "epoch": 0.018938646675872955, + "loss": 0.8064651489257812, + "loss_ce": 0.3355178236961365, + "loss_iou": 0.171875, + "loss_num": 0.09423828125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 12377968, + "step": 72 + }, + { + "epoch": 0.01920168343526008, + "grad_norm": 18.381185745083314, + "learning_rate": 5e-06, + "loss": 0.7552, + "num_input_tokens_seen": 12549896, + "step": 73 + }, + { + "epoch": 0.01920168343526008, + "loss": 0.817658007144928, + "loss_ce": 0.28372251987457275, + "loss_iou": 0.236328125, + "loss_num": 0.10693359375, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 12549896, + "step": 73 + }, + { + "epoch": 0.019464720194647202, + "grad_norm": 16.482511811798446, + "learning_rate": 5e-06, + "loss": 0.6763, + "num_input_tokens_seen": 12721856, + "step": 74 + }, + { + "epoch": 0.019464720194647202, + "loss": 0.5998687744140625, + "loss_ce": 0.2707671821117401, + "loss_iou": 0.333984375, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 12721856, + "step": 74 + }, + { + "epoch": 0.019727756954034326, + "grad_norm": 15.444850813034535, + "learning_rate": 5e-06, + "loss": 0.6806, + "num_input_tokens_seen": 12894040, + "step": 75 + }, + { + "epoch": 0.019727756954034326, + "loss": 0.7105993032455444, + "loss_ce": 0.27493035793304443, + "loss_iou": 0.26953125, + "loss_num": 0.08740234375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 12894040, + "step": 75 + }, + { + "epoch": 0.01999079371342145, + "grad_norm": 20.590812596799903, + "learning_rate": 5e-06, + "loss": 0.7622, + "num_input_tokens_seen": 13064296, + "step": 76 + }, + { + "epoch": 0.01999079371342145, + "loss": 0.6805918216705322, + "loss_ce": 0.3353770077228546, + "loss_iou": 0.26171875, + "loss_num": 0.06884765625, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 13064296, + "step": 76 + }, + { + "epoch": 0.020253830472808574, + "grad_norm": 20.47406440355872, + "learning_rate": 5e-06, + "loss": 0.74, + "num_input_tokens_seen": 13233888, + "step": 77 + }, + { + "epoch": 0.020253830472808574, + "loss": 0.7564910650253296, + "loss_ce": 0.3099578022956848, + "loss_iou": 0.2109375, + "loss_num": 0.08935546875, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 13233888, + "step": 77 + }, + { + "epoch": 0.020516867232195698, + "grad_norm": 19.122832804118445, + "learning_rate": 5e-06, + "loss": 0.741, + "num_input_tokens_seen": 13405900, + "step": 78 + }, + { + "epoch": 0.020516867232195698, + "loss": 0.6968704462051392, + "loss_ce": 0.26059114933013916, + "loss_iou": 0.224609375, + "loss_num": 0.08740234375, + "loss_xval": 0.435546875, + "num_input_tokens_seen": 13405900, + "step": 78 + }, + { + "epoch": 0.020779903991582825, + "grad_norm": 16.68220779277857, + "learning_rate": 5e-06, + "loss": 0.6822, + "num_input_tokens_seen": 13578336, + "step": 79 + }, + { + "epoch": 0.020779903991582825, + "loss": 0.6721813082695007, + "loss_ce": 0.26910513639450073, + "loss_iou": 0.125, + "loss_num": 0.08056640625, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 13578336, + "step": 79 + }, + { + "epoch": 0.02104294075096995, + "grad_norm": 15.592038335799979, + "learning_rate": 5e-06, + "loss": 0.6458, + "num_input_tokens_seen": 13750256, + "step": 80 + }, + { + "epoch": 0.02104294075096995, + "loss": 0.604525625705719, + "loss_ce": 0.253695547580719, + "loss_iou": 0.169921875, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 13750256, + "step": 80 + }, + { + "epoch": 0.021305977510357073, + "grad_norm": 19.123452214665015, + "learning_rate": 5e-06, + "loss": 0.7034, + "num_input_tokens_seen": 13922220, + "step": 81 + }, + { + "epoch": 0.021305977510357073, + "loss": 0.7461546659469604, + "loss_ce": 0.28350815176963806, + "loss_iou": 0.27734375, + "loss_num": 0.0927734375, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 13922220, + "step": 81 + }, + { + "epoch": 0.021569014269744197, + "grad_norm": 14.596994780035258, + "learning_rate": 5e-06, + "loss": 0.6357, + "num_input_tokens_seen": 14094312, + "step": 82 + }, + { + "epoch": 0.021569014269744197, + "loss": 0.602331280708313, + "loss_ce": 0.2593136727809906, + "loss_iou": 0.328125, + "loss_num": 0.068359375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 14094312, + "step": 82 + }, + { + "epoch": 0.02183205102913132, + "grad_norm": 16.4283520636599, + "learning_rate": 5e-06, + "loss": 0.5782, + "num_input_tokens_seen": 14264984, + "step": 83 + }, + { + "epoch": 0.02183205102913132, + "loss": 0.5699411630630493, + "loss_ce": 0.2635447084903717, + "loss_iou": 0.306640625, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 14264984, + "step": 83 + }, + { + "epoch": 0.022095087788518444, + "grad_norm": 22.311971257546926, + "learning_rate": 5e-06, + "loss": 0.6398, + "num_input_tokens_seen": 14437168, + "step": 84 + }, + { + "epoch": 0.022095087788518444, + "loss": 0.6384698748588562, + "loss_ce": 0.240032359957695, + "loss_iou": 0.21875, + "loss_num": 0.07958984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 14437168, + "step": 84 + }, + { + "epoch": 0.022358124547905568, + "grad_norm": 26.609904157941, + "learning_rate": 5e-06, + "loss": 0.8114, + "num_input_tokens_seen": 14609504, + "step": 85 + }, + { + "epoch": 0.022358124547905568, + "loss": 0.7600446939468384, + "loss_ce": 0.23172441124916077, + "loss_iou": 0.076171875, + "loss_num": 0.10546875, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 14609504, + "step": 85 + }, + { + "epoch": 0.022621161307292696, + "grad_norm": 28.311475657648764, + "learning_rate": 5e-06, + "loss": 0.8067, + "num_input_tokens_seen": 14781812, + "step": 86 + }, + { + "epoch": 0.022621161307292696, + "loss": 0.839857816696167, + "loss_ce": 0.2500140368938446, + "loss_iou": 0.38671875, + "loss_num": 0.1181640625, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 14781812, + "step": 86 + }, + { + "epoch": 0.02288419806667982, + "grad_norm": 24.92836303632298, + "learning_rate": 5e-06, + "loss": 0.731, + "num_input_tokens_seen": 14954408, + "step": 87 + }, + { + "epoch": 0.02288419806667982, + "loss": 0.7506657838821411, + "loss_ce": 0.2797185182571411, + "loss_iou": 0.12890625, + "loss_num": 0.09423828125, + "loss_xval": 0.470703125, + "num_input_tokens_seen": 14954408, + "step": 87 + }, + { + "epoch": 0.023147234826066943, + "grad_norm": 26.5172487427058, + "learning_rate": 5e-06, + "loss": 0.8202, + "num_input_tokens_seen": 15123552, + "step": 88 + }, + { + "epoch": 0.023147234826066943, + "loss": 0.808368980884552, + "loss_ce": 0.243183434009552, + "loss_iou": 0.234375, + "loss_num": 0.11328125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 15123552, + "step": 88 + }, + { + "epoch": 0.023410271585454067, + "grad_norm": 17.71060921250277, + "learning_rate": 5e-06, + "loss": 0.6546, + "num_input_tokens_seen": 15295844, + "step": 89 + }, + { + "epoch": 0.023410271585454067, + "loss": 0.6125390529632568, + "loss_ce": 0.24852542579174042, + "loss_iou": 0.2021484375, + "loss_num": 0.07275390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 15295844, + "step": 89 + }, + { + "epoch": 0.02367330834484119, + "grad_norm": 15.735800845783544, + "learning_rate": 5e-06, + "loss": 0.5838, + "num_input_tokens_seen": 15467856, + "step": 90 + }, + { + "epoch": 0.02367330834484119, + "loss": 0.5133854150772095, + "loss_ce": 0.22456704080104828, + "loss_iou": 0.2412109375, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 15467856, + "step": 90 + }, + { + "epoch": 0.023936345104228315, + "grad_norm": 16.198479692260427, + "learning_rate": 5e-06, + "loss": 0.6229, + "num_input_tokens_seen": 15640280, + "step": 91 + }, + { + "epoch": 0.023936345104228315, + "loss": 0.5497957468032837, + "loss_ce": 0.23717370629310608, + "loss_iou": 0.30859375, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 15640280, + "step": 91 + }, + { + "epoch": 0.02419938186361544, + "grad_norm": 15.820711258384152, + "learning_rate": 5e-06, + "loss": 0.5852, + "num_input_tokens_seen": 15812532, + "step": 92 + }, + { + "epoch": 0.02419938186361544, + "loss": 0.5448044538497925, + "loss_ce": 0.22046364843845367, + "loss_iou": 0.1826171875, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 15812532, + "step": 92 + }, + { + "epoch": 0.024462418623002566, + "grad_norm": 19.666745517026683, + "learning_rate": 5e-06, + "loss": 0.5956, + "num_input_tokens_seen": 15984492, + "step": 93 + }, + { + "epoch": 0.024462418623002566, + "loss": 0.6414846181869507, + "loss_ce": 0.22668972611427307, + "loss_iou": 0.232421875, + "loss_num": 0.0830078125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 15984492, + "step": 93 + }, + { + "epoch": 0.02472545538238969, + "grad_norm": 18.823515656198328, + "learning_rate": 5e-06, + "loss": 0.5443, + "num_input_tokens_seen": 16156556, + "step": 94 + }, + { + "epoch": 0.02472545538238969, + "loss": 0.507426381111145, + "loss_ce": 0.2339888960123062, + "loss_iou": 0.396484375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 16156556, + "step": 94 + }, + { + "epoch": 0.024988492141776814, + "grad_norm": 14.625660314629584, + "learning_rate": 5e-06, + "loss": 0.5402, + "num_input_tokens_seen": 16326928, + "step": 95 + }, + { + "epoch": 0.024988492141776814, + "loss": 0.5663172006607056, + "loss_ce": 0.20572152733802795, + "loss_iou": 0.2421875, + "loss_num": 0.072265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 16326928, + "step": 95 + }, + { + "epoch": 0.025251528901163937, + "grad_norm": 15.783911320035779, + "learning_rate": 5e-06, + "loss": 0.5712, + "num_input_tokens_seen": 16499268, + "step": 96 + }, + { + "epoch": 0.025251528901163937, + "loss": 0.5024785399436951, + "loss_ce": 0.19632619619369507, + "loss_iou": 0.251953125, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 16499268, + "step": 96 + }, + { + "epoch": 0.02551456566055106, + "grad_norm": 16.72236617723868, + "learning_rate": 5e-06, + "loss": 0.6175, + "num_input_tokens_seen": 16668388, + "step": 97 + }, + { + "epoch": 0.02551456566055106, + "loss": 0.6046057939529419, + "loss_ce": 0.19554820656776428, + "loss_iou": 0.287109375, + "loss_num": 0.08203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 16668388, + "step": 97 + }, + { + "epoch": 0.025777602419938185, + "grad_norm": 19.132328111043208, + "learning_rate": 5e-06, + "loss": 0.6948, + "num_input_tokens_seen": 16840748, + "step": 98 + }, + { + "epoch": 0.025777602419938185, + "loss": 0.635749340057373, + "loss_ce": 0.22290757298469543, + "loss_iou": 0.1416015625, + "loss_num": 0.08251953125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 16840748, + "step": 98 + }, + { + "epoch": 0.026040639179325312, + "grad_norm": 17.546404729768323, + "learning_rate": 5e-06, + "loss": 0.6342, + "num_input_tokens_seen": 17012828, + "step": 99 + }, + { + "epoch": 0.026040639179325312, + "loss": 0.6548875570297241, + "loss_ce": 0.23105943202972412, + "loss_iou": 0.283203125, + "loss_num": 0.0849609375, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 17012828, + "step": 99 + }, + { + "epoch": 0.026303675938712436, + "grad_norm": 18.526246284946534, + "learning_rate": 5e-06, + "loss": 0.6155, + "num_input_tokens_seen": 17185216, + "step": 100 + }, + { + "epoch": 0.026303675938712436, + "loss": 0.6633030772209167, + "loss_ce": 0.20383042097091675, + "loss_iou": 0.263671875, + "loss_num": 0.091796875, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 17185216, + "step": 100 + }, + { + "epoch": 0.02656671269809956, + "grad_norm": 19.37711860101834, + "learning_rate": 5e-06, + "loss": 0.6252, + "num_input_tokens_seen": 17355688, + "step": 101 + }, + { + "epoch": 0.02656671269809956, + "loss": 0.6485173106193542, + "loss_ce": 0.20686691999435425, + "loss_iou": 0.146484375, + "loss_num": 0.08837890625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 17355688, + "step": 101 + }, + { + "epoch": 0.026829749457486684, + "grad_norm": 24.272978763244147, + "learning_rate": 5e-06, + "loss": 0.6144, + "num_input_tokens_seen": 17527784, + "step": 102 + }, + { + "epoch": 0.026829749457486684, + "loss": 0.6552723050117493, + "loss_ce": 0.20190313458442688, + "loss_iou": 0.1513671875, + "loss_num": 0.0908203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 17527784, + "step": 102 + }, + { + "epoch": 0.027092786216873808, + "grad_norm": 28.829409367834085, + "learning_rate": 5e-06, + "loss": 0.6606, + "num_input_tokens_seen": 17700208, + "step": 103 + }, + { + "epoch": 0.027092786216873808, + "loss": 0.7094471454620361, + "loss_ce": 0.19260142743587494, + "loss_iou": 0.3046875, + "loss_num": 0.10302734375, + "loss_xval": 0.515625, + "num_input_tokens_seen": 17700208, + "step": 103 + }, + { + "epoch": 0.02735582297626093, + "grad_norm": 23.160720035944347, + "learning_rate": 5e-06, + "loss": 0.7452, + "num_input_tokens_seen": 17872308, + "step": 104 + }, + { + "epoch": 0.02735582297626093, + "loss": 0.7341784238815308, + "loss_ce": 0.19169792532920837, + "loss_iou": 0.1513671875, + "loss_num": 0.1083984375, + "loss_xval": 0.54296875, + "num_input_tokens_seen": 17872308, + "step": 104 + }, + { + "epoch": 0.027618859735648055, + "grad_norm": 17.418429269221175, + "learning_rate": 5e-06, + "loss": 0.5803, + "num_input_tokens_seen": 18044728, + "step": 105 + }, + { + "epoch": 0.027618859735648055, + "loss": 0.5607779026031494, + "loss_ce": 0.20787259936332703, + "loss_iou": 0.2353515625, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 18044728, + "step": 105 + }, + { + "epoch": 0.027881896495035183, + "grad_norm": 17.11728575546532, + "learning_rate": 5e-06, + "loss": 0.5934, + "num_input_tokens_seen": 18215172, + "step": 106 + }, + { + "epoch": 0.027881896495035183, + "loss": 0.609626293182373, + "loss_ce": 0.17749738693237305, + "loss_iou": 0.09228515625, + "loss_num": 0.08642578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 18215172, + "step": 106 + }, + { + "epoch": 0.028144933254422307, + "grad_norm": 17.86366659461009, + "learning_rate": 5e-06, + "loss": 0.5335, + "num_input_tokens_seen": 18387084, + "step": 107 + }, + { + "epoch": 0.028144933254422307, + "loss": 0.48998841643333435, + "loss_ce": 0.19213685393333435, + "loss_iou": 0.3046875, + "loss_num": 0.0595703125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 18387084, + "step": 107 + }, + { + "epoch": 0.02840797001380943, + "grad_norm": 20.662496273440333, + "learning_rate": 5e-06, + "loss": 0.5971, + "num_input_tokens_seen": 18559272, + "step": 108 + }, + { + "epoch": 0.02840797001380943, + "loss": 0.6421129703521729, + "loss_ce": 0.18044306337833405, + "loss_iou": 0.14453125, + "loss_num": 0.09228515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 18559272, + "step": 108 + }, + { + "epoch": 0.028671006773196554, + "grad_norm": 21.435817028210696, + "learning_rate": 5e-06, + "loss": 0.5635, + "num_input_tokens_seen": 18731380, + "step": 109 + }, + { + "epoch": 0.028671006773196554, + "loss": 0.5063576698303223, + "loss_ce": 0.18104028701782227, + "loss_iou": 0.1884765625, + "loss_num": 0.06494140625, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 18731380, + "step": 109 + }, + { + "epoch": 0.028934043532583678, + "grad_norm": 18.8758769532993, + "learning_rate": 5e-06, + "loss": 0.6115, + "num_input_tokens_seen": 18901956, + "step": 110 + }, + { + "epoch": 0.028934043532583678, + "loss": 0.6692255139350891, + "loss_ce": 0.17020206153392792, + "loss_iou": 0.20703125, + "loss_num": 0.099609375, + "loss_xval": 0.5, + "num_input_tokens_seen": 18901956, + "step": 110 + }, + { + "epoch": 0.029197080291970802, + "grad_norm": 17.651564073843637, + "learning_rate": 5e-06, + "loss": 0.5336, + "num_input_tokens_seen": 19072248, + "step": 111 + }, + { + "epoch": 0.029197080291970802, + "loss": 0.4951699376106262, + "loss_ce": 0.17558985948562622, + "loss_iou": 0.6640625, + "loss_num": 0.06396484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 19072248, + "step": 111 + }, + { + "epoch": 0.029460117051357926, + "grad_norm": 16.158969266118735, + "learning_rate": 5e-06, + "loss": 0.4952, + "num_input_tokens_seen": 19244772, + "step": 112 + }, + { + "epoch": 0.029460117051357926, + "loss": 0.4770987629890442, + "loss_ce": 0.1765616536140442, + "loss_iou": 0.1904296875, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 19244772, + "step": 112 + }, + { + "epoch": 0.029723153810745053, + "grad_norm": 14.450830992407502, + "learning_rate": 5e-06, + "loss": 0.5026, + "num_input_tokens_seen": 19416868, + "step": 113 + }, + { + "epoch": 0.029723153810745053, + "loss": 0.4577527642250061, + "loss_ce": 0.1758924424648285, + "loss_iou": 0.400390625, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 19416868, + "step": 113 + }, + { + "epoch": 0.029986190570132177, + "grad_norm": 14.560235178346835, + "learning_rate": 5e-06, + "loss": 0.4865, + "num_input_tokens_seen": 19589016, + "step": 114 + }, + { + "epoch": 0.029986190570132177, + "loss": 0.5253910422325134, + "loss_ce": 0.15832561254501343, + "loss_iou": 0.173828125, + "loss_num": 0.0732421875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 19589016, + "step": 114 + }, + { + "epoch": 0.0302492273295193, + "grad_norm": 18.605711889816632, + "learning_rate": 5e-06, + "loss": 0.49, + "num_input_tokens_seen": 19760904, + "step": 115 + }, + { + "epoch": 0.0302492273295193, + "loss": 0.4801591634750366, + "loss_ce": 0.17290815711021423, + "loss_iou": 0.416015625, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 19760904, + "step": 115 + }, + { + "epoch": 0.030512264088906425, + "grad_norm": 20.839979972241576, + "learning_rate": 5e-06, + "loss": 0.5137, + "num_input_tokens_seen": 19932724, + "step": 116 + }, + { + "epoch": 0.030512264088906425, + "loss": 0.4843531847000122, + "loss_ce": 0.14585217833518982, + "loss_iou": NaN, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 19932724, + "step": 116 + }, + { + "epoch": 0.03077530084829355, + "grad_norm": 20.407619855634906, + "learning_rate": 5e-06, + "loss": 0.5188, + "num_input_tokens_seen": 20105196, + "step": 117 + }, + { + "epoch": 0.03077530084829355, + "loss": 0.521455705165863, + "loss_ce": 0.17148011922836304, + "loss_iou": 0.275390625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 20105196, + "step": 117 + }, + { + "epoch": 0.031038337607680672, + "grad_norm": 17.136634668569023, + "learning_rate": 5e-06, + "loss": 0.529, + "num_input_tokens_seen": 20277468, + "step": 118 + }, + { + "epoch": 0.031038337607680672, + "loss": 0.5619306564331055, + "loss_ce": 0.15323926508426666, + "loss_iou": 0.224609375, + "loss_num": 0.08203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 20277468, + "step": 118 + }, + { + "epoch": 0.0313013743670678, + "grad_norm": 15.540166390852267, + "learning_rate": 5e-06, + "loss": 0.5196, + "num_input_tokens_seen": 20448108, + "step": 119 + }, + { + "epoch": 0.0313013743670678, + "loss": 0.5409432649612427, + "loss_ce": 0.15117278695106506, + "loss_iou": 0.40234375, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 20448108, + "step": 119 + }, + { + "epoch": 0.031564411126454923, + "grad_norm": 12.306363086115368, + "learning_rate": 5e-06, + "loss": 0.4996, + "num_input_tokens_seen": 20620368, + "step": 120 + }, + { + "epoch": 0.031564411126454923, + "loss": 0.49895310401916504, + "loss_ce": 0.13860151171684265, + "loss_iou": 0.32421875, + "loss_num": 0.072265625, + "loss_xval": 0.359375, + "num_input_tokens_seen": 20620368, + "step": 120 + }, + { + "epoch": 0.03182744788584205, + "grad_norm": 13.090987000723873, + "learning_rate": 5e-06, + "loss": 0.4537, + "num_input_tokens_seen": 20792584, + "step": 121 + }, + { + "epoch": 0.03182744788584205, + "loss": 0.47374969720840454, + "loss_ce": 0.14452606439590454, + "loss_iou": 0.3203125, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 20792584, + "step": 121 + }, + { + "epoch": 0.03209048464522917, + "grad_norm": 12.574059354034341, + "learning_rate": 5e-06, + "loss": 0.4245, + "num_input_tokens_seen": 20964948, + "step": 122 + }, + { + "epoch": 0.03209048464522917, + "loss": 0.4678102135658264, + "loss_ce": 0.12357192486524582, + "loss_iou": 0.10400390625, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 20964948, + "step": 122 + }, + { + "epoch": 0.032353521404616295, + "grad_norm": 34.33651980128175, + "learning_rate": 5e-06, + "loss": 0.5624, + "num_input_tokens_seen": 21134744, + "step": 123 + }, + { + "epoch": 0.032353521404616295, + "loss": 0.4556346535682678, + "loss_ce": 0.13812974095344543, + "loss_iou": 0.6640625, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 21134744, + "step": 123 + }, + { + "epoch": 0.03261655816400342, + "grad_norm": 22.118966434798295, + "learning_rate": 5e-06, + "loss": 0.6017, + "num_input_tokens_seen": 21305320, + "step": 124 + }, + { + "epoch": 0.03261655816400342, + "loss": 0.5379438996315002, + "loss_ce": 0.14316853880882263, + "loss_iou": 0.345703125, + "loss_num": 0.0791015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 21305320, + "step": 124 + }, + { + "epoch": 0.03287959492339054, + "grad_norm": 22.565129824621987, + "learning_rate": 5e-06, + "loss": 0.6452, + "num_input_tokens_seen": 21477544, + "step": 125 + }, + { + "epoch": 0.03287959492339054, + "loss": 0.5944963693618774, + "loss_ce": 0.13990655541419983, + "loss_iou": 0.216796875, + "loss_num": 0.0908203125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 21477544, + "step": 125 + }, + { + "epoch": 0.033142631682777667, + "grad_norm": 13.89281994742959, + "learning_rate": 5e-06, + "loss": 0.5755, + "num_input_tokens_seen": 21649600, + "step": 126 + }, + { + "epoch": 0.033142631682777667, + "loss": 0.5376471877098083, + "loss_ce": 0.12211985141038895, + "loss_iou": 0.10888671875, + "loss_num": 0.0830078125, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 21649600, + "step": 126 + }, + { + "epoch": 0.03340566844216479, + "grad_norm": 15.070169412653112, + "learning_rate": 5e-06, + "loss": 0.4704, + "num_input_tokens_seen": 21821564, + "step": 127 + }, + { + "epoch": 0.03340566844216479, + "loss": 0.4526183605194092, + "loss_ce": 0.12595820426940918, + "loss_iou": 0.21484375, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 21821564, + "step": 127 + }, + { + "epoch": 0.033668705201551914, + "grad_norm": 14.62625838728594, + "learning_rate": 5e-06, + "loss": 0.4885, + "num_input_tokens_seen": 21993864, + "step": 128 + }, + { + "epoch": 0.033668705201551914, + "loss": 0.5165751576423645, + "loss_ce": 0.12289837747812271, + "loss_iou": 0.28515625, + "loss_num": 0.07861328125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 21993864, + "step": 128 + }, + { + "epoch": 0.03393174196093904, + "grad_norm": 20.05433315099477, + "learning_rate": 5e-06, + "loss": 0.4754, + "num_input_tokens_seen": 22162596, + "step": 129 + }, + { + "epoch": 0.03393174196093904, + "loss": 0.4400935173034668, + "loss_ce": 0.11538645625114441, + "loss_iou": 0.59375, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 22162596, + "step": 129 + }, + { + "epoch": 0.03419477872032617, + "grad_norm": 17.28797678972647, + "learning_rate": 5e-06, + "loss": 0.5272, + "num_input_tokens_seen": 22334624, + "step": 130 + }, + { + "epoch": 0.03419477872032617, + "loss": 0.5726144313812256, + "loss_ce": 0.1124093234539032, + "loss_iou": 0.17578125, + "loss_num": 0.091796875, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 22334624, + "step": 130 + }, + { + "epoch": 0.03445781547971329, + "grad_norm": 19.611364973231527, + "learning_rate": 5e-06, + "loss": 0.5629, + "num_input_tokens_seen": 22506552, + "step": 131 + }, + { + "epoch": 0.03445781547971329, + "loss": 0.5982115268707275, + "loss_ce": 0.10993030667304993, + "loss_iou": 0.08642578125, + "loss_num": 0.09765625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 22506552, + "step": 131 + }, + { + "epoch": 0.034720852239100417, + "grad_norm": 16.543739789854843, + "learning_rate": 5e-06, + "loss": 0.5381, + "num_input_tokens_seen": 22678808, + "step": 132 + }, + { + "epoch": 0.034720852239100417, + "loss": 0.45885854959487915, + "loss_ce": 0.11523060500621796, + "loss_iou": 0.337890625, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 22678808, + "step": 132 + }, + { + "epoch": 0.03498388899848754, + "grad_norm": 17.674510611725847, + "learning_rate": 5e-06, + "loss": 0.4525, + "num_input_tokens_seen": 22850736, + "step": 133 + }, + { + "epoch": 0.03498388899848754, + "loss": 0.45272764563560486, + "loss_ce": 0.12191709131002426, + "loss_iou": 0.2216796875, + "loss_num": 0.06640625, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 22850736, + "step": 133 + }, + { + "epoch": 0.035246925757874664, + "grad_norm": 15.972341632067714, + "learning_rate": 5e-06, + "loss": 0.4719, + "num_input_tokens_seen": 23020040, + "step": 134 + }, + { + "epoch": 0.035246925757874664, + "loss": 0.5267431139945984, + "loss_ce": 0.11499997228384018, + "loss_iou": 0.232421875, + "loss_num": 0.08251953125, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 23020040, + "step": 134 + }, + { + "epoch": 0.03550996251726179, + "grad_norm": 18.16452757572112, + "learning_rate": 5e-06, + "loss": 0.4878, + "num_input_tokens_seen": 23192048, + "step": 135 + }, + { + "epoch": 0.03550996251726179, + "loss": 0.42464134097099304, + "loss_ce": 0.10701439529657364, + "loss_iou": 0.3671875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 23192048, + "step": 135 + }, + { + "epoch": 0.03577299927664891, + "grad_norm": 16.237572483673375, + "learning_rate": 5e-06, + "loss": 0.4644, + "num_input_tokens_seen": 23360448, + "step": 136 + }, + { + "epoch": 0.03577299927664891, + "loss": 0.4710727334022522, + "loss_ce": 0.0985141396522522, + "loss_iou": 0.263671875, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 23360448, + "step": 136 + }, + { + "epoch": 0.036036036036036036, + "grad_norm": 18.804237772436053, + "learning_rate": 5e-06, + "loss": 0.4291, + "num_input_tokens_seen": 23530300, + "step": 137 + }, + { + "epoch": 0.036036036036036036, + "loss": 0.47930601239204407, + "loss_ce": 0.10406187921762466, + "loss_iou": 0.29296875, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 23530300, + "step": 137 + }, + { + "epoch": 0.03629907279542316, + "grad_norm": 17.355021474463314, + "learning_rate": 5e-06, + "loss": 0.5374, + "num_input_tokens_seen": 23702304, + "step": 138 + }, + { + "epoch": 0.03629907279542316, + "loss": 0.6615355014801025, + "loss_ce": 0.10416243970394135, + "loss_iou": 0.267578125, + "loss_num": 0.111328125, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 23702304, + "step": 138 + }, + { + "epoch": 0.03656210955481028, + "grad_norm": 14.642948575679453, + "learning_rate": 5e-06, + "loss": 0.4417, + "num_input_tokens_seen": 23872132, + "step": 139 + }, + { + "epoch": 0.03656210955481028, + "loss": 0.44469529390335083, + "loss_ce": 0.10735400021076202, + "loss_iou": 0.376953125, + "loss_num": 0.0673828125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 23872132, + "step": 139 + }, + { + "epoch": 0.03682514631419741, + "grad_norm": 14.422554847110444, + "learning_rate": 5e-06, + "loss": 0.4228, + "num_input_tokens_seen": 24044200, + "step": 140 + }, + { + "epoch": 0.03682514631419741, + "loss": 0.4664979577064514, + "loss_ce": 0.10651260614395142, + "loss_iou": 0.275390625, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 24044200, + "step": 140 + }, + { + "epoch": 0.03708818307358453, + "grad_norm": 27.027123187239088, + "learning_rate": 5e-06, + "loss": 0.4106, + "num_input_tokens_seen": 24216464, + "step": 141 + }, + { + "epoch": 0.03708818307358453, + "loss": 0.4140710234642029, + "loss_ce": 0.09876340627670288, + "loss_iou": 0.2275390625, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 24216464, + "step": 141 + }, + { + "epoch": 0.037351219832971655, + "grad_norm": 20.30988259642017, + "learning_rate": 5e-06, + "loss": 0.5663, + "num_input_tokens_seen": 24386876, + "step": 142 + }, + { + "epoch": 0.037351219832971655, + "loss": 0.500027596950531, + "loss_ce": 0.091580331325531, + "loss_iou": 0.25390625, + "loss_num": 0.08154296875, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 24386876, + "step": 142 + }, + { + "epoch": 0.03761425659235878, + "grad_norm": 17.151019381080523, + "learning_rate": 5e-06, + "loss": 0.4943, + "num_input_tokens_seen": 24559004, + "step": 143 + }, + { + "epoch": 0.03761425659235878, + "loss": 0.5077698826789856, + "loss_ce": 0.08748182654380798, + "loss_iou": 0.419921875, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 24559004, + "step": 143 + }, + { + "epoch": 0.03787729335174591, + "grad_norm": 13.449145519268138, + "learning_rate": 5e-06, + "loss": 0.4628, + "num_input_tokens_seen": 24729228, + "step": 144 + }, + { + "epoch": 0.03787729335174591, + "loss": 0.477780282497406, + "loss_ce": 0.0906953439116478, + "loss_iou": 0.2353515625, + "loss_num": 0.07763671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 24729228, + "step": 144 + }, + { + "epoch": 0.03814033011113303, + "grad_norm": 15.658548235445116, + "learning_rate": 5e-06, + "loss": 0.4482, + "num_input_tokens_seen": 24901324, + "step": 145 + }, + { + "epoch": 0.03814033011113303, + "loss": 0.460429847240448, + "loss_ce": 0.086406409740448, + "loss_iou": 0.267578125, + "loss_num": 0.07470703125, + "loss_xval": 0.375, + "num_input_tokens_seen": 24901324, + "step": 145 + }, + { + "epoch": 0.03840336687052016, + "grad_norm": 14.557303161182968, + "learning_rate": 5e-06, + "loss": 0.5115, + "num_input_tokens_seen": 25073508, + "step": 146 + }, + { + "epoch": 0.03840336687052016, + "loss": 0.6442508697509766, + "loss_ce": 0.07833293080329895, + "loss_iou": 0.11376953125, + "loss_num": 0.11328125, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 25073508, + "step": 146 + }, + { + "epoch": 0.03866640362990728, + "grad_norm": 17.777843374612736, + "learning_rate": 5e-06, + "loss": 0.4213, + "num_input_tokens_seen": 25245752, + "step": 147 + }, + { + "epoch": 0.03866640362990728, + "loss": 0.40006011724472046, + "loss_ce": 0.08243316411972046, + "loss_iou": 0.419921875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 25245752, + "step": 147 + }, + { + "epoch": 0.038929440389294405, + "grad_norm": 13.44125897811313, + "learning_rate": 5e-06, + "loss": 0.4791, + "num_input_tokens_seen": 25418156, + "step": 148 + }, + { + "epoch": 0.038929440389294405, + "loss": 0.5981014966964722, + "loss_ce": 0.08491791784763336, + "loss_iou": 0.18359375, + "loss_num": 0.1025390625, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 25418156, + "step": 148 + }, + { + "epoch": 0.03919247714868153, + "grad_norm": 11.76514004625091, + "learning_rate": 5e-06, + "loss": 0.4001, + "num_input_tokens_seen": 25589904, + "step": 149 + }, + { + "epoch": 0.03919247714868153, + "loss": 0.35078275203704834, + "loss_ce": 0.08393705636262894, + "loss_iou": 0.267578125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 25589904, + "step": 149 + }, + { + "epoch": 0.03945551390806865, + "grad_norm": 12.03724933893627, + "learning_rate": 5e-06, + "loss": 0.4147, + "num_input_tokens_seen": 25762000, + "step": 150 + }, + { + "epoch": 0.03945551390806865, + "loss": 0.4596315622329712, + "loss_ce": 0.0690065547823906, + "loss_iou": 0.376953125, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 25762000, + "step": 150 + }, + { + "epoch": 0.039718550667455776, + "grad_norm": 13.05163620933256, + "learning_rate": 5e-06, + "loss": 0.398, + "num_input_tokens_seen": 25934528, + "step": 151 + }, + { + "epoch": 0.039718550667455776, + "loss": 0.38974958658218384, + "loss_ce": 0.07322127372026443, + "loss_iou": 0.1591796875, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 25934528, + "step": 151 + }, + { + "epoch": 0.0399815874268429, + "grad_norm": 23.58601457584165, + "learning_rate": 5e-06, + "loss": 0.3897, + "num_input_tokens_seen": 26104148, + "step": 152 + }, + { + "epoch": 0.0399815874268429, + "loss": 0.3437004089355469, + "loss_ce": 0.07172775268554688, + "loss_iou": 0.326171875, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 26104148, + "step": 152 + }, + { + "epoch": 0.040244624186230024, + "grad_norm": 18.61300216202182, + "learning_rate": 5e-06, + "loss": 0.4682, + "num_input_tokens_seen": 26276256, + "step": 153 + }, + { + "epoch": 0.040244624186230024, + "loss": 0.43069130182266235, + "loss_ce": 0.06960733234882355, + "loss_iou": 0.419921875, + "loss_num": 0.072265625, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 26276256, + "step": 153 + }, + { + "epoch": 0.04050766094561715, + "grad_norm": 14.116759080889954, + "learning_rate": 5e-06, + "loss": 0.4744, + "num_input_tokens_seen": 26448288, + "step": 154 + }, + { + "epoch": 0.04050766094561715, + "loss": 0.5761303901672363, + "loss_ce": 0.0805249810218811, + "loss_iou": 0.333984375, + "loss_num": 0.09912109375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 26448288, + "step": 154 + }, + { + "epoch": 0.04077069770500427, + "grad_norm": 10.146662886533779, + "learning_rate": 5e-06, + "loss": 0.3974, + "num_input_tokens_seen": 26620604, + "step": 155 + }, + { + "epoch": 0.04077069770500427, + "loss": 0.3559998869895935, + "loss_ce": 0.07047741115093231, + "loss_iou": 0.310546875, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 26620604, + "step": 155 + }, + { + "epoch": 0.041033734464391396, + "grad_norm": 11.74557828439257, + "learning_rate": 5e-06, + "loss": 0.3616, + "num_input_tokens_seen": 26789104, + "step": 156 + }, + { + "epoch": 0.041033734464391396, + "loss": 0.3762925863265991, + "loss_ce": 0.06867540627717972, + "loss_iou": 0.2138671875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 26789104, + "step": 156 + }, + { + "epoch": 0.041296771223778526, + "grad_norm": 11.967339752616043, + "learning_rate": 5e-06, + "loss": 0.3655, + "num_input_tokens_seen": 26961408, + "step": 157 + }, + { + "epoch": 0.041296771223778526, + "loss": 0.36129921674728394, + "loss_ce": 0.060029659420251846, + "loss_iou": 0.369140625, + "loss_num": 0.060302734375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 26961408, + "step": 157 + }, + { + "epoch": 0.04155980798316565, + "grad_norm": 16.462145017062344, + "learning_rate": 5e-06, + "loss": 0.3599, + "num_input_tokens_seen": 27133600, + "step": 158 + }, + { + "epoch": 0.04155980798316565, + "loss": 0.4060816764831543, + "loss_ce": 0.06208755075931549, + "loss_iou": 0.291015625, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 27133600, + "step": 158 + }, + { + "epoch": 0.041822844742552774, + "grad_norm": 18.2471111237373, + "learning_rate": 5e-06, + "loss": 0.4173, + "num_input_tokens_seen": 27305676, + "step": 159 + }, + { + "epoch": 0.041822844742552774, + "loss": 0.3555372357368469, + "loss_ce": 0.07025889307260513, + "loss_iou": 0.271484375, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 27305676, + "step": 159 + }, + { + "epoch": 0.0420858815019399, + "grad_norm": 22.735392677918405, + "learning_rate": 5e-06, + "loss": 0.5962, + "num_input_tokens_seen": 27477824, + "step": 160 + }, + { + "epoch": 0.0420858815019399, + "loss": 0.6341134309768677, + "loss_ce": 0.06184776872396469, + "loss_iou": 0.333984375, + "loss_num": 0.1142578125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 27477824, + "step": 160 + }, + { + "epoch": 0.04234891826132702, + "grad_norm": 13.104760953447814, + "learning_rate": 5e-06, + "loss": 0.4091, + "num_input_tokens_seen": 27650024, + "step": 161 + }, + { + "epoch": 0.04234891826132702, + "loss": 0.4603307843208313, + "loss_ce": 0.059085663408041, + "loss_iou": 0.275390625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 27650024, + "step": 161 + }, + { + "epoch": 0.042611955020714146, + "grad_norm": 15.303759630511669, + "learning_rate": 5e-06, + "loss": 0.4433, + "num_input_tokens_seen": 27822408, + "step": 162 + }, + { + "epoch": 0.042611955020714146, + "loss": 0.43349525332450867, + "loss_ce": 0.06215735524892807, + "loss_iou": 0.23828125, + "loss_num": 0.07421875, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 27822408, + "step": 162 + }, + { + "epoch": 0.04287499178010127, + "grad_norm": 14.84522191071012, + "learning_rate": 5e-06, + "loss": 0.3769, + "num_input_tokens_seen": 27992440, + "step": 163 + }, + { + "epoch": 0.04287499178010127, + "loss": 0.34604543447494507, + "loss_ce": 0.05673878639936447, + "loss_iou": 0.24609375, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 27992440, + "step": 163 + }, + { + "epoch": 0.04313802853948839, + "grad_norm": 17.265704948492278, + "learning_rate": 5e-06, + "loss": 0.4298, + "num_input_tokens_seen": 28164464, + "step": 164 + }, + { + "epoch": 0.04313802853948839, + "loss": 0.48720821738243103, + "loss_ce": 0.05849727243185043, + "loss_iou": 0.2890625, + "loss_num": 0.0859375, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 28164464, + "step": 164 + }, + { + "epoch": 0.04340106529887552, + "grad_norm": 13.53959312060916, + "learning_rate": 5e-06, + "loss": 0.3991, + "num_input_tokens_seen": 28334436, + "step": 165 + }, + { + "epoch": 0.04340106529887552, + "loss": 0.3757469058036804, + "loss_ce": 0.05201642960309982, + "loss_iou": 0.1220703125, + "loss_num": 0.064453125, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 28334436, + "step": 165 + }, + { + "epoch": 0.04366410205826264, + "grad_norm": 12.759789778648042, + "learning_rate": 5e-06, + "loss": 0.3563, + "num_input_tokens_seen": 28506772, + "step": 166 + }, + { + "epoch": 0.04366410205826264, + "loss": 0.3605605363845825, + "loss_ce": 0.05233298987150192, + "loss_iou": 0.341796875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 28506772, + "step": 166 + }, + { + "epoch": 0.043927138817649765, + "grad_norm": 14.640887630603752, + "learning_rate": 5e-06, + "loss": 0.3881, + "num_input_tokens_seen": 28676980, + "step": 167 + }, + { + "epoch": 0.043927138817649765, + "loss": 0.3610857427120209, + "loss_ce": 0.056642383337020874, + "loss_iou": 0.240234375, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 28676980, + "step": 167 + }, + { + "epoch": 0.04419017557703689, + "grad_norm": 12.095247845748437, + "learning_rate": 5e-06, + "loss": 0.3742, + "num_input_tokens_seen": 28849236, + "step": 168 + }, + { + "epoch": 0.04419017557703689, + "loss": 0.40532949566841125, + "loss_ce": 0.05388907343149185, + "loss_iou": 0.3203125, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 28849236, + "step": 168 + }, + { + "epoch": 0.04445321233642401, + "grad_norm": 10.33587361024043, + "learning_rate": 5e-06, + "loss": 0.3837, + "num_input_tokens_seen": 29021180, + "step": 169 + }, + { + "epoch": 0.04445321233642401, + "loss": 0.3248461186885834, + "loss_ce": 0.046464771032333374, + "loss_iou": 0.1787109375, + "loss_num": 0.0556640625, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 29021180, + "step": 169 + }, + { + "epoch": 0.044716249095811136, + "grad_norm": 10.708625450816182, + "learning_rate": 5e-06, + "loss": 0.3951, + "num_input_tokens_seen": 29191532, + "step": 170 + }, + { + "epoch": 0.044716249095811136, + "loss": 0.4214698076248169, + "loss_ce": 0.052573323249816895, + "loss_iou": 0.33203125, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 29191532, + "step": 170 + }, + { + "epoch": 0.04497928585519827, + "grad_norm": 14.817164885231227, + "learning_rate": 5e-06, + "loss": 0.3936, + "num_input_tokens_seen": 29363536, + "step": 171 + }, + { + "epoch": 0.04497928585519827, + "loss": 0.40210169553756714, + "loss_ce": 0.04602260887622833, + "loss_iou": 0.3046875, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 29363536, + "step": 171 + }, + { + "epoch": 0.04524232261458539, + "grad_norm": 11.284174351804506, + "learning_rate": 5e-06, + "loss": 0.3753, + "num_input_tokens_seen": 29533848, + "step": 172 + }, + { + "epoch": 0.04524232261458539, + "loss": 0.3555999994277954, + "loss_ce": 0.04358828812837601, + "loss_iou": 0.2890625, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 29533848, + "step": 172 + }, + { + "epoch": 0.045505359373972515, + "grad_norm": 9.66534144364822, + "learning_rate": 5e-06, + "loss": 0.307, + "num_input_tokens_seen": 29706424, + "step": 173 + }, + { + "epoch": 0.045505359373972515, + "loss": 0.3183665871620178, + "loss_ce": 0.04297598451375961, + "loss_iou": 0.35546875, + "loss_num": 0.05517578125, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 29706424, + "step": 173 + }, + { + "epoch": 0.04576839613335964, + "grad_norm": 15.933225853148851, + "learning_rate": 5e-06, + "loss": 0.3645, + "num_input_tokens_seen": 29878348, + "step": 174 + }, + { + "epoch": 0.04576839613335964, + "loss": 0.24233956634998322, + "loss_ce": 0.051421597599983215, + "loss_iou": 0.609375, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 29878348, + "step": 174 + }, + { + "epoch": 0.04603143289274676, + "grad_norm": 15.32149954029387, + "learning_rate": 5e-06, + "loss": 0.403, + "num_input_tokens_seen": 30050700, + "step": 175 + }, + { + "epoch": 0.04603143289274676, + "loss": 0.41682887077331543, + "loss_ce": 0.047810301184654236, + "loss_iou": 0.2734375, + "loss_num": 0.07373046875, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 30050700, + "step": 175 + }, + { + "epoch": 0.046294469652133886, + "grad_norm": 11.301125755600708, + "learning_rate": 5e-06, + "loss": 0.3852, + "num_input_tokens_seen": 30222872, + "step": 176 + }, + { + "epoch": 0.046294469652133886, + "loss": 0.35361334681510925, + "loss_ce": 0.04599615931510925, + "loss_iou": 0.341796875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 30222872, + "step": 176 + }, + { + "epoch": 0.04655750641152101, + "grad_norm": 15.04642729797989, + "learning_rate": 5e-06, + "loss": 0.3569, + "num_input_tokens_seen": 30394820, + "step": 177 + }, + { + "epoch": 0.04655750641152101, + "loss": 0.31377077102661133, + "loss_ce": 0.04106569290161133, + "loss_iou": 0.2109375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 30394820, + "step": 177 + }, + { + "epoch": 0.046820543170908134, + "grad_norm": 14.180764275493368, + "learning_rate": 5e-06, + "loss": 0.3856, + "num_input_tokens_seen": 30567088, + "step": 178 + }, + { + "epoch": 0.046820543170908134, + "loss": 0.3383968770503998, + "loss_ce": 0.04188808798789978, + "loss_iou": 0.36328125, + "loss_num": 0.059326171875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 30567088, + "step": 178 + }, + { + "epoch": 0.04708357993029526, + "grad_norm": 11.43960593758222, + "learning_rate": 5e-06, + "loss": 0.3723, + "num_input_tokens_seen": 30739392, + "step": 179 + }, + { + "epoch": 0.04708357993029526, + "loss": 0.4008994996547699, + "loss_ce": 0.0434776172041893, + "loss_iou": 0.2451171875, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 30739392, + "step": 179 + }, + { + "epoch": 0.04734661668968238, + "grad_norm": 10.118856123139876, + "learning_rate": 5e-06, + "loss": 0.3842, + "num_input_tokens_seen": 30911592, + "step": 180 + }, + { + "epoch": 0.04734661668968238, + "loss": 0.33974575996398926, + "loss_ce": 0.03798792511224747, + "loss_iou": 0.49609375, + "loss_num": 0.060546875, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 30911592, + "step": 180 + }, + { + "epoch": 0.047609653449069506, + "grad_norm": 8.662737873806904, + "learning_rate": 5e-06, + "loss": 0.2718, + "num_input_tokens_seen": 31083320, + "step": 181 + }, + { + "epoch": 0.047609653449069506, + "loss": 0.24805772304534912, + "loss_ce": 0.034434687346220016, + "loss_iou": 0.330078125, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 31083320, + "step": 181 + }, + { + "epoch": 0.04787269020845663, + "grad_norm": 9.949547683003779, + "learning_rate": 5e-06, + "loss": 0.3404, + "num_input_tokens_seen": 31255136, + "step": 182 + }, + { + "epoch": 0.04787269020845663, + "loss": 0.3085269331932068, + "loss_ce": 0.03093905746936798, + "loss_iou": 0.61328125, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 31255136, + "step": 182 + }, + { + "epoch": 0.04813572696784375, + "grad_norm": 11.304350377916183, + "learning_rate": 5e-06, + "loss": 0.3478, + "num_input_tokens_seen": 31427380, + "step": 183 + }, + { + "epoch": 0.04813572696784375, + "loss": 0.2798244059085846, + "loss_ce": 0.037881046533584595, + "loss_iou": 0.380859375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 31427380, + "step": 183 + }, + { + "epoch": 0.04839876372723088, + "grad_norm": 20.65800125552813, + "learning_rate": 5e-06, + "loss": 0.4261, + "num_input_tokens_seen": 31599344, + "step": 184 + }, + { + "epoch": 0.04839876372723088, + "loss": 0.37513959407806396, + "loss_ce": 0.03566203638911247, + "loss_iou": 0.59375, + "loss_num": 0.06787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 31599344, + "step": 184 + }, + { + "epoch": 0.04866180048661801, + "grad_norm": 21.767539584052432, + "learning_rate": 5e-06, + "loss": 0.4585, + "num_input_tokens_seen": 31771536, + "step": 185 + }, + { + "epoch": 0.04866180048661801, + "loss": 0.437938392162323, + "loss_ce": 0.031200092285871506, + "loss_iou": 0.275390625, + "loss_num": 0.08154296875, + "loss_xval": 0.40625, + "num_input_tokens_seen": 31771536, + "step": 185 + }, + { + "epoch": 0.04892483724600513, + "grad_norm": 19.478046485138204, + "learning_rate": 5e-06, + "loss": 0.5036, + "num_input_tokens_seen": 31943688, + "step": 186 + }, + { + "epoch": 0.04892483724600513, + "loss": 0.4687976837158203, + "loss_ce": 0.03593636304140091, + "loss_iou": 0.201171875, + "loss_num": 0.08642578125, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 31943688, + "step": 186 + }, + { + "epoch": 0.049187874005392256, + "grad_norm": 11.185610516547799, + "learning_rate": 5e-06, + "loss": 0.3628, + "num_input_tokens_seen": 32116040, + "step": 187 + }, + { + "epoch": 0.049187874005392256, + "loss": 0.39327770471572876, + "loss_ce": 0.03439096361398697, + "loss_iou": 0.36328125, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 32116040, + "step": 187 + }, + { + "epoch": 0.04945091076477938, + "grad_norm": 11.155969289477222, + "learning_rate": 5e-06, + "loss": 0.2974, + "num_input_tokens_seen": 32288500, + "step": 188 + }, + { + "epoch": 0.04945091076477938, + "loss": 0.31713372468948364, + "loss_ce": 0.028681576251983643, + "loss_iou": 0.31640625, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 32288500, + "step": 188 + }, + { + "epoch": 0.0497139475241665, + "grad_norm": 11.834615664464208, + "learning_rate": 5e-06, + "loss": 0.3369, + "num_input_tokens_seen": 32460772, + "step": 189 + }, + { + "epoch": 0.0497139475241665, + "loss": 0.3497753441333771, + "loss_ce": 0.02921871840953827, + "loss_iou": 0.484375, + "loss_num": 0.06396484375, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 32460772, + "step": 189 + }, + { + "epoch": 0.04997698428355363, + "grad_norm": 10.434886006122214, + "learning_rate": 5e-06, + "loss": 0.3387, + "num_input_tokens_seen": 32632848, + "step": 190 + }, + { + "epoch": 0.04997698428355363, + "loss": 0.36610347032546997, + "loss_ce": 0.029921812936663628, + "loss_iou": 0.38671875, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 32632848, + "step": 190 + }, + { + "epoch": 0.05024002104294075, + "grad_norm": 9.800996766479896, + "learning_rate": 5e-06, + "loss": 0.3599, + "num_input_tokens_seen": 32805200, + "step": 191 + }, + { + "epoch": 0.05024002104294075, + "loss": 0.3484704792499542, + "loss_ce": 0.032674580812454224, + "loss_iou": 0.466796875, + "loss_num": 0.06298828125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 32805200, + "step": 191 + }, + { + "epoch": 0.050503057802327875, + "grad_norm": 9.1652653754744, + "learning_rate": 5e-06, + "loss": 0.3086, + "num_input_tokens_seen": 32977420, + "step": 192 + }, + { + "epoch": 0.050503057802327875, + "loss": 0.3598284125328064, + "loss_ce": 0.027797123417258263, + "loss_iou": 0.283203125, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 32977420, + "step": 192 + }, + { + "epoch": 0.050766094561715, + "grad_norm": 11.082061385505076, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 33149792, + "step": 193 + }, + { + "epoch": 0.050766094561715, + "loss": 0.3082242012023926, + "loss_ce": 0.035763248801231384, + "loss_iou": 0.41796875, + "loss_num": 0.054443359375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 33149792, + "step": 193 + }, + { + "epoch": 0.05102913132110212, + "grad_norm": 10.154212185692936, + "learning_rate": 5e-06, + "loss": 0.3308, + "num_input_tokens_seen": 33321580, + "step": 194 + }, + { + "epoch": 0.05102913132110212, + "loss": 0.3002238869667053, + "loss_ce": 0.02568773366510868, + "loss_iou": 0.36328125, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 33321580, + "step": 194 + }, + { + "epoch": 0.051292168080489246, + "grad_norm": 12.486303050942563, + "learning_rate": 5e-06, + "loss": 0.3875, + "num_input_tokens_seen": 33493632, + "step": 195 + }, + { + "epoch": 0.051292168080489246, + "loss": 0.35659241676330566, + "loss_ce": 0.030542613938450813, + "loss_iou": 0.455078125, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 33493632, + "step": 195 + }, + { + "epoch": 0.05155520483987637, + "grad_norm": 13.41419397842875, + "learning_rate": 5e-06, + "loss": 0.3335, + "num_input_tokens_seen": 33665544, + "step": 196 + }, + { + "epoch": 0.05155520483987637, + "loss": 0.36018121242523193, + "loss_ce": 0.02436576411128044, + "loss_iou": 0.37890625, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 33665544, + "step": 196 + }, + { + "epoch": 0.051818241599263494, + "grad_norm": 19.34380060773709, + "learning_rate": 5e-06, + "loss": 0.3472, + "num_input_tokens_seen": 33837492, + "step": 197 + }, + { + "epoch": 0.051818241599263494, + "loss": 0.37059885263442993, + "loss_ce": 0.024651601910591125, + "loss_iou": 0.341796875, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 33837492, + "step": 197 + }, + { + "epoch": 0.052081278358650625, + "grad_norm": 12.626529129648052, + "learning_rate": 5e-06, + "loss": 0.3657, + "num_input_tokens_seen": 34009512, + "step": 198 + }, + { + "epoch": 0.052081278358650625, + "loss": 0.29348307847976685, + "loss_ce": 0.028102193027734756, + "loss_iou": 0.376953125, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 34009512, + "step": 198 + }, + { + "epoch": 0.05234431511803775, + "grad_norm": 10.467016024509215, + "learning_rate": 5e-06, + "loss": 0.3464, + "num_input_tokens_seen": 34181600, + "step": 199 + }, + { + "epoch": 0.05234431511803775, + "loss": 0.30455371737480164, + "loss_ce": 0.022815439850091934, + "loss_iou": 0.400390625, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 34181600, + "step": 199 + }, + { + "epoch": 0.05260735187742487, + "grad_norm": 13.453464722997529, + "learning_rate": 5e-06, + "loss": 0.3248, + "num_input_tokens_seen": 34352348, + "step": 200 + }, + { + "epoch": 0.05260735187742487, + "loss": 0.45307034254074097, + "loss_ce": 0.020819369703531265, + "loss_iou": 0.3515625, + "loss_num": 0.08642578125, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 34352348, + "step": 200 + }, + { + "epoch": 0.052870388636811996, + "grad_norm": 20.491451966511576, + "learning_rate": 5e-06, + "loss": 0.4317, + "num_input_tokens_seen": 34524600, + "step": 201 + }, + { + "epoch": 0.052870388636811996, + "loss": 0.41993263363838196, + "loss_ce": 0.024668946862220764, + "loss_iou": 0.51171875, + "loss_num": 0.0791015625, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 34524600, + "step": 201 + }, + { + "epoch": 0.05313342539619912, + "grad_norm": 17.643454599965338, + "learning_rate": 5e-06, + "loss": 0.4565, + "num_input_tokens_seen": 34696600, + "step": 202 + }, + { + "epoch": 0.05313342539619912, + "loss": 0.5106714963912964, + "loss_ce": 0.022634411230683327, + "loss_iou": 0.0888671875, + "loss_num": 0.09765625, + "loss_xval": 0.48828125, + "num_input_tokens_seen": 34696600, + "step": 202 + }, + { + "epoch": 0.053396462155586244, + "grad_norm": 13.599251013665107, + "learning_rate": 5e-06, + "loss": 0.3165, + "num_input_tokens_seen": 34868596, + "step": 203 + }, + { + "epoch": 0.053396462155586244, + "loss": 0.3383103609085083, + "loss_ce": 0.026054508984088898, + "loss_iou": 0.26953125, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 34868596, + "step": 203 + }, + { + "epoch": 0.05365949891497337, + "grad_norm": 15.289442638831803, + "learning_rate": 5e-06, + "loss": 0.3217, + "num_input_tokens_seen": 35038040, + "step": 204 + }, + { + "epoch": 0.05365949891497337, + "loss": 0.306024968624115, + "loss_ce": 0.022089410573244095, + "loss_iou": 0.39453125, + "loss_num": 0.056884765625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 35038040, + "step": 204 + }, + { + "epoch": 0.05392253567436049, + "grad_norm": 10.407634168683858, + "learning_rate": 5e-06, + "loss": 0.3039, + "num_input_tokens_seen": 35208292, + "step": 205 + }, + { + "epoch": 0.05392253567436049, + "loss": 0.35352200269699097, + "loss_ce": 0.026007331907749176, + "loss_iou": 0.423828125, + "loss_num": 0.0654296875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 35208292, + "step": 205 + }, + { + "epoch": 0.054185572433747616, + "grad_norm": 11.313536084612993, + "learning_rate": 5e-06, + "loss": 0.3141, + "num_input_tokens_seen": 35377388, + "step": 206 + }, + { + "epoch": 0.054185572433747616, + "loss": 0.27950865030288696, + "loss_ce": 0.02438168227672577, + "loss_iou": 0.431640625, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 35377388, + "step": 206 + }, + { + "epoch": 0.05444860919313474, + "grad_norm": 11.696274899578064, + "learning_rate": 5e-06, + "loss": 0.3578, + "num_input_tokens_seen": 35547812, + "step": 207 + }, + { + "epoch": 0.05444860919313474, + "loss": 0.3501763641834259, + "loss_ce": 0.0216851644217968, + "loss_iou": 0.53125, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 35547812, + "step": 207 + }, + { + "epoch": 0.05471164595252186, + "grad_norm": 10.19535036839742, + "learning_rate": 5e-06, + "loss": 0.2938, + "num_input_tokens_seen": 35720256, + "step": 208 + }, + { + "epoch": 0.05471164595252186, + "loss": 0.28618597984313965, + "loss_ce": 0.023246534168720245, + "loss_iou": 0.421875, + "loss_num": 0.052490234375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 35720256, + "step": 208 + }, + { + "epoch": 0.05497468271190899, + "grad_norm": 10.108238117808526, + "learning_rate": 5e-06, + "loss": 0.2815, + "num_input_tokens_seen": 35892100, + "step": 209 + }, + { + "epoch": 0.05497468271190899, + "loss": 0.28424978256225586, + "loss_ce": 0.025460712611675262, + "loss_iou": 0.3125, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 35892100, + "step": 209 + }, + { + "epoch": 0.05523771947129611, + "grad_norm": 9.765870248571328, + "learning_rate": 5e-06, + "loss": 0.3151, + "num_input_tokens_seen": 36064296, + "step": 210 + }, + { + "epoch": 0.05523771947129611, + "loss": 0.30667591094970703, + "loss_ce": 0.02457140013575554, + "loss_iou": 0.373046875, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 36064296, + "step": 210 + }, + { + "epoch": 0.055500756230683235, + "grad_norm": 8.667238391078952, + "learning_rate": 5e-06, + "loss": 0.3062, + "num_input_tokens_seen": 36236356, + "step": 211 + }, + { + "epoch": 0.055500756230683235, + "loss": 0.32360944151878357, + "loss_ce": 0.021241270005702972, + "loss_iou": 0.318359375, + "loss_num": 0.060546875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 36236356, + "step": 211 + }, + { + "epoch": 0.055763792990070365, + "grad_norm": 7.623662373992362, + "learning_rate": 5e-06, + "loss": 0.2441, + "num_input_tokens_seen": 36406508, + "step": 212 + }, + { + "epoch": 0.055763792990070365, + "loss": 0.2571391761302948, + "loss_ce": 0.022825222462415695, + "loss_iou": 0.50390625, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 36406508, + "step": 212 + }, + { + "epoch": 0.05602682974945749, + "grad_norm": 9.522758823829298, + "learning_rate": 5e-06, + "loss": 0.263, + "num_input_tokens_seen": 36578384, + "step": 213 + }, + { + "epoch": 0.05602682974945749, + "loss": 0.20427684485912323, + "loss_ce": 0.022819336503744125, + "loss_iou": 0.32421875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 36578384, + "step": 213 + }, + { + "epoch": 0.05628986650884461, + "grad_norm": 14.400118985818274, + "learning_rate": 5e-06, + "loss": 0.293, + "num_input_tokens_seen": 36750552, + "step": 214 + }, + { + "epoch": 0.05628986650884461, + "loss": 0.29619529843330383, + "loss_ce": 0.021537089720368385, + "loss_iou": 0.44140625, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 36750552, + "step": 214 + }, + { + "epoch": 0.05655290326823174, + "grad_norm": 16.206900517822692, + "learning_rate": 5e-06, + "loss": 0.3487, + "num_input_tokens_seen": 36922680, + "step": 215 + }, + { + "epoch": 0.05655290326823174, + "loss": 0.37616318464279175, + "loss_ce": 0.018497148528695107, + "loss_iou": 0.224609375, + "loss_num": 0.07177734375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 36922680, + "step": 215 + }, + { + "epoch": 0.05681594002761886, + "grad_norm": 16.214529098872383, + "learning_rate": 5e-06, + "loss": 0.3418, + "num_input_tokens_seen": 37095056, + "step": 216 + }, + { + "epoch": 0.05681594002761886, + "loss": 0.37693583965301514, + "loss_ce": 0.018171211704611778, + "loss_iou": 0.208984375, + "loss_num": 0.07177734375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 37095056, + "step": 216 + }, + { + "epoch": 0.057078976787005985, + "grad_norm": 11.604435704258858, + "learning_rate": 5e-06, + "loss": 0.3517, + "num_input_tokens_seen": 37267308, + "step": 217 + }, + { + "epoch": 0.057078976787005985, + "loss": 0.37936773896217346, + "loss_ce": 0.022922419011592865, + "loss_iou": 0.55078125, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 37267308, + "step": 217 + }, + { + "epoch": 0.05734201354639311, + "grad_norm": 25.63677528330297, + "learning_rate": 5e-06, + "loss": 0.2984, + "num_input_tokens_seen": 37434780, + "step": 218 + }, + { + "epoch": 0.05734201354639311, + "loss": 0.31407007575035095, + "loss_ce": 0.02378687635064125, + "loss_iou": 0.455078125, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 37434780, + "step": 218 + }, + { + "epoch": 0.05760505030578023, + "grad_norm": 11.86583190083979, + "learning_rate": 5e-06, + "loss": 0.3346, + "num_input_tokens_seen": 37605076, + "step": 219 + }, + { + "epoch": 0.05760505030578023, + "loss": 0.31814104318618774, + "loss_ce": 0.02236468717455864, + "loss_iou": 0.279296875, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 37605076, + "step": 219 + }, + { + "epoch": 0.057868087065167356, + "grad_norm": 14.940327825772103, + "learning_rate": 5e-06, + "loss": 0.3733, + "num_input_tokens_seen": 37777704, + "step": 220 + }, + { + "epoch": 0.057868087065167356, + "loss": 0.37386685609817505, + "loss_ce": 0.020107077434659004, + "loss_iou": 0.443359375, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 37777704, + "step": 220 + }, + { + "epoch": 0.05813112382455448, + "grad_norm": 13.873681072861537, + "learning_rate": 5e-06, + "loss": 0.3326, + "num_input_tokens_seen": 37948064, + "step": 221 + }, + { + "epoch": 0.05813112382455448, + "loss": 0.3946155905723572, + "loss_ce": 0.019127311185002327, + "loss_iou": 0.18359375, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 37948064, + "step": 221 + }, + { + "epoch": 0.058394160583941604, + "grad_norm": 13.253297702701516, + "learning_rate": 5e-06, + "loss": 0.3428, + "num_input_tokens_seen": 38118984, + "step": 222 + }, + { + "epoch": 0.058394160583941604, + "loss": 0.3087061643600464, + "loss_ce": 0.018300898373126984, + "loss_iou": 0.18359375, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 38118984, + "step": 222 + }, + { + "epoch": 0.05865719734332873, + "grad_norm": 11.082820014401106, + "learning_rate": 5e-06, + "loss": 0.2927, + "num_input_tokens_seen": 38291092, + "step": 223 + }, + { + "epoch": 0.05865719734332873, + "loss": 0.2926178574562073, + "loss_ce": 0.017837589606642723, + "loss_iou": 0.337890625, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 38291092, + "step": 223 + }, + { + "epoch": 0.05892023410271585, + "grad_norm": 13.606311852622206, + "learning_rate": 5e-06, + "loss": 0.2877, + "num_input_tokens_seen": 38461712, + "step": 224 + }, + { + "epoch": 0.05892023410271585, + "loss": 0.2914125323295593, + "loss_ce": 0.016632266342639923, + "loss_iou": 0.29296875, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 38461712, + "step": 224 + }, + { + "epoch": 0.059183270862102975, + "grad_norm": 12.858292254216362, + "learning_rate": 5e-06, + "loss": 0.3576, + "num_input_tokens_seen": 38633936, + "step": 225 + }, + { + "epoch": 0.059183270862102975, + "loss": 0.36546608805656433, + "loss_ce": 0.015124273486435413, + "loss_iou": 0.2890625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 38633936, + "step": 225 + }, + { + "epoch": 0.059446307621490106, + "grad_norm": 15.356311927838101, + "learning_rate": 5e-06, + "loss": 0.2591, + "num_input_tokens_seen": 38802732, + "step": 226 + }, + { + "epoch": 0.059446307621490106, + "loss": 0.24970154464244843, + "loss_ce": 0.01520447339862585, + "loss_iou": 0.412109375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 38802732, + "step": 226 + }, + { + "epoch": 0.05970934438087723, + "grad_norm": 33.523690387446095, + "learning_rate": 5e-06, + "loss": 0.3181, + "num_input_tokens_seen": 38975160, + "step": 227 + }, + { + "epoch": 0.05970934438087723, + "loss": 0.3422040641307831, + "loss_ce": 0.015910113230347633, + "loss_iou": 0.33984375, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 38975160, + "step": 227 + }, + { + "epoch": 0.059972381140264354, + "grad_norm": 8.426822348321474, + "learning_rate": 5e-06, + "loss": 0.2888, + "num_input_tokens_seen": 39147352, + "step": 228 + }, + { + "epoch": 0.059972381140264354, + "loss": 0.29814714193344116, + "loss_ce": 0.014455747790634632, + "loss_iou": 0.3359375, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 39147352, + "step": 228 + }, + { + "epoch": 0.06023541789965148, + "grad_norm": 9.384703696891318, + "learning_rate": 5e-06, + "loss": 0.2861, + "num_input_tokens_seen": 39319476, + "step": 229 + }, + { + "epoch": 0.06023541789965148, + "loss": 0.2668268382549286, + "loss_ce": 0.020000681281089783, + "loss_iou": 0.474609375, + "loss_num": 0.04931640625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 39319476, + "step": 229 + }, + { + "epoch": 0.0604984546590386, + "grad_norm": 18.427622180632213, + "learning_rate": 5e-06, + "loss": 0.3917, + "num_input_tokens_seen": 39491664, + "step": 230 + }, + { + "epoch": 0.0604984546590386, + "loss": 0.390484094619751, + "loss_ce": 0.015484098345041275, + "loss_iou": 0.875, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 39491664, + "step": 230 + }, + { + "epoch": 0.060761491418425725, + "grad_norm": 10.977429362928614, + "learning_rate": 5e-06, + "loss": 0.3949, + "num_input_tokens_seen": 39663560, + "step": 231 + }, + { + "epoch": 0.060761491418425725, + "loss": 0.334339439868927, + "loss_ce": 0.015735914930701256, + "loss_iou": 0.283203125, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 39663560, + "step": 231 + }, + { + "epoch": 0.06102452817781285, + "grad_norm": 7.478546882380691, + "learning_rate": 5e-06, + "loss": 0.2993, + "num_input_tokens_seen": 39835708, + "step": 232 + }, + { + "epoch": 0.06102452817781285, + "loss": 0.2514447867870331, + "loss_ce": 0.014872531406581402, + "loss_iou": 0.45703125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 39835708, + "step": 232 + }, + { + "epoch": 0.06128756493719997, + "grad_norm": 7.3196146291196635, + "learning_rate": 5e-06, + "loss": 0.2693, + "num_input_tokens_seen": 40008008, + "step": 233 + }, + { + "epoch": 0.06128756493719997, + "loss": 0.3245670795440674, + "loss_ce": 0.01768231764435768, + "loss_iou": 0.380859375, + "loss_num": 0.0615234375, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 40008008, + "step": 233 + }, + { + "epoch": 0.0615506016965871, + "grad_norm": 10.123913601017227, + "learning_rate": 5e-06, + "loss": 0.2834, + "num_input_tokens_seen": 40179816, + "step": 234 + }, + { + "epoch": 0.0615506016965871, + "loss": 0.2691105008125305, + "loss_ce": 0.01740150898694992, + "loss_iou": 0.462890625, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 40179816, + "step": 234 + }, + { + "epoch": 0.06181363845597422, + "grad_norm": 16.840337823170596, + "learning_rate": 5e-06, + "loss": 0.3163, + "num_input_tokens_seen": 40352180, + "step": 235 + }, + { + "epoch": 0.06181363845597422, + "loss": 0.3705546259880066, + "loss_ce": 0.015330012887716293, + "loss_iou": 0.232421875, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 40352180, + "step": 235 + }, + { + "epoch": 0.062076675215361345, + "grad_norm": 15.151708802383407, + "learning_rate": 5e-06, + "loss": 0.3181, + "num_input_tokens_seen": 40524204, + "step": 236 + }, + { + "epoch": 0.062076675215361345, + "loss": 0.342043399810791, + "loss_ce": 0.015383241698145866, + "loss_iou": 0.1884765625, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 40524204, + "step": 236 + }, + { + "epoch": 0.06233971197474847, + "grad_norm": 23.279462740978527, + "learning_rate": 5e-06, + "loss": 0.3112, + "num_input_tokens_seen": 40694412, + "step": 237 + }, + { + "epoch": 0.06233971197474847, + "loss": 0.3619577884674072, + "loss_ce": 0.014667754992842674, + "loss_iou": NaN, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 40694412, + "step": 237 + }, + { + "epoch": 0.0626027487341356, + "grad_norm": 7.854110724383197, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 40866332, + "step": 238 + }, + { + "epoch": 0.0626027487341356, + "loss": 0.31817951798439026, + "loss_ce": 0.013125804252922535, + "loss_iou": 0.390625, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 40866332, + "step": 238 + }, + { + "epoch": 0.06286578549352272, + "grad_norm": 10.293296601258389, + "learning_rate": 5e-06, + "loss": 0.2924, + "num_input_tokens_seen": 41036884, + "step": 239 + }, + { + "epoch": 0.06286578549352272, + "loss": 0.20027026534080505, + "loss_ce": 0.012953377328813076, + "loss_iou": 0.470703125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 41036884, + "step": 239 + }, + { + "epoch": 0.06312882225290985, + "grad_norm": 27.954530904789568, + "learning_rate": 5e-06, + "loss": 0.3242, + "num_input_tokens_seen": 41209080, + "step": 240 + }, + { + "epoch": 0.06312882225290985, + "loss": 0.3325929641723633, + "loss_ce": 0.015088059939444065, + "loss_iou": 0.388671875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 41209080, + "step": 240 + }, + { + "epoch": 0.06339185901229696, + "grad_norm": 10.084557723509068, + "learning_rate": 5e-06, + "loss": 0.4017, + "num_input_tokens_seen": 41378712, + "step": 241 + }, + { + "epoch": 0.06339185901229696, + "loss": 0.42962974309921265, + "loss_ce": 0.01459068525582552, + "loss_iou": 0.1796875, + "loss_num": 0.0830078125, + "loss_xval": 0.4140625, + "num_input_tokens_seen": 41378712, + "step": 241 + }, + { + "epoch": 0.0636548957716841, + "grad_norm": 7.845640861327211, + "learning_rate": 5e-06, + "loss": 0.3082, + "num_input_tokens_seen": 41550924, + "step": 242 + }, + { + "epoch": 0.0636548957716841, + "loss": 0.3617081940174103, + "loss_ce": 0.013441601768136024, + "loss_iou": 0.306640625, + "loss_num": 0.06982421875, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 41550924, + "step": 242 + }, + { + "epoch": 0.06391793253107121, + "grad_norm": 7.755635758991768, + "learning_rate": 5e-06, + "loss": 0.2863, + "num_input_tokens_seen": 41723448, + "step": 243 + }, + { + "epoch": 0.06391793253107121, + "loss": 0.31799250841140747, + "loss_ce": 0.01220636535435915, + "loss_iou": 0.4609375, + "loss_num": 0.06103515625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 41723448, + "step": 243 + }, + { + "epoch": 0.06418096929045834, + "grad_norm": 10.028128094591828, + "learning_rate": 5e-06, + "loss": 0.3455, + "num_input_tokens_seen": 41895440, + "step": 244 + }, + { + "epoch": 0.06418096929045834, + "loss": 0.2664147913455963, + "loss_ce": 0.015926510095596313, + "loss_iou": 0.38671875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 41895440, + "step": 244 + }, + { + "epoch": 0.06444400604984546, + "grad_norm": 7.455815676512238, + "learning_rate": 5e-06, + "loss": 0.3158, + "num_input_tokens_seen": 42067812, + "step": 245 + }, + { + "epoch": 0.06444400604984546, + "loss": 0.315701961517334, + "loss_ce": 0.012784458696842194, + "loss_iou": 0.3515625, + "loss_num": 0.060546875, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 42067812, + "step": 245 + }, + { + "epoch": 0.06470704280923259, + "grad_norm": 7.183484053450612, + "learning_rate": 5e-06, + "loss": 0.2873, + "num_input_tokens_seen": 42238248, + "step": 246 + }, + { + "epoch": 0.06470704280923259, + "loss": 0.408550500869751, + "loss_ce": 0.015972375869750977, + "loss_iou": 0.46484375, + "loss_num": 0.07861328125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 42238248, + "step": 246 + }, + { + "epoch": 0.06497007956861972, + "grad_norm": 11.483582566544728, + "learning_rate": 5e-06, + "loss": 0.2422, + "num_input_tokens_seen": 42410540, + "step": 247 + }, + { + "epoch": 0.06497007956861972, + "loss": 0.24055451154708862, + "loss_ce": 0.012405097484588623, + "loss_iou": 0.455078125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 42410540, + "step": 247 + }, + { + "epoch": 0.06523311632800684, + "grad_norm": 9.093470574393532, + "learning_rate": 5e-06, + "loss": 0.2447, + "num_input_tokens_seen": 42582896, + "step": 248 + }, + { + "epoch": 0.06523311632800684, + "loss": 0.2380836009979248, + "loss_ce": 0.009445905685424805, + "loss_iou": 0.51171875, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 42582896, + "step": 248 + }, + { + "epoch": 0.06549615308739397, + "grad_norm": 11.56824851448475, + "learning_rate": 5e-06, + "loss": 0.3024, + "num_input_tokens_seen": 42755248, + "step": 249 + }, + { + "epoch": 0.06549615308739397, + "loss": 0.3298885226249695, + "loss_ce": 0.012871915474534035, + "loss_iou": 0.359375, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 42755248, + "step": 249 + }, + { + "epoch": 0.06575918984678109, + "grad_norm": 12.53262678976841, + "learning_rate": 5e-06, + "loss": 0.3279, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_websight_new_CIoU": 0.6498270332813263, + "eval_websight_new_GIoU": 0.6454348266124725, + "eval_websight_new_IoU": 0.6616384983062744, + "eval_websight_new_MAE_all": 0.05562719702720642, + "eval_websight_new_MAE_h": 0.049970587715506554, + "eval_websight_new_MAE_w": 0.07883360609412193, + "eval_websight_new_MAE_x": 0.06017959490418434, + "eval_websight_new_MAE_y": 0.03352500405162573, + "eval_websight_new_NUM_probability": 0.9025295078754425, + "eval_websight_new_inside_bbox": 0.9253472089767456, + "eval_websight_new_loss": 0.27164769172668457, + "eval_websight_new_loss_ce": 0.01081773592159152, + "eval_websight_new_loss_iou": 0.693359375, + "eval_websight_new_loss_num": 0.049407958984375, + "eval_websight_new_loss_xval": 0.24688720703125, + "eval_websight_new_runtime": 55.9304, + "eval_websight_new_samples_per_second": 0.894, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_seeclick_CIoU": 0.3509342074394226, + "eval_seeclick_GIoU": 0.3347575068473816, + "eval_seeclick_IoU": 0.3994097113609314, + "eval_seeclick_MAE_all": 0.0986204668879509, + "eval_seeclick_MAE_h": 0.0804138220846653, + "eval_seeclick_MAE_w": 0.13582541793584824, + "eval_seeclick_MAE_x": 0.12244484201073647, + "eval_seeclick_MAE_y": 0.05579778365790844, + "eval_seeclick_NUM_probability": 0.898758739233017, + "eval_seeclick_inside_bbox": 0.47727273404598236, + "eval_seeclick_loss": 0.4158971905708313, + "eval_seeclick_loss_ce": 0.027117961086332798, + "eval_seeclick_loss_iou": 0.66552734375, + "eval_seeclick_loss_num": 0.0771331787109375, + "eval_seeclick_loss_xval": 0.3856201171875, + "eval_seeclick_runtime": 77.1195, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_icons_CIoU": 0.533334881067276, + "eval_icons_GIoU": 0.525450587272644, + "eval_icons_IoU": 0.5554244518280029, + "eval_icons_MAE_all": 0.06896939501166344, + "eval_icons_MAE_h": 0.07008247822523117, + "eval_icons_MAE_w": 0.07993372157216072, + "eval_icons_MAE_x": 0.06088143028318882, + "eval_icons_MAE_y": 0.06497994437813759, + "eval_icons_NUM_probability": 0.9118345677852631, + "eval_icons_inside_bbox": 0.8038194477558136, + "eval_icons_loss": 0.238239586353302, + "eval_icons_loss_ce": 0.01238260930404067, + "eval_icons_loss_iou": 0.45098876953125, + "eval_icons_loss_num": 0.04291534423828125, + "eval_icons_loss_xval": 0.2144775390625, + "eval_icons_runtime": 89.0702, + "eval_icons_samples_per_second": 0.561, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "eval_screenspot_CIoU": 0.40743913253148395, + "eval_screenspot_GIoU": 0.38262539108594257, + "eval_screenspot_IoU": 0.4510061542193095, + "eval_screenspot_MAE_all": 0.10948735972245534, + "eval_screenspot_MAE_h": 0.07818744828303655, + "eval_screenspot_MAE_w": 0.15290210396051407, + "eval_screenspot_MAE_x": 0.12599809964497885, + "eval_screenspot_MAE_y": 0.08086179196834564, + "eval_screenspot_NUM_probability": 0.9086714188257853, + "eval_screenspot_inside_bbox": 0.7354166706403097, + "eval_screenspot_loss": 0.9193825125694275, + "eval_screenspot_loss_ce": 0.48994149764378864, + "eval_screenspot_loss_iou": 0.5732421875, + "eval_screenspot_loss_num": 0.08390299479166667, + "eval_screenspot_loss_xval": 0.4192708333333333, + "eval_screenspot_runtime": 138.8534, + "eval_screenspot_samples_per_second": 0.641, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06575918984678109, + "loss": 0.8642065525054932, + "loss_ce": 0.45453858375549316, + "loss_iou": 0.53125, + "loss_num": 0.08203125, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 42927164, + "step": 250 + }, + { + "epoch": 0.06602222660616822, + "grad_norm": 16.162110660395133, + "learning_rate": 5e-06, + "loss": 0.3389, + "num_input_tokens_seen": 43099272, + "step": 251 + }, + { + "epoch": 0.06602222660616822, + "loss": 0.3324888348579407, + "loss_ce": 0.009490801021456718, + "loss_iou": 0.484375, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 43099272, + "step": 251 + }, + { + "epoch": 0.06628526336555533, + "grad_norm": 15.788401372124632, + "learning_rate": 5e-06, + "loss": 0.3827, + "num_input_tokens_seen": 43271440, + "step": 252 + }, + { + "epoch": 0.06628526336555533, + "loss": 0.3184235990047455, + "loss_ce": 0.009219500236213207, + "loss_iou": 0.453125, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 43271440, + "step": 252 + }, + { + "epoch": 0.06654830012494246, + "grad_norm": 19.21368677855186, + "learning_rate": 5e-06, + "loss": 0.4031, + "num_input_tokens_seen": 43443704, + "step": 253 + }, + { + "epoch": 0.06654830012494246, + "loss": 0.3337656557559967, + "loss_ce": 0.009546896442770958, + "loss_iou": 0.1796875, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 43443704, + "step": 253 + }, + { + "epoch": 0.06681133688432958, + "grad_norm": 9.350851106906616, + "learning_rate": 5e-06, + "loss": 0.2501, + "num_input_tokens_seen": 43615820, + "step": 254 + }, + { + "epoch": 0.06681133688432958, + "loss": 0.22743698954582214, + "loss_ce": 0.009236322715878487, + "loss_iou": 0.408203125, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 43615820, + "step": 254 + }, + { + "epoch": 0.06707437364371671, + "grad_norm": 12.730744024793406, + "learning_rate": 5e-06, + "loss": 0.3033, + "num_input_tokens_seen": 43787664, + "step": 255 + }, + { + "epoch": 0.06707437364371671, + "loss": 0.3385705351829529, + "loss_ce": 0.009468959644436836, + "loss_iou": 0.193359375, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 43787664, + "step": 255 + }, + { + "epoch": 0.06733741040310383, + "grad_norm": 20.602499715875325, + "learning_rate": 5e-06, + "loss": 0.335, + "num_input_tokens_seen": 43956324, + "step": 256 + }, + { + "epoch": 0.06733741040310383, + "loss": 0.33848732709884644, + "loss_ce": 0.012559601105749607, + "loss_iou": 0.32421875, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 43956324, + "step": 256 + }, + { + "epoch": 0.06760044716249096, + "grad_norm": 11.274485245872459, + "learning_rate": 5e-06, + "loss": 0.241, + "num_input_tokens_seen": 44128740, + "step": 257 + }, + { + "epoch": 0.06760044716249096, + "loss": 0.24955862760543823, + "loss_ce": 0.011765659786760807, + "loss_iou": 0.330078125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 44128740, + "step": 257 + }, + { + "epoch": 0.06786348392187808, + "grad_norm": 12.119376554642344, + "learning_rate": 5e-06, + "loss": 0.359, + "num_input_tokens_seen": 44299372, + "step": 258 + }, + { + "epoch": 0.06786348392187808, + "loss": 0.26774948835372925, + "loss_ce": 0.01604049652814865, + "loss_iou": 0.56640625, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 44299372, + "step": 258 + }, + { + "epoch": 0.0681265206812652, + "grad_norm": 15.925957923618926, + "learning_rate": 5e-06, + "loss": 0.3045, + "num_input_tokens_seen": 44469736, + "step": 259 + }, + { + "epoch": 0.0681265206812652, + "loss": 0.35633599758148193, + "loss_ce": 0.01502738706767559, + "loss_iou": 0.515625, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 44469736, + "step": 259 + }, + { + "epoch": 0.06838955744065234, + "grad_norm": 13.189087784364153, + "learning_rate": 5e-06, + "loss": 0.328, + "num_input_tokens_seen": 44642020, + "step": 260 + }, + { + "epoch": 0.06838955744065234, + "loss": 0.4303101599216461, + "loss_ce": 0.01173105463385582, + "loss_iou": 0.44921875, + "loss_num": 0.08349609375, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 44642020, + "step": 260 + }, + { + "epoch": 0.06865259420003945, + "grad_norm": 11.878594650017698, + "learning_rate": 5e-06, + "loss": 0.2752, + "num_input_tokens_seen": 44814452, + "step": 261 + }, + { + "epoch": 0.06865259420003945, + "loss": 0.28906819224357605, + "loss_ce": 0.0090388935059309, + "loss_iou": 0.251953125, + "loss_num": 0.05615234375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 44814452, + "step": 261 + }, + { + "epoch": 0.06891563095942659, + "grad_norm": 8.62440603357029, + "learning_rate": 5e-06, + "loss": 0.2926, + "num_input_tokens_seen": 44986656, + "step": 262 + }, + { + "epoch": 0.06891563095942659, + "loss": 0.24431782960891724, + "loss_ce": 0.008111760020256042, + "loss_iou": 0.486328125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 44986656, + "step": 262 + }, + { + "epoch": 0.0691786677188137, + "grad_norm": 7.7035777517167645, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 45158540, + "step": 263 + }, + { + "epoch": 0.0691786677188137, + "loss": 0.23680636286735535, + "loss_ce": 0.013417699374258518, + "loss_iou": 0.46484375, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 45158540, + "step": 263 + }, + { + "epoch": 0.06944170447820083, + "grad_norm": 11.11690642897216, + "learning_rate": 5e-06, + "loss": 0.2503, + "num_input_tokens_seen": 45330356, + "step": 264 + }, + { + "epoch": 0.06944170447820083, + "loss": 0.25701650977134705, + "loss_ce": 0.01019032672047615, + "loss_iou": 0.24609375, + "loss_num": 0.04931640625, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 45330356, + "step": 264 + }, + { + "epoch": 0.06970474123758795, + "grad_norm": 16.27774203292456, + "learning_rate": 5e-06, + "loss": 0.327, + "num_input_tokens_seen": 45502460, + "step": 265 + }, + { + "epoch": 0.06970474123758795, + "loss": 0.3339051902294159, + "loss_ce": 0.011639568954706192, + "loss_iou": 0.4921875, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 45502460, + "step": 265 + }, + { + "epoch": 0.06996777799697508, + "grad_norm": 11.740826510166434, + "learning_rate": 5e-06, + "loss": 0.3816, + "num_input_tokens_seen": 45674688, + "step": 266 + }, + { + "epoch": 0.06996777799697508, + "loss": 0.30319204926490784, + "loss_ce": 0.016082679852843285, + "loss_iou": 0.255859375, + "loss_num": 0.057373046875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 45674688, + "step": 266 + }, + { + "epoch": 0.0702308147563622, + "grad_norm": 7.300788157576167, + "learning_rate": 5e-06, + "loss": 0.29, + "num_input_tokens_seen": 45847124, + "step": 267 + }, + { + "epoch": 0.0702308147563622, + "loss": 0.3247534930706024, + "loss_ce": 0.009140691719949245, + "loss_iou": 0.33203125, + "loss_num": 0.06298828125, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 45847124, + "step": 267 + }, + { + "epoch": 0.07049385151574933, + "grad_norm": 7.309781555609638, + "learning_rate": 5e-06, + "loss": 0.2756, + "num_input_tokens_seen": 46019340, + "step": 268 + }, + { + "epoch": 0.07049385151574933, + "loss": 0.2533281445503235, + "loss_ce": 0.008699209429323673, + "loss_iou": 0.38671875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 46019340, + "step": 268 + }, + { + "epoch": 0.07075688827513645, + "grad_norm": 13.089258421868523, + "learning_rate": 5e-06, + "loss": 0.2701, + "num_input_tokens_seen": 46189784, + "step": 269 + }, + { + "epoch": 0.07075688827513645, + "loss": 0.2624807357788086, + "loss_ce": 0.01223657839000225, + "loss_iou": 0.435546875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 46189784, + "step": 269 + }, + { + "epoch": 0.07101992503452358, + "grad_norm": 6.658182143864948, + "learning_rate": 5e-06, + "loss": 0.2118, + "num_input_tokens_seen": 46361728, + "step": 270 + }, + { + "epoch": 0.07101992503452358, + "loss": 0.2072766274213791, + "loss_ce": 0.007569607347249985, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 46361728, + "step": 270 + }, + { + "epoch": 0.07128296179391069, + "grad_norm": 9.079661259155806, + "learning_rate": 5e-06, + "loss": 0.2305, + "num_input_tokens_seen": 46534100, + "step": 271 + }, + { + "epoch": 0.07128296179391069, + "loss": 0.21106520295143127, + "loss_ce": 0.008977807126939297, + "loss_iou": 0.609375, + "loss_num": 0.040283203125, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 46534100, + "step": 271 + }, + { + "epoch": 0.07154599855329782, + "grad_norm": 9.513732560495592, + "learning_rate": 5e-06, + "loss": 0.2646, + "num_input_tokens_seen": 46704648, + "step": 272 + }, + { + "epoch": 0.07154599855329782, + "loss": 0.2694496214389801, + "loss_ce": 0.007120497524738312, + "loss_iou": 0.361328125, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 46704648, + "step": 272 + }, + { + "epoch": 0.07180903531268495, + "grad_norm": 15.714221513863984, + "learning_rate": 5e-06, + "loss": 0.3078, + "num_input_tokens_seen": 46876784, + "step": 273 + }, + { + "epoch": 0.07180903531268495, + "loss": 0.39289578795433044, + "loss_ce": 0.006909476593136787, + "loss_iou": 0.5, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 46876784, + "step": 273 + }, + { + "epoch": 0.07207207207207207, + "grad_norm": 8.093127002316251, + "learning_rate": 5e-06, + "loss": 0.3025, + "num_input_tokens_seen": 47048876, + "step": 274 + }, + { + "epoch": 0.07207207207207207, + "loss": 0.25725314021110535, + "loss_ce": 0.011525607667863369, + "loss_iou": 0.30859375, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 47048876, + "step": 274 + }, + { + "epoch": 0.0723351088314592, + "grad_norm": 8.569390693529225, + "learning_rate": 5e-06, + "loss": 0.297, + "num_input_tokens_seen": 47221068, + "step": 275 + }, + { + "epoch": 0.0723351088314592, + "loss": 0.2686833143234253, + "loss_ce": 0.006720416247844696, + "loss_iou": 0.404296875, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 47221068, + "step": 275 + }, + { + "epoch": 0.07259814559084632, + "grad_norm": 10.067643264055917, + "learning_rate": 5e-06, + "loss": 0.2437, + "num_input_tokens_seen": 47391436, + "step": 276 + }, + { + "epoch": 0.07259814559084632, + "loss": 0.2530132830142975, + "loss_ce": 0.007896100170910358, + "loss_iou": 0.330078125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 47391436, + "step": 276 + }, + { + "epoch": 0.07286118235023345, + "grad_norm": 13.479763232881819, + "learning_rate": 5e-06, + "loss": 0.3643, + "num_input_tokens_seen": 47563512, + "step": 277 + }, + { + "epoch": 0.07286118235023345, + "loss": 0.2713325619697571, + "loss_ce": 0.013642151840031147, + "loss_iou": 0.337890625, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 47563512, + "step": 277 + }, + { + "epoch": 0.07312421910962057, + "grad_norm": 16.755961470396134, + "learning_rate": 5e-06, + "loss": 0.3279, + "num_input_tokens_seen": 47735372, + "step": 278 + }, + { + "epoch": 0.07312421910962057, + "loss": 0.42755797505378723, + "loss_ce": 0.008124373853206635, + "loss_iou": 0.365234375, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 47735372, + "step": 278 + }, + { + "epoch": 0.0733872558690077, + "grad_norm": 13.89589851066108, + "learning_rate": 5e-06, + "loss": 0.3883, + "num_input_tokens_seen": 47907704, + "step": 279 + }, + { + "epoch": 0.0733872558690077, + "loss": 0.41463613510131836, + "loss_ce": 0.013879301026463509, + "loss_iou": 0.1337890625, + "loss_num": 0.080078125, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 47907704, + "step": 279 + }, + { + "epoch": 0.07365029262839481, + "grad_norm": 13.52575149586993, + "learning_rate": 5e-06, + "loss": 0.2798, + "num_input_tokens_seen": 48079692, + "step": 280 + }, + { + "epoch": 0.07365029262839481, + "loss": 0.2906692624092102, + "loss_ce": 0.007099920883774757, + "loss_iou": 0.45703125, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 48079692, + "step": 280 + }, + { + "epoch": 0.07391332938778195, + "grad_norm": 11.637653668969445, + "learning_rate": 5e-06, + "loss": 0.2883, + "num_input_tokens_seen": 48251572, + "step": 281 + }, + { + "epoch": 0.07391332938778195, + "loss": 0.2844490110874176, + "loss_ce": 0.010157023556530476, + "loss_iou": 0.490234375, + "loss_num": 0.054931640625, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 48251572, + "step": 281 + }, + { + "epoch": 0.07417636614716906, + "grad_norm": 9.21180215619434, + "learning_rate": 5e-06, + "loss": 0.2978, + "num_input_tokens_seen": 48423560, + "step": 282 + }, + { + "epoch": 0.07417636614716906, + "loss": 0.32979628443717957, + "loss_ce": 0.012047262862324715, + "loss_iou": 0.42578125, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 48423560, + "step": 282 + }, + { + "epoch": 0.07443940290655619, + "grad_norm": 7.167038204924764, + "learning_rate": 5e-06, + "loss": 0.256, + "num_input_tokens_seen": 48595680, + "step": 283 + }, + { + "epoch": 0.07443940290655619, + "loss": 0.25096797943115234, + "loss_ce": 0.007925992831587791, + "loss_iou": 0.330078125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 48595680, + "step": 283 + }, + { + "epoch": 0.07470243966594331, + "grad_norm": 11.715910802137108, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 48766076, + "step": 284 + }, + { + "epoch": 0.07470243966594331, + "loss": 0.19452951848506927, + "loss_ce": 0.005930891260504723, + "loss_iou": 0.51171875, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 48766076, + "step": 284 + }, + { + "epoch": 0.07496547642533044, + "grad_norm": 11.971840696280033, + "learning_rate": 5e-06, + "loss": 0.3413, + "num_input_tokens_seen": 48937948, + "step": 285 + }, + { + "epoch": 0.07496547642533044, + "loss": 0.349312424659729, + "loss_ce": 0.00910246279090643, + "loss_iou": 0.5078125, + "loss_num": 0.06787109375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 48937948, + "step": 285 + }, + { + "epoch": 0.07522851318471756, + "grad_norm": 9.505447479710712, + "learning_rate": 5e-06, + "loss": 0.2998, + "num_input_tokens_seen": 49110048, + "step": 286 + }, + { + "epoch": 0.07522851318471756, + "loss": 0.3620803654193878, + "loss_ce": 0.0063674794510006905, + "loss_iou": 0.376953125, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 49110048, + "step": 286 + }, + { + "epoch": 0.07549154994410469, + "grad_norm": 9.524937593510838, + "learning_rate": 5e-06, + "loss": 0.2473, + "num_input_tokens_seen": 49281924, + "step": 287 + }, + { + "epoch": 0.07549154994410469, + "loss": 0.21198034286499023, + "loss_ce": 0.0065360115841031075, + "loss_iou": 0.4140625, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 49281924, + "step": 287 + }, + { + "epoch": 0.07575458670349182, + "grad_norm": 9.713489351185899, + "learning_rate": 5e-06, + "loss": 0.2911, + "num_input_tokens_seen": 49454288, + "step": 288 + }, + { + "epoch": 0.07575458670349182, + "loss": 0.35456106066703796, + "loss_ce": 0.01032277476042509, + "loss_iou": 0.4375, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 49454288, + "step": 288 + }, + { + "epoch": 0.07601762346287894, + "grad_norm": 8.377848047343468, + "learning_rate": 5e-06, + "loss": 0.2711, + "num_input_tokens_seen": 49626816, + "step": 289 + }, + { + "epoch": 0.07601762346287894, + "loss": 0.2953585684299469, + "loss_ce": 0.005685708485543728, + "loss_iou": 0.2890625, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 49626816, + "step": 289 + }, + { + "epoch": 0.07628066022226607, + "grad_norm": 10.429005548611427, + "learning_rate": 5e-06, + "loss": 0.2446, + "num_input_tokens_seen": 49798788, + "step": 290 + }, + { + "epoch": 0.07628066022226607, + "loss": 0.19721034169197083, + "loss_ce": 0.007268924731761217, + "loss_iou": 0.39453125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 49798788, + "step": 290 + }, + { + "epoch": 0.07654369698165318, + "grad_norm": 13.641317737771786, + "learning_rate": 5e-06, + "loss": 0.3162, + "num_input_tokens_seen": 49970924, + "step": 291 + }, + { + "epoch": 0.07654369698165318, + "loss": 0.3052144944667816, + "loss_ce": 0.00528772734105587, + "loss_iou": 0.67578125, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 49970924, + "step": 291 + }, + { + "epoch": 0.07680673374104031, + "grad_norm": 13.717892281601975, + "learning_rate": 5e-06, + "loss": 0.2925, + "num_input_tokens_seen": 50143128, + "step": 292 + }, + { + "epoch": 0.07680673374104031, + "loss": 0.3288165032863617, + "loss_ce": 0.006795027758926153, + "loss_iou": 0.34375, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 50143128, + "step": 292 + }, + { + "epoch": 0.07706977050042743, + "grad_norm": 9.087941964788197, + "learning_rate": 5e-06, + "loss": 0.2844, + "num_input_tokens_seen": 50315356, + "step": 293 + }, + { + "epoch": 0.07706977050042743, + "loss": 0.30062055587768555, + "loss_ce": 0.006186962127685547, + "loss_iou": 0.4140625, + "loss_num": 0.058837890625, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 50315356, + "step": 293 + }, + { + "epoch": 0.07733280725981456, + "grad_norm": 8.950504335091761, + "learning_rate": 5e-06, + "loss": 0.2474, + "num_input_tokens_seen": 50487452, + "step": 294 + }, + { + "epoch": 0.07733280725981456, + "loss": 0.20027770102024078, + "loss_ce": 0.007284537889063358, + "loss_iou": 0.58203125, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 50487452, + "step": 294 + }, + { + "epoch": 0.07759584401920168, + "grad_norm": 12.276590112874837, + "learning_rate": 5e-06, + "loss": 0.2962, + "num_input_tokens_seen": 50659340, + "step": 295 + }, + { + "epoch": 0.07759584401920168, + "loss": 0.3079564869403839, + "loss_ce": 0.008151799440383911, + "loss_iou": 0.5625, + "loss_num": 0.059814453125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 50659340, + "step": 295 + }, + { + "epoch": 0.07785888077858881, + "grad_norm": 10.920612112253043, + "learning_rate": 5e-06, + "loss": 0.2779, + "num_input_tokens_seen": 50831668, + "step": 296 + }, + { + "epoch": 0.07785888077858881, + "loss": 0.3526954650878906, + "loss_ce": 0.01395035907626152, + "loss_iou": 0.470703125, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 50831668, + "step": 296 + }, + { + "epoch": 0.07812191753797593, + "grad_norm": 14.39030370540137, + "learning_rate": 5e-06, + "loss": 0.317, + "num_input_tokens_seen": 51004036, + "step": 297 + }, + { + "epoch": 0.07812191753797593, + "loss": 0.3473885655403137, + "loss_ce": 0.01047449093312025, + "loss_iou": 0.3046875, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 51004036, + "step": 297 + }, + { + "epoch": 0.07838495429736306, + "grad_norm": 10.790411077010845, + "learning_rate": 5e-06, + "loss": 0.2641, + "num_input_tokens_seen": 51173952, + "step": 298 + }, + { + "epoch": 0.07838495429736306, + "loss": 0.24334201216697693, + "loss_ce": 0.0049387002363801, + "loss_iou": 0.453125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 51173952, + "step": 298 + }, + { + "epoch": 0.07864799105675017, + "grad_norm": 12.163313302369062, + "learning_rate": 5e-06, + "loss": 0.277, + "num_input_tokens_seen": 51346208, + "step": 299 + }, + { + "epoch": 0.07864799105675017, + "loss": 0.33479589223861694, + "loss_ce": 0.005816406104713678, + "loss_iou": 0.408203125, + "loss_num": 0.06591796875, + "loss_xval": 0.328125, + "num_input_tokens_seen": 51346208, + "step": 299 + }, + { + "epoch": 0.0789110278161373, + "grad_norm": 13.775522127007955, + "learning_rate": 5e-06, + "loss": 0.3084, + "num_input_tokens_seen": 51516552, + "step": 300 + }, + { + "epoch": 0.0789110278161373, + "loss": 0.2704133987426758, + "loss_ce": 0.004422190133482218, + "loss_iou": 0.2294921875, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 51516552, + "step": 300 + }, + { + "epoch": 0.07917406457552444, + "grad_norm": 11.14460650886844, + "learning_rate": 5e-06, + "loss": 0.2851, + "num_input_tokens_seen": 51688716, + "step": 301 + }, + { + "epoch": 0.07917406457552444, + "loss": 0.2599673867225647, + "loss_ce": 0.008197364397346973, + "loss_iou": 0.4921875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 51688716, + "step": 301 + }, + { + "epoch": 0.07943710133491155, + "grad_norm": 14.089462259095386, + "learning_rate": 5e-06, + "loss": 0.3106, + "num_input_tokens_seen": 51861284, + "step": 302 + }, + { + "epoch": 0.07943710133491155, + "loss": 0.25006186962127686, + "loss_ce": 0.004822590388357639, + "loss_iou": 0.2734375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 51861284, + "step": 302 + }, + { + "epoch": 0.07970013809429868, + "grad_norm": 11.560263696218914, + "learning_rate": 5e-06, + "loss": 0.2256, + "num_input_tokens_seen": 52033296, + "step": 303 + }, + { + "epoch": 0.07970013809429868, + "loss": 0.2260829657316208, + "loss_ce": 0.005379845853894949, + "loss_iou": 0.58984375, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 52033296, + "step": 303 + }, + { + "epoch": 0.0799631748536858, + "grad_norm": 11.109765340752924, + "learning_rate": 5e-06, + "loss": 0.2728, + "num_input_tokens_seen": 52205488, + "step": 304 + }, + { + "epoch": 0.0799631748536858, + "loss": 0.26471367478370667, + "loss_ce": 0.0049480353482067585, + "loss_iou": 0.296875, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 52205488, + "step": 304 + }, + { + "epoch": 0.08022621161307293, + "grad_norm": 9.561565409079957, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 52377756, + "step": 305 + }, + { + "epoch": 0.08022621161307293, + "loss": 0.18537692725658417, + "loss_ce": 0.0050790803506970406, + "loss_iou": 0.431640625, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 52377756, + "step": 305 + }, + { + "epoch": 0.08048924837246005, + "grad_norm": 11.190229795777121, + "learning_rate": 5e-06, + "loss": 0.2198, + "num_input_tokens_seen": 52549784, + "step": 306 + }, + { + "epoch": 0.08048924837246005, + "loss": 0.1936880648136139, + "loss_ce": 0.009972257539629936, + "loss_iou": 0.490234375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 52549784, + "step": 306 + }, + { + "epoch": 0.08075228513184718, + "grad_norm": 9.790391883995309, + "learning_rate": 5e-06, + "loss": 0.2383, + "num_input_tokens_seen": 52719856, + "step": 307 + }, + { + "epoch": 0.08075228513184718, + "loss": 0.16280654072761536, + "loss_ce": 0.007838279940187931, + "loss_iou": 0.5859375, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 52719856, + "step": 307 + }, + { + "epoch": 0.0810153218912343, + "grad_norm": 9.839075915450485, + "learning_rate": 5e-06, + "loss": 0.2743, + "num_input_tokens_seen": 52892084, + "step": 308 + }, + { + "epoch": 0.0810153218912343, + "loss": 0.15204857289791107, + "loss_ce": 0.004587644245475531, + "loss_iou": 0.416015625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 52892084, + "step": 308 + }, + { + "epoch": 0.08127835865062143, + "grad_norm": 12.7104246981192, + "learning_rate": 5e-06, + "loss": 0.2624, + "num_input_tokens_seen": 53064144, + "step": 309 + }, + { + "epoch": 0.08127835865062143, + "loss": 0.25361326336860657, + "loss_ce": 0.009228497743606567, + "loss_iou": 0.7109375, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 53064144, + "step": 309 + }, + { + "epoch": 0.08154139541000854, + "grad_norm": 9.929561281282899, + "learning_rate": 5e-06, + "loss": 0.3246, + "num_input_tokens_seen": 53236468, + "step": 310 + }, + { + "epoch": 0.08154139541000854, + "loss": 0.39354461431503296, + "loss_ce": 0.005116886459290981, + "loss_iou": 0.44140625, + "loss_num": 0.07763671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 53236468, + "step": 310 + }, + { + "epoch": 0.08180443216939567, + "grad_norm": 7.973294747772443, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 53408452, + "step": 311 + }, + { + "epoch": 0.08180443216939567, + "loss": 0.2905076742172241, + "loss_ce": 0.005351424217224121, + "loss_iou": NaN, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 53408452, + "step": 311 + }, + { + "epoch": 0.08206746892878279, + "grad_norm": 7.623528366567178, + "learning_rate": 5e-06, + "loss": 0.2153, + "num_input_tokens_seen": 53580848, + "step": 312 + }, + { + "epoch": 0.08206746892878279, + "loss": 0.19026613235473633, + "loss_ce": 0.005817888304591179, + "loss_iou": 0.5546875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 53580848, + "step": 312 + }, + { + "epoch": 0.08233050568816992, + "grad_norm": 8.591099989866738, + "learning_rate": 5e-06, + "loss": 0.2192, + "num_input_tokens_seen": 53751420, + "step": 313 + }, + { + "epoch": 0.08233050568816992, + "loss": 0.16798478364944458, + "loss_ce": 0.005326096434146166, + "loss_iou": 0.515625, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 53751420, + "step": 313 + }, + { + "epoch": 0.08259354244755705, + "grad_norm": 8.168930137137453, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 53923692, + "step": 314 + }, + { + "epoch": 0.08259354244755705, + "loss": 0.1808127462863922, + "loss_ce": 0.006557377986609936, + "loss_iou": 0.474609375, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 53923692, + "step": 314 + }, + { + "epoch": 0.08285657920694417, + "grad_norm": 12.69374602286009, + "learning_rate": 5e-06, + "loss": 0.255, + "num_input_tokens_seen": 54095596, + "step": 315 + }, + { + "epoch": 0.08285657920694417, + "loss": 0.2128537893295288, + "loss_ce": 0.00447976216673851, + "loss_iou": 0.470703125, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 54095596, + "step": 315 + }, + { + "epoch": 0.0831196159663313, + "grad_norm": 15.86710980709006, + "learning_rate": 5e-06, + "loss": 0.3231, + "num_input_tokens_seen": 54267660, + "step": 316 + }, + { + "epoch": 0.0831196159663313, + "loss": 0.29302194714546204, + "loss_ce": 0.004081526771187782, + "loss_iou": 0.2890625, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 54267660, + "step": 316 + }, + { + "epoch": 0.08338265272571842, + "grad_norm": 15.523568167979095, + "learning_rate": 5e-06, + "loss": 0.2565, + "num_input_tokens_seen": 54439776, + "step": 317 + }, + { + "epoch": 0.08338265272571842, + "loss": 0.45379549264907837, + "loss_ce": 0.00494293263182044, + "loss_iou": 0.421875, + "loss_num": 0.08984375, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 54439776, + "step": 317 + }, + { + "epoch": 0.08364568948510555, + "grad_norm": 8.655289602311944, + "learning_rate": 5e-06, + "loss": 0.2346, + "num_input_tokens_seen": 54612224, + "step": 318 + }, + { + "epoch": 0.08364568948510555, + "loss": 0.22014674544334412, + "loss_ce": 0.008476818911731243, + "loss_iou": 0.53515625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 54612224, + "step": 318 + }, + { + "epoch": 0.08390872624449267, + "grad_norm": 10.678383550359573, + "learning_rate": 5e-06, + "loss": 0.2569, + "num_input_tokens_seen": 54782956, + "step": 319 + }, + { + "epoch": 0.08390872624449267, + "loss": 0.2195930778980255, + "loss_ce": 0.004505200777202845, + "loss_iou": 0.36328125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 54782956, + "step": 319 + }, + { + "epoch": 0.0841717630038798, + "grad_norm": 9.930533969178247, + "learning_rate": 5e-06, + "loss": 0.2305, + "num_input_tokens_seen": 54955104, + "step": 320 + }, + { + "epoch": 0.0841717630038798, + "loss": 0.24099573493003845, + "loss_ce": 0.008909543044865131, + "loss_iou": 0.57421875, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 54955104, + "step": 320 + }, + { + "epoch": 0.08443479976326691, + "grad_norm": 12.929508923145086, + "learning_rate": 5e-06, + "loss": 0.2998, + "num_input_tokens_seen": 55127092, + "step": 321 + }, + { + "epoch": 0.08443479976326691, + "loss": 0.3190174698829651, + "loss_ce": 0.008958851918578148, + "loss_iou": 0.53125, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 55127092, + "step": 321 + }, + { + "epoch": 0.08469783652265404, + "grad_norm": 8.336572461824584, + "learning_rate": 5e-06, + "loss": 0.2781, + "num_input_tokens_seen": 55299364, + "step": 322 + }, + { + "epoch": 0.08469783652265404, + "loss": 0.21446217596530914, + "loss_ce": 0.005966084077954292, + "loss_iou": 0.1796875, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 55299364, + "step": 322 + }, + { + "epoch": 0.08496087328204116, + "grad_norm": 9.09772392048227, + "learning_rate": 5e-06, + "loss": 0.2929, + "num_input_tokens_seen": 55468252, + "step": 323 + }, + { + "epoch": 0.08496087328204116, + "loss": 0.28219783306121826, + "loss_ce": 0.007356537505984306, + "loss_iou": 0.48046875, + "loss_num": 0.054931640625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 55468252, + "step": 323 + }, + { + "epoch": 0.08522391004142829, + "grad_norm": 9.926795422915726, + "learning_rate": 5e-06, + "loss": 0.2412, + "num_input_tokens_seen": 55640432, + "step": 324 + }, + { + "epoch": 0.08522391004142829, + "loss": 0.2594638466835022, + "loss_ce": 0.006412106566131115, + "loss_iou": 0.59375, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 55640432, + "step": 324 + }, + { + "epoch": 0.08548694680081541, + "grad_norm": 13.161460539721551, + "learning_rate": 5e-06, + "loss": 0.3056, + "num_input_tokens_seen": 55812284, + "step": 325 + }, + { + "epoch": 0.08548694680081541, + "loss": 0.30750948190689087, + "loss_ce": 0.003920634277164936, + "loss_iou": 0.416015625, + "loss_num": 0.060791015625, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 55812284, + "step": 325 + }, + { + "epoch": 0.08574998356020254, + "grad_norm": 12.20729496362805, + "learning_rate": 5e-06, + "loss": 0.2492, + "num_input_tokens_seen": 55982812, + "step": 326 + }, + { + "epoch": 0.08574998356020254, + "loss": 0.25009769201278687, + "loss_ce": 0.003637729212641716, + "loss_iou": 0.435546875, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 55982812, + "step": 326 + }, + { + "epoch": 0.08601302031958966, + "grad_norm": 8.486549179327342, + "learning_rate": 5e-06, + "loss": 0.2612, + "num_input_tokens_seen": 56154880, + "step": 327 + }, + { + "epoch": 0.08601302031958966, + "loss": 0.24035008251667023, + "loss_ce": 0.006341288331896067, + "loss_iou": 0.4453125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 56154880, + "step": 327 + }, + { + "epoch": 0.08627605707897679, + "grad_norm": 12.368715754730403, + "learning_rate": 5e-06, + "loss": 0.2435, + "num_input_tokens_seen": 56327104, + "step": 328 + }, + { + "epoch": 0.08627605707897679, + "loss": 0.2434852123260498, + "loss_ce": 0.003983248956501484, + "loss_iou": 0.65234375, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 56327104, + "step": 328 + }, + { + "epoch": 0.08653909383836392, + "grad_norm": 16.286201391843196, + "learning_rate": 5e-06, + "loss": 0.3578, + "num_input_tokens_seen": 56498760, + "step": 329 + }, + { + "epoch": 0.08653909383836392, + "loss": 0.3906838297843933, + "loss_ce": 0.00481957383453846, + "loss_iou": 0.388671875, + "loss_num": 0.0771484375, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 56498760, + "step": 329 + }, + { + "epoch": 0.08680213059775103, + "grad_norm": 9.709126690100668, + "learning_rate": 5e-06, + "loss": 0.2709, + "num_input_tokens_seen": 56669204, + "step": 330 + }, + { + "epoch": 0.08680213059775103, + "loss": 0.2887915074825287, + "loss_ce": 0.006198735907673836, + "loss_iou": 0.359375, + "loss_num": 0.056396484375, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 56669204, + "step": 330 + }, + { + "epoch": 0.08706516735713817, + "grad_norm": 9.72107226431463, + "learning_rate": 5e-06, + "loss": 0.2546, + "num_input_tokens_seen": 56841324, + "step": 331 + }, + { + "epoch": 0.08706516735713817, + "loss": 0.17188376188278198, + "loss_ce": 0.005013656336814165, + "loss_iou": 0.423828125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 56841324, + "step": 331 + }, + { + "epoch": 0.08732820411652528, + "grad_norm": 14.410704147758572, + "learning_rate": 5e-06, + "loss": 0.2888, + "num_input_tokens_seen": 57013308, + "step": 332 + }, + { + "epoch": 0.08732820411652528, + "loss": 0.2731274664402008, + "loss_ce": 0.007868663407862186, + "loss_iou": 0.5859375, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 57013308, + "step": 332 + }, + { + "epoch": 0.08759124087591241, + "grad_norm": 9.729446702170758, + "learning_rate": 5e-06, + "loss": 0.2747, + "num_input_tokens_seen": 57185320, + "step": 333 + }, + { + "epoch": 0.08759124087591241, + "loss": 0.18412570655345917, + "loss_ce": 0.004499247297644615, + "loss_iou": 0.57421875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 57185320, + "step": 333 + }, + { + "epoch": 0.08785427763529953, + "grad_norm": 13.290037619570391, + "learning_rate": 5e-06, + "loss": 0.2065, + "num_input_tokens_seen": 57355852, + "step": 334 + }, + { + "epoch": 0.08785427763529953, + "loss": 0.17899873852729797, + "loss_ce": 0.007001678459346294, + "loss_iou": 0.32421875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 57355852, + "step": 334 + }, + { + "epoch": 0.08811731439468666, + "grad_norm": 13.54518630333599, + "learning_rate": 5e-06, + "loss": 0.3901, + "num_input_tokens_seen": 57526160, + "step": 335 + }, + { + "epoch": 0.08811731439468666, + "loss": 0.42548078298568726, + "loss_ce": 0.004582356195896864, + "loss_iou": 0.72265625, + "loss_num": 0.083984375, + "loss_xval": 0.421875, + "num_input_tokens_seen": 57526160, + "step": 335 + }, + { + "epoch": 0.08838035115407378, + "grad_norm": 12.0949131727458, + "learning_rate": 5e-06, + "loss": 0.3092, + "num_input_tokens_seen": 57698240, + "step": 336 + }, + { + "epoch": 0.08838035115407378, + "loss": 0.2829555869102478, + "loss_ce": 0.005245603621006012, + "loss_iou": 0.4765625, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 57698240, + "step": 336 + }, + { + "epoch": 0.08864338791346091, + "grad_norm": 11.320262534807219, + "learning_rate": 5e-06, + "loss": 0.2301, + "num_input_tokens_seen": 57870476, + "step": 337 + }, + { + "epoch": 0.08864338791346091, + "loss": 0.18072998523712158, + "loss_ce": 0.004704595077782869, + "loss_iou": 0.578125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 57870476, + "step": 337 + }, + { + "epoch": 0.08890642467284803, + "grad_norm": 7.708963725187913, + "learning_rate": 5e-06, + "loss": 0.2643, + "num_input_tokens_seen": 58042960, + "step": 338 + }, + { + "epoch": 0.08890642467284803, + "loss": 0.2719469666481018, + "loss_ce": 0.004002647008746862, + "loss_iou": 0.2890625, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 58042960, + "step": 338 + }, + { + "epoch": 0.08916946143223516, + "grad_norm": 8.11559752489591, + "learning_rate": 5e-06, + "loss": 0.285, + "num_input_tokens_seen": 58215276, + "step": 339 + }, + { + "epoch": 0.08916946143223516, + "loss": 0.2689915895462036, + "loss_ce": 0.004709342960268259, + "loss_iou": 0.578125, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 58215276, + "step": 339 + }, + { + "epoch": 0.08943249819162227, + "grad_norm": 16.72537995408633, + "learning_rate": 5e-06, + "loss": 0.2206, + "num_input_tokens_seen": 58387140, + "step": 340 + }, + { + "epoch": 0.08943249819162227, + "loss": 0.19433817267417908, + "loss_ce": 0.007021272089332342, + "loss_iou": 0.63671875, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 58387140, + "step": 340 + }, + { + "epoch": 0.0896955349510094, + "grad_norm": 10.068757947032363, + "learning_rate": 5e-06, + "loss": 0.2708, + "num_input_tokens_seen": 58559132, + "step": 341 + }, + { + "epoch": 0.0896955349510094, + "loss": 0.19732418656349182, + "loss_ce": 0.004819315858185291, + "loss_iou": 0.4375, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 58559132, + "step": 341 + }, + { + "epoch": 0.08995857171039653, + "grad_norm": 15.667162296382404, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 58729752, + "step": 342 + }, + { + "epoch": 0.08995857171039653, + "loss": 0.251327782869339, + "loss_ce": 0.006515774410218, + "loss_iou": 0.5859375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 58729752, + "step": 342 + }, + { + "epoch": 0.09022160846978365, + "grad_norm": 10.706496762114405, + "learning_rate": 5e-06, + "loss": 0.2862, + "num_input_tokens_seen": 58901996, + "step": 343 + }, + { + "epoch": 0.09022160846978365, + "loss": 0.2672620415687561, + "loss_ce": 0.003956370986998081, + "loss_iou": 0.240234375, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 58901996, + "step": 343 + }, + { + "epoch": 0.09048464522917078, + "grad_norm": 8.994760858015587, + "learning_rate": 5e-06, + "loss": 0.2729, + "num_input_tokens_seen": 59071632, + "step": 344 + }, + { + "epoch": 0.09048464522917078, + "loss": 0.2578084170818329, + "loss_ce": 0.0040242355316877365, + "loss_iou": 0.365234375, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 59071632, + "step": 344 + }, + { + "epoch": 0.0907476819885579, + "grad_norm": 10.48877247334524, + "learning_rate": 5e-06, + "loss": 0.1963, + "num_input_tokens_seen": 59240464, + "step": 345 + }, + { + "epoch": 0.0907476819885579, + "loss": 0.20404496788978577, + "loss_ce": 0.0031172330491244793, + "loss_iou": 0.283203125, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 59240464, + "step": 345 + }, + { + "epoch": 0.09101071874794503, + "grad_norm": 11.358390932213611, + "learning_rate": 5e-06, + "loss": 0.2309, + "num_input_tokens_seen": 59411172, + "step": 346 + }, + { + "epoch": 0.09101071874794503, + "loss": 0.23203009366989136, + "loss_ce": 0.013524244539439678, + "loss_iou": 0.5859375, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 59411172, + "step": 346 + }, + { + "epoch": 0.09127375550733215, + "grad_norm": 11.905025621505947, + "learning_rate": 5e-06, + "loss": 0.2869, + "num_input_tokens_seen": 59581764, + "step": 347 + }, + { + "epoch": 0.09127375550733215, + "loss": 0.34626448154449463, + "loss_ce": 0.005199995823204517, + "loss_iou": 0.416015625, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 59581764, + "step": 347 + }, + { + "epoch": 0.09153679226671928, + "grad_norm": 12.735636545678894, + "learning_rate": 5e-06, + "loss": 0.3255, + "num_input_tokens_seen": 59754336, + "step": 348 + }, + { + "epoch": 0.09153679226671928, + "loss": 0.395874559879303, + "loss_ce": 0.005859900265932083, + "loss_iou": 0.3125, + "loss_num": 0.078125, + "loss_xval": 0.390625, + "num_input_tokens_seen": 59754336, + "step": 348 + }, + { + "epoch": 0.0917998290261064, + "grad_norm": 12.687289585677311, + "learning_rate": 5e-06, + "loss": 0.3009, + "num_input_tokens_seen": 59926836, + "step": 349 + }, + { + "epoch": 0.0917998290261064, + "loss": 0.25403833389282227, + "loss_ce": 0.0031838202849030495, + "loss_iou": 0.349609375, + "loss_num": 0.05029296875, + "loss_xval": 0.25, + "num_input_tokens_seen": 59926836, + "step": 349 + }, + { + "epoch": 0.09206286578549353, + "grad_norm": 7.2501298492412145, + "learning_rate": 5e-06, + "loss": 0.193, + "num_input_tokens_seen": 60099096, + "step": 350 + }, + { + "epoch": 0.09206286578549353, + "loss": 0.25642409920692444, + "loss_ce": 0.005325470119714737, + "loss_iou": 0.3671875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 60099096, + "step": 350 + }, + { + "epoch": 0.09232590254488064, + "grad_norm": 7.233766618859882, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 60271456, + "step": 351 + }, + { + "epoch": 0.09232590254488064, + "loss": 0.23899057507514954, + "loss_ce": 0.003883154597133398, + "loss_iou": 0.51953125, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 60271456, + "step": 351 + }, + { + "epoch": 0.09258893930426777, + "grad_norm": 10.116609125862757, + "learning_rate": 5e-06, + "loss": 0.2124, + "num_input_tokens_seen": 60443440, + "step": 352 + }, + { + "epoch": 0.09258893930426777, + "loss": 0.2204253077507019, + "loss_ce": 0.005032242741435766, + "loss_iou": 0.53125, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 60443440, + "step": 352 + }, + { + "epoch": 0.09285197606365489, + "grad_norm": 11.612967488254396, + "learning_rate": 5e-06, + "loss": 0.2054, + "num_input_tokens_seen": 60615884, + "step": 353 + }, + { + "epoch": 0.09285197606365489, + "loss": 0.20439094305038452, + "loss_ce": 0.005050132982432842, + "loss_iou": 0.416015625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 60615884, + "step": 353 + }, + { + "epoch": 0.09311501282304202, + "grad_norm": 14.924807522940846, + "learning_rate": 5e-06, + "loss": 0.2902, + "num_input_tokens_seen": 60787976, + "step": 354 + }, + { + "epoch": 0.09311501282304202, + "loss": 0.35237956047058105, + "loss_ce": 0.006432283669710159, + "loss_iou": 0.63671875, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 60787976, + "step": 354 + }, + { + "epoch": 0.09337804958242915, + "grad_norm": 14.749952758612022, + "learning_rate": 5e-06, + "loss": 0.2866, + "num_input_tokens_seen": 60960464, + "step": 355 + }, + { + "epoch": 0.09337804958242915, + "loss": 0.27518266439437866, + "loss_ce": 0.008092833682894707, + "loss_iou": 0.51953125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 60960464, + "step": 355 + }, + { + "epoch": 0.09364108634181627, + "grad_norm": 8.028682980536022, + "learning_rate": 5e-06, + "loss": 0.2222, + "num_input_tokens_seen": 61130588, + "step": 356 + }, + { + "epoch": 0.09364108634181627, + "loss": 0.2519834041595459, + "loss_ce": 0.004424803890287876, + "loss_iou": 0.37109375, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 61130588, + "step": 356 + }, + { + "epoch": 0.0939041231012034, + "grad_norm": 9.199423751114487, + "learning_rate": 5e-06, + "loss": 0.2881, + "num_input_tokens_seen": 61302652, + "step": 357 + }, + { + "epoch": 0.0939041231012034, + "loss": 0.39324474334716797, + "loss_ce": 0.012019152753055096, + "loss_iou": 0.5234375, + "loss_num": 0.076171875, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 61302652, + "step": 357 + }, + { + "epoch": 0.09416715986059052, + "grad_norm": 8.678464430801892, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 61473012, + "step": 358 + }, + { + "epoch": 0.09416715986059052, + "loss": 0.20840191841125488, + "loss_ce": 0.00448344461619854, + "loss_iou": 0.4765625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 61473012, + "step": 358 + }, + { + "epoch": 0.09443019661997765, + "grad_norm": 13.785653955051378, + "learning_rate": 5e-06, + "loss": 0.2401, + "num_input_tokens_seen": 61645272, + "step": 359 + }, + { + "epoch": 0.09443019661997765, + "loss": 0.19086723029613495, + "loss_ce": 0.0067241680808365345, + "loss_iou": 0.462890625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 61645272, + "step": 359 + }, + { + "epoch": 0.09469323337936476, + "grad_norm": 7.119442902133065, + "learning_rate": 5e-06, + "loss": 0.2102, + "num_input_tokens_seen": 61817436, + "step": 360 + }, + { + "epoch": 0.09469323337936476, + "loss": 0.21873927116394043, + "loss_ce": 0.0036513670347630978, + "loss_iou": 0.44140625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 61817436, + "step": 360 + }, + { + "epoch": 0.0949562701387519, + "grad_norm": 10.914204757351408, + "learning_rate": 5e-06, + "loss": 0.2247, + "num_input_tokens_seen": 61987764, + "step": 361 + }, + { + "epoch": 0.0949562701387519, + "loss": 0.2072867900133133, + "loss_ce": 0.0077018230222165585, + "loss_iou": 0.6171875, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 61987764, + "step": 361 + }, + { + "epoch": 0.09521930689813901, + "grad_norm": 10.090324495449488, + "learning_rate": 5e-06, + "loss": 0.2923, + "num_input_tokens_seen": 62158356, + "step": 362 + }, + { + "epoch": 0.09521930689813901, + "loss": 0.2174176275730133, + "loss_ce": 0.007944983430206776, + "loss_iou": 0.345703125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 62158356, + "step": 362 + }, + { + "epoch": 0.09548234365752614, + "grad_norm": 8.545687476622918, + "learning_rate": 5e-06, + "loss": 0.2599, + "num_input_tokens_seen": 62330412, + "step": 363 + }, + { + "epoch": 0.09548234365752614, + "loss": 0.2114790380001068, + "loss_ce": 0.004081577528268099, + "loss_iou": 0.546875, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 62330412, + "step": 363 + }, + { + "epoch": 0.09574538041691326, + "grad_norm": 9.203465723081402, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 62502416, + "step": 364 + }, + { + "epoch": 0.09574538041691326, + "loss": 0.20543332397937775, + "loss_ce": 0.004505585879087448, + "loss_iou": 0.62890625, + "loss_num": 0.0400390625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 62502416, + "step": 364 + }, + { + "epoch": 0.09600841717630039, + "grad_norm": 11.930210195365788, + "learning_rate": 5e-06, + "loss": 0.3152, + "num_input_tokens_seen": 62674708, + "step": 365 + }, + { + "epoch": 0.09600841717630039, + "loss": 0.3104754686355591, + "loss_ce": 0.006398319266736507, + "loss_iou": 0.5859375, + "loss_num": 0.060791015625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 62674708, + "step": 365 + }, + { + "epoch": 0.0962714539356875, + "grad_norm": 9.520122083335602, + "learning_rate": 5e-06, + "loss": 0.2852, + "num_input_tokens_seen": 62846712, + "step": 366 + }, + { + "epoch": 0.0962714539356875, + "loss": 0.30926454067230225, + "loss_ce": 0.008239164017140865, + "loss_iou": 0.3984375, + "loss_num": 0.06005859375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 62846712, + "step": 366 + }, + { + "epoch": 0.09653449069507464, + "grad_norm": 7.113583060734605, + "learning_rate": 5e-06, + "loss": 0.2553, + "num_input_tokens_seen": 63018700, + "step": 367 + }, + { + "epoch": 0.09653449069507464, + "loss": 0.27072834968566895, + "loss_ce": 0.006201992742717266, + "loss_iou": 0.470703125, + "loss_num": 0.052978515625, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 63018700, + "step": 367 + }, + { + "epoch": 0.09679752745446175, + "grad_norm": 7.613528329197277, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 63188284, + "step": 368 + }, + { + "epoch": 0.09679752745446175, + "loss": 0.19734525680541992, + "loss_ce": 0.0055117676965892315, + "loss_iou": 0.53515625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 63188284, + "step": 368 + }, + { + "epoch": 0.09706056421384888, + "grad_norm": 7.59501735101136, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 63360584, + "step": 369 + }, + { + "epoch": 0.09706056421384888, + "loss": 0.23239563405513763, + "loss_ce": 0.003757936879992485, + "loss_iou": 0.58984375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 63360584, + "step": 369 + }, + { + "epoch": 0.09732360097323602, + "grad_norm": 9.071713140748203, + "learning_rate": 5e-06, + "loss": 0.2417, + "num_input_tokens_seen": 63530348, + "step": 370 + }, + { + "epoch": 0.09732360097323602, + "loss": 0.3295820653438568, + "loss_ce": 0.004997124895453453, + "loss_iou": 0.36328125, + "loss_num": 0.06494140625, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 63530348, + "step": 370 + }, + { + "epoch": 0.09758663773262313, + "grad_norm": 18.945858878396198, + "learning_rate": 5e-06, + "loss": 0.2916, + "num_input_tokens_seen": 63702772, + "step": 371 + }, + { + "epoch": 0.09758663773262313, + "loss": 0.38020581007003784, + "loss_ce": 0.007525136228650808, + "loss_iou": 0.54296875, + "loss_num": 0.07421875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 63702772, + "step": 371 + }, + { + "epoch": 0.09784967449201026, + "grad_norm": 10.004542997696944, + "learning_rate": 5e-06, + "loss": 0.2707, + "num_input_tokens_seen": 63874988, + "step": 372 + }, + { + "epoch": 0.09784967449201026, + "loss": 0.21838681399822235, + "loss_ce": 0.005313089117407799, + "loss_iou": 0.39453125, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 63874988, + "step": 372 + }, + { + "epoch": 0.09811271125139738, + "grad_norm": 12.569402693685879, + "learning_rate": 5e-06, + "loss": 0.3095, + "num_input_tokens_seen": 64047192, + "step": 373 + }, + { + "epoch": 0.09811271125139738, + "loss": 0.23138675093650818, + "loss_ce": 0.0027490435168147087, + "loss_iou": 0.45703125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 64047192, + "step": 373 + }, + { + "epoch": 0.09837574801078451, + "grad_norm": 7.761350865641741, + "learning_rate": 5e-06, + "loss": 0.2808, + "num_input_tokens_seen": 64217628, + "step": 374 + }, + { + "epoch": 0.09837574801078451, + "loss": 0.22036589682102203, + "loss_ce": 0.005583181045949459, + "loss_iou": 0.40234375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 64217628, + "step": 374 + }, + { + "epoch": 0.09863878477017163, + "grad_norm": 15.752581561948926, + "learning_rate": 5e-06, + "loss": 0.2404, + "num_input_tokens_seen": 64389824, + "step": 375 + }, + { + "epoch": 0.09863878477017163, + "loss": 0.31320562958717346, + "loss_ce": 0.0048560285940766335, + "loss_iou": 0.40234375, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 64389824, + "step": 375 + }, + { + "epoch": 0.09890182152955876, + "grad_norm": 8.987788273052438, + "learning_rate": 5e-06, + "loss": 0.2167, + "num_input_tokens_seen": 64557964, + "step": 376 + }, + { + "epoch": 0.09890182152955876, + "loss": 0.15088969469070435, + "loss_ce": 0.0028794521931558847, + "loss_iou": 0.48828125, + "loss_num": 0.029541015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 64557964, + "step": 376 + }, + { + "epoch": 0.09916485828894588, + "grad_norm": 14.201905900297023, + "learning_rate": 5e-06, + "loss": 0.282, + "num_input_tokens_seen": 64730276, + "step": 377 + }, + { + "epoch": 0.09916485828894588, + "loss": 0.2574768662452698, + "loss_ce": 0.004425112158060074, + "loss_iou": 0.46484375, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 64730276, + "step": 377 + }, + { + "epoch": 0.099427895048333, + "grad_norm": 10.355784479360198, + "learning_rate": 5e-06, + "loss": 0.2585, + "num_input_tokens_seen": 64902196, + "step": 378 + }, + { + "epoch": 0.099427895048333, + "loss": 0.38254526257514954, + "loss_ce": 0.00461557786911726, + "loss_iou": 0.345703125, + "loss_num": 0.07568359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 64902196, + "step": 378 + }, + { + "epoch": 0.09969093180772012, + "grad_norm": 8.387859091991706, + "learning_rate": 5e-06, + "loss": 0.266, + "num_input_tokens_seen": 65074196, + "step": 379 + }, + { + "epoch": 0.09969093180772012, + "loss": 0.3536115884780884, + "loss_ce": 0.007664328906685114, + "loss_iou": 0.484375, + "loss_num": 0.0693359375, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 65074196, + "step": 379 + }, + { + "epoch": 0.09995396856710725, + "grad_norm": 6.106776880682037, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 65245768, + "step": 380 + }, + { + "epoch": 0.09995396856710725, + "loss": 0.16343596577644348, + "loss_ce": 0.003645919729024172, + "loss_iou": 0.38671875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 65245768, + "step": 380 + }, + { + "epoch": 0.10021700532649437, + "grad_norm": 13.780341497897325, + "learning_rate": 5e-06, + "loss": 0.203, + "num_input_tokens_seen": 65418016, + "step": 381 + }, + { + "epoch": 0.10021700532649437, + "loss": 0.21609731018543243, + "loss_ce": 0.004000143148005009, + "loss_iou": 0.48046875, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 65418016, + "step": 381 + }, + { + "epoch": 0.1004800420858815, + "grad_norm": 9.32261949794866, + "learning_rate": 5e-06, + "loss": 0.2265, + "num_input_tokens_seen": 65590312, + "step": 382 + }, + { + "epoch": 0.1004800420858815, + "loss": 0.22935181856155396, + "loss_ce": 0.00425416324287653, + "loss_iou": 0.61328125, + "loss_num": 0.045166015625, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 65590312, + "step": 382 + }, + { + "epoch": 0.10074307884526863, + "grad_norm": 8.048917738163752, + "learning_rate": 5e-06, + "loss": 0.2741, + "num_input_tokens_seen": 65762432, + "step": 383 + }, + { + "epoch": 0.10074307884526863, + "loss": 0.3397749662399292, + "loss_ce": 0.0026167738251388073, + "loss_iou": 0.2734375, + "loss_num": 0.0673828125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 65762432, + "step": 383 + }, + { + "epoch": 0.10100611560465575, + "grad_norm": 10.637781772264608, + "learning_rate": 5e-06, + "loss": 0.2218, + "num_input_tokens_seen": 65934468, + "step": 384 + }, + { + "epoch": 0.10100611560465575, + "loss": 0.2347402572631836, + "loss_ce": 0.01049709226936102, + "loss_iou": 0.41015625, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 65934468, + "step": 384 + }, + { + "epoch": 0.10126915236404288, + "grad_norm": 8.002274766525902, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 66106856, + "step": 385 + }, + { + "epoch": 0.10126915236404288, + "loss": 0.2602759301662445, + "loss_ce": 0.005881410092115402, + "loss_iou": 0.51171875, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 66106856, + "step": 385 + }, + { + "epoch": 0.10153218912343, + "grad_norm": 5.854621538534226, + "learning_rate": 5e-06, + "loss": 0.2458, + "num_input_tokens_seen": 66278808, + "step": 386 + }, + { + "epoch": 0.10153218912343, + "loss": 0.24760979413986206, + "loss_ce": 0.008718185126781464, + "loss_iou": 0.435546875, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 66278808, + "step": 386 + }, + { + "epoch": 0.10179522588281713, + "grad_norm": 8.25201471581488, + "learning_rate": 5e-06, + "loss": 0.2036, + "num_input_tokens_seen": 66449192, + "step": 387 + }, + { + "epoch": 0.10179522588281713, + "loss": 0.18327301740646362, + "loss_ce": 0.007003485690802336, + "loss_iou": 0.6171875, + "loss_num": 0.035400390625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 66449192, + "step": 387 + }, + { + "epoch": 0.10205826264220424, + "grad_norm": 7.445331676424441, + "learning_rate": 5e-06, + "loss": 0.2392, + "num_input_tokens_seen": 66619484, + "step": 388 + }, + { + "epoch": 0.10205826264220424, + "loss": 0.19971191883087158, + "loss_ce": 0.0065356409177184105, + "loss_iou": 0.55859375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 66619484, + "step": 388 + }, + { + "epoch": 0.10232129940159138, + "grad_norm": 6.590904938513716, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 66791680, + "step": 389 + }, + { + "epoch": 0.10232129940159138, + "loss": 0.186821848154068, + "loss_ce": 0.0036553400568664074, + "loss_iou": 0.4453125, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 66791680, + "step": 389 + }, + { + "epoch": 0.10258433616097849, + "grad_norm": 7.115969301930752, + "learning_rate": 5e-06, + "loss": 0.2258, + "num_input_tokens_seen": 66963964, + "step": 390 + }, + { + "epoch": 0.10258433616097849, + "loss": 0.21056599915027618, + "loss_ce": 0.005732023622840643, + "loss_iou": 0.5234375, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 66963964, + "step": 390 + }, + { + "epoch": 0.10284737292036562, + "grad_norm": 10.39999100726405, + "learning_rate": 5e-06, + "loss": 0.2688, + "num_input_tokens_seen": 67136264, + "step": 391 + }, + { + "epoch": 0.10284737292036562, + "loss": 0.32519546151161194, + "loss_ce": 0.0023194823879748583, + "loss_iou": 0.330078125, + "loss_num": 0.064453125, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 67136264, + "step": 391 + }, + { + "epoch": 0.10311040967975274, + "grad_norm": 10.853923665788189, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 67308332, + "step": 392 + }, + { + "epoch": 0.10311040967975274, + "loss": 0.22598545253276825, + "loss_ce": 0.004671982489526272, + "loss_iou": 0.578125, + "loss_num": 0.044189453125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 67308332, + "step": 392 + }, + { + "epoch": 0.10337344643913987, + "grad_norm": 12.644758786705145, + "learning_rate": 5e-06, + "loss": 0.2568, + "num_input_tokens_seen": 67480732, + "step": 393 + }, + { + "epoch": 0.10337344643913987, + "loss": 0.22954288125038147, + "loss_ce": 0.0029803775250911713, + "loss_iou": 0.322265625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 67480732, + "step": 393 + }, + { + "epoch": 0.10363648319852699, + "grad_norm": 9.802676625201595, + "learning_rate": 5e-06, + "loss": 0.2089, + "num_input_tokens_seen": 67652736, + "step": 394 + }, + { + "epoch": 0.10363648319852699, + "loss": 0.2672483026981354, + "loss_ce": 0.006017843261361122, + "loss_iou": 0.240234375, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 67652736, + "step": 394 + }, + { + "epoch": 0.10389951995791412, + "grad_norm": 7.134550479825786, + "learning_rate": 5e-06, + "loss": 0.2498, + "num_input_tokens_seen": 67825356, + "step": 395 + }, + { + "epoch": 0.10389951995791412, + "loss": 0.2973036766052246, + "loss_ce": 0.004334905184805393, + "loss_iou": 0.53515625, + "loss_num": 0.05859375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 67825356, + "step": 395 + }, + { + "epoch": 0.10416255671730125, + "grad_norm": 9.967886960744174, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 67997632, + "step": 396 + }, + { + "epoch": 0.10416255671730125, + "loss": 0.16415753960609436, + "loss_ce": 0.0022923052310943604, + "loss_iou": 0.62109375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 67997632, + "step": 396 + }, + { + "epoch": 0.10442559347668837, + "grad_norm": 10.176506863521315, + "learning_rate": 5e-06, + "loss": 0.2534, + "num_input_tokens_seen": 68168484, + "step": 397 + }, + { + "epoch": 0.10442559347668837, + "loss": 0.2146688550710678, + "loss_ce": 0.002388589084148407, + "loss_iou": 0.455078125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 68168484, + "step": 397 + }, + { + "epoch": 0.1046886302360755, + "grad_norm": 12.732415116106893, + "learning_rate": 5e-06, + "loss": 0.2673, + "num_input_tokens_seen": 68337496, + "step": 398 + }, + { + "epoch": 0.1046886302360755, + "loss": 0.28035295009613037, + "loss_ce": 0.003985744901001453, + "loss_iou": 0.5390625, + "loss_num": 0.05517578125, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 68337496, + "step": 398 + }, + { + "epoch": 0.10495166699546261, + "grad_norm": 8.052010611714167, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 68509860, + "step": 399 + }, + { + "epoch": 0.10495166699546261, + "loss": 0.36081743240356445, + "loss_ce": 0.003395556937903166, + "loss_iou": 0.486328125, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 68509860, + "step": 399 + }, + { + "epoch": 0.10521470375484974, + "grad_norm": 7.830511214158693, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 68681940, + "step": 400 + }, + { + "epoch": 0.10521470375484974, + "loss": 0.2878537178039551, + "loss_ce": 0.005016806535422802, + "loss_iou": 0.40625, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 68681940, + "step": 400 + }, + { + "epoch": 0.10547774051423686, + "grad_norm": 8.196278314466337, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 68854332, + "step": 401 + }, + { + "epoch": 0.10547774051423686, + "loss": 0.27157318592071533, + "loss_ce": 0.004483355674892664, + "loss_iou": 0.455078125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 68854332, + "step": 401 + }, + { + "epoch": 0.10574077727362399, + "grad_norm": 8.502085361700777, + "learning_rate": 5e-06, + "loss": 0.2004, + "num_input_tokens_seen": 69024884, + "step": 402 + }, + { + "epoch": 0.10574077727362399, + "loss": 0.20401804149150848, + "loss_ce": 0.002235804684460163, + "loss_iou": 0.34765625, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 69024884, + "step": 402 + }, + { + "epoch": 0.10600381403301111, + "grad_norm": 12.566157085058673, + "learning_rate": 5e-06, + "loss": 0.2292, + "num_input_tokens_seen": 69197236, + "step": 403 + }, + { + "epoch": 0.10600381403301111, + "loss": 0.24189046025276184, + "loss_ce": 0.0023274626582860947, + "loss_iou": 0.4609375, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 69197236, + "step": 403 + }, + { + "epoch": 0.10626685079239824, + "grad_norm": 8.163593280105717, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 69369436, + "step": 404 + }, + { + "epoch": 0.10626685079239824, + "loss": 0.17368575930595398, + "loss_ce": 0.005472864024341106, + "loss_iou": 0.35546875, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 69369436, + "step": 404 + }, + { + "epoch": 0.10652988755178536, + "grad_norm": 7.3996201552939596, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 69540036, + "step": 405 + }, + { + "epoch": 0.10652988755178536, + "loss": 0.1399962604045868, + "loss_ce": 0.004925462882965803, + "loss_iou": 0.5234375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 69540036, + "step": 405 + }, + { + "epoch": 0.10679292431117249, + "grad_norm": 10.197562859133864, + "learning_rate": 5e-06, + "loss": 0.2373, + "num_input_tokens_seen": 69712000, + "step": 406 + }, + { + "epoch": 0.10679292431117249, + "loss": 0.19172173738479614, + "loss_ce": 0.0054424479603767395, + "loss_iou": 0.71875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 69712000, + "step": 406 + }, + { + "epoch": 0.1070559610705596, + "grad_norm": 13.16161294170375, + "learning_rate": 5e-06, + "loss": 0.2782, + "num_input_tokens_seen": 69883872, + "step": 407 + }, + { + "epoch": 0.1070559610705596, + "loss": 0.28615695238113403, + "loss_ce": 0.002465539611876011, + "loss_iou": 0.357421875, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 69883872, + "step": 407 + }, + { + "epoch": 0.10731899782994674, + "grad_norm": 8.88729554885214, + "learning_rate": 5e-06, + "loss": 0.2054, + "num_input_tokens_seen": 70056060, + "step": 408 + }, + { + "epoch": 0.10731899782994674, + "loss": 0.19157525897026062, + "loss_ce": 0.0030986934434622526, + "loss_iou": 0.5078125, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 70056060, + "step": 408 + }, + { + "epoch": 0.10758203458933385, + "grad_norm": 10.615365655634278, + "learning_rate": 5e-06, + "loss": 0.2465, + "num_input_tokens_seen": 70228104, + "step": 409 + }, + { + "epoch": 0.10758203458933385, + "loss": 0.315712034702301, + "loss_ce": 0.003150993725284934, + "loss_iou": 0.40234375, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 70228104, + "step": 409 + }, + { + "epoch": 0.10784507134872098, + "grad_norm": 8.946094916164988, + "learning_rate": 5e-06, + "loss": 0.2711, + "num_input_tokens_seen": 70398676, + "step": 410 + }, + { + "epoch": 0.10784507134872098, + "loss": 0.37502604722976685, + "loss_ce": 0.010768221691250801, + "loss_iou": 0.412109375, + "loss_num": 0.07275390625, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 70398676, + "step": 410 + }, + { + "epoch": 0.10810810810810811, + "grad_norm": 10.043204254830277, + "learning_rate": 5e-06, + "loss": 0.2838, + "num_input_tokens_seen": 70570976, + "step": 411 + }, + { + "epoch": 0.10810810810810811, + "loss": 0.23639510571956635, + "loss_ce": 0.004095299169421196, + "loss_iou": 0.443359375, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 70570976, + "step": 411 + }, + { + "epoch": 0.10837114486749523, + "grad_norm": 6.784469948331844, + "learning_rate": 5e-06, + "loss": 0.2968, + "num_input_tokens_seen": 70741228, + "step": 412 + }, + { + "epoch": 0.10837114486749523, + "loss": 0.3139989376068115, + "loss_ce": 0.001865162281319499, + "loss_iou": 0.3671875, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 70741228, + "step": 412 + }, + { + "epoch": 0.10863418162688236, + "grad_norm": 6.343923096243914, + "learning_rate": 5e-06, + "loss": 0.2107, + "num_input_tokens_seen": 70913516, + "step": 413 + }, + { + "epoch": 0.10863418162688236, + "loss": 0.19345833361148834, + "loss_ce": 0.0018079333240166306, + "loss_iou": 0.5, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 70913516, + "step": 413 + }, + { + "epoch": 0.10889721838626948, + "grad_norm": 7.92695485009758, + "learning_rate": 5e-06, + "loss": 0.1941, + "num_input_tokens_seen": 71085736, + "step": 414 + }, + { + "epoch": 0.10889721838626948, + "loss": 0.18501858413219452, + "loss_ce": 0.001791047165170312, + "loss_iou": 0.65234375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 71085736, + "step": 414 + }, + { + "epoch": 0.10916025514565661, + "grad_norm": 8.754602822026959, + "learning_rate": 5e-06, + "loss": 0.2194, + "num_input_tokens_seen": 71257772, + "step": 415 + }, + { + "epoch": 0.10916025514565661, + "loss": 0.2221953421831131, + "loss_ce": 0.0017973824869841337, + "loss_iou": 0.65234375, + "loss_num": 0.0439453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 71257772, + "step": 415 + }, + { + "epoch": 0.10942329190504373, + "grad_norm": 7.459161826900908, + "learning_rate": 5e-06, + "loss": 0.2477, + "num_input_tokens_seen": 71430004, + "step": 416 + }, + { + "epoch": 0.10942329190504373, + "loss": 0.2557048797607422, + "loss_ce": 0.0026531266048550606, + "loss_iou": 0.578125, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 71430004, + "step": 416 + }, + { + "epoch": 0.10968632866443086, + "grad_norm": 6.337226894093766, + "learning_rate": 5e-06, + "loss": 0.236, + "num_input_tokens_seen": 71602160, + "step": 417 + }, + { + "epoch": 0.10968632866443086, + "loss": 0.1684013307094574, + "loss_ce": 0.0018363934941589832, + "loss_iou": 0.59375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 71602160, + "step": 417 + }, + { + "epoch": 0.10994936542381797, + "grad_norm": 10.092397777907028, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 71774264, + "step": 418 + }, + { + "epoch": 0.10994936542381797, + "loss": 0.17219412326812744, + "loss_ce": 0.0024553609546273947, + "loss_iou": 0.53125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 71774264, + "step": 418 + }, + { + "epoch": 0.1102124021832051, + "grad_norm": 10.933777042764003, + "learning_rate": 5e-06, + "loss": 0.2549, + "num_input_tokens_seen": 71943760, + "step": 419 + }, + { + "epoch": 0.1102124021832051, + "loss": 0.2607925236225128, + "loss_ce": 0.0052993567660450935, + "loss_iou": 0.43359375, + "loss_num": 0.051025390625, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 71943760, + "step": 419 + }, + { + "epoch": 0.11047543894259222, + "grad_norm": 18.099442946915016, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 72114360, + "step": 420 + }, + { + "epoch": 0.11047543894259222, + "loss": 0.2702986001968384, + "loss_ce": 0.0022321869619190693, + "loss_iou": 0.3515625, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 72114360, + "step": 420 + }, + { + "epoch": 0.11073847570197935, + "grad_norm": 6.645880081378423, + "learning_rate": 5e-06, + "loss": 0.2374, + "num_input_tokens_seen": 72285984, + "step": 421 + }, + { + "epoch": 0.11073847570197935, + "loss": 0.17263615131378174, + "loss_ce": 0.0023480583913624287, + "loss_iou": 0.38671875, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 72285984, + "step": 421 + }, + { + "epoch": 0.11100151246136647, + "grad_norm": 10.576055281968134, + "learning_rate": 5e-06, + "loss": 0.1819, + "num_input_tokens_seen": 72458472, + "step": 422 + }, + { + "epoch": 0.11100151246136647, + "loss": 0.2114211916923523, + "loss_ce": 0.005244437139481306, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 72458472, + "step": 422 + }, + { + "epoch": 0.1112645492207536, + "grad_norm": 12.150497670240854, + "learning_rate": 5e-06, + "loss": 0.2879, + "num_input_tokens_seen": 72630848, + "step": 423 + }, + { + "epoch": 0.1112645492207536, + "loss": 0.195449560880661, + "loss_ce": 0.0027005516458302736, + "loss_iou": 0.27734375, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 72630848, + "step": 423 + }, + { + "epoch": 0.11152758598014073, + "grad_norm": 9.206681239637351, + "learning_rate": 5e-06, + "loss": 0.1882, + "num_input_tokens_seen": 72802948, + "step": 424 + }, + { + "epoch": 0.11152758598014073, + "loss": 0.20556196570396423, + "loss_ce": 0.003230433911085129, + "loss_iou": 0.4453125, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 72802948, + "step": 424 + }, + { + "epoch": 0.11179062273952785, + "grad_norm": 7.914405274492032, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 72973304, + "step": 425 + }, + { + "epoch": 0.11179062273952785, + "loss": 0.24065472185611725, + "loss_ce": 0.002800729824230075, + "loss_iou": 0.46875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 72973304, + "step": 425 + }, + { + "epoch": 0.11205365949891498, + "grad_norm": 8.079757468011927, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 73145520, + "step": 426 + }, + { + "epoch": 0.11205365949891498, + "loss": 0.17487749457359314, + "loss_ce": 0.007641167379915714, + "loss_iou": 0.515625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 73145520, + "step": 426 + }, + { + "epoch": 0.1123166962583021, + "grad_norm": 26.685292496143536, + "learning_rate": 5e-06, + "loss": 0.2674, + "num_input_tokens_seen": 73317728, + "step": 427 + }, + { + "epoch": 0.1123166962583021, + "loss": 0.24447987973690033, + "loss_ce": 0.003635148983448744, + "loss_iou": 0.7109375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 73317728, + "step": 427 + }, + { + "epoch": 0.11257973301768923, + "grad_norm": 10.587399579059058, + "learning_rate": 5e-06, + "loss": 0.2517, + "num_input_tokens_seen": 73490108, + "step": 428 + }, + { + "epoch": 0.11257973301768923, + "loss": 0.24800382554531097, + "loss_ce": 0.002154202200472355, + "loss_iou": 0.37890625, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 73490108, + "step": 428 + }, + { + "epoch": 0.11284276977707634, + "grad_norm": 6.6654701515468515, + "learning_rate": 5e-06, + "loss": 0.2127, + "num_input_tokens_seen": 73662104, + "step": 429 + }, + { + "epoch": 0.11284276977707634, + "loss": 0.35326629877090454, + "loss_ce": 0.004389348905533552, + "loss_iou": 0.474609375, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 73662104, + "step": 429 + }, + { + "epoch": 0.11310580653646347, + "grad_norm": 9.359106186429873, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 73833940, + "step": 430 + }, + { + "epoch": 0.11310580653646347, + "loss": 0.19281955063343048, + "loss_ce": 0.0022678023669868708, + "loss_iou": 0.52734375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 73833940, + "step": 430 + }, + { + "epoch": 0.11336884329585059, + "grad_norm": 10.912300657166487, + "learning_rate": 5e-06, + "loss": 0.2706, + "num_input_tokens_seen": 74006248, + "step": 431 + }, + { + "epoch": 0.11336884329585059, + "loss": 0.270729124546051, + "loss_ce": 0.0032730703242123127, + "loss_iou": 0.56640625, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 74006248, + "step": 431 + }, + { + "epoch": 0.11363188005523772, + "grad_norm": 7.80923377535479, + "learning_rate": 5e-06, + "loss": 0.2294, + "num_input_tokens_seen": 74178292, + "step": 432 + }, + { + "epoch": 0.11363188005523772, + "loss": 0.21906697750091553, + "loss_ce": 0.002758371876552701, + "loss_iou": 0.45703125, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 74178292, + "step": 432 + }, + { + "epoch": 0.11389491681462484, + "grad_norm": 7.423483614252486, + "learning_rate": 5e-06, + "loss": 0.2228, + "num_input_tokens_seen": 74350588, + "step": 433 + }, + { + "epoch": 0.11389491681462484, + "loss": 0.17986077070236206, + "loss_ce": 0.0017601896543055773, + "loss_iou": 0.48828125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 74350588, + "step": 433 + }, + { + "epoch": 0.11415795357401197, + "grad_norm": 8.108512259957333, + "learning_rate": 5e-06, + "loss": 0.226, + "num_input_tokens_seen": 74520920, + "step": 434 + }, + { + "epoch": 0.11415795357401197, + "loss": 0.21928107738494873, + "loss_ce": 0.00455939956009388, + "loss_iou": 0.640625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 74520920, + "step": 434 + }, + { + "epoch": 0.11442099033339909, + "grad_norm": 19.006805430751477, + "learning_rate": 5e-06, + "loss": 0.2661, + "num_input_tokens_seen": 74693212, + "step": 435 + }, + { + "epoch": 0.11442099033339909, + "loss": 0.3151628077030182, + "loss_ce": 0.0064469738863408566, + "loss_iou": 0.625, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 74693212, + "step": 435 + }, + { + "epoch": 0.11468402709278622, + "grad_norm": 8.863661674496596, + "learning_rate": 5e-06, + "loss": 0.2843, + "num_input_tokens_seen": 74865396, + "step": 436 + }, + { + "epoch": 0.11468402709278622, + "loss": 0.280520498752594, + "loss_ce": 0.006594708655029535, + "loss_iou": 0.44921875, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 74865396, + "step": 436 + }, + { + "epoch": 0.11494706385217335, + "grad_norm": 7.497393138459489, + "learning_rate": 5e-06, + "loss": 0.1834, + "num_input_tokens_seen": 75037856, + "step": 437 + }, + { + "epoch": 0.11494706385217335, + "loss": 0.18996562063694, + "loss_ce": 0.0047849551774561405, + "loss_iou": 0.470703125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 75037856, + "step": 437 + }, + { + "epoch": 0.11521010061156046, + "grad_norm": 12.688606481249035, + "learning_rate": 5e-06, + "loss": 0.2195, + "num_input_tokens_seen": 75210192, + "step": 438 + }, + { + "epoch": 0.11521010061156046, + "loss": 0.20892465114593506, + "loss_ce": 0.006348971277475357, + "loss_iou": 0.470703125, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 75210192, + "step": 438 + }, + { + "epoch": 0.1154731373709476, + "grad_norm": 8.208628036837384, + "learning_rate": 5e-06, + "loss": 0.2418, + "num_input_tokens_seen": 75382312, + "step": 439 + }, + { + "epoch": 0.1154731373709476, + "loss": 0.20543652772903442, + "loss_ce": 0.001701178727671504, + "loss_iou": 0.50390625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 75382312, + "step": 439 + }, + { + "epoch": 0.11573617413033471, + "grad_norm": 9.785336279235814, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 75554688, + "step": 440 + }, + { + "epoch": 0.11573617413033471, + "loss": 0.26715782284736633, + "loss_ce": 0.0069039189256727695, + "loss_iou": 0.65234375, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 75554688, + "step": 440 + }, + { + "epoch": 0.11599921088972184, + "grad_norm": 23.031133350657914, + "learning_rate": 5e-06, + "loss": 0.2357, + "num_input_tokens_seen": 75726964, + "step": 441 + }, + { + "epoch": 0.11599921088972184, + "loss": 0.3105895519256592, + "loss_ce": 0.004559269640594721, + "loss_iou": 0.4609375, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 75726964, + "step": 441 + }, + { + "epoch": 0.11626224764910896, + "grad_norm": 8.337171777387358, + "learning_rate": 5e-06, + "loss": 0.239, + "num_input_tokens_seen": 75899032, + "step": 442 + }, + { + "epoch": 0.11626224764910896, + "loss": 0.20333924889564514, + "loss_ce": 0.002655645599588752, + "loss_iou": 0.55078125, + "loss_num": 0.0400390625, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 75899032, + "step": 442 + }, + { + "epoch": 0.11652528440849609, + "grad_norm": 4.896294530096544, + "learning_rate": 5e-06, + "loss": 0.1725, + "num_input_tokens_seen": 76071304, + "step": 443 + }, + { + "epoch": 0.11652528440849609, + "loss": 0.20664632320404053, + "loss_ce": 0.0032161371782422066, + "loss_iou": 0.439453125, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 76071304, + "step": 443 + }, + { + "epoch": 0.11678832116788321, + "grad_norm": 7.74925868300208, + "learning_rate": 5e-06, + "loss": 0.2369, + "num_input_tokens_seen": 76241732, + "step": 444 + }, + { + "epoch": 0.11678832116788321, + "loss": 0.2300896942615509, + "loss_ce": 0.0016351052327081561, + "loss_iou": 0.51953125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 76241732, + "step": 444 + }, + { + "epoch": 0.11705135792727034, + "grad_norm": 8.593064888198587, + "learning_rate": 5e-06, + "loss": 0.2023, + "num_input_tokens_seen": 76414004, + "step": 445 + }, + { + "epoch": 0.11705135792727034, + "loss": 0.14537394046783447, + "loss_ce": 0.0013919961638748646, + "loss_iou": 0.404296875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 76414004, + "step": 445 + }, + { + "epoch": 0.11731439468665746, + "grad_norm": 10.869613464978638, + "learning_rate": 5e-06, + "loss": 0.2657, + "num_input_tokens_seen": 76586248, + "step": 446 + }, + { + "epoch": 0.11731439468665746, + "loss": 0.2349330186843872, + "loss_ce": 0.004464263096451759, + "loss_iou": 0.4140625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 76586248, + "step": 446 + }, + { + "epoch": 0.11757743144604459, + "grad_norm": 12.757935439615991, + "learning_rate": 5e-06, + "loss": 0.2717, + "num_input_tokens_seen": 76758328, + "step": 447 + }, + { + "epoch": 0.11757743144604459, + "loss": 0.331451952457428, + "loss_ce": 0.0016179666854441166, + "loss_iou": 0.359375, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 76758328, + "step": 447 + }, + { + "epoch": 0.1178404682054317, + "grad_norm": 11.375965574655721, + "learning_rate": 5e-06, + "loss": 0.2032, + "num_input_tokens_seen": 76930392, + "step": 448 + }, + { + "epoch": 0.1178404682054317, + "loss": 0.21405665576457977, + "loss_ce": 0.004584001377224922, + "loss_iou": 0.42578125, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 76930392, + "step": 448 + }, + { + "epoch": 0.11810350496481883, + "grad_norm": 7.336838831766789, + "learning_rate": 5e-06, + "loss": 0.206, + "num_input_tokens_seen": 77102556, + "step": 449 + }, + { + "epoch": 0.11810350496481883, + "loss": 0.2166377753019333, + "loss_ce": 0.005700268317013979, + "loss_iou": 0.51171875, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 77102556, + "step": 449 + }, + { + "epoch": 0.11836654172420595, + "grad_norm": 11.990630578782412, + "learning_rate": 5e-06, + "loss": 0.2454, + "num_input_tokens_seen": 77274600, + "step": 450 + }, + { + "epoch": 0.11836654172420595, + "loss": 0.24853403866291046, + "loss_ce": 0.00378305627964437, + "loss_iou": 0.58984375, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 77274600, + "step": 450 + }, + { + "epoch": 0.11862957848359308, + "grad_norm": 9.139541774912184, + "learning_rate": 5e-06, + "loss": 0.2682, + "num_input_tokens_seen": 77446596, + "step": 451 + }, + { + "epoch": 0.11862957848359308, + "loss": 0.2904722988605499, + "loss_ce": 0.002020149724557996, + "loss_iou": 0.259765625, + "loss_num": 0.0576171875, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 77446596, + "step": 451 + }, + { + "epoch": 0.11889261524298021, + "grad_norm": 7.052664643006658, + "learning_rate": 5e-06, + "loss": 0.199, + "num_input_tokens_seen": 77618608, + "step": 452 + }, + { + "epoch": 0.11889261524298021, + "loss": 0.23526260256767273, + "loss_ce": 0.004305572714656591, + "loss_iou": 0.5078125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 77618608, + "step": 452 + }, + { + "epoch": 0.11915565200236733, + "grad_norm": 8.748276102929202, + "learning_rate": 5e-06, + "loss": 0.2101, + "num_input_tokens_seen": 77790812, + "step": 453 + }, + { + "epoch": 0.11915565200236733, + "loss": 0.2472519874572754, + "loss_ce": 0.0025620569940656424, + "loss_iou": 0.369140625, + "loss_num": 0.048828125, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 77790812, + "step": 453 + }, + { + "epoch": 0.11941868876175446, + "grad_norm": 5.877127785016851, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 77962644, + "step": 454 + }, + { + "epoch": 0.11941868876175446, + "loss": 0.2044929563999176, + "loss_ce": 0.005640420597046614, + "loss_iou": 0.5625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 77962644, + "step": 454 + }, + { + "epoch": 0.11968172552114158, + "grad_norm": 12.849038288174842, + "learning_rate": 5e-06, + "loss": 0.1941, + "num_input_tokens_seen": 78134616, + "step": 455 + }, + { + "epoch": 0.11968172552114158, + "loss": 0.20492224395275116, + "loss_ce": 0.002163449302315712, + "loss_iou": 0.640625, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 78134616, + "step": 455 + }, + { + "epoch": 0.11994476228052871, + "grad_norm": 9.804877236674342, + "learning_rate": 5e-06, + "loss": 0.2629, + "num_input_tokens_seen": 78303656, + "step": 456 + }, + { + "epoch": 0.11994476228052871, + "loss": 0.23393824696540833, + "loss_ce": 0.0058498685248196125, + "loss_iou": NaN, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 78303656, + "step": 456 + }, + { + "epoch": 0.12020779903991582, + "grad_norm": 23.369495819583523, + "learning_rate": 5e-06, + "loss": 0.2449, + "num_input_tokens_seen": 78475936, + "step": 457 + }, + { + "epoch": 0.12020779903991582, + "loss": 0.2364155352115631, + "loss_ce": 0.009792003780603409, + "loss_iou": 0.4921875, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 78475936, + "step": 457 + }, + { + "epoch": 0.12047083579930296, + "grad_norm": 9.860704537193882, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 78648124, + "step": 458 + }, + { + "epoch": 0.12047083579930296, + "loss": 0.1498676836490631, + "loss_ce": 0.002711937762796879, + "loss_iou": 0.66015625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 78648124, + "step": 458 + }, + { + "epoch": 0.12073387255869007, + "grad_norm": 8.562392622535716, + "learning_rate": 5e-06, + "loss": 0.234, + "num_input_tokens_seen": 78820104, + "step": 459 + }, + { + "epoch": 0.12073387255869007, + "loss": 0.23113086819648743, + "loss_ce": 0.0024931649677455425, + "loss_iou": 0.484375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 78820104, + "step": 459 + }, + { + "epoch": 0.1209969093180772, + "grad_norm": 6.866997211199077, + "learning_rate": 5e-06, + "loss": 0.2359, + "num_input_tokens_seen": 78992332, + "step": 460 + }, + { + "epoch": 0.1209969093180772, + "loss": 0.15971623361110687, + "loss_ce": 0.0017572464421391487, + "loss_iou": 0.369140625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 78992332, + "step": 460 + }, + { + "epoch": 0.12125994607746432, + "grad_norm": 6.404476112237903, + "learning_rate": 5e-06, + "loss": 0.2015, + "num_input_tokens_seen": 79164376, + "step": 461 + }, + { + "epoch": 0.12125994607746432, + "loss": 0.2323172241449356, + "loss_ce": 0.005937827751040459, + "loss_iou": 0.59765625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 79164376, + "step": 461 + }, + { + "epoch": 0.12152298283685145, + "grad_norm": 9.290291620775372, + "learning_rate": 5e-06, + "loss": 0.1959, + "num_input_tokens_seen": 79336652, + "step": 462 + }, + { + "epoch": 0.12152298283685145, + "loss": 0.18938115239143372, + "loss_ce": 0.002125292085111141, + "loss_iou": 0.5390625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 79336652, + "step": 462 + }, + { + "epoch": 0.12178601959623857, + "grad_norm": 11.240674872659573, + "learning_rate": 5e-06, + "loss": 0.1971, + "num_input_tokens_seen": 79508724, + "step": 463 + }, + { + "epoch": 0.12178601959623857, + "loss": 0.2277367115020752, + "loss_ce": 0.001418360392563045, + "loss_iou": 0.306640625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 79508724, + "step": 463 + }, + { + "epoch": 0.1220490563556257, + "grad_norm": 13.32960488624403, + "learning_rate": 5e-06, + "loss": 0.1817, + "num_input_tokens_seen": 79678784, + "step": 464 + }, + { + "epoch": 0.1220490563556257, + "loss": 0.21636496484279633, + "loss_ce": 0.00182638771366328, + "loss_iou": 0.376953125, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 79678784, + "step": 464 + }, + { + "epoch": 0.12231209311501283, + "grad_norm": 12.994343511615464, + "learning_rate": 5e-06, + "loss": 0.2429, + "num_input_tokens_seen": 79850852, + "step": 465 + }, + { + "epoch": 0.12231209311501283, + "loss": 0.24236971139907837, + "loss_ce": 0.0015249918214976788, + "loss_iou": 0.419921875, + "loss_num": 0.048095703125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 79850852, + "step": 465 + }, + { + "epoch": 0.12257512987439995, + "grad_norm": 10.746140563102669, + "learning_rate": 5e-06, + "loss": 0.2493, + "num_input_tokens_seen": 80021192, + "step": 466 + }, + { + "epoch": 0.12257512987439995, + "loss": 0.1711360514163971, + "loss_ce": 0.001702459529042244, + "loss_iou": 0.59765625, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 80021192, + "step": 466 + }, + { + "epoch": 0.12283816663378708, + "grad_norm": 20.444744144323252, + "learning_rate": 5e-06, + "loss": 0.2662, + "num_input_tokens_seen": 80190112, + "step": 467 + }, + { + "epoch": 0.12283816663378708, + "loss": 0.21875979006290436, + "loss_ce": 0.0026953346095979214, + "loss_iou": 0.4921875, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 80190112, + "step": 467 + }, + { + "epoch": 0.1231012033931742, + "grad_norm": 12.488827753013481, + "learning_rate": 5e-06, + "loss": 0.2328, + "num_input_tokens_seen": 80362892, + "step": 468 + }, + { + "epoch": 0.1231012033931742, + "loss": 0.17285287380218506, + "loss_ce": 0.002381683327257633, + "loss_iou": 0.5234375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 80362892, + "step": 468 + }, + { + "epoch": 0.12336424015256132, + "grad_norm": 9.176610530631967, + "learning_rate": 5e-06, + "loss": 0.2275, + "num_input_tokens_seen": 80534972, + "step": 469 + }, + { + "epoch": 0.12336424015256132, + "loss": 0.27365219593048096, + "loss_ce": 0.0032664609607309103, + "loss_iou": 0.421875, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 80534972, + "step": 469 + }, + { + "epoch": 0.12362727691194844, + "grad_norm": 33.167673801928274, + "learning_rate": 5e-06, + "loss": 0.2499, + "num_input_tokens_seen": 80705332, + "step": 470 + }, + { + "epoch": 0.12362727691194844, + "loss": 0.23707842826843262, + "loss_ce": 0.0022151488810777664, + "loss_iou": 0.337890625, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 80705332, + "step": 470 + }, + { + "epoch": 0.12389031367133557, + "grad_norm": 7.668114037963123, + "learning_rate": 5e-06, + "loss": 0.2394, + "num_input_tokens_seen": 80874564, + "step": 471 + }, + { + "epoch": 0.12389031367133557, + "loss": 0.23063993453979492, + "loss_ce": 0.006457816809415817, + "loss_iou": 0.6171875, + "loss_num": 0.044677734375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 80874564, + "step": 471 + }, + { + "epoch": 0.12415335043072269, + "grad_norm": 10.510373606639076, + "learning_rate": 5e-06, + "loss": 0.2114, + "num_input_tokens_seen": 81046768, + "step": 472 + }, + { + "epoch": 0.12415335043072269, + "loss": 0.21059830486774445, + "loss_ce": 0.002834630198776722, + "loss_iou": 0.52734375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 81046768, + "step": 472 + }, + { + "epoch": 0.12441638719010982, + "grad_norm": 11.032749822897834, + "learning_rate": 5e-06, + "loss": 0.2413, + "num_input_tokens_seen": 81218788, + "step": 473 + }, + { + "epoch": 0.12441638719010982, + "loss": 0.2567683458328247, + "loss_ce": 0.004815223626792431, + "loss_iou": 0.66015625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 81218788, + "step": 473 + }, + { + "epoch": 0.12467942394949694, + "grad_norm": 8.640049648990784, + "learning_rate": 5e-06, + "loss": 0.2498, + "num_input_tokens_seen": 81390764, + "step": 474 + }, + { + "epoch": 0.12467942394949694, + "loss": 0.3335922062397003, + "loss_ce": 0.0035140730906277895, + "loss_iou": 0.40625, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 81390764, + "step": 474 + }, + { + "epoch": 0.12494246070888407, + "grad_norm": 6.133084134287981, + "learning_rate": 5e-06, + "loss": 0.1862, + "num_input_tokens_seen": 81561192, + "step": 475 + }, + { + "epoch": 0.12494246070888407, + "loss": 0.19583408534526825, + "loss_ce": 0.002474710112437606, + "loss_iou": 0.28125, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 81561192, + "step": 475 + }, + { + "epoch": 0.1252054974682712, + "grad_norm": 15.123599266995042, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 81731608, + "step": 476 + }, + { + "epoch": 0.1252054974682712, + "loss": 0.2318619191646576, + "loss_ce": 0.0012100562453269958, + "loss_iou": 0.41796875, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 81731608, + "step": 476 + }, + { + "epoch": 0.1254685342276583, + "grad_norm": 10.43813000825477, + "learning_rate": 5e-06, + "loss": 0.1887, + "num_input_tokens_seen": 81902388, + "step": 477 + }, + { + "epoch": 0.1254685342276583, + "loss": 0.1947634220123291, + "loss_ce": 0.0017092193011194468, + "loss_iou": 0.35546875, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 81902388, + "step": 477 + }, + { + "epoch": 0.12573157098704543, + "grad_norm": 15.053311018918661, + "learning_rate": 5e-06, + "loss": 0.2559, + "num_input_tokens_seen": 82074796, + "step": 478 + }, + { + "epoch": 0.12573157098704543, + "loss": 0.23443953692913055, + "loss_ce": 0.002872154116630554, + "loss_iou": 0.59375, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 82074796, + "step": 478 + }, + { + "epoch": 0.12599460774643256, + "grad_norm": 8.071545338749708, + "learning_rate": 5e-06, + "loss": 0.2732, + "num_input_tokens_seen": 82246976, + "step": 479 + }, + { + "epoch": 0.12599460774643256, + "loss": 0.22861449420452118, + "loss_ce": 0.001929928082972765, + "loss_iou": NaN, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 82246976, + "step": 479 + }, + { + "epoch": 0.1262576445058197, + "grad_norm": 13.388933170286325, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 82419244, + "step": 480 + }, + { + "epoch": 0.1262576445058197, + "loss": 0.24052694439888, + "loss_ce": 0.002611914649605751, + "loss_iou": 0.423828125, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 82419244, + "step": 480 + }, + { + "epoch": 0.12652068126520682, + "grad_norm": 10.19375568056882, + "learning_rate": 5e-06, + "loss": 0.2132, + "num_input_tokens_seen": 82591568, + "step": 481 + }, + { + "epoch": 0.12652068126520682, + "loss": 0.27330607175827026, + "loss_ce": 0.007314843591302633, + "loss_iou": 0.4453125, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 82591568, + "step": 481 + }, + { + "epoch": 0.12678371802459393, + "grad_norm": 8.166078619911394, + "learning_rate": 5e-06, + "loss": 0.21, + "num_input_tokens_seen": 82760532, + "step": 482 + }, + { + "epoch": 0.12678371802459393, + "loss": 0.19459792971611023, + "loss_ce": 0.003313753753900528, + "loss_iou": 0.40234375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 82760532, + "step": 482 + }, + { + "epoch": 0.12704675478398106, + "grad_norm": 8.980724720396978, + "learning_rate": 5e-06, + "loss": 0.2429, + "num_input_tokens_seen": 82932700, + "step": 483 + }, + { + "epoch": 0.12704675478398106, + "loss": 0.2369249314069748, + "loss_ce": 0.0062120286747813225, + "loss_iou": 0.455078125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 82932700, + "step": 483 + }, + { + "epoch": 0.1273097915433682, + "grad_norm": 7.529582302992287, + "learning_rate": 5e-06, + "loss": 0.2337, + "num_input_tokens_seen": 83104784, + "step": 484 + }, + { + "epoch": 0.1273097915433682, + "loss": 0.27052199840545654, + "loss_ce": 0.0020893928594887257, + "loss_iou": 0.59765625, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 83104784, + "step": 484 + }, + { + "epoch": 0.12757282830275532, + "grad_norm": 9.051361983377177, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 83276660, + "step": 485 + }, + { + "epoch": 0.12757282830275532, + "loss": 0.1893678605556488, + "loss_ce": 0.004553401842713356, + "loss_iou": 0.48046875, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 83276660, + "step": 485 + }, + { + "epoch": 0.12783586506214242, + "grad_norm": 7.363403269312881, + "learning_rate": 5e-06, + "loss": 0.2164, + "num_input_tokens_seen": 83448804, + "step": 486 + }, + { + "epoch": 0.12783586506214242, + "loss": 0.18258926272392273, + "loss_ce": 0.004244527779519558, + "loss_iou": 0.6015625, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 83448804, + "step": 486 + }, + { + "epoch": 0.12809890182152955, + "grad_norm": 6.462059422866227, + "learning_rate": 5e-06, + "loss": 0.1922, + "num_input_tokens_seen": 83621024, + "step": 487 + }, + { + "epoch": 0.12809890182152955, + "loss": 0.14729665219783783, + "loss_ce": 0.003986096940934658, + "loss_iou": 0.578125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 83621024, + "step": 487 + }, + { + "epoch": 0.12836193858091668, + "grad_norm": 9.164596498872053, + "learning_rate": 5e-06, + "loss": 0.2078, + "num_input_tokens_seen": 83793612, + "step": 488 + }, + { + "epoch": 0.12836193858091668, + "loss": 0.20358332991600037, + "loss_ce": 0.0016790404915809631, + "loss_iou": 0.52734375, + "loss_num": 0.040283203125, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 83793612, + "step": 488 + }, + { + "epoch": 0.12862497534030382, + "grad_norm": 13.35296525183839, + "learning_rate": 5e-06, + "loss": 0.3196, + "num_input_tokens_seen": 83965664, + "step": 489 + }, + { + "epoch": 0.12862497534030382, + "loss": 0.36003273725509644, + "loss_ce": 0.007981948554515839, + "loss_iou": 0.58984375, + "loss_num": 0.0703125, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 83965664, + "step": 489 + }, + { + "epoch": 0.12888801209969092, + "grad_norm": 8.219901608770293, + "learning_rate": 5e-06, + "loss": 0.2425, + "num_input_tokens_seen": 84137656, + "step": 490 + }, + { + "epoch": 0.12888801209969092, + "loss": 0.21684233844280243, + "loss_ce": 0.0033413656055927277, + "loss_iou": 0.3359375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 84137656, + "step": 490 + }, + { + "epoch": 0.12915104885907805, + "grad_norm": 5.772697796397355, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 84308240, + "step": 491 + }, + { + "epoch": 0.12915104885907805, + "loss": 0.1743691861629486, + "loss_ce": 0.0028604045510292053, + "loss_iou": 0.484375, + "loss_num": 0.0341796875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 84308240, + "step": 491 + }, + { + "epoch": 0.12941408561846518, + "grad_norm": 7.154674159301149, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 84478680, + "step": 492 + }, + { + "epoch": 0.12941408561846518, + "loss": 0.15929880738258362, + "loss_ce": 0.003598117269575596, + "loss_iou": 0.66015625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 84478680, + "step": 492 + }, + { + "epoch": 0.1296771223778523, + "grad_norm": 10.931778081568359, + "learning_rate": 5e-06, + "loss": 0.2397, + "num_input_tokens_seen": 84648900, + "step": 493 + }, + { + "epoch": 0.1296771223778523, + "loss": 0.24825721979141235, + "loss_ce": 0.002285533118993044, + "loss_iou": 0.6640625, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 84648900, + "step": 493 + }, + { + "epoch": 0.12994015913723944, + "grad_norm": 10.142675625135299, + "learning_rate": 5e-06, + "loss": 0.2203, + "num_input_tokens_seen": 84820972, + "step": 494 + }, + { + "epoch": 0.12994015913723944, + "loss": 0.23026269674301147, + "loss_ce": 0.0015639647608622909, + "loss_iou": 0.59765625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 84820972, + "step": 494 + }, + { + "epoch": 0.13020319589662654, + "grad_norm": 10.196346960471569, + "learning_rate": 5e-06, + "loss": 0.197, + "num_input_tokens_seen": 84993508, + "step": 495 + }, + { + "epoch": 0.13020319589662654, + "loss": 0.21560978889465332, + "loss_ce": 0.001620523864403367, + "loss_iou": 0.65234375, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 84993508, + "step": 495 + }, + { + "epoch": 0.13046623265601368, + "grad_norm": 8.523677693002021, + "learning_rate": 5e-06, + "loss": 0.2357, + "num_input_tokens_seen": 85165596, + "step": 496 + }, + { + "epoch": 0.13046623265601368, + "loss": 0.17495451867580414, + "loss_ce": 0.003750909585505724, + "loss_iou": 0.416015625, + "loss_num": 0.034423828125, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 85165596, + "step": 496 + }, + { + "epoch": 0.1307292694154008, + "grad_norm": 5.883749010160293, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 85337976, + "step": 497 + }, + { + "epoch": 0.1307292694154008, + "loss": 0.22726929187774658, + "loss_ce": 0.009862057864665985, + "loss_iou": 0.494140625, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 85337976, + "step": 497 + }, + { + "epoch": 0.13099230617478794, + "grad_norm": 7.408260148240015, + "learning_rate": 5e-06, + "loss": 0.2231, + "num_input_tokens_seen": 85509860, + "step": 498 + }, + { + "epoch": 0.13099230617478794, + "loss": 0.1337101012468338, + "loss_ce": 0.004010388161987066, + "loss_iou": 0.515625, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 85509860, + "step": 498 + }, + { + "epoch": 0.13125534293417504, + "grad_norm": 15.7224480316305, + "learning_rate": 5e-06, + "loss": 0.2184, + "num_input_tokens_seen": 85680488, + "step": 499 + }, + { + "epoch": 0.13125534293417504, + "loss": 0.3409336507320404, + "loss_ce": 0.002554745879024267, + "loss_iou": 0.423828125, + "loss_num": 0.06787109375, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 85680488, + "step": 499 + }, + { + "epoch": 0.13151837969356217, + "grad_norm": 9.533912931076111, + "learning_rate": 5e-06, + "loss": 0.2073, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_websight_new_CIoU": 0.7392345666885376, + "eval_websight_new_GIoU": 0.7380270659923553, + "eval_websight_new_IoU": 0.7466294467449188, + "eval_websight_new_MAE_all": 0.04153955727815628, + "eval_websight_new_MAE_h": 0.03558222949504852, + "eval_websight_new_MAE_w": 0.06108394265174866, + "eval_websight_new_MAE_x": 0.04947785474359989, + "eval_websight_new_MAE_y": 0.020014189183712006, + "eval_websight_new_NUM_probability": 0.9849532246589661, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.20474952459335327, + "eval_websight_new_loss_ce": 0.0016258999821729958, + "eval_websight_new_loss_iou": 0.635986328125, + "eval_websight_new_loss_num": 0.037567138671875, + "eval_websight_new_loss_xval": 0.187774658203125, + "eval_websight_new_runtime": 54.6509, + "eval_websight_new_samples_per_second": 0.915, + "eval_websight_new_steps_per_second": 0.037, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_seeclick_CIoU": 0.41250014305114746, + "eval_seeclick_GIoU": 0.40925678610801697, + "eval_seeclick_IoU": 0.4613874703645706, + "eval_seeclick_MAE_all": 0.08603048324584961, + "eval_seeclick_MAE_h": 0.05444946512579918, + "eval_seeclick_MAE_w": 0.12106707319617271, + "eval_seeclick_MAE_x": 0.12660933285951614, + "eval_seeclick_MAE_y": 0.04199606738984585, + "eval_seeclick_NUM_probability": 0.9906161725521088, + "eval_seeclick_inside_bbox": 0.7698863744735718, + "eval_seeclick_loss": 0.3508862257003784, + "eval_seeclick_loss_ce": 0.013088095001876354, + "eval_seeclick_loss_iou": 0.609375, + "eval_seeclick_loss_num": 0.0647735595703125, + "eval_seeclick_loss_xval": 0.323974609375, + "eval_seeclick_runtime": 71.4374, + "eval_seeclick_samples_per_second": 0.602, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_icons_CIoU": 0.7077827751636505, + "eval_icons_GIoU": 0.7009360492229462, + "eval_icons_IoU": 0.7175993025302887, + "eval_icons_MAE_all": 0.041869472712278366, + "eval_icons_MAE_h": 0.04292410984635353, + "eval_icons_MAE_w": 0.04752085544168949, + "eval_icons_MAE_x": 0.038647109642624855, + "eval_icons_MAE_y": 0.03838581405580044, + "eval_icons_NUM_probability": 0.9924971163272858, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.13132929801940918, + "eval_icons_loss_ce": 0.003774530749069527, + "eval_icons_loss_iou": 0.590576171875, + "eval_icons_loss_num": 0.024749755859375, + "eval_icons_loss_xval": 0.12384033203125, + "eval_icons_runtime": 78.9038, + "eval_icons_samples_per_second": 0.634, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "eval_screenspot_CIoU": 0.4913978377978007, + "eval_screenspot_GIoU": 0.47330527504285175, + "eval_screenspot_IoU": 0.5310356616973877, + "eval_screenspot_MAE_all": 0.09648379683494568, + "eval_screenspot_MAE_h": 0.061782063295443855, + "eval_screenspot_MAE_w": 0.14932986597220102, + "eval_screenspot_MAE_x": 0.11150848865509033, + "eval_screenspot_MAE_y": 0.0633147731423378, + "eval_screenspot_NUM_probability": 0.9926036596298218, + "eval_screenspot_inside_bbox": 0.8454166650772095, + "eval_screenspot_loss": 0.8485715389251709, + "eval_screenspot_loss_ce": 0.47645074129104614, + "eval_screenspot_loss_iou": 0.5421142578125, + "eval_screenspot_loss_num": 0.07304890950520833, + "eval_screenspot_loss_xval": 0.3654378255208333, + "eval_screenspot_runtime": 144.4943, + "eval_screenspot_samples_per_second": 0.616, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.13151837969356217, + "loss": 0.8175798058509827, + "loss_ce": 0.44428879022598267, + "loss_iou": 0.5, + "loss_num": 0.07470703125, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 85852688, + "step": 500 + }, + { + "epoch": 0.1317814164529493, + "grad_norm": 9.07691940041295, + "learning_rate": 5e-06, + "loss": 0.1533, + "num_input_tokens_seen": 86021700, + "step": 501 + }, + { + "epoch": 0.1317814164529493, + "loss": 0.15382635593414307, + "loss_ce": 0.003191583789885044, + "loss_iou": 0.6640625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 86021700, + "step": 501 + }, + { + "epoch": 0.13204445321233643, + "grad_norm": 8.240988876623113, + "learning_rate": 5e-06, + "loss": 0.2297, + "num_input_tokens_seen": 86193792, + "step": 502 + }, + { + "epoch": 0.13204445321233643, + "loss": 0.23657214641571045, + "loss_ce": 0.003112667240202427, + "loss_iou": 0.625, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 86193792, + "step": 502 + }, + { + "epoch": 0.13230748997172354, + "grad_norm": 8.766196459329715, + "learning_rate": 5e-06, + "loss": 0.2049, + "num_input_tokens_seen": 86365784, + "step": 503 + }, + { + "epoch": 0.13230748997172354, + "loss": 0.23612971603870392, + "loss_ce": 0.004379219841212034, + "loss_iou": 0.54296875, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 86365784, + "step": 503 + }, + { + "epoch": 0.13257052673111067, + "grad_norm": 5.336005731654041, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 86538044, + "step": 504 + }, + { + "epoch": 0.13257052673111067, + "loss": 0.15480023622512817, + "loss_ce": 0.0017545849550515413, + "loss_iou": 0.625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 86538044, + "step": 504 + }, + { + "epoch": 0.1328335634904978, + "grad_norm": 15.391291602176388, + "learning_rate": 5e-06, + "loss": 0.2019, + "num_input_tokens_seen": 86710012, + "step": 505 + }, + { + "epoch": 0.1328335634904978, + "loss": 0.16446326673030853, + "loss_ce": 0.0012552611296996474, + "loss_iou": 0.6796875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 86710012, + "step": 505 + }, + { + "epoch": 0.13309660024988493, + "grad_norm": 7.414971095415403, + "learning_rate": 5e-06, + "loss": 0.2046, + "num_input_tokens_seen": 86882600, + "step": 506 + }, + { + "epoch": 0.13309660024988493, + "loss": 0.18114808201789856, + "loss_ce": 0.0028033575508743525, + "loss_iou": 0.625, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 86882600, + "step": 506 + }, + { + "epoch": 0.13335963700927206, + "grad_norm": 7.149578381431456, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 87055160, + "step": 507 + }, + { + "epoch": 0.13335963700927206, + "loss": 0.185621976852417, + "loss_ce": 0.0038592712953686714, + "loss_iou": 0.39453125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 87055160, + "step": 507 + }, + { + "epoch": 0.13362267376865916, + "grad_norm": 7.2770390495694235, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 87227176, + "step": 508 + }, + { + "epoch": 0.13362267376865916, + "loss": 0.12990637123584747, + "loss_ce": 0.0011221927125006914, + "loss_iou": 0.453125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 87227176, + "step": 508 + }, + { + "epoch": 0.1338857105280463, + "grad_norm": 13.071840767784375, + "learning_rate": 5e-06, + "loss": 0.2921, + "num_input_tokens_seen": 87397716, + "step": 509 + }, + { + "epoch": 0.1338857105280463, + "loss": 0.31984156370162964, + "loss_ce": 0.0027639116160571575, + "loss_iou": NaN, + "loss_num": 0.0634765625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 87397716, + "step": 509 + }, + { + "epoch": 0.13414874728743342, + "grad_norm": 9.630666522841075, + "learning_rate": 5e-06, + "loss": 0.1771, + "num_input_tokens_seen": 87570180, + "step": 510 + }, + { + "epoch": 0.13414874728743342, + "loss": 0.14270807802677155, + "loss_ce": 0.002327217720448971, + "loss_iou": 0.6328125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 87570180, + "step": 510 + }, + { + "epoch": 0.13441178404682055, + "grad_norm": 7.92909505971618, + "learning_rate": 5e-06, + "loss": 0.2147, + "num_input_tokens_seen": 87742132, + "step": 511 + }, + { + "epoch": 0.13441178404682055, + "loss": 0.24002233147621155, + "loss_ce": 0.0020462563261389732, + "loss_iou": 0.5859375, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 87742132, + "step": 511 + }, + { + "epoch": 0.13467482080620766, + "grad_norm": 11.73787088896753, + "learning_rate": 5e-06, + "loss": 0.2146, + "num_input_tokens_seen": 87914144, + "step": 512 + }, + { + "epoch": 0.13467482080620766, + "loss": 0.2217179834842682, + "loss_ce": 0.00376143422909081, + "loss_iou": 0.5859375, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 87914144, + "step": 512 + }, + { + "epoch": 0.1349378575655948, + "grad_norm": 14.481870714144165, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 88086392, + "step": 513 + }, + { + "epoch": 0.1349378575655948, + "loss": 0.16927534341812134, + "loss_ce": 0.0027714259922504425, + "loss_iou": 0.36328125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 88086392, + "step": 513 + }, + { + "epoch": 0.13520089432498192, + "grad_norm": 11.732408058803708, + "learning_rate": 5e-06, + "loss": 0.2117, + "num_input_tokens_seen": 88258824, + "step": 514 + }, + { + "epoch": 0.13520089432498192, + "loss": 0.21276208758354187, + "loss_ce": 0.0047542620450258255, + "loss_iou": 0.6171875, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 88258824, + "step": 514 + }, + { + "epoch": 0.13546393108436905, + "grad_norm": 8.699627697080732, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 88431280, + "step": 515 + }, + { + "epoch": 0.13546393108436905, + "loss": 0.13956406712532043, + "loss_ce": 0.005469819065183401, + "loss_iou": 0.62890625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 88431280, + "step": 515 + }, + { + "epoch": 0.13572696784375615, + "grad_norm": 9.441247877196542, + "learning_rate": 5e-06, + "loss": 0.2418, + "num_input_tokens_seen": 88603308, + "step": 516 + }, + { + "epoch": 0.13572696784375615, + "loss": 0.16743244230747223, + "loss_ce": 0.00141681800596416, + "loss_iou": 0.49609375, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 88603308, + "step": 516 + }, + { + "epoch": 0.13599000460314328, + "grad_norm": 11.156675067329255, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 88775492, + "step": 517 + }, + { + "epoch": 0.13599000460314328, + "loss": 0.167589008808136, + "loss_ce": 0.0012681989464908838, + "loss_iou": 0.50390625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 88775492, + "step": 517 + }, + { + "epoch": 0.1362530413625304, + "grad_norm": 9.64852272360873, + "learning_rate": 5e-06, + "loss": 0.2377, + "num_input_tokens_seen": 88946204, + "step": 518 + }, + { + "epoch": 0.1362530413625304, + "loss": 0.2998642921447754, + "loss_ce": 0.004698258824646473, + "loss_iou": 0.5859375, + "loss_num": 0.05908203125, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 88946204, + "step": 518 + }, + { + "epoch": 0.13651607812191754, + "grad_norm": 8.486100540746042, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 89116056, + "step": 519 + }, + { + "epoch": 0.13651607812191754, + "loss": 0.21377842128276825, + "loss_ce": 0.004488877020776272, + "loss_iou": 0.64453125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 89116056, + "step": 519 + }, + { + "epoch": 0.13677911488130468, + "grad_norm": 7.241270611453955, + "learning_rate": 5e-06, + "loss": 0.2361, + "num_input_tokens_seen": 89287012, + "step": 520 + }, + { + "epoch": 0.13677911488130468, + "loss": 0.23230193555355072, + "loss_ce": 0.0012228279374539852, + "loss_iou": 0.447265625, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 89287012, + "step": 520 + }, + { + "epoch": 0.13704215164069178, + "grad_norm": 9.086385631838745, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 89457780, + "step": 521 + }, + { + "epoch": 0.13704215164069178, + "loss": 0.16174045205116272, + "loss_ce": 0.0012180046178400517, + "loss_iou": 0.478515625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 89457780, + "step": 521 + }, + { + "epoch": 0.1373051884000789, + "grad_norm": 9.260504659218878, + "learning_rate": 5e-06, + "loss": 0.1871, + "num_input_tokens_seen": 89628244, + "step": 522 + }, + { + "epoch": 0.1373051884000789, + "loss": 0.18322458863258362, + "loss_ce": 0.005673316773027182, + "loss_iou": 0.5390625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 89628244, + "step": 522 + }, + { + "epoch": 0.13756822515946604, + "grad_norm": 10.862554096761864, + "learning_rate": 5e-06, + "loss": 0.1938, + "num_input_tokens_seen": 89798512, + "step": 523 + }, + { + "epoch": 0.13756822515946604, + "loss": 0.18914146721363068, + "loss_ce": 0.002434919821098447, + "loss_iou": 0.62109375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 89798512, + "step": 523 + }, + { + "epoch": 0.13783126191885317, + "grad_norm": 8.527732112130064, + "learning_rate": 5e-06, + "loss": 0.2597, + "num_input_tokens_seen": 89968992, + "step": 524 + }, + { + "epoch": 0.13783126191885317, + "loss": 0.2936267554759979, + "loss_ce": 0.0038318424485623837, + "loss_iou": 0.40234375, + "loss_num": 0.057861328125, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 89968992, + "step": 524 + }, + { + "epoch": 0.13809429867824027, + "grad_norm": 8.96210431629978, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 90140828, + "step": 525 + }, + { + "epoch": 0.13809429867824027, + "loss": 0.22792883217334747, + "loss_ce": 0.001427364069968462, + "loss_iou": 0.396484375, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 90140828, + "step": 525 + }, + { + "epoch": 0.1383573354376274, + "grad_norm": 10.303553365642298, + "learning_rate": 5e-06, + "loss": 0.1672, + "num_input_tokens_seen": 90311476, + "step": 526 + }, + { + "epoch": 0.1383573354376274, + "loss": 0.09766636043787003, + "loss_ce": 0.00202426896430552, + "loss_iou": 0.59765625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 90311476, + "step": 526 + }, + { + "epoch": 0.13862037219701454, + "grad_norm": 7.578553389392675, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 90483668, + "step": 527 + }, + { + "epoch": 0.13862037219701454, + "loss": 0.2255394458770752, + "loss_ce": 0.0017845738912001252, + "loss_iou": 0.5234375, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 90483668, + "step": 527 + }, + { + "epoch": 0.13888340895640167, + "grad_norm": 11.866590519507064, + "learning_rate": 5e-06, + "loss": 0.2463, + "num_input_tokens_seen": 90655996, + "step": 528 + }, + { + "epoch": 0.13888340895640167, + "loss": 0.21356430649757385, + "loss_ce": 0.004030614625662565, + "loss_iou": 0.6953125, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 90655996, + "step": 528 + }, + { + "epoch": 0.13914644571578877, + "grad_norm": 9.66204006000912, + "learning_rate": 5e-06, + "loss": 0.2311, + "num_input_tokens_seen": 90828348, + "step": 529 + }, + { + "epoch": 0.13914644571578877, + "loss": 0.21380871534347534, + "loss_ce": 0.0014063662383705378, + "loss_iou": 0.5546875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 90828348, + "step": 529 + }, + { + "epoch": 0.1394094824751759, + "grad_norm": 7.3801351915919975, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 91000476, + "step": 530 + }, + { + "epoch": 0.1394094824751759, + "loss": 0.19109413027763367, + "loss_ce": 0.0008780673379078507, + "loss_iou": 0.29296875, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 91000476, + "step": 530 + }, + { + "epoch": 0.13967251923456303, + "grad_norm": 6.737214273696564, + "learning_rate": 5e-06, + "loss": 0.2037, + "num_input_tokens_seen": 91173056, + "step": 531 + }, + { + "epoch": 0.13967251923456303, + "loss": 0.26445770263671875, + "loss_ce": 0.0033187787048518658, + "loss_iou": 0.486328125, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 91173056, + "step": 531 + }, + { + "epoch": 0.13993555599395016, + "grad_norm": 5.9400720051741835, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 91345516, + "step": 532 + }, + { + "epoch": 0.13993555599395016, + "loss": 0.12527181208133698, + "loss_ce": 0.001858725561760366, + "loss_iou": 0.59765625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 91345516, + "step": 532 + }, + { + "epoch": 0.1401985927533373, + "grad_norm": 8.591575042379741, + "learning_rate": 5e-06, + "loss": 0.2351, + "num_input_tokens_seen": 91516156, + "step": 533 + }, + { + "epoch": 0.1401985927533373, + "loss": 0.22694742679595947, + "loss_ce": 0.005511872004717588, + "loss_iou": 0.443359375, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 91516156, + "step": 533 + }, + { + "epoch": 0.1404616295127244, + "grad_norm": 26.674816384255838, + "learning_rate": 5e-06, + "loss": 0.2705, + "num_input_tokens_seen": 91685124, + "step": 534 + }, + { + "epoch": 0.1404616295127244, + "loss": 0.2157442569732666, + "loss_ce": 0.005356077570468187, + "loss_iou": 0.52734375, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 91685124, + "step": 534 + }, + { + "epoch": 0.14072466627211153, + "grad_norm": 7.992225607802382, + "learning_rate": 5e-06, + "loss": 0.2194, + "num_input_tokens_seen": 91857436, + "step": 535 + }, + { + "epoch": 0.14072466627211153, + "loss": 0.18514756858348846, + "loss_ce": 0.0036290136631578207, + "loss_iou": 0.625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 91857436, + "step": 535 + }, + { + "epoch": 0.14098770303149866, + "grad_norm": 7.005269973220872, + "learning_rate": 5e-06, + "loss": 0.2265, + "num_input_tokens_seen": 92029236, + "step": 536 + }, + { + "epoch": 0.14098770303149866, + "loss": 0.23437106609344482, + "loss_ce": 0.0035971456672996283, + "loss_iou": 0.6640625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 92029236, + "step": 536 + }, + { + "epoch": 0.1412507397908858, + "grad_norm": 13.679910251313718, + "learning_rate": 5e-06, + "loss": 0.2237, + "num_input_tokens_seen": 92201248, + "step": 537 + }, + { + "epoch": 0.1412507397908858, + "loss": 0.26536300778388977, + "loss_ce": 0.006451865192502737, + "loss_iou": 0.625, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 92201248, + "step": 537 + }, + { + "epoch": 0.1415137765502729, + "grad_norm": 15.448137214976848, + "learning_rate": 5e-06, + "loss": 0.2463, + "num_input_tokens_seen": 92373276, + "step": 538 + }, + { + "epoch": 0.1415137765502729, + "loss": 0.2220609188079834, + "loss_ce": 0.0029447050765156746, + "loss_iou": 0.69140625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 92373276, + "step": 538 + }, + { + "epoch": 0.14177681330966002, + "grad_norm": 11.236165761153213, + "learning_rate": 5e-06, + "loss": 0.2009, + "num_input_tokens_seen": 92545880, + "step": 539 + }, + { + "epoch": 0.14177681330966002, + "loss": 0.21155700087547302, + "loss_ce": 0.006112661678344011, + "loss_iou": 0.3515625, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 92545880, + "step": 539 + }, + { + "epoch": 0.14203985006904715, + "grad_norm": 7.303097321727043, + "learning_rate": 5e-06, + "loss": 0.2029, + "num_input_tokens_seen": 92717956, + "step": 540 + }, + { + "epoch": 0.14203985006904715, + "loss": 0.2111251950263977, + "loss_ce": 0.0041549778543412685, + "loss_iou": 0.37109375, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 92717956, + "step": 540 + }, + { + "epoch": 0.14230288682843428, + "grad_norm": 28.317923328050057, + "learning_rate": 5e-06, + "loss": 0.1946, + "num_input_tokens_seen": 92890260, + "step": 541 + }, + { + "epoch": 0.14230288682843428, + "loss": 0.18131288886070251, + "loss_ce": 0.004371959716081619, + "loss_iou": 0.498046875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 92890260, + "step": 541 + }, + { + "epoch": 0.14256592358782139, + "grad_norm": 12.97806529461824, + "learning_rate": 5e-06, + "loss": 0.2534, + "num_input_tokens_seen": 93062192, + "step": 542 + }, + { + "epoch": 0.14256592358782139, + "loss": 0.30919933319091797, + "loss_ce": 0.003779401071369648, + "loss_iou": 0.640625, + "loss_num": 0.06103515625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 93062192, + "step": 542 + }, + { + "epoch": 0.14282896034720852, + "grad_norm": 7.8709246328059725, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 93234480, + "step": 543 + }, + { + "epoch": 0.14282896034720852, + "loss": 0.2343926727771759, + "loss_ce": 0.002581145381554961, + "loss_iou": 0.40234375, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 93234480, + "step": 543 + }, + { + "epoch": 0.14309199710659565, + "grad_norm": 8.436976597382053, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 93406784, + "step": 544 + }, + { + "epoch": 0.14309199710659565, + "loss": 0.20959031581878662, + "loss_ce": 0.0013383585028350353, + "loss_iou": 0.4453125, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 93406784, + "step": 544 + }, + { + "epoch": 0.14335503386598278, + "grad_norm": 12.053836172217155, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 93577272, + "step": 545 + }, + { + "epoch": 0.14335503386598278, + "loss": 0.18772834539413452, + "loss_ce": 0.002242510672658682, + "loss_iou": 0.52734375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 93577272, + "step": 545 + }, + { + "epoch": 0.1436180706253699, + "grad_norm": 9.099796427619822, + "learning_rate": 5e-06, + "loss": 0.1713, + "num_input_tokens_seen": 93749292, + "step": 546 + }, + { + "epoch": 0.1436180706253699, + "loss": 0.2121119648218155, + "loss_ce": 0.00813247635960579, + "loss_iou": 0.671875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 93749292, + "step": 546 + }, + { + "epoch": 0.143881107384757, + "grad_norm": 13.08678717007557, + "learning_rate": 5e-06, + "loss": 0.2241, + "num_input_tokens_seen": 93921812, + "step": 547 + }, + { + "epoch": 0.143881107384757, + "loss": 0.22192896902561188, + "loss_ce": 0.002934828167781234, + "loss_iou": 0.38671875, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 93921812, + "step": 547 + }, + { + "epoch": 0.14414414414414414, + "grad_norm": 8.230228011363112, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 94093976, + "step": 548 + }, + { + "epoch": 0.14414414414414414, + "loss": 0.21945567429065704, + "loss_ce": 0.0016822349280118942, + "loss_iou": 0.45703125, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 94093976, + "step": 548 + }, + { + "epoch": 0.14440718090353127, + "grad_norm": 6.524669746029216, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 94262972, + "step": 549 + }, + { + "epoch": 0.14440718090353127, + "loss": 0.13413353264331818, + "loss_ce": 0.0032741604372859, + "loss_iou": 0.53515625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 94262972, + "step": 549 + }, + { + "epoch": 0.1446702176629184, + "grad_norm": 7.614359717596038, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 94435240, + "step": 550 + }, + { + "epoch": 0.1446702176629184, + "loss": 0.22648407518863678, + "loss_ce": 0.0034616070333868265, + "loss_iou": 0.427734375, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 94435240, + "step": 550 + }, + { + "epoch": 0.1449332544223055, + "grad_norm": 7.224416897933664, + "learning_rate": 5e-06, + "loss": 0.1771, + "num_input_tokens_seen": 94607488, + "step": 551 + }, + { + "epoch": 0.1449332544223055, + "loss": 0.18614572286605835, + "loss_ce": 0.0017585159512236714, + "loss_iou": 0.6640625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 94607488, + "step": 551 + }, + { + "epoch": 0.14519629118169264, + "grad_norm": 7.882321147436634, + "learning_rate": 5e-06, + "loss": 0.1957, + "num_input_tokens_seen": 94776340, + "step": 552 + }, + { + "epoch": 0.14519629118169264, + "loss": 0.22142915427684784, + "loss_ce": 0.003350543323904276, + "loss_iou": 0.53125, + "loss_num": 0.043701171875, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 94776340, + "step": 552 + }, + { + "epoch": 0.14545932794107977, + "grad_norm": 8.491750307418846, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 94948568, + "step": 553 + }, + { + "epoch": 0.14545932794107977, + "loss": 0.17424368858337402, + "loss_ce": 0.0024297323543578386, + "loss_iou": 0.578125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 94948568, + "step": 553 + }, + { + "epoch": 0.1457223647004669, + "grad_norm": 9.839418631011416, + "learning_rate": 5e-06, + "loss": 0.2278, + "num_input_tokens_seen": 95120868, + "step": 554 + }, + { + "epoch": 0.1457223647004669, + "loss": 0.23207631707191467, + "loss_ce": 0.001241360092535615, + "loss_iou": 0.5703125, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 95120868, + "step": 554 + }, + { + "epoch": 0.145985401459854, + "grad_norm": 10.298790259293808, + "learning_rate": 5e-06, + "loss": 0.2316, + "num_input_tokens_seen": 95293380, + "step": 555 + }, + { + "epoch": 0.145985401459854, + "loss": 0.2527633607387543, + "loss_ce": 0.0020309346728026867, + "loss_iou": 0.40234375, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 95293380, + "step": 555 + }, + { + "epoch": 0.14624843821924113, + "grad_norm": 11.750914646944318, + "learning_rate": 5e-06, + "loss": 0.1353, + "num_input_tokens_seen": 95465572, + "step": 556 + }, + { + "epoch": 0.14624843821924113, + "loss": 0.14519110321998596, + "loss_ce": 0.0012702068779617548, + "loss_iou": 0.478515625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 95465572, + "step": 556 + }, + { + "epoch": 0.14651147497862826, + "grad_norm": 11.674444875910362, + "learning_rate": 5e-06, + "loss": 0.248, + "num_input_tokens_seen": 95636384, + "step": 557 + }, + { + "epoch": 0.14651147497862826, + "loss": 0.2826082706451416, + "loss_ce": 0.0019686208106577396, + "loss_iou": 0.5078125, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 95636384, + "step": 557 + }, + { + "epoch": 0.1467745117380154, + "grad_norm": 9.602535161901319, + "learning_rate": 5e-06, + "loss": 0.2109, + "num_input_tokens_seen": 95808820, + "step": 558 + }, + { + "epoch": 0.1467745117380154, + "loss": 0.1991540640592575, + "loss_ce": 0.0036584637127816677, + "loss_iou": 0.51953125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 95808820, + "step": 558 + }, + { + "epoch": 0.1470375484974025, + "grad_norm": 8.512482701290667, + "learning_rate": 5e-06, + "loss": 0.2626, + "num_input_tokens_seen": 95980876, + "step": 559 + }, + { + "epoch": 0.1470375484974025, + "loss": 0.3269794285297394, + "loss_ce": 0.0010516871698200703, + "loss_iou": 0.3203125, + "loss_num": 0.0654296875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 95980876, + "step": 559 + }, + { + "epoch": 0.14730058525678963, + "grad_norm": 8.300386324136479, + "learning_rate": 5e-06, + "loss": 0.178, + "num_input_tokens_seen": 96153036, + "step": 560 + }, + { + "epoch": 0.14730058525678963, + "loss": 0.2191367745399475, + "loss_ce": 0.006795480381697416, + "loss_iou": 0.5625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 96153036, + "step": 560 + }, + { + "epoch": 0.14756362201617676, + "grad_norm": 5.775876970805374, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 96322532, + "step": 561 + }, + { + "epoch": 0.14756362201617676, + "loss": 0.1977005898952484, + "loss_ce": 0.006996248383074999, + "loss_iou": 0.625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 96322532, + "step": 561 + }, + { + "epoch": 0.1478266587755639, + "grad_norm": 6.533356383107408, + "learning_rate": 5e-06, + "loss": 0.2025, + "num_input_tokens_seen": 96494568, + "step": 562 + }, + { + "epoch": 0.1478266587755639, + "loss": 0.15407304465770721, + "loss_ce": 0.0012410087510943413, + "loss_iou": 0.62890625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 96494568, + "step": 562 + }, + { + "epoch": 0.14808969553495102, + "grad_norm": 9.772718457216582, + "learning_rate": 5e-06, + "loss": 0.2334, + "num_input_tokens_seen": 96666892, + "step": 563 + }, + { + "epoch": 0.14808969553495102, + "loss": 0.18993595242500305, + "loss_ce": 0.002985279308632016, + "loss_iou": 0.75, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 96666892, + "step": 563 + }, + { + "epoch": 0.14835273229433812, + "grad_norm": 10.172183960721854, + "learning_rate": 5e-06, + "loss": 0.2474, + "num_input_tokens_seen": 96837620, + "step": 564 + }, + { + "epoch": 0.14835273229433812, + "loss": 0.20410403609275818, + "loss_ce": 0.004641146864742041, + "loss_iou": 0.5, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 96837620, + "step": 564 + }, + { + "epoch": 0.14861576905372526, + "grad_norm": 9.445540103601473, + "learning_rate": 5e-06, + "loss": 0.2275, + "num_input_tokens_seen": 97009692, + "step": 565 + }, + { + "epoch": 0.14861576905372526, + "loss": 0.13070067763328552, + "loss_ce": 0.0018554661655798554, + "loss_iou": 0.6015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 97009692, + "step": 565 + }, + { + "epoch": 0.14887880581311239, + "grad_norm": 7.521927615990519, + "learning_rate": 5e-06, + "loss": 0.2008, + "num_input_tokens_seen": 97182076, + "step": 566 + }, + { + "epoch": 0.14887880581311239, + "loss": 0.26979702711105347, + "loss_ce": 0.009176918305456638, + "loss_iou": 0.60546875, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 97182076, + "step": 566 + }, + { + "epoch": 0.14914184257249952, + "grad_norm": 6.945071393253576, + "learning_rate": 5e-06, + "loss": 0.2175, + "num_input_tokens_seen": 97352348, + "step": 567 + }, + { + "epoch": 0.14914184257249952, + "loss": 0.2486119419336319, + "loss_ce": 0.004898556973785162, + "loss_iou": 0.5078125, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 97352348, + "step": 567 + }, + { + "epoch": 0.14940487933188662, + "grad_norm": 19.761163824718725, + "learning_rate": 5e-06, + "loss": 0.2266, + "num_input_tokens_seen": 97524808, + "step": 568 + }, + { + "epoch": 0.14940487933188662, + "loss": 0.2611408531665802, + "loss_ce": 0.0016193758929148316, + "loss_iou": 0.43359375, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 97524808, + "step": 568 + }, + { + "epoch": 0.14966791609127375, + "grad_norm": 11.05044000141412, + "learning_rate": 5e-06, + "loss": 0.2071, + "num_input_tokens_seen": 97696860, + "step": 569 + }, + { + "epoch": 0.14966791609127375, + "loss": 0.17859557271003723, + "loss_ce": 0.002020859392359853, + "loss_iou": 0.6796875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 97696860, + "step": 569 + }, + { + "epoch": 0.14993095285066088, + "grad_norm": 10.09349160636077, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 97869048, + "step": 570 + }, + { + "epoch": 0.14993095285066088, + "loss": 0.1531415581703186, + "loss_ce": 0.001164011424407363, + "loss_iou": 0.66796875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 97869048, + "step": 570 + }, + { + "epoch": 0.150193989610048, + "grad_norm": 6.128271963212333, + "learning_rate": 5e-06, + "loss": 0.1797, + "num_input_tokens_seen": 98041152, + "step": 571 + }, + { + "epoch": 0.150193989610048, + "loss": 0.18703126907348633, + "loss_ce": 0.003956317901611328, + "loss_iou": 0.376953125, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 98041152, + "step": 571 + }, + { + "epoch": 0.15045702636943512, + "grad_norm": 9.404399204091265, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 98213508, + "step": 572 + }, + { + "epoch": 0.15045702636943512, + "loss": 0.1673622578382492, + "loss_ce": 0.011722613126039505, + "loss_iou": 0.60546875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 98213508, + "step": 572 + }, + { + "epoch": 0.15072006312882225, + "grad_norm": 8.281687271797518, + "learning_rate": 5e-06, + "loss": 0.2554, + "num_input_tokens_seen": 98385768, + "step": 573 + }, + { + "epoch": 0.15072006312882225, + "loss": 0.29049456119537354, + "loss_ce": 0.007413491606712341, + "loss_iou": 0.5703125, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 98385768, + "step": 573 + }, + { + "epoch": 0.15098309988820938, + "grad_norm": 9.329556868568911, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 98555404, + "step": 574 + }, + { + "epoch": 0.15098309988820938, + "loss": 0.17510266602039337, + "loss_ce": 0.006096326746046543, + "loss_iou": 0.6015625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 98555404, + "step": 574 + }, + { + "epoch": 0.1512461366475965, + "grad_norm": 6.666929195510284, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 98727452, + "step": 575 + }, + { + "epoch": 0.1512461366475965, + "loss": 0.2249300628900528, + "loss_ce": 0.008560429327189922, + "loss_iou": 0.400390625, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 98727452, + "step": 575 + }, + { + "epoch": 0.15150917340698364, + "grad_norm": 12.576347626904536, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 98899788, + "step": 576 + }, + { + "epoch": 0.15150917340698364, + "loss": 0.21397414803504944, + "loss_ce": 0.004837184213101864, + "loss_iou": 0.60546875, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 98899788, + "step": 576 + }, + { + "epoch": 0.15177221016637074, + "grad_norm": 9.891929331498387, + "learning_rate": 5e-06, + "loss": 0.2031, + "num_input_tokens_seen": 99072416, + "step": 577 + }, + { + "epoch": 0.15177221016637074, + "loss": 0.24624097347259521, + "loss_ce": 0.011255611665546894, + "loss_iou": 0.546875, + "loss_num": 0.046875, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 99072416, + "step": 577 + }, + { + "epoch": 0.15203524692575787, + "grad_norm": 12.530200633920343, + "learning_rate": 5e-06, + "loss": 0.2216, + "num_input_tokens_seen": 99244280, + "step": 578 + }, + { + "epoch": 0.15203524692575787, + "loss": 0.19744133949279785, + "loss_ce": 0.001030205050483346, + "loss_iou": 0.671875, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 99244280, + "step": 578 + }, + { + "epoch": 0.152298283685145, + "grad_norm": 10.630444163918117, + "learning_rate": 5e-06, + "loss": 0.2121, + "num_input_tokens_seen": 99416184, + "step": 579 + }, + { + "epoch": 0.152298283685145, + "loss": 0.2667901813983917, + "loss_ce": 0.0020807269029319286, + "loss_iou": 0.40234375, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 99416184, + "step": 579 + }, + { + "epoch": 0.15256132044453213, + "grad_norm": 10.945427939330838, + "learning_rate": 5e-06, + "loss": 0.1943, + "num_input_tokens_seen": 99588500, + "step": 580 + }, + { + "epoch": 0.15256132044453213, + "loss": 0.19293344020843506, + "loss_ce": 0.005006188526749611, + "loss_iou": 0.4921875, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 99588500, + "step": 580 + }, + { + "epoch": 0.15282435720391924, + "grad_norm": 10.349037140854193, + "learning_rate": 5e-06, + "loss": 0.1856, + "num_input_tokens_seen": 99760948, + "step": 581 + }, + { + "epoch": 0.15282435720391924, + "loss": 0.1601850688457489, + "loss_ce": 0.0018598883179947734, + "loss_iou": 0.55859375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 99760948, + "step": 581 + }, + { + "epoch": 0.15308739396330637, + "grad_norm": 13.119978211180065, + "learning_rate": 5e-06, + "loss": 0.1813, + "num_input_tokens_seen": 99932992, + "step": 582 + }, + { + "epoch": 0.15308739396330637, + "loss": 0.24380066990852356, + "loss_ce": 0.002528695622459054, + "loss_iou": 0.7109375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 99932992, + "step": 582 + }, + { + "epoch": 0.1533504307226935, + "grad_norm": 8.165624923829593, + "learning_rate": 5e-06, + "loss": 0.1637, + "num_input_tokens_seen": 100104992, + "step": 583 + }, + { + "epoch": 0.1533504307226935, + "loss": 0.18439146876335144, + "loss_ce": 0.0024456623941659927, + "loss_iou": 0.6640625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 100104992, + "step": 583 + }, + { + "epoch": 0.15361346748208063, + "grad_norm": 9.086411500582741, + "learning_rate": 5e-06, + "loss": 0.1853, + "num_input_tokens_seen": 100277144, + "step": 584 + }, + { + "epoch": 0.15361346748208063, + "loss": 0.17580674588680267, + "loss_ce": 0.0032603610306978226, + "loss_iou": 0.4296875, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 100277144, + "step": 584 + }, + { + "epoch": 0.15387650424146773, + "grad_norm": 11.772959826434088, + "learning_rate": 5e-06, + "loss": 0.2266, + "num_input_tokens_seen": 100449660, + "step": 585 + }, + { + "epoch": 0.15387650424146773, + "loss": 0.2301916778087616, + "loss_ce": 0.0029577831737697124, + "loss_iou": 0.482421875, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 100449660, + "step": 585 + }, + { + "epoch": 0.15413954100085486, + "grad_norm": 8.411861716804005, + "learning_rate": 5e-06, + "loss": 0.2131, + "num_input_tokens_seen": 100622028, + "step": 586 + }, + { + "epoch": 0.15413954100085486, + "loss": 0.183104008436203, + "loss_ce": 0.0029892674647271633, + "loss_iou": 0.6015625, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 100622028, + "step": 586 + }, + { + "epoch": 0.154402577760242, + "grad_norm": 8.219849696412924, + "learning_rate": 5e-06, + "loss": 0.2095, + "num_input_tokens_seen": 100792792, + "step": 587 + }, + { + "epoch": 0.154402577760242, + "loss": 0.23411154747009277, + "loss_ce": 0.0037648691795766354, + "loss_iou": 0.458984375, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 100792792, + "step": 587 + }, + { + "epoch": 0.15466561451962912, + "grad_norm": 7.806947443925601, + "learning_rate": 5e-06, + "loss": 0.1858, + "num_input_tokens_seen": 100964900, + "step": 588 + }, + { + "epoch": 0.15466561451962912, + "loss": 0.15801170468330383, + "loss_ce": 0.002433100016787648, + "loss_iou": 0.59765625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 100964900, + "step": 588 + }, + { + "epoch": 0.15492865127901626, + "grad_norm": 9.15652521254843, + "learning_rate": 5e-06, + "loss": 0.1918, + "num_input_tokens_seen": 101137068, + "step": 589 + }, + { + "epoch": 0.15492865127901626, + "loss": 0.14395104348659515, + "loss_ce": 0.0018611999694257975, + "loss_iou": 0.62109375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 101137068, + "step": 589 + }, + { + "epoch": 0.15519168803840336, + "grad_norm": 18.2857885682754, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 101309424, + "step": 590 + }, + { + "epoch": 0.15519168803840336, + "loss": 0.19831930100917816, + "loss_ce": 0.0009926356142386794, + "loss_iou": 0.546875, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 101309424, + "step": 590 + }, + { + "epoch": 0.1554547247977905, + "grad_norm": 7.944412820463009, + "learning_rate": 5e-06, + "loss": 0.205, + "num_input_tokens_seen": 101478352, + "step": 591 + }, + { + "epoch": 0.1554547247977905, + "loss": 0.21606285870075226, + "loss_ce": 0.00683434447273612, + "loss_iou": 0.431640625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 101478352, + "step": 591 + }, + { + "epoch": 0.15571776155717762, + "grad_norm": 6.618575384086146, + "learning_rate": 5e-06, + "loss": 0.1821, + "num_input_tokens_seen": 101650324, + "step": 592 + }, + { + "epoch": 0.15571776155717762, + "loss": 0.20387296378612518, + "loss_ce": 0.0040438538417220116, + "loss_iou": 0.4765625, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 101650324, + "step": 592 + }, + { + "epoch": 0.15598079831656475, + "grad_norm": 13.644949403576716, + "learning_rate": 5e-06, + "loss": 0.1791, + "num_input_tokens_seen": 101822564, + "step": 593 + }, + { + "epoch": 0.15598079831656475, + "loss": 0.12713779509067535, + "loss_ce": 0.004823335446417332, + "loss_iou": 0.609375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 101822564, + "step": 593 + }, + { + "epoch": 0.15624383507595185, + "grad_norm": 11.597792275294081, + "learning_rate": 5e-06, + "loss": 0.2129, + "num_input_tokens_seen": 101994900, + "step": 594 + }, + { + "epoch": 0.15624383507595185, + "loss": 0.17677326500415802, + "loss_ce": 0.0048372335731983185, + "loss_iou": 0.68359375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 101994900, + "step": 594 + }, + { + "epoch": 0.15650687183533898, + "grad_norm": 7.4047027755949175, + "learning_rate": 5e-06, + "loss": 0.1867, + "num_input_tokens_seen": 102165488, + "step": 595 + }, + { + "epoch": 0.15650687183533898, + "loss": 0.1512867510318756, + "loss_ce": 0.0011402517557144165, + "loss_iou": 0.6015625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 102165488, + "step": 595 + }, + { + "epoch": 0.15676990859472612, + "grad_norm": 7.866562092019479, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 102337576, + "step": 596 + }, + { + "epoch": 0.15676990859472612, + "loss": 0.11417586356401443, + "loss_ce": 0.0016270325286313891, + "loss_iou": 0.55078125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 102337576, + "step": 596 + }, + { + "epoch": 0.15703294535411325, + "grad_norm": 14.250796858508032, + "learning_rate": 5e-06, + "loss": 0.2038, + "num_input_tokens_seen": 102509976, + "step": 597 + }, + { + "epoch": 0.15703294535411325, + "loss": 0.20375049114227295, + "loss_ce": 0.003677244298160076, + "loss_iou": 0.53125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 102509976, + "step": 597 + }, + { + "epoch": 0.15729598211350035, + "grad_norm": 5.485478227776666, + "learning_rate": 5e-06, + "loss": 0.1839, + "num_input_tokens_seen": 102682292, + "step": 598 + }, + { + "epoch": 0.15729598211350035, + "loss": 0.1260145604610443, + "loss_ce": 0.002143711317330599, + "loss_iou": 0.65625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 102682292, + "step": 598 + }, + { + "epoch": 0.15755901887288748, + "grad_norm": 16.90449659888107, + "learning_rate": 5e-06, + "loss": 0.1869, + "num_input_tokens_seen": 102854396, + "step": 599 + }, + { + "epoch": 0.15755901887288748, + "loss": 0.18277683854103088, + "loss_ce": 0.002356911078095436, + "loss_iou": 0.46484375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 102854396, + "step": 599 + }, + { + "epoch": 0.1578220556322746, + "grad_norm": 8.022384772643738, + "learning_rate": 5e-06, + "loss": 0.2261, + "num_input_tokens_seen": 103024668, + "step": 600 + }, + { + "epoch": 0.1578220556322746, + "loss": 0.2725946605205536, + "loss_ce": 0.0058710225857794285, + "loss_iou": 0.63671875, + "loss_num": 0.05322265625, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 103024668, + "step": 600 + }, + { + "epoch": 0.15808509239166174, + "grad_norm": 6.502303135299271, + "learning_rate": 5e-06, + "loss": 0.2163, + "num_input_tokens_seen": 103195160, + "step": 601 + }, + { + "epoch": 0.15808509239166174, + "loss": 0.16184811294078827, + "loss_ce": 0.003950160928070545, + "loss_iou": 0.6015625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 103195160, + "step": 601 + }, + { + "epoch": 0.15834812915104887, + "grad_norm": 11.6613104395809, + "learning_rate": 5e-06, + "loss": 0.1783, + "num_input_tokens_seen": 103365588, + "step": 602 + }, + { + "epoch": 0.15834812915104887, + "loss": 0.27217578887939453, + "loss_ce": 0.006367700640112162, + "loss_iou": 0.62109375, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 103365588, + "step": 602 + }, + { + "epoch": 0.15861116591043598, + "grad_norm": 6.846414395262611, + "learning_rate": 5e-06, + "loss": 0.2057, + "num_input_tokens_seen": 103537444, + "step": 603 + }, + { + "epoch": 0.15861116591043598, + "loss": 0.24833053350448608, + "loss_ce": 0.004678180906921625, + "loss_iou": 0.625, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 103537444, + "step": 603 + }, + { + "epoch": 0.1588742026698231, + "grad_norm": 5.663069385724008, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 103709932, + "step": 604 + }, + { + "epoch": 0.1588742026698231, + "loss": 0.19428852200508118, + "loss_ce": 0.000929144793190062, + "loss_iou": 0.51953125, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 103709932, + "step": 604 + }, + { + "epoch": 0.15913723942921024, + "grad_norm": 6.358230492219544, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 103882084, + "step": 605 + }, + { + "epoch": 0.15913723942921024, + "loss": 0.19055569171905518, + "loss_ce": 0.005802282597869635, + "loss_iou": 0.51953125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 103882084, + "step": 605 + }, + { + "epoch": 0.15940027618859737, + "grad_norm": 8.229813023952058, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 104054236, + "step": 606 + }, + { + "epoch": 0.15940027618859737, + "loss": 0.14631760120391846, + "loss_ce": 0.0008098002290353179, + "loss_iou": 0.45703125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 104054236, + "step": 606 + }, + { + "epoch": 0.15966331294798447, + "grad_norm": 8.649522876210598, + "learning_rate": 5e-06, + "loss": 0.1982, + "num_input_tokens_seen": 104224580, + "step": 607 + }, + { + "epoch": 0.15966331294798447, + "loss": 0.2813430428504944, + "loss_ce": 0.0039382753893733025, + "loss_iou": 0.51171875, + "loss_num": 0.0556640625, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 104224580, + "step": 607 + }, + { + "epoch": 0.1599263497073716, + "grad_norm": 9.061064934100147, + "learning_rate": 5e-06, + "loss": 0.2389, + "num_input_tokens_seen": 104396656, + "step": 608 + }, + { + "epoch": 0.1599263497073716, + "loss": 0.25585615634918213, + "loss_ce": 0.005581488832831383, + "loss_iou": 0.3515625, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 104396656, + "step": 608 + }, + { + "epoch": 0.16018938646675873, + "grad_norm": 10.782439869738488, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 104568804, + "step": 609 + }, + { + "epoch": 0.16018938646675873, + "loss": 0.22171396017074585, + "loss_ce": 0.0010718655539676547, + "loss_iou": 0.6640625, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 104568804, + "step": 609 + }, + { + "epoch": 0.16045242322614586, + "grad_norm": 7.912260836349583, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 104740976, + "step": 610 + }, + { + "epoch": 0.16045242322614586, + "loss": 0.20210719108581543, + "loss_ce": 0.002094991272315383, + "loss_iou": 0.60546875, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 104740976, + "step": 610 + }, + { + "epoch": 0.16071545998553297, + "grad_norm": 7.072100604468161, + "learning_rate": 5e-06, + "loss": 0.1877, + "num_input_tokens_seen": 104913120, + "step": 611 + }, + { + "epoch": 0.16071545998553297, + "loss": 0.24669376015663147, + "loss_ce": 0.005971122998744249, + "loss_iou": 0.451171875, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 104913120, + "step": 611 + }, + { + "epoch": 0.1609784967449201, + "grad_norm": 6.775986925165555, + "learning_rate": 5e-06, + "loss": 0.2033, + "num_input_tokens_seen": 105085268, + "step": 612 + }, + { + "epoch": 0.1609784967449201, + "loss": 0.17514100670814514, + "loss_ce": 0.005036028102040291, + "loss_iou": 0.63671875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 105085268, + "step": 612 + }, + { + "epoch": 0.16124153350430723, + "grad_norm": 13.436082189650097, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 105255536, + "step": 613 + }, + { + "epoch": 0.16124153350430723, + "loss": 0.1662948876619339, + "loss_ce": 0.004856906831264496, + "loss_iou": 0.56640625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 105255536, + "step": 613 + }, + { + "epoch": 0.16150457026369436, + "grad_norm": 15.212823710443937, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 105426276, + "step": 614 + }, + { + "epoch": 0.16150457026369436, + "loss": 0.21024103462696075, + "loss_ce": 0.000707346829585731, + "loss_iou": 0.349609375, + "loss_num": 0.041748046875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 105426276, + "step": 614 + }, + { + "epoch": 0.1617676070230815, + "grad_norm": 5.811817664832679, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 105598528, + "step": 615 + }, + { + "epoch": 0.1617676070230815, + "loss": 0.17331555485725403, + "loss_ce": 0.0019593401812016964, + "loss_iou": 0.55078125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 105598528, + "step": 615 + }, + { + "epoch": 0.1620306437824686, + "grad_norm": 7.929569482892307, + "learning_rate": 5e-06, + "loss": 0.1901, + "num_input_tokens_seen": 105768684, + "step": 616 + }, + { + "epoch": 0.1620306437824686, + "loss": 0.22191157937049866, + "loss_ce": 0.0013305249158293009, + "loss_iou": 0.470703125, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 105768684, + "step": 616 + }, + { + "epoch": 0.16229368054185572, + "grad_norm": 9.722668759038529, + "learning_rate": 5e-06, + "loss": 0.238, + "num_input_tokens_seen": 105938984, + "step": 617 + }, + { + "epoch": 0.16229368054185572, + "loss": 0.2918306887149811, + "loss_ce": 0.004660272039473057, + "loss_iou": 0.76953125, + "loss_num": 0.057373046875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 105938984, + "step": 617 + }, + { + "epoch": 0.16255671730124285, + "grad_norm": 8.495376084254536, + "learning_rate": 5e-06, + "loss": 0.2003, + "num_input_tokens_seen": 106111216, + "step": 618 + }, + { + "epoch": 0.16255671730124285, + "loss": 0.13662417232990265, + "loss_ce": 0.0008209550869651139, + "loss_iou": 0.46875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 106111216, + "step": 618 + }, + { + "epoch": 0.16281975406062998, + "grad_norm": 10.294809858036002, + "learning_rate": 5e-06, + "loss": 0.2276, + "num_input_tokens_seen": 106283444, + "step": 619 + }, + { + "epoch": 0.16281975406062998, + "loss": 0.23945499956607819, + "loss_ce": 0.0016620358219370246, + "loss_iou": 0.55859375, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 106283444, + "step": 619 + }, + { + "epoch": 0.1630827908200171, + "grad_norm": 8.591329712303546, + "learning_rate": 5e-06, + "loss": 0.1832, + "num_input_tokens_seen": 106455556, + "step": 620 + }, + { + "epoch": 0.1630827908200171, + "loss": 0.24216794967651367, + "loss_ce": 0.004680164158344269, + "loss_iou": 0.703125, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 106455556, + "step": 620 + }, + { + "epoch": 0.16334582757940422, + "grad_norm": 11.580746408707443, + "learning_rate": 5e-06, + "loss": 0.222, + "num_input_tokens_seen": 106625564, + "step": 621 + }, + { + "epoch": 0.16334582757940422, + "loss": 0.27062344551086426, + "loss_ce": 0.0013363163452595472, + "loss_iou": 0.625, + "loss_num": 0.0537109375, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 106625564, + "step": 621 + }, + { + "epoch": 0.16360886433879135, + "grad_norm": 9.075656365353165, + "learning_rate": 5e-06, + "loss": 0.2241, + "num_input_tokens_seen": 106797500, + "step": 622 + }, + { + "epoch": 0.16360886433879135, + "loss": 0.25023964047431946, + "loss_ce": 0.002986219245940447, + "loss_iou": 0.66015625, + "loss_num": 0.049560546875, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 106797500, + "step": 622 + }, + { + "epoch": 0.16387190109817848, + "grad_norm": 7.173557460385501, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 106969624, + "step": 623 + }, + { + "epoch": 0.16387190109817848, + "loss": 0.15543386340141296, + "loss_ce": 0.004310814663767815, + "loss_iou": 0.51171875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 106969624, + "step": 623 + }, + { + "epoch": 0.16413493785756558, + "grad_norm": 7.146771956956073, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 107139936, + "step": 624 + }, + { + "epoch": 0.16413493785756558, + "loss": 0.2210107445716858, + "loss_ce": 0.0020776439923793077, + "loss_iou": 0.376953125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 107139936, + "step": 624 + }, + { + "epoch": 0.1643979746169527, + "grad_norm": 7.784232141951156, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 107312016, + "step": 625 + }, + { + "epoch": 0.1643979746169527, + "loss": 0.16655078530311584, + "loss_ce": 0.00200000312179327, + "loss_iou": 0.5078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 107312016, + "step": 625 + }, + { + "epoch": 0.16466101137633984, + "grad_norm": 9.666585010057217, + "learning_rate": 5e-06, + "loss": 0.2351, + "num_input_tokens_seen": 107484484, + "step": 626 + }, + { + "epoch": 0.16466101137633984, + "loss": 0.17668788135051727, + "loss_ce": 0.0009676595800556242, + "loss_iou": 0.70703125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 107484484, + "step": 626 + }, + { + "epoch": 0.16492404813572697, + "grad_norm": 15.599284961255806, + "learning_rate": 5e-06, + "loss": 0.1943, + "num_input_tokens_seen": 107656148, + "step": 627 + }, + { + "epoch": 0.16492404813572697, + "loss": 0.2105821818113327, + "loss_ce": 0.0011095235822722316, + "loss_iou": 0.5, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 107656148, + "step": 627 + }, + { + "epoch": 0.1651870848951141, + "grad_norm": 8.248204010218137, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 107828348, + "step": 628 + }, + { + "epoch": 0.1651870848951141, + "loss": 0.1219111904501915, + "loss_ce": 0.009545465931296349, + "loss_iou": 0.609375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 107828348, + "step": 628 + }, + { + "epoch": 0.1654501216545012, + "grad_norm": 10.643869540803188, + "learning_rate": 5e-06, + "loss": 0.191, + "num_input_tokens_seen": 108000340, + "step": 629 + }, + { + "epoch": 0.1654501216545012, + "loss": 0.21501825749874115, + "loss_ce": 0.0015172738349065185, + "loss_iou": 0.6171875, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 108000340, + "step": 629 + }, + { + "epoch": 0.16571315841388834, + "grad_norm": 6.957892002207251, + "learning_rate": 5e-06, + "loss": 0.1723, + "num_input_tokens_seen": 108172648, + "step": 630 + }, + { + "epoch": 0.16571315841388834, + "loss": 0.17599767446517944, + "loss_ce": 0.0010709069902077317, + "loss_iou": 0.4921875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 108172648, + "step": 630 + }, + { + "epoch": 0.16597619517327547, + "grad_norm": 8.642857572938437, + "learning_rate": 5e-06, + "loss": 0.2342, + "num_input_tokens_seen": 108343044, + "step": 631 + }, + { + "epoch": 0.16597619517327547, + "loss": 0.18351054191589355, + "loss_ce": 0.00296853668987751, + "loss_iou": 0.69921875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 108343044, + "step": 631 + }, + { + "epoch": 0.1662392319326626, + "grad_norm": 7.980653444631839, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 108515048, + "step": 632 + }, + { + "epoch": 0.1662392319326626, + "loss": 0.1818259358406067, + "loss_ce": 0.008302995935082436, + "loss_iou": 0.5625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 108515048, + "step": 632 + }, + { + "epoch": 0.1665022686920497, + "grad_norm": 7.668823525125559, + "learning_rate": 5e-06, + "loss": 0.1603, + "num_input_tokens_seen": 108687320, + "step": 633 + }, + { + "epoch": 0.1665022686920497, + "loss": 0.2121184766292572, + "loss_ce": 0.0024016951210796833, + "loss_iou": 0.56640625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 108687320, + "step": 633 + }, + { + "epoch": 0.16676530545143683, + "grad_norm": 8.952178505337718, + "learning_rate": 5e-06, + "loss": 0.2244, + "num_input_tokens_seen": 108859852, + "step": 634 + }, + { + "epoch": 0.16676530545143683, + "loss": 0.24805203080177307, + "loss_ce": 0.004399674944579601, + "loss_iou": 0.59765625, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 108859852, + "step": 634 + }, + { + "epoch": 0.16702834221082397, + "grad_norm": 8.269015730220213, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 109031824, + "step": 635 + }, + { + "epoch": 0.16702834221082397, + "loss": 0.3403623104095459, + "loss_ce": 0.01095556654036045, + "loss_iou": 0.314453125, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 109031824, + "step": 635 + }, + { + "epoch": 0.1672913789702111, + "grad_norm": 11.588758914669935, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 109204100, + "step": 636 + }, + { + "epoch": 0.1672913789702111, + "loss": 0.25584501028060913, + "loss_ce": 0.007065705489367247, + "loss_iou": 0.578125, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 109204100, + "step": 636 + }, + { + "epoch": 0.1675544157295982, + "grad_norm": 9.273803296838299, + "learning_rate": 5e-06, + "loss": 0.203, + "num_input_tokens_seen": 109376120, + "step": 637 + }, + { + "epoch": 0.1675544157295982, + "loss": 0.24991419911384583, + "loss_ce": 0.0015011176001280546, + "loss_iou": 0.64453125, + "loss_num": 0.0498046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 109376120, + "step": 637 + }, + { + "epoch": 0.16781745248898533, + "grad_norm": 5.629607113211884, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 109548116, + "step": 638 + }, + { + "epoch": 0.16781745248898533, + "loss": 0.22593827545642853, + "loss_ce": 0.005753945559263229, + "loss_iou": 0.470703125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 109548116, + "step": 638 + }, + { + "epoch": 0.16808048924837246, + "grad_norm": 10.827873954115349, + "learning_rate": 5e-06, + "loss": 0.154, + "num_input_tokens_seen": 109720360, + "step": 639 + }, + { + "epoch": 0.16808048924837246, + "loss": 0.10586154460906982, + "loss_ce": 0.0011862462852150202, + "loss_iou": 0.474609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 109720360, + "step": 639 + }, + { + "epoch": 0.1683435260077596, + "grad_norm": 6.979104215801615, + "learning_rate": 5e-06, + "loss": 0.1811, + "num_input_tokens_seen": 109892700, + "step": 640 + }, + { + "epoch": 0.1683435260077596, + "loss": 0.21827656030654907, + "loss_ce": 0.0010524489916861057, + "loss_iou": 0.62109375, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 109892700, + "step": 640 + }, + { + "epoch": 0.1686065627671467, + "grad_norm": 12.485157254065346, + "learning_rate": 5e-06, + "loss": 0.2113, + "num_input_tokens_seen": 110064992, + "step": 641 + }, + { + "epoch": 0.1686065627671467, + "loss": 0.26375001668930054, + "loss_ce": 0.0014819505158811808, + "loss_iou": 0.56640625, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 110064992, + "step": 641 + }, + { + "epoch": 0.16886959952653383, + "grad_norm": 9.158584740167319, + "learning_rate": 5e-06, + "loss": 0.1418, + "num_input_tokens_seen": 110237352, + "step": 642 + }, + { + "epoch": 0.16886959952653383, + "loss": 0.12861773371696472, + "loss_ce": 0.006120163947343826, + "loss_iou": 0.53125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 110237352, + "step": 642 + }, + { + "epoch": 0.16913263628592096, + "grad_norm": 12.50685540012725, + "learning_rate": 5e-06, + "loss": 0.226, + "num_input_tokens_seen": 110409596, + "step": 643 + }, + { + "epoch": 0.16913263628592096, + "loss": 0.29422521591186523, + "loss_ce": 0.006322397850453854, + "loss_iou": 0.6171875, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 110409596, + "step": 643 + }, + { + "epoch": 0.1693956730453081, + "grad_norm": 8.91219200815607, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 110581872, + "step": 644 + }, + { + "epoch": 0.1693956730453081, + "loss": 0.19362246990203857, + "loss_ce": 0.0011175863910466433, + "loss_iou": 0.6328125, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 110581872, + "step": 644 + }, + { + "epoch": 0.16965870980469522, + "grad_norm": 6.648045150838095, + "learning_rate": 5e-06, + "loss": 0.1817, + "num_input_tokens_seen": 110754112, + "step": 645 + }, + { + "epoch": 0.16965870980469522, + "loss": 0.22110968828201294, + "loss_ce": 0.0007727851625531912, + "loss_iou": 0.44921875, + "loss_num": 0.0439453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 110754112, + "step": 645 + }, + { + "epoch": 0.16992174656408232, + "grad_norm": 5.685997850783031, + "learning_rate": 5e-06, + "loss": 0.1755, + "num_input_tokens_seen": 110926320, + "step": 646 + }, + { + "epoch": 0.16992174656408232, + "loss": 0.16139136254787445, + "loss_ce": 0.0022421882022172213, + "loss_iou": 0.54296875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 110926320, + "step": 646 + }, + { + "epoch": 0.17018478332346945, + "grad_norm": 5.619576982998229, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 111098480, + "step": 647 + }, + { + "epoch": 0.17018478332346945, + "loss": 0.07204495370388031, + "loss_ce": 0.0005727877141907811, + "loss_iou": 0.484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 111098480, + "step": 647 + }, + { + "epoch": 0.17044782008285658, + "grad_norm": 7.873961542866977, + "learning_rate": 5e-06, + "loss": 0.2025, + "num_input_tokens_seen": 111270660, + "step": 648 + }, + { + "epoch": 0.17044782008285658, + "loss": 0.2468957006931305, + "loss_ce": 0.001473332871682942, + "loss_iou": 0.4453125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 111270660, + "step": 648 + }, + { + "epoch": 0.1707108568422437, + "grad_norm": 10.015757077433259, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 111442504, + "step": 649 + }, + { + "epoch": 0.1707108568422437, + "loss": 0.14880256354808807, + "loss_ce": 0.0109546585008502, + "loss_iou": 0.52734375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 111442504, + "step": 649 + }, + { + "epoch": 0.17097389360163082, + "grad_norm": 7.899725217893395, + "learning_rate": 5e-06, + "loss": 0.2213, + "num_input_tokens_seen": 111614612, + "step": 650 + }, + { + "epoch": 0.17097389360163082, + "loss": 0.31617793440818787, + "loss_ce": 0.005386924371123314, + "loss_iou": 0.39453125, + "loss_num": 0.062255859375, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 111614612, + "step": 650 + }, + { + "epoch": 0.17123693036101795, + "grad_norm": 11.203783050737329, + "learning_rate": 5e-06, + "loss": 0.2011, + "num_input_tokens_seen": 111786832, + "step": 651 + }, + { + "epoch": 0.17123693036101795, + "loss": 0.16059955954551697, + "loss_ce": 0.003617130685597658, + "loss_iou": 0.57421875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 111786832, + "step": 651 + }, + { + "epoch": 0.17149996712040508, + "grad_norm": 10.553074580421196, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 111959112, + "step": 652 + }, + { + "epoch": 0.17149996712040508, + "loss": 0.10537019371986389, + "loss_ce": 0.0024038811679929495, + "loss_iou": 0.40234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 111959112, + "step": 652 + }, + { + "epoch": 0.1717630038797922, + "grad_norm": 7.878994667708907, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 112129472, + "step": 653 + }, + { + "epoch": 0.1717630038797922, + "loss": 0.14291326701641083, + "loss_ce": 0.0017999822739511728, + "loss_iou": 0.50390625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 112129472, + "step": 653 + }, + { + "epoch": 0.1720260406391793, + "grad_norm": 8.502469919546648, + "learning_rate": 5e-06, + "loss": 0.2067, + "num_input_tokens_seen": 112301764, + "step": 654 + }, + { + "epoch": 0.1720260406391793, + "loss": 0.14102406799793243, + "loss_ce": 0.0035118628293275833, + "loss_iou": 0.640625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 112301764, + "step": 654 + }, + { + "epoch": 0.17228907739856644, + "grad_norm": 7.248102860819298, + "learning_rate": 5e-06, + "loss": 0.1947, + "num_input_tokens_seen": 112473728, + "step": 655 + }, + { + "epoch": 0.17228907739856644, + "loss": 0.3125525116920471, + "loss_ce": 0.002799113281071186, + "loss_iou": 0.455078125, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 112473728, + "step": 655 + }, + { + "epoch": 0.17255211415795357, + "grad_norm": 7.514690707459716, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 112646020, + "step": 656 + }, + { + "epoch": 0.17255211415795357, + "loss": 0.19242502748966217, + "loss_ce": 0.0032160417176783085, + "loss_iou": 0.451171875, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 112646020, + "step": 656 + }, + { + "epoch": 0.1728151509173407, + "grad_norm": 12.157018929724885, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 112818344, + "step": 657 + }, + { + "epoch": 0.1728151509173407, + "loss": 0.17007869482040405, + "loss_ce": 0.0014995899982750416, + "loss_iou": 0.63671875, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 112818344, + "step": 657 + }, + { + "epoch": 0.17307818767672783, + "grad_norm": 12.713612456980975, + "learning_rate": 5e-06, + "loss": 0.1854, + "num_input_tokens_seen": 112984384, + "step": 658 + }, + { + "epoch": 0.17307818767672783, + "loss": 0.2311791479587555, + "loss_ce": 0.003090762998908758, + "loss_iou": 0.69140625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 112984384, + "step": 658 + }, + { + "epoch": 0.17334122443611494, + "grad_norm": 7.9869346345765235, + "learning_rate": 5e-06, + "loss": 0.1714, + "num_input_tokens_seen": 113156440, + "step": 659 + }, + { + "epoch": 0.17334122443611494, + "loss": 0.14270631968975067, + "loss_ce": 0.007757591083645821, + "loss_iou": 0.59765625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 113156440, + "step": 659 + }, + { + "epoch": 0.17360426119550207, + "grad_norm": 19.43755709327552, + "learning_rate": 5e-06, + "loss": 0.2275, + "num_input_tokens_seen": 113328684, + "step": 660 + }, + { + "epoch": 0.17360426119550207, + "loss": 0.19741018116474152, + "loss_ce": 0.023887230083346367, + "loss_iou": 0.47265625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 113328684, + "step": 660 + }, + { + "epoch": 0.1738672979548892, + "grad_norm": 12.233621879837852, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 113499120, + "step": 661 + }, + { + "epoch": 0.1738672979548892, + "loss": 0.17290905117988586, + "loss_ce": 0.03179576247930527, + "loss_iou": 0.640625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 113499120, + "step": 661 + }, + { + "epoch": 0.17413033471427633, + "grad_norm": 8.034436568495584, + "learning_rate": 5e-06, + "loss": 0.2288, + "num_input_tokens_seen": 113670976, + "step": 662 + }, + { + "epoch": 0.17413033471427633, + "loss": 0.18118935823440552, + "loss_ce": 0.0038822239730507135, + "loss_iou": 0.5625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 113670976, + "step": 662 + }, + { + "epoch": 0.17439337147366343, + "grad_norm": 6.838416843124344, + "learning_rate": 5e-06, + "loss": 0.211, + "num_input_tokens_seen": 113841352, + "step": 663 + }, + { + "epoch": 0.17439337147366343, + "loss": 0.16460734605789185, + "loss_ce": 0.0039017903618514538, + "loss_iou": 0.44140625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 113841352, + "step": 663 + }, + { + "epoch": 0.17465640823305056, + "grad_norm": 8.007803887726045, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 114013720, + "step": 664 + }, + { + "epoch": 0.17465640823305056, + "loss": 0.19987425208091736, + "loss_ce": 0.004256566055119038, + "loss_iou": 0.3671875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 114013720, + "step": 664 + }, + { + "epoch": 0.1749194449924377, + "grad_norm": 15.416941151295424, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 114185964, + "step": 665 + }, + { + "epoch": 0.1749194449924377, + "loss": 0.12474516034126282, + "loss_ce": 0.0013320783618837595, + "loss_iou": 0.53125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 114185964, + "step": 665 + }, + { + "epoch": 0.17518248175182483, + "grad_norm": 7.3780372763976105, + "learning_rate": 5e-06, + "loss": 0.1978, + "num_input_tokens_seen": 114354848, + "step": 666 + }, + { + "epoch": 0.17518248175182483, + "loss": 0.22682063281536102, + "loss_ce": 0.002394365146756172, + "loss_iou": 0.48828125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 114354848, + "step": 666 + }, + { + "epoch": 0.17544551851121193, + "grad_norm": 6.805100830892847, + "learning_rate": 5e-06, + "loss": 0.1893, + "num_input_tokens_seen": 114526920, + "step": 667 + }, + { + "epoch": 0.17544551851121193, + "loss": 0.16719527542591095, + "loss_ce": 0.003987260162830353, + "loss_iou": 0.4921875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 114526920, + "step": 667 + }, + { + "epoch": 0.17570855527059906, + "grad_norm": 13.214372039286259, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 114698988, + "step": 668 + }, + { + "epoch": 0.17570855527059906, + "loss": 0.11197628825902939, + "loss_ce": 0.0018078316934406757, + "loss_iou": 0.50390625, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 114698988, + "step": 668 + }, + { + "epoch": 0.1759715920299862, + "grad_norm": 11.38353136981813, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 114871164, + "step": 669 + }, + { + "epoch": 0.1759715920299862, + "loss": 0.13217589259147644, + "loss_ce": 0.0022930747363716364, + "loss_iou": 0.60546875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 114871164, + "step": 669 + }, + { + "epoch": 0.17623462878937332, + "grad_norm": 6.3416006325736705, + "learning_rate": 5e-06, + "loss": 0.1994, + "num_input_tokens_seen": 115043272, + "step": 670 + }, + { + "epoch": 0.17623462878937332, + "loss": 0.2425597459077835, + "loss_ce": 0.00726923206821084, + "loss_iou": 0.462890625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 115043272, + "step": 670 + }, + { + "epoch": 0.17649766554876045, + "grad_norm": 7.208802786478081, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 115215332, + "step": 671 + }, + { + "epoch": 0.17649766554876045, + "loss": 0.2342541515827179, + "loss_ce": 0.006531976629048586, + "loss_iou": 0.474609375, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 115215332, + "step": 671 + }, + { + "epoch": 0.17676070230814755, + "grad_norm": 5.677644761077757, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 115387420, + "step": 672 + }, + { + "epoch": 0.17676070230814755, + "loss": 0.1173291727900505, + "loss_ce": 0.0031934345606714487, + "loss_iou": 0.5546875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 115387420, + "step": 672 + }, + { + "epoch": 0.17702373906753469, + "grad_norm": 13.804747880548247, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 115559764, + "step": 673 + }, + { + "epoch": 0.17702373906753469, + "loss": 0.11581701785326004, + "loss_ce": 0.0009488522773608565, + "loss_iou": 0.546875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 115559764, + "step": 673 + }, + { + "epoch": 0.17728677582692182, + "grad_norm": 7.7796454748869435, + "learning_rate": 5e-06, + "loss": 0.2202, + "num_input_tokens_seen": 115731572, + "step": 674 + }, + { + "epoch": 0.17728677582692182, + "loss": 0.22158196568489075, + "loss_ce": 0.00228265137411654, + "loss_iou": 0.53125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 115731572, + "step": 674 + }, + { + "epoch": 0.17754981258630895, + "grad_norm": 7.463691885246712, + "learning_rate": 5e-06, + "loss": 0.1882, + "num_input_tokens_seen": 115903680, + "step": 675 + }, + { + "epoch": 0.17754981258630895, + "loss": 0.22185131907463074, + "loss_ce": 0.002002692548558116, + "loss_iou": 0.54296875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 115903680, + "step": 675 + }, + { + "epoch": 0.17781284934569605, + "grad_norm": 8.426002860661324, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 116075520, + "step": 676 + }, + { + "epoch": 0.17781284934569605, + "loss": 0.1606462597846985, + "loss_ce": 0.0009782931301742792, + "loss_iou": 0.5625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 116075520, + "step": 676 + }, + { + "epoch": 0.17807588610508318, + "grad_norm": 8.301724485985723, + "learning_rate": 5e-06, + "loss": 0.2, + "num_input_tokens_seen": 116243620, + "step": 677 + }, + { + "epoch": 0.17807588610508318, + "loss": 0.23937323689460754, + "loss_ce": 0.002251650206744671, + "loss_iou": 0.73828125, + "loss_num": 0.047607421875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 116243620, + "step": 677 + }, + { + "epoch": 0.1783389228644703, + "grad_norm": 8.16746221218087, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 116414372, + "step": 678 + }, + { + "epoch": 0.1783389228644703, + "loss": 0.2846035361289978, + "loss_ce": 0.003506140550598502, + "loss_iou": 0.498046875, + "loss_num": 0.05615234375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 116414372, + "step": 678 + }, + { + "epoch": 0.17860195962385744, + "grad_norm": 11.233813688882323, + "learning_rate": 5e-06, + "loss": 0.2052, + "num_input_tokens_seen": 116586316, + "step": 679 + }, + { + "epoch": 0.17860195962385744, + "loss": 0.19775235652923584, + "loss_ce": 0.0026840060018002987, + "loss_iou": 0.482421875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 116586316, + "step": 679 + }, + { + "epoch": 0.17886499638324455, + "grad_norm": 7.134240923845786, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 116758540, + "step": 680 + }, + { + "epoch": 0.17886499638324455, + "loss": 0.17349669337272644, + "loss_ce": 0.0008892616024240851, + "loss_iou": 0.6953125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 116758540, + "step": 680 + }, + { + "epoch": 0.17912803314263168, + "grad_norm": 8.457982820816902, + "learning_rate": 5e-06, + "loss": 0.2133, + "num_input_tokens_seen": 116930944, + "step": 681 + }, + { + "epoch": 0.17912803314263168, + "loss": 0.2098885327577591, + "loss_ce": 0.0027962373569607735, + "loss_iou": 0.6796875, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 116930944, + "step": 681 + }, + { + "epoch": 0.1793910699020188, + "grad_norm": 8.07197838986281, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 117102756, + "step": 682 + }, + { + "epoch": 0.1793910699020188, + "loss": 0.18535807728767395, + "loss_ce": 0.003778480924665928, + "loss_iou": 0.578125, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 117102756, + "step": 682 + }, + { + "epoch": 0.17965410666140594, + "grad_norm": 7.799168173540665, + "learning_rate": 5e-06, + "loss": 0.2208, + "num_input_tokens_seen": 117274704, + "step": 683 + }, + { + "epoch": 0.17965410666140594, + "loss": 0.22785347700119019, + "loss_ce": 0.0029999692924320698, + "loss_iou": 0.3125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 117274704, + "step": 683 + }, + { + "epoch": 0.17991714342079307, + "grad_norm": 6.357635509161791, + "learning_rate": 5e-06, + "loss": 0.1508, + "num_input_tokens_seen": 117446896, + "step": 684 + }, + { + "epoch": 0.17991714342079307, + "loss": 0.2038782835006714, + "loss_ce": 0.010396835394203663, + "loss_iou": 0.50390625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 117446896, + "step": 684 + }, + { + "epoch": 0.18018018018018017, + "grad_norm": 7.961998959961417, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 117618944, + "step": 685 + }, + { + "epoch": 0.18018018018018017, + "loss": 0.18519067764282227, + "loss_ce": 0.0017190101789310575, + "loss_iou": 0.54296875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 117618944, + "step": 685 + }, + { + "epoch": 0.1804432169395673, + "grad_norm": 9.08254127907995, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 117791316, + "step": 686 + }, + { + "epoch": 0.1804432169395673, + "loss": 0.20686465501785278, + "loss_ce": 0.0025189572479575872, + "loss_iou": 0.6796875, + "loss_num": 0.041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 117791316, + "step": 686 + }, + { + "epoch": 0.18070625369895443, + "grad_norm": 10.367656958651155, + "learning_rate": 5e-06, + "loss": 0.1855, + "num_input_tokens_seen": 117963308, + "step": 687 + }, + { + "epoch": 0.18070625369895443, + "loss": 0.13359057903289795, + "loss_ce": 0.0041044931858778, + "loss_iou": 0.6953125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 117963308, + "step": 687 + }, + { + "epoch": 0.18096929045834156, + "grad_norm": 7.549954240364692, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 118135556, + "step": 688 + }, + { + "epoch": 0.18096929045834156, + "loss": 0.14778929948806763, + "loss_ce": 0.0011828583665192127, + "loss_iou": 0.494140625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 118135556, + "step": 688 + }, + { + "epoch": 0.18123232721772867, + "grad_norm": 9.741300529319401, + "learning_rate": 5e-06, + "loss": 0.166, + "num_input_tokens_seen": 118305884, + "step": 689 + }, + { + "epoch": 0.18123232721772867, + "loss": 0.1753019541501999, + "loss_ce": 0.003182805608958006, + "loss_iou": 0.470703125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 118305884, + "step": 689 + }, + { + "epoch": 0.1814953639771158, + "grad_norm": 11.31734919561229, + "learning_rate": 5e-06, + "loss": 0.1811, + "num_input_tokens_seen": 118478000, + "step": 690 + }, + { + "epoch": 0.1814953639771158, + "loss": 0.189756840467453, + "loss_ce": 0.0007919906638562679, + "loss_iou": 0.77734375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 118478000, + "step": 690 + }, + { + "epoch": 0.18175840073650293, + "grad_norm": 9.003341647966222, + "learning_rate": 5e-06, + "loss": 0.1755, + "num_input_tokens_seen": 118650436, + "step": 691 + }, + { + "epoch": 0.18175840073650293, + "loss": 0.11313121020793915, + "loss_ce": 0.0006434204406104982, + "loss_iou": 0.53515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 118650436, + "step": 691 + }, + { + "epoch": 0.18202143749589006, + "grad_norm": 15.98200660549263, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 118822416, + "step": 692 + }, + { + "epoch": 0.18202143749589006, + "loss": 0.14253322780132294, + "loss_ce": 0.001908229780383408, + "loss_iou": 0.453125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 118822416, + "step": 692 + }, + { + "epoch": 0.18228447425527716, + "grad_norm": 5.776705438700134, + "learning_rate": 5e-06, + "loss": 0.1588, + "num_input_tokens_seen": 118994492, + "step": 693 + }, + { + "epoch": 0.18228447425527716, + "loss": 0.15580043196678162, + "loss_ce": 0.0014730504481121898, + "loss_iou": 0.40625, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 118994492, + "step": 693 + }, + { + "epoch": 0.1825475110146643, + "grad_norm": 13.97214493017941, + "learning_rate": 5e-06, + "loss": 0.1825, + "num_input_tokens_seen": 119166828, + "step": 694 + }, + { + "epoch": 0.1825475110146643, + "loss": 0.14117339253425598, + "loss_ce": 0.005309135653078556, + "loss_iou": 0.6171875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 119166828, + "step": 694 + }, + { + "epoch": 0.18281054777405142, + "grad_norm": 9.340996582360178, + "learning_rate": 5e-06, + "loss": 0.2039, + "num_input_tokens_seen": 119339040, + "step": 695 + }, + { + "epoch": 0.18281054777405142, + "loss": 0.21793845295906067, + "loss_ce": 0.005658184178173542, + "loss_iou": 0.4921875, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 119339040, + "step": 695 + }, + { + "epoch": 0.18307358453343855, + "grad_norm": 6.679514738557093, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 119511024, + "step": 696 + }, + { + "epoch": 0.18307358453343855, + "loss": 0.18582630157470703, + "loss_ce": 0.0012559981551021338, + "loss_iou": 0.5546875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 119511024, + "step": 696 + }, + { + "epoch": 0.18333662129282569, + "grad_norm": 5.326914153992354, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 119683564, + "step": 697 + }, + { + "epoch": 0.18333662129282569, + "loss": 0.15467330813407898, + "loss_ce": 0.0042826831340789795, + "loss_iou": 0.5703125, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 119683564, + "step": 697 + }, + { + "epoch": 0.1835996580522128, + "grad_norm": 8.515559517433303, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 119856000, + "step": 698 + }, + { + "epoch": 0.1835996580522128, + "loss": 0.12481513619422913, + "loss_ce": 0.0006696260534226894, + "loss_iou": 0.6796875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 119856000, + "step": 698 + }, + { + "epoch": 0.18386269481159992, + "grad_norm": 9.420262409376758, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 120028676, + "step": 699 + }, + { + "epoch": 0.18386269481159992, + "loss": 0.22470733523368835, + "loss_ce": 0.001868000952526927, + "loss_iou": 0.6875, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 120028676, + "step": 699 + }, + { + "epoch": 0.18412573157098705, + "grad_norm": 7.530169678535974, + "learning_rate": 5e-06, + "loss": 0.1624, + "num_input_tokens_seen": 120201208, + "step": 700 + }, + { + "epoch": 0.18412573157098705, + "loss": 0.10793297737836838, + "loss_ce": 0.0036849307361990213, + "loss_iou": 0.58203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 120201208, + "step": 700 + }, + { + "epoch": 0.18438876833037418, + "grad_norm": 8.205415154202601, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 120373444, + "step": 701 + }, + { + "epoch": 0.18438876833037418, + "loss": 0.1353437304496765, + "loss_ce": 0.001066376455128193, + "loss_iou": 0.5390625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 120373444, + "step": 701 + }, + { + "epoch": 0.18465180508976128, + "grad_norm": 8.319297966322562, + "learning_rate": 5e-06, + "loss": 0.1785, + "num_input_tokens_seen": 120545552, + "step": 702 + }, + { + "epoch": 0.18465180508976128, + "loss": 0.18953613936901093, + "loss_ce": 0.0019751053769141436, + "loss_iou": 0.69140625, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 120545552, + "step": 702 + }, + { + "epoch": 0.18491484184914841, + "grad_norm": 8.464289102867136, + "learning_rate": 5e-06, + "loss": 0.219, + "num_input_tokens_seen": 120717584, + "step": 703 + }, + { + "epoch": 0.18491484184914841, + "loss": 0.24215811491012573, + "loss_ce": 0.004975516349077225, + "loss_iou": 0.33203125, + "loss_num": 0.047607421875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 120717584, + "step": 703 + }, + { + "epoch": 0.18517787860853555, + "grad_norm": 12.962080293446185, + "learning_rate": 5e-06, + "loss": 0.2162, + "num_input_tokens_seen": 120889756, + "step": 704 + }, + { + "epoch": 0.18517787860853555, + "loss": 0.17355692386627197, + "loss_ce": 0.0025364109314978123, + "loss_iou": 0.7109375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 120889756, + "step": 704 + }, + { + "epoch": 0.18544091536792268, + "grad_norm": 13.968822655265907, + "learning_rate": 5e-06, + "loss": 0.2237, + "num_input_tokens_seen": 121062200, + "step": 705 + }, + { + "epoch": 0.18544091536792268, + "loss": 0.2510673403739929, + "loss_ce": 0.005400843918323517, + "loss_iou": 0.3984375, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 121062200, + "step": 705 + }, + { + "epoch": 0.18570395212730978, + "grad_norm": 15.418397137980255, + "learning_rate": 5e-06, + "loss": 0.2126, + "num_input_tokens_seen": 121234300, + "step": 706 + }, + { + "epoch": 0.18570395212730978, + "loss": 0.239446759223938, + "loss_ce": 0.0006772410124540329, + "loss_iou": 0.55078125, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 121234300, + "step": 706 + }, + { + "epoch": 0.1859669888866969, + "grad_norm": 4.8632608475065355, + "learning_rate": 5e-06, + "loss": 0.1269, + "num_input_tokens_seen": 121406408, + "step": 707 + }, + { + "epoch": 0.1859669888866969, + "loss": 0.10976609587669373, + "loss_ce": 0.003137679770588875, + "loss_iou": 0.50390625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 121406408, + "step": 707 + }, + { + "epoch": 0.18623002564608404, + "grad_norm": 9.856831060146035, + "learning_rate": 5e-06, + "loss": 0.1699, + "num_input_tokens_seen": 121578364, + "step": 708 + }, + { + "epoch": 0.18623002564608404, + "loss": 0.06338398158550262, + "loss_ce": 0.0025319333653897047, + "loss_iou": 0.4765625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 121578364, + "step": 708 + }, + { + "epoch": 0.18649306240547117, + "grad_norm": 12.596787814338684, + "learning_rate": 5e-06, + "loss": 0.1839, + "num_input_tokens_seen": 121750612, + "step": 709 + }, + { + "epoch": 0.18649306240547117, + "loss": 0.2992492616176605, + "loss_ce": 0.0012756290379911661, + "loss_iou": 0.5234375, + "loss_num": 0.0595703125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 121750612, + "step": 709 + }, + { + "epoch": 0.1867560991648583, + "grad_norm": 7.147545819350773, + "learning_rate": 5e-06, + "loss": 0.1776, + "num_input_tokens_seen": 121922804, + "step": 710 + }, + { + "epoch": 0.1867560991648583, + "loss": 0.18950411677360535, + "loss_ce": 0.0032858517952263355, + "loss_iou": 0.609375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 121922804, + "step": 710 + }, + { + "epoch": 0.1870191359242454, + "grad_norm": 22.63136110983146, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 122094924, + "step": 711 + }, + { + "epoch": 0.1870191359242454, + "loss": 0.12849926948547363, + "loss_ce": 0.0014851100277155638, + "loss_iou": 0.625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 122094924, + "step": 711 + }, + { + "epoch": 0.18728217268363254, + "grad_norm": 6.766417510968721, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 122265668, + "step": 712 + }, + { + "epoch": 0.18728217268363254, + "loss": 0.15207967162132263, + "loss_ce": 0.006968589033931494, + "loss_iou": 0.58984375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 122265668, + "step": 712 + }, + { + "epoch": 0.18754520944301967, + "grad_norm": 7.923158875388113, + "learning_rate": 5e-06, + "loss": 0.1904, + "num_input_tokens_seen": 122437796, + "step": 713 + }, + { + "epoch": 0.18754520944301967, + "loss": 0.20568042993545532, + "loss_ce": 0.002189208287745714, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 122437796, + "step": 713 + }, + { + "epoch": 0.1878082462024068, + "grad_norm": 8.667331104283747, + "learning_rate": 5e-06, + "loss": 0.1887, + "num_input_tokens_seen": 122610336, + "step": 714 + }, + { + "epoch": 0.1878082462024068, + "loss": 0.1953202784061432, + "loss_ce": 0.0012895169202238321, + "loss_iou": 0.287109375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 122610336, + "step": 714 + }, + { + "epoch": 0.1880712829617939, + "grad_norm": 12.96410805672465, + "learning_rate": 5e-06, + "loss": 0.2143, + "num_input_tokens_seen": 122779392, + "step": 715 + }, + { + "epoch": 0.1880712829617939, + "loss": 0.21114467084407806, + "loss_ce": 0.001916159177199006, + "loss_iou": 0.41015625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 122779392, + "step": 715 + }, + { + "epoch": 0.18833431972118103, + "grad_norm": 6.980680552365313, + "learning_rate": 5e-06, + "loss": 0.1831, + "num_input_tokens_seen": 122951724, + "step": 716 + }, + { + "epoch": 0.18833431972118103, + "loss": 0.24306175112724304, + "loss_ce": 0.006245340220630169, + "loss_iou": 0.3359375, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 122951724, + "step": 716 + }, + { + "epoch": 0.18859735648056816, + "grad_norm": 12.855577653554155, + "learning_rate": 5e-06, + "loss": 0.1591, + "num_input_tokens_seen": 123122088, + "step": 717 + }, + { + "epoch": 0.18859735648056816, + "loss": 0.1986149549484253, + "loss_ce": 0.0012882874580100179, + "loss_iou": 0.51953125, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 123122088, + "step": 717 + }, + { + "epoch": 0.1888603932399553, + "grad_norm": 6.930918072961039, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 123294160, + "step": 718 + }, + { + "epoch": 0.1888603932399553, + "loss": 0.19642534852027893, + "loss_ce": 0.0014790646964684129, + "loss_iou": 0.40625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 123294160, + "step": 718 + }, + { + "epoch": 0.1891234299993424, + "grad_norm": 6.929009893187008, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 123466060, + "step": 719 + }, + { + "epoch": 0.1891234299993424, + "loss": 0.1527690589427948, + "loss_ce": 0.005094507243484259, + "loss_iou": 0.40234375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 123466060, + "step": 719 + }, + { + "epoch": 0.18938646675872953, + "grad_norm": 10.25908293873023, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 123636388, + "step": 720 + }, + { + "epoch": 0.18938646675872953, + "loss": 0.18421480059623718, + "loss_ce": 0.0032455746550112963, + "loss_iou": 0.60546875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 123636388, + "step": 720 + }, + { + "epoch": 0.18964950351811666, + "grad_norm": 7.318122077183922, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 123808316, + "step": 721 + }, + { + "epoch": 0.18964950351811666, + "loss": 0.1352817267179489, + "loss_ce": 0.0030185491777956486, + "loss_iou": 0.34765625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 123808316, + "step": 721 + }, + { + "epoch": 0.1899125402775038, + "grad_norm": 7.382171237495957, + "learning_rate": 5e-06, + "loss": 0.2283, + "num_input_tokens_seen": 123980672, + "step": 722 + }, + { + "epoch": 0.1899125402775038, + "loss": 0.15074753761291504, + "loss_ce": 0.0036528080236166716, + "loss_iou": 0.6640625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 123980672, + "step": 722 + }, + { + "epoch": 0.19017557703689092, + "grad_norm": 5.982120923855094, + "learning_rate": 5e-06, + "loss": 0.1499, + "num_input_tokens_seen": 124150936, + "step": 723 + }, + { + "epoch": 0.19017557703689092, + "loss": 0.10008575022220612, + "loss_ce": 0.0014224194455891848, + "loss_iou": 0.48828125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 124150936, + "step": 723 + }, + { + "epoch": 0.19043861379627802, + "grad_norm": 8.136060280729641, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 124323280, + "step": 724 + }, + { + "epoch": 0.19043861379627802, + "loss": 0.12422403693199158, + "loss_ce": 0.0017264705384150147, + "loss_iou": 0.69140625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 124323280, + "step": 724 + }, + { + "epoch": 0.19070165055566515, + "grad_norm": 8.724019912821047, + "learning_rate": 5e-06, + "loss": 0.2613, + "num_input_tokens_seen": 124495636, + "step": 725 + }, + { + "epoch": 0.19070165055566515, + "loss": 0.13688622415065765, + "loss_ce": 0.0040737236849963665, + "loss_iou": 0.57421875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 124495636, + "step": 725 + }, + { + "epoch": 0.19096468731505228, + "grad_norm": 7.423558318920191, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 124667644, + "step": 726 + }, + { + "epoch": 0.19096468731505228, + "loss": 0.2597463130950928, + "loss_ce": 0.0010182850528508425, + "loss_iou": 0.416015625, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 124667644, + "step": 726 + }, + { + "epoch": 0.19122772407443941, + "grad_norm": 7.827506457517143, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 124839252, + "step": 727 + }, + { + "epoch": 0.19122772407443941, + "loss": 0.1310674101114273, + "loss_ce": 0.003076688153669238, + "loss_iou": 0.40234375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 124839252, + "step": 727 + }, + { + "epoch": 0.19149076083382652, + "grad_norm": 9.183421040665964, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 125011496, + "step": 728 + }, + { + "epoch": 0.19149076083382652, + "loss": 0.11314553767442703, + "loss_ce": 0.0003525639185681939, + "loss_iou": 0.625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 125011496, + "step": 728 + }, + { + "epoch": 0.19175379759321365, + "grad_norm": 10.185706416909431, + "learning_rate": 5e-06, + "loss": 0.2047, + "num_input_tokens_seen": 125179336, + "step": 729 + }, + { + "epoch": 0.19175379759321365, + "loss": 0.1552383303642273, + "loss_ce": 0.005152884405106306, + "loss_iou": 0.6015625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 125179336, + "step": 729 + }, + { + "epoch": 0.19201683435260078, + "grad_norm": 8.517948232569886, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 125351404, + "step": 730 + }, + { + "epoch": 0.19201683435260078, + "loss": 0.23154987394809723, + "loss_ce": 0.0012031885562464595, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 125351404, + "step": 730 + }, + { + "epoch": 0.1922798711119879, + "grad_norm": 8.61986864690339, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 125523692, + "step": 731 + }, + { + "epoch": 0.1922798711119879, + "loss": 0.14681357145309448, + "loss_ce": 0.0021907794289290905, + "loss_iou": 0.357421875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 125523692, + "step": 731 + }, + { + "epoch": 0.192542907871375, + "grad_norm": 5.859065568818718, + "learning_rate": 5e-06, + "loss": 0.178, + "num_input_tokens_seen": 125693908, + "step": 732 + }, + { + "epoch": 0.192542907871375, + "loss": 0.15787754952907562, + "loss_ce": 0.0012613451108336449, + "loss_iou": 0.46875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 125693908, + "step": 732 + }, + { + "epoch": 0.19280594463076214, + "grad_norm": 10.469269184120652, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 125865868, + "step": 733 + }, + { + "epoch": 0.19280594463076214, + "loss": 0.13217297196388245, + "loss_ce": 0.0014051578473299742, + "loss_iou": 0.578125, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 125865868, + "step": 733 + }, + { + "epoch": 0.19306898139014927, + "grad_norm": 9.209135916468261, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 126037852, + "step": 734 + }, + { + "epoch": 0.19306898139014927, + "loss": 0.17599225044250488, + "loss_ce": 0.0004551436868496239, + "loss_iou": 0.65625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 126037852, + "step": 734 + }, + { + "epoch": 0.1933320181495364, + "grad_norm": 8.89690790064976, + "learning_rate": 5e-06, + "loss": 0.2197, + "num_input_tokens_seen": 126209984, + "step": 735 + }, + { + "epoch": 0.1933320181495364, + "loss": 0.1378391683101654, + "loss_ce": 0.0020359433256089687, + "loss_iou": 0.6875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 126209984, + "step": 735 + }, + { + "epoch": 0.1935950549089235, + "grad_norm": 9.250244451235272, + "learning_rate": 5e-06, + "loss": 0.1999, + "num_input_tokens_seen": 126382140, + "step": 736 + }, + { + "epoch": 0.1935950549089235, + "loss": 0.24104052782058716, + "loss_ce": 0.0028813518583774567, + "loss_iou": 0.671875, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 126382140, + "step": 736 + }, + { + "epoch": 0.19385809166831064, + "grad_norm": 6.538014726838841, + "learning_rate": 5e-06, + "loss": 0.1574, + "num_input_tokens_seen": 126554124, + "step": 737 + }, + { + "epoch": 0.19385809166831064, + "loss": 0.19325746595859528, + "loss_ce": 0.0034991574939340353, + "loss_iou": 0.48046875, + "loss_num": 0.0380859375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 126554124, + "step": 737 + }, + { + "epoch": 0.19412112842769777, + "grad_norm": 6.359733816292311, + "learning_rate": 5e-06, + "loss": 0.1727, + "num_input_tokens_seen": 126726352, + "step": 738 + }, + { + "epoch": 0.19412112842769777, + "loss": 0.1890466809272766, + "loss_ce": 0.003866007784381509, + "loss_iou": 0.7109375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 126726352, + "step": 738 + }, + { + "epoch": 0.1943841651870849, + "grad_norm": 6.737716233597974, + "learning_rate": 5e-06, + "loss": 0.168, + "num_input_tokens_seen": 126898504, + "step": 739 + }, + { + "epoch": 0.1943841651870849, + "loss": 0.22711391746997833, + "loss_ce": 0.0020162612199783325, + "loss_iou": 0.6640625, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 126898504, + "step": 739 + }, + { + "epoch": 0.19464720194647203, + "grad_norm": 10.63587672601612, + "learning_rate": 5e-06, + "loss": 0.2406, + "num_input_tokens_seen": 127070440, + "step": 740 + }, + { + "epoch": 0.19464720194647203, + "loss": 0.2545185983181, + "loss_ce": 0.0035420400090515614, + "loss_iou": 0.53125, + "loss_num": 0.05029296875, + "loss_xval": 0.25, + "num_input_tokens_seen": 127070440, + "step": 740 + }, + { + "epoch": 0.19491023870585913, + "grad_norm": 10.573425928841477, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 127242884, + "step": 741 + }, + { + "epoch": 0.19491023870585913, + "loss": 0.07598280906677246, + "loss_ce": 0.0007874930743128061, + "loss_iou": 0.5546875, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 127242884, + "step": 741 + }, + { + "epoch": 0.19517327546524627, + "grad_norm": 5.624166188859615, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 127413548, + "step": 742 + }, + { + "epoch": 0.19517327546524627, + "loss": 0.12612518668174744, + "loss_ce": 0.0008200095035135746, + "loss_iou": 0.447265625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 127413548, + "step": 742 + }, + { + "epoch": 0.1954363122246334, + "grad_norm": 5.416712373816477, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 127583504, + "step": 743 + }, + { + "epoch": 0.1954363122246334, + "loss": 0.14368192851543427, + "loss_ce": 0.003209515009075403, + "loss_iou": 0.474609375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 127583504, + "step": 743 + }, + { + "epoch": 0.19569934898402053, + "grad_norm": 7.55903022235747, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 127753924, + "step": 744 + }, + { + "epoch": 0.19569934898402053, + "loss": 0.26765167713165283, + "loss_ce": 0.009717106819152832, + "loss_iou": 0.451171875, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 127753924, + "step": 744 + }, + { + "epoch": 0.19596238574340763, + "grad_norm": 6.325854098350077, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 127925840, + "step": 745 + }, + { + "epoch": 0.19596238574340763, + "loss": 0.13880833983421326, + "loss_ce": 0.0011435477063059807, + "loss_iou": 0.50390625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 127925840, + "step": 745 + }, + { + "epoch": 0.19622542250279476, + "grad_norm": 7.469182967125795, + "learning_rate": 5e-06, + "loss": 0.1661, + "num_input_tokens_seen": 128098212, + "step": 746 + }, + { + "epoch": 0.19622542250279476, + "loss": 0.22049343585968018, + "loss_ce": 0.002109651220962405, + "loss_iou": 0.61328125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 128098212, + "step": 746 + }, + { + "epoch": 0.1964884592621819, + "grad_norm": 8.506872378366532, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 128270372, + "step": 747 + }, + { + "epoch": 0.1964884592621819, + "loss": 0.1590556651353836, + "loss_ce": 0.0010356476996093988, + "loss_iou": 0.515625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 128270372, + "step": 747 + }, + { + "epoch": 0.19675149602156902, + "grad_norm": 7.2518370649313075, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 128439832, + "step": 748 + }, + { + "epoch": 0.19675149602156902, + "loss": 0.16326385736465454, + "loss_ce": 0.0021310443989932537, + "loss_iou": 0.55859375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 128439832, + "step": 748 + }, + { + "epoch": 0.19701453278095613, + "grad_norm": 9.929926428237398, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 128612032, + "step": 749 + }, + { + "epoch": 0.19701453278095613, + "loss": 0.09666653722524643, + "loss_ce": 0.002489290665835142, + "loss_iou": 0.71484375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 128612032, + "step": 749 + }, + { + "epoch": 0.19727756954034326, + "grad_norm": 8.129979685164507, + "learning_rate": 5e-06, + "loss": 0.144, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_websight_new_CIoU": 0.7427884042263031, + "eval_websight_new_GIoU": 0.7464376986026764, + "eval_websight_new_IoU": 0.7504112422466278, + "eval_websight_new_MAE_all": 0.03649342246353626, + "eval_websight_new_MAE_h": 0.03233582433313131, + "eval_websight_new_MAE_w": 0.056174855679273605, + "eval_websight_new_MAE_x": 0.046134982258081436, + "eval_websight_new_MAE_y": 0.01132803549990058, + "eval_websight_new_NUM_probability": 0.9972327351570129, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.19190411269664764, + "eval_websight_new_loss_ce": 0.00034623414103407413, + "eval_websight_new_loss_iou": 0.777099609375, + "eval_websight_new_loss_num": 0.03619384765625, + "eval_websight_new_loss_xval": 0.18096923828125, + "eval_websight_new_runtime": 55.1355, + "eval_websight_new_samples_per_second": 0.907, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_seeclick_CIoU": 0.4580978453159332, + "eval_seeclick_GIoU": 0.44802993535995483, + "eval_seeclick_IoU": 0.4842703342437744, + "eval_seeclick_MAE_all": 0.05876399576663971, + "eval_seeclick_MAE_h": 0.0604591965675354, + "eval_seeclick_MAE_w": 0.06378005631268024, + "eval_seeclick_MAE_x": 0.07332894578576088, + "eval_seeclick_MAE_y": 0.03748778998851776, + "eval_seeclick_NUM_probability": 0.9986195266246796, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.3042093813419342, + "eval_seeclick_loss_ce": 0.012256910093128681, + "eval_seeclick_loss_iou": 0.6614990234375, + "eval_seeclick_loss_num": 0.055084228515625, + "eval_seeclick_loss_xval": 0.27545166015625, + "eval_seeclick_runtime": 69.7623, + "eval_seeclick_samples_per_second": 0.616, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_icons_CIoU": 0.7633987963199615, + "eval_icons_GIoU": 0.7561427056789398, + "eval_icons_IoU": 0.7705403864383698, + "eval_icons_MAE_all": 0.029183855280280113, + "eval_icons_MAE_h": 0.035788778215646744, + "eval_icons_MAE_w": 0.03255164809525013, + "eval_icons_MAE_x": 0.02722846996039152, + "eval_icons_MAE_y": 0.02116652298718691, + "eval_icons_NUM_probability": 0.9982486963272095, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.13746696710586548, + "eval_icons_loss_ce": 0.0027548681828193367, + "eval_icons_loss_iou": 0.7633056640625, + "eval_icons_loss_num": 0.02550506591796875, + "eval_icons_loss_xval": 0.127532958984375, + "eval_icons_runtime": 87.2396, + "eval_icons_samples_per_second": 0.573, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "eval_screenspot_CIoU": 0.4680892725785573, + "eval_screenspot_GIoU": 0.45600322882334393, + "eval_screenspot_IoU": 0.507049967845281, + "eval_screenspot_MAE_all": 0.09560441970825195, + "eval_screenspot_MAE_h": 0.08166227489709854, + "eval_screenspot_MAE_w": 0.14845576385656992, + "eval_screenspot_MAE_x": 0.09815465907255809, + "eval_screenspot_MAE_y": 0.054144968589146934, + "eval_screenspot_NUM_probability": 0.9986165563265482, + "eval_screenspot_inside_bbox": 0.8558333317438761, + "eval_screenspot_loss": 0.805972158908844, + "eval_screenspot_loss_ce": 0.38150885701179504, + "eval_screenspot_loss_iou": 0.580078125, + "eval_screenspot_loss_num": 0.0841064453125, + "eval_screenspot_loss_xval": 0.4202473958333333, + "eval_screenspot_runtime": 148.1708, + "eval_screenspot_samples_per_second": 0.601, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.19727756954034326, + "loss": 0.7833826541900635, + "loss_ce": 0.36297255754470825, + "loss_iou": 0.41015625, + "loss_num": 0.083984375, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 128784236, + "step": 750 + }, + { + "epoch": 0.1975406062997304, + "grad_norm": 6.107960207443286, + "learning_rate": 5e-06, + "loss": 0.2391, + "num_input_tokens_seen": 128956536, + "step": 751 + }, + { + "epoch": 0.1975406062997304, + "loss": 0.27341228723526, + "loss_ce": 0.0016227375017479062, + "loss_iou": 0.734375, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 128956536, + "step": 751 + }, + { + "epoch": 0.19780364305911752, + "grad_norm": 12.720415598286513, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 129128836, + "step": 752 + }, + { + "epoch": 0.19780364305911752, + "loss": 0.12072408944368362, + "loss_ce": 0.002621056977659464, + "loss_iou": 0.515625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 129128836, + "step": 752 + }, + { + "epoch": 0.19806667981850465, + "grad_norm": 14.921842726715303, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 129300844, + "step": 753 + }, + { + "epoch": 0.19806667981850465, + "loss": 0.1643597036600113, + "loss_ce": 0.0029217104893177748, + "loss_iou": 0.546875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 129300844, + "step": 753 + }, + { + "epoch": 0.19832971657789175, + "grad_norm": 6.005245677684448, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 129471448, + "step": 754 + }, + { + "epoch": 0.19832971657789175, + "loss": 0.09330937266349792, + "loss_ce": 0.0012683530803769827, + "loss_iou": 0.5625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 129471448, + "step": 754 + }, + { + "epoch": 0.19859275333727888, + "grad_norm": 10.390450693463213, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 129643296, + "step": 755 + }, + { + "epoch": 0.19859275333727888, + "loss": 0.19149892032146454, + "loss_ce": 0.000733550579752773, + "loss_iou": 0.58984375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 129643296, + "step": 755 + }, + { + "epoch": 0.198855790096666, + "grad_norm": 12.828044454318112, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 129815492, + "step": 756 + }, + { + "epoch": 0.198855790096666, + "loss": 0.17852596938610077, + "loss_ce": 0.003294031834229827, + "loss_iou": 0.486328125, + "loss_num": 0.03515625, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 129815492, + "step": 756 + }, + { + "epoch": 0.19911882685605314, + "grad_norm": 8.380872387581206, + "learning_rate": 5e-06, + "loss": 0.2269, + "num_input_tokens_seen": 129987692, + "step": 757 + }, + { + "epoch": 0.19911882685605314, + "loss": 0.20438869297504425, + "loss_ce": 0.00040920061292126775, + "loss_iou": 0.64453125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 129987692, + "step": 757 + }, + { + "epoch": 0.19938186361544025, + "grad_norm": 11.044900252894326, + "learning_rate": 5e-06, + "loss": 0.1723, + "num_input_tokens_seen": 130159828, + "step": 758 + }, + { + "epoch": 0.19938186361544025, + "loss": 0.1886797845363617, + "loss_ce": 0.0009661591611802578, + "loss_iou": 0.56640625, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 130159828, + "step": 758 + }, + { + "epoch": 0.19964490037482738, + "grad_norm": 8.638981565005851, + "learning_rate": 5e-06, + "loss": 0.1719, + "num_input_tokens_seen": 130331756, + "step": 759 + }, + { + "epoch": 0.19964490037482738, + "loss": 0.1088617593050003, + "loss_ce": 0.0055597638711333275, + "loss_iou": 0.5703125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 130331756, + "step": 759 + }, + { + "epoch": 0.1999079371342145, + "grad_norm": 16.96292619187744, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 130503960, + "step": 760 + }, + { + "epoch": 0.1999079371342145, + "loss": 0.1997271627187729, + "loss_ce": 0.001668088138103485, + "loss_iou": 0.4609375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 130503960, + "step": 760 + }, + { + "epoch": 0.20017097389360164, + "grad_norm": 18.254879073962975, + "learning_rate": 5e-06, + "loss": 0.209, + "num_input_tokens_seen": 130676124, + "step": 761 + }, + { + "epoch": 0.20017097389360164, + "loss": 0.22872218489646912, + "loss_ce": 0.002464861376211047, + "loss_iou": 0.55078125, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 130676124, + "step": 761 + }, + { + "epoch": 0.20043401065298874, + "grad_norm": 16.873564483656185, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 130848448, + "step": 762 + }, + { + "epoch": 0.20043401065298874, + "loss": 0.11374461650848389, + "loss_ce": 0.001867176266387105, + "loss_iou": 0.5546875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 130848448, + "step": 762 + }, + { + "epoch": 0.20069704741237587, + "grad_norm": 8.061510131196302, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 131020600, + "step": 763 + }, + { + "epoch": 0.20069704741237587, + "loss": 0.24045339226722717, + "loss_ce": 0.003575955517590046, + "loss_iou": 0.59375, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 131020600, + "step": 763 + }, + { + "epoch": 0.200960084171763, + "grad_norm": 7.1675953763190865, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 131191064, + "step": 764 + }, + { + "epoch": 0.200960084171763, + "loss": 0.1495012640953064, + "loss_ce": 0.004115516785532236, + "loss_iou": 0.462890625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 131191064, + "step": 764 + }, + { + "epoch": 0.20122312093115013, + "grad_norm": 5.20536952677903, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 131363148, + "step": 765 + }, + { + "epoch": 0.20122312093115013, + "loss": 0.23652556538581848, + "loss_ce": 0.009566339664161205, + "loss_iou": 0.546875, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 131363148, + "step": 765 + }, + { + "epoch": 0.20148615769053727, + "grad_norm": 8.038569612667256, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 131535488, + "step": 766 + }, + { + "epoch": 0.20148615769053727, + "loss": 0.24296867847442627, + "loss_ce": 0.0015136117581278086, + "loss_iou": 0.490234375, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 131535488, + "step": 766 + }, + { + "epoch": 0.20174919444992437, + "grad_norm": 6.190176430910417, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 131707808, + "step": 767 + }, + { + "epoch": 0.20174919444992437, + "loss": 0.24412932991981506, + "loss_ce": 0.00218596076592803, + "loss_iou": 0.5859375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 131707808, + "step": 767 + }, + { + "epoch": 0.2020122312093115, + "grad_norm": 6.201791565366413, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 131879928, + "step": 768 + }, + { + "epoch": 0.2020122312093115, + "loss": 0.09396322071552277, + "loss_ce": 0.002288414863869548, + "loss_iou": 0.47265625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 131879928, + "step": 768 + }, + { + "epoch": 0.20227526796869863, + "grad_norm": 7.34120818873771, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 132052340, + "step": 769 + }, + { + "epoch": 0.20227526796869863, + "loss": 0.22239628434181213, + "loss_ce": 0.005477334372699261, + "loss_iou": 0.51171875, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 132052340, + "step": 769 + }, + { + "epoch": 0.20253830472808576, + "grad_norm": 6.124473321987556, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 132224712, + "step": 770 + }, + { + "epoch": 0.20253830472808576, + "loss": 0.17558854818344116, + "loss_ce": 0.0018824923317879438, + "loss_iou": 0.54296875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 132224712, + "step": 770 + }, + { + "epoch": 0.20280134148747286, + "grad_norm": 9.284836415516965, + "learning_rate": 5e-06, + "loss": 0.1512, + "num_input_tokens_seen": 132396772, + "step": 771 + }, + { + "epoch": 0.20280134148747286, + "loss": 0.15597118437290192, + "loss_ce": 0.0003925706841982901, + "loss_iou": 0.734375, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 132396772, + "step": 771 + }, + { + "epoch": 0.20306437824686, + "grad_norm": 7.764344629279706, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 132568908, + "step": 772 + }, + { + "epoch": 0.20306437824686, + "loss": 0.18731489777565002, + "loss_ce": 0.0028056304436177015, + "loss_iou": 0.53125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 132568908, + "step": 772 + }, + { + "epoch": 0.20332741500624713, + "grad_norm": 10.387981116967136, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 132741140, + "step": 773 + }, + { + "epoch": 0.20332741500624713, + "loss": 0.2155558466911316, + "loss_ce": 0.00147504813503474, + "loss_iou": 0.5625, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 132741140, + "step": 773 + }, + { + "epoch": 0.20359045176563426, + "grad_norm": 11.81609487087241, + "learning_rate": 5e-06, + "loss": 0.1972, + "num_input_tokens_seen": 132913412, + "step": 774 + }, + { + "epoch": 0.20359045176563426, + "loss": 0.2562327980995178, + "loss_ce": 0.0022044687066227198, + "loss_iou": 0.625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 132913412, + "step": 774 + }, + { + "epoch": 0.20385348852502136, + "grad_norm": 11.070638283175898, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 133085388, + "step": 775 + }, + { + "epoch": 0.20385348852502136, + "loss": 0.1850009262561798, + "loss_ce": 0.0011630415683612227, + "loss_iou": 0.671875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 133085388, + "step": 775 + }, + { + "epoch": 0.2041165252844085, + "grad_norm": 8.169381502927404, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 133257492, + "step": 776 + }, + { + "epoch": 0.2041165252844085, + "loss": 0.17405013740062714, + "loss_ce": 0.0007713312515988946, + "loss_iou": 0.609375, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 133257492, + "step": 776 + }, + { + "epoch": 0.20437956204379562, + "grad_norm": 7.483205340151747, + "learning_rate": 5e-06, + "loss": 0.1812, + "num_input_tokens_seen": 133429516, + "step": 777 + }, + { + "epoch": 0.20437956204379562, + "loss": 0.21947714686393738, + "loss_ce": 0.0011543984292075038, + "loss_iou": 0.6171875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 133429516, + "step": 777 + }, + { + "epoch": 0.20464259880318275, + "grad_norm": 15.835699409497824, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 133599692, + "step": 778 + }, + { + "epoch": 0.20464259880318275, + "loss": 0.1631580889225006, + "loss_ce": 0.0015064696781337261, + "loss_iou": 0.50390625, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 133599692, + "step": 778 + }, + { + "epoch": 0.20490563556256988, + "grad_norm": 6.9792797252935, + "learning_rate": 5e-06, + "loss": 0.1661, + "num_input_tokens_seen": 133771756, + "step": 779 + }, + { + "epoch": 0.20490563556256988, + "loss": 0.16169646382331848, + "loss_ce": 0.0007467527757398784, + "loss_iou": 0.435546875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 133771756, + "step": 779 + }, + { + "epoch": 0.20516867232195699, + "grad_norm": 12.331720736489249, + "learning_rate": 5e-06, + "loss": 0.1981, + "num_input_tokens_seen": 133944348, + "step": 780 + }, + { + "epoch": 0.20516867232195699, + "loss": 0.13550271093845367, + "loss_ce": 0.0025071091949939728, + "loss_iou": 0.65234375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 133944348, + "step": 780 + }, + { + "epoch": 0.20543170908134412, + "grad_norm": 5.7846193156700725, + "learning_rate": 5e-06, + "loss": 0.1172, + "num_input_tokens_seen": 134116796, + "step": 781 + }, + { + "epoch": 0.20543170908134412, + "loss": 0.09005297720432281, + "loss_ce": 0.0008500947151333094, + "loss_iou": 0.5390625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 134116796, + "step": 781 + }, + { + "epoch": 0.20569474584073125, + "grad_norm": 20.38915176858911, + "learning_rate": 5e-06, + "loss": 0.2145, + "num_input_tokens_seen": 134285564, + "step": 782 + }, + { + "epoch": 0.20569474584073125, + "loss": 0.2173474133014679, + "loss_ce": 0.0005505430162884295, + "loss_iou": 0.5390625, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 134285564, + "step": 782 + }, + { + "epoch": 0.20595778260011838, + "grad_norm": 13.943592884184664, + "learning_rate": 5e-06, + "loss": 0.2368, + "num_input_tokens_seen": 134457600, + "step": 783 + }, + { + "epoch": 0.20595778260011838, + "loss": 0.23010152578353882, + "loss_ce": 0.00341693963855505, + "loss_iou": 0.54296875, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 134457600, + "step": 783 + }, + { + "epoch": 0.20622081935950548, + "grad_norm": 7.47232647542354, + "learning_rate": 5e-06, + "loss": 0.1884, + "num_input_tokens_seen": 134627836, + "step": 784 + }, + { + "epoch": 0.20622081935950548, + "loss": 0.31371766328811646, + "loss_ce": 0.0035369964316487312, + "loss_iou": 0.494140625, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 134627836, + "step": 784 + }, + { + "epoch": 0.2064838561188926, + "grad_norm": 7.561434198006136, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 134800024, + "step": 785 + }, + { + "epoch": 0.2064838561188926, + "loss": 0.25232362747192383, + "loss_ce": 0.003910548985004425, + "loss_iou": 0.63671875, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 134800024, + "step": 785 + }, + { + "epoch": 0.20674689287827974, + "grad_norm": 7.384265604656037, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 134972276, + "step": 786 + }, + { + "epoch": 0.20674689287827974, + "loss": 0.22025543451309204, + "loss_ce": 0.0012307591969147325, + "loss_iou": 0.48046875, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 134972276, + "step": 786 + }, + { + "epoch": 0.20700992963766687, + "grad_norm": 6.5497695680105705, + "learning_rate": 5e-06, + "loss": 0.1985, + "num_input_tokens_seen": 135144300, + "step": 787 + }, + { + "epoch": 0.20700992963766687, + "loss": 0.24491435289382935, + "loss_ce": 0.0007126981508918107, + "loss_iou": 0.5546875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 135144300, + "step": 787 + }, + { + "epoch": 0.20727296639705398, + "grad_norm": 7.321479905780818, + "learning_rate": 5e-06, + "loss": 0.1764, + "num_input_tokens_seen": 135316344, + "step": 788 + }, + { + "epoch": 0.20727296639705398, + "loss": 0.19332991540431976, + "loss_ce": 0.0017405691323801875, + "loss_iou": 0.52734375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 135316344, + "step": 788 + }, + { + "epoch": 0.2075360031564411, + "grad_norm": 5.255608280002089, + "learning_rate": 5e-06, + "loss": 0.1867, + "num_input_tokens_seen": 135488352, + "step": 789 + }, + { + "epoch": 0.2075360031564411, + "loss": 0.18961402773857117, + "loss_ce": 0.0032126582227647305, + "loss_iou": 0.6015625, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 135488352, + "step": 789 + }, + { + "epoch": 0.20779903991582824, + "grad_norm": 6.817599663694252, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 135660716, + "step": 790 + }, + { + "epoch": 0.20779903991582824, + "loss": 0.11433705687522888, + "loss_ce": 0.0007506305119022727, + "loss_iou": 0.5234375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 135660716, + "step": 790 + }, + { + "epoch": 0.20806207667521537, + "grad_norm": 7.049616575343475, + "learning_rate": 5e-06, + "loss": 0.2041, + "num_input_tokens_seen": 135833108, + "step": 791 + }, + { + "epoch": 0.20806207667521537, + "loss": 0.16387248039245605, + "loss_ce": 0.005669359117746353, + "loss_iou": 0.64453125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 135833108, + "step": 791 + }, + { + "epoch": 0.2083251134346025, + "grad_norm": 9.83283000996709, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 136001408, + "step": 792 + }, + { + "epoch": 0.2083251134346025, + "loss": 0.13693515956401825, + "loss_ce": 0.0015286724083125591, + "loss_iou": 0.546875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 136001408, + "step": 792 + }, + { + "epoch": 0.2085881501939896, + "grad_norm": 18.56546458626088, + "learning_rate": 5e-06, + "loss": 0.2126, + "num_input_tokens_seen": 136173768, + "step": 793 + }, + { + "epoch": 0.2085881501939896, + "loss": 0.2671317458152771, + "loss_ce": 0.004436435177922249, + "loss_iou": 0.384765625, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 136173768, + "step": 793 + }, + { + "epoch": 0.20885118695337673, + "grad_norm": 7.460522492414201, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 136345864, + "step": 794 + }, + { + "epoch": 0.20885118695337673, + "loss": 0.19496268033981323, + "loss_ce": 0.003495402168482542, + "loss_iou": 0.58984375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 136345864, + "step": 794 + }, + { + "epoch": 0.20911422371276386, + "grad_norm": 6.238640410439663, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 136518076, + "step": 795 + }, + { + "epoch": 0.20911422371276386, + "loss": 0.1869560331106186, + "loss_ce": 0.0006157027091830969, + "loss_iou": 0.66796875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 136518076, + "step": 795 + }, + { + "epoch": 0.209377260472151, + "grad_norm": 4.750721959517019, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 136690192, + "step": 796 + }, + { + "epoch": 0.209377260472151, + "loss": 0.10885806381702423, + "loss_ce": 0.0011920429533347487, + "loss_iou": 0.52734375, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 136690192, + "step": 796 + }, + { + "epoch": 0.2096402972315381, + "grad_norm": 11.655608393949823, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 136862596, + "step": 797 + }, + { + "epoch": 0.2096402972315381, + "loss": 0.13095784187316895, + "loss_ce": 0.0011970889754593372, + "loss_iou": 0.54296875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 136862596, + "step": 797 + }, + { + "epoch": 0.20990333399092523, + "grad_norm": 7.6147795291746645, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 137033096, + "step": 798 + }, + { + "epoch": 0.20990333399092523, + "loss": 0.14115872979164124, + "loss_ce": 0.002456323243677616, + "loss_iou": 0.494140625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 137033096, + "step": 798 + }, + { + "epoch": 0.21016637075031236, + "grad_norm": 6.436556075292691, + "learning_rate": 5e-06, + "loss": 0.164, + "num_input_tokens_seen": 137203708, + "step": 799 + }, + { + "epoch": 0.21016637075031236, + "loss": 0.15657231211662292, + "loss_ce": 0.0031909646932035685, + "loss_iou": 0.48046875, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 137203708, + "step": 799 + }, + { + "epoch": 0.2104294075096995, + "grad_norm": 5.272376224873815, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 137374352, + "step": 800 + }, + { + "epoch": 0.2104294075096995, + "loss": 0.19644752144813538, + "loss_ce": 0.003332281718030572, + "loss_iou": 0.5234375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 137374352, + "step": 800 + }, + { + "epoch": 0.2106924442690866, + "grad_norm": 9.274938800244652, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 137546372, + "step": 801 + }, + { + "epoch": 0.2106924442690866, + "loss": 0.23770156502723694, + "loss_ce": 0.0010682701831683517, + "loss_iou": 0.5078125, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 137546372, + "step": 801 + }, + { + "epoch": 0.21095548102847372, + "grad_norm": 12.494816629710325, + "learning_rate": 5e-06, + "loss": 0.1484, + "num_input_tokens_seen": 137718496, + "step": 802 + }, + { + "epoch": 0.21095548102847372, + "loss": 0.13832518458366394, + "loss_ce": 0.00380369508638978, + "loss_iou": 0.59765625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 137718496, + "step": 802 + }, + { + "epoch": 0.21121851778786085, + "grad_norm": 11.143811760293481, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 137890492, + "step": 803 + }, + { + "epoch": 0.21121851778786085, + "loss": 0.17278538644313812, + "loss_ce": 0.0006662444211542606, + "loss_iou": 0.6015625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 137890492, + "step": 803 + }, + { + "epoch": 0.21148155454724799, + "grad_norm": 6.694127616757882, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 138062576, + "step": 804 + }, + { + "epoch": 0.21148155454724799, + "loss": 0.08578141778707504, + "loss_ce": 0.0013697945978492498, + "loss_iou": 0.6328125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 138062576, + "step": 804 + }, + { + "epoch": 0.21174459130663512, + "grad_norm": 6.879766720907239, + "learning_rate": 5e-06, + "loss": 0.2407, + "num_input_tokens_seen": 138234992, + "step": 805 + }, + { + "epoch": 0.21174459130663512, + "loss": 0.26608556509017944, + "loss_ce": 0.0011929699685424566, + "loss_iou": 0.671875, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 138234992, + "step": 805 + }, + { + "epoch": 0.21200762806602222, + "grad_norm": 10.68020187128554, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 138407296, + "step": 806 + }, + { + "epoch": 0.21200762806602222, + "loss": 0.16970132291316986, + "loss_ce": 0.003929831553250551, + "loss_iou": 0.5078125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 138407296, + "step": 806 + }, + { + "epoch": 0.21227066482540935, + "grad_norm": 6.8835699764215, + "learning_rate": 5e-06, + "loss": 0.2142, + "num_input_tokens_seen": 138579820, + "step": 807 + }, + { + "epoch": 0.21227066482540935, + "loss": 0.20114630460739136, + "loss_ce": 0.0011341023491695523, + "loss_iou": 0.703125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 138579820, + "step": 807 + }, + { + "epoch": 0.21253370158479648, + "grad_norm": 5.0783612426941795, + "learning_rate": 5e-06, + "loss": 0.1658, + "num_input_tokens_seen": 138752112, + "step": 808 + }, + { + "epoch": 0.21253370158479648, + "loss": 0.22068345546722412, + "loss_ce": 0.004802103620022535, + "loss_iou": 0.42578125, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 138752112, + "step": 808 + }, + { + "epoch": 0.2127967383441836, + "grad_norm": 6.76267182747039, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 138924516, + "step": 809 + }, + { + "epoch": 0.2127967383441836, + "loss": 0.16195307672023773, + "loss_ce": 0.0010644117137417197, + "loss_iou": 0.5234375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 138924516, + "step": 809 + }, + { + "epoch": 0.21305977510357071, + "grad_norm": 12.983359860046427, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 139096976, + "step": 810 + }, + { + "epoch": 0.21305977510357071, + "loss": 0.14437143504619598, + "loss_ce": 0.0023426164407283068, + "loss_iou": 0.50390625, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 139096976, + "step": 810 + }, + { + "epoch": 0.21332281186295785, + "grad_norm": 5.900519173332491, + "learning_rate": 5e-06, + "loss": 0.1796, + "num_input_tokens_seen": 139269224, + "step": 811 + }, + { + "epoch": 0.21332281186295785, + "loss": 0.11451567709445953, + "loss_ce": 0.0014175281394273043, + "loss_iou": 0.58203125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 139269224, + "step": 811 + }, + { + "epoch": 0.21358584862234498, + "grad_norm": 11.704739365440266, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 139441048, + "step": 812 + }, + { + "epoch": 0.21358584862234498, + "loss": 0.06064599007368088, + "loss_ce": 0.0007399858441203833, + "loss_iou": 0.3984375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 139441048, + "step": 812 + }, + { + "epoch": 0.2138488853817321, + "grad_norm": 5.765338635306649, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 139613180, + "step": 813 + }, + { + "epoch": 0.2138488853817321, + "loss": 0.1868004947900772, + "loss_ce": 0.001986048649996519, + "loss_iou": 0.6171875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 139613180, + "step": 813 + }, + { + "epoch": 0.2141119221411192, + "grad_norm": 8.301852392840289, + "learning_rate": 5e-06, + "loss": 0.1731, + "num_input_tokens_seen": 139785340, + "step": 814 + }, + { + "epoch": 0.2141119221411192, + "loss": 0.21647384762763977, + "loss_ce": 0.002240448724478483, + "loss_iou": 0.5078125, + "loss_num": 0.04296875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 139785340, + "step": 814 + }, + { + "epoch": 0.21437495890050634, + "grad_norm": 10.75059655667213, + "learning_rate": 5e-06, + "loss": 0.2066, + "num_input_tokens_seen": 139957612, + "step": 815 + }, + { + "epoch": 0.21437495890050634, + "loss": 0.2657305598258972, + "loss_ce": 0.007979076355695724, + "loss_iou": 0.41796875, + "loss_num": 0.051513671875, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 139957612, + "step": 815 + }, + { + "epoch": 0.21463799565989347, + "grad_norm": 8.1554830347417, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 140129832, + "step": 816 + }, + { + "epoch": 0.21463799565989347, + "loss": 0.21090401709079742, + "loss_ce": 0.0022858483716845512, + "loss_iou": 0.6015625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 140129832, + "step": 816 + }, + { + "epoch": 0.2149010324192806, + "grad_norm": 6.39307582100075, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 140302204, + "step": 817 + }, + { + "epoch": 0.2149010324192806, + "loss": 0.12444409728050232, + "loss_ce": 0.0020075817592442036, + "loss_iou": 0.5859375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 140302204, + "step": 817 + }, + { + "epoch": 0.2151640691786677, + "grad_norm": 11.031920247903537, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 140470936, + "step": 818 + }, + { + "epoch": 0.2151640691786677, + "loss": 0.24400946497917175, + "loss_ce": 0.00112004519905895, + "loss_iou": 0.5078125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 140470936, + "step": 818 + }, + { + "epoch": 0.21542710593805484, + "grad_norm": 5.897881271346391, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 140643120, + "step": 819 + }, + { + "epoch": 0.21542710593805484, + "loss": 0.12999695539474487, + "loss_ce": 0.001182260224595666, + "loss_iou": 0.5390625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 140643120, + "step": 819 + }, + { + "epoch": 0.21569014269744197, + "grad_norm": 13.442111711356437, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 140815608, + "step": 820 + }, + { + "epoch": 0.21569014269744197, + "loss": 0.1816408634185791, + "loss_ce": 0.0014040416572242975, + "loss_iou": 0.345703125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 140815608, + "step": 820 + }, + { + "epoch": 0.2159531794568291, + "grad_norm": 13.297750331398602, + "learning_rate": 5e-06, + "loss": 0.1926, + "num_input_tokens_seen": 140987824, + "step": 821 + }, + { + "epoch": 0.2159531794568291, + "loss": 0.29018890857696533, + "loss_ce": 0.002347112400457263, + "loss_iou": 0.478515625, + "loss_num": 0.0576171875, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 140987824, + "step": 821 + }, + { + "epoch": 0.21621621621621623, + "grad_norm": 18.74775436478718, + "learning_rate": 5e-06, + "loss": 0.2948, + "num_input_tokens_seen": 141160200, + "step": 822 + }, + { + "epoch": 0.21621621621621623, + "loss": 0.2370918244123459, + "loss_ce": 0.1260078400373459, + "loss_iou": 0.59765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 141160200, + "step": 822 + }, + { + "epoch": 0.21647925297560333, + "grad_norm": 19.14528392541103, + "learning_rate": 5e-06, + "loss": 0.1977, + "num_input_tokens_seen": 141329928, + "step": 823 + }, + { + "epoch": 0.21647925297560333, + "loss": 0.1949683427810669, + "loss_ce": 0.0917884111404419, + "loss_iou": 0.474609375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 141329928, + "step": 823 + }, + { + "epoch": 0.21674228973499046, + "grad_norm": 6.8551193847587735, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 141502236, + "step": 824 + }, + { + "epoch": 0.21674228973499046, + "loss": 0.09739409387111664, + "loss_ce": 0.007184132467955351, + "loss_iou": 0.6640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 141502236, + "step": 824 + }, + { + "epoch": 0.2170053264943776, + "grad_norm": 7.934353513499139, + "learning_rate": 5e-06, + "loss": 0.2174, + "num_input_tokens_seen": 141674272, + "step": 825 + }, + { + "epoch": 0.2170053264943776, + "loss": 0.3033770024776459, + "loss_ce": 0.026491012424230576, + "loss_iou": 0.45703125, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 141674272, + "step": 825 + }, + { + "epoch": 0.21726836325376472, + "grad_norm": 6.296485584445855, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 141846556, + "step": 826 + }, + { + "epoch": 0.21726836325376472, + "loss": 0.16873130202293396, + "loss_ce": 0.0013118635397404432, + "loss_iou": 0.59765625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 141846556, + "step": 826 + }, + { + "epoch": 0.21753140001315183, + "grad_norm": 10.428529158159186, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 142018940, + "step": 827 + }, + { + "epoch": 0.21753140001315183, + "loss": 0.15984642505645752, + "loss_ce": 0.0018874472007155418, + "loss_iou": 0.421875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 142018940, + "step": 827 + }, + { + "epoch": 0.21779443677253896, + "grad_norm": 7.702760290749295, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 142189520, + "step": 828 + }, + { + "epoch": 0.21779443677253896, + "loss": 0.17661917209625244, + "loss_ce": 0.007002475671470165, + "loss_iou": 0.5078125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 142189520, + "step": 828 + }, + { + "epoch": 0.2180574735319261, + "grad_norm": 5.154197949486892, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 142359752, + "step": 829 + }, + { + "epoch": 0.2180574735319261, + "loss": 0.11216248571872711, + "loss_ce": 0.0025128289125859737, + "loss_iou": 0.52734375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 142359752, + "step": 829 + }, + { + "epoch": 0.21832051029131322, + "grad_norm": 9.212654853971248, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 142530028, + "step": 830 + }, + { + "epoch": 0.21832051029131322, + "loss": 0.17883381247520447, + "loss_ce": 0.006104309111833572, + "loss_iou": 0.515625, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 142530028, + "step": 830 + }, + { + "epoch": 0.21858354705070032, + "grad_norm": 22.355687744069783, + "learning_rate": 5e-06, + "loss": 0.2118, + "num_input_tokens_seen": 142698896, + "step": 831 + }, + { + "epoch": 0.21858354705070032, + "loss": 0.2873075604438782, + "loss_ce": 0.00788860023021698, + "loss_iou": 0.392578125, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 142698896, + "step": 831 + }, + { + "epoch": 0.21884658381008745, + "grad_norm": 7.646461001716363, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 142869236, + "step": 832 + }, + { + "epoch": 0.21884658381008745, + "loss": 0.19723272323608398, + "loss_ce": 0.00063848658464849, + "loss_iou": 0.53515625, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 142869236, + "step": 832 + }, + { + "epoch": 0.21910962056947458, + "grad_norm": 5.709950173979812, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 143041432, + "step": 833 + }, + { + "epoch": 0.21910962056947458, + "loss": 0.13923318684101105, + "loss_ce": 0.006512241438031197, + "loss_iou": 0.470703125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 143041432, + "step": 833 + }, + { + "epoch": 0.21937265732886171, + "grad_norm": 7.759034787872298, + "learning_rate": 5e-06, + "loss": 0.1411, + "num_input_tokens_seen": 143213276, + "step": 834 + }, + { + "epoch": 0.21937265732886171, + "loss": 0.13741275668144226, + "loss_ce": 0.00820133276283741, + "loss_iou": 0.4609375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 143213276, + "step": 834 + }, + { + "epoch": 0.21963569408824885, + "grad_norm": 14.10397350971688, + "learning_rate": 5e-06, + "loss": 0.1819, + "num_input_tokens_seen": 143385916, + "step": 835 + }, + { + "epoch": 0.21963569408824885, + "loss": 0.17109227180480957, + "loss_ce": 0.009959458373486996, + "loss_iou": 0.62109375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 143385916, + "step": 835 + }, + { + "epoch": 0.21989873084763595, + "grad_norm": 10.610026241323633, + "learning_rate": 5e-06, + "loss": 0.1959, + "num_input_tokens_seen": 143558108, + "step": 836 + }, + { + "epoch": 0.21989873084763595, + "loss": 0.1955014169216156, + "loss_ce": 0.006353452801704407, + "loss_iou": 0.58984375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 143558108, + "step": 836 + }, + { + "epoch": 0.22016176760702308, + "grad_norm": 6.884069616265665, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 143730560, + "step": 837 + }, + { + "epoch": 0.22016176760702308, + "loss": 0.1851910948753357, + "loss_ce": 0.001475287601351738, + "loss_iou": 0.50390625, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 143730560, + "step": 837 + }, + { + "epoch": 0.2204248043664102, + "grad_norm": 8.581569398296145, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 143901168, + "step": 838 + }, + { + "epoch": 0.2204248043664102, + "loss": 0.10049735009670258, + "loss_ce": 0.00043021421879529953, + "loss_iou": 0.55859375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 143901168, + "step": 838 + }, + { + "epoch": 0.22068784112579734, + "grad_norm": 8.473859363920706, + "learning_rate": 5e-06, + "loss": 0.1656, + "num_input_tokens_seen": 144073300, + "step": 839 + }, + { + "epoch": 0.22068784112579734, + "loss": 0.18692679703235626, + "loss_ce": 0.001440959284082055, + "loss_iou": 0.37109375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 144073300, + "step": 839 + }, + { + "epoch": 0.22095087788518444, + "grad_norm": 4.5516671382314495, + "learning_rate": 5e-06, + "loss": 0.1346, + "num_input_tokens_seen": 144245656, + "step": 840 + }, + { + "epoch": 0.22095087788518444, + "loss": 0.1278304159641266, + "loss_ce": 0.003227140521630645, + "loss_iou": 0.64453125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 144245656, + "step": 840 + }, + { + "epoch": 0.22121391464457157, + "grad_norm": 8.854170908003372, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 144414684, + "step": 841 + }, + { + "epoch": 0.22121391464457157, + "loss": 0.11698315292596817, + "loss_ce": 0.003732412587851286, + "loss_iou": 0.58984375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 144414684, + "step": 841 + }, + { + "epoch": 0.2214769514039587, + "grad_norm": 6.139198378486474, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 144586900, + "step": 842 + }, + { + "epoch": 0.2214769514039587, + "loss": 0.16039735078811646, + "loss_ce": 0.001461806707084179, + "loss_iou": 0.66796875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 144586900, + "step": 842 + }, + { + "epoch": 0.22173998816334584, + "grad_norm": 10.644206851478819, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 144759108, + "step": 843 + }, + { + "epoch": 0.22173998816334584, + "loss": 0.14353252947330475, + "loss_ce": 0.0032432209700345993, + "loss_iou": 0.4453125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 144759108, + "step": 843 + }, + { + "epoch": 0.22200302492273294, + "grad_norm": 6.739417345524847, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 144931444, + "step": 844 + }, + { + "epoch": 0.22200302492273294, + "loss": 0.09944656491279602, + "loss_ce": 0.0004170280881226063, + "loss_iou": 0.6328125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 144931444, + "step": 844 + }, + { + "epoch": 0.22226606168212007, + "grad_norm": 8.110959144365463, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 145102036, + "step": 845 + }, + { + "epoch": 0.22226606168212007, + "loss": 0.2286926507949829, + "loss_ce": 0.001336704008281231, + "loss_iou": 0.56640625, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 145102036, + "step": 845 + }, + { + "epoch": 0.2225290984415072, + "grad_norm": 10.034770268919976, + "learning_rate": 5e-06, + "loss": 0.2222, + "num_input_tokens_seen": 145274324, + "step": 846 + }, + { + "epoch": 0.2225290984415072, + "loss": 0.23924441635608673, + "loss_ce": 0.004075955133885145, + "loss_iou": 0.53515625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 145274324, + "step": 846 + }, + { + "epoch": 0.22279213520089433, + "grad_norm": 6.615833632469255, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 145446460, + "step": 847 + }, + { + "epoch": 0.22279213520089433, + "loss": 0.1300104260444641, + "loss_ce": 0.001287288498133421, + "loss_iou": 0.56640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 145446460, + "step": 847 + }, + { + "epoch": 0.22305517196028146, + "grad_norm": 6.286344612415352, + "learning_rate": 5e-06, + "loss": 0.2284, + "num_input_tokens_seen": 145618532, + "step": 848 + }, + { + "epoch": 0.22305517196028146, + "loss": 0.18182075023651123, + "loss_ce": 0.0016144568799063563, + "loss_iou": NaN, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 145618532, + "step": 848 + }, + { + "epoch": 0.22331820871966857, + "grad_norm": 15.090592675837648, + "learning_rate": 5e-06, + "loss": 0.1974, + "num_input_tokens_seen": 145788852, + "step": 849 + }, + { + "epoch": 0.22331820871966857, + "loss": 0.20251962542533875, + "loss_ce": 0.0014087767340242863, + "loss_iou": 0.5859375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 145788852, + "step": 849 + }, + { + "epoch": 0.2235812454790557, + "grad_norm": 6.5100345965661806, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 145961180, + "step": 850 + }, + { + "epoch": 0.2235812454790557, + "loss": 0.15530481934547424, + "loss_ce": 0.002533819992095232, + "loss_iou": 0.5390625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 145961180, + "step": 850 + }, + { + "epoch": 0.22384428223844283, + "grad_norm": 6.940039828670214, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 146133828, + "step": 851 + }, + { + "epoch": 0.22384428223844283, + "loss": 0.1814190298318863, + "loss_ce": 0.001731535536237061, + "loss_iou": 0.474609375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 146133828, + "step": 851 + }, + { + "epoch": 0.22410731899782996, + "grad_norm": 8.305849772963127, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 146306088, + "step": 852 + }, + { + "epoch": 0.22410731899782996, + "loss": 0.18216609954833984, + "loss_ce": 0.002936376491561532, + "loss_iou": 0.490234375, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 146306088, + "step": 852 + }, + { + "epoch": 0.22437035575721706, + "grad_norm": 7.934420093916033, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 146478248, + "step": 853 + }, + { + "epoch": 0.22437035575721706, + "loss": 0.1829485148191452, + "loss_ce": 0.0025896350853145123, + "loss_iou": 0.671875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 146478248, + "step": 853 + }, + { + "epoch": 0.2246333925166042, + "grad_norm": 8.916802424159366, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 146650532, + "step": 854 + }, + { + "epoch": 0.2246333925166042, + "loss": 0.13404083251953125, + "loss_ce": 0.0006179830525070429, + "loss_iou": 0.63671875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 146650532, + "step": 854 + }, + { + "epoch": 0.22489642927599132, + "grad_norm": 16.03094644250741, + "learning_rate": 5e-06, + "loss": 0.1819, + "num_input_tokens_seen": 146822816, + "step": 855 + }, + { + "epoch": 0.22489642927599132, + "loss": 0.19942086935043335, + "loss_ce": 0.007404262199997902, + "loss_iou": 0.41015625, + "loss_num": 0.038330078125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 146822816, + "step": 855 + }, + { + "epoch": 0.22515946603537845, + "grad_norm": 10.96099831988347, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 146994984, + "step": 856 + }, + { + "epoch": 0.22515946603537845, + "loss": 0.1878020018339157, + "loss_ce": 0.0012785641010850668, + "loss_iou": 0.546875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 146994984, + "step": 856 + }, + { + "epoch": 0.22542250279476556, + "grad_norm": 5.009376224527154, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 147167200, + "step": 857 + }, + { + "epoch": 0.22542250279476556, + "loss": 0.16484007239341736, + "loss_ce": 0.005507797468453646, + "loss_iou": 0.64453125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 147167200, + "step": 857 + }, + { + "epoch": 0.2256855395541527, + "grad_norm": 6.031288325368203, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 147337356, + "step": 858 + }, + { + "epoch": 0.2256855395541527, + "loss": 0.14030741155147552, + "loss_ce": 0.0010862206108868122, + "loss_iou": 0.71484375, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 147337356, + "step": 858 + }, + { + "epoch": 0.22594857631353982, + "grad_norm": 8.953664182124317, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 147509464, + "step": 859 + }, + { + "epoch": 0.22594857631353982, + "loss": 0.19507214426994324, + "loss_ce": 0.0016822540201246738, + "loss_iou": 0.359375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 147509464, + "step": 859 + }, + { + "epoch": 0.22621161307292695, + "grad_norm": 5.535666138038005, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 147681864, + "step": 860 + }, + { + "epoch": 0.22621161307292695, + "loss": 0.194808691740036, + "loss_ce": 0.002822606358677149, + "loss_iou": 0.5, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 147681864, + "step": 860 + }, + { + "epoch": 0.22647464983231408, + "grad_norm": 7.1540218345582, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 147854184, + "step": 861 + }, + { + "epoch": 0.22647464983231408, + "loss": 0.18444868922233582, + "loss_ce": 0.0037846285849809647, + "loss_iou": 0.609375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 147854184, + "step": 861 + }, + { + "epoch": 0.22673768659170118, + "grad_norm": 7.954229564657017, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 148026492, + "step": 862 + }, + { + "epoch": 0.22673768659170118, + "loss": 0.1205034852027893, + "loss_ce": 0.0003863019519485533, + "loss_iou": 0.7421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 148026492, + "step": 862 + }, + { + "epoch": 0.2270007233510883, + "grad_norm": 10.790250508426157, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 148198492, + "step": 863 + }, + { + "epoch": 0.2270007233510883, + "loss": 0.08790126442909241, + "loss_ce": 0.000956688541918993, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 148198492, + "step": 863 + }, + { + "epoch": 0.22726376011047544, + "grad_norm": 7.413582271316403, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 148370744, + "step": 864 + }, + { + "epoch": 0.22726376011047544, + "loss": 0.11584703624248505, + "loss_ce": 0.0007652430795133114, + "loss_iou": 0.625, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 148370744, + "step": 864 + }, + { + "epoch": 0.22752679686986257, + "grad_norm": 6.505762518470224, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 148541368, + "step": 865 + }, + { + "epoch": 0.22752679686986257, + "loss": 0.12583567202091217, + "loss_ce": 0.0008661894826218486, + "loss_iou": 0.62109375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 148541368, + "step": 865 + }, + { + "epoch": 0.22778983362924968, + "grad_norm": 11.31233359994311, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 148713652, + "step": 866 + }, + { + "epoch": 0.22778983362924968, + "loss": 0.09625812619924545, + "loss_ce": 0.0005244807107374072, + "loss_iou": 0.73828125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 148713652, + "step": 866 + }, + { + "epoch": 0.2280528703886368, + "grad_norm": 8.070061922275455, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 148885724, + "step": 867 + }, + { + "epoch": 0.2280528703886368, + "loss": 0.12818799912929535, + "loss_ce": 0.003035409841686487, + "loss_iou": 0.443359375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 148885724, + "step": 867 + }, + { + "epoch": 0.22831590714802394, + "grad_norm": 8.83029409823491, + "learning_rate": 5e-06, + "loss": 0.2104, + "num_input_tokens_seen": 149057764, + "step": 868 + }, + { + "epoch": 0.22831590714802394, + "loss": 0.2020527571439743, + "loss_ce": 0.00414624810218811, + "loss_iou": 0.51953125, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 149057764, + "step": 868 + }, + { + "epoch": 0.22857894390741107, + "grad_norm": 5.578690860970403, + "learning_rate": 5e-06, + "loss": 0.1492, + "num_input_tokens_seen": 149230132, + "step": 869 + }, + { + "epoch": 0.22857894390741107, + "loss": 0.16226467490196228, + "loss_ce": 0.0007046046666800976, + "loss_iou": 0.66015625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 149230132, + "step": 869 + }, + { + "epoch": 0.22884198066679817, + "grad_norm": 8.388877746304392, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 149402312, + "step": 870 + }, + { + "epoch": 0.22884198066679817, + "loss": 0.13199341297149658, + "loss_ce": 0.001866456470452249, + "loss_iou": 0.51953125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 149402312, + "step": 870 + }, + { + "epoch": 0.2291050174261853, + "grad_norm": 13.402550744349123, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 149572632, + "step": 871 + }, + { + "epoch": 0.2291050174261853, + "loss": 0.20936883985996246, + "loss_ce": 0.004016047343611717, + "loss_iou": 0.470703125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 149572632, + "step": 871 + }, + { + "epoch": 0.22936805418557243, + "grad_norm": 5.899366870528114, + "learning_rate": 5e-06, + "loss": 0.1651, + "num_input_tokens_seen": 149744908, + "step": 872 + }, + { + "epoch": 0.22936805418557243, + "loss": 0.1176171749830246, + "loss_ce": 0.001223139464855194, + "loss_iou": 0.51953125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 149744908, + "step": 872 + }, + { + "epoch": 0.22963109094495956, + "grad_norm": 8.241998846381511, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 149917100, + "step": 873 + }, + { + "epoch": 0.22963109094495956, + "loss": 0.1720806509256363, + "loss_ce": 0.004966393578797579, + "loss_iou": 0.546875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 149917100, + "step": 873 + }, + { + "epoch": 0.2298941277043467, + "grad_norm": 9.153818862978659, + "learning_rate": 5e-06, + "loss": 0.1555, + "num_input_tokens_seen": 150087628, + "step": 874 + }, + { + "epoch": 0.2298941277043467, + "loss": 0.14027956128120422, + "loss_ce": 0.002584239235147834, + "loss_iou": 0.5625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 150087628, + "step": 874 + }, + { + "epoch": 0.2301571644637338, + "grad_norm": 25.107851811290338, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 150258428, + "step": 875 + }, + { + "epoch": 0.2301571644637338, + "loss": 0.17914238572120667, + "loss_ce": 0.0014690514653921127, + "loss_iou": 0.5625, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 150258428, + "step": 875 + }, + { + "epoch": 0.23042020122312093, + "grad_norm": 6.938403780407342, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 150430692, + "step": 876 + }, + { + "epoch": 0.23042020122312093, + "loss": 0.13967271149158478, + "loss_ce": 0.002038437407463789, + "loss_iou": 0.392578125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 150430692, + "step": 876 + }, + { + "epoch": 0.23068323798250806, + "grad_norm": 8.594672662971634, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 150603108, + "step": 877 + }, + { + "epoch": 0.23068323798250806, + "loss": 0.3066813349723816, + "loss_ce": 0.0005289965192787349, + "loss_iou": 0.44921875, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 150603108, + "step": 877 + }, + { + "epoch": 0.2309462747418952, + "grad_norm": 5.613988390531258, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 150775124, + "step": 878 + }, + { + "epoch": 0.2309462747418952, + "loss": 0.16111034154891968, + "loss_ce": 0.008644518442451954, + "loss_iou": 0.640625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 150775124, + "step": 878 + }, + { + "epoch": 0.2312093115012823, + "grad_norm": 34.35737633666629, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 150947036, + "step": 879 + }, + { + "epoch": 0.2312093115012823, + "loss": 0.15406344830989838, + "loss_ce": 0.0015213302103802562, + "loss_iou": 0.439453125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 150947036, + "step": 879 + }, + { + "epoch": 0.23147234826066942, + "grad_norm": 6.1651043542261466, + "learning_rate": 5e-06, + "loss": 0.1981, + "num_input_tokens_seen": 151119388, + "step": 880 + }, + { + "epoch": 0.23147234826066942, + "loss": 0.29976093769073486, + "loss_ce": 0.00825704075396061, + "loss_iou": 0.48828125, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 151119388, + "step": 880 + }, + { + "epoch": 0.23173538502005656, + "grad_norm": 8.982832830215536, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 151290092, + "step": 881 + }, + { + "epoch": 0.23173538502005656, + "loss": 0.18080484867095947, + "loss_ce": 0.0034977139439433813, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 151290092, + "step": 881 + }, + { + "epoch": 0.2319984217794437, + "grad_norm": 13.943571194131867, + "learning_rate": 5e-06, + "loss": 0.214, + "num_input_tokens_seen": 151462268, + "step": 882 + }, + { + "epoch": 0.2319984217794437, + "loss": 0.16014625132083893, + "loss_ce": 0.004170912317931652, + "loss_iou": 0.625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 151462268, + "step": 882 + }, + { + "epoch": 0.2322614585388308, + "grad_norm": 7.043443131491897, + "learning_rate": 5e-06, + "loss": 0.1524, + "num_input_tokens_seen": 151634180, + "step": 883 + }, + { + "epoch": 0.2322614585388308, + "loss": 0.12871429324150085, + "loss_ce": 0.0008456383948214352, + "loss_iou": 0.44921875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 151634180, + "step": 883 + }, + { + "epoch": 0.23252449529821792, + "grad_norm": 6.396513380644998, + "learning_rate": 5e-06, + "loss": 0.1833, + "num_input_tokens_seen": 151806384, + "step": 884 + }, + { + "epoch": 0.23252449529821792, + "loss": 0.18081963062286377, + "loss_ce": 0.002169727347791195, + "loss_iou": 0.421875, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 151806384, + "step": 884 + }, + { + "epoch": 0.23278753205760505, + "grad_norm": 13.149227804701054, + "learning_rate": 5e-06, + "loss": 0.152, + "num_input_tokens_seen": 151978708, + "step": 885 + }, + { + "epoch": 0.23278753205760505, + "loss": 0.1582297682762146, + "loss_ce": 0.007228789385408163, + "loss_iou": 0.546875, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 151978708, + "step": 885 + }, + { + "epoch": 0.23305056881699218, + "grad_norm": 6.784444096578918, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 152150848, + "step": 886 + }, + { + "epoch": 0.23305056881699218, + "loss": 0.19164127111434937, + "loss_ce": 0.0013946772087365389, + "loss_iou": NaN, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 152150848, + "step": 886 + }, + { + "epoch": 0.2333136055763793, + "grad_norm": 6.295673229022739, + "learning_rate": 5e-06, + "loss": 0.1772, + "num_input_tokens_seen": 152323052, + "step": 887 + }, + { + "epoch": 0.2333136055763793, + "loss": 0.22069396078586578, + "loss_ce": 0.0015167115489020944, + "loss_iou": 0.478515625, + "loss_num": 0.0439453125, + "loss_xval": 0.21875, + "num_input_tokens_seen": 152323052, + "step": 887 + }, + { + "epoch": 0.23357664233576642, + "grad_norm": 11.781517950000717, + "learning_rate": 5e-06, + "loss": 0.1798, + "num_input_tokens_seen": 152493800, + "step": 888 + }, + { + "epoch": 0.23357664233576642, + "loss": 0.1682368516921997, + "loss_ce": 0.0009394832304678857, + "loss_iou": 0.54296875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 152493800, + "step": 888 + }, + { + "epoch": 0.23383967909515355, + "grad_norm": 11.368208703500017, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 152665776, + "step": 889 + }, + { + "epoch": 0.23383967909515355, + "loss": 0.2839386761188507, + "loss_ce": 0.003970403224229813, + "loss_iou": 0.51171875, + "loss_num": 0.05615234375, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 152665776, + "step": 889 + }, + { + "epoch": 0.23410271585454068, + "grad_norm": 8.093455275529028, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 152834764, + "step": 890 + }, + { + "epoch": 0.23410271585454068, + "loss": 0.1241624653339386, + "loss_ce": 0.004472525790333748, + "loss_iou": 0.59765625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 152834764, + "step": 890 + }, + { + "epoch": 0.2343657526139278, + "grad_norm": 12.52857780858346, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 153007304, + "step": 891 + }, + { + "epoch": 0.2343657526139278, + "loss": 0.21788766980171204, + "loss_ce": 0.002006314927712083, + "loss_iou": 0.59765625, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 153007304, + "step": 891 + }, + { + "epoch": 0.2346287893733149, + "grad_norm": 7.50091412461157, + "learning_rate": 5e-06, + "loss": 0.1804, + "num_input_tokens_seen": 153179168, + "step": 892 + }, + { + "epoch": 0.2346287893733149, + "loss": 0.17867043614387512, + "loss_ce": 0.0008750315755605698, + "loss_iou": NaN, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 153179168, + "step": 892 + }, + { + "epoch": 0.23489182613270204, + "grad_norm": 6.297998473336444, + "learning_rate": 5e-06, + "loss": 0.1679, + "num_input_tokens_seen": 153351220, + "step": 893 + }, + { + "epoch": 0.23489182613270204, + "loss": 0.13165241479873657, + "loss_ce": 0.001311830012127757, + "loss_iou": 0.380859375, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 153351220, + "step": 893 + }, + { + "epoch": 0.23515486289208917, + "grad_norm": 7.909655629561448, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 153523232, + "step": 894 + }, + { + "epoch": 0.23515486289208917, + "loss": 0.13778507709503174, + "loss_ce": 0.002287032548338175, + "loss_iou": 0.51953125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 153523232, + "step": 894 + }, + { + "epoch": 0.2354178996514763, + "grad_norm": 5.883815728394397, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 153692208, + "step": 895 + }, + { + "epoch": 0.2354178996514763, + "loss": 0.1446894109249115, + "loss_ce": 0.001439890475012362, + "loss_iou": 0.462890625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 153692208, + "step": 895 + }, + { + "epoch": 0.2356809364108634, + "grad_norm": 15.735436674991021, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 153862388, + "step": 896 + }, + { + "epoch": 0.2356809364108634, + "loss": 0.16044028103351593, + "loss_ce": 0.00043661610106937587, + "loss_iou": 0.6328125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 153862388, + "step": 896 + }, + { + "epoch": 0.23594397317025054, + "grad_norm": 5.835513921908954, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 154034480, + "step": 897 + }, + { + "epoch": 0.23594397317025054, + "loss": 0.14927011728286743, + "loss_ce": 0.0002833124599419534, + "loss_iou": 0.515625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 154034480, + "step": 897 + }, + { + "epoch": 0.23620700992963767, + "grad_norm": 4.347875981694168, + "learning_rate": 5e-06, + "loss": 0.1672, + "num_input_tokens_seen": 154203000, + "step": 898 + }, + { + "epoch": 0.23620700992963767, + "loss": 0.1722668707370758, + "loss_ce": 0.0003613463486544788, + "loss_iou": 0.5390625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 154203000, + "step": 898 + }, + { + "epoch": 0.2364700466890248, + "grad_norm": 6.555211118822418, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 154375292, + "step": 899 + }, + { + "epoch": 0.2364700466890248, + "loss": 0.12109389901161194, + "loss_ce": 0.001983799273148179, + "loss_iou": 0.53125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 154375292, + "step": 899 + }, + { + "epoch": 0.2367330834484119, + "grad_norm": 4.963219405700268, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 154547472, + "step": 900 + }, + { + "epoch": 0.2367330834484119, + "loss": 0.1948363184928894, + "loss_ce": 0.001843146630562842, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 154547472, + "step": 900 + }, + { + "epoch": 0.23699612020779903, + "grad_norm": 9.476812377662082, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 154719756, + "step": 901 + }, + { + "epoch": 0.23699612020779903, + "loss": 0.1722353994846344, + "loss_ce": 0.0028628362342715263, + "loss_iou": 0.61328125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 154719756, + "step": 901 + }, + { + "epoch": 0.23725915696718616, + "grad_norm": 8.867901615298983, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 154891816, + "step": 902 + }, + { + "epoch": 0.23725915696718616, + "loss": 0.13942725956439972, + "loss_ce": 0.0014267791993916035, + "loss_iou": 0.71484375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 154891816, + "step": 902 + }, + { + "epoch": 0.2375221937265733, + "grad_norm": 7.06026317137913, + "learning_rate": 5e-06, + "loss": 0.1247, + "num_input_tokens_seen": 155064164, + "step": 903 + }, + { + "epoch": 0.2375221937265733, + "loss": 0.13094615936279297, + "loss_ce": 0.000575068814214319, + "loss_iou": 0.47265625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 155064164, + "step": 903 + }, + { + "epoch": 0.23778523048596042, + "grad_norm": 6.065414379643311, + "learning_rate": 5e-06, + "loss": 0.1882, + "num_input_tokens_seen": 155236136, + "step": 904 + }, + { + "epoch": 0.23778523048596042, + "loss": 0.17110927402973175, + "loss_ce": 0.0006380859995260835, + "loss_iou": 0.7421875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 155236136, + "step": 904 + }, + { + "epoch": 0.23804826724534753, + "grad_norm": 6.4154554676892275, + "learning_rate": 5e-06, + "loss": 0.1664, + "num_input_tokens_seen": 155408400, + "step": 905 + }, + { + "epoch": 0.23804826724534753, + "loss": 0.18800213932991028, + "loss_ce": 0.005415464285761118, + "loss_iou": 0.515625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 155408400, + "step": 905 + }, + { + "epoch": 0.23831130400473466, + "grad_norm": 6.061588736644807, + "learning_rate": 5e-06, + "loss": 0.1656, + "num_input_tokens_seen": 155578772, + "step": 906 + }, + { + "epoch": 0.23831130400473466, + "loss": 0.11921393871307373, + "loss_ce": 0.0009278038050979376, + "loss_iou": 0.609375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 155578772, + "step": 906 + }, + { + "epoch": 0.2385743407641218, + "grad_norm": 9.748504628242028, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 155751088, + "step": 907 + }, + { + "epoch": 0.2385743407641218, + "loss": 0.13661867380142212, + "loss_ce": 0.0004492364823818207, + "loss_iou": 0.62890625, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 155751088, + "step": 907 + }, + { + "epoch": 0.23883737752350892, + "grad_norm": 11.132080080363805, + "learning_rate": 5e-06, + "loss": 0.2072, + "num_input_tokens_seen": 155923228, + "step": 908 + }, + { + "epoch": 0.23883737752350892, + "loss": 0.2608073353767395, + "loss_ce": 0.006229718215763569, + "loss_iou": 0.447265625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 155923228, + "step": 908 + }, + { + "epoch": 0.23910041428289602, + "grad_norm": 6.802731098529186, + "learning_rate": 5e-06, + "loss": 0.1071, + "num_input_tokens_seen": 156095144, + "step": 909 + }, + { + "epoch": 0.23910041428289602, + "loss": 0.12534289062023163, + "loss_ce": 0.0022044701036065817, + "loss_iou": 0.67578125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 156095144, + "step": 909 + }, + { + "epoch": 0.23936345104228315, + "grad_norm": 5.86214467113572, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 156267348, + "step": 910 + }, + { + "epoch": 0.23936345104228315, + "loss": 0.1707063615322113, + "loss_ce": 0.0013948287814855576, + "loss_iou": 0.5859375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 156267348, + "step": 910 + }, + { + "epoch": 0.23962648780167028, + "grad_norm": 5.562832964795879, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 156439364, + "step": 911 + }, + { + "epoch": 0.23962648780167028, + "loss": 0.12454073876142502, + "loss_ce": 0.0003036795533262193, + "loss_iou": 0.578125, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 156439364, + "step": 911 + }, + { + "epoch": 0.23988952456105742, + "grad_norm": 13.223359809657884, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 156609552, + "step": 912 + }, + { + "epoch": 0.23988952456105742, + "loss": 0.15059047937393188, + "loss_ce": 0.00239713117480278, + "loss_iou": 0.59375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 156609552, + "step": 912 + }, + { + "epoch": 0.24015256132044452, + "grad_norm": 6.777584209416996, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 156781920, + "step": 913 + }, + { + "epoch": 0.24015256132044452, + "loss": 0.13880465924739838, + "loss_ce": 0.0020859187934547663, + "loss_iou": 0.6015625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 156781920, + "step": 913 + }, + { + "epoch": 0.24041559807983165, + "grad_norm": 11.055320488873154, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 156954340, + "step": 914 + }, + { + "epoch": 0.24041559807983165, + "loss": 0.2452932596206665, + "loss_ce": 0.002312319353222847, + "loss_iou": 0.4765625, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 156954340, + "step": 914 + }, + { + "epoch": 0.24067863483921878, + "grad_norm": 4.458423775022664, + "learning_rate": 5e-06, + "loss": 0.1784, + "num_input_tokens_seen": 157126652, + "step": 915 + }, + { + "epoch": 0.24067863483921878, + "loss": 0.13856951892375946, + "loss_ce": 0.0020949181634932756, + "loss_iou": 0.416015625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 157126652, + "step": 915 + }, + { + "epoch": 0.2409416715986059, + "grad_norm": 5.681838115677692, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 157298968, + "step": 916 + }, + { + "epoch": 0.2409416715986059, + "loss": 0.1539350152015686, + "loss_ce": 0.00638251006603241, + "loss_iou": 0.412109375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 157298968, + "step": 916 + }, + { + "epoch": 0.24120470835799304, + "grad_norm": 5.180460717860643, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 157471004, + "step": 917 + }, + { + "epoch": 0.24120470835799304, + "loss": 0.10441954433917999, + "loss_ce": 0.0008428902365267277, + "loss_iou": 0.43359375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 157471004, + "step": 917 + }, + { + "epoch": 0.24146774511738014, + "grad_norm": 10.247033875410487, + "learning_rate": 5e-06, + "loss": 0.161, + "num_input_tokens_seen": 157643232, + "step": 918 + }, + { + "epoch": 0.24146774511738014, + "loss": 0.18021947145462036, + "loss_ce": 0.002607175149023533, + "loss_iou": 0.59375, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 157643232, + "step": 918 + }, + { + "epoch": 0.24173078187676728, + "grad_norm": 6.8962184523908725, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 157815292, + "step": 919 + }, + { + "epoch": 0.24173078187676728, + "loss": 0.1455521434545517, + "loss_ce": 0.0008988262270577252, + "loss_iou": 0.66015625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 157815292, + "step": 919 + }, + { + "epoch": 0.2419938186361544, + "grad_norm": 6.7883399023716775, + "learning_rate": 5e-06, + "loss": 0.1916, + "num_input_tokens_seen": 157987716, + "step": 920 + }, + { + "epoch": 0.2419938186361544, + "loss": 0.19279745221138, + "loss_ce": 0.0026729374658316374, + "loss_iou": 0.65234375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 157987716, + "step": 920 + }, + { + "epoch": 0.24225685539554154, + "grad_norm": 7.426866121442803, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 158160224, + "step": 921 + }, + { + "epoch": 0.24225685539554154, + "loss": 0.1912505030632019, + "loss_ce": 0.001980474451556802, + "loss_iou": 0.470703125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 158160224, + "step": 921 + }, + { + "epoch": 0.24251989215492864, + "grad_norm": 7.602353481412061, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 158332464, + "step": 922 + }, + { + "epoch": 0.24251989215492864, + "loss": 0.1421346664428711, + "loss_ce": 0.003951081074774265, + "loss_iou": 0.494140625, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 158332464, + "step": 922 + }, + { + "epoch": 0.24278292891431577, + "grad_norm": 9.798997838296735, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 158504560, + "step": 923 + }, + { + "epoch": 0.24278292891431577, + "loss": 0.15015605092048645, + "loss_ce": 0.0013523304369300604, + "loss_iou": 0.44140625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 158504560, + "step": 923 + }, + { + "epoch": 0.2430459656737029, + "grad_norm": 9.276091083653826, + "learning_rate": 5e-06, + "loss": 0.1582, + "num_input_tokens_seen": 158676676, + "step": 924 + }, + { + "epoch": 0.2430459656737029, + "loss": 0.13057658076286316, + "loss_ce": 0.0024943118914961815, + "loss_iou": 0.59765625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 158676676, + "step": 924 + }, + { + "epoch": 0.24330900243309003, + "grad_norm": 7.79676627099927, + "learning_rate": 5e-06, + "loss": 0.1547, + "num_input_tokens_seen": 158849084, + "step": 925 + }, + { + "epoch": 0.24330900243309003, + "loss": 0.1701420098543167, + "loss_ce": 0.004248456098139286, + "loss_iou": 0.447265625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 158849084, + "step": 925 + }, + { + "epoch": 0.24357203919247714, + "grad_norm": 7.627935616299721, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 159019648, + "step": 926 + }, + { + "epoch": 0.24357203919247714, + "loss": 0.09866867959499359, + "loss_ce": 0.002080547623336315, + "loss_iou": 0.59765625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 159019648, + "step": 926 + }, + { + "epoch": 0.24383507595186427, + "grad_norm": 7.674777640749283, + "learning_rate": 5e-06, + "loss": 0.1837, + "num_input_tokens_seen": 159191744, + "step": 927 + }, + { + "epoch": 0.24383507595186427, + "loss": 0.09513729810714722, + "loss_ce": 0.0016314350068569183, + "loss_iou": 0.451171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 159191744, + "step": 927 + }, + { + "epoch": 0.2440981127112514, + "grad_norm": 8.029174499906352, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 159364172, + "step": 928 + }, + { + "epoch": 0.2440981127112514, + "loss": 0.2412642240524292, + "loss_ce": 0.0012739873491227627, + "loss_iou": 0.48046875, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 159364172, + "step": 928 + }, + { + "epoch": 0.24436114947063853, + "grad_norm": 3.8948353119563537, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 159534608, + "step": 929 + }, + { + "epoch": 0.24436114947063853, + "loss": 0.11882825195789337, + "loss_ce": 0.0011219491716474295, + "loss_iou": 0.58203125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 159534608, + "step": 929 + }, + { + "epoch": 0.24462418623002566, + "grad_norm": 12.725190215625348, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 159705228, + "step": 930 + }, + { + "epoch": 0.24462418623002566, + "loss": 0.15004633367061615, + "loss_ce": 0.000662794045638293, + "loss_iou": 0.451171875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 159705228, + "step": 930 + }, + { + "epoch": 0.24488722298941276, + "grad_norm": 8.38127835969171, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 159875324, + "step": 931 + }, + { + "epoch": 0.24488722298941276, + "loss": 0.1881195604801178, + "loss_ce": 0.0022675050422549248, + "loss_iou": 0.40234375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 159875324, + "step": 931 + }, + { + "epoch": 0.2451502597487999, + "grad_norm": 8.09047633001587, + "learning_rate": 5e-06, + "loss": 0.1537, + "num_input_tokens_seen": 160045932, + "step": 932 + }, + { + "epoch": 0.2451502597487999, + "loss": 0.23839473724365234, + "loss_ce": 0.0010290088830515742, + "loss_iou": 0.494140625, + "loss_num": 0.047607421875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 160045932, + "step": 932 + }, + { + "epoch": 0.24541329650818702, + "grad_norm": 14.720243529942747, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 160218252, + "step": 933 + }, + { + "epoch": 0.24541329650818702, + "loss": 0.12284128367900848, + "loss_ce": 0.0022663308773189783, + "loss_iou": 0.4296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 160218252, + "step": 933 + }, + { + "epoch": 0.24567633326757415, + "grad_norm": 6.031479541696206, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 160390560, + "step": 934 + }, + { + "epoch": 0.24567633326757415, + "loss": 0.1301022320985794, + "loss_ce": 0.0005245967186056077, + "loss_iou": 0.70703125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 160390560, + "step": 934 + }, + { + "epoch": 0.24593937002696126, + "grad_norm": 6.383554643597018, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 160562528, + "step": 935 + }, + { + "epoch": 0.24593937002696126, + "loss": 0.17644909024238586, + "loss_ce": 0.0030787207651883364, + "loss_iou": 0.375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 160562528, + "step": 935 + }, + { + "epoch": 0.2462024067863484, + "grad_norm": 20.862114065213355, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 160734384, + "step": 936 + }, + { + "epoch": 0.2462024067863484, + "loss": 0.11419504880905151, + "loss_ce": 0.0004255172680132091, + "loss_iou": 0.466796875, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 160734384, + "step": 936 + }, + { + "epoch": 0.24646544354573552, + "grad_norm": 8.0386150468435, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 160906448, + "step": 937 + }, + { + "epoch": 0.24646544354573552, + "loss": 0.14023496210575104, + "loss_ce": 0.0016241249395534396, + "loss_iou": 0.388671875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 160906448, + "step": 937 + }, + { + "epoch": 0.24672848030512265, + "grad_norm": 7.276863008633557, + "learning_rate": 5e-06, + "loss": 0.1694, + "num_input_tokens_seen": 161078828, + "step": 938 + }, + { + "epoch": 0.24672848030512265, + "loss": 0.21741217374801636, + "loss_ce": 0.004765682853758335, + "loss_iou": 0.7265625, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 161078828, + "step": 938 + }, + { + "epoch": 0.24699151706450975, + "grad_norm": 10.285417073408015, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 161249228, + "step": 939 + }, + { + "epoch": 0.24699151706450975, + "loss": 0.2533302903175354, + "loss_ce": 0.004825636278837919, + "loss_iou": 0.3671875, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 161249228, + "step": 939 + }, + { + "epoch": 0.24725455382389688, + "grad_norm": 7.503087196122763, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 161421624, + "step": 940 + }, + { + "epoch": 0.24725455382389688, + "loss": 0.1497778743505478, + "loss_ce": 0.003537639044225216, + "loss_iou": 0.71484375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 161421624, + "step": 940 + }, + { + "epoch": 0.24751759058328401, + "grad_norm": 6.078271645066026, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 161593500, + "step": 941 + }, + { + "epoch": 0.24751759058328401, + "loss": 0.15894815325737, + "loss_ce": 0.0006839816924184561, + "loss_iou": 0.6015625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 161593500, + "step": 941 + }, + { + "epoch": 0.24778062734267114, + "grad_norm": 16.470075984430842, + "learning_rate": 5e-06, + "loss": 0.1725, + "num_input_tokens_seen": 161764108, + "step": 942 + }, + { + "epoch": 0.24778062734267114, + "loss": 0.2659192383289337, + "loss_ce": 0.003468066919595003, + "loss_iou": 0.5546875, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 161764108, + "step": 942 + }, + { + "epoch": 0.24804366410205828, + "grad_norm": 18.319111638013048, + "learning_rate": 5e-06, + "loss": 0.1876, + "num_input_tokens_seen": 161936252, + "step": 943 + }, + { + "epoch": 0.24804366410205828, + "loss": 0.23192915320396423, + "loss_ce": 0.004817330744117498, + "loss_iou": 0.443359375, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 161936252, + "step": 943 + }, + { + "epoch": 0.24830670086144538, + "grad_norm": 11.296239885199183, + "learning_rate": 5e-06, + "loss": 0.1547, + "num_input_tokens_seen": 162108564, + "step": 944 + }, + { + "epoch": 0.24830670086144538, + "loss": 0.13432571291923523, + "loss_ce": 0.002459259470924735, + "loss_iou": 0.7578125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 162108564, + "step": 944 + }, + { + "epoch": 0.2485697376208325, + "grad_norm": 5.511055668564898, + "learning_rate": 5e-06, + "loss": 0.1393, + "num_input_tokens_seen": 162280792, + "step": 945 + }, + { + "epoch": 0.2485697376208325, + "loss": 0.13674385845661163, + "loss_ce": 0.0015509906224906445, + "loss_iou": 0.61328125, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 162280792, + "step": 945 + }, + { + "epoch": 0.24883277438021964, + "grad_norm": 10.355301687463253, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 162449528, + "step": 946 + }, + { + "epoch": 0.24883277438021964, + "loss": 0.2201877236366272, + "loss_ce": 0.0006442689918912947, + "loss_iou": 0.33984375, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 162449528, + "step": 946 + }, + { + "epoch": 0.24909581113960677, + "grad_norm": 11.25377814108321, + "learning_rate": 5e-06, + "loss": 0.1493, + "num_input_tokens_seen": 162617256, + "step": 947 + }, + { + "epoch": 0.24909581113960677, + "loss": 0.188047856092453, + "loss_ce": 0.0031418518628925085, + "loss_iou": 0.58203125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 162617256, + "step": 947 + }, + { + "epoch": 0.24935884789899387, + "grad_norm": 12.235451694711202, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 162789400, + "step": 948 + }, + { + "epoch": 0.24935884789899387, + "loss": 0.15606345236301422, + "loss_ce": 0.005215056240558624, + "loss_iou": 0.66796875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 162789400, + "step": 948 + }, + { + "epoch": 0.249621884658381, + "grad_norm": 7.1804139994510034, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 162961432, + "step": 949 + }, + { + "epoch": 0.249621884658381, + "loss": 0.10785458981990814, + "loss_ce": 0.002507905475795269, + "loss_iou": 0.56640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 162961432, + "step": 949 + }, + { + "epoch": 0.24988492141776814, + "grad_norm": 14.388122342778688, + "learning_rate": 5e-06, + "loss": 0.1883, + "num_input_tokens_seen": 163133524, + "step": 950 + }, + { + "epoch": 0.24988492141776814, + "loss": 0.16639769077301025, + "loss_ce": 0.001511220121756196, + "loss_iou": 0.60546875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 163133524, + "step": 950 + }, + { + "epoch": 0.25014795817715524, + "grad_norm": 10.331941667190199, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 163305816, + "step": 951 + }, + { + "epoch": 0.25014795817715524, + "loss": 0.1127045676112175, + "loss_ce": 0.0008271271362900734, + "loss_iou": 0.56640625, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 163305816, + "step": 951 + }, + { + "epoch": 0.2504109949365424, + "grad_norm": 6.829599375925184, + "learning_rate": 5e-06, + "loss": 0.154, + "num_input_tokens_seen": 163478368, + "step": 952 + }, + { + "epoch": 0.2504109949365424, + "loss": 0.1521347612142563, + "loss_ce": 0.0029038134962320328, + "loss_iou": 0.54296875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 163478368, + "step": 952 + }, + { + "epoch": 0.2506740316959295, + "grad_norm": 5.382923356179618, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 163650500, + "step": 953 + }, + { + "epoch": 0.2506740316959295, + "loss": 0.14390447735786438, + "loss_ce": 0.00224187970161438, + "loss_iou": 0.62890625, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 163650500, + "step": 953 + }, + { + "epoch": 0.2509370684553166, + "grad_norm": 4.693926744893699, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 163822252, + "step": 954 + }, + { + "epoch": 0.2509370684553166, + "loss": 0.1426747441291809, + "loss_ce": 0.001286811544559896, + "loss_iou": 0.41015625, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 163822252, + "step": 954 + }, + { + "epoch": 0.25120010521470376, + "grad_norm": 6.416178956242753, + "learning_rate": 5e-06, + "loss": 0.1743, + "num_input_tokens_seen": 163992628, + "step": 955 + }, + { + "epoch": 0.25120010521470376, + "loss": 0.2167130708694458, + "loss_ce": 0.0013200179673731327, + "loss_iou": 0.41015625, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 163992628, + "step": 955 + }, + { + "epoch": 0.25146314197409086, + "grad_norm": 8.38564274924814, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 164164972, + "step": 956 + }, + { + "epoch": 0.25146314197409086, + "loss": 0.14087224006652832, + "loss_ce": 0.0018951823003590107, + "loss_iou": 0.53515625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 164164972, + "step": 956 + }, + { + "epoch": 0.251726178733478, + "grad_norm": 10.557767516476979, + "learning_rate": 5e-06, + "loss": 0.1952, + "num_input_tokens_seen": 164334056, + "step": 957 + }, + { + "epoch": 0.251726178733478, + "loss": 0.10351097583770752, + "loss_ce": 0.003108144039288163, + "loss_iou": 0.5703125, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 164334056, + "step": 957 + }, + { + "epoch": 0.2519892154928651, + "grad_norm": 6.187664908064733, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 164506496, + "step": 958 + }, + { + "epoch": 0.2519892154928651, + "loss": 0.15677396953105927, + "loss_ce": 0.005193163640797138, + "loss_iou": 0.55859375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 164506496, + "step": 958 + }, + { + "epoch": 0.25225225225225223, + "grad_norm": 9.49095916832686, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 164678644, + "step": 959 + }, + { + "epoch": 0.25225225225225223, + "loss": 0.18635977804660797, + "loss_ce": 0.002003092784434557, + "loss_iou": 0.5, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 164678644, + "step": 959 + }, + { + "epoch": 0.2525152890116394, + "grad_norm": 6.024170228931713, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 164851184, + "step": 960 + }, + { + "epoch": 0.2525152890116394, + "loss": 0.13998761773109436, + "loss_ce": 0.003940259106457233, + "loss_iou": 0.58203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 164851184, + "step": 960 + }, + { + "epoch": 0.2527783257710265, + "grad_norm": 6.120337923811076, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 165023544, + "step": 961 + }, + { + "epoch": 0.2527783257710265, + "loss": 0.09865675866603851, + "loss_ce": 0.003258807584643364, + "loss_iou": 0.66796875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 165023544, + "step": 961 + }, + { + "epoch": 0.25304136253041365, + "grad_norm": 9.066155915123039, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 165193832, + "step": 962 + }, + { + "epoch": 0.25304136253041365, + "loss": 0.1392088085412979, + "loss_ce": 0.0009946945356205106, + "loss_iou": 0.6328125, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 165193832, + "step": 962 + }, + { + "epoch": 0.25330439928980075, + "grad_norm": 7.472497977314338, + "learning_rate": 5e-06, + "loss": 0.1704, + "num_input_tokens_seen": 165365892, + "step": 963 + }, + { + "epoch": 0.25330439928980075, + "loss": 0.23057040572166443, + "loss_ce": 0.002054777694866061, + "loss_iou": 0.47265625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 165365892, + "step": 963 + }, + { + "epoch": 0.25356743604918786, + "grad_norm": 8.732932808526876, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 165538272, + "step": 964 + }, + { + "epoch": 0.25356743604918786, + "loss": 0.16193270683288574, + "loss_ce": 0.0018985318019986153, + "loss_iou": 0.640625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 165538272, + "step": 964 + }, + { + "epoch": 0.253830472808575, + "grad_norm": 7.508690685861332, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 165710728, + "step": 965 + }, + { + "epoch": 0.253830472808575, + "loss": 0.16999930143356323, + "loss_ce": 0.002335726749151945, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 165710728, + "step": 965 + }, + { + "epoch": 0.2540935095679621, + "grad_norm": 8.520838856927949, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 165882912, + "step": 966 + }, + { + "epoch": 0.2540935095679621, + "loss": 0.1505521535873413, + "loss_ce": 0.0006498107686638832, + "loss_iou": 0.56640625, + "loss_num": 0.0299072265625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 165882912, + "step": 966 + }, + { + "epoch": 0.2543565463273492, + "grad_norm": 7.07336887298293, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 166053292, + "step": 967 + }, + { + "epoch": 0.2543565463273492, + "loss": 0.13186028599739075, + "loss_ce": 0.002404727740213275, + "loss_iou": 0.51953125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 166053292, + "step": 967 + }, + { + "epoch": 0.2546195830867364, + "grad_norm": 5.861693184502666, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 166222324, + "step": 968 + }, + { + "epoch": 0.2546195830867364, + "loss": 0.12424527108669281, + "loss_ce": 0.0015340839745476842, + "loss_iou": 0.66796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 166222324, + "step": 968 + }, + { + "epoch": 0.2548826198461235, + "grad_norm": 4.8001560190338335, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 166394732, + "step": 969 + }, + { + "epoch": 0.2548826198461235, + "loss": 0.13953326642513275, + "loss_ce": 0.0008613896206952631, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 166394732, + "step": 969 + }, + { + "epoch": 0.25514565660551064, + "grad_norm": 4.5364064648643065, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 166566920, + "step": 970 + }, + { + "epoch": 0.25514565660551064, + "loss": 0.15965688228607178, + "loss_ce": 0.0016368532087653875, + "loss_iou": 0.498046875, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 166566920, + "step": 970 + }, + { + "epoch": 0.25540869336489774, + "grad_norm": 6.371608621088164, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 166739024, + "step": 971 + }, + { + "epoch": 0.25540869336489774, + "loss": 0.2314717024564743, + "loss_ce": 0.003017107956111431, + "loss_iou": 0.5625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 166739024, + "step": 971 + }, + { + "epoch": 0.25567173012428485, + "grad_norm": 10.601164768004658, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 166911376, + "step": 972 + }, + { + "epoch": 0.25567173012428485, + "loss": 0.16122400760650635, + "loss_ce": 0.001586547470651567, + "loss_iou": 0.75390625, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 166911376, + "step": 972 + }, + { + "epoch": 0.255934766883672, + "grad_norm": 7.052937396329928, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 167083504, + "step": 973 + }, + { + "epoch": 0.255934766883672, + "loss": 0.09488484263420105, + "loss_ce": 0.0012263880344107747, + "loss_iou": 0.5078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 167083504, + "step": 973 + }, + { + "epoch": 0.2561978036430591, + "grad_norm": 11.500260657406463, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 167255708, + "step": 974 + }, + { + "epoch": 0.2561978036430591, + "loss": 0.08826225996017456, + "loss_ce": 0.0011956070084124804, + "loss_iou": 0.6171875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 167255708, + "step": 974 + }, + { + "epoch": 0.25646084040244627, + "grad_norm": 15.92469225824967, + "learning_rate": 5e-06, + "loss": 0.1805, + "num_input_tokens_seen": 167428028, + "step": 975 + }, + { + "epoch": 0.25646084040244627, + "loss": 0.2100502997636795, + "loss_ce": 0.0013405811041593552, + "loss_iou": 0.65234375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 167428028, + "step": 975 + }, + { + "epoch": 0.25672387716183337, + "grad_norm": 8.58380851703317, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 167600008, + "step": 976 + }, + { + "epoch": 0.25672387716183337, + "loss": 0.16451242566108704, + "loss_ce": 0.0012128613889217377, + "loss_iou": 0.73046875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 167600008, + "step": 976 + }, + { + "epoch": 0.2569869139212205, + "grad_norm": 18.1690929466902, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 167772344, + "step": 977 + }, + { + "epoch": 0.2569869139212205, + "loss": 0.1349577009677887, + "loss_ce": 0.0059293946251273155, + "loss_iou": 0.64453125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 167772344, + "step": 977 + }, + { + "epoch": 0.25724995068060763, + "grad_norm": 19.89834413612689, + "learning_rate": 5e-06, + "loss": 0.1866, + "num_input_tokens_seen": 167944388, + "step": 978 + }, + { + "epoch": 0.25724995068060763, + "loss": 0.2309381365776062, + "loss_ce": 0.0008356063044629991, + "loss_iou": 0.6015625, + "loss_num": 0.0458984375, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 167944388, + "step": 978 + }, + { + "epoch": 0.25751298743999473, + "grad_norm": 9.124352830191828, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 168116168, + "step": 979 + }, + { + "epoch": 0.25751298743999473, + "loss": 0.11332334578037262, + "loss_ce": 0.0035516121424734592, + "loss_iou": 0.43359375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 168116168, + "step": 979 + }, + { + "epoch": 0.25777602419938184, + "grad_norm": 13.324870310905945, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 168288220, + "step": 980 + }, + { + "epoch": 0.25777602419938184, + "loss": 0.14565590023994446, + "loss_ce": 0.0014908593147993088, + "loss_iou": 0.50390625, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 168288220, + "step": 980 + }, + { + "epoch": 0.258039060958769, + "grad_norm": 12.888295473836926, + "learning_rate": 5e-06, + "loss": 0.147, + "num_input_tokens_seen": 168460592, + "step": 981 + }, + { + "epoch": 0.258039060958769, + "loss": 0.13789984583854675, + "loss_ce": 0.0010895461309701204, + "loss_iou": 0.47265625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 168460592, + "step": 981 + }, + { + "epoch": 0.2583020977181561, + "grad_norm": 7.142842896707461, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 168632604, + "step": 982 + }, + { + "epoch": 0.2583020977181561, + "loss": 0.2077009379863739, + "loss_ce": 0.0013105443213135004, + "loss_iou": 0.59375, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 168632604, + "step": 982 + }, + { + "epoch": 0.25856513447754326, + "grad_norm": 5.171300059065281, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 168804468, + "step": 983 + }, + { + "epoch": 0.25856513447754326, + "loss": 0.15598775446414948, + "loss_ce": 0.005291945766657591, + "loss_iou": 0.51953125, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 168804468, + "step": 983 + }, + { + "epoch": 0.25882817123693036, + "grad_norm": 11.204489011375072, + "learning_rate": 5e-06, + "loss": 0.1807, + "num_input_tokens_seen": 168976856, + "step": 984 + }, + { + "epoch": 0.25882817123693036, + "loss": 0.19184689223766327, + "loss_ce": 0.0026989425532519817, + "loss_iou": 0.6328125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 168976856, + "step": 984 + }, + { + "epoch": 0.25909120799631746, + "grad_norm": 6.6174997360939205, + "learning_rate": 5e-06, + "loss": 0.1608, + "num_input_tokens_seen": 169149104, + "step": 985 + }, + { + "epoch": 0.25909120799631746, + "loss": 0.23251253366470337, + "loss_ce": 0.0004568799340631813, + "loss_iou": 0.578125, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 169149104, + "step": 985 + }, + { + "epoch": 0.2593542447557046, + "grad_norm": 7.169144370545132, + "learning_rate": 5e-06, + "loss": 0.1498, + "num_input_tokens_seen": 169321296, + "step": 986 + }, + { + "epoch": 0.2593542447557046, + "loss": 0.12904971837997437, + "loss_ce": 0.0024627982638776302, + "loss_iou": 0.66796875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 169321296, + "step": 986 + }, + { + "epoch": 0.2596172815150917, + "grad_norm": 17.414290331962896, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 169493420, + "step": 987 + }, + { + "epoch": 0.2596172815150917, + "loss": 0.13824069499969482, + "loss_ce": 0.0014914304483681917, + "loss_iou": 0.54296875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 169493420, + "step": 987 + }, + { + "epoch": 0.2598803182744789, + "grad_norm": 5.807379438415879, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 169664140, + "step": 988 + }, + { + "epoch": 0.2598803182744789, + "loss": 0.15334829688072205, + "loss_ce": 0.001828520093113184, + "loss_iou": 0.58984375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 169664140, + "step": 988 + }, + { + "epoch": 0.260143355033866, + "grad_norm": 6.246751757081568, + "learning_rate": 5e-06, + "loss": 0.1808, + "num_input_tokens_seen": 169836220, + "step": 989 + }, + { + "epoch": 0.260143355033866, + "loss": 0.25014275312423706, + "loss_ce": 0.003926943056285381, + "loss_iou": 0.5703125, + "loss_num": 0.049072265625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 169836220, + "step": 989 + }, + { + "epoch": 0.2604063917932531, + "grad_norm": 11.305993296500802, + "learning_rate": 5e-06, + "loss": 0.1627, + "num_input_tokens_seen": 170008204, + "step": 990 + }, + { + "epoch": 0.2604063917932531, + "loss": 0.19628843665122986, + "loss_ce": 0.00878843106329441, + "loss_iou": 0.60546875, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 170008204, + "step": 990 + }, + { + "epoch": 0.26066942855264025, + "grad_norm": 6.544136822322436, + "learning_rate": 5e-06, + "loss": 0.1516, + "num_input_tokens_seen": 170178608, + "step": 991 + }, + { + "epoch": 0.26066942855264025, + "loss": 0.23060224950313568, + "loss_ce": 0.004283890128135681, + "loss_iou": 0.41796875, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 170178608, + "step": 991 + }, + { + "epoch": 0.26093246531202735, + "grad_norm": 6.798376445965723, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 170350580, + "step": 992 + }, + { + "epoch": 0.26093246531202735, + "loss": 0.15799343585968018, + "loss_ce": 0.0012551653198897839, + "loss_iou": 0.5859375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 170350580, + "step": 992 + }, + { + "epoch": 0.26119550207141445, + "grad_norm": 6.259630629604519, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 170522692, + "step": 993 + }, + { + "epoch": 0.26119550207141445, + "loss": 0.2015601247549057, + "loss_ce": 0.000510324549395591, + "loss_iou": 0.66796875, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 170522692, + "step": 993 + }, + { + "epoch": 0.2614585388308016, + "grad_norm": 6.902940616756998, + "learning_rate": 5e-06, + "loss": 0.2081, + "num_input_tokens_seen": 170695152, + "step": 994 + }, + { + "epoch": 0.2614585388308016, + "loss": 0.15382197499275208, + "loss_ce": 0.0003795886295847595, + "loss_iou": 0.60546875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 170695152, + "step": 994 + }, + { + "epoch": 0.2617215755901887, + "grad_norm": 4.496994630933735, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 170867532, + "step": 995 + }, + { + "epoch": 0.2617215755901887, + "loss": 0.15487955510616302, + "loss_ce": 0.005709626711905003, + "loss_iou": 0.41015625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 170867532, + "step": 995 + }, + { + "epoch": 0.2619846123495759, + "grad_norm": 5.819986109817203, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 171039632, + "step": 996 + }, + { + "epoch": 0.2619846123495759, + "loss": 0.1341342031955719, + "loss_ce": 0.0008944571018218994, + "loss_iou": 0.458984375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 171039632, + "step": 996 + }, + { + "epoch": 0.262247649108963, + "grad_norm": 4.6990222732792075, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 171211920, + "step": 997 + }, + { + "epoch": 0.262247649108963, + "loss": 0.2527538239955902, + "loss_ce": 0.0029674398247152567, + "loss_iou": 0.48828125, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 171211920, + "step": 997 + }, + { + "epoch": 0.2625106858683501, + "grad_norm": 6.547042177560564, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 171384024, + "step": 998 + }, + { + "epoch": 0.2625106858683501, + "loss": 0.18961063027381897, + "loss_ce": 0.0035754733253270388, + "loss_iou": 0.62890625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 171384024, + "step": 998 + }, + { + "epoch": 0.26277372262773724, + "grad_norm": 17.093722901281232, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 171556336, + "step": 999 + }, + { + "epoch": 0.26277372262773724, + "loss": 0.18390598893165588, + "loss_ce": 0.0009531003306619823, + "loss_iou": 0.24609375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 171556336, + "step": 999 + }, + { + "epoch": 0.26303675938712434, + "grad_norm": 5.862402162816912, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_websight_new_CIoU": 0.7987666130065918, + "eval_websight_new_GIoU": 0.7927780747413635, + "eval_websight_new_IoU": 0.8096525371074677, + "eval_websight_new_MAE_all": 0.03391252178698778, + "eval_websight_new_MAE_h": 0.024881365709006786, + "eval_websight_new_MAE_w": 0.04275708086788654, + "eval_websight_new_MAE_x": 0.046329958364367485, + "eval_websight_new_MAE_y": 0.021681691519916058, + "eval_websight_new_NUM_probability": 0.9994822144508362, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.15745767951011658, + "eval_websight_new_loss_ce": 9.287914144806564e-05, + "eval_websight_new_loss_iou": 0.457275390625, + "eval_websight_new_loss_num": 0.027721405029296875, + "eval_websight_new_loss_xval": 0.1386260986328125, + "eval_websight_new_runtime": 55.0835, + "eval_websight_new_samples_per_second": 0.908, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_seeclick_CIoU": 0.5508884787559509, + "eval_seeclick_GIoU": 0.5434170663356781, + "eval_seeclick_IoU": 0.5745402276515961, + "eval_seeclick_MAE_all": 0.057029979303479195, + "eval_seeclick_MAE_h": 0.03887217864394188, + "eval_seeclick_MAE_w": 0.08262282982468605, + "eval_seeclick_MAE_x": 0.07410039007663727, + "eval_seeclick_MAE_y": 0.032524523325264454, + "eval_seeclick_NUM_probability": 0.9997861981391907, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 0.24120275676250458, + "eval_seeclick_loss_ce": 0.00980278616771102, + "eval_seeclick_loss_iou": 0.60888671875, + "eval_seeclick_loss_num": 0.0429229736328125, + "eval_seeclick_loss_xval": 0.214630126953125, + "eval_seeclick_runtime": 69.8263, + "eval_seeclick_samples_per_second": 0.616, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_icons_CIoU": 0.8137890696525574, + "eval_icons_GIoU": 0.8057061433792114, + "eval_icons_IoU": 0.8213592171669006, + "eval_icons_MAE_all": 0.024967025965452194, + "eval_icons_MAE_h": 0.028143037110567093, + "eval_icons_MAE_w": 0.026135658845305443, + "eval_icons_MAE_x": 0.022455199621617794, + "eval_icons_MAE_y": 0.023134205490350723, + "eval_icons_NUM_probability": 0.9995008409023285, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.09549810737371445, + "eval_icons_loss_ce": 0.0016497992401127703, + "eval_icons_loss_iou": 0.6103515625, + "eval_icons_loss_num": 0.01790618896484375, + "eval_icons_loss_xval": 0.0895233154296875, + "eval_icons_runtime": 88.8842, + "eval_icons_samples_per_second": 0.563, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "eval_screenspot_CIoU": 0.5512810548146566, + "eval_screenspot_GIoU": 0.5352131823698679, + "eval_screenspot_IoU": 0.5900407036145529, + "eval_screenspot_MAE_all": 0.08661519487698872, + "eval_screenspot_MAE_h": 0.056614277263482414, + "eval_screenspot_MAE_w": 0.13663912812868753, + "eval_screenspot_MAE_x": 0.10082270950078964, + "eval_screenspot_MAE_y": 0.052384667098522186, + "eval_screenspot_NUM_probability": 0.9995323220888773, + "eval_screenspot_inside_bbox": 0.8454166650772095, + "eval_screenspot_loss": 0.7656806111335754, + "eval_screenspot_loss_ce": 0.42391865452130634, + "eval_screenspot_loss_iou": 0.4834391276041667, + "eval_screenspot_loss_num": 0.0673370361328125, + "eval_screenspot_loss_xval": 0.3365885416666667, + "eval_screenspot_runtime": 148.461, + "eval_screenspot_samples_per_second": 0.599, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.26303675938712434, + "loss": 0.7618361711502075, + "loss_ce": 0.4089309275150299, + "loss_iou": 0.43359375, + "loss_num": 0.07080078125, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 171726848, + "step": 1000 + }, + { + "epoch": 0.2632997961465115, + "grad_norm": 12.369274544442106, + "learning_rate": 5e-06, + "loss": 0.1395, + "num_input_tokens_seen": 171897180, + "step": 1001 + }, + { + "epoch": 0.2632997961465115, + "loss": 0.23002395033836365, + "loss_ce": 0.0021186815574765205, + "loss_iou": 0.462890625, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 171897180, + "step": 1001 + }, + { + "epoch": 0.2635628329058986, + "grad_norm": 6.891119328212443, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 172069384, + "step": 1002 + }, + { + "epoch": 0.2635628329058986, + "loss": 0.1037362664937973, + "loss_ce": 0.0008004722185432911, + "loss_iou": 0.45703125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 172069384, + "step": 1002 + }, + { + "epoch": 0.2638258696652857, + "grad_norm": 5.298705978749544, + "learning_rate": 5e-06, + "loss": 0.1493, + "num_input_tokens_seen": 172239736, + "step": 1003 + }, + { + "epoch": 0.2638258696652857, + "loss": 0.16244152188301086, + "loss_ce": 0.0035059780348092318, + "loss_iou": 0.408203125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 172239736, + "step": 1003 + }, + { + "epoch": 0.26408890642467286, + "grad_norm": 28.95390611927557, + "learning_rate": 5e-06, + "loss": 0.1674, + "num_input_tokens_seen": 172409356, + "step": 1004 + }, + { + "epoch": 0.26408890642467286, + "loss": 0.16002172231674194, + "loss_ce": 0.001452386612072587, + "loss_iou": 0.70703125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 172409356, + "step": 1004 + }, + { + "epoch": 0.26435194318405997, + "grad_norm": 6.613551299237438, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 172579624, + "step": 1005 + }, + { + "epoch": 0.26435194318405997, + "loss": 0.11175885051488876, + "loss_ce": 0.0038792139384895563, + "loss_iou": 0.61328125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 172579624, + "step": 1005 + }, + { + "epoch": 0.26461497994344707, + "grad_norm": 5.235806011716001, + "learning_rate": 5e-06, + "loss": 0.181, + "num_input_tokens_seen": 172752052, + "step": 1006 + }, + { + "epoch": 0.26461497994344707, + "loss": 0.19632884860038757, + "loss_ce": 0.001321525895036757, + "loss_iou": 0.75390625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 172752052, + "step": 1006 + }, + { + "epoch": 0.26487801670283423, + "grad_norm": 11.226831704177643, + "learning_rate": 5e-06, + "loss": 0.164, + "num_input_tokens_seen": 172924420, + "step": 1007 + }, + { + "epoch": 0.26487801670283423, + "loss": 0.16282187402248383, + "loss_ce": 0.0012007836485281587, + "loss_iou": 0.52734375, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 172924420, + "step": 1007 + }, + { + "epoch": 0.26514105346222133, + "grad_norm": 5.82233219860515, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 173096824, + "step": 1008 + }, + { + "epoch": 0.26514105346222133, + "loss": 0.17045088112354279, + "loss_ce": 0.005320262163877487, + "loss_iou": 0.53515625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 173096824, + "step": 1008 + }, + { + "epoch": 0.2654040902216085, + "grad_norm": 5.52283167756611, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 173268948, + "step": 1009 + }, + { + "epoch": 0.2654040902216085, + "loss": 0.17849504947662354, + "loss_ce": 0.004483824595808983, + "loss_iou": 0.61328125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 173268948, + "step": 1009 + }, + { + "epoch": 0.2656671269809956, + "grad_norm": 5.624587832123806, + "learning_rate": 5e-06, + "loss": 0.1733, + "num_input_tokens_seen": 173441324, + "step": 1010 + }, + { + "epoch": 0.2656671269809956, + "loss": 0.15812638401985168, + "loss_ce": 0.0008387943962588906, + "loss_iou": 0.62890625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 173441324, + "step": 1010 + }, + { + "epoch": 0.2659301637403827, + "grad_norm": 8.141965504781345, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 173611452, + "step": 1011 + }, + { + "epoch": 0.2659301637403827, + "loss": 0.2730504870414734, + "loss_ce": 0.004678931087255478, + "loss_iou": 0.53125, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 173611452, + "step": 1011 + }, + { + "epoch": 0.26619320049976986, + "grad_norm": 9.595372252411392, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 173783736, + "step": 1012 + }, + { + "epoch": 0.26619320049976986, + "loss": 0.23428162932395935, + "loss_ce": 0.0011273245327174664, + "loss_iou": 0.54296875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 173783736, + "step": 1012 + }, + { + "epoch": 0.26645623725915696, + "grad_norm": 8.106802909743756, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 173955656, + "step": 1013 + }, + { + "epoch": 0.26645623725915696, + "loss": 0.15512457489967346, + "loss_ce": 0.003147047944366932, + "loss_iou": 0.59375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 173955656, + "step": 1013 + }, + { + "epoch": 0.2667192740185441, + "grad_norm": 5.365195100132044, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 174126100, + "step": 1014 + }, + { + "epoch": 0.2667192740185441, + "loss": 0.17733250558376312, + "loss_ce": 0.004175758454948664, + "loss_iou": 0.6640625, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 174126100, + "step": 1014 + }, + { + "epoch": 0.2669823107779312, + "grad_norm": 6.156957339464345, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 174298328, + "step": 1015 + }, + { + "epoch": 0.2669823107779312, + "loss": 0.1237088292837143, + "loss_ce": 0.001119712833315134, + "loss_iou": 0.41796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 174298328, + "step": 1015 + }, + { + "epoch": 0.2672453475373183, + "grad_norm": 5.364063512164187, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 174470324, + "step": 1016 + }, + { + "epoch": 0.2672453475373183, + "loss": 0.2085983008146286, + "loss_ce": 0.003947417717427015, + "loss_iou": 0.5546875, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 174470324, + "step": 1016 + }, + { + "epoch": 0.2675083842967055, + "grad_norm": 26.166059849551687, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 174642656, + "step": 1017 + }, + { + "epoch": 0.2675083842967055, + "loss": 0.0957513153553009, + "loss_ce": 0.0004143980913795531, + "loss_iou": 0.5546875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 174642656, + "step": 1017 + }, + { + "epoch": 0.2677714210560926, + "grad_norm": 13.570714399931546, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 174813336, + "step": 1018 + }, + { + "epoch": 0.2677714210560926, + "loss": 0.08781825006008148, + "loss_ce": 0.0015755778877064586, + "loss_iou": 0.5703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 174813336, + "step": 1018 + }, + { + "epoch": 0.2680344578154797, + "grad_norm": 5.193615864048545, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 174983476, + "step": 1019 + }, + { + "epoch": 0.2680344578154797, + "loss": 0.09173595905303955, + "loss_ce": 0.0038148202002048492, + "loss_iou": 0.4609375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 174983476, + "step": 1019 + }, + { + "epoch": 0.26829749457486685, + "grad_norm": 6.860708381616737, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 175155688, + "step": 1020 + }, + { + "epoch": 0.26829749457486685, + "loss": 0.1804433912038803, + "loss_ce": 0.002403826452791691, + "loss_iou": 0.48828125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 175155688, + "step": 1020 + }, + { + "epoch": 0.26856053133425395, + "grad_norm": 4.544920825241194, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 175327636, + "step": 1021 + }, + { + "epoch": 0.26856053133425395, + "loss": 0.14717841148376465, + "loss_ce": 0.0010602545225992799, + "loss_iou": 0.62890625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 175327636, + "step": 1021 + }, + { + "epoch": 0.2688235680936411, + "grad_norm": 8.925039478850847, + "learning_rate": 5e-06, + "loss": 0.1755, + "num_input_tokens_seen": 175499748, + "step": 1022 + }, + { + "epoch": 0.2688235680936411, + "loss": 0.1853310763835907, + "loss_ce": 0.0006386763998307288, + "loss_iou": 0.51953125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 175499748, + "step": 1022 + }, + { + "epoch": 0.2690866048530282, + "grad_norm": 5.74787431130144, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 175671808, + "step": 1023 + }, + { + "epoch": 0.2690866048530282, + "loss": 0.14083652198314667, + "loss_ce": 0.0009744655108079314, + "loss_iou": 0.4140625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 175671808, + "step": 1023 + }, + { + "epoch": 0.2693496416124153, + "grad_norm": 6.6733989699604725, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 175844188, + "step": 1024 + }, + { + "epoch": 0.2693496416124153, + "loss": 0.1698358803987503, + "loss_ce": 0.0007990067824721336, + "loss_iou": 0.6640625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 175844188, + "step": 1024 + }, + { + "epoch": 0.26961267837180247, + "grad_norm": 8.873866851189364, + "learning_rate": 5e-06, + "loss": 0.1894, + "num_input_tokens_seen": 176016264, + "step": 1025 + }, + { + "epoch": 0.26961267837180247, + "loss": 0.28822407126426697, + "loss_ce": 0.006241639144718647, + "loss_iou": 0.6171875, + "loss_num": 0.056396484375, + "loss_xval": 0.28125, + "num_input_tokens_seen": 176016264, + "step": 1025 + }, + { + "epoch": 0.2698757151311896, + "grad_norm": 5.745558264753103, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 176188840, + "step": 1026 + }, + { + "epoch": 0.2698757151311896, + "loss": 0.20782078802585602, + "loss_ce": 0.003353000618517399, + "loss_iou": 0.53125, + "loss_num": 0.041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 176188840, + "step": 1026 + }, + { + "epoch": 0.27013875189057673, + "grad_norm": 6.250713326486415, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 176360864, + "step": 1027 + }, + { + "epoch": 0.27013875189057673, + "loss": 0.17586824297904968, + "loss_ce": 0.0016433752607554197, + "loss_iou": 0.59375, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 176360864, + "step": 1027 + }, + { + "epoch": 0.27040178864996384, + "grad_norm": 9.18813465584604, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 176533304, + "step": 1028 + }, + { + "epoch": 0.27040178864996384, + "loss": 0.1184663325548172, + "loss_ce": 0.0005769361741840839, + "loss_iou": 0.640625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 176533304, + "step": 1028 + }, + { + "epoch": 0.27066482540935094, + "grad_norm": 6.1602828275187, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 176705536, + "step": 1029 + }, + { + "epoch": 0.27066482540935094, + "loss": 0.20641186833381653, + "loss_ce": 0.0027985800988972187, + "loss_iou": 0.54296875, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 176705536, + "step": 1029 + }, + { + "epoch": 0.2709278621687381, + "grad_norm": 6.699314102286077, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 176877744, + "step": 1030 + }, + { + "epoch": 0.2709278621687381, + "loss": 0.12170865386724472, + "loss_ce": 0.003544584382325411, + "loss_iou": 0.5546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 176877744, + "step": 1030 + }, + { + "epoch": 0.2711908989281252, + "grad_norm": 7.305231502769662, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 177050116, + "step": 1031 + }, + { + "epoch": 0.2711908989281252, + "loss": 0.1749892234802246, + "loss_ce": 0.001527311746031046, + "loss_iou": 0.482421875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 177050116, + "step": 1031 + }, + { + "epoch": 0.2714539356875123, + "grad_norm": 23.98337624564377, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 177222180, + "step": 1032 + }, + { + "epoch": 0.2714539356875123, + "loss": 0.11504107713699341, + "loss_ce": 0.0008137800614349544, + "loss_iou": 0.64453125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 177222180, + "step": 1032 + }, + { + "epoch": 0.27171697244689946, + "grad_norm": 8.870505808545992, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 177394132, + "step": 1033 + }, + { + "epoch": 0.27171697244689946, + "loss": 0.1380763053894043, + "loss_ce": 0.001357543864287436, + "loss_iou": 0.55078125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 177394132, + "step": 1033 + }, + { + "epoch": 0.27198000920628657, + "grad_norm": 6.7102870745927, + "learning_rate": 5e-06, + "loss": 0.152, + "num_input_tokens_seen": 177564532, + "step": 1034 + }, + { + "epoch": 0.27198000920628657, + "loss": 0.1144593358039856, + "loss_ce": 0.0005372193409129977, + "loss_iou": 0.625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 177564532, + "step": 1034 + }, + { + "epoch": 0.2722430459656737, + "grad_norm": 7.475407895210686, + "learning_rate": 5e-06, + "loss": 0.1639, + "num_input_tokens_seen": 177735192, + "step": 1035 + }, + { + "epoch": 0.2722430459656737, + "loss": 0.1714543104171753, + "loss_ce": 0.001501921215094626, + "loss_iou": 0.435546875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 177735192, + "step": 1035 + }, + { + "epoch": 0.2725060827250608, + "grad_norm": 7.019122274754133, + "learning_rate": 5e-06, + "loss": 0.183, + "num_input_tokens_seen": 177905512, + "step": 1036 + }, + { + "epoch": 0.2725060827250608, + "loss": 0.23724797368049622, + "loss_ce": 0.0011640018783509731, + "loss_iou": 0.3671875, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 177905512, + "step": 1036 + }, + { + "epoch": 0.27276911948444793, + "grad_norm": 6.609200156734422, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 178077776, + "step": 1037 + }, + { + "epoch": 0.27276911948444793, + "loss": 0.1989010125398636, + "loss_ce": 0.002367813140153885, + "loss_iou": 0.421875, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 178077776, + "step": 1037 + }, + { + "epoch": 0.2730321562438351, + "grad_norm": 7.044877026833013, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 178250200, + "step": 1038 + }, + { + "epoch": 0.2730321562438351, + "loss": 0.1487899273633957, + "loss_ce": 0.0012069200165569782, + "loss_iou": 0.7109375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 178250200, + "step": 1038 + }, + { + "epoch": 0.2732951930032222, + "grad_norm": 7.367246122761307, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 178420744, + "step": 1039 + }, + { + "epoch": 0.2732951930032222, + "loss": 0.1740129590034485, + "loss_ce": 0.002565213944762945, + "loss_iou": 0.53125, + "loss_num": 0.0341796875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 178420744, + "step": 1039 + }, + { + "epoch": 0.27355822976260935, + "grad_norm": 6.283509699437948, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 178592888, + "step": 1040 + }, + { + "epoch": 0.27355822976260935, + "loss": 0.09046860039234161, + "loss_ce": 0.0011436456115916371, + "loss_iou": 0.6171875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 178592888, + "step": 1040 + }, + { + "epoch": 0.27382126652199645, + "grad_norm": 8.68361428045609, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 178763324, + "step": 1041 + }, + { + "epoch": 0.27382126652199645, + "loss": 0.10273198038339615, + "loss_ce": 0.0014746561646461487, + "loss_iou": 0.42578125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 178763324, + "step": 1041 + }, + { + "epoch": 0.27408430328138356, + "grad_norm": 5.923468447654299, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 178935692, + "step": 1042 + }, + { + "epoch": 0.27408430328138356, + "loss": 0.11228330433368683, + "loss_ce": 0.0015655276365578175, + "loss_iou": 0.7265625, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 178935692, + "step": 1042 + }, + { + "epoch": 0.2743473400407707, + "grad_norm": 12.335079487643208, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 179105416, + "step": 1043 + }, + { + "epoch": 0.2743473400407707, + "loss": 0.13459762930870056, + "loss_ce": 0.0016020219773054123, + "loss_iou": 0.59375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 179105416, + "step": 1043 + }, + { + "epoch": 0.2746103768001578, + "grad_norm": 5.798275831622124, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 179277628, + "step": 1044 + }, + { + "epoch": 0.2746103768001578, + "loss": 0.07603298872709274, + "loss_ce": 0.0007766383932903409, + "loss_iou": 0.59765625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 179277628, + "step": 1044 + }, + { + "epoch": 0.2748734135595449, + "grad_norm": 23.44861216824249, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 179447304, + "step": 1045 + }, + { + "epoch": 0.2748734135595449, + "loss": 0.143830344080925, + "loss_ce": 0.0031137943733483553, + "loss_iou": 0.5390625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 179447304, + "step": 1045 + }, + { + "epoch": 0.2751364503189321, + "grad_norm": 5.56459204907325, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 179619344, + "step": 1046 + }, + { + "epoch": 0.2751364503189321, + "loss": 0.1308884471654892, + "loss_ce": 0.0006089094094932079, + "loss_iou": 0.66015625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 179619344, + "step": 1046 + }, + { + "epoch": 0.2753994870783192, + "grad_norm": 7.96724794117892, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 179791548, + "step": 1047 + }, + { + "epoch": 0.2753994870783192, + "loss": 0.1258929818868637, + "loss_ce": 0.006264072842895985, + "loss_iou": 0.515625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 179791548, + "step": 1047 + }, + { + "epoch": 0.27566252383770634, + "grad_norm": 5.070360731345708, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 179963676, + "step": 1048 + }, + { + "epoch": 0.27566252383770634, + "loss": 0.11563927680253983, + "loss_ce": 0.00031335209496319294, + "loss_iou": 0.466796875, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 179963676, + "step": 1048 + }, + { + "epoch": 0.27592556059709344, + "grad_norm": 7.345106018369934, + "learning_rate": 5e-06, + "loss": 0.1455, + "num_input_tokens_seen": 180135984, + "step": 1049 + }, + { + "epoch": 0.27592556059709344, + "loss": 0.1866682916879654, + "loss_ce": 0.0012129689566791058, + "loss_iou": 0.609375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 180135984, + "step": 1049 + }, + { + "epoch": 0.27618859735648055, + "grad_norm": 9.442805115405356, + "learning_rate": 5e-06, + "loss": 0.2447, + "num_input_tokens_seen": 180308156, + "step": 1050 + }, + { + "epoch": 0.27618859735648055, + "loss": 0.23040437698364258, + "loss_ce": 0.001797212054952979, + "loss_iou": 0.578125, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 180308156, + "step": 1050 + }, + { + "epoch": 0.2764516341158677, + "grad_norm": 10.313405249427115, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 180480268, + "step": 1051 + }, + { + "epoch": 0.2764516341158677, + "loss": 0.1318507045507431, + "loss_ce": 0.003188594477251172, + "loss_iou": 0.443359375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 180480268, + "step": 1051 + }, + { + "epoch": 0.2767146708752548, + "grad_norm": 12.938419489278349, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 180652480, + "step": 1052 + }, + { + "epoch": 0.2767146708752548, + "loss": 0.1456303596496582, + "loss_ce": 0.0045475889928638935, + "loss_iou": 0.6484375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 180652480, + "step": 1052 + }, + { + "epoch": 0.27697770763464197, + "grad_norm": 6.129454843218688, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 180824868, + "step": 1053 + }, + { + "epoch": 0.27697770763464197, + "loss": 0.10654839873313904, + "loss_ce": 0.0010186205618083477, + "loss_iou": 0.53125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 180824868, + "step": 1053 + }, + { + "epoch": 0.27724074439402907, + "grad_norm": 6.137928561662, + "learning_rate": 5e-06, + "loss": 0.1994, + "num_input_tokens_seen": 180996888, + "step": 1054 + }, + { + "epoch": 0.27724074439402907, + "loss": 0.21584706008434296, + "loss_ce": 0.0034447195939719677, + "loss_iou": 0.6171875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 180996888, + "step": 1054 + }, + { + "epoch": 0.2775037811534162, + "grad_norm": 4.330377156785293, + "learning_rate": 5e-06, + "loss": 0.098, + "num_input_tokens_seen": 181168924, + "step": 1055 + }, + { + "epoch": 0.2775037811534162, + "loss": 0.08097569644451141, + "loss_ce": 0.0013553331373259425, + "loss_iou": 0.64453125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 181168924, + "step": 1055 + }, + { + "epoch": 0.27776681791280333, + "grad_norm": 6.97802036205834, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 181341092, + "step": 1056 + }, + { + "epoch": 0.27776681791280333, + "loss": 0.1544983983039856, + "loss_ce": 0.0006592837744392455, + "loss_iou": 0.5078125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 181341092, + "step": 1056 + }, + { + "epoch": 0.27802985467219044, + "grad_norm": 5.494026690913253, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 181513136, + "step": 1057 + }, + { + "epoch": 0.27802985467219044, + "loss": 0.09461110830307007, + "loss_ce": 0.004279076587408781, + "loss_iou": 0.46875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 181513136, + "step": 1057 + }, + { + "epoch": 0.27829289143157754, + "grad_norm": 8.725452399730845, + "learning_rate": 5e-06, + "loss": 0.1293, + "num_input_tokens_seen": 181684960, + "step": 1058 + }, + { + "epoch": 0.27829289143157754, + "loss": 0.14191409945487976, + "loss_ce": 0.004157752729952335, + "loss_iou": 0.6015625, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 181684960, + "step": 1058 + }, + { + "epoch": 0.2785559281909647, + "grad_norm": 5.556456214125238, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 181857208, + "step": 1059 + }, + { + "epoch": 0.2785559281909647, + "loss": 0.14501094818115234, + "loss_ce": 0.0029821395874023438, + "loss_iou": 0.5, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 181857208, + "step": 1059 + }, + { + "epoch": 0.2788189649503518, + "grad_norm": 5.694123648635864, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 182029272, + "step": 1060 + }, + { + "epoch": 0.2788189649503518, + "loss": 0.1433970034122467, + "loss_ce": 0.0002085179730784148, + "loss_iou": 0.49609375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 182029272, + "step": 1060 + }, + { + "epoch": 0.27908200170973896, + "grad_norm": 11.17547001843261, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 182201700, + "step": 1061 + }, + { + "epoch": 0.27908200170973896, + "loss": 0.2361234724521637, + "loss_ce": 0.0002225826756330207, + "loss_iou": 0.5390625, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 182201700, + "step": 1061 + }, + { + "epoch": 0.27934503846912606, + "grad_norm": 7.952701295024384, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 182373760, + "step": 1062 + }, + { + "epoch": 0.27934503846912606, + "loss": 0.12168803811073303, + "loss_ce": 0.00483623007312417, + "loss_iou": 0.66015625, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 182373760, + "step": 1062 + }, + { + "epoch": 0.27960807522851316, + "grad_norm": 8.381017586610616, + "learning_rate": 5e-06, + "loss": 0.1264, + "num_input_tokens_seen": 182544140, + "step": 1063 + }, + { + "epoch": 0.27960807522851316, + "loss": 0.18032154440879822, + "loss_ce": 0.003350101877003908, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 182544140, + "step": 1063 + }, + { + "epoch": 0.2798711119879003, + "grad_norm": 7.079859253709746, + "learning_rate": 5e-06, + "loss": 0.1588, + "num_input_tokens_seen": 182716400, + "step": 1064 + }, + { + "epoch": 0.2798711119879003, + "loss": 0.2143479734659195, + "loss_ce": 0.005302563309669495, + "loss_iou": 0.56640625, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 182716400, + "step": 1064 + }, + { + "epoch": 0.2801341487472874, + "grad_norm": 5.981555911348935, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 182888924, + "step": 1065 + }, + { + "epoch": 0.2801341487472874, + "loss": 0.07972423732280731, + "loss_ce": 0.0004090492147952318, + "loss_iou": 0.5390625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 182888924, + "step": 1065 + }, + { + "epoch": 0.2803971855066746, + "grad_norm": 6.957965061807421, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 183061040, + "step": 1066 + }, + { + "epoch": 0.2803971855066746, + "loss": 0.18109014630317688, + "loss_ce": 0.0029285247437655926, + "loss_iou": 0.6171875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 183061040, + "step": 1066 + }, + { + "epoch": 0.2806602222660617, + "grad_norm": 7.354674178304409, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 183233464, + "step": 1067 + }, + { + "epoch": 0.2806602222660617, + "loss": 0.19740189611911774, + "loss_ce": 0.0007771397940814495, + "loss_iou": 0.49609375, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 183233464, + "step": 1067 + }, + { + "epoch": 0.2809232590254488, + "grad_norm": 6.666818027916791, + "learning_rate": 5e-06, + "loss": 0.1868, + "num_input_tokens_seen": 183405764, + "step": 1068 + }, + { + "epoch": 0.2809232590254488, + "loss": 0.15091687440872192, + "loss_ce": 0.0006788407335989177, + "loss_iou": 0.5546875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 183405764, + "step": 1068 + }, + { + "epoch": 0.28118629578483595, + "grad_norm": 5.646804925247109, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 183578300, + "step": 1069 + }, + { + "epoch": 0.28118629578483595, + "loss": 0.15913698077201843, + "loss_ce": 0.0021545523777604103, + "loss_iou": 0.5859375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 183578300, + "step": 1069 + }, + { + "epoch": 0.28144933254422305, + "grad_norm": 20.5753376454494, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 183750176, + "step": 1070 + }, + { + "epoch": 0.28144933254422305, + "loss": 0.11271088570356369, + "loss_ce": 0.0023593269288539886, + "loss_iou": 0.609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 183750176, + "step": 1070 + }, + { + "epoch": 0.28171236930361016, + "grad_norm": 4.644203324007545, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 183922404, + "step": 1071 + }, + { + "epoch": 0.28171236930361016, + "loss": 0.12499827146530151, + "loss_ce": 0.0020124230068176985, + "loss_iou": 0.546875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 183922404, + "step": 1071 + }, + { + "epoch": 0.2819754060629973, + "grad_norm": 5.233661334274966, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 184094188, + "step": 1072 + }, + { + "epoch": 0.2819754060629973, + "loss": 0.1377188265323639, + "loss_ce": 0.0002676558797247708, + "loss_iou": 0.443359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 184094188, + "step": 1072 + }, + { + "epoch": 0.2822384428223844, + "grad_norm": 7.522178114598793, + "learning_rate": 5e-06, + "loss": 0.1911, + "num_input_tokens_seen": 184262844, + "step": 1073 + }, + { + "epoch": 0.2822384428223844, + "loss": 0.2804732322692871, + "loss_ce": 0.0030379469972103834, + "loss_iou": 0.5390625, + "loss_num": 0.055419921875, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 184262844, + "step": 1073 + }, + { + "epoch": 0.2825014795817716, + "grad_norm": 10.418392232208523, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 184431224, + "step": 1074 + }, + { + "epoch": 0.2825014795817716, + "loss": 0.17388112843036652, + "loss_ce": 0.002219748916104436, + "loss_iou": 0.5859375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 184431224, + "step": 1074 + }, + { + "epoch": 0.2827645163411587, + "grad_norm": 7.018369739955214, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 184603440, + "step": 1075 + }, + { + "epoch": 0.2827645163411587, + "loss": 0.24287168681621552, + "loss_ce": 0.0036749078426510096, + "loss_iou": 0.466796875, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 184603440, + "step": 1075 + }, + { + "epoch": 0.2830275531005458, + "grad_norm": 8.441416409211774, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 184775664, + "step": 1076 + }, + { + "epoch": 0.2830275531005458, + "loss": 0.253373384475708, + "loss_ce": 0.0034344326704740524, + "loss_iou": 0.61328125, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 184775664, + "step": 1076 + }, + { + "epoch": 0.28329058985993294, + "grad_norm": 5.273714948615874, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 184947684, + "step": 1077 + }, + { + "epoch": 0.28329058985993294, + "loss": 0.12758958339691162, + "loss_ce": 0.0030168381053954363, + "loss_iou": 0.59375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 184947684, + "step": 1077 + }, + { + "epoch": 0.28355362661932004, + "grad_norm": 5.622485240246158, + "learning_rate": 5e-06, + "loss": 0.1707, + "num_input_tokens_seen": 185119764, + "step": 1078 + }, + { + "epoch": 0.28355362661932004, + "loss": 0.15722918510437012, + "loss_ce": 0.0001552198955323547, + "loss_iou": 0.546875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 185119764, + "step": 1078 + }, + { + "epoch": 0.2838166633787072, + "grad_norm": 6.064053191448847, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 185291940, + "step": 1079 + }, + { + "epoch": 0.2838166633787072, + "loss": 0.12457242608070374, + "loss_ce": 0.0010983101092278957, + "loss_iou": 0.53125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 185291940, + "step": 1079 + }, + { + "epoch": 0.2840797001380943, + "grad_norm": 8.508529448099232, + "learning_rate": 5e-06, + "loss": 0.1667, + "num_input_tokens_seen": 185460372, + "step": 1080 + }, + { + "epoch": 0.2840797001380943, + "loss": 0.2045111060142517, + "loss_ce": 0.0020574983209371567, + "loss_iou": 0.6484375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 185460372, + "step": 1080 + }, + { + "epoch": 0.2843427368974814, + "grad_norm": 5.269932611835083, + "learning_rate": 5e-06, + "loss": 0.1797, + "num_input_tokens_seen": 185632912, + "step": 1081 + }, + { + "epoch": 0.2843427368974814, + "loss": 0.13937309384346008, + "loss_ce": 0.009490270167589188, + "loss_iou": 0.462890625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 185632912, + "step": 1081 + }, + { + "epoch": 0.28460577365686857, + "grad_norm": 8.826575998844994, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 185804888, + "step": 1082 + }, + { + "epoch": 0.28460577365686857, + "loss": 0.099105603992939, + "loss_ce": 0.0018155663274228573, + "loss_iou": 0.515625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 185804888, + "step": 1082 + }, + { + "epoch": 0.28486881041625567, + "grad_norm": 5.9573043544472535, + "learning_rate": 5e-06, + "loss": 0.1276, + "num_input_tokens_seen": 185977324, + "step": 1083 + }, + { + "epoch": 0.28486881041625567, + "loss": 0.17646890878677368, + "loss_ce": 0.0035868186969310045, + "loss_iou": 0.5546875, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 185977324, + "step": 1083 + }, + { + "epoch": 0.28513184717564277, + "grad_norm": 5.170909045758908, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 186149548, + "step": 1084 + }, + { + "epoch": 0.28513184717564277, + "loss": 0.19262221455574036, + "loss_ce": 0.000941307342145592, + "loss_iou": 0.6640625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 186149548, + "step": 1084 + }, + { + "epoch": 0.28539488393502993, + "grad_norm": 16.76166177176129, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 186321516, + "step": 1085 + }, + { + "epoch": 0.28539488393502993, + "loss": 0.13177794218063354, + "loss_ce": 0.0014678854495286942, + "loss_iou": 0.50390625, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 186321516, + "step": 1085 + }, + { + "epoch": 0.28565792069441703, + "grad_norm": 13.385145963732795, + "learning_rate": 5e-06, + "loss": 0.1565, + "num_input_tokens_seen": 186493748, + "step": 1086 + }, + { + "epoch": 0.28565792069441703, + "loss": 0.24282805621623993, + "loss_ce": 0.0057369922287762165, + "loss_iou": 0.48828125, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 186493748, + "step": 1086 + }, + { + "epoch": 0.2859209574538042, + "grad_norm": 7.0564105719656025, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 186666020, + "step": 1087 + }, + { + "epoch": 0.2859209574538042, + "loss": 0.10800403356552124, + "loss_ce": 0.000704226375091821, + "loss_iou": 0.388671875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 186666020, + "step": 1087 + }, + { + "epoch": 0.2861839942131913, + "grad_norm": 5.57534276048312, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 186834708, + "step": 1088 + }, + { + "epoch": 0.2861839942131913, + "loss": 0.1970679759979248, + "loss_ce": 0.0005652993568219244, + "loss_iou": 0.474609375, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 186834708, + "step": 1088 + }, + { + "epoch": 0.2864470309725784, + "grad_norm": 12.382219859522896, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 187006628, + "step": 1089 + }, + { + "epoch": 0.2864470309725784, + "loss": 0.13300946354866028, + "loss_ce": 0.0005631742533296347, + "loss_iou": 0.55859375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 187006628, + "step": 1089 + }, + { + "epoch": 0.28671006773196556, + "grad_norm": 6.678243304603748, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 187178940, + "step": 1090 + }, + { + "epoch": 0.28671006773196556, + "loss": 0.16839508712291718, + "loss_ce": 0.004485180135816336, + "loss_iou": 0.53125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 187178940, + "step": 1090 + }, + { + "epoch": 0.28697310449135266, + "grad_norm": 17.285977120564166, + "learning_rate": 5e-06, + "loss": 0.1817, + "num_input_tokens_seen": 187351348, + "step": 1091 + }, + { + "epoch": 0.28697310449135266, + "loss": 0.24864555895328522, + "loss_ce": 0.0034063111525028944, + "loss_iou": 0.4921875, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 187351348, + "step": 1091 + }, + { + "epoch": 0.2872361412507398, + "grad_norm": 5.362480990027185, + "learning_rate": 5e-06, + "loss": 0.1333, + "num_input_tokens_seen": 187523476, + "step": 1092 + }, + { + "epoch": 0.2872361412507398, + "loss": 0.19447633624076843, + "loss_ce": 0.005084256641566753, + "loss_iou": 0.70703125, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 187523476, + "step": 1092 + }, + { + "epoch": 0.2874991780101269, + "grad_norm": 9.00142705860072, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 187695596, + "step": 1093 + }, + { + "epoch": 0.2874991780101269, + "loss": 0.13948455452919006, + "loss_ce": 0.0006295705679804087, + "loss_iou": 0.44140625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 187695596, + "step": 1093 + }, + { + "epoch": 0.287762214769514, + "grad_norm": 11.592832787644355, + "learning_rate": 5e-06, + "loss": 0.1673, + "num_input_tokens_seen": 187867844, + "step": 1094 + }, + { + "epoch": 0.287762214769514, + "loss": 0.1788289099931717, + "loss_ce": 0.0009724590927362442, + "loss_iou": 0.5859375, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 187867844, + "step": 1094 + }, + { + "epoch": 0.2880252515289012, + "grad_norm": 5.4984009631677, + "learning_rate": 5e-06, + "loss": 0.1593, + "num_input_tokens_seen": 188039960, + "step": 1095 + }, + { + "epoch": 0.2880252515289012, + "loss": 0.2135852873325348, + "loss_ce": 0.0043262611143291, + "loss_iou": 0.458984375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 188039960, + "step": 1095 + }, + { + "epoch": 0.2882882882882883, + "grad_norm": 6.039228998020695, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 188212228, + "step": 1096 + }, + { + "epoch": 0.2882882882882883, + "loss": 0.10800454020500183, + "loss_ce": 0.0013761227019131184, + "loss_iou": 0.63671875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 188212228, + "step": 1096 + }, + { + "epoch": 0.2885513250476754, + "grad_norm": 6.337937600162702, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 188384548, + "step": 1097 + }, + { + "epoch": 0.2885513250476754, + "loss": 0.2018011212348938, + "loss_ce": 0.0031927230302244425, + "loss_iou": 0.40234375, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 188384548, + "step": 1097 + }, + { + "epoch": 0.28881436180706255, + "grad_norm": 6.282757808284281, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 188556612, + "step": 1098 + }, + { + "epoch": 0.28881436180706255, + "loss": 0.07978774607181549, + "loss_ce": 0.0007472233846783638, + "loss_iou": 0.6015625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 188556612, + "step": 1098 + }, + { + "epoch": 0.28907739856644965, + "grad_norm": 5.5362247942440135, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 188727124, + "step": 1099 + }, + { + "epoch": 0.28907739856644965, + "loss": 0.18471282720565796, + "loss_ce": 0.0071005141362547874, + "loss_iou": 0.455078125, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 188727124, + "step": 1099 + }, + { + "epoch": 0.2893404353258368, + "grad_norm": 5.455389129921704, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 188899384, + "step": 1100 + }, + { + "epoch": 0.2893404353258368, + "loss": 0.08721458911895752, + "loss_ce": 0.0006362219573929906, + "loss_iou": 0.63671875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 188899384, + "step": 1100 + }, + { + "epoch": 0.2896034720852239, + "grad_norm": 6.057305949672798, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 189068024, + "step": 1101 + }, + { + "epoch": 0.2896034720852239, + "loss": 0.1826099157333374, + "loss_ce": 0.00026738218730315566, + "loss_iou": 0.61328125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 189068024, + "step": 1101 + }, + { + "epoch": 0.289866508844611, + "grad_norm": 10.886874649593773, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 189240248, + "step": 1102 + }, + { + "epoch": 0.289866508844611, + "loss": 0.1780916154384613, + "loss_ce": 0.0009065555641427636, + "loss_iou": 0.474609375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 189240248, + "step": 1102 + }, + { + "epoch": 0.2901295456039982, + "grad_norm": 8.357621038563343, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 189412208, + "step": 1103 + }, + { + "epoch": 0.2901295456039982, + "loss": 0.13078002631664276, + "loss_ce": 0.001751709496602416, + "loss_iou": 0.45703125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 189412208, + "step": 1103 + }, + { + "epoch": 0.2903925823633853, + "grad_norm": 13.284219513588615, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 189584480, + "step": 1104 + }, + { + "epoch": 0.2903925823633853, + "loss": 0.23028159141540527, + "loss_ce": 0.003474962431937456, + "loss_iou": 0.62890625, + "loss_num": 0.04541015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 189584480, + "step": 1104 + }, + { + "epoch": 0.29065561912277244, + "grad_norm": 6.716057098151779, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 189755004, + "step": 1105 + }, + { + "epoch": 0.29065561912277244, + "loss": 0.12404203414916992, + "loss_ce": 0.002643106272444129, + "loss_iou": 0.61328125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 189755004, + "step": 1105 + }, + { + "epoch": 0.29091865588215954, + "grad_norm": 13.264130378049543, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 189927084, + "step": 1106 + }, + { + "epoch": 0.29091865588215954, + "loss": 0.10699759423732758, + "loss_ce": 0.0014678104780614376, + "loss_iou": 0.392578125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 189927084, + "step": 1106 + }, + { + "epoch": 0.29118169264154664, + "grad_norm": 19.87883744683546, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 190097456, + "step": 1107 + }, + { + "epoch": 0.29118169264154664, + "loss": 0.19540926814079285, + "loss_ce": 0.0016531546134501696, + "loss_iou": 0.447265625, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 190097456, + "step": 1107 + }, + { + "epoch": 0.2914447294009338, + "grad_norm": 7.462386686254148, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 190269508, + "step": 1108 + }, + { + "epoch": 0.2914447294009338, + "loss": 0.08214541524648666, + "loss_ce": 0.004813872277736664, + "loss_iou": 0.59375, + "loss_num": 0.0155029296875, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 190269508, + "step": 1108 + }, + { + "epoch": 0.2917077661603209, + "grad_norm": 10.674953183668224, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 190441836, + "step": 1109 + }, + { + "epoch": 0.2917077661603209, + "loss": 0.1546817272901535, + "loss_ce": 0.006824057083576918, + "loss_iou": 0.32421875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 190441836, + "step": 1109 + }, + { + "epoch": 0.291970802919708, + "grad_norm": 6.315839345670245, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 190614052, + "step": 1110 + }, + { + "epoch": 0.291970802919708, + "loss": 0.15810704231262207, + "loss_ce": 0.0053970692679286, + "loss_iou": 0.62109375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 190614052, + "step": 1110 + }, + { + "epoch": 0.29223383967909516, + "grad_norm": 6.217305097644484, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 190786084, + "step": 1111 + }, + { + "epoch": 0.29223383967909516, + "loss": 0.07959192991256714, + "loss_ce": 0.0006734670605510473, + "loss_iou": 0.53125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 190786084, + "step": 1111 + }, + { + "epoch": 0.29249687643848227, + "grad_norm": 5.126517505767914, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 190956588, + "step": 1112 + }, + { + "epoch": 0.29249687643848227, + "loss": 0.13071726262569427, + "loss_ce": 0.003184308996424079, + "loss_iou": 0.6328125, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 190956588, + "step": 1112 + }, + { + "epoch": 0.2927599131978694, + "grad_norm": 8.996821720570772, + "learning_rate": 5e-06, + "loss": 0.1638, + "num_input_tokens_seen": 191126948, + "step": 1113 + }, + { + "epoch": 0.2927599131978694, + "loss": 0.07688204199075699, + "loss_ce": 0.0015036254189908504, + "loss_iou": 0.490234375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 191126948, + "step": 1113 + }, + { + "epoch": 0.29302294995725653, + "grad_norm": 5.558729986145218, + "learning_rate": 5e-06, + "loss": 0.1861, + "num_input_tokens_seen": 191299288, + "step": 1114 + }, + { + "epoch": 0.29302294995725653, + "loss": 0.15988363325595856, + "loss_ce": 0.0014058587839826941, + "loss_iou": 0.5390625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 191299288, + "step": 1114 + }, + { + "epoch": 0.29328598671664363, + "grad_norm": 6.8874530652883825, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 191471672, + "step": 1115 + }, + { + "epoch": 0.29328598671664363, + "loss": 0.09309347718954086, + "loss_ce": 0.0007167698349803686, + "loss_iou": 0.4453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 191471672, + "step": 1115 + }, + { + "epoch": 0.2935490234760308, + "grad_norm": 13.843828225173697, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 191643552, + "step": 1116 + }, + { + "epoch": 0.2935490234760308, + "loss": 0.08135861903429031, + "loss_ce": 0.00021237613691482693, + "loss_iou": 0.49609375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 191643552, + "step": 1116 + }, + { + "epoch": 0.2938120602354179, + "grad_norm": 5.436785087484111, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 191815984, + "step": 1117 + }, + { + "epoch": 0.2938120602354179, + "loss": 0.10953962057828903, + "loss_ce": 0.0004392758710309863, + "loss_iou": 0.5546875, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 191815984, + "step": 1117 + }, + { + "epoch": 0.294075096994805, + "grad_norm": 11.598374668500005, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 191988036, + "step": 1118 + }, + { + "epoch": 0.294075096994805, + "loss": 0.0961490124464035, + "loss_ce": 0.0009952039690688252, + "loss_iou": 0.65625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 191988036, + "step": 1118 + }, + { + "epoch": 0.29433813375419215, + "grad_norm": 7.498841309335166, + "learning_rate": 5e-06, + "loss": 0.1726, + "num_input_tokens_seen": 192160116, + "step": 1119 + }, + { + "epoch": 0.29433813375419215, + "loss": 0.25876477360725403, + "loss_ce": 0.004339736420661211, + "loss_iou": 0.56640625, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 192160116, + "step": 1119 + }, + { + "epoch": 0.29460117051357926, + "grad_norm": 5.645236594614784, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 192332060, + "step": 1120 + }, + { + "epoch": 0.29460117051357926, + "loss": 0.07815182209014893, + "loss_ce": 0.0011254575802013278, + "loss_iou": 0.6171875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 192332060, + "step": 1120 + }, + { + "epoch": 0.2948642072729664, + "grad_norm": 5.473412167736241, + "learning_rate": 5e-06, + "loss": 0.1499, + "num_input_tokens_seen": 192504064, + "step": 1121 + }, + { + "epoch": 0.2948642072729664, + "loss": 0.19324743747711182, + "loss_ce": 0.0004373906413093209, + "loss_iou": 0.51171875, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 192504064, + "step": 1121 + }, + { + "epoch": 0.2951272440323535, + "grad_norm": 15.77937603528804, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 192676312, + "step": 1122 + }, + { + "epoch": 0.2951272440323535, + "loss": 0.11738383769989014, + "loss_ce": 0.0015696310438215733, + "loss_iou": 0.56640625, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 192676312, + "step": 1122 + }, + { + "epoch": 0.2953902807917406, + "grad_norm": 6.595809745274096, + "learning_rate": 5e-06, + "loss": 0.1605, + "num_input_tokens_seen": 192848608, + "step": 1123 + }, + { + "epoch": 0.2953902807917406, + "loss": 0.1380428671836853, + "loss_ce": 0.0005306481616571546, + "loss_iou": 0.453125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 192848608, + "step": 1123 + }, + { + "epoch": 0.2956533175511278, + "grad_norm": 5.143986978274753, + "learning_rate": 5e-06, + "loss": 0.2009, + "num_input_tokens_seen": 193021044, + "step": 1124 + }, + { + "epoch": 0.2956533175511278, + "loss": 0.2367524653673172, + "loss_ce": 0.00335403298959136, + "loss_iou": 0.796875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 193021044, + "step": 1124 + }, + { + "epoch": 0.2959163543105149, + "grad_norm": 6.676850446443053, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 193193124, + "step": 1125 + }, + { + "epoch": 0.2959163543105149, + "loss": 0.1376284509897232, + "loss_ce": 0.002099899807944894, + "loss_iou": 0.5234375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 193193124, + "step": 1125 + }, + { + "epoch": 0.29617939106990204, + "grad_norm": 6.051359778802083, + "learning_rate": 5e-06, + "loss": 0.168, + "num_input_tokens_seen": 193365612, + "step": 1126 + }, + { + "epoch": 0.29617939106990204, + "loss": 0.11157628893852234, + "loss_ce": 0.001163685112260282, + "loss_iou": 0.640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 193365612, + "step": 1126 + }, + { + "epoch": 0.29644242782928915, + "grad_norm": 9.91415103149531, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 193537580, + "step": 1127 + }, + { + "epoch": 0.29644242782928915, + "loss": 0.06942566484212875, + "loss_ce": 0.004575815983116627, + "loss_iou": 0.625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 193537580, + "step": 1127 + }, + { + "epoch": 0.29670546458867625, + "grad_norm": 8.171761917591171, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 193709920, + "step": 1128 + }, + { + "epoch": 0.29670546458867625, + "loss": 0.15288731455802917, + "loss_ce": 0.0006046106573194265, + "loss_iou": 0.65234375, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 193709920, + "step": 1128 + }, + { + "epoch": 0.2969685013480634, + "grad_norm": 4.591948701112417, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 193878608, + "step": 1129 + }, + { + "epoch": 0.2969685013480634, + "loss": 0.15515002608299255, + "loss_ce": 0.005400286056101322, + "loss_iou": 0.44140625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 193878608, + "step": 1129 + }, + { + "epoch": 0.2972315381074505, + "grad_norm": 9.7802730005981, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 194051024, + "step": 1130 + }, + { + "epoch": 0.2972315381074505, + "loss": 0.13672733306884766, + "loss_ce": 0.0005579069838859141, + "loss_iou": 0.6328125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 194051024, + "step": 1130 + }, + { + "epoch": 0.2974945748668376, + "grad_norm": 17.109119761118713, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 194223696, + "step": 1131 + }, + { + "epoch": 0.2974945748668376, + "loss": 0.0957454964518547, + "loss_ce": 0.0012325569987297058, + "loss_iou": 0.435546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 194223696, + "step": 1131 + }, + { + "epoch": 0.29775761162622477, + "grad_norm": 4.844704657006506, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 194393332, + "step": 1132 + }, + { + "epoch": 0.29775761162622477, + "loss": 0.08335787057876587, + "loss_ce": 0.000533167680259794, + "loss_iou": 0.70703125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 194393332, + "step": 1132 + }, + { + "epoch": 0.2980206483856119, + "grad_norm": 12.589562992323502, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 194563660, + "step": 1133 + }, + { + "epoch": 0.2980206483856119, + "loss": 0.10209088772535324, + "loss_ce": 0.003366521093994379, + "loss_iou": 0.4453125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 194563660, + "step": 1133 + }, + { + "epoch": 0.29828368514499903, + "grad_norm": 5.818392431333322, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 194735892, + "step": 1134 + }, + { + "epoch": 0.29828368514499903, + "loss": 0.09164222329854965, + "loss_ce": 0.004392467439174652, + "loss_iou": 0.6328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 194735892, + "step": 1134 + }, + { + "epoch": 0.29854672190438614, + "grad_norm": 6.569465101392607, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 194907844, + "step": 1135 + }, + { + "epoch": 0.29854672190438614, + "loss": 0.12394984811544418, + "loss_ce": 0.0028560941573232412, + "loss_iou": 0.390625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 194907844, + "step": 1135 + }, + { + "epoch": 0.29880975866377324, + "grad_norm": 5.132755422401755, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 195079704, + "step": 1136 + }, + { + "epoch": 0.29880975866377324, + "loss": 0.11163240671157837, + "loss_ce": 0.001341882161796093, + "loss_iou": 0.4921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 195079704, + "step": 1136 + }, + { + "epoch": 0.2990727954231604, + "grad_norm": 12.81251876887425, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 195251904, + "step": 1137 + }, + { + "epoch": 0.2990727954231604, + "loss": 0.18034929037094116, + "loss_ce": 0.0009364524157717824, + "loss_iou": 0.482421875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 195251904, + "step": 1137 + }, + { + "epoch": 0.2993358321825475, + "grad_norm": 6.432718409068677, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 195424180, + "step": 1138 + }, + { + "epoch": 0.2993358321825475, + "loss": 0.08383812010288239, + "loss_ce": 0.0012575555592775345, + "loss_iou": 0.59375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 195424180, + "step": 1138 + }, + { + "epoch": 0.29959886894193466, + "grad_norm": 11.352446611671667, + "learning_rate": 5e-06, + "loss": 0.1314, + "num_input_tokens_seen": 195596468, + "step": 1139 + }, + { + "epoch": 0.29959886894193466, + "loss": 0.1503646820783615, + "loss_ce": 0.0016220146790146828, + "loss_iou": 0.52734375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 195596468, + "step": 1139 + }, + { + "epoch": 0.29986190570132176, + "grad_norm": 7.444433348783749, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 195768360, + "step": 1140 + }, + { + "epoch": 0.29986190570132176, + "loss": 0.12922053039073944, + "loss_ce": 0.0006804917939007282, + "loss_iou": 0.62109375, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 195768360, + "step": 1140 + }, + { + "epoch": 0.30012494246070887, + "grad_norm": 6.813355666039827, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 195940504, + "step": 1141 + }, + { + "epoch": 0.30012494246070887, + "loss": 0.14036604762077332, + "loss_ce": 0.002609711140394211, + "loss_iou": 0.515625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 195940504, + "step": 1141 + }, + { + "epoch": 0.300387979220096, + "grad_norm": 9.605056179900458, + "learning_rate": 5e-06, + "loss": 0.1463, + "num_input_tokens_seen": 196112784, + "step": 1142 + }, + { + "epoch": 0.300387979220096, + "loss": 0.14964430034160614, + "loss_ce": 0.0015119716990739107, + "loss_iou": 0.44140625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 196112784, + "step": 1142 + }, + { + "epoch": 0.3006510159794831, + "grad_norm": 7.338861721143607, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 196285072, + "step": 1143 + }, + { + "epoch": 0.3006510159794831, + "loss": 0.13087643682956696, + "loss_ce": 0.0021532890386879444, + "loss_iou": 0.46875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 196285072, + "step": 1143 + }, + { + "epoch": 0.30091405273887023, + "grad_norm": 11.553065995211972, + "learning_rate": 5e-06, + "loss": 0.1705, + "num_input_tokens_seen": 196457252, + "step": 1144 + }, + { + "epoch": 0.30091405273887023, + "loss": 0.20345042645931244, + "loss_ce": 0.0005085308803245425, + "loss_iou": 0.5390625, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 196457252, + "step": 1144 + }, + { + "epoch": 0.3011770894982574, + "grad_norm": 7.552187403058781, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 196629168, + "step": 1145 + }, + { + "epoch": 0.3011770894982574, + "loss": 0.17339852452278137, + "loss_ce": 0.004331144969910383, + "loss_iou": 0.412109375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 196629168, + "step": 1145 + }, + { + "epoch": 0.3014401262576445, + "grad_norm": 5.762343246857412, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 196801432, + "step": 1146 + }, + { + "epoch": 0.3014401262576445, + "loss": 0.07828192412853241, + "loss_ce": 0.001377625041641295, + "loss_iou": 0.416015625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 196801432, + "step": 1146 + }, + { + "epoch": 0.30170316301703165, + "grad_norm": 5.174420810424456, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 196973908, + "step": 1147 + }, + { + "epoch": 0.30170316301703165, + "loss": 0.22339066863059998, + "loss_ce": 0.004701712634414434, + "loss_iou": 0.53125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 196973908, + "step": 1147 + }, + { + "epoch": 0.30196619977641875, + "grad_norm": 10.875286083758624, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 197146088, + "step": 1148 + }, + { + "epoch": 0.30196619977641875, + "loss": 0.1432519108057022, + "loss_ce": 0.002657424658536911, + "loss_iou": 0.66015625, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 197146088, + "step": 1148 + }, + { + "epoch": 0.30222923653580586, + "grad_norm": 5.705651681197872, + "learning_rate": 5e-06, + "loss": 0.1464, + "num_input_tokens_seen": 197318460, + "step": 1149 + }, + { + "epoch": 0.30222923653580586, + "loss": 0.12248589098453522, + "loss_ce": 0.002002496039494872, + "loss_iou": 0.5234375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 197318460, + "step": 1149 + }, + { + "epoch": 0.302492273295193, + "grad_norm": 6.2074966030457706, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 197490428, + "step": 1150 + }, + { + "epoch": 0.302492273295193, + "loss": 0.1659487783908844, + "loss_ce": 0.0009402353898622096, + "loss_iou": 0.5, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 197490428, + "step": 1150 + }, + { + "epoch": 0.3027553100545801, + "grad_norm": 11.278645202454504, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 197662972, + "step": 1151 + }, + { + "epoch": 0.3027553100545801, + "loss": 0.12700702250003815, + "loss_ce": 0.0012135641882196069, + "loss_iou": 0.58984375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 197662972, + "step": 1151 + }, + { + "epoch": 0.3030183468139673, + "grad_norm": 5.7121477930459665, + "learning_rate": 5e-06, + "loss": 0.1739, + "num_input_tokens_seen": 197835168, + "step": 1152 + }, + { + "epoch": 0.3030183468139673, + "loss": 0.10988777130842209, + "loss_ce": 0.004174881149083376, + "loss_iou": 0.546875, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 197835168, + "step": 1152 + }, + { + "epoch": 0.3032813835733544, + "grad_norm": 12.425343043979453, + "learning_rate": 5e-06, + "loss": 0.1541, + "num_input_tokens_seen": 198007440, + "step": 1153 + }, + { + "epoch": 0.3032813835733544, + "loss": 0.18807393312454224, + "loss_ce": 0.0031679358799010515, + "loss_iou": 0.478515625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 198007440, + "step": 1153 + }, + { + "epoch": 0.3035444203327415, + "grad_norm": 5.925789792578691, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 198179896, + "step": 1154 + }, + { + "epoch": 0.3035444203327415, + "loss": 0.07772859930992126, + "loss_ce": 0.0008548187324777246, + "loss_iou": 0.66015625, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 198179896, + "step": 1154 + }, + { + "epoch": 0.30380745709212864, + "grad_norm": 5.901916078113717, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 198352320, + "step": 1155 + }, + { + "epoch": 0.30380745709212864, + "loss": 0.17940585315227509, + "loss_ce": 0.0011221629101783037, + "loss_iou": 0.6484375, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 198352320, + "step": 1155 + }, + { + "epoch": 0.30407049385151574, + "grad_norm": 10.483911413706299, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 198524512, + "step": 1156 + }, + { + "epoch": 0.30407049385151574, + "loss": 0.12064538896083832, + "loss_ce": 0.0006197594339028001, + "loss_iou": 0.6328125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 198524512, + "step": 1156 + }, + { + "epoch": 0.30433353061090285, + "grad_norm": 4.522315194840346, + "learning_rate": 5e-06, + "loss": 0.1278, + "num_input_tokens_seen": 198696612, + "step": 1157 + }, + { + "epoch": 0.30433353061090285, + "loss": 0.09706555306911469, + "loss_ce": 0.0013929473934695125, + "loss_iou": 0.515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 198696612, + "step": 1157 + }, + { + "epoch": 0.30459656737029, + "grad_norm": 28.08652601411638, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 198868856, + "step": 1158 + }, + { + "epoch": 0.30459656737029, + "loss": 0.052179381251335144, + "loss_ce": 0.00020794683950953186, + "loss_iou": 0.53515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 198868856, + "step": 1158 + }, + { + "epoch": 0.3048596041296771, + "grad_norm": 7.240097871891992, + "learning_rate": 5e-06, + "loss": 0.1803, + "num_input_tokens_seen": 199041064, + "step": 1159 + }, + { + "epoch": 0.3048596041296771, + "loss": 0.18336477875709534, + "loss_ce": 0.0020293283741921186, + "loss_iou": 0.5546875, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 199041064, + "step": 1159 + }, + { + "epoch": 0.30512264088906427, + "grad_norm": 5.628334621930633, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 199213476, + "step": 1160 + }, + { + "epoch": 0.30512264088906427, + "loss": 0.12429136037826538, + "loss_ce": 0.0002068809699267149, + "loss_iou": 0.79296875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 199213476, + "step": 1160 + }, + { + "epoch": 0.30538567764845137, + "grad_norm": 5.043014865834959, + "learning_rate": 5e-06, + "loss": 0.1778, + "num_input_tokens_seen": 199385580, + "step": 1161 + }, + { + "epoch": 0.30538567764845137, + "loss": 0.09477389603853226, + "loss_ce": 0.001115447492338717, + "loss_iou": 0.44140625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 199385580, + "step": 1161 + }, + { + "epoch": 0.3056487144078385, + "grad_norm": 5.156573422162299, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 199556156, + "step": 1162 + }, + { + "epoch": 0.3056487144078385, + "loss": 0.14625222980976105, + "loss_ce": 0.00046976495650596917, + "loss_iou": 0.64453125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 199556156, + "step": 1162 + }, + { + "epoch": 0.30591175116722563, + "grad_norm": 4.702903740874693, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 199728484, + "step": 1163 + }, + { + "epoch": 0.30591175116722563, + "loss": 0.156254380941391, + "loss_ce": 0.001774407341144979, + "loss_iou": 0.5078125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 199728484, + "step": 1163 + }, + { + "epoch": 0.30617478792661273, + "grad_norm": 5.380502082020364, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 199900748, + "step": 1164 + }, + { + "epoch": 0.30617478792661273, + "loss": 0.16096284985542297, + "loss_ce": 0.0019052416319027543, + "loss_iou": 0.7421875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 199900748, + "step": 1164 + }, + { + "epoch": 0.3064378246859999, + "grad_norm": 5.7025282283031595, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 200072488, + "step": 1165 + }, + { + "epoch": 0.3064378246859999, + "loss": 0.17774316668510437, + "loss_ce": 0.00037499924656003714, + "loss_iou": 0.4921875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 200072488, + "step": 1165 + }, + { + "epoch": 0.306700861445387, + "grad_norm": 5.532941140209234, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 200244696, + "step": 1166 + }, + { + "epoch": 0.306700861445387, + "loss": 0.12291580438613892, + "loss_ce": 0.0016694690566509962, + "loss_iou": 0.703125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 200244696, + "step": 1166 + }, + { + "epoch": 0.3069638982047741, + "grad_norm": 5.5856010120270705, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 200417156, + "step": 1167 + }, + { + "epoch": 0.3069638982047741, + "loss": 0.12629887461662292, + "loss_ce": 0.0023059630766510963, + "loss_iou": 0.5859375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 200417156, + "step": 1167 + }, + { + "epoch": 0.30722693496416126, + "grad_norm": 4.725090769923517, + "learning_rate": 5e-06, + "loss": 0.1267, + "num_input_tokens_seen": 200587780, + "step": 1168 + }, + { + "epoch": 0.30722693496416126, + "loss": 0.1289183497428894, + "loss_ce": 0.0026366179808974266, + "loss_iou": 0.6953125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 200587780, + "step": 1168 + }, + { + "epoch": 0.30748997172354836, + "grad_norm": 4.307353086128186, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 200759832, + "step": 1169 + }, + { + "epoch": 0.30748997172354836, + "loss": 0.16026800870895386, + "loss_ce": 0.0027362804394215345, + "loss_iou": 0.431640625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 200759832, + "step": 1169 + }, + { + "epoch": 0.30775300848293546, + "grad_norm": 11.530918199945434, + "learning_rate": 5e-06, + "loss": 0.1914, + "num_input_tokens_seen": 200929932, + "step": 1170 + }, + { + "epoch": 0.30775300848293546, + "loss": 0.19007167220115662, + "loss_ce": 0.0034261636901646852, + "loss_iou": 0.51953125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 200929932, + "step": 1170 + }, + { + "epoch": 0.3080160452423226, + "grad_norm": 7.858471531155482, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 201100520, + "step": 1171 + }, + { + "epoch": 0.3080160452423226, + "loss": 0.204483300447464, + "loss_ce": 0.0005648470250889659, + "loss_iou": 0.55859375, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 201100520, + "step": 1171 + }, + { + "epoch": 0.3082790820017097, + "grad_norm": 5.1657164225988, + "learning_rate": 5e-06, + "loss": 0.1676, + "num_input_tokens_seen": 201272808, + "step": 1172 + }, + { + "epoch": 0.3082790820017097, + "loss": 0.12782040238380432, + "loss_ce": 0.006909756921231747, + "loss_iou": 0.67578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 201272808, + "step": 1172 + }, + { + "epoch": 0.3085421187610969, + "grad_norm": 5.7855382812859295, + "learning_rate": 5e-06, + "loss": 0.1718, + "num_input_tokens_seen": 201445044, + "step": 1173 + }, + { + "epoch": 0.3085421187610969, + "loss": 0.22742098569869995, + "loss_ce": 0.0040017981082201, + "loss_iou": 0.42578125, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 201445044, + "step": 1173 + }, + { + "epoch": 0.308805155520484, + "grad_norm": 19.043518960426944, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 201617324, + "step": 1174 + }, + { + "epoch": 0.308805155520484, + "loss": 0.09914430975914001, + "loss_ce": 0.001243914244696498, + "loss_iou": 0.6171875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 201617324, + "step": 1174 + }, + { + "epoch": 0.3090681922798711, + "grad_norm": 7.337870221119852, + "learning_rate": 5e-06, + "loss": 0.1898, + "num_input_tokens_seen": 201789648, + "step": 1175 + }, + { + "epoch": 0.3090681922798711, + "loss": 0.2523178160190582, + "loss_ce": 0.0009750226745381951, + "loss_iou": 0.52734375, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 201789648, + "step": 1175 + }, + { + "epoch": 0.30933122903925825, + "grad_norm": 18.238078934317997, + "learning_rate": 5e-06, + "loss": 0.1987, + "num_input_tokens_seen": 201961724, + "step": 1176 + }, + { + "epoch": 0.30933122903925825, + "loss": 0.17623552680015564, + "loss_ce": 0.003994307480752468, + "loss_iou": 0.396484375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 201961724, + "step": 1176 + }, + { + "epoch": 0.30959426579864535, + "grad_norm": 4.51493162048169, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 202133700, + "step": 1177 + }, + { + "epoch": 0.30959426579864535, + "loss": 0.1072007492184639, + "loss_ce": 0.005058412905782461, + "loss_iou": 0.52734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 202133700, + "step": 1177 + }, + { + "epoch": 0.3098573025580325, + "grad_norm": 5.072583382632679, + "learning_rate": 5e-06, + "loss": 0.1459, + "num_input_tokens_seen": 202304256, + "step": 1178 + }, + { + "epoch": 0.3098573025580325, + "loss": 0.14473965764045715, + "loss_ce": 0.0024361968971788883, + "loss_iou": 0.51953125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 202304256, + "step": 1178 + }, + { + "epoch": 0.3101203393174196, + "grad_norm": 7.387732741099245, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 202476488, + "step": 1179 + }, + { + "epoch": 0.3101203393174196, + "loss": 0.25686854124069214, + "loss_ce": 0.005464731715619564, + "loss_iou": 0.408203125, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 202476488, + "step": 1179 + }, + { + "epoch": 0.3103833760768067, + "grad_norm": 7.564357354372493, + "learning_rate": 5e-06, + "loss": 0.1721, + "num_input_tokens_seen": 202648844, + "step": 1180 + }, + { + "epoch": 0.3103833760768067, + "loss": 0.2612742781639099, + "loss_ce": 0.00047106179408729076, + "loss_iou": 0.546875, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 202648844, + "step": 1180 + }, + { + "epoch": 0.3106464128361939, + "grad_norm": 12.304004570686834, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 202821176, + "step": 1181 + }, + { + "epoch": 0.3106464128361939, + "loss": 0.2418098747730255, + "loss_ce": 0.0016365369083359838, + "loss_iou": 0.453125, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 202821176, + "step": 1181 + }, + { + "epoch": 0.310909449595581, + "grad_norm": 4.510810550512272, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 202993428, + "step": 1182 + }, + { + "epoch": 0.310909449595581, + "loss": 0.058775611221790314, + "loss_ce": 0.0007006583036854863, + "loss_iou": 0.390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 202993428, + "step": 1182 + }, + { + "epoch": 0.3111724863549681, + "grad_norm": 44.4539532395822, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 203164156, + "step": 1183 + }, + { + "epoch": 0.3111724863549681, + "loss": 0.16502994298934937, + "loss_ce": 0.0006927890353836119, + "loss_iou": 0.55078125, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 203164156, + "step": 1183 + }, + { + "epoch": 0.31143552311435524, + "grad_norm": 7.885064131627441, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 203336328, + "step": 1184 + }, + { + "epoch": 0.31143552311435524, + "loss": 0.07413887977600098, + "loss_ce": 0.00013374855916481465, + "loss_iou": 0.62109375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 203336328, + "step": 1184 + }, + { + "epoch": 0.31169855987374234, + "grad_norm": 5.056026310722222, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 203508700, + "step": 1185 + }, + { + "epoch": 0.31169855987374234, + "loss": 0.07386209070682526, + "loss_ce": 0.0002842045505531132, + "loss_iou": 0.4609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 203508700, + "step": 1185 + }, + { + "epoch": 0.3119615966331295, + "grad_norm": 9.611820717208333, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 203680940, + "step": 1186 + }, + { + "epoch": 0.3119615966331295, + "loss": 0.13154730200767517, + "loss_ce": 0.0037701984401792288, + "loss_iou": 0.59765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 203680940, + "step": 1186 + }, + { + "epoch": 0.3122246333925166, + "grad_norm": 10.99160414493074, + "learning_rate": 5e-06, + "loss": 0.1515, + "num_input_tokens_seen": 203852944, + "step": 1187 + }, + { + "epoch": 0.3122246333925166, + "loss": 0.17437157034873962, + "loss_ce": 0.0008486199658364058, + "loss_iou": 0.53515625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 203852944, + "step": 1187 + }, + { + "epoch": 0.3124876701519037, + "grad_norm": 7.566892022382808, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 204025224, + "step": 1188 + }, + { + "epoch": 0.3124876701519037, + "loss": 0.16989970207214355, + "loss_ce": 0.004982716403901577, + "loss_iou": 0.4765625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 204025224, + "step": 1188 + }, + { + "epoch": 0.31275070691129087, + "grad_norm": 4.336576845925468, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 204197436, + "step": 1189 + }, + { + "epoch": 0.31275070691129087, + "loss": 0.30051514506340027, + "loss_ce": 0.0009546162909828126, + "loss_iou": 0.390625, + "loss_num": 0.06005859375, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 204197436, + "step": 1189 + }, + { + "epoch": 0.31301374367067797, + "grad_norm": 8.328730704134305, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 204369676, + "step": 1190 + }, + { + "epoch": 0.31301374367067797, + "loss": 0.18648235499858856, + "loss_ce": 0.0020035963971167803, + "loss_iou": 0.44921875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 204369676, + "step": 1190 + }, + { + "epoch": 0.3132767804300651, + "grad_norm": 6.248602390066733, + "learning_rate": 5e-06, + "loss": 0.2073, + "num_input_tokens_seen": 204541920, + "step": 1191 + }, + { + "epoch": 0.3132767804300651, + "loss": 0.12927605211734772, + "loss_ce": 0.0012548138620331883, + "loss_iou": 0.51953125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 204541920, + "step": 1191 + }, + { + "epoch": 0.31353981718945223, + "grad_norm": 13.468629090291936, + "learning_rate": 5e-06, + "loss": 0.1227, + "num_input_tokens_seen": 204714352, + "step": 1192 + }, + { + "epoch": 0.31353981718945223, + "loss": 0.13916926085948944, + "loss_ce": 0.002206363482400775, + "loss_iou": 0.64453125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 204714352, + "step": 1192 + }, + { + "epoch": 0.31380285394883933, + "grad_norm": 14.97613941577105, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 204886516, + "step": 1193 + }, + { + "epoch": 0.31380285394883933, + "loss": 0.07034310698509216, + "loss_ce": 0.002716155955567956, + "loss_iou": 0.53515625, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 204886516, + "step": 1193 + }, + { + "epoch": 0.3140658907082265, + "grad_norm": 7.0534203975755085, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 205058780, + "step": 1194 + }, + { + "epoch": 0.3140658907082265, + "loss": 0.1390216201543808, + "loss_ce": 0.00144837680272758, + "loss_iou": 0.63671875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 205058780, + "step": 1194 + }, + { + "epoch": 0.3143289274676136, + "grad_norm": 8.447198281833328, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 205231072, + "step": 1195 + }, + { + "epoch": 0.3143289274676136, + "loss": 0.13909873366355896, + "loss_ce": 0.00030477988184429705, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 205231072, + "step": 1195 + }, + { + "epoch": 0.3145919642270007, + "grad_norm": 11.546456728325778, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 205403380, + "step": 1196 + }, + { + "epoch": 0.3145919642270007, + "loss": 0.21954891085624695, + "loss_ce": 0.0075127785094082355, + "loss_iou": NaN, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 205403380, + "step": 1196 + }, + { + "epoch": 0.31485500098638786, + "grad_norm": 22.283163028622614, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 205575536, + "step": 1197 + }, + { + "epoch": 0.31485500098638786, + "loss": 0.10035932064056396, + "loss_ce": 0.0016349535435438156, + "loss_iou": 0.5703125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 205575536, + "step": 1197 + }, + { + "epoch": 0.31511803774577496, + "grad_norm": 13.092900629924303, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 205747664, + "step": 1198 + }, + { + "epoch": 0.31511803774577496, + "loss": 0.1355750560760498, + "loss_ce": 0.0009009751374833286, + "loss_iou": 0.46484375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 205747664, + "step": 1198 + }, + { + "epoch": 0.3153810745051621, + "grad_norm": 4.560396330595116, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 205919612, + "step": 1199 + }, + { + "epoch": 0.3153810745051621, + "loss": 0.085871621966362, + "loss_ce": 0.0012463756138458848, + "loss_iou": 0.478515625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 205919612, + "step": 1199 + }, + { + "epoch": 0.3156441112645492, + "grad_norm": 7.441372297845623, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 206092012, + "step": 1200 + }, + { + "epoch": 0.3156441112645492, + "loss": 0.13713139295578003, + "loss_ce": 0.004868209362030029, + "loss_iou": 0.6015625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 206092012, + "step": 1200 + }, + { + "epoch": 0.3159071480239363, + "grad_norm": 5.3666102379931795, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 206262016, + "step": 1201 + }, + { + "epoch": 0.3159071480239363, + "loss": 0.14059683680534363, + "loss_ce": 0.001070460770279169, + "loss_iou": 0.53125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 206262016, + "step": 1201 + }, + { + "epoch": 0.3161701847833235, + "grad_norm": 8.935322327098595, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 206434192, + "step": 1202 + }, + { + "epoch": 0.3161701847833235, + "loss": 0.13417284190654755, + "loss_ce": 0.0015129297971725464, + "loss_iou": 0.671875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 206434192, + "step": 1202 + }, + { + "epoch": 0.3164332215427106, + "grad_norm": 10.825802927092472, + "learning_rate": 5e-06, + "loss": 0.1619, + "num_input_tokens_seen": 206604572, + "step": 1203 + }, + { + "epoch": 0.3164332215427106, + "loss": 0.1651817262172699, + "loss_ce": 0.002462008036673069, + "loss_iou": 0.38671875, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 206604572, + "step": 1203 + }, + { + "epoch": 0.31669625830209774, + "grad_norm": 14.238886956988445, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 206776604, + "step": 1204 + }, + { + "epoch": 0.31669625830209774, + "loss": 0.1644083857536316, + "loss_ce": 0.0012003772426396608, + "loss_iou": 0.75390625, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 206776604, + "step": 1204 + }, + { + "epoch": 0.31695929506148485, + "grad_norm": 7.214768469241478, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 206948516, + "step": 1205 + }, + { + "epoch": 0.31695929506148485, + "loss": 0.15767714381217957, + "loss_ce": 0.0004200635012239218, + "loss_iou": 0.6015625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 206948516, + "step": 1205 + }, + { + "epoch": 0.31722233182087195, + "grad_norm": 8.494853848101869, + "learning_rate": 5e-06, + "loss": 0.1633, + "num_input_tokens_seen": 207120552, + "step": 1206 + }, + { + "epoch": 0.31722233182087195, + "loss": 0.20658773183822632, + "loss_ce": 0.007216397672891617, + "loss_iou": 0.3671875, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 207120552, + "step": 1206 + }, + { + "epoch": 0.3174853685802591, + "grad_norm": 8.328419666401985, + "learning_rate": 5e-06, + "loss": 0.1285, + "num_input_tokens_seen": 207292524, + "step": 1207 + }, + { + "epoch": 0.3174853685802591, + "loss": 0.1329333782196045, + "loss_ce": 0.0009753549238666892, + "loss_iou": 0.55078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 207292524, + "step": 1207 + }, + { + "epoch": 0.3177484053396462, + "grad_norm": 10.047313768362223, + "learning_rate": 5e-06, + "loss": 0.1291, + "num_input_tokens_seen": 207464680, + "step": 1208 + }, + { + "epoch": 0.3177484053396462, + "loss": 0.12349092215299606, + "loss_ce": 0.0011764641385525465, + "loss_iou": 0.515625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 207464680, + "step": 1208 + }, + { + "epoch": 0.3180114420990333, + "grad_norm": 12.242484724411801, + "learning_rate": 5e-06, + "loss": 0.1693, + "num_input_tokens_seen": 207634788, + "step": 1209 + }, + { + "epoch": 0.3180114420990333, + "loss": 0.21163830161094666, + "loss_ce": 0.0032947922591120005, + "loss_iou": 0.54296875, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 207634788, + "step": 1209 + }, + { + "epoch": 0.3182744788584205, + "grad_norm": 5.397067809441159, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 207806604, + "step": 1210 + }, + { + "epoch": 0.3182744788584205, + "loss": 0.14931055903434753, + "loss_ce": 0.00026271765818819404, + "loss_iou": 0.5859375, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 207806604, + "step": 1210 + }, + { + "epoch": 0.3185375156178076, + "grad_norm": 5.849344287815652, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 207979236, + "step": 1211 + }, + { + "epoch": 0.3185375156178076, + "loss": 0.16523879766464233, + "loss_ce": 0.000810077937785536, + "loss_iou": 0.65234375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 207979236, + "step": 1211 + }, + { + "epoch": 0.31880055237719473, + "grad_norm": 6.099253346488696, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 208151368, + "step": 1212 + }, + { + "epoch": 0.31880055237719473, + "loss": 0.12017640471458435, + "loss_ce": 0.00015077157877385616, + "loss_iou": 0.6171875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 208151368, + "step": 1212 + }, + { + "epoch": 0.31906358913658184, + "grad_norm": 6.749805208741857, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 208323600, + "step": 1213 + }, + { + "epoch": 0.31906358913658184, + "loss": 0.1262407749891281, + "loss_ce": 0.0037432105746120214, + "loss_iou": 0.4765625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 208323600, + "step": 1213 + }, + { + "epoch": 0.31932662589596894, + "grad_norm": 7.672226851345336, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 208495836, + "step": 1214 + }, + { + "epoch": 0.31932662589596894, + "loss": 0.09719178080558777, + "loss_ce": 0.0004205434233881533, + "loss_iou": 0.5625, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 208495836, + "step": 1214 + }, + { + "epoch": 0.3195896626553561, + "grad_norm": 7.992027772652848, + "learning_rate": 5e-06, + "loss": 0.1721, + "num_input_tokens_seen": 208667840, + "step": 1215 + }, + { + "epoch": 0.3195896626553561, + "loss": 0.1424046754837036, + "loss_ce": 0.00037586723919957876, + "loss_iou": 0.46484375, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 208667840, + "step": 1215 + }, + { + "epoch": 0.3198526994147432, + "grad_norm": 6.160546703869267, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 208840040, + "step": 1216 + }, + { + "epoch": 0.3198526994147432, + "loss": 0.11277418583631516, + "loss_ce": 0.0004084610554855317, + "loss_iou": 0.48046875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 208840040, + "step": 1216 + }, + { + "epoch": 0.32011573617413036, + "grad_norm": 14.924491605710916, + "learning_rate": 5e-06, + "loss": 0.1657, + "num_input_tokens_seen": 209010176, + "step": 1217 + }, + { + "epoch": 0.32011573617413036, + "loss": 0.1102285385131836, + "loss_ce": 0.0007619824027642608, + "loss_iou": 0.51171875, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 209010176, + "step": 1217 + }, + { + "epoch": 0.32037877293351746, + "grad_norm": 13.936435144745488, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 209182304, + "step": 1218 + }, + { + "epoch": 0.32037877293351746, + "loss": 0.12400620430707932, + "loss_ce": 0.0005168293137103319, + "loss_iou": 0.53515625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 209182304, + "step": 1218 + }, + { + "epoch": 0.32064180969290457, + "grad_norm": 5.31537857924368, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 209354400, + "step": 1219 + }, + { + "epoch": 0.32064180969290457, + "loss": 0.15276062488555908, + "loss_ce": 0.0029498354997485876, + "loss_iou": 0.50390625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 209354400, + "step": 1219 + }, + { + "epoch": 0.3209048464522917, + "grad_norm": 8.044699509860337, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 209526592, + "step": 1220 + }, + { + "epoch": 0.3209048464522917, + "loss": 0.13095101714134216, + "loss_ce": 0.00137336365878582, + "loss_iou": 0.61328125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 209526592, + "step": 1220 + }, + { + "epoch": 0.32116788321167883, + "grad_norm": 6.178496640552883, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 209699004, + "step": 1221 + }, + { + "epoch": 0.32116788321167883, + "loss": 0.11922457814216614, + "loss_ce": 0.00017550383927300572, + "loss_iou": 0.625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 209699004, + "step": 1221 + }, + { + "epoch": 0.32143091997106593, + "grad_norm": 5.682094913930693, + "learning_rate": 5e-06, + "loss": 0.1394, + "num_input_tokens_seen": 209871116, + "step": 1222 + }, + { + "epoch": 0.32143091997106593, + "loss": 0.14516758918762207, + "loss_ce": 0.004481561481952667, + "loss_iou": 0.328125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 209871116, + "step": 1222 + }, + { + "epoch": 0.3216939567304531, + "grad_norm": 7.037588469004313, + "learning_rate": 5e-06, + "loss": 0.1381, + "num_input_tokens_seen": 210041224, + "step": 1223 + }, + { + "epoch": 0.3216939567304531, + "loss": 0.1431758999824524, + "loss_ce": 0.002764530945569277, + "loss_iou": 0.54296875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 210041224, + "step": 1223 + }, + { + "epoch": 0.3219569934898402, + "grad_norm": 23.622534135841367, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 210211380, + "step": 1224 + }, + { + "epoch": 0.3219569934898402, + "loss": 0.1730581820011139, + "loss_ce": 0.0024649298284202814, + "loss_iou": 0.69921875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 210211380, + "step": 1224 + }, + { + "epoch": 0.32222003024922735, + "grad_norm": 12.04380926493331, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 210383088, + "step": 1225 + }, + { + "epoch": 0.32222003024922735, + "loss": 0.23281848430633545, + "loss_ce": 0.00115956028457731, + "loss_iou": 0.4296875, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 210383088, + "step": 1225 + }, + { + "epoch": 0.32248306700861445, + "grad_norm": 4.756101930631895, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 210555072, + "step": 1226 + }, + { + "epoch": 0.32248306700861445, + "loss": 0.06910556554794312, + "loss_ce": 0.003004492959007621, + "loss_iou": 0.5859375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 210555072, + "step": 1226 + }, + { + "epoch": 0.32274610376800156, + "grad_norm": 3.8368982745320284, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 210727236, + "step": 1227 + }, + { + "epoch": 0.32274610376800156, + "loss": 0.10896629840135574, + "loss_ce": 0.0005983852897770703, + "loss_iou": 0.54296875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 210727236, + "step": 1227 + }, + { + "epoch": 0.3230091405273887, + "grad_norm": 5.290425948494437, + "learning_rate": 5e-06, + "loss": 0.1453, + "num_input_tokens_seen": 210899444, + "step": 1228 + }, + { + "epoch": 0.3230091405273887, + "loss": 0.12627889215946198, + "loss_ce": 8.869695011526346e-05, + "loss_iou": 0.671875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 210899444, + "step": 1228 + }, + { + "epoch": 0.3232721772867758, + "grad_norm": 5.380666634187743, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 211071748, + "step": 1229 + }, + { + "epoch": 0.3232721772867758, + "loss": 0.16187983751296997, + "loss_ce": 0.003463074564933777, + "loss_iou": 0.5234375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 211071748, + "step": 1229 + }, + { + "epoch": 0.323535214046163, + "grad_norm": 6.929606907390503, + "learning_rate": 5e-06, + "loss": 0.1638, + "num_input_tokens_seen": 211243884, + "step": 1230 + }, + { + "epoch": 0.323535214046163, + "loss": 0.16732358932495117, + "loss_ce": 0.003932466730475426, + "loss_iou": 0.76171875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 211243884, + "step": 1230 + }, + { + "epoch": 0.3237982508055501, + "grad_norm": 4.103469407229988, + "learning_rate": 5e-06, + "loss": 0.1433, + "num_input_tokens_seen": 211416144, + "step": 1231 + }, + { + "epoch": 0.3237982508055501, + "loss": 0.16871199011802673, + "loss_ce": 0.0031236184295266867, + "loss_iou": 0.54296875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 211416144, + "step": 1231 + }, + { + "epoch": 0.3240612875649372, + "grad_norm": 4.57933905462206, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 211588296, + "step": 1232 + }, + { + "epoch": 0.3240612875649372, + "loss": 0.16614994406700134, + "loss_ce": 0.0009277852368541062, + "loss_iou": 0.62109375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 211588296, + "step": 1232 + }, + { + "epoch": 0.32432432432432434, + "grad_norm": 4.940862588040882, + "learning_rate": 5e-06, + "loss": 0.1402, + "num_input_tokens_seen": 211758972, + "step": 1233 + }, + { + "epoch": 0.32432432432432434, + "loss": 0.14131012558937073, + "loss_ce": 0.0015701348893344402, + "loss_iou": 0.482421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 211758972, + "step": 1233 + }, + { + "epoch": 0.32458736108371145, + "grad_norm": 11.781672019450719, + "learning_rate": 5e-06, + "loss": 0.1631, + "num_input_tokens_seen": 211928944, + "step": 1234 + }, + { + "epoch": 0.32458736108371145, + "loss": 0.14883792400360107, + "loss_ce": 0.0010107720736414194, + "loss_iou": 0.6875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 211928944, + "step": 1234 + }, + { + "epoch": 0.32485039784309855, + "grad_norm": 19.10070261855633, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 212100832, + "step": 1235 + }, + { + "epoch": 0.32485039784309855, + "loss": 0.07846543192863464, + "loss_ce": 0.0002793997118715197, + "loss_iou": 0.6328125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 212100832, + "step": 1235 + }, + { + "epoch": 0.3251134346024857, + "grad_norm": 15.821804140128867, + "learning_rate": 5e-06, + "loss": 0.1606, + "num_input_tokens_seen": 212271444, + "step": 1236 + }, + { + "epoch": 0.3251134346024857, + "loss": 0.1032799631357193, + "loss_ce": 0.004799742251634598, + "loss_iou": 0.5546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 212271444, + "step": 1236 + }, + { + "epoch": 0.3253764713618728, + "grad_norm": 5.984115475857608, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 212443976, + "step": 1237 + }, + { + "epoch": 0.3253764713618728, + "loss": 0.06768647581338882, + "loss_ce": 0.0013717777328565717, + "loss_iou": 0.6796875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 212443976, + "step": 1237 + }, + { + "epoch": 0.32563950812125997, + "grad_norm": 7.104938203697124, + "learning_rate": 5e-06, + "loss": 0.1686, + "num_input_tokens_seen": 212612720, + "step": 1238 + }, + { + "epoch": 0.32563950812125997, + "loss": 0.10599420964717865, + "loss_ce": 0.002387029118835926, + "loss_iou": 0.68359375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 212612720, + "step": 1238 + }, + { + "epoch": 0.32590254488064707, + "grad_norm": 5.6715016587403655, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 212784924, + "step": 1239 + }, + { + "epoch": 0.32590254488064707, + "loss": 0.07988797873258591, + "loss_ce": 0.00020658349967561662, + "loss_iou": 0.5078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 212784924, + "step": 1239 + }, + { + "epoch": 0.3261655816400342, + "grad_norm": 10.066783726153659, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 212957204, + "step": 1240 + }, + { + "epoch": 0.3261655816400342, + "loss": 0.24682722985744476, + "loss_ce": 0.0004893409786745906, + "loss_iou": 0.46484375, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 212957204, + "step": 1240 + }, + { + "epoch": 0.32642861839942133, + "grad_norm": 6.605871983356305, + "learning_rate": 5e-06, + "loss": 0.1476, + "num_input_tokens_seen": 213129636, + "step": 1241 + }, + { + "epoch": 0.32642861839942133, + "loss": 0.14497268199920654, + "loss_ce": 0.008375998586416245, + "loss_iou": 0.55859375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 213129636, + "step": 1241 + }, + { + "epoch": 0.32669165515880844, + "grad_norm": 6.65301903793999, + "learning_rate": 5e-06, + "loss": 0.1883, + "num_input_tokens_seen": 213302188, + "step": 1242 + }, + { + "epoch": 0.32669165515880844, + "loss": 0.25356027483940125, + "loss_ce": 0.002705805469304323, + "loss_iou": 0.71875, + "loss_num": 0.05029296875, + "loss_xval": 0.25, + "num_input_tokens_seen": 213302188, + "step": 1242 + }, + { + "epoch": 0.3269546919181956, + "grad_norm": 6.148620904940904, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 213474112, + "step": 1243 + }, + { + "epoch": 0.3269546919181956, + "loss": 0.10272787511348724, + "loss_ce": 0.00427817041054368, + "loss_iou": 0.57421875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 213474112, + "step": 1243 + }, + { + "epoch": 0.3272177286775827, + "grad_norm": 8.559691577151979, + "learning_rate": 5e-06, + "loss": 0.214, + "num_input_tokens_seen": 213646008, + "step": 1244 + }, + { + "epoch": 0.3272177286775827, + "loss": 0.2430175393819809, + "loss_ce": 0.0016845206264406443, + "loss_iou": 0.5625, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 213646008, + "step": 1244 + }, + { + "epoch": 0.3274807654369698, + "grad_norm": 4.550203288561425, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 213816528, + "step": 1245 + }, + { + "epoch": 0.3274807654369698, + "loss": 0.10727906972169876, + "loss_ce": 0.001383074326440692, + "loss_iou": 0.59765625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 213816528, + "step": 1245 + }, + { + "epoch": 0.32774380219635696, + "grad_norm": 8.680127486966335, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 213988932, + "step": 1246 + }, + { + "epoch": 0.32774380219635696, + "loss": 0.16796328127384186, + "loss_ce": 0.0009405763121321797, + "loss_iou": 0.470703125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 213988932, + "step": 1246 + }, + { + "epoch": 0.32800683895574406, + "grad_norm": 9.402943236610666, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 214161184, + "step": 1247 + }, + { + "epoch": 0.32800683895574406, + "loss": 0.18202053010463715, + "loss_ce": 0.008070331066846848, + "loss_iou": 0.5859375, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 214161184, + "step": 1247 + }, + { + "epoch": 0.32826987571513117, + "grad_norm": 8.949806900643816, + "learning_rate": 5e-06, + "loss": 0.1733, + "num_input_tokens_seen": 214333220, + "step": 1248 + }, + { + "epoch": 0.32826987571513117, + "loss": 0.18508628010749817, + "loss_ce": 0.001980806002393365, + "loss_iou": 0.67578125, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 214333220, + "step": 1248 + }, + { + "epoch": 0.3285329124745183, + "grad_norm": 9.623428359724258, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 214501988, + "step": 1249 + }, + { + "epoch": 0.3285329124745183, + "loss": 0.18457330763339996, + "loss_ce": 0.002108700107783079, + "loss_iou": 0.46875, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 214501988, + "step": 1249 + }, + { + "epoch": 0.3287959492339054, + "grad_norm": 5.549112096762312, + "learning_rate": 5e-06, + "loss": 0.1748, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_websight_new_CIoU": 0.8321745097637177, + "eval_websight_new_GIoU": 0.8282029330730438, + "eval_websight_new_IoU": 0.8425185084342957, + "eval_websight_new_MAE_all": 0.030896120704710484, + "eval_websight_new_MAE_h": 0.020350518636405468, + "eval_websight_new_MAE_w": 0.041066043078899384, + "eval_websight_new_MAE_x": 0.04287236928939819, + "eval_websight_new_MAE_y": 0.019295550882816315, + "eval_websight_new_NUM_probability": 0.9998577535152435, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.13457995653152466, + "eval_websight_new_loss_ce": 2.8165732146590017e-05, + "eval_websight_new_loss_iou": 0.39599609375, + "eval_websight_new_loss_num": 0.022901535034179688, + "eval_websight_new_loss_xval": 0.1145477294921875, + "eval_websight_new_runtime": 54.6461, + "eval_websight_new_samples_per_second": 0.915, + "eval_websight_new_steps_per_second": 0.037, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_seeclick_CIoU": 0.5724749565124512, + "eval_seeclick_GIoU": 0.5666466653347015, + "eval_seeclick_IoU": 0.5994701087474823, + "eval_seeclick_MAE_all": 0.054681919515132904, + "eval_seeclick_MAE_h": 0.03272205591201782, + "eval_seeclick_MAE_w": 0.07547581382095814, + "eval_seeclick_MAE_x": 0.07945681735873222, + "eval_seeclick_MAE_y": 0.031073003076016903, + "eval_seeclick_NUM_probability": 0.9999328255653381, + "eval_seeclick_inside_bbox": 0.8465909063816071, + "eval_seeclick_loss": 0.2362295389175415, + "eval_seeclick_loss_ce": 0.008988222572952509, + "eval_seeclick_loss_iou": 0.5384521484375, + "eval_seeclick_loss_num": 0.04405975341796875, + "eval_seeclick_loss_xval": 0.22039794921875, + "eval_seeclick_runtime": 77.1136, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_icons_CIoU": 0.8087750673294067, + "eval_icons_GIoU": 0.7993076145648956, + "eval_icons_IoU": 0.8191726505756378, + "eval_icons_MAE_all": 0.02884785458445549, + "eval_icons_MAE_h": 0.033479243516922, + "eval_icons_MAE_w": 0.024947408586740494, + "eval_icons_MAE_x": 0.02466664183884859, + "eval_icons_MAE_y": 0.0322981309145689, + "eval_icons_NUM_probability": 0.9998048841953278, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.08432088792324066, + "eval_icons_loss_ce": 7.355650814133696e-05, + "eval_icons_loss_iou": 0.567626953125, + "eval_icons_loss_num": 0.015069961547851562, + "eval_icons_loss_xval": 0.0753631591796875, + "eval_icons_runtime": 78.2995, + "eval_icons_samples_per_second": 0.639, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "eval_screenspot_CIoU": 0.5492339730262756, + "eval_screenspot_GIoU": 0.5352775057156881, + "eval_screenspot_IoU": 0.5879749854405721, + "eval_screenspot_MAE_all": 0.0870671272277832, + "eval_screenspot_MAE_h": 0.051297743494311966, + "eval_screenspot_MAE_w": 0.14652628699938455, + "eval_screenspot_MAE_x": 0.10099601248900096, + "eval_screenspot_MAE_y": 0.04944847462077936, + "eval_screenspot_NUM_probability": 0.999727189540863, + "eval_screenspot_inside_bbox": 0.8529166579246521, + "eval_screenspot_loss": 0.8003170490264893, + "eval_screenspot_loss_ce": 0.4581688741842906, + "eval_screenspot_loss_iou": 0.4616292317708333, + "eval_screenspot_loss_num": 0.06711324055989583, + "eval_screenspot_loss_xval": 0.3355305989583333, + "eval_screenspot_runtime": 146.7221, + "eval_screenspot_samples_per_second": 0.607, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3287959492339054, + "loss": 0.7908815741539001, + "loss_ce": 0.44920679926872253, + "loss_iou": 0.390625, + "loss_num": 0.068359375, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 214672212, + "step": 1250 + }, + { + "epoch": 0.3290589859932926, + "grad_norm": 5.65145087220912, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 214840640, + "step": 1251 + }, + { + "epoch": 0.3290589859932926, + "loss": 0.17782355844974518, + "loss_ce": 0.0008826321572996676, + "loss_iou": 0.458984375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 214840640, + "step": 1251 + }, + { + "epoch": 0.3293220227526797, + "grad_norm": 7.22202274640801, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 215011212, + "step": 1252 + }, + { + "epoch": 0.3293220227526797, + "loss": 0.13640594482421875, + "loss_ce": 0.0011520386906340718, + "loss_iou": 0.5859375, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 215011212, + "step": 1252 + }, + { + "epoch": 0.3295850595120668, + "grad_norm": 5.1577086011143685, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 215183424, + "step": 1253 + }, + { + "epoch": 0.3295850595120668, + "loss": 0.12353098392486572, + "loss_ce": 0.0006061755702830851, + "loss_iou": 0.5703125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 215183424, + "step": 1253 + }, + { + "epoch": 0.32984809627145395, + "grad_norm": 7.368992653841811, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 215355224, + "step": 1254 + }, + { + "epoch": 0.32984809627145395, + "loss": 0.15012162923812866, + "loss_ce": 0.0011348105035722256, + "loss_iou": 0.51171875, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 215355224, + "step": 1254 + }, + { + "epoch": 0.33011113303084105, + "grad_norm": 5.485255366846693, + "learning_rate": 5e-06, + "loss": 0.1751, + "num_input_tokens_seen": 215525468, + "step": 1255 + }, + { + "epoch": 0.33011113303084105, + "loss": 0.14393854141235352, + "loss_ce": 0.0008110922062769532, + "loss_iou": 0.4765625, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 215525468, + "step": 1255 + }, + { + "epoch": 0.3303741697902282, + "grad_norm": 5.150585875058386, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 215697664, + "step": 1256 + }, + { + "epoch": 0.3303741697902282, + "loss": 0.1360258162021637, + "loss_ce": 0.0006498318398371339, + "loss_iou": 0.6875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 215697664, + "step": 1256 + }, + { + "epoch": 0.3306372065496153, + "grad_norm": 4.902248025801288, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 215869772, + "step": 1257 + }, + { + "epoch": 0.3306372065496153, + "loss": 0.08430365473031998, + "loss_ce": 0.0006854891544207931, + "loss_iou": 0.671875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 215869772, + "step": 1257 + }, + { + "epoch": 0.3309002433090024, + "grad_norm": 8.067058733106478, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 216041972, + "step": 1258 + }, + { + "epoch": 0.3309002433090024, + "loss": 0.12436328083276749, + "loss_ce": 0.0012248535640537739, + "loss_iou": 0.66015625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 216041972, + "step": 1258 + }, + { + "epoch": 0.3311632800683896, + "grad_norm": 10.281119628319207, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 216213968, + "step": 1259 + }, + { + "epoch": 0.3311632800683896, + "loss": 0.1294836401939392, + "loss_ce": 0.002225350122898817, + "loss_iou": 0.490234375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 216213968, + "step": 1259 + }, + { + "epoch": 0.3314263168277767, + "grad_norm": 9.188895459960923, + "learning_rate": 5e-06, + "loss": 0.1358, + "num_input_tokens_seen": 216384484, + "step": 1260 + }, + { + "epoch": 0.3314263168277767, + "loss": 0.17113396525382996, + "loss_ce": 0.0021886550821363926, + "loss_iou": 0.5234375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 216384484, + "step": 1260 + }, + { + "epoch": 0.3316893535871638, + "grad_norm": 9.858408287132631, + "learning_rate": 5e-06, + "loss": 0.1691, + "num_input_tokens_seen": 216556712, + "step": 1261 + }, + { + "epoch": 0.3316893535871638, + "loss": 0.09802193194627762, + "loss_ce": 0.004027791786938906, + "loss_iou": 0.53125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 216556712, + "step": 1261 + }, + { + "epoch": 0.33195239034655094, + "grad_norm": 13.43147706837513, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 216727532, + "step": 1262 + }, + { + "epoch": 0.33195239034655094, + "loss": 0.12088888883590698, + "loss_ce": 0.001778791076503694, + "loss_iou": NaN, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 216727532, + "step": 1262 + }, + { + "epoch": 0.33221542710593804, + "grad_norm": 22.307350229032924, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 216899424, + "step": 1263 + }, + { + "epoch": 0.33221542710593804, + "loss": 0.13408984243869781, + "loss_ce": 0.00048388654249720275, + "loss_iou": 0.5546875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 216899424, + "step": 1263 + }, + { + "epoch": 0.3324784638653252, + "grad_norm": 12.430687637818028, + "learning_rate": 5e-06, + "loss": 0.1269, + "num_input_tokens_seen": 217071512, + "step": 1264 + }, + { + "epoch": 0.3324784638653252, + "loss": 0.12219381332397461, + "loss_ce": 0.0014357574982568622, + "loss_iou": 0.72265625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 217071512, + "step": 1264 + }, + { + "epoch": 0.3327415006247123, + "grad_norm": 5.892819252370443, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 217241728, + "step": 1265 + }, + { + "epoch": 0.3327415006247123, + "loss": 0.07451170682907104, + "loss_ce": 0.0026122929994016886, + "loss_iou": 0.5234375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 217241728, + "step": 1265 + }, + { + "epoch": 0.3330045373840994, + "grad_norm": 7.045469734572638, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 217413824, + "step": 1266 + }, + { + "epoch": 0.3330045373840994, + "loss": 0.19595244526863098, + "loss_ce": 0.0024709957651793957, + "loss_iou": 0.5234375, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 217413824, + "step": 1266 + }, + { + "epoch": 0.33326757414348657, + "grad_norm": 10.385925957202762, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 217585864, + "step": 1267 + }, + { + "epoch": 0.33326757414348657, + "loss": 0.14605620503425598, + "loss_ce": 0.0005483938148245215, + "loss_iou": 0.57421875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 217585864, + "step": 1267 + }, + { + "epoch": 0.33353061090287367, + "grad_norm": 6.514979263584694, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 217757720, + "step": 1268 + }, + { + "epoch": 0.33353061090287367, + "loss": 0.1336522251367569, + "loss_ce": 0.000595581834204495, + "loss_iou": 0.66015625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 217757720, + "step": 1268 + }, + { + "epoch": 0.33379364766226083, + "grad_norm": 7.782042489694296, + "learning_rate": 5e-06, + "loss": 0.1537, + "num_input_tokens_seen": 217929844, + "step": 1269 + }, + { + "epoch": 0.33379364766226083, + "loss": 0.14830312132835388, + "loss_ce": 0.0005370010621845722, + "loss_iou": 0.41796875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 217929844, + "step": 1269 + }, + { + "epoch": 0.33405668442164793, + "grad_norm": 37.57598415614969, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 218102180, + "step": 1270 + }, + { + "epoch": 0.33405668442164793, + "loss": 0.125936821103096, + "loss_ce": 0.000570611678995192, + "loss_iou": 0.77734375, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 218102180, + "step": 1270 + }, + { + "epoch": 0.33431972118103503, + "grad_norm": 11.611735941773203, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 218272528, + "step": 1271 + }, + { + "epoch": 0.33431972118103503, + "loss": 0.15356029570102692, + "loss_ce": 0.0005146406474523246, + "loss_iou": 0.51171875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 218272528, + "step": 1271 + }, + { + "epoch": 0.3345827579404222, + "grad_norm": 8.349189669659024, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 218445008, + "step": 1272 + }, + { + "epoch": 0.3345827579404222, + "loss": 0.14823183417320251, + "loss_ce": 0.0009540055179968476, + "loss_iou": 0.5390625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 218445008, + "step": 1272 + }, + { + "epoch": 0.3348457946998093, + "grad_norm": 8.794519146282962, + "learning_rate": 5e-06, + "loss": 0.1529, + "num_input_tokens_seen": 218617480, + "step": 1273 + }, + { + "epoch": 0.3348457946998093, + "loss": 0.10918127745389938, + "loss_ce": 0.0011185340117663145, + "loss_iou": 0.625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 218617480, + "step": 1273 + }, + { + "epoch": 0.3351088314591964, + "grad_norm": 5.0624867047638045, + "learning_rate": 5e-06, + "loss": 0.1463, + "num_input_tokens_seen": 218789900, + "step": 1274 + }, + { + "epoch": 0.3351088314591964, + "loss": 0.13429242372512817, + "loss_ce": 0.00411969143897295, + "loss_iou": 0.369140625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 218789900, + "step": 1274 + }, + { + "epoch": 0.33537186821858356, + "grad_norm": 5.751729189048832, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 218962156, + "step": 1275 + }, + { + "epoch": 0.33537186821858356, + "loss": 0.11978072673082352, + "loss_ce": 0.00350876129232347, + "loss_iou": 0.453125, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 218962156, + "step": 1275 + }, + { + "epoch": 0.33563490497797066, + "grad_norm": 6.311955311958974, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 219132288, + "step": 1276 + }, + { + "epoch": 0.33563490497797066, + "loss": 0.1355704814195633, + "loss_ce": 0.0025443662889301777, + "loss_iou": 0.46484375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 219132288, + "step": 1276 + }, + { + "epoch": 0.3358979417373578, + "grad_norm": 7.099991467306234, + "learning_rate": 5e-06, + "loss": 0.1531, + "num_input_tokens_seen": 219304616, + "step": 1277 + }, + { + "epoch": 0.3358979417373578, + "loss": 0.13075271248817444, + "loss_ce": 0.0017243996262550354, + "loss_iou": 0.55078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 219304616, + "step": 1277 + }, + { + "epoch": 0.3361609784967449, + "grad_norm": 7.34332951683246, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 219476876, + "step": 1278 + }, + { + "epoch": 0.3361609784967449, + "loss": 0.08354561030864716, + "loss_ce": 0.0010565929114818573, + "loss_iou": 0.62890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 219476876, + "step": 1278 + }, + { + "epoch": 0.336424015256132, + "grad_norm": 20.864457223612828, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 219649440, + "step": 1279 + }, + { + "epoch": 0.336424015256132, + "loss": 0.14009949564933777, + "loss_ce": 0.00029846589313820004, + "loss_iou": 0.51953125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 219649440, + "step": 1279 + }, + { + "epoch": 0.3366870520155192, + "grad_norm": 6.27874029788255, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 219821712, + "step": 1280 + }, + { + "epoch": 0.3366870520155192, + "loss": 0.14031162858009338, + "loss_ce": 0.0007242212886922061, + "loss_iou": 0.640625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 219821712, + "step": 1280 + }, + { + "epoch": 0.3369500887749063, + "grad_norm": 4.5668271174612025, + "learning_rate": 5e-06, + "loss": 0.1628, + "num_input_tokens_seen": 219993864, + "step": 1281 + }, + { + "epoch": 0.3369500887749063, + "loss": 0.10837851464748383, + "loss_ce": 0.0014449162408709526, + "loss_iou": 0.40234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 219993864, + "step": 1281 + }, + { + "epoch": 0.3372131255342934, + "grad_norm": 4.389742759586719, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 220166248, + "step": 1282 + }, + { + "epoch": 0.3372131255342934, + "loss": 0.23685070872306824, + "loss_ce": 0.0005836054333485663, + "loss_iou": 0.54296875, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 220166248, + "step": 1282 + }, + { + "epoch": 0.33747616229368055, + "grad_norm": 5.570317525274513, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 220338428, + "step": 1283 + }, + { + "epoch": 0.33747616229368055, + "loss": 0.13890241086483002, + "loss_ce": 0.0007493281736969948, + "loss_iou": 0.44921875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 220338428, + "step": 1283 + }, + { + "epoch": 0.33773919905306765, + "grad_norm": 10.985772181850571, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 220510776, + "step": 1284 + }, + { + "epoch": 0.33773919905306765, + "loss": 0.12931521236896515, + "loss_ce": 0.0006531005492433906, + "loss_iou": 0.6015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 220510776, + "step": 1284 + }, + { + "epoch": 0.3380022358124548, + "grad_norm": 5.926973995328122, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 220682848, + "step": 1285 + }, + { + "epoch": 0.3380022358124548, + "loss": 0.21676884591579437, + "loss_ce": 0.007204629480838776, + "loss_iou": 0.390625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 220682848, + "step": 1285 + }, + { + "epoch": 0.3382652725718419, + "grad_norm": 7.996976963449769, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 220855140, + "step": 1286 + }, + { + "epoch": 0.3382652725718419, + "loss": 0.08844804763793945, + "loss_ce": 0.0003895749687217176, + "loss_iou": 0.59375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 220855140, + "step": 1286 + }, + { + "epoch": 0.338528309331229, + "grad_norm": 8.499510082866491, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 221027260, + "step": 1287 + }, + { + "epoch": 0.338528309331229, + "loss": 0.12149707973003387, + "loss_ce": 0.0004948831629008055, + "loss_iou": 0.5703125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 221027260, + "step": 1287 + }, + { + "epoch": 0.3387913460906162, + "grad_norm": 5.973131676418204, + "learning_rate": 5e-06, + "loss": 0.1328, + "num_input_tokens_seen": 221199416, + "step": 1288 + }, + { + "epoch": 0.3387913460906162, + "loss": 0.09466279298067093, + "loss_ce": 0.00011933030327782035, + "loss_iou": 0.671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 221199416, + "step": 1288 + }, + { + "epoch": 0.3390543828500033, + "grad_norm": 4.808893821005273, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 221371720, + "step": 1289 + }, + { + "epoch": 0.3390543828500033, + "loss": 0.09180793166160583, + "loss_ce": 0.0001941679511219263, + "loss_iou": 0.54296875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 221371720, + "step": 1289 + }, + { + "epoch": 0.33931741960939044, + "grad_norm": 7.016843255216574, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 221542276, + "step": 1290 + }, + { + "epoch": 0.33931741960939044, + "loss": 0.19763490557670593, + "loss_ce": 0.0007354922126978636, + "loss_iou": 0.44921875, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 221542276, + "step": 1290 + }, + { + "epoch": 0.33958045636877754, + "grad_norm": 6.06110322351886, + "learning_rate": 5e-06, + "loss": 0.1227, + "num_input_tokens_seen": 221714344, + "step": 1291 + }, + { + "epoch": 0.33958045636877754, + "loss": 0.15020573139190674, + "loss_ce": 0.0027447929605841637, + "loss_iou": 0.4375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 221714344, + "step": 1291 + }, + { + "epoch": 0.33984349312816464, + "grad_norm": 8.158154012506673, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 221886284, + "step": 1292 + }, + { + "epoch": 0.33984349312816464, + "loss": 0.06007716804742813, + "loss_ce": 0.0002016789367189631, + "loss_iou": 0.609375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 221886284, + "step": 1292 + }, + { + "epoch": 0.3401065298875518, + "grad_norm": 4.887380704800873, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 222058520, + "step": 1293 + }, + { + "epoch": 0.3401065298875518, + "loss": 0.09021516144275665, + "loss_ce": 0.0008291734848171473, + "loss_iou": 0.45703125, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 222058520, + "step": 1293 + }, + { + "epoch": 0.3403695666469389, + "grad_norm": 13.907525065592614, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 222231156, + "step": 1294 + }, + { + "epoch": 0.3403695666469389, + "loss": 0.22907251119613647, + "loss_ce": 0.0007704915478825569, + "loss_iou": 0.6640625, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 222231156, + "step": 1294 + }, + { + "epoch": 0.340632603406326, + "grad_norm": 7.536500056913184, + "learning_rate": 5e-06, + "loss": 0.1822, + "num_input_tokens_seen": 222403476, + "step": 1295 + }, + { + "epoch": 0.340632603406326, + "loss": 0.1321713924407959, + "loss_ce": 0.002853148616850376, + "loss_iou": 0.5703125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 222403476, + "step": 1295 + }, + { + "epoch": 0.34089564016571317, + "grad_norm": 8.538688840874034, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 222575540, + "step": 1296 + }, + { + "epoch": 0.34089564016571317, + "loss": 0.09548459947109222, + "loss_ce": 0.0024365070275962353, + "loss_iou": 0.5625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 222575540, + "step": 1296 + }, + { + "epoch": 0.34115867692510027, + "grad_norm": 4.787366743041949, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 222747704, + "step": 1297 + }, + { + "epoch": 0.34115867692510027, + "loss": 0.1637537181377411, + "loss_ce": 0.0010034843580797315, + "loss_iou": 0.45703125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 222747704, + "step": 1297 + }, + { + "epoch": 0.3414217136844874, + "grad_norm": 6.917043775641942, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 222919896, + "step": 1298 + }, + { + "epoch": 0.3414217136844874, + "loss": 0.05339755862951279, + "loss_ce": 0.001883886638097465, + "loss_iou": 0.419921875, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 222919896, + "step": 1298 + }, + { + "epoch": 0.34168475044387453, + "grad_norm": 8.403015648704343, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 223092124, + "step": 1299 + }, + { + "epoch": 0.34168475044387453, + "loss": 0.15119820833206177, + "loss_ce": 0.0011737870518118143, + "loss_iou": 0.62890625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 223092124, + "step": 1299 + }, + { + "epoch": 0.34194778720326163, + "grad_norm": 7.291522615294142, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 223264436, + "step": 1300 + }, + { + "epoch": 0.34194778720326163, + "loss": 0.14489704370498657, + "loss_ce": 0.0020442616660147905, + "loss_iou": 0.41796875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 223264436, + "step": 1300 + }, + { + "epoch": 0.3422108239626488, + "grad_norm": 8.380731335959293, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 223436588, + "step": 1301 + }, + { + "epoch": 0.3422108239626488, + "loss": 0.15774638950824738, + "loss_ce": 0.0007334585534408689, + "loss_iou": 0.51171875, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 223436588, + "step": 1301 + }, + { + "epoch": 0.3424738607220359, + "grad_norm": 4.869592726119341, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 223607388, + "step": 1302 + }, + { + "epoch": 0.3424738607220359, + "loss": 0.15213216841220856, + "loss_ce": 0.0007344604237005115, + "loss_iou": 0.5546875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 223607388, + "step": 1302 + }, + { + "epoch": 0.34273689748142305, + "grad_norm": 5.639489902327201, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 223779372, + "step": 1303 + }, + { + "epoch": 0.34273689748142305, + "loss": 0.17259347438812256, + "loss_ce": 0.0032819565385580063, + "loss_iou": 0.490234375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 223779372, + "step": 1303 + }, + { + "epoch": 0.34299993424081016, + "grad_norm": 9.093938521490475, + "learning_rate": 5e-06, + "loss": 0.2003, + "num_input_tokens_seen": 223951388, + "step": 1304 + }, + { + "epoch": 0.34299993424081016, + "loss": 0.22136257588863373, + "loss_ce": 0.0015749745070934296, + "loss_iou": 0.62109375, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 223951388, + "step": 1304 + }, + { + "epoch": 0.34326297100019726, + "grad_norm": 6.510612178547868, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 224123456, + "step": 1305 + }, + { + "epoch": 0.34326297100019726, + "loss": 0.14626947045326233, + "loss_ce": 0.0030199564062058926, + "loss_iou": 0.455078125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 224123456, + "step": 1305 + }, + { + "epoch": 0.3435260077595844, + "grad_norm": 6.4702586677675855, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 224295676, + "step": 1306 + }, + { + "epoch": 0.3435260077595844, + "loss": 0.20608918368816376, + "loss_ce": 0.0004007022944279015, + "loss_iou": 0.671875, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 224295676, + "step": 1306 + }, + { + "epoch": 0.3437890445189715, + "grad_norm": 24.89374059454028, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 224467880, + "step": 1307 + }, + { + "epoch": 0.3437890445189715, + "loss": 0.12941348552703857, + "loss_ce": 0.000507236341945827, + "loss_iou": 0.58203125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 224467880, + "step": 1307 + }, + { + "epoch": 0.3440520812783586, + "grad_norm": 8.792041343116132, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 224640076, + "step": 1308 + }, + { + "epoch": 0.3440520812783586, + "loss": 0.18377459049224854, + "loss_ce": 0.0007301591685973108, + "loss_iou": 0.53125, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 224640076, + "step": 1308 + }, + { + "epoch": 0.3443151180377458, + "grad_norm": 7.075463244307207, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 224812516, + "step": 1309 + }, + { + "epoch": 0.3443151180377458, + "loss": 0.10930690169334412, + "loss_ce": 0.0017934793140739202, + "loss_iou": 0.5078125, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 224812516, + "step": 1309 + }, + { + "epoch": 0.3445781547971329, + "grad_norm": 5.622949079750027, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 224985136, + "step": 1310 + }, + { + "epoch": 0.3445781547971329, + "loss": 0.13441142439842224, + "loss_ce": 0.0033689369447529316, + "loss_iou": 0.625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 224985136, + "step": 1310 + }, + { + "epoch": 0.34484119155652004, + "grad_norm": 6.126844030056675, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 225157120, + "step": 1311 + }, + { + "epoch": 0.34484119155652004, + "loss": 0.1415342092514038, + "loss_ce": 0.000787146098446101, + "loss_iou": 0.59375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 225157120, + "step": 1311 + }, + { + "epoch": 0.34510422831590715, + "grad_norm": 5.277811585596711, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 225329012, + "step": 1312 + }, + { + "epoch": 0.34510422831590715, + "loss": 0.08680924028158188, + "loss_ce": 0.006761634722352028, + "loss_iou": 0.58203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 225329012, + "step": 1312 + }, + { + "epoch": 0.34536726507529425, + "grad_norm": 4.953551061650772, + "learning_rate": 5e-06, + "loss": 0.0972, + "num_input_tokens_seen": 225501536, + "step": 1313 + }, + { + "epoch": 0.34536726507529425, + "loss": 0.10339295864105225, + "loss_ce": 0.0035394439473748207, + "loss_iou": 0.451171875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 225501536, + "step": 1313 + }, + { + "epoch": 0.3456303018346814, + "grad_norm": 14.376989767483847, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 225673976, + "step": 1314 + }, + { + "epoch": 0.3456303018346814, + "loss": 0.13013647496700287, + "loss_ce": 0.0019016144797205925, + "loss_iou": 0.76953125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 225673976, + "step": 1314 + }, + { + "epoch": 0.3458933385940685, + "grad_norm": 4.351015648559307, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 225844284, + "step": 1315 + }, + { + "epoch": 0.3458933385940685, + "loss": 0.07338554412126541, + "loss_ce": 0.0006926720961928368, + "loss_iou": 0.478515625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 225844284, + "step": 1315 + }, + { + "epoch": 0.34615637535345567, + "grad_norm": 8.704702003668412, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 226014724, + "step": 1316 + }, + { + "epoch": 0.34615637535345567, + "loss": 0.13227002322673798, + "loss_ce": 0.00031201643287204206, + "loss_iou": 0.58984375, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 226014724, + "step": 1316 + }, + { + "epoch": 0.3464194121128428, + "grad_norm": 9.402885662503637, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 226186864, + "step": 1317 + }, + { + "epoch": 0.3464194121128428, + "loss": 0.13149945437908173, + "loss_ce": 0.0032035536132752895, + "loss_iou": 0.466796875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 226186864, + "step": 1317 + }, + { + "epoch": 0.3466824488722299, + "grad_norm": 5.4441598572448475, + "learning_rate": 5e-06, + "loss": 0.1247, + "num_input_tokens_seen": 226359284, + "step": 1318 + }, + { + "epoch": 0.3466824488722299, + "loss": 0.09241662174463272, + "loss_ce": 0.00010094831668538973, + "loss_iou": 0.68359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 226359284, + "step": 1318 + }, + { + "epoch": 0.34694548563161703, + "grad_norm": 5.838383036724073, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 226531468, + "step": 1319 + }, + { + "epoch": 0.34694548563161703, + "loss": 0.10588014125823975, + "loss_ce": 0.00016725034220144153, + "loss_iou": 0.56640625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 226531468, + "step": 1319 + }, + { + "epoch": 0.34720852239100414, + "grad_norm": 6.350689517126913, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 226703648, + "step": 1320 + }, + { + "epoch": 0.34720852239100414, + "loss": 0.11672288179397583, + "loss_ce": 0.0005119539564475417, + "loss_iou": 0.4765625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 226703648, + "step": 1320 + }, + { + "epoch": 0.34747155915039124, + "grad_norm": 13.847941425419814, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 226873276, + "step": 1321 + }, + { + "epoch": 0.34747155915039124, + "loss": 0.1297488510608673, + "loss_ce": 0.0002322565414942801, + "loss_iou": 0.435546875, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 226873276, + "step": 1321 + }, + { + "epoch": 0.3477345959097784, + "grad_norm": 23.595553939708278, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 227045376, + "step": 1322 + }, + { + "epoch": 0.3477345959097784, + "loss": 0.1232631504535675, + "loss_ce": 0.00033834436908364296, + "loss_iou": 0.63671875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 227045376, + "step": 1322 + }, + { + "epoch": 0.3479976326691655, + "grad_norm": 44.36951009678579, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 227215620, + "step": 1323 + }, + { + "epoch": 0.3479976326691655, + "loss": 0.07254654914140701, + "loss_ce": 0.000280924781691283, + "loss_iou": 0.51953125, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 227215620, + "step": 1323 + }, + { + "epoch": 0.34826066942855266, + "grad_norm": 5.242558618670207, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 227387716, + "step": 1324 + }, + { + "epoch": 0.34826066942855266, + "loss": 0.11591322720050812, + "loss_ce": 0.0015028227353468537, + "loss_iou": 0.5703125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 227387716, + "step": 1324 + }, + { + "epoch": 0.34852370618793976, + "grad_norm": 6.076654000097945, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 227560176, + "step": 1325 + }, + { + "epoch": 0.34852370618793976, + "loss": 0.09558144956827164, + "loss_ce": 0.0009464399190619588, + "loss_iou": 0.56640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 227560176, + "step": 1325 + }, + { + "epoch": 0.34878674294732687, + "grad_norm": 8.445480824601002, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 227732380, + "step": 1326 + }, + { + "epoch": 0.34878674294732687, + "loss": 0.18825021386146545, + "loss_ce": 0.0008112426148727536, + "loss_iou": 0.376953125, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 227732380, + "step": 1326 + }, + { + "epoch": 0.349049779706714, + "grad_norm": 7.063227159799308, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 227904472, + "step": 1327 + }, + { + "epoch": 0.349049779706714, + "loss": 0.23714260756969452, + "loss_ce": 0.0035610701888799667, + "loss_iou": 0.466796875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 227904472, + "step": 1327 + }, + { + "epoch": 0.34931281646610113, + "grad_norm": 5.912334076761878, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 228076948, + "step": 1328 + }, + { + "epoch": 0.34931281646610113, + "loss": 0.14180362224578857, + "loss_ce": 0.0037115837913006544, + "loss_iou": 0.65234375, + "loss_num": 0.0277099609375, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 228076948, + "step": 1328 + }, + { + "epoch": 0.3495758532254883, + "grad_norm": 6.00754661823903, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 228248972, + "step": 1329 + }, + { + "epoch": 0.3495758532254883, + "loss": 0.14186972379684448, + "loss_ce": 0.0010616088984534144, + "loss_iou": 0.490234375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 228248972, + "step": 1329 + }, + { + "epoch": 0.3498388899848754, + "grad_norm": 5.090693648053783, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 228421168, + "step": 1330 + }, + { + "epoch": 0.3498388899848754, + "loss": 0.08804985880851746, + "loss_ce": 0.0005559585988521576, + "loss_iou": 0.466796875, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 228421168, + "step": 1330 + }, + { + "epoch": 0.3501019267442625, + "grad_norm": 5.2360127706674335, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 228593364, + "step": 1331 + }, + { + "epoch": 0.3501019267442625, + "loss": 0.09556721895933151, + "loss_ce": 0.0012373843928799033, + "loss_iou": 0.515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 228593364, + "step": 1331 + }, + { + "epoch": 0.35036496350364965, + "grad_norm": 7.817751449255831, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 228765480, + "step": 1332 + }, + { + "epoch": 0.35036496350364965, + "loss": 0.19872622191905975, + "loss_ce": 0.0040851132944226265, + "loss_iou": 0.427734375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 228765480, + "step": 1332 + }, + { + "epoch": 0.35062800026303675, + "grad_norm": 6.154736113026185, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 228938028, + "step": 1333 + }, + { + "epoch": 0.35062800026303675, + "loss": 0.08339493721723557, + "loss_ce": 0.0010737692937254906, + "loss_iou": 0.5703125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 228938028, + "step": 1333 + }, + { + "epoch": 0.35089103702242386, + "grad_norm": 9.10695022243035, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 229110180, + "step": 1334 + }, + { + "epoch": 0.35089103702242386, + "loss": 0.17051678895950317, + "loss_ce": 0.0020597607363015413, + "loss_iou": 0.65234375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 229110180, + "step": 1334 + }, + { + "epoch": 0.351154073781811, + "grad_norm": 6.861938259215169, + "learning_rate": 5e-06, + "loss": 0.192, + "num_input_tokens_seen": 229282592, + "step": 1335 + }, + { + "epoch": 0.351154073781811, + "loss": 0.20712369680404663, + "loss_ce": 0.003968170844018459, + "loss_iou": 0.52734375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 229282592, + "step": 1335 + }, + { + "epoch": 0.3514171105411981, + "grad_norm": 6.702358306191994, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 229454940, + "step": 1336 + }, + { + "epoch": 0.3514171105411981, + "loss": 0.1241491287946701, + "loss_ce": 0.002719694282859564, + "loss_iou": 0.6015625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 229454940, + "step": 1336 + }, + { + "epoch": 0.3516801473005853, + "grad_norm": 25.54665143162104, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 229626740, + "step": 1337 + }, + { + "epoch": 0.3516801473005853, + "loss": 0.13124999403953552, + "loss_ce": 0.0006042490131221712, + "loss_iou": 0.50390625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 229626740, + "step": 1337 + }, + { + "epoch": 0.3519431840599724, + "grad_norm": 10.986092691383933, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 229795396, + "step": 1338 + }, + { + "epoch": 0.3519431840599724, + "loss": 0.07223416119813919, + "loss_ce": 0.00028897292213514447, + "loss_iou": 0.60546875, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 229795396, + "step": 1338 + }, + { + "epoch": 0.3522062208193595, + "grad_norm": 9.146823823601064, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 229967592, + "step": 1339 + }, + { + "epoch": 0.3522062208193595, + "loss": 0.14658141136169434, + "loss_ce": 0.00021910574287176132, + "loss_iou": 0.58203125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 229967592, + "step": 1339 + }, + { + "epoch": 0.35246925757874664, + "grad_norm": 12.31823820919867, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 230139960, + "step": 1340 + }, + { + "epoch": 0.35246925757874664, + "loss": 0.07799485325813293, + "loss_ce": 0.0002360617509111762, + "loss_iou": 0.6328125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 230139960, + "step": 1340 + }, + { + "epoch": 0.35273229433813375, + "grad_norm": 6.277536451715721, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 230312536, + "step": 1341 + }, + { + "epoch": 0.35273229433813375, + "loss": 0.15891912579536438, + "loss_ce": 0.0026691171806305647, + "loss_iou": 0.5625, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 230312536, + "step": 1341 + }, + { + "epoch": 0.3529953310975209, + "grad_norm": 10.816707100229003, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 230482960, + "step": 1342 + }, + { + "epoch": 0.3529953310975209, + "loss": 0.10626394301652908, + "loss_ce": 0.0003069115919061005, + "loss_iou": 0.64453125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 230482960, + "step": 1342 + }, + { + "epoch": 0.353258367856908, + "grad_norm": 7.461204110486792, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 230655564, + "step": 1343 + }, + { + "epoch": 0.353258367856908, + "loss": 0.14832650125026703, + "loss_ce": 0.005412681493908167, + "loss_iou": 0.431640625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 230655564, + "step": 1343 + }, + { + "epoch": 0.3535214046162951, + "grad_norm": 3.829230892926663, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 230827776, + "step": 1344 + }, + { + "epoch": 0.3535214046162951, + "loss": 0.11698116362094879, + "loss_ce": 0.0007397143635898829, + "loss_iou": 0.59765625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 230827776, + "step": 1344 + }, + { + "epoch": 0.35378444137568227, + "grad_norm": 7.33327936841338, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 230999976, + "step": 1345 + }, + { + "epoch": 0.35378444137568227, + "loss": 0.18063044548034668, + "loss_ce": 0.005978355184197426, + "loss_iou": 0.41796875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 230999976, + "step": 1345 + }, + { + "epoch": 0.35404747813506937, + "grad_norm": 4.588965012769037, + "learning_rate": 5e-06, + "loss": 0.1907, + "num_input_tokens_seen": 231171944, + "step": 1346 + }, + { + "epoch": 0.35404747813506937, + "loss": 0.19280412793159485, + "loss_ce": 0.0011537342797964811, + "loss_iou": 0.60546875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 231171944, + "step": 1346 + }, + { + "epoch": 0.3543105148944565, + "grad_norm": 15.486355760387976, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 231342336, + "step": 1347 + }, + { + "epoch": 0.3543105148944565, + "loss": 0.11234519630670547, + "loss_ce": 0.0008644815534353256, + "loss_iou": 0.49609375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 231342336, + "step": 1347 + }, + { + "epoch": 0.35457355165384363, + "grad_norm": 17.3485969876628, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 231514328, + "step": 1348 + }, + { + "epoch": 0.35457355165384363, + "loss": 0.12869834899902344, + "loss_ce": 0.0009517711587250233, + "loss_iou": 0.68359375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 231514328, + "step": 1348 + }, + { + "epoch": 0.35483658841323074, + "grad_norm": 19.41812876236682, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 231686780, + "step": 1349 + }, + { + "epoch": 0.35483658841323074, + "loss": 0.13225057721138, + "loss_ce": 0.0008724014624021947, + "loss_iou": 0.5625, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 231686780, + "step": 1349 + }, + { + "epoch": 0.3550996251726179, + "grad_norm": 10.841969185593292, + "learning_rate": 5e-06, + "loss": 0.1386, + "num_input_tokens_seen": 231858892, + "step": 1350 + }, + { + "epoch": 0.3550996251726179, + "loss": 0.1675529032945633, + "loss_ce": 0.00019450299441814423, + "loss_iou": 0.6875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 231858892, + "step": 1350 + }, + { + "epoch": 0.355362661932005, + "grad_norm": 9.874209565962254, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 232029240, + "step": 1351 + }, + { + "epoch": 0.355362661932005, + "loss": 0.13038085401058197, + "loss_ce": 0.00037596753099933267, + "loss_iou": 0.63671875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 232029240, + "step": 1351 + }, + { + "epoch": 0.3556256986913921, + "grad_norm": 6.572064767991026, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 232199320, + "step": 1352 + }, + { + "epoch": 0.3556256986913921, + "loss": 0.11705964803695679, + "loss_ce": 0.00029938769876025617, + "loss_iou": 0.439453125, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 232199320, + "step": 1352 + }, + { + "epoch": 0.35588873545077926, + "grad_norm": 19.449170559601413, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 232371336, + "step": 1353 + }, + { + "epoch": 0.35588873545077926, + "loss": 0.10365074872970581, + "loss_ce": 0.00291221821680665, + "loss_iou": 0.470703125, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 232371336, + "step": 1353 + }, + { + "epoch": 0.35615177221016636, + "grad_norm": 6.060818585296291, + "learning_rate": 5e-06, + "loss": 0.1758, + "num_input_tokens_seen": 232543572, + "step": 1354 + }, + { + "epoch": 0.35615177221016636, + "loss": 0.22082458436489105, + "loss_ce": 0.005858768709003925, + "loss_iou": 0.56640625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 232543572, + "step": 1354 + }, + { + "epoch": 0.3564148089695535, + "grad_norm": 5.2405872305191386, + "learning_rate": 5e-06, + "loss": 0.1508, + "num_input_tokens_seen": 232715652, + "step": 1355 + }, + { + "epoch": 0.3564148089695535, + "loss": 0.1176377683877945, + "loss_ce": 0.001854072092100978, + "loss_iou": 0.55859375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 232715652, + "step": 1355 + }, + { + "epoch": 0.3566778457289406, + "grad_norm": 9.250810729782357, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 232888076, + "step": 1356 + }, + { + "epoch": 0.3566778457289406, + "loss": 0.2215210646390915, + "loss_ce": 0.0008179338765330613, + "loss_iou": 0.59375, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 232888076, + "step": 1356 + }, + { + "epoch": 0.3569408824883277, + "grad_norm": 5.562200706936175, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 233060156, + "step": 1357 + }, + { + "epoch": 0.3569408824883277, + "loss": 0.15043510496616364, + "loss_ce": 0.0006548258243128657, + "loss_iou": 0.48828125, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 233060156, + "step": 1357 + }, + { + "epoch": 0.3572039192477149, + "grad_norm": 5.308030458771483, + "learning_rate": 5e-06, + "loss": 0.0933, + "num_input_tokens_seen": 233232200, + "step": 1358 + }, + { + "epoch": 0.3572039192477149, + "loss": 0.09737985581159592, + "loss_ce": 0.0021955338306725025, + "loss_iou": 0.482421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 233232200, + "step": 1358 + }, + { + "epoch": 0.357466956007102, + "grad_norm": 22.688298388253326, + "learning_rate": 5e-06, + "loss": 0.1826, + "num_input_tokens_seen": 233402524, + "step": 1359 + }, + { + "epoch": 0.357466956007102, + "loss": 0.13730812072753906, + "loss_ce": 0.000497827713843435, + "loss_iou": 0.578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 233402524, + "step": 1359 + }, + { + "epoch": 0.3577299927664891, + "grad_norm": 7.149196234866028, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 233574352, + "step": 1360 + }, + { + "epoch": 0.3577299927664891, + "loss": 0.13957476615905762, + "loss_ce": 0.002062564715743065, + "loss_iou": 0.6328125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 233574352, + "step": 1360 + }, + { + "epoch": 0.35799302952587625, + "grad_norm": 4.747458347619557, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 233746476, + "step": 1361 + }, + { + "epoch": 0.35799302952587625, + "loss": 0.09390648454427719, + "loss_ce": 0.001377185108140111, + "loss_iou": 0.44921875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 233746476, + "step": 1361 + }, + { + "epoch": 0.35825606628526335, + "grad_norm": 4.140560580446189, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 233918696, + "step": 1362 + }, + { + "epoch": 0.35825606628526335, + "loss": 0.10030673444271088, + "loss_ce": 0.0024216112215071917, + "loss_iou": 0.5625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 233918696, + "step": 1362 + }, + { + "epoch": 0.3585191030446505, + "grad_norm": 5.484628612516693, + "learning_rate": 5e-06, + "loss": 0.1618, + "num_input_tokens_seen": 234090660, + "step": 1363 + }, + { + "epoch": 0.3585191030446505, + "loss": 0.1829943060874939, + "loss_ce": 0.001201094826683402, + "loss_iou": 0.443359375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 234090660, + "step": 1363 + }, + { + "epoch": 0.3587821398040376, + "grad_norm": 7.875545161330649, + "learning_rate": 5e-06, + "loss": 0.0939, + "num_input_tokens_seen": 234262444, + "step": 1364 + }, + { + "epoch": 0.3587821398040376, + "loss": 0.10195118188858032, + "loss_ce": 0.00035817097523249686, + "loss_iou": 0.482421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 234262444, + "step": 1364 + }, + { + "epoch": 0.3590451765634247, + "grad_norm": 5.112293535327179, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 234434852, + "step": 1365 + }, + { + "epoch": 0.3590451765634247, + "loss": 0.12388080358505249, + "loss_ce": 0.0015663461526855826, + "loss_iou": 0.5625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 234434852, + "step": 1365 + }, + { + "epoch": 0.3593082133228119, + "grad_norm": 7.038268029638918, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 234607044, + "step": 1366 + }, + { + "epoch": 0.3593082133228119, + "loss": 0.10762982070446014, + "loss_ce": 0.0011539864353835583, + "loss_iou": 0.6328125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 234607044, + "step": 1366 + }, + { + "epoch": 0.359571250082199, + "grad_norm": 23.600297531220633, + "learning_rate": 5e-06, + "loss": 0.1188, + "num_input_tokens_seen": 234779280, + "step": 1367 + }, + { + "epoch": 0.359571250082199, + "loss": 0.06237838417291641, + "loss_ce": 0.0010838248999789357, + "loss_iou": 0.470703125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 234779280, + "step": 1367 + }, + { + "epoch": 0.35983428684158614, + "grad_norm": 3.7489621345618547, + "learning_rate": 5e-06, + "loss": 0.1625, + "num_input_tokens_seen": 234951544, + "step": 1368 + }, + { + "epoch": 0.35983428684158614, + "loss": 0.09055154025554657, + "loss_ce": 0.0044309417717158794, + "loss_iou": 0.609375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 234951544, + "step": 1368 + }, + { + "epoch": 0.36009732360097324, + "grad_norm": 11.577809346393625, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 235124044, + "step": 1369 + }, + { + "epoch": 0.36009732360097324, + "loss": 0.10378709435462952, + "loss_ce": 0.0021635598968714476, + "loss_iou": 0.57421875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 235124044, + "step": 1369 + }, + { + "epoch": 0.36036036036036034, + "grad_norm": 12.169579645431945, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 235295776, + "step": 1370 + }, + { + "epoch": 0.36036036036036034, + "loss": 0.26028677821159363, + "loss_ce": 0.00123831897508353, + "loss_iou": 0.4140625, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 235295776, + "step": 1370 + }, + { + "epoch": 0.3606233971197475, + "grad_norm": 6.068163006201439, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 235467876, + "step": 1371 + }, + { + "epoch": 0.3606233971197475, + "loss": 0.12217633426189423, + "loss_ce": 0.0007163715199567378, + "loss_iou": 0.55078125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 235467876, + "step": 1371 + }, + { + "epoch": 0.3608864338791346, + "grad_norm": 5.0641588944194575, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 235640260, + "step": 1372 + }, + { + "epoch": 0.3608864338791346, + "loss": 0.13001395761966705, + "loss_ce": 0.003243940882384777, + "loss_iou": 0.5546875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 235640260, + "step": 1372 + }, + { + "epoch": 0.3611494706385217, + "grad_norm": 5.364003970603183, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 235812276, + "step": 1373 + }, + { + "epoch": 0.3611494706385217, + "loss": 0.1422898769378662, + "loss_ce": 0.002214185893535614, + "loss_iou": 0.57421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 235812276, + "step": 1373 + }, + { + "epoch": 0.36141250739790887, + "grad_norm": 20.072012728606875, + "learning_rate": 5e-06, + "loss": 0.1562, + "num_input_tokens_seen": 235984384, + "step": 1374 + }, + { + "epoch": 0.36141250739790887, + "loss": 0.1751733273267746, + "loss_ce": 0.0007043338264338672, + "loss_iou": NaN, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 235984384, + "step": 1374 + }, + { + "epoch": 0.36167554415729597, + "grad_norm": 9.286393703464922, + "learning_rate": 5e-06, + "loss": 0.1541, + "num_input_tokens_seen": 236156884, + "step": 1375 + }, + { + "epoch": 0.36167554415729597, + "loss": 0.21126675605773926, + "loss_ce": 0.0010006362572312355, + "loss_iou": 0.431640625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 236156884, + "step": 1375 + }, + { + "epoch": 0.36193858091668313, + "grad_norm": 11.903565521275382, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 236328968, + "step": 1376 + }, + { + "epoch": 0.36193858091668313, + "loss": 0.12723658978939056, + "loss_ce": 0.0002834574261214584, + "loss_iou": 0.57421875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 236328968, + "step": 1376 + }, + { + "epoch": 0.36220161767607023, + "grad_norm": 16.599820050329747, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 236501060, + "step": 1377 + }, + { + "epoch": 0.36220161767607023, + "loss": 0.11763446778059006, + "loss_ce": 0.0014235277194529772, + "loss_iou": 0.57421875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 236501060, + "step": 1377 + }, + { + "epoch": 0.36246465443545733, + "grad_norm": 12.84169775199896, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 236673348, + "step": 1378 + }, + { + "epoch": 0.36246465443545733, + "loss": 0.1577835977077484, + "loss_ce": 0.00424966961145401, + "loss_iou": 0.703125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 236673348, + "step": 1378 + }, + { + "epoch": 0.3627276911948445, + "grad_norm": 4.530082641105361, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 236845952, + "step": 1379 + }, + { + "epoch": 0.3627276911948445, + "loss": 0.16972720623016357, + "loss_ce": 0.0046881334856152534, + "loss_iou": 0.546875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 236845952, + "step": 1379 + }, + { + "epoch": 0.3629907279542316, + "grad_norm": 4.453547029103764, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 237016412, + "step": 1380 + }, + { + "epoch": 0.3629907279542316, + "loss": 0.1337898075580597, + "loss_ce": 0.0024726560804992914, + "loss_iou": 0.58203125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 237016412, + "step": 1380 + }, + { + "epoch": 0.36325376471361875, + "grad_norm": 14.926786019286814, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 237188648, + "step": 1381 + }, + { + "epoch": 0.36325376471361875, + "loss": 0.09095098078250885, + "loss_ce": 0.0004968784051015973, + "loss_iou": 0.59765625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 237188648, + "step": 1381 + }, + { + "epoch": 0.36351680147300586, + "grad_norm": 5.283936723228349, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 237360968, + "step": 1382 + }, + { + "epoch": 0.36351680147300586, + "loss": 0.10726694762706757, + "loss_ce": 0.002011828124523163, + "loss_iou": 0.6640625, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 237360968, + "step": 1382 + }, + { + "epoch": 0.36377983823239296, + "grad_norm": 6.9353605673209175, + "learning_rate": 5e-06, + "loss": 0.1229, + "num_input_tokens_seen": 237533240, + "step": 1383 + }, + { + "epoch": 0.36377983823239296, + "loss": 0.12320241332054138, + "loss_ce": 0.0009337374940514565, + "loss_iou": 0.5390625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 237533240, + "step": 1383 + }, + { + "epoch": 0.3640428749917801, + "grad_norm": 7.872299034496559, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 237705412, + "step": 1384 + }, + { + "epoch": 0.3640428749917801, + "loss": 0.10494999587535858, + "loss_ce": 0.0007019541808404028, + "loss_iou": 0.5703125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 237705412, + "step": 1384 + }, + { + "epoch": 0.3643059117511672, + "grad_norm": 9.299219065335537, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 237875984, + "step": 1385 + }, + { + "epoch": 0.3643059117511672, + "loss": 0.09333762526512146, + "loss_ce": 0.0001979749504243955, + "loss_iou": 0.6171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 237875984, + "step": 1385 + }, + { + "epoch": 0.3645689485105543, + "grad_norm": 4.989443397895941, + "learning_rate": 5e-06, + "loss": 0.1464, + "num_input_tokens_seen": 238048268, + "step": 1386 + }, + { + "epoch": 0.3645689485105543, + "loss": 0.14278042316436768, + "loss_ce": 0.0023690357338637114, + "loss_iou": 0.55078125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 238048268, + "step": 1386 + }, + { + "epoch": 0.3648319852699415, + "grad_norm": 12.398705819599233, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 238218704, + "step": 1387 + }, + { + "epoch": 0.3648319852699415, + "loss": 0.18032394349575043, + "loss_ce": 0.000819545122794807, + "loss_iou": 0.5703125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 238218704, + "step": 1387 + }, + { + "epoch": 0.3650950220293286, + "grad_norm": 6.66114485908042, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 238390452, + "step": 1388 + }, + { + "epoch": 0.3650950220293286, + "loss": 0.23675096035003662, + "loss_ce": 0.0027726897969841957, + "loss_iou": 0.39453125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 238390452, + "step": 1388 + }, + { + "epoch": 0.36535805878871574, + "grad_norm": 9.52863817159708, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 238562708, + "step": 1389 + }, + { + "epoch": 0.36535805878871574, + "loss": 0.10956455767154694, + "loss_ce": 0.0007693836814723909, + "loss_iou": 0.6171875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 238562708, + "step": 1389 + }, + { + "epoch": 0.36562109554810285, + "grad_norm": 14.33207869070322, + "learning_rate": 5e-06, + "loss": 0.1554, + "num_input_tokens_seen": 238734768, + "step": 1390 + }, + { + "epoch": 0.36562109554810285, + "loss": 0.11770792305469513, + "loss_ce": 0.00018472480587661266, + "loss_iou": 0.71484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 238734768, + "step": 1390 + }, + { + "epoch": 0.36588413230748995, + "grad_norm": 6.704869940759653, + "learning_rate": 5e-06, + "loss": 0.1267, + "num_input_tokens_seen": 238906944, + "step": 1391 + }, + { + "epoch": 0.36588413230748995, + "loss": 0.12555649876594543, + "loss_ce": 0.0015330680180341005, + "loss_iou": 0.6953125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 238906944, + "step": 1391 + }, + { + "epoch": 0.3661471690668771, + "grad_norm": 7.947535402036419, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 239078728, + "step": 1392 + }, + { + "epoch": 0.3661471690668771, + "loss": 0.12712188065052032, + "loss_ce": 0.0004739244468510151, + "loss_iou": 0.40234375, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 239078728, + "step": 1392 + }, + { + "epoch": 0.3664102058262642, + "grad_norm": 21.67277442987669, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 239251004, + "step": 1393 + }, + { + "epoch": 0.3664102058262642, + "loss": 0.16826726496219635, + "loss_ce": 0.0013361112214624882, + "loss_iou": 0.58984375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 239251004, + "step": 1393 + }, + { + "epoch": 0.36667324258565137, + "grad_norm": 10.958283524277078, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 239418772, + "step": 1394 + }, + { + "epoch": 0.36667324258565137, + "loss": 0.05772838741540909, + "loss_ce": 0.0009962135227397084, + "loss_iou": 0.6171875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 239418772, + "step": 1394 + }, + { + "epoch": 0.3669362793450385, + "grad_norm": 6.917710662760069, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 239591128, + "step": 1395 + }, + { + "epoch": 0.3669362793450385, + "loss": 0.09961553663015366, + "loss_ce": 0.001104797120206058, + "loss_iou": 0.54296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 239591128, + "step": 1395 + }, + { + "epoch": 0.3671993161044256, + "grad_norm": 15.902726176045867, + "learning_rate": 5e-06, + "loss": 0.1675, + "num_input_tokens_seen": 239763344, + "step": 1396 + }, + { + "epoch": 0.3671993161044256, + "loss": 0.32926326990127563, + "loss_ce": 0.011209084652364254, + "loss_iou": 0.59765625, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 239763344, + "step": 1396 + }, + { + "epoch": 0.36746235286381274, + "grad_norm": 13.284413053795395, + "learning_rate": 5e-06, + "loss": 0.1494, + "num_input_tokens_seen": 239933592, + "step": 1397 + }, + { + "epoch": 0.36746235286381274, + "loss": 0.11202233284711838, + "loss_ce": 0.004936150275170803, + "loss_iou": 0.5625, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 239933592, + "step": 1397 + }, + { + "epoch": 0.36772538962319984, + "grad_norm": 4.642604596899854, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 240106096, + "step": 1398 + }, + { + "epoch": 0.36772538962319984, + "loss": 0.12524864077568054, + "loss_ce": 0.0025679690297693014, + "loss_iou": 0.6640625, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 240106096, + "step": 1398 + }, + { + "epoch": 0.36798842638258694, + "grad_norm": 6.5389674494027, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 240278216, + "step": 1399 + }, + { + "epoch": 0.36798842638258694, + "loss": 0.17912393808364868, + "loss_ce": 0.002244053641334176, + "loss_iou": 0.53125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 240278216, + "step": 1399 + }, + { + "epoch": 0.3682514631419741, + "grad_norm": 6.524251878336125, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 240450288, + "step": 1400 + }, + { + "epoch": 0.3682514631419741, + "loss": 0.1508733034133911, + "loss_ce": 0.00200856477022171, + "loss_iou": 0.57421875, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 240450288, + "step": 1400 + }, + { + "epoch": 0.3685144999013612, + "grad_norm": 7.825263953291389, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 240619860, + "step": 1401 + }, + { + "epoch": 0.3685144999013612, + "loss": 0.21069373190402985, + "loss_ce": 0.002014526631683111, + "loss_iou": 0.5234375, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 240619860, + "step": 1401 + }, + { + "epoch": 0.36877753666074836, + "grad_norm": 21.746555075327876, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 240792004, + "step": 1402 + }, + { + "epoch": 0.36877753666074836, + "loss": 0.10193300247192383, + "loss_ce": 0.003086569719016552, + "loss_iou": 0.546875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 240792004, + "step": 1402 + }, + { + "epoch": 0.36904057342013546, + "grad_norm": 13.416067582793307, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 240963952, + "step": 1403 + }, + { + "epoch": 0.36904057342013546, + "loss": 0.14882555603981018, + "loss_ce": 0.0034703421406447887, + "loss_iou": 0.482421875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 240963952, + "step": 1403 + }, + { + "epoch": 0.36930361017952257, + "grad_norm": 7.0351108134980604, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 241135992, + "step": 1404 + }, + { + "epoch": 0.36930361017952257, + "loss": 0.16983582079410553, + "loss_ce": 0.004186400678008795, + "loss_iou": 0.6328125, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 241135992, + "step": 1404 + }, + { + "epoch": 0.3695666469389097, + "grad_norm": 10.37111011550267, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 241306532, + "step": 1405 + }, + { + "epoch": 0.3695666469389097, + "loss": 0.13351476192474365, + "loss_ce": 0.0033267755061388016, + "loss_iou": 0.5078125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 241306532, + "step": 1405 + }, + { + "epoch": 0.36982968369829683, + "grad_norm": 4.1471191240598, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 241478648, + "step": 1406 + }, + { + "epoch": 0.36982968369829683, + "loss": 0.08705037832260132, + "loss_ce": 0.0007466700626537204, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 241478648, + "step": 1406 + }, + { + "epoch": 0.370092720457684, + "grad_norm": 4.611801393612808, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 241650796, + "step": 1407 + }, + { + "epoch": 0.370092720457684, + "loss": 0.22668591141700745, + "loss_ce": 0.0017713564448058605, + "loss_iou": 0.49609375, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 241650796, + "step": 1407 + }, + { + "epoch": 0.3703557572170711, + "grad_norm": 8.388794391473104, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 241822848, + "step": 1408 + }, + { + "epoch": 0.3703557572170711, + "loss": 0.11565081030130386, + "loss_ce": 0.006123221945017576, + "loss_iou": 0.48828125, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 241822848, + "step": 1408 + }, + { + "epoch": 0.3706187939764582, + "grad_norm": 4.295439998788119, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 241994956, + "step": 1409 + }, + { + "epoch": 0.3706187939764582, + "loss": 0.19000545144081116, + "loss_ce": 0.0015594041906297207, + "loss_iou": 0.59375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 241994956, + "step": 1409 + }, + { + "epoch": 0.37088183073584535, + "grad_norm": 5.005427734613651, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 242167240, + "step": 1410 + }, + { + "epoch": 0.37088183073584535, + "loss": 0.10752324759960175, + "loss_ce": 0.00040654875920154154, + "loss_iou": 0.609375, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 242167240, + "step": 1410 + }, + { + "epoch": 0.37114486749523246, + "grad_norm": 5.984568060178908, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 242339532, + "step": 1411 + }, + { + "epoch": 0.37114486749523246, + "loss": 0.14311346411705017, + "loss_ce": 0.0001386186049785465, + "loss_iou": 0.47265625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 242339532, + "step": 1411 + }, + { + "epoch": 0.37140790425461956, + "grad_norm": 43.56689122924667, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 242511708, + "step": 1412 + }, + { + "epoch": 0.37140790425461956, + "loss": 0.127020463347435, + "loss_ce": 0.0014406184200197458, + "loss_iou": 0.52734375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 242511708, + "step": 1412 + }, + { + "epoch": 0.3716709410140067, + "grad_norm": 8.230300180002782, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 242684000, + "step": 1413 + }, + { + "epoch": 0.3716709410140067, + "loss": 0.1775522530078888, + "loss_ce": 0.000489264028146863, + "loss_iou": 0.62109375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 242684000, + "step": 1413 + }, + { + "epoch": 0.3719339777733938, + "grad_norm": 7.623956423887346, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 242856400, + "step": 1414 + }, + { + "epoch": 0.3719339777733938, + "loss": 0.172633096575737, + "loss_ce": 0.0017346586100757122, + "loss_iou": 0.60546875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 242856400, + "step": 1414 + }, + { + "epoch": 0.372197014532781, + "grad_norm": 5.086262256381093, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 243028760, + "step": 1415 + }, + { + "epoch": 0.372197014532781, + "loss": 0.11333785206079483, + "loss_ce": 0.0045731994323432446, + "loss_iou": 0.5546875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 243028760, + "step": 1415 + }, + { + "epoch": 0.3724600512921681, + "grad_norm": 7.319953499179629, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 243200888, + "step": 1416 + }, + { + "epoch": 0.3724600512921681, + "loss": 0.12842552363872528, + "loss_ce": 0.0017775753512978554, + "loss_iou": 0.69921875, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 243200888, + "step": 1416 + }, + { + "epoch": 0.3727230880515552, + "grad_norm": 7.14806250434601, + "learning_rate": 5e-06, + "loss": 0.1676, + "num_input_tokens_seen": 243373224, + "step": 1417 + }, + { + "epoch": 0.3727230880515552, + "loss": 0.17957568168640137, + "loss_ce": 0.004038581624627113, + "loss_iou": 0.4765625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 243373224, + "step": 1417 + }, + { + "epoch": 0.37298612481094234, + "grad_norm": 10.549476863868529, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 243545548, + "step": 1418 + }, + { + "epoch": 0.37298612481094234, + "loss": 0.14556309580802917, + "loss_ce": 0.0017947773449122906, + "loss_iou": 0.61328125, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 243545548, + "step": 1418 + }, + { + "epoch": 0.37324916157032945, + "grad_norm": 10.0643806609024, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 243715796, + "step": 1419 + }, + { + "epoch": 0.37324916157032945, + "loss": 0.13023152947425842, + "loss_ce": 0.0014473494375124574, + "loss_iou": 0.5703125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 243715796, + "step": 1419 + }, + { + "epoch": 0.3735121983297166, + "grad_norm": 6.667777106999036, + "learning_rate": 5e-06, + "loss": 0.1642, + "num_input_tokens_seen": 243886244, + "step": 1420 + }, + { + "epoch": 0.3735121983297166, + "loss": 0.21719014644622803, + "loss_ce": 0.0024379536043852568, + "loss_iou": 0.40625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 243886244, + "step": 1420 + }, + { + "epoch": 0.3737752350891037, + "grad_norm": 6.203781395805212, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 244058256, + "step": 1421 + }, + { + "epoch": 0.3737752350891037, + "loss": 0.14644312858581543, + "loss_ce": 0.003773454576730728, + "loss_iou": 0.50390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 244058256, + "step": 1421 + }, + { + "epoch": 0.3740382718484908, + "grad_norm": 11.121295779991378, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 244230348, + "step": 1422 + }, + { + "epoch": 0.3740382718484908, + "loss": 0.13860073685646057, + "loss_ce": 0.004567527212202549, + "loss_iou": 0.41796875, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 244230348, + "step": 1422 + }, + { + "epoch": 0.37430130860787797, + "grad_norm": 8.262433746875137, + "learning_rate": 5e-06, + "loss": 0.1552, + "num_input_tokens_seen": 244402756, + "step": 1423 + }, + { + "epoch": 0.37430130860787797, + "loss": 0.17389166355133057, + "loss_ce": 0.0005212996620684862, + "loss_iou": 0.36328125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 244402756, + "step": 1423 + }, + { + "epoch": 0.3745643453672651, + "grad_norm": 4.574844176213277, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 244575028, + "step": 1424 + }, + { + "epoch": 0.3745643453672651, + "loss": 0.2532828748226166, + "loss_ce": 0.002657280070707202, + "loss_iou": 0.51171875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 244575028, + "step": 1424 + }, + { + "epoch": 0.3748273821266522, + "grad_norm": 4.771481627209834, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 244745344, + "step": 1425 + }, + { + "epoch": 0.3748273821266522, + "loss": 0.1295078694820404, + "loss_ce": 0.004813054576516151, + "loss_iou": 0.5390625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 244745344, + "step": 1425 + }, + { + "epoch": 0.37509041888603933, + "grad_norm": 9.163217114560078, + "learning_rate": 5e-06, + "loss": 0.1447, + "num_input_tokens_seen": 244917724, + "step": 1426 + }, + { + "epoch": 0.37509041888603933, + "loss": 0.08564335107803345, + "loss_ce": 0.004375034011900425, + "loss_iou": 0.52734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 244917724, + "step": 1426 + }, + { + "epoch": 0.37535345564542644, + "grad_norm": 4.314222759697504, + "learning_rate": 5e-06, + "loss": 0.1353, + "num_input_tokens_seen": 245089960, + "step": 1427 + }, + { + "epoch": 0.37535345564542644, + "loss": 0.1637798547744751, + "loss_ce": 0.004539141897112131, + "loss_iou": 0.55859375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 245089960, + "step": 1427 + }, + { + "epoch": 0.3756164924048136, + "grad_norm": 4.131046733957264, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 245261980, + "step": 1428 + }, + { + "epoch": 0.3756164924048136, + "loss": 0.07748128473758698, + "loss_ce": 0.0020113117061555386, + "loss_iou": 0.4296875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 245261980, + "step": 1428 + }, + { + "epoch": 0.3758795291642007, + "grad_norm": 5.689319334005767, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 245434416, + "step": 1429 + }, + { + "epoch": 0.3758795291642007, + "loss": 0.11119981110095978, + "loss_ce": 0.0007566966232843697, + "loss_iou": 0.6015625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 245434416, + "step": 1429 + }, + { + "epoch": 0.3761425659235878, + "grad_norm": 20.10795595759354, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 245606636, + "step": 1430 + }, + { + "epoch": 0.3761425659235878, + "loss": 0.12385141104459763, + "loss_ce": 0.005015961825847626, + "loss_iou": 0.474609375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 245606636, + "step": 1430 + }, + { + "epoch": 0.37640560268297496, + "grad_norm": 5.445619300828039, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 245778556, + "step": 1431 + }, + { + "epoch": 0.37640560268297496, + "loss": 0.09182294458150864, + "loss_ce": 0.0022538499906659126, + "loss_iou": 0.609375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 245778556, + "step": 1431 + }, + { + "epoch": 0.37666863944236206, + "grad_norm": 6.5911007721042045, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 245950596, + "step": 1432 + }, + { + "epoch": 0.37666863944236206, + "loss": 0.11425422877073288, + "loss_ce": 0.002651446033269167, + "loss_iou": 0.6328125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 245950596, + "step": 1432 + }, + { + "epoch": 0.3769316762017492, + "grad_norm": 5.727028044767281, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 246120388, + "step": 1433 + }, + { + "epoch": 0.3769316762017492, + "loss": 0.10625661909580231, + "loss_ce": 0.0015508129727095366, + "loss_iou": 0.58203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 246120388, + "step": 1433 + }, + { + "epoch": 0.3771947129611363, + "grad_norm": 11.215881906654301, + "learning_rate": 5e-06, + "loss": 0.138, + "num_input_tokens_seen": 246292408, + "step": 1434 + }, + { + "epoch": 0.3771947129611363, + "loss": 0.1351088285446167, + "loss_ce": 0.0014418261125683784, + "loss_iou": 0.412109375, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 246292408, + "step": 1434 + }, + { + "epoch": 0.37745774972052343, + "grad_norm": 3.9647228124096485, + "learning_rate": 5e-06, + "loss": 0.1515, + "num_input_tokens_seen": 246464396, + "step": 1435 + }, + { + "epoch": 0.37745774972052343, + "loss": 0.12623311579227448, + "loss_ce": 0.00034810492070391774, + "loss_iou": 0.31640625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 246464396, + "step": 1435 + }, + { + "epoch": 0.3777207864799106, + "grad_norm": 3.981981545629712, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 246636468, + "step": 1436 + }, + { + "epoch": 0.3777207864799106, + "loss": 0.08713729679584503, + "loss_ce": 0.0009556564036756754, + "loss_iou": 0.416015625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 246636468, + "step": 1436 + }, + { + "epoch": 0.3779838232392977, + "grad_norm": 8.169176906981303, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 246808440, + "step": 1437 + }, + { + "epoch": 0.3779838232392977, + "loss": 0.10194505006074905, + "loss_ce": 0.005204326473176479, + "loss_iou": 0.4453125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 246808440, + "step": 1437 + }, + { + "epoch": 0.3782468599986848, + "grad_norm": 11.188980116706098, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 246980788, + "step": 1438 + }, + { + "epoch": 0.3782468599986848, + "loss": 0.19040054082870483, + "loss_ce": 0.0052198669873178005, + "loss_iou": 0.462890625, + "loss_num": 0.036865234375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 246980788, + "step": 1438 + }, + { + "epoch": 0.37850989675807195, + "grad_norm": 7.739931791211071, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 247152940, + "step": 1439 + }, + { + "epoch": 0.37850989675807195, + "loss": 0.057150378823280334, + "loss_ce": 0.00032664957689121366, + "loss_iou": 0.4609375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 247152940, + "step": 1439 + }, + { + "epoch": 0.37877293351745905, + "grad_norm": 4.659888569440988, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 247321728, + "step": 1440 + }, + { + "epoch": 0.37877293351745905, + "loss": 0.23644393682479858, + "loss_ce": 0.0026182467117905617, + "loss_iou": 0.63671875, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 247321728, + "step": 1440 + }, + { + "epoch": 0.3790359702768462, + "grad_norm": 4.432660256180253, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 247493788, + "step": 1441 + }, + { + "epoch": 0.3790359702768462, + "loss": 0.13106387853622437, + "loss_ce": 0.0005707137170247734, + "loss_iou": 0.52734375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 247493788, + "step": 1441 + }, + { + "epoch": 0.3792990070362333, + "grad_norm": 14.394067386487718, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 247666148, + "step": 1442 + }, + { + "epoch": 0.3792990070362333, + "loss": 0.06981781125068665, + "loss_ce": 0.0019467112142592669, + "loss_iou": 0.5234375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 247666148, + "step": 1442 + }, + { + "epoch": 0.3795620437956204, + "grad_norm": 7.3199746686785785, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 247838148, + "step": 1443 + }, + { + "epoch": 0.3795620437956204, + "loss": 0.12706537544727325, + "loss_ce": 0.001332960557192564, + "loss_iou": 0.58203125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 247838148, + "step": 1443 + }, + { + "epoch": 0.3798250805550076, + "grad_norm": 9.946074142043194, + "learning_rate": 5e-06, + "loss": 0.1498, + "num_input_tokens_seen": 248008104, + "step": 1444 + }, + { + "epoch": 0.3798250805550076, + "loss": 0.08655127882957458, + "loss_ce": 0.002902593230828643, + "loss_iou": 0.62890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 248008104, + "step": 1444 + }, + { + "epoch": 0.3800881173143947, + "grad_norm": 6.769456974696547, + "learning_rate": 5e-06, + "loss": 0.1537, + "num_input_tokens_seen": 248178340, + "step": 1445 + }, + { + "epoch": 0.3800881173143947, + "loss": 0.12096725404262543, + "loss_ce": 0.0002702331403270364, + "loss_iou": 0.59765625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 248178340, + "step": 1445 + }, + { + "epoch": 0.38035115407378184, + "grad_norm": 8.140876700736197, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 248350556, + "step": 1446 + }, + { + "epoch": 0.38035115407378184, + "loss": 0.09277988225221634, + "loss_ce": 0.004370463080704212, + "loss_iou": 0.53125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 248350556, + "step": 1446 + }, + { + "epoch": 0.38061419083316894, + "grad_norm": 5.023139574097896, + "learning_rate": 5e-06, + "loss": 0.1556, + "num_input_tokens_seen": 248522684, + "step": 1447 + }, + { + "epoch": 0.38061419083316894, + "loss": 0.19670161604881287, + "loss_ce": 0.002487761899828911, + "loss_iou": 0.53515625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 248522684, + "step": 1447 + }, + { + "epoch": 0.38087722759255604, + "grad_norm": 10.119019719981985, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 248692816, + "step": 1448 + }, + { + "epoch": 0.38087722759255604, + "loss": 0.14071118831634521, + "loss_ce": 0.001856213086284697, + "loss_iou": 0.486328125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 248692816, + "step": 1448 + }, + { + "epoch": 0.3811402643519432, + "grad_norm": 4.80565522402478, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 248865220, + "step": 1449 + }, + { + "epoch": 0.3811402643519432, + "loss": 0.138558492064476, + "loss_ce": 0.005471333395689726, + "loss_iou": 0.546875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 248865220, + "step": 1449 + }, + { + "epoch": 0.3814033011113303, + "grad_norm": 5.799079918279409, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 249037228, + "step": 1450 + }, + { + "epoch": 0.3814033011113303, + "loss": 0.1300484836101532, + "loss_ce": 0.0007149941520765424, + "loss_iou": 0.59765625, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 249037228, + "step": 1450 + }, + { + "epoch": 0.3816663378707174, + "grad_norm": 5.998961200928992, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 249209488, + "step": 1451 + }, + { + "epoch": 0.3816663378707174, + "loss": 0.15324485301971436, + "loss_ce": 0.0011757557513192296, + "loss_iou": 0.62890625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 249209488, + "step": 1451 + }, + { + "epoch": 0.38192937463010457, + "grad_norm": 6.571427699631083, + "learning_rate": 5e-06, + "loss": 0.1472, + "num_input_tokens_seen": 249381876, + "step": 1452 + }, + { + "epoch": 0.38192937463010457, + "loss": 0.09512484073638916, + "loss_ce": 0.001863121404312551, + "loss_iou": 0.640625, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 249381876, + "step": 1452 + }, + { + "epoch": 0.38219241138949167, + "grad_norm": 5.686083518821853, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 249554160, + "step": 1453 + }, + { + "epoch": 0.38219241138949167, + "loss": 0.12024913728237152, + "loss_ce": 0.002786980476230383, + "loss_iou": 0.49609375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 249554160, + "step": 1453 + }, + { + "epoch": 0.38245544814887883, + "grad_norm": 5.286753977297326, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 249726068, + "step": 1454 + }, + { + "epoch": 0.38245544814887883, + "loss": 0.15219104290008545, + "loss_ce": 0.001312148873694241, + "loss_iou": NaN, + "loss_num": 0.0302734375, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 249726068, + "step": 1454 + }, + { + "epoch": 0.38271848490826593, + "grad_norm": 6.1144167801042295, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 249898264, + "step": 1455 + }, + { + "epoch": 0.38271848490826593, + "loss": 0.20618750154972076, + "loss_ce": 0.001353507163003087, + "loss_iou": 0.5703125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 249898264, + "step": 1455 + }, + { + "epoch": 0.38298152166765304, + "grad_norm": 5.254827116017921, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 250070312, + "step": 1456 + }, + { + "epoch": 0.38298152166765304, + "loss": 0.09227914363145828, + "loss_ce": 0.00014657669817097485, + "loss_iou": 0.6484375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 250070312, + "step": 1456 + }, + { + "epoch": 0.3832445584270402, + "grad_norm": 6.7921237774310095, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 250242620, + "step": 1457 + }, + { + "epoch": 0.3832445584270402, + "loss": 0.16699600219726562, + "loss_ce": 0.0013771126978099346, + "loss_iou": 0.435546875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 250242620, + "step": 1457 + }, + { + "epoch": 0.3835075951864273, + "grad_norm": 5.478639252002706, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 250414736, + "step": 1458 + }, + { + "epoch": 0.3835075951864273, + "loss": 0.05368629842996597, + "loss_ce": 0.00015846370661165565, + "loss_iou": 0.734375, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 250414736, + "step": 1458 + }, + { + "epoch": 0.3837706319458144, + "grad_norm": 5.108471243837392, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 250586900, + "step": 1459 + }, + { + "epoch": 0.3837706319458144, + "loss": 0.12694557011127472, + "loss_ce": 0.0016709101619198918, + "loss_iou": 0.44921875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 250586900, + "step": 1459 + }, + { + "epoch": 0.38403366870520156, + "grad_norm": 5.673015930654883, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 250757288, + "step": 1460 + }, + { + "epoch": 0.38403366870520156, + "loss": 0.13443070650100708, + "loss_ce": 0.0023811361752450466, + "loss_iou": 0.671875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 250757288, + "step": 1460 + }, + { + "epoch": 0.38429670546458866, + "grad_norm": 8.46180134502284, + "learning_rate": 5e-06, + "loss": 0.1569, + "num_input_tokens_seen": 250929524, + "step": 1461 + }, + { + "epoch": 0.38429670546458866, + "loss": 0.10377843677997589, + "loss_ce": 0.0022159344516694546, + "loss_iou": 0.55078125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 250929524, + "step": 1461 + }, + { + "epoch": 0.3845597422239758, + "grad_norm": 19.056127756550847, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 251100012, + "step": 1462 + }, + { + "epoch": 0.3845597422239758, + "loss": 0.13582435250282288, + "loss_ce": 0.0002042302949121222, + "loss_iou": 0.68359375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 251100012, + "step": 1462 + }, + { + "epoch": 0.3848227789833629, + "grad_norm": 13.96527055803463, + "learning_rate": 5e-06, + "loss": 0.1363, + "num_input_tokens_seen": 251272524, + "step": 1463 + }, + { + "epoch": 0.3848227789833629, + "loss": 0.09230969846248627, + "loss_ce": 0.0005128234624862671, + "loss_iou": 0.474609375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 251272524, + "step": 1463 + }, + { + "epoch": 0.38508581574275, + "grad_norm": 8.704363897996222, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 251444684, + "step": 1464 + }, + { + "epoch": 0.38508581574275, + "loss": 0.06702134013175964, + "loss_ce": 0.0022020034957677126, + "loss_iou": 0.59375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 251444684, + "step": 1464 + }, + { + "epoch": 0.3853488525021372, + "grad_norm": 5.865897344217803, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 251616872, + "step": 1465 + }, + { + "epoch": 0.3853488525021372, + "loss": 0.0932367742061615, + "loss_ce": 0.0029200036078691483, + "loss_iou": 0.6015625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 251616872, + "step": 1465 + }, + { + "epoch": 0.3856118892615243, + "grad_norm": 7.51772092325757, + "learning_rate": 5e-06, + "loss": 0.1453, + "num_input_tokens_seen": 251787464, + "step": 1466 + }, + { + "epoch": 0.3856118892615243, + "loss": 0.08029404282569885, + "loss_ce": 0.0014366218820214272, + "loss_iou": 0.6015625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 251787464, + "step": 1466 + }, + { + "epoch": 0.38587492602091145, + "grad_norm": 5.797632938807012, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 251959420, + "step": 1467 + }, + { + "epoch": 0.38587492602091145, + "loss": 0.1458974927663803, + "loss_ce": 0.0016409052768722177, + "loss_iou": 0.373046875, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 251959420, + "step": 1467 + }, + { + "epoch": 0.38613796278029855, + "grad_norm": 11.05258372992866, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 252130168, + "step": 1468 + }, + { + "epoch": 0.38613796278029855, + "loss": 0.13372868299484253, + "loss_ce": 0.002472587861120701, + "loss_iou": 0.6015625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 252130168, + "step": 1468 + }, + { + "epoch": 0.38640099953968565, + "grad_norm": 14.608414515106098, + "learning_rate": 5e-06, + "loss": 0.2148, + "num_input_tokens_seen": 252302400, + "step": 1469 + }, + { + "epoch": 0.38640099953968565, + "loss": 0.17314574122428894, + "loss_ce": 0.0009960737079381943, + "loss_iou": 0.45703125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 252302400, + "step": 1469 + }, + { + "epoch": 0.3866640362990728, + "grad_norm": 5.302717622816423, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 252474472, + "step": 1470 + }, + { + "epoch": 0.3866640362990728, + "loss": 0.11253952234983444, + "loss_ce": 0.0038511687889695168, + "loss_iou": 0.6015625, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 252474472, + "step": 1470 + }, + { + "epoch": 0.3869270730584599, + "grad_norm": 6.326570096654801, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 252646620, + "step": 1471 + }, + { + "epoch": 0.3869270730584599, + "loss": 0.12959828972816467, + "loss_ce": 0.0016685951268300414, + "loss_iou": 0.4296875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 252646620, + "step": 1471 + }, + { + "epoch": 0.387190109817847, + "grad_norm": 4.47840778836987, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 252818676, + "step": 1472 + }, + { + "epoch": 0.387190109817847, + "loss": 0.09814205765724182, + "loss_ce": 0.0010656400118023157, + "loss_iou": 0.6796875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 252818676, + "step": 1472 + }, + { + "epoch": 0.3874531465772342, + "grad_norm": 8.330831236084894, + "learning_rate": 5e-06, + "loss": 0.1907, + "num_input_tokens_seen": 252989028, + "step": 1473 + }, + { + "epoch": 0.3874531465772342, + "loss": 0.13005918264389038, + "loss_ce": 0.0018853526562452316, + "loss_iou": 0.408203125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 252989028, + "step": 1473 + }, + { + "epoch": 0.3877161833366213, + "grad_norm": 5.836741339054395, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 253160776, + "step": 1474 + }, + { + "epoch": 0.3877161833366213, + "loss": 0.16334399580955505, + "loss_ce": 0.0019365199841558933, + "loss_iou": 0.61328125, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 253160776, + "step": 1474 + }, + { + "epoch": 0.38797922009600844, + "grad_norm": 6.563945205492379, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 253333080, + "step": 1475 + }, + { + "epoch": 0.38797922009600844, + "loss": 0.11816126108169556, + "loss_ce": 0.0005465176654979587, + "loss_iou": 0.462890625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 253333080, + "step": 1475 + }, + { + "epoch": 0.38824225685539554, + "grad_norm": 5.424235438541153, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 253504996, + "step": 1476 + }, + { + "epoch": 0.38824225685539554, + "loss": 0.2769371271133423, + "loss_ce": 0.0038658371195197105, + "loss_iou": 0.65625, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 253504996, + "step": 1476 + }, + { + "epoch": 0.38850529361478264, + "grad_norm": 8.554483113755165, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 253677224, + "step": 1477 + }, + { + "epoch": 0.38850529361478264, + "loss": 0.2999889850616455, + "loss_ce": 0.0013744828756898642, + "loss_iou": 0.373046875, + "loss_num": 0.0595703125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 253677224, + "step": 1477 + }, + { + "epoch": 0.3887683303741698, + "grad_norm": 9.64593506404694, + "learning_rate": 5e-06, + "loss": 0.1791, + "num_input_tokens_seen": 253849140, + "step": 1478 + }, + { + "epoch": 0.3887683303741698, + "loss": 0.10786094516515732, + "loss_ce": 0.0013545926194638014, + "loss_iou": 0.6015625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 253849140, + "step": 1478 + }, + { + "epoch": 0.3890313671335569, + "grad_norm": 4.897950380873555, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 254021328, + "step": 1479 + }, + { + "epoch": 0.3890313671335569, + "loss": 0.09722624719142914, + "loss_ce": 0.0006075926939956844, + "loss_iou": 0.5078125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 254021328, + "step": 1479 + }, + { + "epoch": 0.38929440389294406, + "grad_norm": 15.722256371647902, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 254191780, + "step": 1480 + }, + { + "epoch": 0.38929440389294406, + "loss": 0.24498049914836884, + "loss_ce": 0.003861122764647007, + "loss_iou": 0.466796875, + "loss_num": 0.04833984375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 254191780, + "step": 1480 + }, + { + "epoch": 0.38955744065233117, + "grad_norm": 8.008352967265257, + "learning_rate": 5e-06, + "loss": 0.1663, + "num_input_tokens_seen": 254363968, + "step": 1481 + }, + { + "epoch": 0.38955744065233117, + "loss": 0.11913852393627167, + "loss_ce": 0.004117771051824093, + "loss_iou": 0.57421875, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 254363968, + "step": 1481 + }, + { + "epoch": 0.38982047741171827, + "grad_norm": 49.00606130732605, + "learning_rate": 5e-06, + "loss": 0.1557, + "num_input_tokens_seen": 254534476, + "step": 1482 + }, + { + "epoch": 0.38982047741171827, + "loss": 0.10790035128593445, + "loss_ce": 0.001271925400942564, + "loss_iou": 0.67578125, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 254534476, + "step": 1482 + }, + { + "epoch": 0.3900835141711054, + "grad_norm": 7.1385674525154705, + "learning_rate": 5e-06, + "loss": 0.1465, + "num_input_tokens_seen": 254706676, + "step": 1483 + }, + { + "epoch": 0.3900835141711054, + "loss": 0.21054460108280182, + "loss_ce": 0.0007057388429529965, + "loss_iou": 0.458984375, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 254706676, + "step": 1483 + }, + { + "epoch": 0.39034655093049253, + "grad_norm": 11.652107001526453, + "learning_rate": 5e-06, + "loss": 0.1735, + "num_input_tokens_seen": 254877300, + "step": 1484 + }, + { + "epoch": 0.39034655093049253, + "loss": 0.1516016721725464, + "loss_ce": 0.0008448172593489289, + "loss_iou": 0.62890625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 254877300, + "step": 1484 + }, + { + "epoch": 0.39060958768987963, + "grad_norm": 6.761059905321281, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 255049560, + "step": 1485 + }, + { + "epoch": 0.39060958768987963, + "loss": 0.17154067754745483, + "loss_ce": 0.00045912445057183504, + "loss_iou": 0.431640625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 255049560, + "step": 1485 + }, + { + "epoch": 0.3908726244492668, + "grad_norm": 4.6049295798328655, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 255221840, + "step": 1486 + }, + { + "epoch": 0.3908726244492668, + "loss": 0.12419994175434113, + "loss_ce": 0.0011530672200024128, + "loss_iou": 0.52734375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 255221840, + "step": 1486 + }, + { + "epoch": 0.3911356612086539, + "grad_norm": 9.631901215468224, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 255393804, + "step": 1487 + }, + { + "epoch": 0.3911356612086539, + "loss": 0.14977988600730896, + "loss_ce": 0.006835547741502523, + "loss_iou": 0.40234375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 255393804, + "step": 1487 + }, + { + "epoch": 0.39139869796804105, + "grad_norm": 6.350539398563022, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 255565956, + "step": 1488 + }, + { + "epoch": 0.39139869796804105, + "loss": 0.08242587745189667, + "loss_ce": 0.0012491194065660238, + "loss_iou": 0.62109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 255565956, + "step": 1488 + }, + { + "epoch": 0.39166173472742816, + "grad_norm": 8.23209899989984, + "learning_rate": 5e-06, + "loss": 0.1259, + "num_input_tokens_seen": 255738272, + "step": 1489 + }, + { + "epoch": 0.39166173472742816, + "loss": 0.11557637155056, + "loss_ce": 0.00025044637732207775, + "loss_iou": 0.52734375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 255738272, + "step": 1489 + }, + { + "epoch": 0.39192477148681526, + "grad_norm": 6.0709262055147555, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 255910628, + "step": 1490 + }, + { + "epoch": 0.39192477148681526, + "loss": 0.06516115367412567, + "loss_ce": 0.0002197464054916054, + "loss_iou": 0.578125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 255910628, + "step": 1490 + }, + { + "epoch": 0.3921878082462024, + "grad_norm": 4.913795659722062, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 256079800, + "step": 1491 + }, + { + "epoch": 0.3921878082462024, + "loss": 0.12981277704238892, + "loss_ce": 0.0006013567326590419, + "loss_iou": 0.6015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 256079800, + "step": 1491 + }, + { + "epoch": 0.3924508450055895, + "grad_norm": 10.053329401258154, + "learning_rate": 5e-06, + "loss": 0.0995, + "num_input_tokens_seen": 256251908, + "step": 1492 + }, + { + "epoch": 0.3924508450055895, + "loss": 0.11420565843582153, + "loss_ce": 0.0003750904288608581, + "loss_iou": 0.5859375, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 256251908, + "step": 1492 + }, + { + "epoch": 0.3927138817649767, + "grad_norm": 9.9668682397801, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 256423824, + "step": 1493 + }, + { + "epoch": 0.3927138817649767, + "loss": 0.1758217215538025, + "loss_ce": 0.0006203037919476628, + "loss_iou": 0.40234375, + "loss_num": 0.03515625, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 256423824, + "step": 1493 + }, + { + "epoch": 0.3929769185243638, + "grad_norm": 11.611915023620941, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 256596028, + "step": 1494 + }, + { + "epoch": 0.3929769185243638, + "loss": 0.1324601024389267, + "loss_ce": 0.0007157221552915871, + "loss_iou": 0.4765625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 256596028, + "step": 1494 + }, + { + "epoch": 0.3932399552837509, + "grad_norm": 8.374710474816515, + "learning_rate": 5e-06, + "loss": 0.1707, + "num_input_tokens_seen": 256764620, + "step": 1495 + }, + { + "epoch": 0.3932399552837509, + "loss": 0.14535440504550934, + "loss_ce": 0.00076211744453758, + "loss_iou": 0.35546875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 256764620, + "step": 1495 + }, + { + "epoch": 0.39350299204313804, + "grad_norm": 3.7888421024340295, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 256936860, + "step": 1496 + }, + { + "epoch": 0.39350299204313804, + "loss": 0.11324183642864227, + "loss_ce": 0.0021883677691221237, + "loss_iou": 0.75, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 256936860, + "step": 1496 + }, + { + "epoch": 0.39376602880252515, + "grad_norm": 6.4208864756609865, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 257108880, + "step": 1497 + }, + { + "epoch": 0.39376602880252515, + "loss": 0.14706888794898987, + "loss_ce": 0.0031480020843446255, + "loss_iou": 0.50390625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 257108880, + "step": 1497 + }, + { + "epoch": 0.39402906556191225, + "grad_norm": 7.914747299122871, + "learning_rate": 5e-06, + "loss": 0.1717, + "num_input_tokens_seen": 257280748, + "step": 1498 + }, + { + "epoch": 0.39402906556191225, + "loss": 0.12499146163463593, + "loss_ce": 0.0012732032919302583, + "loss_iou": 0.58203125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 257280748, + "step": 1498 + }, + { + "epoch": 0.3942921023212994, + "grad_norm": 6.479265884809814, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 257452832, + "step": 1499 + }, + { + "epoch": 0.3942921023212994, + "loss": 0.15066663920879364, + "loss_ce": 0.000764301570598036, + "loss_iou": 0.515625, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 257452832, + "step": 1499 + }, + { + "epoch": 0.3945551390806865, + "grad_norm": 30.54873849483752, + "learning_rate": 5e-06, + "loss": 0.1459, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_websight_new_CIoU": 0.8511096835136414, + "eval_websight_new_GIoU": 0.8473265767097473, + "eval_websight_new_IoU": 0.8611634373664856, + "eval_websight_new_MAE_all": 0.029499702155590057, + "eval_websight_new_MAE_h": 0.019038498401641846, + "eval_websight_new_MAE_w": 0.04285556077957153, + "eval_websight_new_MAE_x": 0.04122600890696049, + "eval_websight_new_MAE_y": 0.014878739370033145, + "eval_websight_new_NUM_probability": 0.9999458193778992, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.13057953119277954, + "eval_websight_new_loss_ce": 1.3184878298488911e-05, + "eval_websight_new_loss_iou": 0.335205078125, + "eval_websight_new_loss_num": 0.022043228149414062, + "eval_websight_new_loss_xval": 0.1102447509765625, + "eval_websight_new_runtime": 55.84, + "eval_websight_new_samples_per_second": 0.895, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_seeclick_CIoU": 0.6286773383617401, + "eval_seeclick_GIoU": 0.627009391784668, + "eval_seeclick_IoU": 0.646778017282486, + "eval_seeclick_MAE_all": 0.04383368603885174, + "eval_seeclick_MAE_h": 0.028551836498081684, + "eval_seeclick_MAE_w": 0.05717572197318077, + "eval_seeclick_MAE_x": 0.06311946921050549, + "eval_seeclick_MAE_y": 0.02648772206157446, + "eval_seeclick_NUM_probability": 0.9999659061431885, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.205887109041214, + "eval_seeclick_loss_ce": 0.008159147575497627, + "eval_seeclick_loss_iou": 0.525634765625, + "eval_seeclick_loss_num": 0.0388031005859375, + "eval_seeclick_loss_xval": 0.1939697265625, + "eval_seeclick_runtime": 77.0722, + "eval_seeclick_samples_per_second": 0.558, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_icons_CIoU": 0.8122206926345825, + "eval_icons_GIoU": 0.8060621917247772, + "eval_icons_IoU": 0.8242988586425781, + "eval_icons_MAE_all": 0.029898496810346842, + "eval_icons_MAE_h": 0.02304963255301118, + "eval_icons_MAE_w": 0.03834102302789688, + "eval_icons_MAE_x": 0.035982510074973106, + "eval_icons_MAE_y": 0.022220822051167488, + "eval_icons_NUM_probability": 0.9998922646045685, + "eval_icons_inside_bbox": 0.9288194477558136, + "eval_icons_loss": 0.08346885442733765, + "eval_icons_loss_ce": 3.6747020203620195e-05, + "eval_icons_loss_iou": 0.5107421875, + "eval_icons_loss_num": 0.014827728271484375, + "eval_icons_loss_xval": 0.0741424560546875, + "eval_icons_runtime": 80.4282, + "eval_icons_samples_per_second": 0.622, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "eval_screenspot_CIoU": 0.5216498871644338, + "eval_screenspot_GIoU": 0.5137112041314443, + "eval_screenspot_IoU": 0.5683091680208842, + "eval_screenspot_MAE_all": 0.09167192876338959, + "eval_screenspot_MAE_h": 0.04900683710972468, + "eval_screenspot_MAE_w": 0.15865817666053772, + "eval_screenspot_MAE_x": 0.1116797278324763, + "eval_screenspot_MAE_y": 0.04734297779699167, + "eval_screenspot_NUM_probability": 0.9997473557790121, + "eval_screenspot_inside_bbox": 0.8291666706403097, + "eval_screenspot_loss": 0.8493114709854126, + "eval_screenspot_loss_ce": 0.5014328161875407, + "eval_screenspot_loss_iou": 0.4333089192708333, + "eval_screenspot_loss_num": 0.06844584147135417, + "eval_screenspot_loss_xval": 0.3421223958333333, + "eval_screenspot_runtime": 144.4244, + "eval_screenspot_samples_per_second": 0.616, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.3945551390806865, + "loss": 0.8482523560523987, + "loss_ce": 0.4914408326148987, + "loss_iou": 0.369140625, + "loss_num": 0.0712890625, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 257625396, + "step": 1500 + }, + { + "epoch": 0.39481817584007367, + "grad_norm": 4.597601910727454, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 257797176, + "step": 1501 + }, + { + "epoch": 0.39481817584007367, + "loss": 0.11587628722190857, + "loss_ce": 0.0010386451613157988, + "loss_iou": 0.412109375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 257797176, + "step": 1501 + }, + { + "epoch": 0.3950812125994608, + "grad_norm": 20.41167402936065, + "learning_rate": 5e-06, + "loss": 0.1429, + "num_input_tokens_seen": 257969472, + "step": 1502 + }, + { + "epoch": 0.3950812125994608, + "loss": 0.13194513320922852, + "loss_ce": 0.00023126379528548568, + "loss_iou": 0.5078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 257969472, + "step": 1502 + }, + { + "epoch": 0.3953442493588479, + "grad_norm": 5.303324872084742, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 258141772, + "step": 1503 + }, + { + "epoch": 0.3953442493588479, + "loss": 0.26048120856285095, + "loss_ce": 0.0004714562091976404, + "loss_iou": 0.478515625, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 258141772, + "step": 1503 + }, + { + "epoch": 0.39560728611823504, + "grad_norm": 8.26410044022957, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 258312152, + "step": 1504 + }, + { + "epoch": 0.39560728611823504, + "loss": 0.11661653220653534, + "loss_ce": 0.0012600838672369719, + "loss_iou": 0.58984375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 258312152, + "step": 1504 + }, + { + "epoch": 0.39587032287762214, + "grad_norm": 5.069526368720703, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 258484140, + "step": 1505 + }, + { + "epoch": 0.39587032287762214, + "loss": 0.15614590048789978, + "loss_ce": 0.002184713026508689, + "loss_iou": 0.578125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 258484140, + "step": 1505 + }, + { + "epoch": 0.3961333596370093, + "grad_norm": 8.21868694477306, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 258656432, + "step": 1506 + }, + { + "epoch": 0.3961333596370093, + "loss": 0.15648871660232544, + "loss_ce": 0.006220155395567417, + "loss_iou": 0.65234375, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 258656432, + "step": 1506 + }, + { + "epoch": 0.3963963963963964, + "grad_norm": 4.840531799006556, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 258828268, + "step": 1507 + }, + { + "epoch": 0.3963963963963964, + "loss": 0.16232186555862427, + "loss_ce": 0.0027149375528097153, + "loss_iou": 0.51953125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 258828268, + "step": 1507 + }, + { + "epoch": 0.3966594331557835, + "grad_norm": 13.037528909207806, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 259000404, + "step": 1508 + }, + { + "epoch": 0.3966594331557835, + "loss": 0.14472803473472595, + "loss_ce": 0.0009902361780405045, + "loss_iou": 0.625, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 259000404, + "step": 1508 + }, + { + "epoch": 0.39692246991517066, + "grad_norm": 5.085713777784683, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 259172564, + "step": 1509 + }, + { + "epoch": 0.39692246991517066, + "loss": 0.07742594182491302, + "loss_ce": 0.001498206052929163, + "loss_iou": 0.50390625, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 259172564, + "step": 1509 + }, + { + "epoch": 0.39718550667455776, + "grad_norm": 11.735468019604104, + "learning_rate": 5e-06, + "loss": 0.1383, + "num_input_tokens_seen": 259344984, + "step": 1510 + }, + { + "epoch": 0.39718550667455776, + "loss": 0.19085130095481873, + "loss_ce": 0.006021593697369099, + "loss_iou": 0.17578125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 259344984, + "step": 1510 + }, + { + "epoch": 0.39744854343394487, + "grad_norm": 5.2454273554423505, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 259517348, + "step": 1511 + }, + { + "epoch": 0.39744854343394487, + "loss": 0.18470171093940735, + "loss_ce": 0.0007722551235929132, + "loss_iou": 0.38671875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 259517348, + "step": 1511 + }, + { + "epoch": 0.397711580193332, + "grad_norm": 5.93272222700996, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 259689536, + "step": 1512 + }, + { + "epoch": 0.397711580193332, + "loss": 0.08698225021362305, + "loss_ce": 9.870098438113928e-05, + "loss_iou": 0.46875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 259689536, + "step": 1512 + }, + { + "epoch": 0.39797461695271913, + "grad_norm": 6.869955142591404, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 259861960, + "step": 1513 + }, + { + "epoch": 0.39797461695271913, + "loss": 0.17443957924842834, + "loss_ce": 0.0013743957970291376, + "loss_iou": 0.48828125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 259861960, + "step": 1513 + }, + { + "epoch": 0.3982376537121063, + "grad_norm": 5.07838683649017, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 260034008, + "step": 1514 + }, + { + "epoch": 0.3982376537121063, + "loss": 0.11854679882526398, + "loss_ce": 0.0005353257874958217, + "loss_iou": 0.498046875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 260034008, + "step": 1514 + }, + { + "epoch": 0.3985006904714934, + "grad_norm": 4.287082894022155, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 260206128, + "step": 1515 + }, + { + "epoch": 0.3985006904714934, + "loss": 0.13691899180412292, + "loss_ce": 0.004594762809574604, + "loss_iou": 0.57421875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 260206128, + "step": 1515 + }, + { + "epoch": 0.3987637272308805, + "grad_norm": 11.905443274772528, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 260377024, + "step": 1516 + }, + { + "epoch": 0.3987637272308805, + "loss": 0.2244867980480194, + "loss_ce": 0.004058347083628178, + "loss_iou": 0.451171875, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 260377024, + "step": 1516 + }, + { + "epoch": 0.39902676399026765, + "grad_norm": 5.162416897168446, + "learning_rate": 5e-06, + "loss": 0.1643, + "num_input_tokens_seen": 260549044, + "step": 1517 + }, + { + "epoch": 0.39902676399026765, + "loss": 0.1554432064294815, + "loss_ce": 0.006700540892779827, + "loss_iou": 0.49609375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 260549044, + "step": 1517 + }, + { + "epoch": 0.39928980074965476, + "grad_norm": 10.132521694727306, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 260721092, + "step": 1518 + }, + { + "epoch": 0.39928980074965476, + "loss": 0.10422030091285706, + "loss_ce": 0.0018338197842240334, + "loss_iou": 0.56640625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 260721092, + "step": 1518 + }, + { + "epoch": 0.3995528375090419, + "grad_norm": 5.444540349408582, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 260893156, + "step": 1519 + }, + { + "epoch": 0.3995528375090419, + "loss": 0.151461660861969, + "loss_ce": 0.0018339705420657992, + "loss_iou": 0.45703125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 260893156, + "step": 1519 + }, + { + "epoch": 0.399815874268429, + "grad_norm": 11.977186984155779, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 261065364, + "step": 1520 + }, + { + "epoch": 0.399815874268429, + "loss": 0.10812121629714966, + "loss_ce": 0.0015843516448512673, + "loss_iou": 0.53125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 261065364, + "step": 1520 + }, + { + "epoch": 0.4000789110278161, + "grad_norm": 18.63106485848457, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 261237376, + "step": 1521 + }, + { + "epoch": 0.4000789110278161, + "loss": 0.16819404065608978, + "loss_ce": 0.0020105685107409954, + "loss_iou": 0.4296875, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 261237376, + "step": 1521 + }, + { + "epoch": 0.4003419477872033, + "grad_norm": 10.102453678806217, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 261409528, + "step": 1522 + }, + { + "epoch": 0.4003419477872033, + "loss": 0.16048389673233032, + "loss_ce": 0.0018229965353384614, + "loss_iou": 0.53515625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 261409528, + "step": 1522 + }, + { + "epoch": 0.4006049845465904, + "grad_norm": 4.815296030108717, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 261581516, + "step": 1523 + }, + { + "epoch": 0.4006049845465904, + "loss": 0.16827590763568878, + "loss_ce": 0.0006428433116525412, + "loss_iou": 0.5390625, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 261581516, + "step": 1523 + }, + { + "epoch": 0.4008680213059775, + "grad_norm": 7.523079284642639, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 261753732, + "step": 1524 + }, + { + "epoch": 0.4008680213059775, + "loss": 0.07389776408672333, + "loss_ce": 0.0012354037025943398, + "loss_iou": 0.58984375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 261753732, + "step": 1524 + }, + { + "epoch": 0.40113105806536464, + "grad_norm": 5.8778602893728955, + "learning_rate": 5e-06, + "loss": 0.167, + "num_input_tokens_seen": 261925788, + "step": 1525 + }, + { + "epoch": 0.40113105806536464, + "loss": 0.1510400027036667, + "loss_ce": 0.00025264715077355504, + "loss_iou": 0.359375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 261925788, + "step": 1525 + }, + { + "epoch": 0.40139409482475175, + "grad_norm": 5.850697556727792, + "learning_rate": 5e-06, + "loss": 0.1457, + "num_input_tokens_seen": 262097916, + "step": 1526 + }, + { + "epoch": 0.40139409482475175, + "loss": 0.1881968379020691, + "loss_ce": 0.002436349866911769, + "loss_iou": 0.53125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 262097916, + "step": 1526 + }, + { + "epoch": 0.4016571315841389, + "grad_norm": 6.537837429507002, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 262269964, + "step": 1527 + }, + { + "epoch": 0.4016571315841389, + "loss": 0.09432707726955414, + "loss_ce": 0.004178154282271862, + "loss_iou": 0.486328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 262269964, + "step": 1527 + }, + { + "epoch": 0.401920168343526, + "grad_norm": 16.05135902232025, + "learning_rate": 5e-06, + "loss": 0.148, + "num_input_tokens_seen": 262442240, + "step": 1528 + }, + { + "epoch": 0.401920168343526, + "loss": 0.22578378021717072, + "loss_ce": 0.0013269821647554636, + "loss_iou": 0.6484375, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 262442240, + "step": 1528 + }, + { + "epoch": 0.4021832051029131, + "grad_norm": 23.071196756112307, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 262612560, + "step": 1529 + }, + { + "epoch": 0.4021832051029131, + "loss": 0.1546820104122162, + "loss_ce": 0.0018499757861718535, + "loss_iou": 0.451171875, + "loss_num": 0.0306396484375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 262612560, + "step": 1529 + }, + { + "epoch": 0.40244624186230027, + "grad_norm": 15.456725226696141, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 262784424, + "step": 1530 + }, + { + "epoch": 0.40244624186230027, + "loss": 0.09581418335437775, + "loss_ce": 0.00661130016669631, + "loss_iou": 0.55859375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 262784424, + "step": 1530 + }, + { + "epoch": 0.40270927862168737, + "grad_norm": 4.753085917564063, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 262956820, + "step": 1531 + }, + { + "epoch": 0.40270927862168737, + "loss": 0.17357602715492249, + "loss_ce": 0.000693951384164393, + "loss_iou": 0.484375, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 262956820, + "step": 1531 + }, + { + "epoch": 0.40297231538107453, + "grad_norm": 8.23993450342922, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 263129000, + "step": 1532 + }, + { + "epoch": 0.40297231538107453, + "loss": 0.104616180062294, + "loss_ce": 0.002107633277773857, + "loss_iou": 0.65234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 263129000, + "step": 1532 + }, + { + "epoch": 0.40323535214046163, + "grad_norm": 5.28557757850573, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 263301064, + "step": 1533 + }, + { + "epoch": 0.40323535214046163, + "loss": 0.1304527074098587, + "loss_ce": 0.004811838734894991, + "loss_iou": 0.48046875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 263301064, + "step": 1533 + }, + { + "epoch": 0.40349838889984874, + "grad_norm": 6.190146085552892, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 263473624, + "step": 1534 + }, + { + "epoch": 0.40349838889984874, + "loss": 0.18483060598373413, + "loss_ce": 0.00016873711138032377, + "loss_iou": 0.609375, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 263473624, + "step": 1534 + }, + { + "epoch": 0.4037614256592359, + "grad_norm": 7.04287854016945, + "learning_rate": 5e-06, + "loss": 0.1756, + "num_input_tokens_seen": 263645984, + "step": 1535 + }, + { + "epoch": 0.4037614256592359, + "loss": 0.2618888020515442, + "loss_ce": 0.003191267838701606, + "loss_iou": 0.51953125, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 263645984, + "step": 1535 + }, + { + "epoch": 0.404024462418623, + "grad_norm": 12.524958974577965, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 263815796, + "step": 1536 + }, + { + "epoch": 0.404024462418623, + "loss": 0.1076509952545166, + "loss_ce": 0.0004732571542263031, + "loss_iou": 0.48046875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 263815796, + "step": 1536 + }, + { + "epoch": 0.4042874991780101, + "grad_norm": 11.13747389496862, + "learning_rate": 5e-06, + "loss": 0.1302, + "num_input_tokens_seen": 263985512, + "step": 1537 + }, + { + "epoch": 0.4042874991780101, + "loss": 0.12391631305217743, + "loss_ce": 0.004134822636842728, + "loss_iou": 0.484375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 263985512, + "step": 1537 + }, + { + "epoch": 0.40455053593739726, + "grad_norm": 13.524733657823086, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 264157652, + "step": 1538 + }, + { + "epoch": 0.40455053593739726, + "loss": 0.1372789740562439, + "loss_ce": 0.00034661110839806497, + "loss_iou": 0.498046875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 264157652, + "step": 1538 + }, + { + "epoch": 0.40481357269678436, + "grad_norm": 10.591750756623403, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 264328132, + "step": 1539 + }, + { + "epoch": 0.40481357269678436, + "loss": 0.11795895546674728, + "loss_ce": 0.0021752638276666403, + "loss_iou": 0.484375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 264328132, + "step": 1539 + }, + { + "epoch": 0.4050766094561715, + "grad_norm": 13.38043942604694, + "learning_rate": 5e-06, + "loss": 0.159, + "num_input_tokens_seen": 264500388, + "step": 1540 + }, + { + "epoch": 0.4050766094561715, + "loss": 0.19694536924362183, + "loss_ce": 0.00218218588270247, + "loss_iou": 0.66015625, + "loss_num": 0.0390625, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 264500388, + "step": 1540 + }, + { + "epoch": 0.4053396462155586, + "grad_norm": 4.192366808850045, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 264672276, + "step": 1541 + }, + { + "epoch": 0.4053396462155586, + "loss": 0.07517112791538239, + "loss_ce": 0.00021995243150740862, + "loss_iou": 0.55078125, + "loss_num": 0.01495361328125, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 264672276, + "step": 1541 + }, + { + "epoch": 0.4056026829749457, + "grad_norm": 5.297574673677269, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 264844464, + "step": 1542 + }, + { + "epoch": 0.4056026829749457, + "loss": 0.12331673502922058, + "loss_ce": 0.00359627278521657, + "loss_iou": 0.6171875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 264844464, + "step": 1542 + }, + { + "epoch": 0.4058657197343329, + "grad_norm": 5.716341192152297, + "learning_rate": 5e-06, + "loss": 0.1333, + "num_input_tokens_seen": 265016664, + "step": 1543 + }, + { + "epoch": 0.4058657197343329, + "loss": 0.16498282551765442, + "loss_ce": 0.001957924338057637, + "loss_iou": 0.68359375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 265016664, + "step": 1543 + }, + { + "epoch": 0.40612875649372, + "grad_norm": 6.6104135503037895, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 265188924, + "step": 1544 + }, + { + "epoch": 0.40612875649372, + "loss": 0.15974299609661102, + "loss_ce": 0.0041338615119457245, + "loss_iou": 0.5546875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 265188924, + "step": 1544 + }, + { + "epoch": 0.40639179325310715, + "grad_norm": 5.4155340578643365, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 265359312, + "step": 1545 + }, + { + "epoch": 0.40639179325310715, + "loss": 0.21989840269088745, + "loss_ce": 0.0024301379453390837, + "loss_iou": 0.5546875, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 265359312, + "step": 1545 + }, + { + "epoch": 0.40665483001249425, + "grad_norm": 4.753477134622953, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 265531700, + "step": 1546 + }, + { + "epoch": 0.40665483001249425, + "loss": 0.05390855669975281, + "loss_ce": 0.0012352181365713477, + "loss_iou": 0.734375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 265531700, + "step": 1546 + }, + { + "epoch": 0.40691786677188135, + "grad_norm": 6.220827805530868, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 265700312, + "step": 1547 + }, + { + "epoch": 0.40691786677188135, + "loss": 0.12480812519788742, + "loss_ce": 0.0009983108611777425, + "loss_iou": 0.4375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 265700312, + "step": 1547 + }, + { + "epoch": 0.4071809035312685, + "grad_norm": 6.131445725549716, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 265872224, + "step": 1548 + }, + { + "epoch": 0.4071809035312685, + "loss": 0.12791498005390167, + "loss_ce": 0.0017858227947726846, + "loss_iou": 0.416015625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 265872224, + "step": 1548 + }, + { + "epoch": 0.4074439402906556, + "grad_norm": 6.062694054094362, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 266044368, + "step": 1549 + }, + { + "epoch": 0.4074439402906556, + "loss": 0.09559094905853271, + "loss_ce": 0.00013196848158258945, + "loss_iou": 0.625, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 266044368, + "step": 1549 + }, + { + "epoch": 0.4077069770500427, + "grad_norm": 6.379902327089613, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 266216648, + "step": 1550 + }, + { + "epoch": 0.4077069770500427, + "loss": 0.26357126235961914, + "loss_ce": 0.007589830085635185, + "loss_iou": 0.62890625, + "loss_num": 0.05126953125, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 266216648, + "step": 1550 + }, + { + "epoch": 0.4079700138094299, + "grad_norm": 5.16918021793661, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 266387044, + "step": 1551 + }, + { + "epoch": 0.4079700138094299, + "loss": 0.0737496167421341, + "loss_ce": 0.00215538265183568, + "loss_iou": 0.53515625, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 266387044, + "step": 1551 + }, + { + "epoch": 0.408233050568817, + "grad_norm": 36.40974082123275, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 266557400, + "step": 1552 + }, + { + "epoch": 0.408233050568817, + "loss": 0.13131964206695557, + "loss_ce": 0.0013757951091974974, + "loss_iou": 0.55078125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 266557400, + "step": 1552 + }, + { + "epoch": 0.40849608732820414, + "grad_norm": 4.785820854925791, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 266729564, + "step": 1553 + }, + { + "epoch": 0.40849608732820414, + "loss": 0.12293117493391037, + "loss_ce": 0.0009524148190394044, + "loss_iou": 0.447265625, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 266729564, + "step": 1553 + }, + { + "epoch": 0.40875912408759124, + "grad_norm": 19.710178190555972, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 266901912, + "step": 1554 + }, + { + "epoch": 0.40875912408759124, + "loss": 0.10157528519630432, + "loss_ce": 0.000501063244882971, + "loss_iou": 0.51171875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 266901912, + "step": 1554 + }, + { + "epoch": 0.40902216084697834, + "grad_norm": 8.53788862222279, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 267074204, + "step": 1555 + }, + { + "epoch": 0.40902216084697834, + "loss": 0.18818530440330505, + "loss_ce": 0.0011735922889783978, + "loss_iou": 0.59765625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 267074204, + "step": 1555 + }, + { + "epoch": 0.4092851976063655, + "grad_norm": 9.427604183463405, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 267246448, + "step": 1556 + }, + { + "epoch": 0.4092851976063655, + "loss": 0.11521363258361816, + "loss_ce": 0.0002539134002290666, + "loss_iou": 0.52734375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 267246448, + "step": 1556 + }, + { + "epoch": 0.4095482343657526, + "grad_norm": 6.05814112958034, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 267418524, + "step": 1557 + }, + { + "epoch": 0.4095482343657526, + "loss": 0.17979669570922852, + "loss_ce": 0.002214906271547079, + "loss_iou": 0.451171875, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 267418524, + "step": 1557 + }, + { + "epoch": 0.40981127112513976, + "grad_norm": 5.372569051634822, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 267590932, + "step": 1558 + }, + { + "epoch": 0.40981127112513976, + "loss": 0.08670764416456223, + "loss_ce": 0.0017467074794694781, + "loss_iou": 0.59375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 267590932, + "step": 1558 + }, + { + "epoch": 0.41007430788452687, + "grad_norm": 7.2829791835725635, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 267763292, + "step": 1559 + }, + { + "epoch": 0.41007430788452687, + "loss": 0.1519029438495636, + "loss_ce": 0.002458356786519289, + "loss_iou": 0.416015625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 267763292, + "step": 1559 + }, + { + "epoch": 0.41033734464391397, + "grad_norm": 5.098616866005624, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 267935712, + "step": 1560 + }, + { + "epoch": 0.41033734464391397, + "loss": 0.11065052449703217, + "loss_ce": 0.0015807072632014751, + "loss_iou": 0.69140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 267935712, + "step": 1560 + }, + { + "epoch": 0.41060038140330113, + "grad_norm": 10.199812853245577, + "learning_rate": 5e-06, + "loss": 0.147, + "num_input_tokens_seen": 268108048, + "step": 1561 + }, + { + "epoch": 0.41060038140330113, + "loss": 0.09520716965198517, + "loss_ce": 0.003898570779711008, + "loss_iou": 0.46484375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 268108048, + "step": 1561 + }, + { + "epoch": 0.41086341816268823, + "grad_norm": 40.24459139135473, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 268280092, + "step": 1562 + }, + { + "epoch": 0.41086341816268823, + "loss": 0.07629628479480743, + "loss_ce": 0.0009788942988961935, + "loss_iou": 0.50390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 268280092, + "step": 1562 + }, + { + "epoch": 0.41112645492207534, + "grad_norm": 10.392982307740652, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 268450660, + "step": 1563 + }, + { + "epoch": 0.41112645492207534, + "loss": 0.122515007853508, + "loss_ce": 0.0010550380684435368, + "loss_iou": 0.4453125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 268450660, + "step": 1563 + }, + { + "epoch": 0.4113894916814625, + "grad_norm": 9.96483413417414, + "learning_rate": 5e-06, + "loss": 0.1875, + "num_input_tokens_seen": 268622864, + "step": 1564 + }, + { + "epoch": 0.4113894916814625, + "loss": 0.22188733518123627, + "loss_ce": 0.002923724940046668, + "loss_iou": 0.5078125, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 268622864, + "step": 1564 + }, + { + "epoch": 0.4116525284408496, + "grad_norm": 4.677477301563895, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 268793508, + "step": 1565 + }, + { + "epoch": 0.4116525284408496, + "loss": 0.08342467993497849, + "loss_ce": 0.00023376270837616175, + "loss_iou": 0.53125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 268793508, + "step": 1565 + }, + { + "epoch": 0.41191556520023676, + "grad_norm": 5.461599242953511, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 268965604, + "step": 1566 + }, + { + "epoch": 0.41191556520023676, + "loss": 0.21633076667785645, + "loss_ce": 0.003531699301674962, + "loss_iou": 0.71875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 268965604, + "step": 1566 + }, + { + "epoch": 0.41217860195962386, + "grad_norm": 4.406500798076577, + "learning_rate": 5e-06, + "loss": 0.1875, + "num_input_tokens_seen": 269137692, + "step": 1567 + }, + { + "epoch": 0.41217860195962386, + "loss": 0.1873759627342224, + "loss_ce": 0.0010966623667627573, + "loss_iou": 0.5703125, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 269137692, + "step": 1567 + }, + { + "epoch": 0.41244163871901096, + "grad_norm": 5.2924295964415595, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 269309988, + "step": 1568 + }, + { + "epoch": 0.41244163871901096, + "loss": 0.20892329514026642, + "loss_ce": 0.0004729711217805743, + "loss_iou": 0.57421875, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 269309988, + "step": 1568 + }, + { + "epoch": 0.4127046754783981, + "grad_norm": 5.459543507128776, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 269482356, + "step": 1569 + }, + { + "epoch": 0.4127046754783981, + "loss": 0.18314355611801147, + "loss_ce": 0.005287110339850187, + "loss_iou": 0.4921875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 269482356, + "step": 1569 + }, + { + "epoch": 0.4129677122377852, + "grad_norm": 7.266131528232208, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 269654572, + "step": 1570 + }, + { + "epoch": 0.4129677122377852, + "loss": 0.1481064409017563, + "loss_ce": 0.002049315720796585, + "loss_iou": 0.63671875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 269654572, + "step": 1570 + }, + { + "epoch": 0.4132307489971724, + "grad_norm": 9.868014321011369, + "learning_rate": 5e-06, + "loss": 0.169, + "num_input_tokens_seen": 269826912, + "step": 1571 + }, + { + "epoch": 0.4132307489971724, + "loss": 0.21563705801963806, + "loss_ce": 0.0007627883460372686, + "loss_iou": 0.390625, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 269826912, + "step": 1571 + }, + { + "epoch": 0.4134937857565595, + "grad_norm": 6.475065616065912, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 269998912, + "step": 1572 + }, + { + "epoch": 0.4134937857565595, + "loss": 0.20219947397708893, + "loss_ce": 0.0007529358845204115, + "loss_iou": 0.609375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 269998912, + "step": 1572 + }, + { + "epoch": 0.4137568225159466, + "grad_norm": 5.376660767415141, + "learning_rate": 5e-06, + "loss": 0.1247, + "num_input_tokens_seen": 270171036, + "step": 1573 + }, + { + "epoch": 0.4137568225159466, + "loss": 0.2056845724582672, + "loss_ce": 0.0031394038815051317, + "loss_iou": 0.4296875, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 270171036, + "step": 1573 + }, + { + "epoch": 0.41401985927533375, + "grad_norm": 7.42827277488161, + "learning_rate": 5e-06, + "loss": 0.1727, + "num_input_tokens_seen": 270342784, + "step": 1574 + }, + { + "epoch": 0.41401985927533375, + "loss": 0.15383180975914001, + "loss_ce": 0.00045046067680232227, + "loss_iou": 0.4609375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 270342784, + "step": 1574 + }, + { + "epoch": 0.41428289603472085, + "grad_norm": 5.130198059184452, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 270514876, + "step": 1575 + }, + { + "epoch": 0.41428289603472085, + "loss": 0.17609894275665283, + "loss_ce": 0.001187438378110528, + "loss_iou": 0.44140625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 270514876, + "step": 1575 + }, + { + "epoch": 0.41454593279410795, + "grad_norm": 7.421258492917825, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 270687292, + "step": 1576 + }, + { + "epoch": 0.41454593279410795, + "loss": 0.07864829152822495, + "loss_ce": 0.0015761489048600197, + "loss_iou": 0.58984375, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 270687292, + "step": 1576 + }, + { + "epoch": 0.4148089695534951, + "grad_norm": 6.818009590404441, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 270859520, + "step": 1577 + }, + { + "epoch": 0.4148089695534951, + "loss": 0.18715041875839233, + "loss_ce": 0.0005964583833701909, + "loss_iou": 0.5234375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 270859520, + "step": 1577 + }, + { + "epoch": 0.4150720063128822, + "grad_norm": 16.40610165209138, + "learning_rate": 5e-06, + "loss": 0.1347, + "num_input_tokens_seen": 271029200, + "step": 1578 + }, + { + "epoch": 0.4150720063128822, + "loss": 0.15075725317001343, + "loss_ce": 0.0008091325289569795, + "loss_iou": 0.61328125, + "loss_num": 0.0299072265625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 271029200, + "step": 1578 + }, + { + "epoch": 0.41533504307226937, + "grad_norm": 5.724561524768373, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 271201432, + "step": 1579 + }, + { + "epoch": 0.41533504307226937, + "loss": 0.18409447371959686, + "loss_ce": 0.0010500368662178516, + "loss_iou": 0.443359375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 271201432, + "step": 1579 + }, + { + "epoch": 0.4155980798316565, + "grad_norm": 5.423586189008012, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 271373604, + "step": 1580 + }, + { + "epoch": 0.4155980798316565, + "loss": 0.10582901537418365, + "loss_ce": 0.0017030383460223675, + "loss_iou": 0.58203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 271373604, + "step": 1580 + }, + { + "epoch": 0.4158611165910436, + "grad_norm": 4.639007188262618, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 271544156, + "step": 1581 + }, + { + "epoch": 0.4158611165910436, + "loss": 0.08775737881660461, + "loss_ce": 0.00032452167943120003, + "loss_iou": 0.49609375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 271544156, + "step": 1581 + }, + { + "epoch": 0.41612415335043074, + "grad_norm": 4.194479063501224, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 271716244, + "step": 1582 + }, + { + "epoch": 0.41612415335043074, + "loss": 0.07552362233400345, + "loss_ce": 5.3650168410968035e-05, + "loss_iou": 0.5859375, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 271716244, + "step": 1582 + }, + { + "epoch": 0.41638719010981784, + "grad_norm": 6.1414517921780325, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 271888460, + "step": 1583 + }, + { + "epoch": 0.41638719010981784, + "loss": 0.09023694694042206, + "loss_ce": 0.004726693499833345, + "loss_iou": 0.62890625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 271888460, + "step": 1583 + }, + { + "epoch": 0.416650226869205, + "grad_norm": 4.588576414677694, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 272060548, + "step": 1584 + }, + { + "epoch": 0.416650226869205, + "loss": 0.06312213093042374, + "loss_ce": 0.004131653346121311, + "loss_iou": 0.56640625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 272060548, + "step": 1584 + }, + { + "epoch": 0.4169132636285921, + "grad_norm": 6.028144781636759, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 272232820, + "step": 1585 + }, + { + "epoch": 0.4169132636285921, + "loss": 0.0890943706035614, + "loss_ce": 0.0011732284910976887, + "loss_iou": 0.62109375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 272232820, + "step": 1585 + }, + { + "epoch": 0.4171763003879792, + "grad_norm": 4.472287089666441, + "learning_rate": 5e-06, + "loss": 0.1729, + "num_input_tokens_seen": 272405268, + "step": 1586 + }, + { + "epoch": 0.4171763003879792, + "loss": 0.2705861032009125, + "loss_ce": 0.00334367249161005, + "loss_iou": 0.5625, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 272405268, + "step": 1586 + }, + { + "epoch": 0.41743933714736636, + "grad_norm": 3.852608585701711, + "learning_rate": 5e-06, + "loss": 0.1019, + "num_input_tokens_seen": 272577520, + "step": 1587 + }, + { + "epoch": 0.41743933714736636, + "loss": 0.1504625827074051, + "loss_ce": 0.00028558107442222536, + "loss_iou": 0.388671875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 272577520, + "step": 1587 + }, + { + "epoch": 0.41770237390675347, + "grad_norm": 5.376381690191942, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 272747276, + "step": 1588 + }, + { + "epoch": 0.41770237390675347, + "loss": 0.15104855597019196, + "loss_ce": 0.005449185613542795, + "loss_iou": 0.6640625, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 272747276, + "step": 1588 + }, + { + "epoch": 0.41796541066614057, + "grad_norm": 6.161958958679319, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 272919388, + "step": 1589 + }, + { + "epoch": 0.41796541066614057, + "loss": 0.19656141102313995, + "loss_ce": 0.0006995859439484775, + "loss_iou": 0.3984375, + "loss_num": 0.0390625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 272919388, + "step": 1589 + }, + { + "epoch": 0.4182284474255277, + "grad_norm": 9.65888325757511, + "learning_rate": 5e-06, + "loss": 0.1406, + "num_input_tokens_seen": 273091656, + "step": 1590 + }, + { + "epoch": 0.4182284474255277, + "loss": 0.21912047266960144, + "loss_ce": 0.0045361267402768135, + "loss_iou": 0.62109375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 273091656, + "step": 1590 + }, + { + "epoch": 0.41849148418491483, + "grad_norm": 8.813988618374006, + "learning_rate": 5e-06, + "loss": 0.1735, + "num_input_tokens_seen": 273263768, + "step": 1591 + }, + { + "epoch": 0.41849148418491483, + "loss": 0.23192375898361206, + "loss_ce": 0.00888605136424303, + "loss_iou": 0.48046875, + "loss_num": 0.044677734375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 273263768, + "step": 1591 + }, + { + "epoch": 0.418754520944302, + "grad_norm": 7.699871319865347, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 273435676, + "step": 1592 + }, + { + "epoch": 0.418754520944302, + "loss": 0.10040029883384705, + "loss_ce": 0.0014928288292139769, + "loss_iou": 0.5859375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 273435676, + "step": 1592 + }, + { + "epoch": 0.4190175577036891, + "grad_norm": 5.319980033934329, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 273607832, + "step": 1593 + }, + { + "epoch": 0.4190175577036891, + "loss": 0.149379163980484, + "loss_ce": 0.004207056015729904, + "loss_iou": 0.59375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 273607832, + "step": 1593 + }, + { + "epoch": 0.4192805944630762, + "grad_norm": 9.352989040804356, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 273778508, + "step": 1594 + }, + { + "epoch": 0.4192805944630762, + "loss": 0.12792231142520905, + "loss_ce": 0.0009081543539650738, + "loss_iou": 0.66015625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 273778508, + "step": 1594 + }, + { + "epoch": 0.41954363122246335, + "grad_norm": 10.015857880890572, + "learning_rate": 5e-06, + "loss": 0.1659, + "num_input_tokens_seen": 273950492, + "step": 1595 + }, + { + "epoch": 0.41954363122246335, + "loss": 0.1797780990600586, + "loss_ce": 0.004515648819506168, + "loss_iou": 0.392578125, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 273950492, + "step": 1595 + }, + { + "epoch": 0.41980666798185046, + "grad_norm": 5.437610936264711, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 274122788, + "step": 1596 + }, + { + "epoch": 0.41980666798185046, + "loss": 0.18884079158306122, + "loss_ce": 0.0007914789603091776, + "loss_iou": 0.59765625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 274122788, + "step": 1596 + }, + { + "epoch": 0.4200697047412376, + "grad_norm": 7.822775629663786, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 274294960, + "step": 1597 + }, + { + "epoch": 0.4200697047412376, + "loss": 0.10433492064476013, + "loss_ce": 0.000544635346159339, + "loss_iou": 0.208984375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 274294960, + "step": 1597 + }, + { + "epoch": 0.4203327415006247, + "grad_norm": 5.742663276174034, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 274466828, + "step": 1598 + }, + { + "epoch": 0.4203327415006247, + "loss": 0.21367287635803223, + "loss_ce": 0.0007822535699233413, + "loss_iou": 0.546875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 274466828, + "step": 1598 + }, + { + "epoch": 0.4205957782600118, + "grad_norm": 7.632689703873666, + "learning_rate": 5e-06, + "loss": 0.1806, + "num_input_tokens_seen": 274639136, + "step": 1599 + }, + { + "epoch": 0.4205957782600118, + "loss": 0.15357787907123566, + "loss_ce": 0.0009594644652679563, + "loss_iou": 0.5234375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 274639136, + "step": 1599 + }, + { + "epoch": 0.420858815019399, + "grad_norm": 6.551977275944205, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 274811300, + "step": 1600 + }, + { + "epoch": 0.420858815019399, + "loss": 0.10377545654773712, + "loss_ce": 0.005798771977424622, + "loss_iou": 0.5859375, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 274811300, + "step": 1600 + }, + { + "epoch": 0.4211218517787861, + "grad_norm": 3.634688713329233, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 274983232, + "step": 1601 + }, + { + "epoch": 0.4211218517787861, + "loss": 0.0826454609632492, + "loss_ce": 0.0008583518210798502, + "loss_iou": 0.4453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 274983232, + "step": 1601 + }, + { + "epoch": 0.4213848885381732, + "grad_norm": 6.513487522029516, + "learning_rate": 5e-06, + "loss": 0.1857, + "num_input_tokens_seen": 275155252, + "step": 1602 + }, + { + "epoch": 0.4213848885381732, + "loss": 0.22003334760665894, + "loss_ce": 0.0016495751915499568, + "loss_iou": 0.54296875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 275155252, + "step": 1602 + }, + { + "epoch": 0.42164792529756034, + "grad_norm": 7.844073689048222, + "learning_rate": 5e-06, + "loss": 0.164, + "num_input_tokens_seen": 275327452, + "step": 1603 + }, + { + "epoch": 0.42164792529756034, + "loss": 0.21324840188026428, + "loss_ce": 0.001090196194127202, + "loss_iou": 0.5703125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 275327452, + "step": 1603 + }, + { + "epoch": 0.42191096205694745, + "grad_norm": 5.702970135636054, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 275499556, + "step": 1604 + }, + { + "epoch": 0.42191096205694745, + "loss": 0.12330840528011322, + "loss_ce": 0.00029205146711319685, + "loss_iou": 0.67578125, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 275499556, + "step": 1604 + }, + { + "epoch": 0.4221739988163346, + "grad_norm": 7.295315362378517, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 275671424, + "step": 1605 + }, + { + "epoch": 0.4221739988163346, + "loss": 0.09813028573989868, + "loss_ce": 0.0004130033776164055, + "loss_iou": 0.48046875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 275671424, + "step": 1605 + }, + { + "epoch": 0.4224370355757217, + "grad_norm": 9.326948217180478, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 275843716, + "step": 1606 + }, + { + "epoch": 0.4224370355757217, + "loss": 0.08426269888877869, + "loss_ce": 0.00183471804484725, + "loss_iou": 0.671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 275843716, + "step": 1606 + }, + { + "epoch": 0.4227000723351088, + "grad_norm": 5.932370446925655, + "learning_rate": 5e-06, + "loss": 0.1547, + "num_input_tokens_seen": 276016052, + "step": 1607 + }, + { + "epoch": 0.4227000723351088, + "loss": 0.1150435283780098, + "loss_ce": 0.0007857157033868134, + "loss_iou": 0.474609375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 276016052, + "step": 1607 + }, + { + "epoch": 0.42296310909449597, + "grad_norm": 14.783104322463815, + "learning_rate": 5e-06, + "loss": 0.1635, + "num_input_tokens_seen": 276188232, + "step": 1608 + }, + { + "epoch": 0.42296310909449597, + "loss": 0.2393971085548401, + "loss_ce": 0.005510389804840088, + "loss_iou": 0.419921875, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 276188232, + "step": 1608 + }, + { + "epoch": 0.4232261458538831, + "grad_norm": 37.72538153737276, + "learning_rate": 5e-06, + "loss": 0.1175, + "num_input_tokens_seen": 276360224, + "step": 1609 + }, + { + "epoch": 0.4232261458538831, + "loss": 0.10361947864294052, + "loss_ce": 0.001171966316178441, + "loss_iou": 0.54296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 276360224, + "step": 1609 + }, + { + "epoch": 0.42348918261327023, + "grad_norm": 5.929115604459541, + "learning_rate": 5e-06, + "loss": 0.185, + "num_input_tokens_seen": 276532388, + "step": 1610 + }, + { + "epoch": 0.42348918261327023, + "loss": 0.2490251511335373, + "loss_ce": 0.001466565066948533, + "loss_iou": 0.515625, + "loss_num": 0.049560546875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 276532388, + "step": 1610 + }, + { + "epoch": 0.42375221937265733, + "grad_norm": 4.065855918819384, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 276704452, + "step": 1611 + }, + { + "epoch": 0.42375221937265733, + "loss": 0.08056612312793732, + "loss_ce": 0.0004574810154736042, + "loss_iou": 0.59375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 276704452, + "step": 1611 + }, + { + "epoch": 0.42401525613204444, + "grad_norm": 5.144248302598616, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 276876660, + "step": 1612 + }, + { + "epoch": 0.42401525613204444, + "loss": 0.1256069540977478, + "loss_ce": 0.003719748929142952, + "loss_iou": 0.470703125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 276876660, + "step": 1612 + }, + { + "epoch": 0.4242782928914316, + "grad_norm": 6.484383508592383, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 277048524, + "step": 1613 + }, + { + "epoch": 0.4242782928914316, + "loss": 0.08503228425979614, + "loss_ce": 0.0004375518183223903, + "loss_iou": 0.51953125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 277048524, + "step": 1613 + }, + { + "epoch": 0.4245413296508187, + "grad_norm": 4.995173925299756, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 277220588, + "step": 1614 + }, + { + "epoch": 0.4245413296508187, + "loss": 0.17099690437316895, + "loss_ce": 0.0004646642482839525, + "loss_iou": 0.609375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 277220588, + "step": 1614 + }, + { + "epoch": 0.4248043664102058, + "grad_norm": 8.497364636941706, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 277392824, + "step": 1615 + }, + { + "epoch": 0.4248043664102058, + "loss": 0.13706764578819275, + "loss_ce": 0.0016916776075959206, + "loss_iou": 0.4609375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 277392824, + "step": 1615 + }, + { + "epoch": 0.42506740316959296, + "grad_norm": 6.760370062216128, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 277565104, + "step": 1616 + }, + { + "epoch": 0.42506740316959296, + "loss": 0.10927695780992508, + "loss_ce": 0.00048179191071540117, + "loss_iou": 0.65234375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 277565104, + "step": 1616 + }, + { + "epoch": 0.42533043992898006, + "grad_norm": 5.035401913498838, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 277737224, + "step": 1617 + }, + { + "epoch": 0.42533043992898006, + "loss": 0.13474591076374054, + "loss_ce": 0.002299622166901827, + "loss_iou": 0.41796875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 277737224, + "step": 1617 + }, + { + "epoch": 0.4255934766883672, + "grad_norm": 6.777922664584483, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 277907488, + "step": 1618 + }, + { + "epoch": 0.4255934766883672, + "loss": 0.11205422878265381, + "loss_ce": 0.0009702442912384868, + "loss_iou": 0.59765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 277907488, + "step": 1618 + }, + { + "epoch": 0.4258565134477543, + "grad_norm": 7.0419776595399695, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 278079908, + "step": 1619 + }, + { + "epoch": 0.4258565134477543, + "loss": 0.1803514063358307, + "loss_ce": 0.0011216606944799423, + "loss_iou": 0.51171875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 278079908, + "step": 1619 + }, + { + "epoch": 0.42611955020714143, + "grad_norm": 4.869531542128126, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 278252060, + "step": 1620 + }, + { + "epoch": 0.42611955020714143, + "loss": 0.17065666615962982, + "loss_ce": 0.0023522234987467527, + "loss_iou": 0.478515625, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 278252060, + "step": 1620 + }, + { + "epoch": 0.4263825869665286, + "grad_norm": 15.025432063122246, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 278424152, + "step": 1621 + }, + { + "epoch": 0.4263825869665286, + "loss": 0.16370511054992676, + "loss_ce": 0.0011379658244550228, + "loss_iou": 0.53125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 278424152, + "step": 1621 + }, + { + "epoch": 0.4266456237259157, + "grad_norm": 7.359642440972258, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 278596208, + "step": 1622 + }, + { + "epoch": 0.4266456237259157, + "loss": 0.18550482392311096, + "loss_ce": 0.0022162585519254208, + "loss_iou": 0.69921875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 278596208, + "step": 1622 + }, + { + "epoch": 0.42690866048530285, + "grad_norm": 3.859276017343743, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 278768348, + "step": 1623 + }, + { + "epoch": 0.42690866048530285, + "loss": 0.08928422629833221, + "loss_ce": 0.003163617569953203, + "loss_iou": 0.447265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 278768348, + "step": 1623 + }, + { + "epoch": 0.42717169724468995, + "grad_norm": 3.491408872838399, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 278940432, + "step": 1624 + }, + { + "epoch": 0.42717169724468995, + "loss": 0.10469355434179306, + "loss_ce": 0.004763749893754721, + "loss_iou": 0.609375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 278940432, + "step": 1624 + }, + { + "epoch": 0.42743473400407705, + "grad_norm": 8.925089197668054, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 279112480, + "step": 1625 + }, + { + "epoch": 0.42743473400407705, + "loss": 0.15482491254806519, + "loss_ce": 0.0005127866170369089, + "loss_iou": 0.5859375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 279112480, + "step": 1625 + }, + { + "epoch": 0.4276977707634642, + "grad_norm": 5.017252116965269, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 279284544, + "step": 1626 + }, + { + "epoch": 0.4276977707634642, + "loss": 0.11934304237365723, + "loss_ce": 0.0036509071942418814, + "loss_iou": 0.80859375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 279284544, + "step": 1626 + }, + { + "epoch": 0.4279608075228513, + "grad_norm": 11.28367561887442, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 279456684, + "step": 1627 + }, + { + "epoch": 0.4279608075228513, + "loss": 0.08618146926164627, + "loss_ce": 0.0002134529349859804, + "loss_iou": 0.435546875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 279456684, + "step": 1627 + }, + { + "epoch": 0.4282238442822384, + "grad_norm": 14.334013868271164, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 279629144, + "step": 1628 + }, + { + "epoch": 0.4282238442822384, + "loss": 0.21622659265995026, + "loss_ce": 0.0035800987388938665, + "loss_iou": 0.5546875, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 279629144, + "step": 1628 + }, + { + "epoch": 0.4284868810416256, + "grad_norm": 5.177758249324022, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 279800920, + "step": 1629 + }, + { + "epoch": 0.4284868810416256, + "loss": 0.11388795077800751, + "loss_ce": 0.0024377545341849327, + "loss_iou": 0.46484375, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 279800920, + "step": 1629 + }, + { + "epoch": 0.4287499178010127, + "grad_norm": 5.002716354322989, + "learning_rate": 5e-06, + "loss": 0.16, + "num_input_tokens_seen": 279973164, + "step": 1630 + }, + { + "epoch": 0.4287499178010127, + "loss": 0.11221545934677124, + "loss_ce": 0.0021690744906663895, + "loss_iou": 0.5546875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 279973164, + "step": 1630 + }, + { + "epoch": 0.42901295456039984, + "grad_norm": 11.888987422058314, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 280144932, + "step": 1631 + }, + { + "epoch": 0.42901295456039984, + "loss": 0.10366816818714142, + "loss_ce": 0.0032348139211535454, + "loss_iou": 0.53125, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 280144932, + "step": 1631 + }, + { + "epoch": 0.42927599131978694, + "grad_norm": 5.9683348593169105, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 280316892, + "step": 1632 + }, + { + "epoch": 0.42927599131978694, + "loss": 0.1221093013882637, + "loss_ce": 0.0030907560139894485, + "loss_iou": 0.734375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 280316892, + "step": 1632 + }, + { + "epoch": 0.42953902807917405, + "grad_norm": 4.306403695098148, + "learning_rate": 5e-06, + "loss": 0.1676, + "num_input_tokens_seen": 280488680, + "step": 1633 + }, + { + "epoch": 0.42953902807917405, + "loss": 0.16619864106178284, + "loss_ce": 0.0007323222234845161, + "loss_iou": 0.421875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 280488680, + "step": 1633 + }, + { + "epoch": 0.4298020648385612, + "grad_norm": 6.542037684380333, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 280658812, + "step": 1634 + }, + { + "epoch": 0.4298020648385612, + "loss": 0.1335218995809555, + "loss_ce": 0.0008009535376913846, + "loss_iou": 0.419921875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 280658812, + "step": 1634 + }, + { + "epoch": 0.4300651015979483, + "grad_norm": 5.780919966084494, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 280829020, + "step": 1635 + }, + { + "epoch": 0.4300651015979483, + "loss": 0.13609978556632996, + "loss_ce": 0.0019139924552291632, + "loss_iou": 0.375, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 280829020, + "step": 1635 + }, + { + "epoch": 0.4303281383573354, + "grad_norm": 4.625756427735958, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 280999460, + "step": 1636 + }, + { + "epoch": 0.4303281383573354, + "loss": 0.05524425953626633, + "loss_ce": 0.0004957281635142863, + "loss_iou": 0.6640625, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 280999460, + "step": 1636 + }, + { + "epoch": 0.43059117511672257, + "grad_norm": 4.597155715023034, + "learning_rate": 5e-06, + "loss": 0.1455, + "num_input_tokens_seen": 281169544, + "step": 1637 + }, + { + "epoch": 0.43059117511672257, + "loss": 0.09542928636074066, + "loss_ce": 0.002106534782797098, + "loss_iou": 0.60546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 281169544, + "step": 1637 + }, + { + "epoch": 0.43085421187610967, + "grad_norm": 6.599089127666852, + "learning_rate": 5e-06, + "loss": 0.1655, + "num_input_tokens_seen": 281341764, + "step": 1638 + }, + { + "epoch": 0.43085421187610967, + "loss": 0.15369150042533875, + "loss_ce": 0.0017444868572056293, + "loss_iou": 0.5390625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 281341764, + "step": 1638 + }, + { + "epoch": 0.43111724863549683, + "grad_norm": 10.985722993191203, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 281514480, + "step": 1639 + }, + { + "epoch": 0.43111724863549683, + "loss": 0.12693974375724792, + "loss_ce": 0.004442187491804361, + "loss_iou": 0.427734375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 281514480, + "step": 1639 + }, + { + "epoch": 0.43138028539488393, + "grad_norm": 5.112052305423048, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 281686636, + "step": 1640 + }, + { + "epoch": 0.43138028539488393, + "loss": 0.13622896373271942, + "loss_ce": 0.001615930232219398, + "loss_iou": 0.6875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 281686636, + "step": 1640 + }, + { + "epoch": 0.43164332215427104, + "grad_norm": 4.445172786952326, + "learning_rate": 5e-06, + "loss": 0.1298, + "num_input_tokens_seen": 281859068, + "step": 1641 + }, + { + "epoch": 0.43164332215427104, + "loss": 0.16077426075935364, + "loss_ce": 0.0008926668670028448, + "loss_iou": 0.478515625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 281859068, + "step": 1641 + }, + { + "epoch": 0.4319063589136582, + "grad_norm": 18.643962143254182, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 282031296, + "step": 1642 + }, + { + "epoch": 0.4319063589136582, + "loss": 0.11041103303432465, + "loss_ce": 0.00216518621891737, + "loss_iou": 0.5703125, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 282031296, + "step": 1642 + }, + { + "epoch": 0.4321693956730453, + "grad_norm": 14.94723205702143, + "learning_rate": 5e-06, + "loss": 0.0973, + "num_input_tokens_seen": 282203456, + "step": 1643 + }, + { + "epoch": 0.4321693956730453, + "loss": 0.10057017207145691, + "loss_ce": 0.002517198445275426, + "loss_iou": 0.470703125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 282203456, + "step": 1643 + }, + { + "epoch": 0.43243243243243246, + "grad_norm": 3.591864332881924, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 282374020, + "step": 1644 + }, + { + "epoch": 0.43243243243243246, + "loss": 0.08621848374605179, + "loss_ce": 0.00044883223017677665, + "loss_iou": 0.625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 282374020, + "step": 1644 + }, + { + "epoch": 0.43269546919181956, + "grad_norm": 6.406579991141497, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 282544200, + "step": 1645 + }, + { + "epoch": 0.43269546919181956, + "loss": 0.10012087225914001, + "loss_ce": 0.001396512147039175, + "loss_iou": 0.48046875, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 282544200, + "step": 1645 + }, + { + "epoch": 0.43295850595120666, + "grad_norm": 6.633263160453237, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 282716276, + "step": 1646 + }, + { + "epoch": 0.43295850595120666, + "loss": 0.09250755608081818, + "loss_ce": 0.0007411979604512453, + "loss_iou": 0.57421875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 282716276, + "step": 1646 + }, + { + "epoch": 0.4332215427105938, + "grad_norm": 11.88641544130966, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 282888756, + "step": 1647 + }, + { + "epoch": 0.4332215427105938, + "loss": 0.11388491094112396, + "loss_ce": 0.0009088441729545593, + "loss_iou": 0.55078125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 282888756, + "step": 1647 + }, + { + "epoch": 0.4334845794699809, + "grad_norm": 5.478375817282589, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 283060788, + "step": 1648 + }, + { + "epoch": 0.4334845794699809, + "loss": 0.08237907290458679, + "loss_ce": 0.003521653823554516, + "loss_iou": 0.5859375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 283060788, + "step": 1648 + }, + { + "epoch": 0.433747616229368, + "grad_norm": 7.7141358569237015, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 283232744, + "step": 1649 + }, + { + "epoch": 0.433747616229368, + "loss": 0.16650542616844177, + "loss_ce": 0.001252750400453806, + "loss_iou": 0.515625, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 283232744, + "step": 1649 + }, + { + "epoch": 0.4340106529887552, + "grad_norm": 4.902197150094181, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 283404860, + "step": 1650 + }, + { + "epoch": 0.4340106529887552, + "loss": 0.11392060667276382, + "loss_ce": 0.0009140149923041463, + "loss_iou": 0.67578125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 283404860, + "step": 1650 + }, + { + "epoch": 0.4342736897481423, + "grad_norm": 6.611385564549315, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 283577340, + "step": 1651 + }, + { + "epoch": 0.4342736897481423, + "loss": 0.14234262704849243, + "loss_ce": 0.0011072808410972357, + "loss_iou": 0.46875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 283577340, + "step": 1651 + }, + { + "epoch": 0.43453672650752945, + "grad_norm": 5.270564238834203, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 283747872, + "step": 1652 + }, + { + "epoch": 0.43453672650752945, + "loss": 0.08932007849216461, + "loss_ce": 0.0014904842246323824, + "loss_iou": 0.515625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 283747872, + "step": 1652 + }, + { + "epoch": 0.43479976326691655, + "grad_norm": 5.150524639297202, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 283920212, + "step": 1653 + }, + { + "epoch": 0.43479976326691655, + "loss": 0.08878134936094284, + "loss_ce": 0.00037192486342974007, + "loss_iou": 0.51171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 283920212, + "step": 1653 + }, + { + "epoch": 0.43506280002630365, + "grad_norm": 5.389390494681365, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 284090700, + "step": 1654 + }, + { + "epoch": 0.43506280002630365, + "loss": 0.09808811545372009, + "loss_ce": 0.0017746419180184603, + "loss_iou": 0.59375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 284090700, + "step": 1654 + }, + { + "epoch": 0.4353258367856908, + "grad_norm": 5.2829708598355625, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 284262932, + "step": 1655 + }, + { + "epoch": 0.4353258367856908, + "loss": 0.24406926333904266, + "loss_ce": 0.0009052163222804666, + "loss_iou": 0.404296875, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 284262932, + "step": 1655 + }, + { + "epoch": 0.4355888735450779, + "grad_norm": 4.415055027745429, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 284433136, + "step": 1656 + }, + { + "epoch": 0.4355888735450779, + "loss": 0.14210422337055206, + "loss_ce": 0.0005942133138887584, + "loss_iou": 0.482421875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 284433136, + "step": 1656 + }, + { + "epoch": 0.4358519103044651, + "grad_norm": 11.10469204899232, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 284604980, + "step": 1657 + }, + { + "epoch": 0.4358519103044651, + "loss": 0.06616829335689545, + "loss_ce": 0.0029969080351293087, + "loss_iou": 0.5078125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 284604980, + "step": 1657 + }, + { + "epoch": 0.4361149470638522, + "grad_norm": 37.454091592129345, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 284777024, + "step": 1658 + }, + { + "epoch": 0.4361149470638522, + "loss": 0.09428656101226807, + "loss_ce": 0.00023138479446060956, + "loss_iou": 0.5234375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 284777024, + "step": 1658 + }, + { + "epoch": 0.4363779838232393, + "grad_norm": 6.943748076830087, + "learning_rate": 5e-06, + "loss": 0.1641, + "num_input_tokens_seen": 284948692, + "step": 1659 + }, + { + "epoch": 0.4363779838232393, + "loss": 0.12960584461688995, + "loss_ce": 0.0008827036363072693, + "loss_iou": 0.546875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 284948692, + "step": 1659 + }, + { + "epoch": 0.43664102058262644, + "grad_norm": 6.322360699878544, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 285120852, + "step": 1660 + }, + { + "epoch": 0.43664102058262644, + "loss": 0.08696160465478897, + "loss_ce": 0.0016649758908897638, + "loss_iou": 0.609375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 285120852, + "step": 1660 + }, + { + "epoch": 0.43690405734201354, + "grad_norm": 4.9857397500362, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 285293104, + "step": 1661 + }, + { + "epoch": 0.43690405734201354, + "loss": 0.1620713770389557, + "loss_ce": 0.0027085873298346996, + "loss_iou": 0.359375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 285293104, + "step": 1661 + }, + { + "epoch": 0.43716709410140064, + "grad_norm": 6.167930639257179, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 285465116, + "step": 1662 + }, + { + "epoch": 0.43716709410140064, + "loss": 0.13285255432128906, + "loss_ce": 0.002481454983353615, + "loss_iou": 0.60546875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 285465116, + "step": 1662 + }, + { + "epoch": 0.4374301308607878, + "grad_norm": 4.796617637287243, + "learning_rate": 5e-06, + "loss": 0.1067, + "num_input_tokens_seen": 285637712, + "step": 1663 + }, + { + "epoch": 0.4374301308607878, + "loss": 0.0941682979464531, + "loss_ce": 0.0005708856624551117, + "loss_iou": 0.58203125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 285637712, + "step": 1663 + }, + { + "epoch": 0.4376931676201749, + "grad_norm": 6.365145441420231, + "learning_rate": 5e-06, + "loss": 0.1524, + "num_input_tokens_seen": 285808140, + "step": 1664 + }, + { + "epoch": 0.4376931676201749, + "loss": 0.08691577613353729, + "loss_ce": 0.0016801799647510052, + "loss_iou": 0.703125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 285808140, + "step": 1664 + }, + { + "epoch": 0.43795620437956206, + "grad_norm": 9.14556589502857, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 285980020, + "step": 1665 + }, + { + "epoch": 0.43795620437956206, + "loss": 0.11803478002548218, + "loss_ce": 0.0006641793297603726, + "loss_iou": 0.41796875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 285980020, + "step": 1665 + }, + { + "epoch": 0.43821924113894917, + "grad_norm": 8.36736614358982, + "learning_rate": 5e-06, + "loss": 0.1476, + "num_input_tokens_seen": 286152224, + "step": 1666 + }, + { + "epoch": 0.43821924113894917, + "loss": 0.17869716882705688, + "loss_ce": 0.002091944683343172, + "loss_iou": 0.578125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 286152224, + "step": 1666 + }, + { + "epoch": 0.43848227789833627, + "grad_norm": 5.977656991369799, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 286324392, + "step": 1667 + }, + { + "epoch": 0.43848227789833627, + "loss": 0.07165973633527756, + "loss_ce": 0.0008894691127352417, + "loss_iou": 0.6015625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 286324392, + "step": 1667 + }, + { + "epoch": 0.43874531465772343, + "grad_norm": 4.564829435087821, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 286496724, + "step": 1668 + }, + { + "epoch": 0.43874531465772343, + "loss": 0.10358568280935287, + "loss_ce": 0.0014128325274214149, + "loss_iou": 0.53125, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 286496724, + "step": 1668 + }, + { + "epoch": 0.43900835141711053, + "grad_norm": 5.942642735170272, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 286669072, + "step": 1669 + }, + { + "epoch": 0.43900835141711053, + "loss": 0.15151304006576538, + "loss_ce": 0.00020689686061814427, + "loss_iou": 0.46484375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 286669072, + "step": 1669 + }, + { + "epoch": 0.4392713881764977, + "grad_norm": 10.62471442505103, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 286839656, + "step": 1670 + }, + { + "epoch": 0.4392713881764977, + "loss": 0.07143907248973846, + "loss_ce": 0.0015233027515932918, + "loss_iou": 0.56640625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 286839656, + "step": 1670 + }, + { + "epoch": 0.4395344249358848, + "grad_norm": 10.099626860743896, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 287009348, + "step": 1671 + }, + { + "epoch": 0.4395344249358848, + "loss": 0.07566662132740021, + "loss_ce": 0.0009290680172853172, + "loss_iou": 0.5625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 287009348, + "step": 1671 + }, + { + "epoch": 0.4397974616952719, + "grad_norm": 5.653901499064607, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 287181772, + "step": 1672 + }, + { + "epoch": 0.4397974616952719, + "loss": 0.13149940967559814, + "loss_ce": 0.003600239520892501, + "loss_iou": 0.62890625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 287181772, + "step": 1672 + }, + { + "epoch": 0.44006049845465905, + "grad_norm": 5.524031672248575, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 287353976, + "step": 1673 + }, + { + "epoch": 0.44006049845465905, + "loss": 0.09840566664934158, + "loss_ce": 0.0025804713368415833, + "loss_iou": 0.51953125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 287353976, + "step": 1673 + }, + { + "epoch": 0.44032353521404616, + "grad_norm": 9.273568707994169, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 287526172, + "step": 1674 + }, + { + "epoch": 0.44032353521404616, + "loss": 0.06107574701309204, + "loss_ce": 0.0005288777174428105, + "loss_iou": 0.59765625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 287526172, + "step": 1674 + }, + { + "epoch": 0.44058657197343326, + "grad_norm": 4.970980583351768, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 287698712, + "step": 1675 + }, + { + "epoch": 0.44058657197343326, + "loss": 0.15458138287067413, + "loss_ce": 0.004831631202250719, + "loss_iou": 0.44921875, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 287698712, + "step": 1675 + }, + { + "epoch": 0.4408496087328204, + "grad_norm": 5.818694779717612, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 287871108, + "step": 1676 + }, + { + "epoch": 0.4408496087328204, + "loss": 0.09261530637741089, + "loss_ce": 0.0006353222415782511, + "loss_iou": 0.65625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 287871108, + "step": 1676 + }, + { + "epoch": 0.4411126454922075, + "grad_norm": 13.742421763597438, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 288043424, + "step": 1677 + }, + { + "epoch": 0.4411126454922075, + "loss": 0.0773499608039856, + "loss_ce": 0.0007813603151589632, + "loss_iou": 0.498046875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 288043424, + "step": 1677 + }, + { + "epoch": 0.4413756822515947, + "grad_norm": 4.64225389501199, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 288215812, + "step": 1678 + }, + { + "epoch": 0.4413756822515947, + "loss": 0.1821221113204956, + "loss_ce": 0.0025872092228382826, + "loss_iou": 0.61328125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 288215812, + "step": 1678 + }, + { + "epoch": 0.4416387190109818, + "grad_norm": 3.729084500729301, + "learning_rate": 5e-06, + "loss": 0.1359, + "num_input_tokens_seen": 288388096, + "step": 1679 + }, + { + "epoch": 0.4416387190109818, + "loss": 0.07683113217353821, + "loss_ce": 0.000735556473955512, + "loss_iou": 0.390625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 288388096, + "step": 1679 + }, + { + "epoch": 0.4419017557703689, + "grad_norm": 3.7239911787320956, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 288560596, + "step": 1680 + }, + { + "epoch": 0.4419017557703689, + "loss": 0.09967576712369919, + "loss_ce": 0.002248398493975401, + "loss_iou": 0.5234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 288560596, + "step": 1680 + }, + { + "epoch": 0.44216479252975605, + "grad_norm": 7.1544540740612845, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 288730844, + "step": 1681 + }, + { + "epoch": 0.44216479252975605, + "loss": 0.14395672082901, + "loss_ce": 0.0017753278370946646, + "loss_iou": 0.384765625, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 288730844, + "step": 1681 + }, + { + "epoch": 0.44242782928914315, + "grad_norm": 6.963458789097864, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 288901440, + "step": 1682 + }, + { + "epoch": 0.44242782928914315, + "loss": 0.12861037254333496, + "loss_ce": 0.0015962182078510523, + "loss_iou": 0.58984375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 288901440, + "step": 1682 + }, + { + "epoch": 0.4426908660485303, + "grad_norm": 6.124914951048281, + "learning_rate": 5e-06, + "loss": 0.2107, + "num_input_tokens_seen": 289073420, + "step": 1683 + }, + { + "epoch": 0.4426908660485303, + "loss": 0.19420379400253296, + "loss_ce": 0.0013021675404161215, + "loss_iou": 0.58984375, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 289073420, + "step": 1683 + }, + { + "epoch": 0.4429539028079174, + "grad_norm": 9.860837139100692, + "learning_rate": 5e-06, + "loss": 0.172, + "num_input_tokens_seen": 289245604, + "step": 1684 + }, + { + "epoch": 0.4429539028079174, + "loss": 0.18933850526809692, + "loss_ce": 0.002204704098403454, + "loss_iou": 0.73828125, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 289245604, + "step": 1684 + }, + { + "epoch": 0.4432169395673045, + "grad_norm": 7.0676385774671875, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 289417760, + "step": 1685 + }, + { + "epoch": 0.4432169395673045, + "loss": 0.16348493099212646, + "loss_ce": 0.0013145222328603268, + "loss_iou": 0.41015625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 289417760, + "step": 1685 + }, + { + "epoch": 0.44347997632669167, + "grad_norm": 5.183221235705912, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 289590036, + "step": 1686 + }, + { + "epoch": 0.44347997632669167, + "loss": 0.07584193348884583, + "loss_ce": 0.003545790910720825, + "loss_iou": 0.51171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 289590036, + "step": 1686 + }, + { + "epoch": 0.4437430130860788, + "grad_norm": 4.680777365130206, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 289762148, + "step": 1687 + }, + { + "epoch": 0.4437430130860788, + "loss": 0.15222427248954773, + "loss_ce": 0.000765529228374362, + "loss_iou": 0.53515625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 289762148, + "step": 1687 + }, + { + "epoch": 0.4440060498454659, + "grad_norm": 8.359052775831522, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 289932260, + "step": 1688 + }, + { + "epoch": 0.4440060498454659, + "loss": 0.15807722508907318, + "loss_ce": 0.003322579897940159, + "loss_iou": 0.62109375, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 289932260, + "step": 1688 + }, + { + "epoch": 0.44426908660485304, + "grad_norm": 12.300759876906785, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 290101064, + "step": 1689 + }, + { + "epoch": 0.44426908660485304, + "loss": 0.1050846129655838, + "loss_ce": 0.00016517633048351854, + "loss_iou": 0.48046875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 290101064, + "step": 1689 + }, + { + "epoch": 0.44453212336424014, + "grad_norm": 7.817323041044055, + "learning_rate": 5e-06, + "loss": 0.153, + "num_input_tokens_seen": 290273392, + "step": 1690 + }, + { + "epoch": 0.44453212336424014, + "loss": 0.10968184471130371, + "loss_ce": 0.004731892608106136, + "loss_iou": 0.53125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 290273392, + "step": 1690 + }, + { + "epoch": 0.4447951601236273, + "grad_norm": 10.723057383347166, + "learning_rate": 5e-06, + "loss": 0.1448, + "num_input_tokens_seen": 290445728, + "step": 1691 + }, + { + "epoch": 0.4447951601236273, + "loss": 0.11715184152126312, + "loss_ce": 0.0027719633653759956, + "loss_iou": 0.5, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 290445728, + "step": 1691 + }, + { + "epoch": 0.4450581968830144, + "grad_norm": 5.5921534938980795, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 290618040, + "step": 1692 + }, + { + "epoch": 0.4450581968830144, + "loss": 0.11884011328220367, + "loss_ce": 0.0018509816145524383, + "loss_iou": 0.62890625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 290618040, + "step": 1692 + }, + { + "epoch": 0.4453212336424015, + "grad_norm": 14.56261412387689, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 290790260, + "step": 1693 + }, + { + "epoch": 0.4453212336424015, + "loss": 0.12701714038848877, + "loss_ce": 0.0006743660196661949, + "loss_iou": 0.33203125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 290790260, + "step": 1693 + }, + { + "epoch": 0.44558427040178866, + "grad_norm": 18.011685132647553, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 290960676, + "step": 1694 + }, + { + "epoch": 0.44558427040178866, + "loss": 0.17821697890758514, + "loss_ce": 0.00457195146009326, + "loss_iou": 0.62109375, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 290960676, + "step": 1694 + }, + { + "epoch": 0.44584730716117577, + "grad_norm": 5.536946199556455, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 291132980, + "step": 1695 + }, + { + "epoch": 0.44584730716117577, + "loss": 0.10020774602890015, + "loss_ce": 0.0019716662354767323, + "loss_iou": 0.5390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 291132980, + "step": 1695 + }, + { + "epoch": 0.4461103439205629, + "grad_norm": 14.998078131446578, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 291305020, + "step": 1696 + }, + { + "epoch": 0.4461103439205629, + "loss": 0.26434189081192017, + "loss_ce": 0.003111420664936304, + "loss_iou": 0.37890625, + "loss_num": 0.05224609375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 291305020, + "step": 1696 + }, + { + "epoch": 0.44637338067995, + "grad_norm": 5.503328090464836, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 291477360, + "step": 1697 + }, + { + "epoch": 0.44637338067995, + "loss": 0.08737830072641373, + "loss_ce": 0.0004947560373693705, + "loss_iou": 0.5390625, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 291477360, + "step": 1697 + }, + { + "epoch": 0.44663641743933713, + "grad_norm": 4.950750612606285, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 291647704, + "step": 1698 + }, + { + "epoch": 0.44663641743933713, + "loss": 0.09089531749486923, + "loss_ce": 0.00031914000282995403, + "loss_iou": 0.5234375, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 291647704, + "step": 1698 + }, + { + "epoch": 0.4468994541987243, + "grad_norm": 7.442760799509784, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 291820320, + "step": 1699 + }, + { + "epoch": 0.4468994541987243, + "loss": 0.1341613531112671, + "loss_ce": 0.004461641423404217, + "loss_iou": 0.423828125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 291820320, + "step": 1699 + }, + { + "epoch": 0.4471624909581114, + "grad_norm": 12.789945030223832, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 291992692, + "step": 1700 + }, + { + "epoch": 0.4471624909581114, + "loss": 0.12477381527423859, + "loss_ce": 0.002901863306760788, + "loss_iou": 0.53125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 291992692, + "step": 1700 + }, + { + "epoch": 0.4474255277174985, + "grad_norm": 4.791603869867936, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 292164820, + "step": 1701 + }, + { + "epoch": 0.4474255277174985, + "loss": 0.1102890819311142, + "loss_ce": 0.002928241156041622, + "loss_iou": 0.421875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 292164820, + "step": 1701 + }, + { + "epoch": 0.44768856447688565, + "grad_norm": 4.5597072248390145, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 292336840, + "step": 1702 + }, + { + "epoch": 0.44768856447688565, + "loss": 0.13155938684940338, + "loss_ce": 0.0015850251074880362, + "loss_iou": 0.703125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 292336840, + "step": 1702 + }, + { + "epoch": 0.44795160123627276, + "grad_norm": 4.229506785629678, + "learning_rate": 5e-06, + "loss": 0.1794, + "num_input_tokens_seen": 292507240, + "step": 1703 + }, + { + "epoch": 0.44795160123627276, + "loss": 0.12941154837608337, + "loss_ce": 0.0014818647177889943, + "loss_iou": 0.54296875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 292507240, + "step": 1703 + }, + { + "epoch": 0.4482146379956599, + "grad_norm": 5.641270696096721, + "learning_rate": 5e-06, + "loss": 0.1276, + "num_input_tokens_seen": 292679176, + "step": 1704 + }, + { + "epoch": 0.4482146379956599, + "loss": 0.12304902821779251, + "loss_ce": 0.002321491949260235, + "loss_iou": 0.451171875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 292679176, + "step": 1704 + }, + { + "epoch": 0.448477674755047, + "grad_norm": 4.862520419399453, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 292851500, + "step": 1705 + }, + { + "epoch": 0.448477674755047, + "loss": 0.089045949280262, + "loss_ce": 0.00478691840544343, + "loss_iou": 0.546875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 292851500, + "step": 1705 + }, + { + "epoch": 0.4487407115144341, + "grad_norm": 5.2865399556662975, + "learning_rate": 5e-06, + "loss": 0.1533, + "num_input_tokens_seen": 293020992, + "step": 1706 + }, + { + "epoch": 0.4487407115144341, + "loss": 0.20740769803524017, + "loss_ce": 0.0010478447657078505, + "loss_iou": 0.66015625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 293020992, + "step": 1706 + }, + { + "epoch": 0.4490037482738213, + "grad_norm": 4.239149151423396, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 293192840, + "step": 1707 + }, + { + "epoch": 0.4490037482738213, + "loss": 0.1247292309999466, + "loss_ce": 0.002048558322712779, + "loss_iou": 0.578125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 293192840, + "step": 1707 + }, + { + "epoch": 0.4492667850332084, + "grad_norm": 14.697036954882604, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 293359152, + "step": 1708 + }, + { + "epoch": 0.4492667850332084, + "loss": 0.17449304461479187, + "loss_ce": 0.000573370314668864, + "loss_iou": 0.53515625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 293359152, + "step": 1708 + }, + { + "epoch": 0.44952982179259554, + "grad_norm": 6.428150274293997, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 293531168, + "step": 1709 + }, + { + "epoch": 0.44952982179259554, + "loss": 0.0824984684586525, + "loss_ce": 0.00025359162827953696, + "loss_iou": 0.43359375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 293531168, + "step": 1709 + }, + { + "epoch": 0.44979285855198264, + "grad_norm": 12.082892344047602, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 293701596, + "step": 1710 + }, + { + "epoch": 0.44979285855198264, + "loss": 0.075187087059021, + "loss_ce": 0.00011384247045498341, + "loss_iou": 0.77734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 293701596, + "step": 1710 + }, + { + "epoch": 0.45005589531136975, + "grad_norm": 6.447369973784934, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 293874048, + "step": 1711 + }, + { + "epoch": 0.45005589531136975, + "loss": 0.12107305228710175, + "loss_ce": 0.0001013779838103801, + "loss_iou": 0.44140625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 293874048, + "step": 1711 + }, + { + "epoch": 0.4503189320707569, + "grad_norm": 4.920690909069883, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 294046324, + "step": 1712 + }, + { + "epoch": 0.4503189320707569, + "loss": 0.09293599426746368, + "loss_ce": 0.0002541054564062506, + "loss_iou": 0.62890625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 294046324, + "step": 1712 + }, + { + "epoch": 0.450581968830144, + "grad_norm": 5.111263917035011, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 294218352, + "step": 1713 + }, + { + "epoch": 0.450581968830144, + "loss": 0.08807346224784851, + "loss_ce": 0.005675997585058212, + "loss_iou": 0.546875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 294218352, + "step": 1713 + }, + { + "epoch": 0.4508450055895311, + "grad_norm": 22.02715708529402, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 294390700, + "step": 1714 + }, + { + "epoch": 0.4508450055895311, + "loss": 0.16420426964759827, + "loss_ce": 0.00023331816191785038, + "loss_iou": 0.66796875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 294390700, + "step": 1714 + }, + { + "epoch": 0.45110804234891827, + "grad_norm": 36.948349834543606, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 294562520, + "step": 1715 + }, + { + "epoch": 0.45110804234891827, + "loss": 0.20613673329353333, + "loss_ce": 0.0074672941118478775, + "loss_iou": 0.6015625, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 294562520, + "step": 1715 + }, + { + "epoch": 0.4513710791083054, + "grad_norm": 11.731491784871416, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 294732980, + "step": 1716 + }, + { + "epoch": 0.4513710791083054, + "loss": 0.2512318789958954, + "loss_ce": 0.0024831017944961786, + "loss_iou": 0.5859375, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 294732980, + "step": 1716 + }, + { + "epoch": 0.45163411586769253, + "grad_norm": 14.33875314671704, + "learning_rate": 5e-06, + "loss": 0.146, + "num_input_tokens_seen": 294904972, + "step": 1717 + }, + { + "epoch": 0.45163411586769253, + "loss": 0.1545875370502472, + "loss_ce": 0.0034644976258277893, + "loss_iou": 0.609375, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 294904972, + "step": 1717 + }, + { + "epoch": 0.45189715262707963, + "grad_norm": 4.667158870565546, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 295077136, + "step": 1718 + }, + { + "epoch": 0.45189715262707963, + "loss": 0.10374893248081207, + "loss_ce": 0.0009657314512878656, + "loss_iou": 0.55859375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 295077136, + "step": 1718 + }, + { + "epoch": 0.45216018938646674, + "grad_norm": 4.671478799286235, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 295249644, + "step": 1719 + }, + { + "epoch": 0.45216018938646674, + "loss": 0.12534737586975098, + "loss_ce": 0.004924997687339783, + "loss_iou": 0.57421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 295249644, + "step": 1719 + }, + { + "epoch": 0.4524232261458539, + "grad_norm": 9.77050555348203, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 295421628, + "step": 1720 + }, + { + "epoch": 0.4524232261458539, + "loss": 0.10829440504312515, + "loss_ce": 0.0021237479522824287, + "loss_iou": 0.65625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 295421628, + "step": 1720 + }, + { + "epoch": 0.452686262905241, + "grad_norm": 6.10598748249797, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 295593664, + "step": 1721 + }, + { + "epoch": 0.452686262905241, + "loss": 0.05939865857362747, + "loss_ce": 0.0012016374384984374, + "loss_iou": 0.5625, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 295593664, + "step": 1721 + }, + { + "epoch": 0.45294929966462816, + "grad_norm": 10.371953857475111, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 295765768, + "step": 1722 + }, + { + "epoch": 0.45294929966462816, + "loss": 0.10428635776042938, + "loss_ce": 0.0005265921936370432, + "loss_iou": 0.76171875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 295765768, + "step": 1722 + }, + { + "epoch": 0.45321233642401526, + "grad_norm": 6.589623724090896, + "learning_rate": 5e-06, + "loss": 0.1679, + "num_input_tokens_seen": 295937984, + "step": 1723 + }, + { + "epoch": 0.45321233642401526, + "loss": 0.12776082754135132, + "loss_ce": 0.0015095948474481702, + "loss_iou": 0.392578125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 295937984, + "step": 1723 + }, + { + "epoch": 0.45347537318340236, + "grad_norm": 5.38306851263364, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 296109952, + "step": 1724 + }, + { + "epoch": 0.45347537318340236, + "loss": 0.10374290496110916, + "loss_ce": 0.0004714199749287218, + "loss_iou": 0.60546875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 296109952, + "step": 1724 + }, + { + "epoch": 0.4537384099427895, + "grad_norm": 16.069341006935545, + "learning_rate": 5e-06, + "loss": 0.1861, + "num_input_tokens_seen": 296282320, + "step": 1725 + }, + { + "epoch": 0.4537384099427895, + "loss": 0.3360915184020996, + "loss_ce": 0.003205763641744852, + "loss_iou": 0.49609375, + "loss_num": 0.06640625, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 296282320, + "step": 1725 + }, + { + "epoch": 0.4540014467021766, + "grad_norm": 7.355868991768278, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 296454412, + "step": 1726 + }, + { + "epoch": 0.4540014467021766, + "loss": 0.11106541752815247, + "loss_ce": 0.0014462803956121206, + "loss_iou": 0.44140625, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 296454412, + "step": 1726 + }, + { + "epoch": 0.45426448346156373, + "grad_norm": 4.637757034723655, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 296626392, + "step": 1727 + }, + { + "epoch": 0.45426448346156373, + "loss": 0.07483752816915512, + "loss_ce": 0.00046619208296760917, + "loss_iou": 0.359375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 296626392, + "step": 1727 + }, + { + "epoch": 0.4545275202209509, + "grad_norm": 4.094172069183009, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 296795792, + "step": 1728 + }, + { + "epoch": 0.4545275202209509, + "loss": 0.1533362716436386, + "loss_ce": 0.00154183991253376, + "loss_iou": 0.53125, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 296795792, + "step": 1728 + }, + { + "epoch": 0.454790556980338, + "grad_norm": 4.550319533659712, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 296966316, + "step": 1729 + }, + { + "epoch": 0.454790556980338, + "loss": 0.25365394353866577, + "loss_ce": 0.002402704209089279, + "loss_iou": 0.32421875, + "loss_num": 0.05029296875, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 296966316, + "step": 1729 + }, + { + "epoch": 0.45505359373972515, + "grad_norm": 5.052226011800126, + "learning_rate": 5e-06, + "loss": 0.1398, + "num_input_tokens_seen": 297136040, + "step": 1730 + }, + { + "epoch": 0.45505359373972515, + "loss": 0.1388048231601715, + "loss_ce": 0.0004991517635062337, + "loss_iou": 0.640625, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 297136040, + "step": 1730 + }, + { + "epoch": 0.45531663049911225, + "grad_norm": 9.365703053895057, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 297306228, + "step": 1731 + }, + { + "epoch": 0.45531663049911225, + "loss": 0.09631586819887161, + "loss_ce": 0.0004296356055419892, + "loss_iou": 0.4453125, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 297306228, + "step": 1731 + }, + { + "epoch": 0.45557966725849935, + "grad_norm": 5.623520624492047, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 297478380, + "step": 1732 + }, + { + "epoch": 0.45557966725849935, + "loss": 0.21242645382881165, + "loss_ce": 0.0005734282894991338, + "loss_iou": 0.58203125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 297478380, + "step": 1732 + }, + { + "epoch": 0.4558427040178865, + "grad_norm": 22.29793682222897, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 297650336, + "step": 1733 + }, + { + "epoch": 0.4558427040178865, + "loss": 0.12880732119083405, + "loss_ce": 0.0012133296113461256, + "loss_iou": 0.56640625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 297650336, + "step": 1733 + }, + { + "epoch": 0.4561057407772736, + "grad_norm": 19.5277076823797, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 297822376, + "step": 1734 + }, + { + "epoch": 0.4561057407772736, + "loss": 0.10357876121997833, + "loss_ce": 0.00030727204284630716, + "loss_iou": 0.447265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 297822376, + "step": 1734 + }, + { + "epoch": 0.4563687775366608, + "grad_norm": 4.597687331185091, + "learning_rate": 5e-06, + "loss": 0.0992, + "num_input_tokens_seen": 297994524, + "step": 1735 + }, + { + "epoch": 0.4563687775366608, + "loss": 0.11487319320440292, + "loss_ce": 0.0003712356265168637, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 297994524, + "step": 1735 + }, + { + "epoch": 0.4566318142960479, + "grad_norm": 17.34014327670223, + "learning_rate": 5e-06, + "loss": 0.1904, + "num_input_tokens_seen": 298164216, + "step": 1736 + }, + { + "epoch": 0.4566318142960479, + "loss": 0.21512141823768616, + "loss_ce": 0.0003387040051165968, + "loss_iou": 0.54296875, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 298164216, + "step": 1736 + }, + { + "epoch": 0.456894851055435, + "grad_norm": 5.201392853210846, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 298336416, + "step": 1737 + }, + { + "epoch": 0.456894851055435, + "loss": 0.14383375644683838, + "loss_ce": 0.0030866768211126328, + "loss_iou": 0.388671875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 298336416, + "step": 1737 + }, + { + "epoch": 0.45715788781482214, + "grad_norm": 4.466741788292063, + "learning_rate": 5e-06, + "loss": 0.1267, + "num_input_tokens_seen": 298508336, + "step": 1738 + }, + { + "epoch": 0.45715788781482214, + "loss": 0.08519221842288971, + "loss_ce": 0.001879227813333273, + "loss_iou": 0.484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 298508336, + "step": 1738 + }, + { + "epoch": 0.45742092457420924, + "grad_norm": 5.105148151648125, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 298680480, + "step": 1739 + }, + { + "epoch": 0.45742092457420924, + "loss": 0.08455468714237213, + "loss_ce": 0.00038720344309695065, + "loss_iou": 0.40234375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 298680480, + "step": 1739 + }, + { + "epoch": 0.45768396133359635, + "grad_norm": 4.88765152218918, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 298852576, + "step": 1740 + }, + { + "epoch": 0.45768396133359635, + "loss": 0.06836723536252975, + "loss_ce": 0.0006487306673079729, + "loss_iou": 0.57421875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 298852576, + "step": 1740 + }, + { + "epoch": 0.4579469980929835, + "grad_norm": 11.438189750287325, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 299025188, + "step": 1741 + }, + { + "epoch": 0.4579469980929835, + "loss": 0.2103656679391861, + "loss_ce": 0.0009235285106115043, + "loss_iou": 0.458984375, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 299025188, + "step": 1741 + }, + { + "epoch": 0.4582100348523706, + "grad_norm": 17.844442720051195, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 299195652, + "step": 1742 + }, + { + "epoch": 0.4582100348523706, + "loss": 0.0902654230594635, + "loss_ce": 0.0008184025646187365, + "loss_iou": 0.6171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 299195652, + "step": 1742 + }, + { + "epoch": 0.45847307161175777, + "grad_norm": 5.593927162724121, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 299367692, + "step": 1743 + }, + { + "epoch": 0.45847307161175777, + "loss": 0.0877409279346466, + "loss_ce": 0.004031214863061905, + "loss_iou": 0.47265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 299367692, + "step": 1743 + }, + { + "epoch": 0.45873610837114487, + "grad_norm": 6.084084258485456, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 299539820, + "step": 1744 + }, + { + "epoch": 0.45873610837114487, + "loss": 0.17818066477775574, + "loss_ce": 0.0005988804623484612, + "loss_iou": 0.67578125, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 299539820, + "step": 1744 + }, + { + "epoch": 0.45899914513053197, + "grad_norm": 4.3161100884144785, + "learning_rate": 5e-06, + "loss": 0.0857, + "num_input_tokens_seen": 299710180, + "step": 1745 + }, + { + "epoch": 0.45899914513053197, + "loss": 0.11408813297748566, + "loss_ce": 0.005079346243292093, + "loss_iou": 0.52734375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 299710180, + "step": 1745 + }, + { + "epoch": 0.45926218188991913, + "grad_norm": 9.432280511482096, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 299882192, + "step": 1746 + }, + { + "epoch": 0.45926218188991913, + "loss": 0.28233322501182556, + "loss_ce": 0.002944799605756998, + "loss_iou": 0.40234375, + "loss_num": 0.055908203125, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 299882192, + "step": 1746 + }, + { + "epoch": 0.45952521864930623, + "grad_norm": 4.947993402328502, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 300054328, + "step": 1747 + }, + { + "epoch": 0.45952521864930623, + "loss": 0.12365365773439407, + "loss_ce": 0.00454354751855135, + "loss_iou": 0.515625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 300054328, + "step": 1747 + }, + { + "epoch": 0.4597882554086934, + "grad_norm": 29.241280731418787, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 300226268, + "step": 1748 + }, + { + "epoch": 0.4597882554086934, + "loss": 0.09467601031064987, + "loss_ce": 0.002909653354436159, + "loss_iou": 0.546875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 300226268, + "step": 1748 + }, + { + "epoch": 0.4600512921680805, + "grad_norm": 40.89605718973821, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 300398324, + "step": 1749 + }, + { + "epoch": 0.4600512921680805, + "loss": 0.18484918773174286, + "loss_ce": 0.002537175314500928, + "loss_iou": 0.48828125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 300398324, + "step": 1749 + }, + { + "epoch": 0.4603143289274676, + "grad_norm": 6.154438338656609, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_websight_new_CIoU": 0.8543897271156311, + "eval_websight_new_GIoU": 0.8555817008018494, + "eval_websight_new_IoU": 0.8602511882781982, + "eval_websight_new_MAE_all": 0.022463313303887844, + "eval_websight_new_MAE_h": 0.007148948730900884, + "eval_websight_new_MAE_w": 0.03532572276890278, + "eval_websight_new_MAE_x": 0.03696838486939669, + "eval_websight_new_MAE_y": 0.010410191491246223, + "eval_websight_new_NUM_probability": 0.9999746978282928, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.11025163531303406, + "eval_websight_new_loss_ce": 7.8859661698516e-06, + "eval_websight_new_loss_iou": 0.3748779296875, + "eval_websight_new_loss_num": 0.019195556640625, + "eval_websight_new_loss_xval": 0.09600830078125, + "eval_websight_new_runtime": 55.1926, + "eval_websight_new_samples_per_second": 0.906, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_seeclick_CIoU": 0.6177069246768951, + "eval_seeclick_GIoU": 0.6163533926010132, + "eval_seeclick_IoU": 0.6425465941429138, + "eval_seeclick_MAE_all": 0.04845697060227394, + "eval_seeclick_MAE_h": 0.026225415989756584, + "eval_seeclick_MAE_w": 0.06570588797330856, + "eval_seeclick_MAE_x": 0.07359151728451252, + "eval_seeclick_MAE_y": 0.0283050537109375, + "eval_seeclick_NUM_probability": 0.9999766051769257, + "eval_seeclick_inside_bbox": 0.890625, + "eval_seeclick_loss": 0.23021526634693146, + "eval_seeclick_loss_ce": 0.009315645787864923, + "eval_seeclick_loss_iou": 0.5054931640625, + "eval_seeclick_loss_num": 0.04460906982421875, + "eval_seeclick_loss_xval": 0.22314453125, + "eval_seeclick_runtime": 72.9378, + "eval_seeclick_samples_per_second": 0.59, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_icons_CIoU": 0.8409464359283447, + "eval_icons_GIoU": 0.8350943326950073, + "eval_icons_IoU": 0.847510576248169, + "eval_icons_MAE_all": 0.022730856202542782, + "eval_icons_MAE_h": 0.02164691872894764, + "eval_icons_MAE_w": 0.023992713540792465, + "eval_icons_MAE_x": 0.025108729489147663, + "eval_icons_MAE_y": 0.02017505932599306, + "eval_icons_NUM_probability": 0.9999510943889618, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.079879030585289, + "eval_icons_loss_ce": 2.25404792217887e-05, + "eval_icons_loss_iou": 0.5426025390625, + "eval_icons_loss_num": 0.014867782592773438, + "eval_icons_loss_xval": 0.0743560791015625, + "eval_icons_runtime": 87.1455, + "eval_icons_samples_per_second": 0.574, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "eval_screenspot_CIoU": 0.5540184179941813, + "eval_screenspot_GIoU": 0.543454110622406, + "eval_screenspot_IoU": 0.5961946249008179, + "eval_screenspot_MAE_all": 0.08684368679920833, + "eval_screenspot_MAE_h": 0.04899499130745729, + "eval_screenspot_MAE_w": 0.1521986499428749, + "eval_screenspot_MAE_x": 0.09981551021337509, + "eval_screenspot_MAE_y": 0.046365607529878616, + "eval_screenspot_NUM_probability": 0.9998787045478821, + "eval_screenspot_inside_bbox": 0.8737499912579855, + "eval_screenspot_loss": 0.8499577641487122, + "eval_screenspot_loss_ce": 0.5143006145954132, + "eval_screenspot_loss_iou": 0.4506022135416667, + "eval_screenspot_loss_num": 0.06610107421875, + "eval_screenspot_loss_xval": 0.3305257161458333, + "eval_screenspot_runtime": 149.7903, + "eval_screenspot_samples_per_second": 0.594, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.4603143289274676, + "loss": 0.855069637298584, + "loss_ce": 0.507047176361084, + "loss_iou": 0.392578125, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 300570420, + "step": 1750 + }, + { + "epoch": 0.46057736568685476, + "grad_norm": 6.10997370548099, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 300742608, + "step": 1751 + }, + { + "epoch": 0.46057736568685476, + "loss": 0.11830765753984451, + "loss_ce": 0.0002961806021630764, + "loss_iou": 0.60546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 300742608, + "step": 1751 + }, + { + "epoch": 0.46084040244624186, + "grad_norm": 4.63619140673612, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 300914728, + "step": 1752 + }, + { + "epoch": 0.46084040244624186, + "loss": 0.14693626761436462, + "loss_ce": 0.0036867514718323946, + "loss_iou": 0.58984375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 300914728, + "step": 1752 + }, + { + "epoch": 0.46110343920562896, + "grad_norm": 11.051347194862576, + "learning_rate": 5e-06, + "loss": 0.1718, + "num_input_tokens_seen": 301087180, + "step": 1753 + }, + { + "epoch": 0.46110343920562896, + "loss": 0.2443966418504715, + "loss_ce": 0.0005001651006750762, + "loss_iou": 0.57421875, + "loss_num": 0.048828125, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 301087180, + "step": 1753 + }, + { + "epoch": 0.4613664759650161, + "grad_norm": 9.081946267145833, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 301259116, + "step": 1754 + }, + { + "epoch": 0.4613664759650161, + "loss": 0.09555494785308838, + "loss_ce": 0.002583135850727558, + "loss_iou": 0.59765625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 301259116, + "step": 1754 + }, + { + "epoch": 0.4616295127244032, + "grad_norm": 7.9541431928925395, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 301427764, + "step": 1755 + }, + { + "epoch": 0.4616295127244032, + "loss": 0.12469062209129333, + "loss_ce": 0.0007892490248195827, + "loss_iou": 0.5078125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 301427764, + "step": 1755 + }, + { + "epoch": 0.4618925494837904, + "grad_norm": 4.563316138635991, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 301599772, + "step": 1756 + }, + { + "epoch": 0.4618925494837904, + "loss": 0.0649976134300232, + "loss_ce": 0.0009717366192489862, + "loss_iou": 0.5234375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 301599772, + "step": 1756 + }, + { + "epoch": 0.4621555862431775, + "grad_norm": 13.734783204569586, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 301771888, + "step": 1757 + }, + { + "epoch": 0.4621555862431775, + "loss": 0.2562709152698517, + "loss_ce": 0.0031581264920532703, + "loss_iou": 0.26171875, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 301771888, + "step": 1757 + }, + { + "epoch": 0.4624186230025646, + "grad_norm": 8.489244421344079, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 301944272, + "step": 1758 + }, + { + "epoch": 0.4624186230025646, + "loss": 0.08469030261039734, + "loss_ce": 0.00088903569849208, + "loss_iou": 0.5390625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 301944272, + "step": 1758 + }, + { + "epoch": 0.46268165976195175, + "grad_norm": 5.029084121420607, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 302116376, + "step": 1759 + }, + { + "epoch": 0.46268165976195175, + "loss": 0.12341859936714172, + "loss_ce": 0.0011651779059320688, + "loss_iou": 0.44921875, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 302116376, + "step": 1759 + }, + { + "epoch": 0.46294469652133885, + "grad_norm": 20.972527246553025, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 302288436, + "step": 1760 + }, + { + "epoch": 0.46294469652133885, + "loss": 0.0873933807015419, + "loss_ce": 0.0027071028016507626, + "loss_iou": 0.6171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 302288436, + "step": 1760 + }, + { + "epoch": 0.463207733280726, + "grad_norm": 4.449125782082312, + "learning_rate": 5e-06, + "loss": 0.1406, + "num_input_tokens_seen": 302460504, + "step": 1761 + }, + { + "epoch": 0.463207733280726, + "loss": 0.10758376121520996, + "loss_ce": 0.0050141820684075356, + "loss_iou": 0.423828125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 302460504, + "step": 1761 + }, + { + "epoch": 0.4634707700401131, + "grad_norm": 6.144634607422675, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 302632396, + "step": 1762 + }, + { + "epoch": 0.4634707700401131, + "loss": 0.19673708081245422, + "loss_ce": 0.0027063230518251657, + "loss_iou": 0.484375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 302632396, + "step": 1762 + }, + { + "epoch": 0.4637338067995002, + "grad_norm": 4.45571483495035, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 302804644, + "step": 1763 + }, + { + "epoch": 0.4637338067995002, + "loss": 0.0787121132016182, + "loss_ce": 0.005683551542460918, + "loss_iou": 0.35546875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 302804644, + "step": 1763 + }, + { + "epoch": 0.4639968435588874, + "grad_norm": 20.80296883365536, + "learning_rate": 5e-06, + "loss": 0.1304, + "num_input_tokens_seen": 302976856, + "step": 1764 + }, + { + "epoch": 0.4639968435588874, + "loss": 0.14017972350120544, + "loss_ce": 0.0037661464884877205, + "loss_iou": 0.51171875, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 302976856, + "step": 1764 + }, + { + "epoch": 0.4642598803182745, + "grad_norm": 5.519977681881848, + "learning_rate": 5e-06, + "loss": 0.1241, + "num_input_tokens_seen": 303146760, + "step": 1765 + }, + { + "epoch": 0.4642598803182745, + "loss": 0.12335249036550522, + "loss_ce": 0.002991169923916459, + "loss_iou": 0.5546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 303146760, + "step": 1765 + }, + { + "epoch": 0.4645229170776616, + "grad_norm": 6.31203658122147, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 303318968, + "step": 1766 + }, + { + "epoch": 0.4645229170776616, + "loss": 0.08828569203615189, + "loss_ce": 0.002073533833026886, + "loss_iou": 0.703125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 303318968, + "step": 1766 + }, + { + "epoch": 0.46478595383704874, + "grad_norm": 5.046640432518341, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 303491036, + "step": 1767 + }, + { + "epoch": 0.46478595383704874, + "loss": 0.08855307102203369, + "loss_ce": 0.002066256944090128, + "loss_iou": 0.640625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 303491036, + "step": 1767 + }, + { + "epoch": 0.46504899059643584, + "grad_norm": 8.451371161082179, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 303663404, + "step": 1768 + }, + { + "epoch": 0.46504899059643584, + "loss": 0.16699054837226868, + "loss_ce": 0.002805991331115365, + "loss_iou": 0.640625, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 303663404, + "step": 1768 + }, + { + "epoch": 0.465312027355823, + "grad_norm": 9.603701773353785, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 303832160, + "step": 1769 + }, + { + "epoch": 0.465312027355823, + "loss": 0.11529731005430222, + "loss_ce": 0.0019550304859876633, + "loss_iou": 0.4921875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 303832160, + "step": 1769 + }, + { + "epoch": 0.4655750641152101, + "grad_norm": 4.893083096864921, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 304004460, + "step": 1770 + }, + { + "epoch": 0.4655750641152101, + "loss": 0.15418250858783722, + "loss_ce": 0.0016556488117203116, + "loss_iou": 0.6484375, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 304004460, + "step": 1770 + }, + { + "epoch": 0.4658381008745972, + "grad_norm": 5.299927971595323, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 304176564, + "step": 1771 + }, + { + "epoch": 0.4658381008745972, + "loss": 0.11469468474388123, + "loss_ce": 0.004251571837812662, + "loss_iou": 0.53515625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 304176564, + "step": 1771 + }, + { + "epoch": 0.46610113763398436, + "grad_norm": 4.786837798902794, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 304349064, + "step": 1772 + }, + { + "epoch": 0.46610113763398436, + "loss": 0.13777077198028564, + "loss_ce": 0.002303245011717081, + "loss_iou": 0.63671875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 304349064, + "step": 1772 + }, + { + "epoch": 0.46636417439337147, + "grad_norm": 7.199109826840915, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 304521336, + "step": 1773 + }, + { + "epoch": 0.46636417439337147, + "loss": 0.13359083235263824, + "loss_ce": 0.0007783286855556071, + "loss_iou": 0.6328125, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 304521336, + "step": 1773 + }, + { + "epoch": 0.4666272111527586, + "grad_norm": 5.7773703638814355, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 304693604, + "step": 1774 + }, + { + "epoch": 0.4666272111527586, + "loss": 0.09553907811641693, + "loss_ce": 0.0015449414495378733, + "loss_iou": 0.7109375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 304693604, + "step": 1774 + }, + { + "epoch": 0.46689024791214573, + "grad_norm": 4.7750261071300475, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 304865896, + "step": 1775 + }, + { + "epoch": 0.46689024791214573, + "loss": 0.17456401884555817, + "loss_ce": 0.0003696825006045401, + "loss_iou": 0.466796875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 304865896, + "step": 1775 + }, + { + "epoch": 0.46715328467153283, + "grad_norm": 4.560916465958671, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 305038112, + "step": 1776 + }, + { + "epoch": 0.46715328467153283, + "loss": 0.16979777812957764, + "loss_ce": 0.002805587835609913, + "loss_iou": 0.6875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 305038112, + "step": 1776 + }, + { + "epoch": 0.46741632143092, + "grad_norm": 9.580731840657464, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 305210520, + "step": 1777 + }, + { + "epoch": 0.46741632143092, + "loss": 0.1295488327741623, + "loss_ce": 0.0010087917326018214, + "loss_iou": 0.51953125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 305210520, + "step": 1777 + }, + { + "epoch": 0.4676793581903071, + "grad_norm": 12.328201105743592, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 305382852, + "step": 1778 + }, + { + "epoch": 0.4676793581903071, + "loss": 0.10579667240381241, + "loss_ce": 0.0018843174912035465, + "loss_iou": 0.5625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 305382852, + "step": 1778 + }, + { + "epoch": 0.4679423949496942, + "grad_norm": 7.809054499493732, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 305554812, + "step": 1779 + }, + { + "epoch": 0.4679423949496942, + "loss": 0.12438150495290756, + "loss_ce": 0.003974398132413626, + "loss_iou": 0.59375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 305554812, + "step": 1779 + }, + { + "epoch": 0.46820543170908135, + "grad_norm": 8.973200505315825, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 305727124, + "step": 1780 + }, + { + "epoch": 0.46820543170908135, + "loss": 0.08462625741958618, + "loss_ce": 0.0006571417325176299, + "loss_iou": 0.62890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 305727124, + "step": 1780 + }, + { + "epoch": 0.46846846846846846, + "grad_norm": 5.227611768036767, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 305899256, + "step": 1781 + }, + { + "epoch": 0.46846846846846846, + "loss": 0.1279701292514801, + "loss_ce": 0.0015358042437583208, + "loss_iou": 0.376953125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 305899256, + "step": 1781 + }, + { + "epoch": 0.4687315052278556, + "grad_norm": 3.599452107221254, + "learning_rate": 5e-06, + "loss": 0.109, + "num_input_tokens_seen": 306069952, + "step": 1782 + }, + { + "epoch": 0.4687315052278556, + "loss": 0.1316523402929306, + "loss_ce": 0.002959707286208868, + "loss_iou": 0.6328125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 306069952, + "step": 1782 + }, + { + "epoch": 0.4689945419872427, + "grad_norm": 6.0196869037820235, + "learning_rate": 5e-06, + "loss": 0.1144, + "num_input_tokens_seen": 306242268, + "step": 1783 + }, + { + "epoch": 0.4689945419872427, + "loss": 0.0880986899137497, + "loss_ce": 0.0006353051285259426, + "loss_iou": 0.369140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 306242268, + "step": 1783 + }, + { + "epoch": 0.4692575787466298, + "grad_norm": 5.3230563681220735, + "learning_rate": 5e-06, + "loss": 0.1359, + "num_input_tokens_seen": 306414220, + "step": 1784 + }, + { + "epoch": 0.4692575787466298, + "loss": 0.08738180994987488, + "loss_ce": 0.0023750958498567343, + "loss_iou": 0.63671875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 306414220, + "step": 1784 + }, + { + "epoch": 0.469520615506017, + "grad_norm": 4.772051394078166, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 306586436, + "step": 1785 + }, + { + "epoch": 0.469520615506017, + "loss": 0.17895328998565674, + "loss_ce": 0.000791671103797853, + "loss_iou": 0.5390625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 306586436, + "step": 1785 + }, + { + "epoch": 0.4697836522654041, + "grad_norm": 5.377031057320062, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 306758712, + "step": 1786 + }, + { + "epoch": 0.4697836522654041, + "loss": 0.201407790184021, + "loss_ce": 0.004172691144049168, + "loss_iou": 0.72265625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 306758712, + "step": 1786 + }, + { + "epoch": 0.47004668902479124, + "grad_norm": 4.6699721270287275, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 306931112, + "step": 1787 + }, + { + "epoch": 0.47004668902479124, + "loss": 0.10016533732414246, + "loss_ce": 0.00031182204838842154, + "loss_iou": 0.53515625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 306931112, + "step": 1787 + }, + { + "epoch": 0.47030972578417835, + "grad_norm": 8.618398116569972, + "learning_rate": 5e-06, + "loss": 0.1445, + "num_input_tokens_seen": 307103388, + "step": 1788 + }, + { + "epoch": 0.47030972578417835, + "loss": 0.27383407950401306, + "loss_ce": 0.0001524553372291848, + "loss_iou": 0.515625, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 307103388, + "step": 1788 + }, + { + "epoch": 0.47057276254356545, + "grad_norm": 11.779661781344625, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 307275620, + "step": 1789 + }, + { + "epoch": 0.47057276254356545, + "loss": 0.08500531315803528, + "loss_ce": 0.00019696829258464277, + "loss_iou": 0.4453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 307275620, + "step": 1789 + }, + { + "epoch": 0.4708357993029526, + "grad_norm": 23.708154257705488, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 307448180, + "step": 1790 + }, + { + "epoch": 0.4708357993029526, + "loss": 0.17157645523548126, + "loss_ce": 0.002493813633918762, + "loss_iou": 0.453125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 307448180, + "step": 1790 + }, + { + "epoch": 0.4710988360623397, + "grad_norm": 4.7495128426496, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 307620540, + "step": 1791 + }, + { + "epoch": 0.4710988360623397, + "loss": 0.23302927613258362, + "loss_ce": 0.0009736126521602273, + "loss_iou": 0.443359375, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 307620540, + "step": 1791 + }, + { + "epoch": 0.4713618728217268, + "grad_norm": 7.18577313542124, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 307792704, + "step": 1792 + }, + { + "epoch": 0.4713618728217268, + "loss": 0.13438743352890015, + "loss_ce": 0.0005068117170594633, + "loss_iou": 0.4765625, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 307792704, + "step": 1792 + }, + { + "epoch": 0.47162490958111397, + "grad_norm": 7.179208918692874, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 307965184, + "step": 1793 + }, + { + "epoch": 0.47162490958111397, + "loss": 0.15465694665908813, + "loss_ce": 0.00011593455565162003, + "loss_iou": 0.42578125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 307965184, + "step": 1793 + }, + { + "epoch": 0.4718879463405011, + "grad_norm": 6.391344075166243, + "learning_rate": 5e-06, + "loss": 0.1742, + "num_input_tokens_seen": 308135620, + "step": 1794 + }, + { + "epoch": 0.4718879463405011, + "loss": 0.23051750659942627, + "loss_ce": 0.00010977771307807416, + "loss_iou": 0.58984375, + "loss_num": 0.046142578125, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 308135620, + "step": 1794 + }, + { + "epoch": 0.47215098309988823, + "grad_norm": 8.145404749300642, + "learning_rate": 5e-06, + "loss": 0.1465, + "num_input_tokens_seen": 308308152, + "step": 1795 + }, + { + "epoch": 0.47215098309988823, + "loss": 0.1917172074317932, + "loss_ce": 0.008871147409081459, + "loss_iou": 0.640625, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 308308152, + "step": 1795 + }, + { + "epoch": 0.47241401985927534, + "grad_norm": 5.398862877955347, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 308477996, + "step": 1796 + }, + { + "epoch": 0.47241401985927534, + "loss": 0.08200335502624512, + "loss_ce": 0.0003993565624114126, + "loss_iou": 0.48046875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 308477996, + "step": 1796 + }, + { + "epoch": 0.47267705661866244, + "grad_norm": 9.91724255704017, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 308648424, + "step": 1797 + }, + { + "epoch": 0.47267705661866244, + "loss": 0.12875403463840485, + "loss_ce": 0.0009769393363967538, + "loss_iou": 0.515625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 308648424, + "step": 1797 + }, + { + "epoch": 0.4729400933780496, + "grad_norm": 6.138930079819964, + "learning_rate": 5e-06, + "loss": 0.1842, + "num_input_tokens_seen": 308818888, + "step": 1798 + }, + { + "epoch": 0.4729400933780496, + "loss": 0.16927096247673035, + "loss_ce": 0.0021567128133028746, + "loss_iou": 0.73046875, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 308818888, + "step": 1798 + }, + { + "epoch": 0.4732031301374367, + "grad_norm": 19.105244313122192, + "learning_rate": 5e-06, + "loss": 0.1766, + "num_input_tokens_seen": 308991284, + "step": 1799 + }, + { + "epoch": 0.4732031301374367, + "loss": 0.21268007159233093, + "loss_ce": 9.461388253839687e-05, + "loss_iou": 0.431640625, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 308991284, + "step": 1799 + }, + { + "epoch": 0.4734661668968238, + "grad_norm": 6.0514901455791215, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 309163244, + "step": 1800 + }, + { + "epoch": 0.4734661668968238, + "loss": 0.11982670426368713, + "loss_ce": 0.00047245476162061095, + "loss_iou": 0.455078125, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 309163244, + "step": 1800 + }, + { + "epoch": 0.47372920365621096, + "grad_norm": 4.529009692158789, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 309335260, + "step": 1801 + }, + { + "epoch": 0.47372920365621096, + "loss": 0.16322672367095947, + "loss_ce": 0.000812180747743696, + "loss_iou": 0.4921875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 309335260, + "step": 1801 + }, + { + "epoch": 0.47399224041559807, + "grad_norm": 4.778436741374609, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 309507516, + "step": 1802 + }, + { + "epoch": 0.47399224041559807, + "loss": 0.09654629230499268, + "loss_ce": 0.0012398946564644575, + "loss_iou": 0.412109375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 309507516, + "step": 1802 + }, + { + "epoch": 0.4742552771749852, + "grad_norm": 4.954945704123918, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 309677988, + "step": 1803 + }, + { + "epoch": 0.4742552771749852, + "loss": 0.0787278264760971, + "loss_ce": 0.00014505814760923386, + "loss_iou": 0.59375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 309677988, + "step": 1803 + }, + { + "epoch": 0.4745183139343723, + "grad_norm": 12.56847461072702, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 309850180, + "step": 1804 + }, + { + "epoch": 0.4745183139343723, + "loss": 0.13278400897979736, + "loss_ce": 0.00015460627037100494, + "loss_iou": 0.52734375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 309850180, + "step": 1804 + }, + { + "epoch": 0.47478135069375943, + "grad_norm": 15.493537781093892, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 310019768, + "step": 1805 + }, + { + "epoch": 0.47478135069375943, + "loss": 0.05194393917918205, + "loss_ce": 0.0007049225969240069, + "loss_iou": 0.53515625, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 310019768, + "step": 1805 + }, + { + "epoch": 0.4750443874531466, + "grad_norm": 6.1982861025667795, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 310191904, + "step": 1806 + }, + { + "epoch": 0.4750443874531466, + "loss": 0.09039468318223953, + "loss_ce": 0.00045937972026877105, + "loss_iou": 0.7265625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 310191904, + "step": 1806 + }, + { + "epoch": 0.4753074242125337, + "grad_norm": 8.01906133906792, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 310364020, + "step": 1807 + }, + { + "epoch": 0.4753074242125337, + "loss": 0.1431303471326828, + "loss_ce": 0.002688447944819927, + "loss_iou": 0.5703125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 310364020, + "step": 1807 + }, + { + "epoch": 0.47557046097192085, + "grad_norm": 8.410195178652696, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 310536388, + "step": 1808 + }, + { + "epoch": 0.47557046097192085, + "loss": 0.1313067376613617, + "loss_ce": 0.002064801286906004, + "loss_iou": 0.73046875, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 310536388, + "step": 1808 + }, + { + "epoch": 0.47583349773130795, + "grad_norm": 5.758329237623371, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 310706940, + "step": 1809 + }, + { + "epoch": 0.47583349773130795, + "loss": 0.0813891738653183, + "loss_ce": 0.0004565550771076232, + "loss_iou": 0.484375, + "loss_num": 0.01611328125, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 310706940, + "step": 1809 + }, + { + "epoch": 0.47609653449069506, + "grad_norm": 38.441949828692486, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 310879292, + "step": 1810 + }, + { + "epoch": 0.47609653449069506, + "loss": 0.06832106411457062, + "loss_ce": 0.007591082248836756, + "loss_iou": 0.75, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 310879292, + "step": 1810 + }, + { + "epoch": 0.4763595712500822, + "grad_norm": 7.22086526843683, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 311051572, + "step": 1811 + }, + { + "epoch": 0.4763595712500822, + "loss": 0.0902928039431572, + "loss_ce": 0.00047957096830941737, + "loss_iou": 0.53125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 311051572, + "step": 1811 + }, + { + "epoch": 0.4766226080094693, + "grad_norm": 11.063199574808042, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 311222148, + "step": 1812 + }, + { + "epoch": 0.4766226080094693, + "loss": 0.2204355001449585, + "loss_ce": 0.001777050900273025, + "loss_iou": 0.447265625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 311222148, + "step": 1812 + }, + { + "epoch": 0.4768856447688564, + "grad_norm": 10.035743209995298, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 311394364, + "step": 1813 + }, + { + "epoch": 0.4768856447688564, + "loss": 0.07709920406341553, + "loss_ce": 0.003307701088488102, + "loss_iou": 0.421875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 311394364, + "step": 1813 + }, + { + "epoch": 0.4771486815282436, + "grad_norm": 8.932384890899103, + "learning_rate": 5e-06, + "loss": 0.1796, + "num_input_tokens_seen": 311566412, + "step": 1814 + }, + { + "epoch": 0.4771486815282436, + "loss": 0.18021875619888306, + "loss_ce": 0.002743777120485902, + "loss_iou": 0.4375, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 311566412, + "step": 1814 + }, + { + "epoch": 0.4774117182876307, + "grad_norm": 7.670958022785577, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 311738816, + "step": 1815 + }, + { + "epoch": 0.4774117182876307, + "loss": 0.06826284527778625, + "loss_ce": 0.002039696555584669, + "loss_iou": 0.546875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 311738816, + "step": 1815 + }, + { + "epoch": 0.47767475504701784, + "grad_norm": 9.178964340109715, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 311910812, + "step": 1816 + }, + { + "epoch": 0.47767475504701784, + "loss": 0.15946456789970398, + "loss_ce": 0.0008036750950850546, + "loss_iou": 0.4921875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 311910812, + "step": 1816 + }, + { + "epoch": 0.47793779180640494, + "grad_norm": 8.864036975405911, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 312082820, + "step": 1817 + }, + { + "epoch": 0.47793779180640494, + "loss": 0.0714251697063446, + "loss_ce": 0.002028202638030052, + "loss_iou": 0.55078125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 312082820, + "step": 1817 + }, + { + "epoch": 0.47820082856579205, + "grad_norm": 4.5785017279302185, + "learning_rate": 5e-06, + "loss": 0.1308, + "num_input_tokens_seen": 312255312, + "step": 1818 + }, + { + "epoch": 0.47820082856579205, + "loss": 0.1785389930009842, + "loss_ce": 0.0035817159805446863, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 312255312, + "step": 1818 + }, + { + "epoch": 0.4784638653251792, + "grad_norm": 9.103884045463392, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 312427552, + "step": 1819 + }, + { + "epoch": 0.4784638653251792, + "loss": 0.09079764038324356, + "loss_ce": 0.0007707877666689456, + "loss_iou": 0.6484375, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 312427552, + "step": 1819 + }, + { + "epoch": 0.4787269020845663, + "grad_norm": 3.611814351166419, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 312599880, + "step": 1820 + }, + { + "epoch": 0.4787269020845663, + "loss": 0.16918551921844482, + "loss_ce": 0.0004843563656322658, + "loss_iou": 0.59375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 312599880, + "step": 1820 + }, + { + "epoch": 0.47898993884395347, + "grad_norm": 6.886471414344964, + "learning_rate": 5e-06, + "loss": 0.144, + "num_input_tokens_seen": 312771960, + "step": 1821 + }, + { + "epoch": 0.47898993884395347, + "loss": 0.1718224436044693, + "loss_ce": 0.0013817725703120232, + "loss_iou": 0.61328125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 312771960, + "step": 1821 + }, + { + "epoch": 0.47925297560334057, + "grad_norm": 12.670187653553288, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 312943884, + "step": 1822 + }, + { + "epoch": 0.47925297560334057, + "loss": 0.17194947600364685, + "loss_ce": 0.0042553916573524475, + "loss_iou": 0.486328125, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 312943884, + "step": 1822 + }, + { + "epoch": 0.4795160123627277, + "grad_norm": 5.735564443277563, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 313115988, + "step": 1823 + }, + { + "epoch": 0.4795160123627277, + "loss": 0.11030334234237671, + "loss_ce": 0.00010437482706038281, + "loss_iou": 0.392578125, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 313115988, + "step": 1823 + }, + { + "epoch": 0.47977904912211483, + "grad_norm": 5.327091023011001, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 313288456, + "step": 1824 + }, + { + "epoch": 0.47977904912211483, + "loss": 0.2672034204006195, + "loss_ce": 0.0009680833900347352, + "loss_iou": 0.42578125, + "loss_num": 0.05322265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 313288456, + "step": 1824 + }, + { + "epoch": 0.48004208588150193, + "grad_norm": 8.698833105847095, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 313460532, + "step": 1825 + }, + { + "epoch": 0.48004208588150193, + "loss": 0.11867256462574005, + "loss_ce": 0.0026752520352602005, + "loss_iou": 0.419921875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 313460532, + "step": 1825 + }, + { + "epoch": 0.48030512264088904, + "grad_norm": 4.784883256464839, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 313630464, + "step": 1826 + }, + { + "epoch": 0.48030512264088904, + "loss": 0.18028897047042847, + "loss_ce": 0.0021883829031139612, + "loss_iou": 0.57421875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 313630464, + "step": 1826 + }, + { + "epoch": 0.4805681594002762, + "grad_norm": 7.070087319121284, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 313802864, + "step": 1827 + }, + { + "epoch": 0.4805681594002762, + "loss": 0.19596196711063385, + "loss_ce": 0.0022058698814362288, + "loss_iou": 0.5546875, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 313802864, + "step": 1827 + }, + { + "epoch": 0.4808311961596633, + "grad_norm": 6.746240646813248, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 313975036, + "step": 1828 + }, + { + "epoch": 0.4808311961596633, + "loss": 0.12373416870832443, + "loss_ce": 0.0009009129134938121, + "loss_iou": 0.59765625, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 313975036, + "step": 1828 + }, + { + "epoch": 0.48109423291905046, + "grad_norm": 12.282763523774303, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 314147252, + "step": 1829 + }, + { + "epoch": 0.48109423291905046, + "loss": 0.09647711366415024, + "loss_ce": 0.0027576321735978127, + "loss_iou": 0.380859375, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 314147252, + "step": 1829 + }, + { + "epoch": 0.48135726967843756, + "grad_norm": 6.5229587096602915, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 314317636, + "step": 1830 + }, + { + "epoch": 0.48135726967843756, + "loss": 0.11963652074337006, + "loss_ce": 0.004478443879634142, + "loss_iou": 0.55859375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 314317636, + "step": 1830 + }, + { + "epoch": 0.48162030643782466, + "grad_norm": 10.474914605426608, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 314489804, + "step": 1831 + }, + { + "epoch": 0.48162030643782466, + "loss": 0.08893167972564697, + "loss_ce": 0.0019260660046711564, + "loss_iou": 0.458984375, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 314489804, + "step": 1831 + }, + { + "epoch": 0.4818833431972118, + "grad_norm": 4.796918355311461, + "learning_rate": 5e-06, + "loss": 0.1701, + "num_input_tokens_seen": 314661804, + "step": 1832 + }, + { + "epoch": 0.4818833431972118, + "loss": 0.18161356449127197, + "loss_ce": 0.0023533080238848925, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 314661804, + "step": 1832 + }, + { + "epoch": 0.4821463799565989, + "grad_norm": 6.244958331343755, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 314834036, + "step": 1833 + }, + { + "epoch": 0.4821463799565989, + "loss": 0.13272178173065186, + "loss_ce": 0.003785028588026762, + "loss_iou": 0.478515625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 314834036, + "step": 1833 + }, + { + "epoch": 0.4824094167159861, + "grad_norm": 7.896413991695191, + "learning_rate": 5e-06, + "loss": 0.1255, + "num_input_tokens_seen": 315004484, + "step": 1834 + }, + { + "epoch": 0.4824094167159861, + "loss": 0.12204363942146301, + "loss_ce": 0.0003090191457886249, + "loss_iou": 0.58984375, + "loss_num": 0.0244140625, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 315004484, + "step": 1834 + }, + { + "epoch": 0.4826724534753732, + "grad_norm": 7.031269313570851, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 315176780, + "step": 1835 + }, + { + "epoch": 0.4826724534753732, + "loss": 0.12622271478176117, + "loss_ce": 0.002534976229071617, + "loss_iou": 0.62109375, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 315176780, + "step": 1835 + }, + { + "epoch": 0.4829354902347603, + "grad_norm": 6.072447688187852, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 315348656, + "step": 1836 + }, + { + "epoch": 0.4829354902347603, + "loss": 0.08488506823778152, + "loss_ce": 0.0010227651800960302, + "loss_iou": 0.462890625, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 315348656, + "step": 1836 + }, + { + "epoch": 0.48319852699414745, + "grad_norm": 14.427685690814952, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 315520828, + "step": 1837 + }, + { + "epoch": 0.48319852699414745, + "loss": 0.0883752852678299, + "loss_ce": 0.00042362496606074274, + "loss_iou": 0.5625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 315520828, + "step": 1837 + }, + { + "epoch": 0.48346156375353455, + "grad_norm": 17.485755335788554, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 315692964, + "step": 1838 + }, + { + "epoch": 0.48346156375353455, + "loss": 0.10190844535827637, + "loss_ce": 0.0014140586135908961, + "loss_iou": 0.359375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 315692964, + "step": 1838 + }, + { + "epoch": 0.48372460051292165, + "grad_norm": 9.721032690775148, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 315863376, + "step": 1839 + }, + { + "epoch": 0.48372460051292165, + "loss": 0.18402375280857086, + "loss_ce": 0.0028408921789377928, + "loss_iou": 0.40234375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 315863376, + "step": 1839 + }, + { + "epoch": 0.4839876372723088, + "grad_norm": 7.726124987543399, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 316035520, + "step": 1840 + }, + { + "epoch": 0.4839876372723088, + "loss": 0.13577872514724731, + "loss_ce": 0.00031119072809815407, + "loss_iou": 0.498046875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 316035520, + "step": 1840 + }, + { + "epoch": 0.4842506740316959, + "grad_norm": 7.757576022533387, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 316207616, + "step": 1841 + }, + { + "epoch": 0.4842506740316959, + "loss": 0.2208271026611328, + "loss_ce": 0.0022144389804452658, + "loss_iou": 0.44140625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 316207616, + "step": 1841 + }, + { + "epoch": 0.4845137107910831, + "grad_norm": 11.993527938590784, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 316379548, + "step": 1842 + }, + { + "epoch": 0.4845137107910831, + "loss": 0.17184340953826904, + "loss_ce": 0.002074118936434388, + "loss_iou": 0.341796875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 316379548, + "step": 1842 + }, + { + "epoch": 0.4847767475504702, + "grad_norm": 5.880659481834313, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 316551724, + "step": 1843 + }, + { + "epoch": 0.4847767475504702, + "loss": 0.30887043476104736, + "loss_ce": 0.0007344337645918131, + "loss_iou": 0.48046875, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 316551724, + "step": 1843 + }, + { + "epoch": 0.4850397843098573, + "grad_norm": 9.164014647902235, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 316724140, + "step": 1844 + }, + { + "epoch": 0.4850397843098573, + "loss": 0.1765921413898468, + "loss_ce": 0.0012076160637661815, + "loss_iou": 0.62109375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 316724140, + "step": 1844 + }, + { + "epoch": 0.48530282106924444, + "grad_norm": 25.18318601638769, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 316896084, + "step": 1845 + }, + { + "epoch": 0.48530282106924444, + "loss": 0.14695365726947784, + "loss_ce": 0.0009270399459637702, + "loss_iou": 0.47265625, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 316896084, + "step": 1845 + }, + { + "epoch": 0.48556585782863154, + "grad_norm": 10.73033101211756, + "learning_rate": 5e-06, + "loss": 0.183, + "num_input_tokens_seen": 317068004, + "step": 1846 + }, + { + "epoch": 0.48556585782863154, + "loss": 0.3143799304962158, + "loss_ce": 0.004962218925356865, + "loss_iou": 0.58203125, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 317068004, + "step": 1846 + }, + { + "epoch": 0.4858288945880187, + "grad_norm": 14.757236176384808, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 317240256, + "step": 1847 + }, + { + "epoch": 0.4858288945880187, + "loss": 0.074017733335495, + "loss_ce": 0.0028812657110393047, + "loss_iou": 0.439453125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 317240256, + "step": 1847 + }, + { + "epoch": 0.4860919313474058, + "grad_norm": 5.470394923350598, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 317412376, + "step": 1848 + }, + { + "epoch": 0.4860919313474058, + "loss": 0.13736534118652344, + "loss_ce": 0.0006160617922432721, + "loss_iou": 0.67578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 317412376, + "step": 1848 + }, + { + "epoch": 0.4863549681067929, + "grad_norm": 11.769346294638343, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 317584644, + "step": 1849 + }, + { + "epoch": 0.4863549681067929, + "loss": 0.07671768963336945, + "loss_ce": 0.002804110525175929, + "loss_iou": 0.5078125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 317584644, + "step": 1849 + }, + { + "epoch": 0.48661800486618007, + "grad_norm": 7.934603221234343, + "learning_rate": 5e-06, + "loss": 0.157, + "num_input_tokens_seen": 317756784, + "step": 1850 + }, + { + "epoch": 0.48661800486618007, + "loss": 0.1591799259185791, + "loss_ce": 0.0034792337100952864, + "loss_iou": 0.6328125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 317756784, + "step": 1850 + }, + { + "epoch": 0.48688104162556717, + "grad_norm": 5.949457674338729, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 317929000, + "step": 1851 + }, + { + "epoch": 0.48688104162556717, + "loss": 0.15429449081420898, + "loss_ce": 0.0003638358903117478, + "loss_iou": 0.443359375, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 317929000, + "step": 1851 + }, + { + "epoch": 0.48714407838495427, + "grad_norm": 4.538326131208253, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 318101400, + "step": 1852 + }, + { + "epoch": 0.48714407838495427, + "loss": 0.14405781030654907, + "loss_ce": 0.004745060577988625, + "loss_iou": 0.51171875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 318101400, + "step": 1852 + }, + { + "epoch": 0.48740711514434143, + "grad_norm": 11.796493242549497, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 318273392, + "step": 1853 + }, + { + "epoch": 0.48740711514434143, + "loss": 0.05749022588133812, + "loss_ce": 0.0008801189833320677, + "loss_iou": 0.453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 318273392, + "step": 1853 + }, + { + "epoch": 0.48767015190372853, + "grad_norm": 4.839316076441186, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 318444936, + "step": 1854 + }, + { + "epoch": 0.48767015190372853, + "loss": 0.15674933791160583, + "loss_ce": 0.0008960673003457487, + "loss_iou": 0.5625, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 318444936, + "step": 1854 + }, + { + "epoch": 0.4879331886631157, + "grad_norm": 4.489339979618492, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 318617236, + "step": 1855 + }, + { + "epoch": 0.4879331886631157, + "loss": 0.17843472957611084, + "loss_ce": 0.001768482499755919, + "loss_iou": 0.57421875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 318617236, + "step": 1855 + }, + { + "epoch": 0.4881962254225028, + "grad_norm": 5.676162725297575, + "learning_rate": 5e-06, + "loss": 0.178, + "num_input_tokens_seen": 318787468, + "step": 1856 + }, + { + "epoch": 0.4881962254225028, + "loss": 0.25485190749168396, + "loss_ce": 0.0009456594125367701, + "loss_iou": 0.466796875, + "loss_num": 0.05078125, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 318787468, + "step": 1856 + }, + { + "epoch": 0.4884592621818899, + "grad_norm": 4.081201767827966, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 318957884, + "step": 1857 + }, + { + "epoch": 0.4884592621818899, + "loss": 0.0997210294008255, + "loss_ce": 0.00020320963812991977, + "loss_iou": 0.34765625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 318957884, + "step": 1857 + }, + { + "epoch": 0.48872229894127706, + "grad_norm": 23.818400579938295, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 319129856, + "step": 1858 + }, + { + "epoch": 0.48872229894127706, + "loss": 0.07231907546520233, + "loss_ce": 0.00011447950237197801, + "loss_iou": 0.6171875, + "loss_num": 0.014404296875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 319129856, + "step": 1858 + }, + { + "epoch": 0.48898533570066416, + "grad_norm": 6.068112059865649, + "learning_rate": 5e-06, + "loss": 0.1742, + "num_input_tokens_seen": 319302108, + "step": 1859 + }, + { + "epoch": 0.48898533570066416, + "loss": 0.1806957721710205, + "loss_ce": 0.00039790940354578197, + "loss_iou": 0.51171875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 319302108, + "step": 1859 + }, + { + "epoch": 0.4892483724600513, + "grad_norm": 4.764918083593623, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 319473076, + "step": 1860 + }, + { + "epoch": 0.4892483724600513, + "loss": 0.056661054491996765, + "loss_ce": 0.0034689174499362707, + "loss_iou": 0.51953125, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 319473076, + "step": 1860 + }, + { + "epoch": 0.4895114092194384, + "grad_norm": 4.591820064091911, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 319645292, + "step": 1861 + }, + { + "epoch": 0.4895114092194384, + "loss": 0.10820820182561874, + "loss_ce": 8.442218677373603e-05, + "loss_iou": 0.50390625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 319645292, + "step": 1861 + }, + { + "epoch": 0.4897744459788255, + "grad_norm": 11.395874847747328, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 319817560, + "step": 1862 + }, + { + "epoch": 0.4897744459788255, + "loss": 0.1541745364665985, + "loss_ce": 0.0011288827518001199, + "loss_iou": 0.53125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 319817560, + "step": 1862 + }, + { + "epoch": 0.4900374827382127, + "grad_norm": 8.74594499498662, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 319990008, + "step": 1863 + }, + { + "epoch": 0.4900374827382127, + "loss": 0.1368078887462616, + "loss_ce": 0.0013403687626123428, + "loss_iou": 0.5625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 319990008, + "step": 1863 + }, + { + "epoch": 0.4903005194975998, + "grad_norm": 4.454006905645775, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 320162308, + "step": 1864 + }, + { + "epoch": 0.4903005194975998, + "loss": 0.09877588599920273, + "loss_ce": 0.0007534276228398085, + "loss_iou": 0.5546875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 320162308, + "step": 1864 + }, + { + "epoch": 0.4905635562569869, + "grad_norm": 9.654102944086466, + "learning_rate": 5e-06, + "loss": 0.1351, + "num_input_tokens_seen": 320334456, + "step": 1865 + }, + { + "epoch": 0.4905635562569869, + "loss": 0.09071889519691467, + "loss_ce": 0.0011192907113581896, + "loss_iou": 0.6171875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 320334456, + "step": 1865 + }, + { + "epoch": 0.49082659301637405, + "grad_norm": 7.49930478522843, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 320506784, + "step": 1866 + }, + { + "epoch": 0.49082659301637405, + "loss": 0.1436455398797989, + "loss_ce": 0.0007622435805387795, + "loss_iou": 0.5390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 320506784, + "step": 1866 + }, + { + "epoch": 0.49108962977576115, + "grad_norm": 4.213229463422797, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 320678800, + "step": 1867 + }, + { + "epoch": 0.49108962977576115, + "loss": 0.19276383519172668, + "loss_ce": 0.0017237972933799028, + "loss_iou": 0.5703125, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 320678800, + "step": 1867 + }, + { + "epoch": 0.4913526665351483, + "grad_norm": 13.139928650685244, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 320851152, + "step": 1868 + }, + { + "epoch": 0.4913526665351483, + "loss": 0.13707411289215088, + "loss_ce": 0.0001722496235743165, + "loss_iou": 0.6640625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 320851152, + "step": 1868 + }, + { + "epoch": 0.4916157032945354, + "grad_norm": 10.621311625988106, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 321023324, + "step": 1869 + }, + { + "epoch": 0.4916157032945354, + "loss": 0.17078933119773865, + "loss_ce": 0.0011115875095129013, + "loss_iou": 0.466796875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 321023324, + "step": 1869 + }, + { + "epoch": 0.4918787400539225, + "grad_norm": 9.794878479481557, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 321195396, + "step": 1870 + }, + { + "epoch": 0.4918787400539225, + "loss": 0.1828850954771042, + "loss_ce": 0.0016412028344348073, + "loss_iou": 0.65234375, + "loss_num": 0.0361328125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 321195396, + "step": 1870 + }, + { + "epoch": 0.4921417768133097, + "grad_norm": 3.6593406817873193, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 321367508, + "step": 1871 + }, + { + "epoch": 0.4921417768133097, + "loss": 0.07714089751243591, + "loss_ce": 0.0005722964997403324, + "loss_iou": 0.478515625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 321367508, + "step": 1871 + }, + { + "epoch": 0.4924048135726968, + "grad_norm": 3.9995708653462874, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 321538180, + "step": 1872 + }, + { + "epoch": 0.4924048135726968, + "loss": 0.10363311320543289, + "loss_ce": 0.004054257180541754, + "loss_iou": 0.5078125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 321538180, + "step": 1872 + }, + { + "epoch": 0.49266785033208393, + "grad_norm": 8.331617815221565, + "learning_rate": 5e-06, + "loss": 0.1423, + "num_input_tokens_seen": 321710632, + "step": 1873 + }, + { + "epoch": 0.49266785033208393, + "loss": 0.13812920451164246, + "loss_ce": 0.0021428640466183424, + "loss_iou": 0.58203125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 321710632, + "step": 1873 + }, + { + "epoch": 0.49293088709147104, + "grad_norm": 5.452515467611325, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 321882748, + "step": 1874 + }, + { + "epoch": 0.49293088709147104, + "loss": 0.14979197084903717, + "loss_ce": 0.002147932071238756, + "loss_iou": NaN, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 321882748, + "step": 1874 + }, + { + "epoch": 0.49319392385085814, + "grad_norm": 14.062855136219687, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 322054892, + "step": 1875 + }, + { + "epoch": 0.49319392385085814, + "loss": 0.24241477251052856, + "loss_ce": 0.0016921274363994598, + "loss_iou": 0.447265625, + "loss_num": 0.048095703125, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 322054892, + "step": 1875 + }, + { + "epoch": 0.4934569606102453, + "grad_norm": 3.3198848239673966, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 322226992, + "step": 1876 + }, + { + "epoch": 0.4934569606102453, + "loss": 0.06338231265544891, + "loss_ce": 0.0020724977366626263, + "loss_iou": 0.62890625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 322226992, + "step": 1876 + }, + { + "epoch": 0.4937199973696324, + "grad_norm": 5.974832705883753, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 322399220, + "step": 1877 + }, + { + "epoch": 0.4937199973696324, + "loss": 0.20875243842601776, + "loss_ce": 0.004009997006505728, + "loss_iou": 0.53125, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 322399220, + "step": 1877 + }, + { + "epoch": 0.4939830341290195, + "grad_norm": 11.103490307412729, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 322569372, + "step": 1878 + }, + { + "epoch": 0.4939830341290195, + "loss": 0.10719159990549088, + "loss_ce": 0.006331001408398151, + "loss_iou": 0.64453125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 322569372, + "step": 1878 + }, + { + "epoch": 0.49424607088840666, + "grad_norm": 4.615361697814784, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 322741596, + "step": 1879 + }, + { + "epoch": 0.49424607088840666, + "loss": 0.13756033778190613, + "loss_ce": 0.002703155390918255, + "loss_iou": 0.46484375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 322741596, + "step": 1879 + }, + { + "epoch": 0.49450910764779377, + "grad_norm": 5.99195178330634, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 322913828, + "step": 1880 + }, + { + "epoch": 0.49450910764779377, + "loss": 0.20525991916656494, + "loss_ce": 0.0015550723765045404, + "loss_iou": 0.47265625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 322913828, + "step": 1880 + }, + { + "epoch": 0.4947721444071809, + "grad_norm": 8.544161694628455, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 323084272, + "step": 1881 + }, + { + "epoch": 0.4947721444071809, + "loss": 0.13442979753017426, + "loss_ce": 0.0013731509679928422, + "loss_iou": 0.462890625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 323084272, + "step": 1881 + }, + { + "epoch": 0.49503518116656803, + "grad_norm": 12.157441672743493, + "learning_rate": 5e-06, + "loss": 0.1382, + "num_input_tokens_seen": 323254564, + "step": 1882 + }, + { + "epoch": 0.49503518116656803, + "loss": 0.06910552829504013, + "loss_ce": 0.00015105513739399612, + "loss_iou": 0.52734375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 323254564, + "step": 1882 + }, + { + "epoch": 0.49529821792595513, + "grad_norm": 7.758308533551838, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 323426936, + "step": 1883 + }, + { + "epoch": 0.49529821792595513, + "loss": 0.07692838460206985, + "loss_ce": 0.000985392602160573, + "loss_iou": 0.51953125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 323426936, + "step": 1883 + }, + { + "epoch": 0.4955612546853423, + "grad_norm": 10.56929332370978, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 323599244, + "step": 1884 + }, + { + "epoch": 0.4955612546853423, + "loss": 0.17598523199558258, + "loss_ce": 0.0006922531756572425, + "loss_iou": 0.52734375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 323599244, + "step": 1884 + }, + { + "epoch": 0.4958242914447294, + "grad_norm": 4.870883244723278, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 323771396, + "step": 1885 + }, + { + "epoch": 0.4958242914447294, + "loss": 0.08608455955982208, + "loss_ce": 0.0016424173954874277, + "loss_iou": 0.443359375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 323771396, + "step": 1885 + }, + { + "epoch": 0.49608732820411655, + "grad_norm": 9.112063415949194, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 323943464, + "step": 1886 + }, + { + "epoch": 0.49608732820411655, + "loss": 0.05756930261850357, + "loss_ce": 0.00025729500339366496, + "loss_iou": 0.494140625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 323943464, + "step": 1886 + }, + { + "epoch": 0.49635036496350365, + "grad_norm": 11.77050689912044, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 324115664, + "step": 1887 + }, + { + "epoch": 0.49635036496350365, + "loss": 0.13590523600578308, + "loss_ce": 0.0011243472108617425, + "loss_iou": 0.39453125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 324115664, + "step": 1887 + }, + { + "epoch": 0.49661340172289076, + "grad_norm": 19.144215192825804, + "learning_rate": 5e-06, + "loss": 0.154, + "num_input_tokens_seen": 324288076, + "step": 1888 + }, + { + "epoch": 0.49661340172289076, + "loss": 0.1540539562702179, + "loss_ce": 0.00036742445081472397, + "loss_iou": 0.49609375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 324288076, + "step": 1888 + }, + { + "epoch": 0.4968764384822779, + "grad_norm": 4.245370979797126, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 324460724, + "step": 1889 + }, + { + "epoch": 0.4968764384822779, + "loss": 0.08321575820446014, + "loss_ce": 0.0006657101330347359, + "loss_iou": 0.59375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 324460724, + "step": 1889 + }, + { + "epoch": 0.497139475241665, + "grad_norm": 4.913753500635943, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 324633088, + "step": 1890 + }, + { + "epoch": 0.497139475241665, + "loss": 0.13971787691116333, + "loss_ce": 0.001625841949135065, + "loss_iou": 0.51953125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 324633088, + "step": 1890 + }, + { + "epoch": 0.4974025120010521, + "grad_norm": 4.7392923084206275, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 324805420, + "step": 1891 + }, + { + "epoch": 0.4974025120010521, + "loss": 0.10387594997882843, + "loss_ce": 0.001977758714929223, + "loss_iou": 0.52734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 324805420, + "step": 1891 + }, + { + "epoch": 0.4976655487604393, + "grad_norm": 3.934998355466595, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 324977628, + "step": 1892 + }, + { + "epoch": 0.4976655487604393, + "loss": 0.08378120511770248, + "loss_ce": 0.0015668454580008984, + "loss_iou": 0.4921875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 324977628, + "step": 1892 + }, + { + "epoch": 0.4979285855198264, + "grad_norm": 16.6043348209483, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 325148040, + "step": 1893 + }, + { + "epoch": 0.4979285855198264, + "loss": 0.11267328262329102, + "loss_ce": 0.0008110918570309877, + "loss_iou": 0.51953125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 325148040, + "step": 1893 + }, + { + "epoch": 0.49819162227921354, + "grad_norm": 4.246925011279612, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 325317336, + "step": 1894 + }, + { + "epoch": 0.49819162227921354, + "loss": 0.20916813611984253, + "loss_ce": 0.000855154765304178, + "loss_iou": 0.49609375, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 325317336, + "step": 1894 + }, + { + "epoch": 0.49845465903860064, + "grad_norm": 22.177021376541713, + "learning_rate": 5e-06, + "loss": 0.1381, + "num_input_tokens_seen": 325489652, + "step": 1895 + }, + { + "epoch": 0.49845465903860064, + "loss": 0.15358038246631622, + "loss_ce": 0.0006873153615742922, + "loss_iou": 0.333984375, + "loss_num": 0.030517578125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 325489652, + "step": 1895 + }, + { + "epoch": 0.49871769579798775, + "grad_norm": 4.369246978330159, + "learning_rate": 5e-06, + "loss": 0.1545, + "num_input_tokens_seen": 325661796, + "step": 1896 + }, + { + "epoch": 0.49871769579798775, + "loss": 0.11801205575466156, + "loss_ce": 0.0016180112725123763, + "loss_iou": 0.6640625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 325661796, + "step": 1896 + }, + { + "epoch": 0.4989807325573749, + "grad_norm": 4.8736707793033975, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 325834028, + "step": 1897 + }, + { + "epoch": 0.4989807325573749, + "loss": 0.09736193716526031, + "loss_ce": 0.0003770706243813038, + "loss_iou": 0.482421875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 325834028, + "step": 1897 + }, + { + "epoch": 0.499243769316762, + "grad_norm": 13.34167032829397, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 326006352, + "step": 1898 + }, + { + "epoch": 0.499243769316762, + "loss": 0.1529536247253418, + "loss_ce": 0.0012812747154384851, + "loss_iou": 0.640625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 326006352, + "step": 1898 + }, + { + "epoch": 0.49950680607614917, + "grad_norm": 8.275559071074456, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 326178328, + "step": 1899 + }, + { + "epoch": 0.49950680607614917, + "loss": 0.040668413043022156, + "loss_ce": 0.0004920191713608801, + "loss_iou": 0.515625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 326178328, + "step": 1899 + }, + { + "epoch": 0.49976984283553627, + "grad_norm": 6.7636763663201105, + "learning_rate": 5e-06, + "loss": 0.146, + "num_input_tokens_seen": 326347792, + "step": 1900 + }, + { + "epoch": 0.49976984283553627, + "loss": 0.12708882987499237, + "loss_ce": 0.0006239861249923706, + "loss_iou": 0.50390625, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 326347792, + "step": 1900 + }, + { + "epoch": 0.5000328795949234, + "grad_norm": 11.91106437107254, + "learning_rate": 5e-06, + "loss": 0.1809, + "num_input_tokens_seen": 326520084, + "step": 1901 + }, + { + "epoch": 0.5000328795949234, + "loss": 0.24568378925323486, + "loss_ce": 0.0030690436251461506, + "loss_iou": 0.41796875, + "loss_num": 0.048583984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 326520084, + "step": 1901 + }, + { + "epoch": 0.5002959163543105, + "grad_norm": 15.380203467807723, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 326691988, + "step": 1902 + }, + { + "epoch": 0.5002959163543105, + "loss": 0.1489952951669693, + "loss_ce": 0.00034418603172525764, + "loss_iou": 0.53125, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 326691988, + "step": 1902 + }, + { + "epoch": 0.5005589531136977, + "grad_norm": 6.172818867405067, + "learning_rate": 5e-06, + "loss": 0.1834, + "num_input_tokens_seen": 326864500, + "step": 1903 + }, + { + "epoch": 0.5005589531136977, + "loss": 0.08618461340665817, + "loss_ce": 0.0021086866036057472, + "loss_iou": 0.54296875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 326864500, + "step": 1903 + }, + { + "epoch": 0.5008219898730848, + "grad_norm": 6.280435924544313, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 327036512, + "step": 1904 + }, + { + "epoch": 0.5008219898730848, + "loss": 0.16495370864868164, + "loss_ce": 0.0016236326191574335, + "loss_iou": 0.58984375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 327036512, + "step": 1904 + }, + { + "epoch": 0.5010850266324719, + "grad_norm": 5.592537029701005, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 327208668, + "step": 1905 + }, + { + "epoch": 0.5010850266324719, + "loss": 0.09431658685207367, + "loss_ce": 0.0009938328294083476, + "loss_iou": 0.40234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 327208668, + "step": 1905 + }, + { + "epoch": 0.501348063391859, + "grad_norm": 6.449445852698605, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 327378984, + "step": 1906 + }, + { + "epoch": 0.501348063391859, + "loss": 0.1146991178393364, + "loss_ce": 0.0006091539980843663, + "loss_iou": 0.498046875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 327378984, + "step": 1906 + }, + { + "epoch": 0.5016111001512461, + "grad_norm": 4.708352961414117, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 327551376, + "step": 1907 + }, + { + "epoch": 0.5016111001512461, + "loss": 0.15458180010318756, + "loss_ce": 0.0006816480308771133, + "loss_iou": 0.4140625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 327551376, + "step": 1907 + }, + { + "epoch": 0.5018741369106332, + "grad_norm": 11.511613556933392, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 327724012, + "step": 1908 + }, + { + "epoch": 0.5018741369106332, + "loss": 0.20126180350780487, + "loss_ce": 0.0008528655744157732, + "loss_iou": 0.5703125, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 327724012, + "step": 1908 + }, + { + "epoch": 0.5021371736700204, + "grad_norm": 5.154439626897641, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 327896420, + "step": 1909 + }, + { + "epoch": 0.5021371736700204, + "loss": 0.13825711607933044, + "loss_ce": 0.0017519897082820535, + "loss_iou": 0.484375, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 327896420, + "step": 1909 + }, + { + "epoch": 0.5024002104294075, + "grad_norm": 9.76818168107923, + "learning_rate": 5e-06, + "loss": 0.1211, + "num_input_tokens_seen": 328066120, + "step": 1910 + }, + { + "epoch": 0.5024002104294075, + "loss": 0.20892378687858582, + "loss_ce": 0.00096173956990242, + "loss_iou": 0.609375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 328066120, + "step": 1910 + }, + { + "epoch": 0.5026632471887946, + "grad_norm": 6.89128626408132, + "learning_rate": 5e-06, + "loss": 0.1391, + "num_input_tokens_seen": 328238388, + "step": 1911 + }, + { + "epoch": 0.5026632471887946, + "loss": 0.19616608321666718, + "loss_ce": 0.001982737798243761, + "loss_iou": 0.4453125, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 328238388, + "step": 1911 + }, + { + "epoch": 0.5029262839481817, + "grad_norm": 4.852078459217102, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 328410228, + "step": 1912 + }, + { + "epoch": 0.5029262839481817, + "loss": 0.13402841985225677, + "loss_ce": 0.0019178204238414764, + "loss_iou": 0.40625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 328410228, + "step": 1912 + }, + { + "epoch": 0.5031893207075688, + "grad_norm": 8.495391184677569, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 328582660, + "step": 1913 + }, + { + "epoch": 0.5031893207075688, + "loss": 0.14262062311172485, + "loss_ce": 0.004253931809216738, + "loss_iou": 0.5546875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 328582660, + "step": 1913 + }, + { + "epoch": 0.503452357466956, + "grad_norm": 7.2140273397639385, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 328754772, + "step": 1914 + }, + { + "epoch": 0.503452357466956, + "loss": 0.09518692642450333, + "loss_ce": 0.003878333605825901, + "loss_iou": 0.5859375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 328754772, + "step": 1914 + }, + { + "epoch": 0.5037153942263431, + "grad_norm": 4.268432766820409, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 328926900, + "step": 1915 + }, + { + "epoch": 0.5037153942263431, + "loss": 0.11751651018857956, + "loss_ce": 0.0052423360757529736, + "loss_iou": 0.5234375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 328926900, + "step": 1915 + }, + { + "epoch": 0.5039784309857303, + "grad_norm": 3.9588210706780522, + "learning_rate": 5e-06, + "loss": 0.1548, + "num_input_tokens_seen": 329096944, + "step": 1916 + }, + { + "epoch": 0.5039784309857303, + "loss": 0.1656898707151413, + "loss_ce": 0.005258964374661446, + "loss_iou": 0.4453125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 329096944, + "step": 1916 + }, + { + "epoch": 0.5042414677451174, + "grad_norm": 5.63809501756183, + "learning_rate": 5e-06, + "loss": 0.0966, + "num_input_tokens_seen": 329268892, + "step": 1917 + }, + { + "epoch": 0.5042414677451174, + "loss": 0.07544635236263275, + "loss_ce": 0.000571468030102551, + "loss_iou": 0.373046875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 329268892, + "step": 1917 + }, + { + "epoch": 0.5045045045045045, + "grad_norm": 7.590332140913489, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 329441344, + "step": 1918 + }, + { + "epoch": 0.5045045045045045, + "loss": 0.15229275822639465, + "loss_ce": 0.006144077517092228, + "loss_iou": 0.52734375, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 329441344, + "step": 1918 + }, + { + "epoch": 0.5047675412638917, + "grad_norm": 9.59054902064166, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 329613564, + "step": 1919 + }, + { + "epoch": 0.5047675412638917, + "loss": 0.08887225389480591, + "loss_ce": 0.001500429236330092, + "loss_iou": 0.515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 329613564, + "step": 1919 + }, + { + "epoch": 0.5050305780232788, + "grad_norm": 4.210072070601249, + "learning_rate": 5e-06, + "loss": 0.1766, + "num_input_tokens_seen": 329785680, + "step": 1920 + }, + { + "epoch": 0.5050305780232788, + "loss": 0.2718814015388489, + "loss_ce": 0.002228060271590948, + "loss_iou": 0.3359375, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 329785680, + "step": 1920 + }, + { + "epoch": 0.5052936147826659, + "grad_norm": 8.380948003304292, + "learning_rate": 5e-06, + "loss": 0.1587, + "num_input_tokens_seen": 329957636, + "step": 1921 + }, + { + "epoch": 0.5052936147826659, + "loss": 0.1278029978275299, + "loss_ce": 0.00286403251811862, + "loss_iou": 0.56640625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 329957636, + "step": 1921 + }, + { + "epoch": 0.505556651542053, + "grad_norm": 7.7625200359697715, + "learning_rate": 5e-06, + "loss": 0.1491, + "num_input_tokens_seen": 330129632, + "step": 1922 + }, + { + "epoch": 0.505556651542053, + "loss": 0.11747082322835922, + "loss_ce": 0.004799924790859222, + "loss_iou": 0.5859375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 330129632, + "step": 1922 + }, + { + "epoch": 0.5058196883014401, + "grad_norm": 6.9606844299684205, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 330301512, + "step": 1923 + }, + { + "epoch": 0.5058196883014401, + "loss": 0.09333023428916931, + "loss_ce": 0.0011671524262055755, + "loss_iou": 0.470703125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 330301512, + "step": 1923 + }, + { + "epoch": 0.5060827250608273, + "grad_norm": 13.729941886848659, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 330473972, + "step": 1924 + }, + { + "epoch": 0.5060827250608273, + "loss": 0.09531684219837189, + "loss_ce": 0.000803898845333606, + "loss_iou": 0.390625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 330473972, + "step": 1924 + }, + { + "epoch": 0.5063457618202144, + "grad_norm": 6.989119579903773, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 330644424, + "step": 1925 + }, + { + "epoch": 0.5063457618202144, + "loss": 0.17603036761283875, + "loss_ce": 0.0006458393763750792, + "loss_iou": 0.451171875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 330644424, + "step": 1925 + }, + { + "epoch": 0.5066087985796015, + "grad_norm": 33.383727313004705, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 330816512, + "step": 1926 + }, + { + "epoch": 0.5066087985796015, + "loss": 0.07775846868753433, + "loss_ce": 0.0003048558428417891, + "loss_iou": 0.330078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 330816512, + "step": 1926 + }, + { + "epoch": 0.5068718353389886, + "grad_norm": 7.427483033873037, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 330988952, + "step": 1927 + }, + { + "epoch": 0.5068718353389886, + "loss": 0.16052240133285522, + "loss_ce": 0.000579762679990381, + "loss_iou": 0.35546875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 330988952, + "step": 1927 + }, + { + "epoch": 0.5071348720983757, + "grad_norm": 8.482091734395144, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 331161228, + "step": 1928 + }, + { + "epoch": 0.5071348720983757, + "loss": 0.14751845598220825, + "loss_ce": 0.000576329359319061, + "loss_iou": 0.72265625, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 331161228, + "step": 1928 + }, + { + "epoch": 0.5073979088577629, + "grad_norm": 6.087066534105735, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 331329832, + "step": 1929 + }, + { + "epoch": 0.5073979088577629, + "loss": 0.09639698266983032, + "loss_ce": 0.002372324001044035, + "loss_iou": 0.4765625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 331329832, + "step": 1929 + }, + { + "epoch": 0.50766094561715, + "grad_norm": 6.499679773970415, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 331502148, + "step": 1930 + }, + { + "epoch": 0.50766094561715, + "loss": 0.11911525577306747, + "loss_ce": 0.0002492967469152063, + "loss_iou": 0.61328125, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 331502148, + "step": 1930 + }, + { + "epoch": 0.5079239823765371, + "grad_norm": 16.477307771999516, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 331674540, + "step": 1931 + }, + { + "epoch": 0.5079239823765371, + "loss": 0.14295902848243713, + "loss_ce": 0.0038904245011508465, + "loss_iou": 0.341796875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 331674540, + "step": 1931 + }, + { + "epoch": 0.5081870191359242, + "grad_norm": 4.977000748222245, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 331846988, + "step": 1932 + }, + { + "epoch": 0.5081870191359242, + "loss": 0.1609978973865509, + "loss_ce": 0.00042966773617081344, + "loss_iou": 0.66015625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 331846988, + "step": 1932 + }, + { + "epoch": 0.5084500558953113, + "grad_norm": 4.68697406244425, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 332019216, + "step": 1933 + }, + { + "epoch": 0.5084500558953113, + "loss": 0.12378443777561188, + "loss_ce": 0.0009511768585070968, + "loss_iou": 0.5, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 332019216, + "step": 1933 + }, + { + "epoch": 0.5087130926546984, + "grad_norm": 5.061790101979302, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 332191420, + "step": 1934 + }, + { + "epoch": 0.5087130926546984, + "loss": 0.09300635755062103, + "loss_ce": 0.0008737844182178378, + "loss_iou": 0.51171875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 332191420, + "step": 1934 + }, + { + "epoch": 0.5089761294140857, + "grad_norm": 18.6525779104533, + "learning_rate": 5e-06, + "loss": 0.1468, + "num_input_tokens_seen": 332363456, + "step": 1935 + }, + { + "epoch": 0.5089761294140857, + "loss": 0.13239170610904694, + "loss_ce": 0.0003726637805812061, + "loss_iou": 0.42578125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 332363456, + "step": 1935 + }, + { + "epoch": 0.5092391661734728, + "grad_norm": 16.50490516998537, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 332535912, + "step": 1936 + }, + { + "epoch": 0.5092391661734728, + "loss": 0.08137423545122147, + "loss_ce": 0.0015249941498041153, + "loss_iou": 0.423828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 332535912, + "step": 1936 + }, + { + "epoch": 0.5095022029328599, + "grad_norm": 7.868678191164257, + "learning_rate": 5e-06, + "loss": 0.1762, + "num_input_tokens_seen": 332708064, + "step": 1937 + }, + { + "epoch": 0.5095022029328599, + "loss": 0.2602325677871704, + "loss_ce": 0.005715976003557444, + "loss_iou": 0.470703125, + "loss_num": 0.051025390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 332708064, + "step": 1937 + }, + { + "epoch": 0.509765239692247, + "grad_norm": 4.650960242488344, + "learning_rate": 5e-06, + "loss": 0.1441, + "num_input_tokens_seen": 332880260, + "step": 1938 + }, + { + "epoch": 0.509765239692247, + "loss": 0.08695125579833984, + "loss_ce": 0.0018987648654729128, + "loss_iou": 0.7578125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 332880260, + "step": 1938 + }, + { + "epoch": 0.5100282764516341, + "grad_norm": 5.979900826127572, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 333052492, + "step": 1939 + }, + { + "epoch": 0.5100282764516341, + "loss": 0.11689235270023346, + "loss_ce": 0.0012612489517778158, + "loss_iou": 0.455078125, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 333052492, + "step": 1939 + }, + { + "epoch": 0.5102913132110213, + "grad_norm": 15.330918281866088, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 333222132, + "step": 1940 + }, + { + "epoch": 0.5102913132110213, + "loss": 0.16124433279037476, + "loss_ce": 0.0012101602042093873, + "loss_iou": 0.58203125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 333222132, + "step": 1940 + }, + { + "epoch": 0.5105543499704084, + "grad_norm": 5.298876176442514, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 333394244, + "step": 1941 + }, + { + "epoch": 0.5105543499704084, + "loss": 0.09125322848558426, + "loss_ce": 0.0003718816442415118, + "loss_iou": 0.6328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 333394244, + "step": 1941 + }, + { + "epoch": 0.5108173867297955, + "grad_norm": 25.903681591063247, + "learning_rate": 5e-06, + "loss": 0.0986, + "num_input_tokens_seen": 333564616, + "step": 1942 + }, + { + "epoch": 0.5108173867297955, + "loss": 0.0830872505903244, + "loss_ce": 0.003009122796356678, + "loss_iou": 0.56640625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 333564616, + "step": 1942 + }, + { + "epoch": 0.5110804234891826, + "grad_norm": 7.412064312390374, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 333736828, + "step": 1943 + }, + { + "epoch": 0.5110804234891826, + "loss": 0.13648918271064758, + "loss_ce": 0.0012352685444056988, + "loss_iou": 0.49609375, + "loss_num": 0.027099609375, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 333736828, + "step": 1943 + }, + { + "epoch": 0.5113434602485697, + "grad_norm": 5.823332421421152, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 333909000, + "step": 1944 + }, + { + "epoch": 0.5113434602485697, + "loss": 0.08375194668769836, + "loss_ce": 0.00024059813586063683, + "loss_iou": 0.55078125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 333909000, + "step": 1944 + }, + { + "epoch": 0.5116064970079569, + "grad_norm": 5.7512491483697685, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 334081116, + "step": 1945 + }, + { + "epoch": 0.5116064970079569, + "loss": 0.11631600558757782, + "loss_ce": 0.0020276757422834635, + "loss_iou": 0.390625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 334081116, + "step": 1945 + }, + { + "epoch": 0.511869533767344, + "grad_norm": 6.061348306489617, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 334252872, + "step": 1946 + }, + { + "epoch": 0.511869533767344, + "loss": 0.1273353099822998, + "loss_ce": 0.0007484056986868382, + "loss_iou": 0.671875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 334252872, + "step": 1946 + }, + { + "epoch": 0.5121325705267311, + "grad_norm": 4.656625920812538, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 334421696, + "step": 1947 + }, + { + "epoch": 0.5121325705267311, + "loss": 0.07545529305934906, + "loss_ce": 0.002487759804353118, + "loss_iou": 0.5390625, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 334421696, + "step": 1947 + }, + { + "epoch": 0.5123956072861182, + "grad_norm": 11.2997609374861, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 334593968, + "step": 1948 + }, + { + "epoch": 0.5123956072861182, + "loss": 0.15095758438110352, + "loss_ce": 0.00047542020911350846, + "loss_iou": 0.65625, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 334593968, + "step": 1948 + }, + { + "epoch": 0.5126586440455053, + "grad_norm": 4.740639931376309, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 334766256, + "step": 1949 + }, + { + "epoch": 0.5126586440455053, + "loss": 0.0767994076013565, + "loss_ce": 0.00020028470316901803, + "loss_iou": 0.49609375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 334766256, + "step": 1949 + }, + { + "epoch": 0.5129216808048925, + "grad_norm": 15.829229511064487, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 334938720, + "step": 1950 + }, + { + "epoch": 0.5129216808048925, + "loss": 0.10283501446247101, + "loss_ce": 0.003225642256438732, + "loss_iou": 0.57421875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 334938720, + "step": 1950 + }, + { + "epoch": 0.5131847175642796, + "grad_norm": 4.547176342272478, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 335108948, + "step": 1951 + }, + { + "epoch": 0.5131847175642796, + "loss": 0.07713186740875244, + "loss_ce": 0.00047171738697215915, + "loss_iou": 0.53515625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 335108948, + "step": 1951 + }, + { + "epoch": 0.5134477543236667, + "grad_norm": 13.66162052286571, + "learning_rate": 5e-06, + "loss": 0.1628, + "num_input_tokens_seen": 335281148, + "step": 1952 + }, + { + "epoch": 0.5134477543236667, + "loss": 0.2089971899986267, + "loss_ce": 0.0048346007242798805, + "loss_iou": 0.390625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 335281148, + "step": 1952 + }, + { + "epoch": 0.5137107910830538, + "grad_norm": 4.53549366428838, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 335453172, + "step": 1953 + }, + { + "epoch": 0.5137107910830538, + "loss": 0.0756625235080719, + "loss_ce": 0.0019930913113057613, + "loss_iou": 0.5078125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 335453172, + "step": 1953 + }, + { + "epoch": 0.513973827842441, + "grad_norm": 5.655145923562407, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 335623636, + "step": 1954 + }, + { + "epoch": 0.513973827842441, + "loss": 0.14977125823497772, + "loss_ce": 0.0032868883572518826, + "loss_iou": NaN, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 335623636, + "step": 1954 + }, + { + "epoch": 0.5142368646018282, + "grad_norm": 9.198223458900488, + "learning_rate": 5e-06, + "loss": 0.158, + "num_input_tokens_seen": 335793500, + "step": 1955 + }, + { + "epoch": 0.5142368646018282, + "loss": 0.13681824505329132, + "loss_ce": 0.0008624300826340914, + "loss_iou": 0.61328125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 335793500, + "step": 1955 + }, + { + "epoch": 0.5144999013612153, + "grad_norm": 14.576489713233544, + "learning_rate": 5e-06, + "loss": 0.1376, + "num_input_tokens_seen": 335965788, + "step": 1956 + }, + { + "epoch": 0.5144999013612153, + "loss": 0.14106330275535583, + "loss_ce": 0.0012928071664646268, + "loss_iou": 0.56640625, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 335965788, + "step": 1956 + }, + { + "epoch": 0.5147629381206024, + "grad_norm": 12.321724088847931, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 336138060, + "step": 1957 + }, + { + "epoch": 0.5147629381206024, + "loss": 0.1251417100429535, + "loss_ce": 0.0011182638118043542, + "loss_iou": 0.53515625, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 336138060, + "step": 1957 + }, + { + "epoch": 0.5150259748799895, + "grad_norm": 8.213839245926183, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 336308692, + "step": 1958 + }, + { + "epoch": 0.5150259748799895, + "loss": 0.06178643926978111, + "loss_ce": 0.00011041228572139516, + "loss_iou": 0.62890625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 336308692, + "step": 1958 + }, + { + "epoch": 0.5152890116393766, + "grad_norm": 4.514500271556744, + "learning_rate": 5e-06, + "loss": 0.1589, + "num_input_tokens_seen": 336481248, + "step": 1959 + }, + { + "epoch": 0.5152890116393766, + "loss": 0.15207350254058838, + "loss_ce": 0.0027662513311952353, + "loss_iou": 0.4453125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 336481248, + "step": 1959 + }, + { + "epoch": 0.5155520483987637, + "grad_norm": 3.270020202020867, + "learning_rate": 5e-06, + "loss": 0.0831, + "num_input_tokens_seen": 336653556, + "step": 1960 + }, + { + "epoch": 0.5155520483987637, + "loss": 0.07474862039089203, + "loss_ce": 0.00031624053372070193, + "loss_iou": 0.57421875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 336653556, + "step": 1960 + }, + { + "epoch": 0.5158150851581509, + "grad_norm": 5.036795139632983, + "learning_rate": 5e-06, + "loss": 0.1524, + "num_input_tokens_seen": 336825632, + "step": 1961 + }, + { + "epoch": 0.5158150851581509, + "loss": 0.0949145182967186, + "loss_ce": 0.004765587393194437, + "loss_iou": 0.52734375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 336825632, + "step": 1961 + }, + { + "epoch": 0.516078121917538, + "grad_norm": 5.18788380960706, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 336997908, + "step": 1962 + }, + { + "epoch": 0.516078121917538, + "loss": 0.1347174346446991, + "loss_ce": 0.0003027569910045713, + "loss_iou": 0.38671875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 336997908, + "step": 1962 + }, + { + "epoch": 0.5163411586769251, + "grad_norm": 19.722687183501755, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 337170032, + "step": 1963 + }, + { + "epoch": 0.5163411586769251, + "loss": 0.15522822737693787, + "loss_ce": 0.004196731373667717, + "loss_iou": 0.490234375, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 337170032, + "step": 1963 + }, + { + "epoch": 0.5166041954363122, + "grad_norm": 5.156355195655944, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 337342000, + "step": 1964 + }, + { + "epoch": 0.5166041954363122, + "loss": 0.0796535462141037, + "loss_ce": 0.0009487088536843657, + "loss_iou": 0.66796875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 337342000, + "step": 1964 + }, + { + "epoch": 0.5168672321956993, + "grad_norm": 4.599075113473689, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 337514164, + "step": 1965 + }, + { + "epoch": 0.5168672321956993, + "loss": 0.21942217648029327, + "loss_ce": 0.0008857909124344587, + "loss_iou": 0.5546875, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 337514164, + "step": 1965 + }, + { + "epoch": 0.5171302689550865, + "grad_norm": 8.512554957251284, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 337686236, + "step": 1966 + }, + { + "epoch": 0.5171302689550865, + "loss": 0.08479119837284088, + "loss_ce": 0.001386661664582789, + "loss_iou": 0.5234375, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 337686236, + "step": 1966 + }, + { + "epoch": 0.5173933057144736, + "grad_norm": 5.381969767631673, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 337858240, + "step": 1967 + }, + { + "epoch": 0.5173933057144736, + "loss": 0.15276584029197693, + "loss_ce": 0.004938698373734951, + "loss_iou": 0.52734375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 337858240, + "step": 1967 + }, + { + "epoch": 0.5176563424738607, + "grad_norm": 12.700271834998917, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 338030560, + "step": 1968 + }, + { + "epoch": 0.5176563424738607, + "loss": 0.15190255641937256, + "loss_ce": 0.0028852252289652824, + "loss_iou": 0.494140625, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 338030560, + "step": 1968 + }, + { + "epoch": 0.5179193792332478, + "grad_norm": 21.983637705034123, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 338202560, + "step": 1969 + }, + { + "epoch": 0.5179193792332478, + "loss": 0.0981290340423584, + "loss_ce": 0.0023496169596910477, + "loss_iou": 0.369140625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 338202560, + "step": 1969 + }, + { + "epoch": 0.5181824159926349, + "grad_norm": 10.469128023533157, + "learning_rate": 5e-06, + "loss": 0.1446, + "num_input_tokens_seen": 338374836, + "step": 1970 + }, + { + "epoch": 0.5181824159926349, + "loss": 0.16515450179576874, + "loss_ce": 0.0016413143603131175, + "loss_iou": 0.51953125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 338374836, + "step": 1970 + }, + { + "epoch": 0.5184454527520221, + "grad_norm": 5.186485351415196, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 338546996, + "step": 1971 + }, + { + "epoch": 0.5184454527520221, + "loss": 0.24401625990867615, + "loss_ce": 0.004056546837091446, + "loss_iou": 0.42578125, + "loss_num": 0.0478515625, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 338546996, + "step": 1971 + }, + { + "epoch": 0.5187084895114092, + "grad_norm": 6.802546863240928, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 338716756, + "step": 1972 + }, + { + "epoch": 0.5187084895114092, + "loss": 0.0805911123752594, + "loss_ce": 0.0030764644034206867, + "loss_iou": 0.43359375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 338716756, + "step": 1972 + }, + { + "epoch": 0.5189715262707963, + "grad_norm": 11.859766109061349, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 338886988, + "step": 1973 + }, + { + "epoch": 0.5189715262707963, + "loss": 0.10678447037935257, + "loss_ce": 0.0013767611235380173, + "loss_iou": 0.50390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 338886988, + "step": 1973 + }, + { + "epoch": 0.5192345630301834, + "grad_norm": 7.006309240426411, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 339059260, + "step": 1974 + }, + { + "epoch": 0.5192345630301834, + "loss": 0.1808294951915741, + "loss_ce": 0.00034853501711040735, + "loss_iou": 0.6796875, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 339059260, + "step": 1974 + }, + { + "epoch": 0.5194975997895706, + "grad_norm": 21.985452701611596, + "learning_rate": 5e-06, + "loss": 0.1751, + "num_input_tokens_seen": 339231652, + "step": 1975 + }, + { + "epoch": 0.5194975997895706, + "loss": 0.16675767302513123, + "loss_ce": 0.0018406773451715708, + "loss_iou": 0.37109375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 339231652, + "step": 1975 + }, + { + "epoch": 0.5197606365489578, + "grad_norm": 6.071372766680396, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 339403696, + "step": 1976 + }, + { + "epoch": 0.5197606365489578, + "loss": 0.08192337304353714, + "loss_ce": 0.0025166317354887724, + "loss_iou": 0.48046875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 339403696, + "step": 1976 + }, + { + "epoch": 0.5200236733083449, + "grad_norm": 7.303457699958576, + "learning_rate": 5e-06, + "loss": 0.0961, + "num_input_tokens_seen": 339576232, + "step": 1977 + }, + { + "epoch": 0.5200236733083449, + "loss": 0.06348910182714462, + "loss_ce": 0.0037356873508542776, + "loss_iou": 0.455078125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 339576232, + "step": 1977 + }, + { + "epoch": 0.520286710067732, + "grad_norm": 4.058844588183022, + "learning_rate": 5e-06, + "loss": 0.0786, + "num_input_tokens_seen": 339748468, + "step": 1978 + }, + { + "epoch": 0.520286710067732, + "loss": 0.07935698330402374, + "loss_ce": 0.00019439200696069747, + "loss_iou": 0.5234375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 339748468, + "step": 1978 + }, + { + "epoch": 0.5205497468271191, + "grad_norm": 9.520705989391612, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 339920428, + "step": 1979 + }, + { + "epoch": 0.5205497468271191, + "loss": 0.10209144651889801, + "loss_ce": 0.002482067793607712, + "loss_iou": 0.546875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 339920428, + "step": 1979 + }, + { + "epoch": 0.5208127835865062, + "grad_norm": 22.26578813252007, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 340092516, + "step": 1980 + }, + { + "epoch": 0.5208127835865062, + "loss": 0.19446319341659546, + "loss_ce": 0.006719036493450403, + "loss_iou": 0.5703125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 340092516, + "step": 1980 + }, + { + "epoch": 0.5210758203458934, + "grad_norm": 9.547756324115959, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 340265052, + "step": 1981 + }, + { + "epoch": 0.5210758203458934, + "loss": 0.14365576207637787, + "loss_ce": 0.0007572055910713971, + "loss_iou": 0.60546875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 340265052, + "step": 1981 + }, + { + "epoch": 0.5213388571052805, + "grad_norm": 7.840208454239884, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 340436896, + "step": 1982 + }, + { + "epoch": 0.5213388571052805, + "loss": 0.11067777872085571, + "loss_ce": 0.0016995080513879657, + "loss_iou": 0.55859375, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 340436896, + "step": 1982 + }, + { + "epoch": 0.5216018938646676, + "grad_norm": 24.43574592347015, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 340609344, + "step": 1983 + }, + { + "epoch": 0.5216018938646676, + "loss": 0.10227973759174347, + "loss_ce": 0.0038605397567152977, + "loss_iou": 0.36328125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 340609344, + "step": 1983 + }, + { + "epoch": 0.5218649306240547, + "grad_norm": 6.254699637052052, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 340779700, + "step": 1984 + }, + { + "epoch": 0.5218649306240547, + "loss": 0.1373625099658966, + "loss_ce": 0.0002012598270084709, + "loss_iou": 0.5234375, + "loss_num": 0.0274658203125, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 340779700, + "step": 1984 + }, + { + "epoch": 0.5221279673834418, + "grad_norm": 4.970529369812158, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 340952012, + "step": 1985 + }, + { + "epoch": 0.5221279673834418, + "loss": 0.07744970917701721, + "loss_ce": 0.00027074594981968403, + "loss_iou": 0.625, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 340952012, + "step": 1985 + }, + { + "epoch": 0.5223910041428289, + "grad_norm": 9.386513844488379, + "learning_rate": 5e-06, + "loss": 0.1525, + "num_input_tokens_seen": 341124172, + "step": 1986 + }, + { + "epoch": 0.5223910041428289, + "loss": 0.16602635383605957, + "loss_ce": 0.0037033448461443186, + "loss_iou": 0.6640625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 341124172, + "step": 1986 + }, + { + "epoch": 0.5226540409022161, + "grad_norm": 75.36425054276123, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 341296144, + "step": 1987 + }, + { + "epoch": 0.5226540409022161, + "loss": 0.08925444632768631, + "loss_ce": 0.0014248627703636885, + "loss_iou": 0.51953125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 341296144, + "step": 1987 + }, + { + "epoch": 0.5229170776616032, + "grad_norm": 6.0244434485136455, + "learning_rate": 5e-06, + "loss": 0.1624, + "num_input_tokens_seen": 341468128, + "step": 1988 + }, + { + "epoch": 0.5229170776616032, + "loss": 0.18709853291511536, + "loss_ce": 0.0007887266110628843, + "loss_iou": 0.65234375, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 341468128, + "step": 1988 + }, + { + "epoch": 0.5231801144209903, + "grad_norm": 6.46446584669901, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 341640532, + "step": 1989 + }, + { + "epoch": 0.5231801144209903, + "loss": 0.13015246391296387, + "loss_ce": 0.00030016410164535046, + "loss_iou": 0.484375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 341640532, + "step": 1989 + }, + { + "epoch": 0.5234431511803774, + "grad_norm": 12.345485884753447, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 341812700, + "step": 1990 + }, + { + "epoch": 0.5234431511803774, + "loss": 0.10668568313121796, + "loss_ce": 0.00027087965281680226, + "loss_iou": 0.55078125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 341812700, + "step": 1990 + }, + { + "epoch": 0.5237061879397645, + "grad_norm": 3.3770749440530894, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 341984700, + "step": 1991 + }, + { + "epoch": 0.5237061879397645, + "loss": 0.1165972501039505, + "loss_ce": 0.0006304577691480517, + "loss_iou": 0.431640625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 341984700, + "step": 1991 + }, + { + "epoch": 0.5239692246991517, + "grad_norm": 9.677414053666478, + "learning_rate": 5e-06, + "loss": 0.1048, + "num_input_tokens_seen": 342156884, + "step": 1992 + }, + { + "epoch": 0.5239692246991517, + "loss": 0.1423652172088623, + "loss_ce": 0.0010383009212091565, + "loss_iou": 0.58203125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 342156884, + "step": 1992 + }, + { + "epoch": 0.5242322614585389, + "grad_norm": 3.6750881151911643, + "learning_rate": 5e-06, + "loss": 0.1894, + "num_input_tokens_seen": 342329140, + "step": 1993 + }, + { + "epoch": 0.5242322614585389, + "loss": 0.18173760175704956, + "loss_ce": 0.009221725165843964, + "loss_iou": 0.46484375, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 342329140, + "step": 1993 + }, + { + "epoch": 0.524495298217926, + "grad_norm": 19.882541327036233, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 342501472, + "step": 1994 + }, + { + "epoch": 0.524495298217926, + "loss": 0.0589841827750206, + "loss_ce": 0.001641655690036714, + "loss_iou": 0.357421875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 342501472, + "step": 1994 + }, + { + "epoch": 0.5247583349773131, + "grad_norm": 16.32555909861509, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 342673528, + "step": 1995 + }, + { + "epoch": 0.5247583349773131, + "loss": 0.18063244223594666, + "loss_ce": 0.0004871786804869771, + "loss_iou": 0.5546875, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 342673528, + "step": 1995 + }, + { + "epoch": 0.5250213717367002, + "grad_norm": 9.845315436294685, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 342845892, + "step": 1996 + }, + { + "epoch": 0.5250213717367002, + "loss": 0.17416182160377502, + "loss_ce": 0.0006388599867932498, + "loss_iou": 0.46875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 342845892, + "step": 1996 + }, + { + "epoch": 0.5252844084960874, + "grad_norm": 4.531599732042389, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 343018040, + "step": 1997 + }, + { + "epoch": 0.5252844084960874, + "loss": 0.15178070962429047, + "loss_ce": 0.0012069816002622247, + "loss_iou": 0.490234375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 343018040, + "step": 1997 + }, + { + "epoch": 0.5255474452554745, + "grad_norm": 15.562861804573469, + "learning_rate": 5e-06, + "loss": 0.1677, + "num_input_tokens_seen": 343189936, + "step": 1998 + }, + { + "epoch": 0.5255474452554745, + "loss": 0.1337474286556244, + "loss_ce": 0.0038340911269187927, + "loss_iou": 0.37890625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 343189936, + "step": 1998 + }, + { + "epoch": 0.5258104820148616, + "grad_norm": 6.338186207451289, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 343361876, + "step": 1999 + }, + { + "epoch": 0.5258104820148616, + "loss": 0.08655121922492981, + "loss_ce": 0.0022006274666637182, + "loss_iou": 0.5, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 343361876, + "step": 1999 + }, + { + "epoch": 0.5260735187742487, + "grad_norm": 36.314036588641784, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_websight_new_CIoU": 0.8649432361125946, + "eval_websight_new_GIoU": 0.8667054772377014, + "eval_websight_new_IoU": 0.8704231679439545, + "eval_websight_new_MAE_all": 0.021235700696706772, + "eval_websight_new_MAE_h": 0.007067237980663776, + "eval_websight_new_MAE_w": 0.03388772998005152, + "eval_websight_new_MAE_x": 0.03554858546704054, + "eval_websight_new_MAE_y": 0.008439254947006702, + "eval_websight_new_NUM_probability": 0.999984085559845, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.10596824437379837, + "eval_websight_new_loss_ce": 6.8271494910732144e-06, + "eval_websight_new_loss_iou": 0.36328125, + "eval_websight_new_loss_num": 0.018640518188476562, + "eval_websight_new_loss_xval": 0.09316253662109375, + "eval_websight_new_runtime": 54.898, + "eval_websight_new_samples_per_second": 0.911, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_seeclick_CIoU": 0.6263805329799652, + "eval_seeclick_GIoU": 0.627049595117569, + "eval_seeclick_IoU": 0.6531890332698822, + "eval_seeclick_MAE_all": 0.050390077754855156, + "eval_seeclick_MAE_h": 0.02558732032775879, + "eval_seeclick_MAE_w": 0.07135490141808987, + "eval_seeclick_MAE_x": 0.07951905764639378, + "eval_seeclick_MAE_y": 0.025099032558500767, + "eval_seeclick_NUM_probability": 0.9999794960021973, + "eval_seeclick_inside_bbox": 0.921875, + "eval_seeclick_loss": 0.22126971185207367, + "eval_seeclick_loss_ce": 0.008990719448775053, + "eval_seeclick_loss_iou": 0.47021484375, + "eval_seeclick_loss_num": 0.04229736328125, + "eval_seeclick_loss_xval": 0.211334228515625, + "eval_seeclick_runtime": 78.1311, + "eval_seeclick_samples_per_second": 0.55, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_icons_CIoU": 0.8664080798625946, + "eval_icons_GIoU": 0.8631992936134338, + "eval_icons_IoU": 0.8702020049095154, + "eval_icons_MAE_all": 0.0180177534930408, + "eval_icons_MAE_h": 0.016653602942824364, + "eval_icons_MAE_w": 0.018555423244833946, + "eval_icons_MAE_x": 0.01882947515696287, + "eval_icons_MAE_y": 0.018032516352832317, + "eval_icons_NUM_probability": 0.9999754428863525, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.059605177491903305, + "eval_icons_loss_ce": 1.412479377904674e-05, + "eval_icons_loss_iou": 0.5333251953125, + "eval_icons_loss_num": 0.011335372924804688, + "eval_icons_loss_xval": 0.05667877197265625, + "eval_icons_runtime": 80.1161, + "eval_icons_samples_per_second": 0.624, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "eval_screenspot_CIoU": 0.5386523008346558, + "eval_screenspot_GIoU": 0.5300994714101156, + "eval_screenspot_IoU": 0.5833619435628256, + "eval_screenspot_MAE_all": 0.08941345165173213, + "eval_screenspot_MAE_h": 0.04660519336660703, + "eval_screenspot_MAE_w": 0.1580625375111898, + "eval_screenspot_MAE_x": 0.10776859025160472, + "eval_screenspot_MAE_y": 0.04521748423576355, + "eval_screenspot_NUM_probability": 0.9995922644933065, + "eval_screenspot_inside_bbox": 0.850000003973643, + "eval_screenspot_loss": 0.8645088076591492, + "eval_screenspot_loss_ce": 0.5230478445688883, + "eval_screenspot_loss_iou": 0.4464925130208333, + "eval_screenspot_loss_num": 0.06682078043619792, + "eval_screenspot_loss_xval": 0.3342692057291667, + "eval_screenspot_runtime": 148.9183, + "eval_screenspot_samples_per_second": 0.598, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5260735187742487, + "loss": 0.8609392046928406, + "loss_ce": 0.5118181109428406, + "loss_iou": 0.390625, + "loss_num": 0.06982421875, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 343532304, + "step": 2000 + }, + { + "epoch": 0.5263365555336358, + "grad_norm": 8.429330373903323, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 343704524, + "step": 2001 + }, + { + "epoch": 0.5263365555336358, + "loss": 0.21621274948120117, + "loss_ce": 0.0006976150907576084, + "loss_iou": 0.2578125, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 343704524, + "step": 2001 + }, + { + "epoch": 0.526599592293023, + "grad_norm": 16.618545138422856, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 343876532, + "step": 2002 + }, + { + "epoch": 0.526599592293023, + "loss": 0.14034898579120636, + "loss_ce": 0.0025926402304321527, + "loss_iou": 0.515625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 343876532, + "step": 2002 + }, + { + "epoch": 0.5268626290524101, + "grad_norm": 7.347451448031887, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 344048440, + "step": 2003 + }, + { + "epoch": 0.5268626290524101, + "loss": 0.11765069514513016, + "loss_ce": 0.0015160476323217154, + "loss_iou": 0.376953125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 344048440, + "step": 2003 + }, + { + "epoch": 0.5271256658117972, + "grad_norm": 11.90854860345847, + "learning_rate": 5e-06, + "loss": 0.1657, + "num_input_tokens_seen": 344220524, + "step": 2004 + }, + { + "epoch": 0.5271256658117972, + "loss": 0.09579437971115112, + "loss_ce": 0.0003353926877025515, + "loss_iou": 0.490234375, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 344220524, + "step": 2004 + }, + { + "epoch": 0.5273887025711843, + "grad_norm": 15.406217883818895, + "learning_rate": 5e-06, + "loss": 0.1745, + "num_input_tokens_seen": 344391120, + "step": 2005 + }, + { + "epoch": 0.5273887025711843, + "loss": 0.08335284888744354, + "loss_ce": 0.005258369259536266, + "loss_iou": 0.419921875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 344391120, + "step": 2005 + }, + { + "epoch": 0.5276517393305714, + "grad_norm": 6.625644981750321, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 344563368, + "step": 2006 + }, + { + "epoch": 0.5276517393305714, + "loss": 0.1281585693359375, + "loss_ce": 0.0008697626180946827, + "loss_iou": 0.326171875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 344563368, + "step": 2006 + }, + { + "epoch": 0.5279147760899586, + "grad_norm": 5.116207808263436, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 344735272, + "step": 2007 + }, + { + "epoch": 0.5279147760899586, + "loss": 0.10428653657436371, + "loss_ce": 0.00014529118197970092, + "loss_iou": 0.373046875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 344735272, + "step": 2007 + }, + { + "epoch": 0.5281778128493457, + "grad_norm": 5.758545412441202, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 344907448, + "step": 2008 + }, + { + "epoch": 0.5281778128493457, + "loss": 0.07528108358383179, + "loss_ce": 0.0017337151803076267, + "loss_iou": 0.59375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 344907448, + "step": 2008 + }, + { + "epoch": 0.5284408496087328, + "grad_norm": 5.079743263348732, + "learning_rate": 5e-06, + "loss": 0.0995, + "num_input_tokens_seen": 345080020, + "step": 2009 + }, + { + "epoch": 0.5284408496087328, + "loss": 0.06423554569482803, + "loss_ce": 0.0006674337200820446, + "loss_iou": 0.5859375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 345080020, + "step": 2009 + }, + { + "epoch": 0.5287038863681199, + "grad_norm": 6.791724647885975, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 345251684, + "step": 2010 + }, + { + "epoch": 0.5287038863681199, + "loss": 0.13502314686775208, + "loss_ce": 0.0006847689510323107, + "loss_iou": 0.484375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 345251684, + "step": 2010 + }, + { + "epoch": 0.528966923127507, + "grad_norm": 7.5731259754899245, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 345421972, + "step": 2011 + }, + { + "epoch": 0.528966923127507, + "loss": 0.10425636172294617, + "loss_ce": 0.0018088552169501781, + "loss_iou": 0.357421875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 345421972, + "step": 2011 + }, + { + "epoch": 0.5292299598868941, + "grad_norm": 4.024834656974236, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 345594140, + "step": 2012 + }, + { + "epoch": 0.5292299598868941, + "loss": 0.05266657471656799, + "loss_ce": 0.00011530861956998706, + "loss_iou": 0.53125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 345594140, + "step": 2012 + }, + { + "epoch": 0.5294929966462814, + "grad_norm": 4.913874816205936, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 345766220, + "step": 2013 + }, + { + "epoch": 0.5294929966462814, + "loss": 0.08730873465538025, + "loss_ce": 0.0039957487024366856, + "loss_iou": 0.55859375, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 345766220, + "step": 2013 + }, + { + "epoch": 0.5297560334056685, + "grad_norm": 5.534989831922425, + "learning_rate": 5e-06, + "loss": 0.1479, + "num_input_tokens_seen": 345938392, + "step": 2014 + }, + { + "epoch": 0.5297560334056685, + "loss": 0.11161148548126221, + "loss_ce": 0.0032130456529557705, + "loss_iou": 0.63671875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 345938392, + "step": 2014 + }, + { + "epoch": 0.5300190701650556, + "grad_norm": 7.209865250975092, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 346110656, + "step": 2015 + }, + { + "epoch": 0.5300190701650556, + "loss": 0.12729424238204956, + "loss_ce": 0.0035759946331381798, + "loss_iou": 0.625, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 346110656, + "step": 2015 + }, + { + "epoch": 0.5302821069244427, + "grad_norm": 5.146240130208517, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 346283032, + "step": 2016 + }, + { + "epoch": 0.5302821069244427, + "loss": 0.12445038557052612, + "loss_ce": 0.0015866123139858246, + "loss_iou": 0.55078125, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 346283032, + "step": 2016 + }, + { + "epoch": 0.5305451436838298, + "grad_norm": 4.097776262787885, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 346455396, + "step": 2017 + }, + { + "epoch": 0.5305451436838298, + "loss": 0.08194537460803986, + "loss_ce": 0.0027369949966669083, + "loss_iou": 0.46875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 346455396, + "step": 2017 + }, + { + "epoch": 0.530808180443217, + "grad_norm": 8.807636765638556, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 346627308, + "step": 2018 + }, + { + "epoch": 0.530808180443217, + "loss": 0.16153287887573242, + "loss_ce": 0.0020022375974804163, + "loss_iou": 0.37109375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 346627308, + "step": 2018 + }, + { + "epoch": 0.5310712172026041, + "grad_norm": 10.071725544370869, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 346799504, + "step": 2019 + }, + { + "epoch": 0.5310712172026041, + "loss": 0.10446594655513763, + "loss_ce": 0.0008282391354441643, + "loss_iou": 0.4765625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 346799504, + "step": 2019 + }, + { + "epoch": 0.5313342539619912, + "grad_norm": 5.791284152439371, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 346971328, + "step": 2020 + }, + { + "epoch": 0.5313342539619912, + "loss": 0.21368807554244995, + "loss_ce": 0.002475916873663664, + "loss_iou": 0.578125, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 346971328, + "step": 2020 + }, + { + "epoch": 0.5315972907213783, + "grad_norm": 9.778332313218632, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 347139872, + "step": 2021 + }, + { + "epoch": 0.5315972907213783, + "loss": 0.2882145047187805, + "loss_ce": 0.0021427052561193705, + "loss_iou": 0.5078125, + "loss_num": 0.05712890625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 347139872, + "step": 2021 + }, + { + "epoch": 0.5318603274807654, + "grad_norm": 7.217119694281503, + "learning_rate": 5e-06, + "loss": 0.1443, + "num_input_tokens_seen": 347312176, + "step": 2022 + }, + { + "epoch": 0.5318603274807654, + "loss": 0.14617526531219482, + "loss_ce": 0.0012472879607230425, + "loss_iou": 0.5546875, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 347312176, + "step": 2022 + }, + { + "epoch": 0.5321233642401526, + "grad_norm": 5.444211224879485, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 347483864, + "step": 2023 + }, + { + "epoch": 0.5321233642401526, + "loss": 0.15533965826034546, + "loss_ce": 0.0009817371610552073, + "loss_iou": 0.47265625, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 347483864, + "step": 2023 + }, + { + "epoch": 0.5323864009995397, + "grad_norm": 5.88671307787278, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 347654444, + "step": 2024 + }, + { + "epoch": 0.5323864009995397, + "loss": 0.10597267001867294, + "loss_ce": 0.0017551433993503451, + "loss_iou": 0.6484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 347654444, + "step": 2024 + }, + { + "epoch": 0.5326494377589268, + "grad_norm": 4.540449525857699, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 347826732, + "step": 2025 + }, + { + "epoch": 0.5326494377589268, + "loss": 0.10671254992485046, + "loss_ce": 0.002159323776140809, + "loss_iou": 0.51171875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 347826732, + "step": 2025 + }, + { + "epoch": 0.5329124745183139, + "grad_norm": 12.403111361726802, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 347995312, + "step": 2026 + }, + { + "epoch": 0.5329124745183139, + "loss": 0.1710810512304306, + "loss_ce": 0.00019787647761404514, + "loss_iou": 0.396484375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 347995312, + "step": 2026 + }, + { + "epoch": 0.533175511277701, + "grad_norm": 4.826031974791996, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 348167268, + "step": 2027 + }, + { + "epoch": 0.533175511277701, + "loss": 0.14301443099975586, + "loss_ce": 0.004235736560076475, + "loss_iou": 0.48046875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 348167268, + "step": 2027 + }, + { + "epoch": 0.5334385480370882, + "grad_norm": 5.858834044093049, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 348339324, + "step": 2028 + }, + { + "epoch": 0.5334385480370882, + "loss": 0.13567912578582764, + "loss_ce": 0.0013865029904991388, + "loss_iou": 0.5078125, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 348339324, + "step": 2028 + }, + { + "epoch": 0.5337015847964753, + "grad_norm": 18.687645390034255, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 348511688, + "step": 2029 + }, + { + "epoch": 0.5337015847964753, + "loss": 0.09527582675218582, + "loss_ce": 0.0008391792071051896, + "loss_iou": 0.484375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 348511688, + "step": 2029 + }, + { + "epoch": 0.5339646215558624, + "grad_norm": 13.127056473400154, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 348682072, + "step": 2030 + }, + { + "epoch": 0.5339646215558624, + "loss": 0.10760138183832169, + "loss_ce": 0.003078682580962777, + "loss_iou": 0.5078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 348682072, + "step": 2030 + }, + { + "epoch": 0.5342276583152495, + "grad_norm": 4.612341549056293, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 348854384, + "step": 2031 + }, + { + "epoch": 0.5342276583152495, + "loss": 0.12830308079719543, + "loss_ce": 0.004981548525393009, + "loss_iou": 0.462890625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 348854384, + "step": 2031 + }, + { + "epoch": 0.5344906950746366, + "grad_norm": 8.941916447828152, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 349026540, + "step": 2032 + }, + { + "epoch": 0.5344906950746366, + "loss": 0.11764685809612274, + "loss_ce": 0.0009781570406630635, + "loss_iou": 0.515625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 349026540, + "step": 2032 + }, + { + "epoch": 0.5347537318340239, + "grad_norm": 3.1856540562472104, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 349195704, + "step": 2033 + }, + { + "epoch": 0.5347537318340239, + "loss": 0.15431983768939972, + "loss_ce": 0.001899797236546874, + "loss_iou": 0.455078125, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 349195704, + "step": 2033 + }, + { + "epoch": 0.535016768593411, + "grad_norm": 6.665681138831115, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 349367820, + "step": 2034 + }, + { + "epoch": 0.535016768593411, + "loss": 0.17121072113513947, + "loss_ce": 0.001441433560103178, + "loss_iou": 0.455078125, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 349367820, + "step": 2034 + }, + { + "epoch": 0.5352798053527981, + "grad_norm": 5.921520862992091, + "learning_rate": 5e-06, + "loss": 0.1264, + "num_input_tokens_seen": 349540064, + "step": 2035 + }, + { + "epoch": 0.5352798053527981, + "loss": 0.102360799908638, + "loss_ce": 0.0002947567554656416, + "loss_iou": 0.39453125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 349540064, + "step": 2035 + }, + { + "epoch": 0.5355428421121852, + "grad_norm": 5.3538979358096785, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 349710408, + "step": 2036 + }, + { + "epoch": 0.5355428421121852, + "loss": 0.06293447315692902, + "loss_ce": 0.0006481010350398719, + "loss_iou": 0.392578125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 349710408, + "step": 2036 + }, + { + "epoch": 0.5358058788715723, + "grad_norm": 11.23909243130062, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 349880916, + "step": 2037 + }, + { + "epoch": 0.5358058788715723, + "loss": 0.1736185997724533, + "loss_ce": 0.003116899635642767, + "loss_iou": 0.474609375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 349880916, + "step": 2037 + }, + { + "epoch": 0.5360689156309594, + "grad_norm": 11.327258827718145, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 350053024, + "step": 2038 + }, + { + "epoch": 0.5360689156309594, + "loss": 0.14642956852912903, + "loss_ce": 0.0015015878016129136, + "loss_iou": 0.53515625, + "loss_num": 0.029052734375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 350053024, + "step": 2038 + }, + { + "epoch": 0.5363319523903466, + "grad_norm": 10.572751348187758, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 350225056, + "step": 2039 + }, + { + "epoch": 0.5363319523903466, + "loss": 0.08779959380626678, + "loss_ce": 0.0008397561614401639, + "loss_iou": 0.57421875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 350225056, + "step": 2039 + }, + { + "epoch": 0.5365949891497337, + "grad_norm": 4.830573829484519, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 350395628, + "step": 2040 + }, + { + "epoch": 0.5365949891497337, + "loss": 0.15380313992500305, + "loss_ce": 0.0012152513954788446, + "loss_iou": 0.53515625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 350395628, + "step": 2040 + }, + { + "epoch": 0.5368580259091208, + "grad_norm": 6.867013774487234, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 350567756, + "step": 2041 + }, + { + "epoch": 0.5368580259091208, + "loss": 0.1892320066690445, + "loss_ce": 0.00420394167304039, + "loss_iou": 0.5859375, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 350567756, + "step": 2041 + }, + { + "epoch": 0.5371210626685079, + "grad_norm": 9.243159752879412, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 350739840, + "step": 2042 + }, + { + "epoch": 0.5371210626685079, + "loss": 0.1004796102643013, + "loss_ce": 0.00013781688176095486, + "loss_iou": 0.54296875, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 350739840, + "step": 2042 + }, + { + "epoch": 0.537384099427895, + "grad_norm": 11.965655673255789, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 350912084, + "step": 2043 + }, + { + "epoch": 0.537384099427895, + "loss": 0.20747891068458557, + "loss_ce": 0.0016378372674807906, + "loss_iou": 0.34765625, + "loss_num": 0.041015625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 350912084, + "step": 2043 + }, + { + "epoch": 0.5376471361872822, + "grad_norm": 3.1288535063063327, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 351081480, + "step": 2044 + }, + { + "epoch": 0.5376471361872822, + "loss": 0.1123102456331253, + "loss_ce": 0.001989200245589018, + "loss_iou": 0.48828125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 351081480, + "step": 2044 + }, + { + "epoch": 0.5379101729466693, + "grad_norm": 4.103876123324951, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 351253596, + "step": 2045 + }, + { + "epoch": 0.5379101729466693, + "loss": 0.11436055600643158, + "loss_ce": 0.0011403337121009827, + "loss_iou": 0.4296875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 351253596, + "step": 2045 + }, + { + "epoch": 0.5381732097060564, + "grad_norm": 5.468839567129726, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 351425656, + "step": 2046 + }, + { + "epoch": 0.5381732097060564, + "loss": 0.0928923487663269, + "loss_ce": 0.0006071930401958525, + "loss_iou": 0.5625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 351425656, + "step": 2046 + }, + { + "epoch": 0.5384362464654435, + "grad_norm": 14.247562489878419, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 351597816, + "step": 2047 + }, + { + "epoch": 0.5384362464654435, + "loss": 0.2033635675907135, + "loss_ce": 0.0019475510343909264, + "loss_iou": 0.392578125, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 351597816, + "step": 2047 + }, + { + "epoch": 0.5386992832248306, + "grad_norm": 4.3738903937509885, + "learning_rate": 5e-06, + "loss": 0.1305, + "num_input_tokens_seen": 351769980, + "step": 2048 + }, + { + "epoch": 0.5386992832248306, + "loss": 0.059591565281152725, + "loss_ce": 8.22915681055747e-05, + "loss_iou": 0.578125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 351769980, + "step": 2048 + }, + { + "epoch": 0.5389623199842178, + "grad_norm": 4.415343795853702, + "learning_rate": 5e-06, + "loss": 0.1496, + "num_input_tokens_seen": 351939320, + "step": 2049 + }, + { + "epoch": 0.5389623199842178, + "loss": 0.24034851789474487, + "loss_ce": 0.0008465623832307756, + "loss_iou": 0.703125, + "loss_num": 0.0478515625, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 351939320, + "step": 2049 + }, + { + "epoch": 0.5392253567436049, + "grad_norm": 16.797461240634142, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 352111376, + "step": 2050 + }, + { + "epoch": 0.5392253567436049, + "loss": 0.14554640650749207, + "loss_ce": 0.0012287711724638939, + "loss_iou": 0.45703125, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 352111376, + "step": 2050 + }, + { + "epoch": 0.539488393502992, + "grad_norm": 7.010827810107144, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 352283872, + "step": 2051 + }, + { + "epoch": 0.539488393502992, + "loss": 0.1082100197672844, + "loss_ce": 0.0016121190274134278, + "loss_iou": 0.546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 352283872, + "step": 2051 + }, + { + "epoch": 0.5397514302623792, + "grad_norm": 5.086953515153334, + "learning_rate": 5e-06, + "loss": 0.0939, + "num_input_tokens_seen": 352456124, + "step": 2052 + }, + { + "epoch": 0.5397514302623792, + "loss": 0.05017915368080139, + "loss_ce": 0.0004965342814102769, + "loss_iou": 0.53125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 352456124, + "step": 2052 + }, + { + "epoch": 0.5400144670217663, + "grad_norm": 9.025520856951152, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 352626592, + "step": 2053 + }, + { + "epoch": 0.5400144670217663, + "loss": 0.16316679120063782, + "loss_ce": 0.0005996549734845757, + "loss_iou": 0.3828125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 352626592, + "step": 2053 + }, + { + "epoch": 0.5402775037811535, + "grad_norm": 6.628974819075429, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 352798532, + "step": 2054 + }, + { + "epoch": 0.5402775037811535, + "loss": 0.10173699259757996, + "loss_ce": 0.002814263803884387, + "loss_iou": 0.61328125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 352798532, + "step": 2054 + }, + { + "epoch": 0.5405405405405406, + "grad_norm": 4.016489890807633, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 352970876, + "step": 2055 + }, + { + "epoch": 0.5405405405405406, + "loss": 0.12183534353971481, + "loss_ce": 0.00010072031000163406, + "loss_iou": 0.578125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 352970876, + "step": 2055 + }, + { + "epoch": 0.5408035772999277, + "grad_norm": 10.022531221280579, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 353143148, + "step": 2056 + }, + { + "epoch": 0.5408035772999277, + "loss": 0.11547866463661194, + "loss_ce": 0.0016328342026099563, + "loss_iou": 0.45703125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 353143148, + "step": 2056 + }, + { + "epoch": 0.5410666140593148, + "grad_norm": 4.0813858316372205, + "learning_rate": 5e-06, + "loss": 0.1298, + "num_input_tokens_seen": 353315092, + "step": 2057 + }, + { + "epoch": 0.5410666140593148, + "loss": 0.2096938192844391, + "loss_ce": 0.005348118022084236, + "loss_iou": 0.57421875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 353315092, + "step": 2057 + }, + { + "epoch": 0.5413296508187019, + "grad_norm": 6.060936634204627, + "learning_rate": 5e-06, + "loss": 0.1086, + "num_input_tokens_seen": 353487316, + "step": 2058 + }, + { + "epoch": 0.5413296508187019, + "loss": 0.0933343917131424, + "loss_ce": 0.0019037279998883605, + "loss_iou": 0.6171875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 353487316, + "step": 2058 + }, + { + "epoch": 0.541592687578089, + "grad_norm": 5.183981355840974, + "learning_rate": 5e-06, + "loss": 0.1516, + "num_input_tokens_seen": 353659672, + "step": 2059 + }, + { + "epoch": 0.541592687578089, + "loss": 0.1340794712305069, + "loss_ce": 0.001663701143115759, + "loss_iou": 0.6328125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 353659672, + "step": 2059 + }, + { + "epoch": 0.5418557243374762, + "grad_norm": 4.266501842679085, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 353829984, + "step": 2060 + }, + { + "epoch": 0.5418557243374762, + "loss": 0.16655325889587402, + "loss_ce": 0.0011174663668498397, + "loss_iou": 0.4609375, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 353829984, + "step": 2060 + }, + { + "epoch": 0.5421187610968633, + "grad_norm": 5.044214417820774, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 354002080, + "step": 2061 + }, + { + "epoch": 0.5421187610968633, + "loss": 0.11388491839170456, + "loss_ce": 0.0009240994695574045, + "loss_iou": 0.412109375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 354002080, + "step": 2061 + }, + { + "epoch": 0.5423817978562504, + "grad_norm": 5.933256816179162, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 354174264, + "step": 2062 + }, + { + "epoch": 0.5423817978562504, + "loss": 0.07782945036888123, + "loss_ce": 0.00034532046993263066, + "loss_iou": 0.515625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 354174264, + "step": 2062 + }, + { + "epoch": 0.5426448346156375, + "grad_norm": 6.3010547634359915, + "learning_rate": 5e-06, + "loss": 0.1807, + "num_input_tokens_seen": 354346228, + "step": 2063 + }, + { + "epoch": 0.5426448346156375, + "loss": 0.2516539692878723, + "loss_ce": 0.0021422426216304302, + "loss_iou": 0.57421875, + "loss_num": 0.0498046875, + "loss_xval": 0.25, + "num_input_tokens_seen": 354346228, + "step": 2063 + }, + { + "epoch": 0.5429078713750246, + "grad_norm": 6.370139521144386, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 354516236, + "step": 2064 + }, + { + "epoch": 0.5429078713750246, + "loss": 0.1860380321741104, + "loss_ce": 0.0007352972170338035, + "loss_iou": 0.310546875, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 354516236, + "step": 2064 + }, + { + "epoch": 0.5431709081344118, + "grad_norm": 5.277591681669997, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 354688188, + "step": 2065 + }, + { + "epoch": 0.5431709081344118, + "loss": 0.14346206188201904, + "loss_ce": 0.0007008376996964216, + "loss_iou": 0.462890625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 354688188, + "step": 2065 + }, + { + "epoch": 0.5434339448937989, + "grad_norm": 4.479693164703334, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 354860592, + "step": 2066 + }, + { + "epoch": 0.5434339448937989, + "loss": 0.11085185408592224, + "loss_ce": 7.304361497517675e-05, + "loss_iou": 0.44921875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 354860592, + "step": 2066 + }, + { + "epoch": 0.543696981653186, + "grad_norm": 4.919682261722731, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 355033176, + "step": 2067 + }, + { + "epoch": 0.543696981653186, + "loss": 0.16546472907066345, + "loss_ce": 0.001188602764159441, + "loss_iou": 0.5078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 355033176, + "step": 2067 + }, + { + "epoch": 0.5439600184125731, + "grad_norm": 13.936900281461735, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 355205536, + "step": 2068 + }, + { + "epoch": 0.5439600184125731, + "loss": 0.08491555601358414, + "loss_ce": 1.5654470189474523e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 355205536, + "step": 2068 + }, + { + "epoch": 0.5442230551719602, + "grad_norm": 7.496864557637642, + "learning_rate": 5e-06, + "loss": 0.1187, + "num_input_tokens_seen": 355375248, + "step": 2069 + }, + { + "epoch": 0.5442230551719602, + "loss": 0.11474957317113876, + "loss_ce": 0.005954409018158913, + "loss_iou": 0.671875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 355375248, + "step": 2069 + }, + { + "epoch": 0.5444860919313474, + "grad_norm": 7.5192560384682325, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 355545380, + "step": 2070 + }, + { + "epoch": 0.5444860919313474, + "loss": 0.156136155128479, + "loss_ce": 0.0023580677807331085, + "loss_iou": 0.41015625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 355545380, + "step": 2070 + }, + { + "epoch": 0.5447491286907346, + "grad_norm": 5.871220250123853, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 355717808, + "step": 2071 + }, + { + "epoch": 0.5447491286907346, + "loss": 0.1303853988647461, + "loss_ce": 0.0030660659540444613, + "loss_iou": 0.443359375, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 355717808, + "step": 2071 + }, + { + "epoch": 0.5450121654501217, + "grad_norm": 5.18139773459563, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 355889920, + "step": 2072 + }, + { + "epoch": 0.5450121654501217, + "loss": 0.08777904510498047, + "loss_ce": 0.0009260187507607043, + "loss_iou": 0.51953125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 355889920, + "step": 2072 + }, + { + "epoch": 0.5452752022095088, + "grad_norm": 5.534523394507571, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 356062284, + "step": 2073 + }, + { + "epoch": 0.5452752022095088, + "loss": 0.07954730838537216, + "loss_ce": 0.0016664512222632766, + "loss_iou": 0.515625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 356062284, + "step": 2073 + }, + { + "epoch": 0.5455382389688959, + "grad_norm": 5.798318301378571, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 356234572, + "step": 2074 + }, + { + "epoch": 0.5455382389688959, + "loss": 0.11368724703788757, + "loss_ce": 0.0001923761737998575, + "loss_iou": 0.43359375, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 356234572, + "step": 2074 + }, + { + "epoch": 0.5458012757282831, + "grad_norm": 7.153408449848725, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 356406460, + "step": 2075 + }, + { + "epoch": 0.5458012757282831, + "loss": 0.12134350836277008, + "loss_ce": 0.0004023421206511557, + "loss_iou": 0.55078125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 356406460, + "step": 2075 + }, + { + "epoch": 0.5460643124876702, + "grad_norm": 5.616257612175736, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 356578828, + "step": 2076 + }, + { + "epoch": 0.5460643124876702, + "loss": 0.15952152013778687, + "loss_ce": 0.007452425081282854, + "loss_iou": 0.44140625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 356578828, + "step": 2076 + }, + { + "epoch": 0.5463273492470573, + "grad_norm": 4.268860828505962, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 356750940, + "step": 2077 + }, + { + "epoch": 0.5463273492470573, + "loss": 0.12928339838981628, + "loss_ce": 0.0006517980364151299, + "loss_iou": 0.53125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 356750940, + "step": 2077 + }, + { + "epoch": 0.5465903860064444, + "grad_norm": 5.706661955848177, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 356923100, + "step": 2078 + }, + { + "epoch": 0.5465903860064444, + "loss": 0.07689663022756577, + "loss_ce": 0.002494774293154478, + "loss_iou": 0.51171875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 356923100, + "step": 2078 + }, + { + "epoch": 0.5468534227658315, + "grad_norm": 6.622312056596725, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 357095216, + "step": 2079 + }, + { + "epoch": 0.5468534227658315, + "loss": 0.2388056218624115, + "loss_ce": 0.0006769794854335487, + "loss_iou": 0.353515625, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 357095216, + "step": 2079 + }, + { + "epoch": 0.5471164595252187, + "grad_norm": 5.101694254730695, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 357265716, + "step": 2080 + }, + { + "epoch": 0.5471164595252187, + "loss": 0.14891289174556732, + "loss_ce": 0.0022454019635915756, + "loss_iou": 0.482421875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 357265716, + "step": 2080 + }, + { + "epoch": 0.5473794962846058, + "grad_norm": 7.015564745743221, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 357437784, + "step": 2081 + }, + { + "epoch": 0.5473794962846058, + "loss": 0.09505030512809753, + "loss_ce": 0.003283948404714465, + "loss_iou": 0.462890625, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 357437784, + "step": 2081 + }, + { + "epoch": 0.5476425330439929, + "grad_norm": 6.361890201898857, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 357609848, + "step": 2082 + }, + { + "epoch": 0.5476425330439929, + "loss": 0.11097072064876556, + "loss_ce": 0.0005886423168703914, + "loss_iou": 0.5859375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 357609848, + "step": 2082 + }, + { + "epoch": 0.54790556980338, + "grad_norm": 11.534449289054661, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 357782120, + "step": 2083 + }, + { + "epoch": 0.54790556980338, + "loss": 0.20264874398708344, + "loss_ce": 0.0027280959766358137, + "loss_iou": 0.322265625, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 357782120, + "step": 2083 + }, + { + "epoch": 0.5481686065627671, + "grad_norm": 5.295463538809882, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 357954088, + "step": 2084 + }, + { + "epoch": 0.5481686065627671, + "loss": 0.13939827680587769, + "loss_ce": 0.0004822692717425525, + "loss_iou": 0.52734375, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 357954088, + "step": 2084 + }, + { + "epoch": 0.5484316433221542, + "grad_norm": 5.108081687062298, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 358123060, + "step": 2085 + }, + { + "epoch": 0.5484316433221542, + "loss": 0.11673710495233536, + "loss_ce": 0.002631876850500703, + "loss_iou": 0.53125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 358123060, + "step": 2085 + }, + { + "epoch": 0.5486946800815414, + "grad_norm": 8.727300119230549, + "learning_rate": 5e-06, + "loss": 0.1803, + "num_input_tokens_seen": 358295276, + "step": 2086 + }, + { + "epoch": 0.5486946800815414, + "loss": 0.21835477650165558, + "loss_ce": 0.0030837799422442913, + "loss_iou": 0.51171875, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 358295276, + "step": 2086 + }, + { + "epoch": 0.5489577168409285, + "grad_norm": 11.066658379743997, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 358467252, + "step": 2087 + }, + { + "epoch": 0.5489577168409285, + "loss": 0.10189958661794662, + "loss_ce": 0.00464006420224905, + "loss_iou": 0.5390625, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 358467252, + "step": 2087 + }, + { + "epoch": 0.5492207536003156, + "grad_norm": 6.149852742286746, + "learning_rate": 5e-06, + "loss": 0.1068, + "num_input_tokens_seen": 358639392, + "step": 2088 + }, + { + "epoch": 0.5492207536003156, + "loss": 0.11351503431797028, + "loss_ce": 0.0006915500853210688, + "loss_iou": 0.4375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 358639392, + "step": 2088 + }, + { + "epoch": 0.5494837903597027, + "grad_norm": 4.11715937749224, + "learning_rate": 5e-06, + "loss": 0.0832, + "num_input_tokens_seen": 358811532, + "step": 2089 + }, + { + "epoch": 0.5494837903597027, + "loss": 0.06601230055093765, + "loss_ce": 0.0033291929867118597, + "loss_iou": 0.6640625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 358811532, + "step": 2089 + }, + { + "epoch": 0.5497468271190898, + "grad_norm": 5.926803480110229, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 358982120, + "step": 2090 + }, + { + "epoch": 0.5497468271190898, + "loss": 0.165444478392601, + "loss_ce": 0.0021449108608067036, + "loss_iou": 0.40234375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 358982120, + "step": 2090 + }, + { + "epoch": 0.5500098638784771, + "grad_norm": 3.8279286211407215, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 359154336, + "step": 2091 + }, + { + "epoch": 0.5500098638784771, + "loss": 0.1499071568250656, + "loss_ce": 0.0012255202746018767, + "loss_iou": 0.46484375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 359154336, + "step": 2091 + }, + { + "epoch": 0.5502729006378642, + "grad_norm": 11.488893840776536, + "learning_rate": 5e-06, + "loss": 0.1522, + "num_input_tokens_seen": 359326324, + "step": 2092 + }, + { + "epoch": 0.5502729006378642, + "loss": 0.169376939535141, + "loss_ce": 0.0019880137406289577, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 359326324, + "step": 2092 + }, + { + "epoch": 0.5505359373972513, + "grad_norm": 7.053437919021253, + "learning_rate": 5e-06, + "loss": 0.1738, + "num_input_tokens_seen": 359498532, + "step": 2093 + }, + { + "epoch": 0.5505359373972513, + "loss": 0.17925216257572174, + "loss_ce": 0.003562463214620948, + "loss_iou": 0.57421875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 359498532, + "step": 2093 + }, + { + "epoch": 0.5507989741566384, + "grad_norm": 4.495803041591062, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 359668724, + "step": 2094 + }, + { + "epoch": 0.5507989741566384, + "loss": 0.16115409135818481, + "loss_ce": 0.001760771730914712, + "loss_iou": 0.5546875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 359668724, + "step": 2094 + }, + { + "epoch": 0.5510620109160255, + "grad_norm": 4.2291818189369135, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 359841036, + "step": 2095 + }, + { + "epoch": 0.5510620109160255, + "loss": 0.04658431187272072, + "loss_ce": 0.000533288111910224, + "loss_iou": NaN, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 359841036, + "step": 2095 + }, + { + "epoch": 0.5513250476754127, + "grad_norm": 14.952283770001413, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 360013076, + "step": 2096 + }, + { + "epoch": 0.5513250476754127, + "loss": 0.056577593088150024, + "loss_ce": 0.00405684020370245, + "loss_iou": 0.55078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 360013076, + "step": 2096 + }, + { + "epoch": 0.5515880844347998, + "grad_norm": 6.919816354273125, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 360181792, + "step": 2097 + }, + { + "epoch": 0.5515880844347998, + "loss": 0.08817453682422638, + "loss_ce": 0.0007874465081840754, + "loss_iou": 0.546875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 360181792, + "step": 2097 + }, + { + "epoch": 0.5518511211941869, + "grad_norm": 4.617511415402225, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 360353784, + "step": 2098 + }, + { + "epoch": 0.5518511211941869, + "loss": 0.10340078175067902, + "loss_ce": 0.0006938728038221598, + "loss_iou": 0.5234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 360353784, + "step": 2098 + }, + { + "epoch": 0.552114157953574, + "grad_norm": 21.83477585251536, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 360526236, + "step": 2099 + }, + { + "epoch": 0.552114157953574, + "loss": 0.09873877465724945, + "loss_ce": 0.005629643332213163, + "loss_iou": 0.6171875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 360526236, + "step": 2099 + }, + { + "epoch": 0.5523771947129611, + "grad_norm": 22.010760760138638, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 360698124, + "step": 2100 + }, + { + "epoch": 0.5523771947129611, + "loss": 0.1067737340927124, + "loss_ce": 0.0001147918519563973, + "loss_iou": 0.44921875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 360698124, + "step": 2100 + }, + { + "epoch": 0.5526402314723483, + "grad_norm": 8.309454312868692, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 360867340, + "step": 2101 + }, + { + "epoch": 0.5526402314723483, + "loss": 0.1460711508989334, + "loss_ce": 0.002302848733961582, + "loss_iou": 0.6171875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 360867340, + "step": 2101 + }, + { + "epoch": 0.5529032682317354, + "grad_norm": 15.176993944828276, + "learning_rate": 5e-06, + "loss": 0.1412, + "num_input_tokens_seen": 361039524, + "step": 2102 + }, + { + "epoch": 0.5529032682317354, + "loss": 0.18546344339847565, + "loss_ce": 0.003029361367225647, + "loss_iou": 0.546875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 361039524, + "step": 2102 + }, + { + "epoch": 0.5531663049911225, + "grad_norm": 7.240375850988684, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 361212152, + "step": 2103 + }, + { + "epoch": 0.5531663049911225, + "loss": 0.1453348845243454, + "loss_ce": 0.0008341491920873523, + "loss_iou": 0.65625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 361212152, + "step": 2103 + }, + { + "epoch": 0.5534293417505096, + "grad_norm": 4.504467098918756, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 361384636, + "step": 2104 + }, + { + "epoch": 0.5534293417505096, + "loss": 0.14879915118217468, + "loss_ce": 0.0031692716293036938, + "loss_iou": 0.54296875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 361384636, + "step": 2104 + }, + { + "epoch": 0.5536923785098967, + "grad_norm": 14.969295535193908, + "learning_rate": 5e-06, + "loss": 0.1179, + "num_input_tokens_seen": 361556736, + "step": 2105 + }, + { + "epoch": 0.5536923785098967, + "loss": 0.14452342689037323, + "loss_ce": 0.0027692681178450584, + "loss_iou": 0.46875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 361556736, + "step": 2105 + }, + { + "epoch": 0.5539554152692839, + "grad_norm": 4.430127573397192, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 361726936, + "step": 2106 + }, + { + "epoch": 0.5539554152692839, + "loss": 0.22009092569351196, + "loss_ce": 0.0009747114963829517, + "loss_iou": 0.431640625, + "loss_num": 0.043701171875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 361726936, + "step": 2106 + }, + { + "epoch": 0.554218452028671, + "grad_norm": 4.147747665163274, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 361899184, + "step": 2107 + }, + { + "epoch": 0.554218452028671, + "loss": 0.13676050305366516, + "loss_ce": 0.0002858861698769033, + "loss_iou": 0.703125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 361899184, + "step": 2107 + }, + { + "epoch": 0.5544814887880581, + "grad_norm": 3.7276098432513174, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 362071768, + "step": 2108 + }, + { + "epoch": 0.5544814887880581, + "loss": 0.15069580078125, + "loss_ce": 0.0011291508562862873, + "loss_iou": 0.38671875, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 362071768, + "step": 2108 + }, + { + "epoch": 0.5547445255474452, + "grad_norm": 5.320538695832165, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 362240216, + "step": 2109 + }, + { + "epoch": 0.5547445255474452, + "loss": 0.09280645847320557, + "loss_ce": 0.001528381835669279, + "loss_iou": 0.52734375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 362240216, + "step": 2109 + }, + { + "epoch": 0.5550075623068323, + "grad_norm": 11.977017579320059, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 362412692, + "step": 2110 + }, + { + "epoch": 0.5550075623068323, + "loss": 0.09514741599559784, + "loss_ce": 0.005608838051557541, + "loss_iou": 0.498046875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 362412692, + "step": 2110 + }, + { + "epoch": 0.5552705990662195, + "grad_norm": 5.133455963412773, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 362583480, + "step": 2111 + }, + { + "epoch": 0.5552705990662195, + "loss": 0.20054732263088226, + "loss_ce": 0.0018321146490052342, + "loss_iou": NaN, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 362583480, + "step": 2111 + }, + { + "epoch": 0.5555336358256067, + "grad_norm": 13.046079025621871, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 362755416, + "step": 2112 + }, + { + "epoch": 0.5555336358256067, + "loss": 0.09682411700487137, + "loss_ce": 0.005820699501782656, + "loss_iou": 0.5234375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 362755416, + "step": 2112 + }, + { + "epoch": 0.5557966725849938, + "grad_norm": 5.170449556654708, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 362927780, + "step": 2113 + }, + { + "epoch": 0.5557966725849938, + "loss": 0.1499776542186737, + "loss_ce": 0.0003499832237139344, + "loss_iou": 0.5234375, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 362927780, + "step": 2113 + }, + { + "epoch": 0.5560597093443809, + "grad_norm": 4.492587389090409, + "learning_rate": 5e-06, + "loss": 0.1634, + "num_input_tokens_seen": 363100176, + "step": 2114 + }, + { + "epoch": 0.5560597093443809, + "loss": 0.12822586297988892, + "loss_ce": 0.001638943562284112, + "loss_iou": 0.435546875, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 363100176, + "step": 2114 + }, + { + "epoch": 0.556322746103768, + "grad_norm": 6.53280654225724, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 363272032, + "step": 2115 + }, + { + "epoch": 0.556322746103768, + "loss": 0.09251531958580017, + "loss_ce": 0.0013898293254896998, + "loss_iou": 0.59375, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 363272032, + "step": 2115 + }, + { + "epoch": 0.5565857828631551, + "grad_norm": 6.772073322495271, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 363444356, + "step": 2116 + }, + { + "epoch": 0.5565857828631551, + "loss": 0.22190499305725098, + "loss_ce": 0.00468086265027523, + "loss_iou": 0.486328125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 363444356, + "step": 2116 + }, + { + "epoch": 0.5568488196225423, + "grad_norm": 4.259888500977366, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 363616480, + "step": 2117 + }, + { + "epoch": 0.5568488196225423, + "loss": 0.11629009246826172, + "loss_ce": 0.0017881433013826609, + "loss_iou": 0.59765625, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 363616480, + "step": 2117 + }, + { + "epoch": 0.5571118563819294, + "grad_norm": 5.727482587569898, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 363788792, + "step": 2118 + }, + { + "epoch": 0.5571118563819294, + "loss": 0.14177094399929047, + "loss_ce": 0.0018478452693670988, + "loss_iou": 0.357421875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 363788792, + "step": 2118 + }, + { + "epoch": 0.5573748931413165, + "grad_norm": 10.912411227157135, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 363960764, + "step": 2119 + }, + { + "epoch": 0.5573748931413165, + "loss": 0.1305292397737503, + "loss_ce": 0.0011652277316898108, + "loss_iou": 0.46484375, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 363960764, + "step": 2119 + }, + { + "epoch": 0.5576379299007036, + "grad_norm": 3.94400025085083, + "learning_rate": 5e-06, + "loss": 0.1587, + "num_input_tokens_seen": 364133156, + "step": 2120 + }, + { + "epoch": 0.5576379299007036, + "loss": 0.19977153837680817, + "loss_ce": 0.008945131674408913, + "loss_iou": 0.40625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 364133156, + "step": 2120 + }, + { + "epoch": 0.5579009666600907, + "grad_norm": 3.6301701938092217, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 364305580, + "step": 2121 + }, + { + "epoch": 0.5579009666600907, + "loss": 0.1176564022898674, + "loss_ce": 0.0034901422914117575, + "loss_iou": 0.55859375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 364305580, + "step": 2121 + }, + { + "epoch": 0.5581640034194779, + "grad_norm": 19.838531274898244, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 364478040, + "step": 2122 + }, + { + "epoch": 0.5581640034194779, + "loss": 0.06633633375167847, + "loss_ce": 0.0024020099081099033, + "loss_iou": 0.53125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 364478040, + "step": 2122 + }, + { + "epoch": 0.558427040178865, + "grad_norm": 9.488127206942975, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 364647728, + "step": 2123 + }, + { + "epoch": 0.558427040178865, + "loss": 0.07145293056964874, + "loss_ce": 0.00010283520532539114, + "loss_iou": 0.5, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 364647728, + "step": 2123 + }, + { + "epoch": 0.5586900769382521, + "grad_norm": 4.8177202903523115, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 364816920, + "step": 2124 + }, + { + "epoch": 0.5586900769382521, + "loss": 0.13919737935066223, + "loss_ce": 0.002829591976478696, + "loss_iou": 0.494140625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 364816920, + "step": 2124 + }, + { + "epoch": 0.5589531136976392, + "grad_norm": 3.304918735754454, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 364989156, + "step": 2125 + }, + { + "epoch": 0.5589531136976392, + "loss": 0.062035560607910156, + "loss_ce": 0.002953530289232731, + "loss_iou": 0.412109375, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 364989156, + "step": 2125 + }, + { + "epoch": 0.5592161504570263, + "grad_norm": 2.856854904917954, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 365161052, + "step": 2126 + }, + { + "epoch": 0.5592161504570263, + "loss": 0.11198246479034424, + "loss_ce": 0.007825973443686962, + "loss_iou": 0.33203125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 365161052, + "step": 2126 + }, + { + "epoch": 0.5594791872164135, + "grad_norm": 3.111770745456122, + "learning_rate": 5e-06, + "loss": 0.196, + "num_input_tokens_seen": 365333284, + "step": 2127 + }, + { + "epoch": 0.5594791872164135, + "loss": 0.0993409976363182, + "loss_ce": 0.00046404742170125246, + "loss_iou": 0.61328125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 365333284, + "step": 2127 + }, + { + "epoch": 0.5597422239758006, + "grad_norm": 4.17183040056909, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 365505612, + "step": 2128 + }, + { + "epoch": 0.5597422239758006, + "loss": 0.08107022941112518, + "loss_ce": 0.000320716411806643, + "loss_iou": 0.62890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 365505612, + "step": 2128 + }, + { + "epoch": 0.5600052607351877, + "grad_norm": 5.3777331039695175, + "learning_rate": 5e-06, + "loss": 0.1018, + "num_input_tokens_seen": 365677880, + "step": 2129 + }, + { + "epoch": 0.5600052607351877, + "loss": 0.10484224557876587, + "loss_ce": 0.0005636783316731453, + "loss_iou": 0.5625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 365677880, + "step": 2129 + }, + { + "epoch": 0.5602682974945749, + "grad_norm": 13.479247744568044, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 365849928, + "step": 2130 + }, + { + "epoch": 0.5602682974945749, + "loss": 0.1259067952632904, + "loss_ce": 0.0024021633435040712, + "loss_iou": 0.484375, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 365849928, + "step": 2130 + }, + { + "epoch": 0.560531334253962, + "grad_norm": 20.004883443678533, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 366021916, + "step": 2131 + }, + { + "epoch": 0.560531334253962, + "loss": 0.11340343207120895, + "loss_ce": 0.0057374173775315285, + "loss_iou": 0.515625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 366021916, + "step": 2131 + }, + { + "epoch": 0.5607943710133492, + "grad_norm": 11.042521647809682, + "learning_rate": 5e-06, + "loss": 0.1328, + "num_input_tokens_seen": 366194008, + "step": 2132 + }, + { + "epoch": 0.5607943710133492, + "loss": 0.09386501461267471, + "loss_ce": 0.00017604799359105527, + "loss_iou": 0.640625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 366194008, + "step": 2132 + }, + { + "epoch": 0.5610574077727363, + "grad_norm": 5.842914268629563, + "learning_rate": 5e-06, + "loss": 0.1394, + "num_input_tokens_seen": 366366212, + "step": 2133 + }, + { + "epoch": 0.5610574077727363, + "loss": 0.11772558093070984, + "loss_ce": 0.0009958385489881039, + "loss_iou": 0.5390625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 366366212, + "step": 2133 + }, + { + "epoch": 0.5613204445321234, + "grad_norm": 5.960122855306223, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 366538220, + "step": 2134 + }, + { + "epoch": 0.5613204445321234, + "loss": 0.08179056644439697, + "loss_ce": 0.003818158758804202, + "loss_iou": 0.703125, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 366538220, + "step": 2134 + }, + { + "epoch": 0.5615834812915105, + "grad_norm": 5.389091778733193, + "learning_rate": 5e-06, + "loss": 0.1578, + "num_input_tokens_seen": 366710096, + "step": 2135 + }, + { + "epoch": 0.5615834812915105, + "loss": 0.18556632101535797, + "loss_ce": 0.0002635964483488351, + "loss_iou": 0.408203125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 366710096, + "step": 2135 + }, + { + "epoch": 0.5618465180508976, + "grad_norm": 7.320192738875152, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 366880456, + "step": 2136 + }, + { + "epoch": 0.5618465180508976, + "loss": 0.13861671090126038, + "loss_ce": 0.003179695922881365, + "loss_iou": 0.498046875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 366880456, + "step": 2136 + }, + { + "epoch": 0.5621095548102847, + "grad_norm": 14.114626386817225, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 367052752, + "step": 2137 + }, + { + "epoch": 0.5621095548102847, + "loss": 0.12559227645397186, + "loss_ce": 0.001477292738854885, + "loss_iou": 0.5234375, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 367052752, + "step": 2137 + }, + { + "epoch": 0.5623725915696719, + "grad_norm": 3.498748268755153, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 367225060, + "step": 2138 + }, + { + "epoch": 0.5623725915696719, + "loss": 0.08549857884645462, + "loss_ce": 0.000232468664762564, + "loss_iou": 0.69921875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 367225060, + "step": 2138 + }, + { + "epoch": 0.562635628329059, + "grad_norm": 20.701513936136166, + "learning_rate": 5e-06, + "loss": 0.0738, + "num_input_tokens_seen": 367397332, + "step": 2139 + }, + { + "epoch": 0.562635628329059, + "loss": 0.08525611460208893, + "loss_ce": 0.004125134088099003, + "loss_iou": 0.4921875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 367397332, + "step": 2139 + }, + { + "epoch": 0.5628986650884461, + "grad_norm": 11.983965441231234, + "learning_rate": 5e-06, + "loss": 0.0957, + "num_input_tokens_seen": 367569560, + "step": 2140 + }, + { + "epoch": 0.5628986650884461, + "loss": 0.06727585196495056, + "loss_ce": 0.00010666107118595392, + "loss_iou": 0.47265625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 367569560, + "step": 2140 + }, + { + "epoch": 0.5631617018478332, + "grad_norm": 6.398433590682968, + "learning_rate": 5e-06, + "loss": 0.1279, + "num_input_tokens_seen": 367741696, + "step": 2141 + }, + { + "epoch": 0.5631617018478332, + "loss": 0.16246706247329712, + "loss_ce": 0.0022497763857245445, + "loss_iou": 0.298828125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 367741696, + "step": 2141 + }, + { + "epoch": 0.5634247386072203, + "grad_norm": 4.385698804822032, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 367913912, + "step": 2142 + }, + { + "epoch": 0.5634247386072203, + "loss": 0.10307023674249649, + "loss_ce": 0.0009889386128634214, + "loss_iou": 0.671875, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 367913912, + "step": 2142 + }, + { + "epoch": 0.5636877753666075, + "grad_norm": 7.856910300327451, + "learning_rate": 5e-06, + "loss": 0.1906, + "num_input_tokens_seen": 368086176, + "step": 2143 + }, + { + "epoch": 0.5636877753666075, + "loss": 0.19684657454490662, + "loss_ce": 0.005714981816709042, + "loss_iou": 0.388671875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 368086176, + "step": 2143 + }, + { + "epoch": 0.5639508121259946, + "grad_norm": 6.368187003490122, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 368258440, + "step": 2144 + }, + { + "epoch": 0.5639508121259946, + "loss": 0.1019875556230545, + "loss_ce": 0.0008217811118811369, + "loss_iou": 0.52734375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 368258440, + "step": 2144 + }, + { + "epoch": 0.5642138488853817, + "grad_norm": 7.190126438403998, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 368430620, + "step": 2145 + }, + { + "epoch": 0.5642138488853817, + "loss": 0.1344844102859497, + "loss_ce": 0.00020707116345874965, + "loss_iou": 0.546875, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 368430620, + "step": 2145 + }, + { + "epoch": 0.5644768856447688, + "grad_norm": 6.755563239238797, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 368603024, + "step": 2146 + }, + { + "epoch": 0.5644768856447688, + "loss": 0.09109672158956528, + "loss_ce": 0.0006883963942527771, + "loss_iou": 0.51953125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 368603024, + "step": 2146 + }, + { + "epoch": 0.5647399224041559, + "grad_norm": 4.616140839020028, + "learning_rate": 5e-06, + "loss": 0.1489, + "num_input_tokens_seen": 368775624, + "step": 2147 + }, + { + "epoch": 0.5647399224041559, + "loss": 0.19518432021141052, + "loss_ce": 0.002435298403725028, + "loss_iou": 0.392578125, + "loss_num": 0.03857421875, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 368775624, + "step": 2147 + }, + { + "epoch": 0.5650029591635432, + "grad_norm": 11.734928019848066, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 368945448, + "step": 2148 + }, + { + "epoch": 0.5650029591635432, + "loss": 0.17952315509319305, + "loss_ce": 0.009143512696027756, + "loss_iou": 0.5703125, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 368945448, + "step": 2148 + }, + { + "epoch": 0.5652659959229303, + "grad_norm": 6.710624849462096, + "learning_rate": 5e-06, + "loss": 0.1518, + "num_input_tokens_seen": 369115952, + "step": 2149 + }, + { + "epoch": 0.5652659959229303, + "loss": 0.13233953714370728, + "loss_ce": 0.0019989716820418835, + "loss_iou": 0.310546875, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 369115952, + "step": 2149 + }, + { + "epoch": 0.5655290326823174, + "grad_norm": 6.669120586380705, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 369288028, + "step": 2150 + }, + { + "epoch": 0.5655290326823174, + "loss": 0.08679264783859253, + "loss_ce": 0.00695866858586669, + "loss_iou": 0.55078125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 369288028, + "step": 2150 + }, + { + "epoch": 0.5657920694417045, + "grad_norm": 4.502790600863043, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 369460112, + "step": 2151 + }, + { + "epoch": 0.5657920694417045, + "loss": 0.09304537624120712, + "loss_ce": 0.002102992497384548, + "loss_iou": 0.392578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 369460112, + "step": 2151 + }, + { + "epoch": 0.5660551062010916, + "grad_norm": 5.258977022311985, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 369632404, + "step": 2152 + }, + { + "epoch": 0.5660551062010916, + "loss": 0.08912669122219086, + "loss_ce": 0.0003358002286404371, + "loss_iou": 0.41796875, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 369632404, + "step": 2152 + }, + { + "epoch": 0.5663181429604788, + "grad_norm": 5.721568404254781, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 369804776, + "step": 2153 + }, + { + "epoch": 0.5663181429604788, + "loss": 0.10320156812667847, + "loss_ce": 0.0007235408993437886, + "loss_iou": 0.490234375, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 369804776, + "step": 2153 + }, + { + "epoch": 0.5665811797198659, + "grad_norm": 4.65695630930855, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 369976952, + "step": 2154 + }, + { + "epoch": 0.5665811797198659, + "loss": 0.08642168343067169, + "loss_ce": 0.0003621142532210797, + "loss_iou": 0.515625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 369976952, + "step": 2154 + }, + { + "epoch": 0.566844216479253, + "grad_norm": 2.957349659939758, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 370149132, + "step": 2155 + }, + { + "epoch": 0.566844216479253, + "loss": 0.11206680536270142, + "loss_ce": 0.0026002456434071064, + "loss_iou": 0.61328125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 370149132, + "step": 2155 + }, + { + "epoch": 0.5671072532386401, + "grad_norm": 13.220367815915187, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 370321528, + "step": 2156 + }, + { + "epoch": 0.5671072532386401, + "loss": 0.13273131847381592, + "loss_ce": 0.0014446950517594814, + "loss_iou": 0.427734375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 370321528, + "step": 2156 + }, + { + "epoch": 0.5673702899980272, + "grad_norm": 6.5167958115597955, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 370493692, + "step": 2157 + }, + { + "epoch": 0.5673702899980272, + "loss": 0.0516238659620285, + "loss_ce": 0.00018648749392013997, + "loss_iou": 0.578125, + "loss_num": 0.01025390625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 370493692, + "step": 2157 + }, + { + "epoch": 0.5676333267574144, + "grad_norm": 9.055547000690966, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 370666128, + "step": 2158 + }, + { + "epoch": 0.5676333267574144, + "loss": 0.20648962259292603, + "loss_ce": 0.0006485594203695655, + "loss_iou": 0.51953125, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 370666128, + "step": 2158 + }, + { + "epoch": 0.5678963635168015, + "grad_norm": 20.191615671139235, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 370838096, + "step": 2159 + }, + { + "epoch": 0.5678963635168015, + "loss": 0.1389261931180954, + "loss_ce": 0.0004679340636357665, + "loss_iou": 0.359375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 370838096, + "step": 2159 + }, + { + "epoch": 0.5681594002761886, + "grad_norm": 16.724015569984758, + "learning_rate": 5e-06, + "loss": 0.0926, + "num_input_tokens_seen": 371010276, + "step": 2160 + }, + { + "epoch": 0.5681594002761886, + "loss": 0.09113931655883789, + "loss_ce": 0.004194741137325764, + "loss_iou": 0.53125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 371010276, + "step": 2160 + }, + { + "epoch": 0.5684224370355757, + "grad_norm": 9.65595197930286, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 371180756, + "step": 2161 + }, + { + "epoch": 0.5684224370355757, + "loss": 0.10710924118757248, + "loss_ce": 0.0031358497217297554, + "loss_iou": 0.53515625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 371180756, + "step": 2161 + }, + { + "epoch": 0.5686854737949628, + "grad_norm": 4.559805216337552, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 371351216, + "step": 2162 + }, + { + "epoch": 0.5686854737949628, + "loss": 0.18528233468532562, + "loss_ce": 0.0012918633874505758, + "loss_iou": 0.421875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 371351216, + "step": 2162 + }, + { + "epoch": 0.5689485105543499, + "grad_norm": 3.9132748752015707, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 371521504, + "step": 2163 + }, + { + "epoch": 0.5689485105543499, + "loss": 0.09377571940422058, + "loss_ce": 0.00017830087745096534, + "loss_iou": 0.54296875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 371521504, + "step": 2163 + }, + { + "epoch": 0.5692115473137371, + "grad_norm": 6.221489940852924, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 371693620, + "step": 2164 + }, + { + "epoch": 0.5692115473137371, + "loss": 0.08065352588891983, + "loss_ce": 0.000270225660642609, + "loss_iou": 0.62109375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 371693620, + "step": 2164 + }, + { + "epoch": 0.5694745840731242, + "grad_norm": 5.565730297773139, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 371864020, + "step": 2165 + }, + { + "epoch": 0.5694745840731242, + "loss": 0.14530430734157562, + "loss_ce": 0.0009256468038074672, + "loss_iou": 0.466796875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 371864020, + "step": 2165 + }, + { + "epoch": 0.5697376208325113, + "grad_norm": 7.807853954323307, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 372036364, + "step": 2166 + }, + { + "epoch": 0.5697376208325113, + "loss": 0.1258363127708435, + "loss_ce": 0.00034803448943421245, + "loss_iou": 0.5390625, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 372036364, + "step": 2166 + }, + { + "epoch": 0.5700006575918984, + "grad_norm": 6.119338737471246, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 372208520, + "step": 2167 + }, + { + "epoch": 0.5700006575918984, + "loss": 0.183029443025589, + "loss_ce": 0.0006258888752199709, + "loss_iou": 0.4609375, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 372208520, + "step": 2167 + }, + { + "epoch": 0.5702636943512855, + "grad_norm": 4.796742469743409, + "learning_rate": 5e-06, + "loss": 0.1689, + "num_input_tokens_seen": 372380908, + "step": 2168 + }, + { + "epoch": 0.5702636943512855, + "loss": 0.24278424680233002, + "loss_ce": 0.006517150904983282, + "loss_iou": 0.6015625, + "loss_num": 0.04736328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 372380908, + "step": 2168 + }, + { + "epoch": 0.5705267311106728, + "grad_norm": 7.9441210817884444, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 372553152, + "step": 2169 + }, + { + "epoch": 0.5705267311106728, + "loss": 0.1403554081916809, + "loss_ce": 0.0024769881274551153, + "loss_iou": 0.48046875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 372553152, + "step": 2169 + }, + { + "epoch": 0.5707897678700599, + "grad_norm": 8.945699343509036, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 372722892, + "step": 2170 + }, + { + "epoch": 0.5707897678700599, + "loss": 0.17014455795288086, + "loss_ce": 0.0013518218183889985, + "loss_iou": 0.578125, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 372722892, + "step": 2170 + }, + { + "epoch": 0.571052804629447, + "grad_norm": 7.4926547587717405, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 372895068, + "step": 2171 + }, + { + "epoch": 0.571052804629447, + "loss": 0.2696492373943329, + "loss_ce": 0.0004536675405688584, + "loss_iou": 0.40234375, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 372895068, + "step": 2171 + }, + { + "epoch": 0.5713158413888341, + "grad_norm": 17.833109235832993, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 373067312, + "step": 2172 + }, + { + "epoch": 0.5713158413888341, + "loss": 0.09599291533231735, + "loss_ce": 0.0006254853797145188, + "loss_iou": 0.515625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 373067312, + "step": 2172 + }, + { + "epoch": 0.5715788781482212, + "grad_norm": 14.290103080283112, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 373239588, + "step": 2173 + }, + { + "epoch": 0.5715788781482212, + "loss": 0.15090827643871307, + "loss_ce": 0.002836985979229212, + "loss_iou": 0.451171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 373239588, + "step": 2173 + }, + { + "epoch": 0.5718419149076084, + "grad_norm": 4.534353132879591, + "learning_rate": 5e-06, + "loss": 0.0843, + "num_input_tokens_seen": 373411840, + "step": 2174 + }, + { + "epoch": 0.5718419149076084, + "loss": 0.07687939703464508, + "loss_ce": 0.0007075219764374197, + "loss_iou": 0.578125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 373411840, + "step": 2174 + }, + { + "epoch": 0.5721049516669955, + "grad_norm": 12.524713753292392, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 373582408, + "step": 2175 + }, + { + "epoch": 0.5721049516669955, + "loss": 0.17097817361354828, + "loss_ce": 0.0006900950102135539, + "loss_iou": 0.5859375, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 373582408, + "step": 2175 + }, + { + "epoch": 0.5723679884263826, + "grad_norm": 18.760082445449868, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 373754416, + "step": 2176 + }, + { + "epoch": 0.5723679884263826, + "loss": 0.11672262102365494, + "loss_ce": 0.0011525547597557306, + "loss_iou": 0.40625, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 373754416, + "step": 2176 + }, + { + "epoch": 0.5726310251857697, + "grad_norm": 8.516129726413212, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 373922312, + "step": 2177 + }, + { + "epoch": 0.5726310251857697, + "loss": 0.19126161932945251, + "loss_ce": 0.0003436502593103796, + "loss_iou": 0.462890625, + "loss_num": 0.0380859375, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 373922312, + "step": 2177 + }, + { + "epoch": 0.5728940619451568, + "grad_norm": 6.898009634423141, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 374092720, + "step": 2178 + }, + { + "epoch": 0.5728940619451568, + "loss": 0.15485724806785583, + "loss_ce": 0.0002551883808337152, + "loss_iou": 0.6328125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 374092720, + "step": 2178 + }, + { + "epoch": 0.573157098704544, + "grad_norm": 6.043872383339532, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 374264864, + "step": 2179 + }, + { + "epoch": 0.573157098704544, + "loss": 0.12720485031604767, + "loss_ce": 0.0004958686186000705, + "loss_iou": 0.466796875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 374264864, + "step": 2179 + }, + { + "epoch": 0.5734201354639311, + "grad_norm": 5.166426865666374, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 374436928, + "step": 2180 + }, + { + "epoch": 0.5734201354639311, + "loss": 0.10286220163106918, + "loss_ce": 0.0005062465788796544, + "loss_iou": 0.6328125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 374436928, + "step": 2180 + }, + { + "epoch": 0.5736831722233182, + "grad_norm": 3.8181355710391034, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 374608984, + "step": 2181 + }, + { + "epoch": 0.5736831722233182, + "loss": 0.1574546992778778, + "loss_ce": 0.00025864943745546043, + "loss_iou": 0.59375, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 374608984, + "step": 2181 + }, + { + "epoch": 0.5739462089827053, + "grad_norm": 9.672382405473702, + "learning_rate": 5e-06, + "loss": 0.1434, + "num_input_tokens_seen": 374779732, + "step": 2182 + }, + { + "epoch": 0.5739462089827053, + "loss": 0.17890840768814087, + "loss_ce": 0.007033395115286112, + "loss_iou": 0.64453125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 374779732, + "step": 2182 + }, + { + "epoch": 0.5742092457420924, + "grad_norm": 5.19568946256922, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 374951880, + "step": 2183 + }, + { + "epoch": 0.5742092457420924, + "loss": 0.11335025727748871, + "loss_ce": 0.0024798910599201918, + "loss_iou": 0.5859375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 374951880, + "step": 2183 + }, + { + "epoch": 0.5744722825014796, + "grad_norm": 4.272972418213817, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 375120908, + "step": 2184 + }, + { + "epoch": 0.5744722825014796, + "loss": 0.16440820693969727, + "loss_ce": 0.000330454291542992, + "loss_iou": 0.310546875, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 375120908, + "step": 2184 + }, + { + "epoch": 0.5747353192608667, + "grad_norm": 4.877678921853992, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 375292940, + "step": 2185 + }, + { + "epoch": 0.5747353192608667, + "loss": 0.19540463387966156, + "loss_ce": 0.0004278157721273601, + "loss_iou": 0.443359375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 375292940, + "step": 2185 + }, + { + "epoch": 0.5749983560202538, + "grad_norm": 9.795881894231353, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 375463220, + "step": 2186 + }, + { + "epoch": 0.5749983560202538, + "loss": 0.18603767454624176, + "loss_ce": 0.0036951417569071054, + "loss_iou": 0.48046875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 375463220, + "step": 2186 + }, + { + "epoch": 0.575261392779641, + "grad_norm": 14.3298059229615, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 375633332, + "step": 2187 + }, + { + "epoch": 0.575261392779641, + "loss": 0.11824968457221985, + "loss_ce": 0.0010011474369093776, + "loss_iou": 0.546875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 375633332, + "step": 2187 + }, + { + "epoch": 0.575524429539028, + "grad_norm": 5.9170529044976305, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 375803896, + "step": 2188 + }, + { + "epoch": 0.575524429539028, + "loss": 0.09120282530784607, + "loss_ce": 0.0025797830894589424, + "loss_iou": 0.5625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 375803896, + "step": 2188 + }, + { + "epoch": 0.5757874662984152, + "grad_norm": 4.8523353127793944, + "learning_rate": 5e-06, + "loss": 0.1571, + "num_input_tokens_seen": 375976212, + "step": 2189 + }, + { + "epoch": 0.5757874662984152, + "loss": 0.11096520721912384, + "loss_ce": 0.0007662302814424038, + "loss_iou": 0.54296875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 375976212, + "step": 2189 + }, + { + "epoch": 0.5760505030578024, + "grad_norm": 3.7437825333865065, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 376148372, + "step": 2190 + }, + { + "epoch": 0.5760505030578024, + "loss": 0.1311783641576767, + "loss_ce": 0.0013108099810779095, + "loss_iou": 0.5546875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 376148372, + "step": 2190 + }, + { + "epoch": 0.5763135398171895, + "grad_norm": 3.8863610288761765, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 376320448, + "step": 2191 + }, + { + "epoch": 0.5763135398171895, + "loss": 0.08886295557022095, + "loss_ce": 0.00014836144691798836, + "loss_iou": 0.53515625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 376320448, + "step": 2191 + }, + { + "epoch": 0.5765765765765766, + "grad_norm": 26.72560789877691, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 376492652, + "step": 2192 + }, + { + "epoch": 0.5765765765765766, + "loss": 0.09363338351249695, + "loss_ce": 0.002172202803194523, + "loss_iou": 0.458984375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 376492652, + "step": 2192 + }, + { + "epoch": 0.5768396133359637, + "grad_norm": 3.4367691751013334, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 376665088, + "step": 2193 + }, + { + "epoch": 0.5768396133359637, + "loss": 0.05514270067214966, + "loss_ce": 0.0010502950754016638, + "loss_iou": 0.4765625, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 376665088, + "step": 2193 + }, + { + "epoch": 0.5771026500953508, + "grad_norm": 9.788333451803501, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 376837384, + "step": 2194 + }, + { + "epoch": 0.5771026500953508, + "loss": 0.09362407773733139, + "loss_ce": 0.0004081379738636315, + "loss_iou": 0.55078125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 376837384, + "step": 2194 + }, + { + "epoch": 0.577365686854738, + "grad_norm": 5.1293333388841935, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 377006200, + "step": 2195 + }, + { + "epoch": 0.577365686854738, + "loss": 0.14207029342651367, + "loss_ce": 0.0032763422932475805, + "loss_iou": 0.54296875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 377006200, + "step": 2195 + }, + { + "epoch": 0.5776287236141251, + "grad_norm": 9.582310699127605, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 377178552, + "step": 2196 + }, + { + "epoch": 0.5776287236141251, + "loss": 0.14796333014965057, + "loss_ce": 0.0009296314674429595, + "loss_iou": 0.470703125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 377178552, + "step": 2196 + }, + { + "epoch": 0.5778917603735122, + "grad_norm": 5.51238614508857, + "learning_rate": 5e-06, + "loss": 0.1782, + "num_input_tokens_seen": 377350508, + "step": 2197 + }, + { + "epoch": 0.5778917603735122, + "loss": 0.12102293223142624, + "loss_ce": 0.011342758312821388, + "loss_iou": 0.66015625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 377350508, + "step": 2197 + }, + { + "epoch": 0.5781547971328993, + "grad_norm": 12.700643820920646, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 377522740, + "step": 2198 + }, + { + "epoch": 0.5781547971328993, + "loss": 0.13567548990249634, + "loss_ce": 0.004617748782038689, + "loss_iou": 0.53125, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 377522740, + "step": 2198 + }, + { + "epoch": 0.5784178338922864, + "grad_norm": 10.703599236021498, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 377694584, + "step": 2199 + }, + { + "epoch": 0.5784178338922864, + "loss": 0.17266914248466492, + "loss_ce": 0.0024420833215117455, + "loss_iou": 0.640625, + "loss_num": 0.0341796875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 377694584, + "step": 2199 + }, + { + "epoch": 0.5786808706516736, + "grad_norm": 3.7187673646063684, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 377866484, + "step": 2200 + }, + { + "epoch": 0.5786808706516736, + "loss": 0.04466433823108673, + "loss_ce": 0.0001697081606835127, + "loss_iou": 0.486328125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 377866484, + "step": 2200 + }, + { + "epoch": 0.5789439074110607, + "grad_norm": 6.267393061703399, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 378038564, + "step": 2201 + }, + { + "epoch": 0.5789439074110607, + "loss": 0.18358194828033447, + "loss_ce": 0.0036503085866570473, + "loss_iou": 0.416015625, + "loss_num": 0.0361328125, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 378038564, + "step": 2201 + }, + { + "epoch": 0.5792069441704478, + "grad_norm": 8.846753851466788, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 378211032, + "step": 2202 + }, + { + "epoch": 0.5792069441704478, + "loss": 0.14636895060539246, + "loss_ce": 0.0015783084090799093, + "loss_iou": 0.3203125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 378211032, + "step": 2202 + }, + { + "epoch": 0.5794699809298349, + "grad_norm": 9.056222556282368, + "learning_rate": 5e-06, + "loss": 0.1862, + "num_input_tokens_seen": 378383136, + "step": 2203 + }, + { + "epoch": 0.5794699809298349, + "loss": 0.10566692054271698, + "loss_ce": 0.003677169792354107, + "loss_iou": 0.46484375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 378383136, + "step": 2203 + }, + { + "epoch": 0.579733017689222, + "grad_norm": 4.9933922483079725, + "learning_rate": 5e-06, + "loss": 0.1063, + "num_input_tokens_seen": 378553532, + "step": 2204 + }, + { + "epoch": 0.579733017689222, + "loss": 0.10826604068279266, + "loss_ce": 0.001820725854486227, + "loss_iou": 0.74609375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 378553532, + "step": 2204 + }, + { + "epoch": 0.5799960544486092, + "grad_norm": 15.853504685750568, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 378725804, + "step": 2205 + }, + { + "epoch": 0.5799960544486092, + "loss": 0.05737042799592018, + "loss_ce": 0.0001499689242336899, + "loss_iou": 0.451171875, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 378725804, + "step": 2205 + }, + { + "epoch": 0.5802590912079963, + "grad_norm": 10.934142417787772, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 378897924, + "step": 2206 + }, + { + "epoch": 0.5802590912079963, + "loss": 0.08355730026960373, + "loss_ce": 0.004913499113172293, + "loss_iou": 0.52734375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 378897924, + "step": 2206 + }, + { + "epoch": 0.5805221279673835, + "grad_norm": 12.227334484970761, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 379070348, + "step": 2207 + }, + { + "epoch": 0.5805221279673835, + "loss": 0.07382334768772125, + "loss_ce": 0.00047435000305995345, + "loss_iou": 0.5078125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 379070348, + "step": 2207 + }, + { + "epoch": 0.5807851647267706, + "grad_norm": 5.142567427993278, + "learning_rate": 5e-06, + "loss": 0.1598, + "num_input_tokens_seen": 379242648, + "step": 2208 + }, + { + "epoch": 0.5807851647267706, + "loss": 0.06755711138248444, + "loss_ce": 0.0022494932636618614, + "loss_iou": 0.62890625, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 379242648, + "step": 2208 + }, + { + "epoch": 0.5810482014861577, + "grad_norm": 4.5313412775127055, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 379414684, + "step": 2209 + }, + { + "epoch": 0.5810482014861577, + "loss": 0.050227776169776917, + "loss_ce": 0.0009724035626277328, + "loss_iou": 0.478515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 379414684, + "step": 2209 + }, + { + "epoch": 0.5813112382455449, + "grad_norm": 51.12006755064005, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 379582192, + "step": 2210 + }, + { + "epoch": 0.5813112382455449, + "loss": 0.11350201815366745, + "loss_ce": 0.004676333162933588, + "loss_iou": 0.546875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 379582192, + "step": 2210 + }, + { + "epoch": 0.581574275004932, + "grad_norm": 18.708708783651804, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 379754536, + "step": 2211 + }, + { + "epoch": 0.581574275004932, + "loss": 0.09766032546758652, + "loss_ce": 0.0012552967527881265, + "loss_iou": 0.44140625, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 379754536, + "step": 2211 + }, + { + "epoch": 0.5818373117643191, + "grad_norm": 5.58601503702725, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 379926528, + "step": 2212 + }, + { + "epoch": 0.5818373117643191, + "loss": 0.18606778979301453, + "loss_ce": 0.0023214598186314106, + "loss_iou": 0.50390625, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 379926528, + "step": 2212 + }, + { + "epoch": 0.5821003485237062, + "grad_norm": 3.8208876250172965, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 380096704, + "step": 2213 + }, + { + "epoch": 0.5821003485237062, + "loss": 0.18421456217765808, + "loss_ce": 0.001994105987250805, + "loss_iou": 0.455078125, + "loss_num": 0.036376953125, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 380096704, + "step": 2213 + }, + { + "epoch": 0.5823633852830933, + "grad_norm": 4.7316491932517595, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 380269136, + "step": 2214 + }, + { + "epoch": 0.5823633852830933, + "loss": 0.23052500188350677, + "loss_ce": 0.0009107402293011546, + "loss_iou": 0.65625, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 380269136, + "step": 2214 + }, + { + "epoch": 0.5826264220424804, + "grad_norm": 5.131239937970846, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 380441472, + "step": 2215 + }, + { + "epoch": 0.5826264220424804, + "loss": 0.09817831218242645, + "loss_ce": 0.0009187856921926141, + "loss_iou": 0.4921875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 380441472, + "step": 2215 + }, + { + "epoch": 0.5828894588018676, + "grad_norm": 5.520590188036205, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 380613832, + "step": 2216 + }, + { + "epoch": 0.5828894588018676, + "loss": 0.09423954784870148, + "loss_ce": 0.0019696487579494715, + "loss_iou": 0.46875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 380613832, + "step": 2216 + }, + { + "epoch": 0.5831524955612547, + "grad_norm": 18.191266422169875, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 380785988, + "step": 2217 + }, + { + "epoch": 0.5831524955612547, + "loss": 0.135514497756958, + "loss_ce": 0.0037701106630265713, + "loss_iou": 0.54296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 380785988, + "step": 2217 + }, + { + "epoch": 0.5834155323206418, + "grad_norm": 13.995891706301725, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 380958116, + "step": 2218 + }, + { + "epoch": 0.5834155323206418, + "loss": 0.07710295170545578, + "loss_ce": 0.00120573490858078, + "loss_iou": 0.60546875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 380958116, + "step": 2218 + }, + { + "epoch": 0.5836785690800289, + "grad_norm": 3.998007795342675, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 381130168, + "step": 2219 + }, + { + "epoch": 0.5836785690800289, + "loss": 0.17625044286251068, + "loss_ce": 0.0008964374428614974, + "loss_iou": 0.494140625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 381130168, + "step": 2219 + }, + { + "epoch": 0.583941605839416, + "grad_norm": 8.315710539211903, + "learning_rate": 5e-06, + "loss": 0.1583, + "num_input_tokens_seen": 381302476, + "step": 2220 + }, + { + "epoch": 0.583941605839416, + "loss": 0.20635367929935455, + "loss_ce": 0.002496248111128807, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 381302476, + "step": 2220 + }, + { + "epoch": 0.5842046425988032, + "grad_norm": 9.769478710683881, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 381474700, + "step": 2221 + }, + { + "epoch": 0.5842046425988032, + "loss": 0.1044168546795845, + "loss_ce": 0.0017252071993425488, + "loss_iou": 0.447265625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 381474700, + "step": 2221 + }, + { + "epoch": 0.5844676793581903, + "grad_norm": 5.959703884673753, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 381646792, + "step": 2222 + }, + { + "epoch": 0.5844676793581903, + "loss": 0.20204247534275055, + "loss_ce": 0.0024575116112828255, + "loss_iou": 0.5625, + "loss_num": 0.0400390625, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 381646792, + "step": 2222 + }, + { + "epoch": 0.5847307161175774, + "grad_norm": 4.639177291482811, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 381818872, + "step": 2223 + }, + { + "epoch": 0.5847307161175774, + "loss": 0.1277788281440735, + "loss_ce": 0.007478540297597647, + "loss_iou": 0.484375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 381818872, + "step": 2223 + }, + { + "epoch": 0.5849937528769645, + "grad_norm": 4.556750582512925, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 381991284, + "step": 2224 + }, + { + "epoch": 0.5849937528769645, + "loss": 0.06165578216314316, + "loss_ce": 0.003062034724280238, + "loss_iou": 0.51953125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 381991284, + "step": 2224 + }, + { + "epoch": 0.5852567896363516, + "grad_norm": 5.717259207619562, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 382163040, + "step": 2225 + }, + { + "epoch": 0.5852567896363516, + "loss": 0.08200475573539734, + "loss_ce": 0.0018045613542199135, + "loss_iou": 0.39453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 382163040, + "step": 2225 + }, + { + "epoch": 0.5855198263957389, + "grad_norm": 4.765363459193876, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 382335172, + "step": 2226 + }, + { + "epoch": 0.5855198263957389, + "loss": 0.08970290422439575, + "loss_ce": 0.001568139297887683, + "loss_iou": 0.7109375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 382335172, + "step": 2226 + }, + { + "epoch": 0.585782863155126, + "grad_norm": 5.040130370486427, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 382507360, + "step": 2227 + }, + { + "epoch": 0.585782863155126, + "loss": 0.06643694639205933, + "loss_ce": 0.0008241523755714297, + "loss_iou": 0.5390625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 382507360, + "step": 2227 + }, + { + "epoch": 0.5860458999145131, + "grad_norm": 27.12917591646992, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 382677836, + "step": 2228 + }, + { + "epoch": 0.5860458999145131, + "loss": 0.11577419936656952, + "loss_ce": 0.0032864054664969444, + "loss_iou": 0.53515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 382677836, + "step": 2228 + }, + { + "epoch": 0.5863089366739002, + "grad_norm": 5.353068404352155, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 382849968, + "step": 2229 + }, + { + "epoch": 0.5863089366739002, + "loss": 0.11902253329753876, + "loss_ce": 0.0011636477429419756, + "loss_iou": 0.466796875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 382849968, + "step": 2229 + }, + { + "epoch": 0.5865719734332873, + "grad_norm": 44.6262248448857, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 383022244, + "step": 2230 + }, + { + "epoch": 0.5865719734332873, + "loss": 0.08623628318309784, + "loss_ce": 0.0015957842115312815, + "loss_iou": 0.62109375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 383022244, + "step": 2230 + }, + { + "epoch": 0.5868350101926745, + "grad_norm": 11.243592990037332, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 383190620, + "step": 2231 + }, + { + "epoch": 0.5868350101926745, + "loss": 0.11198394745588303, + "loss_ce": 0.0013882413040846586, + "loss_iou": 0.490234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 383190620, + "step": 2231 + }, + { + "epoch": 0.5870980469520616, + "grad_norm": 4.97022834965809, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 383362980, + "step": 2232 + }, + { + "epoch": 0.5870980469520616, + "loss": 0.17192208766937256, + "loss_ce": 0.0035566147416830063, + "loss_iou": 0.447265625, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 383362980, + "step": 2232 + }, + { + "epoch": 0.5873610837114487, + "grad_norm": 4.21141022600957, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 383535200, + "step": 2233 + }, + { + "epoch": 0.5873610837114487, + "loss": 0.1112288236618042, + "loss_ce": 0.00690448796376586, + "loss_iou": 0.56640625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 383535200, + "step": 2233 + }, + { + "epoch": 0.5876241204708358, + "grad_norm": 4.036382922773338, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 383705348, + "step": 2234 + }, + { + "epoch": 0.5876241204708358, + "loss": 0.05898036062717438, + "loss_ce": 0.001027482096105814, + "loss_iou": 0.59765625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 383705348, + "step": 2234 + }, + { + "epoch": 0.5878871572302229, + "grad_norm": 7.244981585553582, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 383877692, + "step": 2235 + }, + { + "epoch": 0.5878871572302229, + "loss": 0.10776747018098831, + "loss_ce": 0.0007423229981213808, + "loss_iou": 0.58984375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 383877692, + "step": 2235 + }, + { + "epoch": 0.58815019398961, + "grad_norm": 3.396542457402831, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 384049540, + "step": 2236 + }, + { + "epoch": 0.58815019398961, + "loss": 0.1896773874759674, + "loss_ce": 0.0011398009955883026, + "loss_iou": 0.72265625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 384049540, + "step": 2236 + }, + { + "epoch": 0.5884132307489972, + "grad_norm": 8.64901320262372, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 384221584, + "step": 2237 + }, + { + "epoch": 0.5884132307489972, + "loss": 0.12090113013982773, + "loss_ce": 0.0009365270379930735, + "loss_iou": 0.5078125, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 384221584, + "step": 2237 + }, + { + "epoch": 0.5886762675083843, + "grad_norm": 4.107798805009285, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 384393708, + "step": 2238 + }, + { + "epoch": 0.5886762675083843, + "loss": 0.10467529296875, + "loss_ce": 0.0007019043550826609, + "loss_iou": 0.51171875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 384393708, + "step": 2238 + }, + { + "epoch": 0.5889393042677714, + "grad_norm": 5.687252203871965, + "learning_rate": 5e-06, + "loss": 0.1541, + "num_input_tokens_seen": 384566100, + "step": 2239 + }, + { + "epoch": 0.5889393042677714, + "loss": 0.2502034306526184, + "loss_ce": 0.004079162143170834, + "loss_iou": 0.5078125, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 384566100, + "step": 2239 + }, + { + "epoch": 0.5892023410271585, + "grad_norm": 5.391300577620961, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 384738000, + "step": 2240 + }, + { + "epoch": 0.5892023410271585, + "loss": 0.09789521992206573, + "loss_ce": 0.0031381379812955856, + "loss_iou": 0.63671875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 384738000, + "step": 2240 + }, + { + "epoch": 0.5894653777865456, + "grad_norm": 2.700202309873461, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 384910408, + "step": 2241 + }, + { + "epoch": 0.5894653777865456, + "loss": 0.18347935378551483, + "loss_ce": 0.00031284932629205287, + "loss_iou": 0.484375, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 384910408, + "step": 2241 + }, + { + "epoch": 0.5897284145459328, + "grad_norm": 8.687970034600534, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 385082740, + "step": 2242 + }, + { + "epoch": 0.5897284145459328, + "loss": 0.11214028298854828, + "loss_ce": 0.0005375072360038757, + "loss_iou": 0.5859375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 385082740, + "step": 2242 + }, + { + "epoch": 0.5899914513053199, + "grad_norm": 7.347975569465696, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 385254812, + "step": 2243 + }, + { + "epoch": 0.5899914513053199, + "loss": 0.10931709408760071, + "loss_ce": 0.000491409155074507, + "loss_iou": 0.546875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 385254812, + "step": 2243 + }, + { + "epoch": 0.590254488064707, + "grad_norm": 5.972916509227761, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 385427116, + "step": 2244 + }, + { + "epoch": 0.590254488064707, + "loss": 0.06860466301441193, + "loss_ce": 0.0034191168379038572, + "loss_iou": 0.61328125, + "loss_num": 0.01300048828125, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 385427116, + "step": 2244 + }, + { + "epoch": 0.5905175248240941, + "grad_norm": 5.792932058485695, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 385599308, + "step": 2245 + }, + { + "epoch": 0.5905175248240941, + "loss": 0.09140485525131226, + "loss_ce": 0.0006760837859474123, + "loss_iou": 0.578125, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 385599308, + "step": 2245 + }, + { + "epoch": 0.5907805615834812, + "grad_norm": 6.999898072921272, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 385771596, + "step": 2246 + }, + { + "epoch": 0.5907805615834812, + "loss": 0.18632760643959045, + "loss_ce": 0.0021235125605016947, + "loss_iou": 0.416015625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 385771596, + "step": 2246 + }, + { + "epoch": 0.5910435983428685, + "grad_norm": 18.38237494107746, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 385940788, + "step": 2247 + }, + { + "epoch": 0.5910435983428685, + "loss": 0.16689589619636536, + "loss_ce": 0.004466078244149685, + "loss_iou": 0.40625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 385940788, + "step": 2247 + }, + { + "epoch": 0.5913066351022556, + "grad_norm": 7.020065723323422, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 386112940, + "step": 2248 + }, + { + "epoch": 0.5913066351022556, + "loss": 0.06020001322031021, + "loss_ce": 0.0016672981437295675, + "loss_iou": 0.55859375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 386112940, + "step": 2248 + }, + { + "epoch": 0.5915696718616427, + "grad_norm": 4.523572034689062, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 386285088, + "step": 2249 + }, + { + "epoch": 0.5915696718616427, + "loss": 0.10929292440414429, + "loss_ce": 0.002771313302218914, + "loss_iou": 0.5234375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 386285088, + "step": 2249 + }, + { + "epoch": 0.5918327086210298, + "grad_norm": 4.838564447785546, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_websight_new_CIoU": 0.8620143532752991, + "eval_websight_new_GIoU": 0.8631013035774231, + "eval_websight_new_IoU": 0.8667041063308716, + "eval_websight_new_MAE_all": 0.020488929003477097, + "eval_websight_new_MAE_h": 0.008965343236923218, + "eval_websight_new_MAE_w": 0.0330337006598711, + "eval_websight_new_MAE_x": 0.03212242014706135, + "eval_websight_new_MAE_y": 0.007834249641746283, + "eval_websight_new_NUM_probability": 0.9999847710132599, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.10749460011720657, + "eval_websight_new_loss_ce": 1.4298896530817728e-05, + "eval_websight_new_loss_iou": 0.3743896484375, + "eval_websight_new_loss_num": 0.019153594970703125, + "eval_websight_new_loss_xval": 0.095703125, + "eval_websight_new_runtime": 58.289, + "eval_websight_new_samples_per_second": 0.858, + "eval_websight_new_steps_per_second": 0.034, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_seeclick_CIoU": 0.6236494481563568, + "eval_seeclick_GIoU": 0.6230664253234863, + "eval_seeclick_IoU": 0.6453896760940552, + "eval_seeclick_MAE_all": 0.046443790197372437, + "eval_seeclick_MAE_h": 0.026547173038125038, + "eval_seeclick_MAE_w": 0.06541823036968708, + "eval_seeclick_MAE_x": 0.06924234330654144, + "eval_seeclick_MAE_y": 0.024567410349845886, + "eval_seeclick_NUM_probability": 0.9999750256538391, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.21426968276500702, + "eval_seeclick_loss_ce": 0.009121979121118784, + "eval_seeclick_loss_iou": 0.506591796875, + "eval_seeclick_loss_num": 0.039905548095703125, + "eval_seeclick_loss_xval": 0.1995849609375, + "eval_seeclick_runtime": 71.9379, + "eval_seeclick_samples_per_second": 0.598, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_icons_CIoU": 0.834777295589447, + "eval_icons_GIoU": 0.8241135478019714, + "eval_icons_IoU": 0.8441117405891418, + "eval_icons_MAE_all": 0.02446969971060753, + "eval_icons_MAE_h": 0.024498000741004944, + "eval_icons_MAE_w": 0.02540498599410057, + "eval_icons_MAE_x": 0.02417761366814375, + "eval_icons_MAE_y": 0.02379819191992283, + "eval_icons_NUM_probability": 0.9999534487724304, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.0800996944308281, + "eval_icons_loss_ce": 2.8939639378222637e-05, + "eval_icons_loss_iou": 0.520263671875, + "eval_icons_loss_num": 0.014501571655273438, + "eval_icons_loss_xval": 0.072509765625, + "eval_icons_runtime": 87.9614, + "eval_icons_samples_per_second": 0.568, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "eval_screenspot_CIoU": 0.5634338855743408, + "eval_screenspot_GIoU": 0.5513045191764832, + "eval_screenspot_IoU": 0.6039714018503824, + "eval_screenspot_MAE_all": 0.08249951650698979, + "eval_screenspot_MAE_h": 0.04705421378215154, + "eval_screenspot_MAE_w": 0.14340341091156006, + "eval_screenspot_MAE_x": 0.09280380109945933, + "eval_screenspot_MAE_y": 0.04673664582272371, + "eval_screenspot_NUM_probability": 0.99980628490448, + "eval_screenspot_inside_bbox": 0.8454166650772095, + "eval_screenspot_loss": 0.8701639175415039, + "eval_screenspot_loss_ce": 0.543925940990448, + "eval_screenspot_loss_iou": 0.45556640625, + "eval_screenspot_loss_num": 0.06413777669270833, + "eval_screenspot_loss_xval": 0.3208414713541667, + "eval_screenspot_runtime": 147.2039, + "eval_screenspot_samples_per_second": 0.605, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5918327086210298, + "loss": 0.8634153604507446, + "loss_ce": 0.5327268838882446, + "loss_iou": 0.388671875, + "loss_num": 0.06591796875, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 386457364, + "step": 2250 + }, + { + "epoch": 0.5920957453804169, + "grad_norm": 17.382874089820607, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 386629432, + "step": 2251 + }, + { + "epoch": 0.5920957453804169, + "loss": 0.06886275112628937, + "loss_ce": 0.0024259830825030804, + "loss_iou": 0.55078125, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 386629432, + "step": 2251 + }, + { + "epoch": 0.5923587821398041, + "grad_norm": 4.874671437046262, + "learning_rate": 5e-06, + "loss": 0.1568, + "num_input_tokens_seen": 386801572, + "step": 2252 + }, + { + "epoch": 0.5923587821398041, + "loss": 0.14957007765769958, + "loss_ce": 0.0020786237437278032, + "loss_iou": 0.51171875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 386801572, + "step": 2252 + }, + { + "epoch": 0.5926218188991912, + "grad_norm": 5.10088801695758, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 386973592, + "step": 2253 + }, + { + "epoch": 0.5926218188991912, + "loss": 0.14472725987434387, + "loss_ce": 0.00031808449421077967, + "loss_iou": 0.357421875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 386973592, + "step": 2253 + }, + { + "epoch": 0.5928848556585783, + "grad_norm": 7.206762089035192, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 387143876, + "step": 2254 + }, + { + "epoch": 0.5928848556585783, + "loss": 0.1427307277917862, + "loss_ce": 0.0004577827639877796, + "loss_iou": 0.490234375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 387143876, + "step": 2254 + }, + { + "epoch": 0.5931478924179654, + "grad_norm": 28.833326639866428, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 387316360, + "step": 2255 + }, + { + "epoch": 0.5931478924179654, + "loss": 0.10336272418498993, + "loss_ce": 0.0003353758074808866, + "loss_iou": 0.578125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 387316360, + "step": 2255 + }, + { + "epoch": 0.5934109291773525, + "grad_norm": 11.869740305960864, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 387488944, + "step": 2256 + }, + { + "epoch": 0.5934109291773525, + "loss": 0.0730450302362442, + "loss_ce": 0.0006573314312845469, + "loss_iou": 0.5859375, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 387488944, + "step": 2256 + }, + { + "epoch": 0.5936739659367397, + "grad_norm": 8.676646075770014, + "learning_rate": 5e-06, + "loss": 0.1525, + "num_input_tokens_seen": 387661216, + "step": 2257 + }, + { + "epoch": 0.5936739659367397, + "loss": 0.09287041425704956, + "loss_ce": 0.00037164040259085596, + "loss_iou": 0.48046875, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 387661216, + "step": 2257 + }, + { + "epoch": 0.5939370026961268, + "grad_norm": 6.091102593612703, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 387833316, + "step": 2258 + }, + { + "epoch": 0.5939370026961268, + "loss": 0.047980114817619324, + "loss_ce": 0.002722549019381404, + "loss_iou": 0.52734375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 387833316, + "step": 2258 + }, + { + "epoch": 0.5942000394555139, + "grad_norm": 13.12094289217443, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 388005488, + "step": 2259 + }, + { + "epoch": 0.5942000394555139, + "loss": 0.1262204349040985, + "loss_ce": 0.006103249732404947, + "loss_iou": 0.4453125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 388005488, + "step": 2259 + }, + { + "epoch": 0.594463076214901, + "grad_norm": 6.977408677449423, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 388178100, + "step": 2260 + }, + { + "epoch": 0.594463076214901, + "loss": 0.09303668141365051, + "loss_ce": 0.0011177423875778913, + "loss_iou": 0.57421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 388178100, + "step": 2260 + }, + { + "epoch": 0.5947261129742881, + "grad_norm": 5.548896564721941, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 388349964, + "step": 2261 + }, + { + "epoch": 0.5947261129742881, + "loss": 0.07538396120071411, + "loss_ce": 0.0003412406367715448, + "loss_iou": 0.49609375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 388349964, + "step": 2261 + }, + { + "epoch": 0.5949891497336752, + "grad_norm": 3.9845795265650694, + "learning_rate": 5e-06, + "loss": 0.1365, + "num_input_tokens_seen": 388520668, + "step": 2262 + }, + { + "epoch": 0.5949891497336752, + "loss": 0.18895787000656128, + "loss_ce": 0.00042028201278299093, + "loss_iou": 0.4765625, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 388520668, + "step": 2262 + }, + { + "epoch": 0.5952521864930624, + "grad_norm": 5.603595618443302, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 388692900, + "step": 2263 + }, + { + "epoch": 0.5952521864930624, + "loss": 0.08959123492240906, + "loss_ce": 0.0002662862534634769, + "loss_iou": 0.427734375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 388692900, + "step": 2263 + }, + { + "epoch": 0.5955152232524495, + "grad_norm": 5.071553765162839, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 388865328, + "step": 2264 + }, + { + "epoch": 0.5955152232524495, + "loss": 0.07327542454004288, + "loss_ce": 0.0008877270738594234, + "loss_iou": 0.5390625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 388865328, + "step": 2264 + }, + { + "epoch": 0.5957782600118366, + "grad_norm": 7.1017098248073385, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 389037536, + "step": 2265 + }, + { + "epoch": 0.5957782600118366, + "loss": 0.15171518921852112, + "loss_ce": 0.0005311004933901131, + "loss_iou": 0.44921875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 389037536, + "step": 2265 + }, + { + "epoch": 0.5960412967712237, + "grad_norm": 6.446588247534734, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 389209660, + "step": 2266 + }, + { + "epoch": 0.5960412967712237, + "loss": 0.11115900427103043, + "loss_ce": 0.00028864690102636814, + "loss_iou": 0.50390625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 389209660, + "step": 2266 + }, + { + "epoch": 0.5963043335306109, + "grad_norm": 9.702541385516984, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 389381624, + "step": 2267 + }, + { + "epoch": 0.5963043335306109, + "loss": 0.13201884925365448, + "loss_ce": 0.00581339979544282, + "loss_iou": 0.48046875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 389381624, + "step": 2267 + }, + { + "epoch": 0.5965673702899981, + "grad_norm": 4.325039584368215, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 389553348, + "step": 2268 + }, + { + "epoch": 0.5965673702899981, + "loss": 0.09481080621480942, + "loss_ce": 0.0006488211220130324, + "loss_iou": 0.6640625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 389553348, + "step": 2268 + }, + { + "epoch": 0.5968304070493852, + "grad_norm": 4.06473977313393, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 389723828, + "step": 2269 + }, + { + "epoch": 0.5968304070493852, + "loss": 0.130482017993927, + "loss_ce": 0.00023298643645830452, + "loss_iou": 0.57421875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 389723828, + "step": 2269 + }, + { + "epoch": 0.5970934438087723, + "grad_norm": 11.83991543858794, + "learning_rate": 5e-06, + "loss": 0.1278, + "num_input_tokens_seen": 389896036, + "step": 2270 + }, + { + "epoch": 0.5970934438087723, + "loss": 0.1708485186100006, + "loss_ce": 0.0017506256699562073, + "loss_iou": 0.5625, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 389896036, + "step": 2270 + }, + { + "epoch": 0.5973564805681594, + "grad_norm": 4.637610311727533, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 390068288, + "step": 2271 + }, + { + "epoch": 0.5973564805681594, + "loss": 0.12343515455722809, + "loss_ce": 0.0004340623854659498, + "loss_iou": NaN, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 390068288, + "step": 2271 + }, + { + "epoch": 0.5976195173275465, + "grad_norm": 4.239687367925931, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 390240724, + "step": 2272 + }, + { + "epoch": 0.5976195173275465, + "loss": 0.09358179569244385, + "loss_ce": 0.0032497686333954334, + "loss_iou": 0.65234375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 390240724, + "step": 2272 + }, + { + "epoch": 0.5978825540869337, + "grad_norm": 3.650780325093881, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 390413092, + "step": 2273 + }, + { + "epoch": 0.5978825540869337, + "loss": 0.0876360684633255, + "loss_ce": 0.00032527127768844366, + "loss_iou": 0.515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 390413092, + "step": 2273 + }, + { + "epoch": 0.5981455908463208, + "grad_norm": 4.543660924331624, + "learning_rate": 5e-06, + "loss": 0.0989, + "num_input_tokens_seen": 390585320, + "step": 2274 + }, + { + "epoch": 0.5981455908463208, + "loss": 0.1153046190738678, + "loss_ce": 0.0005890398169867694, + "loss_iou": 0.3671875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 390585320, + "step": 2274 + }, + { + "epoch": 0.5984086276057079, + "grad_norm": 5.356363884549334, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 390757376, + "step": 2275 + }, + { + "epoch": 0.5984086276057079, + "loss": 0.1397184282541275, + "loss_ce": 0.0057157427072525024, + "loss_iou": 0.5, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 390757376, + "step": 2275 + }, + { + "epoch": 0.598671664365095, + "grad_norm": 3.3713877207341088, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 390929460, + "step": 2276 + }, + { + "epoch": 0.598671664365095, + "loss": 0.0523032546043396, + "loss_ce": 0.000316562014631927, + "loss_iou": 0.458984375, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 390929460, + "step": 2276 + }, + { + "epoch": 0.5989347011244821, + "grad_norm": 10.008368190926095, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 391100108, + "step": 2277 + }, + { + "epoch": 0.5989347011244821, + "loss": 0.08954879641532898, + "loss_ce": 0.0027873138897120953, + "loss_iou": 0.55078125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 391100108, + "step": 2277 + }, + { + "epoch": 0.5991977378838693, + "grad_norm": 13.178397333981023, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 391272352, + "step": 2278 + }, + { + "epoch": 0.5991977378838693, + "loss": 0.1727224737405777, + "loss_ce": 0.00011504795111250132, + "loss_iou": 0.53125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 391272352, + "step": 2278 + }, + { + "epoch": 0.5994607746432564, + "grad_norm": 5.1375780540443134, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 391444648, + "step": 2279 + }, + { + "epoch": 0.5994607746432564, + "loss": 0.05273713544011116, + "loss_ce": 0.0022915778681635857, + "loss_iou": 0.478515625, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 391444648, + "step": 2279 + }, + { + "epoch": 0.5997238114026435, + "grad_norm": 25.459943093434337, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 391616960, + "step": 2280 + }, + { + "epoch": 0.5997238114026435, + "loss": 0.1544933021068573, + "loss_ce": 0.0004405686049722135, + "loss_iou": 0.6796875, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 391616960, + "step": 2280 + }, + { + "epoch": 0.5999868481620306, + "grad_norm": 4.8890917043699185, + "learning_rate": 5e-06, + "loss": 0.1464, + "num_input_tokens_seen": 391788924, + "step": 2281 + }, + { + "epoch": 0.5999868481620306, + "loss": 0.06912894546985626, + "loss_ce": 0.006689978763461113, + "loss_iou": 0.55859375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 391788924, + "step": 2281 + }, + { + "epoch": 0.6002498849214177, + "grad_norm": 6.219770384955685, + "learning_rate": 5e-06, + "loss": 0.1357, + "num_input_tokens_seen": 391961072, + "step": 2282 + }, + { + "epoch": 0.6002498849214177, + "loss": 0.07950527220964432, + "loss_ce": 0.00041896995389834046, + "loss_iou": 0.4140625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 391961072, + "step": 2282 + }, + { + "epoch": 0.600512921680805, + "grad_norm": 3.971504685954048, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 392130392, + "step": 2283 + }, + { + "epoch": 0.600512921680805, + "loss": 0.11902518570423126, + "loss_ce": 0.0007390595856122673, + "loss_iou": 0.51953125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 392130392, + "step": 2283 + }, + { + "epoch": 0.600775958440192, + "grad_norm": 3.36927604682008, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 392300644, + "step": 2284 + }, + { + "epoch": 0.600775958440192, + "loss": 0.09242402017116547, + "loss_ce": 0.0041824462823569775, + "loss_iou": 0.375, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 392300644, + "step": 2284 + }, + { + "epoch": 0.6010389951995792, + "grad_norm": 5.15292132366606, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 392472968, + "step": 2285 + }, + { + "epoch": 0.6010389951995792, + "loss": 0.14593744277954102, + "loss_ce": 0.0034508705139160156, + "loss_iou": 0.484375, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 392472968, + "step": 2285 + }, + { + "epoch": 0.6013020319589663, + "grad_norm": 4.237274665038102, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 392644756, + "step": 2286 + }, + { + "epoch": 0.6013020319589663, + "loss": 0.07083064317703247, + "loss_ce": 0.00170833186712116, + "loss_iou": 0.59765625, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 392644756, + "step": 2286 + }, + { + "epoch": 0.6015650687183534, + "grad_norm": 43.77503497663098, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 392816816, + "step": 2287 + }, + { + "epoch": 0.6015650687183534, + "loss": 0.09309446811676025, + "loss_ce": 0.00016844802303239703, + "loss_iou": 0.5390625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 392816816, + "step": 2287 + }, + { + "epoch": 0.6018281054777405, + "grad_norm": 8.046683810113267, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 392987164, + "step": 2288 + }, + { + "epoch": 0.6018281054777405, + "loss": 0.12998059391975403, + "loss_ce": 0.0008912362391129136, + "loss_iou": 0.5078125, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 392987164, + "step": 2288 + }, + { + "epoch": 0.6020911422371277, + "grad_norm": 6.745453284904858, + "learning_rate": 5e-06, + "loss": 0.1509, + "num_input_tokens_seen": 393159584, + "step": 2289 + }, + { + "epoch": 0.6020911422371277, + "loss": 0.18720246851444244, + "loss_ce": 0.0019302507862448692, + "loss_iou": 0.474609375, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 393159584, + "step": 2289 + }, + { + "epoch": 0.6023541789965148, + "grad_norm": 9.388482886571397, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 393331696, + "step": 2290 + }, + { + "epoch": 0.6023541789965148, + "loss": 0.153926283121109, + "loss_ce": 0.005275162868201733, + "loss_iou": 0.56640625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 393331696, + "step": 2290 + }, + { + "epoch": 0.6026172157559019, + "grad_norm": 5.436488955385426, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 393504044, + "step": 2291 + }, + { + "epoch": 0.6026172157559019, + "loss": 0.11098843812942505, + "loss_ce": 0.00027066541952081025, + "loss_iou": 0.5234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 393504044, + "step": 2291 + }, + { + "epoch": 0.602880252515289, + "grad_norm": 8.104274681671972, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 393676272, + "step": 2292 + }, + { + "epoch": 0.602880252515289, + "loss": 0.16863158345222473, + "loss_ce": 0.003592532593756914, + "loss_iou": 0.453125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 393676272, + "step": 2292 + }, + { + "epoch": 0.6031432892746761, + "grad_norm": 12.32428248921536, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 393848392, + "step": 2293 + }, + { + "epoch": 0.6031432892746761, + "loss": 0.08580140769481659, + "loss_ce": 0.0013287551701068878, + "loss_iou": 0.53125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 393848392, + "step": 2293 + }, + { + "epoch": 0.6034063260340633, + "grad_norm": 10.386325441280649, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 394020680, + "step": 2294 + }, + { + "epoch": 0.6034063260340633, + "loss": 0.07794995605945587, + "loss_ce": 0.003273440757766366, + "loss_iou": 0.5546875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 394020680, + "step": 2294 + }, + { + "epoch": 0.6036693627934504, + "grad_norm": 5.0087549483878355, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 394192740, + "step": 2295 + }, + { + "epoch": 0.6036693627934504, + "loss": 0.12549570202827454, + "loss_ce": 0.0021436563692986965, + "loss_iou": 0.51953125, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 394192740, + "step": 2295 + }, + { + "epoch": 0.6039323995528375, + "grad_norm": 12.109968647066866, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 394362948, + "step": 2296 + }, + { + "epoch": 0.6039323995528375, + "loss": 0.1552933156490326, + "loss_ce": 0.0013626604340970516, + "loss_iou": 0.625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 394362948, + "step": 2296 + }, + { + "epoch": 0.6041954363122246, + "grad_norm": 3.064699260610535, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 394534856, + "step": 2297 + }, + { + "epoch": 0.6041954363122246, + "loss": 0.13219855725765228, + "loss_ce": 0.0020563420839607716, + "loss_iou": 0.45703125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 394534856, + "step": 2297 + }, + { + "epoch": 0.6044584730716117, + "grad_norm": 9.354536255922673, + "learning_rate": 5e-06, + "loss": 0.1776, + "num_input_tokens_seen": 394707060, + "step": 2298 + }, + { + "epoch": 0.6044584730716117, + "loss": 0.16126899421215057, + "loss_ce": 0.003599932650104165, + "loss_iou": 0.26171875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 394707060, + "step": 2298 + }, + { + "epoch": 0.6047215098309989, + "grad_norm": 4.517992664125833, + "learning_rate": 5e-06, + "loss": 0.0805, + "num_input_tokens_seen": 394879016, + "step": 2299 + }, + { + "epoch": 0.6047215098309989, + "loss": 0.0760730504989624, + "loss_ce": 0.002098444849252701, + "loss_iou": 0.48046875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 394879016, + "step": 2299 + }, + { + "epoch": 0.604984546590386, + "grad_norm": 4.501760214176101, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 395051188, + "step": 2300 + }, + { + "epoch": 0.604984546590386, + "loss": 0.15630951523780823, + "loss_ce": 0.0008682362968102098, + "loss_iou": 0.578125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 395051188, + "step": 2300 + }, + { + "epoch": 0.6052475833497731, + "grad_norm": 4.195253270921506, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 395223544, + "step": 2301 + }, + { + "epoch": 0.6052475833497731, + "loss": 0.08342162519693375, + "loss_ce": 0.0002612252428662032, + "loss_iou": 0.54296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 395223544, + "step": 2301 + }, + { + "epoch": 0.6055106201091602, + "grad_norm": 5.118374892921478, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 395395376, + "step": 2302 + }, + { + "epoch": 0.6055106201091602, + "loss": 0.11348429322242737, + "loss_ce": 0.0011490845354273915, + "loss_iou": 0.390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 395395376, + "step": 2302 + }, + { + "epoch": 0.6057736568685473, + "grad_norm": 4.210270169640029, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 395567140, + "step": 2303 + }, + { + "epoch": 0.6057736568685473, + "loss": 0.08656048029661179, + "loss_ce": 0.0008671237155795097, + "loss_iou": 0.50390625, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 395567140, + "step": 2303 + }, + { + "epoch": 0.6060366936279346, + "grad_norm": 4.272171126326938, + "learning_rate": 5e-06, + "loss": 0.1251, + "num_input_tokens_seen": 395739120, + "step": 2304 + }, + { + "epoch": 0.6060366936279346, + "loss": 0.14756399393081665, + "loss_ce": 0.002758089918643236, + "loss_iou": 0.6171875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 395739120, + "step": 2304 + }, + { + "epoch": 0.6062997303873217, + "grad_norm": 8.535390245901846, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 395911508, + "step": 2305 + }, + { + "epoch": 0.6062997303873217, + "loss": 0.1481376737356186, + "loss_ce": 0.002080546924844384, + "loss_iou": 0.314453125, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 395911508, + "step": 2305 + }, + { + "epoch": 0.6065627671467088, + "grad_norm": 6.5128397067856785, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 396083668, + "step": 2306 + }, + { + "epoch": 0.6065627671467088, + "loss": 0.05626985430717468, + "loss_ce": 0.0008499314426444471, + "loss_iou": 0.62109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 396083668, + "step": 2306 + }, + { + "epoch": 0.6068258039060959, + "grad_norm": 4.785548469928152, + "learning_rate": 5e-06, + "loss": 0.0998, + "num_input_tokens_seen": 396255888, + "step": 2307 + }, + { + "epoch": 0.6068258039060959, + "loss": 0.08620062470436096, + "loss_ce": 0.002124701626598835, + "loss_iou": 0.37109375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 396255888, + "step": 2307 + }, + { + "epoch": 0.607088840665483, + "grad_norm": 11.035467862846048, + "learning_rate": 5e-06, + "loss": 0.1858, + "num_input_tokens_seen": 396427920, + "step": 2308 + }, + { + "epoch": 0.607088840665483, + "loss": 0.19853177666664124, + "loss_ce": 0.0004726996412500739, + "loss_iou": 0.46875, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 396427920, + "step": 2308 + }, + { + "epoch": 0.6073518774248702, + "grad_norm": 5.6025345838645055, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 396599820, + "step": 2309 + }, + { + "epoch": 0.6073518774248702, + "loss": 0.09216836839914322, + "loss_ce": 0.0037742014974355698, + "loss_iou": 0.4375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 396599820, + "step": 2309 + }, + { + "epoch": 0.6076149141842573, + "grad_norm": 21.234701292276423, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 396771948, + "step": 2310 + }, + { + "epoch": 0.6076149141842573, + "loss": 0.08635897189378738, + "loss_ce": 0.000482511764857918, + "loss_iou": 0.6171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 396771948, + "step": 2310 + }, + { + "epoch": 0.6078779509436444, + "grad_norm": 4.7419638393344545, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 396943768, + "step": 2311 + }, + { + "epoch": 0.6078779509436444, + "loss": 0.12838855385780334, + "loss_ce": 0.002534065628424287, + "loss_iou": 0.55078125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 396943768, + "step": 2311 + }, + { + "epoch": 0.6081409877030315, + "grad_norm": 5.73379136559562, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 397116008, + "step": 2312 + }, + { + "epoch": 0.6081409877030315, + "loss": 0.17652729153633118, + "loss_ce": 0.0020277751609683037, + "loss_iou": NaN, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 397116008, + "step": 2312 + }, + { + "epoch": 0.6084040244624186, + "grad_norm": 4.062322434152511, + "learning_rate": 5e-06, + "loss": 0.0845, + "num_input_tokens_seen": 397288120, + "step": 2313 + }, + { + "epoch": 0.6084040244624186, + "loss": 0.09052719175815582, + "loss_ce": 0.0007139619556255639, + "loss_iou": 0.470703125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 397288120, + "step": 2313 + }, + { + "epoch": 0.6086670612218057, + "grad_norm": 16.02293214120212, + "learning_rate": 5e-06, + "loss": 0.1668, + "num_input_tokens_seen": 397460304, + "step": 2314 + }, + { + "epoch": 0.6086670612218057, + "loss": 0.13694977760314941, + "loss_ce": 0.0005667208461090922, + "loss_iou": 0.69921875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 397460304, + "step": 2314 + }, + { + "epoch": 0.6089300979811929, + "grad_norm": 7.987103586052065, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 397632608, + "step": 2315 + }, + { + "epoch": 0.6089300979811929, + "loss": 0.06060004234313965, + "loss_ce": 0.002006293274462223, + "loss_iou": 0.5625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 397632608, + "step": 2315 + }, + { + "epoch": 0.60919313474058, + "grad_norm": 7.316466982553544, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 397804820, + "step": 2316 + }, + { + "epoch": 0.60919313474058, + "loss": 0.11626386642456055, + "loss_ce": 0.004111772403120995, + "loss_iou": 0.400390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 397804820, + "step": 2316 + }, + { + "epoch": 0.6094561714999671, + "grad_norm": 5.539376425734617, + "learning_rate": 5e-06, + "loss": 0.1638, + "num_input_tokens_seen": 397976940, + "step": 2317 + }, + { + "epoch": 0.6094561714999671, + "loss": 0.16956084966659546, + "loss_ce": 0.004979567602276802, + "loss_iou": 0.6171875, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 397976940, + "step": 2317 + }, + { + "epoch": 0.6097192082593542, + "grad_norm": 24.75685247660265, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 398147040, + "step": 2318 + }, + { + "epoch": 0.6097192082593542, + "loss": 0.18709902465343475, + "loss_ce": 0.0012164636282250285, + "loss_iou": NaN, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 398147040, + "step": 2318 + }, + { + "epoch": 0.6099822450187413, + "grad_norm": 4.315961063244564, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 398318996, + "step": 2319 + }, + { + "epoch": 0.6099822450187413, + "loss": 0.18130794167518616, + "loss_ce": 0.0009795635705813766, + "loss_iou": 0.5234375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 398318996, + "step": 2319 + }, + { + "epoch": 0.6102452817781285, + "grad_norm": 4.640931709162671, + "learning_rate": 5e-06, + "loss": 0.0931, + "num_input_tokens_seen": 398489328, + "step": 2320 + }, + { + "epoch": 0.6102452817781285, + "loss": 0.09983004629611969, + "loss_ce": 0.0005563638987950981, + "loss_iou": 0.546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 398489328, + "step": 2320 + }, + { + "epoch": 0.6105083185375156, + "grad_norm": 23.53542790698555, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 398661592, + "step": 2321 + }, + { + "epoch": 0.6105083185375156, + "loss": 0.14297714829444885, + "loss_ce": 0.0037559503689408302, + "loss_iou": 0.59765625, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 398661592, + "step": 2321 + }, + { + "epoch": 0.6107713552969027, + "grad_norm": 4.191481208630257, + "learning_rate": 5e-06, + "loss": 0.1143, + "num_input_tokens_seen": 398833940, + "step": 2322 + }, + { + "epoch": 0.6107713552969027, + "loss": 0.1795070767402649, + "loss_ce": 0.003298588562756777, + "loss_iou": 0.51171875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 398833940, + "step": 2322 + }, + { + "epoch": 0.6110343920562898, + "grad_norm": 4.031146460659383, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 399006244, + "step": 2323 + }, + { + "epoch": 0.6110343920562898, + "loss": 0.0546613447368145, + "loss_ce": 0.0011029954766854644, + "loss_iou": 0.45703125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 399006244, + "step": 2323 + }, + { + "epoch": 0.611297428815677, + "grad_norm": 4.121680425084659, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 399178456, + "step": 2324 + }, + { + "epoch": 0.611297428815677, + "loss": 0.16250503063201904, + "loss_ce": 0.002928605070337653, + "loss_iou": 0.61328125, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 399178456, + "step": 2324 + }, + { + "epoch": 0.6115604655750642, + "grad_norm": 4.598379596612616, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 399350964, + "step": 2325 + }, + { + "epoch": 0.6115604655750642, + "loss": 0.06914816796779633, + "loss_ce": 0.002833473263308406, + "loss_iou": 0.57421875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 399350964, + "step": 2325 + }, + { + "epoch": 0.6118235023344513, + "grad_norm": 7.270975374787245, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 399523228, + "step": 2326 + }, + { + "epoch": 0.6118235023344513, + "loss": 0.08900976926088333, + "loss_ce": 0.0011801763903349638, + "loss_iou": 0.7421875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 399523228, + "step": 2326 + }, + { + "epoch": 0.6120865390938384, + "grad_norm": 4.064818082785728, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 399691856, + "step": 2327 + }, + { + "epoch": 0.6120865390938384, + "loss": 0.10691290348768234, + "loss_ce": 0.00043707285658456385, + "loss_iou": 0.6328125, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 399691856, + "step": 2327 + }, + { + "epoch": 0.6123495758532255, + "grad_norm": 7.343987420034391, + "learning_rate": 5e-06, + "loss": 0.1249, + "num_input_tokens_seen": 399864108, + "step": 2328 + }, + { + "epoch": 0.6123495758532255, + "loss": 0.09687530994415283, + "loss_ce": 0.001996160950511694, + "loss_iou": 0.58984375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 399864108, + "step": 2328 + }, + { + "epoch": 0.6126126126126126, + "grad_norm": 5.275060955551622, + "learning_rate": 5e-06, + "loss": 0.0841, + "num_input_tokens_seen": 400036500, + "step": 2329 + }, + { + "epoch": 0.6126126126126126, + "loss": 0.08277605473995209, + "loss_ce": 0.004162768833339214, + "loss_iou": 0.5078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 400036500, + "step": 2329 + }, + { + "epoch": 0.6128756493719998, + "grad_norm": 4.648207674990067, + "learning_rate": 5e-06, + "loss": 0.1259, + "num_input_tokens_seen": 400206892, + "step": 2330 + }, + { + "epoch": 0.6128756493719998, + "loss": 0.19954022765159607, + "loss_ce": 0.0007487052353098989, + "loss_iou": 0.447265625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 400206892, + "step": 2330 + }, + { + "epoch": 0.6131386861313869, + "grad_norm": 6.976991385409509, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 400377220, + "step": 2331 + }, + { + "epoch": 0.6131386861313869, + "loss": 0.10119281709194183, + "loss_ce": 0.0006984363426454365, + "loss_iou": 0.5234375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 400377220, + "step": 2331 + }, + { + "epoch": 0.613401722890774, + "grad_norm": 12.605542023426503, + "learning_rate": 5e-06, + "loss": 0.1478, + "num_input_tokens_seen": 400549468, + "step": 2332 + }, + { + "epoch": 0.613401722890774, + "loss": 0.20965467393398285, + "loss_ce": 0.009276244789361954, + "loss_iou": 0.62109375, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 400549468, + "step": 2332 + }, + { + "epoch": 0.6136647596501611, + "grad_norm": 4.707098406894473, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 400721520, + "step": 2333 + }, + { + "epoch": 0.6136647596501611, + "loss": 0.11055370420217514, + "loss_ce": 0.0006599072366952896, + "loss_iou": 0.4140625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 400721520, + "step": 2333 + }, + { + "epoch": 0.6139277964095482, + "grad_norm": 14.198987666898462, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 400893600, + "step": 2334 + }, + { + "epoch": 0.6139277964095482, + "loss": 0.11723913997411728, + "loss_ce": 0.0025235607754439116, + "loss_iou": NaN, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 400893600, + "step": 2334 + }, + { + "epoch": 0.6141908331689354, + "grad_norm": 4.053613863943027, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 401065668, + "step": 2335 + }, + { + "epoch": 0.6141908331689354, + "loss": 0.07270236313343048, + "loss_ce": 0.0009250181610696018, + "loss_iou": 0.51953125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 401065668, + "step": 2335 + }, + { + "epoch": 0.6144538699283225, + "grad_norm": 5.7996514772347005, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 401237708, + "step": 2336 + }, + { + "epoch": 0.6144538699283225, + "loss": 0.07430876046419144, + "loss_ce": 0.0035384970251470804, + "loss_iou": 0.58984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 401237708, + "step": 2336 + }, + { + "epoch": 0.6147169066877096, + "grad_norm": 4.799102165669682, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 401410012, + "step": 2337 + }, + { + "epoch": 0.6147169066877096, + "loss": 0.0582679845392704, + "loss_ce": 0.0014747708337381482, + "loss_iou": 0.61328125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 401410012, + "step": 2337 + }, + { + "epoch": 0.6149799434470967, + "grad_norm": 4.382135516303559, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 401582352, + "step": 2338 + }, + { + "epoch": 0.6149799434470967, + "loss": 0.09339425712823868, + "loss_ce": 0.00275705405510962, + "loss_iou": 0.63671875, + "loss_num": 0.01806640625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 401582352, + "step": 2338 + }, + { + "epoch": 0.6152429802064838, + "grad_norm": 4.340567932001983, + "learning_rate": 5e-06, + "loss": 0.1484, + "num_input_tokens_seen": 401754308, + "step": 2339 + }, + { + "epoch": 0.6152429802064838, + "loss": 0.14638805389404297, + "loss_ce": 0.0007886901148594916, + "loss_iou": 0.53125, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 401754308, + "step": 2339 + }, + { + "epoch": 0.6155060169658709, + "grad_norm": 6.569770077552981, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 401926216, + "step": 2340 + }, + { + "epoch": 0.6155060169658709, + "loss": 0.12163828313350677, + "loss_ce": 0.004069316200911999, + "loss_iou": 0.50390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 401926216, + "step": 2340 + }, + { + "epoch": 0.6157690537252581, + "grad_norm": 4.067934681272699, + "learning_rate": 5e-06, + "loss": 0.096, + "num_input_tokens_seen": 402098264, + "step": 2341 + }, + { + "epoch": 0.6157690537252581, + "loss": 0.0871957540512085, + "loss_ce": 0.0004342720494605601, + "loss_iou": 0.52734375, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 402098264, + "step": 2341 + }, + { + "epoch": 0.6160320904846452, + "grad_norm": 4.130435258439036, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 402270372, + "step": 2342 + }, + { + "epoch": 0.6160320904846452, + "loss": 0.046382177621126175, + "loss_ce": 7.175222708610818e-05, + "loss_iou": 0.400390625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 402270372, + "step": 2342 + }, + { + "epoch": 0.6162951272440323, + "grad_norm": 5.637480244426784, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 402440632, + "step": 2343 + }, + { + "epoch": 0.6162951272440323, + "loss": 0.07675184309482574, + "loss_ce": 0.0019837813451886177, + "loss_iou": 0.361328125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 402440632, + "step": 2343 + }, + { + "epoch": 0.6165581640034195, + "grad_norm": 8.043736182852212, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 402612920, + "step": 2344 + }, + { + "epoch": 0.6165581640034195, + "loss": 0.09050323814153671, + "loss_ce": 0.00018646713579073548, + "loss_iou": 0.6328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 402612920, + "step": 2344 + }, + { + "epoch": 0.6168212007628066, + "grad_norm": 6.221194498282131, + "learning_rate": 5e-06, + "loss": 0.1581, + "num_input_tokens_seen": 402785192, + "step": 2345 + }, + { + "epoch": 0.6168212007628066, + "loss": 0.14238294959068298, + "loss_ce": 0.0007356047863140702, + "loss_iou": 0.34765625, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 402785192, + "step": 2345 + }, + { + "epoch": 0.6170842375221938, + "grad_norm": 4.044951640988235, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 402955576, + "step": 2346 + }, + { + "epoch": 0.6170842375221938, + "loss": 0.0908626914024353, + "loss_ce": 0.0020260235760360956, + "loss_iou": 0.5, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 402955576, + "step": 2346 + }, + { + "epoch": 0.6173472742815809, + "grad_norm": 6.79401496913443, + "learning_rate": 5e-06, + "loss": 0.1167, + "num_input_tokens_seen": 403125596, + "step": 2347 + }, + { + "epoch": 0.6173472742815809, + "loss": 0.19389519095420837, + "loss_ce": 0.0025499900802969933, + "loss_iou": 0.52734375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 403125596, + "step": 2347 + }, + { + "epoch": 0.617610311040968, + "grad_norm": 4.6407931564353255, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 403295848, + "step": 2348 + }, + { + "epoch": 0.617610311040968, + "loss": 0.12875889241695404, + "loss_ce": 0.0007681695278733969, + "loss_iou": 0.671875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 403295848, + "step": 2348 + }, + { + "epoch": 0.6178733478003551, + "grad_norm": 15.448434585562659, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 403468128, + "step": 2349 + }, + { + "epoch": 0.6178733478003551, + "loss": 0.12984147667884827, + "loss_ce": 0.00026383629301562905, + "loss_iou": 0.578125, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 403468128, + "step": 2349 + }, + { + "epoch": 0.6181363845597422, + "grad_norm": 6.470906735340733, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 403640328, + "step": 2350 + }, + { + "epoch": 0.6181363845597422, + "loss": 0.08621760457754135, + "loss_ce": 0.002935132011771202, + "loss_iou": 0.47265625, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 403640328, + "step": 2350 + }, + { + "epoch": 0.6183994213191294, + "grad_norm": 3.904089267434168, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 403812552, + "step": 2351 + }, + { + "epoch": 0.6183994213191294, + "loss": 0.09386193752288818, + "loss_ce": 0.00026452430756762624, + "loss_iou": 0.388671875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 403812552, + "step": 2351 + }, + { + "epoch": 0.6186624580785165, + "grad_norm": 6.850647458248975, + "learning_rate": 5e-06, + "loss": 0.1608, + "num_input_tokens_seen": 403984536, + "step": 2352 + }, + { + "epoch": 0.6186624580785165, + "loss": 0.10526977479457855, + "loss_ce": 0.001601562718860805, + "loss_iou": 0.50390625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 403984536, + "step": 2352 + }, + { + "epoch": 0.6189254948379036, + "grad_norm": 6.380076499827068, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 404156840, + "step": 2353 + }, + { + "epoch": 0.6189254948379036, + "loss": 0.10814794898033142, + "loss_ce": 0.00020728506206069142, + "loss_iou": 0.5, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 404156840, + "step": 2353 + }, + { + "epoch": 0.6191885315972907, + "grad_norm": 16.46365513417012, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 404328912, + "step": 2354 + }, + { + "epoch": 0.6191885315972907, + "loss": 0.17106536030769348, + "loss_ce": 0.00019742565928027034, + "loss_iou": 0.48828125, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 404328912, + "step": 2354 + }, + { + "epoch": 0.6194515683566778, + "grad_norm": 12.873967018770987, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 404501048, + "step": 2355 + }, + { + "epoch": 0.6194515683566778, + "loss": 0.1692376732826233, + "loss_ce": 0.0014825284015387297, + "loss_iou": 0.443359375, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 404501048, + "step": 2355 + }, + { + "epoch": 0.619714605116065, + "grad_norm": 3.2257038395599706, + "learning_rate": 5e-06, + "loss": 0.1655, + "num_input_tokens_seen": 404673104, + "step": 2356 + }, + { + "epoch": 0.619714605116065, + "loss": 0.23891915380954742, + "loss_ce": 0.007901079021394253, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 404673104, + "step": 2356 + }, + { + "epoch": 0.6199776418754521, + "grad_norm": 7.80566516619346, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 404845264, + "step": 2357 + }, + { + "epoch": 0.6199776418754521, + "loss": 0.17429864406585693, + "loss_ce": 0.005078674294054508, + "loss_iou": 0.4375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 404845264, + "step": 2357 + }, + { + "epoch": 0.6202406786348392, + "grad_norm": 4.957580646805035, + "learning_rate": 5e-06, + "loss": 0.0895, + "num_input_tokens_seen": 405017460, + "step": 2358 + }, + { + "epoch": 0.6202406786348392, + "loss": 0.05168257653713226, + "loss_ce": 0.003480062121525407, + "loss_iou": 0.55078125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 405017460, + "step": 2358 + }, + { + "epoch": 0.6205037153942263, + "grad_norm": 4.916881514910221, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 405189524, + "step": 2359 + }, + { + "epoch": 0.6205037153942263, + "loss": 0.06677095592021942, + "loss_ce": 0.001280229538679123, + "loss_iou": 0.51171875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 405189524, + "step": 2359 + }, + { + "epoch": 0.6207667521536134, + "grad_norm": 3.691480970561855, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 405362028, + "step": 2360 + }, + { + "epoch": 0.6207667521536134, + "loss": 0.09511809051036835, + "loss_ce": 5.582500307355076e-05, + "loss_iou": 0.49609375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 405362028, + "step": 2360 + }, + { + "epoch": 0.6210297889130006, + "grad_norm": 23.640084314089293, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 405534164, + "step": 2361 + }, + { + "epoch": 0.6210297889130006, + "loss": 0.1902218908071518, + "loss_ce": 0.00021944480249658227, + "loss_iou": 0.4140625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 405534164, + "step": 2361 + }, + { + "epoch": 0.6212928256723877, + "grad_norm": 3.82381362472469, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 405706288, + "step": 2362 + }, + { + "epoch": 0.6212928256723877, + "loss": 0.07140006124973297, + "loss_ce": 0.0007060917560011148, + "loss_iou": 0.494140625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 405706288, + "step": 2362 + }, + { + "epoch": 0.6215558624317749, + "grad_norm": 3.7402655746033293, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 405878688, + "step": 2363 + }, + { + "epoch": 0.6215558624317749, + "loss": 0.09456443786621094, + "loss_ce": 0.0030269669368863106, + "loss_iou": 0.390625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 405878688, + "step": 2363 + }, + { + "epoch": 0.621818899191162, + "grad_norm": 5.548477550947741, + "learning_rate": 5e-06, + "loss": 0.1307, + "num_input_tokens_seen": 406050532, + "step": 2364 + }, + { + "epoch": 0.621818899191162, + "loss": 0.12644195556640625, + "loss_ce": 0.005317692644894123, + "loss_iou": 0.51171875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 406050532, + "step": 2364 + }, + { + "epoch": 0.6220819359505491, + "grad_norm": 8.39488287912259, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 406220760, + "step": 2365 + }, + { + "epoch": 0.6220819359505491, + "loss": 0.13592961430549622, + "loss_ce": 0.0007520002545788884, + "loss_iou": 0.3203125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 406220760, + "step": 2365 + }, + { + "epoch": 0.6223449727099362, + "grad_norm": 4.84850760753409, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 406393068, + "step": 2366 + }, + { + "epoch": 0.6223449727099362, + "loss": 0.09754068404436111, + "loss_ce": 0.0005253083654679358, + "loss_iou": 0.4296875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 406393068, + "step": 2366 + }, + { + "epoch": 0.6226080094693234, + "grad_norm": 4.613478147053686, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 406565304, + "step": 2367 + }, + { + "epoch": 0.6226080094693234, + "loss": 0.1131492406129837, + "loss_ce": 0.002370435046032071, + "loss_iou": 0.625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 406565304, + "step": 2367 + }, + { + "epoch": 0.6228710462287105, + "grad_norm": 4.098258220864737, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 406737308, + "step": 2368 + }, + { + "epoch": 0.6228710462287105, + "loss": 0.09665839374065399, + "loss_ce": 0.0019318348495289683, + "loss_iou": 0.498046875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 406737308, + "step": 2368 + }, + { + "epoch": 0.6231340829880976, + "grad_norm": 10.800256206470614, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 406909296, + "step": 2369 + }, + { + "epoch": 0.6231340829880976, + "loss": 0.08466358482837677, + "loss_ce": 0.0036088963970541954, + "loss_iou": 0.546875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 406909296, + "step": 2369 + }, + { + "epoch": 0.6233971197474847, + "grad_norm": 4.097548305836531, + "learning_rate": 5e-06, + "loss": 0.133, + "num_input_tokens_seen": 407078532, + "step": 2370 + }, + { + "epoch": 0.6233971197474847, + "loss": 0.14105001091957092, + "loss_ce": 0.0007301777368411422, + "loss_iou": 0.53515625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 407078532, + "step": 2370 + }, + { + "epoch": 0.6236601565068718, + "grad_norm": 4.885935118310897, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 407250520, + "step": 2371 + }, + { + "epoch": 0.6236601565068718, + "loss": 0.185832679271698, + "loss_ce": 0.001598058152012527, + "loss_iou": 0.365234375, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 407250520, + "step": 2371 + }, + { + "epoch": 0.623923193266259, + "grad_norm": 4.243940025684864, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 407420132, + "step": 2372 + }, + { + "epoch": 0.623923193266259, + "loss": 0.17022864520549774, + "loss_ce": 0.0003678113571368158, + "loss_iou": 0.60546875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 407420132, + "step": 2372 + }, + { + "epoch": 0.6241862300256461, + "grad_norm": 6.84932788040415, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 407592472, + "step": 2373 + }, + { + "epoch": 0.6241862300256461, + "loss": 0.07611523568630219, + "loss_ce": 0.0008588911150582135, + "loss_iou": 0.50390625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 407592472, + "step": 2373 + }, + { + "epoch": 0.6244492667850332, + "grad_norm": 5.11826467429585, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 407764812, + "step": 2374 + }, + { + "epoch": 0.6244492667850332, + "loss": 0.22299307584762573, + "loss_ce": 0.001649091369472444, + "loss_iou": 0.5078125, + "loss_num": 0.04443359375, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 407764812, + "step": 2374 + }, + { + "epoch": 0.6247123035444203, + "grad_norm": 8.470257803757693, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 407937232, + "step": 2375 + }, + { + "epoch": 0.6247123035444203, + "loss": 0.12985503673553467, + "loss_ce": 0.0015591441188007593, + "loss_iou": 0.5625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 407937232, + "step": 2375 + }, + { + "epoch": 0.6249753403038074, + "grad_norm": 6.1769339682539135, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 408109660, + "step": 2376 + }, + { + "epoch": 0.6249753403038074, + "loss": 0.1395704746246338, + "loss_ce": 0.0009596287272870541, + "loss_iou": 0.4140625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 408109660, + "step": 2376 + }, + { + "epoch": 0.6252383770631946, + "grad_norm": 16.39235046187932, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 408281820, + "step": 2377 + }, + { + "epoch": 0.6252383770631946, + "loss": 0.12146437168121338, + "loss_ce": 0.0002790701691992581, + "loss_iou": 0.51953125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 408281820, + "step": 2377 + }, + { + "epoch": 0.6255014138225817, + "grad_norm": 7.613001701595713, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 408453984, + "step": 2378 + }, + { + "epoch": 0.6255014138225817, + "loss": 0.120358906686306, + "loss_ce": 0.002347429981455207, + "loss_iou": 0.51171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 408453984, + "step": 2378 + }, + { + "epoch": 0.6257644505819688, + "grad_norm": 4.614954338110094, + "learning_rate": 5e-06, + "loss": 0.1215, + "num_input_tokens_seen": 408625956, + "step": 2379 + }, + { + "epoch": 0.6257644505819688, + "loss": 0.12903685867786407, + "loss_ce": 0.003334960900247097, + "loss_iou": 0.640625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 408625956, + "step": 2379 + }, + { + "epoch": 0.6260274873413559, + "grad_norm": 4.387875886048538, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 408797808, + "step": 2380 + }, + { + "epoch": 0.6260274873413559, + "loss": 0.1543307602405548, + "loss_ce": 0.0021701250225305557, + "loss_iou": 0.50390625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 408797808, + "step": 2380 + }, + { + "epoch": 0.626290524100743, + "grad_norm": 5.128859786695588, + "learning_rate": 5e-06, + "loss": 0.1393, + "num_input_tokens_seen": 408969768, + "step": 2381 + }, + { + "epoch": 0.626290524100743, + "loss": 0.1084834560751915, + "loss_ce": 0.0040522972121834755, + "loss_iou": 0.54296875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 408969768, + "step": 2381 + }, + { + "epoch": 0.6265535608601303, + "grad_norm": 9.647359334804406, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 409141828, + "step": 2382 + }, + { + "epoch": 0.6265535608601303, + "loss": 0.253137469291687, + "loss_ce": 0.00036035641096532345, + "loss_iou": 0.494140625, + "loss_num": 0.050537109375, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 409141828, + "step": 2382 + }, + { + "epoch": 0.6268165976195174, + "grad_norm": 7.279560183961185, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 409313972, + "step": 2383 + }, + { + "epoch": 0.6268165976195174, + "loss": 0.07071413099765778, + "loss_ce": 0.00015749200247228146, + "loss_iou": 0.62109375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 409313972, + "step": 2383 + }, + { + "epoch": 0.6270796343789045, + "grad_norm": 34.71304497396249, + "learning_rate": 5e-06, + "loss": 0.1461, + "num_input_tokens_seen": 409482936, + "step": 2384 + }, + { + "epoch": 0.6270796343789045, + "loss": 0.07908271253108978, + "loss_ce": 0.0020258277654647827, + "loss_iou": 0.37890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 409482936, + "step": 2384 + }, + { + "epoch": 0.6273426711382916, + "grad_norm": 3.2691371809714322, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 409655000, + "step": 2385 + }, + { + "epoch": 0.6273426711382916, + "loss": 0.06744687259197235, + "loss_ce": 0.00012509411317296326, + "loss_iou": 0.5546875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 409655000, + "step": 2385 + }, + { + "epoch": 0.6276057078976787, + "grad_norm": 3.653366346571365, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 409826932, + "step": 2386 + }, + { + "epoch": 0.6276057078976787, + "loss": 0.11388548463582993, + "loss_ce": 0.0006347582675516605, + "loss_iou": 0.5390625, + "loss_num": 0.0225830078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 409826932, + "step": 2386 + }, + { + "epoch": 0.6278687446570659, + "grad_norm": 4.394076994308062, + "learning_rate": 5e-06, + "loss": 0.126, + "num_input_tokens_seen": 409997112, + "step": 2387 + }, + { + "epoch": 0.6278687446570659, + "loss": 0.07691079378128052, + "loss_ce": 0.0022647997830063105, + "loss_iou": 0.625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 409997112, + "step": 2387 + }, + { + "epoch": 0.628131781416453, + "grad_norm": 5.885043334484188, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 410169412, + "step": 2388 + }, + { + "epoch": 0.628131781416453, + "loss": 0.09735430032014847, + "loss_ce": 0.0005830569425597787, + "loss_iou": 0.6171875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 410169412, + "step": 2388 + }, + { + "epoch": 0.6283948181758401, + "grad_norm": 9.328795985316807, + "learning_rate": 5e-06, + "loss": 0.0794, + "num_input_tokens_seen": 410341700, + "step": 2389 + }, + { + "epoch": 0.6283948181758401, + "loss": 0.08082857728004456, + "loss_ce": 0.0010098508791998029, + "loss_iou": 0.5390625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 410341700, + "step": 2389 + }, + { + "epoch": 0.6286578549352272, + "grad_norm": 4.002799196322825, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 410513660, + "step": 2390 + }, + { + "epoch": 0.6286578549352272, + "loss": 0.054968155920505524, + "loss_ce": 0.000341689505148679, + "loss_iou": 0.53515625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 410513660, + "step": 2390 + }, + { + "epoch": 0.6289208916946143, + "grad_norm": 6.9030334525022505, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 410685956, + "step": 2391 + }, + { + "epoch": 0.6289208916946143, + "loss": 0.15183863043785095, + "loss_ce": 0.0031264659482985735, + "loss_iou": 0.53125, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 410685956, + "step": 2391 + }, + { + "epoch": 0.6291839284540014, + "grad_norm": 5.941678610362804, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 410856348, + "step": 2392 + }, + { + "epoch": 0.6291839284540014, + "loss": 0.1089685708284378, + "loss_ce": 0.0001734074903652072, + "loss_iou": 0.671875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 410856348, + "step": 2392 + }, + { + "epoch": 0.6294469652133886, + "grad_norm": 7.460369582099088, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 411028268, + "step": 2393 + }, + { + "epoch": 0.6294469652133886, + "loss": 0.17290328443050385, + "loss_ce": 0.0006010266370140016, + "loss_iou": 0.3671875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 411028268, + "step": 2393 + }, + { + "epoch": 0.6297100019727757, + "grad_norm": 4.673085403235698, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 411200296, + "step": 2394 + }, + { + "epoch": 0.6297100019727757, + "loss": 0.11547736823558807, + "loss_ce": 0.0025928488466888666, + "loss_iou": 0.45703125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 411200296, + "step": 2394 + }, + { + "epoch": 0.6299730387321628, + "grad_norm": 4.914810997482069, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 411372592, + "step": 2395 + }, + { + "epoch": 0.6299730387321628, + "loss": 0.1332027018070221, + "loss_ce": 0.0009089965024031699, + "loss_iou": 0.4140625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 411372592, + "step": 2395 + }, + { + "epoch": 0.6302360754915499, + "grad_norm": 10.387773947226163, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 411544920, + "step": 2396 + }, + { + "epoch": 0.6302360754915499, + "loss": 0.12052314728498459, + "loss_ce": 0.0002533740480430424, + "loss_iou": 0.5625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 411544920, + "step": 2396 + }, + { + "epoch": 0.630499112250937, + "grad_norm": 8.18207434885096, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 411717416, + "step": 2397 + }, + { + "epoch": 0.630499112250937, + "loss": 0.09294469654560089, + "loss_ce": 0.0004459216434042901, + "loss_iou": 0.66015625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 411717416, + "step": 2397 + }, + { + "epoch": 0.6307621490103242, + "grad_norm": 9.784216604991045, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 411889656, + "step": 2398 + }, + { + "epoch": 0.6307621490103242, + "loss": 0.06576241552829742, + "loss_ce": 0.00011910500325029716, + "loss_iou": 0.46484375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 411889656, + "step": 2398 + }, + { + "epoch": 0.6310251857697113, + "grad_norm": 9.394179875352377, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 412061964, + "step": 2399 + }, + { + "epoch": 0.6310251857697113, + "loss": 0.2204100638628006, + "loss_ce": 0.003613188164308667, + "loss_iou": 0.37109375, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 412061964, + "step": 2399 + }, + { + "epoch": 0.6312882225290984, + "grad_norm": 5.421055277942169, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 412233844, + "step": 2400 + }, + { + "epoch": 0.6312882225290984, + "loss": 0.058117613196372986, + "loss_ce": 0.00016473224968649447, + "loss_iou": 0.390625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 412233844, + "step": 2400 + }, + { + "epoch": 0.6315512592884855, + "grad_norm": 32.78384103492781, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 412404404, + "step": 2401 + }, + { + "epoch": 0.6315512592884855, + "loss": 0.11002543568611145, + "loss_ce": 0.0008640556479804218, + "loss_iou": NaN, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 412404404, + "step": 2401 + }, + { + "epoch": 0.6318142960478726, + "grad_norm": 5.144111967585496, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 412576424, + "step": 2402 + }, + { + "epoch": 0.6318142960478726, + "loss": 0.10595827549695969, + "loss_ce": 0.0008252161205746233, + "loss_iou": 0.5390625, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 412576424, + "step": 2402 + }, + { + "epoch": 0.6320773328072599, + "grad_norm": 9.535369648896554, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 412748540, + "step": 2403 + }, + { + "epoch": 0.6320773328072599, + "loss": 0.08199536800384521, + "loss_ce": 0.0007880894117988646, + "loss_iou": 0.52734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 412748540, + "step": 2403 + }, + { + "epoch": 0.632340369566647, + "grad_norm": 3.8619766281957557, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 412919044, + "step": 2404 + }, + { + "epoch": 0.632340369566647, + "loss": 0.07876091450452805, + "loss_ce": 0.0004375489370431751, + "loss_iou": 0.3671875, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 412919044, + "step": 2404 + }, + { + "epoch": 0.6326034063260341, + "grad_norm": 4.690903318512814, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 413089276, + "step": 2405 + }, + { + "epoch": 0.6326034063260341, + "loss": 0.12442415952682495, + "loss_ce": 0.0037271445617079735, + "loss_iou": 0.54296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 413089276, + "step": 2405 + }, + { + "epoch": 0.6328664430854212, + "grad_norm": 5.509443001792319, + "learning_rate": 5e-06, + "loss": 0.1865, + "num_input_tokens_seen": 413261512, + "step": 2406 + }, + { + "epoch": 0.6328664430854212, + "loss": 0.2314736247062683, + "loss_ce": 0.0039345622062683105, + "loss_iou": 0.55078125, + "loss_num": 0.04541015625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 413261512, + "step": 2406 + }, + { + "epoch": 0.6331294798448083, + "grad_norm": 4.789366484519657, + "learning_rate": 5e-06, + "loss": 0.1568, + "num_input_tokens_seen": 413433928, + "step": 2407 + }, + { + "epoch": 0.6331294798448083, + "loss": 0.10793605446815491, + "loss_ce": 0.002009541727602482, + "loss_iou": 0.4609375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 413433928, + "step": 2407 + }, + { + "epoch": 0.6333925166041955, + "grad_norm": 8.59758393749405, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 413606128, + "step": 2408 + }, + { + "epoch": 0.6333925166041955, + "loss": 0.0926143079996109, + "loss_ce": 0.001000535092316568, + "loss_iou": 0.40234375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 413606128, + "step": 2408 + }, + { + "epoch": 0.6336555533635826, + "grad_norm": 9.998355206402028, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 413778176, + "step": 2409 + }, + { + "epoch": 0.6336555533635826, + "loss": 0.12268656492233276, + "loss_ce": 0.0002958154655061662, + "loss_iou": 0.39453125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 413778176, + "step": 2409 + }, + { + "epoch": 0.6339185901229697, + "grad_norm": 4.79036194916934, + "learning_rate": 5e-06, + "loss": 0.1397, + "num_input_tokens_seen": 413950292, + "step": 2410 + }, + { + "epoch": 0.6339185901229697, + "loss": 0.22198227047920227, + "loss_ce": 0.0022557121701538563, + "loss_iou": 0.482421875, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 413950292, + "step": 2410 + }, + { + "epoch": 0.6341816268823568, + "grad_norm": 21.148690076997823, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 414119124, + "step": 2411 + }, + { + "epoch": 0.6341816268823568, + "loss": 0.08586390316486359, + "loss_ce": 0.000368903303751722, + "loss_iou": 0.494140625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 414119124, + "step": 2411 + }, + { + "epoch": 0.6344446636417439, + "grad_norm": 7.240778669769743, + "learning_rate": 5e-06, + "loss": 0.1765, + "num_input_tokens_seen": 414291560, + "step": 2412 + }, + { + "epoch": 0.6344446636417439, + "loss": 0.24769088625907898, + "loss_ce": 0.0022685134317725897, + "loss_iou": 0.369140625, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 414291560, + "step": 2412 + }, + { + "epoch": 0.634707700401131, + "grad_norm": 15.982142868420826, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 414463784, + "step": 2413 + }, + { + "epoch": 0.634707700401131, + "loss": 0.22224080562591553, + "loss_ce": 0.0012630214914679527, + "loss_iou": 0.474609375, + "loss_num": 0.044189453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 414463784, + "step": 2413 + }, + { + "epoch": 0.6349707371605182, + "grad_norm": 5.563886751421209, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 414635968, + "step": 2414 + }, + { + "epoch": 0.6349707371605182, + "loss": 0.12378741800785065, + "loss_ce": 0.0024190132971853018, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 414635968, + "step": 2414 + }, + { + "epoch": 0.6352337739199053, + "grad_norm": 6.971489879379984, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 414808184, + "step": 2415 + }, + { + "epoch": 0.6352337739199053, + "loss": 0.09163232147693634, + "loss_ce": 0.00017114286310970783, + "loss_iou": 0.59765625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 414808184, + "step": 2415 + }, + { + "epoch": 0.6354968106792924, + "grad_norm": 10.763166082287821, + "learning_rate": 5e-06, + "loss": 0.1798, + "num_input_tokens_seen": 414978292, + "step": 2416 + }, + { + "epoch": 0.6354968106792924, + "loss": 0.1381605565547943, + "loss_ce": 0.00016007423982955515, + "loss_iou": 0.50390625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 414978292, + "step": 2416 + }, + { + "epoch": 0.6357598474386795, + "grad_norm": 9.836473738616426, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 415150344, + "step": 2417 + }, + { + "epoch": 0.6357598474386795, + "loss": 0.1071944609284401, + "loss_ce": 0.0013594944030046463, + "loss_iou": 0.47265625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 415150344, + "step": 2417 + }, + { + "epoch": 0.6360228841980666, + "grad_norm": 3.9009374145565308, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 415322328, + "step": 2418 + }, + { + "epoch": 0.6360228841980666, + "loss": 0.08769555389881134, + "loss_ce": 0.00026269443333148956, + "loss_iou": 0.54296875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 415322328, + "step": 2418 + }, + { + "epoch": 0.6362859209574538, + "grad_norm": 12.748558785828688, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 415494296, + "step": 2419 + }, + { + "epoch": 0.6362859209574538, + "loss": 0.09499529004096985, + "loss_ce": 0.004266531206667423, + "loss_iou": 0.458984375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 415494296, + "step": 2419 + }, + { + "epoch": 0.636548957716841, + "grad_norm": 19.009247758406477, + "learning_rate": 5e-06, + "loss": 0.1063, + "num_input_tokens_seen": 415666184, + "step": 2420 + }, + { + "epoch": 0.636548957716841, + "loss": 0.07810772955417633, + "loss_ce": 0.0017832687590271235, + "loss_iou": 0.54296875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 415666184, + "step": 2420 + }, + { + "epoch": 0.636811994476228, + "grad_norm": 4.624128366794573, + "learning_rate": 5e-06, + "loss": 0.1594, + "num_input_tokens_seen": 415838228, + "step": 2421 + }, + { + "epoch": 0.636811994476228, + "loss": 0.11569841206073761, + "loss_ce": 0.0010743860621005297, + "loss_iou": 0.546875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 415838228, + "step": 2421 + }, + { + "epoch": 0.6370750312356152, + "grad_norm": 4.489659891906354, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 416010568, + "step": 2422 + }, + { + "epoch": 0.6370750312356152, + "loss": 0.09839779138565063, + "loss_ce": 0.0043120919726789, + "loss_iou": 0.578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 416010568, + "step": 2422 + }, + { + "epoch": 0.6373380679950023, + "grad_norm": 5.451345385819082, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 416182976, + "step": 2423 + }, + { + "epoch": 0.6373380679950023, + "loss": 0.11042429506778717, + "loss_ce": 0.005230204667896032, + "loss_iou": 0.65234375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 416182976, + "step": 2423 + }, + { + "epoch": 0.6376011047543895, + "grad_norm": 4.648386667387077, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 416354968, + "step": 2424 + }, + { + "epoch": 0.6376011047543895, + "loss": 0.08212631195783615, + "loss_ce": 0.002170257270336151, + "loss_iou": 0.55859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 416354968, + "step": 2424 + }, + { + "epoch": 0.6378641415137766, + "grad_norm": 7.161469269147777, + "learning_rate": 5e-06, + "loss": 0.1544, + "num_input_tokens_seen": 416527268, + "step": 2425 + }, + { + "epoch": 0.6378641415137766, + "loss": 0.1374281942844391, + "loss_ce": 0.0008009903831407428, + "loss_iou": 0.73828125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 416527268, + "step": 2425 + }, + { + "epoch": 0.6381271782731637, + "grad_norm": 7.707879726473491, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 416699532, + "step": 2426 + }, + { + "epoch": 0.6381271782731637, + "loss": 0.06318493187427521, + "loss_ce": 0.00016612766194157302, + "loss_iou": 0.482421875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 416699532, + "step": 2426 + }, + { + "epoch": 0.6383902150325508, + "grad_norm": 4.53622350388229, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 416871872, + "step": 2427 + }, + { + "epoch": 0.6383902150325508, + "loss": 0.14566369354724884, + "loss_ce": 0.0013460592599585652, + "loss_iou": 0.640625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 416871872, + "step": 2427 + }, + { + "epoch": 0.6386532517919379, + "grad_norm": 5.815980535010829, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 417044020, + "step": 2428 + }, + { + "epoch": 0.6386532517919379, + "loss": 0.12992171943187714, + "loss_ce": 0.0009391760104335845, + "loss_iou": 0.5078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 417044020, + "step": 2428 + }, + { + "epoch": 0.6389162885513251, + "grad_norm": 4.632913290565498, + "learning_rate": 5e-06, + "loss": 0.1236, + "num_input_tokens_seen": 417215912, + "step": 2429 + }, + { + "epoch": 0.6389162885513251, + "loss": 0.10175777971744537, + "loss_ce": 0.0023772907443344593, + "loss_iou": 0.53125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 417215912, + "step": 2429 + }, + { + "epoch": 0.6391793253107122, + "grad_norm": 3.31675640465626, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 417388272, + "step": 2430 + }, + { + "epoch": 0.6391793253107122, + "loss": 0.13262677192687988, + "loss_ce": 0.0031712136697024107, + "loss_iou": 0.46875, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 417388272, + "step": 2430 + }, + { + "epoch": 0.6394423620700993, + "grad_norm": 9.57220985801255, + "learning_rate": 5e-06, + "loss": 0.0935, + "num_input_tokens_seen": 417560304, + "step": 2431 + }, + { + "epoch": 0.6394423620700993, + "loss": 0.08074182271957397, + "loss_ce": 0.00218957313336432, + "loss_iou": 0.5078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 417560304, + "step": 2431 + }, + { + "epoch": 0.6397053988294864, + "grad_norm": 5.444532606716201, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 417732592, + "step": 2432 + }, + { + "epoch": 0.6397053988294864, + "loss": 0.23035961389541626, + "loss_ce": 0.0006538145244121552, + "loss_iou": 0.43359375, + "loss_num": 0.0458984375, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 417732592, + "step": 2432 + }, + { + "epoch": 0.6399684355888735, + "grad_norm": 10.22830520629982, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 417904600, + "step": 2433 + }, + { + "epoch": 0.6399684355888735, + "loss": 0.08408404886722565, + "loss_ce": 0.0004964005202054977, + "loss_iou": 0.57421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 417904600, + "step": 2433 + }, + { + "epoch": 0.6402314723482607, + "grad_norm": 4.834822071481386, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 418076896, + "step": 2434 + }, + { + "epoch": 0.6402314723482607, + "loss": 0.07859447598457336, + "loss_ce": 0.0008967254543676972, + "loss_iou": 0.46484375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 418076896, + "step": 2434 + }, + { + "epoch": 0.6404945091076478, + "grad_norm": 11.222393651192586, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 418249128, + "step": 2435 + }, + { + "epoch": 0.6404945091076478, + "loss": 0.17654769122600555, + "loss_ce": 0.0008885157876648009, + "loss_iou": 0.59375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 418249128, + "step": 2435 + }, + { + "epoch": 0.6407575458670349, + "grad_norm": 4.617257720330074, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 418421340, + "step": 2436 + }, + { + "epoch": 0.6407575458670349, + "loss": 0.15517690777778625, + "loss_ce": 0.0020702139008790255, + "loss_iou": 0.5546875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 418421340, + "step": 2436 + }, + { + "epoch": 0.641020582626422, + "grad_norm": 4.8286585158927, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 418591736, + "step": 2437 + }, + { + "epoch": 0.641020582626422, + "loss": 0.17714877426624298, + "loss_ce": 0.003656333312392235, + "loss_iou": 0.48046875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 418591736, + "step": 2437 + }, + { + "epoch": 0.6412836193858091, + "grad_norm": 13.06198419822853, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 418763952, + "step": 2438 + }, + { + "epoch": 0.6412836193858091, + "loss": 0.1256633698940277, + "loss_ce": 0.0007243968429975212, + "loss_iou": 0.5703125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 418763952, + "step": 2438 + }, + { + "epoch": 0.6415466561451962, + "grad_norm": 18.947469777145443, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 418936088, + "step": 2439 + }, + { + "epoch": 0.6415466561451962, + "loss": 0.09127810597419739, + "loss_ce": 0.003967312164604664, + "loss_iou": 0.65234375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 418936088, + "step": 2439 + }, + { + "epoch": 0.6418096929045835, + "grad_norm": 10.72734062355858, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 419108496, + "step": 2440 + }, + { + "epoch": 0.6418096929045835, + "loss": 0.16916052997112274, + "loss_ce": 0.0024124737828969955, + "loss_iou": 0.52734375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 419108496, + "step": 2440 + }, + { + "epoch": 0.6420727296639706, + "grad_norm": 9.781068157842057, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 419280412, + "step": 2441 + }, + { + "epoch": 0.6420727296639706, + "loss": 0.06414149701595306, + "loss_ce": 0.0008175191469490528, + "loss_iou": 0.5390625, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 419280412, + "step": 2441 + }, + { + "epoch": 0.6423357664233577, + "grad_norm": 4.632619855595305, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 419452340, + "step": 2442 + }, + { + "epoch": 0.6423357664233577, + "loss": 0.11532483249902725, + "loss_ce": 0.0007618411909788847, + "loss_iou": 0.373046875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 419452340, + "step": 2442 + }, + { + "epoch": 0.6425988031827448, + "grad_norm": 6.28676819593018, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 419624260, + "step": 2443 + }, + { + "epoch": 0.6425988031827448, + "loss": 0.08432676643133163, + "loss_ce": 0.0007086057448759675, + "loss_iou": 0.51171875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 419624260, + "step": 2443 + }, + { + "epoch": 0.6428618399421319, + "grad_norm": 4.106105061012571, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 419796396, + "step": 2444 + }, + { + "epoch": 0.6428618399421319, + "loss": 0.08845219761133194, + "loss_ce": 0.00016484873776789755, + "loss_iou": 0.55078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 419796396, + "step": 2444 + }, + { + "epoch": 0.6431248767015191, + "grad_norm": 5.5585844707251555, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 419965620, + "step": 2445 + }, + { + "epoch": 0.6431248767015191, + "loss": 0.08765023946762085, + "loss_ce": 0.0027198141906410456, + "loss_iou": 0.369140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 419965620, + "step": 2445 + }, + { + "epoch": 0.6433879134609062, + "grad_norm": 6.941283237766657, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 420137700, + "step": 2446 + }, + { + "epoch": 0.6433879134609062, + "loss": 0.15698650479316711, + "loss_ce": 0.0023844558745622635, + "loss_iou": 0.60546875, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 420137700, + "step": 2446 + }, + { + "epoch": 0.6436509502202933, + "grad_norm": 4.724784845199971, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 420309964, + "step": 2447 + }, + { + "epoch": 0.6436509502202933, + "loss": 0.05271516367793083, + "loss_ce": 0.0011099397670477629, + "loss_iou": 0.515625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 420309964, + "step": 2447 + }, + { + "epoch": 0.6439139869796804, + "grad_norm": 6.277321029907694, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 420482028, + "step": 2448 + }, + { + "epoch": 0.6439139869796804, + "loss": 0.19532331824302673, + "loss_ce": 0.0010789325460791588, + "loss_iou": 0.51171875, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 420482028, + "step": 2448 + }, + { + "epoch": 0.6441770237390675, + "grad_norm": 13.80293628596545, + "learning_rate": 5e-06, + "loss": 0.1117, + "num_input_tokens_seen": 420654152, + "step": 2449 + }, + { + "epoch": 0.6441770237390675, + "loss": 0.07553352415561676, + "loss_ce": 0.00017035921337082982, + "loss_iou": 0.443359375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 420654152, + "step": 2449 + }, + { + "epoch": 0.6444400604984547, + "grad_norm": 10.20337023035234, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 420823676, + "step": 2450 + }, + { + "epoch": 0.6444400604984547, + "loss": 0.1215822771191597, + "loss_ce": 0.0013124945107847452, + "loss_iou": 0.5078125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 420823676, + "step": 2450 + }, + { + "epoch": 0.6447030972578418, + "grad_norm": 3.8758961183069034, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 420991548, + "step": 2451 + }, + { + "epoch": 0.6447030972578418, + "loss": 0.04900962486863136, + "loss_ce": 0.0008071088814176619, + "loss_iou": 0.61328125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 420991548, + "step": 2451 + }, + { + "epoch": 0.6449661340172289, + "grad_norm": 17.020894064566225, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 421163940, + "step": 2452 + }, + { + "epoch": 0.6449661340172289, + "loss": 0.11804142594337463, + "loss_ce": 6.047027636668645e-05, + "loss_iou": 0.455078125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 421163940, + "step": 2452 + }, + { + "epoch": 0.645229170776616, + "grad_norm": 9.681794920323762, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 421333576, + "step": 2453 + }, + { + "epoch": 0.645229170776616, + "loss": 0.1210954412817955, + "loss_ce": 0.0014970521442592144, + "loss_iou": 0.52734375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 421333576, + "step": 2453 + }, + { + "epoch": 0.6454922075360031, + "grad_norm": 4.642104923921384, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 421505944, + "step": 2454 + }, + { + "epoch": 0.6454922075360031, + "loss": 0.1038353219628334, + "loss_ce": 0.0009910848457366228, + "loss_iou": 0.53515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 421505944, + "step": 2454 + }, + { + "epoch": 0.6457552442953903, + "grad_norm": 9.523149673908406, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 421677944, + "step": 2455 + }, + { + "epoch": 0.6457552442953903, + "loss": 0.15976807475090027, + "loss_ce": 0.0006189080304466188, + "loss_iou": 0.419921875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 421677944, + "step": 2455 + }, + { + "epoch": 0.6460182810547774, + "grad_norm": 5.022843580524971, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 421850200, + "step": 2456 + }, + { + "epoch": 0.6460182810547774, + "loss": 0.11521396040916443, + "loss_ce": 0.0040994551964104176, + "loss_iou": 0.5390625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 421850200, + "step": 2456 + }, + { + "epoch": 0.6462813178141645, + "grad_norm": 18.53576034320469, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 422020768, + "step": 2457 + }, + { + "epoch": 0.6462813178141645, + "loss": 0.145416259765625, + "loss_ce": 0.002319341991096735, + "loss_iou": 0.60546875, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 422020768, + "step": 2457 + }, + { + "epoch": 0.6465443545735516, + "grad_norm": 3.5647933372156486, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 422188996, + "step": 2458 + }, + { + "epoch": 0.6465443545735516, + "loss": 0.09572234004735947, + "loss_ce": 0.002552174963057041, + "loss_iou": 0.435546875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 422188996, + "step": 2458 + }, + { + "epoch": 0.6468073913329387, + "grad_norm": 7.264219107970771, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 422361088, + "step": 2459 + }, + { + "epoch": 0.6468073913329387, + "loss": 0.25412267446517944, + "loss_ce": 0.002886736299842596, + "loss_iou": 0.56640625, + "loss_num": 0.050048828125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 422361088, + "step": 2459 + }, + { + "epoch": 0.647070428092326, + "grad_norm": 4.6344065191429635, + "learning_rate": 5e-06, + "loss": 0.1305, + "num_input_tokens_seen": 422533008, + "step": 2460 + }, + { + "epoch": 0.647070428092326, + "loss": 0.17847508192062378, + "loss_ce": 0.00795811414718628, + "loss_iou": 0.482421875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 422533008, + "step": 2460 + }, + { + "epoch": 0.6473334648517131, + "grad_norm": 8.368380580785693, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 422705020, + "step": 2461 + }, + { + "epoch": 0.6473334648517131, + "loss": 0.11911526322364807, + "loss_ce": 0.0007375775021500885, + "loss_iou": 0.65234375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 422705020, + "step": 2461 + }, + { + "epoch": 0.6475965016111002, + "grad_norm": 14.530786026480682, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 422876816, + "step": 2462 + }, + { + "epoch": 0.6475965016111002, + "loss": 0.1273796260356903, + "loss_ce": 0.0006401161663234234, + "loss_iou": 0.5390625, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 422876816, + "step": 2462 + }, + { + "epoch": 0.6478595383704873, + "grad_norm": 5.386199695848856, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 423048624, + "step": 2463 + }, + { + "epoch": 0.6478595383704873, + "loss": 0.07592228055000305, + "loss_ce": 0.00014713153359480202, + "loss_iou": 0.44921875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 423048624, + "step": 2463 + }, + { + "epoch": 0.6481225751298744, + "grad_norm": 4.520647486495735, + "learning_rate": 5e-06, + "loss": 0.0844, + "num_input_tokens_seen": 423220672, + "step": 2464 + }, + { + "epoch": 0.6481225751298744, + "loss": 0.0708494782447815, + "loss_ce": 0.0003843960876110941, + "loss_iou": 0.6015625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 423220672, + "step": 2464 + }, + { + "epoch": 0.6483856118892615, + "grad_norm": 6.550669243795055, + "learning_rate": 5e-06, + "loss": 0.0995, + "num_input_tokens_seen": 423392552, + "step": 2465 + }, + { + "epoch": 0.6483856118892615, + "loss": 0.07499829679727554, + "loss_ce": 0.0005048871971666813, + "loss_iou": 0.6953125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 423392552, + "step": 2465 + }, + { + "epoch": 0.6486486486486487, + "grad_norm": 19.46883155452454, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 423564752, + "step": 2466 + }, + { + "epoch": 0.6486486486486487, + "loss": 0.20438973605632782, + "loss_ce": 0.000684903294313699, + "loss_iou": 0.67578125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 423564752, + "step": 2466 + }, + { + "epoch": 0.6489116854080358, + "grad_norm": 5.889523777651774, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 423737504, + "step": 2467 + }, + { + "epoch": 0.6489116854080358, + "loss": 0.1293552964925766, + "loss_ce": 0.002432688605040312, + "loss_iou": 0.609375, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 423737504, + "step": 2467 + }, + { + "epoch": 0.6491747221674229, + "grad_norm": 7.016832670115666, + "learning_rate": 5e-06, + "loss": 0.1505, + "num_input_tokens_seen": 423909608, + "step": 2468 + }, + { + "epoch": 0.6491747221674229, + "loss": 0.05709705874323845, + "loss_ce": 0.0013414426939561963, + "loss_iou": 0.49609375, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 423909608, + "step": 2468 + }, + { + "epoch": 0.64943775892681, + "grad_norm": 3.9924309473660817, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 424081612, + "step": 2469 + }, + { + "epoch": 0.64943775892681, + "loss": 0.21573078632354736, + "loss_ce": 0.0024129238445311785, + "loss_iou": 0.50390625, + "loss_num": 0.042724609375, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 424081612, + "step": 2469 + }, + { + "epoch": 0.6497007956861971, + "grad_norm": 8.786316595444035, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 424253804, + "step": 2470 + }, + { + "epoch": 0.6497007956861971, + "loss": 0.05929354950785637, + "loss_ce": 0.00044039852218702435, + "loss_iou": NaN, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 424253804, + "step": 2470 + }, + { + "epoch": 0.6499638324455843, + "grad_norm": 15.002230358263791, + "learning_rate": 5e-06, + "loss": 0.1137, + "num_input_tokens_seen": 424425936, + "step": 2471 + }, + { + "epoch": 0.6499638324455843, + "loss": 0.13272526860237122, + "loss_ce": 0.004475146532058716, + "loss_iou": 0.39453125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 424425936, + "step": 2471 + }, + { + "epoch": 0.6502268692049714, + "grad_norm": 5.3031138296122675, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 424596308, + "step": 2472 + }, + { + "epoch": 0.6502268692049714, + "loss": 0.06613775342702866, + "loss_ce": 0.005651914514601231, + "loss_iou": 0.462890625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 424596308, + "step": 2472 + }, + { + "epoch": 0.6504899059643585, + "grad_norm": 17.448583587295317, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 424768552, + "step": 2473 + }, + { + "epoch": 0.6504899059643585, + "loss": 0.08512883633375168, + "loss_ce": 0.0031891404651105404, + "loss_iou": 0.5625, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 424768552, + "step": 2473 + }, + { + "epoch": 0.6507529427237456, + "grad_norm": 6.87156581112055, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 424940764, + "step": 2474 + }, + { + "epoch": 0.6507529427237456, + "loss": 0.07900265604257584, + "loss_ce": 0.0035021707881242037, + "loss_iou": 0.5546875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 424940764, + "step": 2474 + }, + { + "epoch": 0.6510159794831327, + "grad_norm": 7.4967444521140765, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 425112860, + "step": 2475 + }, + { + "epoch": 0.6510159794831327, + "loss": 0.1566176563501358, + "loss_ce": 0.0004592128098011017, + "loss_iou": 0.55078125, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 425112860, + "step": 2475 + }, + { + "epoch": 0.6512790162425199, + "grad_norm": 7.198491382802353, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 425284996, + "step": 2476 + }, + { + "epoch": 0.6512790162425199, + "loss": 0.12442326545715332, + "loss_ce": 0.0022308877669274807, + "loss_iou": 0.5234375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 425284996, + "step": 2476 + }, + { + "epoch": 0.651542053001907, + "grad_norm": 12.660738408941835, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 425457444, + "step": 2477 + }, + { + "epoch": 0.651542053001907, + "loss": 0.139640212059021, + "loss_ce": 0.0008462676778435707, + "loss_iou": 0.65625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 425457444, + "step": 2477 + }, + { + "epoch": 0.6518050897612941, + "grad_norm": 4.788908959245028, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 425629356, + "step": 2478 + }, + { + "epoch": 0.6518050897612941, + "loss": 0.11505892872810364, + "loss_ce": 0.0043716710060834885, + "loss_iou": 0.44921875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 425629356, + "step": 2478 + }, + { + "epoch": 0.6520681265206812, + "grad_norm": 14.713914944582257, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 425801412, + "step": 2479 + }, + { + "epoch": 0.6520681265206812, + "loss": 0.09329426288604736, + "loss_ce": 0.0003987499512732029, + "loss_iou": 0.5234375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 425801412, + "step": 2479 + }, + { + "epoch": 0.6523311632800683, + "grad_norm": 11.370966467516885, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 425973396, + "step": 2480 + }, + { + "epoch": 0.6523311632800683, + "loss": 0.12143571674823761, + "loss_ce": 0.0008607673225924373, + "loss_iou": 0.49609375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 425973396, + "step": 2480 + }, + { + "epoch": 0.6525942000394556, + "grad_norm": 16.96417675320112, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 426142788, + "step": 2481 + }, + { + "epoch": 0.6525942000394556, + "loss": 0.058773696422576904, + "loss_ce": 0.00017994196969084442, + "loss_iou": 0.51171875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 426142788, + "step": 2481 + }, + { + "epoch": 0.6528572367988427, + "grad_norm": 3.267968495608878, + "learning_rate": 5e-06, + "loss": 0.1454, + "num_input_tokens_seen": 426314852, + "step": 2482 + }, + { + "epoch": 0.6528572367988427, + "loss": 0.14266598224639893, + "loss_ce": 0.0022698603570461273, + "loss_iou": 0.37109375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 426314852, + "step": 2482 + }, + { + "epoch": 0.6531202735582298, + "grad_norm": 12.820952382520073, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 426487324, + "step": 2483 + }, + { + "epoch": 0.6531202735582298, + "loss": 0.09693928062915802, + "loss_ce": 0.0016328811179846525, + "loss_iou": 0.4296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 426487324, + "step": 2483 + }, + { + "epoch": 0.6533833103176169, + "grad_norm": 10.800205106839552, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 426659468, + "step": 2484 + }, + { + "epoch": 0.6533833103176169, + "loss": 0.07587607949972153, + "loss_ce": 0.00039084581658244133, + "loss_iou": 0.4921875, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 426659468, + "step": 2484 + }, + { + "epoch": 0.653646347077004, + "grad_norm": 7.9171669810832395, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 426831496, + "step": 2485 + }, + { + "epoch": 0.653646347077004, + "loss": 0.07471035420894623, + "loss_ce": 0.0035586238373070955, + "loss_iou": 0.6015625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 426831496, + "step": 2485 + }, + { + "epoch": 0.6539093838363912, + "grad_norm": 8.230428013130412, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 427003612, + "step": 2486 + }, + { + "epoch": 0.6539093838363912, + "loss": 0.08275149762630463, + "loss_ce": 0.0038025237154215574, + "loss_iou": 0.5234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 427003612, + "step": 2486 + }, + { + "epoch": 0.6541724205957783, + "grad_norm": 3.632885838865945, + "learning_rate": 5e-06, + "loss": 0.1363, + "num_input_tokens_seen": 427175916, + "step": 2487 + }, + { + "epoch": 0.6541724205957783, + "loss": 0.1029161587357521, + "loss_ce": 0.000987447565421462, + "loss_iou": 0.52734375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 427175916, + "step": 2487 + }, + { + "epoch": 0.6544354573551654, + "grad_norm": 3.5649981841807588, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 427348060, + "step": 2488 + }, + { + "epoch": 0.6544354573551654, + "loss": 0.07192617654800415, + "loss_ce": 0.003597316797822714, + "loss_iou": 0.51171875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 427348060, + "step": 2488 + }, + { + "epoch": 0.6546984941145525, + "grad_norm": 5.397602518661847, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 427520416, + "step": 2489 + }, + { + "epoch": 0.6546984941145525, + "loss": 0.08971969783306122, + "loss_ce": 0.0012797524686902761, + "loss_iou": 0.470703125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 427520416, + "step": 2489 + }, + { + "epoch": 0.6549615308739396, + "grad_norm": 2.996287220023088, + "learning_rate": 5e-06, + "loss": 0.0974, + "num_input_tokens_seen": 427692328, + "step": 2490 + }, + { + "epoch": 0.6549615308739396, + "loss": 0.11320722848176956, + "loss_ce": 0.0006584011716768146, + "loss_iou": 0.3828125, + "loss_num": 0.0225830078125, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 427692328, + "step": 2490 + }, + { + "epoch": 0.6552245676333267, + "grad_norm": 9.620885086024206, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 427864376, + "step": 2491 + }, + { + "epoch": 0.6552245676333267, + "loss": 0.10166727006435394, + "loss_ce": 0.0020884163677692413, + "loss_iou": 0.4609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 427864376, + "step": 2491 + }, + { + "epoch": 0.6554876043927139, + "grad_norm": 38.296683588407596, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 428036792, + "step": 2492 + }, + { + "epoch": 0.6554876043927139, + "loss": 0.06815087795257568, + "loss_ce": 6.61654194118455e-05, + "loss_iou": 0.44140625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 428036792, + "step": 2492 + }, + { + "epoch": 0.655750641152101, + "grad_norm": 29.525923288124442, + "learning_rate": 5e-06, + "loss": 0.1634, + "num_input_tokens_seen": 428207124, + "step": 2493 + }, + { + "epoch": 0.655750641152101, + "loss": 0.23705750703811646, + "loss_ce": 0.0035522649995982647, + "loss_iou": 0.59375, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 428207124, + "step": 2493 + }, + { + "epoch": 0.6560136779114881, + "grad_norm": 5.543812647744825, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 428379688, + "step": 2494 + }, + { + "epoch": 0.6560136779114881, + "loss": 0.09441094100475311, + "loss_ce": 0.0024462228175252676, + "loss_iou": 0.515625, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 428379688, + "step": 2494 + }, + { + "epoch": 0.6562767146708752, + "grad_norm": 32.70215077495664, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 428551908, + "step": 2495 + }, + { + "epoch": 0.6562767146708752, + "loss": 0.05353452265262604, + "loss_ce": 0.00015927490312606096, + "loss_iou": 0.67578125, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 428551908, + "step": 2495 + }, + { + "epoch": 0.6565397514302623, + "grad_norm": 9.54910159759898, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 428724396, + "step": 2496 + }, + { + "epoch": 0.6565397514302623, + "loss": 0.1285240650177002, + "loss_ce": 0.00035023505915887654, + "loss_iou": 0.609375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 428724396, + "step": 2496 + }, + { + "epoch": 0.6568027881896495, + "grad_norm": 22.277656875348782, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 428891900, + "step": 2497 + }, + { + "epoch": 0.6568027881896495, + "loss": 0.08849343657493591, + "loss_ce": 0.0024643833748996258, + "loss_iou": 0.52734375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 428891900, + "step": 2497 + }, + { + "epoch": 0.6570658249490366, + "grad_norm": 6.04979449937967, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 429064016, + "step": 2498 + }, + { + "epoch": 0.6570658249490366, + "loss": 0.04533851146697998, + "loss_ce": 0.000935432268306613, + "loss_iou": 0.482421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 429064016, + "step": 2498 + }, + { + "epoch": 0.6573288617084238, + "grad_norm": 13.279231347366395, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 429236096, + "step": 2499 + }, + { + "epoch": 0.6573288617084238, + "loss": 0.09342057257890701, + "loss_ce": 0.0018373207421973348, + "loss_iou": 0.61328125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 429236096, + "step": 2499 + }, + { + "epoch": 0.6575918984678109, + "grad_norm": 9.388446901609708, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_websight_new_CIoU": 0.8709481358528137, + "eval_websight_new_GIoU": 0.8742890954017639, + "eval_websight_new_IoU": 0.8760707080364227, + "eval_websight_new_MAE_all": 0.01990661583840847, + "eval_websight_new_MAE_h": 0.01017875224351883, + "eval_websight_new_MAE_w": 0.03395752049982548, + "eval_websight_new_MAE_x": 0.03019585181027651, + "eval_websight_new_MAE_y": 0.005294335773214698, + "eval_websight_new_NUM_probability": 0.9999906122684479, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.09985960274934769, + "eval_websight_new_loss_ce": 2.431606571917655e-05, + "eval_websight_new_loss_iou": 0.4124755859375, + "eval_websight_new_loss_num": 0.017595291137695312, + "eval_websight_new_loss_xval": 0.08795166015625, + "eval_websight_new_runtime": 55.684, + "eval_websight_new_samples_per_second": 0.898, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_seeclick_CIoU": 0.6180358529090881, + "eval_seeclick_GIoU": 0.6200732290744781, + "eval_seeclick_IoU": 0.6430684626102448, + "eval_seeclick_MAE_all": 0.04816816933453083, + "eval_seeclick_MAE_h": 0.03338594362139702, + "eval_seeclick_MAE_w": 0.06272775307297707, + "eval_seeclick_MAE_x": 0.06945410370826721, + "eval_seeclick_MAE_y": 0.027104882523417473, + "eval_seeclick_NUM_probability": 0.9999706149101257, + "eval_seeclick_inside_bbox": 0.8764204680919647, + "eval_seeclick_loss": 0.22014649212360382, + "eval_seeclick_loss_ce": 0.008939406834542751, + "eval_seeclick_loss_iou": 0.5093994140625, + "eval_seeclick_loss_num": 0.040313720703125, + "eval_seeclick_loss_xval": 0.201568603515625, + "eval_seeclick_runtime": 68.7244, + "eval_seeclick_samples_per_second": 0.626, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_icons_CIoU": 0.8709467053413391, + "eval_icons_GIoU": 0.8682527244091034, + "eval_icons_IoU": 0.874538779258728, + "eval_icons_MAE_all": 0.01756941620260477, + "eval_icons_MAE_h": 0.02226724848151207, + "eval_icons_MAE_w": 0.020997921004891396, + "eval_icons_MAE_x": 0.0130357148591429, + "eval_icons_MAE_y": 0.013976779766380787, + "eval_icons_NUM_probability": 0.9999793469905853, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.06909541040658951, + "eval_icons_loss_ce": 1.1558900041563902e-05, + "eval_icons_loss_iou": 0.66064453125, + "eval_icons_loss_num": 0.01297760009765625, + "eval_icons_loss_xval": 0.0648956298828125, + "eval_icons_runtime": 83.8008, + "eval_icons_samples_per_second": 0.597, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.6575918984678109, + "eval_screenspot_CIoU": 0.5514570474624634, + "eval_screenspot_GIoU": 0.5452684958775839, + "eval_screenspot_IoU": 0.5914793411890665, + "eval_screenspot_MAE_all": 0.08535195142030716, + "eval_screenspot_MAE_h": 0.06157554934422175, + "eval_screenspot_MAE_w": 0.14490507543087006, + "eval_screenspot_MAE_x": 0.08271919315059979, + "eval_screenspot_MAE_y": 0.05220799893140793, + "eval_screenspot_NUM_probability": 0.9994663198788961, + "eval_screenspot_inside_bbox": 0.850000003973643, + "eval_screenspot_loss": 0.9172521233558655, + "eval_screenspot_loss_ce": 0.5530519783496857, + "eval_screenspot_loss_iou": 0.529052734375, + "eval_screenspot_loss_num": 0.07132466634114583, + "eval_screenspot_loss_xval": 0.3565266927083333, + "eval_screenspot_runtime": 139.3325, + "eval_screenspot_samples_per_second": 0.639, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 429408212, + "step": 2500 + }, + { + "epoch": 0.657854935227198, + "grad_norm": 4.075831219377808, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 429580660, + "step": 2501 + }, + { + "epoch": 0.657854935227198, + "loss": 0.13817401230335236, + "loss_ce": 0.0033473544754087925, + "loss_iou": 0.59375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 429580660, + "step": 2501 + }, + { + "epoch": 0.6581179719865852, + "grad_norm": 7.116588034171532, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 429752720, + "step": 2502 + }, + { + "epoch": 0.6581179719865852, + "loss": 0.1050395742058754, + "loss_ce": 0.0004253170336596668, + "loss_iou": 0.35546875, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 429752720, + "step": 2502 + }, + { + "epoch": 0.6583810087459723, + "grad_norm": 4.475079180174651, + "learning_rate": 5e-06, + "loss": 0.0863, + "num_input_tokens_seen": 429924908, + "step": 2503 + }, + { + "epoch": 0.6583810087459723, + "loss": 0.06600432842969894, + "loss_ce": 0.0007577461656183004, + "loss_iou": 0.60546875, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 429924908, + "step": 2503 + }, + { + "epoch": 0.6586440455053594, + "grad_norm": 4.723970217436198, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 430097000, + "step": 2504 + }, + { + "epoch": 0.6586440455053594, + "loss": 0.07487765699625015, + "loss_ce": 0.0005978714907541871, + "loss_iou": 0.54296875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 430097000, + "step": 2504 + }, + { + "epoch": 0.6589070822647465, + "grad_norm": 14.464350947162439, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 430266684, + "step": 2505 + }, + { + "epoch": 0.6589070822647465, + "loss": 0.24609506130218506, + "loss_ce": 0.00015389968757517636, + "loss_iou": 0.51171875, + "loss_num": 0.04931640625, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 430266684, + "step": 2505 + }, + { + "epoch": 0.6591701190241336, + "grad_norm": 4.076117698308253, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 430436940, + "step": 2506 + }, + { + "epoch": 0.6591701190241336, + "loss": 0.109318308532238, + "loss_ce": 0.002522045513615012, + "loss_iou": 0.328125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 430436940, + "step": 2506 + }, + { + "epoch": 0.6594331557835208, + "grad_norm": 5.795601298430443, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 430608572, + "step": 2507 + }, + { + "epoch": 0.6594331557835208, + "loss": 0.07380083203315735, + "loss_ce": 0.00034502719063311815, + "loss_iou": 0.6171875, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 430608572, + "step": 2507 + }, + { + "epoch": 0.6596961925429079, + "grad_norm": 3.8858425270409165, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 430778744, + "step": 2508 + }, + { + "epoch": 0.6596961925429079, + "loss": 0.1448889970779419, + "loss_ce": 0.00018989352975040674, + "loss_iou": 0.55078125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 430778744, + "step": 2508 + }, + { + "epoch": 0.659959229302295, + "grad_norm": 9.24454413883364, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 430950904, + "step": 2509 + }, + { + "epoch": 0.659959229302295, + "loss": 0.09062638133764267, + "loss_ce": 0.0013014276046305895, + "loss_iou": 0.53515625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 430950904, + "step": 2509 + }, + { + "epoch": 0.6602222660616821, + "grad_norm": 5.68970165561044, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 431121056, + "step": 2510 + }, + { + "epoch": 0.6602222660616821, + "loss": 0.09496060013771057, + "loss_ce": 0.0006155127775855362, + "loss_iou": NaN, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 431121056, + "step": 2510 + }, + { + "epoch": 0.6604853028210692, + "grad_norm": 3.2016449128749023, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 431293012, + "step": 2511 + }, + { + "epoch": 0.6604853028210692, + "loss": 0.03823622688651085, + "loss_ce": 1.2961418178747408e-05, + "loss_iou": 0.5703125, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 431293012, + "step": 2511 + }, + { + "epoch": 0.6607483395804564, + "grad_norm": 7.269277911217335, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 431461708, + "step": 2512 + }, + { + "epoch": 0.6607483395804564, + "loss": 0.27740049362182617, + "loss_ce": 0.004237642977386713, + "loss_iou": 0.255859375, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 431461708, + "step": 2512 + }, + { + "epoch": 0.6610113763398435, + "grad_norm": 3.005900285482203, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 431633828, + "step": 2513 + }, + { + "epoch": 0.6610113763398435, + "loss": 0.0913277417421341, + "loss_ce": 0.0005684680072590709, + "loss_iou": 0.36328125, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 431633828, + "step": 2513 + }, + { + "epoch": 0.6612744130992306, + "grad_norm": 3.8195415395135934, + "learning_rate": 5e-06, + "loss": 0.098, + "num_input_tokens_seen": 431805968, + "step": 2514 + }, + { + "epoch": 0.6612744130992306, + "loss": 0.04755711182951927, + "loss_ce": 0.0009872873779386282, + "loss_iou": 0.39453125, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 431805968, + "step": 2514 + }, + { + "epoch": 0.6615374498586177, + "grad_norm": 4.443605112638486, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 431977936, + "step": 2515 + }, + { + "epoch": 0.6615374498586177, + "loss": 0.0916135311126709, + "loss_ce": 0.0008542468422092497, + "loss_iou": 0.458984375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 431977936, + "step": 2515 + }, + { + "epoch": 0.6618004866180048, + "grad_norm": 5.518155601836215, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 432150116, + "step": 2516 + }, + { + "epoch": 0.6618004866180048, + "loss": 0.14211586117744446, + "loss_ce": 0.0029557030647993088, + "loss_iou": 0.53125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 432150116, + "step": 2516 + }, + { + "epoch": 0.6620635233773919, + "grad_norm": 5.250876023224286, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 432322696, + "step": 2517 + }, + { + "epoch": 0.6620635233773919, + "loss": 0.07852162420749664, + "loss_ce": 0.00102223118301481, + "loss_iou": 0.58984375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 432322696, + "step": 2517 + }, + { + "epoch": 0.6623265601367792, + "grad_norm": 4.979602548630015, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 432490748, + "step": 2518 + }, + { + "epoch": 0.6623265601367792, + "loss": 0.07791407406330109, + "loss_ce": 0.0011623608879745007, + "loss_iou": 0.5078125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 432490748, + "step": 2518 + }, + { + "epoch": 0.6625895968961663, + "grad_norm": 7.14143158314321, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 432662924, + "step": 2519 + }, + { + "epoch": 0.6625895968961663, + "loss": 0.168125718832016, + "loss_ce": 0.000858865212649107, + "loss_iou": 0.515625, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 432662924, + "step": 2519 + }, + { + "epoch": 0.6628526336555534, + "grad_norm": 3.8877457754066387, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 432835068, + "step": 2520 + }, + { + "epoch": 0.6628526336555534, + "loss": 0.12645836174488068, + "loss_ce": 0.0006496440037153661, + "loss_iou": 0.546875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 432835068, + "step": 2520 + }, + { + "epoch": 0.6631156704149405, + "grad_norm": 4.906018490784193, + "learning_rate": 5e-06, + "loss": 0.1167, + "num_input_tokens_seen": 433007048, + "step": 2521 + }, + { + "epoch": 0.6631156704149405, + "loss": 0.08021432906389236, + "loss_ce": 0.000868627626914531, + "loss_iou": 0.55859375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 433007048, + "step": 2521 + }, + { + "epoch": 0.6633787071743276, + "grad_norm": 4.283295443334077, + "learning_rate": 5e-06, + "loss": 0.158, + "num_input_tokens_seen": 433177068, + "step": 2522 + }, + { + "epoch": 0.6633787071743276, + "loss": 0.11892493069171906, + "loss_ce": 0.00060828379355371, + "loss_iou": 0.4921875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 433177068, + "step": 2522 + }, + { + "epoch": 0.6636417439337148, + "grad_norm": 4.831641088226549, + "learning_rate": 5e-06, + "loss": 0.0736, + "num_input_tokens_seen": 433349504, + "step": 2523 + }, + { + "epoch": 0.6636417439337148, + "loss": 0.06323867291212082, + "loss_ce": 0.00031142536317929626, + "loss_iou": 0.4921875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 433349504, + "step": 2523 + }, + { + "epoch": 0.6639047806931019, + "grad_norm": 24.62858299579612, + "learning_rate": 5e-06, + "loss": 0.1129, + "num_input_tokens_seen": 433521616, + "step": 2524 + }, + { + "epoch": 0.6639047806931019, + "loss": 0.12700864672660828, + "loss_ce": 0.003351423656567931, + "loss_iou": 0.45703125, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 433521616, + "step": 2524 + }, + { + "epoch": 0.664167817452489, + "grad_norm": 6.377416561201896, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 433692012, + "step": 2525 + }, + { + "epoch": 0.664167817452489, + "loss": 0.06224376708269119, + "loss_ce": 0.003238032106310129, + "loss_iou": 0.54296875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 433692012, + "step": 2525 + }, + { + "epoch": 0.6644308542118761, + "grad_norm": 7.501801025322401, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 433864040, + "step": 2526 + }, + { + "epoch": 0.6644308542118761, + "loss": 0.09891114383935928, + "loss_ce": 0.002109395107254386, + "loss_iou": 0.40234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 433864040, + "step": 2526 + }, + { + "epoch": 0.6646938909712632, + "grad_norm": 4.527105351225178, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 434034556, + "step": 2527 + }, + { + "epoch": 0.6646938909712632, + "loss": 0.14892783761024475, + "loss_ce": 0.0010244110599160194, + "loss_iou": 0.4765625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 434034556, + "step": 2527 + }, + { + "epoch": 0.6649569277306504, + "grad_norm": 31.749018149089892, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 434206672, + "step": 2528 + }, + { + "epoch": 0.6649569277306504, + "loss": 0.12175662815570831, + "loss_ce": 0.0015021114377304912, + "loss_iou": 0.53515625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 434206672, + "step": 2528 + }, + { + "epoch": 0.6652199644900375, + "grad_norm": 4.8576669690091885, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 434378728, + "step": 2529 + }, + { + "epoch": 0.6652199644900375, + "loss": 0.07344581931829453, + "loss_ce": 0.006276628468185663, + "loss_iou": 0.5078125, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 434378728, + "step": 2529 + }, + { + "epoch": 0.6654830012494246, + "grad_norm": 4.9340551800619, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 434550708, + "step": 2530 + }, + { + "epoch": 0.6654830012494246, + "loss": 0.0640825480222702, + "loss_ce": 0.0013689253246411681, + "loss_iou": 0.55859375, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 434550708, + "step": 2530 + }, + { + "epoch": 0.6657460380088117, + "grad_norm": 9.63881283843686, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 434723100, + "step": 2531 + }, + { + "epoch": 0.6657460380088117, + "loss": 0.050916872918605804, + "loss_ce": 0.0004713151138275862, + "loss_iou": 0.64453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 434723100, + "step": 2531 + }, + { + "epoch": 0.6660090747681988, + "grad_norm": 5.34856475630482, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 434893672, + "step": 2532 + }, + { + "epoch": 0.6660090747681988, + "loss": 0.11488111317157745, + "loss_ce": 0.00108106411062181, + "loss_iou": 0.5390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 434893672, + "step": 2532 + }, + { + "epoch": 0.666272111527586, + "grad_norm": 6.482198264697821, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 435065996, + "step": 2533 + }, + { + "epoch": 0.666272111527586, + "loss": 0.1057998389005661, + "loss_ce": 0.00013272129581309855, + "loss_iou": 0.6015625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 435065996, + "step": 2533 + }, + { + "epoch": 0.6665351482869731, + "grad_norm": 15.608471694941006, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 435236164, + "step": 2534 + }, + { + "epoch": 0.6665351482869731, + "loss": 0.08966468274593353, + "loss_ce": 0.004398562014102936, + "loss_iou": 0.4375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 435236164, + "step": 2534 + }, + { + "epoch": 0.6667981850463602, + "grad_norm": 3.821066216794497, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 435408552, + "step": 2535 + }, + { + "epoch": 0.6667981850463602, + "loss": 0.13420158624649048, + "loss_ce": 0.0008855484193190932, + "loss_iou": 0.423828125, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 435408552, + "step": 2535 + }, + { + "epoch": 0.6670612218057473, + "grad_norm": 7.414329883383495, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 435580704, + "step": 2536 + }, + { + "epoch": 0.6670612218057473, + "loss": 0.09095387160778046, + "loss_ce": 0.0024223732762038708, + "loss_iou": 0.53125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 435580704, + "step": 2536 + }, + { + "epoch": 0.6673242585651344, + "grad_norm": 33.27432314606939, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 435752624, + "step": 2537 + }, + { + "epoch": 0.6673242585651344, + "loss": 0.15344524383544922, + "loss_ce": 0.0014066637959331274, + "loss_iou": 0.57421875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 435752624, + "step": 2537 + }, + { + "epoch": 0.6675872953245217, + "grad_norm": 7.327533892405677, + "learning_rate": 5e-06, + "loss": 0.1128, + "num_input_tokens_seen": 435924672, + "step": 2538 + }, + { + "epoch": 0.6675872953245217, + "loss": 0.11101265996694565, + "loss_ce": 0.000935757125262171, + "loss_iou": 0.447265625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 435924672, + "step": 2538 + }, + { + "epoch": 0.6678503320839088, + "grad_norm": 7.348552482036463, + "learning_rate": 5e-06, + "loss": 0.1424, + "num_input_tokens_seen": 436096756, + "step": 2539 + }, + { + "epoch": 0.6678503320839088, + "loss": 0.1398707926273346, + "loss_ce": 0.0033656705636531115, + "loss_iou": NaN, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 436096756, + "step": 2539 + }, + { + "epoch": 0.6681133688432959, + "grad_norm": 20.204014590374456, + "learning_rate": 5e-06, + "loss": 0.1482, + "num_input_tokens_seen": 436267320, + "step": 2540 + }, + { + "epoch": 0.6681133688432959, + "loss": 0.11994585394859314, + "loss_ce": 0.0005916071822866797, + "loss_iou": 0.4375, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 436267320, + "step": 2540 + }, + { + "epoch": 0.668376405602683, + "grad_norm": 8.407025005461401, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 436439328, + "step": 2541 + }, + { + "epoch": 0.668376405602683, + "loss": 0.11520107090473175, + "loss_ce": 0.003811910282820463, + "loss_iou": 0.61328125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 436439328, + "step": 2541 + }, + { + "epoch": 0.6686394423620701, + "grad_norm": 5.145720476296494, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 436609828, + "step": 2542 + }, + { + "epoch": 0.6686394423620701, + "loss": 0.06836480647325516, + "loss_ce": 0.00037164578679949045, + "loss_iou": 0.53125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 436609828, + "step": 2542 + }, + { + "epoch": 0.6689024791214572, + "grad_norm": 8.206215780846446, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 436781764, + "step": 2543 + }, + { + "epoch": 0.6689024791214572, + "loss": 0.107704758644104, + "loss_ce": 0.002266527386382222, + "loss_iou": 0.5234375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 436781764, + "step": 2543 + }, + { + "epoch": 0.6691655158808444, + "grad_norm": 7.4768704044685865, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 436953548, + "step": 2544 + }, + { + "epoch": 0.6691655158808444, + "loss": 0.17240890860557556, + "loss_ce": 0.0010527035919949412, + "loss_iou": 0.53515625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 436953548, + "step": 2544 + }, + { + "epoch": 0.6694285526402315, + "grad_norm": 9.203598751541529, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 437125648, + "step": 2545 + }, + { + "epoch": 0.6694285526402315, + "loss": 0.0696527361869812, + "loss_ce": 0.0004388623929116875, + "loss_iou": 0.546875, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 437125648, + "step": 2545 + }, + { + "epoch": 0.6696915893996186, + "grad_norm": 4.3933164564180816, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 437297756, + "step": 2546 + }, + { + "epoch": 0.6696915893996186, + "loss": 0.12276500463485718, + "loss_ce": 0.004127927124500275, + "loss_iou": 0.498046875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 437297756, + "step": 2546 + }, + { + "epoch": 0.6699546261590057, + "grad_norm": 4.6906533688912795, + "learning_rate": 5e-06, + "loss": 0.1226, + "num_input_tokens_seen": 437470168, + "step": 2547 + }, + { + "epoch": 0.6699546261590057, + "loss": 0.16925577819347382, + "loss_ce": 0.00012736135977320373, + "loss_iou": 0.5078125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 437470168, + "step": 2547 + }, + { + "epoch": 0.6702176629183928, + "grad_norm": 5.700480300794898, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 437641860, + "step": 2548 + }, + { + "epoch": 0.6702176629183928, + "loss": 0.08314710855484009, + "loss_ce": 0.0011463778791949153, + "loss_iou": 0.3671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 437641860, + "step": 2548 + }, + { + "epoch": 0.67048069967778, + "grad_norm": 4.582897287065671, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 437814364, + "step": 2549 + }, + { + "epoch": 0.67048069967778, + "loss": 0.1702890694141388, + "loss_ce": 0.0015268486458808184, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 437814364, + "step": 2549 + }, + { + "epoch": 0.6707437364371671, + "grad_norm": 5.613370832938644, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 437986344, + "step": 2550 + }, + { + "epoch": 0.6707437364371671, + "loss": 0.13855043053627014, + "loss_ce": 0.0012823636643588543, + "loss_iou": 0.53125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 437986344, + "step": 2550 + }, + { + "epoch": 0.6710067731965542, + "grad_norm": 17.044687935022132, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 438158712, + "step": 2551 + }, + { + "epoch": 0.6710067731965542, + "loss": 0.08713387697935104, + "loss_ce": 0.00023507134756073356, + "loss_iou": 0.5703125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 438158712, + "step": 2551 + }, + { + "epoch": 0.6712698099559413, + "grad_norm": 6.906118924960931, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 438330820, + "step": 2552 + }, + { + "epoch": 0.6712698099559413, + "loss": 0.13604578375816345, + "loss_ce": 0.0038131249602884054, + "loss_iou": 0.45703125, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 438330820, + "step": 2552 + }, + { + "epoch": 0.6715328467153284, + "grad_norm": 5.001208896831707, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 438503128, + "step": 2553 + }, + { + "epoch": 0.6715328467153284, + "loss": 0.11509568989276886, + "loss_ce": 0.00016648891323711723, + "loss_iou": 0.48046875, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 438503128, + "step": 2553 + }, + { + "epoch": 0.6717958834747156, + "grad_norm": 5.88413772044494, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 438675408, + "step": 2554 + }, + { + "epoch": 0.6717958834747156, + "loss": 0.06504229456186295, + "loss_ce": 0.000772272062022239, + "loss_iou": 0.58203125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 438675408, + "step": 2554 + }, + { + "epoch": 0.6720589202341027, + "grad_norm": 5.286645052918952, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 438847472, + "step": 2555 + }, + { + "epoch": 0.6720589202341027, + "loss": 0.11925958842039108, + "loss_ce": 0.00024103187024593353, + "loss_iou": 0.65625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 438847472, + "step": 2555 + }, + { + "epoch": 0.6723219569934898, + "grad_norm": 7.503881873733868, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 439019864, + "step": 2556 + }, + { + "epoch": 0.6723219569934898, + "loss": 0.1399090439081192, + "loss_ce": 0.0017254444537684321, + "loss_iou": 0.51171875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 439019864, + "step": 2556 + }, + { + "epoch": 0.672584993752877, + "grad_norm": 4.642912313294466, + "learning_rate": 5e-06, + "loss": 0.0921, + "num_input_tokens_seen": 439191776, + "step": 2557 + }, + { + "epoch": 0.672584993752877, + "loss": 0.12639446556568146, + "loss_ce": 0.0015470522921532393, + "loss_iou": 0.62890625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 439191776, + "step": 2557 + }, + { + "epoch": 0.672848030512264, + "grad_norm": 5.916383494046343, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 439360860, + "step": 2558 + }, + { + "epoch": 0.672848030512264, + "loss": 0.09076009690761566, + "loss_ce": 0.0006722048274241388, + "loss_iou": 0.6015625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 439360860, + "step": 2558 + }, + { + "epoch": 0.6731110672716513, + "grad_norm": 20.566659262884578, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 439532496, + "step": 2559 + }, + { + "epoch": 0.6731110672716513, + "loss": 0.10060098767280579, + "loss_ce": 0.003646642668172717, + "loss_iou": 0.546875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 439532496, + "step": 2559 + }, + { + "epoch": 0.6733741040310384, + "grad_norm": 4.011859025446864, + "learning_rate": 5e-06, + "loss": 0.1167, + "num_input_tokens_seen": 439704520, + "step": 2560 + }, + { + "epoch": 0.6733741040310384, + "loss": 0.0790782943367958, + "loss_ce": 0.0015483875758945942, + "loss_iou": 0.404296875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 439704520, + "step": 2560 + }, + { + "epoch": 0.6736371407904255, + "grad_norm": 7.247619498755374, + "learning_rate": 5e-06, + "loss": 0.1282, + "num_input_tokens_seen": 439877048, + "step": 2561 + }, + { + "epoch": 0.6736371407904255, + "loss": 0.09221772849559784, + "loss_ce": 0.0026181198190897703, + "loss_iou": 0.50390625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 439877048, + "step": 2561 + }, + { + "epoch": 0.6739001775498126, + "grad_norm": 6.385962315237775, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 440049300, + "step": 2562 + }, + { + "epoch": 0.6739001775498126, + "loss": 0.041686464101076126, + "loss_ce": 0.0010523094097152352, + "loss_iou": 0.55078125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 440049300, + "step": 2562 + }, + { + "epoch": 0.6741632143091997, + "grad_norm": 26.7431708970354, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 440221368, + "step": 2563 + }, + { + "epoch": 0.6741632143091997, + "loss": 0.18678849935531616, + "loss_ce": 0.002889578230679035, + "loss_iou": 0.47265625, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 440221368, + "step": 2563 + }, + { + "epoch": 0.6744262510685868, + "grad_norm": 3.4897355882240686, + "learning_rate": 5e-06, + "loss": 0.131, + "num_input_tokens_seen": 440393588, + "step": 2564 + }, + { + "epoch": 0.6744262510685868, + "loss": 0.13501334190368652, + "loss_ce": 0.0007817824953235686, + "loss_iou": 0.5078125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 440393588, + "step": 2564 + }, + { + "epoch": 0.674689287827974, + "grad_norm": 5.796770664321287, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 440565712, + "step": 2565 + }, + { + "epoch": 0.674689287827974, + "loss": 0.06512662768363953, + "loss_ce": 0.0005819504731334746, + "loss_iou": 0.50390625, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 440565712, + "step": 2565 + }, + { + "epoch": 0.6749523245873611, + "grad_norm": 6.5511648198704755, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 440737988, + "step": 2566 + }, + { + "epoch": 0.6749523245873611, + "loss": 0.11750826984643936, + "loss_ce": 0.0036166671197861433, + "loss_iou": 0.52734375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 440737988, + "step": 2566 + }, + { + "epoch": 0.6752153613467482, + "grad_norm": 21.358635365800044, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 440910068, + "step": 2567 + }, + { + "epoch": 0.6752153613467482, + "loss": 0.10725726187229156, + "loss_ce": 0.0005067750462330878, + "loss_iou": 0.6875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 440910068, + "step": 2567 + }, + { + "epoch": 0.6754783981061353, + "grad_norm": 4.519751532821699, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 441080800, + "step": 2568 + }, + { + "epoch": 0.6754783981061353, + "loss": 0.0696103498339653, + "loss_ce": 0.00015234279271680862, + "loss_iou": 0.50390625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 441080800, + "step": 2568 + }, + { + "epoch": 0.6757414348655224, + "grad_norm": 4.051067943325395, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 441251032, + "step": 2569 + }, + { + "epoch": 0.6757414348655224, + "loss": 0.09424732625484467, + "loss_ce": 0.00022266953601501882, + "loss_iou": 0.54296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 441251032, + "step": 2569 + }, + { + "epoch": 0.6760044716249096, + "grad_norm": 6.209787964250504, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 441422784, + "step": 2570 + }, + { + "epoch": 0.6760044716249096, + "loss": 0.0798967033624649, + "loss_ce": 0.00027634453726932406, + "loss_iou": 0.5078125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 441422784, + "step": 2570 + }, + { + "epoch": 0.6762675083842967, + "grad_norm": 7.3672437629155505, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 441593108, + "step": 2571 + }, + { + "epoch": 0.6762675083842967, + "loss": 0.12310583889484406, + "loss_ce": 0.0010050098644569516, + "loss_iou": NaN, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 441593108, + "step": 2571 + }, + { + "epoch": 0.6765305451436838, + "grad_norm": 5.196433267413594, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 441765456, + "step": 2572 + }, + { + "epoch": 0.6765305451436838, + "loss": 0.1754513680934906, + "loss_ce": 0.0007077160989865661, + "loss_iou": NaN, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 441765456, + "step": 2572 + }, + { + "epoch": 0.6767935819030709, + "grad_norm": 4.136115815472187, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 441937580, + "step": 2573 + }, + { + "epoch": 0.6767935819030709, + "loss": 0.13903826475143433, + "loss_ce": 0.0002443119592498988, + "loss_iou": 0.46484375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 441937580, + "step": 2573 + }, + { + "epoch": 0.677056618662458, + "grad_norm": 18.488728586585154, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 442108052, + "step": 2574 + }, + { + "epoch": 0.677056618662458, + "loss": 0.12112629413604736, + "loss_ce": 0.0002309026604052633, + "loss_iou": 0.478515625, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 442108052, + "step": 2574 + }, + { + "epoch": 0.6773196554218452, + "grad_norm": 7.779321814196593, + "learning_rate": 5e-06, + "loss": 0.1347, + "num_input_tokens_seen": 442279576, + "step": 2575 + }, + { + "epoch": 0.6773196554218452, + "loss": 0.10174375027418137, + "loss_ce": 0.0021038581617176533, + "loss_iou": 0.6015625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 442279576, + "step": 2575 + }, + { + "epoch": 0.6775826921812323, + "grad_norm": 4.027957209739208, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 442447800, + "step": 2576 + }, + { + "epoch": 0.6775826921812323, + "loss": 0.08731138706207275, + "loss_ce": 0.0017401032382622361, + "loss_iou": 0.4921875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 442447800, + "step": 2576 + }, + { + "epoch": 0.6778457289406195, + "grad_norm": 3.5976030894091644, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 442620136, + "step": 2577 + }, + { + "epoch": 0.6778457289406195, + "loss": 0.11548551917076111, + "loss_ce": 0.0009377849055454135, + "loss_iou": 0.5390625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 442620136, + "step": 2577 + }, + { + "epoch": 0.6781087657000066, + "grad_norm": 14.27185242111941, + "learning_rate": 5e-06, + "loss": 0.1706, + "num_input_tokens_seen": 442792328, + "step": 2578 + }, + { + "epoch": 0.6781087657000066, + "loss": 0.20107831060886383, + "loss_ce": 0.01119793951511383, + "loss_iou": 0.447265625, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 442792328, + "step": 2578 + }, + { + "epoch": 0.6783718024593937, + "grad_norm": 2.6862819825925905, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 442964732, + "step": 2579 + }, + { + "epoch": 0.6783718024593937, + "loss": 0.12557528913021088, + "loss_ce": 0.001063565374352038, + "loss_iou": 0.328125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 442964732, + "step": 2579 + }, + { + "epoch": 0.6786348392187809, + "grad_norm": 19.12719362396153, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 443136988, + "step": 2580 + }, + { + "epoch": 0.6786348392187809, + "loss": 0.1754310131072998, + "loss_ce": 0.001114598591811955, + "loss_iou": 0.578125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 443136988, + "step": 2580 + }, + { + "epoch": 0.678897875978168, + "grad_norm": 4.296169271228249, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 443309004, + "step": 2581 + }, + { + "epoch": 0.678897875978168, + "loss": 0.056013718247413635, + "loss_ce": 0.0018907939083874226, + "loss_iou": 0.470703125, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 443309004, + "step": 2581 + }, + { + "epoch": 0.6791609127375551, + "grad_norm": 14.58212290872784, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 443481132, + "step": 2582 + }, + { + "epoch": 0.6791609127375551, + "loss": 0.10740009695291519, + "loss_ce": 0.00046649359865114093, + "loss_iou": 0.310546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 443481132, + "step": 2582 + }, + { + "epoch": 0.6794239494969422, + "grad_norm": 5.824835527761782, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 443651636, + "step": 2583 + }, + { + "epoch": 0.6794239494969422, + "loss": 0.13724330067634583, + "loss_ce": 0.0017452588072046638, + "loss_iou": 0.56640625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 443651636, + "step": 2583 + }, + { + "epoch": 0.6796869862563293, + "grad_norm": 6.27799845618246, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 443823548, + "step": 2584 + }, + { + "epoch": 0.6796869862563293, + "loss": 0.1675836145877838, + "loss_ce": 0.002330929273739457, + "loss_iou": 0.390625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 443823548, + "step": 2584 + }, + { + "epoch": 0.6799500230157165, + "grad_norm": 5.283309975586119, + "learning_rate": 5e-06, + "loss": 0.1072, + "num_input_tokens_seen": 443994196, + "step": 2585 + }, + { + "epoch": 0.6799500230157165, + "loss": 0.07297757267951965, + "loss_ce": 7.10797612555325e-05, + "loss_iou": 0.68359375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 443994196, + "step": 2585 + }, + { + "epoch": 0.6802130597751036, + "grad_norm": 4.83126163860268, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 444166260, + "step": 2586 + }, + { + "epoch": 0.6802130597751036, + "loss": 0.15566733479499817, + "loss_ce": 0.0010347592178732157, + "loss_iou": 0.57421875, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 444166260, + "step": 2586 + }, + { + "epoch": 0.6804760965344907, + "grad_norm": 5.935537777211587, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 444338748, + "step": 2587 + }, + { + "epoch": 0.6804760965344907, + "loss": 0.07495879381895065, + "loss_ce": 0.0001907299447339028, + "loss_iou": 0.6015625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 444338748, + "step": 2587 + }, + { + "epoch": 0.6807391332938778, + "grad_norm": 4.981341494504371, + "learning_rate": 5e-06, + "loss": 0.1501, + "num_input_tokens_seen": 444510996, + "step": 2588 + }, + { + "epoch": 0.6807391332938778, + "loss": 0.2452811598777771, + "loss_ce": 0.002849509473890066, + "loss_iou": 0.3984375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 444510996, + "step": 2588 + }, + { + "epoch": 0.6810021700532649, + "grad_norm": 34.8294057916369, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 444682880, + "step": 2589 + }, + { + "epoch": 0.6810021700532649, + "loss": 0.20022635161876678, + "loss_ce": 0.002899688435718417, + "loss_iou": 0.64453125, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 444682880, + "step": 2589 + }, + { + "epoch": 0.681265206812652, + "grad_norm": 6.783440844262528, + "learning_rate": 5e-06, + "loss": 0.1355, + "num_input_tokens_seen": 444855080, + "step": 2590 + }, + { + "epoch": 0.681265206812652, + "loss": 0.17689135670661926, + "loss_ce": 0.0014458001824095845, + "loss_iou": 0.431640625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 444855080, + "step": 2590 + }, + { + "epoch": 0.6815282435720392, + "grad_norm": 6.398055261239124, + "learning_rate": 5e-06, + "loss": 0.1061, + "num_input_tokens_seen": 445027648, + "step": 2591 + }, + { + "epoch": 0.6815282435720392, + "loss": 0.06686560064554214, + "loss_ce": 0.003968872129917145, + "loss_iou": 0.6484375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 445027648, + "step": 2591 + }, + { + "epoch": 0.6817912803314263, + "grad_norm": 9.754653714308724, + "learning_rate": 5e-06, + "loss": 0.158, + "num_input_tokens_seen": 445198060, + "step": 2592 + }, + { + "epoch": 0.6817912803314263, + "loss": 0.0956597551703453, + "loss_ce": 0.001345182885415852, + "loss_iou": 0.42578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 445198060, + "step": 2592 + }, + { + "epoch": 0.6820543170908134, + "grad_norm": 11.093960051238781, + "learning_rate": 5e-06, + "loss": 0.1506, + "num_input_tokens_seen": 445370376, + "step": 2593 + }, + { + "epoch": 0.6820543170908134, + "loss": 0.14656749367713928, + "loss_ce": 0.001456425990909338, + "loss_iou": 0.482421875, + "loss_num": 0.0289306640625, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 445370376, + "step": 2593 + }, + { + "epoch": 0.6823173538502005, + "grad_norm": 7.226137026161222, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 445542828, + "step": 2594 + }, + { + "epoch": 0.6823173538502005, + "loss": 0.06742848455905914, + "loss_ce": 0.005309954285621643, + "loss_iou": 0.48828125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 445542828, + "step": 2594 + }, + { + "epoch": 0.6825803906095876, + "grad_norm": 4.617442760822597, + "learning_rate": 5e-06, + "loss": 0.137, + "num_input_tokens_seen": 445715128, + "step": 2595 + }, + { + "epoch": 0.6825803906095876, + "loss": 0.11120542138814926, + "loss_ce": 0.0006707610446028411, + "loss_iou": 0.56640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 445715128, + "step": 2595 + }, + { + "epoch": 0.6828434273689749, + "grad_norm": 8.070925307261565, + "learning_rate": 5e-06, + "loss": 0.1584, + "num_input_tokens_seen": 445887032, + "step": 2596 + }, + { + "epoch": 0.6828434273689749, + "loss": 0.12233078479766846, + "loss_ce": 0.0008708295645192266, + "loss_iou": 0.486328125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 445887032, + "step": 2596 + }, + { + "epoch": 0.683106464128362, + "grad_norm": 4.079666869126499, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 446059408, + "step": 2597 + }, + { + "epoch": 0.683106464128362, + "loss": 0.1118651032447815, + "loss_ce": 0.00023180160496849567, + "loss_iou": 0.49609375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 446059408, + "step": 2597 + }, + { + "epoch": 0.6833695008877491, + "grad_norm": 6.132037428307681, + "learning_rate": 5e-06, + "loss": 0.1023, + "num_input_tokens_seen": 446229696, + "step": 2598 + }, + { + "epoch": 0.6833695008877491, + "loss": 0.13406622409820557, + "loss_ce": 0.0008264797506853938, + "loss_iou": 0.44140625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 446229696, + "step": 2598 + }, + { + "epoch": 0.6836325376471362, + "grad_norm": 3.96696464215725, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 446401824, + "step": 2599 + }, + { + "epoch": 0.6836325376471362, + "loss": 0.07421835511922836, + "loss_ce": 0.0017085927538573742, + "loss_iou": 0.4453125, + "loss_num": 0.0145263671875, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 446401824, + "step": 2599 + }, + { + "epoch": 0.6838955744065233, + "grad_norm": 5.0831174679987035, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 446573796, + "step": 2600 + }, + { + "epoch": 0.6838955744065233, + "loss": 0.10569039732217789, + "loss_ce": 0.0010151021415367723, + "loss_iou": 0.484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 446573796, + "step": 2600 + }, + { + "epoch": 0.6841586111659105, + "grad_norm": 15.882119320832834, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 446746000, + "step": 2601 + }, + { + "epoch": 0.6841586111659105, + "loss": 0.1552843451499939, + "loss_ce": 0.000499195302836597, + "loss_iou": 0.3125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 446746000, + "step": 2601 + }, + { + "epoch": 0.6844216479252976, + "grad_norm": 3.8491365485511118, + "learning_rate": 5e-06, + "loss": 0.0928, + "num_input_tokens_seen": 446918100, + "step": 2602 + }, + { + "epoch": 0.6844216479252976, + "loss": 0.050599753856658936, + "loss_ce": 0.0002762702642939985, + "loss_iou": 0.54296875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 446918100, + "step": 2602 + }, + { + "epoch": 0.6846846846846847, + "grad_norm": 4.058506484572602, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 447088492, + "step": 2603 + }, + { + "epoch": 0.6846846846846847, + "loss": 0.043845463544130325, + "loss_ce": 0.0001748104114085436, + "loss_iou": 0.57421875, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 447088492, + "step": 2603 + }, + { + "epoch": 0.6849477214440718, + "grad_norm": 4.841823848779041, + "learning_rate": 5e-06, + "loss": 0.079, + "num_input_tokens_seen": 447260652, + "step": 2604 + }, + { + "epoch": 0.6849477214440718, + "loss": 0.08405909687280655, + "loss_ce": 0.004438734147697687, + "loss_iou": 0.419921875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 447260652, + "step": 2604 + }, + { + "epoch": 0.6852107582034589, + "grad_norm": 9.044057313780108, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 447432936, + "step": 2605 + }, + { + "epoch": 0.6852107582034589, + "loss": 0.07848000526428223, + "loss_ce": 0.0012094933772459626, + "loss_iou": 0.63671875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 447432936, + "step": 2605 + }, + { + "epoch": 0.6854737949628461, + "grad_norm": 7.595343434642406, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 447605384, + "step": 2606 + }, + { + "epoch": 0.6854737949628461, + "loss": 0.12424921244382858, + "loss_ce": 0.0026061516255140305, + "loss_iou": 0.50390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 447605384, + "step": 2606 + }, + { + "epoch": 0.6857368317222332, + "grad_norm": 5.0763063086969415, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 447777576, + "step": 2607 + }, + { + "epoch": 0.6857368317222332, + "loss": 0.11916627734899521, + "loss_ce": 0.002253434620797634, + "loss_iou": 0.54296875, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 447777576, + "step": 2607 + }, + { + "epoch": 0.6859998684816203, + "grad_norm": 11.527878652419272, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 447949852, + "step": 2608 + }, + { + "epoch": 0.6859998684816203, + "loss": 0.0797332376241684, + "loss_ce": 0.0030425682198256254, + "loss_iou": 0.5859375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 447949852, + "step": 2608 + }, + { + "epoch": 0.6862629052410074, + "grad_norm": 6.796020734872217, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 448122008, + "step": 2609 + }, + { + "epoch": 0.6862629052410074, + "loss": 0.09912531077861786, + "loss_ce": 0.0006145666702650487, + "loss_iou": 0.58203125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 448122008, + "step": 2609 + }, + { + "epoch": 0.6865259420003945, + "grad_norm": 53.2300135535103, + "learning_rate": 5e-06, + "loss": 0.1731, + "num_input_tokens_seen": 448294084, + "step": 2610 + }, + { + "epoch": 0.6865259420003945, + "loss": 0.14799347519874573, + "loss_ce": 0.0005325321108102798, + "loss_iou": 0.337890625, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 448294084, + "step": 2610 + }, + { + "epoch": 0.6867889787597817, + "grad_norm": 8.384011107333327, + "learning_rate": 5e-06, + "loss": 0.1662, + "num_input_tokens_seen": 448466064, + "step": 2611 + }, + { + "epoch": 0.6867889787597817, + "loss": 0.16361187398433685, + "loss_ce": 0.0061564212664961815, + "loss_iou": 0.40625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 448466064, + "step": 2611 + }, + { + "epoch": 0.6870520155191688, + "grad_norm": 58.16654404486195, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 448638396, + "step": 2612 + }, + { + "epoch": 0.6870520155191688, + "loss": 0.11319980025291443, + "loss_ce": 0.008921236731112003, + "loss_iou": 0.4609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 448638396, + "step": 2612 + }, + { + "epoch": 0.6873150522785559, + "grad_norm": 4.791126855568047, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 448808820, + "step": 2613 + }, + { + "epoch": 0.6873150522785559, + "loss": 0.08992376923561096, + "loss_ce": 0.0010565832490101457, + "loss_iou": 0.6640625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 448808820, + "step": 2613 + }, + { + "epoch": 0.687578089037943, + "grad_norm": 4.623158042374579, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 448980968, + "step": 2614 + }, + { + "epoch": 0.687578089037943, + "loss": 0.10407891124486923, + "loss_ce": 0.0005632878746837378, + "loss_iou": 0.39453125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 448980968, + "step": 2614 + }, + { + "epoch": 0.6878411257973301, + "grad_norm": 16.128815011570154, + "learning_rate": 5e-06, + "loss": 0.0861, + "num_input_tokens_seen": 449153104, + "step": 2615 + }, + { + "epoch": 0.6878411257973301, + "loss": 0.05162462592124939, + "loss_ce": 0.0013011416886001825, + "loss_iou": 0.498046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 449153104, + "step": 2615 + }, + { + "epoch": 0.6881041625567172, + "grad_norm": 12.10527552944659, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 449325208, + "step": 2616 + }, + { + "epoch": 0.6881041625567172, + "loss": 0.056678079068660736, + "loss_ce": 0.0005867721047252417, + "loss_iou": 0.515625, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 449325208, + "step": 2616 + }, + { + "epoch": 0.6883671993161045, + "grad_norm": 4.0544613329310675, + "learning_rate": 5e-06, + "loss": 0.0917, + "num_input_tokens_seen": 449497440, + "step": 2617 + }, + { + "epoch": 0.6883671993161045, + "loss": 0.12780970335006714, + "loss_ce": 0.0006887409836053848, + "loss_iou": 0.5234375, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 449497440, + "step": 2617 + }, + { + "epoch": 0.6886302360754916, + "grad_norm": 6.140781979511808, + "learning_rate": 5e-06, + "loss": 0.1287, + "num_input_tokens_seen": 449669860, + "step": 2618 + }, + { + "epoch": 0.6886302360754916, + "loss": 0.12178224325180054, + "loss_ce": 0.0038012792356312275, + "loss_iou": 0.43359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 449669860, + "step": 2618 + }, + { + "epoch": 0.6888932728348787, + "grad_norm": 4.582153125692399, + "learning_rate": 5e-06, + "loss": 0.1553, + "num_input_tokens_seen": 449842236, + "step": 2619 + }, + { + "epoch": 0.6888932728348787, + "loss": 0.11555735766887665, + "loss_ce": 0.003313705325126648, + "loss_iou": 0.5703125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 449842236, + "step": 2619 + }, + { + "epoch": 0.6891563095942658, + "grad_norm": 10.075980623616992, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 450014444, + "step": 2620 + }, + { + "epoch": 0.6891563095942658, + "loss": 0.20255392789840698, + "loss_ce": 0.004891556687653065, + "loss_iou": 0.4765625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 450014444, + "step": 2620 + }, + { + "epoch": 0.6894193463536529, + "grad_norm": 16.09024773291766, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 450186632, + "step": 2621 + }, + { + "epoch": 0.6894193463536529, + "loss": 0.15711162984371185, + "loss_ce": 0.001548278727568686, + "loss_iou": 0.5703125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 450186632, + "step": 2621 + }, + { + "epoch": 0.6896823831130401, + "grad_norm": 4.077753866376941, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 450358652, + "step": 2622 + }, + { + "epoch": 0.6896823831130401, + "loss": 0.17466840147972107, + "loss_ce": 0.0030528109055012465, + "loss_iou": 0.32421875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 450358652, + "step": 2622 + }, + { + "epoch": 0.6899454198724272, + "grad_norm": 5.782402643209976, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 450530992, + "step": 2623 + }, + { + "epoch": 0.6899454198724272, + "loss": 0.09183860570192337, + "loss_ce": 0.000987776555120945, + "loss_iou": 0.51171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 450530992, + "step": 2623 + }, + { + "epoch": 0.6902084566318143, + "grad_norm": 4.962596372966127, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 450702972, + "step": 2624 + }, + { + "epoch": 0.6902084566318143, + "loss": 0.16587726771831512, + "loss_ce": 0.004210404586046934, + "loss_iou": 0.34375, + "loss_num": 0.0322265625, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 450702972, + "step": 2624 + }, + { + "epoch": 0.6904714933912014, + "grad_norm": 4.41836936316546, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 450871644, + "step": 2625 + }, + { + "epoch": 0.6904714933912014, + "loss": 0.08390143513679504, + "loss_ce": 0.00022223126143217087, + "loss_iou": 0.35546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 450871644, + "step": 2625 + }, + { + "epoch": 0.6907345301505885, + "grad_norm": 6.24899387116621, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 451043764, + "step": 2626 + }, + { + "epoch": 0.6907345301505885, + "loss": 0.07276784628629684, + "loss_ce": 0.0041185528971254826, + "loss_iou": 0.515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 451043764, + "step": 2626 + }, + { + "epoch": 0.6909975669099757, + "grad_norm": 3.855416348391677, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 451215860, + "step": 2627 + }, + { + "epoch": 0.6909975669099757, + "loss": 0.0781233012676239, + "loss_ce": 0.00033398933010175824, + "loss_iou": 0.4921875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 451215860, + "step": 2627 + }, + { + "epoch": 0.6912606036693628, + "grad_norm": 9.945431727329952, + "learning_rate": 5e-06, + "loss": 0.0856, + "num_input_tokens_seen": 451387900, + "step": 2628 + }, + { + "epoch": 0.6912606036693628, + "loss": 0.11348669975996017, + "loss_ce": 0.0029215135145932436, + "loss_iou": 0.4140625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 451387900, + "step": 2628 + }, + { + "epoch": 0.6915236404287499, + "grad_norm": 4.281056455844802, + "learning_rate": 5e-06, + "loss": 0.0937, + "num_input_tokens_seen": 451558132, + "step": 2629 + }, + { + "epoch": 0.6915236404287499, + "loss": 0.11048734933137894, + "loss_ce": 0.00626982469111681, + "loss_iou": 0.578125, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 451558132, + "step": 2629 + }, + { + "epoch": 0.691786677188137, + "grad_norm": 4.21783711884085, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 451730300, + "step": 2630 + }, + { + "epoch": 0.691786677188137, + "loss": 0.06679116189479828, + "loss_ce": 0.0006290507735684514, + "loss_iou": 0.44921875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 451730300, + "step": 2630 + }, + { + "epoch": 0.6920497139475241, + "grad_norm": 3.6368581797967257, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 451902464, + "step": 2631 + }, + { + "epoch": 0.6920497139475241, + "loss": 0.07377897202968597, + "loss_ce": 0.002199993235990405, + "loss_iou": 0.373046875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 451902464, + "step": 2631 + }, + { + "epoch": 0.6923127507069113, + "grad_norm": 4.812739476349578, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 452074432, + "step": 2632 + }, + { + "epoch": 0.6923127507069113, + "loss": 0.08376286178827286, + "loss_ce": 0.0004193550848867744, + "loss_iou": 0.482421875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 452074432, + "step": 2632 + }, + { + "epoch": 0.6925757874662984, + "grad_norm": 17.745450709557872, + "learning_rate": 5e-06, + "loss": 0.1677, + "num_input_tokens_seen": 452246508, + "step": 2633 + }, + { + "epoch": 0.6925757874662984, + "loss": 0.12932631373405457, + "loss_ce": 0.00045057572424411774, + "loss_iou": 0.48046875, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 452246508, + "step": 2633 + }, + { + "epoch": 0.6928388242256855, + "grad_norm": 3.8241822612681404, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 452418684, + "step": 2634 + }, + { + "epoch": 0.6928388242256855, + "loss": 0.07180548459291458, + "loss_ce": 0.000638492638245225, + "loss_iou": 0.578125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 452418684, + "step": 2634 + }, + { + "epoch": 0.6931018609850726, + "grad_norm": 5.017758815401084, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 452591112, + "step": 2635 + }, + { + "epoch": 0.6931018609850726, + "loss": 0.14692719280719757, + "loss_ce": 0.001221017329953611, + "loss_iou": 0.52734375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 452591112, + "step": 2635 + }, + { + "epoch": 0.6933648977444598, + "grad_norm": 4.688279396436893, + "learning_rate": 5e-06, + "loss": 0.1001, + "num_input_tokens_seen": 452763100, + "step": 2636 + }, + { + "epoch": 0.6933648977444598, + "loss": 0.11540480703115463, + "loss_ce": 0.0031611607410013676, + "loss_iou": 0.5390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 452763100, + "step": 2636 + }, + { + "epoch": 0.693627934503847, + "grad_norm": 5.800484869059427, + "learning_rate": 5e-06, + "loss": 0.1513, + "num_input_tokens_seen": 452932760, + "step": 2637 + }, + { + "epoch": 0.693627934503847, + "loss": 0.20021981000900269, + "loss_ce": 0.002526947297155857, + "loss_iou": 0.62890625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 452932760, + "step": 2637 + }, + { + "epoch": 0.6938909712632341, + "grad_norm": 6.943406352493944, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 453104824, + "step": 2638 + }, + { + "epoch": 0.6938909712632341, + "loss": 0.11978557705879211, + "loss_ce": 0.003910328261554241, + "loss_iou": 0.51953125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 453104824, + "step": 2638 + }, + { + "epoch": 0.6941540080226212, + "grad_norm": 8.392991417214587, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 453276756, + "step": 2639 + }, + { + "epoch": 0.6941540080226212, + "loss": 0.14925961196422577, + "loss_ce": 0.00796323362737894, + "loss_iou": 0.6484375, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 453276756, + "step": 2639 + }, + { + "epoch": 0.6944170447820083, + "grad_norm": 7.152620797713759, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 453447368, + "step": 2640 + }, + { + "epoch": 0.6944170447820083, + "loss": 0.12115476280450821, + "loss_ce": 0.002624482847750187, + "loss_iou": 0.51171875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 453447368, + "step": 2640 + }, + { + "epoch": 0.6946800815413954, + "grad_norm": 9.172566148062016, + "learning_rate": 5e-06, + "loss": 0.1376, + "num_input_tokens_seen": 453619160, + "step": 2641 + }, + { + "epoch": 0.6946800815413954, + "loss": 0.09918803721666336, + "loss_ce": 0.0022947255056351423, + "loss_iou": 0.53125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 453619160, + "step": 2641 + }, + { + "epoch": 0.6949431183007825, + "grad_norm": 4.761554471447507, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 453791024, + "step": 2642 + }, + { + "epoch": 0.6949431183007825, + "loss": 0.12317492812871933, + "loss_ce": 0.004400510806590319, + "loss_iou": 0.5, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 453791024, + "step": 2642 + }, + { + "epoch": 0.6952061550601697, + "grad_norm": 6.764608063652922, + "learning_rate": 5e-06, + "loss": 0.0985, + "num_input_tokens_seen": 453963268, + "step": 2643 + }, + { + "epoch": 0.6952061550601697, + "loss": 0.17664819955825806, + "loss_ce": 0.0005617668502964079, + "loss_iou": 0.5625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 453963268, + "step": 2643 + }, + { + "epoch": 0.6954691918195568, + "grad_norm": 8.472259366872775, + "learning_rate": 5e-06, + "loss": 0.1241, + "num_input_tokens_seen": 454133580, + "step": 2644 + }, + { + "epoch": 0.6954691918195568, + "loss": 0.05588904023170471, + "loss_ce": 0.0003470498777460307, + "loss_iou": 0.52734375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 454133580, + "step": 2644 + }, + { + "epoch": 0.6957322285789439, + "grad_norm": 9.45776397859236, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 454305316, + "step": 2645 + }, + { + "epoch": 0.6957322285789439, + "loss": 0.11683906614780426, + "loss_ce": 0.002215046202763915, + "loss_iou": NaN, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 454305316, + "step": 2645 + }, + { + "epoch": 0.695995265338331, + "grad_norm": 12.21950005723, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 454477460, + "step": 2646 + }, + { + "epoch": 0.695995265338331, + "loss": 0.19950228929519653, + "loss_ce": 0.00556308263912797, + "loss_iou": 0.455078125, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 454477460, + "step": 2646 + }, + { + "epoch": 0.6962583020977181, + "grad_norm": 4.539781489505394, + "learning_rate": 5e-06, + "loss": 0.1468, + "num_input_tokens_seen": 454649576, + "step": 2647 + }, + { + "epoch": 0.6962583020977181, + "loss": 0.053970806300640106, + "loss_ce": 0.0009465104667469859, + "loss_iou": 0.3828125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 454649576, + "step": 2647 + }, + { + "epoch": 0.6965213388571053, + "grad_norm": 14.32877869742435, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 454821436, + "step": 2648 + }, + { + "epoch": 0.6965213388571053, + "loss": 0.10093516111373901, + "loss_ce": 0.00645274156704545, + "loss_iou": 0.380859375, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 454821436, + "step": 2648 + }, + { + "epoch": 0.6967843756164924, + "grad_norm": 7.804241962530211, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 454993508, + "step": 2649 + }, + { + "epoch": 0.6967843756164924, + "loss": 0.1862741857767105, + "loss_ce": 0.0016580985393375158, + "loss_iou": 0.53515625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 454993508, + "step": 2649 + }, + { + "epoch": 0.6970474123758795, + "grad_norm": 16.275796095386262, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 455165748, + "step": 2650 + }, + { + "epoch": 0.6970474123758795, + "loss": 0.07620816677808762, + "loss_ce": 0.0016232050256803632, + "loss_iou": 0.5625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 455165748, + "step": 2650 + }, + { + "epoch": 0.6973104491352666, + "grad_norm": 4.357121015686003, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 455338084, + "step": 2651 + }, + { + "epoch": 0.6973104491352666, + "loss": 0.13134872913360596, + "loss_ce": 0.0034037926234304905, + "loss_iou": 0.59765625, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 455338084, + "step": 2651 + }, + { + "epoch": 0.6975734858946537, + "grad_norm": 5.3899957417482725, + "learning_rate": 5e-06, + "loss": 0.1252, + "num_input_tokens_seen": 455509908, + "step": 2652 + }, + { + "epoch": 0.6975734858946537, + "loss": 0.14220395684242249, + "loss_ce": 0.0009075828129425645, + "loss_iou": 0.458984375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 455509908, + "step": 2652 + }, + { + "epoch": 0.697836522654041, + "grad_norm": 6.823610210106841, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 455681908, + "step": 2653 + }, + { + "epoch": 0.697836522654041, + "loss": 0.05261433497071266, + "loss_ce": 0.0018330833408981562, + "loss_iou": 0.46484375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 455681908, + "step": 2653 + }, + { + "epoch": 0.698099559413428, + "grad_norm": 6.597254450850545, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 455853952, + "step": 2654 + }, + { + "epoch": 0.698099559413428, + "loss": 0.147089421749115, + "loss_ce": 0.00023882777895778418, + "loss_iou": 0.43359375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 455853952, + "step": 2654 + }, + { + "epoch": 0.6983625961728152, + "grad_norm": 6.378423815524031, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 456026208, + "step": 2655 + }, + { + "epoch": 0.6983625961728152, + "loss": 0.18122422695159912, + "loss_ce": 0.0033067562617361546, + "loss_iou": 0.57421875, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 456026208, + "step": 2655 + }, + { + "epoch": 0.6986256329322023, + "grad_norm": 25.554315055519154, + "learning_rate": 5e-06, + "loss": 0.1181, + "num_input_tokens_seen": 456198288, + "step": 2656 + }, + { + "epoch": 0.6986256329322023, + "loss": 0.10486012697219849, + "loss_ce": 0.0011003611143678427, + "loss_iou": 0.52734375, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 456198288, + "step": 2656 + }, + { + "epoch": 0.6988886696915894, + "grad_norm": 4.576359717222489, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 456365496, + "step": 2657 + }, + { + "epoch": 0.6988886696915894, + "loss": 0.13455136120319366, + "loss_ce": 0.0007317845011129975, + "loss_iou": 0.53125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 456365496, + "step": 2657 + }, + { + "epoch": 0.6991517064509766, + "grad_norm": 5.013450140740463, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 456537676, + "step": 2658 + }, + { + "epoch": 0.6991517064509766, + "loss": 0.08290005475282669, + "loss_ce": 0.003767975838854909, + "loss_iou": 0.5, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 456537676, + "step": 2658 + }, + { + "epoch": 0.6994147432103637, + "grad_norm": 4.084720539079834, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 456710364, + "step": 2659 + }, + { + "epoch": 0.6994147432103637, + "loss": 0.06024003401398659, + "loss_ce": 0.0006849807105027139, + "loss_iou": 0.5703125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 456710364, + "step": 2659 + }, + { + "epoch": 0.6996777799697508, + "grad_norm": 3.7990429726373636, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 456880760, + "step": 2660 + }, + { + "epoch": 0.6996777799697508, + "loss": 0.09679127484560013, + "loss_ce": 0.0005083205760456622, + "loss_iou": 0.482421875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 456880760, + "step": 2660 + }, + { + "epoch": 0.6999408167291379, + "grad_norm": 5.055246469854178, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 457051432, + "step": 2661 + }, + { + "epoch": 0.6999408167291379, + "loss": 0.21587547659873962, + "loss_ce": 0.0004213774227537215, + "loss_iou": 0.3671875, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 457051432, + "step": 2661 + }, + { + "epoch": 0.700203853488525, + "grad_norm": 3.8688477802914774, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 457223592, + "step": 2662 + }, + { + "epoch": 0.700203853488525, + "loss": 0.14515820145606995, + "loss_ce": 0.0040449099615216255, + "loss_iou": 0.4296875, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 457223592, + "step": 2662 + }, + { + "epoch": 0.7004668902479122, + "grad_norm": 10.967803606252325, + "learning_rate": 5e-06, + "loss": 0.0963, + "num_input_tokens_seen": 457396048, + "step": 2663 + }, + { + "epoch": 0.7004668902479122, + "loss": 0.13383157551288605, + "loss_ce": 0.0020261560566723347, + "loss_iou": 0.55078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 457396048, + "step": 2663 + }, + { + "epoch": 0.7007299270072993, + "grad_norm": 3.339673678629444, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 457566272, + "step": 2664 + }, + { + "epoch": 0.7007299270072993, + "loss": 0.20303812623023987, + "loss_ce": 0.005711473990231752, + "loss_iou": 0.515625, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 457566272, + "step": 2664 + }, + { + "epoch": 0.7009929637666864, + "grad_norm": 10.341407204229276, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 457738384, + "step": 2665 + }, + { + "epoch": 0.7009929637666864, + "loss": 0.14979971945285797, + "loss_ce": 0.0018810234032571316, + "loss_iou": 0.46875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 457738384, + "step": 2665 + }, + { + "epoch": 0.7012560005260735, + "grad_norm": 3.460962528007649, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 457910472, + "step": 2666 + }, + { + "epoch": 0.7012560005260735, + "loss": 0.12169472873210907, + "loss_ce": 0.004110502544790506, + "loss_iou": 0.5859375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 457910472, + "step": 2666 + }, + { + "epoch": 0.7015190372854606, + "grad_norm": 5.718591075026739, + "learning_rate": 5e-06, + "loss": 0.1401, + "num_input_tokens_seen": 458082324, + "step": 2667 + }, + { + "epoch": 0.7015190372854606, + "loss": 0.1776396483182907, + "loss_ce": 0.0005613988032564521, + "loss_iou": 0.359375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 458082324, + "step": 2667 + }, + { + "epoch": 0.7017820740448477, + "grad_norm": 5.655030135877534, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 458254304, + "step": 2668 + }, + { + "epoch": 0.7017820740448477, + "loss": 0.12232168763875961, + "loss_ce": 0.0004344757762737572, + "loss_iou": 0.375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 458254304, + "step": 2668 + }, + { + "epoch": 0.7020451108042349, + "grad_norm": 5.899988129903735, + "learning_rate": 5e-06, + "loss": 0.0895, + "num_input_tokens_seen": 458426532, + "step": 2669 + }, + { + "epoch": 0.7020451108042349, + "loss": 0.09821672737598419, + "loss_ce": 0.0016896221786737442, + "loss_iou": 0.54296875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 458426532, + "step": 2669 + }, + { + "epoch": 0.702308147563622, + "grad_norm": 5.5784599020012715, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 458596896, + "step": 2670 + }, + { + "epoch": 0.702308147563622, + "loss": 0.16502083837985992, + "loss_ce": 0.0003174682497046888, + "loss_iou": 0.3984375, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 458596896, + "step": 2670 + }, + { + "epoch": 0.7025711843230091, + "grad_norm": 14.038116606042868, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 458769240, + "step": 2671 + }, + { + "epoch": 0.7025711843230091, + "loss": 0.15062229335308075, + "loss_ce": 0.0042905076406896114, + "loss_iou": 0.53125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 458769240, + "step": 2671 + }, + { + "epoch": 0.7028342210823962, + "grad_norm": 4.120047179172289, + "learning_rate": 5e-06, + "loss": 0.0767, + "num_input_tokens_seen": 458941376, + "step": 2672 + }, + { + "epoch": 0.7028342210823962, + "loss": 0.08248385787010193, + "loss_ce": 0.0018869286868721247, + "loss_iou": 0.46875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 458941376, + "step": 2672 + }, + { + "epoch": 0.7030972578417833, + "grad_norm": 4.682779609797298, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 459113732, + "step": 2673 + }, + { + "epoch": 0.7030972578417833, + "loss": 0.08461406081914902, + "loss_ce": 0.0013010749826207757, + "loss_iou": 0.515625, + "loss_num": 0.0166015625, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 459113732, + "step": 2673 + }, + { + "epoch": 0.7033602946011706, + "grad_norm": 6.053537160791169, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 459283668, + "step": 2674 + }, + { + "epoch": 0.7033602946011706, + "loss": 0.21815051138401031, + "loss_ce": 0.0011094921501353383, + "loss_iou": 0.419921875, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 459283668, + "step": 2674 + }, + { + "epoch": 0.7036233313605577, + "grad_norm": 3.9631091480761715, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 459455956, + "step": 2675 + }, + { + "epoch": 0.7036233313605577, + "loss": 0.13024334609508514, + "loss_ce": 0.00025372387608513236, + "loss_iou": 0.5625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 459455956, + "step": 2675 + }, + { + "epoch": 0.7038863681199448, + "grad_norm": 5.129114238432721, + "learning_rate": 5e-06, + "loss": 0.137, + "num_input_tokens_seen": 459627968, + "step": 2676 + }, + { + "epoch": 0.7038863681199448, + "loss": 0.08658237755298615, + "loss_ce": 0.002750593703240156, + "loss_iou": 0.46484375, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 459627968, + "step": 2676 + }, + { + "epoch": 0.7041494048793319, + "grad_norm": 20.187762197991425, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 459800260, + "step": 2677 + }, + { + "epoch": 0.7041494048793319, + "loss": 0.08818955719470978, + "loss_ce": 0.0012297153007239103, + "loss_iou": 0.53125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 459800260, + "step": 2677 + }, + { + "epoch": 0.704412441638719, + "grad_norm": 7.902489579853604, + "learning_rate": 5e-06, + "loss": 0.1564, + "num_input_tokens_seen": 459971088, + "step": 2678 + }, + { + "epoch": 0.704412441638719, + "loss": 0.12185804545879364, + "loss_ce": 0.00024549951194785535, + "loss_iou": 0.3984375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 459971088, + "step": 2678 + }, + { + "epoch": 0.7046754783981062, + "grad_norm": 27.40955645070263, + "learning_rate": 5e-06, + "loss": 0.0666, + "num_input_tokens_seen": 460143240, + "step": 2679 + }, + { + "epoch": 0.7046754783981062, + "loss": 0.06965695321559906, + "loss_ce": 0.0006261939415708184, + "loss_iou": 0.63671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 460143240, + "step": 2679 + }, + { + "epoch": 0.7049385151574933, + "grad_norm": 8.476577616174554, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 460315544, + "step": 2680 + }, + { + "epoch": 0.7049385151574933, + "loss": 0.14567114412784576, + "loss_ce": 0.0017502475529909134, + "loss_iou": 0.515625, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 460315544, + "step": 2680 + }, + { + "epoch": 0.7052015519168804, + "grad_norm": 11.423647115492507, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 460487612, + "step": 2681 + }, + { + "epoch": 0.7052015519168804, + "loss": 0.09613794833421707, + "loss_ce": 0.002235355554148555, + "loss_iou": 0.44921875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 460487612, + "step": 2681 + }, + { + "epoch": 0.7054645886762675, + "grad_norm": 13.90990204859974, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 460655964, + "step": 2682 + }, + { + "epoch": 0.7054645886762675, + "loss": 0.06838610768318176, + "loss_ce": 0.00024035980459302664, + "loss_iou": 0.49609375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 460655964, + "step": 2682 + }, + { + "epoch": 0.7057276254356546, + "grad_norm": 14.957706126877364, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 460827816, + "step": 2683 + }, + { + "epoch": 0.7057276254356546, + "loss": 0.1538136899471283, + "loss_ce": 0.0006154490984044969, + "loss_iou": 0.4609375, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 460827816, + "step": 2683 + }, + { + "epoch": 0.7059906621950418, + "grad_norm": 6.337066775015052, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 461000128, + "step": 2684 + }, + { + "epoch": 0.7059906621950418, + "loss": 0.18371257185935974, + "loss_ce": 0.0005155415856279433, + "loss_iou": 0.57421875, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 461000128, + "step": 2684 + }, + { + "epoch": 0.7062536989544289, + "grad_norm": 4.77556589512149, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 461172480, + "step": 2685 + }, + { + "epoch": 0.7062536989544289, + "loss": 0.06664656102657318, + "loss_ce": 0.001003247918561101, + "loss_iou": 0.55859375, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 461172480, + "step": 2685 + }, + { + "epoch": 0.706516735713816, + "grad_norm": 4.934194738374163, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 461344704, + "step": 2686 + }, + { + "epoch": 0.706516735713816, + "loss": 0.07681739330291748, + "loss_ce": 0.002476579276844859, + "loss_iou": 0.56640625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 461344704, + "step": 2686 + }, + { + "epoch": 0.7067797724732031, + "grad_norm": 5.14148867298625, + "learning_rate": 5e-06, + "loss": 0.0942, + "num_input_tokens_seen": 461517244, + "step": 2687 + }, + { + "epoch": 0.7067797724732031, + "loss": 0.08841484785079956, + "loss_ce": 9.696922643342987e-05, + "loss_iou": 0.62890625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 461517244, + "step": 2687 + }, + { + "epoch": 0.7070428092325902, + "grad_norm": 4.290637783097341, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 461689392, + "step": 2688 + }, + { + "epoch": 0.7070428092325902, + "loss": 0.12317334860563278, + "loss_ce": 0.0057722218334674835, + "loss_iou": 0.484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 461689392, + "step": 2688 + }, + { + "epoch": 0.7073058459919774, + "grad_norm": 18.724820792441665, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 461861916, + "step": 2689 + }, + { + "epoch": 0.7073058459919774, + "loss": 0.05893798917531967, + "loss_ce": 0.00014587045006919652, + "loss_iou": 0.59375, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 461861916, + "step": 2689 + }, + { + "epoch": 0.7075688827513645, + "grad_norm": 6.318558285374775, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 462033832, + "step": 2690 + }, + { + "epoch": 0.7075688827513645, + "loss": 0.129222571849823, + "loss_ce": 0.00034684882848523557, + "loss_iou": 0.396484375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 462033832, + "step": 2690 + }, + { + "epoch": 0.7078319195107516, + "grad_norm": 5.104970581967641, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 462206008, + "step": 2691 + }, + { + "epoch": 0.7078319195107516, + "loss": 0.06862036883831024, + "loss_ce": 0.00010840100003406405, + "loss_iou": 0.5390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 462206008, + "step": 2691 + }, + { + "epoch": 0.7080949562701387, + "grad_norm": 7.0445063257027885, + "learning_rate": 5e-06, + "loss": 0.1429, + "num_input_tokens_seen": 462377836, + "step": 2692 + }, + { + "epoch": 0.7080949562701387, + "loss": 0.16992726922035217, + "loss_ce": 0.014318134635686874, + "loss_iou": 0.478515625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 462377836, + "step": 2692 + }, + { + "epoch": 0.7083579930295258, + "grad_norm": 3.568328460902499, + "learning_rate": 5e-06, + "loss": 0.0725, + "num_input_tokens_seen": 462550184, + "step": 2693 + }, + { + "epoch": 0.7083579930295258, + "loss": 0.058605365455150604, + "loss_ce": 0.0025903512723743916, + "loss_iou": 0.48046875, + "loss_num": 0.01123046875, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 462550184, + "step": 2693 + }, + { + "epoch": 0.708621029788913, + "grad_norm": 16.451151717262515, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 462722228, + "step": 2694 + }, + { + "epoch": 0.708621029788913, + "loss": 0.15793108940124512, + "loss_ce": 0.0002467722224537283, + "loss_iou": 0.380859375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 462722228, + "step": 2694 + }, + { + "epoch": 0.7088840665483002, + "grad_norm": 18.144741325116385, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 462892644, + "step": 2695 + }, + { + "epoch": 0.7088840665483002, + "loss": 0.18394407629966736, + "loss_ce": 0.005751936696469784, + "loss_iou": 0.353515625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 462892644, + "step": 2695 + }, + { + "epoch": 0.7091471033076873, + "grad_norm": 4.177749037836763, + "learning_rate": 5e-06, + "loss": 0.0915, + "num_input_tokens_seen": 463059460, + "step": 2696 + }, + { + "epoch": 0.7091471033076873, + "loss": 0.08263557404279709, + "loss_ce": 0.0037171156145632267, + "loss_iou": 0.330078125, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 463059460, + "step": 2696 + }, + { + "epoch": 0.7094101400670744, + "grad_norm": 24.286466900686108, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 463231680, + "step": 2697 + }, + { + "epoch": 0.7094101400670744, + "loss": 0.1826072484254837, + "loss_ce": 0.005239082965999842, + "loss_iou": 0.40625, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 463231680, + "step": 2697 + }, + { + "epoch": 0.7096731768264615, + "grad_norm": 3.90367546541105, + "learning_rate": 5e-06, + "loss": 0.1316, + "num_input_tokens_seen": 463403632, + "step": 2698 + }, + { + "epoch": 0.7096731768264615, + "loss": 0.19678181409835815, + "loss_ce": 0.0011641355231404305, + "loss_iou": NaN, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 463403632, + "step": 2698 + }, + { + "epoch": 0.7099362135858486, + "grad_norm": 4.6638342162135, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 463575764, + "step": 2699 + }, + { + "epoch": 0.7099362135858486, + "loss": 0.05382794886827469, + "loss_ce": 0.0004832194827031344, + "loss_iou": 0.6328125, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 463575764, + "step": 2699 + }, + { + "epoch": 0.7101992503452358, + "grad_norm": 10.964970610276819, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 463747960, + "step": 2700 + }, + { + "epoch": 0.7101992503452358, + "loss": 0.10617360472679138, + "loss_ce": 0.002444351091980934, + "loss_iou": 0.494140625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 463747960, + "step": 2700 + }, + { + "epoch": 0.7104622871046229, + "grad_norm": 4.313980855592432, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 463920292, + "step": 2701 + }, + { + "epoch": 0.7104622871046229, + "loss": 0.06053323671221733, + "loss_ce": 0.0002305020607309416, + "loss_iou": 0.66015625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 463920292, + "step": 2701 + }, + { + "epoch": 0.71072532386401, + "grad_norm": 13.238355947380045, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 464092396, + "step": 2702 + }, + { + "epoch": 0.71072532386401, + "loss": 0.09292985498905182, + "loss_ce": 0.002384199295192957, + "loss_iou": 0.39453125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 464092396, + "step": 2702 + }, + { + "epoch": 0.7109883606233971, + "grad_norm": 3.301662541659157, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 464264724, + "step": 2703 + }, + { + "epoch": 0.7109883606233971, + "loss": 0.05558721721172333, + "loss_ce": 7.5738578743767e-05, + "loss_iou": 0.55078125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 464264724, + "step": 2703 + }, + { + "epoch": 0.7112513973827842, + "grad_norm": 3.9814210468792854, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 464436668, + "step": 2704 + }, + { + "epoch": 0.7112513973827842, + "loss": 0.0715593621134758, + "loss_ce": 0.004069737158715725, + "loss_iou": 0.494140625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 464436668, + "step": 2704 + }, + { + "epoch": 0.7115144341421714, + "grad_norm": 24.903703628485385, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 464607064, + "step": 2705 + }, + { + "epoch": 0.7115144341421714, + "loss": 0.13525709509849548, + "loss_ce": 0.0011323521612212062, + "loss_iou": 0.5703125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 464607064, + "step": 2705 + }, + { + "epoch": 0.7117774709015585, + "grad_norm": 8.733454229975411, + "learning_rate": 5e-06, + "loss": 0.099, + "num_input_tokens_seen": 464779372, + "step": 2706 + }, + { + "epoch": 0.7117774709015585, + "loss": 0.1117292046546936, + "loss_ce": 0.002689904533326626, + "loss_iou": 0.44140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 464779372, + "step": 2706 + }, + { + "epoch": 0.7120405076609456, + "grad_norm": 4.906686826576695, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 464951196, + "step": 2707 + }, + { + "epoch": 0.7120405076609456, + "loss": 0.09186941385269165, + "loss_ce": 0.00013357413990888745, + "loss_iou": 0.6171875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 464951196, + "step": 2707 + }, + { + "epoch": 0.7123035444203327, + "grad_norm": 3.5122712023777214, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 465119688, + "step": 2708 + }, + { + "epoch": 0.7123035444203327, + "loss": 0.17270034551620483, + "loss_ce": 0.004762125201523304, + "loss_iou": 0.3671875, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 465119688, + "step": 2708 + }, + { + "epoch": 0.7125665811797198, + "grad_norm": 4.667301843217583, + "learning_rate": 5e-06, + "loss": 0.1511, + "num_input_tokens_seen": 465291884, + "step": 2709 + }, + { + "epoch": 0.7125665811797198, + "loss": 0.24567091464996338, + "loss_ce": 0.0022016754373908043, + "loss_iou": 0.5703125, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 465291884, + "step": 2709 + }, + { + "epoch": 0.712829617939107, + "grad_norm": 4.677081009771049, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 465463732, + "step": 2710 + }, + { + "epoch": 0.712829617939107, + "loss": 0.10853127390146255, + "loss_ce": 0.0006211129948496819, + "loss_iou": 0.546875, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 465463732, + "step": 2710 + }, + { + "epoch": 0.7130926546984941, + "grad_norm": 9.121056040645339, + "learning_rate": 5e-06, + "loss": 0.0814, + "num_input_tokens_seen": 465635964, + "step": 2711 + }, + { + "epoch": 0.7130926546984941, + "loss": 0.05089259892702103, + "loss_ce": 0.0005080772680230439, + "loss_iou": 0.4453125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 465635964, + "step": 2711 + }, + { + "epoch": 0.7133556914578812, + "grad_norm": 5.554084070604222, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 465808444, + "step": 2712 + }, + { + "epoch": 0.7133556914578812, + "loss": 0.10150805115699768, + "loss_ce": 0.0013188383309170604, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 465808444, + "step": 2712 + }, + { + "epoch": 0.7136187282172683, + "grad_norm": 7.721557509382751, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 465980716, + "step": 2713 + }, + { + "epoch": 0.7136187282172683, + "loss": 0.1155381053686142, + "loss_ce": 0.0032639428973197937, + "loss_iou": 0.52734375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 465980716, + "step": 2713 + }, + { + "epoch": 0.7138817649766555, + "grad_norm": 4.213278179953207, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 466153208, + "step": 2714 + }, + { + "epoch": 0.7138817649766555, + "loss": 0.1079680472612381, + "loss_ce": 0.002407738706097007, + "loss_iou": 0.390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 466153208, + "step": 2714 + }, + { + "epoch": 0.7141448017360427, + "grad_norm": 4.8611508745546725, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 466325716, + "step": 2715 + }, + { + "epoch": 0.7141448017360427, + "loss": 0.07399383187294006, + "loss_ce": 0.0005075072403997183, + "loss_iou": 0.4765625, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 466325716, + "step": 2715 + }, + { + "epoch": 0.7144078384954298, + "grad_norm": 34.00071276149313, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 466497816, + "step": 2716 + }, + { + "epoch": 0.7144078384954298, + "loss": 0.11154329776763916, + "loss_ce": 0.00032198382541537285, + "loss_iou": 0.435546875, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 466497816, + "step": 2716 + }, + { + "epoch": 0.7146708752548169, + "grad_norm": 4.289011543265615, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 466666960, + "step": 2717 + }, + { + "epoch": 0.7146708752548169, + "loss": 0.10135161876678467, + "loss_ce": 0.000498652458190918, + "loss_iou": 0.4921875, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 466666960, + "step": 2717 + }, + { + "epoch": 0.714933912014204, + "grad_norm": 5.080575214282277, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 466839356, + "step": 2718 + }, + { + "epoch": 0.714933912014204, + "loss": 0.04582955688238144, + "loss_ce": 9.896683332044631e-05, + "loss_iou": 0.51953125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 466839356, + "step": 2718 + }, + { + "epoch": 0.7151969487735911, + "grad_norm": 5.237294473459814, + "learning_rate": 5e-06, + "loss": 0.1367, + "num_input_tokens_seen": 467011304, + "step": 2719 + }, + { + "epoch": 0.7151969487735911, + "loss": 0.0945780873298645, + "loss_ce": 0.0005534276133403182, + "loss_iou": 0.72265625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 467011304, + "step": 2719 + }, + { + "epoch": 0.7154599855329782, + "grad_norm": 3.135766341307675, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 467180536, + "step": 2720 + }, + { + "epoch": 0.7154599855329782, + "loss": 0.042348556220531464, + "loss_ce": 0.0003716244827955961, + "loss_iou": 0.3984375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 467180536, + "step": 2720 + }, + { + "epoch": 0.7157230222923654, + "grad_norm": 3.5558668353050815, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 467352776, + "step": 2721 + }, + { + "epoch": 0.7157230222923654, + "loss": 0.175160214304924, + "loss_ce": 0.0027969309594482183, + "loss_iou": 0.40234375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 467352776, + "step": 2721 + }, + { + "epoch": 0.7159860590517525, + "grad_norm": 27.92814548048443, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 467524800, + "step": 2722 + }, + { + "epoch": 0.7159860590517525, + "loss": 0.11873021721839905, + "loss_ce": 0.004441884811967611, + "loss_iou": 0.5078125, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 467524800, + "step": 2722 + }, + { + "epoch": 0.7162490958111396, + "grad_norm": 4.250603227803627, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 467693664, + "step": 2723 + }, + { + "epoch": 0.7162490958111396, + "loss": 0.07381434738636017, + "loss_ce": 0.0007857821765355766, + "loss_iou": 0.66796875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 467693664, + "step": 2723 + }, + { + "epoch": 0.7165121325705267, + "grad_norm": 29.613017586366666, + "learning_rate": 5e-06, + "loss": 0.1783, + "num_input_tokens_seen": 467865648, + "step": 2724 + }, + { + "epoch": 0.7165121325705267, + "loss": 0.06583578884601593, + "loss_ce": 0.0012300718808546662, + "loss_iou": 0.5078125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 467865648, + "step": 2724 + }, + { + "epoch": 0.7167751693299138, + "grad_norm": 9.322275949588843, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 468035300, + "step": 2725 + }, + { + "epoch": 0.7167751693299138, + "loss": 0.11070144176483154, + "loss_ce": 0.00022780938888899982, + "loss_iou": 0.609375, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 468035300, + "step": 2725 + }, + { + "epoch": 0.717038206089301, + "grad_norm": 7.146952822401602, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 468207524, + "step": 2726 + }, + { + "epoch": 0.717038206089301, + "loss": 0.11441102623939514, + "loss_ce": 0.0002447651932016015, + "loss_iou": 0.6015625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 468207524, + "step": 2726 + }, + { + "epoch": 0.7173012428486881, + "grad_norm": 11.196754628011416, + "learning_rate": 5e-06, + "loss": 0.1409, + "num_input_tokens_seen": 468379608, + "step": 2727 + }, + { + "epoch": 0.7173012428486881, + "loss": 0.16638273000717163, + "loss_ce": 0.005860275588929653, + "loss_iou": 0.4609375, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 468379608, + "step": 2727 + }, + { + "epoch": 0.7175642796080752, + "grad_norm": 10.776354846598704, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 468551976, + "step": 2728 + }, + { + "epoch": 0.7175642796080752, + "loss": 0.11593279242515564, + "loss_ce": 0.002559985499829054, + "loss_iou": 0.5625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 468551976, + "step": 2728 + }, + { + "epoch": 0.7178273163674623, + "grad_norm": 5.353937326922854, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 468724232, + "step": 2729 + }, + { + "epoch": 0.7178273163674623, + "loss": 0.09276724606752396, + "loss_ce": 0.00023794792650733143, + "loss_iou": 0.51171875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 468724232, + "step": 2729 + }, + { + "epoch": 0.7180903531268494, + "grad_norm": 4.435607019386441, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 468896348, + "step": 2730 + }, + { + "epoch": 0.7180903531268494, + "loss": 0.14431005716323853, + "loss_ce": 0.001487781759351492, + "loss_iou": 0.41796875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 468896348, + "step": 2730 + }, + { + "epoch": 0.7183533898862366, + "grad_norm": 5.860440789547134, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 469068240, + "step": 2731 + }, + { + "epoch": 0.7183533898862366, + "loss": 0.11086121201515198, + "loss_ce": 0.003713999642059207, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 469068240, + "step": 2731 + }, + { + "epoch": 0.7186164266456238, + "grad_norm": 11.64009264254108, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 469238748, + "step": 2732 + }, + { + "epoch": 0.7186164266456238, + "loss": 0.14171399176120758, + "loss_ce": 0.0019129666034132242, + "loss_iou": NaN, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 469238748, + "step": 2732 + }, + { + "epoch": 0.7188794634050109, + "grad_norm": 19.26931594616413, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 469409356, + "step": 2733 + }, + { + "epoch": 0.7188794634050109, + "loss": 0.16873466968536377, + "loss_ce": 0.0015593739226460457, + "loss_iou": 0.51953125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 469409356, + "step": 2733 + }, + { + "epoch": 0.719142500164398, + "grad_norm": 39.88591044746162, + "learning_rate": 5e-06, + "loss": 0.1402, + "num_input_tokens_seen": 469581536, + "step": 2734 + }, + { + "epoch": 0.719142500164398, + "loss": 0.13763278722763062, + "loss_ce": 0.0018295738846063614, + "loss_iou": 0.55078125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 469581536, + "step": 2734 + }, + { + "epoch": 0.7194055369237851, + "grad_norm": 5.070448909039763, + "learning_rate": 5e-06, + "loss": 0.0973, + "num_input_tokens_seen": 469753888, + "step": 2735 + }, + { + "epoch": 0.7194055369237851, + "loss": 0.11928269267082214, + "loss_ce": 0.0068864524364471436, + "loss_iou": 0.578125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 469753888, + "step": 2735 + }, + { + "epoch": 0.7196685736831723, + "grad_norm": 4.73412496708124, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 469926224, + "step": 2736 + }, + { + "epoch": 0.7196685736831723, + "loss": 0.10194897651672363, + "loss_ce": 0.000539067608769983, + "loss_iou": 0.43359375, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 469926224, + "step": 2736 + }, + { + "epoch": 0.7199316104425594, + "grad_norm": 14.69635519193442, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 470096688, + "step": 2737 + }, + { + "epoch": 0.7199316104425594, + "loss": 0.14469808340072632, + "loss_ce": 0.0047444626688957214, + "loss_iou": 0.671875, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 470096688, + "step": 2737 + }, + { + "epoch": 0.7201946472019465, + "grad_norm": 5.497552383274807, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 470268692, + "step": 2738 + }, + { + "epoch": 0.7201946472019465, + "loss": 0.07712777704000473, + "loss_ce": 0.0003760677354875952, + "loss_iou": 0.51953125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 470268692, + "step": 2738 + }, + { + "epoch": 0.7204576839613336, + "grad_norm": 4.073372808076834, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 470440752, + "step": 2739 + }, + { + "epoch": 0.7204576839613336, + "loss": 0.1480931043624878, + "loss_ce": 0.001303559634834528, + "loss_iou": 0.5546875, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 470440752, + "step": 2739 + }, + { + "epoch": 0.7207207207207207, + "grad_norm": 5.675528284561051, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 470612568, + "step": 2740 + }, + { + "epoch": 0.7207207207207207, + "loss": 0.07299304753541946, + "loss_ce": 0.004481084644794464, + "loss_iou": 0.625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 470612568, + "step": 2740 + }, + { + "epoch": 0.7209837574801078, + "grad_norm": 20.879698464276387, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 470784724, + "step": 2741 + }, + { + "epoch": 0.7209837574801078, + "loss": 0.1316141039133072, + "loss_ce": 0.001822838094085455, + "loss_iou": 0.302734375, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 470784724, + "step": 2741 + }, + { + "epoch": 0.721246794239495, + "grad_norm": 10.194443634028673, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 470957128, + "step": 2742 + }, + { + "epoch": 0.721246794239495, + "loss": 0.2153215855360031, + "loss_ce": 0.0009050846565514803, + "loss_iou": 0.3984375, + "loss_num": 0.04296875, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 470957128, + "step": 2742 + }, + { + "epoch": 0.7215098309988821, + "grad_norm": 4.046503425790058, + "learning_rate": 5e-06, + "loss": 0.1402, + "num_input_tokens_seen": 471129076, + "step": 2743 + }, + { + "epoch": 0.7215098309988821, + "loss": 0.17502932250499725, + "loss_ce": 0.005137969274073839, + "loss_iou": NaN, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 471129076, + "step": 2743 + }, + { + "epoch": 0.7217728677582692, + "grad_norm": 16.718028178353897, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 471301596, + "step": 2744 + }, + { + "epoch": 0.7217728677582692, + "loss": 0.1273680329322815, + "loss_ce": 0.0013304388849064708, + "loss_iou": 0.5078125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 471301596, + "step": 2744 + }, + { + "epoch": 0.7220359045176563, + "grad_norm": 19.351969856870078, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 471473704, + "step": 2745 + }, + { + "epoch": 0.7220359045176563, + "loss": 0.08187679946422577, + "loss_ce": 4.3911892134929076e-05, + "loss_iou": 0.439453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 471473704, + "step": 2745 + }, + { + "epoch": 0.7222989412770434, + "grad_norm": 3.5611349833279236, + "learning_rate": 5e-06, + "loss": 0.0947, + "num_input_tokens_seen": 471644084, + "step": 2746 + }, + { + "epoch": 0.7222989412770434, + "loss": 0.04295755550265312, + "loss_ce": 0.00018716827617026865, + "loss_iou": 0.48828125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 471644084, + "step": 2746 + }, + { + "epoch": 0.7225619780364306, + "grad_norm": 4.500519410155881, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 471814452, + "step": 2747 + }, + { + "epoch": 0.7225619780364306, + "loss": 0.15782231092453003, + "loss_ce": 0.0029913773760199547, + "loss_iou": 0.48046875, + "loss_num": 0.0308837890625, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 471814452, + "step": 2747 + }, + { + "epoch": 0.7228250147958177, + "grad_norm": 5.3088355134433645, + "learning_rate": 5e-06, + "loss": 0.1564, + "num_input_tokens_seen": 471980364, + "step": 2748 + }, + { + "epoch": 0.7228250147958177, + "loss": 0.16449454426765442, + "loss_ce": 0.0011339561315253377, + "loss_iou": 0.42578125, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 471980364, + "step": 2748 + }, + { + "epoch": 0.7230880515552048, + "grad_norm": 8.162974487273814, + "learning_rate": 5e-06, + "loss": 0.1117, + "num_input_tokens_seen": 472152464, + "step": 2749 + }, + { + "epoch": 0.7230880515552048, + "loss": 0.08619838953018188, + "loss_ce": 0.0012984833447262645, + "loss_iou": 0.5234375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 472152464, + "step": 2749 + }, + { + "epoch": 0.7233510883145919, + "grad_norm": 6.354241375539321, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_websight_new_CIoU": 0.8880318701267242, + "eval_websight_new_GIoU": 0.8903599679470062, + "eval_websight_new_IoU": 0.8917953372001648, + "eval_websight_new_MAE_all": 0.014968848787248135, + "eval_websight_new_MAE_h": 0.008249826729297638, + "eval_websight_new_MAE_w": 0.021866537630558014, + "eval_websight_new_MAE_x": 0.02344994805753231, + "eval_websight_new_MAE_y": 0.006309080636128783, + "eval_websight_new_NUM_probability": 0.9999924898147583, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.0760061964392662, + "eval_websight_new_loss_ce": 5.840479570906609e-05, + "eval_websight_new_loss_iou": 0.35406494140625, + "eval_websight_new_loss_num": 0.013774871826171875, + "eval_websight_new_loss_xval": 0.0689239501953125, + "eval_websight_new_runtime": 59.6817, + "eval_websight_new_samples_per_second": 0.838, + "eval_websight_new_steps_per_second": 0.034, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_seeclick_CIoU": 0.6159887313842773, + "eval_seeclick_GIoU": 0.6205049157142639, + "eval_seeclick_IoU": 0.6426993608474731, + "eval_seeclick_MAE_all": 0.048651453107595444, + "eval_seeclick_MAE_h": 0.030230149626731873, + "eval_seeclick_MAE_w": 0.06347078271210194, + "eval_seeclick_MAE_x": 0.0767427384853363, + "eval_seeclick_MAE_y": 0.024162148125469685, + "eval_seeclick_NUM_probability": 0.9999720454216003, + "eval_seeclick_inside_bbox": 0.8920454680919647, + "eval_seeclick_loss": 0.22562426328659058, + "eval_seeclick_loss_ce": 0.008924027904868126, + "eval_seeclick_loss_iou": 0.5213623046875, + "eval_seeclick_loss_num": 0.04332733154296875, + "eval_seeclick_loss_xval": 0.2166900634765625, + "eval_seeclick_runtime": 76.0092, + "eval_seeclick_samples_per_second": 0.566, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_icons_CIoU": 0.8715368807315826, + "eval_icons_GIoU": 0.8687921464443207, + "eval_icons_IoU": 0.8755140602588654, + "eval_icons_MAE_all": 0.015967791434377432, + "eval_icons_MAE_h": 0.016439005732536316, + "eval_icons_MAE_w": 0.015302729327231646, + "eval_icons_MAE_x": 0.015842752531170845, + "eval_icons_MAE_y": 0.016286680474877357, + "eval_icons_NUM_probability": 0.9999882578849792, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.057806555181741714, + "eval_icons_loss_ce": 1.1600203379202867e-05, + "eval_icons_loss_iou": 0.617919921875, + "eval_icons_loss_num": 0.010974884033203125, + "eval_icons_loss_xval": 0.0548553466796875, + "eval_icons_runtime": 81.9252, + "eval_icons_samples_per_second": 0.61, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "eval_screenspot_CIoU": 0.5503915150960287, + "eval_screenspot_GIoU": 0.5448275804519653, + "eval_screenspot_IoU": 0.5854077339172363, + "eval_screenspot_MAE_all": 0.0787569632132848, + "eval_screenspot_MAE_h": 0.05612564583619436, + "eval_screenspot_MAE_w": 0.1299388830860456, + "eval_screenspot_MAE_x": 0.07724836965401967, + "eval_screenspot_MAE_y": 0.05171496793627739, + "eval_screenspot_NUM_probability": 0.9999065001805624, + "eval_screenspot_inside_bbox": 0.8291666706403097, + "eval_screenspot_loss": 0.9146350622177124, + "eval_screenspot_loss_ce": 0.567759374777476, + "eval_screenspot_loss_iou": 0.5614013671875, + "eval_screenspot_loss_num": 0.06831868489583333, + "eval_screenspot_loss_xval": 0.3416341145833333, + "eval_screenspot_runtime": 140.0636, + "eval_screenspot_samples_per_second": 0.635, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.7233510883145919, + "loss": 0.9040678143501282, + "loss_ce": 0.5593412518501282, + "loss_iou": 0.455078125, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 472324756, + "step": 2750 + }, + { + "epoch": 0.723614125073979, + "grad_norm": 4.425480410673166, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 472496880, + "step": 2751 + }, + { + "epoch": 0.723614125073979, + "loss": 0.10025835037231445, + "loss_ce": 0.0005421665264293551, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 472496880, + "step": 2751 + }, + { + "epoch": 0.7238771618333663, + "grad_norm": 4.0784104980939135, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 472668928, + "step": 2752 + }, + { + "epoch": 0.7238771618333663, + "loss": 0.0833350419998169, + "loss_ce": 0.00038827050593681633, + "loss_iou": 0.34765625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 472668928, + "step": 2752 + }, + { + "epoch": 0.7241401985927534, + "grad_norm": 6.523452376364869, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 472841040, + "step": 2753 + }, + { + "epoch": 0.7241401985927534, + "loss": 0.13216346502304077, + "loss_ce": 0.00023597091785632074, + "loss_iou": 0.54296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 472841040, + "step": 2753 + }, + { + "epoch": 0.7244032353521405, + "grad_norm": 13.511689037708722, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 473013468, + "step": 2754 + }, + { + "epoch": 0.7244032353521405, + "loss": 0.0918908640742302, + "loss_ce": 0.001101069850847125, + "loss_iou": 0.431640625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 473013468, + "step": 2754 + }, + { + "epoch": 0.7246662721115276, + "grad_norm": 4.609556566608224, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 473180556, + "step": 2755 + }, + { + "epoch": 0.7246662721115276, + "loss": 0.2066127210855484, + "loss_ce": 0.0006495795678347349, + "loss_iou": 0.400390625, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 473180556, + "step": 2755 + }, + { + "epoch": 0.7249293088709147, + "grad_norm": 17.674016284792703, + "learning_rate": 5e-06, + "loss": 0.123, + "num_input_tokens_seen": 473352636, + "step": 2756 + }, + { + "epoch": 0.7249293088709147, + "loss": 0.13866102695465088, + "loss_ce": 0.0018507244531065226, + "loss_iou": 0.443359375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 473352636, + "step": 2756 + }, + { + "epoch": 0.7251923456303019, + "grad_norm": 12.378222072828299, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 473524848, + "step": 2757 + }, + { + "epoch": 0.7251923456303019, + "loss": 0.059792935848236084, + "loss_ce": 0.0014433301985263824, + "loss_iou": 0.419921875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 473524848, + "step": 2757 + }, + { + "epoch": 0.725455382389689, + "grad_norm": 4.317210813813628, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 473697016, + "step": 2758 + }, + { + "epoch": 0.725455382389689, + "loss": 0.1696222722530365, + "loss_ce": 0.0004328044014982879, + "loss_iou": 0.3984375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 473697016, + "step": 2758 + }, + { + "epoch": 0.7257184191490761, + "grad_norm": 14.320905421992641, + "learning_rate": 5e-06, + "loss": 0.1392, + "num_input_tokens_seen": 473868992, + "step": 2759 + }, + { + "epoch": 0.7257184191490761, + "loss": 0.07304464280605316, + "loss_ce": 0.0016335132531821728, + "loss_iou": 0.6484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 473868992, + "step": 2759 + }, + { + "epoch": 0.7259814559084632, + "grad_norm": 3.9251643218505756, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 474041228, + "step": 2760 + }, + { + "epoch": 0.7259814559084632, + "loss": 0.09449034929275513, + "loss_ce": 0.0015948471846058965, + "loss_iou": 0.48828125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 474041228, + "step": 2760 + }, + { + "epoch": 0.7262444926678503, + "grad_norm": 4.80079690317671, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 474213616, + "step": 2761 + }, + { + "epoch": 0.7262444926678503, + "loss": 0.14180995523929596, + "loss_ce": 0.001276510301977396, + "loss_iou": 0.53125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 474213616, + "step": 2761 + }, + { + "epoch": 0.7265075294272375, + "grad_norm": 5.815165285313656, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 474385980, + "step": 2762 + }, + { + "epoch": 0.7265075294272375, + "loss": 0.14968647062778473, + "loss_ce": 0.0027748444117605686, + "loss_iou": 0.474609375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 474385980, + "step": 2762 + }, + { + "epoch": 0.7267705661866246, + "grad_norm": 18.689053225654995, + "learning_rate": 5e-06, + "loss": 0.1345, + "num_input_tokens_seen": 474558212, + "step": 2763 + }, + { + "epoch": 0.7267705661866246, + "loss": 0.07762917876243591, + "loss_ce": 0.0015488516073673964, + "loss_iou": 0.546875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 474558212, + "step": 2763 + }, + { + "epoch": 0.7270336029460117, + "grad_norm": 26.06170268014936, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 474730824, + "step": 2764 + }, + { + "epoch": 0.7270336029460117, + "loss": 0.2366107702255249, + "loss_ce": 0.0011676568537950516, + "loss_iou": 0.337890625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 474730824, + "step": 2764 + }, + { + "epoch": 0.7272966397053988, + "grad_norm": 4.246687258335021, + "learning_rate": 5e-06, + "loss": 0.0722, + "num_input_tokens_seen": 474901372, + "step": 2765 + }, + { + "epoch": 0.7272966397053988, + "loss": 0.05860138311982155, + "loss_ce": 0.0005874672788195312, + "loss_iou": 0.5546875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 474901372, + "step": 2765 + }, + { + "epoch": 0.7275596764647859, + "grad_norm": 10.116948882200285, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 475070388, + "step": 2766 + }, + { + "epoch": 0.7275596764647859, + "loss": 0.14292961359024048, + "loss_ce": 0.0015111502725630999, + "loss_iou": 0.484375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 475070388, + "step": 2766 + }, + { + "epoch": 0.727822713224173, + "grad_norm": 5.556641114443334, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 475242528, + "step": 2767 + }, + { + "epoch": 0.727822713224173, + "loss": 0.05033715069293976, + "loss_ce": 0.00010521705553401262, + "loss_iou": 0.51953125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 475242528, + "step": 2767 + }, + { + "epoch": 0.7280857499835602, + "grad_norm": 7.020407355867949, + "learning_rate": 5e-06, + "loss": 0.1336, + "num_input_tokens_seen": 475414560, + "step": 2768 + }, + { + "epoch": 0.7280857499835602, + "loss": 0.14076019823551178, + "loss_ce": 0.003125916002318263, + "loss_iou": 0.41796875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 475414560, + "step": 2768 + }, + { + "epoch": 0.7283487867429473, + "grad_norm": 3.869087312985774, + "learning_rate": 5e-06, + "loss": 0.1426, + "num_input_tokens_seen": 475586900, + "step": 2769 + }, + { + "epoch": 0.7283487867429473, + "loss": 0.1432042121887207, + "loss_ce": 0.001144890207797289, + "loss_iou": NaN, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 475586900, + "step": 2769 + }, + { + "epoch": 0.7286118235023344, + "grad_norm": 5.203369255610556, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 475759012, + "step": 2770 + }, + { + "epoch": 0.7286118235023344, + "loss": 0.09897112846374512, + "loss_ce": 0.000979190575890243, + "loss_iou": 0.478515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 475759012, + "step": 2770 + }, + { + "epoch": 0.7288748602617215, + "grad_norm": 5.470559222873129, + "learning_rate": 5e-06, + "loss": 0.1425, + "num_input_tokens_seen": 475931112, + "step": 2771 + }, + { + "epoch": 0.7288748602617215, + "loss": 0.057690735906362534, + "loss_ce": 0.00324737885966897, + "loss_iou": 0.447265625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 475931112, + "step": 2771 + }, + { + "epoch": 0.7291378970211086, + "grad_norm": 18.974366841188825, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 476100764, + "step": 2772 + }, + { + "epoch": 0.7291378970211086, + "loss": 0.15548212826251984, + "loss_ce": 0.0019024083158001304, + "loss_iou": 0.5078125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 476100764, + "step": 2772 + }, + { + "epoch": 0.7294009337804959, + "grad_norm": 5.671161770383539, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 476272960, + "step": 2773 + }, + { + "epoch": 0.7294009337804959, + "loss": 0.0945616215467453, + "loss_ce": 0.00023179112758953124, + "loss_iou": 0.50390625, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 476272960, + "step": 2773 + }, + { + "epoch": 0.729663970539883, + "grad_norm": 34.00148217519992, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 476445160, + "step": 2774 + }, + { + "epoch": 0.729663970539883, + "loss": 0.2345729023218155, + "loss_ce": 0.0021205078810453415, + "loss_iou": 0.4921875, + "loss_num": 0.04638671875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 476445160, + "step": 2774 + }, + { + "epoch": 0.7299270072992701, + "grad_norm": 4.401043478623018, + "learning_rate": 5e-06, + "loss": 0.0804, + "num_input_tokens_seen": 476617452, + "step": 2775 + }, + { + "epoch": 0.7299270072992701, + "loss": 0.07482883334159851, + "loss_ce": 0.00036594344419427216, + "loss_iou": 0.671875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 476617452, + "step": 2775 + }, + { + "epoch": 0.7301900440586572, + "grad_norm": 4.444423155502646, + "learning_rate": 5e-06, + "loss": 0.1158, + "num_input_tokens_seen": 476789580, + "step": 2776 + }, + { + "epoch": 0.7301900440586572, + "loss": 0.08125682920217514, + "loss_ce": 0.0011787032708525658, + "loss_iou": 0.427734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 476789580, + "step": 2776 + }, + { + "epoch": 0.7304530808180443, + "grad_norm": 9.388146192273584, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 476961420, + "step": 2777 + }, + { + "epoch": 0.7304530808180443, + "loss": 0.09009505808353424, + "loss_ce": 0.0032420377247035503, + "loss_iou": 0.41796875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 476961420, + "step": 2777 + }, + { + "epoch": 0.7307161175774315, + "grad_norm": 13.509674001993902, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 477133352, + "step": 2778 + }, + { + "epoch": 0.7307161175774315, + "loss": 0.15684369206428528, + "loss_ce": 0.0034623502288013697, + "loss_iou": 0.50390625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 477133352, + "step": 2778 + }, + { + "epoch": 0.7309791543368186, + "grad_norm": 10.371891827363712, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 477305700, + "step": 2779 + }, + { + "epoch": 0.7309791543368186, + "loss": 0.1920810043811798, + "loss_ce": 0.003238243516534567, + "loss_iou": 0.490234375, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 477305700, + "step": 2779 + }, + { + "epoch": 0.7312421910962057, + "grad_norm": 4.473462123830256, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 477476192, + "step": 2780 + }, + { + "epoch": 0.7312421910962057, + "loss": 0.12351857125759125, + "loss_ce": 0.003828628221526742, + "loss_iou": 0.3671875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 477476192, + "step": 2780 + }, + { + "epoch": 0.7315052278555928, + "grad_norm": 8.785650264431224, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 477648396, + "step": 2781 + }, + { + "epoch": 0.7315052278555928, + "loss": 0.10561161488294601, + "loss_ce": 0.00282841082662344, + "loss_iou": 0.5390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 477648396, + "step": 2781 + }, + { + "epoch": 0.7317682646149799, + "grad_norm": 4.9317310113146835, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 477820536, + "step": 2782 + }, + { + "epoch": 0.7317682646149799, + "loss": 0.15500710904598236, + "loss_ce": 0.0015952409012243152, + "loss_iou": 0.50390625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 477820536, + "step": 2782 + }, + { + "epoch": 0.7320313013743671, + "grad_norm": 14.38075821387521, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 477992760, + "step": 2783 + }, + { + "epoch": 0.7320313013743671, + "loss": 0.09361109137535095, + "loss_ce": 0.0013869699323549867, + "loss_iou": 0.55859375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 477992760, + "step": 2783 + }, + { + "epoch": 0.7322943381337542, + "grad_norm": 3.3741559107447294, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 478164884, + "step": 2784 + }, + { + "epoch": 0.7322943381337542, + "loss": 0.06834319978952408, + "loss_ce": 0.00392058864235878, + "loss_iou": 0.5625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 478164884, + "step": 2784 + }, + { + "epoch": 0.7325573748931413, + "grad_norm": 4.560868774415877, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 478337276, + "step": 2785 + }, + { + "epoch": 0.7325573748931413, + "loss": 0.08397236466407776, + "loss_ce": 0.0004152356996200979, + "loss_iou": 0.6015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 478337276, + "step": 2785 + }, + { + "epoch": 0.7328204116525284, + "grad_norm": 17.451789372881052, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 478509332, + "step": 2786 + }, + { + "epoch": 0.7328204116525284, + "loss": 0.1723887324333191, + "loss_ce": 0.00039166733040474355, + "loss_iou": 0.44140625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 478509332, + "step": 2786 + }, + { + "epoch": 0.7330834484119155, + "grad_norm": 6.047350371102969, + "learning_rate": 5e-06, + "loss": 0.1491, + "num_input_tokens_seen": 478681352, + "step": 2787 + }, + { + "epoch": 0.7330834484119155, + "loss": 0.04348166286945343, + "loss_ce": 0.0019167213467881083, + "loss_iou": 0.5703125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 478681352, + "step": 2787 + }, + { + "epoch": 0.7333464851713027, + "grad_norm": 4.3763950262189235, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 478853448, + "step": 2788 + }, + { + "epoch": 0.7333464851713027, + "loss": 0.11647917330265045, + "loss_ce": 0.0007565193227492273, + "loss_iou": 0.490234375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 478853448, + "step": 2788 + }, + { + "epoch": 0.7336095219306898, + "grad_norm": 3.397549309274833, + "learning_rate": 5e-06, + "loss": 0.0831, + "num_input_tokens_seen": 479025420, + "step": 2789 + }, + { + "epoch": 0.7336095219306898, + "loss": 0.04994508996605873, + "loss_ce": 0.0003540250181686133, + "loss_iou": 0.37890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 479025420, + "step": 2789 + }, + { + "epoch": 0.733872558690077, + "grad_norm": 6.639247678980395, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 479197512, + "step": 2790 + }, + { + "epoch": 0.733872558690077, + "loss": 0.0962657481431961, + "loss_ce": 0.0033092054072767496, + "loss_iou": 0.5234375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 479197512, + "step": 2790 + }, + { + "epoch": 0.734135595449464, + "grad_norm": 70.18841715820902, + "learning_rate": 5e-06, + "loss": 0.121, + "num_input_tokens_seen": 479369608, + "step": 2791 + }, + { + "epoch": 0.734135595449464, + "loss": 0.057409316301345825, + "loss_ce": 0.000661880592815578, + "loss_iou": 0.61328125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 479369608, + "step": 2791 + }, + { + "epoch": 0.7343986322088512, + "grad_norm": 8.444086759452258, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 479541840, + "step": 2792 + }, + { + "epoch": 0.7343986322088512, + "loss": 0.18742145597934723, + "loss_ce": 0.0015999219613149762, + "loss_iou": 0.37890625, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 479541840, + "step": 2792 + }, + { + "epoch": 0.7346616689682383, + "grad_norm": 5.294583182815855, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 479714196, + "step": 2793 + }, + { + "epoch": 0.7346616689682383, + "loss": 0.1654970794916153, + "loss_ce": 0.002441658638417721, + "loss_iou": 0.5234375, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 479714196, + "step": 2793 + }, + { + "epoch": 0.7349247057276255, + "grad_norm": 4.160027277201995, + "learning_rate": 5e-06, + "loss": 0.0766, + "num_input_tokens_seen": 479886580, + "step": 2794 + }, + { + "epoch": 0.7349247057276255, + "loss": 0.11283927410840988, + "loss_ce": 0.0021825337316840887, + "loss_iou": 0.5, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 479886580, + "step": 2794 + }, + { + "epoch": 0.7351877424870126, + "grad_norm": 8.261194775106992, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 480058736, + "step": 2795 + }, + { + "epoch": 0.7351877424870126, + "loss": 0.12811481952667236, + "loss_ce": 0.0015584270004183054, + "loss_iou": 0.486328125, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 480058736, + "step": 2795 + }, + { + "epoch": 0.7354507792463997, + "grad_norm": 3.186827546083555, + "learning_rate": 5e-06, + "loss": 0.1094, + "num_input_tokens_seen": 480230860, + "step": 2796 + }, + { + "epoch": 0.7354507792463997, + "loss": 0.12094112485647202, + "loss_ce": 0.00286861858330667, + "loss_iou": 0.296875, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 480230860, + "step": 2796 + }, + { + "epoch": 0.7357138160057868, + "grad_norm": 3.6199423783668534, + "learning_rate": 5e-06, + "loss": 0.0792, + "num_input_tokens_seen": 480403260, + "step": 2797 + }, + { + "epoch": 0.7357138160057868, + "loss": 0.0642661452293396, + "loss_ce": 0.0005149244680069387, + "loss_iou": 0.61328125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 480403260, + "step": 2797 + }, + { + "epoch": 0.7359768527651739, + "grad_norm": 4.853254066183438, + "learning_rate": 5e-06, + "loss": 0.0999, + "num_input_tokens_seen": 480575740, + "step": 2798 + }, + { + "epoch": 0.7359768527651739, + "loss": 0.1287141740322113, + "loss_ce": 0.000998095260001719, + "loss_iou": 0.5234375, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 480575740, + "step": 2798 + }, + { + "epoch": 0.7362398895245611, + "grad_norm": 6.187299771807659, + "learning_rate": 5e-06, + "loss": 0.1079, + "num_input_tokens_seen": 480745892, + "step": 2799 + }, + { + "epoch": 0.7362398895245611, + "loss": 0.11742156744003296, + "loss_ce": 0.0004476910980883986, + "loss_iou": 0.40625, + "loss_num": 0.0233154296875, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 480745892, + "step": 2799 + }, + { + "epoch": 0.7365029262839482, + "grad_norm": 3.2510976930909194, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 480916480, + "step": 2800 + }, + { + "epoch": 0.7365029262839482, + "loss": 0.14714287221431732, + "loss_ce": 7.865649240557104e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 480916480, + "step": 2800 + }, + { + "epoch": 0.7367659630433353, + "grad_norm": 4.500033616411093, + "learning_rate": 5e-06, + "loss": 0.1404, + "num_input_tokens_seen": 481088428, + "step": 2801 + }, + { + "epoch": 0.7367659630433353, + "loss": 0.07280252873897552, + "loss_ce": 0.0012845833553001285, + "loss_iou": 0.427734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 481088428, + "step": 2801 + }, + { + "epoch": 0.7370289998027224, + "grad_norm": 6.733873919958713, + "learning_rate": 5e-06, + "loss": 0.1157, + "num_input_tokens_seen": 481260648, + "step": 2802 + }, + { + "epoch": 0.7370289998027224, + "loss": 0.13488659262657166, + "loss_ce": 0.002318233484402299, + "loss_iou": 0.51171875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 481260648, + "step": 2802 + }, + { + "epoch": 0.7372920365621095, + "grad_norm": 4.443731950557251, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 481432928, + "step": 2803 + }, + { + "epoch": 0.7372920365621095, + "loss": 0.05556986480951309, + "loss_ce": 0.0003940859460271895, + "loss_iou": 0.578125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 481432928, + "step": 2803 + }, + { + "epoch": 0.7375550733214967, + "grad_norm": 3.9159514379676663, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 481603740, + "step": 2804 + }, + { + "epoch": 0.7375550733214967, + "loss": 0.07777837663888931, + "loss_ce": 5.010394670534879e-05, + "loss_iou": 0.58203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 481603740, + "step": 2804 + }, + { + "epoch": 0.7378181100808838, + "grad_norm": 3.806670191888079, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 481774292, + "step": 2805 + }, + { + "epoch": 0.7378181100808838, + "loss": 0.046146463602781296, + "loss_ce": 0.00412375945597887, + "loss_iou": 0.40625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 481774292, + "step": 2805 + }, + { + "epoch": 0.7380811468402709, + "grad_norm": 18.2473830072325, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 481946452, + "step": 2806 + }, + { + "epoch": 0.7380811468402709, + "loss": 0.15219584107398987, + "loss_ce": 0.0003708918229676783, + "loss_iou": 0.55078125, + "loss_num": 0.0303955078125, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 481946452, + "step": 2806 + }, + { + "epoch": 0.738344183599658, + "grad_norm": 5.953555323593533, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 482118496, + "step": 2807 + }, + { + "epoch": 0.738344183599658, + "loss": 0.14251753687858582, + "loss_ce": 0.00039717700565233827, + "loss_iou": 0.60546875, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 482118496, + "step": 2807 + }, + { + "epoch": 0.7386072203590451, + "grad_norm": 7.279415163791317, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 482287100, + "step": 2808 + }, + { + "epoch": 0.7386072203590451, + "loss": 0.11856916546821594, + "loss_ce": 0.0012290815357118845, + "loss_iou": 0.361328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 482287100, + "step": 2808 + }, + { + "epoch": 0.7388702571184323, + "grad_norm": 60.57211297247697, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 482459096, + "step": 2809 + }, + { + "epoch": 0.7388702571184323, + "loss": 0.14476290345191956, + "loss_ce": 0.003466519294306636, + "loss_iou": 0.64453125, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 482459096, + "step": 2809 + }, + { + "epoch": 0.7391332938778195, + "grad_norm": 4.216089199918953, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 482631768, + "step": 2810 + }, + { + "epoch": 0.7391332938778195, + "loss": 0.1267136037349701, + "loss_ce": 0.0018051671795547009, + "loss_iou": 0.466796875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 482631768, + "step": 2810 + }, + { + "epoch": 0.7393963306372066, + "grad_norm": 5.529488884431239, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 482803968, + "step": 2811 + }, + { + "epoch": 0.7393963306372066, + "loss": 0.12075556814670563, + "loss_ce": 0.0023778879549354315, + "loss_iou": 0.67578125, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 482803968, + "step": 2811 + }, + { + "epoch": 0.7396593673965937, + "grad_norm": 14.970093925458164, + "learning_rate": 5e-06, + "loss": 0.089, + "num_input_tokens_seen": 482976316, + "step": 2812 + }, + { + "epoch": 0.7396593673965937, + "loss": 0.11577010154724121, + "loss_ce": 0.00230574794113636, + "loss_iou": 0.53125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 482976316, + "step": 2812 + }, + { + "epoch": 0.7399224041559808, + "grad_norm": 16.71817355365124, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 483148324, + "step": 2813 + }, + { + "epoch": 0.7399224041559808, + "loss": 0.12452364712953568, + "loss_ce": 0.0020718637388199568, + "loss_iou": 0.2734375, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 483148324, + "step": 2813 + }, + { + "epoch": 0.740185440915368, + "grad_norm": 6.067200171837272, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 483320396, + "step": 2814 + }, + { + "epoch": 0.740185440915368, + "loss": 0.08812370151281357, + "loss_ce": 0.003498460166156292, + "loss_iou": 0.494140625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 483320396, + "step": 2814 + }, + { + "epoch": 0.7404484776747551, + "grad_norm": 4.557027585995325, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 483492288, + "step": 2815 + }, + { + "epoch": 0.7404484776747551, + "loss": 0.11246176064014435, + "loss_ce": 0.0007674178341403604, + "loss_iou": 0.5078125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 483492288, + "step": 2815 + }, + { + "epoch": 0.7407115144341422, + "grad_norm": 4.433500401049952, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 483664284, + "step": 2816 + }, + { + "epoch": 0.7407115144341422, + "loss": 0.11581188440322876, + "loss_ce": 0.0005164705216884613, + "loss_iou": 0.4765625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 483664284, + "step": 2816 + }, + { + "epoch": 0.7409745511935293, + "grad_norm": 4.273944166752719, + "learning_rate": 5e-06, + "loss": 0.1428, + "num_input_tokens_seen": 483836152, + "step": 2817 + }, + { + "epoch": 0.7409745511935293, + "loss": 0.1244652196764946, + "loss_ce": 0.0025780070573091507, + "loss_iou": 0.578125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 483836152, + "step": 2817 + }, + { + "epoch": 0.7412375879529164, + "grad_norm": 7.341058016704505, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 484005324, + "step": 2818 + }, + { + "epoch": 0.7412375879529164, + "loss": 0.09527404606342316, + "loss_ce": 0.0004711919464170933, + "loss_iou": 0.51171875, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 484005324, + "step": 2818 + }, + { + "epoch": 0.7415006247123035, + "grad_norm": 5.580265155977227, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 484175008, + "step": 2819 + }, + { + "epoch": 0.7415006247123035, + "loss": 0.061688005924224854, + "loss_ce": 0.000408706720918417, + "loss_iou": 0.8203125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 484175008, + "step": 2819 + }, + { + "epoch": 0.7417636614716907, + "grad_norm": 10.569140888378845, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 484346776, + "step": 2820 + }, + { + "epoch": 0.7417636614716907, + "loss": 0.1439221203327179, + "loss_ce": 0.00351074174977839, + "loss_iou": 0.435546875, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 484346776, + "step": 2820 + }, + { + "epoch": 0.7420266982310778, + "grad_norm": 4.104540427593442, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 484518992, + "step": 2821 + }, + { + "epoch": 0.7420266982310778, + "loss": 0.09965825080871582, + "loss_ce": 0.000567668757867068, + "loss_iou": 0.60546875, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 484518992, + "step": 2821 + }, + { + "epoch": 0.7422897349904649, + "grad_norm": 5.580054688233454, + "learning_rate": 5e-06, + "loss": 0.1559, + "num_input_tokens_seen": 484691244, + "step": 2822 + }, + { + "epoch": 0.7422897349904649, + "loss": 0.17825458943843842, + "loss_ce": 0.002351263538002968, + "loss_iou": 0.53515625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 484691244, + "step": 2822 + }, + { + "epoch": 0.742552771749852, + "grad_norm": 3.6849080723348067, + "learning_rate": 5e-06, + "loss": 0.1494, + "num_input_tokens_seen": 484861364, + "step": 2823 + }, + { + "epoch": 0.742552771749852, + "loss": 0.12127910554409027, + "loss_ce": 0.0025046863593161106, + "loss_iou": 0.54296875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 484861364, + "step": 2823 + }, + { + "epoch": 0.7428158085092391, + "grad_norm": 13.128530967150855, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 485032096, + "step": 2824 + }, + { + "epoch": 0.7428158085092391, + "loss": 0.044097013771533966, + "loss_ce": 0.0010977452620863914, + "loss_iou": 0.470703125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 485032096, + "step": 2824 + }, + { + "epoch": 0.7430788452686263, + "grad_norm": 29.97671813936596, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 485203848, + "step": 2825 + }, + { + "epoch": 0.7430788452686263, + "loss": 0.1745963990688324, + "loss_ce": 0.0014701783657073975, + "loss_iou": 0.52734375, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 485203848, + "step": 2825 + }, + { + "epoch": 0.7433418820280134, + "grad_norm": 3.457370742890275, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 485376152, + "step": 2826 + }, + { + "epoch": 0.7433418820280134, + "loss": 0.10546036064624786, + "loss_ce": 0.0021430959459394217, + "loss_iou": 0.44921875, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 485376152, + "step": 2826 + }, + { + "epoch": 0.7436049187874005, + "grad_norm": 14.472339934978308, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 485546204, + "step": 2827 + }, + { + "epoch": 0.7436049187874005, + "loss": 0.1524585783481598, + "loss_ce": 0.0006031189695931971, + "loss_iou": 0.453125, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 485546204, + "step": 2827 + }, + { + "epoch": 0.7438679555467876, + "grad_norm": 15.092363857837377, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 485718156, + "step": 2828 + }, + { + "epoch": 0.7438679555467876, + "loss": 0.11204935610294342, + "loss_ce": 0.00023295017308555543, + "loss_iou": 0.484375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 485718156, + "step": 2828 + }, + { + "epoch": 0.7441309923061747, + "grad_norm": 3.949937010975672, + "learning_rate": 5e-06, + "loss": 0.0703, + "num_input_tokens_seen": 485889984, + "step": 2829 + }, + { + "epoch": 0.7441309923061747, + "loss": 0.08257782459259033, + "loss_ce": 0.002652282826602459, + "loss_iou": 0.546875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 485889984, + "step": 2829 + }, + { + "epoch": 0.744394029065562, + "grad_norm": 5.74645673965003, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 486062200, + "step": 2830 + }, + { + "epoch": 0.744394029065562, + "loss": 0.10757631063461304, + "loss_ce": 0.0008258260786533356, + "loss_iou": 0.33203125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 486062200, + "step": 2830 + }, + { + "epoch": 0.7446570658249491, + "grad_norm": 5.133478707353028, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 486234156, + "step": 2831 + }, + { + "epoch": 0.7446570658249491, + "loss": 0.05313008278608322, + "loss_ce": 0.0007771779783070087, + "loss_iou": 0.431640625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 486234156, + "step": 2831 + }, + { + "epoch": 0.7449201025843362, + "grad_norm": 3.5956313939936164, + "learning_rate": 5e-06, + "loss": 0.0765, + "num_input_tokens_seen": 486406012, + "step": 2832 + }, + { + "epoch": 0.7449201025843362, + "loss": 0.041897065937519073, + "loss_ce": 0.0002405716950306669, + "loss_iou": 0.49609375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 486406012, + "step": 2832 + }, + { + "epoch": 0.7451831393437233, + "grad_norm": 14.659992900472892, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 486577696, + "step": 2833 + }, + { + "epoch": 0.7451831393437233, + "loss": 0.10231424868106842, + "loss_ce": 0.00011088042083429173, + "loss_iou": 0.61328125, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 486577696, + "step": 2833 + }, + { + "epoch": 0.7454461761031104, + "grad_norm": 4.0898533446969365, + "learning_rate": 5e-06, + "loss": 0.132, + "num_input_tokens_seen": 486749788, + "step": 2834 + }, + { + "epoch": 0.7454461761031104, + "loss": 0.16116830706596375, + "loss_ce": 0.002553200349211693, + "loss_iou": 0.416015625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 486749788, + "step": 2834 + }, + { + "epoch": 0.7457092128624976, + "grad_norm": 7.24387104067779, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 486921812, + "step": 2835 + }, + { + "epoch": 0.7457092128624976, + "loss": 0.15365542471408844, + "loss_ce": 0.0005182233871892095, + "loss_iou": 0.6796875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 486921812, + "step": 2835 + }, + { + "epoch": 0.7459722496218847, + "grad_norm": 4.544468769186057, + "learning_rate": 5e-06, + "loss": 0.1593, + "num_input_tokens_seen": 487092260, + "step": 2836 + }, + { + "epoch": 0.7459722496218847, + "loss": 0.13337098062038422, + "loss_ce": 0.0010772723471745849, + "loss_iou": 0.44921875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 487092260, + "step": 2836 + }, + { + "epoch": 0.7462352863812718, + "grad_norm": 25.066833727752858, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 487264416, + "step": 2837 + }, + { + "epoch": 0.7462352863812718, + "loss": 0.053436558693647385, + "loss_ce": 0.0022585804108530283, + "loss_iou": 0.458984375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 487264416, + "step": 2837 + }, + { + "epoch": 0.7464983231406589, + "grad_norm": 5.520845699973876, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 487436620, + "step": 2838 + }, + { + "epoch": 0.7464983231406589, + "loss": 0.16054442524909973, + "loss_ce": 0.002860102104023099, + "loss_iou": 0.59375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 487436620, + "step": 2838 + }, + { + "epoch": 0.746761359900046, + "grad_norm": 3.6663304276460456, + "learning_rate": 5e-06, + "loss": 0.1253, + "num_input_tokens_seen": 487608856, + "step": 2839 + }, + { + "epoch": 0.746761359900046, + "loss": 0.07470214366912842, + "loss_ce": 0.0005749509437009692, + "loss_iou": 0.51953125, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 487608856, + "step": 2839 + }, + { + "epoch": 0.7470243966594332, + "grad_norm": 3.816698805705744, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 487779452, + "step": 2840 + }, + { + "epoch": 0.7470243966594332, + "loss": 0.08027718961238861, + "loss_ce": 0.00013802893226966262, + "loss_iou": 0.46484375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 487779452, + "step": 2840 + }, + { + "epoch": 0.7472874334188203, + "grad_norm": 5.187350725945626, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 487951444, + "step": 2841 + }, + { + "epoch": 0.7472874334188203, + "loss": 0.13180628418922424, + "loss_ce": 0.0002907742455136031, + "loss_iou": 0.50390625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 487951444, + "step": 2841 + }, + { + "epoch": 0.7475504701782074, + "grad_norm": 17.82443490036777, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 488123816, + "step": 2842 + }, + { + "epoch": 0.7475504701782074, + "loss": 0.10174276679754257, + "loss_ce": 0.00042440436664037406, + "loss_iou": 0.44921875, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 488123816, + "step": 2842 + }, + { + "epoch": 0.7478135069375945, + "grad_norm": 5.349643556816576, + "learning_rate": 5e-06, + "loss": 0.0817, + "num_input_tokens_seen": 488294544, + "step": 2843 + }, + { + "epoch": 0.7478135069375945, + "loss": 0.06341783702373505, + "loss_ce": 6.33449453744106e-05, + "loss_iou": 0.55859375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 488294544, + "step": 2843 + }, + { + "epoch": 0.7480765436969816, + "grad_norm": 5.881783273596092, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 488466512, + "step": 2844 + }, + { + "epoch": 0.7480765436969816, + "loss": 0.07953569293022156, + "loss_ce": 0.0013496556784957647, + "loss_iou": 0.515625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 488466512, + "step": 2844 + }, + { + "epoch": 0.7483395804563687, + "grad_norm": 4.967245481323238, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 488638580, + "step": 2845 + }, + { + "epoch": 0.7483395804563687, + "loss": 0.13291889429092407, + "loss_ce": 0.0028682297561317682, + "loss_iou": 0.55859375, + "loss_num": 0.026123046875, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 488638580, + "step": 2845 + }, + { + "epoch": 0.7486026172157559, + "grad_norm": 26.635770395926077, + "learning_rate": 5e-06, + "loss": 0.1546, + "num_input_tokens_seen": 488810652, + "step": 2846 + }, + { + "epoch": 0.7486026172157559, + "loss": 0.13681207597255707, + "loss_ce": 0.006761421915143728, + "loss_iou": 0.3359375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 488810652, + "step": 2846 + }, + { + "epoch": 0.748865653975143, + "grad_norm": 3.755973604918146, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 488982904, + "step": 2847 + }, + { + "epoch": 0.748865653975143, + "loss": 0.08354687690734863, + "loss_ce": 0.0004475130117498338, + "loss_iou": 0.447265625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 488982904, + "step": 2847 + }, + { + "epoch": 0.7491286907345301, + "grad_norm": 3.8356014362129516, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 489153060, + "step": 2848 + }, + { + "epoch": 0.7491286907345301, + "loss": 0.1231696754693985, + "loss_ce": 0.0056769950315356255, + "loss_iou": 0.478515625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 489153060, + "step": 2848 + }, + { + "epoch": 0.7493917274939172, + "grad_norm": 6.542346686389534, + "learning_rate": 5e-06, + "loss": 0.1367, + "num_input_tokens_seen": 489324920, + "step": 2849 + }, + { + "epoch": 0.7493917274939172, + "loss": 0.10556487739086151, + "loss_ce": 0.00360565772280097, + "loss_iou": 0.56640625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 489324920, + "step": 2849 + }, + { + "epoch": 0.7496547642533044, + "grad_norm": 3.9950005088023786, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 489497376, + "step": 2850 + }, + { + "epoch": 0.7496547642533044, + "loss": 0.11913494765758514, + "loss_ce": 0.001535467803478241, + "loss_iou": 0.474609375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 489497376, + "step": 2850 + }, + { + "epoch": 0.7499178010126916, + "grad_norm": 3.9324452711240707, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 489669656, + "step": 2851 + }, + { + "epoch": 0.7499178010126916, + "loss": 0.14491230249404907, + "loss_ce": 0.0019679656252264977, + "loss_iou": 0.61328125, + "loss_num": 0.0286865234375, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 489669656, + "step": 2851 + }, + { + "epoch": 0.7501808377720787, + "grad_norm": 4.883057174767028, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 489841572, + "step": 2852 + }, + { + "epoch": 0.7501808377720787, + "loss": 0.06473391503095627, + "loss_ce": 0.000845368776936084, + "loss_iou": 0.4140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 489841572, + "step": 2852 + }, + { + "epoch": 0.7504438745314658, + "grad_norm": 5.5775463930736695, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 490010444, + "step": 2853 + }, + { + "epoch": 0.7504438745314658, + "loss": 0.2125670313835144, + "loss_ce": 0.0003172852157149464, + "loss_iou": 0.55078125, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 490010444, + "step": 2853 + }, + { + "epoch": 0.7507069112908529, + "grad_norm": 12.138601127345398, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 490182752, + "step": 2854 + }, + { + "epoch": 0.7507069112908529, + "loss": 0.05896022543311119, + "loss_ce": 0.0001223331200890243, + "loss_iou": 0.5390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 490182752, + "step": 2854 + }, + { + "epoch": 0.75096994805024, + "grad_norm": 7.597618581630176, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 490354572, + "step": 2855 + }, + { + "epoch": 0.75096994805024, + "loss": 0.1745096743106842, + "loss_ce": 0.0004984359256923199, + "loss_iou": 0.498046875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 490354572, + "step": 2855 + }, + { + "epoch": 0.7512329848096272, + "grad_norm": 4.639912180988158, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 490524680, + "step": 2856 + }, + { + "epoch": 0.7512329848096272, + "loss": 0.16067692637443542, + "loss_ce": 0.006074874196201563, + "loss_iou": 0.427734375, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 490524680, + "step": 2856 + }, + { + "epoch": 0.7514960215690143, + "grad_norm": 14.921784803655473, + "learning_rate": 5e-06, + "loss": 0.1355, + "num_input_tokens_seen": 490696768, + "step": 2857 + }, + { + "epoch": 0.7514960215690143, + "loss": 0.1124132052063942, + "loss_ce": 0.0032213088124990463, + "loss_iou": 0.515625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 490696768, + "step": 2857 + }, + { + "epoch": 0.7517590583284014, + "grad_norm": 5.608340999176847, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 490869040, + "step": 2858 + }, + { + "epoch": 0.7517590583284014, + "loss": 0.05557282269001007, + "loss_ce": 0.003662426257506013, + "loss_iou": 0.478515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 490869040, + "step": 2858 + }, + { + "epoch": 0.7520220950877885, + "grad_norm": 4.649441181141725, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 491041492, + "step": 2859 + }, + { + "epoch": 0.7520220950877885, + "loss": 0.08483953773975372, + "loss_ce": 0.00024481338914483786, + "loss_iou": 0.625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 491041492, + "step": 2859 + }, + { + "epoch": 0.7522851318471756, + "grad_norm": 5.629942269274102, + "learning_rate": 5e-06, + "loss": 0.1139, + "num_input_tokens_seen": 491213328, + "step": 2860 + }, + { + "epoch": 0.7522851318471756, + "loss": 0.06876988708972931, + "loss_ce": 0.0007156922947615385, + "loss_iou": 0.4921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 491213328, + "step": 2860 + }, + { + "epoch": 0.7525481686065628, + "grad_norm": 5.7661162016412355, + "learning_rate": 5e-06, + "loss": 0.0949, + "num_input_tokens_seen": 491384112, + "step": 2861 + }, + { + "epoch": 0.7525481686065628, + "loss": 0.12756821513175964, + "loss_ce": 0.0015000998973846436, + "loss_iou": 0.69921875, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 491384112, + "step": 2861 + }, + { + "epoch": 0.7528112053659499, + "grad_norm": 4.585306018678188, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 491556404, + "step": 2862 + }, + { + "epoch": 0.7528112053659499, + "loss": 0.1506051868200302, + "loss_ce": 0.00195405725389719, + "loss_iou": 0.43359375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 491556404, + "step": 2862 + }, + { + "epoch": 0.753074242125337, + "grad_norm": 5.018961759147868, + "learning_rate": 5e-06, + "loss": 0.1566, + "num_input_tokens_seen": 491728560, + "step": 2863 + }, + { + "epoch": 0.753074242125337, + "loss": 0.08526686578989029, + "loss_ce": 0.0047004627995193005, + "loss_iou": 0.6015625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 491728560, + "step": 2863 + }, + { + "epoch": 0.7533372788847241, + "grad_norm": 5.649735345560609, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 491900292, + "step": 2864 + }, + { + "epoch": 0.7533372788847241, + "loss": 0.07598397135734558, + "loss_ce": 0.0024213448632508516, + "loss_iou": 0.3984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 491900292, + "step": 2864 + }, + { + "epoch": 0.7536003156441112, + "grad_norm": 5.741199989182661, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 492072492, + "step": 2865 + }, + { + "epoch": 0.7536003156441112, + "loss": 0.17299330234527588, + "loss_ce": 0.00398694584146142, + "loss_iou": 0.296875, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 492072492, + "step": 2865 + }, + { + "epoch": 0.7538633524034984, + "grad_norm": 6.545995942105509, + "learning_rate": 5e-06, + "loss": 0.0985, + "num_input_tokens_seen": 492244560, + "step": 2866 + }, + { + "epoch": 0.7538633524034984, + "loss": 0.11732736229896545, + "loss_ce": 0.00035348787787370384, + "loss_iou": 0.48828125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 492244560, + "step": 2866 + }, + { + "epoch": 0.7541263891628855, + "grad_norm": 7.690174312653901, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 492416460, + "step": 2867 + }, + { + "epoch": 0.7541263891628855, + "loss": 0.11988115310668945, + "loss_ce": 0.0037617662455886602, + "loss_iou": 0.5546875, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 492416460, + "step": 2867 + }, + { + "epoch": 0.7543894259222726, + "grad_norm": 4.911561927440868, + "learning_rate": 5e-06, + "loss": 0.1762, + "num_input_tokens_seen": 492582560, + "step": 2868 + }, + { + "epoch": 0.7543894259222726, + "loss": 0.17355158925056458, + "loss_ce": 0.0006237310590222478, + "loss_iou": 0.41796875, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 492582560, + "step": 2868 + }, + { + "epoch": 0.7546524626816598, + "grad_norm": 9.965344498896014, + "learning_rate": 5e-06, + "loss": 0.1137, + "num_input_tokens_seen": 492754468, + "step": 2869 + }, + { + "epoch": 0.7546524626816598, + "loss": 0.12515220046043396, + "loss_ce": 0.0038448250852525234, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 492754468, + "step": 2869 + }, + { + "epoch": 0.7549154994410469, + "grad_norm": 7.897878518361561, + "learning_rate": 5e-06, + "loss": 0.1325, + "num_input_tokens_seen": 492926916, + "step": 2870 + }, + { + "epoch": 0.7549154994410469, + "loss": 0.07539217174053192, + "loss_ce": 0.002699300181120634, + "loss_iou": 0.5390625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 492926916, + "step": 2870 + }, + { + "epoch": 0.755178536200434, + "grad_norm": 8.119348564456503, + "learning_rate": 5e-06, + "loss": 0.1413, + "num_input_tokens_seen": 493099340, + "step": 2871 + }, + { + "epoch": 0.755178536200434, + "loss": 0.10948731005191803, + "loss_ce": 0.002187497215345502, + "loss_iou": 0.5546875, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 493099340, + "step": 2871 + }, + { + "epoch": 0.7554415729598212, + "grad_norm": 4.655104945227204, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 493269864, + "step": 2872 + }, + { + "epoch": 0.7554415729598212, + "loss": 0.1148996502161026, + "loss_ce": 9.252215386368334e-05, + "loss_iou": 0.5390625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 493269864, + "step": 2872 + }, + { + "epoch": 0.7557046097192083, + "grad_norm": 7.2487773846350985, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 493441844, + "step": 2873 + }, + { + "epoch": 0.7557046097192083, + "loss": 0.22252312302589417, + "loss_ce": 0.00032462860690429807, + "loss_iou": 0.47265625, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 493441844, + "step": 2873 + }, + { + "epoch": 0.7559676464785954, + "grad_norm": 8.669322747332805, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 493612416, + "step": 2874 + }, + { + "epoch": 0.7559676464785954, + "loss": 0.1249040961265564, + "loss_ce": 0.0034746630117297173, + "loss_iou": 0.5859375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 493612416, + "step": 2874 + }, + { + "epoch": 0.7562306832379825, + "grad_norm": 16.40313430717725, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 493782864, + "step": 2875 + }, + { + "epoch": 0.7562306832379825, + "loss": 0.09373937547206879, + "loss_ce": 0.0021256012842059135, + "loss_iou": 0.68359375, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 493782864, + "step": 2875 + }, + { + "epoch": 0.7564937199973696, + "grad_norm": 6.017272387510744, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 493955332, + "step": 2876 + }, + { + "epoch": 0.7564937199973696, + "loss": 0.1287456452846527, + "loss_ce": 0.0006633760058321059, + "loss_iou": 0.41015625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 493955332, + "step": 2876 + }, + { + "epoch": 0.7567567567567568, + "grad_norm": 5.38013008566038, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 494127604, + "step": 2877 + }, + { + "epoch": 0.7567567567567568, + "loss": 0.10688350349664688, + "loss_ce": 0.0010485434904694557, + "loss_iou": 0.5703125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 494127604, + "step": 2877 + }, + { + "epoch": 0.7570197935161439, + "grad_norm": 16.650882691357587, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 494298004, + "step": 2878 + }, + { + "epoch": 0.7570197935161439, + "loss": 0.1486469805240631, + "loss_ce": 0.0029255489353090525, + "loss_iou": 0.484375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 494298004, + "step": 2878 + }, + { + "epoch": 0.757282830275531, + "grad_norm": 6.750510186398287, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 494470108, + "step": 2879 + }, + { + "epoch": 0.757282830275531, + "loss": 0.1393243670463562, + "loss_ce": 0.0016443128697574139, + "loss_iou": 0.287109375, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 494470108, + "step": 2879 + }, + { + "epoch": 0.7575458670349181, + "grad_norm": 18.523627538074937, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 494640736, + "step": 2880 + }, + { + "epoch": 0.7575458670349181, + "loss": 0.11819358170032501, + "loss_ce": 0.0007009088294580579, + "loss_iou": 0.6171875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 494640736, + "step": 2880 + }, + { + "epoch": 0.7578089037943052, + "grad_norm": 6.843824113206992, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 494812784, + "step": 2881 + }, + { + "epoch": 0.7578089037943052, + "loss": 0.18498176336288452, + "loss_ce": 0.001769477385096252, + "loss_iou": 0.390625, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 494812784, + "step": 2881 + }, + { + "epoch": 0.7580719405536924, + "grad_norm": 4.288128333077997, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 494984900, + "step": 2882 + }, + { + "epoch": 0.7580719405536924, + "loss": 0.11615432053804398, + "loss_ce": 0.0020796118769794703, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 494984900, + "step": 2882 + }, + { + "epoch": 0.7583349773130795, + "grad_norm": 7.805094359622811, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 495157204, + "step": 2883 + }, + { + "epoch": 0.7583349773130795, + "loss": 0.15978480875492096, + "loss_ce": 0.002222547074779868, + "loss_iou": 0.578125, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 495157204, + "step": 2883 + }, + { + "epoch": 0.7585980140724666, + "grad_norm": 9.068753339594036, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 495325872, + "step": 2884 + }, + { + "epoch": 0.7585980140724666, + "loss": 0.06683582067489624, + "loss_ce": 0.001055177883245051, + "loss_iou": 0.5625, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 495325872, + "step": 2884 + }, + { + "epoch": 0.7588610508318537, + "grad_norm": 5.303316590010937, + "learning_rate": 5e-06, + "loss": 0.1331, + "num_input_tokens_seen": 495497944, + "step": 2885 + }, + { + "epoch": 0.7588610508318537, + "loss": 0.09937077760696411, + "loss_ce": 0.00031071933335624635, + "loss_iou": 0.5625, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 495497944, + "step": 2885 + }, + { + "epoch": 0.7591240875912408, + "grad_norm": 5.954668834746019, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 495670336, + "step": 2886 + }, + { + "epoch": 0.7591240875912408, + "loss": 0.04735005646944046, + "loss_ce": 0.0002461753028910607, + "loss_iou": 0.4375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 495670336, + "step": 2886 + }, + { + "epoch": 0.759387124350628, + "grad_norm": 9.86702776219205, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 495842556, + "step": 2887 + }, + { + "epoch": 0.759387124350628, + "loss": 0.09923535585403442, + "loss_ce": 0.0010908262338489294, + "loss_iou": NaN, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 495842556, + "step": 2887 + }, + { + "epoch": 0.7596501611100152, + "grad_norm": 11.076727887110565, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 496012000, + "step": 2888 + }, + { + "epoch": 0.7596501611100152, + "loss": 0.07854500412940979, + "loss_ce": 0.0005115569802001119, + "loss_iou": 0.6171875, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 496012000, + "step": 2888 + }, + { + "epoch": 0.7599131978694023, + "grad_norm": 4.344396012390115, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 496181744, + "step": 2889 + }, + { + "epoch": 0.7599131978694023, + "loss": 0.07574538886547089, + "loss_ce": 0.0021980288438498974, + "loss_iou": 0.5625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 496181744, + "step": 2889 + }, + { + "epoch": 0.7601762346287894, + "grad_norm": 4.8762260883901964, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 496353892, + "step": 2890 + }, + { + "epoch": 0.7601762346287894, + "loss": 0.1910967230796814, + "loss_ce": 0.0009111673571169376, + "loss_iou": 0.349609375, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 496353892, + "step": 2890 + }, + { + "epoch": 0.7604392713881765, + "grad_norm": 31.258153024005672, + "learning_rate": 5e-06, + "loss": 0.1188, + "num_input_tokens_seen": 496525916, + "step": 2891 + }, + { + "epoch": 0.7604392713881765, + "loss": 0.07003885507583618, + "loss_ce": 0.0008249912643805146, + "loss_iou": 0.546875, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 496525916, + "step": 2891 + }, + { + "epoch": 0.7607023081475637, + "grad_norm": 3.99666824293353, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 496698052, + "step": 2892 + }, + { + "epoch": 0.7607023081475637, + "loss": 0.09317123889923096, + "loss_ce": 0.0031443799380213022, + "loss_iou": 0.59765625, + "loss_num": 0.01806640625, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 496698052, + "step": 2892 + }, + { + "epoch": 0.7609653449069508, + "grad_norm": 3.917894736012383, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 496868580, + "step": 2893 + }, + { + "epoch": 0.7609653449069508, + "loss": 0.1594741940498352, + "loss_ce": 0.0014236548449844122, + "loss_iou": 0.462890625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 496868580, + "step": 2893 + }, + { + "epoch": 0.7612283816663379, + "grad_norm": 6.805778622837066, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 497040720, + "step": 2894 + }, + { + "epoch": 0.7612283816663379, + "loss": 0.04234718531370163, + "loss_ce": 0.0003244808176532388, + "loss_iou": 0.44140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 497040720, + "step": 2894 + }, + { + "epoch": 0.761491418425725, + "grad_norm": 8.055134529886692, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 497213204, + "step": 2895 + }, + { + "epoch": 0.761491418425725, + "loss": 0.06772617995738983, + "loss_ce": 0.0006180237978696823, + "loss_iou": 0.5546875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 497213204, + "step": 2895 + }, + { + "epoch": 0.7617544551851121, + "grad_norm": 20.74511641540883, + "learning_rate": 5e-06, + "loss": 0.1427, + "num_input_tokens_seen": 497382348, + "step": 2896 + }, + { + "epoch": 0.7617544551851121, + "loss": 0.1839737594127655, + "loss_ce": 0.006239374168217182, + "loss_iou": 0.5, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 497382348, + "step": 2896 + }, + { + "epoch": 0.7620174919444992, + "grad_norm": 3.827051566364027, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 497554288, + "step": 2897 + }, + { + "epoch": 0.7620174919444992, + "loss": 0.1408492773771286, + "loss_ce": 0.0003158297040499747, + "loss_iou": 0.59375, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 497554288, + "step": 2897 + }, + { + "epoch": 0.7622805287038864, + "grad_norm": 4.251155438400414, + "learning_rate": 5e-06, + "loss": 0.0949, + "num_input_tokens_seen": 497726624, + "step": 2898 + }, + { + "epoch": 0.7622805287038864, + "loss": 0.08204406499862671, + "loss_ce": 0.0024694851599633694, + "loss_iou": 0.6015625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 497726624, + "step": 2898 + }, + { + "epoch": 0.7625435654632735, + "grad_norm": 25.921624546006864, + "learning_rate": 5e-06, + "loss": 0.1214, + "num_input_tokens_seen": 497899052, + "step": 2899 + }, + { + "epoch": 0.7625435654632735, + "loss": 0.125847727060318, + "loss_ce": 5.42689704161603e-05, + "loss_iou": 0.546875, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 497899052, + "step": 2899 + }, + { + "epoch": 0.7628066022226606, + "grad_norm": 3.225720909448473, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 498071164, + "step": 2900 + }, + { + "epoch": 0.7628066022226606, + "loss": 0.13742178678512573, + "loss_ce": 0.002015294972807169, + "loss_iou": 0.5625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 498071164, + "step": 2900 + }, + { + "epoch": 0.7630696389820477, + "grad_norm": 10.436595772958983, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 498243380, + "step": 2901 + }, + { + "epoch": 0.7630696389820477, + "loss": 0.08406674116849899, + "loss_ce": 0.0019744576420634985, + "loss_iou": 0.482421875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 498243380, + "step": 2901 + }, + { + "epoch": 0.7633326757414348, + "grad_norm": 5.876814424855767, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 498415740, + "step": 2902 + }, + { + "epoch": 0.7633326757414348, + "loss": 0.12014832347631454, + "loss_ce": 0.0023199557326734066, + "loss_iou": 0.52734375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 498415740, + "step": 2902 + }, + { + "epoch": 0.763595712500822, + "grad_norm": 6.217199501402136, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 498587740, + "step": 2903 + }, + { + "epoch": 0.763595712500822, + "loss": 0.0937931090593338, + "loss_ce": 0.0013553638709709048, + "loss_iou": 0.6171875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 498587740, + "step": 2903 + }, + { + "epoch": 0.7638587492602091, + "grad_norm": 4.965193192238494, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 498759856, + "step": 2904 + }, + { + "epoch": 0.7638587492602091, + "loss": 0.0735275000333786, + "loss_ce": 0.00013272266369313002, + "loss_iou": 0.578125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 498759856, + "step": 2904 + }, + { + "epoch": 0.7641217860195962, + "grad_norm": 36.02324764024362, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 498929524, + "step": 2905 + }, + { + "epoch": 0.7641217860195962, + "loss": 0.13745470345020294, + "loss_ce": 0.002353382296860218, + "loss_iou": 0.40234375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 498929524, + "step": 2905 + }, + { + "epoch": 0.7643848227789833, + "grad_norm": 4.064476858285287, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 499099092, + "step": 2906 + }, + { + "epoch": 0.7643848227789833, + "loss": 0.12520155310630798, + "loss_ce": 0.0020325970835983753, + "loss_iou": 0.431640625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 499099092, + "step": 2906 + }, + { + "epoch": 0.7646478595383704, + "grad_norm": 9.564182625696946, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 499271132, + "step": 2907 + }, + { + "epoch": 0.7646478595383704, + "loss": 0.10383596271276474, + "loss_ce": 0.0036772743333131075, + "loss_iou": 0.6015625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 499271132, + "step": 2907 + }, + { + "epoch": 0.7649108962977577, + "grad_norm": 7.38548887746826, + "learning_rate": 5e-06, + "loss": 0.1407, + "num_input_tokens_seen": 499443276, + "step": 2908 + }, + { + "epoch": 0.7649108962977577, + "loss": 0.07420553267002106, + "loss_ce": 0.006151327397674322, + "loss_iou": 0.32421875, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 499443276, + "step": 2908 + }, + { + "epoch": 0.7651739330571448, + "grad_norm": 3.357102180261463, + "learning_rate": 5e-06, + "loss": 0.1387, + "num_input_tokens_seen": 499615436, + "step": 2909 + }, + { + "epoch": 0.7651739330571448, + "loss": 0.13687880337238312, + "loss_ce": 0.0005110005149617791, + "loss_iou": 0.52734375, + "loss_num": 0.0272216796875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 499615436, + "step": 2909 + }, + { + "epoch": 0.7654369698165319, + "grad_norm": 5.532042225189138, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 499787568, + "step": 2910 + }, + { + "epoch": 0.7654369698165319, + "loss": 0.10780411958694458, + "loss_ce": 0.0017250193050131202, + "loss_iou": 0.423828125, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 499787568, + "step": 2910 + }, + { + "epoch": 0.765700006575919, + "grad_norm": 9.098827733621626, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 499959744, + "step": 2911 + }, + { + "epoch": 0.765700006575919, + "loss": 0.11246542632579803, + "loss_ce": 0.0012135956203565001, + "loss_iou": 0.431640625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 499959744, + "step": 2911 + }, + { + "epoch": 0.7659630433353061, + "grad_norm": 4.671256681679922, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 500132012, + "step": 2912 + }, + { + "epoch": 0.7659630433353061, + "loss": 0.07265074551105499, + "loss_ce": 0.003986193798482418, + "loss_iou": 0.478515625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 500132012, + "step": 2912 + }, + { + "epoch": 0.7662260800946933, + "grad_norm": 9.318330323174848, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 500304120, + "step": 2913 + }, + { + "epoch": 0.7662260800946933, + "loss": 0.10342703014612198, + "loss_ce": 0.0003996905288659036, + "loss_iou": 0.55859375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 500304120, + "step": 2913 + }, + { + "epoch": 0.7664891168540804, + "grad_norm": 4.11205791514663, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 500476220, + "step": 2914 + }, + { + "epoch": 0.7664891168540804, + "loss": 0.16668415069580078, + "loss_ce": 0.00170612963847816, + "loss_iou": 0.33203125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 500476220, + "step": 2914 + }, + { + "epoch": 0.7667521536134675, + "grad_norm": 7.921166949882202, + "learning_rate": 5e-06, + "loss": 0.1248, + "num_input_tokens_seen": 500648456, + "step": 2915 + }, + { + "epoch": 0.7667521536134675, + "loss": 0.14688915014266968, + "loss_ce": 0.0016559937503188848, + "loss_iou": 0.546875, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 500648456, + "step": 2915 + }, + { + "epoch": 0.7670151903728546, + "grad_norm": 7.89559416185462, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 500817440, + "step": 2916 + }, + { + "epoch": 0.7670151903728546, + "loss": 0.13112960755825043, + "loss_ce": 0.0047868345864117146, + "loss_iou": 0.5, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 500817440, + "step": 2916 + }, + { + "epoch": 0.7672782271322417, + "grad_norm": 7.07373988410314, + "learning_rate": 5e-06, + "loss": 0.1543, + "num_input_tokens_seen": 500989576, + "step": 2917 + }, + { + "epoch": 0.7672782271322417, + "loss": 0.17218878865242004, + "loss_ce": 0.0034876265563070774, + "loss_iou": 0.55859375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 500989576, + "step": 2917 + }, + { + "epoch": 0.7675412638916288, + "grad_norm": 4.658693255259618, + "learning_rate": 5e-06, + "loss": 0.0942, + "num_input_tokens_seen": 501161844, + "step": 2918 + }, + { + "epoch": 0.7675412638916288, + "loss": 0.0911635234951973, + "loss_ce": 0.0017775364685803652, + "loss_iou": 0.474609375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 501161844, + "step": 2918 + }, + { + "epoch": 0.767804300651016, + "grad_norm": 5.6482830730287485, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 501334012, + "step": 2919 + }, + { + "epoch": 0.767804300651016, + "loss": 0.12131966650485992, + "loss_ce": 0.00028695265064015985, + "loss_iou": 0.52734375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 501334012, + "step": 2919 + }, + { + "epoch": 0.7680673374104031, + "grad_norm": 6.055907471900818, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 501504632, + "step": 2920 + }, + { + "epoch": 0.7680673374104031, + "loss": 0.06848346441984177, + "loss_ce": 0.0003071928513236344, + "loss_iou": 0.44921875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 501504632, + "step": 2920 + }, + { + "epoch": 0.7683303741697902, + "grad_norm": 4.128716866112718, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 501676892, + "step": 2921 + }, + { + "epoch": 0.7683303741697902, + "loss": 0.12142758071422577, + "loss_ce": 0.000974692520685494, + "loss_iou": 0.5, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 501676892, + "step": 2921 + }, + { + "epoch": 0.7685934109291773, + "grad_norm": 6.285708685708194, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 501846408, + "step": 2922 + }, + { + "epoch": 0.7685934109291773, + "loss": 0.13839051127433777, + "loss_ce": 0.0014276191359385848, + "loss_iou": 0.5703125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 501846408, + "step": 2922 + }, + { + "epoch": 0.7688564476885644, + "grad_norm": 5.199523762158551, + "learning_rate": 5e-06, + "loss": 0.0935, + "num_input_tokens_seen": 502018884, + "step": 2923 + }, + { + "epoch": 0.7688564476885644, + "loss": 0.0823674127459526, + "loss_ce": 0.0034794718958437443, + "loss_iou": 0.69921875, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 502018884, + "step": 2923 + }, + { + "epoch": 0.7691194844479516, + "grad_norm": 7.865534449035968, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 502189840, + "step": 2924 + }, + { + "epoch": 0.7691194844479516, + "loss": 0.09236955642700195, + "loss_ce": 0.0008473452762700617, + "loss_iou": 0.5234375, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 502189840, + "step": 2924 + }, + { + "epoch": 0.7693825212073387, + "grad_norm": 4.464952808674884, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 502361908, + "step": 2925 + }, + { + "epoch": 0.7693825212073387, + "loss": 0.09519391506910324, + "loss_ce": 0.002862985013052821, + "loss_iou": 0.40234375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 502361908, + "step": 2925 + }, + { + "epoch": 0.7696455579667258, + "grad_norm": 3.137863532599948, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 502533852, + "step": 2926 + }, + { + "epoch": 0.7696455579667258, + "loss": 0.062405504286289215, + "loss_ce": 0.000225941272219643, + "loss_iou": 0.52734375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 502533852, + "step": 2926 + }, + { + "epoch": 0.769908594726113, + "grad_norm": 5.424523366119041, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 502706068, + "step": 2927 + }, + { + "epoch": 0.769908594726113, + "loss": 0.15939806401729584, + "loss_ce": 0.0009050165535882115, + "loss_iou": 0.6484375, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 502706068, + "step": 2927 + }, + { + "epoch": 0.7701716314855, + "grad_norm": 21.020644002444403, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 502878204, + "step": 2928 + }, + { + "epoch": 0.7701716314855, + "loss": 0.1498870849609375, + "loss_ce": 0.00108338613063097, + "loss_iou": 0.3984375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 502878204, + "step": 2928 + }, + { + "epoch": 0.7704346682448873, + "grad_norm": 6.513721562381314, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 503050516, + "step": 2929 + }, + { + "epoch": 0.7704346682448873, + "loss": 0.14237374067306519, + "loss_ce": 0.0013520093634724617, + "loss_iou": 0.53125, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 503050516, + "step": 2929 + }, + { + "epoch": 0.7706977050042744, + "grad_norm": 21.68669265107994, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 503222664, + "step": 2930 + }, + { + "epoch": 0.7706977050042744, + "loss": 0.09872519969940186, + "loss_ce": 0.0024269861169159412, + "loss_iou": 0.53125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 503222664, + "step": 2930 + }, + { + "epoch": 0.7709607417636615, + "grad_norm": 23.71932442538902, + "learning_rate": 5e-06, + "loss": 0.1131, + "num_input_tokens_seen": 503394820, + "step": 2931 + }, + { + "epoch": 0.7709607417636615, + "loss": 0.14681634306907654, + "loss_ce": 0.0005455805221572518, + "loss_iou": 0.359375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 503394820, + "step": 2931 + }, + { + "epoch": 0.7712237785230486, + "grad_norm": 14.0356485916023, + "learning_rate": 5e-06, + "loss": 0.1609, + "num_input_tokens_seen": 503567064, + "step": 2932 + }, + { + "epoch": 0.7712237785230486, + "loss": 0.1981443464756012, + "loss_ce": 0.002374083735048771, + "loss_iou": 0.4453125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 503567064, + "step": 2932 + }, + { + "epoch": 0.7714868152824357, + "grad_norm": 4.952386040850401, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 503739480, + "step": 2933 + }, + { + "epoch": 0.7714868152824357, + "loss": 0.2314581573009491, + "loss_ce": 0.002728914376348257, + "loss_iou": 0.40234375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 503739480, + "step": 2933 + }, + { + "epoch": 0.7717498520418229, + "grad_norm": 5.454737692407838, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 503911536, + "step": 2934 + }, + { + "epoch": 0.7717498520418229, + "loss": 0.1048421710729599, + "loss_ce": 0.004591919481754303, + "loss_iou": 0.474609375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 503911536, + "step": 2934 + }, + { + "epoch": 0.77201288880121, + "grad_norm": 7.9773349981616795, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 504082084, + "step": 2935 + }, + { + "epoch": 0.77201288880121, + "loss": 0.1008715033531189, + "loss_ce": 0.0018419669941067696, + "loss_iou": 0.5234375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 504082084, + "step": 2935 + }, + { + "epoch": 0.7722759255605971, + "grad_norm": 5.256239273173906, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 504254328, + "step": 2936 + }, + { + "epoch": 0.7722759255605971, + "loss": 0.10146059095859528, + "loss_ce": 0.0031024364288896322, + "loss_iou": 0.50390625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 504254328, + "step": 2936 + }, + { + "epoch": 0.7725389623199842, + "grad_norm": 7.796343823545596, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 504426408, + "step": 2937 + }, + { + "epoch": 0.7725389623199842, + "loss": 0.09895452111959457, + "loss_ce": 0.00047429182450287044, + "loss_iou": 0.4375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 504426408, + "step": 2937 + }, + { + "epoch": 0.7728019990793713, + "grad_norm": 4.506271114804364, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 504599160, + "step": 2938 + }, + { + "epoch": 0.7728019990793713, + "loss": 0.10439816117286682, + "loss_ce": 0.0011114203371107578, + "loss_iou": 0.53515625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 504599160, + "step": 2938 + }, + { + "epoch": 0.7730650358387585, + "grad_norm": 7.051793538200423, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 504771620, + "step": 2939 + }, + { + "epoch": 0.7730650358387585, + "loss": 0.04662296548485756, + "loss_ce": 0.001548499334603548, + "loss_iou": 0.51953125, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 504771620, + "step": 2939 + }, + { + "epoch": 0.7733280725981456, + "grad_norm": 4.48321731819358, + "learning_rate": 5e-06, + "loss": 0.1049, + "num_input_tokens_seen": 504942212, + "step": 2940 + }, + { + "epoch": 0.7733280725981456, + "loss": 0.1206519603729248, + "loss_ce": 0.0022132450249046087, + "loss_iou": 0.49609375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 504942212, + "step": 2940 + }, + { + "epoch": 0.7735911093575327, + "grad_norm": 3.542637672706516, + "learning_rate": 5e-06, + "loss": 0.0699, + "num_input_tokens_seen": 505114396, + "step": 2941 + }, + { + "epoch": 0.7735911093575327, + "loss": 0.07642598450183868, + "loss_ce": 0.002970176050439477, + "loss_iou": 0.59375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 505114396, + "step": 2941 + }, + { + "epoch": 0.7738541461169198, + "grad_norm": 13.31557941839502, + "learning_rate": 5e-06, + "loss": 0.152, + "num_input_tokens_seen": 505286572, + "step": 2942 + }, + { + "epoch": 0.7738541461169198, + "loss": 0.11694711446762085, + "loss_ce": 0.000492042163386941, + "loss_iou": 0.484375, + "loss_num": 0.0233154296875, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 505286572, + "step": 2942 + }, + { + "epoch": 0.7741171828763069, + "grad_norm": 4.217985208788173, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 505458580, + "step": 2943 + }, + { + "epoch": 0.7741171828763069, + "loss": 0.12103226035833359, + "loss_ce": 0.00256301905028522, + "loss_iou": 0.6640625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 505458580, + "step": 2943 + }, + { + "epoch": 0.774380219635694, + "grad_norm": 5.305953482185517, + "learning_rate": 5e-06, + "loss": 0.1417, + "num_input_tokens_seen": 505630628, + "step": 2944 + }, + { + "epoch": 0.774380219635694, + "loss": 0.13166974484920502, + "loss_ce": 0.0009019209537655115, + "loss_iou": 0.302734375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 505630628, + "step": 2944 + }, + { + "epoch": 0.7746432563950812, + "grad_norm": 3.7265892335604103, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 505802760, + "step": 2945 + }, + { + "epoch": 0.7746432563950812, + "loss": 0.174719899892807, + "loss_ce": 0.005622013006359339, + "loss_iou": 0.515625, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 505802760, + "step": 2945 + }, + { + "epoch": 0.7749062931544684, + "grad_norm": 3.906958326198367, + "learning_rate": 5e-06, + "loss": 0.0915, + "num_input_tokens_seen": 505975008, + "step": 2946 + }, + { + "epoch": 0.7749062931544684, + "loss": 0.15144148468971252, + "loss_ce": 0.003522793762385845, + "loss_iou": 0.609375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 505975008, + "step": 2946 + }, + { + "epoch": 0.7751693299138555, + "grad_norm": 30.273164796131727, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 506146864, + "step": 2947 + }, + { + "epoch": 0.7751693299138555, + "loss": 0.12878431379795074, + "loss_ce": 0.00238050683401525, + "loss_iou": 0.52734375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 506146864, + "step": 2947 + }, + { + "epoch": 0.7754323666732426, + "grad_norm": 9.326627239945015, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 506317520, + "step": 2948 + }, + { + "epoch": 0.7754323666732426, + "loss": 0.13574595749378204, + "loss_ce": 0.001163432258181274, + "loss_iou": 0.384765625, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 506317520, + "step": 2948 + }, + { + "epoch": 0.7756954034326297, + "grad_norm": 4.291282220054208, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 506489644, + "step": 2949 + }, + { + "epoch": 0.7756954034326297, + "loss": 0.07531201094388962, + "loss_ce": 0.0006965312641113997, + "loss_iou": 0.5625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 506489644, + "step": 2949 + }, + { + "epoch": 0.7759584401920169, + "grad_norm": 5.132046879043553, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 506661552, + "step": 2950 + }, + { + "epoch": 0.7759584401920169, + "loss": 0.12264753133058548, + "loss_ce": 0.003171210875734687, + "loss_iou": 0.6015625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 506661552, + "step": 2950 + }, + { + "epoch": 0.776221476951404, + "grad_norm": 18.605800930237596, + "learning_rate": 5e-06, + "loss": 0.1066, + "num_input_tokens_seen": 506832080, + "step": 2951 + }, + { + "epoch": 0.776221476951404, + "loss": 0.08513291925191879, + "loss_ce": 0.0006144904182292521, + "loss_iou": 0.5390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 506832080, + "step": 2951 + }, + { + "epoch": 0.7764845137107911, + "grad_norm": 13.809771584503252, + "learning_rate": 5e-06, + "loss": 0.148, + "num_input_tokens_seen": 507004396, + "step": 2952 + }, + { + "epoch": 0.7764845137107911, + "loss": 0.16220027208328247, + "loss_ce": 0.007384595461189747, + "loss_iou": 0.51953125, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 507004396, + "step": 2952 + }, + { + "epoch": 0.7767475504701782, + "grad_norm": 6.052098852686667, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 507176636, + "step": 2953 + }, + { + "epoch": 0.7767475504701782, + "loss": 0.10348343849182129, + "loss_ce": 7.461909262929112e-05, + "loss_iou": 0.640625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 507176636, + "step": 2953 + }, + { + "epoch": 0.7770105872295653, + "grad_norm": 6.115673418463162, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 507348812, + "step": 2954 + }, + { + "epoch": 0.7770105872295653, + "loss": 0.17799662053585052, + "loss_ce": 0.0009031177032738924, + "loss_iou": 0.55078125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 507348812, + "step": 2954 + }, + { + "epoch": 0.7772736239889525, + "grad_norm": 4.920564715708625, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 507521260, + "step": 2955 + }, + { + "epoch": 0.7772736239889525, + "loss": 0.12127618491649628, + "loss_ce": 0.002562812063843012, + "loss_iou": 0.625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 507521260, + "step": 2955 + }, + { + "epoch": 0.7775366607483396, + "grad_norm": 4.1269903282175875, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 507693580, + "step": 2956 + }, + { + "epoch": 0.7775366607483396, + "loss": 0.1181151419878006, + "loss_ce": 0.0023619618732482195, + "loss_iou": 0.5234375, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 507693580, + "step": 2956 + }, + { + "epoch": 0.7777996975077267, + "grad_norm": 6.530908453055091, + "learning_rate": 5e-06, + "loss": 0.1223, + "num_input_tokens_seen": 507866000, + "step": 2957 + }, + { + "epoch": 0.7777996975077267, + "loss": 0.09897395223379135, + "loss_ce": 0.001989088486880064, + "loss_iou": 0.5234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 507866000, + "step": 2957 + }, + { + "epoch": 0.7780627342671138, + "grad_norm": 13.799197287571243, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 508036520, + "step": 2958 + }, + { + "epoch": 0.7780627342671138, + "loss": 0.12385988235473633, + "loss_ce": 0.0020031901076436043, + "loss_iou": 0.5078125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 508036520, + "step": 2958 + }, + { + "epoch": 0.7783257710265009, + "grad_norm": 6.771630034008941, + "learning_rate": 5e-06, + "loss": 0.1293, + "num_input_tokens_seen": 508208512, + "step": 2959 + }, + { + "epoch": 0.7783257710265009, + "loss": 0.09063053131103516, + "loss_ce": 0.001976970350369811, + "loss_iou": 0.70703125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 508208512, + "step": 2959 + }, + { + "epoch": 0.7785888077858881, + "grad_norm": 17.947272506590085, + "learning_rate": 5e-06, + "loss": 0.1182, + "num_input_tokens_seen": 508380916, + "step": 2960 + }, + { + "epoch": 0.7785888077858881, + "loss": 0.18887397646903992, + "loss_ce": 0.000855189049616456, + "loss_iou": 0.349609375, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 508380916, + "step": 2960 + }, + { + "epoch": 0.7788518445452752, + "grad_norm": 8.543500304163382, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 508551252, + "step": 2961 + }, + { + "epoch": 0.7788518445452752, + "loss": 0.10420133173465729, + "loss_ce": 0.000197422195924446, + "loss_iou": 0.412109375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 508551252, + "step": 2961 + }, + { + "epoch": 0.7791148813046623, + "grad_norm": 11.527993211943581, + "learning_rate": 5e-06, + "loss": 0.1274, + "num_input_tokens_seen": 508723096, + "step": 2962 + }, + { + "epoch": 0.7791148813046623, + "loss": 0.13255223631858826, + "loss_ce": 0.0016013117274269462, + "loss_iou": 0.431640625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 508723096, + "step": 2962 + }, + { + "epoch": 0.7793779180640494, + "grad_norm": 3.765008399602552, + "learning_rate": 5e-06, + "loss": 0.1139, + "num_input_tokens_seen": 508895336, + "step": 2963 + }, + { + "epoch": 0.7793779180640494, + "loss": 0.15525312721729279, + "loss_ce": 0.00026197341503575444, + "loss_iou": 0.435546875, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 508895336, + "step": 2963 + }, + { + "epoch": 0.7796409548234365, + "grad_norm": 4.0681392149062905, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 509065748, + "step": 2964 + }, + { + "epoch": 0.7796409548234365, + "loss": 0.103541798889637, + "loss_ce": 0.0006670450093224645, + "loss_iou": 0.4765625, + "loss_num": 0.0205078125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 509065748, + "step": 2964 + }, + { + "epoch": 0.7799039915828238, + "grad_norm": 5.764259729112096, + "learning_rate": 5e-06, + "loss": 0.1354, + "num_input_tokens_seen": 509237988, + "step": 2965 + }, + { + "epoch": 0.7799039915828238, + "loss": 0.09293576329946518, + "loss_ce": 0.0004980218946002424, + "loss_iou": 0.578125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 509237988, + "step": 2965 + }, + { + "epoch": 0.7801670283422109, + "grad_norm": 6.653297635090324, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 509407480, + "step": 2966 + }, + { + "epoch": 0.7801670283422109, + "loss": 0.10752134025096893, + "loss_ce": 0.0008471491746604443, + "loss_iou": 0.546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 509407480, + "step": 2966 + }, + { + "epoch": 0.780430065101598, + "grad_norm": 5.8594293697060476, + "learning_rate": 5e-06, + "loss": 0.1152, + "num_input_tokens_seen": 509579708, + "step": 2967 + }, + { + "epoch": 0.780430065101598, + "loss": 0.19974008202552795, + "loss_ce": 0.005846647545695305, + "loss_iou": 0.3984375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 509579708, + "step": 2967 + }, + { + "epoch": 0.7806931018609851, + "grad_norm": 4.074243325830813, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 509750260, + "step": 2968 + }, + { + "epoch": 0.7806931018609851, + "loss": 0.09352241456508636, + "loss_ce": 0.00045905529987066984, + "loss_iou": 0.5625, + "loss_num": 0.0185546875, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 509750260, + "step": 2968 + }, + { + "epoch": 0.7809561386203722, + "grad_norm": 18.473086839056062, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 509922416, + "step": 2969 + }, + { + "epoch": 0.7809561386203722, + "loss": 0.10857398062944412, + "loss_ce": 0.002586429938673973, + "loss_iou": 0.50390625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 509922416, + "step": 2969 + }, + { + "epoch": 0.7812191753797593, + "grad_norm": 4.297341068075648, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 510094352, + "step": 2970 + }, + { + "epoch": 0.7812191753797593, + "loss": 0.10554330050945282, + "loss_ce": 0.002088708570227027, + "loss_iou": 0.53125, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 510094352, + "step": 2970 + }, + { + "epoch": 0.7814822121391465, + "grad_norm": 4.523551186515909, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 510266568, + "step": 2971 + }, + { + "epoch": 0.7814822121391465, + "loss": 0.1601797491312027, + "loss_ce": 0.000939027639105916, + "loss_iou": 0.51171875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 510266568, + "step": 2971 + }, + { + "epoch": 0.7817452488985336, + "grad_norm": 11.257967210341226, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 510438880, + "step": 2972 + }, + { + "epoch": 0.7817452488985336, + "loss": 0.08428631722927094, + "loss_ce": 0.0006986761000007391, + "loss_iou": 0.44921875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 510438880, + "step": 2972 + }, + { + "epoch": 0.7820082856579207, + "grad_norm": 4.879983737803821, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 510610888, + "step": 2973 + }, + { + "epoch": 0.7820082856579207, + "loss": 0.061606645584106445, + "loss_ce": 0.0010292520746588707, + "loss_iou": 0.515625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 510610888, + "step": 2973 + }, + { + "epoch": 0.7822713224173078, + "grad_norm": 6.784338572567158, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 510780736, + "step": 2974 + }, + { + "epoch": 0.7822713224173078, + "loss": 0.09628412127494812, + "loss_ce": 0.0025646386202424765, + "loss_iou": 0.51171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 510780736, + "step": 2974 + }, + { + "epoch": 0.7825343591766949, + "grad_norm": 2.4966016721195827, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 510952740, + "step": 2975 + }, + { + "epoch": 0.7825343591766949, + "loss": 0.11784331500530243, + "loss_ce": 0.002517384709790349, + "loss_iou": 0.5390625, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 510952740, + "step": 2975 + }, + { + "epoch": 0.7827973959360821, + "grad_norm": 9.752032559631301, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 511124984, + "step": 2976 + }, + { + "epoch": 0.7827973959360821, + "loss": 0.10057765245437622, + "loss_ce": 0.001853286987170577, + "loss_iou": 0.431640625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 511124984, + "step": 2976 + }, + { + "epoch": 0.7830604326954692, + "grad_norm": 4.047284415442861, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 511297176, + "step": 2977 + }, + { + "epoch": 0.7830604326954692, + "loss": 0.1439676731824875, + "loss_ce": 0.004929587244987488, + "loss_iou": 0.47265625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 511297176, + "step": 2977 + }, + { + "epoch": 0.7833234694548563, + "grad_norm": 4.111909259977495, + "learning_rate": 5e-06, + "loss": 0.121, + "num_input_tokens_seen": 511469216, + "step": 2978 + }, + { + "epoch": 0.7833234694548563, + "loss": 0.1689942479133606, + "loss_ce": 0.008380233310163021, + "loss_iou": 0.337890625, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 511469216, + "step": 2978 + }, + { + "epoch": 0.7835865062142434, + "grad_norm": 3.736811899980652, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 511641400, + "step": 2979 + }, + { + "epoch": 0.7835865062142434, + "loss": 0.09928886592388153, + "loss_ce": 0.0024871169589459896, + "loss_iou": 0.5546875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 511641400, + "step": 2979 + }, + { + "epoch": 0.7838495429736305, + "grad_norm": 3.8885212513343204, + "learning_rate": 5e-06, + "loss": 0.0719, + "num_input_tokens_seen": 511813644, + "step": 2980 + }, + { + "epoch": 0.7838495429736305, + "loss": 0.08123628050088882, + "loss_ce": 0.0019821308087557554, + "loss_iou": 0.57421875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 511813644, + "step": 2980 + }, + { + "epoch": 0.7841125797330177, + "grad_norm": 3.6947801269712643, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 511985744, + "step": 2981 + }, + { + "epoch": 0.7841125797330177, + "loss": 0.07377283275127411, + "loss_ce": 0.0037197312340140343, + "loss_iou": 0.71875, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 511985744, + "step": 2981 + }, + { + "epoch": 0.7843756164924048, + "grad_norm": 4.052383876786155, + "learning_rate": 5e-06, + "loss": 0.1062, + "num_input_tokens_seen": 512157708, + "step": 2982 + }, + { + "epoch": 0.7843756164924048, + "loss": 0.18616530299186707, + "loss_ce": 0.00116774532943964, + "loss_iou": 0.59375, + "loss_num": 0.037109375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 512157708, + "step": 2982 + }, + { + "epoch": 0.7846386532517919, + "grad_norm": 3.4825813224277185, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 512330084, + "step": 2983 + }, + { + "epoch": 0.7846386532517919, + "loss": 0.1472734808921814, + "loss_ce": 0.0032915552146732807, + "loss_iou": 0.341796875, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 512330084, + "step": 2983 + }, + { + "epoch": 0.784901690011179, + "grad_norm": 3.5815743222985947, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 512502076, + "step": 2984 + }, + { + "epoch": 0.784901690011179, + "loss": 0.10481996834278107, + "loss_ce": 0.0021588318049907684, + "loss_iou": 0.51171875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 512502076, + "step": 2984 + }, + { + "epoch": 0.7851647267705661, + "grad_norm": 7.905852445879054, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 512674288, + "step": 2985 + }, + { + "epoch": 0.7851647267705661, + "loss": 0.12316185235977173, + "loss_ce": 0.0030751884914934635, + "loss_iou": 0.5390625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 512674288, + "step": 2985 + }, + { + "epoch": 0.7854277635299534, + "grad_norm": 5.926087069922328, + "learning_rate": 5e-06, + "loss": 0.0863, + "num_input_tokens_seen": 512846624, + "step": 2986 + }, + { + "epoch": 0.7854277635299534, + "loss": 0.06602786481380463, + "loss_ce": 0.0011474882485345006, + "loss_iou": 0.58203125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 512846624, + "step": 2986 + }, + { + "epoch": 0.7856908002893405, + "grad_norm": 5.039950797702919, + "learning_rate": 5e-06, + "loss": 0.1472, + "num_input_tokens_seen": 513018668, + "step": 2987 + }, + { + "epoch": 0.7856908002893405, + "loss": 0.10930870473384857, + "loss_ce": 0.004847035743296146, + "loss_iou": 0.5078125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 513018668, + "step": 2987 + }, + { + "epoch": 0.7859538370487276, + "grad_norm": 4.823970003989147, + "learning_rate": 5e-06, + "loss": 0.1425, + "num_input_tokens_seen": 513190872, + "step": 2988 + }, + { + "epoch": 0.7859538370487276, + "loss": 0.15449807047843933, + "loss_ce": 0.0005368961137719452, + "loss_iou": 0.486328125, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 513190872, + "step": 2988 + }, + { + "epoch": 0.7862168738081147, + "grad_norm": 11.156095172455718, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 513363216, + "step": 2989 + }, + { + "epoch": 0.7862168738081147, + "loss": 0.07052050530910492, + "loss_ce": 0.002100098878145218, + "loss_iou": 0.431640625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 513363216, + "step": 2989 + }, + { + "epoch": 0.7864799105675018, + "grad_norm": 6.545014028470699, + "learning_rate": 5e-06, + "loss": 0.1922, + "num_input_tokens_seen": 513532780, + "step": 2990 + }, + { + "epoch": 0.7864799105675018, + "loss": 0.20626530051231384, + "loss_ce": 0.004162629134953022, + "loss_iou": 0.515625, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 513532780, + "step": 2990 + }, + { + "epoch": 0.786742947326889, + "grad_norm": 5.675250290858523, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 513705312, + "step": 2991 + }, + { + "epoch": 0.786742947326889, + "loss": 0.10429085791110992, + "loss_ce": 0.002728358842432499, + "loss_iou": 0.53515625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 513705312, + "step": 2991 + }, + { + "epoch": 0.7870059840862761, + "grad_norm": 3.771208751532015, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 513877348, + "step": 2992 + }, + { + "epoch": 0.7870059840862761, + "loss": 0.09906225651502609, + "loss_ce": 0.0011008285218849778, + "loss_iou": 0.51171875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 513877348, + "step": 2992 + }, + { + "epoch": 0.7872690208456632, + "grad_norm": 8.702146962798412, + "learning_rate": 5e-06, + "loss": 0.108, + "num_input_tokens_seen": 514046516, + "step": 2993 + }, + { + "epoch": 0.7872690208456632, + "loss": 0.11204151809215546, + "loss_ce": 0.004131356719881296, + "loss_iou": 0.50390625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 514046516, + "step": 2993 + }, + { + "epoch": 0.7875320576050503, + "grad_norm": 6.0868763702009465, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 514218536, + "step": 2994 + }, + { + "epoch": 0.7875320576050503, + "loss": 0.15223419666290283, + "loss_ce": 0.0040408410131931305, + "loss_iou": 0.5390625, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 514218536, + "step": 2994 + }, + { + "epoch": 0.7877950943644374, + "grad_norm": 4.672234975964948, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 514391068, + "step": 2995 + }, + { + "epoch": 0.7877950943644374, + "loss": 0.05085252225399017, + "loss_ce": 0.0038707097992300987, + "loss_iou": 0.51171875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 514391068, + "step": 2995 + }, + { + "epoch": 0.7880581311238245, + "grad_norm": 6.661101757033409, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 514561472, + "step": 2996 + }, + { + "epoch": 0.7880581311238245, + "loss": 0.20267510414123535, + "loss_ce": 0.000404604768846184, + "loss_iou": 0.333984375, + "loss_num": 0.04052734375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 514561472, + "step": 2996 + }, + { + "epoch": 0.7883211678832117, + "grad_norm": 9.694053172636844, + "learning_rate": 5e-06, + "loss": 0.148, + "num_input_tokens_seen": 514733680, + "step": 2997 + }, + { + "epoch": 0.7883211678832117, + "loss": 0.13084860146045685, + "loss_ce": 0.002278049010783434, + "loss_iou": 0.56640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 514733680, + "step": 2997 + }, + { + "epoch": 0.7885842046425988, + "grad_norm": 69.63701857559735, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 514905800, + "step": 2998 + }, + { + "epoch": 0.7885842046425988, + "loss": 0.13946260511875153, + "loss_ce": 0.004315503872931004, + "loss_iou": 0.48046875, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 514905800, + "step": 2998 + }, + { + "epoch": 0.7888472414019859, + "grad_norm": 16.568333279972496, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 515077960, + "step": 2999 + }, + { + "epoch": 0.7888472414019859, + "loss": 0.10318569839000702, + "loss_ce": 0.004888580180704594, + "loss_iou": 0.46875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 515077960, + "step": 2999 + }, + { + "epoch": 0.789110278161373, + "grad_norm": 5.577264845510526, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_websight_new_CIoU": 0.8789950311183929, + "eval_websight_new_GIoU": 0.881831705570221, + "eval_websight_new_IoU": 0.8831256628036499, + "eval_websight_new_MAE_all": 0.016948864795267582, + "eval_websight_new_MAE_h": 0.00989671004936099, + "eval_websight_new_MAE_w": 0.027918956242501736, + "eval_websight_new_MAE_x": 0.024959519505500793, + "eval_websight_new_MAE_y": 0.005020270356908441, + "eval_websight_new_NUM_probability": 0.9999921023845673, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.08870729804039001, + "eval_websight_new_loss_ce": 6.795420176786138e-06, + "eval_websight_new_loss_iou": 0.3958740234375, + "eval_websight_new_loss_num": 0.016002655029296875, + "eval_websight_new_loss_xval": 0.0800323486328125, + "eval_websight_new_runtime": 55.9259, + "eval_websight_new_samples_per_second": 0.894, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_seeclick_CIoU": 0.6246416866779327, + "eval_seeclick_GIoU": 0.6264897286891937, + "eval_seeclick_IoU": 0.650597870349884, + "eval_seeclick_MAE_all": 0.04822420887649059, + "eval_seeclick_MAE_h": 0.030418247915804386, + "eval_seeclick_MAE_w": 0.0654730387032032, + "eval_seeclick_MAE_x": 0.07548732310533524, + "eval_seeclick_MAE_y": 0.0215182239189744, + "eval_seeclick_NUM_probability": 0.9999599456787109, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.2215959131717682, + "eval_seeclick_loss_ce": 0.008739723358303308, + "eval_seeclick_loss_iou": 0.4912109375, + "eval_seeclick_loss_num": 0.043231964111328125, + "eval_seeclick_loss_xval": 0.216217041015625, + "eval_seeclick_runtime": 91.1896, + "eval_seeclick_samples_per_second": 0.472, + "eval_seeclick_steps_per_second": 0.022, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_icons_CIoU": 0.8647165596485138, + "eval_icons_GIoU": 0.8629220426082611, + "eval_icons_IoU": 0.8704103231430054, + "eval_icons_MAE_all": 0.018641653936356306, + "eval_icons_MAE_h": 0.02465621568262577, + "eval_icons_MAE_w": 0.017738113179802895, + "eval_icons_MAE_x": 0.013989617582410574, + "eval_icons_MAE_y": 0.018182669766247272, + "eval_icons_NUM_probability": 0.999986469745636, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.06894619762897491, + "eval_icons_loss_ce": 9.26887514651753e-06, + "eval_icons_loss_iou": 0.6270751953125, + "eval_icons_loss_num": 0.012828826904296875, + "eval_icons_loss_xval": 0.0641326904296875, + "eval_icons_runtime": 101.3174, + "eval_icons_samples_per_second": 0.493, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "eval_screenspot_CIoU": 0.5550089081128439, + "eval_screenspot_GIoU": 0.5516955653826395, + "eval_screenspot_IoU": 0.5956698457400004, + "eval_screenspot_MAE_all": 0.08370348066091537, + "eval_screenspot_MAE_h": 0.056737360854943596, + "eval_screenspot_MAE_w": 0.1454519679148992, + "eval_screenspot_MAE_x": 0.08442502965529759, + "eval_screenspot_MAE_y": 0.04819955242176851, + "eval_screenspot_NUM_probability": 0.9998689492543539, + "eval_screenspot_inside_bbox": 0.8395833373069763, + "eval_screenspot_loss": 0.9374791979789734, + "eval_screenspot_loss_ce": 0.5804212689399719, + "eval_screenspot_loss_iou": 0.5504557291666666, + "eval_screenspot_loss_num": 0.069854736328125, + "eval_screenspot_loss_xval": 0.3492228190104167, + "eval_screenspot_runtime": 149.2861, + "eval_screenspot_samples_per_second": 0.596, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.789110278161373, + "loss": 0.9090390205383301, + "loss_ce": 0.5727353096008301, + "loss_iou": 0.4609375, + "loss_num": 0.0673828125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 515248120, + "step": 3000 + }, + { + "epoch": 0.7893733149207601, + "grad_norm": 4.193864370890535, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 515420392, + "step": 3001 + }, + { + "epoch": 0.7893733149207601, + "loss": 0.12226281315088272, + "loss_ce": 0.0011690594255924225, + "loss_iou": 0.515625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 515420392, + "step": 3001 + }, + { + "epoch": 0.7896363516801473, + "grad_norm": 4.537019888829979, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 515590772, + "step": 3002 + }, + { + "epoch": 0.7896363516801473, + "loss": 0.14288941025733948, + "loss_ce": 0.0013793996768072248, + "loss_iou": 0.35546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 515590772, + "step": 3002 + }, + { + "epoch": 0.7898993884395344, + "grad_norm": 7.837104532573747, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 515763064, + "step": 3003 + }, + { + "epoch": 0.7898993884395344, + "loss": 0.20384354889392853, + "loss_ce": 0.0005659655435010791, + "loss_iou": 0.443359375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 515763064, + "step": 3003 + }, + { + "epoch": 0.7901624251989215, + "grad_norm": 9.871988037860998, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 515935288, + "step": 3004 + }, + { + "epoch": 0.7901624251989215, + "loss": 0.1931522786617279, + "loss_ce": 0.004248465411365032, + "loss_iou": 0.33203125, + "loss_num": 0.037841796875, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 515935288, + "step": 3004 + }, + { + "epoch": 0.7904254619583087, + "grad_norm": 6.705662570329979, + "learning_rate": 5e-06, + "loss": 0.1395, + "num_input_tokens_seen": 516107836, + "step": 3005 + }, + { + "epoch": 0.7904254619583087, + "loss": 0.24680155515670776, + "loss_ce": 0.0018979848828166723, + "loss_iou": 0.53125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 516107836, + "step": 3005 + }, + { + "epoch": 0.7906884987176958, + "grad_norm": 3.468178023705032, + "learning_rate": 5e-06, + "loss": 0.1047, + "num_input_tokens_seen": 516280260, + "step": 3006 + }, + { + "epoch": 0.7906884987176958, + "loss": 0.12856581807136536, + "loss_ce": 0.0021620113402605057, + "loss_iou": 0.478515625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 516280260, + "step": 3006 + }, + { + "epoch": 0.790951535477083, + "grad_norm": 2.516769311233636, + "learning_rate": 5e-06, + "loss": 0.1021, + "num_input_tokens_seen": 516452472, + "step": 3007 + }, + { + "epoch": 0.790951535477083, + "loss": 0.03532446175813675, + "loss_ce": 7.665850716875866e-05, + "loss_iou": 0.404296875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 516452472, + "step": 3007 + }, + { + "epoch": 0.7912145722364701, + "grad_norm": 2.8390538522305993, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 516624540, + "step": 3008 + }, + { + "epoch": 0.7912145722364701, + "loss": 0.10718082636594772, + "loss_ce": 0.0042908103205263615, + "loss_iou": 0.51953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 516624540, + "step": 3008 + }, + { + "epoch": 0.7914776089958572, + "grad_norm": 4.2650017166958545, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 516796808, + "step": 3009 + }, + { + "epoch": 0.7914776089958572, + "loss": 0.17158488929271698, + "loss_ce": 0.0024106951896101236, + "loss_iou": 0.392578125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 516796808, + "step": 3009 + }, + { + "epoch": 0.7917406457552443, + "grad_norm": 6.803158022173843, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 516969144, + "step": 3010 + }, + { + "epoch": 0.7917406457552443, + "loss": 0.12062282115221024, + "loss_ce": 0.0013601221144199371, + "loss_iou": 0.50390625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 516969144, + "step": 3010 + }, + { + "epoch": 0.7920036825146314, + "grad_norm": 5.268394185167123, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 517139672, + "step": 3011 + }, + { + "epoch": 0.7920036825146314, + "loss": 0.10529518872499466, + "loss_ce": 0.0033054398372769356, + "loss_iou": 0.330078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 517139672, + "step": 3011 + }, + { + "epoch": 0.7922667192740186, + "grad_norm": 4.897299268033889, + "learning_rate": 5e-06, + "loss": 0.1488, + "num_input_tokens_seen": 517311812, + "step": 3012 + }, + { + "epoch": 0.7922667192740186, + "loss": 0.15520727634429932, + "loss_ce": 0.0010019636247307062, + "loss_iou": 0.47265625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 517311812, + "step": 3012 + }, + { + "epoch": 0.7925297560334057, + "grad_norm": 6.998508129858562, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 517483804, + "step": 3013 + }, + { + "epoch": 0.7925297560334057, + "loss": 0.14621217548847198, + "loss_ce": 0.004305441863834858, + "loss_iou": 0.60546875, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 517483804, + "step": 3013 + }, + { + "epoch": 0.7927927927927928, + "grad_norm": 8.855328154069232, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 517656016, + "step": 3014 + }, + { + "epoch": 0.7927927927927928, + "loss": 0.07204173505306244, + "loss_ce": 0.003957017324864864, + "loss_iou": 0.515625, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 517656016, + "step": 3014 + }, + { + "epoch": 0.7930558295521799, + "grad_norm": 26.2413884381383, + "learning_rate": 5e-06, + "loss": 0.1451, + "num_input_tokens_seen": 517828272, + "step": 3015 + }, + { + "epoch": 0.7930558295521799, + "loss": 0.11117606610059738, + "loss_ce": 0.0008550205966457725, + "loss_iou": 0.455078125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 517828272, + "step": 3015 + }, + { + "epoch": 0.793318866311567, + "grad_norm": 4.148118560901186, + "learning_rate": 5e-06, + "loss": 0.1143, + "num_input_tokens_seen": 518000272, + "step": 3016 + }, + { + "epoch": 0.793318866311567, + "loss": 0.06484581530094147, + "loss_ce": 0.0005757926846854389, + "loss_iou": 0.3828125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 518000272, + "step": 3016 + }, + { + "epoch": 0.7935819030709542, + "grad_norm": 13.9691023081111, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 518172412, + "step": 3017 + }, + { + "epoch": 0.7935819030709542, + "loss": 0.10305923223495483, + "loss_ce": 0.002427519764751196, + "loss_iou": 0.373046875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 518172412, + "step": 3017 + }, + { + "epoch": 0.7938449398303413, + "grad_norm": 4.54961216940048, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 518344668, + "step": 3018 + }, + { + "epoch": 0.7938449398303413, + "loss": 0.14651378989219666, + "loss_ce": 0.0014942658599466085, + "loss_iou": 0.53125, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 518344668, + "step": 3018 + }, + { + "epoch": 0.7941079765897284, + "grad_norm": 4.200570019762456, + "learning_rate": 5e-06, + "loss": 0.107, + "num_input_tokens_seen": 518517120, + "step": 3019 + }, + { + "epoch": 0.7941079765897284, + "loss": 0.11832761764526367, + "loss_ce": 0.00037718465318903327, + "loss_iou": 0.46484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 518517120, + "step": 3019 + }, + { + "epoch": 0.7943710133491155, + "grad_norm": 7.5294315936950005, + "learning_rate": 5e-06, + "loss": 0.0876, + "num_input_tokens_seen": 518689296, + "step": 3020 + }, + { + "epoch": 0.7943710133491155, + "loss": 0.06614542007446289, + "loss_ce": 0.00021218777692411095, + "loss_iou": 0.427734375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 518689296, + "step": 3020 + }, + { + "epoch": 0.7946340501085026, + "grad_norm": 5.635502401126071, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 518859544, + "step": 3021 + }, + { + "epoch": 0.7946340501085026, + "loss": 0.12032654881477356, + "loss_ce": 0.0027118013240396976, + "loss_iou": 0.3359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 518859544, + "step": 3021 + }, + { + "epoch": 0.7948970868678897, + "grad_norm": 6.009060825525287, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 519031676, + "step": 3022 + }, + { + "epoch": 0.7948970868678897, + "loss": 0.1436431109905243, + "loss_ce": 0.0033232811838388443, + "loss_iou": 0.66015625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 519031676, + "step": 3022 + }, + { + "epoch": 0.795160123627277, + "grad_norm": 4.029495813443641, + "learning_rate": 5e-06, + "loss": 0.1666, + "num_input_tokens_seen": 519203700, + "step": 3023 + }, + { + "epoch": 0.795160123627277, + "loss": 0.15425805747509003, + "loss_ce": 0.0003884279867634177, + "loss_iou": 0.4453125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 519203700, + "step": 3023 + }, + { + "epoch": 0.795423160386664, + "grad_norm": 37.082876522501756, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 519375616, + "step": 3024 + }, + { + "epoch": 0.795423160386664, + "loss": 0.1391124576330185, + "loss_ce": 0.00012015047832392156, + "loss_iou": 0.5625, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 519375616, + "step": 3024 + }, + { + "epoch": 0.7956861971460512, + "grad_norm": 3.823112661234258, + "learning_rate": 5e-06, + "loss": 0.1111, + "num_input_tokens_seen": 519547736, + "step": 3025 + }, + { + "epoch": 0.7956861971460512, + "loss": 0.09849868714809418, + "loss_ce": 0.0015290760202333331, + "loss_iou": 0.359375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 519547736, + "step": 3025 + }, + { + "epoch": 0.7959492339054383, + "grad_norm": 4.930287612034227, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 519720084, + "step": 3026 + }, + { + "epoch": 0.7959492339054383, + "loss": 0.1211109384894371, + "loss_ce": 0.005083112046122551, + "loss_iou": 0.6328125, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 519720084, + "step": 3026 + }, + { + "epoch": 0.7962122706648254, + "grad_norm": 13.986605867255715, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 519892180, + "step": 3027 + }, + { + "epoch": 0.7962122706648254, + "loss": 0.16327086091041565, + "loss_ce": 0.0026263254694640636, + "loss_iou": 0.408203125, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 519892180, + "step": 3027 + }, + { + "epoch": 0.7964753074242126, + "grad_norm": 12.476632712407037, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 520064636, + "step": 3028 + }, + { + "epoch": 0.7964753074242126, + "loss": 0.11255937814712524, + "loss_ce": 0.000788743665907532, + "loss_iou": 0.4609375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 520064636, + "step": 3028 + }, + { + "epoch": 0.7967383441835997, + "grad_norm": 3.6081873902324326, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 520236756, + "step": 3029 + }, + { + "epoch": 0.7967383441835997, + "loss": 0.12160242348909378, + "loss_ce": 0.0018361852271482348, + "loss_iou": 0.31640625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 520236756, + "step": 3029 + }, + { + "epoch": 0.7970013809429868, + "grad_norm": 8.030386724109855, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 520407308, + "step": 3030 + }, + { + "epoch": 0.7970013809429868, + "loss": 0.18061389029026031, + "loss_ce": 0.0010331911034882069, + "loss_iou": 0.5625, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 520407308, + "step": 3030 + }, + { + "epoch": 0.7972644177023739, + "grad_norm": 11.21273309596263, + "learning_rate": 5e-06, + "loss": 0.1103, + "num_input_tokens_seen": 520579744, + "step": 3031 + }, + { + "epoch": 0.7972644177023739, + "loss": 0.082832470536232, + "loss_ce": 0.0002824235416483134, + "loss_iou": 0.515625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 520579744, + "step": 3031 + }, + { + "epoch": 0.797527454461761, + "grad_norm": 20.85679503542732, + "learning_rate": 5e-06, + "loss": 0.1348, + "num_input_tokens_seen": 520752148, + "step": 3032 + }, + { + "epoch": 0.797527454461761, + "loss": 0.12713350355625153, + "loss_ce": 0.0008517719688825309, + "loss_iou": 0.416015625, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 520752148, + "step": 3032 + }, + { + "epoch": 0.7977904912211482, + "grad_norm": 37.09104960564763, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 520924220, + "step": 3033 + }, + { + "epoch": 0.7977904912211482, + "loss": 0.10619133710861206, + "loss_ce": 0.0030724371317774057, + "loss_iou": 0.5234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 520924220, + "step": 3033 + }, + { + "epoch": 0.7980535279805353, + "grad_norm": 16.059073148748364, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 521093348, + "step": 3034 + }, + { + "epoch": 0.7980535279805353, + "loss": 0.06166623532772064, + "loss_ce": 0.0008294428698718548, + "loss_iou": 0.455078125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 521093348, + "step": 3034 + }, + { + "epoch": 0.7983165647399224, + "grad_norm": 8.368290368776519, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 521265688, + "step": 3035 + }, + { + "epoch": 0.7983165647399224, + "loss": 0.14087893068790436, + "loss_ce": 0.004129666369408369, + "loss_iou": 0.51171875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 521265688, + "step": 3035 + }, + { + "epoch": 0.7985796014993095, + "grad_norm": 25.178404936799044, + "learning_rate": 5e-06, + "loss": 0.1592, + "num_input_tokens_seen": 521437496, + "step": 3036 + }, + { + "epoch": 0.7985796014993095, + "loss": 0.29297274351119995, + "loss_ce": 0.002842147834599018, + "loss_iou": 0.357421875, + "loss_num": 0.05810546875, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 521437496, + "step": 3036 + }, + { + "epoch": 0.7988426382586966, + "grad_norm": 3.991516418389156, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 521609940, + "step": 3037 + }, + { + "epoch": 0.7988426382586966, + "loss": 0.09374190121889114, + "loss_ce": 0.0009379457915201783, + "loss_iou": 0.392578125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 521609940, + "step": 3037 + }, + { + "epoch": 0.7991056750180838, + "grad_norm": 4.808453824486521, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 521781892, + "step": 3038 + }, + { + "epoch": 0.7991056750180838, + "loss": 0.07893365621566772, + "loss_ce": 0.001052800682373345, + "loss_iou": 0.5078125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 521781892, + "step": 3038 + }, + { + "epoch": 0.7993687117774709, + "grad_norm": 12.778560985993714, + "learning_rate": 5e-06, + "loss": 0.109, + "num_input_tokens_seen": 521954108, + "step": 3039 + }, + { + "epoch": 0.7993687117774709, + "loss": 0.12000415474176407, + "loss_ce": 0.0017790585989132524, + "loss_iou": 0.4765625, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 521954108, + "step": 3039 + }, + { + "epoch": 0.799631748536858, + "grad_norm": 5.33906426825674, + "learning_rate": 5e-06, + "loss": 0.1944, + "num_input_tokens_seen": 522126236, + "step": 3040 + }, + { + "epoch": 0.799631748536858, + "loss": 0.2638484239578247, + "loss_ce": 0.00042068029870279133, + "loss_iou": 0.6015625, + "loss_num": 0.052734375, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 522126236, + "step": 3040 + }, + { + "epoch": 0.7998947852962451, + "grad_norm": 4.623536872405031, + "learning_rate": 5e-06, + "loss": 0.0883, + "num_input_tokens_seen": 522298380, + "step": 3041 + }, + { + "epoch": 0.7998947852962451, + "loss": 0.12680000066757202, + "loss_ce": 0.0012506938073784113, + "loss_iou": 0.337890625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 522298380, + "step": 3041 + }, + { + "epoch": 0.8001578220556322, + "grad_norm": 4.125445081698177, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 522470360, + "step": 3042 + }, + { + "epoch": 0.8001578220556322, + "loss": 0.15849211812019348, + "loss_ce": 0.0013113392051309347, + "loss_iou": 0.50390625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 522470360, + "step": 3042 + }, + { + "epoch": 0.8004208588150195, + "grad_norm": 11.789889533203668, + "learning_rate": 5e-06, + "loss": 0.0876, + "num_input_tokens_seen": 522642472, + "step": 3043 + }, + { + "epoch": 0.8004208588150195, + "loss": 0.09324462711811066, + "loss_ce": 0.0006237818161025643, + "loss_iou": 0.50390625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 522642472, + "step": 3043 + }, + { + "epoch": 0.8006838955744066, + "grad_norm": 4.414508918444819, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 522814808, + "step": 3044 + }, + { + "epoch": 0.8006838955744066, + "loss": 0.07435610890388489, + "loss_ce": 0.0009460713481530547, + "loss_iou": 0.474609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 522814808, + "step": 3044 + }, + { + "epoch": 0.8009469323337937, + "grad_norm": 6.1075510398524395, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 522986892, + "step": 3045 + }, + { + "epoch": 0.8009469323337937, + "loss": 0.08977600932121277, + "loss_ce": 0.0010614084312692285, + "loss_iou": 0.46484375, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 522986892, + "step": 3045 + }, + { + "epoch": 0.8012099690931808, + "grad_norm": 6.86235832016632, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 523159044, + "step": 3046 + }, + { + "epoch": 0.8012099690931808, + "loss": 0.10283628106117249, + "loss_ce": 0.003226900240406394, + "loss_iou": 0.578125, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 523159044, + "step": 3046 + }, + { + "epoch": 0.8014730058525679, + "grad_norm": 8.620167546317841, + "learning_rate": 5e-06, + "loss": 0.1508, + "num_input_tokens_seen": 523331372, + "step": 3047 + }, + { + "epoch": 0.8014730058525679, + "loss": 0.17512959241867065, + "loss_ce": 0.0035597749520093203, + "loss_iou": 0.404296875, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 523331372, + "step": 3047 + }, + { + "epoch": 0.801736042611955, + "grad_norm": 4.823577960221348, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 523504004, + "step": 3048 + }, + { + "epoch": 0.801736042611955, + "loss": 0.07766547054052353, + "loss_ce": 0.0020429138094186783, + "loss_iou": 0.44921875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 523504004, + "step": 3048 + }, + { + "epoch": 0.8019990793713422, + "grad_norm": 11.00975772697334, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 523675952, + "step": 3049 + }, + { + "epoch": 0.8019990793713422, + "loss": 0.11927802860736847, + "loss_ce": 0.001342848176136613, + "loss_iou": 0.359375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 523675952, + "step": 3049 + }, + { + "epoch": 0.8022621161307293, + "grad_norm": 10.685552832243063, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 523848152, + "step": 3050 + }, + { + "epoch": 0.8022621161307293, + "loss": 0.0827643871307373, + "loss_ce": 0.002106426050886512, + "loss_iou": 0.5859375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 523848152, + "step": 3050 + }, + { + "epoch": 0.8025251528901164, + "grad_norm": 24.17010500449297, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 524020708, + "step": 3051 + }, + { + "epoch": 0.8025251528901164, + "loss": 0.10048617422580719, + "loss_ce": 0.000510584854055196, + "loss_iou": 0.51953125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 524020708, + "step": 3051 + }, + { + "epoch": 0.8027881896495035, + "grad_norm": 4.992845761930844, + "learning_rate": 5e-06, + "loss": 0.1304, + "num_input_tokens_seen": 524191372, + "step": 3052 + }, + { + "epoch": 0.8027881896495035, + "loss": 0.15961629152297974, + "loss_ce": 0.006387531757354736, + "loss_iou": 0.4453125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 524191372, + "step": 3052 + }, + { + "epoch": 0.8030512264088906, + "grad_norm": 2.943381415031694, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 524363912, + "step": 3053 + }, + { + "epoch": 0.8030512264088906, + "loss": 0.15981021523475647, + "loss_ce": 0.0005695016006939113, + "loss_iou": 0.419921875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 524363912, + "step": 3053 + }, + { + "epoch": 0.8033142631682778, + "grad_norm": 5.648094984205903, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 524536316, + "step": 3054 + }, + { + "epoch": 0.8033142631682778, + "loss": 0.1740710288286209, + "loss_ce": 0.0022723155561834574, + "loss_iou": 0.52734375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 524536316, + "step": 3054 + }, + { + "epoch": 0.8035772999276649, + "grad_norm": 7.984973488378181, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 524708444, + "step": 3055 + }, + { + "epoch": 0.8035772999276649, + "loss": 0.07127489894628525, + "loss_ce": 0.000809812976513058, + "loss_iou": 0.5078125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 524708444, + "step": 3055 + }, + { + "epoch": 0.803840336687052, + "grad_norm": 6.031677657650838, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 524880960, + "step": 3056 + }, + { + "epoch": 0.803840336687052, + "loss": 0.0638066828250885, + "loss_ce": 0.0005437473300844431, + "loss_iou": 0.703125, + "loss_num": 0.01263427734375, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 524880960, + "step": 3056 + }, + { + "epoch": 0.8041033734464391, + "grad_norm": 5.599082172393745, + "learning_rate": 5e-06, + "loss": 0.086, + "num_input_tokens_seen": 525053308, + "step": 3057 + }, + { + "epoch": 0.8041033734464391, + "loss": 0.07932358235120773, + "loss_ce": 0.0024650623090565205, + "loss_iou": 0.546875, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 525053308, + "step": 3057 + }, + { + "epoch": 0.8043664102058262, + "grad_norm": 28.17799645696428, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 525225432, + "step": 3058 + }, + { + "epoch": 0.8043664102058262, + "loss": 0.08576367795467377, + "loss_ce": 0.0013825736241415143, + "loss_iou": 0.578125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 525225432, + "step": 3058 + }, + { + "epoch": 0.8046294469652134, + "grad_norm": 3.4834198316912564, + "learning_rate": 5e-06, + "loss": 0.0788, + "num_input_tokens_seen": 525397804, + "step": 3059 + }, + { + "epoch": 0.8046294469652134, + "loss": 0.07873048633337021, + "loss_ce": 0.0016125671099871397, + "loss_iou": 0.345703125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 525397804, + "step": 3059 + }, + { + "epoch": 0.8048924837246005, + "grad_norm": 2.9860188986605367, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 525570124, + "step": 3060 + }, + { + "epoch": 0.8048924837246005, + "loss": 0.07787738740444183, + "loss_ce": 0.0008205035701394081, + "loss_iou": 0.62890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 525570124, + "step": 3060 + }, + { + "epoch": 0.8051555204839876, + "grad_norm": 14.35918831416577, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 525742708, + "step": 3061 + }, + { + "epoch": 0.8051555204839876, + "loss": 0.10341215878725052, + "loss_ce": 0.00123931048437953, + "loss_iou": 0.458984375, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 525742708, + "step": 3061 + }, + { + "epoch": 0.8054185572433747, + "grad_norm": 16.381501371718237, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 525914908, + "step": 3062 + }, + { + "epoch": 0.8054185572433747, + "loss": 0.10784236341714859, + "loss_ce": 0.0002678967430256307, + "loss_iou": 0.462890625, + "loss_num": 0.021484375, + "loss_xval": 0.107421875, + "num_input_tokens_seen": 525914908, + "step": 3062 + }, + { + "epoch": 0.8056815940027618, + "grad_norm": 4.73914065847523, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 526087040, + "step": 3063 + }, + { + "epoch": 0.8056815940027618, + "loss": 0.06849893927574158, + "loss_ce": 0.000368443870684132, + "loss_iou": 0.455078125, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 526087040, + "step": 3063 + }, + { + "epoch": 0.8059446307621491, + "grad_norm": 8.562314005917766, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 526259428, + "step": 3064 + }, + { + "epoch": 0.8059446307621491, + "loss": 0.07737226039171219, + "loss_ce": 0.0030009234324097633, + "loss_iou": 0.59375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 526259428, + "step": 3064 + }, + { + "epoch": 0.8062076675215362, + "grad_norm": 12.302010843258703, + "learning_rate": 5e-06, + "loss": 0.1505, + "num_input_tokens_seen": 526431568, + "step": 3065 + }, + { + "epoch": 0.8062076675215362, + "loss": 0.20694774389266968, + "loss_ce": 0.0032123818527907133, + "loss_iou": 0.51171875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 526431568, + "step": 3065 + }, + { + "epoch": 0.8064707042809233, + "grad_norm": 8.1243600151207, + "learning_rate": 5e-06, + "loss": 0.1291, + "num_input_tokens_seen": 526603728, + "step": 3066 + }, + { + "epoch": 0.8064707042809233, + "loss": 0.14467400312423706, + "loss_ce": 0.0004784482589457184, + "loss_iou": 0.59375, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 526603728, + "step": 3066 + }, + { + "epoch": 0.8067337410403104, + "grad_norm": 92.56005912402708, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 526775572, + "step": 3067 + }, + { + "epoch": 0.8067337410403104, + "loss": 0.08829745650291443, + "loss_ce": 0.0011697689769789577, + "loss_iou": 0.44140625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 526775572, + "step": 3067 + }, + { + "epoch": 0.8069967777996975, + "grad_norm": 33.266184929246414, + "learning_rate": 5e-06, + "loss": 0.0732, + "num_input_tokens_seen": 526947576, + "step": 3068 + }, + { + "epoch": 0.8069967777996975, + "loss": 0.060733191668987274, + "loss_ce": 0.0012391710188239813, + "loss_iou": 0.59375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 526947576, + "step": 3068 + }, + { + "epoch": 0.8072598145590847, + "grad_norm": 5.97758373469706, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 527117908, + "step": 3069 + }, + { + "epoch": 0.8072598145590847, + "loss": 0.09366244077682495, + "loss_ce": 0.006046472117304802, + "loss_iou": 0.51953125, + "loss_num": 0.017578125, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 527117908, + "step": 3069 + }, + { + "epoch": 0.8075228513184718, + "grad_norm": 5.290039917786788, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 527287100, + "step": 3070 + }, + { + "epoch": 0.8075228513184718, + "loss": 0.10383239388465881, + "loss_ce": 0.00046935188584029675, + "loss_iou": 0.56640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 527287100, + "step": 3070 + }, + { + "epoch": 0.8077858880778589, + "grad_norm": 4.7891570756526125, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 527458888, + "step": 3071 + }, + { + "epoch": 0.8077858880778589, + "loss": 0.10491342842578888, + "loss_ce": 0.0008484934223815799, + "loss_iou": 0.46484375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 527458888, + "step": 3071 + }, + { + "epoch": 0.808048924837246, + "grad_norm": 11.230743830755795, + "learning_rate": 5e-06, + "loss": 0.1349, + "num_input_tokens_seen": 527630920, + "step": 3072 + }, + { + "epoch": 0.808048924837246, + "loss": 0.06209864094853401, + "loss_ce": 0.0007583063561469316, + "loss_iou": 0.64453125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 527630920, + "step": 3072 + }, + { + "epoch": 0.8083119615966331, + "grad_norm": 5.040744603317089, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 527803168, + "step": 3073 + }, + { + "epoch": 0.8083119615966331, + "loss": 0.1508997231721878, + "loss_ce": 0.0011194492690265179, + "loss_iou": 0.53515625, + "loss_num": 0.030029296875, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 527803168, + "step": 3073 + }, + { + "epoch": 0.8085749983560202, + "grad_norm": 8.63208434905108, + "learning_rate": 5e-06, + "loss": 0.0993, + "num_input_tokens_seen": 527975596, + "step": 3074 + }, + { + "epoch": 0.8085749983560202, + "loss": 0.11231046169996262, + "loss_ce": 0.00017361767822876573, + "loss_iou": 0.408203125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 527975596, + "step": 3074 + }, + { + "epoch": 0.8088380351154074, + "grad_norm": 4.381253880078918, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 528147980, + "step": 3075 + }, + { + "epoch": 0.8088380351154074, + "loss": 0.11533799767494202, + "loss_ce": 0.0029875326436012983, + "loss_iou": 0.275390625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 528147980, + "step": 3075 + }, + { + "epoch": 0.8091010718747945, + "grad_norm": 8.161484057964593, + "learning_rate": 5e-06, + "loss": 0.1619, + "num_input_tokens_seen": 528320296, + "step": 3076 + }, + { + "epoch": 0.8091010718747945, + "loss": 0.16661548614501953, + "loss_ce": 0.0008439991506747901, + "loss_iou": 0.337890625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 528320296, + "step": 3076 + }, + { + "epoch": 0.8093641086341816, + "grad_norm": 4.683292048006889, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 528492424, + "step": 3077 + }, + { + "epoch": 0.8093641086341816, + "loss": 0.11115469038486481, + "loss_ce": 0.0029546155128628016, + "loss_iou": 0.51171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 528492424, + "step": 3077 + }, + { + "epoch": 0.8096271453935687, + "grad_norm": 5.257960533659777, + "learning_rate": 5e-06, + "loss": 0.1374, + "num_input_tokens_seen": 528664732, + "step": 3078 + }, + { + "epoch": 0.8096271453935687, + "loss": 0.21525058150291443, + "loss_ce": 0.001490199938416481, + "loss_iou": 0.435546875, + "loss_num": 0.042724609375, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 528664732, + "step": 3078 + }, + { + "epoch": 0.8098901821529558, + "grad_norm": 12.566112829085096, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 528837012, + "step": 3079 + }, + { + "epoch": 0.8098901821529558, + "loss": 0.08169254660606384, + "loss_ce": 0.00021060870494693518, + "loss_iou": 0.45703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 528837012, + "step": 3079 + }, + { + "epoch": 0.810153218912343, + "grad_norm": 5.324934758940462, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 529009052, + "step": 3080 + }, + { + "epoch": 0.810153218912343, + "loss": 0.08878730237483978, + "loss_ce": 0.0001795147400116548, + "loss_iou": 0.5078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 529009052, + "step": 3080 + }, + { + "epoch": 0.8104162556717301, + "grad_norm": 9.272723862546323, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 529181216, + "step": 3081 + }, + { + "epoch": 0.8104162556717301, + "loss": 0.0852864533662796, + "loss_ce": 0.0025990745052695274, + "loss_iou": 0.51171875, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 529181216, + "step": 3081 + }, + { + "epoch": 0.8106792924311172, + "grad_norm": 28.988099538140567, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 529351788, + "step": 3082 + }, + { + "epoch": 0.8106792924311172, + "loss": 0.05319926142692566, + "loss_ce": 0.0007700645364820957, + "loss_iou": 0.6796875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 529351788, + "step": 3082 + }, + { + "epoch": 0.8109423291905044, + "grad_norm": 5.871269884706642, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 529524200, + "step": 3083 + }, + { + "epoch": 0.8109423291905044, + "loss": 0.180719256401062, + "loss_ce": 0.004815942607820034, + "loss_iou": 0.5078125, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 529524200, + "step": 3083 + }, + { + "epoch": 0.8112053659498915, + "grad_norm": 9.034864384864637, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 529696672, + "step": 3084 + }, + { + "epoch": 0.8112053659498915, + "loss": 0.039623767137527466, + "loss_ce": 0.0003476430138107389, + "loss_iou": 0.546875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 529696672, + "step": 3084 + }, + { + "epoch": 0.8114684027092787, + "grad_norm": 7.571992947700351, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 529868932, + "step": 3085 + }, + { + "epoch": 0.8114684027092787, + "loss": 0.15128442645072937, + "loss_ce": 0.002373900031670928, + "loss_iou": 0.375, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 529868932, + "step": 3085 + }, + { + "epoch": 0.8117314394686658, + "grad_norm": 29.21584689056005, + "learning_rate": 5e-06, + "loss": 0.0879, + "num_input_tokens_seen": 530041080, + "step": 3086 + }, + { + "epoch": 0.8117314394686658, + "loss": 0.11466438323259354, + "loss_ce": 0.001291584805585444, + "loss_iou": 0.494140625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 530041080, + "step": 3086 + }, + { + "epoch": 0.8119944762280529, + "grad_norm": 8.627125392444821, + "learning_rate": 5e-06, + "loss": 0.1586, + "num_input_tokens_seen": 530210668, + "step": 3087 + }, + { + "epoch": 0.8119944762280529, + "loss": 0.1240130364894867, + "loss_ce": 0.0024767834693193436, + "loss_iou": 0.39453125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 530210668, + "step": 3087 + }, + { + "epoch": 0.81225751298744, + "grad_norm": 11.153523319241675, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 530382776, + "step": 3088 + }, + { + "epoch": 0.81225751298744, + "loss": 0.07181555032730103, + "loss_ce": 0.00026709536905400455, + "loss_iou": NaN, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 530382776, + "step": 3088 + }, + { + "epoch": 0.8125205497468271, + "grad_norm": 10.380517960562548, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 530555004, + "step": 3089 + }, + { + "epoch": 0.8125205497468271, + "loss": 0.16954563558101654, + "loss_ce": 0.0021872336510568857, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 530555004, + "step": 3089 + }, + { + "epoch": 0.8127835865062143, + "grad_norm": 5.4446542029328775, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 530726976, + "step": 3090 + }, + { + "epoch": 0.8127835865062143, + "loss": 0.08060289919376373, + "loss_ce": 0.00040270722820423543, + "loss_iou": 0.40234375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 530726976, + "step": 3090 + }, + { + "epoch": 0.8130466232656014, + "grad_norm": 5.347357758568155, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 530898852, + "step": 3091 + }, + { + "epoch": 0.8130466232656014, + "loss": 0.08159644901752472, + "loss_ce": 0.0003891719679813832, + "loss_iou": 0.4140625, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 530898852, + "step": 3091 + }, + { + "epoch": 0.8133096600249885, + "grad_norm": 4.89308048127463, + "learning_rate": 5e-06, + "loss": 0.1005, + "num_input_tokens_seen": 531071100, + "step": 3092 + }, + { + "epoch": 0.8133096600249885, + "loss": 0.05772348493337631, + "loss_ce": 7.577867654617876e-05, + "loss_iou": 0.54296875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 531071100, + "step": 3092 + }, + { + "epoch": 0.8135726967843756, + "grad_norm": 5.314071866923476, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 531243088, + "step": 3093 + }, + { + "epoch": 0.8135726967843756, + "loss": 0.16711823642253876, + "loss_ce": 0.002140207216143608, + "loss_iou": 0.470703125, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 531243088, + "step": 3093 + }, + { + "epoch": 0.8138357335437627, + "grad_norm": 4.018224451020325, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 531415364, + "step": 3094 + }, + { + "epoch": 0.8138357335437627, + "loss": 0.07948748767375946, + "loss_ce": 0.0013472279533743858, + "loss_iou": 0.431640625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 531415364, + "step": 3094 + }, + { + "epoch": 0.8140987703031498, + "grad_norm": 5.643939653563094, + "learning_rate": 5e-06, + "loss": 0.0894, + "num_input_tokens_seen": 531587860, + "step": 3095 + }, + { + "epoch": 0.8140987703031498, + "loss": 0.070250503718853, + "loss_ce": 0.00019740140123758465, + "loss_iou": 0.48828125, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 531587860, + "step": 3095 + }, + { + "epoch": 0.814361807062537, + "grad_norm": 6.298167200858131, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 531760160, + "step": 3096 + }, + { + "epoch": 0.814361807062537, + "loss": 0.13414643704891205, + "loss_ce": 0.0014254867564886808, + "loss_iou": 0.48046875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 531760160, + "step": 3096 + }, + { + "epoch": 0.8146248438219241, + "grad_norm": 4.559829968544296, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 531932728, + "step": 3097 + }, + { + "epoch": 0.8146248438219241, + "loss": 0.11818031221628189, + "loss_ce": 0.0006265999400056899, + "loss_iou": 0.44140625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 531932728, + "step": 3097 + }, + { + "epoch": 0.8148878805813112, + "grad_norm": 6.798137203947019, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 532104748, + "step": 3098 + }, + { + "epoch": 0.8148878805813112, + "loss": 0.13361144065856934, + "loss_ce": 0.0008294496219605207, + "loss_iou": 0.44921875, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 532104748, + "step": 3098 + }, + { + "epoch": 0.8151509173406983, + "grad_norm": 4.492953640528966, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 532276872, + "step": 3099 + }, + { + "epoch": 0.8151509173406983, + "loss": 0.23120509088039398, + "loss_ce": 0.004703632555902004, + "loss_iou": 0.4296875, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 532276872, + "step": 3099 + }, + { + "epoch": 0.8154139541000854, + "grad_norm": 23.973104234846247, + "learning_rate": 5e-06, + "loss": 0.1384, + "num_input_tokens_seen": 532449140, + "step": 3100 + }, + { + "epoch": 0.8154139541000854, + "loss": 0.2094377875328064, + "loss_ce": 0.005488819442689419, + "loss_iou": 0.384765625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 532449140, + "step": 3100 + }, + { + "epoch": 0.8156769908594727, + "grad_norm": 5.077265892021237, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 532621452, + "step": 3101 + }, + { + "epoch": 0.8156769908594727, + "loss": 0.07252339273691177, + "loss_ce": 0.001692092278972268, + "loss_iou": 0.53125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 532621452, + "step": 3101 + }, + { + "epoch": 0.8159400276188598, + "grad_norm": 4.611749386992743, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 532793420, + "step": 3102 + }, + { + "epoch": 0.8159400276188598, + "loss": 0.10748874396085739, + "loss_ce": 0.0007687745383009315, + "loss_iou": 0.59375, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 532793420, + "step": 3102 + }, + { + "epoch": 0.8162030643782469, + "grad_norm": 8.46788739165104, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 532965672, + "step": 3103 + }, + { + "epoch": 0.8162030643782469, + "loss": 0.0986635610461235, + "loss_ce": 0.006317365914583206, + "loss_iou": 0.359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 532965672, + "step": 3103 + }, + { + "epoch": 0.816466101137634, + "grad_norm": 5.21012803335228, + "learning_rate": 5e-06, + "loss": 0.1037, + "num_input_tokens_seen": 533137620, + "step": 3104 + }, + { + "epoch": 0.816466101137634, + "loss": 0.1294005811214447, + "loss_ce": 0.0007232190691865981, + "loss_iou": NaN, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 533137620, + "step": 3104 + }, + { + "epoch": 0.8167291378970211, + "grad_norm": 4.29918218259194, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 533308040, + "step": 3105 + }, + { + "epoch": 0.8167291378970211, + "loss": 0.07954747974872589, + "loss_ce": 0.0007816128781996667, + "loss_iou": 0.466796875, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 533308040, + "step": 3105 + }, + { + "epoch": 0.8169921746564083, + "grad_norm": 7.9379046963597775, + "learning_rate": 5e-06, + "loss": 0.0867, + "num_input_tokens_seen": 533480088, + "step": 3106 + }, + { + "epoch": 0.8169921746564083, + "loss": 0.055092211812734604, + "loss_ce": 0.002663013059645891, + "loss_iou": 0.578125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 533480088, + "step": 3106 + }, + { + "epoch": 0.8172552114157954, + "grad_norm": 5.798174841670742, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 533652304, + "step": 3107 + }, + { + "epoch": 0.8172552114157954, + "loss": 0.10001754760742188, + "loss_ce": 0.0004997201031073928, + "loss_iou": 0.4765625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 533652304, + "step": 3107 + }, + { + "epoch": 0.8175182481751825, + "grad_norm": 4.397743796531851, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 533824784, + "step": 3108 + }, + { + "epoch": 0.8175182481751825, + "loss": 0.04015748202800751, + "loss_ce": 0.001079726149328053, + "loss_iou": 0.376953125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 533824784, + "step": 3108 + }, + { + "epoch": 0.8177812849345696, + "grad_norm": 3.0634489465977937, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 533997192, + "step": 3109 + }, + { + "epoch": 0.8177812849345696, + "loss": 0.11612477153539658, + "loss_ce": 0.0004936738405376673, + "loss_iou": 0.5234375, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 533997192, + "step": 3109 + }, + { + "epoch": 0.8180443216939567, + "grad_norm": 11.903043667241318, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 534169324, + "step": 3110 + }, + { + "epoch": 0.8180443216939567, + "loss": 0.12225233018398285, + "loss_ce": 0.002577648963779211, + "loss_iou": 0.4609375, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 534169324, + "step": 3110 + }, + { + "epoch": 0.8183073584533439, + "grad_norm": 11.232858169527498, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 534341512, + "step": 3111 + }, + { + "epoch": 0.8183073584533439, + "loss": 0.093436598777771, + "loss_ce": 0.00041902740485966206, + "loss_iou": 0.4140625, + "loss_num": 0.0186767578125, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 534341512, + "step": 3111 + }, + { + "epoch": 0.818570395212731, + "grad_norm": 4.531836934905563, + "learning_rate": 5e-06, + "loss": 0.0859, + "num_input_tokens_seen": 534511748, + "step": 3112 + }, + { + "epoch": 0.818570395212731, + "loss": 0.07575514912605286, + "loss_ce": 0.00020888875587843359, + "loss_iou": 0.515625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 534511748, + "step": 3112 + }, + { + "epoch": 0.8188334319721181, + "grad_norm": 7.470057833509574, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 534683872, + "step": 3113 + }, + { + "epoch": 0.8188334319721181, + "loss": 0.10066039860248566, + "loss_ce": 0.00047118880320340395, + "loss_iou": 0.466796875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 534683872, + "step": 3113 + }, + { + "epoch": 0.8190964687315052, + "grad_norm": 6.233336264317783, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 534855832, + "step": 3114 + }, + { + "epoch": 0.8190964687315052, + "loss": 0.09569014608860016, + "loss_ce": 0.0034660203382372856, + "loss_iou": 0.40625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 534855832, + "step": 3114 + }, + { + "epoch": 0.8193595054908923, + "grad_norm": 5.930104072036699, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 535027896, + "step": 3115 + }, + { + "epoch": 0.8193595054908923, + "loss": 0.13678929209709167, + "loss_ce": 0.001047111232765019, + "loss_iou": 0.42578125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 535027896, + "step": 3115 + }, + { + "epoch": 0.8196225422502795, + "grad_norm": 12.505587123966333, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 535200112, + "step": 3116 + }, + { + "epoch": 0.8196225422502795, + "loss": 0.0822412520647049, + "loss_ce": 0.0008356067701242864, + "loss_iou": 0.5078125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 535200112, + "step": 3116 + }, + { + "epoch": 0.8198855790096666, + "grad_norm": 4.231730639769997, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 535372284, + "step": 3117 + }, + { + "epoch": 0.8198855790096666, + "loss": 0.12019523978233337, + "loss_ce": 0.00018486013868823647, + "loss_iou": 0.5234375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 535372284, + "step": 3117 + }, + { + "epoch": 0.8201486157690537, + "grad_norm": 7.438830292525926, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 535544868, + "step": 3118 + }, + { + "epoch": 0.8201486157690537, + "loss": 0.16251003742218018, + "loss_ce": 0.0003091081453021616, + "loss_iou": 0.45703125, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 535544868, + "step": 3118 + }, + { + "epoch": 0.8204116525284408, + "grad_norm": 4.777332996643992, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 535717156, + "step": 3119 + }, + { + "epoch": 0.8204116525284408, + "loss": 0.16800367832183838, + "loss_ce": 0.00139296252746135, + "loss_iou": 0.43359375, + "loss_num": 0.033203125, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 535717156, + "step": 3119 + }, + { + "epoch": 0.8206746892878279, + "grad_norm": 9.684696947025552, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 535887380, + "step": 3120 + }, + { + "epoch": 0.8206746892878279, + "loss": 0.0679091364145279, + "loss_ce": 0.0011366696562618017, + "loss_iou": 0.5390625, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 535887380, + "step": 3120 + }, + { + "epoch": 0.820937726047215, + "grad_norm": 5.315834673220869, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 536059444, + "step": 3121 + }, + { + "epoch": 0.820937726047215, + "loss": 0.04389767348766327, + "loss_ce": 0.00013546722766477615, + "loss_iou": 0.4296875, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 536059444, + "step": 3121 + }, + { + "epoch": 0.8212007628066023, + "grad_norm": 4.263271795831078, + "learning_rate": 5e-06, + "loss": 0.1419, + "num_input_tokens_seen": 536231404, + "step": 3122 + }, + { + "epoch": 0.8212007628066023, + "loss": 0.15159711241722107, + "loss_ce": 0.004105648957192898, + "loss_iou": 0.498046875, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 536231404, + "step": 3122 + }, + { + "epoch": 0.8214637995659894, + "grad_norm": 7.367303112338102, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 536403668, + "step": 3123 + }, + { + "epoch": 0.8214637995659894, + "loss": 0.1084074005484581, + "loss_ce": 0.0006498372531495988, + "loss_iou": 0.58203125, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 536403668, + "step": 3123 + }, + { + "epoch": 0.8217268363253765, + "grad_norm": 6.857717580081254, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 536575892, + "step": 3124 + }, + { + "epoch": 0.8217268363253765, + "loss": 0.0787474736571312, + "loss_ce": 0.00030203917413018644, + "loss_iou": 0.58203125, + "loss_num": 0.015625, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 536575892, + "step": 3124 + }, + { + "epoch": 0.8219898730847636, + "grad_norm": 4.826419943692509, + "learning_rate": 5e-06, + "loss": 0.0802, + "num_input_tokens_seen": 536745964, + "step": 3125 + }, + { + "epoch": 0.8219898730847636, + "loss": 0.06057834252715111, + "loss_ce": 7.724385795881972e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 536745964, + "step": 3125 + }, + { + "epoch": 0.8222529098441507, + "grad_norm": 8.883739420539388, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 536918180, + "step": 3126 + }, + { + "epoch": 0.8222529098441507, + "loss": 0.11514750123023987, + "loss_ce": 0.0007676149252802134, + "loss_iou": 0.416015625, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 536918180, + "step": 3126 + }, + { + "epoch": 0.8225159466035379, + "grad_norm": 4.391986789320263, + "learning_rate": 5e-06, + "loss": 0.1256, + "num_input_tokens_seen": 537090196, + "step": 3127 + }, + { + "epoch": 0.8225159466035379, + "loss": 0.13754448294639587, + "loss_ce": 0.0006731519242748618, + "loss_iou": 0.51171875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 537090196, + "step": 3127 + }, + { + "epoch": 0.822778983362925, + "grad_norm": 7.64555343727262, + "learning_rate": 5e-06, + "loss": 0.0728, + "num_input_tokens_seen": 537259572, + "step": 3128 + }, + { + "epoch": 0.822778983362925, + "loss": 0.05659133195877075, + "loss_ce": 5.7516066590324044e-05, + "loss_iou": 0.56640625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 537259572, + "step": 3128 + }, + { + "epoch": 0.8230420201223121, + "grad_norm": 4.3032702337998225, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 537431812, + "step": 3129 + }, + { + "epoch": 0.8230420201223121, + "loss": 0.07997481524944305, + "loss_ce": 0.0033604400232434273, + "loss_iou": 0.5390625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 537431812, + "step": 3129 + }, + { + "epoch": 0.8233050568816992, + "grad_norm": 11.32084254562734, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 537602468, + "step": 3130 + }, + { + "epoch": 0.8233050568816992, + "loss": 0.12599240243434906, + "loss_ce": 0.0030370799358934164, + "loss_iou": 0.54296875, + "loss_num": 0.0245361328125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 537602468, + "step": 3130 + }, + { + "epoch": 0.8235680936410863, + "grad_norm": 7.355242955773187, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 537774448, + "step": 3131 + }, + { + "epoch": 0.8235680936410863, + "loss": 0.18665780127048492, + "loss_ce": 0.0003784986911341548, + "loss_iou": 0.4453125, + "loss_num": 0.037109375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 537774448, + "step": 3131 + }, + { + "epoch": 0.8238311304004735, + "grad_norm": 11.757519786843565, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 537946740, + "step": 3132 + }, + { + "epoch": 0.8238311304004735, + "loss": 0.06623389571905136, + "loss_ce": 0.0015976695576682687, + "loss_iou": 0.359375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 537946740, + "step": 3132 + }, + { + "epoch": 0.8240941671598606, + "grad_norm": 17.264629874858777, + "learning_rate": 5e-06, + "loss": 0.1361, + "num_input_tokens_seen": 538118940, + "step": 3133 + }, + { + "epoch": 0.8240941671598606, + "loss": 0.09394903481006622, + "loss_ce": 0.0025488929823040962, + "loss_iou": 0.51953125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 538118940, + "step": 3133 + }, + { + "epoch": 0.8243572039192477, + "grad_norm": 12.736031512367697, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 538291488, + "step": 3134 + }, + { + "epoch": 0.8243572039192477, + "loss": 0.12837865948677063, + "loss_ce": 0.0019290748750790954, + "loss_iou": 0.5078125, + "loss_num": 0.025390625, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 538291488, + "step": 3134 + }, + { + "epoch": 0.8246202406786348, + "grad_norm": 4.032167194049116, + "learning_rate": 5e-06, + "loss": 0.0797, + "num_input_tokens_seen": 538461628, + "step": 3135 + }, + { + "epoch": 0.8246202406786348, + "loss": 0.09231233596801758, + "loss_ce": 0.0006070120725780725, + "loss_iou": 0.4921875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 538461628, + "step": 3135 + }, + { + "epoch": 0.8248832774380219, + "grad_norm": 3.914309781520812, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 538633772, + "step": 3136 + }, + { + "epoch": 0.8248832774380219, + "loss": 0.07093091309070587, + "loss_ce": 0.00011487161100376397, + "loss_iou": 0.58984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 538633772, + "step": 3136 + }, + { + "epoch": 0.8251463141974091, + "grad_norm": 3.7923535168173763, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 538805732, + "step": 3137 + }, + { + "epoch": 0.8251463141974091, + "loss": 0.1175466924905777, + "loss_ce": 0.0006643689121119678, + "loss_iou": 0.462890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 538805732, + "step": 3137 + }, + { + "epoch": 0.8254093509567962, + "grad_norm": 10.05128274905235, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 538977916, + "step": 3138 + }, + { + "epoch": 0.8254093509567962, + "loss": 0.11163683980703354, + "loss_ce": 0.003146846778690815, + "loss_iou": 0.52734375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 538977916, + "step": 3138 + }, + { + "epoch": 0.8256723877161833, + "grad_norm": 12.00226096912509, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 539150104, + "step": 3139 + }, + { + "epoch": 0.8256723877161833, + "loss": 0.14204728603363037, + "loss_ce": 0.0005983082228340209, + "loss_iou": 0.439453125, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 539150104, + "step": 3139 + }, + { + "epoch": 0.8259354244755704, + "grad_norm": 10.327954218233959, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 539322280, + "step": 3140 + }, + { + "epoch": 0.8259354244755704, + "loss": 0.1829340159893036, + "loss_ce": 0.004543509799987078, + "loss_iou": 0.453125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 539322280, + "step": 3140 + }, + { + "epoch": 0.8261984612349575, + "grad_norm": 3.552655492096545, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 539494448, + "step": 3141 + }, + { + "epoch": 0.8261984612349575, + "loss": 0.12512250244617462, + "loss_ce": 0.003601514268666506, + "loss_iou": 0.50390625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 539494448, + "step": 3141 + }, + { + "epoch": 0.8264614979943448, + "grad_norm": 4.0634516483762795, + "learning_rate": 5e-06, + "loss": 0.0828, + "num_input_tokens_seen": 539667072, + "step": 3142 + }, + { + "epoch": 0.8264614979943448, + "loss": 0.1312233954668045, + "loss_ce": 0.0005166015471331775, + "loss_iou": 0.47265625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 539667072, + "step": 3142 + }, + { + "epoch": 0.8267245347537319, + "grad_norm": 12.104159402624534, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 539839460, + "step": 3143 + }, + { + "epoch": 0.8267245347537319, + "loss": 0.2201562076807022, + "loss_ce": 0.002535369014367461, + "loss_iou": 0.34765625, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 539839460, + "step": 3143 + }, + { + "epoch": 0.826987571513119, + "grad_norm": 5.4839208249491245, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 540011628, + "step": 3144 + }, + { + "epoch": 0.826987571513119, + "loss": 0.1272164285182953, + "loss_ce": 0.0027352366596460342, + "loss_iou": 0.474609375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 540011628, + "step": 3144 + }, + { + "epoch": 0.8272506082725061, + "grad_norm": 4.535384525117057, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 540183608, + "step": 3145 + }, + { + "epoch": 0.8272506082725061, + "loss": 0.0952780544757843, + "loss_ce": 0.002458844566717744, + "loss_iou": 0.3671875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 540183608, + "step": 3145 + }, + { + "epoch": 0.8275136450318932, + "grad_norm": 4.264720708193596, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 540355724, + "step": 3146 + }, + { + "epoch": 0.8275136450318932, + "loss": 0.05311005562543869, + "loss_ce": 0.0009097411530092359, + "loss_iou": 0.51953125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 540355724, + "step": 3146 + }, + { + "epoch": 0.8277766817912803, + "grad_norm": 6.253031178644263, + "learning_rate": 5e-06, + "loss": 0.1647, + "num_input_tokens_seen": 540526424, + "step": 3147 + }, + { + "epoch": 0.8277766817912803, + "loss": 0.1789083182811737, + "loss_ce": 0.0011586775071918964, + "loss_iou": 0.2734375, + "loss_num": 0.03564453125, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 540526424, + "step": 3147 + }, + { + "epoch": 0.8280397185506675, + "grad_norm": 4.716126774756702, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 540698736, + "step": 3148 + }, + { + "epoch": 0.8280397185506675, + "loss": 0.053756728768348694, + "loss_ce": 0.0007171769393607974, + "loss_iou": 0.4453125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 540698736, + "step": 3148 + }, + { + "epoch": 0.8283027553100546, + "grad_norm": 15.946573913857675, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 540870876, + "step": 3149 + }, + { + "epoch": 0.8283027553100546, + "loss": 0.11821180582046509, + "loss_ce": 0.001833026995882392, + "loss_iou": 0.44140625, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 540870876, + "step": 3149 + }, + { + "epoch": 0.8285657920694417, + "grad_norm": 3.1443507951345406, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 541041144, + "step": 3150 + }, + { + "epoch": 0.8285657920694417, + "loss": 0.13489758968353271, + "loss_ce": 0.004984267987310886, + "loss_iou": 0.494140625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 541041144, + "step": 3150 + }, + { + "epoch": 0.8288288288288288, + "grad_norm": 4.1136952366915605, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 541213632, + "step": 3151 + }, + { + "epoch": 0.8288288288288288, + "loss": 0.10275200009346008, + "loss_ce": 0.0014183830935508013, + "loss_iou": 0.4765625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 541213632, + "step": 3151 + }, + { + "epoch": 0.8290918655882159, + "grad_norm": 5.711362515317036, + "learning_rate": 5e-06, + "loss": 0.1271, + "num_input_tokens_seen": 541384104, + "step": 3152 + }, + { + "epoch": 0.8290918655882159, + "loss": 0.08564618229866028, + "loss_ce": 0.0014176733093336225, + "loss_iou": 0.66796875, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 541384104, + "step": 3152 + }, + { + "epoch": 0.8293549023476031, + "grad_norm": 7.096186366509263, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 541556240, + "step": 3153 + }, + { + "epoch": 0.8293549023476031, + "loss": 0.17091956734657288, + "loss_ce": 0.0015012390213087201, + "loss_iou": 0.421875, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 541556240, + "step": 3153 + }, + { + "epoch": 0.8296179391069902, + "grad_norm": 4.29702608695741, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 541728356, + "step": 3154 + }, + { + "epoch": 0.8296179391069902, + "loss": 0.10837417840957642, + "loss_ce": 0.0006623809458687901, + "loss_iou": 0.51171875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 541728356, + "step": 3154 + }, + { + "epoch": 0.8298809758663773, + "grad_norm": 3.729285456687102, + "learning_rate": 5e-06, + "loss": 0.0659, + "num_input_tokens_seen": 541898716, + "step": 3155 + }, + { + "epoch": 0.8298809758663773, + "loss": 0.062001317739486694, + "loss_ce": 0.0011492683552205563, + "loss_iou": 0.50390625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 541898716, + "step": 3155 + }, + { + "epoch": 0.8301440126257644, + "grad_norm": 6.543104796714217, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 542070820, + "step": 3156 + }, + { + "epoch": 0.8301440126257644, + "loss": 0.09034896641969681, + "loss_ce": 0.0022752326913177967, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 542070820, + "step": 3156 + }, + { + "epoch": 0.8304070493851515, + "grad_norm": 3.814660695434685, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 542242936, + "step": 3157 + }, + { + "epoch": 0.8304070493851515, + "loss": 0.05618397891521454, + "loss_ce": 0.0004894005251117051, + "loss_iou": 0.412109375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 542242936, + "step": 3157 + }, + { + "epoch": 0.8306700861445387, + "grad_norm": 13.94223132380543, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 542415376, + "step": 3158 + }, + { + "epoch": 0.8306700861445387, + "loss": 0.09673337638378143, + "loss_ce": 0.0017016411293298006, + "loss_iou": 0.5703125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 542415376, + "step": 3158 + }, + { + "epoch": 0.8309331229039258, + "grad_norm": 46.47454058969395, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 542587700, + "step": 3159 + }, + { + "epoch": 0.8309331229039258, + "loss": 0.13622400164604187, + "loss_ce": 0.00045128766214475036, + "loss_iou": 0.5703125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 542587700, + "step": 3159 + }, + { + "epoch": 0.831196159663313, + "grad_norm": 7.76157996882612, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 542758148, + "step": 3160 + }, + { + "epoch": 0.831196159663313, + "loss": 0.11631490290164948, + "loss_ce": 0.004040728323161602, + "loss_iou": 0.42578125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 542758148, + "step": 3160 + }, + { + "epoch": 0.8314591964227, + "grad_norm": 20.483608511309008, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 542930836, + "step": 3161 + }, + { + "epoch": 0.8314591964227, + "loss": 0.06239602342247963, + "loss_ce": 0.00014016299974173307, + "loss_iou": 0.55078125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 542930836, + "step": 3161 + }, + { + "epoch": 0.8317222331820872, + "grad_norm": 4.545805831684384, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 543102800, + "step": 3162 + }, + { + "epoch": 0.8317222331820872, + "loss": 0.12887202203273773, + "loss_ce": 0.0006066488567739725, + "loss_iou": 0.6015625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 543102800, + "step": 3162 + }, + { + "epoch": 0.8319852699414744, + "grad_norm": 6.107575063227538, + "learning_rate": 5e-06, + "loss": 0.1, + "num_input_tokens_seen": 543274984, + "step": 3163 + }, + { + "epoch": 0.8319852699414744, + "loss": 0.08694491535425186, + "loss_ce": 0.00036654339055530727, + "loss_iou": 0.498046875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 543274984, + "step": 3163 + }, + { + "epoch": 0.8322483067008615, + "grad_norm": 5.535589838242773, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 543447000, + "step": 3164 + }, + { + "epoch": 0.8322483067008615, + "loss": 0.12083543837070465, + "loss_ce": 0.006333490367978811, + "loss_iou": 0.6796875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 543447000, + "step": 3164 + }, + { + "epoch": 0.8325113434602486, + "grad_norm": 5.113582833614297, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 543619120, + "step": 3165 + }, + { + "epoch": 0.8325113434602486, + "loss": 0.1699899584054947, + "loss_ce": 0.002250079531222582, + "loss_iou": 0.51953125, + "loss_num": 0.033447265625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 543619120, + "step": 3165 + }, + { + "epoch": 0.8327743802196357, + "grad_norm": 5.479089729021323, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 543791308, + "step": 3166 + }, + { + "epoch": 0.8327743802196357, + "loss": 0.08266595751047134, + "loss_ce": 0.00042108428897336125, + "loss_iou": 0.5625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 543791308, + "step": 3166 + }, + { + "epoch": 0.8330374169790228, + "grad_norm": 4.786501058802981, + "learning_rate": 5e-06, + "loss": 0.0917, + "num_input_tokens_seen": 543958632, + "step": 3167 + }, + { + "epoch": 0.8330374169790228, + "loss": 0.07515060901641846, + "loss_ce": 0.0012675554025918245, + "loss_iou": 0.5703125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 543958632, + "step": 3167 + }, + { + "epoch": 0.83330045373841, + "grad_norm": 8.594141920355682, + "learning_rate": 5e-06, + "loss": 0.12, + "num_input_tokens_seen": 544130848, + "step": 3168 + }, + { + "epoch": 0.83330045373841, + "loss": 0.09625629335641861, + "loss_ce": 0.0025978446938097477, + "loss_iou": 0.51171875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 544130848, + "step": 3168 + }, + { + "epoch": 0.8335634904977971, + "grad_norm": 3.7461805856215458, + "learning_rate": 5e-06, + "loss": 0.1468, + "num_input_tokens_seen": 544302820, + "step": 3169 + }, + { + "epoch": 0.8335634904977971, + "loss": 0.09222942590713501, + "loss_ce": 0.0022025699727237225, + "loss_iou": 0.375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 544302820, + "step": 3169 + }, + { + "epoch": 0.8338265272571842, + "grad_norm": 9.92373943246775, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 544474904, + "step": 3170 + }, + { + "epoch": 0.8338265272571842, + "loss": 0.14077287912368774, + "loss_ce": 0.0030775703489780426, + "loss_iou": 0.482421875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 544474904, + "step": 3170 + }, + { + "epoch": 0.8340895640165713, + "grad_norm": 14.235058865758765, + "learning_rate": 5e-06, + "loss": 0.0805, + "num_input_tokens_seen": 544647036, + "step": 3171 + }, + { + "epoch": 0.8340895640165713, + "loss": 0.060213349759578705, + "loss_ce": 0.0022452091798186302, + "loss_iou": 0.4609375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 544647036, + "step": 3171 + }, + { + "epoch": 0.8343526007759584, + "grad_norm": 4.48891217014488, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 544817552, + "step": 3172 + }, + { + "epoch": 0.8343526007759584, + "loss": 0.1579454094171524, + "loss_ce": 0.0024888694752007723, + "loss_iou": 0.482421875, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 544817552, + "step": 3172 + }, + { + "epoch": 0.8346156375353455, + "grad_norm": 3.708111692136732, + "learning_rate": 5e-06, + "loss": 0.1094, + "num_input_tokens_seen": 544989748, + "step": 3173 + }, + { + "epoch": 0.8346156375353455, + "loss": 0.14915120601654053, + "loss_ce": 0.0004848288372159004, + "loss_iou": 0.34375, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 544989748, + "step": 3173 + }, + { + "epoch": 0.8348786742947327, + "grad_norm": 6.783609066408303, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 545161788, + "step": 3174 + }, + { + "epoch": 0.8348786742947327, + "loss": 0.13735045492649078, + "loss_ce": 0.003683460643514991, + "loss_iou": 0.54296875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 545161788, + "step": 3174 + }, + { + "epoch": 0.8351417110541198, + "grad_norm": 4.8955030087080775, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 545333884, + "step": 3175 + }, + { + "epoch": 0.8351417110541198, + "loss": 0.08379638940095901, + "loss_ce": 0.0006665037362836301, + "loss_iou": 0.51171875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 545333884, + "step": 3175 + }, + { + "epoch": 0.8354047478135069, + "grad_norm": 4.39565885197116, + "learning_rate": 5e-06, + "loss": 0.1439, + "num_input_tokens_seen": 545506244, + "step": 3176 + }, + { + "epoch": 0.8354047478135069, + "loss": 0.12928782403469086, + "loss_ce": 0.000915632932446897, + "loss_iou": 0.46875, + "loss_num": 0.0257568359375, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 545506244, + "step": 3176 + }, + { + "epoch": 0.835667784572894, + "grad_norm": 6.2304214245063605, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 545678348, + "step": 3177 + }, + { + "epoch": 0.835667784572894, + "loss": 0.07726402580738068, + "loss_ce": 0.000832755584269762, + "loss_iou": 0.44921875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 545678348, + "step": 3177 + }, + { + "epoch": 0.8359308213322811, + "grad_norm": 4.950850483708038, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 545848136, + "step": 3178 + }, + { + "epoch": 0.8359308213322811, + "loss": 0.12010537087917328, + "loss_ce": 0.0009342365083284676, + "loss_iou": 0.62890625, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 545848136, + "step": 3178 + }, + { + "epoch": 0.8361938580916684, + "grad_norm": 4.252976007842196, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 546020692, + "step": 3179 + }, + { + "epoch": 0.8361938580916684, + "loss": 0.12632903456687927, + "loss_ce": 0.00018462821026332676, + "loss_iou": 0.486328125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 546020692, + "step": 3179 + }, + { + "epoch": 0.8364568948510555, + "grad_norm": 16.684582759145172, + "learning_rate": 5e-06, + "loss": 0.073, + "num_input_tokens_seen": 546192760, + "step": 3180 + }, + { + "epoch": 0.8364568948510555, + "loss": 0.09913001954555511, + "loss_ce": 0.0009549736278131604, + "loss_iou": 0.49609375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 546192760, + "step": 3180 + }, + { + "epoch": 0.8367199316104426, + "grad_norm": 5.017020175241404, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 546363064, + "step": 3181 + }, + { + "epoch": 0.8367199316104426, + "loss": 0.1710553914308548, + "loss_ce": 0.00040109228575602174, + "loss_iou": NaN, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 546363064, + "step": 3181 + }, + { + "epoch": 0.8369829683698297, + "grad_norm": 17.167559763411923, + "learning_rate": 5e-06, + "loss": 0.1342, + "num_input_tokens_seen": 546535208, + "step": 3182 + }, + { + "epoch": 0.8369829683698297, + "loss": 0.15842683613300323, + "loss_ce": 0.0015664853854104877, + "loss_iou": 0.443359375, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 546535208, + "step": 3182 + }, + { + "epoch": 0.8372460051292168, + "grad_norm": 3.9098429704759723, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 546707288, + "step": 3183 + }, + { + "epoch": 0.8372460051292168, + "loss": 0.08958999812602997, + "loss_ce": 0.00014297313464339823, + "loss_iou": 0.5234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 546707288, + "step": 3183 + }, + { + "epoch": 0.837509041888604, + "grad_norm": 5.336892331569848, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 546879532, + "step": 3184 + }, + { + "epoch": 0.837509041888604, + "loss": 0.07368629425764084, + "loss_ce": 0.004197763279080391, + "loss_iou": 0.51953125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 546879532, + "step": 3184 + }, + { + "epoch": 0.8377720786479911, + "grad_norm": 3.9486138565288265, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 547051612, + "step": 3185 + }, + { + "epoch": 0.8377720786479911, + "loss": 0.15984772145748138, + "loss_ce": 0.0009426883771084249, + "loss_iou": 0.45703125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 547051612, + "step": 3185 + }, + { + "epoch": 0.8380351154073782, + "grad_norm": 6.643098269759205, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 547223768, + "step": 3186 + }, + { + "epoch": 0.8380351154073782, + "loss": 0.1492346227169037, + "loss_ce": 0.0017126407474279404, + "loss_iou": 0.40625, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 547223768, + "step": 3186 + }, + { + "epoch": 0.8382981521667653, + "grad_norm": 30.69973998174967, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 547395788, + "step": 3187 + }, + { + "epoch": 0.8382981521667653, + "loss": 0.13615994155406952, + "loss_ce": 0.00041775350109674037, + "loss_iou": 0.490234375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 547395788, + "step": 3187 + }, + { + "epoch": 0.8385611889261524, + "grad_norm": 35.72025172038926, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 547568000, + "step": 3188 + }, + { + "epoch": 0.8385611889261524, + "loss": 0.04795503616333008, + "loss_ce": 0.0002560606808401644, + "loss_iou": 0.5703125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 547568000, + "step": 3188 + }, + { + "epoch": 0.8388242256855396, + "grad_norm": 7.18874848401809, + "learning_rate": 5e-06, + "loss": 0.1135, + "num_input_tokens_seen": 547740064, + "step": 3189 + }, + { + "epoch": 0.8388242256855396, + "loss": 0.12481731176376343, + "loss_ce": 0.0005802565719932318, + "loss_iou": 0.40625, + "loss_num": 0.02490234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 547740064, + "step": 3189 + }, + { + "epoch": 0.8390872624449267, + "grad_norm": 6.155592453443207, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 547912300, + "step": 3190 + }, + { + "epoch": 0.8390872624449267, + "loss": 0.08178332448005676, + "loss_ce": 0.0008201911114156246, + "loss_iou": 0.470703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 547912300, + "step": 3190 + }, + { + "epoch": 0.8393502992043138, + "grad_norm": 12.72945010384761, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 548084548, + "step": 3191 + }, + { + "epoch": 0.8393502992043138, + "loss": 0.19623327255249023, + "loss_ce": 0.002812865423038602, + "loss_iou": 0.4296875, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 548084548, + "step": 3191 + }, + { + "epoch": 0.8396133359637009, + "grad_norm": 5.353324449071839, + "learning_rate": 5e-06, + "loss": 0.1299, + "num_input_tokens_seen": 548255176, + "step": 3192 + }, + { + "epoch": 0.8396133359637009, + "loss": 0.12437019497156143, + "loss_ce": 0.0003467575879767537, + "loss_iou": 0.4921875, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 548255176, + "step": 3192 + }, + { + "epoch": 0.839876372723088, + "grad_norm": 4.347588948765136, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 548427488, + "step": 3193 + }, + { + "epoch": 0.839876372723088, + "loss": 0.08202692121267319, + "loss_ce": 0.0022234548814594746, + "loss_iou": 0.53515625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 548427488, + "step": 3193 + }, + { + "epoch": 0.8401394094824752, + "grad_norm": 5.210446302172759, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 548599828, + "step": 3194 + }, + { + "epoch": 0.8401394094824752, + "loss": 0.12387488037347794, + "loss_ce": 0.00018713258032221347, + "loss_iou": 0.59765625, + "loss_num": 0.024658203125, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 548599828, + "step": 3194 + }, + { + "epoch": 0.8404024462418623, + "grad_norm": 3.9642883700177514, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 548771932, + "step": 3195 + }, + { + "epoch": 0.8404024462418623, + "loss": 0.12389804422855377, + "loss_ce": 0.00396396778523922, + "loss_iou": 0.490234375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 548771932, + "step": 3195 + }, + { + "epoch": 0.8406654830012494, + "grad_norm": 4.444986430362012, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 548944332, + "step": 3196 + }, + { + "epoch": 0.8406654830012494, + "loss": 0.07641720026731491, + "loss_ce": 0.0017254289705306292, + "loss_iou": 0.578125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 548944332, + "step": 3196 + }, + { + "epoch": 0.8409285197606365, + "grad_norm": 17.152304646652222, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 549116460, + "step": 3197 + }, + { + "epoch": 0.8409285197606365, + "loss": 0.07439431548118591, + "loss_ce": 0.0016404138877987862, + "loss_iou": 0.435546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 549116460, + "step": 3197 + }, + { + "epoch": 0.8411915565200236, + "grad_norm": 6.835966777853977, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 549286920, + "step": 3198 + }, + { + "epoch": 0.8411915565200236, + "loss": 0.23670437932014465, + "loss_ce": 0.0033669895492494106, + "loss_iou": 0.4609375, + "loss_num": 0.046630859375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 549286920, + "step": 3198 + }, + { + "epoch": 0.8414545932794107, + "grad_norm": 3.334370051059133, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 549458932, + "step": 3199 + }, + { + "epoch": 0.8414545932794107, + "loss": 0.10936430096626282, + "loss_ce": 0.001118454267270863, + "loss_iou": 0.47265625, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 549458932, + "step": 3199 + }, + { + "epoch": 0.841717630038798, + "grad_norm": 3.3887650622610863, + "learning_rate": 5e-06, + "loss": 0.119, + "num_input_tokens_seen": 549630896, + "step": 3200 + }, + { + "epoch": 0.841717630038798, + "loss": 0.1543048769235611, + "loss_ce": 0.00280036055482924, + "loss_iou": 0.451171875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 549630896, + "step": 3200 + }, + { + "epoch": 0.8419806667981851, + "grad_norm": 8.90667441822763, + "learning_rate": 5e-06, + "loss": 0.1431, + "num_input_tokens_seen": 549802980, + "step": 3201 + }, + { + "epoch": 0.8419806667981851, + "loss": 0.10668284446001053, + "loss_ce": 0.0037165414541959763, + "loss_iou": 0.322265625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 549802980, + "step": 3201 + }, + { + "epoch": 0.8422437035575722, + "grad_norm": 4.581013999909635, + "learning_rate": 5e-06, + "loss": 0.0768, + "num_input_tokens_seen": 549973288, + "step": 3202 + }, + { + "epoch": 0.8422437035575722, + "loss": 0.05071475729346275, + "loss_ce": 0.0011084338184446096, + "loss_iou": 0.578125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 549973288, + "step": 3202 + }, + { + "epoch": 0.8425067403169593, + "grad_norm": 7.123665679875533, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 550143700, + "step": 3203 + }, + { + "epoch": 0.8425067403169593, + "loss": 0.1053222045302391, + "loss_ce": 0.0030272852163761854, + "loss_iou": 0.404296875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 550143700, + "step": 3203 + }, + { + "epoch": 0.8427697770763464, + "grad_norm": 14.86581378718403, + "learning_rate": 5e-06, + "loss": 0.1414, + "num_input_tokens_seen": 550315752, + "step": 3204 + }, + { + "epoch": 0.8427697770763464, + "loss": 0.08674832433462143, + "loss_ce": 0.000688750937115401, + "loss_iou": NaN, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 550315752, + "step": 3204 + }, + { + "epoch": 0.8430328138357336, + "grad_norm": 15.15255979041917, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 550487764, + "step": 3205 + }, + { + "epoch": 0.8430328138357336, + "loss": 0.11826883256435394, + "loss_ce": 0.001401771791279316, + "loss_iou": 0.47265625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 550487764, + "step": 3205 + }, + { + "epoch": 0.8432958505951207, + "grad_norm": 5.341317771181548, + "learning_rate": 5e-06, + "loss": 0.1085, + "num_input_tokens_seen": 550659696, + "step": 3206 + }, + { + "epoch": 0.8432958505951207, + "loss": 0.11918849498033524, + "loss_ce": 0.0002920094411820173, + "loss_iou": 0.5859375, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 550659696, + "step": 3206 + }, + { + "epoch": 0.8435588873545078, + "grad_norm": 5.395123389791678, + "learning_rate": 5e-06, + "loss": 0.1327, + "num_input_tokens_seen": 550831904, + "step": 3207 + }, + { + "epoch": 0.8435588873545078, + "loss": 0.07498294115066528, + "loss_ce": 0.001038852147758007, + "loss_iou": 0.33984375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 550831904, + "step": 3207 + }, + { + "epoch": 0.8438219241138949, + "grad_norm": 3.643785530851578, + "learning_rate": 5e-06, + "loss": 0.1128, + "num_input_tokens_seen": 551004156, + "step": 3208 + }, + { + "epoch": 0.8438219241138949, + "loss": 0.06928001344203949, + "loss_ce": 0.004369123373180628, + "loss_iou": 0.494140625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 551004156, + "step": 3208 + }, + { + "epoch": 0.844084960873282, + "grad_norm": 5.391792901404867, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 551176320, + "step": 3209 + }, + { + "epoch": 0.844084960873282, + "loss": 0.0887857973575592, + "loss_ce": 0.0004984364495612681, + "loss_iou": 0.376953125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 551176320, + "step": 3209 + }, + { + "epoch": 0.8443479976326692, + "grad_norm": 7.538916165469519, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 551348168, + "step": 3210 + }, + { + "epoch": 0.8443479976326692, + "loss": 0.16849397122859955, + "loss_ce": 0.0030734348110854626, + "loss_iou": 0.419921875, + "loss_num": 0.033203125, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 551348168, + "step": 3210 + }, + { + "epoch": 0.8446110343920563, + "grad_norm": 7.685615482077233, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 551520728, + "step": 3211 + }, + { + "epoch": 0.8446110343920563, + "loss": 0.0783834308385849, + "loss_ce": 0.001479133265092969, + "loss_iou": 0.5390625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 551520728, + "step": 3211 + }, + { + "epoch": 0.8448740711514434, + "grad_norm": 8.632575689436024, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 551692804, + "step": 3212 + }, + { + "epoch": 0.8448740711514434, + "loss": 0.15737096965312958, + "loss_ce": 0.0006174240261316299, + "loss_iou": 0.5234375, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 551692804, + "step": 3212 + }, + { + "epoch": 0.8451371079108305, + "grad_norm": 12.457163123139722, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 551864880, + "step": 3213 + }, + { + "epoch": 0.8451371079108305, + "loss": 0.07481381297111511, + "loss_ce": 0.003173800650984049, + "loss_iou": 0.44921875, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 551864880, + "step": 3213 + }, + { + "epoch": 0.8454001446702176, + "grad_norm": 3.8683898896692166, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 552036812, + "step": 3214 + }, + { + "epoch": 0.8454001446702176, + "loss": 0.05259804055094719, + "loss_ce": 0.0005503093125298619, + "loss_iou": 0.51171875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 552036812, + "step": 3214 + }, + { + "epoch": 0.8456631814296048, + "grad_norm": 4.539951161935164, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 552208856, + "step": 3215 + }, + { + "epoch": 0.8456631814296048, + "loss": 0.07421931624412537, + "loss_ce": 0.0011754983570426702, + "loss_iou": 0.478515625, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 552208856, + "step": 3215 + }, + { + "epoch": 0.8459262181889919, + "grad_norm": 10.573780213794782, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 552380904, + "step": 3216 + }, + { + "epoch": 0.8459262181889919, + "loss": 0.06666961312294006, + "loss_ce": 0.001804507803171873, + "loss_iou": 0.484375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 552380904, + "step": 3216 + }, + { + "epoch": 0.846189254948379, + "grad_norm": 4.275878567324078, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 552551132, + "step": 3217 + }, + { + "epoch": 0.846189254948379, + "loss": 0.09229104220867157, + "loss_ce": 0.0006162393838167191, + "loss_iou": 0.4921875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 552551132, + "step": 3217 + }, + { + "epoch": 0.8464522917077661, + "grad_norm": 4.658023344353002, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 552722824, + "step": 3218 + }, + { + "epoch": 0.8464522917077661, + "loss": 0.07020144164562225, + "loss_ce": 0.0020709503442049026, + "loss_iou": 0.49609375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 552722824, + "step": 3218 + }, + { + "epoch": 0.8467153284671532, + "grad_norm": 6.1002348516078095, + "learning_rate": 5e-06, + "loss": 0.1233, + "num_input_tokens_seen": 552894948, + "step": 3219 + }, + { + "epoch": 0.8467153284671532, + "loss": 0.10183661431074142, + "loss_ce": 0.0007318751304410398, + "loss_iou": 0.5703125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 552894948, + "step": 3219 + }, + { + "epoch": 0.8469783652265405, + "grad_norm": 3.6957881460841646, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 553065492, + "step": 3220 + }, + { + "epoch": 0.8469783652265405, + "loss": 0.20681847631931305, + "loss_ce": 0.005616086535155773, + "loss_iou": 0.4609375, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 553065492, + "step": 3220 + }, + { + "epoch": 0.8472414019859276, + "grad_norm": 31.1352481206466, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 553237384, + "step": 3221 + }, + { + "epoch": 0.8472414019859276, + "loss": 0.0710492879152298, + "loss_ce": 0.0004316139966249466, + "loss_iou": 0.42578125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 553237384, + "step": 3221 + }, + { + "epoch": 0.8475044387453147, + "grad_norm": 15.822969391935775, + "learning_rate": 5e-06, + "loss": 0.1572, + "num_input_tokens_seen": 553409788, + "step": 3222 + }, + { + "epoch": 0.8475044387453147, + "loss": 0.24513369798660278, + "loss_ce": 0.001847569365054369, + "loss_iou": NaN, + "loss_num": 0.048583984375, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 553409788, + "step": 3222 + }, + { + "epoch": 0.8477674755047018, + "grad_norm": 4.903880792540357, + "learning_rate": 5e-06, + "loss": 0.082, + "num_input_tokens_seen": 553581532, + "step": 3223 + }, + { + "epoch": 0.8477674755047018, + "loss": 0.0573626384139061, + "loss_ce": 0.0015459894202649593, + "loss_iou": NaN, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 553581532, + "step": 3223 + }, + { + "epoch": 0.8480305122640889, + "grad_norm": 11.193676890263411, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 553753504, + "step": 3224 + }, + { + "epoch": 0.8480305122640889, + "loss": 0.12998200953006744, + "loss_ce": 0.0016861144686117768, + "loss_iou": 0.515625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 553753504, + "step": 3224 + }, + { + "epoch": 0.848293549023476, + "grad_norm": 6.892798676870787, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 553925232, + "step": 3225 + }, + { + "epoch": 0.848293549023476, + "loss": 0.06351655721664429, + "loss_ce": 0.0010470744455233216, + "loss_iou": 0.33203125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 553925232, + "step": 3225 + }, + { + "epoch": 0.8485565857828632, + "grad_norm": 3.94134933091549, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 554097380, + "step": 3226 + }, + { + "epoch": 0.8485565857828632, + "loss": 0.06772696226835251, + "loss_ce": 0.00040518559399060905, + "loss_iou": 0.5, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 554097380, + "step": 3226 + }, + { + "epoch": 0.8488196225422503, + "grad_norm": 4.553319445567292, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 554269324, + "step": 3227 + }, + { + "epoch": 0.8488196225422503, + "loss": 0.06106797605752945, + "loss_ce": 0.00015489489305764437, + "loss_iou": 0.50390625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 554269324, + "step": 3227 + }, + { + "epoch": 0.8490826593016374, + "grad_norm": 8.122842269719726, + "learning_rate": 5e-06, + "loss": 0.0758, + "num_input_tokens_seen": 554441420, + "step": 3228 + }, + { + "epoch": 0.8490826593016374, + "loss": 0.07994222640991211, + "loss_ce": 0.0005202332977205515, + "loss_iou": 0.5078125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 554441420, + "step": 3228 + }, + { + "epoch": 0.8493456960610245, + "grad_norm": 4.946244650717768, + "learning_rate": 5e-06, + "loss": 0.0891, + "num_input_tokens_seen": 554613348, + "step": 3229 + }, + { + "epoch": 0.8493456960610245, + "loss": 0.07239595800638199, + "loss_ce": 0.0015646612737327814, + "loss_iou": 0.4921875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 554613348, + "step": 3229 + }, + { + "epoch": 0.8496087328204116, + "grad_norm": 10.327431498521296, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 554785544, + "step": 3230 + }, + { + "epoch": 0.8496087328204116, + "loss": 0.1082451343536377, + "loss_ce": 0.001616714522242546, + "loss_iou": 0.56640625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 554785544, + "step": 3230 + }, + { + "epoch": 0.8498717695797988, + "grad_norm": 6.2381116819087605, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 554957700, + "step": 3231 + }, + { + "epoch": 0.8498717695797988, + "loss": 0.05288837477564812, + "loss_ce": 0.0004286564071662724, + "loss_iou": 0.41015625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 554957700, + "step": 3231 + }, + { + "epoch": 0.8501348063391859, + "grad_norm": 5.665331437954715, + "learning_rate": 5e-06, + "loss": 0.1052, + "num_input_tokens_seen": 555128400, + "step": 3232 + }, + { + "epoch": 0.8501348063391859, + "loss": 0.17892731726169586, + "loss_ce": 0.008456122130155563, + "loss_iou": 0.47265625, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 555128400, + "step": 3232 + }, + { + "epoch": 0.850397843098573, + "grad_norm": 4.02543856443553, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 555300712, + "step": 3233 + }, + { + "epoch": 0.850397843098573, + "loss": 0.1149306371808052, + "loss_ce": 0.0023512912448495626, + "loss_iou": 0.51171875, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 555300712, + "step": 3233 + }, + { + "epoch": 0.8506608798579601, + "grad_norm": 4.049090978454337, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 555472756, + "step": 3234 + }, + { + "epoch": 0.8506608798579601, + "loss": 0.14269746840000153, + "loss_ce": 0.0037509393878281116, + "loss_iou": 0.56640625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 555472756, + "step": 3234 + }, + { + "epoch": 0.8509239166173472, + "grad_norm": 4.24693988860905, + "learning_rate": 5e-06, + "loss": 0.1089, + "num_input_tokens_seen": 555644856, + "step": 3235 + }, + { + "epoch": 0.8509239166173472, + "loss": 0.06741193681955338, + "loss_ce": 0.0012955997372046113, + "loss_iou": 0.478515625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 555644856, + "step": 3235 + }, + { + "epoch": 0.8511869533767344, + "grad_norm": 4.223553863328503, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 555817488, + "step": 3236 + }, + { + "epoch": 0.8511869533767344, + "loss": 0.09499558061361313, + "loss_ce": 0.0007572993636131287, + "loss_iou": 0.5078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 555817488, + "step": 3236 + }, + { + "epoch": 0.8514499901361215, + "grad_norm": 8.070861336660833, + "learning_rate": 5e-06, + "loss": 0.1101, + "num_input_tokens_seen": 555989844, + "step": 3237 + }, + { + "epoch": 0.8514499901361215, + "loss": 0.09148094058036804, + "loss_ce": 0.0017287411028519273, + "loss_iou": 0.515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 555989844, + "step": 3237 + }, + { + "epoch": 0.8517130268955087, + "grad_norm": 4.7628320746905235, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 556162144, + "step": 3238 + }, + { + "epoch": 0.8517130268955087, + "loss": 0.08755885809659958, + "loss_ce": 0.001667136326432228, + "loss_iou": 0.421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 556162144, + "step": 3238 + }, + { + "epoch": 0.8519760636548958, + "grad_norm": 8.201240707199219, + "learning_rate": 5e-06, + "loss": 0.068, + "num_input_tokens_seen": 556334020, + "step": 3239 + }, + { + "epoch": 0.8519760636548958, + "loss": 0.06216670200228691, + "loss_ce": 0.001772413495928049, + "loss_iou": 0.404296875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 556334020, + "step": 3239 + }, + { + "epoch": 0.8522391004142829, + "grad_norm": 5.1836633623539266, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 556502704, + "step": 3240 + }, + { + "epoch": 0.8522391004142829, + "loss": 0.09547331184148788, + "loss_ce": 0.0012350315228104591, + "loss_iou": 0.333984375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 556502704, + "step": 3240 + }, + { + "epoch": 0.8525021371736701, + "grad_norm": 4.1790455766130234, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 556674848, + "step": 3241 + }, + { + "epoch": 0.8525021371736701, + "loss": 0.15765714645385742, + "loss_ce": 0.00018642976647242904, + "loss_iou": 0.353515625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 556674848, + "step": 3241 + }, + { + "epoch": 0.8527651739330572, + "grad_norm": 32.42085523655999, + "learning_rate": 5e-06, + "loss": 0.0973, + "num_input_tokens_seen": 556847120, + "step": 3242 + }, + { + "epoch": 0.8527651739330572, + "loss": 0.10435596853494644, + "loss_ce": 0.0030986424535512924, + "loss_iou": 0.48046875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 556847120, + "step": 3242 + }, + { + "epoch": 0.8530282106924443, + "grad_norm": 3.085990680432512, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 557015380, + "step": 3243 + }, + { + "epoch": 0.8530282106924443, + "loss": 0.12935911118984222, + "loss_ce": 0.0042217751033604145, + "loss_iou": 0.3515625, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 557015380, + "step": 3243 + }, + { + "epoch": 0.8532912474518314, + "grad_norm": 5.554407683422717, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 557186020, + "step": 3244 + }, + { + "epoch": 0.8532912474518314, + "loss": 0.12462737411260605, + "loss_ce": 0.002557065337896347, + "loss_iou": 0.5234375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 557186020, + "step": 3244 + }, + { + "epoch": 0.8535542842112185, + "grad_norm": 77.39805630943022, + "learning_rate": 5e-06, + "loss": 0.0876, + "num_input_tokens_seen": 557358056, + "step": 3245 + }, + { + "epoch": 0.8535542842112185, + "loss": 0.06247711926698685, + "loss_ce": 0.0029983618296682835, + "loss_iou": 0.5390625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 557358056, + "step": 3245 + }, + { + "epoch": 0.8538173209706057, + "grad_norm": 6.568132581838013, + "learning_rate": 5e-06, + "loss": 0.1201, + "num_input_tokens_seen": 557530164, + "step": 3246 + }, + { + "epoch": 0.8538173209706057, + "loss": 0.10226649791002274, + "loss_ce": 0.0013143508695065975, + "loss_iou": 0.48828125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 557530164, + "step": 3246 + }, + { + "epoch": 0.8540803577299928, + "grad_norm": 8.945021217700889, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 557702352, + "step": 3247 + }, + { + "epoch": 0.8540803577299928, + "loss": 0.14076803624629974, + "loss_ce": 0.002218232722952962, + "loss_iou": 0.40625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 557702352, + "step": 3247 + }, + { + "epoch": 0.8543433944893799, + "grad_norm": 5.970819800467794, + "learning_rate": 5e-06, + "loss": 0.113, + "num_input_tokens_seen": 557874756, + "step": 3248 + }, + { + "epoch": 0.8543433944893799, + "loss": 0.12330596148967743, + "loss_ce": 0.002425831276923418, + "loss_iou": 0.470703125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 557874756, + "step": 3248 + }, + { + "epoch": 0.854606431248767, + "grad_norm": 12.952896902366254, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 558045276, + "step": 3249 + }, + { + "epoch": 0.854606431248767, + "loss": 0.15818831324577332, + "loss_ce": 0.0018772899638861418, + "loss_iou": 0.443359375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 558045276, + "step": 3249 + }, + { + "epoch": 0.8548694680081541, + "grad_norm": 10.077522334681213, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_websight_new_CIoU": 0.8994152843952179, + "eval_websight_new_GIoU": 0.9002452492713928, + "eval_websight_new_IoU": 0.9029170572757721, + "eval_websight_new_MAE_all": 0.014043471310287714, + "eval_websight_new_MAE_h": 0.007522843778133392, + "eval_websight_new_MAE_w": 0.020659465342760086, + "eval_websight_new_MAE_x": 0.021266265772283077, + "eval_websight_new_MAE_y": 0.006725311512127519, + "eval_websight_new_NUM_probability": 0.9999923408031464, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.07195720076560974, + "eval_websight_new_loss_ce": 4.601216005539754e-06, + "eval_websight_new_loss_iou": 0.31280517578125, + "eval_websight_new_loss_num": 0.012767791748046875, + "eval_websight_new_loss_xval": 0.06391143798828125, + "eval_websight_new_runtime": 56.9054, + "eval_websight_new_samples_per_second": 0.879, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_seeclick_CIoU": 0.6642794907093048, + "eval_seeclick_GIoU": 0.6613934338092804, + "eval_seeclick_IoU": 0.6828196048736572, + "eval_seeclick_MAE_all": 0.04043097607791424, + "eval_seeclick_MAE_h": 0.023812726140022278, + "eval_seeclick_MAE_w": 0.055972687900066376, + "eval_seeclick_MAE_x": 0.05744660459458828, + "eval_seeclick_MAE_y": 0.02449188195168972, + "eval_seeclick_NUM_probability": 0.999969094991684, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.1842024326324463, + "eval_seeclick_loss_ce": 0.008998175617307425, + "eval_seeclick_loss_iou": 0.4769287109375, + "eval_seeclick_loss_num": 0.0336456298828125, + "eval_seeclick_loss_xval": 0.1681365966796875, + "eval_seeclick_runtime": 76.1272, + "eval_seeclick_samples_per_second": 0.565, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_icons_CIoU": 0.8727431297302246, + "eval_icons_GIoU": 0.8711867332458496, + "eval_icons_IoU": 0.8781991004943848, + "eval_icons_MAE_all": 0.017583131790161133, + "eval_icons_MAE_h": 0.019472193904221058, + "eval_icons_MAE_w": 0.015529958996921778, + "eval_icons_MAE_x": 0.015652839560061693, + "eval_icons_MAE_y": 0.019677532836794853, + "eval_icons_NUM_probability": 0.9999885261058807, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.06516695767641068, + "eval_icons_loss_ce": 9.223055712936912e-06, + "eval_icons_loss_iou": 0.504638671875, + "eval_icons_loss_num": 0.012132644653320312, + "eval_icons_loss_xval": 0.0607147216796875, + "eval_icons_runtime": 94.9537, + "eval_icons_samples_per_second": 0.527, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "eval_screenspot_CIoU": 0.6023598512013754, + "eval_screenspot_GIoU": 0.5998436013857523, + "eval_screenspot_IoU": 0.6310188174247742, + "eval_screenspot_MAE_all": 0.06860506162047386, + "eval_screenspot_MAE_h": 0.04429138886431853, + "eval_screenspot_MAE_w": 0.11814649154742558, + "eval_screenspot_MAE_x": 0.06877896686395009, + "eval_screenspot_MAE_y": 0.043203407898545265, + "eval_screenspot_NUM_probability": 0.9998593727747599, + "eval_screenspot_inside_bbox": 0.8841666579246521, + "eval_screenspot_loss": 0.8844305276870728, + "eval_screenspot_loss_ce": 0.5899222294489542, + "eval_screenspot_loss_iou": 0.4657796223958333, + "eval_screenspot_loss_num": 0.057614644368489586, + "eval_screenspot_loss_xval": 0.2879842122395833, + "eval_screenspot_runtime": 153.6916, + "eval_screenspot_samples_per_second": 0.579, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8548694680081541, + "loss": 0.8671462535858154, + "loss_ce": 0.5839431285858154, + "loss_iou": 0.404296875, + "loss_num": 0.056640625, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 558217164, + "step": 3250 + }, + { + "epoch": 0.8551325047675412, + "grad_norm": 38.29206628356214, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 558389192, + "step": 3251 + }, + { + "epoch": 0.8551325047675412, + "loss": 0.21791958808898926, + "loss_ce": 0.0010311761870980263, + "loss_iou": 0.5859375, + "loss_num": 0.043212890625, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 558389192, + "step": 3251 + }, + { + "epoch": 0.8553955415269284, + "grad_norm": 7.6502620366688525, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 558561204, + "step": 3252 + }, + { + "epoch": 0.8553955415269284, + "loss": 0.16455963253974915, + "loss_ce": 0.0034573215525597334, + "loss_iou": 0.376953125, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 558561204, + "step": 3252 + }, + { + "epoch": 0.8556585782863155, + "grad_norm": 7.815933727502156, + "learning_rate": 5e-06, + "loss": 0.142, + "num_input_tokens_seen": 558733140, + "step": 3253 + }, + { + "epoch": 0.8556585782863155, + "loss": 0.118372842669487, + "loss_ce": 0.0003461065352894366, + "loss_iou": NaN, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 558733140, + "step": 3253 + }, + { + "epoch": 0.8559216150457026, + "grad_norm": 26.03211500217736, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 558905528, + "step": 3254 + }, + { + "epoch": 0.8559216150457026, + "loss": 0.0649479404091835, + "loss_ce": 0.0013187924632802606, + "loss_iou": 0.484375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 558905528, + "step": 3254 + }, + { + "epoch": 0.8561846518050897, + "grad_norm": 3.695118212351963, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 559077788, + "step": 3255 + }, + { + "epoch": 0.8561846518050897, + "loss": 0.05111432075500488, + "loss_ce": 0.00036358798388391733, + "loss_iou": 0.419921875, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 559077788, + "step": 3255 + }, + { + "epoch": 0.8564476885644768, + "grad_norm": 4.828867302993965, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 559249740, + "step": 3256 + }, + { + "epoch": 0.8564476885644768, + "loss": 0.15594083070755005, + "loss_ce": 0.00011808436829596758, + "loss_iou": 0.279296875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 559249740, + "step": 3256 + }, + { + "epoch": 0.856710725323864, + "grad_norm": 4.4588084335736, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 559422008, + "step": 3257 + }, + { + "epoch": 0.856710725323864, + "loss": 0.1171468049287796, + "loss_ce": 0.005177808925509453, + "loss_iou": 0.458984375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 559422008, + "step": 3257 + }, + { + "epoch": 0.8569737620832512, + "grad_norm": 22.52084230262158, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 559594180, + "step": 3258 + }, + { + "epoch": 0.8569737620832512, + "loss": 0.12303026020526886, + "loss_ce": 0.00025803959579207003, + "loss_iou": 0.49609375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 559594180, + "step": 3258 + }, + { + "epoch": 0.8572367988426383, + "grad_norm": 5.00194835125284, + "learning_rate": 5e-06, + "loss": 0.1458, + "num_input_tokens_seen": 559766068, + "step": 3259 + }, + { + "epoch": 0.8572367988426383, + "loss": 0.14650404453277588, + "loss_ce": 0.0012403683504089713, + "loss_iou": 0.29296875, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 559766068, + "step": 3259 + }, + { + "epoch": 0.8574998356020254, + "grad_norm": 5.872594878613971, + "learning_rate": 5e-06, + "loss": 0.107, + "num_input_tokens_seen": 559937984, + "step": 3260 + }, + { + "epoch": 0.8574998356020254, + "loss": 0.14809638261795044, + "loss_ce": 0.0009711501188576221, + "loss_iou": 0.40234375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 559937984, + "step": 3260 + }, + { + "epoch": 0.8577628723614125, + "grad_norm": 7.89915891449399, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 560108224, + "step": 3261 + }, + { + "epoch": 0.8577628723614125, + "loss": 0.12339873611927032, + "loss_ce": 0.0029458554927259684, + "loss_iou": 0.54296875, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 560108224, + "step": 3261 + }, + { + "epoch": 0.8580259091207997, + "grad_norm": 5.069283331721027, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 560280532, + "step": 3262 + }, + { + "epoch": 0.8580259091207997, + "loss": 0.12928983569145203, + "loss_ce": 0.004533977247774601, + "loss_iou": 0.69921875, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 560280532, + "step": 3262 + }, + { + "epoch": 0.8582889458801868, + "grad_norm": 14.252906607697872, + "learning_rate": 5e-06, + "loss": 0.0923, + "num_input_tokens_seen": 560452680, + "step": 3263 + }, + { + "epoch": 0.8582889458801868, + "loss": 0.10280074179172516, + "loss_ce": 0.0005515902303159237, + "loss_iou": 0.341796875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 560452680, + "step": 3263 + }, + { + "epoch": 0.8585519826395739, + "grad_norm": 4.981476516908149, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 560625236, + "step": 3264 + }, + { + "epoch": 0.8585519826395739, + "loss": 0.11402536928653717, + "loss_ce": 0.0004999822122044861, + "loss_iou": 0.453125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 560625236, + "step": 3264 + }, + { + "epoch": 0.858815019398961, + "grad_norm": 8.268610027664979, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 560797248, + "step": 3265 + }, + { + "epoch": 0.858815019398961, + "loss": 0.13858602941036224, + "loss_ce": 0.0017146880272775888, + "loss_iou": 0.57421875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 560797248, + "step": 3265 + }, + { + "epoch": 0.8590780561583481, + "grad_norm": 8.133733497165355, + "learning_rate": 5e-06, + "loss": 0.1637, + "num_input_tokens_seen": 560969220, + "step": 3266 + }, + { + "epoch": 0.8590780561583481, + "loss": 0.09112342447042465, + "loss_ce": 0.00012000648712273687, + "loss_iou": 0.59375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 560969220, + "step": 3266 + }, + { + "epoch": 0.8593410929177353, + "grad_norm": 3.6754936276624406, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 561141552, + "step": 3267 + }, + { + "epoch": 0.8593410929177353, + "loss": 0.12336836755275726, + "loss_ce": 0.0018015915993601084, + "loss_iou": 0.443359375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 561141552, + "step": 3267 + }, + { + "epoch": 0.8596041296771224, + "grad_norm": 12.378662074106767, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 561313780, + "step": 3268 + }, + { + "epoch": 0.8596041296771224, + "loss": 0.14065909385681152, + "loss_ce": 0.0051305294036865234, + "loss_iou": 0.6015625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 561313780, + "step": 3268 + }, + { + "epoch": 0.8598671664365095, + "grad_norm": 3.0561071778648485, + "learning_rate": 5e-06, + "loss": 0.103, + "num_input_tokens_seen": 561486192, + "step": 3269 + }, + { + "epoch": 0.8598671664365095, + "loss": 0.0891718789935112, + "loss_ce": 0.003921023570001125, + "loss_iou": 0.453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 561486192, + "step": 3269 + }, + { + "epoch": 0.8601302031958966, + "grad_norm": 4.439332180024113, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 561658324, + "step": 3270 + }, + { + "epoch": 0.8601302031958966, + "loss": 0.0599624440073967, + "loss_ce": 0.0006515316781587899, + "loss_iou": 0.44140625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 561658324, + "step": 3270 + }, + { + "epoch": 0.8603932399552837, + "grad_norm": 5.0489240884621305, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 561829100, + "step": 3271 + }, + { + "epoch": 0.8603932399552837, + "loss": 0.06052926927804947, + "loss_ce": 0.0006232602754607797, + "loss_iou": 0.408203125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 561829100, + "step": 3271 + }, + { + "epoch": 0.8606562767146708, + "grad_norm": 14.0110320202428, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 562001212, + "step": 3272 + }, + { + "epoch": 0.8606562767146708, + "loss": 0.05530541390180588, + "loss_ce": 0.0009230896248482168, + "loss_iou": 0.59765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 562001212, + "step": 3272 + }, + { + "epoch": 0.860919313474058, + "grad_norm": 5.01456561485217, + "learning_rate": 5e-06, + "loss": 0.1815, + "num_input_tokens_seen": 562173524, + "step": 3273 + }, + { + "epoch": 0.860919313474058, + "loss": 0.2010606825351715, + "loss_ce": 0.002330208197236061, + "loss_iou": 0.50390625, + "loss_num": 0.039794921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 562173524, + "step": 3273 + }, + { + "epoch": 0.8611823502334451, + "grad_norm": 9.881111822277372, + "learning_rate": 5e-06, + "loss": 0.1502, + "num_input_tokens_seen": 562345824, + "step": 3274 + }, + { + "epoch": 0.8611823502334451, + "loss": 0.106649249792099, + "loss_ce": 0.0010584269184619188, + "loss_iou": 0.59765625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 562345824, + "step": 3274 + }, + { + "epoch": 0.8614453869928322, + "grad_norm": 4.536681210731165, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 562517872, + "step": 3275 + }, + { + "epoch": 0.8614453869928322, + "loss": 0.07371491193771362, + "loss_ce": 0.0001065141914295964, + "loss_iou": 0.48046875, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 562517872, + "step": 3275 + }, + { + "epoch": 0.8617084237522193, + "grad_norm": 4.65141834474055, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 562689796, + "step": 3276 + }, + { + "epoch": 0.8617084237522193, + "loss": 0.09085643291473389, + "loss_ce": 0.001287340302951634, + "loss_iou": 0.55859375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 562689796, + "step": 3276 + }, + { + "epoch": 0.8619714605116064, + "grad_norm": 8.721252840556735, + "learning_rate": 5e-06, + "loss": 0.1602, + "num_input_tokens_seen": 562860476, + "step": 3277 + }, + { + "epoch": 0.8619714605116064, + "loss": 0.1055789366364479, + "loss_ce": 0.00273469858802855, + "loss_iou": 0.5234375, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 562860476, + "step": 3277 + }, + { + "epoch": 0.8622344972709937, + "grad_norm": 4.218638673445259, + "learning_rate": 5e-06, + "loss": 0.0896, + "num_input_tokens_seen": 563032560, + "step": 3278 + }, + { + "epoch": 0.8622344972709937, + "loss": 0.10979656875133514, + "loss_ce": 0.00536542059853673, + "loss_iou": 0.609375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 563032560, + "step": 3278 + }, + { + "epoch": 0.8624975340303808, + "grad_norm": 5.172843844259764, + "learning_rate": 5e-06, + "loss": 0.0986, + "num_input_tokens_seen": 563204720, + "step": 3279 + }, + { + "epoch": 0.8624975340303808, + "loss": 0.08518050611019135, + "loss_ce": 0.0019438066519796848, + "loss_iou": 0.458984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 563204720, + "step": 3279 + }, + { + "epoch": 0.8627605707897679, + "grad_norm": 4.560338661852982, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 563377056, + "step": 3280 + }, + { + "epoch": 0.8627605707897679, + "loss": 0.08894481509923935, + "loss_ce": 0.0001691804500296712, + "loss_iou": 0.578125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 563377056, + "step": 3280 + }, + { + "epoch": 0.863023607549155, + "grad_norm": 10.082366990468211, + "learning_rate": 5e-06, + "loss": 0.1264, + "num_input_tokens_seen": 563549604, + "step": 3281 + }, + { + "epoch": 0.863023607549155, + "loss": 0.1488599181175232, + "loss_ce": 0.0033978780265897512, + "loss_iou": 0.494140625, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 563549604, + "step": 3281 + }, + { + "epoch": 0.8632866443085421, + "grad_norm": 4.832733016773012, + "learning_rate": 5e-06, + "loss": 0.081, + "num_input_tokens_seen": 563721852, + "step": 3282 + }, + { + "epoch": 0.8632866443085421, + "loss": 0.14535953104496002, + "loss_ce": 0.0013928530970588326, + "loss_iou": 0.46484375, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 563721852, + "step": 3282 + }, + { + "epoch": 0.8635496810679293, + "grad_norm": 6.063993109800719, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 563893984, + "step": 3283 + }, + { + "epoch": 0.8635496810679293, + "loss": 0.06962516903877258, + "loss_ce": 0.0006554402643814683, + "loss_iou": 0.4453125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 563893984, + "step": 3283 + }, + { + "epoch": 0.8638127178273164, + "grad_norm": 4.985736636691599, + "learning_rate": 5e-06, + "loss": 0.1177, + "num_input_tokens_seen": 564062972, + "step": 3284 + }, + { + "epoch": 0.8638127178273164, + "loss": 0.11227120459079742, + "loss_ce": 0.002926721004769206, + "loss_iou": 0.625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 564062972, + "step": 3284 + }, + { + "epoch": 0.8640757545867035, + "grad_norm": 10.90397210954732, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 564235088, + "step": 3285 + }, + { + "epoch": 0.8640757545867035, + "loss": 0.1431923508644104, + "loss_ce": 0.0032692591194063425, + "loss_iou": 0.51953125, + "loss_num": 0.0279541015625, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 564235088, + "step": 3285 + }, + { + "epoch": 0.8643387913460906, + "grad_norm": 10.801058055973197, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 564407212, + "step": 3286 + }, + { + "epoch": 0.8643387913460906, + "loss": 0.17996424436569214, + "loss_ce": 0.005098515655845404, + "loss_iou": 0.48828125, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 564407212, + "step": 3286 + }, + { + "epoch": 0.8646018281054777, + "grad_norm": 3.7908688756477646, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 564579544, + "step": 3287 + }, + { + "epoch": 0.8646018281054777, + "loss": 0.1904245764017105, + "loss_ce": 0.0021311198361217976, + "loss_iou": 0.455078125, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 564579544, + "step": 3287 + }, + { + "epoch": 0.8648648648648649, + "grad_norm": 20.50727894197877, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 564751736, + "step": 3288 + }, + { + "epoch": 0.8648648648648649, + "loss": 0.13853441178798676, + "loss_ce": 0.006606926675885916, + "loss_iou": 0.34765625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 564751736, + "step": 3288 + }, + { + "epoch": 0.865127901624252, + "grad_norm": 7.515905813339519, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 564923836, + "step": 3289 + }, + { + "epoch": 0.865127901624252, + "loss": 0.09346568584442139, + "loss_ce": 0.0014246755745261908, + "loss_iou": 0.453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 564923836, + "step": 3289 + }, + { + "epoch": 0.8653909383836391, + "grad_norm": 4.659865141429364, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 565093036, + "step": 3290 + }, + { + "epoch": 0.8653909383836391, + "loss": 0.05857858434319496, + "loss_ce": 0.0009613969596102834, + "loss_iou": 0.46875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 565093036, + "step": 3290 + }, + { + "epoch": 0.8656539751430262, + "grad_norm": 7.156818926737148, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 565264968, + "step": 3291 + }, + { + "epoch": 0.8656539751430262, + "loss": 0.05671301484107971, + "loss_ce": 0.0005454107304103673, + "loss_iou": 0.4453125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 565264968, + "step": 3291 + }, + { + "epoch": 0.8659170119024133, + "grad_norm": 4.159578348893538, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 565437008, + "step": 3292 + }, + { + "epoch": 0.8659170119024133, + "loss": 0.10393117368221283, + "loss_ce": 0.0047185225412249565, + "loss_iou": 0.380859375, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 565437008, + "step": 3292 + }, + { + "epoch": 0.8661800486618005, + "grad_norm": 5.125947407861983, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 565609352, + "step": 3293 + }, + { + "epoch": 0.8661800486618005, + "loss": 0.07895916700363159, + "loss_ce": 0.004221613518893719, + "loss_iou": 0.51171875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 565609352, + "step": 3293 + }, + { + "epoch": 0.8664430854211876, + "grad_norm": 4.631562530302505, + "learning_rate": 5e-06, + "loss": 0.0987, + "num_input_tokens_seen": 565781624, + "step": 3294 + }, + { + "epoch": 0.8664430854211876, + "loss": 0.10047349333763123, + "loss_ce": 0.0010014427825808525, + "loss_iou": 0.609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 565781624, + "step": 3294 + }, + { + "epoch": 0.8667061221805747, + "grad_norm": 12.748430441696817, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 565954124, + "step": 3295 + }, + { + "epoch": 0.8667061221805747, + "loss": 0.10027378797531128, + "loss_ce": 0.0005728579708375037, + "loss_iou": 0.478515625, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 565954124, + "step": 3295 + }, + { + "epoch": 0.8669691589399618, + "grad_norm": 8.307592437145553, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 566124916, + "step": 3296 + }, + { + "epoch": 0.8669691589399618, + "loss": 0.10838001221418381, + "loss_ce": 0.006359752267599106, + "loss_iou": 0.61328125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 566124916, + "step": 3296 + }, + { + "epoch": 0.867232195699349, + "grad_norm": 4.778870142686005, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 566296912, + "step": 3297 + }, + { + "epoch": 0.867232195699349, + "loss": 0.04819861054420471, + "loss_ce": 0.00030127062927931547, + "loss_iou": 0.45703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 566296912, + "step": 3297 + }, + { + "epoch": 0.867495232458736, + "grad_norm": 7.859655636877698, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 566467596, + "step": 3298 + }, + { + "epoch": 0.867495232458736, + "loss": 0.1346224993467331, + "loss_ce": 0.0007113683386705816, + "loss_iou": 0.57421875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 566467596, + "step": 3298 + }, + { + "epoch": 0.8677582692181233, + "grad_norm": 15.137416498429843, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 566639732, + "step": 3299 + }, + { + "epoch": 0.8677582692181233, + "loss": 0.10433374345302582, + "loss_ce": 0.00313745578750968, + "loss_iou": 0.49609375, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 566639732, + "step": 3299 + }, + { + "epoch": 0.8680213059775104, + "grad_norm": 2.9506634806748657, + "learning_rate": 5e-06, + "loss": 0.118, + "num_input_tokens_seen": 566811792, + "step": 3300 + }, + { + "epoch": 0.8680213059775104, + "loss": 0.07893712818622589, + "loss_ce": 0.0004764424229506403, + "loss_iou": 0.53125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 566811792, + "step": 3300 + }, + { + "epoch": 0.8682843427368975, + "grad_norm": 21.308475468927227, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 566984152, + "step": 3301 + }, + { + "epoch": 0.8682843427368975, + "loss": 0.07327578961849213, + "loss_ce": 0.004519685637205839, + "loss_iou": 0.41015625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 566984152, + "step": 3301 + }, + { + "epoch": 0.8685473794962846, + "grad_norm": 4.854189489350901, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 567156660, + "step": 3302 + }, + { + "epoch": 0.8685473794962846, + "loss": 0.05097039043903351, + "loss_ce": 0.0012877746485173702, + "loss_iou": 0.44140625, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 567156660, + "step": 3302 + }, + { + "epoch": 0.8688104162556717, + "grad_norm": 16.314064517801537, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 567328912, + "step": 3303 + }, + { + "epoch": 0.8688104162556717, + "loss": 0.11892714351415634, + "loss_ce": 0.0012513676192611456, + "loss_iou": 0.5546875, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 567328912, + "step": 3303 + }, + { + "epoch": 0.8690734530150589, + "grad_norm": 16.431940991025456, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 567501148, + "step": 3304 + }, + { + "epoch": 0.8690734530150589, + "loss": 0.12847568094730377, + "loss_ce": 8.82247113622725e-05, + "loss_iou": 0.46875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 567501148, + "step": 3304 + }, + { + "epoch": 0.869336489774446, + "grad_norm": 3.641251656159218, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 567673160, + "step": 3305 + }, + { + "epoch": 0.869336489774446, + "loss": 0.2121918946504593, + "loss_ce": 0.0007966216653585434, + "loss_iou": 0.3984375, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 567673160, + "step": 3305 + }, + { + "epoch": 0.8695995265338331, + "grad_norm": 27.660934774490972, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 567845644, + "step": 3306 + }, + { + "epoch": 0.8695995265338331, + "loss": 0.07302013039588928, + "loss_ce": 0.00043406913755461574, + "loss_iou": 0.5078125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 567845644, + "step": 3306 + }, + { + "epoch": 0.8698625632932202, + "grad_norm": 5.415514477165426, + "learning_rate": 5e-06, + "loss": 0.1059, + "num_input_tokens_seen": 568017832, + "step": 3307 + }, + { + "epoch": 0.8698625632932202, + "loss": 0.14274545013904572, + "loss_ce": 0.0012964779743924737, + "loss_iou": 0.412109375, + "loss_num": 0.0283203125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 568017832, + "step": 3307 + }, + { + "epoch": 0.8701256000526073, + "grad_norm": 6.457549289729974, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 568189908, + "step": 3308 + }, + { + "epoch": 0.8701256000526073, + "loss": 0.17567481100559235, + "loss_ce": 0.0026706610806286335, + "loss_iou": 0.58203125, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 568189908, + "step": 3308 + }, + { + "epoch": 0.8703886368119945, + "grad_norm": 4.776984151346558, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 568358640, + "step": 3309 + }, + { + "epoch": 0.8703886368119945, + "loss": 0.050816282629966736, + "loss_ce": 0.0006148651009425521, + "loss_iou": 0.50390625, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 568358640, + "step": 3309 + }, + { + "epoch": 0.8706516735713816, + "grad_norm": 8.715556903813102, + "learning_rate": 5e-06, + "loss": 0.0681, + "num_input_tokens_seen": 568529372, + "step": 3310 + }, + { + "epoch": 0.8706516735713816, + "loss": 0.055395372211933136, + "loss_ce": 0.002645737724378705, + "loss_iou": 0.546875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 568529372, + "step": 3310 + }, + { + "epoch": 0.8709147103307687, + "grad_norm": 4.571078001011392, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 568701592, + "step": 3311 + }, + { + "epoch": 0.8709147103307687, + "loss": 0.07437048852443695, + "loss_ce": 0.0011893401388078928, + "loss_iou": 0.408203125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 568701592, + "step": 3311 + }, + { + "epoch": 0.8711777470901558, + "grad_norm": 5.629346035154349, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 568871992, + "step": 3312 + }, + { + "epoch": 0.8711777470901558, + "loss": 0.08190266788005829, + "loss_ce": 0.001977135892957449, + "loss_iou": 0.58203125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 568871992, + "step": 3312 + }, + { + "epoch": 0.8714407838495429, + "grad_norm": 5.163413374700219, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 569044108, + "step": 3313 + }, + { + "epoch": 0.8714407838495429, + "loss": 0.08449655771255493, + "loss_ce": 0.0003901080635841936, + "loss_iou": 0.515625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 569044108, + "step": 3313 + }, + { + "epoch": 0.8717038206089301, + "grad_norm": 5.300317232023734, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 569216304, + "step": 3314 + }, + { + "epoch": 0.8717038206089301, + "loss": 0.18054433166980743, + "loss_ce": 9.388441685587168e-05, + "loss_iou": 0.484375, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 569216304, + "step": 3314 + }, + { + "epoch": 0.8719668573683172, + "grad_norm": 5.650101854564751, + "learning_rate": 5e-06, + "loss": 0.101, + "num_input_tokens_seen": 569388684, + "step": 3315 + }, + { + "epoch": 0.8719668573683172, + "loss": 0.08428364247083664, + "loss_ce": 0.0036256806924939156, + "loss_iou": 0.37890625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 569388684, + "step": 3315 + }, + { + "epoch": 0.8722298941277044, + "grad_norm": 6.227695364300726, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 569558992, + "step": 3316 + }, + { + "epoch": 0.8722298941277044, + "loss": 0.18722307682037354, + "loss_ce": 0.005307785701006651, + "loss_iou": 0.55859375, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 569558992, + "step": 3316 + }, + { + "epoch": 0.8724929308870915, + "grad_norm": 5.420469146952656, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 569731372, + "step": 3317 + }, + { + "epoch": 0.8724929308870915, + "loss": 0.09945103526115417, + "loss_ce": 0.002435657661408186, + "loss_iou": 0.4921875, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 569731372, + "step": 3317 + }, + { + "epoch": 0.8727559676464786, + "grad_norm": 4.496256221139891, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 569902092, + "step": 3318 + }, + { + "epoch": 0.8727559676464786, + "loss": 0.09977222979068756, + "loss_ce": 8.65593392518349e-05, + "loss_iou": 0.5234375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 569902092, + "step": 3318 + }, + { + "epoch": 0.8730190044058658, + "grad_norm": 3.818210026049398, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 570074040, + "step": 3319 + }, + { + "epoch": 0.8730190044058658, + "loss": 0.11558879911899567, + "loss_ce": 0.0015446072211489081, + "loss_iou": 0.447265625, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 570074040, + "step": 3319 + }, + { + "epoch": 0.8732820411652529, + "grad_norm": 48.02513571155541, + "learning_rate": 5e-06, + "loss": 0.1371, + "num_input_tokens_seen": 570246368, + "step": 3320 + }, + { + "epoch": 0.8732820411652529, + "loss": 0.1751258671283722, + "loss_ce": 0.00022961836657486856, + "loss_iou": 0.56640625, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 570246368, + "step": 3320 + }, + { + "epoch": 0.87354507792464, + "grad_norm": 9.348108956967192, + "learning_rate": 5e-06, + "loss": 0.1217, + "num_input_tokens_seen": 570418348, + "step": 3321 + }, + { + "epoch": 0.87354507792464, + "loss": 0.1822582334280014, + "loss_ce": 0.001411056611686945, + "loss_iou": 0.53125, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 570418348, + "step": 3321 + }, + { + "epoch": 0.8738081146840271, + "grad_norm": 10.70912004570317, + "learning_rate": 5e-06, + "loss": 0.1067, + "num_input_tokens_seen": 570590896, + "step": 3322 + }, + { + "epoch": 0.8738081146840271, + "loss": 0.1525057554244995, + "loss_ce": 0.001626854995265603, + "loss_iou": 0.439453125, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 570590896, + "step": 3322 + }, + { + "epoch": 0.8740711514434142, + "grad_norm": 5.432623634439659, + "learning_rate": 5e-06, + "loss": 0.0846, + "num_input_tokens_seen": 570763096, + "step": 3323 + }, + { + "epoch": 0.8740711514434142, + "loss": 0.10707151144742966, + "loss_ce": 0.00158750603441149, + "loss_iou": 0.392578125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 570763096, + "step": 3323 + }, + { + "epoch": 0.8743341882028013, + "grad_norm": 5.502873741651398, + "learning_rate": 5e-06, + "loss": 0.1645, + "num_input_tokens_seen": 570935100, + "step": 3324 + }, + { + "epoch": 0.8743341882028013, + "loss": 0.0433497279882431, + "loss_ce": 0.0022425525821745396, + "loss_iou": 0.625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 570935100, + "step": 3324 + }, + { + "epoch": 0.8745972249621885, + "grad_norm": 9.035108225672646, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 571107528, + "step": 3325 + }, + { + "epoch": 0.8745972249621885, + "loss": 0.11922352015972137, + "loss_ce": 0.008475230075418949, + "loss_iou": 0.490234375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 571107528, + "step": 3325 + }, + { + "epoch": 0.8748602617215756, + "grad_norm": 4.9129107081845635, + "learning_rate": 5e-06, + "loss": 0.1196, + "num_input_tokens_seen": 571279464, + "step": 3326 + }, + { + "epoch": 0.8748602617215756, + "loss": 0.09204280376434326, + "loss_ce": 0.0020769857801496983, + "loss_iou": 0.640625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 571279464, + "step": 3326 + }, + { + "epoch": 0.8751232984809627, + "grad_norm": 4.808896887234646, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 571451676, + "step": 3327 + }, + { + "epoch": 0.8751232984809627, + "loss": 0.11273814737796783, + "loss_ce": 0.00046397349797189236, + "loss_iou": 0.439453125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 571451676, + "step": 3327 + }, + { + "epoch": 0.8753863352403498, + "grad_norm": 4.264250577253754, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 571620552, + "step": 3328 + }, + { + "epoch": 0.8753863352403498, + "loss": 0.06821378320455551, + "loss_ce": 0.0043710097670555115, + "loss_iou": 0.38671875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 571620552, + "step": 3328 + }, + { + "epoch": 0.8756493719997369, + "grad_norm": 19.986204474022973, + "learning_rate": 5e-06, + "loss": 0.1309, + "num_input_tokens_seen": 571792724, + "step": 3329 + }, + { + "epoch": 0.8756493719997369, + "loss": 0.10326668620109558, + "loss_ce": 0.002741775708273053, + "loss_iou": 0.5859375, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 571792724, + "step": 3329 + }, + { + "epoch": 0.8759124087591241, + "grad_norm": 4.794277486673457, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 571964880, + "step": 3330 + }, + { + "epoch": 0.8759124087591241, + "loss": 0.11237187683582306, + "loss_ce": 0.00040288365562446415, + "loss_iou": 0.5703125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 571964880, + "step": 3330 + }, + { + "epoch": 0.8761754455185112, + "grad_norm": 4.388563206165435, + "learning_rate": 5e-06, + "loss": 0.1008, + "num_input_tokens_seen": 572137040, + "step": 3331 + }, + { + "epoch": 0.8761754455185112, + "loss": 0.1180691048502922, + "loss_ce": 0.003506115637719631, + "loss_iou": 0.5625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 572137040, + "step": 3331 + }, + { + "epoch": 0.8764384822778983, + "grad_norm": 4.318891449603682, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 572309112, + "step": 3332 + }, + { + "epoch": 0.8764384822778983, + "loss": 0.11371566355228424, + "loss_ce": 0.0035472088493406773, + "loss_iou": 0.51171875, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 572309112, + "step": 3332 + }, + { + "epoch": 0.8767015190372854, + "grad_norm": 10.0048154448047, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 572481700, + "step": 3333 + }, + { + "epoch": 0.8767015190372854, + "loss": 0.21858729422092438, + "loss_ce": 0.0018819711403921247, + "loss_iou": 0.42578125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 572481700, + "step": 3333 + }, + { + "epoch": 0.8769645557966725, + "grad_norm": 20.86653328138894, + "learning_rate": 5e-06, + "loss": 0.1063, + "num_input_tokens_seen": 572653588, + "step": 3334 + }, + { + "epoch": 0.8769645557966725, + "loss": 0.1189296618103981, + "loss_ce": 0.002413547597825527, + "loss_iou": 0.392578125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 572653588, + "step": 3334 + }, + { + "epoch": 0.8772275925560598, + "grad_norm": 4.936814293101831, + "learning_rate": 5e-06, + "loss": 0.1146, + "num_input_tokens_seen": 572825664, + "step": 3335 + }, + { + "epoch": 0.8772275925560598, + "loss": 0.11313323676586151, + "loss_ce": 0.002995288698002696, + "loss_iou": 0.4765625, + "loss_num": 0.02197265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 572825664, + "step": 3335 + }, + { + "epoch": 0.8774906293154469, + "grad_norm": 45.48466563185734, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 572997764, + "step": 3336 + }, + { + "epoch": 0.8774906293154469, + "loss": 0.12880939245224, + "loss_ce": 0.0020393752492964268, + "loss_iou": 0.61328125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 572997764, + "step": 3336 + }, + { + "epoch": 0.877753666074834, + "grad_norm": 5.329772367724437, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 573167692, + "step": 3337 + }, + { + "epoch": 0.877753666074834, + "loss": 0.06320846080780029, + "loss_ce": 0.0007694964297115803, + "loss_iou": 0.4609375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 573167692, + "step": 3337 + }, + { + "epoch": 0.8780167028342211, + "grad_norm": 9.592551272557182, + "learning_rate": 5e-06, + "loss": 0.0773, + "num_input_tokens_seen": 573340028, + "step": 3338 + }, + { + "epoch": 0.8780167028342211, + "loss": 0.08143293112516403, + "loss_ce": 0.0008054894860833883, + "loss_iou": 0.57421875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 573340028, + "step": 3338 + }, + { + "epoch": 0.8782797395936082, + "grad_norm": 3.2912653441237016, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 573510240, + "step": 3339 + }, + { + "epoch": 0.8782797395936082, + "loss": 0.11927518248558044, + "loss_ce": 0.0026980361435562372, + "loss_iou": 0.4296875, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 573510240, + "step": 3339 + }, + { + "epoch": 0.8785427763529954, + "grad_norm": 6.074875569536377, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 573682136, + "step": 3340 + }, + { + "epoch": 0.8785427763529954, + "loss": 0.14973485469818115, + "loss_ce": 0.0035251472145318985, + "loss_iou": 0.53125, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 573682136, + "step": 3340 + }, + { + "epoch": 0.8788058131123825, + "grad_norm": 4.101578026279459, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 573854492, + "step": 3341 + }, + { + "epoch": 0.8788058131123825, + "loss": 0.09308157861232758, + "loss_ce": 0.0016814331756904721, + "loss_iou": 0.57421875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 573854492, + "step": 3341 + }, + { + "epoch": 0.8790688498717696, + "grad_norm": 3.5139785635989704, + "learning_rate": 5e-06, + "loss": 0.1205, + "num_input_tokens_seen": 574026312, + "step": 3342 + }, + { + "epoch": 0.8790688498717696, + "loss": 0.12709392607212067, + "loss_ce": 0.002124447375535965, + "loss_iou": 0.498046875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 574026312, + "step": 3342 + }, + { + "epoch": 0.8793318866311567, + "grad_norm": 8.712064685299605, + "learning_rate": 5e-06, + "loss": 0.1039, + "num_input_tokens_seen": 574198272, + "step": 3343 + }, + { + "epoch": 0.8793318866311567, + "loss": 0.07467342913150787, + "loss_ce": 0.0009124410571530461, + "loss_iou": 0.443359375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 574198272, + "step": 3343 + }, + { + "epoch": 0.8795949233905438, + "grad_norm": 3.320527799830495, + "learning_rate": 5e-06, + "loss": 0.0674, + "num_input_tokens_seen": 574370244, + "step": 3344 + }, + { + "epoch": 0.8795949233905438, + "loss": 0.04423138499259949, + "loss_ce": 0.00010297012340743095, + "loss_iou": 0.486328125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 574370244, + "step": 3344 + }, + { + "epoch": 0.879857960149931, + "grad_norm": 16.55083596075495, + "learning_rate": 5e-06, + "loss": 0.1285, + "num_input_tokens_seen": 574542464, + "step": 3345 + }, + { + "epoch": 0.879857960149931, + "loss": 0.05567498877644539, + "loss_ce": 0.0007433480932377279, + "loss_iou": 0.53125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 574542464, + "step": 3345 + }, + { + "epoch": 0.8801209969093181, + "grad_norm": 4.233870416516864, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 574714876, + "step": 3346 + }, + { + "epoch": 0.8801209969093181, + "loss": 0.19878937304019928, + "loss_ce": 0.0015847685281187296, + "loss_iou": 0.443359375, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 574714876, + "step": 3346 + }, + { + "epoch": 0.8803840336687052, + "grad_norm": 28.780242588246683, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 574886952, + "step": 3347 + }, + { + "epoch": 0.8803840336687052, + "loss": 0.11355656385421753, + "loss_ce": 0.0028998262714594603, + "loss_iou": 0.40625, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 574886952, + "step": 3347 + }, + { + "epoch": 0.8806470704280923, + "grad_norm": 3.1096156375201027, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 575059096, + "step": 3348 + }, + { + "epoch": 0.8806470704280923, + "loss": 0.049926742911338806, + "loss_ce": 0.0002136088878614828, + "loss_iou": 0.4921875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 575059096, + "step": 3348 + }, + { + "epoch": 0.8809101071874794, + "grad_norm": 11.92064516300679, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 575231232, + "step": 3349 + }, + { + "epoch": 0.8809101071874794, + "loss": 0.10825909674167633, + "loss_ce": 0.00019635571516118944, + "loss_iou": 0.482421875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 575231232, + "step": 3349 + }, + { + "epoch": 0.8811731439468665, + "grad_norm": 3.9827768470010754, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 575403592, + "step": 3350 + }, + { + "epoch": 0.8811731439468665, + "loss": 0.11462774872779846, + "loss_ce": 0.005161193665117025, + "loss_iou": 0.70703125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 575403592, + "step": 3350 + }, + { + "epoch": 0.8814361807062537, + "grad_norm": 6.779641757995467, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 575574024, + "step": 3351 + }, + { + "epoch": 0.8814361807062537, + "loss": 0.08754941821098328, + "loss_ce": 0.0006353590288199484, + "loss_iou": 0.3828125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 575574024, + "step": 3351 + }, + { + "epoch": 0.8816992174656408, + "grad_norm": 6.941492265981877, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 575746304, + "step": 3352 + }, + { + "epoch": 0.8816992174656408, + "loss": 0.08934507519006729, + "loss_ce": 0.0032244701869785786, + "loss_iou": 0.40625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 575746304, + "step": 3352 + }, + { + "epoch": 0.8819622542250279, + "grad_norm": 5.409664650483643, + "learning_rate": 5e-06, + "loss": 0.1231, + "num_input_tokens_seen": 575918724, + "step": 3353 + }, + { + "epoch": 0.8819622542250279, + "loss": 0.08703694492578506, + "loss_ce": 0.0007637504604645073, + "loss_iou": 0.482421875, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 575918724, + "step": 3353 + }, + { + "epoch": 0.882225290984415, + "grad_norm": 7.600652328701811, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 576090812, + "step": 3354 + }, + { + "epoch": 0.882225290984415, + "loss": 0.25224149227142334, + "loss_ce": 0.00353849190287292, + "loss_iou": 0.44140625, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 576090812, + "step": 3354 + }, + { + "epoch": 0.8824883277438021, + "grad_norm": 8.07764479644441, + "learning_rate": 5e-06, + "loss": 0.124, + "num_input_tokens_seen": 576262592, + "step": 3355 + }, + { + "epoch": 0.8824883277438021, + "loss": 0.08115795254707336, + "loss_ce": 0.0004999967059120536, + "loss_iou": 0.5703125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 576262592, + "step": 3355 + }, + { + "epoch": 0.8827513645031894, + "grad_norm": 9.615405351973429, + "learning_rate": 5e-06, + "loss": 0.087, + "num_input_tokens_seen": 576434568, + "step": 3356 + }, + { + "epoch": 0.8827513645031894, + "loss": 0.08256545662879944, + "loss_ce": 0.0005952401552349329, + "loss_iou": 0.5703125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 576434568, + "step": 3356 + }, + { + "epoch": 0.8830144012625765, + "grad_norm": 4.944244594714697, + "learning_rate": 5e-06, + "loss": 0.1242, + "num_input_tokens_seen": 576606792, + "step": 3357 + }, + { + "epoch": 0.8830144012625765, + "loss": 0.05560879409313202, + "loss_ce": 0.00032620219280943274, + "loss_iou": 0.50390625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 576606792, + "step": 3357 + }, + { + "epoch": 0.8832774380219636, + "grad_norm": 3.905791245202154, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 576778952, + "step": 3358 + }, + { + "epoch": 0.8832774380219636, + "loss": 0.05678252875804901, + "loss_ce": 0.0012634244048967957, + "loss_iou": 0.404296875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 576778952, + "step": 3358 + }, + { + "epoch": 0.8835404747813507, + "grad_norm": 3.3297332937077506, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 576951468, + "step": 3359 + }, + { + "epoch": 0.8835404747813507, + "loss": 0.15261146426200867, + "loss_ce": 0.0013968587154522538, + "loss_iou": 0.447265625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 576951468, + "step": 3359 + }, + { + "epoch": 0.8838035115407378, + "grad_norm": 7.82434473224169, + "learning_rate": 5e-06, + "loss": 0.0952, + "num_input_tokens_seen": 577121952, + "step": 3360 + }, + { + "epoch": 0.8838035115407378, + "loss": 0.10315507650375366, + "loss_ce": 0.0030421605333685875, + "loss_iou": 0.427734375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 577121952, + "step": 3360 + }, + { + "epoch": 0.884066548300125, + "grad_norm": 10.425535033707964, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 577293848, + "step": 3361 + }, + { + "epoch": 0.884066548300125, + "loss": 0.07322396337985992, + "loss_ce": 0.0005005778511986136, + "loss_iou": 0.546875, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 577293848, + "step": 3361 + }, + { + "epoch": 0.8843295850595121, + "grad_norm": 14.476290505686679, + "learning_rate": 5e-06, + "loss": 0.1432, + "num_input_tokens_seen": 577466084, + "step": 3362 + }, + { + "epoch": 0.8843295850595121, + "loss": 0.1955878734588623, + "loss_ce": 0.0021064176689833403, + "loss_iou": 0.5234375, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 577466084, + "step": 3362 + }, + { + "epoch": 0.8845926218188992, + "grad_norm": 7.100653555231675, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 577638416, + "step": 3363 + }, + { + "epoch": 0.8845926218188992, + "loss": 0.051397159695625305, + "loss_ce": 0.00037176761543378234, + "loss_iou": 0.462890625, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 577638416, + "step": 3363 + }, + { + "epoch": 0.8848556585782863, + "grad_norm": 17.047045722794394, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 577810576, + "step": 3364 + }, + { + "epoch": 0.8848556585782863, + "loss": 0.11024264991283417, + "loss_ce": 0.0004404087667353451, + "loss_iou": 0.482421875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 577810576, + "step": 3364 + }, + { + "epoch": 0.8851186953376734, + "grad_norm": 6.6116170819185625, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 577982704, + "step": 3365 + }, + { + "epoch": 0.8851186953376734, + "loss": 0.10162153840065002, + "loss_ce": 0.003171829041093588, + "loss_iou": 0.474609375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 577982704, + "step": 3365 + }, + { + "epoch": 0.8853817320970606, + "grad_norm": 8.013299917102648, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 578154696, + "step": 3366 + }, + { + "epoch": 0.8853817320970606, + "loss": 0.09505804628133774, + "loss_ce": 0.002620304934680462, + "loss_iou": 0.458984375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 578154696, + "step": 3366 + }, + { + "epoch": 0.8856447688564477, + "grad_norm": 34.03888154985408, + "learning_rate": 5e-06, + "loss": 0.0988, + "num_input_tokens_seen": 578326940, + "step": 3367 + }, + { + "epoch": 0.8856447688564477, + "loss": 0.10307721793651581, + "loss_ce": 0.0007899247575551271, + "loss_iou": 0.427734375, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 578326940, + "step": 3367 + }, + { + "epoch": 0.8859078056158348, + "grad_norm": 5.847556882862909, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 578499332, + "step": 3368 + }, + { + "epoch": 0.8859078056158348, + "loss": 0.14421170949935913, + "loss_ce": 0.0017556664533913136, + "loss_iou": 0.4609375, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 578499332, + "step": 3368 + }, + { + "epoch": 0.8861708423752219, + "grad_norm": 6.636470819413435, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 578671272, + "step": 3369 + }, + { + "epoch": 0.8861708423752219, + "loss": 0.128991961479187, + "loss_ce": 0.0024660732597112656, + "loss_iou": 0.421875, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 578671272, + "step": 3369 + }, + { + "epoch": 0.886433879134609, + "grad_norm": 17.54919755928276, + "learning_rate": 5e-06, + "loss": 0.1119, + "num_input_tokens_seen": 578842196, + "step": 3370 + }, + { + "epoch": 0.886433879134609, + "loss": 0.10547197610139847, + "loss_ce": 0.0005067643942311406, + "loss_iou": 0.5078125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 578842196, + "step": 3370 + }, + { + "epoch": 0.8866969158939962, + "grad_norm": 7.184938374175427, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 579014236, + "step": 3371 + }, + { + "epoch": 0.8866969158939962, + "loss": 0.08108609914779663, + "loss_ce": 0.0009774556383490562, + "loss_iou": 0.439453125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 579014236, + "step": 3371 + }, + { + "epoch": 0.8869599526533833, + "grad_norm": 6.863755671185031, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 579186596, + "step": 3372 + }, + { + "epoch": 0.8869599526533833, + "loss": 0.1200333908200264, + "loss_ce": 0.00034345051972195506, + "loss_iou": 0.47265625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 579186596, + "step": 3372 + }, + { + "epoch": 0.8872229894127704, + "grad_norm": 7.255619753434164, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 579358828, + "step": 3373 + }, + { + "epoch": 0.8872229894127704, + "loss": 0.11177671700716019, + "loss_ce": 0.0016998156206682324, + "loss_iou": 0.46484375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 579358828, + "step": 3373 + }, + { + "epoch": 0.8874860261721575, + "grad_norm": 4.645601347618675, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 579531144, + "step": 3374 + }, + { + "epoch": 0.8874860261721575, + "loss": 0.13369636237621307, + "loss_ce": 0.0010364485206082463, + "loss_iou": 0.59765625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 579531144, + "step": 3374 + }, + { + "epoch": 0.8877490629315447, + "grad_norm": 19.381762342400947, + "learning_rate": 5e-06, + "loss": 0.1611, + "num_input_tokens_seen": 579703288, + "step": 3375 + }, + { + "epoch": 0.8877490629315447, + "loss": 0.15896877646446228, + "loss_ce": 0.0007961708470247686, + "loss_iou": 0.2578125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 579703288, + "step": 3375 + }, + { + "epoch": 0.8880120996909318, + "grad_norm": 4.08558956843784, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 579875620, + "step": 3376 + }, + { + "epoch": 0.8880120996909318, + "loss": 0.06248597800731659, + "loss_ce": 0.0004437366151250899, + "loss_iou": 0.58203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 579875620, + "step": 3376 + }, + { + "epoch": 0.888275136450319, + "grad_norm": 4.308623737664192, + "learning_rate": 5e-06, + "loss": 0.0812, + "num_input_tokens_seen": 580047776, + "step": 3377 + }, + { + "epoch": 0.888275136450319, + "loss": 0.07517598569393158, + "loss_ce": 0.0003468855866231024, + "loss_iou": 0.48046875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 580047776, + "step": 3377 + }, + { + "epoch": 0.8885381732097061, + "grad_norm": 17.189878667773364, + "learning_rate": 5e-06, + "loss": 0.1126, + "num_input_tokens_seen": 580219696, + "step": 3378 + }, + { + "epoch": 0.8885381732097061, + "loss": 0.1588488519191742, + "loss_ce": 0.00040158609044738114, + "loss_iou": 0.46875, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 580219696, + "step": 3378 + }, + { + "epoch": 0.8888012099690932, + "grad_norm": 17.94232408746683, + "learning_rate": 5e-06, + "loss": 0.1435, + "num_input_tokens_seen": 580389856, + "step": 3379 + }, + { + "epoch": 0.8888012099690932, + "loss": 0.16451287269592285, + "loss_ce": 0.002464542631059885, + "loss_iou": 0.255859375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 580389856, + "step": 3379 + }, + { + "epoch": 0.8890642467284803, + "grad_norm": 24.21781414429754, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 580562140, + "step": 3380 + }, + { + "epoch": 0.8890642467284803, + "loss": 0.15492644906044006, + "loss_ce": 0.0021554557606577873, + "loss_iou": 0.515625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 580562140, + "step": 3380 + }, + { + "epoch": 0.8893272834878674, + "grad_norm": 10.546114100067816, + "learning_rate": 5e-06, + "loss": 0.1532, + "num_input_tokens_seen": 580734572, + "step": 3381 + }, + { + "epoch": 0.8893272834878674, + "loss": 0.10413020849227905, + "loss_ce": 0.0011639007134363055, + "loss_iou": 0.4453125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 580734572, + "step": 3381 + }, + { + "epoch": 0.8895903202472546, + "grad_norm": 6.233208405285995, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 580906660, + "step": 3382 + }, + { + "epoch": 0.8895903202472546, + "loss": 0.14456871151924133, + "loss_ce": 0.0018837791867554188, + "loss_iou": 0.53515625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 580906660, + "step": 3382 + }, + { + "epoch": 0.8898533570066417, + "grad_norm": 3.9495859544374516, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 581078540, + "step": 3383 + }, + { + "epoch": 0.8898533570066417, + "loss": 0.13160666823387146, + "loss_ce": 0.0018764439737424254, + "loss_iou": 0.25390625, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 581078540, + "step": 3383 + }, + { + "epoch": 0.8901163937660288, + "grad_norm": 4.909648559933354, + "learning_rate": 5e-06, + "loss": 0.0663, + "num_input_tokens_seen": 581247384, + "step": 3384 + }, + { + "epoch": 0.8901163937660288, + "loss": 0.06679339706897736, + "loss_ce": 0.00032610760536044836, + "loss_iou": 0.58984375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 581247384, + "step": 3384 + }, + { + "epoch": 0.8903794305254159, + "grad_norm": 4.667075414121075, + "learning_rate": 5e-06, + "loss": 0.1195, + "num_input_tokens_seen": 581419360, + "step": 3385 + }, + { + "epoch": 0.8903794305254159, + "loss": 0.1276930421590805, + "loss_ce": 0.0018690668512135744, + "loss_iou": 0.5, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 581419360, + "step": 3385 + }, + { + "epoch": 0.890642467284803, + "grad_norm": 4.483616610873386, + "learning_rate": 5e-06, + "loss": 0.0794, + "num_input_tokens_seen": 581589952, + "step": 3386 + }, + { + "epoch": 0.890642467284803, + "loss": 0.047724828124046326, + "loss_ce": 0.00011740828631445765, + "loss_iou": 0.50390625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 581589952, + "step": 3386 + }, + { + "epoch": 0.8909055040441902, + "grad_norm": 21.859242645746537, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 581762156, + "step": 3387 + }, + { + "epoch": 0.8909055040441902, + "loss": 0.18419209122657776, + "loss_ce": 0.0005067941965535283, + "loss_iou": 0.33984375, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 581762156, + "step": 3387 + }, + { + "epoch": 0.8911685408035773, + "grad_norm": 8.665372364747144, + "learning_rate": 5e-06, + "loss": 0.0776, + "num_input_tokens_seen": 581934152, + "step": 3388 + }, + { + "epoch": 0.8911685408035773, + "loss": 0.08677740395069122, + "loss_ce": 0.00022955110762268305, + "loss_iou": 0.50390625, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 581934152, + "step": 3388 + }, + { + "epoch": 0.8914315775629644, + "grad_norm": 4.1210884311067835, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 582106144, + "step": 3389 + }, + { + "epoch": 0.8914315775629644, + "loss": 0.13846494257450104, + "loss_ce": 0.0010137634817510843, + "loss_iou": 0.53125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 582106144, + "step": 3389 + }, + { + "epoch": 0.8916946143223515, + "grad_norm": 11.500479931217354, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 582278400, + "step": 3390 + }, + { + "epoch": 0.8916946143223515, + "loss": 0.12132581323385239, + "loss_ce": 0.002826055744662881, + "loss_iou": 0.5859375, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 582278400, + "step": 3390 + }, + { + "epoch": 0.8919576510817386, + "grad_norm": 4.055853466494655, + "learning_rate": 5e-06, + "loss": 0.094, + "num_input_tokens_seen": 582450564, + "step": 3391 + }, + { + "epoch": 0.8919576510817386, + "loss": 0.0762665793299675, + "loss_ce": 0.0018952443497255445, + "loss_iou": 0.4453125, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 582450564, + "step": 3391 + }, + { + "epoch": 0.8922206878411258, + "grad_norm": 8.665290308457982, + "learning_rate": 5e-06, + "loss": 0.1485, + "num_input_tokens_seen": 582622336, + "step": 3392 + }, + { + "epoch": 0.8922206878411258, + "loss": 0.16068054735660553, + "loss_ce": 0.0011956822127103806, + "loss_iou": 0.546875, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 582622336, + "step": 3392 + }, + { + "epoch": 0.892483724600513, + "grad_norm": 14.955097188866974, + "learning_rate": 5e-06, + "loss": 0.1142, + "num_input_tokens_seen": 582794384, + "step": 3393 + }, + { + "epoch": 0.892483724600513, + "loss": 0.08523661643266678, + "loss_ce": 0.0010691368952393532, + "loss_iou": 0.494140625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 582794384, + "step": 3393 + }, + { + "epoch": 0.8927467613599, + "grad_norm": 14.050392295123078, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 582966476, + "step": 3394 + }, + { + "epoch": 0.8927467613599, + "loss": 0.11920854449272156, + "loss_ce": 0.0003883557510562241, + "loss_iou": 0.5234375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 582966476, + "step": 3394 + }, + { + "epoch": 0.8930097981192872, + "grad_norm": 3.5189407633589225, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 583138656, + "step": 3395 + }, + { + "epoch": 0.8930097981192872, + "loss": 0.23967677354812622, + "loss_ce": 0.0011361411307007074, + "loss_iou": 0.27734375, + "loss_num": 0.0478515625, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 583138656, + "step": 3395 + }, + { + "epoch": 0.8932728348786743, + "grad_norm": 4.239345260315017, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 583310580, + "step": 3396 + }, + { + "epoch": 0.8932728348786743, + "loss": 0.045496270060539246, + "loss_ce": 0.00010137087519979104, + "loss_iou": 0.58203125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 583310580, + "step": 3396 + }, + { + "epoch": 0.8935358716380615, + "grad_norm": 4.318293293535835, + "learning_rate": 5e-06, + "loss": 0.1154, + "num_input_tokens_seen": 583482984, + "step": 3397 + }, + { + "epoch": 0.8935358716380615, + "loss": 0.11194127053022385, + "loss_ce": 0.000826766830869019, + "loss_iou": 0.44140625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 583482984, + "step": 3397 + }, + { + "epoch": 0.8937989083974486, + "grad_norm": 4.660547370469876, + "learning_rate": 5e-06, + "loss": 0.15, + "num_input_tokens_seen": 583655292, + "step": 3398 + }, + { + "epoch": 0.8937989083974486, + "loss": 0.07680627703666687, + "loss_ce": 0.00017663151083979756, + "loss_iou": 0.451171875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 583655292, + "step": 3398 + }, + { + "epoch": 0.8940619451568357, + "grad_norm": 10.828455614089098, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 583827408, + "step": 3399 + }, + { + "epoch": 0.8940619451568357, + "loss": 0.1173757016658783, + "loss_ce": 3.5622346331365407e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 583827408, + "step": 3399 + }, + { + "epoch": 0.8943249819162228, + "grad_norm": 5.042018374187092, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 583999664, + "step": 3400 + }, + { + "epoch": 0.8943249819162228, + "loss": 0.061997972428798676, + "loss_ce": 0.000367723434465006, + "loss_iou": 0.48046875, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 583999664, + "step": 3400 + }, + { + "epoch": 0.8945880186756099, + "grad_norm": 26.304936486213094, + "learning_rate": 5e-06, + "loss": 0.1601, + "num_input_tokens_seen": 584171616, + "step": 3401 + }, + { + "epoch": 0.8945880186756099, + "loss": 0.2535288333892822, + "loss_ce": 0.004078162834048271, + "loss_iou": 0.34765625, + "loss_num": 0.0498046875, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 584171616, + "step": 3401 + }, + { + "epoch": 0.894851055434997, + "grad_norm": 4.926581765464391, + "learning_rate": 5e-06, + "loss": 0.1067, + "num_input_tokens_seen": 584343844, + "step": 3402 + }, + { + "epoch": 0.894851055434997, + "loss": 0.11322697252035141, + "loss_ce": 0.0008307351381517947, + "loss_iou": NaN, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 584343844, + "step": 3402 + }, + { + "epoch": 0.8951140921943842, + "grad_norm": 7.363528943726032, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 584516156, + "step": 3403 + }, + { + "epoch": 0.8951140921943842, + "loss": 0.08316925168037415, + "loss_ce": 0.002114569302648306, + "loss_iou": 0.37109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 584516156, + "step": 3403 + }, + { + "epoch": 0.8953771289537713, + "grad_norm": 4.344737704222575, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 584688680, + "step": 3404 + }, + { + "epoch": 0.8953771289537713, + "loss": 0.061312295496463776, + "loss_ce": 0.0012231803266331553, + "loss_iou": 0.58203125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 584688680, + "step": 3404 + }, + { + "epoch": 0.8956401657131584, + "grad_norm": 9.026057514105759, + "learning_rate": 5e-06, + "loss": 0.1616, + "num_input_tokens_seen": 584860880, + "step": 3405 + }, + { + "epoch": 0.8956401657131584, + "loss": 0.09903927892446518, + "loss_ce": 0.00197811983525753, + "loss_iou": 0.52734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 584860880, + "step": 3405 + }, + { + "epoch": 0.8959032024725455, + "grad_norm": 9.19169137185048, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 585032800, + "step": 3406 + }, + { + "epoch": 0.8959032024725455, + "loss": 0.06624776124954224, + "loss_ce": 0.00032979153911583126, + "loss_iou": 0.443359375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 585032800, + "step": 3406 + }, + { + "epoch": 0.8961662392319326, + "grad_norm": 9.011492701681583, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 585203264, + "step": 3407 + }, + { + "epoch": 0.8961662392319326, + "loss": 0.21042554080486298, + "loss_ce": 0.002722906181588769, + "loss_iou": 0.40234375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 585203264, + "step": 3407 + }, + { + "epoch": 0.8964292759913198, + "grad_norm": 17.979008103100615, + "learning_rate": 5e-06, + "loss": 0.093, + "num_input_tokens_seen": 585375592, + "step": 3408 + }, + { + "epoch": 0.8964292759913198, + "loss": 0.041413549333810806, + "loss_ce": 0.0023205317556858063, + "loss_iou": 0.62890625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 585375592, + "step": 3408 + }, + { + "epoch": 0.8966923127507069, + "grad_norm": 10.115194008381021, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 585546216, + "step": 3409 + }, + { + "epoch": 0.8966923127507069, + "loss": 0.15392959117889404, + "loss_ce": 0.0011891128960996866, + "loss_iou": 0.447265625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 585546216, + "step": 3409 + }, + { + "epoch": 0.896955349510094, + "grad_norm": 7.995711146610571, + "learning_rate": 5e-06, + "loss": 0.1232, + "num_input_tokens_seen": 585718304, + "step": 3410 + }, + { + "epoch": 0.896955349510094, + "loss": 0.05361251160502434, + "loss_ce": 0.0008171012159436941, + "loss_iou": 0.4921875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 585718304, + "step": 3410 + }, + { + "epoch": 0.8972183862694811, + "grad_norm": 4.960227305854337, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 585890728, + "step": 3411 + }, + { + "epoch": 0.8972183862694811, + "loss": 0.08083316683769226, + "loss_ce": 0.0013043570797890425, + "loss_iou": 0.455078125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 585890728, + "step": 3411 + }, + { + "epoch": 0.8974814230288682, + "grad_norm": 5.883674464139159, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 586062896, + "step": 3412 + }, + { + "epoch": 0.8974814230288682, + "loss": 0.06225815415382385, + "loss_ce": 0.003023534081876278, + "loss_iou": 0.515625, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 586062896, + "step": 3412 + }, + { + "epoch": 0.8977444597882555, + "grad_norm": 5.4958687199216625, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 586235628, + "step": 3413 + }, + { + "epoch": 0.8977444597882555, + "loss": 0.08900677412748337, + "loss_ce": 0.0012687351554632187, + "loss_iou": 0.50390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 586235628, + "step": 3413 + }, + { + "epoch": 0.8980074965476426, + "grad_norm": 6.158695953111894, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 586407736, + "step": 3414 + }, + { + "epoch": 0.8980074965476426, + "loss": 0.13948456943035126, + "loss_ce": 0.0007364002522081137, + "loss_iou": 0.5625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 586407736, + "step": 3414 + }, + { + "epoch": 0.8982705333070297, + "grad_norm": 6.799680477506266, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 586579816, + "step": 3415 + }, + { + "epoch": 0.8982705333070297, + "loss": 0.1316445767879486, + "loss_ce": 0.0035775681026279926, + "loss_iou": 0.447265625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 586579816, + "step": 3415 + }, + { + "epoch": 0.8985335700664168, + "grad_norm": 4.757727392184302, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 586750092, + "step": 3416 + }, + { + "epoch": 0.8985335700664168, + "loss": 0.1803445667028427, + "loss_ce": 0.0017557005630806088, + "loss_iou": 0.30078125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 586750092, + "step": 3416 + }, + { + "epoch": 0.8987966068258039, + "grad_norm": 4.7130808920215825, + "learning_rate": 5e-06, + "loss": 0.0988, + "num_input_tokens_seen": 586920724, + "step": 3417 + }, + { + "epoch": 0.8987966068258039, + "loss": 0.04298722371459007, + "loss_ce": 0.0010102951200678945, + "loss_iou": 0.39453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 586920724, + "step": 3417 + }, + { + "epoch": 0.8990596435851911, + "grad_norm": 9.716073099163628, + "learning_rate": 5e-06, + "loss": 0.0909, + "num_input_tokens_seen": 587093308, + "step": 3418 + }, + { + "epoch": 0.8990596435851911, + "loss": 0.11212408542633057, + "loss_ce": 0.0025659759994596243, + "loss_iou": 0.78125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 587093308, + "step": 3418 + }, + { + "epoch": 0.8993226803445782, + "grad_norm": 9.72495754411886, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 587265328, + "step": 3419 + }, + { + "epoch": 0.8993226803445782, + "loss": 0.09303727746009827, + "loss_ce": 0.0017286788206547499, + "loss_iou": 0.53125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 587265328, + "step": 3419 + }, + { + "epoch": 0.8995857171039653, + "grad_norm": 27.58546147063481, + "learning_rate": 5e-06, + "loss": 0.1334, + "num_input_tokens_seen": 587435704, + "step": 3420 + }, + { + "epoch": 0.8995857171039653, + "loss": 0.20503398776054382, + "loss_ce": 0.0012986233923584223, + "loss_iou": 0.28515625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 587435704, + "step": 3420 + }, + { + "epoch": 0.8998487538633524, + "grad_norm": 3.780121082337908, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 587605756, + "step": 3421 + }, + { + "epoch": 0.8998487538633524, + "loss": 0.09629541635513306, + "loss_ce": 0.0002413361653452739, + "loss_iou": 0.435546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 587605756, + "step": 3421 + }, + { + "epoch": 0.9001117906227395, + "grad_norm": 4.905276205162047, + "learning_rate": 5e-06, + "loss": 0.1407, + "num_input_tokens_seen": 587776128, + "step": 3422 + }, + { + "epoch": 0.9001117906227395, + "loss": 0.22563423216342926, + "loss_ce": 0.003191609401255846, + "loss_iou": 0.462890625, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 587776128, + "step": 3422 + }, + { + "epoch": 0.9003748273821266, + "grad_norm": 4.24824425523058, + "learning_rate": 5e-06, + "loss": 0.0902, + "num_input_tokens_seen": 587948348, + "step": 3423 + }, + { + "epoch": 0.9003748273821266, + "loss": 0.04901735112071037, + "loss_ce": 0.0007690612110309303, + "loss_iou": 0.5, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 587948348, + "step": 3423 + }, + { + "epoch": 0.9006378641415138, + "grad_norm": 14.901804223286693, + "learning_rate": 5e-06, + "loss": 0.0734, + "num_input_tokens_seen": 588115160, + "step": 3424 + }, + { + "epoch": 0.9006378641415138, + "loss": 0.0699070394039154, + "loss_ce": 0.001684993039816618, + "loss_iou": 0.515625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 588115160, + "step": 3424 + }, + { + "epoch": 0.9009009009009009, + "grad_norm": 4.750314737648371, + "learning_rate": 5e-06, + "loss": 0.0881, + "num_input_tokens_seen": 588287148, + "step": 3425 + }, + { + "epoch": 0.9009009009009009, + "loss": 0.07479090988636017, + "loss_ce": 0.006248427089303732, + "loss_iou": 0.498046875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 588287148, + "step": 3425 + }, + { + "epoch": 0.901163937660288, + "grad_norm": 3.9857635354375183, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 588457468, + "step": 3426 + }, + { + "epoch": 0.901163937660288, + "loss": 0.04558904469013214, + "loss_ce": 0.0004535481857601553, + "loss_iou": 0.5546875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 588457468, + "step": 3426 + }, + { + "epoch": 0.9014269744196751, + "grad_norm": 7.929685479499518, + "learning_rate": 5e-06, + "loss": 0.1174, + "num_input_tokens_seen": 588627832, + "step": 3427 + }, + { + "epoch": 0.9014269744196751, + "loss": 0.11351916939020157, + "loss_ce": 0.00022265504230745137, + "loss_iou": 0.41796875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 588627832, + "step": 3427 + }, + { + "epoch": 0.9016900111790622, + "grad_norm": 5.274686939603064, + "learning_rate": 5e-06, + "loss": 0.1085, + "num_input_tokens_seen": 588797844, + "step": 3428 + }, + { + "epoch": 0.9016900111790622, + "loss": 0.07151903212070465, + "loss_ce": 0.004655018448829651, + "loss_iou": 0.2314453125, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 588797844, + "step": 3428 + }, + { + "epoch": 0.9019530479384494, + "grad_norm": 5.241669176622665, + "learning_rate": 5e-06, + "loss": 0.1302, + "num_input_tokens_seen": 588966836, + "step": 3429 + }, + { + "epoch": 0.9019530479384494, + "loss": 0.12862293422222137, + "loss_ce": 0.0008153152884915471, + "loss_iou": 0.5546875, + "loss_num": 0.0255126953125, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 588966836, + "step": 3429 + }, + { + "epoch": 0.9022160846978365, + "grad_norm": 7.584585654330743, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 589136916, + "step": 3430 + }, + { + "epoch": 0.9022160846978365, + "loss": 0.1643849015235901, + "loss_ce": 0.0007801597821526229, + "loss_iou": 0.4609375, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 589136916, + "step": 3430 + }, + { + "epoch": 0.9024791214572236, + "grad_norm": 3.532701818396997, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 589309164, + "step": 3431 + }, + { + "epoch": 0.9024791214572236, + "loss": 0.077778160572052, + "loss_ce": 0.0013621454127132893, + "loss_iou": 0.48046875, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 589309164, + "step": 3431 + }, + { + "epoch": 0.9027421582166107, + "grad_norm": 7.674994076422982, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 589481132, + "step": 3432 + }, + { + "epoch": 0.9027421582166107, + "loss": 0.09501226991415024, + "loss_ce": 0.0008197662536986172, + "loss_iou": 0.404296875, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 589481132, + "step": 3432 + }, + { + "epoch": 0.9030051949759978, + "grad_norm": 6.592301292301914, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 589649864, + "step": 3433 + }, + { + "epoch": 0.9030051949759978, + "loss": 0.06211673840880394, + "loss_ce": 0.0013867560774087906, + "loss_iou": 0.48828125, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 589649864, + "step": 3433 + }, + { + "epoch": 0.9032682317353851, + "grad_norm": 5.45319787734761, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 589822176, + "step": 3434 + }, + { + "epoch": 0.9032682317353851, + "loss": 0.06987213343381882, + "loss_ce": 0.0025808701757341623, + "loss_iou": 0.58984375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 589822176, + "step": 3434 + }, + { + "epoch": 0.9035312684947722, + "grad_norm": 35.99527436418574, + "learning_rate": 5e-06, + "loss": 0.1639, + "num_input_tokens_seen": 589992448, + "step": 3435 + }, + { + "epoch": 0.9035312684947722, + "loss": 0.2300114631652832, + "loss_ce": 0.0013890261761844158, + "loss_iou": 0.66796875, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 589992448, + "step": 3435 + }, + { + "epoch": 0.9037943052541593, + "grad_norm": 8.340774703127748, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 590162576, + "step": 3436 + }, + { + "epoch": 0.9037943052541593, + "loss": 0.09980174899101257, + "loss_ce": 0.001687736832536757, + "loss_iou": 0.478515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 590162576, + "step": 3436 + }, + { + "epoch": 0.9040573420135464, + "grad_norm": 3.893496744409823, + "learning_rate": 5e-06, + "loss": 0.1361, + "num_input_tokens_seen": 590334768, + "step": 3437 + }, + { + "epoch": 0.9040573420135464, + "loss": 0.14963126182556152, + "loss_ce": 0.002658609300851822, + "loss_iou": 0.443359375, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 590334768, + "step": 3437 + }, + { + "epoch": 0.9043203787729335, + "grad_norm": 4.512105245241041, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 590506964, + "step": 3438 + }, + { + "epoch": 0.9043203787729335, + "loss": 0.2095954418182373, + "loss_ce": 0.0010688342154026031, + "loss_iou": 0.423828125, + "loss_num": 0.041748046875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 590506964, + "step": 3438 + }, + { + "epoch": 0.9045834155323207, + "grad_norm": 10.79594707291913, + "learning_rate": 5e-06, + "loss": 0.1262, + "num_input_tokens_seen": 590679132, + "step": 3439 + }, + { + "epoch": 0.9045834155323207, + "loss": 0.056604690849781036, + "loss_ce": 0.0010932188015431166, + "loss_iou": 0.470703125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 590679132, + "step": 3439 + }, + { + "epoch": 0.9048464522917078, + "grad_norm": 7.232598771359226, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 590851488, + "step": 3440 + }, + { + "epoch": 0.9048464522917078, + "loss": 0.07088696211576462, + "loss_ce": 0.0023749994579702616, + "loss_iou": 0.396484375, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 590851488, + "step": 3440 + }, + { + "epoch": 0.9051094890510949, + "grad_norm": 5.761072356819141, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 591023672, + "step": 3441 + }, + { + "epoch": 0.9051094890510949, + "loss": 0.05779781565070152, + "loss_ce": 0.002149013802409172, + "loss_iou": 0.48828125, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 591023672, + "step": 3441 + }, + { + "epoch": 0.905372525810482, + "grad_norm": 5.464589960258587, + "learning_rate": 5e-06, + "loss": 0.0756, + "num_input_tokens_seen": 591196064, + "step": 3442 + }, + { + "epoch": 0.905372525810482, + "loss": 0.11280819773674011, + "loss_ce": 0.004058802034705877, + "loss_iou": 0.392578125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 591196064, + "step": 3442 + }, + { + "epoch": 0.9056355625698691, + "grad_norm": 10.0338321043045, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 591368108, + "step": 3443 + }, + { + "epoch": 0.9056355625698691, + "loss": 0.11732570827007294, + "loss_ce": 0.0005044231074862182, + "loss_iou": 0.3671875, + "loss_num": 0.0234375, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 591368108, + "step": 3443 + }, + { + "epoch": 0.9058985993292563, + "grad_norm": 4.9322678909564805, + "learning_rate": 5e-06, + "loss": 0.1497, + "num_input_tokens_seen": 591539968, + "step": 3444 + }, + { + "epoch": 0.9058985993292563, + "loss": 0.10426676273345947, + "loss_ce": 0.0025516818277537823, + "loss_iou": 0.478515625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 591539968, + "step": 3444 + }, + { + "epoch": 0.9061616360886434, + "grad_norm": 10.20363728877728, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 591709020, + "step": 3445 + }, + { + "epoch": 0.9061616360886434, + "loss": 0.049313947558403015, + "loss_ce": 0.0002569416828919202, + "loss_iou": 0.515625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 591709020, + "step": 3445 + }, + { + "epoch": 0.9064246728480305, + "grad_norm": 8.985092426433443, + "learning_rate": 5e-06, + "loss": 0.1115, + "num_input_tokens_seen": 591881092, + "step": 3446 + }, + { + "epoch": 0.9064246728480305, + "loss": 0.0865587666630745, + "loss_ce": 0.0004686739994212985, + "loss_iou": 0.462890625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 591881092, + "step": 3446 + }, + { + "epoch": 0.9066877096074176, + "grad_norm": 27.73232811749845, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 592053524, + "step": 3447 + }, + { + "epoch": 0.9066877096074176, + "loss": 0.10780518501996994, + "loss_ce": 0.0006884862086735666, + "loss_iou": 0.5, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 592053524, + "step": 3447 + }, + { + "epoch": 0.9069507463668047, + "grad_norm": 11.824522545760333, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 592225468, + "step": 3448 + }, + { + "epoch": 0.9069507463668047, + "loss": 0.0678405836224556, + "loss_ce": 0.002670294838026166, + "loss_iou": 0.546875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 592225468, + "step": 3448 + }, + { + "epoch": 0.9072137831261918, + "grad_norm": 5.549108113367569, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 592397784, + "step": 3449 + }, + { + "epoch": 0.9072137831261918, + "loss": 0.06828893721103668, + "loss_ce": 0.0015164725482463837, + "loss_iou": 0.482421875, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 592397784, + "step": 3449 + }, + { + "epoch": 0.907476819885579, + "grad_norm": 34.02061333154032, + "learning_rate": 5e-06, + "loss": 0.1082, + "num_input_tokens_seen": 592569716, + "step": 3450 + }, + { + "epoch": 0.907476819885579, + "loss": 0.18558475375175476, + "loss_ce": 0.002723418176174164, + "loss_iou": 0.4296875, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 592569716, + "step": 3450 + }, + { + "epoch": 0.9077398566449661, + "grad_norm": 8.419964301658604, + "learning_rate": 5e-06, + "loss": 0.1118, + "num_input_tokens_seen": 592742332, + "step": 3451 + }, + { + "epoch": 0.9077398566449661, + "loss": 0.10704197734594345, + "loss_ce": 0.0033432499039918184, + "loss_iou": 0.447265625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 592742332, + "step": 3451 + }, + { + "epoch": 0.9080028934043533, + "grad_norm": 6.543315599280218, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 592914548, + "step": 3452 + }, + { + "epoch": 0.9080028934043533, + "loss": 0.1184120774269104, + "loss_ce": 0.00665670819580555, + "loss_iou": 0.482421875, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 592914548, + "step": 3452 + }, + { + "epoch": 0.9082659301637404, + "grad_norm": 17.663325985728193, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 593086732, + "step": 3453 + }, + { + "epoch": 0.9082659301637404, + "loss": 0.10038851201534271, + "loss_ce": 0.0001993027253774926, + "loss_iou": 0.48046875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 593086732, + "step": 3453 + }, + { + "epoch": 0.9085289669231275, + "grad_norm": 4.446508836333715, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 593257004, + "step": 3454 + }, + { + "epoch": 0.9085289669231275, + "loss": 0.10031658411026001, + "loss_ce": 0.0005546216852962971, + "loss_iou": 0.51171875, + "loss_num": 0.02001953125, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 593257004, + "step": 3454 + }, + { + "epoch": 0.9087920036825147, + "grad_norm": 33.88108016172298, + "learning_rate": 5e-06, + "loss": 0.1243, + "num_input_tokens_seen": 593429128, + "step": 3455 + }, + { + "epoch": 0.9087920036825147, + "loss": 0.08902574330568314, + "loss_ce": 0.0005247698863968253, + "loss_iou": 0.326171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 593429128, + "step": 3455 + }, + { + "epoch": 0.9090550404419018, + "grad_norm": 5.647244857793579, + "learning_rate": 5e-06, + "loss": 0.1055, + "num_input_tokens_seen": 593601516, + "step": 3456 + }, + { + "epoch": 0.9090550404419018, + "loss": 0.05231146514415741, + "loss_ce": 0.001621769741177559, + "loss_iou": 0.42578125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 593601516, + "step": 3456 + }, + { + "epoch": 0.9093180772012889, + "grad_norm": 9.778948998175075, + "learning_rate": 5e-06, + "loss": 0.1366, + "num_input_tokens_seen": 593773792, + "step": 3457 + }, + { + "epoch": 0.9093180772012889, + "loss": 0.09074349701404572, + "loss_ce": 0.0006250919541344047, + "loss_iou": 0.3359375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 593773792, + "step": 3457 + }, + { + "epoch": 0.909581113960676, + "grad_norm": 8.900238633139468, + "learning_rate": 5e-06, + "loss": 0.0842, + "num_input_tokens_seen": 593945832, + "step": 3458 + }, + { + "epoch": 0.909581113960676, + "loss": 0.10212016105651855, + "loss_ce": 0.0015037069097161293, + "loss_iou": 0.515625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 593945832, + "step": 3458 + }, + { + "epoch": 0.9098441507200631, + "grad_norm": 5.072537904030542, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 594118188, + "step": 3459 + }, + { + "epoch": 0.9098441507200631, + "loss": 0.051810335367918015, + "loss_ce": 0.0029516899958252907, + "loss_iou": 0.41796875, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 594118188, + "step": 3459 + }, + { + "epoch": 0.9101071874794503, + "grad_norm": 4.7625991934389305, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 594290712, + "step": 3460 + }, + { + "epoch": 0.9101071874794503, + "loss": 0.07667580991983414, + "loss_ce": 0.00038186419988051057, + "loss_iou": 0.49609375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 594290712, + "step": 3460 + }, + { + "epoch": 0.9103702242388374, + "grad_norm": 10.907454103811546, + "learning_rate": 5e-06, + "loss": 0.1191, + "num_input_tokens_seen": 594461168, + "step": 3461 + }, + { + "epoch": 0.9103702242388374, + "loss": 0.07658274471759796, + "loss_ce": 0.00019724905723705888, + "loss_iou": 0.455078125, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 594461168, + "step": 3461 + }, + { + "epoch": 0.9106332609982245, + "grad_norm": 6.097275668896845, + "learning_rate": 5e-06, + "loss": 0.1622, + "num_input_tokens_seen": 594633236, + "step": 3462 + }, + { + "epoch": 0.9106332609982245, + "loss": 0.16042682528495789, + "loss_ce": 0.0004689389606937766, + "loss_iou": 0.43359375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 594633236, + "step": 3462 + }, + { + "epoch": 0.9108962977576116, + "grad_norm": 4.28871295829322, + "learning_rate": 5e-06, + "loss": 0.1378, + "num_input_tokens_seen": 594805240, + "step": 3463 + }, + { + "epoch": 0.9108962977576116, + "loss": 0.10126248002052307, + "loss_ce": 0.0024465657770633698, + "loss_iou": 0.3828125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 594805240, + "step": 3463 + }, + { + "epoch": 0.9111593345169987, + "grad_norm": 29.617769917448427, + "learning_rate": 5e-06, + "loss": 0.1396, + "num_input_tokens_seen": 594977116, + "step": 3464 + }, + { + "epoch": 0.9111593345169987, + "loss": 0.08838079869747162, + "loss_ce": 0.0014514753129333258, + "loss_iou": 0.421875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 594977116, + "step": 3464 + }, + { + "epoch": 0.9114223712763859, + "grad_norm": 12.912626025390432, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 595148996, + "step": 3465 + }, + { + "epoch": 0.9114223712763859, + "loss": 0.05163312330842018, + "loss_ce": 0.0015690373256802559, + "loss_iou": 0.55859375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 595148996, + "step": 3465 + }, + { + "epoch": 0.911685408035773, + "grad_norm": 3.7162474056368, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 595321500, + "step": 3466 + }, + { + "epoch": 0.911685408035773, + "loss": 0.12145687639713287, + "loss_ce": 0.0029113469645380974, + "loss_iou": 0.3828125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 595321500, + "step": 3466 + }, + { + "epoch": 0.9119484447951601, + "grad_norm": 26.611038275341258, + "learning_rate": 5e-06, + "loss": 0.0815, + "num_input_tokens_seen": 595493996, + "step": 3467 + }, + { + "epoch": 0.9119484447951601, + "loss": 0.08623991906642914, + "loss_ce": 8.879496454028413e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 595493996, + "step": 3467 + }, + { + "epoch": 0.9122114815545472, + "grad_norm": 3.8019745447441835, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 595666232, + "step": 3468 + }, + { + "epoch": 0.9122114815545472, + "loss": 0.10731638222932816, + "loss_ce": 0.0009015857940539718, + "loss_iou": 0.546875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 595666232, + "step": 3468 + }, + { + "epoch": 0.9124745183139343, + "grad_norm": 10.09490833472444, + "learning_rate": 5e-06, + "loss": 0.126, + "num_input_tokens_seen": 595838532, + "step": 3469 + }, + { + "epoch": 0.9124745183139343, + "loss": 0.16073833405971527, + "loss_ce": 0.0017417498165741563, + "loss_iou": 0.58984375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 595838532, + "step": 3469 + }, + { + "epoch": 0.9127375550733215, + "grad_norm": 2.9635391739801356, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 596010728, + "step": 3470 + }, + { + "epoch": 0.9127375550733215, + "loss": 0.09987487643957138, + "loss_ce": 0.0005096413660794497, + "loss_iou": 0.5625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 596010728, + "step": 3470 + }, + { + "epoch": 0.9130005918327087, + "grad_norm": 5.819990486053208, + "learning_rate": 5e-06, + "loss": 0.077, + "num_input_tokens_seen": 596183036, + "step": 3471 + }, + { + "epoch": 0.9130005918327087, + "loss": 0.08611226826906204, + "loss_ce": 0.0005867574363946915, + "loss_iou": 0.4609375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 596183036, + "step": 3471 + }, + { + "epoch": 0.9132636285920958, + "grad_norm": 13.706964401152458, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 596353748, + "step": 3472 + }, + { + "epoch": 0.9132636285920958, + "loss": 0.11669529974460602, + "loss_ce": 0.007839101366698742, + "loss_iou": 0.490234375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 596353748, + "step": 3472 + }, + { + "epoch": 0.9135266653514829, + "grad_norm": 12.86793443272329, + "learning_rate": 5e-06, + "loss": 0.0992, + "num_input_tokens_seen": 596525804, + "step": 3473 + }, + { + "epoch": 0.9135266653514829, + "loss": 0.14639021456241608, + "loss_ce": 0.002057329285889864, + "loss_iou": 0.5390625, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 596525804, + "step": 3473 + }, + { + "epoch": 0.91378970211087, + "grad_norm": 3.549687957056835, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 596698176, + "step": 3474 + }, + { + "epoch": 0.91378970211087, + "loss": 0.07896921038627625, + "loss_ce": 0.0026447533164173365, + "loss_iou": 0.37890625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 596698176, + "step": 3474 + }, + { + "epoch": 0.9140527388702571, + "grad_norm": 7.626577083859758, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 596870272, + "step": 3475 + }, + { + "epoch": 0.9140527388702571, + "loss": 0.110136017203331, + "loss_ce": 0.000928854919038713, + "loss_iou": 0.453125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 596870272, + "step": 3475 + }, + { + "epoch": 0.9143157756296443, + "grad_norm": 4.749367923818828, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 597042624, + "step": 3476 + }, + { + "epoch": 0.9143157756296443, + "loss": 0.1048622876405716, + "loss_ce": 0.0006752688204869628, + "loss_iou": 0.48046875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 597042624, + "step": 3476 + }, + { + "epoch": 0.9145788123890314, + "grad_norm": 7.997778290032009, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 597214744, + "step": 3477 + }, + { + "epoch": 0.9145788123890314, + "loss": 0.0939270555973053, + "loss_ce": 0.00036016173544339836, + "loss_iou": 0.40234375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 597214744, + "step": 3477 + }, + { + "epoch": 0.9148418491484185, + "grad_norm": 9.635212836595876, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 597387080, + "step": 3478 + }, + { + "epoch": 0.9148418491484185, + "loss": 0.12316415458917618, + "loss_ce": 0.0010938385967165232, + "loss_iou": 0.68359375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 597387080, + "step": 3478 + }, + { + "epoch": 0.9151048859078056, + "grad_norm": 3.963240318321649, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 597559392, + "step": 3479 + }, + { + "epoch": 0.9151048859078056, + "loss": 0.05641660839319229, + "loss_ce": 0.0005084058502689004, + "loss_iou": 0.498046875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 597559392, + "step": 3479 + }, + { + "epoch": 0.9153679226671927, + "grad_norm": 3.4338009584504947, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 597731528, + "step": 3480 + }, + { + "epoch": 0.9153679226671927, + "loss": 0.09766215085983276, + "loss_ce": 0.001623332966119051, + "loss_iou": 0.443359375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 597731528, + "step": 3480 + }, + { + "epoch": 0.9156309594265799, + "grad_norm": 7.619842626421138, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 597903520, + "step": 3481 + }, + { + "epoch": 0.9156309594265799, + "loss": 0.04850924387574196, + "loss_ce": 0.0030533126555383205, + "loss_iou": 0.3984375, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 597903520, + "step": 3481 + }, + { + "epoch": 0.915893996185967, + "grad_norm": 3.2410150384393765, + "learning_rate": 5e-06, + "loss": 0.0627, + "num_input_tokens_seen": 598075560, + "step": 3482 + }, + { + "epoch": 0.915893996185967, + "loss": 0.06452830880880356, + "loss_ce": 0.00015147785597946495, + "loss_iou": NaN, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 598075560, + "step": 3482 + }, + { + "epoch": 0.9161570329453541, + "grad_norm": 3.618001442656621, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 598247796, + "step": 3483 + }, + { + "epoch": 0.9161570329453541, + "loss": 0.07420238852500916, + "loss_ce": 0.00047191951307468116, + "loss_iou": 0.51171875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 598247796, + "step": 3483 + }, + { + "epoch": 0.9164200697047412, + "grad_norm": 10.045861260608898, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 598419940, + "step": 3484 + }, + { + "epoch": 0.9164200697047412, + "loss": 0.1423761397600174, + "loss_ce": 0.0001794886775314808, + "loss_iou": NaN, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 598419940, + "step": 3484 + }, + { + "epoch": 0.9166831064641283, + "grad_norm": 13.006322984837828, + "learning_rate": 5e-06, + "loss": 0.1031, + "num_input_tokens_seen": 598592140, + "step": 3485 + }, + { + "epoch": 0.9166831064641283, + "loss": 0.08574345707893372, + "loss_ce": 0.0002637250581756234, + "loss_iou": 0.6640625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 598592140, + "step": 3485 + }, + { + "epoch": 0.9169461432235155, + "grad_norm": 7.24999783542604, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 598764500, + "step": 3486 + }, + { + "epoch": 0.9169461432235155, + "loss": 0.0942230224609375, + "loss_ce": 0.002975467825308442, + "loss_iou": 0.41796875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 598764500, + "step": 3486 + }, + { + "epoch": 0.9172091799829026, + "grad_norm": 4.120990685906085, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 598936608, + "step": 3487 + }, + { + "epoch": 0.9172091799829026, + "loss": 0.08670195192098618, + "loss_ce": 0.0005203153123147786, + "loss_iou": 0.6171875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 598936608, + "step": 3487 + }, + { + "epoch": 0.9174722167422897, + "grad_norm": 7.691079480924726, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 599109116, + "step": 3488 + }, + { + "epoch": 0.9174722167422897, + "loss": 0.07726689428091049, + "loss_ce": 0.0006982971681281924, + "loss_iou": 0.48828125, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 599109116, + "step": 3488 + }, + { + "epoch": 0.9177352535016768, + "grad_norm": 5.641902324014326, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 599281296, + "step": 3489 + }, + { + "epoch": 0.9177352535016768, + "loss": 0.07277127355337143, + "loss_ce": 0.0036947373300790787, + "loss_iou": 0.375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 599281296, + "step": 3489 + }, + { + "epoch": 0.9179982902610639, + "grad_norm": 5.439497936856968, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 599450632, + "step": 3490 + }, + { + "epoch": 0.9179982902610639, + "loss": 0.08008137345314026, + "loss_ce": 0.0042757089249789715, + "loss_iou": 0.44140625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 599450632, + "step": 3490 + }, + { + "epoch": 0.9182613270204512, + "grad_norm": 3.272614734962344, + "learning_rate": 5e-06, + "loss": 0.1474, + "num_input_tokens_seen": 599622648, + "step": 3491 + }, + { + "epoch": 0.9182613270204512, + "loss": 0.21320411562919617, + "loss_ce": 0.002327651483938098, + "loss_iou": 0.296875, + "loss_num": 0.042236328125, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 599622648, + "step": 3491 + }, + { + "epoch": 0.9185243637798383, + "grad_norm": 4.968216314604122, + "learning_rate": 5e-06, + "loss": 0.0668, + "num_input_tokens_seen": 599793016, + "step": 3492 + }, + { + "epoch": 0.9185243637798383, + "loss": 0.05503164976835251, + "loss_ce": 0.001076570013538003, + "loss_iou": 0.435546875, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 599793016, + "step": 3492 + }, + { + "epoch": 0.9187874005392254, + "grad_norm": 13.600408653665847, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 599965196, + "step": 3493 + }, + { + "epoch": 0.9187874005392254, + "loss": 0.2306230664253235, + "loss_ce": 0.002626231173053384, + "loss_iou": 0.453125, + "loss_num": 0.045654296875, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 599965196, + "step": 3493 + }, + { + "epoch": 0.9190504372986125, + "grad_norm": 13.080198547497895, + "learning_rate": 5e-06, + "loss": 0.1038, + "num_input_tokens_seen": 600137328, + "step": 3494 + }, + { + "epoch": 0.9190504372986125, + "loss": 0.0880010575056076, + "loss_ce": 0.0009954443667083979, + "loss_iou": 0.486328125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 600137328, + "step": 3494 + }, + { + "epoch": 0.9193134740579996, + "grad_norm": 6.526249469256959, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 600309400, + "step": 3495 + }, + { + "epoch": 0.9193134740579996, + "loss": 0.11348491907119751, + "loss_ce": 0.000966615742072463, + "loss_iou": 0.515625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 600309400, + "step": 3495 + }, + { + "epoch": 0.9195765108173868, + "grad_norm": 8.360743966936655, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 600481616, + "step": 3496 + }, + { + "epoch": 0.9195765108173868, + "loss": 0.09168469160795212, + "loss_ce": 0.0012611029669642448, + "loss_iou": 0.5390625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 600481616, + "step": 3496 + }, + { + "epoch": 0.9198395475767739, + "grad_norm": 6.554644415896188, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 600653744, + "step": 3497 + }, + { + "epoch": 0.9198395475767739, + "loss": 0.11767074465751648, + "loss_ce": 0.0016429107636213303, + "loss_iou": 0.59375, + "loss_num": 0.023193359375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 600653744, + "step": 3497 + }, + { + "epoch": 0.920102584336161, + "grad_norm": 16.679597431920058, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 600824128, + "step": 3498 + }, + { + "epoch": 0.920102584336161, + "loss": 0.14813083410263062, + "loss_ce": 0.0007919695926830173, + "loss_iou": NaN, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 600824128, + "step": 3498 + }, + { + "epoch": 0.9203656210955481, + "grad_norm": 4.858084855581349, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 600996452, + "step": 3499 + }, + { + "epoch": 0.9203656210955481, + "loss": 0.10890492051839828, + "loss_ce": 0.0007811367395333946, + "loss_iou": 0.5625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 600996452, + "step": 3499 + }, + { + "epoch": 0.9206286578549352, + "grad_norm": 5.0714335035388896, + "learning_rate": 5e-06, + "loss": 0.1338, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_websight_new_CIoU": 0.8971402049064636, + "eval_websight_new_GIoU": 0.8995657861232758, + "eval_websight_new_IoU": 0.9006073176860809, + "eval_websight_new_MAE_all": 0.013850971590727568, + "eval_websight_new_MAE_h": 0.00682047987356782, + "eval_websight_new_MAE_w": 0.02169650699943304, + "eval_websight_new_MAE_x": 0.022679010406136513, + "eval_websight_new_MAE_y": 0.004207887570373714, + "eval_websight_new_NUM_probability": 0.9999927878379822, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.07072407752275467, + "eval_websight_new_loss_ce": 9.538403446640586e-06, + "eval_websight_new_loss_iou": 0.32720947265625, + "eval_websight_new_loss_num": 0.012699127197265625, + "eval_websight_new_loss_xval": 0.06354522705078125, + "eval_websight_new_runtime": 56.2114, + "eval_websight_new_samples_per_second": 0.889, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_seeclick_CIoU": 0.6764970123767853, + "eval_seeclick_GIoU": 0.6781544089317322, + "eval_seeclick_IoU": 0.6978051662445068, + "eval_seeclick_MAE_all": 0.040375180542469025, + "eval_seeclick_MAE_h": 0.021458005532622337, + "eval_seeclick_MAE_w": 0.05631308630108833, + "eval_seeclick_MAE_x": 0.06287308409810066, + "eval_seeclick_MAE_y": 0.02085655089467764, + "eval_seeclick_NUM_probability": 0.9999865889549255, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.17490381002426147, + "eval_seeclick_loss_ce": 0.008978934027254581, + "eval_seeclick_loss_iou": 0.46295166015625, + "eval_seeclick_loss_num": 0.031360626220703125, + "eval_seeclick_loss_xval": 0.1567230224609375, + "eval_seeclick_runtime": 74.0396, + "eval_seeclick_samples_per_second": 0.581, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_icons_CIoU": 0.8881092965602875, + "eval_icons_GIoU": 0.8860294818878174, + "eval_icons_IoU": 0.8926480114459991, + "eval_icons_MAE_all": 0.0159080708399415, + "eval_icons_MAE_h": 0.020047838799655437, + "eval_icons_MAE_w": 0.015083736274391413, + "eval_icons_MAE_x": 0.013403147924691439, + "eval_icons_MAE_y": 0.015097561292350292, + "eval_icons_NUM_probability": 0.9999927878379822, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.060060471296310425, + "eval_icons_loss_ce": 9.6267791604987e-06, + "eval_icons_loss_iou": 0.605712890625, + "eval_icons_loss_num": 0.01105499267578125, + "eval_icons_loss_xval": 0.05532073974609375, + "eval_icons_runtime": 80.8235, + "eval_icons_samples_per_second": 0.619, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "eval_screenspot_CIoU": 0.5590948661168417, + "eval_screenspot_GIoU": 0.5591723521550497, + "eval_screenspot_IoU": 0.5979611476262411, + "eval_screenspot_MAE_all": 0.08105809738238652, + "eval_screenspot_MAE_h": 0.05730322003364563, + "eval_screenspot_MAE_w": 0.1365982194741567, + "eval_screenspot_MAE_x": 0.0790914719303449, + "eval_screenspot_MAE_y": 0.05123948057492574, + "eval_screenspot_NUM_probability": 0.9999733567237854, + "eval_screenspot_inside_bbox": 0.8841666579246521, + "eval_screenspot_loss": 0.9397080540657043, + "eval_screenspot_loss_ce": 0.5959697167078654, + "eval_screenspot_loss_iou": 0.539306640625, + "eval_screenspot_loss_num": 0.06738789876302083, + "eval_screenspot_loss_xval": 0.3368326822916667, + "eval_screenspot_runtime": 151.1273, + "eval_screenspot_samples_per_second": 0.589, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9206286578549352, + "loss": 0.9230542182922363, + "loss_ce": 0.5880932807922363, + "loss_iou": 0.435546875, + "loss_num": 0.06689453125, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 601168700, + "step": 3500 + }, + { + "epoch": 0.9208916946143223, + "grad_norm": 3.0758731887113147, + "learning_rate": 5e-06, + "loss": 0.0803, + "num_input_tokens_seen": 601340840, + "step": 3501 + }, + { + "epoch": 0.9208916946143223, + "loss": 0.07921823859214783, + "loss_ce": 0.0005973259685561061, + "loss_iou": 0.474609375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 601340840, + "step": 3501 + }, + { + "epoch": 0.9211547313737095, + "grad_norm": 12.765305175609484, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 601513112, + "step": 3502 + }, + { + "epoch": 0.9211547313737095, + "loss": 0.13090460002422333, + "loss_ce": 0.002868090523406863, + "loss_iou": 0.35546875, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 601513112, + "step": 3502 + }, + { + "epoch": 0.9214177681330966, + "grad_norm": 3.071857580938524, + "learning_rate": 5e-06, + "loss": 0.0751, + "num_input_tokens_seen": 601685348, + "step": 3503 + }, + { + "epoch": 0.9214177681330966, + "loss": 0.11101450026035309, + "loss_ce": 0.002280366839841008, + "loss_iou": 0.48046875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 601685348, + "step": 3503 + }, + { + "epoch": 0.9216808048924837, + "grad_norm": 3.8727649532640296, + "learning_rate": 5e-06, + "loss": 0.0905, + "num_input_tokens_seen": 601857460, + "step": 3504 + }, + { + "epoch": 0.9216808048924837, + "loss": 0.0590723380446434, + "loss_ce": 0.0022028274834156036, + "loss_iou": 0.421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 601857460, + "step": 3504 + }, + { + "epoch": 0.9219438416518708, + "grad_norm": 12.18479200262375, + "learning_rate": 5e-06, + "loss": 0.0989, + "num_input_tokens_seen": 602029992, + "step": 3505 + }, + { + "epoch": 0.9219438416518708, + "loss": 0.12037432193756104, + "loss_ce": 0.0029426885303109884, + "loss_iou": 0.5625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 602029992, + "step": 3505 + }, + { + "epoch": 0.9222068784112579, + "grad_norm": 4.289786395943063, + "learning_rate": 5e-06, + "loss": 0.1091, + "num_input_tokens_seen": 602202184, + "step": 3506 + }, + { + "epoch": 0.9222068784112579, + "loss": 0.16438013315200806, + "loss_ce": 0.00022608340077567846, + "loss_iou": 0.453125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 602202184, + "step": 3506 + }, + { + "epoch": 0.9224699151706451, + "grad_norm": 27.091794274961455, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 602372792, + "step": 3507 + }, + { + "epoch": 0.9224699151706451, + "loss": 0.081387460231781, + "loss_ce": 0.0013398483861237764, + "loss_iou": 0.640625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 602372792, + "step": 3507 + }, + { + "epoch": 0.9227329519300322, + "grad_norm": 8.942524792614973, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 602542048, + "step": 3508 + }, + { + "epoch": 0.9227329519300322, + "loss": 0.11642280220985413, + "loss_ce": 0.005247259978204966, + "loss_iou": 0.52734375, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 602542048, + "step": 3508 + }, + { + "epoch": 0.9229959886894193, + "grad_norm": 5.673853173524807, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 602714148, + "step": 3509 + }, + { + "epoch": 0.9229959886894193, + "loss": 0.22935867309570312, + "loss_ce": 0.0008430513553321362, + "loss_iou": 0.396484375, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 602714148, + "step": 3509 + }, + { + "epoch": 0.9232590254488064, + "grad_norm": 4.438849137936148, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 602886264, + "step": 3510 + }, + { + "epoch": 0.9232590254488064, + "loss": 0.1615859568119049, + "loss_ce": 0.0012771158944815397, + "loss_iou": 0.376953125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 602886264, + "step": 3510 + }, + { + "epoch": 0.9235220622081936, + "grad_norm": 4.872038444582204, + "learning_rate": 5e-06, + "loss": 0.1038, + "num_input_tokens_seen": 603058732, + "step": 3511 + }, + { + "epoch": 0.9235220622081936, + "loss": 0.13943278789520264, + "loss_ce": 0.0005015181959606707, + "loss_iou": 0.423828125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 603058732, + "step": 3511 + }, + { + "epoch": 0.9237850989675808, + "grad_norm": 4.92552213324199, + "learning_rate": 5e-06, + "loss": 0.1144, + "num_input_tokens_seen": 603230928, + "step": 3512 + }, + { + "epoch": 0.9237850989675808, + "loss": 0.14707276225090027, + "loss_ce": 0.0023278831504285336, + "loss_iou": 0.40234375, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 603230928, + "step": 3512 + }, + { + "epoch": 0.9240481357269679, + "grad_norm": 8.817456239528582, + "learning_rate": 5e-06, + "loss": 0.0967, + "num_input_tokens_seen": 603403300, + "step": 3513 + }, + { + "epoch": 0.9240481357269679, + "loss": 0.12186002731323242, + "loss_ce": 0.0018343898700550199, + "loss_iou": 0.5625, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 603403300, + "step": 3513 + }, + { + "epoch": 0.924311172486355, + "grad_norm": 10.676170008596674, + "learning_rate": 5e-06, + "loss": 0.1438, + "num_input_tokens_seen": 603575704, + "step": 3514 + }, + { + "epoch": 0.924311172486355, + "loss": 0.15692317485809326, + "loss_ce": 0.004670977126806974, + "loss_iou": NaN, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 603575704, + "step": 3514 + }, + { + "epoch": 0.9245742092457421, + "grad_norm": 11.658802940011123, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 603745852, + "step": 3515 + }, + { + "epoch": 0.9245742092457421, + "loss": 0.08981953561306, + "loss_ce": 0.00015888996131252497, + "loss_iou": 0.53515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 603745852, + "step": 3515 + }, + { + "epoch": 0.9248372460051292, + "grad_norm": 11.699348951917594, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 603918360, + "step": 3516 + }, + { + "epoch": 0.9248372460051292, + "loss": 0.1186264157295227, + "loss_ce": 0.0009811592753976583, + "loss_iou": 0.416015625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 603918360, + "step": 3516 + }, + { + "epoch": 0.9251002827645164, + "grad_norm": 16.59746302136246, + "learning_rate": 5e-06, + "loss": 0.1447, + "num_input_tokens_seen": 604090468, + "step": 3517 + }, + { + "epoch": 0.9251002827645164, + "loss": 0.15999768674373627, + "loss_ce": 0.0011536948150023818, + "loss_iou": 0.5234375, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 604090468, + "step": 3517 + }, + { + "epoch": 0.9253633195239035, + "grad_norm": 7.69976984803875, + "learning_rate": 5e-06, + "loss": 0.1235, + "num_input_tokens_seen": 604262492, + "step": 3518 + }, + { + "epoch": 0.9253633195239035, + "loss": 0.14189431071281433, + "loss_ce": 0.006487809121608734, + "loss_iou": 0.470703125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 604262492, + "step": 3518 + }, + { + "epoch": 0.9256263562832906, + "grad_norm": 12.75000933447967, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 604432856, + "step": 3519 + }, + { + "epoch": 0.9256263562832906, + "loss": 0.0861673578619957, + "loss_ce": 0.004319215193390846, + "loss_iou": 0.484375, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 604432856, + "step": 3519 + }, + { + "epoch": 0.9258893930426777, + "grad_norm": 5.042427155046337, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 604605056, + "step": 3520 + }, + { + "epoch": 0.9258893930426777, + "loss": 0.1412590742111206, + "loss_ce": 0.002571945311501622, + "loss_iou": 0.455078125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 604605056, + "step": 3520 + }, + { + "epoch": 0.9261524298020648, + "grad_norm": 5.26588732086772, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 604775372, + "step": 3521 + }, + { + "epoch": 0.9261524298020648, + "loss": 0.05718105286359787, + "loss_ce": 0.0014254315756261349, + "loss_iou": 0.54296875, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 604775372, + "step": 3521 + }, + { + "epoch": 0.926415466561452, + "grad_norm": 6.371747445648948, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 604947376, + "step": 3522 + }, + { + "epoch": 0.926415466561452, + "loss": 0.08819465339183807, + "loss_ce": 0.0007312724483199418, + "loss_iou": 0.322265625, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 604947376, + "step": 3522 + }, + { + "epoch": 0.9266785033208391, + "grad_norm": 10.505886717736445, + "learning_rate": 5e-06, + "loss": 0.1835, + "num_input_tokens_seen": 605119568, + "step": 3523 + }, + { + "epoch": 0.9266785033208391, + "loss": 0.17979584634304047, + "loss_ce": 0.004075629636645317, + "loss_iou": 0.609375, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 605119568, + "step": 3523 + }, + { + "epoch": 0.9269415400802262, + "grad_norm": 6.455190531468335, + "learning_rate": 5e-06, + "loss": 0.1006, + "num_input_tokens_seen": 605291524, + "step": 3524 + }, + { + "epoch": 0.9269415400802262, + "loss": 0.08140784502029419, + "loss_ce": 0.0016959276981651783, + "loss_iou": 0.51171875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 605291524, + "step": 3524 + }, + { + "epoch": 0.9272045768396133, + "grad_norm": 14.745208066661506, + "learning_rate": 5e-06, + "loss": 0.1184, + "num_input_tokens_seen": 605463808, + "step": 3525 + }, + { + "epoch": 0.9272045768396133, + "loss": 0.11558607965707779, + "loss_ce": 0.0017555101076141, + "loss_iou": 0.51171875, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 605463808, + "step": 3525 + }, + { + "epoch": 0.9274676135990004, + "grad_norm": 3.8942399340921825, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 605635872, + "step": 3526 + }, + { + "epoch": 0.9274676135990004, + "loss": 0.06215044856071472, + "loss_ce": 0.00019976735347881913, + "loss_iou": 0.546875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 605635872, + "step": 3526 + }, + { + "epoch": 0.9277306503583875, + "grad_norm": 4.175922010381311, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 605807804, + "step": 3527 + }, + { + "epoch": 0.9277306503583875, + "loss": 0.16303026676177979, + "loss_ce": 0.0018974501872435212, + "loss_iou": 0.482421875, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 605807804, + "step": 3527 + }, + { + "epoch": 0.9279936871177747, + "grad_norm": 3.844069149031415, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 605979952, + "step": 3528 + }, + { + "epoch": 0.9279936871177747, + "loss": 0.0688394084572792, + "loss_ce": 0.0023721237666904926, + "loss_iou": 0.4609375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 605979952, + "step": 3528 + }, + { + "epoch": 0.9282567238771618, + "grad_norm": 4.202436418234633, + "learning_rate": 5e-06, + "loss": 0.081, + "num_input_tokens_seen": 606150360, + "step": 3529 + }, + { + "epoch": 0.9282567238771618, + "loss": 0.04098789393901825, + "loss_ce": 0.00038425601087510586, + "loss_iou": 0.5, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 606150360, + "step": 3529 + }, + { + "epoch": 0.928519760636549, + "grad_norm": 3.743613714946928, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 606320544, + "step": 3530 + }, + { + "epoch": 0.928519760636549, + "loss": 0.12318438291549683, + "loss_ce": 0.003891176311299205, + "loss_iou": 0.357421875, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 606320544, + "step": 3530 + }, + { + "epoch": 0.928782797395936, + "grad_norm": 7.541725516396188, + "learning_rate": 5e-06, + "loss": 0.0936, + "num_input_tokens_seen": 606492452, + "step": 3531 + }, + { + "epoch": 0.928782797395936, + "loss": 0.05570812523365021, + "loss_ce": 0.0006238996866159141, + "loss_iou": 0.66796875, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 606492452, + "step": 3531 + }, + { + "epoch": 0.9290458341553232, + "grad_norm": 7.283637997577956, + "learning_rate": 5e-06, + "loss": 0.1403, + "num_input_tokens_seen": 606664904, + "step": 3532 + }, + { + "epoch": 0.9290458341553232, + "loss": 0.2060985267162323, + "loss_ce": 0.0019969542045146227, + "loss_iou": 0.40625, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 606664904, + "step": 3532 + }, + { + "epoch": 0.9293088709147104, + "grad_norm": 3.96510750197539, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 606836960, + "step": 3533 + }, + { + "epoch": 0.9293088709147104, + "loss": 0.07297110557556152, + "loss_ce": 0.002353430027142167, + "loss_iou": 0.384765625, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 606836960, + "step": 3533 + }, + { + "epoch": 0.9295719076740975, + "grad_norm": 2.973198469624849, + "learning_rate": 5e-06, + "loss": 0.1219, + "num_input_tokens_seen": 607009360, + "step": 3534 + }, + { + "epoch": 0.9295719076740975, + "loss": 0.13494953513145447, + "loss_ce": 0.0017403117381036282, + "loss_iou": 0.4375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 607009360, + "step": 3534 + }, + { + "epoch": 0.9298349444334846, + "grad_norm": 3.119729004971168, + "learning_rate": 5e-06, + "loss": 0.0664, + "num_input_tokens_seen": 607181304, + "step": 3535 + }, + { + "epoch": 0.9298349444334846, + "loss": 0.06605780124664307, + "loss_ce": 0.002443910576403141, + "loss_iou": 0.421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 607181304, + "step": 3535 + }, + { + "epoch": 0.9300979811928717, + "grad_norm": 4.352773383034863, + "learning_rate": 5e-06, + "loss": 0.0763, + "num_input_tokens_seen": 607353736, + "step": 3536 + }, + { + "epoch": 0.9300979811928717, + "loss": 0.06802303344011307, + "loss_ce": 0.001021688454784453, + "loss_iou": 0.4296875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 607353736, + "step": 3536 + }, + { + "epoch": 0.9303610179522588, + "grad_norm": 3.9578360400969794, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 607525920, + "step": 3537 + }, + { + "epoch": 0.9303610179522588, + "loss": 0.06541258096694946, + "loss_ce": 0.00288206129334867, + "loss_iou": 0.5703125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 607525920, + "step": 3537 + }, + { + "epoch": 0.930624054711646, + "grad_norm": 4.492300233458511, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 607697980, + "step": 3538 + }, + { + "epoch": 0.930624054711646, + "loss": 0.09697936475276947, + "loss_ce": 0.0013983015669509768, + "loss_iou": 0.53515625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 607697980, + "step": 3538 + }, + { + "epoch": 0.9308870914710331, + "grad_norm": 4.302295585236548, + "learning_rate": 5e-06, + "loss": 0.1081, + "num_input_tokens_seen": 607870144, + "step": 3539 + }, + { + "epoch": 0.9308870914710331, + "loss": 0.10722782462835312, + "loss_ce": 0.000843545887619257, + "loss_iou": 0.49609375, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 607870144, + "step": 3539 + }, + { + "epoch": 0.9311501282304202, + "grad_norm": 7.0499450082503365, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 608042240, + "step": 3540 + }, + { + "epoch": 0.9311501282304202, + "loss": 0.1015363559126854, + "loss_ce": 0.006443582940846682, + "loss_iou": 0.44140625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 608042240, + "step": 3540 + }, + { + "epoch": 0.9314131649898073, + "grad_norm": 6.586834009281099, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 608214284, + "step": 3541 + }, + { + "epoch": 0.9314131649898073, + "loss": 0.12908074259757996, + "loss_ce": 0.004782632924616337, + "loss_iou": 0.59765625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 608214284, + "step": 3541 + }, + { + "epoch": 0.9316762017491944, + "grad_norm": 4.53807614520661, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 608386616, + "step": 3542 + }, + { + "epoch": 0.9316762017491944, + "loss": 0.1289425790309906, + "loss_ce": 0.0027523916214704514, + "loss_iou": 0.458984375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 608386616, + "step": 3542 + }, + { + "epoch": 0.9319392385085816, + "grad_norm": 7.59401410995318, + "learning_rate": 5e-06, + "loss": 0.1268, + "num_input_tokens_seen": 608558468, + "step": 3543 + }, + { + "epoch": 0.9319392385085816, + "loss": 0.16303950548171997, + "loss_ce": 0.00520259328186512, + "loss_iou": 0.4375, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 608558468, + "step": 3543 + }, + { + "epoch": 0.9322022752679687, + "grad_norm": 8.796770729596103, + "learning_rate": 5e-06, + "loss": 0.1311, + "num_input_tokens_seen": 608730916, + "step": 3544 + }, + { + "epoch": 0.9322022752679687, + "loss": 0.11866636574268341, + "loss_ce": 0.0005633389810100198, + "loss_iou": 0.341796875, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 608730916, + "step": 3544 + }, + { + "epoch": 0.9324653120273558, + "grad_norm": 5.984167827436451, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 608902956, + "step": 3545 + }, + { + "epoch": 0.9324653120273558, + "loss": 0.09633171558380127, + "loss_ce": 0.000903245760127902, + "loss_iou": 0.55078125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 608902956, + "step": 3545 + }, + { + "epoch": 0.9327283487867429, + "grad_norm": 13.864562508217144, + "learning_rate": 5e-06, + "loss": 0.1121, + "num_input_tokens_seen": 609072616, + "step": 3546 + }, + { + "epoch": 0.9327283487867429, + "loss": 0.11617599427700043, + "loss_ce": 0.0005448899464681745, + "loss_iou": 0.54296875, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 609072616, + "step": 3546 + }, + { + "epoch": 0.93299138554613, + "grad_norm": 7.250119178294138, + "learning_rate": 5e-06, + "loss": 0.0744, + "num_input_tokens_seen": 609244840, + "step": 3547 + }, + { + "epoch": 0.93299138554613, + "loss": 0.08952006697654724, + "loss_ce": 0.001415827078744769, + "loss_iou": 0.498046875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 609244840, + "step": 3547 + }, + { + "epoch": 0.9332544223055173, + "grad_norm": 8.965223152196344, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 609417116, + "step": 3548 + }, + { + "epoch": 0.9332544223055173, + "loss": 0.058124981820583344, + "loss_ce": 0.0005077911773696542, + "loss_iou": 0.4921875, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 609417116, + "step": 3548 + }, + { + "epoch": 0.9335174590649044, + "grad_norm": 4.578416828961302, + "learning_rate": 5e-06, + "loss": 0.1283, + "num_input_tokens_seen": 609589092, + "step": 3549 + }, + { + "epoch": 0.9335174590649044, + "loss": 0.06900876015424728, + "loss_ce": 0.0006951588438823819, + "loss_iou": 0.671875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 609589092, + "step": 3549 + }, + { + "epoch": 0.9337804958242915, + "grad_norm": 8.028931966527075, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 609761432, + "step": 3550 + }, + { + "epoch": 0.9337804958242915, + "loss": 0.10969488322734833, + "loss_ce": 0.0008691949769854546, + "loss_iou": 0.32421875, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 609761432, + "step": 3550 + }, + { + "epoch": 0.9340435325836786, + "grad_norm": 3.0608586378113225, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 609933500, + "step": 3551 + }, + { + "epoch": 0.9340435325836786, + "loss": 0.1336784064769745, + "loss_ce": 0.003475167090073228, + "loss_iou": 0.5546875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 609933500, + "step": 3551 + }, + { + "epoch": 0.9343065693430657, + "grad_norm": 6.227248956666904, + "learning_rate": 5e-06, + "loss": 0.0903, + "num_input_tokens_seen": 610105792, + "step": 3552 + }, + { + "epoch": 0.9343065693430657, + "loss": 0.08306320756673813, + "loss_ce": 0.007104953285306692, + "loss_iou": 0.453125, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 610105792, + "step": 3552 + }, + { + "epoch": 0.9345696061024528, + "grad_norm": 5.469597791438911, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 610277880, + "step": 3553 + }, + { + "epoch": 0.9345696061024528, + "loss": 0.08409252762794495, + "loss_ce": 0.002595331287011504, + "loss_iou": 0.42578125, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 610277880, + "step": 3553 + }, + { + "epoch": 0.93483264286184, + "grad_norm": 4.1047405914838455, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 610449924, + "step": 3554 + }, + { + "epoch": 0.93483264286184, + "loss": 0.10268253833055496, + "loss_ce": 0.0010895198211073875, + "loss_iou": 0.5, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 610449924, + "step": 3554 + }, + { + "epoch": 0.9350956796212271, + "grad_norm": 4.649820698561932, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 610622052, + "step": 3555 + }, + { + "epoch": 0.9350956796212271, + "loss": 0.08575969934463501, + "loss_ce": 0.0018668812699615955, + "loss_iou": 0.337890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 610622052, + "step": 3555 + }, + { + "epoch": 0.9353587163806142, + "grad_norm": 9.738163582969452, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 610794168, + "step": 3556 + }, + { + "epoch": 0.9353587163806142, + "loss": 0.17649231851100922, + "loss_ce": 0.004281637258827686, + "loss_iou": 0.423828125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 610794168, + "step": 3556 + }, + { + "epoch": 0.9356217531400013, + "grad_norm": 60.60865892554417, + "learning_rate": 5e-06, + "loss": 0.1377, + "num_input_tokens_seen": 610964304, + "step": 3557 + }, + { + "epoch": 0.9356217531400013, + "loss": 0.21576589345932007, + "loss_ce": 0.01731007918715477, + "loss_iou": 0.390625, + "loss_num": 0.039794921875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 610964304, + "step": 3557 + }, + { + "epoch": 0.9358847898993884, + "grad_norm": 26.41744180605267, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 611136572, + "step": 3558 + }, + { + "epoch": 0.9358847898993884, + "loss": 0.2503662705421448, + "loss_ce": 0.15902717411518097, + "loss_iou": 0.515625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 611136572, + "step": 3558 + }, + { + "epoch": 0.9361478266587756, + "grad_norm": 31.460812663910158, + "learning_rate": 5e-06, + "loss": 0.1477, + "num_input_tokens_seen": 611308712, + "step": 3559 + }, + { + "epoch": 0.9361478266587756, + "loss": 0.10697901993989944, + "loss_ce": 0.014724383130669594, + "loss_iou": 0.59765625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 611308712, + "step": 3559 + }, + { + "epoch": 0.9364108634181627, + "grad_norm": 125.0152995081179, + "learning_rate": 5e-06, + "loss": 0.2523, + "num_input_tokens_seen": 611481064, + "step": 3560 + }, + { + "epoch": 0.9364108634181627, + "loss": 0.19752028584480286, + "loss_ce": 0.055674582719802856, + "loss_iou": 0.6484375, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 611481064, + "step": 3560 + }, + { + "epoch": 0.9366739001775498, + "grad_norm": 42.388335248969774, + "learning_rate": 5e-06, + "loss": 0.6327, + "num_input_tokens_seen": 611653432, + "step": 3561 + }, + { + "epoch": 0.9366739001775498, + "loss": 0.6363104581832886, + "loss_ce": 0.5759619474411011, + "loss_iou": 0.546875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 611653432, + "step": 3561 + }, + { + "epoch": 0.9369369369369369, + "grad_norm": 34.483421318180156, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 611825552, + "step": 3562 + }, + { + "epoch": 0.9369369369369369, + "loss": 0.10395447909832001, + "loss_ce": 0.05541627109050751, + "loss_iou": 0.435546875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 611825552, + "step": 3562 + }, + { + "epoch": 0.937199973696324, + "grad_norm": 3.5466208788177513, + "learning_rate": 5e-06, + "loss": 0.0777, + "num_input_tokens_seen": 611997960, + "step": 3563 + }, + { + "epoch": 0.937199973696324, + "loss": 0.1155683621764183, + "loss_ce": 0.0075666578486561775, + "loss_iou": 0.47265625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 611997960, + "step": 3563 + }, + { + "epoch": 0.9374630104557112, + "grad_norm": 6.556188606634138, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 612170180, + "step": 3564 + }, + { + "epoch": 0.9374630104557112, + "loss": 0.0452946312725544, + "loss_ce": 0.0018070839578285813, + "loss_iou": 0.455078125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 612170180, + "step": 3564 + }, + { + "epoch": 0.9377260472150983, + "grad_norm": 12.949003378413929, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 612342360, + "step": 3565 + }, + { + "epoch": 0.9377260472150983, + "loss": 0.16198524832725525, + "loss_ce": 0.001249166438356042, + "loss_iou": 0.625, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 612342360, + "step": 3565 + }, + { + "epoch": 0.9379890839744854, + "grad_norm": 5.1711873858566335, + "learning_rate": 5e-06, + "loss": 0.074, + "num_input_tokens_seen": 612514340, + "step": 3566 + }, + { + "epoch": 0.9379890839744854, + "loss": 0.050079330801963806, + "loss_ce": 0.0008087016176432371, + "loss_iou": 0.455078125, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 612514340, + "step": 3566 + }, + { + "epoch": 0.9382521207338725, + "grad_norm": 15.271109982643123, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 612686644, + "step": 3567 + }, + { + "epoch": 0.9382521207338725, + "loss": 0.051343828439712524, + "loss_ce": 0.0003336968075018376, + "loss_iou": 0.4609375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 612686644, + "step": 3567 + }, + { + "epoch": 0.9385151574932596, + "grad_norm": 6.894404249326668, + "learning_rate": 5e-06, + "loss": 0.1218, + "num_input_tokens_seen": 612854000, + "step": 3568 + }, + { + "epoch": 0.9385151574932596, + "loss": 0.1327725201845169, + "loss_ce": 0.0005093337967991829, + "loss_iou": 0.46484375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 612854000, + "step": 3568 + }, + { + "epoch": 0.9387781942526469, + "grad_norm": 16.019697398064586, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 613026136, + "step": 3569 + }, + { + "epoch": 0.9387781942526469, + "loss": 0.14443224668502808, + "loss_ce": 0.0018541140016168356, + "loss_iou": 0.4453125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 613026136, + "step": 3569 + }, + { + "epoch": 0.939041231012034, + "grad_norm": 2.778217951943394, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 613198388, + "step": 3570 + }, + { + "epoch": 0.939041231012034, + "loss": 0.05536004900932312, + "loss_ce": 0.0027019698172807693, + "loss_iou": 0.53515625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 613198388, + "step": 3570 + }, + { + "epoch": 0.9393042677714211, + "grad_norm": 4.873345772996428, + "learning_rate": 5e-06, + "loss": 0.0827, + "num_input_tokens_seen": 613370288, + "step": 3571 + }, + { + "epoch": 0.9393042677714211, + "loss": 0.09475830942392349, + "loss_ce": 0.0007489121053367853, + "loss_iou": 0.392578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 613370288, + "step": 3571 + }, + { + "epoch": 0.9395673045308082, + "grad_norm": 6.737346874423115, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 613542216, + "step": 3572 + }, + { + "epoch": 0.9395673045308082, + "loss": 0.2296835035085678, + "loss_ce": 0.005287751089781523, + "loss_iou": 0.330078125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 613542216, + "step": 3572 + }, + { + "epoch": 0.9398303412901953, + "grad_norm": 5.428921826665437, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 613714320, + "step": 3573 + }, + { + "epoch": 0.9398303412901953, + "loss": 0.04020649567246437, + "loss_ce": 0.002578320913016796, + "loss_iou": 0.50390625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 613714320, + "step": 3573 + }, + { + "epoch": 0.9400933780495825, + "grad_norm": 5.730418477266325, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 613884004, + "step": 3574 + }, + { + "epoch": 0.9400933780495825, + "loss": 0.12099497765302658, + "loss_ce": 0.000435287831351161, + "loss_iou": 0.1884765625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 613884004, + "step": 3574 + }, + { + "epoch": 0.9403564148089696, + "grad_norm": 2.8371958686948315, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 614056360, + "step": 3575 + }, + { + "epoch": 0.9403564148089696, + "loss": 0.06765338778495789, + "loss_ce": 0.00017902448598761111, + "loss_iou": 0.392578125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 614056360, + "step": 3575 + }, + { + "epoch": 0.9406194515683567, + "grad_norm": 3.3483226181812173, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 614228248, + "step": 3576 + }, + { + "epoch": 0.9406194515683567, + "loss": 0.07497625052928925, + "loss_ce": 0.0009863873710855842, + "loss_iou": 0.404296875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 614228248, + "step": 3576 + }, + { + "epoch": 0.9408824883277438, + "grad_norm": 3.0470015747124837, + "learning_rate": 5e-06, + "loss": 0.0987, + "num_input_tokens_seen": 614400460, + "step": 3577 + }, + { + "epoch": 0.9408824883277438, + "loss": 0.15504948794841766, + "loss_ce": 0.005391271784901619, + "loss_iou": 0.23828125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 614400460, + "step": 3577 + }, + { + "epoch": 0.9411455250871309, + "grad_norm": 14.590254214221755, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 614572668, + "step": 3578 + }, + { + "epoch": 0.9411455250871309, + "loss": 0.12589725852012634, + "loss_ce": 0.00352177070453763, + "loss_iou": 0.453125, + "loss_num": 0.0244140625, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 614572668, + "step": 3578 + }, + { + "epoch": 0.941408561846518, + "grad_norm": 4.098895136914831, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 614744728, + "step": 3579 + }, + { + "epoch": 0.941408561846518, + "loss": 0.07378913462162018, + "loss_ce": 0.0005316926399245858, + "loss_iou": 0.5, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 614744728, + "step": 3579 + }, + { + "epoch": 0.9416715986059052, + "grad_norm": 6.551103187216315, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 614917064, + "step": 3580 + }, + { + "epoch": 0.9416715986059052, + "loss": 0.11033067107200623, + "loss_ce": 0.0037785512395203114, + "loss_iou": 0.625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 614917064, + "step": 3580 + }, + { + "epoch": 0.9419346353652923, + "grad_norm": 6.125579245234497, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 615089092, + "step": 3581 + }, + { + "epoch": 0.9419346353652923, + "loss": 0.09463340044021606, + "loss_ce": 0.002287208568304777, + "loss_iou": 0.421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 615089092, + "step": 3581 + }, + { + "epoch": 0.9421976721246794, + "grad_norm": 4.52462668676838, + "learning_rate": 5e-06, + "loss": 0.0929, + "num_input_tokens_seen": 615259732, + "step": 3582 + }, + { + "epoch": 0.9421976721246794, + "loss": 0.09844870865345001, + "loss_ce": 0.0007771998061798513, + "loss_iou": 0.546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 615259732, + "step": 3582 + }, + { + "epoch": 0.9424607088840665, + "grad_norm": 4.261037832557463, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 615431984, + "step": 3583 + }, + { + "epoch": 0.9424607088840665, + "loss": 0.10644324868917465, + "loss_ce": 0.0039041785057634115, + "loss_iou": 0.53515625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 615431984, + "step": 3583 + }, + { + "epoch": 0.9427237456434536, + "grad_norm": 11.026204038590599, + "learning_rate": 5e-06, + "loss": 0.0974, + "num_input_tokens_seen": 615604052, + "step": 3584 + }, + { + "epoch": 0.9427237456434536, + "loss": 0.08766089379787445, + "loss_ce": 0.0031577199697494507, + "loss_iou": 0.455078125, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 615604052, + "step": 3584 + }, + { + "epoch": 0.9429867824028408, + "grad_norm": 4.892521797802774, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 615776672, + "step": 3585 + }, + { + "epoch": 0.9429867824028408, + "loss": 0.04693538695573807, + "loss_ce": 0.0009148788521997631, + "loss_iou": 0.380859375, + "loss_num": 0.00921630859375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 615776672, + "step": 3585 + }, + { + "epoch": 0.9432498191622279, + "grad_norm": 8.102761106727687, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 615948804, + "step": 3586 + }, + { + "epoch": 0.9432498191622279, + "loss": 0.0826391950249672, + "loss_ce": 0.00152347341645509, + "loss_iou": 0.40234375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 615948804, + "step": 3586 + }, + { + "epoch": 0.943512855921615, + "grad_norm": 4.699200150667693, + "learning_rate": 5e-06, + "loss": 0.0853, + "num_input_tokens_seen": 616120904, + "step": 3587 + }, + { + "epoch": 0.943512855921615, + "loss": 0.07182273268699646, + "loss_ce": 0.004500953480601311, + "loss_iou": 0.431640625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 616120904, + "step": 3587 + }, + { + "epoch": 0.9437758926810021, + "grad_norm": 5.979856684754754, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 616293232, + "step": 3588 + }, + { + "epoch": 0.9437758926810021, + "loss": 0.14250054955482483, + "loss_ce": 0.004179632291197777, + "loss_iou": 0.515625, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 616293232, + "step": 3588 + }, + { + "epoch": 0.9440389294403893, + "grad_norm": 4.477659380807632, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 616465544, + "step": 3589 + }, + { + "epoch": 0.9440389294403893, + "loss": 0.13571983575820923, + "loss_ce": 0.004356917925179005, + "loss_iou": 0.3671875, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 616465544, + "step": 3589 + }, + { + "epoch": 0.9443019661997765, + "grad_norm": 5.354193803812341, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 616638112, + "step": 3590 + }, + { + "epoch": 0.9443019661997765, + "loss": 0.08732321113348007, + "loss_ce": 0.0015383013524115086, + "loss_iou": 0.458984375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 616638112, + "step": 3590 + }, + { + "epoch": 0.9445650029591636, + "grad_norm": 3.712673541690186, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 616808952, + "step": 3591 + }, + { + "epoch": 0.9445650029591636, + "loss": 0.0873071700334549, + "loss_ce": 0.0063440315425395966, + "loss_iou": 0.45703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 616808952, + "step": 3591 + }, + { + "epoch": 0.9448280397185507, + "grad_norm": 5.018692274423124, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 616981056, + "step": 3592 + }, + { + "epoch": 0.9448280397185507, + "loss": 0.16758012771606445, + "loss_ce": 0.0011983029544353485, + "loss_iou": 0.416015625, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 616981056, + "step": 3592 + }, + { + "epoch": 0.9450910764779378, + "grad_norm": 8.157994634855726, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 617152956, + "step": 3593 + }, + { + "epoch": 0.9450910764779378, + "loss": 0.09211128950119019, + "loss_ce": 0.0016877016751095653, + "loss_iou": 0.466796875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 617152956, + "step": 3593 + }, + { + "epoch": 0.9453541132373249, + "grad_norm": 6.815211067626931, + "learning_rate": 5e-06, + "loss": 0.104, + "num_input_tokens_seen": 617323192, + "step": 3594 + }, + { + "epoch": 0.9453541132373249, + "loss": 0.15205200016498566, + "loss_ce": 0.0006542917108163238, + "loss_iou": 0.373046875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 617323192, + "step": 3594 + }, + { + "epoch": 0.9456171499967121, + "grad_norm": 5.698573656915325, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 617495528, + "step": 3595 + }, + { + "epoch": 0.9456171499967121, + "loss": 0.15452706813812256, + "loss_ce": 0.004380582831799984, + "loss_iou": 0.466796875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 617495528, + "step": 3595 + }, + { + "epoch": 0.9458801867560992, + "grad_norm": 5.492995557036467, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 617667772, + "step": 3596 + }, + { + "epoch": 0.9458801867560992, + "loss": 0.11533143371343613, + "loss_ce": 0.0028283819556236267, + "loss_iou": 0.453125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 617667772, + "step": 3596 + }, + { + "epoch": 0.9461432235154863, + "grad_norm": 3.750755987920789, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 617836324, + "step": 3597 + }, + { + "epoch": 0.9461432235154863, + "loss": 0.07357309758663177, + "loss_ce": 0.0006360823172144592, + "loss_iou": 0.5546875, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 617836324, + "step": 3597 + }, + { + "epoch": 0.9464062602748734, + "grad_norm": 3.634136051792054, + "learning_rate": 5e-06, + "loss": 0.102, + "num_input_tokens_seen": 618006748, + "step": 3598 + }, + { + "epoch": 0.9464062602748734, + "loss": 0.05711160972714424, + "loss_ce": 0.0004099512880202383, + "loss_iou": 0.474609375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 618006748, + "step": 3598 + }, + { + "epoch": 0.9466692970342605, + "grad_norm": 17.367283166631303, + "learning_rate": 5e-06, + "loss": 0.0994, + "num_input_tokens_seen": 618179204, + "step": 3599 + }, + { + "epoch": 0.9466692970342605, + "loss": 0.11188434064388275, + "loss_ce": 0.0004646638117264956, + "loss_iou": 0.53125, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 618179204, + "step": 3599 + }, + { + "epoch": 0.9469323337936476, + "grad_norm": 4.326428384534629, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 618351176, + "step": 3600 + }, + { + "epoch": 0.9469323337936476, + "loss": 0.1435290426015854, + "loss_ce": 0.002629393944516778, + "loss_iou": 0.34375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 618351176, + "step": 3600 + }, + { + "epoch": 0.9471953705530348, + "grad_norm": 19.80317191157718, + "learning_rate": 5e-06, + "loss": 0.0993, + "num_input_tokens_seen": 618523308, + "step": 3601 + }, + { + "epoch": 0.9471953705530348, + "loss": 0.055401187390089035, + "loss_ce": 0.0005763589288108051, + "loss_iou": 0.5, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 618523308, + "step": 3601 + }, + { + "epoch": 0.9474584073124219, + "grad_norm": 7.650182889486073, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 618695336, + "step": 3602 + }, + { + "epoch": 0.9474584073124219, + "loss": 0.15820011496543884, + "loss_ce": 0.0013244987931102514, + "loss_iou": 0.51171875, + "loss_num": 0.03125, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 618695336, + "step": 3602 + }, + { + "epoch": 0.947721444071809, + "grad_norm": 15.018190748040105, + "learning_rate": 5e-06, + "loss": 0.1028, + "num_input_tokens_seen": 618868120, + "step": 3603 + }, + { + "epoch": 0.947721444071809, + "loss": 0.08152879774570465, + "loss_ce": 0.0010081640211865306, + "loss_iou": 0.388671875, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 618868120, + "step": 3603 + }, + { + "epoch": 0.9479844808311961, + "grad_norm": 3.724726783342823, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 619040216, + "step": 3604 + }, + { + "epoch": 0.9479844808311961, + "loss": 0.07213738560676575, + "loss_ce": 0.0031829241197556257, + "loss_iou": 0.3671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 619040216, + "step": 3604 + }, + { + "epoch": 0.9482475175905832, + "grad_norm": 28.840812127557175, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 619212192, + "step": 3605 + }, + { + "epoch": 0.9482475175905832, + "loss": 0.1626545786857605, + "loss_ce": 0.0024830668698996305, + "loss_iou": 0.482421875, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 619212192, + "step": 3605 + }, + { + "epoch": 0.9485105543499704, + "grad_norm": 5.069934241766405, + "learning_rate": 5e-06, + "loss": 0.1145, + "num_input_tokens_seen": 619382656, + "step": 3606 + }, + { + "epoch": 0.9485105543499704, + "loss": 0.1947801113128662, + "loss_ce": 0.00343489833176136, + "loss_iou": 0.53515625, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 619382656, + "step": 3606 + }, + { + "epoch": 0.9487735911093576, + "grad_norm": 13.189826643263768, + "learning_rate": 5e-06, + "loss": 0.0894, + "num_input_tokens_seen": 619554772, + "step": 3607 + }, + { + "epoch": 0.9487735911093576, + "loss": 0.10128442198038101, + "loss_ce": 0.001995484111830592, + "loss_iou": 0.388671875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 619554772, + "step": 3607 + }, + { + "epoch": 0.9490366278687447, + "grad_norm": 3.7735949518372953, + "learning_rate": 5e-06, + "loss": 0.0685, + "num_input_tokens_seen": 619727116, + "step": 3608 + }, + { + "epoch": 0.9490366278687447, + "loss": 0.11414018273353577, + "loss_ce": 0.005039841867983341, + "loss_iou": 0.4140625, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 619727116, + "step": 3608 + }, + { + "epoch": 0.9492996646281318, + "grad_norm": 4.250123008721577, + "learning_rate": 5e-06, + "loss": 0.1075, + "num_input_tokens_seen": 619899428, + "step": 3609 + }, + { + "epoch": 0.9492996646281318, + "loss": 0.1226678267121315, + "loss_ce": 0.007280868943780661, + "loss_iou": 0.45703125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 619899428, + "step": 3609 + }, + { + "epoch": 0.9495627013875189, + "grad_norm": 6.863676153540119, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 620071540, + "step": 3610 + }, + { + "epoch": 0.9495627013875189, + "loss": 0.21938937902450562, + "loss_ce": 0.0038132029585540295, + "loss_iou": 0.3984375, + "loss_num": 0.04296875, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 620071540, + "step": 3610 + }, + { + "epoch": 0.9498257381469061, + "grad_norm": 3.813325984971825, + "learning_rate": 5e-06, + "loss": 0.1107, + "num_input_tokens_seen": 620243836, + "step": 3611 + }, + { + "epoch": 0.9498257381469061, + "loss": 0.07250767946243286, + "loss_ce": 0.0018289745785295963, + "loss_iou": 0.33984375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 620243836, + "step": 3611 + }, + { + "epoch": 0.9500887749062932, + "grad_norm": 8.694293187792477, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 620416164, + "step": 3612 + }, + { + "epoch": 0.9500887749062932, + "loss": 0.08137984573841095, + "loss_ce": 0.0004167144070379436, + "loss_iou": 0.421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 620416164, + "step": 3612 + }, + { + "epoch": 0.9503518116656803, + "grad_norm": 4.011759905693181, + "learning_rate": 5e-06, + "loss": 0.0881, + "num_input_tokens_seen": 620588248, + "step": 3613 + }, + { + "epoch": 0.9503518116656803, + "loss": 0.10577777028083801, + "loss_ce": 0.003009829204529524, + "loss_iou": 0.57421875, + "loss_num": 0.0206298828125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 620588248, + "step": 3613 + }, + { + "epoch": 0.9506148484250674, + "grad_norm": 4.8583016740360545, + "learning_rate": 5e-06, + "loss": 0.0856, + "num_input_tokens_seen": 620760212, + "step": 3614 + }, + { + "epoch": 0.9506148484250674, + "loss": 0.07320687174797058, + "loss_ce": 0.00593086751177907, + "loss_iou": 0.4609375, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 620760212, + "step": 3614 + }, + { + "epoch": 0.9508778851844545, + "grad_norm": 7.183426742063696, + "learning_rate": 5e-06, + "loss": 0.092, + "num_input_tokens_seen": 620932584, + "step": 3615 + }, + { + "epoch": 0.9508778851844545, + "loss": 0.12085875123739243, + "loss_ce": 0.004022202454507351, + "loss_iou": 0.3828125, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 620932584, + "step": 3615 + }, + { + "epoch": 0.9511409219438417, + "grad_norm": 5.9763835890459855, + "learning_rate": 5e-06, + "loss": 0.0623, + "num_input_tokens_seen": 621104624, + "step": 3616 + }, + { + "epoch": 0.9511409219438417, + "loss": 0.057411566376686096, + "loss_ce": 0.00405157683417201, + "loss_iou": 0.40625, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 621104624, + "step": 3616 + }, + { + "epoch": 0.9514039587032288, + "grad_norm": 4.148575034087351, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 621276932, + "step": 3617 + }, + { + "epoch": 0.9514039587032288, + "loss": 0.17621394991874695, + "loss_ce": 0.0008599417633377016, + "loss_iou": 0.515625, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 621276932, + "step": 3617 + }, + { + "epoch": 0.9516669954626159, + "grad_norm": 9.831716470581155, + "learning_rate": 5e-06, + "loss": 0.1326, + "num_input_tokens_seen": 621449128, + "step": 3618 + }, + { + "epoch": 0.9516669954626159, + "loss": 0.17716863751411438, + "loss_ce": 0.0019672252237796783, + "loss_iou": 0.5, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 621449128, + "step": 3618 + }, + { + "epoch": 0.951930032222003, + "grad_norm": 6.726193865130754, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 621621260, + "step": 3619 + }, + { + "epoch": 0.951930032222003, + "loss": 0.08456599712371826, + "loss_ce": 0.0014971550554037094, + "loss_iou": 0.4453125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 621621260, + "step": 3619 + }, + { + "epoch": 0.9521930689813901, + "grad_norm": 5.019143488565095, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 621792008, + "step": 3620 + }, + { + "epoch": 0.9521930689813901, + "loss": 0.05885142832994461, + "loss_ce": 0.0001813878770917654, + "loss_iou": 0.59375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 621792008, + "step": 3620 + }, + { + "epoch": 0.9524561057407773, + "grad_norm": 55.831161754212744, + "learning_rate": 5e-06, + "loss": 0.1297, + "num_input_tokens_seen": 621964056, + "step": 3621 + }, + { + "epoch": 0.9524561057407773, + "loss": 0.08644313365221024, + "loss_ce": 0.0015737485373392701, + "loss_iou": 0.419921875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 621964056, + "step": 3621 + }, + { + "epoch": 0.9527191425001644, + "grad_norm": 6.339787905898623, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 622136360, + "step": 3622 + }, + { + "epoch": 0.9527191425001644, + "loss": 0.07180093228816986, + "loss_ce": 0.0007102307863533497, + "loss_iou": 0.423828125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 622136360, + "step": 3622 + }, + { + "epoch": 0.9529821792595515, + "grad_norm": 5.256007258631962, + "learning_rate": 5e-06, + "loss": 0.1175, + "num_input_tokens_seen": 622308160, + "step": 3623 + }, + { + "epoch": 0.9529821792595515, + "loss": 0.16570287942886353, + "loss_ce": 0.0014267577789723873, + "loss_iou": 0.396484375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 622308160, + "step": 3623 + }, + { + "epoch": 0.9532452160189386, + "grad_norm": 16.742735373121388, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 622480548, + "step": 3624 + }, + { + "epoch": 0.9532452160189386, + "loss": 0.052828967571258545, + "loss_ce": 0.0007049451814964414, + "loss_iou": 0.5078125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 622480548, + "step": 3624 + }, + { + "epoch": 0.9535082527783257, + "grad_norm": 17.572925497634134, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 622652520, + "step": 3625 + }, + { + "epoch": 0.9535082527783257, + "loss": 0.0615709125995636, + "loss_ce": 0.00045946481986902654, + "loss_iou": 0.48046875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 622652520, + "step": 3625 + }, + { + "epoch": 0.9537712895377128, + "grad_norm": 6.0047852962098975, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 622823148, + "step": 3626 + }, + { + "epoch": 0.9537712895377128, + "loss": 0.13855966925621033, + "loss_ce": 0.0012610815465450287, + "loss_iou": 0.43359375, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 622823148, + "step": 3626 + }, + { + "epoch": 0.9540343262971, + "grad_norm": 7.792667970397866, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 622995452, + "step": 3627 + }, + { + "epoch": 0.9540343262971, + "loss": 0.13608847558498383, + "loss_ce": 0.001505955122411251, + "loss_iou": 0.48828125, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 622995452, + "step": 3627 + }, + { + "epoch": 0.9542973630564872, + "grad_norm": 4.565665131118125, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 623167560, + "step": 3628 + }, + { + "epoch": 0.9542973630564872, + "loss": 0.11297139525413513, + "loss_ce": 0.0007735221879556775, + "loss_iou": 0.490234375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 623167560, + "step": 3628 + }, + { + "epoch": 0.9545603998158743, + "grad_norm": 5.57299165717365, + "learning_rate": 5e-06, + "loss": 0.1077, + "num_input_tokens_seen": 623339872, + "step": 3629 + }, + { + "epoch": 0.9545603998158743, + "loss": 0.09495042264461517, + "loss_ce": 0.002497426699846983, + "loss_iou": 0.365234375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 623339872, + "step": 3629 + }, + { + "epoch": 0.9548234365752614, + "grad_norm": 21.302937351842843, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 623512180, + "step": 3630 + }, + { + "epoch": 0.9548234365752614, + "loss": 0.11213900148868561, + "loss_ce": 0.0011923413258045912, + "loss_iou": 0.50390625, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 623512180, + "step": 3630 + }, + { + "epoch": 0.9550864733346485, + "grad_norm": 12.098614631817124, + "learning_rate": 5e-06, + "loss": 0.1408, + "num_input_tokens_seen": 623680972, + "step": 3631 + }, + { + "epoch": 0.9550864733346485, + "loss": 0.21909213066101074, + "loss_ce": 0.0015017889672890306, + "loss_iou": 0.453125, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 623680972, + "step": 3631 + }, + { + "epoch": 0.9553495100940357, + "grad_norm": 9.034625023288243, + "learning_rate": 5e-06, + "loss": 0.1064, + "num_input_tokens_seen": 623851276, + "step": 3632 + }, + { + "epoch": 0.9553495100940357, + "loss": 0.0687929093837738, + "loss_ce": 0.0007081945077516139, + "loss_iou": 0.5859375, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 623851276, + "step": 3632 + }, + { + "epoch": 0.9556125468534228, + "grad_norm": 4.18724157975137, + "learning_rate": 5e-06, + "loss": 0.1245, + "num_input_tokens_seen": 624023500, + "step": 3633 + }, + { + "epoch": 0.9556125468534228, + "loss": 0.12112436443567276, + "loss_ce": 0.0027008973993360996, + "loss_iou": 0.4140625, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 624023500, + "step": 3633 + }, + { + "epoch": 0.9558755836128099, + "grad_norm": 10.76826846477555, + "learning_rate": 5e-06, + "loss": 0.1289, + "num_input_tokens_seen": 624195576, + "step": 3634 + }, + { + "epoch": 0.9558755836128099, + "loss": 0.11673011630773544, + "loss_ce": 0.004501717630773783, + "loss_iou": 0.47265625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 624195576, + "step": 3634 + }, + { + "epoch": 0.956138620372197, + "grad_norm": 3.420122712086701, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 624366376, + "step": 3635 + }, + { + "epoch": 0.956138620372197, + "loss": 0.13994066417217255, + "loss_ce": 0.0026115677319467068, + "loss_iou": 0.3125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 624366376, + "step": 3635 + }, + { + "epoch": 0.9564016571315841, + "grad_norm": 3.188466600397663, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 624538928, + "step": 3636 + }, + { + "epoch": 0.9564016571315841, + "loss": 0.19849437475204468, + "loss_ce": 0.0006184080266393721, + "loss_iou": 0.443359375, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 624538928, + "step": 3636 + }, + { + "epoch": 0.9566646938909713, + "grad_norm": 4.145251385205127, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 624711036, + "step": 3637 + }, + { + "epoch": 0.9566646938909713, + "loss": 0.07516495883464813, + "loss_ce": 0.0005189694347791374, + "loss_iou": 0.51953125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 624711036, + "step": 3637 + }, + { + "epoch": 0.9569277306503584, + "grad_norm": 7.0181560986956395, + "learning_rate": 5e-06, + "loss": 0.1178, + "num_input_tokens_seen": 624883520, + "step": 3638 + }, + { + "epoch": 0.9569277306503584, + "loss": 0.06267523765563965, + "loss_ce": 0.0029981140978634357, + "loss_iou": 0.60546875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 624883520, + "step": 3638 + }, + { + "epoch": 0.9571907674097455, + "grad_norm": 3.9660695052723813, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 625055640, + "step": 3639 + }, + { + "epoch": 0.9571907674097455, + "loss": 0.1359872817993164, + "loss_ce": 0.00042819694499485195, + "loss_iou": 0.5546875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 625055640, + "step": 3639 + }, + { + "epoch": 0.9574538041691326, + "grad_norm": 17.732753202882193, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 625227732, + "step": 3640 + }, + { + "epoch": 0.9574538041691326, + "loss": 0.09507328271865845, + "loss_ce": 0.0008502600830979645, + "loss_iou": 0.458984375, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 625227732, + "step": 3640 + }, + { + "epoch": 0.9577168409285197, + "grad_norm": 3.4840402032914257, + "learning_rate": 5e-06, + "loss": 0.0918, + "num_input_tokens_seen": 625398028, + "step": 3641 + }, + { + "epoch": 0.9577168409285197, + "loss": 0.13700971007347107, + "loss_ce": 0.0006876978441141546, + "loss_iou": 0.408203125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 625398028, + "step": 3641 + }, + { + "epoch": 0.9579798776879069, + "grad_norm": 7.413398524469702, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 625570244, + "step": 3642 + }, + { + "epoch": 0.9579798776879069, + "loss": 0.12876945734024048, + "loss_ce": 0.00047354548587463796, + "loss_iou": 0.5234375, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 625570244, + "step": 3642 + }, + { + "epoch": 0.958242914447294, + "grad_norm": 5.336702710138045, + "learning_rate": 5e-06, + "loss": 0.0898, + "num_input_tokens_seen": 625742464, + "step": 3643 + }, + { + "epoch": 0.958242914447294, + "loss": 0.05379210785031319, + "loss_ce": 0.00020323891658335924, + "loss_iou": 0.44140625, + "loss_num": 0.01068115234375, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 625742464, + "step": 3643 + }, + { + "epoch": 0.9585059512066811, + "grad_norm": 23.972493383254932, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 625914372, + "step": 3644 + }, + { + "epoch": 0.9585059512066811, + "loss": 0.08553168922662735, + "loss_ce": 0.0017609409987926483, + "loss_iou": 0.45703125, + "loss_num": 0.0167236328125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 625914372, + "step": 3644 + }, + { + "epoch": 0.9587689879660682, + "grad_norm": 10.340048159606546, + "learning_rate": 5e-06, + "loss": 0.1191, + "num_input_tokens_seen": 626086592, + "step": 3645 + }, + { + "epoch": 0.9587689879660682, + "loss": 0.234289288520813, + "loss_ce": 0.00026523511041887105, + "loss_iou": 0.404296875, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 626086592, + "step": 3645 + }, + { + "epoch": 0.9590320247254553, + "grad_norm": 9.300486446962857, + "learning_rate": 5e-06, + "loss": 0.094, + "num_input_tokens_seen": 626257416, + "step": 3646 + }, + { + "epoch": 0.9590320247254553, + "loss": 0.045688219368457794, + "loss_ce": 0.0002475440560374409, + "loss_iou": 0.3125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 626257416, + "step": 3646 + }, + { + "epoch": 0.9592950614848426, + "grad_norm": 11.589258821015147, + "learning_rate": 5e-06, + "loss": 0.1346, + "num_input_tokens_seen": 626427708, + "step": 3647 + }, + { + "epoch": 0.9592950614848426, + "loss": 0.10979291796684265, + "loss_ce": 0.0016691365744918585, + "loss_iou": 0.33984375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 626427708, + "step": 3647 + }, + { + "epoch": 0.9595580982442297, + "grad_norm": 3.2081047701574943, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 626599988, + "step": 3648 + }, + { + "epoch": 0.9595580982442297, + "loss": 0.09939119219779968, + "loss_ce": 0.0016281325370073318, + "loss_iou": 0.5546875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 626599988, + "step": 3648 + }, + { + "epoch": 0.9598211350036168, + "grad_norm": 4.783540700409292, + "learning_rate": 5e-06, + "loss": 0.1455, + "num_input_tokens_seen": 626772316, + "step": 3649 + }, + { + "epoch": 0.9598211350036168, + "loss": 0.09440785646438599, + "loss_ce": 0.0029161556158214808, + "loss_iou": 0.4296875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 626772316, + "step": 3649 + }, + { + "epoch": 0.9600841717630039, + "grad_norm": 5.125730804994831, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 626942620, + "step": 3650 + }, + { + "epoch": 0.9600841717630039, + "loss": 0.06443943828344345, + "loss_ce": 0.0007797717116773129, + "loss_iou": 0.482421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 626942620, + "step": 3650 + }, + { + "epoch": 0.960347208522391, + "grad_norm": 4.565795888601258, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 627114816, + "step": 3651 + }, + { + "epoch": 0.960347208522391, + "loss": 0.12303532660007477, + "loss_ce": 0.0003851845976896584, + "loss_iou": 0.453125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 627114816, + "step": 3651 + }, + { + "epoch": 0.9606102452817781, + "grad_norm": 8.198006738555572, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 627286832, + "step": 3652 + }, + { + "epoch": 0.9606102452817781, + "loss": 0.12663108110427856, + "loss_ce": 0.0022109271958470345, + "loss_iou": 0.6875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 627286832, + "step": 3652 + }, + { + "epoch": 0.9608732820411653, + "grad_norm": 6.977304479694191, + "learning_rate": 5e-06, + "loss": 0.141, + "num_input_tokens_seen": 627458840, + "step": 3653 + }, + { + "epoch": 0.9608732820411653, + "loss": 0.08323599398136139, + "loss_ce": 0.003646154422312975, + "loss_iou": 0.4921875, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 627458840, + "step": 3653 + }, + { + "epoch": 0.9611363188005524, + "grad_norm": 6.143584577276683, + "learning_rate": 5e-06, + "loss": 0.127, + "num_input_tokens_seen": 627631048, + "step": 3654 + }, + { + "epoch": 0.9611363188005524, + "loss": 0.08655130863189697, + "loss_ce": 0.0005985412281006575, + "loss_iou": 0.44921875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 627631048, + "step": 3654 + }, + { + "epoch": 0.9613993555599395, + "grad_norm": 4.962748401603593, + "learning_rate": 5e-06, + "loss": 0.0843, + "num_input_tokens_seen": 627801572, + "step": 3655 + }, + { + "epoch": 0.9613993555599395, + "loss": 0.06723140180110931, + "loss_ce": 0.0012218892807140946, + "loss_iou": 0.232421875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 627801572, + "step": 3655 + }, + { + "epoch": 0.9616623923193266, + "grad_norm": 3.888233294845763, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 627973444, + "step": 3656 + }, + { + "epoch": 0.9616623923193266, + "loss": 0.19316411018371582, + "loss_ce": 0.0060761114582419395, + "loss_iou": 0.4765625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 627973444, + "step": 3656 + }, + { + "epoch": 0.9619254290787137, + "grad_norm": 12.231564786844833, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 628145692, + "step": 3657 + }, + { + "epoch": 0.9619254290787137, + "loss": 0.08886748552322388, + "loss_ce": 0.004608447663486004, + "loss_iou": 0.37109375, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 628145692, + "step": 3657 + }, + { + "epoch": 0.9621884658381009, + "grad_norm": 6.681516105502576, + "learning_rate": 5e-06, + "loss": 0.1222, + "num_input_tokens_seen": 628317968, + "step": 3658 + }, + { + "epoch": 0.9621884658381009, + "loss": 0.11374935507774353, + "loss_ce": 0.00020870784646831453, + "loss_iou": 0.61328125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 628317968, + "step": 3658 + }, + { + "epoch": 0.962451502597488, + "grad_norm": 9.221687537639268, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 628489248, + "step": 3659 + }, + { + "epoch": 0.962451502597488, + "loss": 0.07078136503696442, + "loss_ce": 0.000972404726780951, + "loss_iou": 0.41015625, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 628489248, + "step": 3659 + }, + { + "epoch": 0.9627145393568751, + "grad_norm": 3.888238330257217, + "learning_rate": 5e-06, + "loss": 0.1194, + "num_input_tokens_seen": 628661488, + "step": 3660 + }, + { + "epoch": 0.9627145393568751, + "loss": 0.148333340883255, + "loss_ce": 0.001513272407464683, + "loss_iou": 0.361328125, + "loss_num": 0.0294189453125, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 628661488, + "step": 3660 + }, + { + "epoch": 0.9629775761162622, + "grad_norm": 3.548818147424885, + "learning_rate": 5e-06, + "loss": 0.1097, + "num_input_tokens_seen": 628833384, + "step": 3661 + }, + { + "epoch": 0.9629775761162622, + "loss": 0.07117056846618652, + "loss_ce": 0.0004918586346320808, + "loss_iou": 0.455078125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 628833384, + "step": 3661 + }, + { + "epoch": 0.9632406128756493, + "grad_norm": 3.779136602687443, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 629005652, + "step": 3662 + }, + { + "epoch": 0.9632406128756493, + "loss": 0.06165676563978195, + "loss_ce": 0.0007131616584956646, + "loss_iou": 0.62109375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 629005652, + "step": 3662 + }, + { + "epoch": 0.9635036496350365, + "grad_norm": 5.7914523260663495, + "learning_rate": 5e-06, + "loss": 0.1238, + "num_input_tokens_seen": 629177884, + "step": 3663 + }, + { + "epoch": 0.9635036496350365, + "loss": 0.09137916564941406, + "loss_ce": 0.00181007559876889, + "loss_iou": 0.46484375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 629177884, + "step": 3663 + }, + { + "epoch": 0.9637666863944236, + "grad_norm": 3.071560976269854, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 629350348, + "step": 3664 + }, + { + "epoch": 0.9637666863944236, + "loss": 0.08777488768100739, + "loss_ce": 0.0004946062108501792, + "loss_iou": 0.455078125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 629350348, + "step": 3664 + }, + { + "epoch": 0.9640297231538107, + "grad_norm": 4.779549366101704, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 629522792, + "step": 3665 + }, + { + "epoch": 0.9640297231538107, + "loss": 0.1326950043439865, + "loss_ce": 0.0025680402759462595, + "loss_iou": 0.5234375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 629522792, + "step": 3665 + }, + { + "epoch": 0.9642927599131978, + "grad_norm": 4.346102092926262, + "learning_rate": 5e-06, + "loss": 0.1112, + "num_input_tokens_seen": 629694856, + "step": 3666 + }, + { + "epoch": 0.9642927599131978, + "loss": 0.11643362045288086, + "loss_ce": 0.0008330341661348939, + "loss_iou": 0.6328125, + "loss_num": 0.0230712890625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 629694856, + "step": 3666 + }, + { + "epoch": 0.964555796672585, + "grad_norm": 5.7406338456333845, + "learning_rate": 5e-06, + "loss": 0.1044, + "num_input_tokens_seen": 629866908, + "step": 3667 + }, + { + "epoch": 0.964555796672585, + "loss": 0.13996919989585876, + "loss_ce": 0.0034030412789434195, + "loss_iou": 0.37890625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 629866908, + "step": 3667 + }, + { + "epoch": 0.9648188334319722, + "grad_norm": 7.17686270310257, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 630039516, + "step": 3668 + }, + { + "epoch": 0.9648188334319722, + "loss": 0.12104253470897675, + "loss_ce": 0.00020818831399083138, + "loss_iou": 0.49609375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 630039516, + "step": 3668 + }, + { + "epoch": 0.9650818701913593, + "grad_norm": 4.2613778122636035, + "learning_rate": 5e-06, + "loss": 0.0821, + "num_input_tokens_seen": 630211624, + "step": 3669 + }, + { + "epoch": 0.9650818701913593, + "loss": 0.062059760093688965, + "loss_ce": 0.0022910854313522577, + "loss_iou": 0.34375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 630211624, + "step": 3669 + }, + { + "epoch": 0.9653449069507464, + "grad_norm": 3.5238324652450235, + "learning_rate": 5e-06, + "loss": 0.1352, + "num_input_tokens_seen": 630382332, + "step": 3670 + }, + { + "epoch": 0.9653449069507464, + "loss": 0.15831097960472107, + "loss_ce": 0.00016888529353309423, + "loss_iou": 0.453125, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 630382332, + "step": 3670 + }, + { + "epoch": 0.9656079437101335, + "grad_norm": 3.564039297129955, + "learning_rate": 5e-06, + "loss": 0.0998, + "num_input_tokens_seen": 630552592, + "step": 3671 + }, + { + "epoch": 0.9656079437101335, + "loss": 0.08011811971664429, + "loss_ce": 0.0016269085463136435, + "loss_iou": 0.470703125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 630552592, + "step": 3671 + }, + { + "epoch": 0.9658709804695206, + "grad_norm": 9.76288401818362, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 630722244, + "step": 3672 + }, + { + "epoch": 0.9658709804695206, + "loss": 0.08490733802318573, + "loss_ce": 0.0020673726685345173, + "loss_iou": 0.53125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 630722244, + "step": 3672 + }, + { + "epoch": 0.9661340172289078, + "grad_norm": 7.354300413013905, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 630894380, + "step": 3673 + }, + { + "epoch": 0.9661340172289078, + "loss": 0.13285204768180847, + "loss_ce": 0.0009245476103387773, + "loss_iou": 0.54296875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 630894380, + "step": 3673 + }, + { + "epoch": 0.9663970539882949, + "grad_norm": 5.188368697186112, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 631066652, + "step": 3674 + }, + { + "epoch": 0.9663970539882949, + "loss": 0.09987783432006836, + "loss_ce": 0.003976346459239721, + "loss_iou": 0.5546875, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 631066652, + "step": 3674 + }, + { + "epoch": 0.966660090747682, + "grad_norm": 12.325083640976278, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 631237000, + "step": 3675 + }, + { + "epoch": 0.966660090747682, + "loss": 0.17316505312919617, + "loss_ce": 0.002648080699145794, + "loss_iou": 0.404296875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 631237000, + "step": 3675 + }, + { + "epoch": 0.9669231275070691, + "grad_norm": 6.105729054233263, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 631409164, + "step": 3676 + }, + { + "epoch": 0.9669231275070691, + "loss": 0.09672191739082336, + "loss_ce": 0.0017207016935572028, + "loss_iou": 0.466796875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 631409164, + "step": 3676 + }, + { + "epoch": 0.9671861642664562, + "grad_norm": 4.682056592281961, + "learning_rate": 5e-06, + "loss": 0.1134, + "num_input_tokens_seen": 631581116, + "step": 3677 + }, + { + "epoch": 0.9671861642664562, + "loss": 0.12396599352359772, + "loss_ce": 0.0040776850655674934, + "loss_iou": 0.33984375, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 631581116, + "step": 3677 + }, + { + "epoch": 0.9674492010258433, + "grad_norm": 8.33340254914201, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 631750180, + "step": 3678 + }, + { + "epoch": 0.9674492010258433, + "loss": 0.09776239842176437, + "loss_ce": 0.0013878863537684083, + "loss_iou": 0.59375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 631750180, + "step": 3678 + }, + { + "epoch": 0.9677122377852305, + "grad_norm": 3.892518620403468, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 631922528, + "step": 3679 + }, + { + "epoch": 0.9677122377852305, + "loss": 0.10747776180505753, + "loss_ce": 0.0042520565912127495, + "loss_iou": 0.498046875, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 631922528, + "step": 3679 + }, + { + "epoch": 0.9679752745446176, + "grad_norm": 7.586861538526353, + "learning_rate": 5e-06, + "loss": 0.1193, + "num_input_tokens_seen": 632094768, + "step": 3680 + }, + { + "epoch": 0.9679752745446176, + "loss": 0.08680423349142075, + "loss_ce": 0.001721223583444953, + "loss_iou": 0.55078125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 632094768, + "step": 3680 + }, + { + "epoch": 0.9682383113040047, + "grad_norm": 4.2210475168735355, + "learning_rate": 5e-06, + "loss": 0.0757, + "num_input_tokens_seen": 632266740, + "step": 3681 + }, + { + "epoch": 0.9682383113040047, + "loss": 0.05067894607782364, + "loss_ce": 0.0006911527598276734, + "loss_iou": 0.5546875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 632266740, + "step": 3681 + }, + { + "epoch": 0.9685013480633918, + "grad_norm": 3.8121173575745755, + "learning_rate": 5e-06, + "loss": 0.1237, + "num_input_tokens_seen": 632439188, + "step": 3682 + }, + { + "epoch": 0.9685013480633918, + "loss": 0.11126217991113663, + "loss_ce": 0.00033078622072935104, + "loss_iou": 0.60546875, + "loss_num": 0.022216796875, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 632439188, + "step": 3682 + }, + { + "epoch": 0.9687643848227789, + "grad_norm": 5.838321531180536, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 632611332, + "step": 3683 + }, + { + "epoch": 0.9687643848227789, + "loss": 0.08182443678379059, + "loss_ce": 0.0006171565037220716, + "loss_iou": 0.57421875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 632611332, + "step": 3683 + }, + { + "epoch": 0.9690274215821661, + "grad_norm": 4.164644957819613, + "learning_rate": 5e-06, + "loss": 0.086, + "num_input_tokens_seen": 632783660, + "step": 3684 + }, + { + "epoch": 0.9690274215821661, + "loss": 0.12429676204919815, + "loss_ce": 0.0015245481627061963, + "loss_iou": 0.609375, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 632783660, + "step": 3684 + }, + { + "epoch": 0.9692904583415533, + "grad_norm": 26.321407570319618, + "learning_rate": 5e-06, + "loss": 0.1137, + "num_input_tokens_seen": 632955748, + "step": 3685 + }, + { + "epoch": 0.9692904583415533, + "loss": 0.05816134810447693, + "loss_ce": 0.0002847612486220896, + "loss_iou": 0.427734375, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 632955748, + "step": 3685 + }, + { + "epoch": 0.9695534951009404, + "grad_norm": 3.994252764794672, + "learning_rate": 5e-06, + "loss": 0.0997, + "num_input_tokens_seen": 633126124, + "step": 3686 + }, + { + "epoch": 0.9695534951009404, + "loss": 0.10153771191835403, + "loss_ce": 0.00023461380624212325, + "loss_iou": 0.51171875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 633126124, + "step": 3686 + }, + { + "epoch": 0.9698165318603275, + "grad_norm": 7.846355332992627, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 633298344, + "step": 3687 + }, + { + "epoch": 0.9698165318603275, + "loss": 0.15658894181251526, + "loss_ce": 0.001040847273543477, + "loss_iou": 0.478515625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 633298344, + "step": 3687 + }, + { + "epoch": 0.9700795686197146, + "grad_norm": 6.085055991191323, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 633470408, + "step": 3688 + }, + { + "epoch": 0.9700795686197146, + "loss": 0.14665310084819794, + "loss_ce": 0.0056618861854076385, + "loss_iou": 0.49609375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 633470408, + "step": 3688 + }, + { + "epoch": 0.9703426053791018, + "grad_norm": 4.551457844454551, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 633642716, + "step": 3689 + }, + { + "epoch": 0.9703426053791018, + "loss": 0.0793912261724472, + "loss_ce": 0.0006558679160661995, + "loss_iou": 0.5234375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 633642716, + "step": 3689 + }, + { + "epoch": 0.9706056421384889, + "grad_norm": 5.945079605333328, + "learning_rate": 5e-06, + "loss": 0.1123, + "num_input_tokens_seen": 633812440, + "step": 3690 + }, + { + "epoch": 0.9706056421384889, + "loss": 0.08010639995336533, + "loss_ce": 0.0009438038687221706, + "loss_iou": 0.498046875, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 633812440, + "step": 3690 + }, + { + "epoch": 0.970868678897876, + "grad_norm": 4.065153864479657, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 633985000, + "step": 3691 + }, + { + "epoch": 0.970868678897876, + "loss": 0.0722232460975647, + "loss_ce": 0.0003238367207814008, + "loss_iou": 0.4609375, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 633985000, + "step": 3691 + }, + { + "epoch": 0.9711317156572631, + "grad_norm": 4.197621752026495, + "learning_rate": 5e-06, + "loss": 0.0911, + "num_input_tokens_seen": 634157200, + "step": 3692 + }, + { + "epoch": 0.9711317156572631, + "loss": 0.09805089235305786, + "loss_ce": 0.003385363146662712, + "loss_iou": 0.51953125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 634157200, + "step": 3692 + }, + { + "epoch": 0.9713947524166502, + "grad_norm": 4.118683496322007, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 634329136, + "step": 3693 + }, + { + "epoch": 0.9713947524166502, + "loss": 0.06334918737411499, + "loss_ce": 0.0014290215913206339, + "loss_iou": 0.52734375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 634329136, + "step": 3693 + }, + { + "epoch": 0.9716577891760374, + "grad_norm": 5.569436401286201, + "learning_rate": 5e-06, + "loss": 0.0858, + "num_input_tokens_seen": 634501660, + "step": 3694 + }, + { + "epoch": 0.9716577891760374, + "loss": 0.056709855794906616, + "loss_ce": 0.0002981142024509609, + "loss_iou": 0.447265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 634501660, + "step": 3694 + }, + { + "epoch": 0.9719208259354245, + "grad_norm": 15.633714461557659, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 634673676, + "step": 3695 + }, + { + "epoch": 0.9719208259354245, + "loss": 0.16554518043994904, + "loss_ce": 0.0010554337641224265, + "loss_iou": 0.416015625, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 634673676, + "step": 3695 + }, + { + "epoch": 0.9721838626948116, + "grad_norm": 26.512182592600844, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 634845964, + "step": 3696 + }, + { + "epoch": 0.9721838626948116, + "loss": 0.08471856266260147, + "loss_ce": 0.0012224669335409999, + "loss_iou": 0.43359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 634845964, + "step": 3696 + }, + { + "epoch": 0.9724468994541987, + "grad_norm": 6.8173523395664715, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 635018328, + "step": 3697 + }, + { + "epoch": 0.9724468994541987, + "loss": 0.08134950697422028, + "loss_ce": 0.0013324212050065398, + "loss_iou": 0.5859375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 635018328, + "step": 3697 + }, + { + "epoch": 0.9727099362135858, + "grad_norm": 4.383461318684907, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 635190608, + "step": 3698 + }, + { + "epoch": 0.9727099362135858, + "loss": 0.08322876691818237, + "loss_ce": 0.0011364765232428908, + "loss_iou": 0.6171875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 635190608, + "step": 3698 + }, + { + "epoch": 0.972972972972973, + "grad_norm": 8.267610666760818, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 635362588, + "step": 3699 + }, + { + "epoch": 0.972972972972973, + "loss": 0.11783481389284134, + "loss_ce": 0.0006015403778292239, + "loss_iou": 0.486328125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 635362588, + "step": 3699 + }, + { + "epoch": 0.9732360097323601, + "grad_norm": 4.332996360892621, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 635534740, + "step": 3700 + }, + { + "epoch": 0.9732360097323601, + "loss": 0.1074044331908226, + "loss_ce": 0.00025721488054841757, + "loss_iou": 0.51953125, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 635534740, + "step": 3700 + }, + { + "epoch": 0.9734990464917472, + "grad_norm": 5.513199468514905, + "learning_rate": 5e-06, + "loss": 0.1466, + "num_input_tokens_seen": 635707288, + "step": 3701 + }, + { + "epoch": 0.9734990464917472, + "loss": 0.09230555593967438, + "loss_ce": 0.0021413678769022226, + "loss_iou": 0.56640625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 635707288, + "step": 3701 + }, + { + "epoch": 0.9737620832511343, + "grad_norm": 3.768479254290149, + "learning_rate": 5e-06, + "loss": 0.0828, + "num_input_tokens_seen": 635879648, + "step": 3702 + }, + { + "epoch": 0.9737620832511343, + "loss": 0.03509838879108429, + "loss_ce": 0.000766111770644784, + "loss_iou": 0.484375, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 635879648, + "step": 3702 + }, + { + "epoch": 0.9740251200105214, + "grad_norm": 3.566831404475063, + "learning_rate": 5e-06, + "loss": 0.0964, + "num_input_tokens_seen": 636051432, + "step": 3703 + }, + { + "epoch": 0.9740251200105214, + "loss": 0.09813763201236725, + "loss_ce": 0.001030704821459949, + "loss_iou": 0.45703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 636051432, + "step": 3703 + }, + { + "epoch": 0.9742881567699085, + "grad_norm": 3.4711728772501447, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 636223588, + "step": 3704 + }, + { + "epoch": 0.9742881567699085, + "loss": 0.09130216389894485, + "loss_ce": 0.0020840244833379984, + "loss_iou": 0.25390625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 636223588, + "step": 3704 + }, + { + "epoch": 0.9745511935292958, + "grad_norm": 10.080851555182159, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 636395540, + "step": 3705 + }, + { + "epoch": 0.9745511935292958, + "loss": 0.07607042789459229, + "loss_ce": 0.0012871015351265669, + "loss_iou": 0.2353515625, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 636395540, + "step": 3705 + }, + { + "epoch": 0.9748142302886829, + "grad_norm": 3.5401703294732814, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 636567872, + "step": 3706 + }, + { + "epoch": 0.9748142302886829, + "loss": 0.1415339708328247, + "loss_ce": 0.0009700124501250684, + "loss_iou": 0.44140625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 636567872, + "step": 3706 + }, + { + "epoch": 0.97507726704807, + "grad_norm": 4.65430606928134, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 636739796, + "step": 3707 + }, + { + "epoch": 0.97507726704807, + "loss": 0.06319437175989151, + "loss_ce": 0.0008469584863632917, + "loss_iou": 0.341796875, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 636739796, + "step": 3707 + }, + { + "epoch": 0.9753403038074571, + "grad_norm": 12.08107208422745, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 636911900, + "step": 3708 + }, + { + "epoch": 0.9753403038074571, + "loss": 0.09893044084310532, + "loss_ce": 0.004631124436855316, + "loss_iou": 0.48828125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 636911900, + "step": 3708 + }, + { + "epoch": 0.9756033405668442, + "grad_norm": 4.1664851424909894, + "learning_rate": 5e-06, + "loss": 0.1173, + "num_input_tokens_seen": 637083804, + "step": 3709 + }, + { + "epoch": 0.9756033405668442, + "loss": 0.14125725626945496, + "loss_ce": 0.0035619523841887712, + "loss_iou": 0.408203125, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 637083804, + "step": 3709 + }, + { + "epoch": 0.9758663773262314, + "grad_norm": 4.483302931910663, + "learning_rate": 5e-06, + "loss": 0.0909, + "num_input_tokens_seen": 637253480, + "step": 3710 + }, + { + "epoch": 0.9758663773262314, + "loss": 0.09771590679883957, + "loss_ce": 0.0010667359456419945, + "loss_iou": 0.498046875, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 637253480, + "step": 3710 + }, + { + "epoch": 0.9761294140856185, + "grad_norm": 4.570273140954518, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 637425832, + "step": 3711 + }, + { + "epoch": 0.9761294140856185, + "loss": 0.15927918255329132, + "loss_ce": 0.0016558904899284244, + "loss_iou": 0.3671875, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 637425832, + "step": 3711 + }, + { + "epoch": 0.9763924508450056, + "grad_norm": 4.261644785229519, + "learning_rate": 5e-06, + "loss": 0.1004, + "num_input_tokens_seen": 637597956, + "step": 3712 + }, + { + "epoch": 0.9763924508450056, + "loss": 0.09720858931541443, + "loss_ce": 0.005060770083218813, + "loss_iou": 0.400390625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 637597956, + "step": 3712 + }, + { + "epoch": 0.9766554876043927, + "grad_norm": 4.316121186379811, + "learning_rate": 5e-06, + "loss": 0.1502, + "num_input_tokens_seen": 637770516, + "step": 3713 + }, + { + "epoch": 0.9766554876043927, + "loss": 0.23752275109291077, + "loss_ce": 0.003071451559662819, + "loss_iou": 0.33984375, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 637770516, + "step": 3713 + }, + { + "epoch": 0.9769185243637798, + "grad_norm": 3.9858618973194195, + "learning_rate": 5e-06, + "loss": 0.0865, + "num_input_tokens_seen": 637940932, + "step": 3714 + }, + { + "epoch": 0.9769185243637798, + "loss": 0.09894842654466629, + "loss_ce": 0.0026502024848014116, + "loss_iou": 0.3984375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 637940932, + "step": 3714 + }, + { + "epoch": 0.977181561123167, + "grad_norm": 6.459702059191179, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 638112972, + "step": 3715 + }, + { + "epoch": 0.977181561123167, + "loss": 0.05996452271938324, + "loss_ce": 0.0013707715552300215, + "loss_iou": 0.54296875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 638112972, + "step": 3715 + }, + { + "epoch": 0.9774445978825541, + "grad_norm": 4.702404331881163, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 638283296, + "step": 3716 + }, + { + "epoch": 0.9774445978825541, + "loss": 0.09870034456253052, + "loss_ce": 0.0003421921283006668, + "loss_iou": 0.3671875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 638283296, + "step": 3716 + }, + { + "epoch": 0.9777076346419412, + "grad_norm": 6.311133724359332, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 638455388, + "step": 3717 + }, + { + "epoch": 0.9777076346419412, + "loss": 0.10849727690219879, + "loss_ce": 0.001868859282694757, + "loss_iou": 0.39453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 638455388, + "step": 3717 + }, + { + "epoch": 0.9779706714013283, + "grad_norm": 4.841088605766017, + "learning_rate": 5e-06, + "loss": 0.0692, + "num_input_tokens_seen": 638627620, + "step": 3718 + }, + { + "epoch": 0.9779706714013283, + "loss": 0.0839821919798851, + "loss_ce": 0.0003640282666310668, + "loss_iou": 0.5625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 638627620, + "step": 3718 + }, + { + "epoch": 0.9782337081607154, + "grad_norm": 4.2847622894404696, + "learning_rate": 5e-06, + "loss": 0.1385, + "num_input_tokens_seen": 638799776, + "step": 3719 + }, + { + "epoch": 0.9782337081607154, + "loss": 0.18468543887138367, + "loss_ce": 0.0003745291323866695, + "loss_iou": 0.4453125, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 638799776, + "step": 3719 + }, + { + "epoch": 0.9784967449201026, + "grad_norm": 4.11039448761818, + "learning_rate": 5e-06, + "loss": 0.0832, + "num_input_tokens_seen": 638971952, + "step": 3720 + }, + { + "epoch": 0.9784967449201026, + "loss": 0.1171189695596695, + "loss_ce": 0.002739093266427517, + "loss_iou": 0.41796875, + "loss_num": 0.02294921875, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 638971952, + "step": 3720 + }, + { + "epoch": 0.9787597816794897, + "grad_norm": 6.78823750558351, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 639144380, + "step": 3721 + }, + { + "epoch": 0.9787597816794897, + "loss": 0.10253561288118362, + "loss_ce": 0.0012172528076916933, + "loss_iou": 0.5078125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 639144380, + "step": 3721 + }, + { + "epoch": 0.9790228184388768, + "grad_norm": 3.963795445916244, + "learning_rate": 5e-06, + "loss": 0.0748, + "num_input_tokens_seen": 639316848, + "step": 3722 + }, + { + "epoch": 0.9790228184388768, + "loss": 0.13296961784362793, + "loss_ce": 0.0031783583108335733, + "loss_iou": 0.453125, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 639316848, + "step": 3722 + }, + { + "epoch": 0.9792858551982639, + "grad_norm": 11.532632559627872, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 639489024, + "step": 3723 + }, + { + "epoch": 0.9792858551982639, + "loss": 0.12053656578063965, + "loss_ce": 0.005637889727950096, + "loss_iou": 0.3828125, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 639489024, + "step": 3723 + }, + { + "epoch": 0.979548891957651, + "grad_norm": 9.989053455854867, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 639661440, + "step": 3724 + }, + { + "epoch": 0.979548891957651, + "loss": 0.13625101745128632, + "loss_ce": 0.00029520769021473825, + "loss_iou": 0.5078125, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 639661440, + "step": 3724 + }, + { + "epoch": 0.9798119287170383, + "grad_norm": 4.9459595738196285, + "learning_rate": 5e-06, + "loss": 0.1161, + "num_input_tokens_seen": 639833676, + "step": 3725 + }, + { + "epoch": 0.9798119287170383, + "loss": 0.1375160813331604, + "loss_ce": 0.0008125934982672334, + "loss_iou": 0.392578125, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 639833676, + "step": 3725 + }, + { + "epoch": 0.9800749654764254, + "grad_norm": 4.566071751318605, + "learning_rate": 5e-06, + "loss": 0.1086, + "num_input_tokens_seen": 640006092, + "step": 3726 + }, + { + "epoch": 0.9800749654764254, + "loss": 0.15862302482128143, + "loss_ce": 0.006462385877966881, + "loss_iou": 0.44140625, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 640006092, + "step": 3726 + }, + { + "epoch": 0.9803380022358125, + "grad_norm": 5.733708403269408, + "learning_rate": 5e-06, + "loss": 0.1203, + "num_input_tokens_seen": 640178452, + "step": 3727 + }, + { + "epoch": 0.9803380022358125, + "loss": 0.045382432639598846, + "loss_ce": 0.00027745150146074593, + "loss_iou": 0.515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 640178452, + "step": 3727 + }, + { + "epoch": 0.9806010389951996, + "grad_norm": 4.6254627269264095, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 640350624, + "step": 3728 + }, + { + "epoch": 0.9806010389951996, + "loss": 0.054897043853998184, + "loss_ce": 0.0018880083225667477, + "loss_iou": 0.63671875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 640350624, + "step": 3728 + }, + { + "epoch": 0.9808640757545867, + "grad_norm": 5.915011241669676, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 640522796, + "step": 3729 + }, + { + "epoch": 0.9808640757545867, + "loss": 0.12351959943771362, + "loss_ce": 0.002242741174995899, + "loss_iou": 0.56640625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 640522796, + "step": 3729 + }, + { + "epoch": 0.9811271125139738, + "grad_norm": 13.40274422198988, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 640694968, + "step": 3730 + }, + { + "epoch": 0.9811271125139738, + "loss": 0.08854502439498901, + "loss_ce": 0.00027292766026221216, + "loss_iou": 0.455078125, + "loss_num": 0.017578125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 640694968, + "step": 3730 + }, + { + "epoch": 0.981390149273361, + "grad_norm": 6.656264975637515, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 640867112, + "step": 3731 + }, + { + "epoch": 0.981390149273361, + "loss": 0.12663085758686066, + "loss_ce": 0.0017834422178566456, + "loss_iou": 0.46484375, + "loss_num": 0.02490234375, + "loss_xval": 0.125, + "num_input_tokens_seen": 640867112, + "step": 3731 + }, + { + "epoch": 0.9816531860327481, + "grad_norm": 11.247340076351778, + "learning_rate": 5e-06, + "loss": 0.0991, + "num_input_tokens_seen": 641039384, + "step": 3732 + }, + { + "epoch": 0.9816531860327481, + "loss": 0.1410384476184845, + "loss_ce": 0.0051131523214280605, + "loss_iou": 0.5859375, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 641039384, + "step": 3732 + }, + { + "epoch": 0.9819162227921352, + "grad_norm": 5.304544125644644, + "learning_rate": 5e-06, + "loss": 0.1061, + "num_input_tokens_seen": 641211452, + "step": 3733 + }, + { + "epoch": 0.9819162227921352, + "loss": 0.12673211097717285, + "loss_ce": 0.004204033873975277, + "loss_iou": 0.361328125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 641211452, + "step": 3733 + }, + { + "epoch": 0.9821792595515223, + "grad_norm": 4.504449466935876, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 641383184, + "step": 3734 + }, + { + "epoch": 0.9821792595515223, + "loss": 0.09244327247142792, + "loss_ce": 0.0025995145551860332, + "loss_iou": 0.5546875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 641383184, + "step": 3734 + }, + { + "epoch": 0.9824422963109094, + "grad_norm": 4.706100049802098, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 641555380, + "step": 3735 + }, + { + "epoch": 0.9824422963109094, + "loss": 0.08688107877969742, + "loss_ce": 0.0007604720303788781, + "loss_iou": 0.46484375, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 641555380, + "step": 3735 + }, + { + "epoch": 0.9827053330702966, + "grad_norm": 8.136156546275306, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 641727456, + "step": 3736 + }, + { + "epoch": 0.9827053330702966, + "loss": 0.11328569054603577, + "loss_ce": 0.0008589247590862215, + "loss_iou": 0.404296875, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 641727456, + "step": 3736 + }, + { + "epoch": 0.9829683698296837, + "grad_norm": 4.005733802836262, + "learning_rate": 5e-06, + "loss": 0.074, + "num_input_tokens_seen": 641899684, + "step": 3737 + }, + { + "epoch": 0.9829683698296837, + "loss": 0.09893114119768143, + "loss_ce": 0.00038988247979432344, + "loss_iou": 0.470703125, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 641899684, + "step": 3737 + }, + { + "epoch": 0.9832314065890708, + "grad_norm": 3.6593398060214795, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 642072000, + "step": 3738 + }, + { + "epoch": 0.9832314065890708, + "loss": 0.0708111971616745, + "loss_ce": 0.003092692233622074, + "loss_iou": 0.609375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 642072000, + "step": 3738 + }, + { + "epoch": 0.9834944433484579, + "grad_norm": 112.80387932882391, + "learning_rate": 5e-06, + "loss": 0.075, + "num_input_tokens_seen": 642243916, + "step": 3739 + }, + { + "epoch": 0.9834944433484579, + "loss": 0.07938267290592194, + "loss_ce": 0.001837508985772729, + "loss_iou": 0.5078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 642243916, + "step": 3739 + }, + { + "epoch": 0.983757480107845, + "grad_norm": 46.399369914383044, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 642415992, + "step": 3740 + }, + { + "epoch": 0.983757480107845, + "loss": 0.12807899713516235, + "loss_ce": 0.0024686530232429504, + "loss_iou": 0.458984375, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 642415992, + "step": 3740 + }, + { + "epoch": 0.9840205168672322, + "grad_norm": 4.972320382385805, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 642586472, + "step": 3741 + }, + { + "epoch": 0.9840205168672322, + "loss": 0.10946183651685715, + "loss_ce": 0.0011854701442644, + "loss_iou": 0.412109375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 642586472, + "step": 3741 + }, + { + "epoch": 0.9842835536266193, + "grad_norm": 3.8095943920850632, + "learning_rate": 5e-06, + "loss": 0.1015, + "num_input_tokens_seen": 642758552, + "step": 3742 + }, + { + "epoch": 0.9842835536266193, + "loss": 0.1769047975540161, + "loss_ce": 0.002832533325999975, + "loss_iou": 0.35546875, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 642758552, + "step": 3742 + }, + { + "epoch": 0.9845465903860064, + "grad_norm": 5.317599890556273, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 642930684, + "step": 3743 + }, + { + "epoch": 0.9845465903860064, + "loss": 0.05296643078327179, + "loss_ce": 0.0012238813797011971, + "loss_iou": 0.5078125, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 642930684, + "step": 3743 + }, + { + "epoch": 0.9848096271453936, + "grad_norm": 4.846803333953094, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 643102812, + "step": 3744 + }, + { + "epoch": 0.9848096271453936, + "loss": 0.10566210746765137, + "loss_ce": 0.0007274242816492915, + "loss_iou": 0.5234375, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 643102812, + "step": 3744 + }, + { + "epoch": 0.9850726639047807, + "grad_norm": 3.406688370945616, + "learning_rate": 5e-06, + "loss": 0.1164, + "num_input_tokens_seen": 643275096, + "step": 3745 + }, + { + "epoch": 0.9850726639047807, + "loss": 0.08728724718093872, + "loss_ce": 0.0003884438192471862, + "loss_iou": 0.41796875, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 643275096, + "step": 3745 + }, + { + "epoch": 0.9853357006641679, + "grad_norm": 5.344442258228798, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 643444760, + "step": 3746 + }, + { + "epoch": 0.9853357006641679, + "loss": 0.07199759781360626, + "loss_ce": 0.002844766713678837, + "loss_iou": 0.455078125, + "loss_num": 0.0137939453125, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 643444760, + "step": 3746 + }, + { + "epoch": 0.985598737423555, + "grad_norm": 9.063261890916973, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 643617080, + "step": 3747 + }, + { + "epoch": 0.985598737423555, + "loss": 0.06594318896532059, + "loss_ce": 0.0005440223030745983, + "loss_iou": 0.39453125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 643617080, + "step": 3747 + }, + { + "epoch": 0.9858617741829421, + "grad_norm": 3.8879508865341124, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 643789448, + "step": 3748 + }, + { + "epoch": 0.9858617741829421, + "loss": 0.056218698620796204, + "loss_ce": 0.001195505610667169, + "loss_iou": 0.65625, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 643789448, + "step": 3748 + }, + { + "epoch": 0.9861248109423292, + "grad_norm": 193.16068563077133, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 643958208, + "step": 3749 + }, + { + "epoch": 0.9861248109423292, + "loss": 0.057384688407182693, + "loss_ce": 0.003338058013468981, + "loss_iou": 0.3984375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 643958208, + "step": 3749 + }, + { + "epoch": 0.9863878477017163, + "grad_norm": 11.212759382877062, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_websight_new_CIoU": 0.8978322446346283, + "eval_websight_new_GIoU": 0.8991535305976868, + "eval_websight_new_IoU": 0.9012524485588074, + "eval_websight_new_MAE_all": 0.013891254551708698, + "eval_websight_new_MAE_h": 0.008930663112550974, + "eval_websight_new_MAE_w": 0.020785433240234852, + "eval_websight_new_MAE_x": 0.0205678790807724, + "eval_websight_new_MAE_y": 0.005281045567244291, + "eval_websight_new_NUM_probability": 0.9999656677246094, + "eval_websight_new_inside_bbox": 0.984375, + "eval_websight_new_loss": 0.06887268275022507, + "eval_websight_new_loss_ce": 5.8660152717493474e-06, + "eval_websight_new_loss_iou": 0.30145263671875, + "eval_websight_new_loss_num": 0.012205123901367188, + "eval_websight_new_loss_xval": 0.06097412109375, + "eval_websight_new_runtime": 65.4719, + "eval_websight_new_samples_per_second": 0.764, + "eval_websight_new_steps_per_second": 0.031, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_seeclick_CIoU": 0.657441109418869, + "eval_seeclick_GIoU": 0.6616063714027405, + "eval_seeclick_IoU": 0.6793454885482788, + "eval_seeclick_MAE_all": 0.04169847071170807, + "eval_seeclick_MAE_h": 0.024878486059606075, + "eval_seeclick_MAE_w": 0.05510186776518822, + "eval_seeclick_MAE_x": 0.06434983387589455, + "eval_seeclick_MAE_y": 0.02246370818465948, + "eval_seeclick_NUM_probability": 0.9999178946018219, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.169124573469162, + "eval_seeclick_loss_ce": 0.009124522097408772, + "eval_seeclick_loss_iou": 0.455078125, + "eval_seeclick_loss_num": 0.030914306640625, + "eval_seeclick_loss_xval": 0.15460205078125, + "eval_seeclick_runtime": 75.5663, + "eval_seeclick_samples_per_second": 0.569, + "eval_seeclick_steps_per_second": 0.026, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_icons_CIoU": 0.8627608418464661, + "eval_icons_GIoU": 0.863149881362915, + "eval_icons_IoU": 0.8711875081062317, + "eval_icons_MAE_all": 0.022217202931642532, + "eval_icons_MAE_h": 0.01895691081881523, + "eval_icons_MAE_w": 0.0274124164134264, + "eval_icons_MAE_x": 0.0250897784717381, + "eval_icons_MAE_y": 0.017409704625606537, + "eval_icons_NUM_probability": 0.9999702572822571, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.07059822231531143, + "eval_icons_loss_ce": 1.0734858733485453e-05, + "eval_icons_loss_iou": 0.47100830078125, + "eval_icons_loss_num": 0.012834548950195312, + "eval_icons_loss_xval": 0.064178466796875, + "eval_icons_runtime": 81.7327, + "eval_icons_samples_per_second": 0.612, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 0.9863878477017163, + "eval_screenspot_CIoU": 0.6053812901178995, + "eval_screenspot_GIoU": 0.5979885856310526, + "eval_screenspot_IoU": 0.637059231599172, + "eval_screenspot_MAE_all": 0.07145863150556882, + "eval_screenspot_MAE_h": 0.04250209157665571, + "eval_screenspot_MAE_w": 0.12718970080216727, + "eval_screenspot_MAE_x": 0.07470711196462314, + "eval_screenspot_MAE_y": 0.04143562292059263, + "eval_screenspot_NUM_probability": 0.9999008377393087, + "eval_screenspot_inside_bbox": 0.8841666579246521, + "eval_screenspot_loss": 0.9072001576423645, + "eval_screenspot_loss_ce": 0.6087295611699423, + "eval_screenspot_loss_iou": 0.4524739583333333, + "eval_screenspot_loss_num": 0.058499654134114586, + "eval_screenspot_loss_xval": 0.29248046875, + "eval_screenspot_runtime": 146.6335, + "eval_screenspot_samples_per_second": 0.607, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 644130472, + "step": 3750 + }, + { + "epoch": 42.623229461756374, + "grad_norm": 24.37971363613738, + "learning_rate": 5e-06, + "loss": 1.249, + "num_input_tokens_seen": 644302160, + "step": 3751 + }, + { + "epoch": 42.623229461756374, + "loss": 1.3391211032867432, + "loss_ce": 0.9495947360992432, + "loss_iou": 0.609375, + "loss_num": 0.078125, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 644302160, + "step": 3751 + }, + { + "epoch": 42.63456090651558, + "grad_norm": 15.008470587136868, + "learning_rate": 5e-06, + "loss": 0.6148, + "num_input_tokens_seen": 644473668, + "step": 3752 + }, + { + "epoch": 42.63456090651558, + "loss": 0.5472804307937622, + "loss_ce": 0.2776271402835846, + "loss_iou": 0.412109375, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 644473668, + "step": 3752 + }, + { + "epoch": 42.64589235127479, + "grad_norm": 10.153654090229104, + "learning_rate": 5e-06, + "loss": 0.5629, + "num_input_tokens_seen": 644644808, + "step": 3753 + }, + { + "epoch": 42.64589235127479, + "loss": 0.5728659629821777, + "loss_ce": 0.26402807235717773, + "loss_iou": 0.65625, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 644644808, + "step": 3753 + }, + { + "epoch": 42.657223796033996, + "grad_norm": 17.498808383644807, + "learning_rate": 5e-06, + "loss": 0.4414, + "num_input_tokens_seen": 644817044, + "step": 3754 + }, + { + "epoch": 42.657223796033996, + "loss": 0.4910026788711548, + "loss_ce": 0.17215505242347717, + "loss_iou": 0.47265625, + "loss_num": 0.06396484375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 644817044, + "step": 3754 + }, + { + "epoch": 42.668555240793204, + "grad_norm": 23.60598186278669, + "learning_rate": 5e-06, + "loss": 0.5217, + "num_input_tokens_seen": 644988620, + "step": 3755 + }, + { + "epoch": 42.668555240793204, + "loss": 0.6383213996887207, + "loss_ce": 0.2723698318004608, + "loss_iou": 0.2353515625, + "loss_num": 0.0732421875, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 644988620, + "step": 3755 + }, + { + "epoch": 42.67988668555241, + "grad_norm": 13.62518187033341, + "learning_rate": 5e-06, + "loss": 0.4168, + "num_input_tokens_seen": 645157052, + "step": 3756 + }, + { + "epoch": 42.67988668555241, + "loss": 0.4043606221675873, + "loss_ce": 0.03991969674825668, + "loss_iou": 0.0, + "loss_num": 0.07275390625, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 645157052, + "step": 3756 + }, + { + "epoch": 42.69121813031161, + "grad_norm": 9.375746028718728, + "learning_rate": 5e-06, + "loss": 0.4246, + "num_input_tokens_seen": 645328860, + "step": 3757 + }, + { + "epoch": 42.69121813031161, + "loss": 0.450731098651886, + "loss_ce": 0.10713370144367218, + "loss_iou": 0.1328125, + "loss_num": 0.06884765625, + "loss_xval": 0.34375, + "num_input_tokens_seen": 645328860, + "step": 3757 + }, + { + "epoch": 42.70254957507082, + "grad_norm": 18.34093224440512, + "learning_rate": 5e-06, + "loss": 0.3826, + "num_input_tokens_seen": 645500924, + "step": 3758 + }, + { + "epoch": 42.70254957507082, + "loss": 0.5433092713356018, + "loss_ce": 0.164830282330513, + "loss_iou": 0.330078125, + "loss_num": 0.07568359375, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 645500924, + "step": 3758 + }, + { + "epoch": 42.713881019830026, + "grad_norm": 20.083649202913804, + "learning_rate": 5e-06, + "loss": 0.333, + "num_input_tokens_seen": 645672684, + "step": 3759 + }, + { + "epoch": 42.713881019830026, + "loss": 0.4810216724872589, + "loss_ce": 0.1741369217634201, + "loss_iou": 0.3828125, + "loss_num": 0.061279296875, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 645672684, + "step": 3759 + }, + { + "epoch": 42.725212464589234, + "grad_norm": 16.870768244182024, + "learning_rate": 5e-06, + "loss": 0.4587, + "num_input_tokens_seen": 645842788, + "step": 3760 + }, + { + "epoch": 42.725212464589234, + "loss": 0.3847481608390808, + "loss_ce": 0.2161385416984558, + "loss_iou": 0.0, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 645842788, + "step": 3760 + }, + { + "epoch": 42.73654390934844, + "grad_norm": 7.052248444847047, + "learning_rate": 5e-06, + "loss": 0.4233, + "num_input_tokens_seen": 646014564, + "step": 3761 + }, + { + "epoch": 42.73654390934844, + "loss": 0.37615275382995605, + "loss_ce": 0.07849954068660736, + "loss_iou": 0.2490234375, + "loss_num": 0.0595703125, + "loss_xval": 0.296875, + "num_input_tokens_seen": 646014564, + "step": 3761 + }, + { + "epoch": 42.74787535410765, + "grad_norm": 59.35628679550276, + "learning_rate": 5e-06, + "loss": 0.3503, + "num_input_tokens_seen": 646185212, + "step": 3762 + }, + { + "epoch": 42.74787535410765, + "loss": 0.2602749466896057, + "loss_ce": 0.0961819440126419, + "loss_iou": 0.25, + "loss_num": 0.03271484375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 646185212, + "step": 3762 + }, + { + "epoch": 42.759206798866856, + "grad_norm": 11.196946849133175, + "learning_rate": 5e-06, + "loss": 0.3785, + "num_input_tokens_seen": 646356972, + "step": 3763 + }, + { + "epoch": 42.759206798866856, + "loss": 0.2895856499671936, + "loss_ce": 0.07242254912853241, + "loss_iou": 0.376953125, + "loss_num": 0.04345703125, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 646356972, + "step": 3763 + }, + { + "epoch": 42.77053824362606, + "grad_norm": 21.65002065464953, + "learning_rate": 5e-06, + "loss": 0.3237, + "num_input_tokens_seen": 646528444, + "step": 3764 + }, + { + "epoch": 42.77053824362606, + "loss": 0.271282434463501, + "loss_ce": 0.042797308415174484, + "loss_iou": 0.0, + "loss_num": 0.045654296875, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 646528444, + "step": 3764 + }, + { + "epoch": 42.78186968838527, + "grad_norm": 8.94016361166771, + "learning_rate": 5e-06, + "loss": 0.2839, + "num_input_tokens_seen": 646699156, + "step": 3765 + }, + { + "epoch": 42.78186968838527, + "loss": 0.3400135338306427, + "loss_ce": 0.1177540272474289, + "loss_iou": 0.470703125, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 646699156, + "step": 3765 + }, + { + "epoch": 42.79320113314448, + "grad_norm": 7.42291041299476, + "learning_rate": 5e-06, + "loss": 0.326, + "num_input_tokens_seen": 646869864, + "step": 3766 + }, + { + "epoch": 42.79320113314448, + "loss": 0.3942365050315857, + "loss_ce": 0.0757550597190857, + "loss_iou": 0.396484375, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 646869864, + "step": 3766 + }, + { + "epoch": 42.804532577903686, + "grad_norm": 8.782066939888233, + "learning_rate": 5e-06, + "loss": 0.2947, + "num_input_tokens_seen": 647040996, + "step": 3767 + }, + { + "epoch": 42.804532577903686, + "loss": 0.35241279006004333, + "loss_ce": 0.08031807094812393, + "loss_iou": 0.49609375, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 647040996, + "step": 3767 + }, + { + "epoch": 42.815864022662886, + "grad_norm": 16.45957031099631, + "learning_rate": 5e-06, + "loss": 0.3525, + "num_input_tokens_seen": 647212756, + "step": 3768 + }, + { + "epoch": 42.815864022662886, + "loss": 0.41619616746902466, + "loss_ce": 0.05816391855478287, + "loss_iou": 0.201171875, + "loss_num": 0.07177734375, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 647212756, + "step": 3768 + }, + { + "epoch": 42.827195467422094, + "grad_norm": 6.952175045712314, + "learning_rate": 5e-06, + "loss": 0.2681, + "num_input_tokens_seen": 647384520, + "step": 3769 + }, + { + "epoch": 42.827195467422094, + "loss": 0.2557736039161682, + "loss_ce": 0.06387907266616821, + "loss_iou": 0.443359375, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 647384520, + "step": 3769 + }, + { + "epoch": 42.8385269121813, + "grad_norm": 17.73762005139639, + "learning_rate": 5e-06, + "loss": 0.347, + "num_input_tokens_seen": 647555936, + "step": 3770 + }, + { + "epoch": 42.8385269121813, + "loss": 0.4990995526313782, + "loss_ce": 0.04640180990099907, + "loss_iou": 0.0, + "loss_num": 0.09033203125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 647555936, + "step": 3770 + }, + { + "epoch": 42.84985835694051, + "grad_norm": 12.40804914890636, + "learning_rate": 5e-06, + "loss": 0.304, + "num_input_tokens_seen": 647727976, + "step": 3771 + }, + { + "epoch": 42.84985835694051, + "loss": 0.3800738453865051, + "loss_ce": 0.062141742557287216, + "loss_iou": 0.48046875, + "loss_num": 0.0634765625, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 647727976, + "step": 3771 + }, + { + "epoch": 42.861189801699716, + "grad_norm": 12.775334867173783, + "learning_rate": 5e-06, + "loss": 0.3116, + "num_input_tokens_seen": 647898980, + "step": 3772 + }, + { + "epoch": 42.861189801699716, + "loss": 0.43261992931365967, + "loss_ce": 0.05771148204803467, + "loss_iou": 0.314453125, + "loss_num": 0.0751953125, + "loss_xval": 0.375, + "num_input_tokens_seen": 647898980, + "step": 3772 + }, + { + "epoch": 42.87252124645892, + "grad_norm": 58.24261672409072, + "learning_rate": 5e-06, + "loss": 0.3323, + "num_input_tokens_seen": 648070640, + "step": 3773 + }, + { + "epoch": 42.87252124645892, + "loss": 0.3606436252593994, + "loss_ce": 0.05900786817073822, + "loss_iou": 0.41796875, + "loss_num": 0.060302734375, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 648070640, + "step": 3773 + }, + { + "epoch": 42.88385269121813, + "grad_norm": 17.755111713808308, + "learning_rate": 5e-06, + "loss": 0.3556, + "num_input_tokens_seen": 648240800, + "step": 3774 + }, + { + "epoch": 42.88385269121813, + "loss": 0.3877432346343994, + "loss_ce": 0.0405142605304718, + "loss_iou": 0.076171875, + "loss_num": 0.0693359375, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 648240800, + "step": 3774 + }, + { + "epoch": 42.89518413597734, + "grad_norm": 21.60438828874626, + "learning_rate": 5e-06, + "loss": 0.2659, + "num_input_tokens_seen": 648410980, + "step": 3775 + }, + { + "epoch": 42.89518413597734, + "loss": 0.2268907129764557, + "loss_ce": 0.10134139657020569, + "loss_iou": 0.470703125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 648410980, + "step": 3775 + }, + { + "epoch": 42.906515580736546, + "grad_norm": 7.314877446779315, + "learning_rate": 5e-06, + "loss": 0.2199, + "num_input_tokens_seen": 648582436, + "step": 3776 + }, + { + "epoch": 42.906515580736546, + "loss": 0.19990015029907227, + "loss_ce": 0.06458520144224167, + "loss_iou": 0.421875, + "loss_num": 0.0269775390625, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 648582436, + "step": 3776 + }, + { + "epoch": 42.91784702549575, + "grad_norm": 89.40364631522068, + "learning_rate": 5e-06, + "loss": 0.2529, + "num_input_tokens_seen": 648753276, + "step": 3777 + }, + { + "epoch": 42.91784702549575, + "loss": 0.3249284625053406, + "loss_ce": 0.08383958786725998, + "loss_iou": 0.357421875, + "loss_num": 0.048095703125, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 648753276, + "step": 3777 + }, + { + "epoch": 42.92917847025496, + "grad_norm": 10.629610869016606, + "learning_rate": 5e-06, + "loss": 0.2321, + "num_input_tokens_seen": 648925076, + "step": 3778 + }, + { + "epoch": 42.92917847025496, + "loss": 0.24622215330600739, + "loss_ce": 0.03916039317846298, + "loss_iou": 0.498046875, + "loss_num": 0.04150390625, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 648925076, + "step": 3778 + }, + { + "epoch": 42.94050991501416, + "grad_norm": 8.143397457379272, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 649097140, + "step": 3779 + }, + { + "epoch": 42.94050991501416, + "loss": 0.22169527411460876, + "loss_ce": 0.04338107258081436, + "loss_iou": 0.578125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 649097140, + "step": 3779 + }, + { + "epoch": 42.95184135977337, + "grad_norm": 12.910000035177736, + "learning_rate": 5e-06, + "loss": 0.2911, + "num_input_tokens_seen": 649267156, + "step": 3780 + }, + { + "epoch": 42.95184135977337, + "loss": 0.22911566495895386, + "loss_ce": 0.021168887615203857, + "loss_iou": 0.443359375, + "loss_num": 0.04150390625, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 649267156, + "step": 3780 + }, + { + "epoch": 42.963172804532576, + "grad_norm": 17.3966640002082, + "learning_rate": 5e-06, + "loss": 0.3775, + "num_input_tokens_seen": 649438392, + "step": 3781 + }, + { + "epoch": 42.963172804532576, + "loss": 0.47513455152511597, + "loss_ce": 0.01383085548877716, + "loss_iou": 0.55078125, + "loss_num": 0.09228515625, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 649438392, + "step": 3781 + }, + { + "epoch": 42.97450424929178, + "grad_norm": 5.281441594031922, + "learning_rate": 5e-06, + "loss": 0.258, + "num_input_tokens_seen": 649608808, + "step": 3782 + }, + { + "epoch": 42.97450424929178, + "loss": 0.26870954036712646, + "loss_ce": 0.08236920088529587, + "loss_iou": 0.546875, + "loss_num": 0.037353515625, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 649608808, + "step": 3782 + }, + { + "epoch": 42.98583569405099, + "grad_norm": 9.712115026609329, + "learning_rate": 5e-06, + "loss": 0.3038, + "num_input_tokens_seen": 649779764, + "step": 3783 + }, + { + "epoch": 42.98583569405099, + "loss": 0.29391050338745117, + "loss_ce": 0.07393980026245117, + "loss_iou": 0.15234375, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 649779764, + "step": 3783 + }, + { + "epoch": 42.9971671388102, + "grad_norm": 8.644881376613798, + "learning_rate": 5e-06, + "loss": 0.2818, + "num_input_tokens_seen": 649950780, + "step": 3784 + }, + { + "epoch": 42.9971671388102, + "loss": 0.33376365900039673, + "loss_ce": 0.06652123481035233, + "loss_iou": 0.5546875, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 649950780, + "step": 3784 + }, + { + "epoch": 42.9971671388102, + "loss": 0.22381728887557983, + "loss_ce": 0.03976577892899513, + "loss_iou": 0.275390625, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 649993692, + "step": 3784 + }, + { + "epoch": 43.008498583569406, + "grad_norm": 5.784889107414259, + "learning_rate": 5e-06, + "loss": 0.2592, + "num_input_tokens_seen": 650122464, + "step": 3785 + }, + { + "epoch": 43.008498583569406, + "loss": 0.2489549070596695, + "loss_ce": 0.0555039718747139, + "loss_iou": 0.482421875, + "loss_num": 0.038818359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 650122464, + "step": 3785 + }, + { + "epoch": 43.01983002832861, + "grad_norm": 14.651729063727553, + "learning_rate": 5e-06, + "loss": 0.2333, + "num_input_tokens_seen": 650293332, + "step": 3786 + }, + { + "epoch": 43.01983002832861, + "loss": 0.2854953408241272, + "loss_ce": 0.024783657863736153, + "loss_iou": 0.578125, + "loss_num": 0.05224609375, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 650293332, + "step": 3786 + }, + { + "epoch": 43.03116147308782, + "grad_norm": 6.864405551935044, + "learning_rate": 5e-06, + "loss": 0.2131, + "num_input_tokens_seen": 650465228, + "step": 3787 + }, + { + "epoch": 43.03116147308782, + "loss": 0.21152222156524658, + "loss_ce": 0.05841553211212158, + "loss_iou": 0.5625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 650465228, + "step": 3787 + }, + { + "epoch": 43.04249291784703, + "grad_norm": 30.52150954800119, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 650636904, + "step": 3788 + }, + { + "epoch": 43.04249291784703, + "loss": 0.20700110495090485, + "loss_ce": 0.03639259189367294, + "loss_iou": 0.451171875, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 650636904, + "step": 3788 + }, + { + "epoch": 43.053824362606235, + "grad_norm": 6.722962112619755, + "learning_rate": 5e-06, + "loss": 0.232, + "num_input_tokens_seen": 650807920, + "step": 3789 + }, + { + "epoch": 43.053824362606235, + "loss": 0.15295374393463135, + "loss_ce": 0.05824244022369385, + "loss_iou": 0.470703125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 650807920, + "step": 3789 + }, + { + "epoch": 43.065155807365436, + "grad_norm": 13.160820060207227, + "learning_rate": 5e-06, + "loss": 0.2421, + "num_input_tokens_seen": 650980148, + "step": 3790 + }, + { + "epoch": 43.065155807365436, + "loss": 0.33120375871658325, + "loss_ce": 0.08941297233104706, + "loss_iou": 0.1318359375, + "loss_num": 0.04833984375, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 650980148, + "step": 3790 + }, + { + "epoch": 43.07648725212464, + "grad_norm": 6.435419692026323, + "learning_rate": 5e-06, + "loss": 0.197, + "num_input_tokens_seen": 651151304, + "step": 3791 + }, + { + "epoch": 43.07648725212464, + "loss": 0.2795127034187317, + "loss_ce": 0.04800637066364288, + "loss_iou": 0.48046875, + "loss_num": 0.04638671875, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 651151304, + "step": 3791 + }, + { + "epoch": 43.08781869688385, + "grad_norm": 7.7632092093522544, + "learning_rate": 5e-06, + "loss": 0.2171, + "num_input_tokens_seen": 651323268, + "step": 3792 + }, + { + "epoch": 43.08781869688385, + "loss": 0.23102571070194244, + "loss_ce": 0.05686188489198685, + "loss_iou": 0.51953125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 651323268, + "step": 3792 + }, + { + "epoch": 43.09915014164306, + "grad_norm": 9.493350798082142, + "learning_rate": 5e-06, + "loss": 0.2472, + "num_input_tokens_seen": 651495456, + "step": 3793 + }, + { + "epoch": 43.09915014164306, + "loss": 0.22000005841255188, + "loss_ce": 0.034987252205610275, + "loss_iou": 0.4921875, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 651495456, + "step": 3793 + }, + { + "epoch": 43.110481586402265, + "grad_norm": 31.530854864107187, + "learning_rate": 5e-06, + "loss": 0.2926, + "num_input_tokens_seen": 651665408, + "step": 3794 + }, + { + "epoch": 43.110481586402265, + "loss": 0.29464665055274963, + "loss_ce": 0.07449283450841904, + "loss_iou": 0.51953125, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 651665408, + "step": 3794 + }, + { + "epoch": 43.12181303116147, + "grad_norm": 17.13192978487039, + "learning_rate": 5e-06, + "loss": 0.229, + "num_input_tokens_seen": 651836708, + "step": 3795 + }, + { + "epoch": 43.12181303116147, + "loss": 0.17648279666900635, + "loss_ce": 0.016387568786740303, + "loss_iou": 0.53515625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 651836708, + "step": 3795 + }, + { + "epoch": 43.13314447592068, + "grad_norm": 5.659905257802971, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 652008812, + "step": 3796 + }, + { + "epoch": 43.13314447592068, + "loss": 0.24487656354904175, + "loss_ce": 0.04092760011553764, + "loss_iou": 0.51953125, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 652008812, + "step": 3796 + }, + { + "epoch": 43.14447592067989, + "grad_norm": 11.602617114678928, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 652180336, + "step": 3797 + }, + { + "epoch": 43.14447592067989, + "loss": 0.25554385781288147, + "loss_ce": 0.02931704744696617, + "loss_iou": 0.427734375, + "loss_num": 0.045166015625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 652180336, + "step": 3797 + }, + { + "epoch": 43.155807365439095, + "grad_norm": 29.420810232565373, + "learning_rate": 5e-06, + "loss": 0.239, + "num_input_tokens_seen": 652351044, + "step": 3798 + }, + { + "epoch": 43.155807365439095, + "loss": 0.3694992959499359, + "loss_ce": 0.031654439866542816, + "loss_iou": 0.279296875, + "loss_num": 0.0673828125, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 652351044, + "step": 3798 + }, + { + "epoch": 43.1671388101983, + "grad_norm": 7.506924438989472, + "learning_rate": 5e-06, + "loss": 0.2761, + "num_input_tokens_seen": 652522800, + "step": 3799 + }, + { + "epoch": 43.1671388101983, + "loss": 0.2833660840988159, + "loss_ce": 0.025339949876070023, + "loss_iou": 0.384765625, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 652522800, + "step": 3799 + }, + { + "epoch": 43.17847025495751, + "grad_norm": 10.13119802584233, + "learning_rate": 5e-06, + "loss": 0.1714, + "num_input_tokens_seen": 652694216, + "step": 3800 + }, + { + "epoch": 43.17847025495751, + "loss": 0.22721588611602783, + "loss_ce": 0.017956862226128578, + "loss_iou": 0.1103515625, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 652694216, + "step": 3800 + }, + { + "epoch": 43.18980169971671, + "grad_norm": 6.188441992461635, + "learning_rate": 5e-06, + "loss": 0.3126, + "num_input_tokens_seen": 652866332, + "step": 3801 + }, + { + "epoch": 43.18980169971671, + "loss": 0.3227539658546448, + "loss_ce": 0.012817460112273693, + "loss_iou": 0.50390625, + "loss_num": 0.06201171875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 652866332, + "step": 3801 + }, + { + "epoch": 43.20113314447592, + "grad_norm": 9.953767348454466, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 653037548, + "step": 3802 + }, + { + "epoch": 43.20113314447592, + "loss": 0.34050068259239197, + "loss_ce": 0.03166278079152107, + "loss_iou": 0.099609375, + "loss_num": 0.061767578125, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 653037548, + "step": 3802 + }, + { + "epoch": 43.212464589235125, + "grad_norm": 10.240115738305146, + "learning_rate": 5e-06, + "loss": 0.2311, + "num_input_tokens_seen": 653209240, + "step": 3803 + }, + { + "epoch": 43.212464589235125, + "loss": 0.2943469285964966, + "loss_ce": 0.02209961786866188, + "loss_iou": 0.59375, + "loss_num": 0.054443359375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 653209240, + "step": 3803 + }, + { + "epoch": 43.22379603399433, + "grad_norm": 9.75843735457791, + "learning_rate": 5e-06, + "loss": 0.2735, + "num_input_tokens_seen": 653379136, + "step": 3804 + }, + { + "epoch": 43.22379603399433, + "loss": 0.22456300258636475, + "loss_ce": 0.027694083750247955, + "loss_iou": 0.13671875, + "loss_num": 0.039306640625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 653379136, + "step": 3804 + }, + { + "epoch": 43.23512747875354, + "grad_norm": 10.498601980364539, + "learning_rate": 5e-06, + "loss": 0.2699, + "num_input_tokens_seen": 653551180, + "step": 3805 + }, + { + "epoch": 43.23512747875354, + "loss": 0.2836833894252777, + "loss_ce": 0.02382621541619301, + "loss_iou": 0.59765625, + "loss_num": 0.052001953125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 653551180, + "step": 3805 + }, + { + "epoch": 43.24645892351275, + "grad_norm": 18.66063402803507, + "learning_rate": 5e-06, + "loss": 0.2754, + "num_input_tokens_seen": 653722740, + "step": 3806 + }, + { + "epoch": 43.24645892351275, + "loss": 0.3721906542778015, + "loss_ce": 0.01601998507976532, + "loss_iou": 0.0, + "loss_num": 0.0712890625, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 653722740, + "step": 3806 + }, + { + "epoch": 43.257790368271955, + "grad_norm": 11.386499638967939, + "learning_rate": 5e-06, + "loss": 0.206, + "num_input_tokens_seen": 653894636, + "step": 3807 + }, + { + "epoch": 43.257790368271955, + "loss": 0.24139291048049927, + "loss_ce": 0.08189278841018677, + "loss_iou": 0.365234375, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 653894636, + "step": 3807 + }, + { + "epoch": 43.26912181303116, + "grad_norm": 13.489539747453755, + "learning_rate": 5e-06, + "loss": 0.1924, + "num_input_tokens_seen": 654065232, + "step": 3808 + }, + { + "epoch": 43.26912181303116, + "loss": 0.19118039309978485, + "loss_ce": 0.02858273684978485, + "loss_iou": 0.3359375, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 654065232, + "step": 3808 + }, + { + "epoch": 43.28045325779037, + "grad_norm": 10.507455853530496, + "learning_rate": 5e-06, + "loss": 0.2809, + "num_input_tokens_seen": 654236760, + "step": 3809 + }, + { + "epoch": 43.28045325779037, + "loss": 0.27525657415390015, + "loss_ce": 0.09505028277635574, + "loss_iou": 0.56640625, + "loss_num": 0.0361328125, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 654236760, + "step": 3809 + }, + { + "epoch": 43.29178470254958, + "grad_norm": 8.614593715955264, + "learning_rate": 5e-06, + "loss": 0.2246, + "num_input_tokens_seen": 654408368, + "step": 3810 + }, + { + "epoch": 43.29178470254958, + "loss": 0.17899608612060547, + "loss_ce": 0.01819896697998047, + "loss_iou": 0.0, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 654408368, + "step": 3810 + }, + { + "epoch": 43.303116147308785, + "grad_norm": 4.433324089882691, + "learning_rate": 5e-06, + "loss": 0.1783, + "num_input_tokens_seen": 654580212, + "step": 3811 + }, + { + "epoch": 43.303116147308785, + "loss": 0.1583895981311798, + "loss_ce": 0.025638127699494362, + "loss_iou": 0.380859375, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 654580212, + "step": 3811 + }, + { + "epoch": 43.314447592067985, + "grad_norm": 10.737819808808593, + "learning_rate": 5e-06, + "loss": 0.1974, + "num_input_tokens_seen": 654752004, + "step": 3812 + }, + { + "epoch": 43.314447592067985, + "loss": 0.25267672538757324, + "loss_ce": 0.03273654356598854, + "loss_iou": 0.41015625, + "loss_num": 0.0439453125, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 654752004, + "step": 3812 + }, + { + "epoch": 43.32577903682719, + "grad_norm": 9.169743103048214, + "learning_rate": 5e-06, + "loss": 0.2434, + "num_input_tokens_seen": 654921420, + "step": 3813 + }, + { + "epoch": 43.32577903682719, + "loss": 0.302659273147583, + "loss_ce": 0.07158016413450241, + "loss_iou": 0.57421875, + "loss_num": 0.046142578125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 654921420, + "step": 3813 + }, + { + "epoch": 43.3371104815864, + "grad_norm": 6.7512630689625786, + "learning_rate": 5e-06, + "loss": 0.1827, + "num_input_tokens_seen": 655093140, + "step": 3814 + }, + { + "epoch": 43.3371104815864, + "loss": 0.17458195984363556, + "loss_ce": 0.02226872742176056, + "loss_iou": 0.373046875, + "loss_num": 0.0303955078125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 655093140, + "step": 3814 + }, + { + "epoch": 43.34844192634561, + "grad_norm": 14.476485058249372, + "learning_rate": 5e-06, + "loss": 0.2628, + "num_input_tokens_seen": 655265136, + "step": 3815 + }, + { + "epoch": 43.34844192634561, + "loss": 0.2875571846961975, + "loss_ce": 0.08492045104503632, + "loss_iou": 0.490234375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 655265136, + "step": 3815 + }, + { + "epoch": 43.359773371104815, + "grad_norm": 7.813083325716797, + "learning_rate": 5e-06, + "loss": 0.2828, + "num_input_tokens_seen": 655437064, + "step": 3816 + }, + { + "epoch": 43.359773371104815, + "loss": 0.34945645928382874, + "loss_ce": 0.04571498930454254, + "loss_iou": 0.390625, + "loss_num": 0.060791015625, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 655437064, + "step": 3816 + }, + { + "epoch": 43.37110481586402, + "grad_norm": 7.336895629162358, + "learning_rate": 5e-06, + "loss": 0.222, + "num_input_tokens_seen": 655607676, + "step": 3817 + }, + { + "epoch": 43.37110481586402, + "loss": 0.16241437196731567, + "loss_ce": 0.04251081123948097, + "loss_iou": 0.52734375, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 655607676, + "step": 3817 + }, + { + "epoch": 43.38243626062323, + "grad_norm": 5.472498623836622, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 655779600, + "step": 3818 + }, + { + "epoch": 43.38243626062323, + "loss": 0.13749703764915466, + "loss_ce": 0.05436716228723526, + "loss_iou": 0.54296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 655779600, + "step": 3818 + }, + { + "epoch": 43.39376770538244, + "grad_norm": 11.062765407752119, + "learning_rate": 5e-06, + "loss": 0.2462, + "num_input_tokens_seen": 655950168, + "step": 3819 + }, + { + "epoch": 43.39376770538244, + "loss": 0.24120905995368958, + "loss_ce": 0.03503229841589928, + "loss_iou": 0.375, + "loss_num": 0.041259765625, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 655950168, + "step": 3819 + }, + { + "epoch": 43.405099150141645, + "grad_norm": 7.983856235610563, + "learning_rate": 5e-06, + "loss": 0.2226, + "num_input_tokens_seen": 656121288, + "step": 3820 + }, + { + "epoch": 43.405099150141645, + "loss": 0.2142389714717865, + "loss_ce": 0.053594425320625305, + "loss_iou": 0.4765625, + "loss_num": 0.0322265625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 656121288, + "step": 3820 + }, + { + "epoch": 43.41643059490085, + "grad_norm": 8.372275065133358, + "learning_rate": 5e-06, + "loss": 0.3354, + "num_input_tokens_seen": 656292508, + "step": 3821 + }, + { + "epoch": 43.41643059490085, + "loss": 0.27440422773361206, + "loss_ce": 0.02116938680410385, + "loss_iou": 0.55859375, + "loss_num": 0.050537109375, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 656292508, + "step": 3821 + }, + { + "epoch": 43.42776203966006, + "grad_norm": 15.602341705793288, + "learning_rate": 5e-06, + "loss": 0.2219, + "num_input_tokens_seen": 656461684, + "step": 3822 + }, + { + "epoch": 43.42776203966006, + "loss": 0.31255632638931274, + "loss_ce": 0.07775407284498215, + "loss_iou": 0.39453125, + "loss_num": 0.046875, + "loss_xval": 0.234375, + "num_input_tokens_seen": 656461684, + "step": 3822 + }, + { + "epoch": 43.43909348441926, + "grad_norm": 14.301675332298627, + "learning_rate": 5e-06, + "loss": 0.272, + "num_input_tokens_seen": 656633508, + "step": 3823 + }, + { + "epoch": 43.43909348441926, + "loss": 0.24964401125907898, + "loss_ce": 0.029185030609369278, + "loss_iou": 0.392578125, + "loss_num": 0.0439453125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 656633508, + "step": 3823 + }, + { + "epoch": 43.45042492917847, + "grad_norm": 11.526420488961662, + "learning_rate": 5e-06, + "loss": 0.2866, + "num_input_tokens_seen": 656805284, + "step": 3824 + }, + { + "epoch": 43.45042492917847, + "loss": 0.3178904056549072, + "loss_ce": 0.019184362143278122, + "loss_iou": 0.498046875, + "loss_num": 0.059814453125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 656805284, + "step": 3824 + }, + { + "epoch": 43.461756373937675, + "grad_norm": 12.819341592963644, + "learning_rate": 5e-06, + "loss": 0.1974, + "num_input_tokens_seen": 656977192, + "step": 3825 + }, + { + "epoch": 43.461756373937675, + "loss": 0.21698424220085144, + "loss_ce": 0.03781554847955704, + "loss_iou": 0.388671875, + "loss_num": 0.035888671875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 656977192, + "step": 3825 + }, + { + "epoch": 43.47308781869688, + "grad_norm": 8.831140904488134, + "learning_rate": 5e-06, + "loss": 0.2008, + "num_input_tokens_seen": 657148044, + "step": 3826 + }, + { + "epoch": 43.47308781869688, + "loss": 0.2201002687215805, + "loss_ce": 0.011665214784443378, + "loss_iou": 0.59375, + "loss_num": 0.041748046875, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 657148044, + "step": 3826 + }, + { + "epoch": 43.48441926345609, + "grad_norm": 8.065696891811767, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 657320124, + "step": 3827 + }, + { + "epoch": 43.48441926345609, + "loss": 0.24065983295440674, + "loss_ce": 0.016554001718759537, + "loss_iou": 0.330078125, + "loss_num": 0.044677734375, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 657320124, + "step": 3827 + }, + { + "epoch": 43.4957507082153, + "grad_norm": 16.439324369048308, + "learning_rate": 5e-06, + "loss": 0.2246, + "num_input_tokens_seen": 657492284, + "step": 3828 + }, + { + "epoch": 43.4957507082153, + "loss": 0.21445706486701965, + "loss_ce": 0.04032377526164055, + "loss_iou": 0.328125, + "loss_num": 0.034912109375, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 657492284, + "step": 3828 + }, + { + "epoch": 43.507082152974505, + "grad_norm": 7.712628241498826, + "learning_rate": 5e-06, + "loss": 0.2453, + "num_input_tokens_seen": 657663228, + "step": 3829 + }, + { + "epoch": 43.507082152974505, + "loss": 0.20886492729187012, + "loss_ce": 0.03424334153532982, + "loss_iou": 0.46875, + "loss_num": 0.034912109375, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 657663228, + "step": 3829 + }, + { + "epoch": 43.51841359773371, + "grad_norm": 12.0529266773192, + "learning_rate": 5e-06, + "loss": 0.2182, + "num_input_tokens_seen": 657834276, + "step": 3830 + }, + { + "epoch": 43.51841359773371, + "loss": 0.2074132114648819, + "loss_ce": 0.037369273602962494, + "loss_iou": 0.46875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 657834276, + "step": 3830 + }, + { + "epoch": 43.52974504249292, + "grad_norm": 11.68380231415628, + "learning_rate": 5e-06, + "loss": 0.2204, + "num_input_tokens_seen": 658005484, + "step": 3831 + }, + { + "epoch": 43.52974504249292, + "loss": 0.2939346134662628, + "loss_ce": 0.04387357458472252, + "loss_iou": 0.55859375, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 658005484, + "step": 3831 + }, + { + "epoch": 43.54107648725213, + "grad_norm": 21.100746918424953, + "learning_rate": 5e-06, + "loss": 0.2424, + "num_input_tokens_seen": 658177708, + "step": 3832 + }, + { + "epoch": 43.54107648725213, + "loss": 0.2990325391292572, + "loss_ce": 0.0315764844417572, + "loss_iou": 0.73828125, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 658177708, + "step": 3832 + }, + { + "epoch": 43.552407932011334, + "grad_norm": 11.518224064790715, + "learning_rate": 5e-06, + "loss": 0.2318, + "num_input_tokens_seen": 658348500, + "step": 3833 + }, + { + "epoch": 43.552407932011334, + "loss": 0.20460152626037598, + "loss_ce": 0.06385447084903717, + "loss_iou": 0.64453125, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 658348500, + "step": 3833 + }, + { + "epoch": 43.563739376770535, + "grad_norm": 6.272488097687055, + "learning_rate": 5e-06, + "loss": 0.2291, + "num_input_tokens_seen": 658519692, + "step": 3834 + }, + { + "epoch": 43.563739376770535, + "loss": 0.2233453094959259, + "loss_ce": 0.039995696395635605, + "loss_iou": 0.291015625, + "loss_num": 0.03662109375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 658519692, + "step": 3834 + }, + { + "epoch": 43.57507082152974, + "grad_norm": 4.098578612905147, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 658690396, + "step": 3835 + }, + { + "epoch": 43.57507082152974, + "loss": 0.20045900344848633, + "loss_ce": 0.04628421366214752, + "loss_iou": 0.53125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 658690396, + "step": 3835 + }, + { + "epoch": 43.58640226628895, + "grad_norm": 8.110726258019252, + "learning_rate": 5e-06, + "loss": 0.2675, + "num_input_tokens_seen": 658862380, + "step": 3836 + }, + { + "epoch": 43.58640226628895, + "loss": 0.36111244559288025, + "loss_ce": 0.04659830033779144, + "loss_iou": 0.42578125, + "loss_num": 0.06298828125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 658862380, + "step": 3836 + }, + { + "epoch": 43.59773371104816, + "grad_norm": 7.458622707898409, + "learning_rate": 5e-06, + "loss": 0.2198, + "num_input_tokens_seen": 659034252, + "step": 3837 + }, + { + "epoch": 43.59773371104816, + "loss": 0.209273561835289, + "loss_ce": 0.0072471983730793, + "loss_iou": 0.57421875, + "loss_num": 0.040283203125, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 659034252, + "step": 3837 + }, + { + "epoch": 43.609065155807365, + "grad_norm": 8.432559943682879, + "learning_rate": 5e-06, + "loss": 0.2397, + "num_input_tokens_seen": 659204724, + "step": 3838 + }, + { + "epoch": 43.609065155807365, + "loss": 0.19880183041095734, + "loss_ce": 0.07355768978595734, + "loss_iou": 0.359375, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 659204724, + "step": 3838 + }, + { + "epoch": 43.62039660056657, + "grad_norm": 5.5823825751763, + "learning_rate": 5e-06, + "loss": 0.1696, + "num_input_tokens_seen": 659376692, + "step": 3839 + }, + { + "epoch": 43.62039660056657, + "loss": 0.09564369916915894, + "loss_ce": 0.013398820534348488, + "loss_iou": 0.55859375, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 659376692, + "step": 3839 + }, + { + "epoch": 43.63172804532578, + "grad_norm": 10.559594019574956, + "learning_rate": 5e-06, + "loss": 0.2299, + "num_input_tokens_seen": 659547172, + "step": 3840 + }, + { + "epoch": 43.63172804532578, + "loss": 0.1778956651687622, + "loss_ce": 0.02066909149289131, + "loss_iou": 0.4140625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 659547172, + "step": 3840 + }, + { + "epoch": 43.64305949008499, + "grad_norm": 25.648096566696097, + "learning_rate": 5e-06, + "loss": 0.2321, + "num_input_tokens_seen": 659719140, + "step": 3841 + }, + { + "epoch": 43.64305949008499, + "loss": 0.26578018069267273, + "loss_ce": 0.052859045565128326, + "loss_iou": 0.40625, + "loss_num": 0.04248046875, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 659719140, + "step": 3841 + }, + { + "epoch": 43.654390934844194, + "grad_norm": 4.322128469344213, + "learning_rate": 5e-06, + "loss": 0.2071, + "num_input_tokens_seen": 659891276, + "step": 3842 + }, + { + "epoch": 43.654390934844194, + "loss": 0.1609874963760376, + "loss_ce": 0.058723084628582, + "loss_iou": 0.62890625, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 659891276, + "step": 3842 + }, + { + "epoch": 43.6657223796034, + "grad_norm": 15.389118290922337, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 660063448, + "step": 3843 + }, + { + "epoch": 43.6657223796034, + "loss": 0.30868998169898987, + "loss_ce": 0.050419725477695465, + "loss_iou": 0.3671875, + "loss_num": 0.0517578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 660063448, + "step": 3843 + }, + { + "epoch": 43.67705382436261, + "grad_norm": 9.30800837634291, + "learning_rate": 5e-06, + "loss": 0.1926, + "num_input_tokens_seen": 660234844, + "step": 3844 + }, + { + "epoch": 43.67705382436261, + "loss": 0.24589240550994873, + "loss_ce": 0.052899233996868134, + "loss_iou": 0.41015625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 660234844, + "step": 3844 + }, + { + "epoch": 43.68838526912181, + "grad_norm": 7.544857056958143, + "learning_rate": 5e-06, + "loss": 0.2734, + "num_input_tokens_seen": 660407012, + "step": 3845 + }, + { + "epoch": 43.68838526912181, + "loss": 0.3219788372516632, + "loss_ce": 0.02855231799185276, + "loss_iou": 0.47265625, + "loss_num": 0.05859375, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 660407012, + "step": 3845 + }, + { + "epoch": 43.69971671388102, + "grad_norm": 5.713854999747559, + "learning_rate": 5e-06, + "loss": 0.2058, + "num_input_tokens_seen": 660578960, + "step": 3846 + }, + { + "epoch": 43.69971671388102, + "loss": 0.2059517502784729, + "loss_ce": 0.0179939903318882, + "loss_iou": 0.4140625, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 660578960, + "step": 3846 + }, + { + "epoch": 43.711048158640224, + "grad_norm": 7.675785504814342, + "learning_rate": 5e-06, + "loss": 0.2137, + "num_input_tokens_seen": 660750016, + "step": 3847 + }, + { + "epoch": 43.711048158640224, + "loss": 0.1930961310863495, + "loss_ce": 0.026409126818180084, + "loss_iou": 0.484375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 660750016, + "step": 3847 + }, + { + "epoch": 43.72237960339943, + "grad_norm": 8.297970210596503, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 660919824, + "step": 3848 + }, + { + "epoch": 43.72237960339943, + "loss": 0.1422894448041916, + "loss_ce": 0.01771668717265129, + "loss_iou": 0.48828125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 660919824, + "step": 3848 + }, + { + "epoch": 43.73371104815864, + "grad_norm": 8.498874853495801, + "learning_rate": 5e-06, + "loss": 0.2264, + "num_input_tokens_seen": 661090136, + "step": 3849 + }, + { + "epoch": 43.73371104815864, + "loss": 0.2185337096452713, + "loss_ce": 0.038968276232481, + "loss_iou": 0.38671875, + "loss_num": 0.035888671875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 661090136, + "step": 3849 + }, + { + "epoch": 43.74504249291785, + "grad_norm": 9.257081145105209, + "learning_rate": 5e-06, + "loss": 0.2249, + "num_input_tokens_seen": 661261156, + "step": 3850 + }, + { + "epoch": 43.74504249291785, + "loss": 0.20226441323757172, + "loss_ce": 0.028604142367839813, + "loss_iou": 0.263671875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 661261156, + "step": 3850 + }, + { + "epoch": 43.756373937677054, + "grad_norm": 5.628612071722162, + "learning_rate": 5e-06, + "loss": 0.2236, + "num_input_tokens_seen": 661432132, + "step": 3851 + }, + { + "epoch": 43.756373937677054, + "loss": 0.19719679653644562, + "loss_ce": 0.048637233674526215, + "loss_iou": 0.4921875, + "loss_num": 0.0296630859375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 661432132, + "step": 3851 + }, + { + "epoch": 43.76770538243626, + "grad_norm": 7.847435672271257, + "learning_rate": 5e-06, + "loss": 0.2018, + "num_input_tokens_seen": 661603952, + "step": 3852 + }, + { + "epoch": 43.76770538243626, + "loss": 0.20567631721496582, + "loss_ce": 0.023303259164094925, + "loss_iou": 0.45703125, + "loss_num": 0.03662109375, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 661603952, + "step": 3852 + }, + { + "epoch": 43.77903682719547, + "grad_norm": 5.306042708069077, + "learning_rate": 5e-06, + "loss": 0.2072, + "num_input_tokens_seen": 661775660, + "step": 3853 + }, + { + "epoch": 43.77903682719547, + "loss": 0.22403264045715332, + "loss_ce": 0.02073979191482067, + "loss_iou": 0.412109375, + "loss_num": 0.040771484375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 661775660, + "step": 3853 + }, + { + "epoch": 43.79036827195468, + "grad_norm": 12.147321715502649, + "learning_rate": 5e-06, + "loss": 0.2544, + "num_input_tokens_seen": 661945204, + "step": 3854 + }, + { + "epoch": 43.79036827195468, + "loss": 0.1255451738834381, + "loss_ce": 0.03261914476752281, + "loss_iou": 0.53515625, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 661945204, + "step": 3854 + }, + { + "epoch": 43.801699716713884, + "grad_norm": 16.31804533288108, + "learning_rate": 5e-06, + "loss": 0.2187, + "num_input_tokens_seen": 662115404, + "step": 3855 + }, + { + "epoch": 43.801699716713884, + "loss": 0.2279517650604248, + "loss_ce": 0.01835702732205391, + "loss_iou": 0.515625, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 662115404, + "step": 3855 + }, + { + "epoch": 43.81303116147309, + "grad_norm": 9.729461521704513, + "learning_rate": 5e-06, + "loss": 0.2394, + "num_input_tokens_seen": 662287296, + "step": 3856 + }, + { + "epoch": 43.81303116147309, + "loss": 0.15691012144088745, + "loss_ce": 0.003193075302988291, + "loss_iou": 0.462890625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 662287296, + "step": 3856 + }, + { + "epoch": 43.82436260623229, + "grad_norm": 8.11043735683055, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 662459236, + "step": 3857 + }, + { + "epoch": 43.82436260623229, + "loss": 0.22318127751350403, + "loss_ce": 0.026556532829999924, + "loss_iou": 0.216796875, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 662459236, + "step": 3857 + }, + { + "epoch": 43.8356940509915, + "grad_norm": 9.073067304511525, + "learning_rate": 5e-06, + "loss": 0.2003, + "num_input_tokens_seen": 662631280, + "step": 3858 + }, + { + "epoch": 43.8356940509915, + "loss": 0.12552236020565033, + "loss_ce": 0.02737782895565033, + "loss_iou": 0.5703125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 662631280, + "step": 3858 + }, + { + "epoch": 43.84702549575071, + "grad_norm": 5.130539356409205, + "learning_rate": 5e-06, + "loss": 0.2124, + "num_input_tokens_seen": 662802136, + "step": 3859 + }, + { + "epoch": 43.84702549575071, + "loss": 0.16339465975761414, + "loss_ce": 0.03280993923544884, + "loss_iou": 0.46875, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 662802136, + "step": 3859 + }, + { + "epoch": 43.858356940509914, + "grad_norm": 14.886448583020941, + "learning_rate": 5e-06, + "loss": 0.2103, + "num_input_tokens_seen": 662972652, + "step": 3860 + }, + { + "epoch": 43.858356940509914, + "loss": 0.1632852554321289, + "loss_ce": 0.00761509221047163, + "loss_iou": 0.6015625, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 662972652, + "step": 3860 + }, + { + "epoch": 43.86968838526912, + "grad_norm": 18.515462307673364, + "learning_rate": 5e-06, + "loss": 0.2135, + "num_input_tokens_seen": 663144556, + "step": 3861 + }, + { + "epoch": 43.86968838526912, + "loss": 0.27687323093414307, + "loss_ce": 0.01797734946012497, + "loss_iou": 0.5078125, + "loss_num": 0.0517578125, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 663144556, + "step": 3861 + }, + { + "epoch": 43.88101983002833, + "grad_norm": 10.369040382279163, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 663315964, + "step": 3862 + }, + { + "epoch": 43.88101983002833, + "loss": 0.17530646920204163, + "loss_ce": 0.01565375365316868, + "loss_iou": 0.154296875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 663315964, + "step": 3862 + }, + { + "epoch": 43.892351274787536, + "grad_norm": 8.739823892770874, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 663487744, + "step": 3863 + }, + { + "epoch": 43.892351274787536, + "loss": 0.1954469382762909, + "loss_ce": 0.03522966429591179, + "loss_iou": 0.4765625, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 663487744, + "step": 3863 + }, + { + "epoch": 43.903682719546744, + "grad_norm": 21.854777367265793, + "learning_rate": 5e-06, + "loss": 0.2322, + "num_input_tokens_seen": 663659864, + "step": 3864 + }, + { + "epoch": 43.903682719546744, + "loss": 0.308188796043396, + "loss_ce": 0.040427565574645996, + "loss_iou": 0.4375, + "loss_num": 0.053466796875, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 663659864, + "step": 3864 + }, + { + "epoch": 43.91501416430595, + "grad_norm": 5.780298148519195, + "learning_rate": 5e-06, + "loss": 0.2676, + "num_input_tokens_seen": 663828604, + "step": 3865 + }, + { + "epoch": 43.91501416430595, + "loss": 0.17363348603248596, + "loss_ce": 0.05888739228248596, + "loss_iou": 0.515625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 663828604, + "step": 3865 + }, + { + "epoch": 43.92634560906516, + "grad_norm": 11.531013536154632, + "learning_rate": 5e-06, + "loss": 0.2441, + "num_input_tokens_seen": 663999584, + "step": 3866 + }, + { + "epoch": 43.92634560906516, + "loss": 0.2707352638244629, + "loss_ce": 0.020613208413124084, + "loss_iou": 0.453125, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 663999584, + "step": 3866 + }, + { + "epoch": 43.93767705382436, + "grad_norm": 4.158122993888739, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 664169836, + "step": 3867 + }, + { + "epoch": 43.93767705382436, + "loss": 0.191764235496521, + "loss_ce": 0.023490317165851593, + "loss_iou": 0.5859375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 664169836, + "step": 3867 + }, + { + "epoch": 43.94900849858357, + "grad_norm": 6.3836287404208285, + "learning_rate": 5e-06, + "loss": 0.2108, + "num_input_tokens_seen": 664341616, + "step": 3868 + }, + { + "epoch": 43.94900849858357, + "loss": 0.20721814036369324, + "loss_ce": 0.03509899228811264, + "loss_iou": 0.3828125, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 664341616, + "step": 3868 + }, + { + "epoch": 43.960339943342774, + "grad_norm": 6.661686630134207, + "learning_rate": 5e-06, + "loss": 0.1539, + "num_input_tokens_seen": 664512632, + "step": 3869 + }, + { + "epoch": 43.960339943342774, + "loss": 0.12985064089298248, + "loss_ce": 0.00677324878051877, + "loss_iou": 0.494140625, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 664512632, + "step": 3869 + }, + { + "epoch": 43.97167138810198, + "grad_norm": 11.260007791505094, + "learning_rate": 5e-06, + "loss": 0.2184, + "num_input_tokens_seen": 664684092, + "step": 3870 + }, + { + "epoch": 43.97167138810198, + "loss": 0.21081560850143433, + "loss_ce": 0.023742854595184326, + "loss_iou": 0.423828125, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 664684092, + "step": 3870 + }, + { + "epoch": 43.98300283286119, + "grad_norm": 18.315713023686772, + "learning_rate": 5e-06, + "loss": 0.2193, + "num_input_tokens_seen": 664855708, + "step": 3871 + }, + { + "epoch": 43.98300283286119, + "loss": 0.19681121408939362, + "loss_ce": 0.03161955997347832, + "loss_iou": 0.515625, + "loss_num": 0.032958984375, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 664855708, + "step": 3871 + }, + { + "epoch": 43.994334277620396, + "grad_norm": 8.966144041465395, + "learning_rate": 5e-06, + "loss": 0.165, + "num_input_tokens_seen": 665027668, + "step": 3872 + }, + { + "epoch": 43.994334277620396, + "loss": 0.19843284785747528, + "loss_ce": 0.04087058827280998, + "loss_iou": 0.5, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 665027668, + "step": 3872 + }, + { + "epoch": 43.994334277620396, + "loss": 0.20960503816604614, + "loss_ce": 0.02045709453523159, + "loss_iou": 0.5234375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 665113460, + "step": 3872 + }, + { + "epoch": 44.005665722379604, + "grad_norm": 5.528582486793579, + "learning_rate": 5e-06, + "loss": 0.1555, + "num_input_tokens_seen": 665199152, + "step": 3873 + }, + { + "epoch": 44.005665722379604, + "loss": 0.16204151511192322, + "loss_ce": 0.02001270093023777, + "loss_iou": 0.494140625, + "loss_num": 0.0284423828125, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 665199152, + "step": 3873 + }, + { + "epoch": 44.01699716713881, + "grad_norm": 4.395095908779968, + "learning_rate": 5e-06, + "loss": 0.1865, + "num_input_tokens_seen": 665370028, + "step": 3874 + }, + { + "epoch": 44.01699716713881, + "loss": 0.1789645254611969, + "loss_ce": 0.024179378524422646, + "loss_iou": 0.3359375, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 665370028, + "step": 3874 + }, + { + "epoch": 44.02832861189802, + "grad_norm": 17.337216912045225, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 665541468, + "step": 3875 + }, + { + "epoch": 44.02832861189802, + "loss": 0.14606338739395142, + "loss_ce": 0.01892716810107231, + "loss_iou": 0.28125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 665541468, + "step": 3875 + }, + { + "epoch": 44.039660056657226, + "grad_norm": 4.722631917871475, + "learning_rate": 5e-06, + "loss": 0.162, + "num_input_tokens_seen": 665713312, + "step": 3876 + }, + { + "epoch": 44.039660056657226, + "loss": 0.1612386256456375, + "loss_ce": 0.028822844848036766, + "loss_iou": 0.54296875, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 665713312, + "step": 3876 + }, + { + "epoch": 44.05099150141643, + "grad_norm": 4.3230895037327555, + "learning_rate": 5e-06, + "loss": 0.2116, + "num_input_tokens_seen": 665885268, + "step": 3877 + }, + { + "epoch": 44.05099150141643, + "loss": 0.20948238670825958, + "loss_ce": 0.044931601732969284, + "loss_iou": 0.078125, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 665885268, + "step": 3877 + }, + { + "epoch": 44.06232294617564, + "grad_norm": 8.788249643225756, + "learning_rate": 5e-06, + "loss": 0.1393, + "num_input_tokens_seen": 666057036, + "step": 3878 + }, + { + "epoch": 44.06232294617564, + "loss": 0.12548784911632538, + "loss_ce": 0.01913408376276493, + "loss_iou": 0.09228515625, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 666057036, + "step": 3878 + }, + { + "epoch": 44.07365439093484, + "grad_norm": 4.572224335134698, + "learning_rate": 5e-06, + "loss": 0.1863, + "num_input_tokens_seen": 666228044, + "step": 3879 + }, + { + "epoch": 44.07365439093484, + "loss": 0.17247647047042847, + "loss_ce": 0.014944737777113914, + "loss_iou": 0.353515625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 666228044, + "step": 3879 + }, + { + "epoch": 44.08498583569405, + "grad_norm": 5.784244766872415, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 666398588, + "step": 3880 + }, + { + "epoch": 44.08498583569405, + "loss": 0.22369036078453064, + "loss_ce": 0.03066667541861534, + "loss_iou": 0.5390625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 666398588, + "step": 3880 + }, + { + "epoch": 44.096317280453256, + "grad_norm": 7.298966303373416, + "learning_rate": 5e-06, + "loss": 0.146, + "num_input_tokens_seen": 666570360, + "step": 3881 + }, + { + "epoch": 44.096317280453256, + "loss": 0.17871423065662384, + "loss_ce": 0.016330188140273094, + "loss_iou": 0.26171875, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 666570360, + "step": 3881 + }, + { + "epoch": 44.107648725212464, + "grad_norm": 5.166123627681899, + "learning_rate": 5e-06, + "loss": 0.1344, + "num_input_tokens_seen": 666741996, + "step": 3882 + }, + { + "epoch": 44.107648725212464, + "loss": 0.11732202768325806, + "loss_ce": 0.04682641476392746, + "loss_iou": 0.177734375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 666741996, + "step": 3882 + }, + { + "epoch": 44.11898016997167, + "grad_norm": 20.384235799519953, + "learning_rate": 5e-06, + "loss": 0.2089, + "num_input_tokens_seen": 666913588, + "step": 3883 + }, + { + "epoch": 44.11898016997167, + "loss": 0.28551924228668213, + "loss_ce": 0.017361294478178024, + "loss_iou": 0.28515625, + "loss_num": 0.0537109375, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 666913588, + "step": 3883 + }, + { + "epoch": 44.13031161473088, + "grad_norm": 7.911628476498444, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 667085308, + "step": 3884 + }, + { + "epoch": 44.13031161473088, + "loss": 0.20761322975158691, + "loss_ce": 0.03772187978029251, + "loss_iou": 0.3671875, + "loss_num": 0.033935546875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 667085308, + "step": 3884 + }, + { + "epoch": 44.141643059490086, + "grad_norm": 4.8956588594092185, + "learning_rate": 5e-06, + "loss": 0.1681, + "num_input_tokens_seen": 667257128, + "step": 3885 + }, + { + "epoch": 44.141643059490086, + "loss": 0.11757586896419525, + "loss_ce": 0.018393736332654953, + "loss_iou": 0.51953125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 667257128, + "step": 3885 + }, + { + "epoch": 44.15297450424929, + "grad_norm": 9.613726583708134, + "learning_rate": 5e-06, + "loss": 0.1702, + "num_input_tokens_seen": 667428216, + "step": 3886 + }, + { + "epoch": 44.15297450424929, + "loss": 0.13310682773590088, + "loss_ce": 0.027058254927396774, + "loss_iou": 0.44140625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 667428216, + "step": 3886 + }, + { + "epoch": 44.1643059490085, + "grad_norm": 4.528860521900117, + "learning_rate": 5e-06, + "loss": 0.1893, + "num_input_tokens_seen": 667600244, + "step": 3887 + }, + { + "epoch": 44.1643059490085, + "loss": 0.11852609366178513, + "loss_ce": 0.015498753637075424, + "loss_iou": 0.251953125, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 667600244, + "step": 3887 + }, + { + "epoch": 44.17563739376771, + "grad_norm": 4.692495130189492, + "learning_rate": 5e-06, + "loss": 0.1662, + "num_input_tokens_seen": 667772056, + "step": 3888 + }, + { + "epoch": 44.17563739376771, + "loss": 0.16544538736343384, + "loss_ce": 0.009225909598171711, + "loss_iou": 0.419921875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 667772056, + "step": 3888 + }, + { + "epoch": 44.186968838526916, + "grad_norm": 4.571984747713864, + "learning_rate": 5e-06, + "loss": 0.1561, + "num_input_tokens_seen": 667943060, + "step": 3889 + }, + { + "epoch": 44.186968838526916, + "loss": 0.14817777276039124, + "loss_ce": 0.030120527371764183, + "loss_iou": 0.177734375, + "loss_num": 0.0235595703125, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 667943060, + "step": 3889 + }, + { + "epoch": 44.198300283286116, + "grad_norm": 21.334265923346287, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 668115148, + "step": 3890 + }, + { + "epoch": 44.198300283286116, + "loss": 0.17541903257369995, + "loss_ce": 0.039066486060619354, + "loss_iou": 0.6015625, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 668115148, + "step": 3890 + }, + { + "epoch": 44.20963172804532, + "grad_norm": 7.800471377143766, + "learning_rate": 5e-06, + "loss": 0.2, + "num_input_tokens_seen": 668285564, + "step": 3891 + }, + { + "epoch": 44.20963172804532, + "loss": 0.12931157648563385, + "loss_ce": 0.03919317573308945, + "loss_iou": 0.462890625, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 668285564, + "step": 3891 + }, + { + "epoch": 44.22096317280453, + "grad_norm": 6.6094242919048956, + "learning_rate": 5e-06, + "loss": 0.2124, + "num_input_tokens_seen": 668457312, + "step": 3892 + }, + { + "epoch": 44.22096317280453, + "loss": 0.19351902604103088, + "loss_ce": 0.01682223007082939, + "loss_iou": 0.61328125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 668457312, + "step": 3892 + }, + { + "epoch": 44.23229461756374, + "grad_norm": 3.763893800603745, + "learning_rate": 5e-06, + "loss": 0.1748, + "num_input_tokens_seen": 668628532, + "step": 3893 + }, + { + "epoch": 44.23229461756374, + "loss": 0.14614585041999817, + "loss_ce": 0.013119742274284363, + "loss_iou": 0.5234375, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 668628532, + "step": 3893 + }, + { + "epoch": 44.243626062322946, + "grad_norm": 6.6147255674233385, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 668797504, + "step": 3894 + }, + { + "epoch": 44.243626062322946, + "loss": 0.1490989476442337, + "loss_ce": 0.0556846484541893, + "loss_iou": 0.298828125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 668797504, + "step": 3894 + }, + { + "epoch": 44.25495750708215, + "grad_norm": 3.739099364885454, + "learning_rate": 5e-06, + "loss": 0.1632, + "num_input_tokens_seen": 668968832, + "step": 3895 + }, + { + "epoch": 44.25495750708215, + "loss": 0.20089229941368103, + "loss_ce": 0.06338009238243103, + "loss_iou": 0.373046875, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 668968832, + "step": 3895 + }, + { + "epoch": 44.26628895184136, + "grad_norm": 9.12464093646164, + "learning_rate": 5e-06, + "loss": 0.1581, + "num_input_tokens_seen": 669140524, + "step": 3896 + }, + { + "epoch": 44.26628895184136, + "loss": 0.1690121293067932, + "loss_ce": 0.017950113862752914, + "loss_iou": 0.55859375, + "loss_num": 0.0301513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 669140524, + "step": 3896 + }, + { + "epoch": 44.27762039660057, + "grad_norm": 6.8958968158456875, + "learning_rate": 5e-06, + "loss": 0.179, + "num_input_tokens_seen": 669312280, + "step": 3897 + }, + { + "epoch": 44.27762039660057, + "loss": 0.20437884330749512, + "loss_ce": 0.011233100667595863, + "loss_iou": 0.578125, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 669312280, + "step": 3897 + }, + { + "epoch": 44.288951841359776, + "grad_norm": 7.217350983333874, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 669484004, + "step": 3898 + }, + { + "epoch": 44.288951841359776, + "loss": 0.15820790827274323, + "loss_ce": 0.018986724317073822, + "loss_iou": 0.4921875, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 669484004, + "step": 3898 + }, + { + "epoch": 44.30028328611898, + "grad_norm": 5.162043680061618, + "learning_rate": 5e-06, + "loss": 0.1978, + "num_input_tokens_seen": 669655768, + "step": 3899 + }, + { + "epoch": 44.30028328611898, + "loss": 0.2508530020713806, + "loss_ce": 0.013853507116436958, + "loss_iou": 0.43359375, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 669655768, + "step": 3899 + }, + { + "epoch": 44.31161473087819, + "grad_norm": 5.31066240255363, + "learning_rate": 5e-06, + "loss": 0.189, + "num_input_tokens_seen": 669827588, + "step": 3900 + }, + { + "epoch": 44.31161473087819, + "loss": 0.18520700931549072, + "loss_ce": 0.008082992397248745, + "loss_iou": 0.48828125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 669827588, + "step": 3900 + }, + { + "epoch": 44.32294617563739, + "grad_norm": 4.28118504428304, + "learning_rate": 5e-06, + "loss": 0.1752, + "num_input_tokens_seen": 669997728, + "step": 3901 + }, + { + "epoch": 44.32294617563739, + "loss": 0.11126811802387238, + "loss_ce": 0.011841850355267525, + "loss_iou": 0.50390625, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 669997728, + "step": 3901 + }, + { + "epoch": 44.3342776203966, + "grad_norm": 3.2329819093097476, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 670169376, + "step": 3902 + }, + { + "epoch": 44.3342776203966, + "loss": 0.16909259557724, + "loss_ce": 0.015558669343590736, + "loss_iou": 0.443359375, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 670169376, + "step": 3902 + }, + { + "epoch": 44.345609065155806, + "grad_norm": 5.902571291429869, + "learning_rate": 5e-06, + "loss": 0.1749, + "num_input_tokens_seen": 670341112, + "step": 3903 + }, + { + "epoch": 44.345609065155806, + "loss": 0.1980319768190384, + "loss_ce": 0.04535253345966339, + "loss_iou": 0.322265625, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 670341112, + "step": 3903 + }, + { + "epoch": 44.35694050991501, + "grad_norm": 6.6970159763365364, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 670510016, + "step": 3904 + }, + { + "epoch": 44.35694050991501, + "loss": 0.12334737181663513, + "loss_ce": 0.02100667729973793, + "loss_iou": 0.390625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 670510016, + "step": 3904 + }, + { + "epoch": 44.36827195467422, + "grad_norm": 13.377695266846061, + "learning_rate": 5e-06, + "loss": 0.1944, + "num_input_tokens_seen": 670680452, + "step": 3905 + }, + { + "epoch": 44.36827195467422, + "loss": 0.148904949426651, + "loss_ce": 0.028024822473526, + "loss_iou": 0.5859375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 670680452, + "step": 3905 + }, + { + "epoch": 44.37960339943343, + "grad_norm": 7.838408355591025, + "learning_rate": 5e-06, + "loss": 0.1867, + "num_input_tokens_seen": 670852512, + "step": 3906 + }, + { + "epoch": 44.37960339943343, + "loss": 0.14574454724788666, + "loss_ce": 0.00911734439432621, + "loss_iou": 0.60546875, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 670852512, + "step": 3906 + }, + { + "epoch": 44.390934844192635, + "grad_norm": 5.4060900424333616, + "learning_rate": 5e-06, + "loss": 0.1881, + "num_input_tokens_seen": 671024224, + "step": 3907 + }, + { + "epoch": 44.390934844192635, + "loss": 0.0828448235988617, + "loss_ce": 0.017628755420446396, + "loss_iou": 0.73828125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 671024224, + "step": 3907 + }, + { + "epoch": 44.40226628895184, + "grad_norm": 4.41292949501653, + "learning_rate": 5e-06, + "loss": 0.1535, + "num_input_tokens_seen": 671196500, + "step": 3908 + }, + { + "epoch": 44.40226628895184, + "loss": 0.13619935512542725, + "loss_ce": 0.00952088087797165, + "loss_iou": 0.376953125, + "loss_num": 0.025390625, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 671196500, + "step": 3908 + }, + { + "epoch": 44.41359773371105, + "grad_norm": 5.1494649175171014, + "learning_rate": 5e-06, + "loss": 0.21, + "num_input_tokens_seen": 671368548, + "step": 3909 + }, + { + "epoch": 44.41359773371105, + "loss": 0.1771545708179474, + "loss_ce": 0.00851443037390709, + "loss_iou": 0.3984375, + "loss_num": 0.03369140625, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 671368548, + "step": 3909 + }, + { + "epoch": 44.42492917847026, + "grad_norm": 10.00238723031077, + "learning_rate": 5e-06, + "loss": 0.2128, + "num_input_tokens_seen": 671540372, + "step": 3910 + }, + { + "epoch": 44.42492917847026, + "loss": 0.2260538935661316, + "loss_ce": 0.014048272743821144, + "loss_iou": 0.30859375, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 671540372, + "step": 3910 + }, + { + "epoch": 44.436260623229465, + "grad_norm": 4.331142078752378, + "learning_rate": 5e-06, + "loss": 0.1383, + "num_input_tokens_seen": 671710528, + "step": 3911 + }, + { + "epoch": 44.436260623229465, + "loss": 0.1739613562822342, + "loss_ce": 0.023326583206653595, + "loss_iou": 0.3984375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 671710528, + "step": 3911 + }, + { + "epoch": 44.447592067988666, + "grad_norm": 8.844643532094778, + "learning_rate": 5e-06, + "loss": 0.1864, + "num_input_tokens_seen": 671881304, + "step": 3912 + }, + { + "epoch": 44.447592067988666, + "loss": 0.18756544589996338, + "loss_ce": 0.02997267246246338, + "loss_iou": 0.353515625, + "loss_num": 0.031494140625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 671881304, + "step": 3912 + }, + { + "epoch": 44.45892351274787, + "grad_norm": 6.510764341126988, + "learning_rate": 5e-06, + "loss": 0.1857, + "num_input_tokens_seen": 672053348, + "step": 3913 + }, + { + "epoch": 44.45892351274787, + "loss": 0.2138848453760147, + "loss_ce": 0.013842117041349411, + "loss_iou": 0.310546875, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 672053348, + "step": 3913 + }, + { + "epoch": 44.47025495750708, + "grad_norm": 9.006163268268532, + "learning_rate": 5e-06, + "loss": 0.1907, + "num_input_tokens_seen": 672224644, + "step": 3914 + }, + { + "epoch": 44.47025495750708, + "loss": 0.1696176528930664, + "loss_ce": 0.020905490964651108, + "loss_iou": 0.48828125, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 672224644, + "step": 3914 + }, + { + "epoch": 44.48158640226629, + "grad_norm": 49.08920396255695, + "learning_rate": 5e-06, + "loss": 0.1507, + "num_input_tokens_seen": 672394948, + "step": 3915 + }, + { + "epoch": 44.48158640226629, + "loss": 0.09520304948091507, + "loss_ce": 0.02095378190279007, + "loss_iou": 0.32421875, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 672394948, + "step": 3915 + }, + { + "epoch": 44.492917847025495, + "grad_norm": 22.289723145068706, + "learning_rate": 5e-06, + "loss": 0.2131, + "num_input_tokens_seen": 672567020, + "step": 3916 + }, + { + "epoch": 44.492917847025495, + "loss": 0.24901898205280304, + "loss_ce": 0.013942087069153786, + "loss_iou": 0.390625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 672567020, + "step": 3916 + }, + { + "epoch": 44.5042492917847, + "grad_norm": 9.872482670890951, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 672739200, + "step": 3917 + }, + { + "epoch": 44.5042492917847, + "loss": 0.20507213473320007, + "loss_ce": 0.009820652194321156, + "loss_iou": 0.6015625, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 672739200, + "step": 3917 + }, + { + "epoch": 44.51558073654391, + "grad_norm": 4.701513319513321, + "learning_rate": 5e-06, + "loss": 0.1669, + "num_input_tokens_seen": 672910356, + "step": 3918 + }, + { + "epoch": 44.51558073654391, + "loss": 0.1403428018093109, + "loss_ce": 0.02788553573191166, + "loss_iou": 0.1806640625, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 672910356, + "step": 3918 + }, + { + "epoch": 44.52691218130312, + "grad_norm": 5.04437684335888, + "learning_rate": 5e-06, + "loss": 0.1928, + "num_input_tokens_seen": 673081496, + "step": 3919 + }, + { + "epoch": 44.52691218130312, + "loss": 0.18987755477428436, + "loss_ce": 0.017758414149284363, + "loss_iou": 0.275390625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 673081496, + "step": 3919 + }, + { + "epoch": 44.538243626062325, + "grad_norm": 11.325975447959623, + "learning_rate": 5e-06, + "loss": 0.2147, + "num_input_tokens_seen": 673253860, + "step": 3920 + }, + { + "epoch": 44.538243626062325, + "loss": 0.17633388936519623, + "loss_ce": 0.03412197530269623, + "loss_iou": 0.51953125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 673253860, + "step": 3920 + }, + { + "epoch": 44.54957507082153, + "grad_norm": 12.70179835878848, + "learning_rate": 5e-06, + "loss": 0.2076, + "num_input_tokens_seen": 673424932, + "step": 3921 + }, + { + "epoch": 44.54957507082153, + "loss": 0.1992478221654892, + "loss_ce": 0.0031723843421787024, + "loss_iou": 0.306640625, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 673424932, + "step": 3921 + }, + { + "epoch": 44.56090651558074, + "grad_norm": 4.233158942637238, + "learning_rate": 5e-06, + "loss": 0.1372, + "num_input_tokens_seen": 673596912, + "step": 3922 + }, + { + "epoch": 44.56090651558074, + "loss": 0.12535905838012695, + "loss_ce": 0.025475017726421356, + "loss_iou": 0.5078125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 673596912, + "step": 3922 + }, + { + "epoch": 44.57223796033994, + "grad_norm": 6.590398737913662, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 673767644, + "step": 3923 + }, + { + "epoch": 44.57223796033994, + "loss": 0.11935992538928986, + "loss_ce": 0.024755436927080154, + "loss_iou": 0.416015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 673767644, + "step": 3923 + }, + { + "epoch": 44.58356940509915, + "grad_norm": 8.997132730978791, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 673939180, + "step": 3924 + }, + { + "epoch": 44.58356940509915, + "loss": 0.25624096393585205, + "loss_ce": 0.020828373730182648, + "loss_iou": 0.4140625, + "loss_num": 0.047119140625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 673939180, + "step": 3924 + }, + { + "epoch": 44.594900849858355, + "grad_norm": 4.469462741478929, + "learning_rate": 5e-06, + "loss": 0.1665, + "num_input_tokens_seen": 674110892, + "step": 3925 + }, + { + "epoch": 44.594900849858355, + "loss": 0.11365257948637009, + "loss_ce": 0.03238426148891449, + "loss_iou": 0.37109375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 674110892, + "step": 3925 + }, + { + "epoch": 44.60623229461756, + "grad_norm": 5.543728005249897, + "learning_rate": 5e-06, + "loss": 0.133, + "num_input_tokens_seen": 674282916, + "step": 3926 + }, + { + "epoch": 44.60623229461756, + "loss": 0.11739157140254974, + "loss_ce": 0.012136444449424744, + "loss_iou": 0.41015625, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 674282916, + "step": 3926 + }, + { + "epoch": 44.61756373937677, + "grad_norm": 3.9444757987061574, + "learning_rate": 5e-06, + "loss": 0.1813, + "num_input_tokens_seen": 674454380, + "step": 3927 + }, + { + "epoch": 44.61756373937677, + "loss": 0.19512583315372467, + "loss_ce": 0.04400278627872467, + "loss_iou": 0.50390625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 674454380, + "step": 3927 + }, + { + "epoch": 44.62889518413598, + "grad_norm": 7.615596531975357, + "learning_rate": 5e-06, + "loss": 0.1729, + "num_input_tokens_seen": 674626708, + "step": 3928 + }, + { + "epoch": 44.62889518413598, + "loss": 0.21386294066905975, + "loss_ce": 0.012416411191225052, + "loss_iou": 0.5390625, + "loss_num": 0.040283203125, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 674626708, + "step": 3928 + }, + { + "epoch": 44.640226628895185, + "grad_norm": 13.07656539893205, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 674796932, + "step": 3929 + }, + { + "epoch": 44.640226628895185, + "loss": 0.13044890761375427, + "loss_ce": 0.022447196766734123, + "loss_iou": 0.498046875, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 674796932, + "step": 3929 + }, + { + "epoch": 44.65155807365439, + "grad_norm": 5.034325857680527, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 674969036, + "step": 3930 + }, + { + "epoch": 44.65155807365439, + "loss": 0.08581889420747757, + "loss_ce": 0.014499313198029995, + "loss_iou": 0.46484375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 674969036, + "step": 3930 + }, + { + "epoch": 44.6628895184136, + "grad_norm": 8.033244599908855, + "learning_rate": 5e-06, + "loss": 0.1752, + "num_input_tokens_seen": 675139804, + "step": 3931 + }, + { + "epoch": 44.6628895184136, + "loss": 0.1502116322517395, + "loss_ce": 0.025913530960679054, + "loss_iou": 0.48828125, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 675139804, + "step": 3931 + }, + { + "epoch": 44.67422096317281, + "grad_norm": 4.438394760476305, + "learning_rate": 5e-06, + "loss": 0.2002, + "num_input_tokens_seen": 675311184, + "step": 3932 + }, + { + "epoch": 44.67422096317281, + "loss": 0.18503566086292267, + "loss_ce": 0.03879541903734207, + "loss_iou": 0.55859375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 675311184, + "step": 3932 + }, + { + "epoch": 44.685552407932015, + "grad_norm": 15.97628154259988, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 675482768, + "step": 3933 + }, + { + "epoch": 44.685552407932015, + "loss": 0.3114229440689087, + "loss_ce": 0.017416590824723244, + "loss_iou": 0.326171875, + "loss_num": 0.05859375, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 675482768, + "step": 3933 + }, + { + "epoch": 44.696883852691215, + "grad_norm": 6.345179098172619, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 675654556, + "step": 3934 + }, + { + "epoch": 44.696883852691215, + "loss": 0.13065677881240845, + "loss_ce": 0.008006628602743149, + "loss_iou": 0.36328125, + "loss_num": 0.0245361328125, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 675654556, + "step": 3934 + }, + { + "epoch": 44.70821529745042, + "grad_norm": 6.999335904516425, + "learning_rate": 5e-06, + "loss": 0.1705, + "num_input_tokens_seen": 675826228, + "step": 3935 + }, + { + "epoch": 44.70821529745042, + "loss": 0.11878420412540436, + "loss_ce": 0.014200454577803612, + "loss_iou": 0.40234375, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 675826228, + "step": 3935 + }, + { + "epoch": 44.71954674220963, + "grad_norm": 8.656115297011691, + "learning_rate": 5e-06, + "loss": 0.1705, + "num_input_tokens_seen": 675997212, + "step": 3936 + }, + { + "epoch": 44.71954674220963, + "loss": 0.17036622762680054, + "loss_ce": 0.07292358577251434, + "loss_iou": 0.462890625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 675997212, + "step": 3936 + }, + { + "epoch": 44.73087818696884, + "grad_norm": 5.791122415515461, + "learning_rate": 5e-06, + "loss": 0.1437, + "num_input_tokens_seen": 676169256, + "step": 3937 + }, + { + "epoch": 44.73087818696884, + "loss": 0.14486059546470642, + "loss_ce": 0.010613763704895973, + "loss_iou": 0.578125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 676169256, + "step": 3937 + }, + { + "epoch": 44.742209631728045, + "grad_norm": 5.713523697325441, + "learning_rate": 5e-06, + "loss": 0.224, + "num_input_tokens_seen": 676340820, + "step": 3938 + }, + { + "epoch": 44.742209631728045, + "loss": 0.2946818768978119, + "loss_ce": 0.03241381421685219, + "loss_iou": 0.490234375, + "loss_num": 0.052490234375, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 676340820, + "step": 3938 + }, + { + "epoch": 44.75354107648725, + "grad_norm": 7.450730756549203, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 676511092, + "step": 3939 + }, + { + "epoch": 44.75354107648725, + "loss": 0.09902353584766388, + "loss_ce": 0.01612253114581108, + "loss_iou": 0.33984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 676511092, + "step": 3939 + }, + { + "epoch": 44.76487252124646, + "grad_norm": 7.619284153921158, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 676682856, + "step": 3940 + }, + { + "epoch": 44.76487252124646, + "loss": 0.20546351373195648, + "loss_ce": 0.03425990790128708, + "loss_iou": 0.49609375, + "loss_num": 0.0341796875, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 676682856, + "step": 3940 + }, + { + "epoch": 44.77620396600567, + "grad_norm": 10.094560451988361, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 676854932, + "step": 3941 + }, + { + "epoch": 44.77620396600567, + "loss": 0.1597079336643219, + "loss_ce": 0.024301443248987198, + "loss_iou": 0.306640625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 676854932, + "step": 3941 + }, + { + "epoch": 44.787535410764875, + "grad_norm": 33.792708028340655, + "learning_rate": 5e-06, + "loss": 0.2305, + "num_input_tokens_seen": 677027040, + "step": 3942 + }, + { + "epoch": 44.787535410764875, + "loss": 0.17407982051372528, + "loss_ce": 0.017768792808055878, + "loss_iou": 0.38671875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 677027040, + "step": 3942 + }, + { + "epoch": 44.79886685552408, + "grad_norm": 71.07096406548798, + "learning_rate": 5e-06, + "loss": 0.2148, + "num_input_tokens_seen": 677198864, + "step": 3943 + }, + { + "epoch": 44.79886685552408, + "loss": 0.18417896330356598, + "loss_ce": 0.020955689251422882, + "loss_iou": 0.310546875, + "loss_num": 0.03271484375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 677198864, + "step": 3943 + }, + { + "epoch": 44.81019830028329, + "grad_norm": 5.615255253349635, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 677370844, + "step": 3944 + }, + { + "epoch": 44.81019830028329, + "loss": 0.2352963089942932, + "loss_ce": 0.019262365996837616, + "loss_iou": 0.46875, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 677370844, + "step": 3944 + }, + { + "epoch": 44.82152974504249, + "grad_norm": 5.8238037477455205, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 677542668, + "step": 3945 + }, + { + "epoch": 44.82152974504249, + "loss": 0.14084841310977936, + "loss_ce": 0.01236940547823906, + "loss_iou": 0.5078125, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 677542668, + "step": 3945 + }, + { + "epoch": 44.8328611898017, + "grad_norm": 6.238451023563346, + "learning_rate": 5e-06, + "loss": 0.2078, + "num_input_tokens_seen": 677714584, + "step": 3946 + }, + { + "epoch": 44.8328611898017, + "loss": 0.15844382345676422, + "loss_ce": 0.015530005097389221, + "loss_iou": 0.50390625, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 677714584, + "step": 3946 + }, + { + "epoch": 44.844192634560905, + "grad_norm": 6.717142131780718, + "learning_rate": 5e-06, + "loss": 0.1627, + "num_input_tokens_seen": 677886160, + "step": 3947 + }, + { + "epoch": 44.844192634560905, + "loss": 0.20618517696857452, + "loss_ce": 0.022469352930784225, + "loss_iou": 0.37890625, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 677886160, + "step": 3947 + }, + { + "epoch": 44.85552407932011, + "grad_norm": 5.418955008781779, + "learning_rate": 5e-06, + "loss": 0.1681, + "num_input_tokens_seen": 678057140, + "step": 3948 + }, + { + "epoch": 44.85552407932011, + "loss": 0.16610217094421387, + "loss_ce": 0.01214098371565342, + "loss_iou": 0.408203125, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 678057140, + "step": 3948 + }, + { + "epoch": 44.86685552407932, + "grad_norm": 6.785527307628056, + "learning_rate": 5e-06, + "loss": 0.1788, + "num_input_tokens_seen": 678228316, + "step": 3949 + }, + { + "epoch": 44.86685552407932, + "loss": 0.1471843719482422, + "loss_ce": 0.025724399834871292, + "loss_iou": 0.5625, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 678228316, + "step": 3949 + }, + { + "epoch": 44.87818696883853, + "grad_norm": 5.509305973758549, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 678398708, + "step": 3950 + }, + { + "epoch": 44.87818696883853, + "loss": 0.25689616799354553, + "loss_ce": 0.019652530550956726, + "loss_iou": 0.390625, + "loss_num": 0.04736328125, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 678398708, + "step": 3950 + }, + { + "epoch": 44.889518413597735, + "grad_norm": 7.6875765451044105, + "learning_rate": 5e-06, + "loss": 0.2383, + "num_input_tokens_seen": 678570344, + "step": 3951 + }, + { + "epoch": 44.889518413597735, + "loss": 0.24902088940143585, + "loss_ce": 0.0131810512393713, + "loss_iou": 0.31640625, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 678570344, + "step": 3951 + }, + { + "epoch": 44.90084985835694, + "grad_norm": 19.312373410910233, + "learning_rate": 5e-06, + "loss": 0.1422, + "num_input_tokens_seen": 678740492, + "step": 3952 + }, + { + "epoch": 44.90084985835694, + "loss": 0.1345103681087494, + "loss_ce": 0.007191033102571964, + "loss_iou": 0.447265625, + "loss_num": 0.0255126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 678740492, + "step": 3952 + }, + { + "epoch": 44.91218130311615, + "grad_norm": 4.775684109064375, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 678911784, + "step": 3953 + }, + { + "epoch": 44.91218130311615, + "loss": 0.1061486005783081, + "loss_ce": 0.009407874196767807, + "loss_iou": 0.33203125, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 678911784, + "step": 3953 + }, + { + "epoch": 44.92351274787536, + "grad_norm": 7.61891778490257, + "learning_rate": 5e-06, + "loss": 0.1563, + "num_input_tokens_seen": 679083040, + "step": 3954 + }, + { + "epoch": 44.92351274787536, + "loss": 0.1314413845539093, + "loss_ce": 0.013857155106961727, + "loss_iou": 0.515625, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 679083040, + "step": 3954 + }, + { + "epoch": 44.934844192634564, + "grad_norm": 6.6918706288396175, + "learning_rate": 5e-06, + "loss": 0.1622, + "num_input_tokens_seen": 679253388, + "step": 3955 + }, + { + "epoch": 44.934844192634564, + "loss": 0.12126082181930542, + "loss_ce": 0.02278059720993042, + "loss_iou": 0.5234375, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 679253388, + "step": 3955 + }, + { + "epoch": 44.946175637393765, + "grad_norm": 6.841514197216895, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 679423444, + "step": 3956 + }, + { + "epoch": 44.946175637393765, + "loss": 0.10983335971832275, + "loss_ce": 0.013764026574790478, + "loss_iou": 0.5546875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 679423444, + "step": 3956 + }, + { + "epoch": 44.95750708215297, + "grad_norm": 7.339101136394465, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 679595260, + "step": 3957 + }, + { + "epoch": 44.95750708215297, + "loss": 0.3390614092350006, + "loss_ce": 0.031169582158327103, + "loss_iou": 0.55078125, + "loss_num": 0.0615234375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 679595260, + "step": 3957 + }, + { + "epoch": 44.96883852691218, + "grad_norm": 4.74519792464132, + "learning_rate": 5e-06, + "loss": 0.1758, + "num_input_tokens_seen": 679766124, + "step": 3958 + }, + { + "epoch": 44.96883852691218, + "loss": 0.2327853888273239, + "loss_ce": 0.03436008840799332, + "loss_iou": 0.498046875, + "loss_num": 0.03955078125, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 679766124, + "step": 3958 + }, + { + "epoch": 44.98016997167139, + "grad_norm": 4.51509256827041, + "learning_rate": 5e-06, + "loss": 0.1314, + "num_input_tokens_seen": 679936368, + "step": 3959 + }, + { + "epoch": 44.98016997167139, + "loss": 0.1303640902042389, + "loss_ce": 0.05376496911048889, + "loss_iou": 0.49609375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 679936368, + "step": 3959 + }, + { + "epoch": 44.991501416430594, + "grad_norm": 5.104430236835701, + "learning_rate": 5e-06, + "loss": 0.1661, + "num_input_tokens_seen": 680107800, + "step": 3960 + }, + { + "epoch": 44.991501416430594, + "loss": 0.12421780079603195, + "loss_ce": 0.018718529492616653, + "loss_iou": 0.3828125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 680107800, + "step": 3960 + }, + { + "epoch": 44.991501416430594, + "loss": 0.20759907364845276, + "loss_ce": 0.02623310312628746, + "loss_iou": 0.4140625, + "loss_num": 0.036376953125, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 680235652, + "step": 3960 + }, + { + "epoch": 45.0028328611898, + "grad_norm": 9.245029315616877, + "learning_rate": 5e-06, + "loss": 0.1847, + "num_input_tokens_seen": 680278628, + "step": 3961 + }, + { + "epoch": 45.0028328611898, + "loss": 0.2199929654598236, + "loss_ce": 0.017203668132424355, + "loss_iou": 0.609375, + "loss_num": 0.04052734375, + "loss_xval": 0.203125, + "num_input_tokens_seen": 680278628, + "step": 3961 + }, + { + "epoch": 45.01416430594901, + "grad_norm": 13.978362735732325, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 680449940, + "step": 3962 + }, + { + "epoch": 45.01416430594901, + "loss": 0.1891382485628128, + "loss_ce": 0.019948795437812805, + "loss_iou": 0.3984375, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 680449940, + "step": 3962 + }, + { + "epoch": 45.02549575070822, + "grad_norm": 15.361499531571992, + "learning_rate": 5e-06, + "loss": 0.1655, + "num_input_tokens_seen": 680621760, + "step": 3963 + }, + { + "epoch": 45.02549575070822, + "loss": 0.07439267635345459, + "loss_ce": 0.003317241556942463, + "loss_iou": 0.5234375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 680621760, + "step": 3963 + }, + { + "epoch": 45.036827195467424, + "grad_norm": 3.89474739029748, + "learning_rate": 5e-06, + "loss": 0.1453, + "num_input_tokens_seen": 680793512, + "step": 3964 + }, + { + "epoch": 45.036827195467424, + "loss": 0.10964441299438477, + "loss_ce": 0.020258424803614616, + "loss_iou": 0.5546875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 680793512, + "step": 3964 + }, + { + "epoch": 45.04815864022663, + "grad_norm": 10.564883519250367, + "learning_rate": 5e-06, + "loss": 0.1322, + "num_input_tokens_seen": 680965480, + "step": 3965 + }, + { + "epoch": 45.04815864022663, + "loss": 0.12299640476703644, + "loss_ce": 0.023661689832806587, + "loss_iou": 0.546875, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 680965480, + "step": 3965 + }, + { + "epoch": 45.05949008498584, + "grad_norm": 5.677005087711661, + "learning_rate": 5e-06, + "loss": 0.1523, + "num_input_tokens_seen": 681137032, + "step": 3966 + }, + { + "epoch": 45.05949008498584, + "loss": 0.18711864948272705, + "loss_ce": 0.008758679032325745, + "loss_iou": 0.5703125, + "loss_num": 0.03564453125, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 681137032, + "step": 3966 + }, + { + "epoch": 45.07082152974504, + "grad_norm": 3.1774431762700535, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 681308340, + "step": 3967 + }, + { + "epoch": 45.07082152974504, + "loss": 0.20559372007846832, + "loss_ce": 0.015316622331738472, + "loss_iou": 0.265625, + "loss_num": 0.0380859375, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 681308340, + "step": 3967 + }, + { + "epoch": 45.08215297450425, + "grad_norm": 3.293653747544128, + "learning_rate": 5e-06, + "loss": 0.1151, + "num_input_tokens_seen": 681479768, + "step": 3968 + }, + { + "epoch": 45.08215297450425, + "loss": 0.07989273220300674, + "loss_ce": 0.01559219416230917, + "loss_iou": 0.56640625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 681479768, + "step": 3968 + }, + { + "epoch": 45.093484419263454, + "grad_norm": 13.85451427557297, + "learning_rate": 5e-06, + "loss": 0.1595, + "num_input_tokens_seen": 681649276, + "step": 3969 + }, + { + "epoch": 45.093484419263454, + "loss": 0.22615768015384674, + "loss_ce": 0.010337376967072487, + "loss_iou": 0.3671875, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 681649276, + "step": 3969 + }, + { + "epoch": 45.10481586402266, + "grad_norm": 19.43299757201911, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 681819948, + "step": 3970 + }, + { + "epoch": 45.10481586402266, + "loss": 0.13635998964309692, + "loss_ce": 0.004218881484121084, + "loss_iou": 0.53515625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 681819948, + "step": 3970 + }, + { + "epoch": 45.11614730878187, + "grad_norm": 5.351304537964111, + "learning_rate": 5e-06, + "loss": 0.1722, + "num_input_tokens_seen": 681990612, + "step": 3971 + }, + { + "epoch": 45.11614730878187, + "loss": 0.29831600189208984, + "loss_ce": 0.024756435304880142, + "loss_iou": 0.470703125, + "loss_num": 0.0546875, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 681990612, + "step": 3971 + }, + { + "epoch": 45.12747875354108, + "grad_norm": 24.48441549631094, + "learning_rate": 5e-06, + "loss": 0.1587, + "num_input_tokens_seen": 682162248, + "step": 3972 + }, + { + "epoch": 45.12747875354108, + "loss": 0.1765347421169281, + "loss_ce": 0.0029812734574079514, + "loss_iou": 0.251953125, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 682162248, + "step": 3972 + }, + { + "epoch": 45.138810198300284, + "grad_norm": 4.795848369809877, + "learning_rate": 5e-06, + "loss": 0.1697, + "num_input_tokens_seen": 682331992, + "step": 3973 + }, + { + "epoch": 45.138810198300284, + "loss": 0.12189280241727829, + "loss_ce": 0.02449595369398594, + "loss_iou": 0.326171875, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 682331992, + "step": 3973 + }, + { + "epoch": 45.15014164305949, + "grad_norm": 10.173440029675737, + "learning_rate": 5e-06, + "loss": 0.1363, + "num_input_tokens_seen": 682503792, + "step": 3974 + }, + { + "epoch": 45.15014164305949, + "loss": 0.22476442158222198, + "loss_ce": 0.015322279185056686, + "loss_iou": 0.4140625, + "loss_num": 0.0419921875, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 682503792, + "step": 3974 + }, + { + "epoch": 45.1614730878187, + "grad_norm": 6.554892812737551, + "learning_rate": 5e-06, + "loss": 0.2, + "num_input_tokens_seen": 682675432, + "step": 3975 + }, + { + "epoch": 45.1614730878187, + "loss": 0.2163558304309845, + "loss_ce": 0.0121016725897789, + "loss_iou": 0.2451171875, + "loss_num": 0.040771484375, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 682675432, + "step": 3975 + }, + { + "epoch": 45.172804532577906, + "grad_norm": 9.94909079286914, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 682846604, + "step": 3976 + }, + { + "epoch": 45.172804532577906, + "loss": 0.1480620950460434, + "loss_ce": 0.013418532907962799, + "loss_iou": 0.474609375, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 682846604, + "step": 3976 + }, + { + "epoch": 45.184135977337114, + "grad_norm": 4.859778556428821, + "learning_rate": 5e-06, + "loss": 0.1761, + "num_input_tokens_seen": 683018104, + "step": 3977 + }, + { + "epoch": 45.184135977337114, + "loss": 0.21864371001720428, + "loss_ce": 0.02110341563820839, + "loss_iou": 0.490234375, + "loss_num": 0.03955078125, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 683018104, + "step": 3977 + }, + { + "epoch": 45.195467422096314, + "grad_norm": 8.340452487209136, + "learning_rate": 5e-06, + "loss": 0.185, + "num_input_tokens_seen": 683188376, + "step": 3978 + }, + { + "epoch": 45.195467422096314, + "loss": 0.13148869574069977, + "loss_ce": 0.027393236756324768, + "loss_iou": 0.443359375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 683188376, + "step": 3978 + }, + { + "epoch": 45.20679886685552, + "grad_norm": 6.471837698938179, + "learning_rate": 5e-06, + "loss": 0.1516, + "num_input_tokens_seen": 683360000, + "step": 3979 + }, + { + "epoch": 45.20679886685552, + "loss": 0.2048691213130951, + "loss_ce": 0.00940403901040554, + "loss_iou": 0.052001953125, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 683360000, + "step": 3979 + }, + { + "epoch": 45.21813031161473, + "grad_norm": 4.966360828511372, + "learning_rate": 5e-06, + "loss": 0.1257, + "num_input_tokens_seen": 683531784, + "step": 3980 + }, + { + "epoch": 45.21813031161473, + "loss": 0.12496919929981232, + "loss_ce": 0.009032929316163063, + "loss_iou": 0.439453125, + "loss_num": 0.023193359375, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 683531784, + "step": 3980 + }, + { + "epoch": 45.22946175637394, + "grad_norm": 3.2656518214800245, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 683703388, + "step": 3981 + }, + { + "epoch": 45.22946175637394, + "loss": 0.17586511373519897, + "loss_ce": 0.0028914890717715025, + "loss_iou": 0.390625, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 683703388, + "step": 3981 + }, + { + "epoch": 45.240793201133144, + "grad_norm": 7.119328119959129, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 683875044, + "step": 3982 + }, + { + "epoch": 45.240793201133144, + "loss": 0.14759066700935364, + "loss_ce": 0.042137183248996735, + "loss_iou": 0.490234375, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 683875044, + "step": 3982 + }, + { + "epoch": 45.25212464589235, + "grad_norm": 5.439364781816015, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 684044320, + "step": 3983 + }, + { + "epoch": 45.25212464589235, + "loss": 0.11087010055780411, + "loss_ce": 0.008514144457876682, + "loss_iou": 0.1875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 684044320, + "step": 3983 + }, + { + "epoch": 45.26345609065156, + "grad_norm": 5.3909633068184295, + "learning_rate": 5e-06, + "loss": 0.1855, + "num_input_tokens_seen": 684215692, + "step": 3984 + }, + { + "epoch": 45.26345609065156, + "loss": 0.12300942093133926, + "loss_ce": 0.007866598665714264, + "loss_iou": 0.51953125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 684215692, + "step": 3984 + }, + { + "epoch": 45.274787535410766, + "grad_norm": 7.406830467154554, + "learning_rate": 5e-06, + "loss": 0.1602, + "num_input_tokens_seen": 684387580, + "step": 3985 + }, + { + "epoch": 45.274787535410766, + "loss": 0.17128312587738037, + "loss_ce": 0.03551041707396507, + "loss_iou": 0.302734375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 684387580, + "step": 3985 + }, + { + "epoch": 45.286118980169974, + "grad_norm": 4.513940040204225, + "learning_rate": 5e-06, + "loss": 0.197, + "num_input_tokens_seen": 684558308, + "step": 3986 + }, + { + "epoch": 45.286118980169974, + "loss": 0.1830013245344162, + "loss_ce": 0.026324080303311348, + "loss_iou": 0.54296875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 684558308, + "step": 3986 + }, + { + "epoch": 45.29745042492918, + "grad_norm": 5.550275942700856, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 684730044, + "step": 3987 + }, + { + "epoch": 45.29745042492918, + "loss": 0.10014459490776062, + "loss_ce": 0.00367852789349854, + "loss_iou": 0.41015625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 684730044, + "step": 3987 + }, + { + "epoch": 45.30878186968839, + "grad_norm": 7.533654743288638, + "learning_rate": 5e-06, + "loss": 0.1355, + "num_input_tokens_seen": 684899472, + "step": 3988 + }, + { + "epoch": 45.30878186968839, + "loss": 0.12186650931835175, + "loss_ce": 0.012094780802726746, + "loss_iou": 0.431640625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 684899472, + "step": 3988 + }, + { + "epoch": 45.32011331444759, + "grad_norm": 4.292112600348843, + "learning_rate": 5e-06, + "loss": 0.1531, + "num_input_tokens_seen": 685070948, + "step": 3989 + }, + { + "epoch": 45.32011331444759, + "loss": 0.13786914944648743, + "loss_ce": 0.02712084725499153, + "loss_iou": 0.3671875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 685070948, + "step": 3989 + }, + { + "epoch": 45.331444759206796, + "grad_norm": 3.924196065163375, + "learning_rate": 5e-06, + "loss": 0.1543, + "num_input_tokens_seen": 685241192, + "step": 3990 + }, + { + "epoch": 45.331444759206796, + "loss": 0.13688303530216217, + "loss_ce": 0.023906951770186424, + "loss_iou": 0.359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 685241192, + "step": 3990 + }, + { + "epoch": 45.342776203966004, + "grad_norm": 3.5673849325899623, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 685412808, + "step": 3991 + }, + { + "epoch": 45.342776203966004, + "loss": 0.1533830612897873, + "loss_ce": 0.023591801524162292, + "loss_iou": 0.054443359375, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 685412808, + "step": 3991 + }, + { + "epoch": 45.35410764872521, + "grad_norm": 32.92043305837096, + "learning_rate": 5e-06, + "loss": 0.1197, + "num_input_tokens_seen": 685584656, + "step": 3992 + }, + { + "epoch": 45.35410764872521, + "loss": 0.13519130647182465, + "loss_ce": 0.006269804667681456, + "loss_iou": 0.41015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 685584656, + "step": 3992 + }, + { + "epoch": 45.36543909348442, + "grad_norm": 4.149902785668814, + "learning_rate": 5e-06, + "loss": 0.1216, + "num_input_tokens_seen": 685756264, + "step": 3993 + }, + { + "epoch": 45.36543909348442, + "loss": 0.12544085085391998, + "loss_ce": 0.012953061610460281, + "loss_iou": 0.4375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 685756264, + "step": 3993 + }, + { + "epoch": 45.376770538243626, + "grad_norm": 9.920051263143561, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 685928208, + "step": 3994 + }, + { + "epoch": 45.376770538243626, + "loss": 0.11352415382862091, + "loss_ce": 0.01623411476612091, + "loss_iou": 0.23828125, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 685928208, + "step": 3994 + }, + { + "epoch": 45.388101983002834, + "grad_norm": 7.464195854503767, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 686099572, + "step": 3995 + }, + { + "epoch": 45.388101983002834, + "loss": 0.1888955980539322, + "loss_ce": 0.02434481680393219, + "loss_iou": 0.0, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 686099572, + "step": 3995 + }, + { + "epoch": 45.39943342776204, + "grad_norm": 4.83817901538267, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 686269796, + "step": 3996 + }, + { + "epoch": 45.39943342776204, + "loss": 0.24119064211845398, + "loss_ce": 0.041269995272159576, + "loss_iou": 0.21875, + "loss_num": 0.0400390625, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 686269796, + "step": 3996 + }, + { + "epoch": 45.41076487252125, + "grad_norm": 7.8935377101564494, + "learning_rate": 5e-06, + "loss": 0.1298, + "num_input_tokens_seen": 686438212, + "step": 3997 + }, + { + "epoch": 45.41076487252125, + "loss": 0.16225041449069977, + "loss_ce": 0.031085869297385216, + "loss_iou": 0.515625, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 686438212, + "step": 3997 + }, + { + "epoch": 45.422096317280456, + "grad_norm": 3.405448797930211, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 686610268, + "step": 3998 + }, + { + "epoch": 45.422096317280456, + "loss": 0.18029937148094177, + "loss_ce": 0.02063140645623207, + "loss_iou": 0.33984375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 686610268, + "step": 3998 + }, + { + "epoch": 45.43342776203966, + "grad_norm": 7.301927382845081, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 686781284, + "step": 3999 + }, + { + "epoch": 45.43342776203966, + "loss": 0.09374446421861649, + "loss_ce": 0.011224937625229359, + "loss_iou": 0.13671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 686781284, + "step": 3999 + }, + { + "epoch": 45.444759206798864, + "grad_norm": 4.6130987466132085, + "learning_rate": 5e-06, + "loss": 0.1381, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.444759206798864, + "eval_seeclick_CIoU": 0.5753580033779144, + "eval_seeclick_GIoU": 0.5722742974758148, + "eval_seeclick_IoU": 0.6068568825721741, + "eval_seeclick_MAE_all": 0.057229211553931236, + "eval_seeclick_MAE_h": 0.03215831704437733, + "eval_seeclick_MAE_w": 0.07634955644607544, + "eval_seeclick_MAE_x": 0.08851699158549309, + "eval_seeclick_MAE_y": 0.03189198113977909, + "eval_seeclick_NUM_probability": 0.9997608363628387, + "eval_seeclick_inside_bbox": 0.9375, + "eval_seeclick_loss": 0.5520585775375366, + "eval_seeclick_loss_ce": 0.3357124775648117, + "eval_seeclick_loss_iou": 0.506103515625, + "eval_seeclick_loss_num": 0.042758941650390625, + "eval_seeclick_loss_xval": 0.2137908935546875, + "eval_seeclick_runtime": 70.0915, + "eval_seeclick_samples_per_second": 0.613, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.444759206798864, + "eval_icons_CIoU": 0.786271333694458, + "eval_icons_GIoU": 0.7868271470069885, + "eval_icons_IoU": 0.7971721887588501, + "eval_icons_MAE_all": 0.030403072014451027, + "eval_icons_MAE_h": 0.028422322124242783, + "eval_icons_MAE_w": 0.03643226437270641, + "eval_icons_MAE_x": 0.0266373991034925, + "eval_icons_MAE_y": 0.030120306648314, + "eval_icons_NUM_probability": 0.9990636110305786, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.1066146120429039, + "eval_icons_loss_ce": 0.00027707025583367795, + "eval_icons_loss_iou": 0.56201171875, + "eval_icons_loss_num": 0.019847869873046875, + "eval_icons_loss_xval": 0.0992279052734375, + "eval_icons_runtime": 79.7349, + "eval_icons_samples_per_second": 0.627, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.444759206798864, + "eval_screenspot_CIoU": 0.6420889695485433, + "eval_screenspot_GIoU": 0.6387250622113546, + "eval_screenspot_IoU": 0.6685307423273722, + "eval_screenspot_MAE_all": 0.0636286586523056, + "eval_screenspot_MAE_h": 0.03605052394171556, + "eval_screenspot_MAE_w": 0.11177605638901393, + "eval_screenspot_MAE_x": 0.0704745426774025, + "eval_screenspot_MAE_y": 0.03621351780990759, + "eval_screenspot_NUM_probability": 0.9997280637423197, + "eval_screenspot_inside_bbox": 0.8974999984105428, + "eval_screenspot_loss": 0.27599117159843445, + "eval_screenspot_loss_ce": 0.012172514184688529, + "eval_screenspot_loss_iou": 0.46875, + "eval_screenspot_loss_num": 0.05130767822265625, + "eval_screenspot_loss_xval": 0.2565511067708333, + "eval_screenspot_runtime": 137.9208, + "eval_screenspot_samples_per_second": 0.645, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.444759206798864, + "eval_compot_CIoU": 0.8052595555782318, + "eval_compot_GIoU": 0.7997151017189026, + "eval_compot_IoU": 0.8200662136077881, + "eval_compot_MAE_all": 0.034053971990942955, + "eval_compot_MAE_h": 0.029667741619050503, + "eval_compot_MAE_w": 0.04165242798626423, + "eval_compot_MAE_x": 0.03711581323295832, + "eval_compot_MAE_y": 0.027779914438724518, + "eval_compot_NUM_probability": 0.9999364614486694, + "eval_compot_inside_bbox": 0.9131944477558136, + "eval_compot_loss": 0.11417640745639801, + "eval_compot_loss_ce": 0.00021567247313214466, + "eval_compot_loss_iou": 0.52294921875, + "eval_compot_loss_num": 0.020069122314453125, + "eval_compot_loss_xval": 0.10040283203125, + "eval_compot_runtime": 88.5259, + "eval_compot_samples_per_second": 0.565, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.444759206798864, + "eval_custom_ui_MAE_all": 0.028795880265533924, + "eval_custom_ui_MAE_x": 0.04842784069478512, + "eval_custom_ui_MAE_y": 0.009163921000435948, + "eval_custom_ui_NUM_probability": 0.9999248087406158, + "eval_custom_ui_loss": 0.22209425270557404, + "eval_custom_ui_loss_ce": 0.08117441087961197, + "eval_custom_ui_loss_num": 0.028009414672851562, + "eval_custom_ui_loss_xval": 0.1399688720703125, + "eval_custom_ui_runtime": 59.6647, + "eval_custom_ui_samples_per_second": 0.838, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.444759206798864, + "loss": 0.270561158657074, + "loss_ce": 0.09734340012073517, + "loss_iou": 0.0, + "loss_num": 0.03466796875, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 686953220, + "step": 4000 + }, + { + "epoch": 45.45609065155807, + "grad_norm": 9.31204032917625, + "learning_rate": 5e-06, + "loss": 0.1192, + "num_input_tokens_seen": 687123868, + "step": 4001 + }, + { + "epoch": 45.45609065155807, + "loss": 0.09752388298511505, + "loss_ce": 0.011189653538167477, + "loss_iou": 0.26953125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 687123868, + "step": 4001 + }, + { + "epoch": 45.46742209631728, + "grad_norm": 12.269168177350808, + "learning_rate": 5e-06, + "loss": 0.1621, + "num_input_tokens_seen": 687296036, + "step": 4002 + }, + { + "epoch": 45.46742209631728, + "loss": 0.11949057877063751, + "loss_ce": 0.009169531986117363, + "loss_iou": 0.353515625, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 687296036, + "step": 4002 + }, + { + "epoch": 45.478753541076486, + "grad_norm": 4.147795239221182, + "learning_rate": 5e-06, + "loss": 0.1766, + "num_input_tokens_seen": 687466952, + "step": 4003 + }, + { + "epoch": 45.478753541076486, + "loss": 0.1856471300125122, + "loss_ce": 0.008401032537221909, + "loss_iou": 0.48828125, + "loss_num": 0.035400390625, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 687466952, + "step": 4003 + }, + { + "epoch": 45.49008498583569, + "grad_norm": 14.007785582328102, + "learning_rate": 5e-06, + "loss": 0.1206, + "num_input_tokens_seen": 687638980, + "step": 4004 + }, + { + "epoch": 45.49008498583569, + "loss": 0.09351881593465805, + "loss_ce": 0.013349141925573349, + "loss_iou": 0.58984375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 687638980, + "step": 4004 + }, + { + "epoch": 45.5014164305949, + "grad_norm": 6.070229664311195, + "learning_rate": 5e-06, + "loss": 0.1473, + "num_input_tokens_seen": 687809768, + "step": 4005 + }, + { + "epoch": 45.5014164305949, + "loss": 0.09495474398136139, + "loss_ce": 0.0030357972718775272, + "loss_iou": 0.328125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 687809768, + "step": 4005 + }, + { + "epoch": 45.51274787535411, + "grad_norm": 11.16057294591649, + "learning_rate": 5e-06, + "loss": 0.135, + "num_input_tokens_seen": 687981112, + "step": 4006 + }, + { + "epoch": 45.51274787535411, + "loss": 0.1584462821483612, + "loss_ce": 0.007689447142183781, + "loss_iou": 0.54296875, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 687981112, + "step": 4006 + }, + { + "epoch": 45.524079320113316, + "grad_norm": 11.814343170315833, + "learning_rate": 5e-06, + "loss": 0.1808, + "num_input_tokens_seen": 688152364, + "step": 4007 + }, + { + "epoch": 45.524079320113316, + "loss": 0.2301245927810669, + "loss_ce": 0.008902676403522491, + "loss_iou": 0.58203125, + "loss_num": 0.044189453125, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 688152364, + "step": 4007 + }, + { + "epoch": 45.53541076487252, + "grad_norm": 4.871684150103701, + "learning_rate": 5e-06, + "loss": 0.1321, + "num_input_tokens_seen": 688320480, + "step": 4008 + }, + { + "epoch": 45.53541076487252, + "loss": 0.17684242129325867, + "loss_ce": 0.012505274266004562, + "loss_iou": 0.44921875, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 688320480, + "step": 4008 + }, + { + "epoch": 45.54674220963173, + "grad_norm": 8.719209365182106, + "learning_rate": 5e-06, + "loss": 0.1494, + "num_input_tokens_seen": 688490572, + "step": 4009 + }, + { + "epoch": 45.54674220963173, + "loss": 0.09991415590047836, + "loss_ce": 0.011916719377040863, + "loss_iou": 0.439453125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 688490572, + "step": 4009 + }, + { + "epoch": 45.55807365439094, + "grad_norm": 9.053125357384218, + "learning_rate": 5e-06, + "loss": 0.106, + "num_input_tokens_seen": 688662528, + "step": 4010 + }, + { + "epoch": 45.55807365439094, + "loss": 0.09128035604953766, + "loss_ce": 0.008638758212327957, + "loss_iou": 0.50390625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 688662528, + "step": 4010 + }, + { + "epoch": 45.56940509915014, + "grad_norm": 8.352866762475179, + "learning_rate": 5e-06, + "loss": 0.1295, + "num_input_tokens_seen": 688834664, + "step": 4011 + }, + { + "epoch": 45.56940509915014, + "loss": 0.13500797748565674, + "loss_ce": 0.027006270363926888, + "loss_iou": 0.443359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 688834664, + "step": 4011 + }, + { + "epoch": 45.580736543909346, + "grad_norm": 5.040456032464371, + "learning_rate": 5e-06, + "loss": 0.1383, + "num_input_tokens_seen": 689006528, + "step": 4012 + }, + { + "epoch": 45.580736543909346, + "loss": 0.10428878664970398, + "loss_ce": 0.01514693908393383, + "loss_iou": 0.451171875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 689006528, + "step": 4012 + }, + { + "epoch": 45.59206798866855, + "grad_norm": 6.043612290767415, + "learning_rate": 5e-06, + "loss": 0.1865, + "num_input_tokens_seen": 689178116, + "step": 4013 + }, + { + "epoch": 45.59206798866855, + "loss": 0.3318237066268921, + "loss_ce": 0.018362397328019142, + "loss_iou": 0.361328125, + "loss_num": 0.0625, + "loss_xval": 0.3125, + "num_input_tokens_seen": 689178116, + "step": 4013 + }, + { + "epoch": 45.60339943342776, + "grad_norm": 6.376306942221769, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 689349720, + "step": 4014 + }, + { + "epoch": 45.60339943342776, + "loss": 0.16877371072769165, + "loss_ce": 0.018993424251675606, + "loss_iou": 0.408203125, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 689349720, + "step": 4014 + }, + { + "epoch": 45.61473087818697, + "grad_norm": 5.246386012748272, + "learning_rate": 5e-06, + "loss": 0.1691, + "num_input_tokens_seen": 689520732, + "step": 4015 + }, + { + "epoch": 45.61473087818697, + "loss": 0.13252577185630798, + "loss_ce": 0.01536879874765873, + "loss_iou": 0.4296875, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 689520732, + "step": 4015 + }, + { + "epoch": 45.626062322946176, + "grad_norm": 4.300499721567621, + "learning_rate": 5e-06, + "loss": 0.1838, + "num_input_tokens_seen": 689691084, + "step": 4016 + }, + { + "epoch": 45.626062322946176, + "loss": 0.07665916532278061, + "loss_ce": 0.009581529535353184, + "loss_iou": 0.6484375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 689691084, + "step": 4016 + }, + { + "epoch": 45.63739376770538, + "grad_norm": 3.1191792672971483, + "learning_rate": 5e-06, + "loss": 0.1429, + "num_input_tokens_seen": 689862804, + "step": 4017 + }, + { + "epoch": 45.63739376770538, + "loss": 0.1220824122428894, + "loss_ce": 0.012005502358078957, + "loss_iou": 0.359375, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 689862804, + "step": 4017 + }, + { + "epoch": 45.64872521246459, + "grad_norm": 4.719077406216629, + "learning_rate": 5e-06, + "loss": 0.1165, + "num_input_tokens_seen": 690031992, + "step": 4018 + }, + { + "epoch": 45.64872521246459, + "loss": 0.10369390994310379, + "loss_ce": 0.02381414920091629, + "loss_iou": 0.248046875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 690031992, + "step": 4018 + }, + { + "epoch": 45.6600566572238, + "grad_norm": 8.181839908512726, + "learning_rate": 5e-06, + "loss": 0.1932, + "num_input_tokens_seen": 690203596, + "step": 4019 + }, + { + "epoch": 45.6600566572238, + "loss": 0.21765275299549103, + "loss_ce": 0.03378434106707573, + "loss_iou": 0.404296875, + "loss_num": 0.036865234375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 690203596, + "step": 4019 + }, + { + "epoch": 45.671388101983005, + "grad_norm": 3.9158688906263914, + "learning_rate": 5e-06, + "loss": 0.1095, + "num_input_tokens_seen": 690375272, + "step": 4020 + }, + { + "epoch": 45.671388101983005, + "loss": 0.12163268029689789, + "loss_ce": 0.019978631287813187, + "loss_iou": 0.625, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 690375272, + "step": 4020 + }, + { + "epoch": 45.68271954674221, + "grad_norm": 7.975997913736339, + "learning_rate": 5e-06, + "loss": 0.1642, + "num_input_tokens_seen": 690546200, + "step": 4021 + }, + { + "epoch": 45.68271954674221, + "loss": 0.14920452237129211, + "loss_ce": 0.011081965640187263, + "loss_iou": 0.62890625, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 690546200, + "step": 4021 + }, + { + "epoch": 45.69405099150141, + "grad_norm": 4.908049695166497, + "learning_rate": 5e-06, + "loss": 0.136, + "num_input_tokens_seen": 690716648, + "step": 4022 + }, + { + "epoch": 45.69405099150141, + "loss": 0.09378327429294586, + "loss_ce": 0.0029629673808813095, + "loss_iou": 0.62890625, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 690716648, + "step": 4022 + }, + { + "epoch": 45.70538243626062, + "grad_norm": 10.419466484524843, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 690888552, + "step": 4023 + }, + { + "epoch": 45.70538243626062, + "loss": 0.07819560170173645, + "loss_ce": 0.015359912998974323, + "loss_iou": 0.1826171875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 690888552, + "step": 4023 + }, + { + "epoch": 45.71671388101983, + "grad_norm": 8.4351898440096, + "learning_rate": 5e-06, + "loss": 0.1526, + "num_input_tokens_seen": 691060204, + "step": 4024 + }, + { + "epoch": 45.71671388101983, + "loss": 0.09885814040899277, + "loss_ce": 0.024441026151180267, + "loss_iou": 0.47265625, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 691060204, + "step": 4024 + }, + { + "epoch": 45.728045325779036, + "grad_norm": 4.815286880513129, + "learning_rate": 5e-06, + "loss": 0.1802, + "num_input_tokens_seen": 691232156, + "step": 4025 + }, + { + "epoch": 45.728045325779036, + "loss": 0.10942128300666809, + "loss_ce": 0.02589466981589794, + "loss_iou": 0.322265625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 691232156, + "step": 4025 + }, + { + "epoch": 45.73937677053824, + "grad_norm": 7.476140142632498, + "learning_rate": 5e-06, + "loss": 0.17, + "num_input_tokens_seen": 691403528, + "step": 4026 + }, + { + "epoch": 45.73937677053824, + "loss": 0.25937730073928833, + "loss_ce": 0.03720935434103012, + "loss_iou": 0.65234375, + "loss_num": 0.04443359375, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 691403528, + "step": 4026 + }, + { + "epoch": 45.75070821529745, + "grad_norm": 7.256598943671983, + "learning_rate": 5e-06, + "loss": 0.1565, + "num_input_tokens_seen": 691575664, + "step": 4027 + }, + { + "epoch": 45.75070821529745, + "loss": 0.12446477264165878, + "loss_ce": 0.02204778790473938, + "loss_iou": 0.263671875, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 691575664, + "step": 4027 + }, + { + "epoch": 45.76203966005666, + "grad_norm": 7.709819614324347, + "learning_rate": 5e-06, + "loss": 0.1923, + "num_input_tokens_seen": 691747364, + "step": 4028 + }, + { + "epoch": 45.76203966005666, + "loss": 0.15157550573349, + "loss_ce": 0.007502014748752117, + "loss_iou": 0.4765625, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 691747364, + "step": 4028 + }, + { + "epoch": 45.773371104815865, + "grad_norm": 6.228833336730881, + "learning_rate": 5e-06, + "loss": 0.1534, + "num_input_tokens_seen": 691918140, + "step": 4029 + }, + { + "epoch": 45.773371104815865, + "loss": 0.16909784078598022, + "loss_ce": 0.01889032870531082, + "loss_iou": 0.32421875, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 691918140, + "step": 4029 + }, + { + "epoch": 45.78470254957507, + "grad_norm": 4.262229610498088, + "learning_rate": 5e-06, + "loss": 0.1843, + "num_input_tokens_seen": 692088244, + "step": 4030 + }, + { + "epoch": 45.78470254957507, + "loss": 0.15146948397159576, + "loss_ce": 0.023326169699430466, + "loss_iou": 0.28125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 692088244, + "step": 4030 + }, + { + "epoch": 45.79603399433428, + "grad_norm": 5.439921049211111, + "learning_rate": 5e-06, + "loss": 0.1693, + "num_input_tokens_seen": 692260012, + "step": 4031 + }, + { + "epoch": 45.79603399433428, + "loss": 0.1990601271390915, + "loss_ce": 0.009759585373103619, + "loss_iou": 0.458984375, + "loss_num": 0.037841796875, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 692260012, + "step": 4031 + }, + { + "epoch": 45.80736543909349, + "grad_norm": 21.720364474416225, + "learning_rate": 5e-06, + "loss": 0.1778, + "num_input_tokens_seen": 692430088, + "step": 4032 + }, + { + "epoch": 45.80736543909349, + "loss": 0.17105695605278015, + "loss_ce": 0.017736639827489853, + "loss_iou": 0.447265625, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 692430088, + "step": 4032 + }, + { + "epoch": 45.81869688385269, + "grad_norm": 4.291526005404919, + "learning_rate": 5e-06, + "loss": 0.1265, + "num_input_tokens_seen": 692601696, + "step": 4033 + }, + { + "epoch": 45.81869688385269, + "loss": 0.10573385655879974, + "loss_ce": 0.019811607897281647, + "loss_iou": 0.47265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 692601696, + "step": 4033 + }, + { + "epoch": 45.830028328611895, + "grad_norm": 14.738662069142217, + "learning_rate": 5e-06, + "loss": 0.1612, + "num_input_tokens_seen": 692771796, + "step": 4034 + }, + { + "epoch": 45.830028328611895, + "loss": 0.1369961053133011, + "loss_ce": 0.010378671810030937, + "loss_iou": 0.3046875, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 692771796, + "step": 4034 + }, + { + "epoch": 45.8413597733711, + "grad_norm": 5.272358834271055, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 692942128, + "step": 4035 + }, + { + "epoch": 45.8413597733711, + "loss": 0.1546439379453659, + "loss_ce": 0.011699603870511055, + "loss_iou": 0.451171875, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 692942128, + "step": 4035 + }, + { + "epoch": 45.85269121813031, + "grad_norm": 5.0499681071489775, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 693113752, + "step": 4036 + }, + { + "epoch": 45.85269121813031, + "loss": 0.10734754800796509, + "loss_ce": 0.003526749787852168, + "loss_iou": 0.58984375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 693113752, + "step": 4036 + }, + { + "epoch": 45.86402266288952, + "grad_norm": 12.33536913658161, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 693285384, + "step": 4037 + }, + { + "epoch": 45.86402266288952, + "loss": 0.15130847692489624, + "loss_ce": 0.01770252361893654, + "loss_iou": 0.421875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 693285384, + "step": 4037 + }, + { + "epoch": 45.875354107648725, + "grad_norm": 5.575263259142814, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 693456296, + "step": 4038 + }, + { + "epoch": 45.875354107648725, + "loss": 0.13451847434043884, + "loss_ce": 0.019650310277938843, + "loss_iou": 0.458984375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 693456296, + "step": 4038 + }, + { + "epoch": 45.88668555240793, + "grad_norm": 4.394589401981415, + "learning_rate": 5e-06, + "loss": 0.1504, + "num_input_tokens_seen": 693628360, + "step": 4039 + }, + { + "epoch": 45.88668555240793, + "loss": 0.16762179136276245, + "loss_ce": 0.016193576157093048, + "loss_iou": 0.51171875, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 693628360, + "step": 4039 + }, + { + "epoch": 45.89801699716714, + "grad_norm": 14.174316130694905, + "learning_rate": 5e-06, + "loss": 0.1275, + "num_input_tokens_seen": 693798768, + "step": 4040 + }, + { + "epoch": 45.89801699716714, + "loss": 0.0929897278547287, + "loss_ce": 0.0105922631919384, + "loss_iou": 0.447265625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 693798768, + "step": 4040 + }, + { + "epoch": 45.90934844192635, + "grad_norm": 4.384574378142213, + "learning_rate": 5e-06, + "loss": 0.1551, + "num_input_tokens_seen": 693970572, + "step": 4041 + }, + { + "epoch": 45.90934844192635, + "loss": 0.16414324939250946, + "loss_ce": 0.00490253372117877, + "loss_iou": 0.294921875, + "loss_num": 0.031982421875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 693970572, + "step": 4041 + }, + { + "epoch": 45.920679886685555, + "grad_norm": 7.035182863820019, + "learning_rate": 5e-06, + "loss": 0.149, + "num_input_tokens_seen": 694142172, + "step": 4042 + }, + { + "epoch": 45.920679886685555, + "loss": 0.1220024824142456, + "loss_ce": 0.014870522543787956, + "loss_iou": 0.546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 694142172, + "step": 4042 + }, + { + "epoch": 45.93201133144476, + "grad_norm": 4.525227088226603, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 694314212, + "step": 4043 + }, + { + "epoch": 45.93201133144476, + "loss": 0.0821773111820221, + "loss_ce": 0.0008479647221975029, + "loss_iou": 0.484375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 694314212, + "step": 4043 + }, + { + "epoch": 45.94334277620396, + "grad_norm": 4.44986101318948, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 694483232, + "step": 4044 + }, + { + "epoch": 45.94334277620396, + "loss": 0.17371788620948792, + "loss_ce": 0.03776206821203232, + "loss_iou": 0.52734375, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 694483232, + "step": 4044 + }, + { + "epoch": 45.95467422096317, + "grad_norm": 3.525747481949752, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 694655076, + "step": 4045 + }, + { + "epoch": 45.95467422096317, + "loss": 0.15462997555732727, + "loss_ce": 0.003934177104383707, + "loss_iou": 0.609375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 694655076, + "step": 4045 + }, + { + "epoch": 45.96600566572238, + "grad_norm": 5.503909865333753, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 694826128, + "step": 4046 + }, + { + "epoch": 45.96600566572238, + "loss": 0.20056535303592682, + "loss_ce": 0.0060157934203743935, + "loss_iou": 0.369140625, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 694826128, + "step": 4046 + }, + { + "epoch": 45.977337110481585, + "grad_norm": 6.673464510430702, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 694997956, + "step": 4047 + }, + { + "epoch": 45.977337110481585, + "loss": 0.08946847915649414, + "loss_ce": 0.019201762974262238, + "loss_iou": 0.47265625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 694997956, + "step": 4047 + }, + { + "epoch": 45.98866855524079, + "grad_norm": 4.230827994984814, + "learning_rate": 5e-06, + "loss": 0.129, + "num_input_tokens_seen": 695169776, + "step": 4048 + }, + { + "epoch": 45.98866855524079, + "loss": 0.11835239827632904, + "loss_ce": 0.01721714250743389, + "loss_iou": 0.482421875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 695169776, + "step": 4048 + }, + { + "epoch": 46.0, + "grad_norm": 37.442339599008115, + "learning_rate": 5e-06, + "loss": 0.14, + "num_input_tokens_seen": 695340616, + "step": 4049 + }, + { + "epoch": 46.0, + "loss": 0.20682911574840546, + "loss_ce": 0.010768943466246128, + "loss_iou": 0.357421875, + "loss_num": 0.0390625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 695340616, + "step": 4049 + }, + { + "epoch": 46.01133144475921, + "grad_norm": 8.029671612997193, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 695512220, + "step": 4050 + }, + { + "epoch": 46.01133144475921, + "loss": 0.19538497924804688, + "loss_ce": 0.016262037679553032, + "loss_iou": 0.0, + "loss_num": 0.035888671875, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 695512220, + "step": 4050 + }, + { + "epoch": 46.022662889518415, + "grad_norm": 3.9366780952861826, + "learning_rate": 5e-06, + "loss": 0.1109, + "num_input_tokens_seen": 695682644, + "step": 4051 + }, + { + "epoch": 46.022662889518415, + "loss": 0.10989221930503845, + "loss_ce": 0.015913337469100952, + "loss_iou": 0.421875, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 695682644, + "step": 4051 + }, + { + "epoch": 46.03399433427762, + "grad_norm": 3.909104107158833, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 695853972, + "step": 4052 + }, + { + "epoch": 46.03399433427762, + "loss": 0.10027308762073517, + "loss_ce": 0.014610247686505318, + "loss_iou": 0.0, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 695853972, + "step": 4052 + }, + { + "epoch": 46.04532577903683, + "grad_norm": 4.78749313718534, + "learning_rate": 5e-06, + "loss": 0.1211, + "num_input_tokens_seen": 696023636, + "step": 4053 + }, + { + "epoch": 46.04532577903683, + "loss": 0.10963616520166397, + "loss_ce": 0.002824646420776844, + "loss_iou": 0.47265625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 696023636, + "step": 4053 + }, + { + "epoch": 46.05665722379604, + "grad_norm": 8.140274621598023, + "learning_rate": 5e-06, + "loss": 0.1153, + "num_input_tokens_seen": 696195536, + "step": 4054 + }, + { + "epoch": 46.05665722379604, + "loss": 0.1319035291671753, + "loss_ce": 0.005499732214957476, + "loss_iou": 0.5078125, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 696195536, + "step": 4054 + }, + { + "epoch": 46.06798866855524, + "grad_norm": 9.317605070566657, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 696365748, + "step": 4055 + }, + { + "epoch": 46.06798866855524, + "loss": 0.06587816029787064, + "loss_ce": 0.006170519627630711, + "loss_iou": 0.55859375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 696365748, + "step": 4055 + }, + { + "epoch": 46.079320113314445, + "grad_norm": 5.99944704025556, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 696536612, + "step": 4056 + }, + { + "epoch": 46.079320113314445, + "loss": 0.09519752860069275, + "loss_ce": 0.004468770697712898, + "loss_iou": 0.6171875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 696536612, + "step": 4056 + }, + { + "epoch": 46.09065155807365, + "grad_norm": 6.519834938236697, + "learning_rate": 5e-06, + "loss": 0.1337, + "num_input_tokens_seen": 696708272, + "step": 4057 + }, + { + "epoch": 46.09065155807365, + "loss": 0.07906650751829147, + "loss_ce": 0.012721296399831772, + "loss_iou": 0.23046875, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 696708272, + "step": 4057 + }, + { + "epoch": 46.10198300283286, + "grad_norm": 4.327929889530436, + "learning_rate": 5e-06, + "loss": 0.134, + "num_input_tokens_seen": 696879424, + "step": 4058 + }, + { + "epoch": 46.10198300283286, + "loss": 0.17151013016700745, + "loss_ce": 0.004304311703890562, + "loss_iou": 0.3984375, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 696879424, + "step": 4058 + }, + { + "epoch": 46.11331444759207, + "grad_norm": 3.6334591029548866, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 697051788, + "step": 4059 + }, + { + "epoch": 46.11331444759207, + "loss": 0.1637030690908432, + "loss_ce": 0.010596380569040775, + "loss_iou": 0.48046875, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 697051788, + "step": 4059 + }, + { + "epoch": 46.124645892351275, + "grad_norm": 4.770888159027178, + "learning_rate": 5e-06, + "loss": 0.1351, + "num_input_tokens_seen": 697221036, + "step": 4060 + }, + { + "epoch": 46.124645892351275, + "loss": 0.11457142978906631, + "loss_ce": 0.006813860032707453, + "loss_iou": 0.47265625, + "loss_num": 0.0216064453125, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 697221036, + "step": 4060 + }, + { + "epoch": 46.13597733711048, + "grad_norm": 4.453661251315172, + "learning_rate": 5e-06, + "loss": 0.1234, + "num_input_tokens_seen": 697392960, + "step": 4061 + }, + { + "epoch": 46.13597733711048, + "loss": 0.10582560300827026, + "loss_ce": 0.011312661692500114, + "loss_iou": 0.5546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 697392960, + "step": 4061 + }, + { + "epoch": 46.14730878186969, + "grad_norm": 4.738120467915284, + "learning_rate": 5e-06, + "loss": 0.1388, + "num_input_tokens_seen": 697564976, + "step": 4062 + }, + { + "epoch": 46.14730878186969, + "loss": 0.1082979291677475, + "loss_ce": 0.005758873652666807, + "loss_iou": 0.5625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 697564976, + "step": 4062 + }, + { + "epoch": 46.1586402266289, + "grad_norm": 4.674716134505156, + "learning_rate": 5e-06, + "loss": 0.1854, + "num_input_tokens_seen": 697735944, + "step": 4063 + }, + { + "epoch": 46.1586402266289, + "loss": 0.19423949718475342, + "loss_ce": 0.005869742948561907, + "loss_iou": 0.22265625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 697735944, + "step": 4063 + }, + { + "epoch": 46.169971671388105, + "grad_norm": 10.410889751536967, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 697907684, + "step": 4064 + }, + { + "epoch": 46.169971671388105, + "loss": 0.08565324544906616, + "loss_ce": 0.01128191128373146, + "loss_iou": 0.4609375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 697907684, + "step": 4064 + }, + { + "epoch": 46.18130311614731, + "grad_norm": 4.3081013184249555, + "learning_rate": 5e-06, + "loss": 0.1726, + "num_input_tokens_seen": 698076996, + "step": 4065 + }, + { + "epoch": 46.18130311614731, + "loss": 0.1471741795539856, + "loss_ce": 0.046038925647735596, + "loss_iou": 0.54296875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 698076996, + "step": 4065 + }, + { + "epoch": 46.19263456090651, + "grad_norm": 4.47030046163472, + "learning_rate": 5e-06, + "loss": 0.1162, + "num_input_tokens_seen": 698249656, + "step": 4066 + }, + { + "epoch": 46.19263456090651, + "loss": 0.12511605024337769, + "loss_ce": 0.004785236902534962, + "loss_iou": 0.49609375, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 698249656, + "step": 4066 + }, + { + "epoch": 46.20396600566572, + "grad_norm": 6.0205544317267865, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 698421400, + "step": 4067 + }, + { + "epoch": 46.20396600566572, + "loss": 0.10036885738372803, + "loss_ce": 0.004757285583764315, + "loss_iou": 0.4375, + "loss_num": 0.01904296875, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 698421400, + "step": 4067 + }, + { + "epoch": 46.21529745042493, + "grad_norm": 3.8501652583748864, + "learning_rate": 5e-06, + "loss": 0.1303, + "num_input_tokens_seen": 698593236, + "step": 4068 + }, + { + "epoch": 46.21529745042493, + "loss": 0.2151014804840088, + "loss_ce": 0.02143692597746849, + "loss_iou": 0.4140625, + "loss_num": 0.03857421875, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 698593236, + "step": 4068 + }, + { + "epoch": 46.226628895184135, + "grad_norm": 6.152689104657704, + "learning_rate": 5e-06, + "loss": 0.1329, + "num_input_tokens_seen": 698764976, + "step": 4069 + }, + { + "epoch": 46.226628895184135, + "loss": 0.18791410326957703, + "loss_ce": 0.01158355362713337, + "loss_iou": 0.51171875, + "loss_num": 0.03515625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 698764976, + "step": 4069 + }, + { + "epoch": 46.23796033994334, + "grad_norm": 3.8575994737298793, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 698936224, + "step": 4070 + }, + { + "epoch": 46.23796033994334, + "loss": 0.14665040373802185, + "loss_ce": 0.007825931534171104, + "loss_iou": 0.451171875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 698936224, + "step": 4070 + }, + { + "epoch": 46.24929178470255, + "grad_norm": 5.036340117789579, + "learning_rate": 5e-06, + "loss": 0.159, + "num_input_tokens_seen": 699107260, + "step": 4071 + }, + { + "epoch": 46.24929178470255, + "loss": 0.2108308970928192, + "loss_ce": 0.006180016789585352, + "loss_iou": 0.59375, + "loss_num": 0.041015625, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 699107260, + "step": 4071 + }, + { + "epoch": 46.26062322946176, + "grad_norm": 5.476714873348524, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 699279088, + "step": 4072 + }, + { + "epoch": 46.26062322946176, + "loss": 0.10727986693382263, + "loss_ce": 0.008814901113510132, + "loss_iou": 0.515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 699279088, + "step": 4072 + }, + { + "epoch": 46.271954674220964, + "grad_norm": 4.428868749580724, + "learning_rate": 5e-06, + "loss": 0.1286, + "num_input_tokens_seen": 699449704, + "step": 4073 + }, + { + "epoch": 46.271954674220964, + "loss": 0.19007912278175354, + "loss_ce": 0.06129493936896324, + "loss_iou": 0.212890625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 699449704, + "step": 4073 + }, + { + "epoch": 46.28328611898017, + "grad_norm": 4.876000199126908, + "learning_rate": 5e-06, + "loss": 0.1278, + "num_input_tokens_seen": 699621788, + "step": 4074 + }, + { + "epoch": 46.28328611898017, + "loss": 0.08124564588069916, + "loss_ce": 0.014534223824739456, + "loss_iou": 0.490234375, + "loss_num": 0.01336669921875, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 699621788, + "step": 4074 + }, + { + "epoch": 46.29461756373938, + "grad_norm": 7.153241046723841, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 699792328, + "step": 4075 + }, + { + "epoch": 46.29461756373938, + "loss": 0.14683113992214203, + "loss_ce": 0.01880989968776703, + "loss_iou": 0.53515625, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 699792328, + "step": 4075 + }, + { + "epoch": 46.30594900849859, + "grad_norm": 5.106710145431983, + "learning_rate": 5e-06, + "loss": 0.1368, + "num_input_tokens_seen": 699964152, + "step": 4076 + }, + { + "epoch": 46.30594900849859, + "loss": 0.08658863604068756, + "loss_ce": 0.008799327537417412, + "loss_iou": 0.54296875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 699964152, + "step": 4076 + }, + { + "epoch": 46.31728045325779, + "grad_norm": 3.9861398239406935, + "learning_rate": 5e-06, + "loss": 0.1346, + "num_input_tokens_seen": 700136144, + "step": 4077 + }, + { + "epoch": 46.31728045325779, + "loss": 0.06945373117923737, + "loss_ce": 0.0023760886397212744, + "loss_iou": 0.52734375, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 700136144, + "step": 4077 + }, + { + "epoch": 46.328611898016995, + "grad_norm": 5.222039549340686, + "learning_rate": 5e-06, + "loss": 0.1132, + "num_input_tokens_seen": 700306940, + "step": 4078 + }, + { + "epoch": 46.328611898016995, + "loss": 0.10762211680412292, + "loss_ce": 0.009080860763788223, + "loss_iou": 0.380859375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 700306940, + "step": 4078 + }, + { + "epoch": 46.3399433427762, + "grad_norm": 10.256451617900025, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 700477872, + "step": 4079 + }, + { + "epoch": 46.3399433427762, + "loss": 0.10224519670009613, + "loss_ce": 0.0004232922801747918, + "loss_iou": 0.416015625, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 700477872, + "step": 4079 + }, + { + "epoch": 46.35127478753541, + "grad_norm": 3.8981066547072576, + "learning_rate": 5e-06, + "loss": 0.113, + "num_input_tokens_seen": 700649372, + "step": 4080 + }, + { + "epoch": 46.35127478753541, + "loss": 0.1448293924331665, + "loss_ce": 0.022667525336146355, + "loss_iou": 0.46484375, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 700649372, + "step": 4080 + }, + { + "epoch": 46.36260623229462, + "grad_norm": 5.488542117464678, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 700819904, + "step": 4081 + }, + { + "epoch": 46.36260623229462, + "loss": 0.08121670037508011, + "loss_ce": 0.007760888896882534, + "loss_iou": 0.490234375, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 700819904, + "step": 4081 + }, + { + "epoch": 46.373937677053824, + "grad_norm": 5.459321748202955, + "learning_rate": 5e-06, + "loss": 0.1394, + "num_input_tokens_seen": 700990904, + "step": 4082 + }, + { + "epoch": 46.373937677053824, + "loss": 0.1338321566581726, + "loss_ce": 0.018475698307156563, + "loss_iou": 0.58203125, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 700990904, + "step": 4082 + }, + { + "epoch": 46.38526912181303, + "grad_norm": 4.375294198108324, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 701162804, + "step": 4083 + }, + { + "epoch": 46.38526912181303, + "loss": 0.08643153309822083, + "loss_ce": 0.003301647724583745, + "loss_iou": 0.48828125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 701162804, + "step": 4083 + }, + { + "epoch": 46.39660056657224, + "grad_norm": 3.7627644767980946, + "learning_rate": 5e-06, + "loss": 0.1054, + "num_input_tokens_seen": 701334956, + "step": 4084 + }, + { + "epoch": 46.39660056657224, + "loss": 0.08527108281850815, + "loss_ce": 0.008702482096850872, + "loss_iou": 0.43359375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 701334956, + "step": 4084 + }, + { + "epoch": 46.40793201133145, + "grad_norm": 10.395005249273133, + "learning_rate": 5e-06, + "loss": 0.1356, + "num_input_tokens_seen": 701505084, + "step": 4085 + }, + { + "epoch": 46.40793201133145, + "loss": 0.15912649035453796, + "loss_ce": 0.016670452430844307, + "loss_iou": 0.4453125, + "loss_num": 0.0284423828125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 701505084, + "step": 4085 + }, + { + "epoch": 46.419263456090654, + "grad_norm": 4.571563993193018, + "learning_rate": 5e-06, + "loss": 0.1469, + "num_input_tokens_seen": 701677328, + "step": 4086 + }, + { + "epoch": 46.419263456090654, + "loss": 0.07297007739543915, + "loss_ce": 0.015505479648709297, + "loss_iou": 0.546875, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 701677328, + "step": 4086 + }, + { + "epoch": 46.43059490084986, + "grad_norm": 6.295797853748418, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 701848872, + "step": 4087 + }, + { + "epoch": 46.43059490084986, + "loss": 0.14068666100502014, + "loss_ce": 0.026596687734127045, + "loss_iou": 0.37109375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 701848872, + "step": 4087 + }, + { + "epoch": 46.44192634560906, + "grad_norm": 3.410184659695416, + "learning_rate": 5e-06, + "loss": 0.1049, + "num_input_tokens_seen": 702021120, + "step": 4088 + }, + { + "epoch": 46.44192634560906, + "loss": 0.10812586545944214, + "loss_ce": 0.0036947091575711966, + "loss_iou": 0.390625, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 702021120, + "step": 4088 + }, + { + "epoch": 46.45325779036827, + "grad_norm": 5.255826511941344, + "learning_rate": 5e-06, + "loss": 0.1475, + "num_input_tokens_seen": 702193012, + "step": 4089 + }, + { + "epoch": 46.45325779036827, + "loss": 0.11156702041625977, + "loss_ce": 0.010767453350126743, + "loss_iou": 0.44921875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 702193012, + "step": 4089 + }, + { + "epoch": 46.46458923512748, + "grad_norm": 5.6815189225046545, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 702365180, + "step": 4090 + }, + { + "epoch": 46.46458923512748, + "loss": 0.18132451176643372, + "loss_ce": 0.012318171560764313, + "loss_iou": 0.470703125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 702365180, + "step": 4090 + }, + { + "epoch": 46.475920679886684, + "grad_norm": 5.596616364565578, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 702536904, + "step": 4091 + }, + { + "epoch": 46.475920679886684, + "loss": 0.09863276034593582, + "loss_ce": 0.005737253464758396, + "loss_iou": 0.40234375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 702536904, + "step": 4091 + }, + { + "epoch": 46.48725212464589, + "grad_norm": 5.420601102713969, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 702709000, + "step": 4092 + }, + { + "epoch": 46.48725212464589, + "loss": 0.0952075719833374, + "loss_ce": 0.007103325333446264, + "loss_iou": 0.49609375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 702709000, + "step": 4092 + }, + { + "epoch": 46.4985835694051, + "grad_norm": 6.288228733485025, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 702880828, + "step": 4093 + }, + { + "epoch": 46.4985835694051, + "loss": 0.12521764636039734, + "loss_ce": 0.021213725209236145, + "loss_iou": 0.466796875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 702880828, + "step": 4093 + }, + { + "epoch": 46.50991501416431, + "grad_norm": 3.63778727264492, + "learning_rate": 5e-06, + "loss": 0.1244, + "num_input_tokens_seen": 703052452, + "step": 4094 + }, + { + "epoch": 46.50991501416431, + "loss": 0.1453527957201004, + "loss_ce": 0.015439460054039955, + "loss_iou": 0.482421875, + "loss_num": 0.0260009765625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 703052452, + "step": 4094 + }, + { + "epoch": 46.521246458923514, + "grad_norm": 6.4617429890580595, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 703224348, + "step": 4095 + }, + { + "epoch": 46.521246458923514, + "loss": 0.10762103646993637, + "loss_ce": 0.01658710092306137, + "loss_iou": 0.48046875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 703224348, + "step": 4095 + }, + { + "epoch": 46.53257790368272, + "grad_norm": 11.668815896068267, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 703396032, + "step": 4096 + }, + { + "epoch": 46.53257790368272, + "loss": 0.11966902017593384, + "loss_ce": 0.0061436304822564125, + "loss_iou": 0.4453125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 703396032, + "step": 4096 + }, + { + "epoch": 46.54390934844193, + "grad_norm": 5.066378124433442, + "learning_rate": 5e-06, + "loss": 0.125, + "num_input_tokens_seen": 703567540, + "step": 4097 + }, + { + "epoch": 46.54390934844193, + "loss": 0.14190658926963806, + "loss_ce": 0.0061949086375534534, + "loss_iou": 0.279296875, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 703567540, + "step": 4097 + }, + { + "epoch": 46.555240793201136, + "grad_norm": 7.279656555333125, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 703739108, + "step": 4098 + }, + { + "epoch": 46.555240793201136, + "loss": 0.23989951610565186, + "loss_ce": 0.015473244711756706, + "loss_iou": 0.33203125, + "loss_num": 0.044921875, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 703739108, + "step": 4098 + }, + { + "epoch": 46.56657223796034, + "grad_norm": 3.967641706646963, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 703909940, + "step": 4099 + }, + { + "epoch": 46.56657223796034, + "loss": 0.12897495925426483, + "loss_ce": 0.007881210185587406, + "loss_iou": 0.482421875, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 703909940, + "step": 4099 + }, + { + "epoch": 46.577903682719544, + "grad_norm": 6.193551355263466, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 704081276, + "step": 4100 + }, + { + "epoch": 46.577903682719544, + "loss": 0.09545089304447174, + "loss_ce": 0.00423385901376605, + "loss_iou": 0.671875, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 704081276, + "step": 4100 + }, + { + "epoch": 46.58923512747875, + "grad_norm": 5.394284180369022, + "learning_rate": 5e-06, + "loss": 0.1149, + "num_input_tokens_seen": 704252848, + "step": 4101 + }, + { + "epoch": 46.58923512747875, + "loss": 0.11043091863393784, + "loss_ce": 0.005496219731867313, + "loss_iou": 0.373046875, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 704252848, + "step": 4101 + }, + { + "epoch": 46.60056657223796, + "grad_norm": 4.0417234731558915, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 704422812, + "step": 4102 + }, + { + "epoch": 46.60056657223796, + "loss": 0.16895537078380585, + "loss_ce": 0.015024702996015549, + "loss_iou": 0.296875, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 704422812, + "step": 4102 + }, + { + "epoch": 46.611898016997166, + "grad_norm": 4.0477765416488145, + "learning_rate": 5e-06, + "loss": 0.0883, + "num_input_tokens_seen": 704594832, + "step": 4103 + }, + { + "epoch": 46.611898016997166, + "loss": 0.07284734398126602, + "loss_ce": 0.011110285297036171, + "loss_iou": 0.421875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 704594832, + "step": 4103 + }, + { + "epoch": 46.623229461756374, + "grad_norm": 4.507132621218288, + "learning_rate": 5e-06, + "loss": 0.1849, + "num_input_tokens_seen": 704765956, + "step": 4104 + }, + { + "epoch": 46.623229461756374, + "loss": 0.1683284044265747, + "loss_ce": 0.014763961546123028, + "loss_iou": 0.7109375, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 704765956, + "step": 4104 + }, + { + "epoch": 46.63456090651558, + "grad_norm": 3.9266538127119053, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 704937096, + "step": 4105 + }, + { + "epoch": 46.63456090651558, + "loss": 0.16485007107257843, + "loss_ce": 0.02749045565724373, + "loss_iou": 0.3125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 704937096, + "step": 4105 + }, + { + "epoch": 46.64589235127479, + "grad_norm": 4.362016053961835, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 705107796, + "step": 4106 + }, + { + "epoch": 46.64589235127479, + "loss": 0.09971150755882263, + "loss_ce": 0.01450643315911293, + "loss_iou": 0.5390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 705107796, + "step": 4106 + }, + { + "epoch": 46.657223796033996, + "grad_norm": 6.681970585480897, + "learning_rate": 5e-06, + "loss": 0.145, + "num_input_tokens_seen": 705278624, + "step": 4107 + }, + { + "epoch": 46.657223796033996, + "loss": 0.1683436781167984, + "loss_ce": 0.004098069854080677, + "loss_iou": 0.4609375, + "loss_num": 0.032958984375, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 705278624, + "step": 4107 + }, + { + "epoch": 46.668555240793204, + "grad_norm": 8.049263162315144, + "learning_rate": 5e-06, + "loss": 0.0939, + "num_input_tokens_seen": 705450604, + "step": 4108 + }, + { + "epoch": 46.668555240793204, + "loss": 0.14787597954273224, + "loss_ce": 0.0035125839058309793, + "loss_iou": 0.51171875, + "loss_num": 0.02880859375, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 705450604, + "step": 4108 + }, + { + "epoch": 46.67988668555241, + "grad_norm": 32.421335583148995, + "learning_rate": 5e-06, + "loss": 0.1359, + "num_input_tokens_seen": 705621788, + "step": 4109 + }, + { + "epoch": 46.67988668555241, + "loss": 0.1430978775024414, + "loss_ce": 0.01199435256421566, + "loss_iou": 0.55859375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 705621788, + "step": 4109 + }, + { + "epoch": 46.69121813031161, + "grad_norm": 5.701183868217729, + "learning_rate": 5e-06, + "loss": 0.111, + "num_input_tokens_seen": 705793576, + "step": 4110 + }, + { + "epoch": 46.69121813031161, + "loss": 0.1010262668132782, + "loss_ce": 0.005231581628322601, + "loss_iou": 0.5625, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 705793576, + "step": 4110 + }, + { + "epoch": 46.70254957507082, + "grad_norm": 6.564509490208478, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 705964484, + "step": 4111 + }, + { + "epoch": 46.70254957507082, + "loss": 0.13094480335712433, + "loss_ce": 0.007287577725946903, + "loss_iou": 0.482421875, + "loss_num": 0.0247802734375, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 705964484, + "step": 4111 + }, + { + "epoch": 46.713881019830026, + "grad_norm": 9.280283493497476, + "learning_rate": 5e-06, + "loss": 0.1168, + "num_input_tokens_seen": 706135580, + "step": 4112 + }, + { + "epoch": 46.713881019830026, + "loss": 0.12362485378980637, + "loss_ce": 0.006040620151907206, + "loss_iou": 0.46484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 706135580, + "step": 4112 + }, + { + "epoch": 46.725212464589234, + "grad_norm": 10.852094869787907, + "learning_rate": 5e-06, + "loss": 0.1169, + "num_input_tokens_seen": 706306428, + "step": 4113 + }, + { + "epoch": 46.725212464589234, + "loss": 0.10285863280296326, + "loss_ce": 0.0033102878369390965, + "loss_iou": 0.404296875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 706306428, + "step": 4113 + }, + { + "epoch": 46.73654390934844, + "grad_norm": 3.4265361385303397, + "learning_rate": 5e-06, + "loss": 0.1273, + "num_input_tokens_seen": 706477796, + "step": 4114 + }, + { + "epoch": 46.73654390934844, + "loss": 0.08991587162017822, + "loss_ce": 0.011989235877990723, + "loss_iou": 0.5234375, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 706477796, + "step": 4114 + }, + { + "epoch": 46.74787535410765, + "grad_norm": 5.136046983358275, + "learning_rate": 5e-06, + "loss": 0.1281, + "num_input_tokens_seen": 706649372, + "step": 4115 + }, + { + "epoch": 46.74787535410765, + "loss": 0.08165792375802994, + "loss_ce": 0.004875699989497662, + "loss_iou": 0.5, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 706649372, + "step": 4115 + }, + { + "epoch": 46.759206798866856, + "grad_norm": 3.890961109170352, + "learning_rate": 5e-06, + "loss": 0.0989, + "num_input_tokens_seen": 706820868, + "step": 4116 + }, + { + "epoch": 46.759206798866856, + "loss": 0.06502777338027954, + "loss_ce": 0.006983338855206966, + "loss_iou": 0.494140625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 706820868, + "step": 4116 + }, + { + "epoch": 46.77053824362606, + "grad_norm": 4.632384730094208, + "learning_rate": 5e-06, + "loss": 0.1497, + "num_input_tokens_seen": 706991388, + "step": 4117 + }, + { + "epoch": 46.77053824362606, + "loss": 0.10120939463376999, + "loss_ce": 0.01179288886487484, + "loss_iou": 0.41015625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 706991388, + "step": 4117 + }, + { + "epoch": 46.78186968838527, + "grad_norm": 4.449145455379127, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 707161544, + "step": 4118 + }, + { + "epoch": 46.78186968838527, + "loss": 0.13187825679779053, + "loss_ce": 0.027141934260725975, + "loss_iou": 0.51171875, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 707161544, + "step": 4118 + }, + { + "epoch": 46.79320113314448, + "grad_norm": 3.93943171045403, + "learning_rate": 5e-06, + "loss": 0.13, + "num_input_tokens_seen": 707333404, + "step": 4119 + }, + { + "epoch": 46.79320113314448, + "loss": 0.13345091044902802, + "loss_ce": 0.0050329407677054405, + "loss_iou": 0.427734375, + "loss_num": 0.025634765625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 707333404, + "step": 4119 + }, + { + "epoch": 46.804532577903686, + "grad_norm": 4.358186299723352, + "learning_rate": 5e-06, + "loss": 0.1668, + "num_input_tokens_seen": 707503460, + "step": 4120 + }, + { + "epoch": 46.804532577903686, + "loss": 0.20717239379882812, + "loss_ce": 0.010852810926735401, + "loss_iou": 0.2421875, + "loss_num": 0.039306640625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 707503460, + "step": 4120 + }, + { + "epoch": 46.815864022662886, + "grad_norm": 4.131564781907263, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 707675436, + "step": 4121 + }, + { + "epoch": 46.815864022662886, + "loss": 0.2797163426876068, + "loss_ce": 0.01018509455025196, + "loss_iou": 0.4453125, + "loss_num": 0.053955078125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 707675436, + "step": 4121 + }, + { + "epoch": 46.827195467422094, + "grad_norm": 7.321534278070325, + "learning_rate": 5e-06, + "loss": 0.1315, + "num_input_tokens_seen": 707846220, + "step": 4122 + }, + { + "epoch": 46.827195467422094, + "loss": 0.10882514715194702, + "loss_ce": 0.002105175517499447, + "loss_iou": 0.466796875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 707846220, + "step": 4122 + }, + { + "epoch": 46.8385269121813, + "grad_norm": 6.76126905741619, + "learning_rate": 5e-06, + "loss": 0.1362, + "num_input_tokens_seen": 708017504, + "step": 4123 + }, + { + "epoch": 46.8385269121813, + "loss": 0.11360522359609604, + "loss_ce": 0.008365360088646412, + "loss_iou": 0.369140625, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 708017504, + "step": 4123 + }, + { + "epoch": 46.84985835694051, + "grad_norm": 5.4881715817973395, + "learning_rate": 5e-06, + "loss": 0.173, + "num_input_tokens_seen": 708187696, + "step": 4124 + }, + { + "epoch": 46.84985835694051, + "loss": 0.2519981563091278, + "loss_ce": 0.016066759824752808, + "loss_iou": 0.41015625, + "loss_num": 0.047119140625, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 708187696, + "step": 4124 + }, + { + "epoch": 46.861189801699716, + "grad_norm": 7.595222017014687, + "learning_rate": 5e-06, + "loss": 0.1228, + "num_input_tokens_seen": 708359532, + "step": 4125 + }, + { + "epoch": 46.861189801699716, + "loss": 0.17828808724880219, + "loss_ce": 0.006352048367261887, + "loss_iou": 0.49609375, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 708359532, + "step": 4125 + }, + { + "epoch": 46.87252124645892, + "grad_norm": 6.581851012640536, + "learning_rate": 5e-06, + "loss": 0.1043, + "num_input_tokens_seen": 708531392, + "step": 4126 + }, + { + "epoch": 46.87252124645892, + "loss": 0.10337062180042267, + "loss_ce": 0.010292013175785542, + "loss_iou": 0.375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 708531392, + "step": 4126 + }, + { + "epoch": 46.88385269121813, + "grad_norm": 3.8732253010822775, + "learning_rate": 5e-06, + "loss": 0.1141, + "num_input_tokens_seen": 708703060, + "step": 4127 + }, + { + "epoch": 46.88385269121813, + "loss": 0.10050459951162338, + "loss_ce": 0.023768151178956032, + "loss_iou": 0.490234375, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 708703060, + "step": 4127 + }, + { + "epoch": 46.89518413597734, + "grad_norm": 4.6322022794767586, + "learning_rate": 5e-06, + "loss": 0.1313, + "num_input_tokens_seen": 708874876, + "step": 4128 + }, + { + "epoch": 46.89518413597734, + "loss": 0.063858762383461, + "loss_ce": 0.00569226173684001, + "loss_iou": 0.58203125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 708874876, + "step": 4128 + }, + { + "epoch": 46.906515580736546, + "grad_norm": 3.297237030701353, + "learning_rate": 5e-06, + "loss": 0.1343, + "num_input_tokens_seen": 709046612, + "step": 4129 + }, + { + "epoch": 46.906515580736546, + "loss": 0.19996875524520874, + "loss_ce": 0.011858412064611912, + "loss_iou": 0.41015625, + "loss_num": 0.03759765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 709046612, + "step": 4129 + }, + { + "epoch": 46.91784702549575, + "grad_norm": 5.620244892929863, + "learning_rate": 5e-06, + "loss": 0.1433, + "num_input_tokens_seen": 709218572, + "step": 4130 + }, + { + "epoch": 46.91784702549575, + "loss": 0.13293421268463135, + "loss_ce": 0.01260340679436922, + "loss_iou": 0.5546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 709218572, + "step": 4130 + }, + { + "epoch": 46.92917847025496, + "grad_norm": 4.860258132306557, + "learning_rate": 5e-06, + "loss": 0.1312, + "num_input_tokens_seen": 709389612, + "step": 4131 + }, + { + "epoch": 46.92917847025496, + "loss": 0.11076609790325165, + "loss_ce": 0.01744334027171135, + "loss_iou": 0.5703125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 709389612, + "step": 4131 + }, + { + "epoch": 46.94050991501416, + "grad_norm": 19.646166504744603, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 709559772, + "step": 4132 + }, + { + "epoch": 46.94050991501416, + "loss": 0.1330343782901764, + "loss_ce": 0.0077597107738256454, + "loss_iou": 0.45703125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 709559772, + "step": 4132 + }, + { + "epoch": 46.95184135977337, + "grad_norm": 6.2773890506999726, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 709730940, + "step": 4133 + }, + { + "epoch": 46.95184135977337, + "loss": 0.1601986587047577, + "loss_ce": 0.01643035188317299, + "loss_iou": 0.5234375, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 709730940, + "step": 4133 + }, + { + "epoch": 46.963172804532576, + "grad_norm": 4.503412059648807, + "learning_rate": 5e-06, + "loss": 0.1057, + "num_input_tokens_seen": 709901864, + "step": 4134 + }, + { + "epoch": 46.963172804532576, + "loss": 0.07518798857927322, + "loss_ce": 0.011528322473168373, + "loss_iou": 0.474609375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 709901864, + "step": 4134 + }, + { + "epoch": 46.97450424929178, + "grad_norm": 4.807239796706627, + "learning_rate": 5e-06, + "loss": 0.1148, + "num_input_tokens_seen": 710073560, + "step": 4135 + }, + { + "epoch": 46.97450424929178, + "loss": 0.15487581491470337, + "loss_ce": 0.006224700249731541, + "loss_iou": 0.5, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 710073560, + "step": 4135 + }, + { + "epoch": 46.98583569405099, + "grad_norm": 3.6758352152746383, + "learning_rate": 5e-06, + "loss": 0.1266, + "num_input_tokens_seen": 710245128, + "step": 4136 + }, + { + "epoch": 46.98583569405099, + "loss": 0.14955711364746094, + "loss_ce": 0.015829086303710938, + "loss_iou": 0.5, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 710245128, + "step": 4136 + }, + { + "epoch": 46.9971671388102, + "grad_norm": 5.416476818994581, + "learning_rate": 5e-06, + "loss": 0.1347, + "num_input_tokens_seen": 710417108, + "step": 4137 + }, + { + "epoch": 46.9971671388102, + "loss": 0.17205780744552612, + "loss_ce": 0.005218209698796272, + "loss_iou": 0.48046875, + "loss_num": 0.033203125, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 710417108, + "step": 4137 + }, + { + "epoch": 46.9971671388102, + "loss": 0.08274796605110168, + "loss_ce": 0.0075984220020473, + "loss_iou": 0.1552734375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 710460092, + "step": 4137 + }, + { + "epoch": 47.008498583569406, + "grad_norm": 3.7500122487300653, + "learning_rate": 5e-06, + "loss": 0.128, + "num_input_tokens_seen": 710588272, + "step": 4138 + }, + { + "epoch": 47.008498583569406, + "loss": 0.12801267206668854, + "loss_ce": 0.0033483668230473995, + "loss_iou": 0.6875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 710588272, + "step": 4138 + }, + { + "epoch": 47.01983002832861, + "grad_norm": 3.2823704561750566, + "learning_rate": 5e-06, + "loss": 0.1254, + "num_input_tokens_seen": 710759344, + "step": 4139 + }, + { + "epoch": 47.01983002832861, + "loss": 0.15157178044319153, + "loss_ce": 0.00807812251150608, + "loss_iou": 0.453125, + "loss_num": 0.02880859375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 710759344, + "step": 4139 + }, + { + "epoch": 47.03116147308782, + "grad_norm": 3.2825986373188036, + "learning_rate": 5e-06, + "loss": 0.1202, + "num_input_tokens_seen": 710930604, + "step": 4140 + }, + { + "epoch": 47.03116147308782, + "loss": 0.0933210551738739, + "loss_ce": 0.0437299944460392, + "loss_iou": 0.515625, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 710930604, + "step": 4140 + }, + { + "epoch": 47.04249291784703, + "grad_norm": 3.274139828962442, + "learning_rate": 5e-06, + "loss": 0.0955, + "num_input_tokens_seen": 711102008, + "step": 4141 + }, + { + "epoch": 47.04249291784703, + "loss": 0.15674403309822083, + "loss_ce": 0.00742152938619256, + "loss_iou": 0.5234375, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 711102008, + "step": 4141 + }, + { + "epoch": 47.053824362606235, + "grad_norm": 3.3560075308653863, + "learning_rate": 5e-06, + "loss": 0.0961, + "num_input_tokens_seen": 711273944, + "step": 4142 + }, + { + "epoch": 47.053824362606235, + "loss": 0.1561080813407898, + "loss_ce": 0.012370292097330093, + "loss_iou": 0.64453125, + "loss_num": 0.0286865234375, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 711273944, + "step": 4142 + }, + { + "epoch": 47.065155807365436, + "grad_norm": 3.8465241877593734, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 711445736, + "step": 4143 + }, + { + "epoch": 47.065155807365436, + "loss": 0.1261669397354126, + "loss_ce": 0.012305853888392448, + "loss_iou": 0.447265625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 711445736, + "step": 4143 + }, + { + "epoch": 47.07648725212464, + "grad_norm": 3.4247367107228897, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 711617736, + "step": 4144 + }, + { + "epoch": 47.07648725212464, + "loss": 0.05091525614261627, + "loss_ce": 0.0004696962714660913, + "loss_iou": 0.443359375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 711617736, + "step": 4144 + }, + { + "epoch": 47.08781869688385, + "grad_norm": 4.703246043959707, + "learning_rate": 5e-06, + "loss": 0.1277, + "num_input_tokens_seen": 711789700, + "step": 4145 + }, + { + "epoch": 47.08781869688385, + "loss": 0.09686382114887238, + "loss_ce": 0.0074168043211102486, + "loss_iou": 0.35546875, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 711789700, + "step": 4145 + }, + { + "epoch": 47.09915014164306, + "grad_norm": 5.195075177504792, + "learning_rate": 5e-06, + "loss": 0.113, + "num_input_tokens_seen": 711961568, + "step": 4146 + }, + { + "epoch": 47.09915014164306, + "loss": 0.09385610371828079, + "loss_ce": 0.0018761223182082176, + "loss_iou": 0.482421875, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 711961568, + "step": 4146 + }, + { + "epoch": 47.110481586402265, + "grad_norm": 6.566250934573623, + "learning_rate": 5e-06, + "loss": 0.1072, + "num_input_tokens_seen": 712133288, + "step": 4147 + }, + { + "epoch": 47.110481586402265, + "loss": 0.13616234064102173, + "loss_ce": 0.00461631640791893, + "loss_iou": 0.11474609375, + "loss_num": 0.0262451171875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 712133288, + "step": 4147 + }, + { + "epoch": 47.12181303116147, + "grad_norm": 8.871963562392512, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 712305604, + "step": 4148 + }, + { + "epoch": 47.12181303116147, + "loss": 0.09593099355697632, + "loss_ce": 0.007094325963407755, + "loss_iou": 0.47265625, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 712305604, + "step": 4148 + }, + { + "epoch": 47.13314447592068, + "grad_norm": 3.2485259823090495, + "learning_rate": 5e-06, + "loss": 0.0896, + "num_input_tokens_seen": 712474768, + "step": 4149 + }, + { + "epoch": 47.13314447592068, + "loss": 0.05707137659192085, + "loss_ce": 0.009921718388795853, + "loss_iou": 0.439453125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 712474768, + "step": 4149 + }, + { + "epoch": 47.14447592067989, + "grad_norm": 5.053475724918715, + "learning_rate": 5e-06, + "loss": 0.0806, + "num_input_tokens_seen": 712644864, + "step": 4150 + }, + { + "epoch": 47.14447592067989, + "loss": 0.09276419132947922, + "loss_ce": 0.0073149725794792175, + "loss_iou": 0.189453125, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 712644864, + "step": 4150 + }, + { + "epoch": 47.155807365439095, + "grad_norm": 6.185396896656014, + "learning_rate": 5e-06, + "loss": 0.1739, + "num_input_tokens_seen": 712815384, + "step": 4151 + }, + { + "epoch": 47.155807365439095, + "loss": 0.18949854373931885, + "loss_ce": 0.012435544282197952, + "loss_iou": 0.47265625, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 712815384, + "step": 4151 + }, + { + "epoch": 47.1671388101983, + "grad_norm": 3.347347481858735, + "learning_rate": 5e-06, + "loss": 0.0954, + "num_input_tokens_seen": 712986364, + "step": 4152 + }, + { + "epoch": 47.1671388101983, + "loss": 0.0958608090877533, + "loss_ce": 0.014439910650253296, + "loss_iou": 0.56640625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 712986364, + "step": 4152 + }, + { + "epoch": 47.17847025495751, + "grad_norm": 4.455102194275949, + "learning_rate": 5e-06, + "loss": 0.1198, + "num_input_tokens_seen": 713157948, + "step": 4153 + }, + { + "epoch": 47.17847025495751, + "loss": 0.09848418086767197, + "loss_ce": 0.013126516714692116, + "loss_iou": 0.357421875, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 713157948, + "step": 4153 + }, + { + "epoch": 47.18980169971671, + "grad_norm": 4.746513590040216, + "learning_rate": 5e-06, + "loss": 0.1155, + "num_input_tokens_seen": 713328776, + "step": 4154 + }, + { + "epoch": 47.18980169971671, + "loss": 0.09090571105480194, + "loss_ce": 0.016290239989757538, + "loss_iou": 0.201171875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 713328776, + "step": 4154 + }, + { + "epoch": 47.20113314447592, + "grad_norm": 4.364856566897489, + "learning_rate": 5e-06, + "loss": 0.1657, + "num_input_tokens_seen": 713498652, + "step": 4155 + }, + { + "epoch": 47.20113314447592, + "loss": 0.15336136519908905, + "loss_ce": 0.002848668023943901, + "loss_iou": 0.4375, + "loss_num": 0.0301513671875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 713498652, + "step": 4155 + }, + { + "epoch": 47.212464589235125, + "grad_norm": 5.365442246487729, + "learning_rate": 5e-06, + "loss": 0.1399, + "num_input_tokens_seen": 713669348, + "step": 4156 + }, + { + "epoch": 47.212464589235125, + "loss": 0.10210458934307098, + "loss_ce": 0.009498994797468185, + "loss_iou": 0.53125, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 713669348, + "step": 4156 + }, + { + "epoch": 47.22379603399433, + "grad_norm": 5.2976462198843235, + "learning_rate": 5e-06, + "loss": 0.1189, + "num_input_tokens_seen": 713841060, + "step": 4157 + }, + { + "epoch": 47.22379603399433, + "loss": 0.11391109973192215, + "loss_ce": 0.005604216363281012, + "loss_iou": 0.416015625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 713841060, + "step": 4157 + }, + { + "epoch": 47.23512747875354, + "grad_norm": 3.9077438210568856, + "learning_rate": 5e-06, + "loss": 0.0744, + "num_input_tokens_seen": 714012740, + "step": 4158 + }, + { + "epoch": 47.23512747875354, + "loss": 0.058469705283641815, + "loss_ce": 0.004072119481861591, + "loss_iou": 0.1875, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 714012740, + "step": 4158 + }, + { + "epoch": 47.24645892351275, + "grad_norm": 4.182300246857357, + "learning_rate": 5e-06, + "loss": 0.1335, + "num_input_tokens_seen": 714184500, + "step": 4159 + }, + { + "epoch": 47.24645892351275, + "loss": 0.13485759496688843, + "loss_ce": 0.002960630226880312, + "loss_iou": 0.45703125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 714184500, + "step": 4159 + }, + { + "epoch": 47.257790368271955, + "grad_norm": 3.878452961059349, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 714356332, + "step": 4160 + }, + { + "epoch": 47.257790368271955, + "loss": 0.0788886696100235, + "loss_ce": 0.007599605247378349, + "loss_iou": 0.43359375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 714356332, + "step": 4160 + }, + { + "epoch": 47.26912181303116, + "grad_norm": 3.308629479720532, + "learning_rate": 5e-06, + "loss": 0.1016, + "num_input_tokens_seen": 714528512, + "step": 4161 + }, + { + "epoch": 47.26912181303116, + "loss": 0.08633209764957428, + "loss_ce": 0.008779304102063179, + "loss_iou": 0.44921875, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 714528512, + "step": 4161 + }, + { + "epoch": 47.28045325779037, + "grad_norm": 4.59178044378594, + "learning_rate": 5e-06, + "loss": 0.1086, + "num_input_tokens_seen": 714698852, + "step": 4162 + }, + { + "epoch": 47.28045325779037, + "loss": 0.192364901304245, + "loss_ce": 0.004376627504825592, + "loss_iou": 0.328125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 714698852, + "step": 4162 + }, + { + "epoch": 47.29178470254958, + "grad_norm": 3.6008658883683657, + "learning_rate": 5e-06, + "loss": 0.0963, + "num_input_tokens_seen": 714869320, + "step": 4163 + }, + { + "epoch": 47.29178470254958, + "loss": 0.1373557448387146, + "loss_ce": 0.0013083870289847255, + "loss_iou": 0.46875, + "loss_num": 0.0272216796875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 714869320, + "step": 4163 + }, + { + "epoch": 47.303116147308785, + "grad_norm": 4.302261990466185, + "learning_rate": 5e-06, + "loss": 0.0743, + "num_input_tokens_seen": 715040692, + "step": 4164 + }, + { + "epoch": 47.303116147308785, + "loss": 0.06913058459758759, + "loss_ce": 0.0029989946633577347, + "loss_iou": 0.546875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 715040692, + "step": 4164 + }, + { + "epoch": 47.314447592067985, + "grad_norm": 3.217772044208377, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 715212324, + "step": 4165 + }, + { + "epoch": 47.314447592067985, + "loss": 0.06825892627239227, + "loss_ce": 0.013235732913017273, + "loss_iou": 0.162109375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 715212324, + "step": 4165 + }, + { + "epoch": 47.32577903682719, + "grad_norm": 7.170243489217333, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 715384416, + "step": 4166 + }, + { + "epoch": 47.32577903682719, + "loss": 0.13868673145771027, + "loss_ce": 0.005263877101242542, + "loss_iou": 0.35546875, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 715384416, + "step": 4166 + }, + { + "epoch": 47.3371104815864, + "grad_norm": 4.035680116667053, + "learning_rate": 5e-06, + "loss": 0.1159, + "num_input_tokens_seen": 715554716, + "step": 4167 + }, + { + "epoch": 47.3371104815864, + "loss": 0.13755160570144653, + "loss_ce": 0.01664094813168049, + "loss_iou": 0.443359375, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 715554716, + "step": 4167 + }, + { + "epoch": 47.34844192634561, + "grad_norm": 4.5163624861501255, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 715726904, + "step": 4168 + }, + { + "epoch": 47.34844192634561, + "loss": 0.08666570484638214, + "loss_ce": 0.004207202233374119, + "loss_iou": 0.462890625, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 715726904, + "step": 4168 + }, + { + "epoch": 47.359773371104815, + "grad_norm": 8.89699127214451, + "learning_rate": 5e-06, + "loss": 0.1121, + "num_input_tokens_seen": 715897916, + "step": 4169 + }, + { + "epoch": 47.359773371104815, + "loss": 0.12220755964517593, + "loss_ce": 0.00856009777635336, + "loss_iou": 0.48828125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 715897916, + "step": 4169 + }, + { + "epoch": 47.37110481586402, + "grad_norm": 7.708439530775382, + "learning_rate": 5e-06, + "loss": 0.1091, + "num_input_tokens_seen": 716070116, + "step": 4170 + }, + { + "epoch": 47.37110481586402, + "loss": 0.08520098030567169, + "loss_ce": 0.007686336524784565, + "loss_iou": 0.5078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 716070116, + "step": 4170 + }, + { + "epoch": 47.38243626062323, + "grad_norm": 5.989922188487153, + "learning_rate": 5e-06, + "loss": 0.0975, + "num_input_tokens_seen": 716241032, + "step": 4171 + }, + { + "epoch": 47.38243626062323, + "loss": 0.07092398405075073, + "loss_ce": 0.009919339790940285, + "loss_iou": 0.486328125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 716241032, + "step": 4171 + }, + { + "epoch": 47.39376770538244, + "grad_norm": 4.87162750552465, + "learning_rate": 5e-06, + "loss": 0.1319, + "num_input_tokens_seen": 716412864, + "step": 4172 + }, + { + "epoch": 47.39376770538244, + "loss": 0.21069695055484772, + "loss_ce": 0.024722829461097717, + "loss_iou": 0.345703125, + "loss_num": 0.037109375, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 716412864, + "step": 4172 + }, + { + "epoch": 47.405099150141645, + "grad_norm": 5.645400079701885, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 716583472, + "step": 4173 + }, + { + "epoch": 47.405099150141645, + "loss": 0.09405819326639175, + "loss_ce": 0.0018951030215248466, + "loss_iou": 0.4453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 716583472, + "step": 4173 + }, + { + "epoch": 47.41643059490085, + "grad_norm": 4.006809787317644, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 716754880, + "step": 4174 + }, + { + "epoch": 47.41643059490085, + "loss": 0.08096863329410553, + "loss_ce": 0.005101929418742657, + "loss_iou": 0.57421875, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 716754880, + "step": 4174 + }, + { + "epoch": 47.42776203966006, + "grad_norm": 4.708546500577695, + "learning_rate": 5e-06, + "loss": 0.1306, + "num_input_tokens_seen": 716926804, + "step": 4175 + }, + { + "epoch": 47.42776203966006, + "loss": 0.1754830777645111, + "loss_ce": 0.01575406827032566, + "loss_iou": 0.5234375, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 716926804, + "step": 4175 + }, + { + "epoch": 47.43909348441926, + "grad_norm": 3.9007423814955327, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 717097252, + "step": 4176 + }, + { + "epoch": 47.43909348441926, + "loss": 0.06068117544054985, + "loss_ce": 0.0019348366186022758, + "loss_iou": 0.228515625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 717097252, + "step": 4176 + }, + { + "epoch": 47.45042492917847, + "grad_norm": 3.777033326333161, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 717268800, + "step": 4177 + }, + { + "epoch": 47.45042492917847, + "loss": 0.11455395817756653, + "loss_ce": 0.0049958499148488045, + "loss_iou": 0.396484375, + "loss_num": 0.02197265625, + "loss_xval": 0.109375, + "num_input_tokens_seen": 717268800, + "step": 4177 + }, + { + "epoch": 47.461756373937675, + "grad_norm": 4.1211095172457615, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 717440720, + "step": 4178 + }, + { + "epoch": 47.461756373937675, + "loss": 0.09557903558015823, + "loss_ce": 0.005857359617948532, + "loss_iou": 0.52734375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 717440720, + "step": 4178 + }, + { + "epoch": 47.47308781869688, + "grad_norm": 16.199100882214815, + "learning_rate": 5e-06, + "loss": 0.1296, + "num_input_tokens_seen": 717612424, + "step": 4179 + }, + { + "epoch": 47.47308781869688, + "loss": 0.1352303922176361, + "loss_ce": 0.010245651938021183, + "loss_iou": 0.44921875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 717612424, + "step": 4179 + }, + { + "epoch": 47.48441926345609, + "grad_norm": 2.84956766886783, + "learning_rate": 5e-06, + "loss": 0.0826, + "num_input_tokens_seen": 717784756, + "step": 4180 + }, + { + "epoch": 47.48441926345609, + "loss": 0.06373220682144165, + "loss_ce": 0.009502465836703777, + "loss_iou": 0.40234375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 717784756, + "step": 4180 + }, + { + "epoch": 47.4957507082153, + "grad_norm": 3.491380691038172, + "learning_rate": 5e-06, + "loss": 0.1088, + "num_input_tokens_seen": 717956336, + "step": 4181 + }, + { + "epoch": 47.4957507082153, + "loss": 0.17644673585891724, + "loss_ce": 0.009790237993001938, + "loss_iou": 0.2578125, + "loss_num": 0.033447265625, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 717956336, + "step": 4181 + }, + { + "epoch": 47.507082152974505, + "grad_norm": 155.16852225449458, + "learning_rate": 5e-06, + "loss": 0.1072, + "num_input_tokens_seen": 718126756, + "step": 4182 + }, + { + "epoch": 47.507082152974505, + "loss": 0.1101723313331604, + "loss_ce": 0.009876305237412453, + "loss_iou": 0.51953125, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 718126756, + "step": 4182 + }, + { + "epoch": 47.51841359773371, + "grad_norm": 3.8276867275783193, + "learning_rate": 5e-06, + "loss": 0.1225, + "num_input_tokens_seen": 718298628, + "step": 4183 + }, + { + "epoch": 47.51841359773371, + "loss": 0.09529563039541245, + "loss_ce": 0.007557597476989031, + "loss_iou": 0.451171875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 718298628, + "step": 4183 + }, + { + "epoch": 47.52974504249292, + "grad_norm": 3.1537458110690726, + "learning_rate": 5e-06, + "loss": 0.0987, + "num_input_tokens_seen": 718470576, + "step": 4184 + }, + { + "epoch": 47.52974504249292, + "loss": 0.05719384551048279, + "loss_ce": 0.009632197208702564, + "loss_iou": 0.5, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 718470576, + "step": 4184 + }, + { + "epoch": 47.54107648725213, + "grad_norm": 3.6755730472954578, + "learning_rate": 5e-06, + "loss": 0.1213, + "num_input_tokens_seen": 718642428, + "step": 4185 + }, + { + "epoch": 47.54107648725213, + "loss": 0.08919580280780792, + "loss_ce": 0.005638672970235348, + "loss_iou": 0.412109375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 718642428, + "step": 4185 + }, + { + "epoch": 47.552407932011334, + "grad_norm": 4.051823991912136, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 718814548, + "step": 4186 + }, + { + "epoch": 47.552407932011334, + "loss": 0.14492866396903992, + "loss_ce": 0.00473091471940279, + "loss_iou": 0.5625, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 718814548, + "step": 4186 + }, + { + "epoch": 47.563739376770535, + "grad_norm": 3.703186494278891, + "learning_rate": 5e-06, + "loss": 0.1534, + "num_input_tokens_seen": 718986472, + "step": 4187 + }, + { + "epoch": 47.563739376770535, + "loss": 0.100298210978508, + "loss_ce": 0.003938961774110794, + "loss_iou": 0.48828125, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 718986472, + "step": 4187 + }, + { + "epoch": 47.57507082152974, + "grad_norm": 4.577818380793257, + "learning_rate": 5e-06, + "loss": 0.1332, + "num_input_tokens_seen": 719158252, + "step": 4188 + }, + { + "epoch": 47.57507082152974, + "loss": 0.06148518621921539, + "loss_ce": 0.007163895759731531, + "loss_iou": 0.4921875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 719158252, + "step": 4188 + }, + { + "epoch": 47.58640226628895, + "grad_norm": 8.840045535492111, + "learning_rate": 5e-06, + "loss": 0.1263, + "num_input_tokens_seen": 719328264, + "step": 4189 + }, + { + "epoch": 47.58640226628895, + "loss": 0.10586181282997131, + "loss_ce": 0.011135252192616463, + "loss_iou": 0.42578125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 719328264, + "step": 4189 + }, + { + "epoch": 47.59773371104816, + "grad_norm": 4.076542713532686, + "learning_rate": 5e-06, + "loss": 0.1042, + "num_input_tokens_seen": 719500244, + "step": 4190 + }, + { + "epoch": 47.59773371104816, + "loss": 0.11987441033124924, + "loss_ce": 0.0046705519780516624, + "loss_iou": 0.443359375, + "loss_num": 0.0230712890625, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 719500244, + "step": 4190 + }, + { + "epoch": 47.609065155807365, + "grad_norm": 7.16829502025035, + "learning_rate": 5e-06, + "loss": 0.1191, + "num_input_tokens_seen": 719670588, + "step": 4191 + }, + { + "epoch": 47.609065155807365, + "loss": 0.08642441034317017, + "loss_ce": 0.013853613287210464, + "loss_iou": 0.4609375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 719670588, + "step": 4191 + }, + { + "epoch": 47.62039660056657, + "grad_norm": 3.6589365251270713, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 719840676, + "step": 4192 + }, + { + "epoch": 47.62039660056657, + "loss": 0.13755139708518982, + "loss_ce": 0.006753057707101107, + "loss_iou": 0.5234375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 719840676, + "step": 4192 + }, + { + "epoch": 47.63172804532578, + "grad_norm": 3.407042032395625, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 720011548, + "step": 4193 + }, + { + "epoch": 47.63172804532578, + "loss": 0.07615301758050919, + "loss_ce": 0.016796328127384186, + "loss_iou": 0.30859375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 720011548, + "step": 4193 + }, + { + "epoch": 47.64305949008499, + "grad_norm": 4.3316498302145625, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 720183748, + "step": 4194 + }, + { + "epoch": 47.64305949008499, + "loss": 0.07157811522483826, + "loss_ce": 0.0006247470737434924, + "loss_iou": 0.4609375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 720183748, + "step": 4194 + }, + { + "epoch": 47.654390934844194, + "grad_norm": 6.905107492272185, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 720355300, + "step": 4195 + }, + { + "epoch": 47.654390934844194, + "loss": 0.10575076192617416, + "loss_ce": 0.019172394648194313, + "loss_iou": 0.392578125, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 720355300, + "step": 4195 + }, + { + "epoch": 47.6657223796034, + "grad_norm": 4.192693541095733, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 720525276, + "step": 4196 + }, + { + "epoch": 47.6657223796034, + "loss": 0.11182039231061935, + "loss_ce": 0.009372884407639503, + "loss_iou": 0.5078125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 720525276, + "step": 4196 + }, + { + "epoch": 47.67705382436261, + "grad_norm": 4.891522799595915, + "learning_rate": 5e-06, + "loss": 0.1548, + "num_input_tokens_seen": 720696908, + "step": 4197 + }, + { + "epoch": 47.67705382436261, + "loss": 0.0823417603969574, + "loss_ce": 0.011205289512872696, + "loss_iou": 0.27734375, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 720696908, + "step": 4197 + }, + { + "epoch": 47.68838526912181, + "grad_norm": 6.922247898464801, + "learning_rate": 5e-06, + "loss": 0.1186, + "num_input_tokens_seen": 720868860, + "step": 4198 + }, + { + "epoch": 47.68838526912181, + "loss": 0.06526593118906021, + "loss_ce": 0.0011332416906952858, + "loss_iou": 0.16015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 720868860, + "step": 4198 + }, + { + "epoch": 47.69971671388102, + "grad_norm": 4.363957584028134, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 721040436, + "step": 4199 + }, + { + "epoch": 47.69971671388102, + "loss": 0.07195132970809937, + "loss_ce": 0.0017151257488876581, + "loss_iou": 0.5234375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 721040436, + "step": 4199 + }, + { + "epoch": 47.711048158640224, + "grad_norm": 5.363117587429788, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 721208832, + "step": 4200 + }, + { + "epoch": 47.711048158640224, + "loss": 0.08068595826625824, + "loss_ce": 0.0013250020565465093, + "loss_iou": 0.5390625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 721208832, + "step": 4200 + }, + { + "epoch": 47.72237960339943, + "grad_norm": 6.431877585859448, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 721379924, + "step": 4201 + }, + { + "epoch": 47.72237960339943, + "loss": 0.1810862123966217, + "loss_ce": 0.003901155898347497, + "loss_iou": 0.44921875, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 721379924, + "step": 4201 + }, + { + "epoch": 47.73371104815864, + "grad_norm": 4.165527307527694, + "learning_rate": 5e-06, + "loss": 0.138, + "num_input_tokens_seen": 721551444, + "step": 4202 + }, + { + "epoch": 47.73371104815864, + "loss": 0.17227022349834442, + "loss_ce": 0.011717259883880615, + "loss_iou": 0.095703125, + "loss_num": 0.031982421875, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 721551444, + "step": 4202 + }, + { + "epoch": 47.74504249291785, + "grad_norm": 4.002261686857825, + "learning_rate": 5e-06, + "loss": 0.1285, + "num_input_tokens_seen": 721723112, + "step": 4203 + }, + { + "epoch": 47.74504249291785, + "loss": 0.1651363968849182, + "loss_ce": 0.009649345651268959, + "loss_iou": 0.58203125, + "loss_num": 0.0311279296875, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 721723112, + "step": 4203 + }, + { + "epoch": 47.756373937677054, + "grad_norm": 6.208699437208896, + "learning_rate": 5e-06, + "loss": 0.1284, + "num_input_tokens_seen": 721894704, + "step": 4204 + }, + { + "epoch": 47.756373937677054, + "loss": 0.1591370552778244, + "loss_ce": 0.0043824203312397, + "loss_iou": 0.474609375, + "loss_num": 0.031005859375, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 721894704, + "step": 4204 + }, + { + "epoch": 47.76770538243626, + "grad_norm": 4.1244437800790275, + "learning_rate": 5e-06, + "loss": 0.1032, + "num_input_tokens_seen": 722065572, + "step": 4205 + }, + { + "epoch": 47.76770538243626, + "loss": 0.09495656192302704, + "loss_ce": 0.006516617722809315, + "loss_iou": 0.55859375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 722065572, + "step": 4205 + }, + { + "epoch": 47.77903682719547, + "grad_norm": 4.4937411023770455, + "learning_rate": 5e-06, + "loss": 0.1542, + "num_input_tokens_seen": 722237332, + "step": 4206 + }, + { + "epoch": 47.77903682719547, + "loss": 0.07483170926570892, + "loss_ce": 0.0135981859639287, + "loss_iou": 0.4765625, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 722237332, + "step": 4206 + }, + { + "epoch": 47.79036827195468, + "grad_norm": 3.6510219836697395, + "learning_rate": 5e-06, + "loss": 0.1099, + "num_input_tokens_seen": 722409240, + "step": 4207 + }, + { + "epoch": 47.79036827195468, + "loss": 0.09725276380777359, + "loss_ce": 0.006340898107737303, + "loss_iou": 0.435546875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 722409240, + "step": 4207 + }, + { + "epoch": 47.801699716713884, + "grad_norm": 4.261052119114076, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 722580796, + "step": 4208 + }, + { + "epoch": 47.801699716713884, + "loss": 0.10438238829374313, + "loss_ce": 0.007580631412565708, + "loss_iou": 0.5703125, + "loss_num": 0.0194091796875, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 722580796, + "step": 4208 + }, + { + "epoch": 47.81303116147309, + "grad_norm": 7.865780838892762, + "learning_rate": 5e-06, + "loss": 0.1292, + "num_input_tokens_seen": 722750844, + "step": 4209 + }, + { + "epoch": 47.81303116147309, + "loss": 0.1078704372048378, + "loss_ce": 0.002554281149059534, + "loss_iou": 0.37890625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 722750844, + "step": 4209 + }, + { + "epoch": 47.82436260623229, + "grad_norm": 4.28299915733703, + "learning_rate": 5e-06, + "loss": 0.1536, + "num_input_tokens_seen": 722922796, + "step": 4210 + }, + { + "epoch": 47.82436260623229, + "loss": 0.2250032275915146, + "loss_ce": 0.007596009410917759, + "loss_iou": 0.32421875, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 722922796, + "step": 4210 + }, + { + "epoch": 47.8356940509915, + "grad_norm": 3.6998087357345852, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 723094576, + "step": 4211 + }, + { + "epoch": 47.8356940509915, + "loss": 0.14309553802013397, + "loss_ce": 0.004606769420206547, + "loss_iou": 0.421875, + "loss_num": 0.027587890625, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 723094576, + "step": 4211 + }, + { + "epoch": 47.84702549575071, + "grad_norm": 5.991102049383098, + "learning_rate": 5e-06, + "loss": 0.1603, + "num_input_tokens_seen": 723266368, + "step": 4212 + }, + { + "epoch": 47.84702549575071, + "loss": 0.18474189937114716, + "loss_ce": 0.026569288223981857, + "loss_iou": 0.56640625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 723266368, + "step": 4212 + }, + { + "epoch": 47.858356940509914, + "grad_norm": 3.8338759663685575, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 723437040, + "step": 4213 + }, + { + "epoch": 47.858356940509914, + "loss": 0.05602739006280899, + "loss_ce": 0.0008821301162242889, + "loss_iou": 0.49609375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 723437040, + "step": 4213 + }, + { + "epoch": 47.86968838526912, + "grad_norm": 4.322287049616213, + "learning_rate": 5e-06, + "loss": 0.1046, + "num_input_tokens_seen": 723609056, + "step": 4214 + }, + { + "epoch": 47.86968838526912, + "loss": 0.07943825423717499, + "loss_ce": 0.0036936295218765736, + "loss_iou": 0.380859375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 723609056, + "step": 4214 + }, + { + "epoch": 47.88101983002833, + "grad_norm": 4.391115906578069, + "learning_rate": 5e-06, + "loss": 0.1255, + "num_input_tokens_seen": 723781028, + "step": 4215 + }, + { + "epoch": 47.88101983002833, + "loss": 0.14958599209785461, + "loss_ce": 0.006428041495382786, + "loss_iou": 0.34765625, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 723781028, + "step": 4215 + }, + { + "epoch": 47.892351274787536, + "grad_norm": 5.005200616958996, + "learning_rate": 5e-06, + "loss": 0.1114, + "num_input_tokens_seen": 723952968, + "step": 4216 + }, + { + "epoch": 47.892351274787536, + "loss": 0.1433696150779724, + "loss_ce": 0.00799363013356924, + "loss_iou": 0.423828125, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 723952968, + "step": 4216 + }, + { + "epoch": 47.903682719546744, + "grad_norm": 4.095520063661307, + "learning_rate": 5e-06, + "loss": 0.0841, + "num_input_tokens_seen": 724124880, + "step": 4217 + }, + { + "epoch": 47.903682719546744, + "loss": 0.057315874844789505, + "loss_ce": 0.004840902052819729, + "loss_iou": 0.474609375, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 724124880, + "step": 4217 + }, + { + "epoch": 47.91501416430595, + "grad_norm": 4.310502822772367, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 724296544, + "step": 4218 + }, + { + "epoch": 47.91501416430595, + "loss": 0.061356451362371445, + "loss_ce": 0.0025795954279601574, + "loss_iou": 0.44921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 724296544, + "step": 4218 + }, + { + "epoch": 47.92634560906516, + "grad_norm": 4.333164150456548, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 724466208, + "step": 4219 + }, + { + "epoch": 47.92634560906516, + "loss": 0.0856182724237442, + "loss_ce": 0.006638777907937765, + "loss_iou": 0.4375, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 724466208, + "step": 4219 + }, + { + "epoch": 47.93767705382436, + "grad_norm": 5.764154800687036, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 724638180, + "step": 4220 + }, + { + "epoch": 47.93767705382436, + "loss": 0.11939413845539093, + "loss_ce": 0.009881798177957535, + "loss_iou": 0.40625, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 724638180, + "step": 4220 + }, + { + "epoch": 47.94900849858357, + "grad_norm": 3.3802125938357785, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 724808840, + "step": 4221 + }, + { + "epoch": 47.94900849858357, + "loss": 0.06741583347320557, + "loss_ce": 0.009401912800967693, + "loss_iou": 0.439453125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 724808840, + "step": 4221 + }, + { + "epoch": 47.960339943342774, + "grad_norm": 6.922033666320785, + "learning_rate": 5e-06, + "loss": 0.1674, + "num_input_tokens_seen": 724980784, + "step": 4222 + }, + { + "epoch": 47.960339943342774, + "loss": 0.09399766474962234, + "loss_ce": 0.007388778030872345, + "loss_iou": 0.51171875, + "loss_num": 0.017333984375, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 724980784, + "step": 4222 + }, + { + "epoch": 47.97167138810198, + "grad_norm": 4.452549019340513, + "learning_rate": 5e-06, + "loss": 0.1412, + "num_input_tokens_seen": 725152688, + "step": 4223 + }, + { + "epoch": 47.97167138810198, + "loss": 0.12679237127304077, + "loss_ce": 0.006232663057744503, + "loss_iou": 0.5234375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 725152688, + "step": 4223 + }, + { + "epoch": 47.98300283286119, + "grad_norm": 5.800637540562238, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 725324828, + "step": 4224 + }, + { + "epoch": 47.98300283286119, + "loss": 0.14737582206726074, + "loss_ce": 0.01047397032380104, + "loss_iou": 0.43359375, + "loss_num": 0.02734375, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 725324828, + "step": 4224 + }, + { + "epoch": 47.994334277620396, + "grad_norm": 4.02003753009002, + "learning_rate": 5e-06, + "loss": 0.1497, + "num_input_tokens_seen": 725496612, + "step": 4225 + }, + { + "epoch": 47.994334277620396, + "loss": 0.18000835180282593, + "loss_ce": 0.012146404944360256, + "loss_iou": 0.2255859375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 725496612, + "step": 4225 + }, + { + "epoch": 47.994334277620396, + "loss": 0.0791236162185669, + "loss_ce": 0.004401322919875383, + "loss_iou": 0.484375, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 725582460, + "step": 4225 + }, + { + "epoch": 48.005665722379604, + "grad_norm": 2.560986806828304, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 725666636, + "step": 4226 + }, + { + "epoch": 48.005665722379604, + "loss": 0.08964906632900238, + "loss_ce": 0.001926284865476191, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 725666636, + "step": 4226 + }, + { + "epoch": 48.01699716713881, + "grad_norm": 2.2664165575224833, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 725838372, + "step": 4227 + }, + { + "epoch": 48.01699716713881, + "loss": 0.07389166951179504, + "loss_ce": 0.006768262945115566, + "loss_iou": 0.41796875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 725838372, + "step": 4227 + }, + { + "epoch": 48.02832861189802, + "grad_norm": 3.1319693365791594, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 726009644, + "step": 4228 + }, + { + "epoch": 48.02832861189802, + "loss": 0.0971192866563797, + "loss_ce": 0.0033235165756195784, + "loss_iou": 0.390625, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 726009644, + "step": 4228 + }, + { + "epoch": 48.039660056657226, + "grad_norm": 5.2748469049709525, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 726181768, + "step": 4229 + }, + { + "epoch": 48.039660056657226, + "loss": 0.05843126028776169, + "loss_ce": 0.003163927933201194, + "loss_iou": 0.5625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 726181768, + "step": 4229 + }, + { + "epoch": 48.05099150141643, + "grad_norm": 4.344460325867675, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 726353104, + "step": 4230 + }, + { + "epoch": 48.05099150141643, + "loss": 0.0806979387998581, + "loss_ce": 0.0011996516259387136, + "loss_iou": 0.48046875, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 726353104, + "step": 4230 + }, + { + "epoch": 48.06232294617564, + "grad_norm": 5.095345151385122, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 726525064, + "step": 4231 + }, + { + "epoch": 48.06232294617564, + "loss": 0.08278858661651611, + "loss_ce": 0.004053235519677401, + "loss_iou": 0.578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 726525064, + "step": 4231 + }, + { + "epoch": 48.07365439093484, + "grad_norm": 4.667750883667177, + "learning_rate": 5e-06, + "loss": 0.1381, + "num_input_tokens_seen": 726696020, + "step": 4232 + }, + { + "epoch": 48.07365439093484, + "loss": 0.1957799196243286, + "loss_ce": 0.0008183744503185153, + "loss_iou": 0.357421875, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 726696020, + "step": 4232 + }, + { + "epoch": 48.08498583569405, + "grad_norm": 5.079813233099534, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 726868240, + "step": 4233 + }, + { + "epoch": 48.08498583569405, + "loss": 0.07104089856147766, + "loss_ce": 0.004649905487895012, + "loss_iou": 0.59765625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 726868240, + "step": 4233 + }, + { + "epoch": 48.096317280453256, + "grad_norm": 5.060157686812156, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 727038508, + "step": 4234 + }, + { + "epoch": 48.096317280453256, + "loss": 0.10144554078578949, + "loss_ce": 0.004186021164059639, + "loss_iou": 0.5546875, + "loss_num": 0.01953125, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 727038508, + "step": 4234 + }, + { + "epoch": 48.107648725212464, + "grad_norm": 3.615878582070035, + "learning_rate": 5e-06, + "loss": 0.1045, + "num_input_tokens_seen": 727210044, + "step": 4235 + }, + { + "epoch": 48.107648725212464, + "loss": 0.16303175687789917, + "loss_ce": 0.004035164602100849, + "loss_iou": 0.361328125, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 727210044, + "step": 4235 + }, + { + "epoch": 48.11898016997167, + "grad_norm": 4.016571185927729, + "learning_rate": 5e-06, + "loss": 0.1261, + "num_input_tokens_seen": 727382172, + "step": 4236 + }, + { + "epoch": 48.11898016997167, + "loss": 0.15425026416778564, + "loss_ce": 0.0001975233171833679, + "loss_iou": 0.6015625, + "loss_num": 0.03076171875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 727382172, + "step": 4236 + }, + { + "epoch": 48.13031161473088, + "grad_norm": 4.195520203080299, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 727553700, + "step": 4237 + }, + { + "epoch": 48.13031161473088, + "loss": 0.06917223334312439, + "loss_ce": 0.0008281111950054765, + "loss_iou": 0.419921875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 727553700, + "step": 4237 + }, + { + "epoch": 48.141643059490086, + "grad_norm": 8.27732720401473, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 727725472, + "step": 4238 + }, + { + "epoch": 48.141643059490086, + "loss": 0.13010528683662415, + "loss_ce": 0.017617493867874146, + "loss_iou": 0.24609375, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 727725472, + "step": 4238 + }, + { + "epoch": 48.15297450424929, + "grad_norm": 3.307257998824753, + "learning_rate": 5e-06, + "loss": 0.0842, + "num_input_tokens_seen": 727896288, + "step": 4239 + }, + { + "epoch": 48.15297450424929, + "loss": 0.10900919884443283, + "loss_ce": 0.0037235496565699577, + "loss_iou": 0.3046875, + "loss_num": 0.02099609375, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 727896288, + "step": 4239 + }, + { + "epoch": 48.1643059490085, + "grad_norm": 11.103666262859841, + "learning_rate": 5e-06, + "loss": 0.1706, + "num_input_tokens_seen": 728068536, + "step": 4240 + }, + { + "epoch": 48.1643059490085, + "loss": 0.1067027896642685, + "loss_ce": 0.0008067976450547576, + "loss_iou": 0.37890625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 728068536, + "step": 4240 + }, + { + "epoch": 48.17563739376771, + "grad_norm": 5.018892917937562, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 728240468, + "step": 4241 + }, + { + "epoch": 48.17563739376771, + "loss": 0.08491016924381256, + "loss_ce": 0.0035197832621634007, + "loss_iou": 0.470703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 728240468, + "step": 4241 + }, + { + "epoch": 48.186968838526916, + "grad_norm": 4.185984092897652, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 728412140, + "step": 4242 + }, + { + "epoch": 48.186968838526916, + "loss": 0.1789214313030243, + "loss_ce": 0.005245887208729982, + "loss_iou": 0.41796875, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 728412140, + "step": 4242 + }, + { + "epoch": 48.198300283286116, + "grad_norm": 5.1223715318213605, + "learning_rate": 5e-06, + "loss": 0.0907, + "num_input_tokens_seen": 728583968, + "step": 4243 + }, + { + "epoch": 48.198300283286116, + "loss": 0.1572830080986023, + "loss_ce": 0.002650424838066101, + "loss_iou": 0.0, + "loss_num": 0.0308837890625, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 728583968, + "step": 4243 + }, + { + "epoch": 48.20963172804532, + "grad_norm": 3.993915516207532, + "learning_rate": 5e-06, + "loss": 0.0841, + "num_input_tokens_seen": 728755788, + "step": 4244 + }, + { + "epoch": 48.20963172804532, + "loss": 0.09096597135066986, + "loss_ce": 0.006157625466585159, + "loss_iou": 0.197265625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 728755788, + "step": 4244 + }, + { + "epoch": 48.22096317280453, + "grad_norm": 3.775856290174205, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 728926556, + "step": 4245 + }, + { + "epoch": 48.22096317280453, + "loss": 0.0689842626452446, + "loss_ce": 0.0012352368794381618, + "loss_iou": 0.064453125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 728926556, + "step": 4245 + }, + { + "epoch": 48.23229461756374, + "grad_norm": 4.133716410062589, + "learning_rate": 5e-06, + "loss": 0.1317, + "num_input_tokens_seen": 729098520, + "step": 4246 + }, + { + "epoch": 48.23229461756374, + "loss": 0.2406463772058487, + "loss_ce": 0.0025787455961108208, + "loss_iou": 0.416015625, + "loss_num": 0.047607421875, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 729098520, + "step": 4246 + }, + { + "epoch": 48.243626062322946, + "grad_norm": 2.7103875785594034, + "learning_rate": 5e-06, + "loss": 0.1073, + "num_input_tokens_seen": 729270316, + "step": 4247 + }, + { + "epoch": 48.243626062322946, + "loss": 0.08812672644853592, + "loss_ce": 0.00307423691265285, + "loss_iou": 0.291015625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 729270316, + "step": 4247 + }, + { + "epoch": 48.25495750708215, + "grad_norm": 2.9468621257751746, + "learning_rate": 5e-06, + "loss": 0.0828, + "num_input_tokens_seen": 729442276, + "step": 4248 + }, + { + "epoch": 48.25495750708215, + "loss": 0.06399865448474884, + "loss_ce": 0.008365103974938393, + "loss_iou": 0.05712890625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 729442276, + "step": 4248 + }, + { + "epoch": 48.26628895184136, + "grad_norm": 3.4277331328508582, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 729614272, + "step": 4249 + }, + { + "epoch": 48.26628895184136, + "loss": 0.08927109092473984, + "loss_ce": 0.001243135193362832, + "loss_iou": 0.310546875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 729614272, + "step": 4249 + }, + { + "epoch": 48.27762039660057, + "grad_norm": 4.05348556065702, + "learning_rate": 5e-06, + "loss": 0.1188, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.27762039660057, + "eval_seeclick_CIoU": 0.5490350723266602, + "eval_seeclick_GIoU": 0.5412544012069702, + "eval_seeclick_IoU": 0.582158088684082, + "eval_seeclick_MAE_all": 0.060669198632240295, + "eval_seeclick_MAE_h": 0.03776175528764725, + "eval_seeclick_MAE_w": 0.07905042171478271, + "eval_seeclick_MAE_x": 0.09220220521092415, + "eval_seeclick_MAE_y": 0.03366241417825222, + "eval_seeclick_NUM_probability": 0.999801903963089, + "eval_seeclick_inside_bbox": 0.953125, + "eval_seeclick_loss": 0.67937833070755, + "eval_seeclick_loss_ce": 0.43770506978034973, + "eval_seeclick_loss_iou": 0.5579833984375, + "eval_seeclick_loss_num": 0.047847747802734375, + "eval_seeclick_loss_xval": 0.239410400390625, + "eval_seeclick_runtime": 69.0762, + "eval_seeclick_samples_per_second": 0.623, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.27762039660057, + "eval_icons_CIoU": 0.7743414342403412, + "eval_icons_GIoU": 0.7759648859500885, + "eval_icons_IoU": 0.7864299714565277, + "eval_icons_MAE_all": 0.03141877520829439, + "eval_icons_MAE_h": 0.02992552798241377, + "eval_icons_MAE_w": 0.03443619981408119, + "eval_icons_MAE_x": 0.029241597279906273, + "eval_icons_MAE_y": 0.032071780413389206, + "eval_icons_NUM_probability": 0.999067485332489, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.09602519869804382, + "eval_icons_loss_ce": 0.00017912417388288304, + "eval_icons_loss_iou": 0.5159912109375, + "eval_icons_loss_num": 0.017490386962890625, + "eval_icons_loss_xval": 0.0875244140625, + "eval_icons_runtime": 80.1421, + "eval_icons_samples_per_second": 0.624, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.27762039660057, + "eval_screenspot_CIoU": 0.6139720479647318, + "eval_screenspot_GIoU": 0.6172506213188171, + "eval_screenspot_IoU": 0.6374714374542236, + "eval_screenspot_MAE_all": 0.06436327720681827, + "eval_screenspot_MAE_h": 0.038560821364323296, + "eval_screenspot_MAE_w": 0.11435861885547638, + "eval_screenspot_MAE_x": 0.07099338124195735, + "eval_screenspot_MAE_y": 0.03354028550287088, + "eval_screenspot_NUM_probability": 0.999848206837972, + "eval_screenspot_inside_bbox": 0.9212500055631002, + "eval_screenspot_loss": 0.2953372299671173, + "eval_screenspot_loss_ce": 0.012427997930596272, + "eval_screenspot_loss_iou": 0.561279296875, + "eval_screenspot_loss_num": 0.056116739908854164, + "eval_screenspot_loss_xval": 0.2805582682291667, + "eval_screenspot_runtime": 144.2475, + "eval_screenspot_samples_per_second": 0.617, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.27762039660057, + "eval_compot_CIoU": 0.8351185619831085, + "eval_compot_GIoU": 0.8283049464225769, + "eval_compot_IoU": 0.8476720750331879, + "eval_compot_MAE_all": 0.028342776000499725, + "eval_compot_MAE_h": 0.02567121386528015, + "eval_compot_MAE_w": 0.031636047177016735, + "eval_compot_MAE_x": 0.030706957913935184, + "eval_compot_MAE_y": 0.02535688877105713, + "eval_compot_NUM_probability": 0.9999547600746155, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.09002256393432617, + "eval_compot_loss_ce": 0.00013389794912654907, + "eval_compot_loss_iou": 0.5308837890625, + "eval_compot_loss_num": 0.0156402587890625, + "eval_compot_loss_xval": 0.0781707763671875, + "eval_compot_runtime": 89.1767, + "eval_compot_samples_per_second": 0.561, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.27762039660057, + "eval_custom_ui_MAE_all": 0.02408888377249241, + "eval_custom_ui_MAE_x": 0.03926929924637079, + "eval_custom_ui_MAE_y": 0.008908470161259174, + "eval_custom_ui_NUM_probability": 0.9998082518577576, + "eval_custom_ui_loss": 0.2017803192138672, + "eval_custom_ui_loss_ce": 0.07914393022656441, + "eval_custom_ui_loss_num": 0.024816513061523438, + "eval_custom_ui_loss_xval": 0.1241302490234375, + "eval_custom_ui_runtime": 57.9242, + "eval_custom_ui_samples_per_second": 0.863, + "eval_custom_ui_steps_per_second": 0.035, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.27762039660057, + "loss": 0.24452589452266693, + "loss_ce": 0.09440992772579193, + "loss_iou": 0.0, + "loss_num": 0.030029296875, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 729786340, + "step": 4250 + }, + { + "epoch": 48.288951841359776, + "grad_norm": 5.008323932931067, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 729958032, + "step": 4251 + }, + { + "epoch": 48.288951841359776, + "loss": 0.1132562905550003, + "loss_ce": 0.002630067290738225, + "loss_iou": 0.54296875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 729958032, + "step": 4251 + }, + { + "epoch": 48.30028328611898, + "grad_norm": 4.965848227978041, + "learning_rate": 5e-06, + "loss": 0.1033, + "num_input_tokens_seen": 730129792, + "step": 4252 + }, + { + "epoch": 48.30028328611898, + "loss": 0.06447215378284454, + "loss_ce": 0.010211902670562267, + "loss_iou": 0.1865234375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 730129792, + "step": 4252 + }, + { + "epoch": 48.31161473087819, + "grad_norm": 4.285547477785948, + "learning_rate": 5e-06, + "loss": 0.1191, + "num_input_tokens_seen": 730302020, + "step": 4253 + }, + { + "epoch": 48.31161473087819, + "loss": 0.09805352985858917, + "loss_ce": 0.0020757406018674374, + "loss_iou": 0.423828125, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 730302020, + "step": 4253 + }, + { + "epoch": 48.32294617563739, + "grad_norm": 4.479127188763341, + "learning_rate": 5e-06, + "loss": 0.1001, + "num_input_tokens_seen": 730473660, + "step": 4254 + }, + { + "epoch": 48.32294617563739, + "loss": 0.08305899053812027, + "loss_ce": 0.005117096938192844, + "loss_iou": 0.25390625, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 730473660, + "step": 4254 + }, + { + "epoch": 48.3342776203966, + "grad_norm": 4.96026218858615, + "learning_rate": 5e-06, + "loss": 0.1163, + "num_input_tokens_seen": 730644508, + "step": 4255 + }, + { + "epoch": 48.3342776203966, + "loss": 0.10590037703514099, + "loss_ce": 0.010822868905961514, + "loss_iou": 0.4765625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 730644508, + "step": 4255 + }, + { + "epoch": 48.345609065155806, + "grad_norm": 4.245440900984709, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 730816284, + "step": 4256 + }, + { + "epoch": 48.345609065155806, + "loss": 0.1167280375957489, + "loss_ce": 0.0030805806163698435, + "loss_iou": 0.390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 730816284, + "step": 4256 + }, + { + "epoch": 48.35694050991501, + "grad_norm": 3.0107728653746815, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 730988000, + "step": 4257 + }, + { + "epoch": 48.35694050991501, + "loss": 0.048872120678424835, + "loss_ce": 0.001173144206404686, + "loss_iou": 0.4296875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 730988000, + "step": 4257 + }, + { + "epoch": 48.36827195467422, + "grad_norm": 2.979316255092292, + "learning_rate": 5e-06, + "loss": 0.1105, + "num_input_tokens_seen": 731159896, + "step": 4258 + }, + { + "epoch": 48.36827195467422, + "loss": 0.1505395770072937, + "loss_ce": 0.0014612016966566443, + "loss_iou": 0.26953125, + "loss_num": 0.02978515625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 731159896, + "step": 4258 + }, + { + "epoch": 48.37960339943343, + "grad_norm": 3.126901756899927, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 731331788, + "step": 4259 + }, + { + "epoch": 48.37960339943343, + "loss": 0.07996238768100739, + "loss_ce": 0.0031649041920900345, + "loss_iou": 0.318359375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 731331788, + "step": 4259 + }, + { + "epoch": 48.390934844192635, + "grad_norm": 5.319590957025481, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 731501628, + "step": 4260 + }, + { + "epoch": 48.390934844192635, + "loss": 0.16945302486419678, + "loss_ce": 0.006824860814958811, + "loss_iou": 0.494140625, + "loss_num": 0.032470703125, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 731501628, + "step": 4260 + }, + { + "epoch": 48.40226628895184, + "grad_norm": 4.84252851343208, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 731673104, + "step": 4261 + }, + { + "epoch": 48.40226628895184, + "loss": 0.1442323923110962, + "loss_ce": 0.0037599829956889153, + "loss_iou": 0.25, + "loss_num": 0.028076171875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 731673104, + "step": 4261 + }, + { + "epoch": 48.41359773371105, + "grad_norm": 3.4432359752470787, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 731844604, + "step": 4262 + }, + { + "epoch": 48.41359773371105, + "loss": 0.08415471762418747, + "loss_ce": 0.0027338173240423203, + "loss_iou": 0.353515625, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 731844604, + "step": 4262 + }, + { + "epoch": 48.42492917847026, + "grad_norm": 4.286629721086909, + "learning_rate": 5e-06, + "loss": 0.0979, + "num_input_tokens_seen": 732014724, + "step": 4263 + }, + { + "epoch": 48.42492917847026, + "loss": 0.11379265785217285, + "loss_ce": 0.004966974724084139, + "loss_iou": 0.53125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 732014724, + "step": 4263 + }, + { + "epoch": 48.436260623229465, + "grad_norm": 4.9569213446095555, + "learning_rate": 5e-06, + "loss": 0.1272, + "num_input_tokens_seen": 732185356, + "step": 4264 + }, + { + "epoch": 48.436260623229465, + "loss": 0.07775212824344635, + "loss_ce": 0.0020227553322911263, + "loss_iou": 0.47265625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 732185356, + "step": 4264 + }, + { + "epoch": 48.447592067988666, + "grad_norm": 4.092167613692802, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 732356964, + "step": 4265 + }, + { + "epoch": 48.447592067988666, + "loss": 0.07262058556079865, + "loss_ce": 0.0035593120846897364, + "loss_iou": 0.49609375, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 732356964, + "step": 4265 + }, + { + "epoch": 48.45892351274787, + "grad_norm": 5.196939899072797, + "learning_rate": 5e-06, + "loss": 0.0737, + "num_input_tokens_seen": 732528184, + "step": 4266 + }, + { + "epoch": 48.45892351274787, + "loss": 0.10172464698553085, + "loss_ce": 0.0025730361230671406, + "loss_iou": 0.3828125, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 732528184, + "step": 4266 + }, + { + "epoch": 48.47025495750708, + "grad_norm": 4.275484228140162, + "learning_rate": 5e-06, + "loss": 0.122, + "num_input_tokens_seen": 732700036, + "step": 4267 + }, + { + "epoch": 48.47025495750708, + "loss": 0.11507698893547058, + "loss_ce": 0.0015363376587629318, + "loss_iou": 0.52734375, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 732700036, + "step": 4267 + }, + { + "epoch": 48.48158640226629, + "grad_norm": 3.356876264887818, + "learning_rate": 5e-06, + "loss": 0.1181, + "num_input_tokens_seen": 732870896, + "step": 4268 + }, + { + "epoch": 48.48158640226629, + "loss": 0.1799665242433548, + "loss_ce": 0.007267555221915245, + "loss_iou": 0.33203125, + "loss_num": 0.034423828125, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 732870896, + "step": 4268 + }, + { + "epoch": 48.492917847025495, + "grad_norm": 3.5321914434287955, + "learning_rate": 5e-06, + "loss": 0.1341, + "num_input_tokens_seen": 733040180, + "step": 4269 + }, + { + "epoch": 48.492917847025495, + "loss": 0.12801069021224976, + "loss_ce": 0.007283156272023916, + "loss_iou": 0.494140625, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 733040180, + "step": 4269 + }, + { + "epoch": 48.5042492917847, + "grad_norm": 4.115522581994122, + "learning_rate": 5e-06, + "loss": 0.0682, + "num_input_tokens_seen": 733211112, + "step": 4270 + }, + { + "epoch": 48.5042492917847, + "loss": 0.0661100298166275, + "loss_ce": 0.007119554560631514, + "loss_iou": 0.390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 733211112, + "step": 4270 + }, + { + "epoch": 48.51558073654391, + "grad_norm": 3.4497487339955257, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 733382704, + "step": 4271 + }, + { + "epoch": 48.51558073654391, + "loss": 0.10695919394493103, + "loss_ce": 0.001643032068386674, + "loss_iou": 0.50390625, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 733382704, + "step": 4271 + }, + { + "epoch": 48.52691218130312, + "grad_norm": 3.6149180820629065, + "learning_rate": 5e-06, + "loss": 0.099, + "num_input_tokens_seen": 733553896, + "step": 4272 + }, + { + "epoch": 48.52691218130312, + "loss": 0.13652561604976654, + "loss_ce": 0.003636822337284684, + "loss_iou": 0.50390625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 733553896, + "step": 4272 + }, + { + "epoch": 48.538243626062325, + "grad_norm": 8.280845339631705, + "learning_rate": 5e-06, + "loss": 0.1453, + "num_input_tokens_seen": 733725752, + "step": 4273 + }, + { + "epoch": 48.538243626062325, + "loss": 0.12543433904647827, + "loss_ce": 0.0014108981704339385, + "loss_iou": 0.39453125, + "loss_num": 0.0247802734375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 733725752, + "step": 4273 + }, + { + "epoch": 48.54957507082153, + "grad_norm": 4.665986699062646, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 733897512, + "step": 4274 + }, + { + "epoch": 48.54957507082153, + "loss": 0.14283835887908936, + "loss_ce": 0.003388291457667947, + "loss_iou": 0.390625, + "loss_num": 0.02783203125, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 733897512, + "step": 4274 + }, + { + "epoch": 48.56090651558074, + "grad_norm": 3.9022350004449065, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 734069692, + "step": 4275 + }, + { + "epoch": 48.56090651558074, + "loss": 0.06221405044198036, + "loss_ce": 0.007511289790272713, + "loss_iou": 0.53125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 734069692, + "step": 4275 + }, + { + "epoch": 48.57223796033994, + "grad_norm": 3.680592793458854, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 734241692, + "step": 4276 + }, + { + "epoch": 48.57223796033994, + "loss": 0.0740753561258316, + "loss_ce": 0.0030151798855513334, + "loss_iou": 0.55078125, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 734241692, + "step": 4276 + }, + { + "epoch": 48.58356940509915, + "grad_norm": 4.396541167149029, + "learning_rate": 5e-06, + "loss": 0.0999, + "num_input_tokens_seen": 734413692, + "step": 4277 + }, + { + "epoch": 48.58356940509915, + "loss": 0.10261402279138565, + "loss_ce": 0.0065752062946558, + "loss_iou": 0.6015625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 734413692, + "step": 4277 + }, + { + "epoch": 48.594900849858355, + "grad_norm": 10.750248819532924, + "learning_rate": 5e-06, + "loss": 0.1212, + "num_input_tokens_seen": 734585328, + "step": 4278 + }, + { + "epoch": 48.594900849858355, + "loss": 0.11873096972703934, + "loss_ce": 0.010576676577329636, + "loss_iou": 0.46875, + "loss_num": 0.0216064453125, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 734585328, + "step": 4278 + }, + { + "epoch": 48.60623229461756, + "grad_norm": 4.3787436246014275, + "learning_rate": 5e-06, + "loss": 0.0884, + "num_input_tokens_seen": 734757128, + "step": 4279 + }, + { + "epoch": 48.60623229461756, + "loss": 0.08168354630470276, + "loss_ce": 0.007190130650997162, + "loss_iou": 0.46875, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 734757128, + "step": 4279 + }, + { + "epoch": 48.61756373937677, + "grad_norm": 15.81769423746986, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 734928204, + "step": 4280 + }, + { + "epoch": 48.61756373937677, + "loss": 0.0950947254896164, + "loss_ce": 0.0025043943896889687, + "loss_iou": 0.359375, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 734928204, + "step": 4280 + }, + { + "epoch": 48.62889518413598, + "grad_norm": 4.1727358080254975, + "learning_rate": 5e-06, + "loss": 0.0757, + "num_input_tokens_seen": 735099996, + "step": 4281 + }, + { + "epoch": 48.62889518413598, + "loss": 0.04747691750526428, + "loss_ce": 0.0012122668558731675, + "loss_iou": 0.53125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 735099996, + "step": 4281 + }, + { + "epoch": 48.640226628895185, + "grad_norm": 3.481485991237219, + "learning_rate": 5e-06, + "loss": 0.1246, + "num_input_tokens_seen": 735270788, + "step": 4282 + }, + { + "epoch": 48.640226628895185, + "loss": 0.11616332828998566, + "loss_ce": 0.006788330618292093, + "loss_iou": 0.46484375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 735270788, + "step": 4282 + }, + { + "epoch": 48.65155807365439, + "grad_norm": 3.9990412778699294, + "learning_rate": 5e-06, + "loss": 0.0999, + "num_input_tokens_seen": 735442308, + "step": 4283 + }, + { + "epoch": 48.65155807365439, + "loss": 0.10802154242992401, + "loss_ce": 0.004170226398855448, + "loss_iou": 0.45703125, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 735442308, + "step": 4283 + }, + { + "epoch": 48.6628895184136, + "grad_norm": 3.6806368462051027, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 735614292, + "step": 4284 + }, + { + "epoch": 48.6628895184136, + "loss": 0.14905351400375366, + "loss_ce": 0.003728807670995593, + "loss_iou": 0.18359375, + "loss_num": 0.029052734375, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 735614292, + "step": 4284 + }, + { + "epoch": 48.67422096317281, + "grad_norm": 4.014967665069057, + "learning_rate": 5e-06, + "loss": 0.113, + "num_input_tokens_seen": 735786300, + "step": 4285 + }, + { + "epoch": 48.67422096317281, + "loss": 0.14360037446022034, + "loss_ce": 0.00444022286683321, + "loss_iou": 0.4375, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 735786300, + "step": 4285 + }, + { + "epoch": 48.685552407932015, + "grad_norm": 3.4984328046370687, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 735958224, + "step": 4286 + }, + { + "epoch": 48.685552407932015, + "loss": 0.13294550776481628, + "loss_ce": 0.003978218417614698, + "loss_iou": 0.5234375, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 735958224, + "step": 4286 + }, + { + "epoch": 48.696883852691215, + "grad_norm": 6.105849937970278, + "learning_rate": 5e-06, + "loss": 0.1221, + "num_input_tokens_seen": 736129968, + "step": 4287 + }, + { + "epoch": 48.696883852691215, + "loss": 0.12372542917728424, + "loss_ce": 0.00708724232390523, + "loss_iou": 0.462890625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 736129968, + "step": 4287 + }, + { + "epoch": 48.70821529745042, + "grad_norm": 3.4128048536556217, + "learning_rate": 5e-06, + "loss": 0.1373, + "num_input_tokens_seen": 736301272, + "step": 4288 + }, + { + "epoch": 48.70821529745042, + "loss": 0.16212065517902374, + "loss_ce": 0.0028189001604914665, + "loss_iou": 0.400390625, + "loss_num": 0.03173828125, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 736301272, + "step": 4288 + }, + { + "epoch": 48.71954674220963, + "grad_norm": 3.558755349484977, + "learning_rate": 5e-06, + "loss": 0.1029, + "num_input_tokens_seen": 736472616, + "step": 4289 + }, + { + "epoch": 48.71954674220963, + "loss": 0.069485142827034, + "loss_ce": 0.004085969645529985, + "loss_iou": 0.01220703125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 736472616, + "step": 4289 + }, + { + "epoch": 48.73087818696884, + "grad_norm": 3.4913937377942807, + "learning_rate": 5e-06, + "loss": 0.1002, + "num_input_tokens_seen": 736644160, + "step": 4290 + }, + { + "epoch": 48.73087818696884, + "loss": 0.05032903328537941, + "loss_ce": 0.002355400938540697, + "loss_iou": 0.453125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 736644160, + "step": 4290 + }, + { + "epoch": 48.742209631728045, + "grad_norm": 4.529124973874452, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 736815836, + "step": 4291 + }, + { + "epoch": 48.742209631728045, + "loss": 0.08149650692939758, + "loss_ce": 0.007888110354542732, + "loss_iou": 0.458984375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 736815836, + "step": 4291 + }, + { + "epoch": 48.75354107648725, + "grad_norm": 7.255218511608639, + "learning_rate": 5e-06, + "loss": 0.098, + "num_input_tokens_seen": 736987580, + "step": 4292 + }, + { + "epoch": 48.75354107648725, + "loss": 0.0904083102941513, + "loss_ce": 0.005233755335211754, + "loss_iou": 0.48046875, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 736987580, + "step": 4292 + }, + { + "epoch": 48.76487252124646, + "grad_norm": 4.308651704191451, + "learning_rate": 5e-06, + "loss": 0.103, + "num_input_tokens_seen": 737158400, + "step": 4293 + }, + { + "epoch": 48.76487252124646, + "loss": 0.07376554608345032, + "loss_ce": 0.005879189353436232, + "loss_iou": 0.451171875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 737158400, + "step": 4293 + }, + { + "epoch": 48.77620396600567, + "grad_norm": 4.27711939400556, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 737329348, + "step": 4294 + }, + { + "epoch": 48.77620396600567, + "loss": 0.11947590112686157, + "loss_ce": 0.0006862251902930439, + "loss_iou": 0.46484375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 737329348, + "step": 4294 + }, + { + "epoch": 48.787535410764875, + "grad_norm": 5.172696697527694, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 737500868, + "step": 4295 + }, + { + "epoch": 48.787535410764875, + "loss": 0.07078903168439865, + "loss_ce": 0.007938079535961151, + "loss_iou": 0.1875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 737500868, + "step": 4295 + }, + { + "epoch": 48.79886685552408, + "grad_norm": 3.61832815781928, + "learning_rate": 5e-06, + "loss": 0.1053, + "num_input_tokens_seen": 737672476, + "step": 4296 + }, + { + "epoch": 48.79886685552408, + "loss": 0.11403404176235199, + "loss_ce": 0.0027364278212189674, + "loss_iou": 0.45703125, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 737672476, + "step": 4296 + }, + { + "epoch": 48.81019830028329, + "grad_norm": 3.7446788983069093, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 737844208, + "step": 4297 + }, + { + "epoch": 48.81019830028329, + "loss": 0.07033340632915497, + "loss_ce": 0.00015060264558997005, + "loss_iou": 0.50390625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 737844208, + "step": 4297 + }, + { + "epoch": 48.82152974504249, + "grad_norm": 3.962563660047595, + "learning_rate": 5e-06, + "loss": 0.1593, + "num_input_tokens_seen": 738015844, + "step": 4298 + }, + { + "epoch": 48.82152974504249, + "loss": 0.19637420773506165, + "loss_ce": 0.0013058530166745186, + "loss_iou": 0.3984375, + "loss_num": 0.0390625, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 738015844, + "step": 4298 + }, + { + "epoch": 48.8328611898017, + "grad_norm": 3.696844458100421, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 738186788, + "step": 4299 + }, + { + "epoch": 48.8328611898017, + "loss": 0.08086097240447998, + "loss_ce": 0.004719617776572704, + "loss_iou": 0.56640625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 738186788, + "step": 4299 + }, + { + "epoch": 48.844192634560905, + "grad_norm": 3.8948833797307976, + "learning_rate": 5e-06, + "loss": 0.1369, + "num_input_tokens_seen": 738358256, + "step": 4300 + }, + { + "epoch": 48.844192634560905, + "loss": 0.17816084623336792, + "loss_ce": 0.0019828733056783676, + "loss_iou": 0.4921875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 738358256, + "step": 4300 + }, + { + "epoch": 48.85552407932011, + "grad_norm": 5.191495325960047, + "learning_rate": 5e-06, + "loss": 0.0948, + "num_input_tokens_seen": 738528936, + "step": 4301 + }, + { + "epoch": 48.85552407932011, + "loss": 0.15114825963974, + "loss_ce": 0.0016121244989335537, + "loss_iou": 0.59765625, + "loss_num": 0.0299072265625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 738528936, + "step": 4301 + }, + { + "epoch": 48.86685552407932, + "grad_norm": 8.281709804459151, + "learning_rate": 5e-06, + "loss": 0.1183, + "num_input_tokens_seen": 738699236, + "step": 4302 + }, + { + "epoch": 48.86685552407932, + "loss": 0.12180070579051971, + "loss_ce": 0.0003102283226326108, + "loss_iou": 0.5078125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 738699236, + "step": 4302 + }, + { + "epoch": 48.87818696883853, + "grad_norm": 4.655570414645217, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 738870800, + "step": 4303 + }, + { + "epoch": 48.87818696883853, + "loss": 0.10928734391927719, + "loss_ce": 0.003643119940534234, + "loss_iou": 0.30078125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 738870800, + "step": 4303 + }, + { + "epoch": 48.889518413597735, + "grad_norm": 3.1580637596584493, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 739042524, + "step": 4304 + }, + { + "epoch": 48.889518413597735, + "loss": 0.08541439473628998, + "loss_ce": 0.00014828561688773334, + "loss_iou": 0.4375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 739042524, + "step": 4304 + }, + { + "epoch": 48.90084985835694, + "grad_norm": 3.7153373827408664, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 739214492, + "step": 4305 + }, + { + "epoch": 48.90084985835694, + "loss": 0.05097080022096634, + "loss_ce": 0.0017001696396619081, + "loss_iou": 0.365234375, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 739214492, + "step": 4305 + }, + { + "epoch": 48.91218130311615, + "grad_norm": 3.910928125887995, + "learning_rate": 5e-06, + "loss": 0.1143, + "num_input_tokens_seen": 739386360, + "step": 4306 + }, + { + "epoch": 48.91218130311615, + "loss": 0.09835060685873032, + "loss_ce": 0.0038987009320408106, + "loss_iou": 0.51171875, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 739386360, + "step": 4306 + }, + { + "epoch": 48.92351274787536, + "grad_norm": 3.83367633078769, + "learning_rate": 5e-06, + "loss": 0.0922, + "num_input_tokens_seen": 739558112, + "step": 4307 + }, + { + "epoch": 48.92351274787536, + "loss": 0.06259407103061676, + "loss_ce": 0.004427570849657059, + "loss_iou": 0.5078125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 739558112, + "step": 4307 + }, + { + "epoch": 48.934844192634564, + "grad_norm": 3.5539803874064875, + "learning_rate": 5e-06, + "loss": 0.0791, + "num_input_tokens_seen": 739729436, + "step": 4308 + }, + { + "epoch": 48.934844192634564, + "loss": 0.05807384476065636, + "loss_ce": 0.012266959995031357, + "loss_iou": 0.52734375, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 739729436, + "step": 4308 + }, + { + "epoch": 48.946175637393765, + "grad_norm": 3.675091285750069, + "learning_rate": 5e-06, + "loss": 0.0804, + "num_input_tokens_seen": 739901028, + "step": 4309 + }, + { + "epoch": 48.946175637393765, + "loss": 0.04846800118684769, + "loss_ce": 0.0020965421572327614, + "loss_iou": 0.416015625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 739901028, + "step": 4309 + }, + { + "epoch": 48.95750708215297, + "grad_norm": 4.2305004566189455, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 740073252, + "step": 4310 + }, + { + "epoch": 48.95750708215297, + "loss": 0.04841400310397148, + "loss_ce": 0.002698671305552125, + "loss_iou": 0.44921875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 740073252, + "step": 4310 + }, + { + "epoch": 48.96883852691218, + "grad_norm": 3.9474841478106177, + "learning_rate": 5e-06, + "loss": 0.0879, + "num_input_tokens_seen": 740244172, + "step": 4311 + }, + { + "epoch": 48.96883852691218, + "loss": 0.07899753749370575, + "loss_ce": 0.005328100174665451, + "loss_iou": 0.490234375, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 740244172, + "step": 4311 + }, + { + "epoch": 48.98016997167139, + "grad_norm": 4.575526650111768, + "learning_rate": 5e-06, + "loss": 0.1102, + "num_input_tokens_seen": 740415300, + "step": 4312 + }, + { + "epoch": 48.98016997167139, + "loss": 0.0887562558054924, + "loss_ce": 0.001857450231909752, + "loss_iou": 0.453125, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 740415300, + "step": 4312 + }, + { + "epoch": 48.991501416430594, + "grad_norm": 3.652875275513445, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 740585464, + "step": 4313 + }, + { + "epoch": 48.991501416430594, + "loss": 0.10209809243679047, + "loss_ce": 0.0025497477035969496, + "loss_iou": 0.2021484375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 740585464, + "step": 4313 + }, + { + "epoch": 48.991501416430594, + "loss": 0.0529276579618454, + "loss_ce": 0.0027567625511437654, + "loss_iou": 0.326171875, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 740714468, + "step": 4313 + }, + { + "epoch": 49.0028328611898, + "grad_norm": 3.4731509067808997, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 740757324, + "step": 4314 + }, + { + "epoch": 49.0028328611898, + "loss": 0.07634000480175018, + "loss_ce": 0.0008089952752925456, + "loss_iou": 0.259765625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 740757324, + "step": 4314 + }, + { + "epoch": 49.01416430594901, + "grad_norm": 3.226841820356901, + "learning_rate": 5e-06, + "loss": 0.1456, + "num_input_tokens_seen": 740925352, + "step": 4315 + }, + { + "epoch": 49.01416430594901, + "loss": 0.17698752880096436, + "loss_ce": 0.001114727696403861, + "loss_iou": 0.54296875, + "loss_num": 0.03515625, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 740925352, + "step": 4315 + }, + { + "epoch": 49.02549575070822, + "grad_norm": 3.4968279206823474, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 741097600, + "step": 4316 + }, + { + "epoch": 49.02549575070822, + "loss": 0.074805349111557, + "loss_ce": 0.0007849633693695068, + "loss_iou": 0.546875, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 741097600, + "step": 4316 + }, + { + "epoch": 49.036827195467424, + "grad_norm": 3.030572576311112, + "learning_rate": 5e-06, + "loss": 0.1208, + "num_input_tokens_seen": 741269636, + "step": 4317 + }, + { + "epoch": 49.036827195467424, + "loss": 0.05827585235238075, + "loss_ce": 0.0028254101052880287, + "loss_iou": 0.46875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 741269636, + "step": 4317 + }, + { + "epoch": 49.04815864022663, + "grad_norm": 3.5442837172108175, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 741441176, + "step": 4318 + }, + { + "epoch": 49.04815864022663, + "loss": 0.05447140708565712, + "loss_ce": 0.0011266814544796944, + "loss_iou": 0.43359375, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 741441176, + "step": 4318 + }, + { + "epoch": 49.05949008498584, + "grad_norm": 3.4420791807746145, + "learning_rate": 5e-06, + "loss": 0.0708, + "num_input_tokens_seen": 741613384, + "step": 4319 + }, + { + "epoch": 49.05949008498584, + "loss": 0.05469021946191788, + "loss_ce": 0.00018582388292998075, + "loss_iou": 0.451171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 741613384, + "step": 4319 + }, + { + "epoch": 49.07082152974504, + "grad_norm": 3.976612078863114, + "learning_rate": 5e-06, + "loss": 0.0889, + "num_input_tokens_seen": 741784896, + "step": 4320 + }, + { + "epoch": 49.07082152974504, + "loss": 0.053845614194869995, + "loss_ce": 0.0018436636310070753, + "loss_iou": 0.41015625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 741784896, + "step": 4320 + }, + { + "epoch": 49.08215297450425, + "grad_norm": 3.3151647845175933, + "learning_rate": 5e-06, + "loss": 0.0899, + "num_input_tokens_seen": 741956640, + "step": 4321 + }, + { + "epoch": 49.08215297450425, + "loss": 0.05855793505907059, + "loss_ce": 0.0009254844626411796, + "loss_iou": 0.5078125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 741956640, + "step": 4321 + }, + { + "epoch": 49.093484419263454, + "grad_norm": 6.875468956913932, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 742127196, + "step": 4322 + }, + { + "epoch": 49.093484419263454, + "loss": 0.1033497005701065, + "loss_ce": 0.0015125458594411612, + "loss_iou": 0.5703125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 742127196, + "step": 4322 + }, + { + "epoch": 49.10481586402266, + "grad_norm": 4.227861006491061, + "learning_rate": 5e-06, + "loss": 0.0839, + "num_input_tokens_seen": 742299164, + "step": 4323 + }, + { + "epoch": 49.10481586402266, + "loss": 0.055766358971595764, + "loss_ce": 0.0007584268460050225, + "loss_iou": 0.52734375, + "loss_num": 0.01104736328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 742299164, + "step": 4323 + }, + { + "epoch": 49.11614730878187, + "grad_norm": 4.263018981776261, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 742469196, + "step": 4324 + }, + { + "epoch": 49.11614730878187, + "loss": 0.11103181540966034, + "loss_ce": 0.0019009571988135576, + "loss_iou": 0.173828125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 742469196, + "step": 4324 + }, + { + "epoch": 49.12747875354108, + "grad_norm": 4.245028883666309, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 742641656, + "step": 4325 + }, + { + "epoch": 49.12747875354108, + "loss": 0.1197550892829895, + "loss_ce": 0.0015299873193725944, + "loss_iou": 0.4609375, + "loss_num": 0.023681640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 742641656, + "step": 4325 + }, + { + "epoch": 49.138810198300284, + "grad_norm": 3.2887527616888583, + "learning_rate": 5e-06, + "loss": 0.1301, + "num_input_tokens_seen": 742813156, + "step": 4326 + }, + { + "epoch": 49.138810198300284, + "loss": 0.07255055010318756, + "loss_ce": 0.0002544056042097509, + "loss_iou": 0.51171875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 742813156, + "step": 4326 + }, + { + "epoch": 49.15014164305949, + "grad_norm": 2.338941762486456, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 742984864, + "step": 4327 + }, + { + "epoch": 49.15014164305949, + "loss": 0.04754868894815445, + "loss_ce": 0.004213727544993162, + "loss_iou": 0.42578125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 742984864, + "step": 4327 + }, + { + "epoch": 49.1614730878187, + "grad_norm": 2.786666687353388, + "learning_rate": 5e-06, + "loss": 0.0619, + "num_input_tokens_seen": 743156400, + "step": 4328 + }, + { + "epoch": 49.1614730878187, + "loss": 0.046610768884420395, + "loss_ce": 0.002360280603170395, + "loss_iou": 0.390625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 743156400, + "step": 4328 + }, + { + "epoch": 49.172804532577906, + "grad_norm": 4.438183649509324, + "learning_rate": 5e-06, + "loss": 0.0913, + "num_input_tokens_seen": 743327584, + "step": 4329 + }, + { + "epoch": 49.172804532577906, + "loss": 0.13865070044994354, + "loss_ce": 0.000741772644687444, + "loss_iou": 0.60546875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 743327584, + "step": 4329 + }, + { + "epoch": 49.184135977337114, + "grad_norm": 3.073314635685493, + "learning_rate": 5e-06, + "loss": 0.114, + "num_input_tokens_seen": 743498260, + "step": 4330 + }, + { + "epoch": 49.184135977337114, + "loss": 0.0806737095117569, + "loss_ce": 0.001450078096240759, + "loss_iou": 0.47265625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 743498260, + "step": 4330 + }, + { + "epoch": 49.195467422096314, + "grad_norm": 3.8487355782874886, + "learning_rate": 5e-06, + "loss": 0.0795, + "num_input_tokens_seen": 743667916, + "step": 4331 + }, + { + "epoch": 49.195467422096314, + "loss": 0.07400716096162796, + "loss_ce": 0.0029317219741642475, + "loss_iou": 0.56640625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 743667916, + "step": 4331 + }, + { + "epoch": 49.20679886685552, + "grad_norm": 6.851903874822369, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 743838532, + "step": 4332 + }, + { + "epoch": 49.20679886685552, + "loss": 0.10344220697879791, + "loss_ce": 0.0023374767042696476, + "loss_iou": 0.46875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 743838532, + "step": 4332 + }, + { + "epoch": 49.21813031161473, + "grad_norm": 6.1283737427026415, + "learning_rate": 5e-06, + "loss": 0.0992, + "num_input_tokens_seen": 744010164, + "step": 4333 + }, + { + "epoch": 49.21813031161473, + "loss": 0.06122565641999245, + "loss_ce": 0.0026319031603634357, + "loss_iou": 0.2421875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 744010164, + "step": 4333 + }, + { + "epoch": 49.22946175637394, + "grad_norm": 5.268752635010931, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 744181836, + "step": 4334 + }, + { + "epoch": 49.22946175637394, + "loss": 0.05788575857877731, + "loss_ce": 0.0014587538316845894, + "loss_iou": 0.4296875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 744181836, + "step": 4334 + }, + { + "epoch": 49.240793201133144, + "grad_norm": 4.210641814112757, + "learning_rate": 5e-06, + "loss": 0.1087, + "num_input_tokens_seen": 744353392, + "step": 4335 + }, + { + "epoch": 49.240793201133144, + "loss": 0.09317013621330261, + "loss_ce": 0.0015716226771473885, + "loss_iou": 0.58203125, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 744353392, + "step": 4335 + }, + { + "epoch": 49.25212464589235, + "grad_norm": 4.5924333386773055, + "learning_rate": 5e-06, + "loss": 0.1258, + "num_input_tokens_seen": 744525052, + "step": 4336 + }, + { + "epoch": 49.25212464589235, + "loss": 0.06469232589006424, + "loss_ce": 0.0009411001810804009, + "loss_iou": 0.369140625, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 744525052, + "step": 4336 + }, + { + "epoch": 49.26345609065156, + "grad_norm": 17.098168190024793, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 744694528, + "step": 4337 + }, + { + "epoch": 49.26345609065156, + "loss": 0.09466172009706497, + "loss_ce": 0.0008201682940125465, + "loss_iou": 0.58203125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 744694528, + "step": 4337 + }, + { + "epoch": 49.274787535410766, + "grad_norm": 4.185098376233496, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 744865708, + "step": 4338 + }, + { + "epoch": 49.274787535410766, + "loss": 0.06628775596618652, + "loss_ce": 0.002338170539587736, + "loss_iou": 0.59375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 744865708, + "step": 4338 + }, + { + "epoch": 49.286118980169974, + "grad_norm": 3.888527606834987, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 745037548, + "step": 4339 + }, + { + "epoch": 49.286118980169974, + "loss": 0.08603458106517792, + "loss_ce": 0.0010889050317928195, + "loss_iou": 0.453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 745037548, + "step": 4339 + }, + { + "epoch": 49.29745042492918, + "grad_norm": 3.865975312395479, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 745209432, + "step": 4340 + }, + { + "epoch": 49.29745042492918, + "loss": 0.05419117584824562, + "loss_ce": 0.0012736956123262644, + "loss_iou": 0.5, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 745209432, + "step": 4340 + }, + { + "epoch": 49.30878186968839, + "grad_norm": 3.641625553205517, + "learning_rate": 5e-06, + "loss": 0.1076, + "num_input_tokens_seen": 745381400, + "step": 4341 + }, + { + "epoch": 49.30878186968839, + "loss": 0.06112079694867134, + "loss_ce": 0.0028627419378608465, + "loss_iou": 0.458984375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 745381400, + "step": 4341 + }, + { + "epoch": 49.32011331444759, + "grad_norm": 5.268422727416257, + "learning_rate": 5e-06, + "loss": 0.1224, + "num_input_tokens_seen": 745553100, + "step": 4342 + }, + { + "epoch": 49.32011331444759, + "loss": 0.13542711734771729, + "loss_ce": 0.004598258063197136, + "loss_iou": 0.265625, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 745553100, + "step": 4342 + }, + { + "epoch": 49.331444759206796, + "grad_norm": 2.9015070743611715, + "learning_rate": 5e-06, + "loss": 0.0685, + "num_input_tokens_seen": 745725268, + "step": 4343 + }, + { + "epoch": 49.331444759206796, + "loss": 0.06892704218626022, + "loss_ce": 0.000979652046225965, + "loss_iou": 0.435546875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 745725268, + "step": 4343 + }, + { + "epoch": 49.342776203966004, + "grad_norm": 4.031799282163996, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 745896452, + "step": 4344 + }, + { + "epoch": 49.342776203966004, + "loss": 0.09716005623340607, + "loss_ce": 0.0005261427140794694, + "loss_iou": 0.412109375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 745896452, + "step": 4344 + }, + { + "epoch": 49.35410764872521, + "grad_norm": 3.745710628841776, + "learning_rate": 5e-06, + "loss": 0.0815, + "num_input_tokens_seen": 746068412, + "step": 4345 + }, + { + "epoch": 49.35410764872521, + "loss": 0.08749920129776001, + "loss_ce": 0.0026603350415825844, + "loss_iou": 0.2890625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 746068412, + "step": 4345 + }, + { + "epoch": 49.36543909348442, + "grad_norm": 4.780133073611569, + "learning_rate": 5e-06, + "loss": 0.0903, + "num_input_tokens_seen": 746239956, + "step": 4346 + }, + { + "epoch": 49.36543909348442, + "loss": 0.06789717078208923, + "loss_ce": 0.003260944038629532, + "loss_iou": 0.287109375, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 746239956, + "step": 4346 + }, + { + "epoch": 49.376770538243626, + "grad_norm": 3.997057887700053, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 746411000, + "step": 4347 + }, + { + "epoch": 49.376770538243626, + "loss": 0.08519040793180466, + "loss_ce": 0.000809304416179657, + "loss_iou": 0.4921875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 746411000, + "step": 4347 + }, + { + "epoch": 49.388101983002834, + "grad_norm": 4.043479665739765, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 746582804, + "step": 4348 + }, + { + "epoch": 49.388101983002834, + "loss": 0.06669161468744278, + "loss_ce": 0.0005600225413218141, + "loss_iou": 0.44921875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 746582804, + "step": 4348 + }, + { + "epoch": 49.39943342776204, + "grad_norm": 4.625509585538065, + "learning_rate": 5e-06, + "loss": 0.099, + "num_input_tokens_seen": 746754476, + "step": 4349 + }, + { + "epoch": 49.39943342776204, + "loss": 0.19054612517356873, + "loss_ce": 0.0006352471536956728, + "loss_iou": 0.46484375, + "loss_num": 0.0380859375, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 746754476, + "step": 4349 + }, + { + "epoch": 49.41076487252125, + "grad_norm": 4.126685897996752, + "learning_rate": 5e-06, + "loss": 0.1092, + "num_input_tokens_seen": 746926148, + "step": 4350 + }, + { + "epoch": 49.41076487252125, + "loss": 0.0921538919210434, + "loss_ce": 0.0005706397932954133, + "loss_iou": 0.51171875, + "loss_num": 0.018310546875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 746926148, + "step": 4350 + }, + { + "epoch": 49.422096317280456, + "grad_norm": 4.8060166306817695, + "learning_rate": 5e-06, + "loss": 0.0883, + "num_input_tokens_seen": 747095448, + "step": 4351 + }, + { + "epoch": 49.422096317280456, + "loss": 0.07864593714475632, + "loss_ce": 0.00299286306835711, + "loss_iou": 0.435546875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 747095448, + "step": 4351 + }, + { + "epoch": 49.43342776203966, + "grad_norm": 4.4387311828724005, + "learning_rate": 5e-06, + "loss": 0.0897, + "num_input_tokens_seen": 747267176, + "step": 4352 + }, + { + "epoch": 49.43342776203966, + "loss": 0.05372954159975052, + "loss_ce": 0.0015139628667384386, + "loss_iou": 0.474609375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 747267176, + "step": 4352 + }, + { + "epoch": 49.444759206798864, + "grad_norm": 5.250254257051551, + "learning_rate": 5e-06, + "loss": 0.087, + "num_input_tokens_seen": 747438600, + "step": 4353 + }, + { + "epoch": 49.444759206798864, + "loss": 0.10443614423274994, + "loss_ce": 0.0013477625325322151, + "loss_iou": 0.5390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 747438600, + "step": 4353 + }, + { + "epoch": 49.45609065155807, + "grad_norm": 4.095399647514437, + "learning_rate": 5e-06, + "loss": 0.0984, + "num_input_tokens_seen": 747610484, + "step": 4354 + }, + { + "epoch": 49.45609065155807, + "loss": 0.08993314206600189, + "loss_ce": 0.0015237231273204088, + "loss_iou": 0.37890625, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 747610484, + "step": 4354 + }, + { + "epoch": 49.46742209631728, + "grad_norm": 3.581291881022985, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 747781732, + "step": 4355 + }, + { + "epoch": 49.46742209631728, + "loss": 0.10985402762889862, + "loss_ce": 0.0005705712828785181, + "loss_iou": 0.46484375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 747781732, + "step": 4355 + }, + { + "epoch": 49.478753541076486, + "grad_norm": 9.460265397579896, + "learning_rate": 5e-06, + "loss": 0.0648, + "num_input_tokens_seen": 747952732, + "step": 4356 + }, + { + "epoch": 49.478753541076486, + "loss": 0.03622867912054062, + "loss_ce": 0.0018964068731293082, + "loss_iou": 0.330078125, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 747952732, + "step": 4356 + }, + { + "epoch": 49.49008498583569, + "grad_norm": 4.4833623848144875, + "learning_rate": 5e-06, + "loss": 0.0652, + "num_input_tokens_seen": 748122908, + "step": 4357 + }, + { + "epoch": 49.49008498583569, + "loss": 0.06123455986380577, + "loss_ce": 0.003159608691930771, + "loss_iou": 0.54296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 748122908, + "step": 4357 + }, + { + "epoch": 49.5014164305949, + "grad_norm": 5.778599854687138, + "learning_rate": 5e-06, + "loss": 0.0926, + "num_input_tokens_seen": 748294016, + "step": 4358 + }, + { + "epoch": 49.5014164305949, + "loss": 0.08962063491344452, + "loss_ce": 0.0027065714821219444, + "loss_iou": 0.39453125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 748294016, + "step": 4358 + }, + { + "epoch": 49.51274787535411, + "grad_norm": 4.351421035238239, + "learning_rate": 5e-06, + "loss": 0.0945, + "num_input_tokens_seen": 748464184, + "step": 4359 + }, + { + "epoch": 49.51274787535411, + "loss": 0.05633925646543503, + "loss_ce": 0.002109519438818097, + "loss_iou": 0.57421875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 748464184, + "step": 4359 + }, + { + "epoch": 49.524079320113316, + "grad_norm": 5.106061808874131, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 748635376, + "step": 4360 + }, + { + "epoch": 49.524079320113316, + "loss": 0.06278373301029205, + "loss_ce": 0.001870643813163042, + "loss_iou": 0.26171875, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 748635376, + "step": 4360 + }, + { + "epoch": 49.53541076487252, + "grad_norm": 3.813947211152109, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 748806940, + "step": 4361 + }, + { + "epoch": 49.53541076487252, + "loss": 0.10220496356487274, + "loss_ce": 0.0005509055918082595, + "loss_iou": 0.478515625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 748806940, + "step": 4361 + }, + { + "epoch": 49.54674220963173, + "grad_norm": 2.7406397459626497, + "learning_rate": 5e-06, + "loss": 0.068, + "num_input_tokens_seen": 748977128, + "step": 4362 + }, + { + "epoch": 49.54674220963173, + "loss": 0.05096760392189026, + "loss_ce": 0.001483348896726966, + "loss_iou": 0.416015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 748977128, + "step": 4362 + }, + { + "epoch": 49.55807365439094, + "grad_norm": 3.572300868636267, + "learning_rate": 5e-06, + "loss": 0.0858, + "num_input_tokens_seen": 749148932, + "step": 4363 + }, + { + "epoch": 49.55807365439094, + "loss": 0.10987602174282074, + "loss_ce": 0.002179492497816682, + "loss_iou": 0.412109375, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 749148932, + "step": 4363 + }, + { + "epoch": 49.56940509915014, + "grad_norm": 4.609996131348387, + "learning_rate": 5e-06, + "loss": 0.1025, + "num_input_tokens_seen": 749320524, + "step": 4364 + }, + { + "epoch": 49.56940509915014, + "loss": 0.06587588042020798, + "loss_ce": 0.00015627789252903312, + "loss_iou": 0.0146484375, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 749320524, + "step": 4364 + }, + { + "epoch": 49.580736543909346, + "grad_norm": 3.7454499124671576, + "learning_rate": 5e-06, + "loss": 0.1098, + "num_input_tokens_seen": 749492104, + "step": 4365 + }, + { + "epoch": 49.580736543909346, + "loss": 0.10872483253479004, + "loss_ce": 0.001699693500995636, + "loss_iou": 0.515625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 749492104, + "step": 4365 + }, + { + "epoch": 49.59206798866855, + "grad_norm": 4.217173046323239, + "learning_rate": 5e-06, + "loss": 0.0951, + "num_input_tokens_seen": 749664056, + "step": 4366 + }, + { + "epoch": 49.59206798866855, + "loss": 0.06376250088214874, + "loss_ce": 0.0031240719836205244, + "loss_iou": 0.53515625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 749664056, + "step": 4366 + }, + { + "epoch": 49.60339943342776, + "grad_norm": 7.324372451364367, + "learning_rate": 5e-06, + "loss": 0.0963, + "num_input_tokens_seen": 749834768, + "step": 4367 + }, + { + "epoch": 49.60339943342776, + "loss": 0.07092300057411194, + "loss_ce": 0.0026704370975494385, + "loss_iou": 0.212890625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 749834768, + "step": 4367 + }, + { + "epoch": 49.61473087818697, + "grad_norm": 4.111071181194956, + "learning_rate": 5e-06, + "loss": 0.0918, + "num_input_tokens_seen": 750004936, + "step": 4368 + }, + { + "epoch": 49.61473087818697, + "loss": 0.04804462194442749, + "loss_ce": 0.0020088544115424156, + "loss_iou": 0.142578125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 750004936, + "step": 4368 + }, + { + "epoch": 49.626062322946176, + "grad_norm": 4.19101592959029, + "learning_rate": 5e-06, + "loss": 0.1147, + "num_input_tokens_seen": 750176432, + "step": 4369 + }, + { + "epoch": 49.626062322946176, + "loss": 0.10773782432079315, + "loss_ce": 0.0014145805034786463, + "loss_iou": 0.46875, + "loss_num": 0.021240234375, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 750176432, + "step": 4369 + }, + { + "epoch": 49.63739376770538, + "grad_norm": 5.022033745972118, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 750348180, + "step": 4370 + }, + { + "epoch": 49.63739376770538, + "loss": 0.05284727364778519, + "loss_ce": 0.0014861896634101868, + "loss_iou": 0.35546875, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 750348180, + "step": 4370 + }, + { + "epoch": 49.64872521246459, + "grad_norm": 3.209276110993788, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 750519856, + "step": 4371 + }, + { + "epoch": 49.64872521246459, + "loss": 0.039785709232091904, + "loss_ce": 0.0006621736101806164, + "loss_iou": 0.36328125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 750519856, + "step": 4371 + }, + { + "epoch": 49.6600566572238, + "grad_norm": 2.9342217290281902, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 750690804, + "step": 4372 + }, + { + "epoch": 49.6600566572238, + "loss": 0.04395214468240738, + "loss_ce": 0.0007087374688126147, + "loss_iou": 0.30859375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 750690804, + "step": 4372 + }, + { + "epoch": 49.671388101983005, + "grad_norm": 3.414709944889579, + "learning_rate": 5e-06, + "loss": 0.0717, + "num_input_tokens_seen": 750862236, + "step": 4373 + }, + { + "epoch": 49.671388101983005, + "loss": 0.06521345674991608, + "loss_ce": 0.003506913548335433, + "loss_iou": 0.44921875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 750862236, + "step": 4373 + }, + { + "epoch": 49.68271954674221, + "grad_norm": 3.791969349259422, + "learning_rate": 5e-06, + "loss": 0.091, + "num_input_tokens_seen": 751034176, + "step": 4374 + }, + { + "epoch": 49.68271954674221, + "loss": 0.13955862820148468, + "loss_ce": 0.0011614140821620822, + "loss_iou": 0.345703125, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 751034176, + "step": 4374 + }, + { + "epoch": 49.69405099150141, + "grad_norm": 5.4588785586855675, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 751205332, + "step": 4375 + }, + { + "epoch": 49.69405099150141, + "loss": 0.11089522391557693, + "loss_ce": 0.0010929773561656475, + "loss_iou": 0.3125, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 751205332, + "step": 4375 + }, + { + "epoch": 49.70538243626062, + "grad_norm": 7.065185276228404, + "learning_rate": 5e-06, + "loss": 0.0779, + "num_input_tokens_seen": 751376916, + "step": 4376 + }, + { + "epoch": 49.70538243626062, + "loss": 0.11830483376979828, + "loss_ce": 0.001117331557907164, + "loss_iou": 0.4140625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 751376916, + "step": 4376 + }, + { + "epoch": 49.71671388101983, + "grad_norm": 7.133288924907278, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 751548420, + "step": 4377 + }, + { + "epoch": 49.71671388101983, + "loss": 0.2544563412666321, + "loss_ce": 0.00918656773865223, + "loss_iou": 0.408203125, + "loss_num": 0.049072265625, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 751548420, + "step": 4377 + }, + { + "epoch": 49.728045325779036, + "grad_norm": 5.21335164761946, + "learning_rate": 5e-06, + "loss": 0.0662, + "num_input_tokens_seen": 751720072, + "step": 4378 + }, + { + "epoch": 49.728045325779036, + "loss": 0.06574045121669769, + "loss_ce": 0.003942357841879129, + "loss_iou": 0.39453125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 751720072, + "step": 4378 + }, + { + "epoch": 49.73937677053824, + "grad_norm": 4.213015836699212, + "learning_rate": 5e-06, + "loss": 0.1556, + "num_input_tokens_seen": 751891104, + "step": 4379 + }, + { + "epoch": 49.73937677053824, + "loss": 0.17926564812660217, + "loss_ce": 0.010137236677110195, + "loss_iou": 0.4453125, + "loss_num": 0.033935546875, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 751891104, + "step": 4379 + }, + { + "epoch": 49.75070821529745, + "grad_norm": 5.515384881179111, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 752061492, + "step": 4380 + }, + { + "epoch": 49.75070821529745, + "loss": 0.05599920079112053, + "loss_ce": 0.000350397516740486, + "loss_iou": 0.5625, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 752061492, + "step": 4380 + }, + { + "epoch": 49.76203966005666, + "grad_norm": 5.509663136072773, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 752233440, + "step": 4381 + }, + { + "epoch": 49.76203966005666, + "loss": 0.10660643875598907, + "loss_ce": 0.004342036787420511, + "loss_iou": 0.5078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 752233440, + "step": 4381 + }, + { + "epoch": 49.773371104815865, + "grad_norm": 4.026326589886566, + "learning_rate": 5e-06, + "loss": 0.1175, + "num_input_tokens_seen": 752405320, + "step": 4382 + }, + { + "epoch": 49.773371104815865, + "loss": 0.15437199175357819, + "loss_ce": 0.0006854669190943241, + "loss_iou": 0.291015625, + "loss_num": 0.03076171875, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 752405320, + "step": 4382 + }, + { + "epoch": 49.78470254957507, + "grad_norm": 3.6386829417651048, + "learning_rate": 5e-06, + "loss": 0.0963, + "num_input_tokens_seen": 752577152, + "step": 4383 + }, + { + "epoch": 49.78470254957507, + "loss": 0.05102023482322693, + "loss_ce": 0.0008188205538317561, + "loss_iou": 0.40234375, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 752577152, + "step": 4383 + }, + { + "epoch": 49.79603399433428, + "grad_norm": 4.207026555063898, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 752745676, + "step": 4384 + }, + { + "epoch": 49.79603399433428, + "loss": 0.0880984216928482, + "loss_ce": 0.0020541041158139706, + "loss_iou": 0.4140625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 752745676, + "step": 4384 + }, + { + "epoch": 49.80736543909349, + "grad_norm": 3.2988619522199736, + "learning_rate": 5e-06, + "loss": 0.0754, + "num_input_tokens_seen": 752916992, + "step": 4385 + }, + { + "epoch": 49.80736543909349, + "loss": 0.10638163983821869, + "loss_ce": 0.0005466811126098037, + "loss_iou": 0.228515625, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 752916992, + "step": 4385 + }, + { + "epoch": 49.81869688385269, + "grad_norm": 4.523919041771532, + "learning_rate": 5e-06, + "loss": 0.1416, + "num_input_tokens_seen": 753087748, + "step": 4386 + }, + { + "epoch": 49.81869688385269, + "loss": 0.07070009410381317, + "loss_ce": 0.001181046012789011, + "loss_iou": 0.5078125, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 753087748, + "step": 4386 + }, + { + "epoch": 49.830028328611895, + "grad_norm": 3.5139723443398476, + "learning_rate": 5e-06, + "loss": 0.0711, + "num_input_tokens_seen": 753259584, + "step": 4387 + }, + { + "epoch": 49.830028328611895, + "loss": 0.03835410624742508, + "loss_ce": 0.0027629798278212547, + "loss_iou": 0.341796875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 753259584, + "step": 4387 + }, + { + "epoch": 49.8413597733711, + "grad_norm": 3.5661981556230766, + "learning_rate": 5e-06, + "loss": 0.1014, + "num_input_tokens_seen": 753431112, + "step": 4388 + }, + { + "epoch": 49.8413597733711, + "loss": 0.06541304290294647, + "loss_ce": 0.0003800879349000752, + "loss_iou": 0.37890625, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 753431112, + "step": 4388 + }, + { + "epoch": 49.85269121813031, + "grad_norm": 2.855531912963118, + "learning_rate": 5e-06, + "loss": 0.1452, + "num_input_tokens_seen": 753602968, + "step": 4389 + }, + { + "epoch": 49.85269121813031, + "loss": 0.28580358624458313, + "loss_ce": 0.0006778663373552263, + "loss_iou": 0.33203125, + "loss_num": 0.056884765625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 753602968, + "step": 4389 + }, + { + "epoch": 49.86402266288952, + "grad_norm": 5.537335755281551, + "learning_rate": 5e-06, + "loss": 0.08, + "num_input_tokens_seen": 753775036, + "step": 4390 + }, + { + "epoch": 49.86402266288952, + "loss": 0.043919578194618225, + "loss_ce": 0.0005693574785254896, + "loss_iou": 0.275390625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 753775036, + "step": 4390 + }, + { + "epoch": 49.875354107648725, + "grad_norm": 2.817339374794205, + "learning_rate": 5e-06, + "loss": 0.0717, + "num_input_tokens_seen": 753945436, + "step": 4391 + }, + { + "epoch": 49.875354107648725, + "loss": 0.05330030620098114, + "loss_ce": 0.002702165860682726, + "loss_iou": 0.2890625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 753945436, + "step": 4391 + }, + { + "epoch": 49.88668555240793, + "grad_norm": 13.38166640926914, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 754116964, + "step": 4392 + }, + { + "epoch": 49.88668555240793, + "loss": 0.09472432732582092, + "loss_ce": 0.0010658869286999106, + "loss_iou": 0.177734375, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 754116964, + "step": 4392 + }, + { + "epoch": 49.89801699716714, + "grad_norm": 3.241238386214431, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 754288068, + "step": 4393 + }, + { + "epoch": 49.89801699716714, + "loss": 0.1573636829853058, + "loss_ce": 0.0009305835701525211, + "loss_iou": 0.296875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 754288068, + "step": 4393 + }, + { + "epoch": 49.90934844192635, + "grad_norm": 8.678767483883833, + "learning_rate": 5e-06, + "loss": 0.0897, + "num_input_tokens_seen": 754459996, + "step": 4394 + }, + { + "epoch": 49.90934844192635, + "loss": 0.1557530164718628, + "loss_ce": 0.0029972770716995, + "loss_iou": 0.38671875, + "loss_num": 0.030517578125, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 754459996, + "step": 4394 + }, + { + "epoch": 49.920679886685555, + "grad_norm": 3.7155969428994973, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 754631660, + "step": 4395 + }, + { + "epoch": 49.920679886685555, + "loss": 0.060144711285829544, + "loss_ce": 0.0011847498826682568, + "loss_iou": 0.404296875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 754631660, + "step": 4395 + }, + { + "epoch": 49.93201133144476, + "grad_norm": 3.622786070449283, + "learning_rate": 5e-06, + "loss": 0.0724, + "num_input_tokens_seen": 754803276, + "step": 4396 + }, + { + "epoch": 49.93201133144476, + "loss": 0.06250564008951187, + "loss_ce": 0.0029200688004493713, + "loss_iou": 0.435546875, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 754803276, + "step": 4396 + }, + { + "epoch": 49.94334277620396, + "grad_norm": 3.868323404788726, + "learning_rate": 5e-06, + "loss": 0.0673, + "num_input_tokens_seen": 754974148, + "step": 4397 + }, + { + "epoch": 49.94334277620396, + "loss": 0.04745414853096008, + "loss_ce": 0.0037529736291617155, + "loss_iou": 0.1455078125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 754974148, + "step": 4397 + }, + { + "epoch": 49.95467422096317, + "grad_norm": 3.9238463410260773, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 755145496, + "step": 4398 + }, + { + "epoch": 49.95467422096317, + "loss": 0.09703067690134048, + "loss_ce": 0.00090030551655218, + "loss_iou": 0.466796875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 755145496, + "step": 4398 + }, + { + "epoch": 49.96600566572238, + "grad_norm": 29.220471541875742, + "learning_rate": 5e-06, + "loss": 0.0901, + "num_input_tokens_seen": 755317220, + "step": 4399 + }, + { + "epoch": 49.96600566572238, + "loss": 0.10200240463018417, + "loss_ce": 0.003247522981837392, + "loss_iou": 0.349609375, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 755317220, + "step": 4399 + }, + { + "epoch": 49.977337110481585, + "grad_norm": 3.0335491516984656, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 755488908, + "step": 4400 + }, + { + "epoch": 49.977337110481585, + "loss": 0.10426551103591919, + "loss_ce": 0.004991832189261913, + "loss_iou": 0.26953125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 755488908, + "step": 4400 + }, + { + "epoch": 49.98866855524079, + "grad_norm": 5.726212766529932, + "learning_rate": 5e-06, + "loss": 0.112, + "num_input_tokens_seen": 755659036, + "step": 4401 + }, + { + "epoch": 49.98866855524079, + "loss": 0.13855154812335968, + "loss_ce": 0.004381020553410053, + "loss_iou": 0.314453125, + "loss_num": 0.02685546875, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 755659036, + "step": 4401 + }, + { + "epoch": 50.0, + "grad_norm": 3.7630255373759285, + "learning_rate": 5e-06, + "loss": 0.1003, + "num_input_tokens_seen": 755830676, + "step": 4402 + }, + { + "epoch": 50.0, + "loss": 0.10462699830532074, + "loss_ce": 0.0004552433965727687, + "loss_iou": 0.4375, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 755830676, + "step": 4402 + }, + { + "epoch": 50.01133144475921, + "grad_norm": 3.3401218698445185, + "learning_rate": 5e-06, + "loss": 0.1056, + "num_input_tokens_seen": 756002576, + "step": 4403 + }, + { + "epoch": 50.01133144475921, + "loss": 0.07208964228630066, + "loss_ce": 0.0009150212281383574, + "loss_iou": 0.427734375, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 756002576, + "step": 4403 + }, + { + "epoch": 50.022662889518415, + "grad_norm": 18.044934729647085, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 756174064, + "step": 4404 + }, + { + "epoch": 50.022662889518415, + "loss": 0.05712079256772995, + "loss_ce": 0.0008158613927662373, + "loss_iou": 0.49609375, + "loss_num": 0.01123046875, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 756174064, + "step": 4404 + }, + { + "epoch": 50.03399433427762, + "grad_norm": 3.7195738320771667, + "learning_rate": 5e-06, + "loss": 0.0795, + "num_input_tokens_seen": 756346012, + "step": 4405 + }, + { + "epoch": 50.03399433427762, + "loss": 0.06001533195376396, + "loss_ce": 0.0006281236419454217, + "loss_iou": 0.546875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 756346012, + "step": 4405 + }, + { + "epoch": 50.04532577903683, + "grad_norm": 4.137350334768581, + "learning_rate": 5e-06, + "loss": 0.0817, + "num_input_tokens_seen": 756516772, + "step": 4406 + }, + { + "epoch": 50.04532577903683, + "loss": 0.05741478130221367, + "loss_ce": 0.0004689824709203094, + "loss_iou": 0.470703125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 756516772, + "step": 4406 + }, + { + "epoch": 50.05665722379604, + "grad_norm": 3.1581671903973527, + "learning_rate": 5e-06, + "loss": 0.0644, + "num_input_tokens_seen": 756687528, + "step": 4407 + }, + { + "epoch": 50.05665722379604, + "loss": 0.03790707886219025, + "loss_ce": 0.0001415750157320872, + "loss_iou": 0.447265625, + "loss_num": 0.007537841796875, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 756687528, + "step": 4407 + }, + { + "epoch": 50.06798866855524, + "grad_norm": 3.435931554021553, + "learning_rate": 5e-06, + "loss": 0.0778, + "num_input_tokens_seen": 756859136, + "step": 4408 + }, + { + "epoch": 50.06798866855524, + "loss": 0.11430996656417847, + "loss_ce": 0.000509918958414346, + "loss_iou": 0.12353515625, + "loss_num": 0.0228271484375, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 756859136, + "step": 4408 + }, + { + "epoch": 50.079320113314445, + "grad_norm": 42.06504448100528, + "learning_rate": 5e-06, + "loss": 0.0704, + "num_input_tokens_seen": 757031140, + "step": 4409 + }, + { + "epoch": 50.079320113314445, + "loss": 0.13308638334274292, + "loss_ce": 0.0005027586012147367, + "loss_iou": 0.462890625, + "loss_num": 0.0264892578125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 757031140, + "step": 4409 + }, + { + "epoch": 50.09065155807365, + "grad_norm": 4.650183993877952, + "learning_rate": 5e-06, + "loss": 0.0939, + "num_input_tokens_seen": 757203080, + "step": 4410 + }, + { + "epoch": 50.09065155807365, + "loss": 0.12237624078989029, + "loss_ce": 0.004883565474301577, + "loss_iou": 0.435546875, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 757203080, + "step": 4410 + }, + { + "epoch": 50.10198300283286, + "grad_norm": 4.093911039131734, + "learning_rate": 5e-06, + "loss": 0.1013, + "num_input_tokens_seen": 757374752, + "step": 4411 + }, + { + "epoch": 50.10198300283286, + "loss": 0.09824614971876144, + "loss_ce": 0.000773002568166703, + "loss_iou": 0.482421875, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 757374752, + "step": 4411 + }, + { + "epoch": 50.11331444759207, + "grad_norm": 4.103051129307374, + "learning_rate": 5e-06, + "loss": 0.0694, + "num_input_tokens_seen": 757546344, + "step": 4412 + }, + { + "epoch": 50.11331444759207, + "loss": 0.09053158015012741, + "loss_ce": 0.00019954868184868246, + "loss_iou": 0.36328125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 757546344, + "step": 4412 + }, + { + "epoch": 50.124645892351275, + "grad_norm": 4.398128082821813, + "learning_rate": 5e-06, + "loss": 0.109, + "num_input_tokens_seen": 757717780, + "step": 4413 + }, + { + "epoch": 50.124645892351275, + "loss": 0.16719692945480347, + "loss_ce": 0.0008150916546583176, + "loss_iou": 0.271484375, + "loss_num": 0.033203125, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 757717780, + "step": 4413 + }, + { + "epoch": 50.13597733711048, + "grad_norm": 3.9798703964823026, + "learning_rate": 5e-06, + "loss": 0.0875, + "num_input_tokens_seen": 757887744, + "step": 4414 + }, + { + "epoch": 50.13597733711048, + "loss": 0.14836016297340393, + "loss_ce": 0.00016679619147907943, + "loss_iou": 0.42578125, + "loss_num": 0.029541015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 757887744, + "step": 4414 + }, + { + "epoch": 50.14730878186969, + "grad_norm": 3.721527603207931, + "learning_rate": 5e-06, + "loss": 0.0872, + "num_input_tokens_seen": 758058608, + "step": 4415 + }, + { + "epoch": 50.14730878186969, + "loss": 0.11124084889888763, + "loss_ce": 0.0026898211799561977, + "loss_iou": 0.55078125, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 758058608, + "step": 4415 + }, + { + "epoch": 50.1586402266289, + "grad_norm": 3.9387275366860792, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 758229648, + "step": 4416 + }, + { + "epoch": 50.1586402266289, + "loss": 0.07520405948162079, + "loss_ce": 0.0007259119884110987, + "loss_iou": 0.416015625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 758229648, + "step": 4416 + }, + { + "epoch": 50.169971671388105, + "grad_norm": 4.057770523118788, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 758401496, + "step": 4417 + }, + { + "epoch": 50.169971671388105, + "loss": 0.1181391105055809, + "loss_ce": 0.0007990228477865458, + "loss_iou": 0.49609375, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 758401496, + "step": 4417 + }, + { + "epoch": 50.18130311614731, + "grad_norm": 2.160843232749934, + "learning_rate": 5e-06, + "loss": 0.0463, + "num_input_tokens_seen": 758573248, + "step": 4418 + }, + { + "epoch": 50.18130311614731, + "loss": 0.0316898450255394, + "loss_ce": 0.00024148012744262815, + "loss_iou": 0.4375, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 758573248, + "step": 4418 + }, + { + "epoch": 50.19263456090651, + "grad_norm": 3.7319124431378903, + "learning_rate": 5e-06, + "loss": 0.1036, + "num_input_tokens_seen": 758744244, + "step": 4419 + }, + { + "epoch": 50.19263456090651, + "loss": 0.048760753124952316, + "loss_ce": 0.0008634141413494945, + "loss_iou": 0.5703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 758744244, + "step": 4419 + }, + { + "epoch": 50.20396600566572, + "grad_norm": 5.626406562416814, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 758916052, + "step": 4420 + }, + { + "epoch": 50.20396600566572, + "loss": 0.06947594881057739, + "loss_ce": 0.0016506274696439505, + "loss_iou": 0.54296875, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 758916052, + "step": 4420 + }, + { + "epoch": 50.21529745042493, + "grad_norm": 5.012386939099503, + "learning_rate": 5e-06, + "loss": 0.0762, + "num_input_tokens_seen": 759086092, + "step": 4421 + }, + { + "epoch": 50.21529745042493, + "loss": 0.06383313238620758, + "loss_ce": 0.0006922596367076039, + "loss_iou": 0.14453125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 759086092, + "step": 4421 + }, + { + "epoch": 50.226628895184135, + "grad_norm": 4.184912909981786, + "learning_rate": 5e-06, + "loss": 0.0708, + "num_input_tokens_seen": 759257144, + "step": 4422 + }, + { + "epoch": 50.226628895184135, + "loss": 0.061709772795438766, + "loss_ce": 0.0011628975626081228, + "loss_iou": 0.52734375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 759257144, + "step": 4422 + }, + { + "epoch": 50.23796033994334, + "grad_norm": 6.187060024118792, + "learning_rate": 5e-06, + "loss": 0.0788, + "num_input_tokens_seen": 759428836, + "step": 4423 + }, + { + "epoch": 50.23796033994334, + "loss": 0.06169979274272919, + "loss_ce": 5.4287578677758574e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0123291015625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 759428836, + "step": 4423 + }, + { + "epoch": 50.24929178470255, + "grad_norm": 5.0446440281313585, + "learning_rate": 5e-06, + "loss": 0.0976, + "num_input_tokens_seen": 759599648, + "step": 4424 + }, + { + "epoch": 50.24929178470255, + "loss": 0.05307450145483017, + "loss_ce": 0.0038954231422394514, + "loss_iou": 0.2392578125, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 759599648, + "step": 4424 + }, + { + "epoch": 50.26062322946176, + "grad_norm": 4.701344706992798, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 759771620, + "step": 4425 + }, + { + "epoch": 50.26062322946176, + "loss": 0.08179698139429092, + "loss_ce": 0.0017798923654481769, + "loss_iou": 0.2890625, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 759771620, + "step": 4425 + }, + { + "epoch": 50.271954674220964, + "grad_norm": 4.460396666306289, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 759943252, + "step": 4426 + }, + { + "epoch": 50.271954674220964, + "loss": 0.058770958334207535, + "loss_ce": 0.001184286898933351, + "loss_iou": 0.458984375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 759943252, + "step": 4426 + }, + { + "epoch": 50.28328611898017, + "grad_norm": 4.170328484870514, + "learning_rate": 5e-06, + "loss": 0.1104, + "num_input_tokens_seen": 760113352, + "step": 4427 + }, + { + "epoch": 50.28328611898017, + "loss": 0.08763612806797028, + "loss_ce": 0.00044740550220012665, + "loss_iou": 0.4609375, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 760113352, + "step": 4427 + }, + { + "epoch": 50.29461756373938, + "grad_norm": 4.403793980424108, + "learning_rate": 5e-06, + "loss": 0.117, + "num_input_tokens_seen": 760284152, + "step": 4428 + }, + { + "epoch": 50.29461756373938, + "loss": 0.21055921912193298, + "loss_ce": 0.0005982845323160291, + "loss_iou": 0.423828125, + "loss_num": 0.0419921875, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 760284152, + "step": 4428 + }, + { + "epoch": 50.30594900849859, + "grad_norm": 4.6892535238443696, + "learning_rate": 5e-06, + "loss": 0.0796, + "num_input_tokens_seen": 760456148, + "step": 4429 + }, + { + "epoch": 50.30594900849859, + "loss": 0.07114341109991074, + "loss_ce": 0.00046470039524137974, + "loss_iou": 0.1982421875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 760456148, + "step": 4429 + }, + { + "epoch": 50.31728045325779, + "grad_norm": 4.225301058257953, + "learning_rate": 5e-06, + "loss": 0.1339, + "num_input_tokens_seen": 760627588, + "step": 4430 + }, + { + "epoch": 50.31728045325779, + "loss": 0.10689513385295868, + "loss_ce": 0.0011517195962369442, + "loss_iou": 0.453125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 760627588, + "step": 4430 + }, + { + "epoch": 50.328611898016995, + "grad_norm": 10.011086101252994, + "learning_rate": 5e-06, + "loss": 0.0626, + "num_input_tokens_seen": 760798556, + "step": 4431 + }, + { + "epoch": 50.328611898016995, + "loss": 0.061522889882326126, + "loss_ce": 0.0006098012090660632, + "loss_iou": 0.3515625, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 760798556, + "step": 4431 + }, + { + "epoch": 50.3399433427762, + "grad_norm": 7.002099777056922, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 760970140, + "step": 4432 + }, + { + "epoch": 50.3399433427762, + "loss": 0.07696007192134857, + "loss_ce": 0.0004677576362155378, + "loss_iou": 0.41796875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 760970140, + "step": 4432 + }, + { + "epoch": 50.35127478753541, + "grad_norm": 4.359764552948906, + "learning_rate": 5e-06, + "loss": 0.1069, + "num_input_tokens_seen": 761141964, + "step": 4433 + }, + { + "epoch": 50.35127478753541, + "loss": 0.11063601076602936, + "loss_ce": 0.0008185015758499503, + "loss_iou": 0.15625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 761141964, + "step": 4433 + }, + { + "epoch": 50.36260623229462, + "grad_norm": 3.1975418134899174, + "learning_rate": 5e-06, + "loss": 0.0689, + "num_input_tokens_seen": 761312964, + "step": 4434 + }, + { + "epoch": 50.36260623229462, + "loss": 0.10568847507238388, + "loss_ce": 0.0009521447354927659, + "loss_iou": 0.392578125, + "loss_num": 0.02099609375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 761312964, + "step": 4434 + }, + { + "epoch": 50.373937677053824, + "grad_norm": 3.7055292500600423, + "learning_rate": 5e-06, + "loss": 0.0977, + "num_input_tokens_seen": 761483964, + "step": 4435 + }, + { + "epoch": 50.373937677053824, + "loss": 0.1450173705816269, + "loss_ce": 0.00024198158644139767, + "loss_iou": 0.44921875, + "loss_num": 0.0289306640625, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 761483964, + "step": 4435 + }, + { + "epoch": 50.38526912181303, + "grad_norm": 3.9033128277895988, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 761654848, + "step": 4436 + }, + { + "epoch": 50.38526912181303, + "loss": 0.05661904066801071, + "loss_ce": 0.0002530761994421482, + "loss_iou": 0.322265625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 761654848, + "step": 4436 + }, + { + "epoch": 50.39660056657224, + "grad_norm": 4.101962729604324, + "learning_rate": 5e-06, + "loss": 0.0651, + "num_input_tokens_seen": 761826036, + "step": 4437 + }, + { + "epoch": 50.39660056657224, + "loss": 0.07162822782993317, + "loss_ce": 0.0005222682375460863, + "loss_iou": 0.26171875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 761826036, + "step": 4437 + }, + { + "epoch": 50.40793201133145, + "grad_norm": 4.069166702192787, + "learning_rate": 5e-06, + "loss": 0.0904, + "num_input_tokens_seen": 761997472, + "step": 4438 + }, + { + "epoch": 50.40793201133145, + "loss": 0.09274718910455704, + "loss_ce": 0.0003399592242203653, + "loss_iou": 0.6015625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 761997472, + "step": 4438 + }, + { + "epoch": 50.419263456090654, + "grad_norm": 4.884899028897731, + "learning_rate": 5e-06, + "loss": 0.0933, + "num_input_tokens_seen": 762168420, + "step": 4439 + }, + { + "epoch": 50.419263456090654, + "loss": 0.12636074423789978, + "loss_ce": 0.0004146971623413265, + "loss_iou": 0.53125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 762168420, + "step": 4439 + }, + { + "epoch": 50.43059490084986, + "grad_norm": 4.508484589620329, + "learning_rate": 5e-06, + "loss": 0.1084, + "num_input_tokens_seen": 762340384, + "step": 4440 + }, + { + "epoch": 50.43059490084986, + "loss": 0.12017138302326202, + "loss_ce": 0.0002983251179102808, + "loss_iou": 0.54296875, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 762340384, + "step": 4440 + }, + { + "epoch": 50.44192634560906, + "grad_norm": 4.6213845222841385, + "learning_rate": 5e-06, + "loss": 0.1093, + "num_input_tokens_seen": 762509820, + "step": 4441 + }, + { + "epoch": 50.44192634560906, + "loss": 0.1878926008939743, + "loss_ce": 0.0003926012432202697, + "loss_iou": 0.3515625, + "loss_num": 0.037353515625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 762509820, + "step": 4441 + }, + { + "epoch": 50.45325779036827, + "grad_norm": 5.225287559911648, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 762681756, + "step": 4442 + }, + { + "epoch": 50.45325779036827, + "loss": 0.052958663552999496, + "loss_ce": 0.00040739180985838175, + "loss_iou": 0.369140625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 762681756, + "step": 4442 + }, + { + "epoch": 50.46458923512748, + "grad_norm": 3.6754381417200497, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 762853912, + "step": 4443 + }, + { + "epoch": 50.46458923512748, + "loss": 0.05573795735836029, + "loss_ce": 0.0003027716011274606, + "loss_iou": 0.6796875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 762853912, + "step": 4443 + }, + { + "epoch": 50.475920679886684, + "grad_norm": 3.8937946679832076, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 763024216, + "step": 4444 + }, + { + "epoch": 50.475920679886684, + "loss": 0.09364482760429382, + "loss_ce": 0.0005204459303058684, + "loss_iou": 0.48046875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 763024216, + "step": 4444 + }, + { + "epoch": 50.48725212464589, + "grad_norm": 4.669908733165509, + "learning_rate": 5e-06, + "loss": 0.0619, + "num_input_tokens_seen": 763196380, + "step": 4445 + }, + { + "epoch": 50.48725212464589, + "loss": 0.04426318407058716, + "loss_ce": 0.000195800224901177, + "loss_iou": 0.328125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 763196380, + "step": 4445 + }, + { + "epoch": 50.4985835694051, + "grad_norm": 4.324062435745885, + "learning_rate": 5e-06, + "loss": 0.1065, + "num_input_tokens_seen": 763365028, + "step": 4446 + }, + { + "epoch": 50.4985835694051, + "loss": 0.14875555038452148, + "loss_ce": 0.0002570173528511077, + "loss_iou": 0.322265625, + "loss_num": 0.02978515625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 763365028, + "step": 4446 + }, + { + "epoch": 50.50991501416431, + "grad_norm": 3.8630831844918543, + "learning_rate": 5e-06, + "loss": 0.0711, + "num_input_tokens_seen": 763536276, + "step": 4447 + }, + { + "epoch": 50.50991501416431, + "loss": 0.09591014683246613, + "loss_ce": 0.001198843470774591, + "loss_iou": 0.41015625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 763536276, + "step": 4447 + }, + { + "epoch": 50.521246458923514, + "grad_norm": 17.249312436005795, + "learning_rate": 5e-06, + "loss": 0.1172, + "num_input_tokens_seen": 763708156, + "step": 4448 + }, + { + "epoch": 50.521246458923514, + "loss": 0.07101692259311676, + "loss_ce": 0.000567091628909111, + "loss_iou": 0.5078125, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 763708156, + "step": 4448 + }, + { + "epoch": 50.53257790368272, + "grad_norm": 3.6634734959179154, + "learning_rate": 5e-06, + "loss": 0.0727, + "num_input_tokens_seen": 763879872, + "step": 4449 + }, + { + "epoch": 50.53257790368272, + "loss": 0.06236252188682556, + "loss_ce": 0.0010527074337005615, + "loss_iou": 0.44921875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 763879872, + "step": 4449 + }, + { + "epoch": 50.54390934844193, + "grad_norm": 3.1649803801412415, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 764051644, + "step": 4450 + }, + { + "epoch": 50.54390934844193, + "loss": 0.11230932921171188, + "loss_ce": 0.00046240483061410487, + "loss_iou": 0.177734375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 764051644, + "step": 4450 + }, + { + "epoch": 50.555240793201136, + "grad_norm": 3.004898679713995, + "learning_rate": 5e-06, + "loss": 0.0718, + "num_input_tokens_seen": 764222032, + "step": 4451 + }, + { + "epoch": 50.555240793201136, + "loss": 0.052467942237854004, + "loss_ce": 0.0002523672883398831, + "loss_iou": 0.4140625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 764222032, + "step": 4451 + }, + { + "epoch": 50.56657223796034, + "grad_norm": 2.8458393998173763, + "learning_rate": 5e-06, + "loss": 0.0571, + "num_input_tokens_seen": 764391780, + "step": 4452 + }, + { + "epoch": 50.56657223796034, + "loss": 0.06604805588722229, + "loss_ce": 0.0003589663829188794, + "loss_iou": 0.384765625, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 764391780, + "step": 4452 + }, + { + "epoch": 50.577903682719544, + "grad_norm": 2.884652573213962, + "learning_rate": 5e-06, + "loss": 0.0706, + "num_input_tokens_seen": 764561460, + "step": 4453 + }, + { + "epoch": 50.577903682719544, + "loss": 0.07057365775108337, + "loss_ce": 0.0003832242509815842, + "loss_iou": 0.4375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 764561460, + "step": 4453 + }, + { + "epoch": 50.58923512747875, + "grad_norm": 2.9240991805472683, + "learning_rate": 5e-06, + "loss": 0.0966, + "num_input_tokens_seen": 764733044, + "step": 4454 + }, + { + "epoch": 50.58923512747875, + "loss": 0.21847650408744812, + "loss_ce": 0.0007335906266234815, + "loss_iou": 0.0966796875, + "loss_num": 0.04345703125, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 764733044, + "step": 4454 + }, + { + "epoch": 50.60056657223796, + "grad_norm": 3.781267890603106, + "learning_rate": 5e-06, + "loss": 0.1074, + "num_input_tokens_seen": 764903636, + "step": 4455 + }, + { + "epoch": 50.60056657223796, + "loss": 0.15405893325805664, + "loss_ce": 0.0006623170920647681, + "loss_iou": 0.314453125, + "loss_num": 0.0306396484375, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 764903636, + "step": 4455 + }, + { + "epoch": 50.611898016997166, + "grad_norm": 5.169681165561143, + "learning_rate": 5e-06, + "loss": 0.0862, + "num_input_tokens_seen": 765075268, + "step": 4456 + }, + { + "epoch": 50.611898016997166, + "loss": 0.07309263944625854, + "loss_ce": 0.0002166681078961119, + "loss_iou": 0.427734375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 765075268, + "step": 4456 + }, + { + "epoch": 50.623229461756374, + "grad_norm": 4.091553070917247, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 765247104, + "step": 4457 + }, + { + "epoch": 50.623229461756374, + "loss": 0.07224041223526001, + "loss_ce": 0.001317555201239884, + "loss_iou": 0.2177734375, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 765247104, + "step": 4457 + }, + { + "epoch": 50.63456090651558, + "grad_norm": 4.126998992242619, + "learning_rate": 5e-06, + "loss": 0.0759, + "num_input_tokens_seen": 765418476, + "step": 4458 + }, + { + "epoch": 50.63456090651558, + "loss": 0.08577851951122284, + "loss_ce": 0.000420852069510147, + "loss_iou": 0.15234375, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 765418476, + "step": 4458 + }, + { + "epoch": 50.64589235127479, + "grad_norm": 3.720466276255888, + "learning_rate": 5e-06, + "loss": 0.0781, + "num_input_tokens_seen": 765589536, + "step": 4459 + }, + { + "epoch": 50.64589235127479, + "loss": 0.07755723595619202, + "loss_ce": 0.0007444872171618044, + "loss_iou": 0.443359375, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 765589536, + "step": 4459 + }, + { + "epoch": 50.657223796033996, + "grad_norm": 4.2967392019514286, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 765761248, + "step": 4460 + }, + { + "epoch": 50.657223796033996, + "loss": 0.10007596015930176, + "loss_ce": 0.00010038288019131869, + "loss_iou": 0.494140625, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 765761248, + "step": 4460 + }, + { + "epoch": 50.668555240793204, + "grad_norm": 4.097532890372669, + "learning_rate": 5e-06, + "loss": 0.097, + "num_input_tokens_seen": 765931492, + "step": 4461 + }, + { + "epoch": 50.668555240793204, + "loss": 0.09184668213129044, + "loss_ce": 0.0006754158530384302, + "loss_iou": 0.431640625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 765931492, + "step": 4461 + }, + { + "epoch": 50.67988668555241, + "grad_norm": 3.7907628879818263, + "learning_rate": 5e-06, + "loss": 0.0805, + "num_input_tokens_seen": 766103056, + "step": 4462 + }, + { + "epoch": 50.67988668555241, + "loss": 0.04951099306344986, + "loss_ce": 0.0002861374814528972, + "loss_iou": 0.4609375, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 766103056, + "step": 4462 + }, + { + "epoch": 50.69121813031161, + "grad_norm": 3.7076227203087995, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 766274636, + "step": 4463 + }, + { + "epoch": 50.69121813031161, + "loss": 0.07821498066186905, + "loss_ce": 0.00042567052878439426, + "loss_iou": 0.328125, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 766274636, + "step": 4463 + }, + { + "epoch": 50.70254957507082, + "grad_norm": 3.4484857248147853, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 766446340, + "step": 4464 + }, + { + "epoch": 50.70254957507082, + "loss": 0.06428650766611099, + "loss_ce": 0.00032166283926926553, + "loss_iou": 0.455078125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 766446340, + "step": 4464 + }, + { + "epoch": 50.713881019830026, + "grad_norm": 4.003184014867428, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 766618132, + "step": 4465 + }, + { + "epoch": 50.713881019830026, + "loss": 0.05058712512254715, + "loss_ce": 0.0010494638700038195, + "loss_iou": 0.49609375, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 766618132, + "step": 4465 + }, + { + "epoch": 50.725212464589234, + "grad_norm": 6.23183585450477, + "learning_rate": 5e-06, + "loss": 0.0634, + "num_input_tokens_seen": 766787736, + "step": 4466 + }, + { + "epoch": 50.725212464589234, + "loss": 0.06207390874624252, + "loss_ce": 0.0004131448222324252, + "loss_iou": 0.63671875, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 766787736, + "step": 4466 + }, + { + "epoch": 50.73654390934844, + "grad_norm": 5.782560540058673, + "learning_rate": 5e-06, + "loss": 0.0677, + "num_input_tokens_seen": 766959388, + "step": 4467 + }, + { + "epoch": 50.73654390934844, + "loss": 0.07525616139173508, + "loss_ce": 0.00012188716209493577, + "loss_iou": 0.3359375, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 766959388, + "step": 4467 + }, + { + "epoch": 50.74787535410765, + "grad_norm": 3.6699910332487518, + "learning_rate": 5e-06, + "loss": 0.0685, + "num_input_tokens_seen": 767131616, + "step": 4468 + }, + { + "epoch": 50.74787535410765, + "loss": 0.0930500477552414, + "loss_ce": 0.002481503179296851, + "loss_iou": 0.373046875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 767131616, + "step": 4468 + }, + { + "epoch": 50.759206798866856, + "grad_norm": 5.082677311606937, + "learning_rate": 5e-06, + "loss": 0.0787, + "num_input_tokens_seen": 767302644, + "step": 4469 + }, + { + "epoch": 50.759206798866856, + "loss": 0.06062406674027443, + "loss_ce": 0.003785079112276435, + "loss_iou": 0.44140625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 767302644, + "step": 4469 + }, + { + "epoch": 50.77053824362606, + "grad_norm": 3.469927821657738, + "learning_rate": 5e-06, + "loss": 0.0819, + "num_input_tokens_seen": 767474644, + "step": 4470 + }, + { + "epoch": 50.77053824362606, + "loss": 0.10431724786758423, + "loss_ce": 0.00048118835547938943, + "loss_iou": 0.482421875, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 767474644, + "step": 4470 + }, + { + "epoch": 50.78186968838527, + "grad_norm": 4.251362090542788, + "learning_rate": 5e-06, + "loss": 0.0667, + "num_input_tokens_seen": 767646668, + "step": 4471 + }, + { + "epoch": 50.78186968838527, + "loss": 0.054449379444122314, + "loss_ce": 0.0012267231941223145, + "loss_iou": 0.275390625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 767646668, + "step": 4471 + }, + { + "epoch": 50.79320113314448, + "grad_norm": 3.1657645455821526, + "learning_rate": 5e-06, + "loss": 0.0705, + "num_input_tokens_seen": 767818424, + "step": 4472 + }, + { + "epoch": 50.79320113314448, + "loss": 0.09054841101169586, + "loss_ce": 0.00015533754776697606, + "loss_iou": 0.365234375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 767818424, + "step": 4472 + }, + { + "epoch": 50.804532577903686, + "grad_norm": 18.121377094572424, + "learning_rate": 5e-06, + "loss": 0.078, + "num_input_tokens_seen": 767990356, + "step": 4473 + }, + { + "epoch": 50.804532577903686, + "loss": 0.08168047666549683, + "loss_ce": 0.00038164720172062516, + "loss_iou": 0.60546875, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 767990356, + "step": 4473 + }, + { + "epoch": 50.815864022662886, + "grad_norm": 4.7741616377752685, + "learning_rate": 5e-06, + "loss": 0.0647, + "num_input_tokens_seen": 768159880, + "step": 4474 + }, + { + "epoch": 50.815864022662886, + "loss": 0.0824337899684906, + "loss_ce": 0.00049409712664783, + "loss_iou": 0.453125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 768159880, + "step": 4474 + }, + { + "epoch": 50.827195467422094, + "grad_norm": 3.218696108190223, + "learning_rate": 5e-06, + "loss": 0.0983, + "num_input_tokens_seen": 768331996, + "step": 4475 + }, + { + "epoch": 50.827195467422094, + "loss": 0.1260414570569992, + "loss_ce": 0.0010719744022935629, + "loss_iou": 0.39453125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 768331996, + "step": 4475 + }, + { + "epoch": 50.8385269121813, + "grad_norm": 3.881314956253814, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 768503540, + "step": 4476 + }, + { + "epoch": 50.8385269121813, + "loss": 0.06960465013980865, + "loss_ce": 0.000299228064250201, + "loss_iou": 0.43359375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 768503540, + "step": 4476 + }, + { + "epoch": 50.84985835694051, + "grad_norm": 7.3627371740088785, + "learning_rate": 5e-06, + "loss": 0.1156, + "num_input_tokens_seen": 768673824, + "step": 4477 + }, + { + "epoch": 50.84985835694051, + "loss": 0.07985567301511765, + "loss_ce": 0.00043367242324166, + "loss_iou": 0.46484375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 768673824, + "step": 4477 + }, + { + "epoch": 50.861189801699716, + "grad_norm": 4.3201353556278015, + "learning_rate": 5e-06, + "loss": 0.0695, + "num_input_tokens_seen": 768844620, + "step": 4478 + }, + { + "epoch": 50.861189801699716, + "loss": 0.06958837807178497, + "loss_ce": 0.0011984816519543529, + "loss_iou": 0.435546875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 768844620, + "step": 4478 + }, + { + "epoch": 50.87252124645892, + "grad_norm": 4.718497359939087, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 769015160, + "step": 4479 + }, + { + "epoch": 50.87252124645892, + "loss": 0.06399042159318924, + "loss_ce": 0.0010784345213323832, + "loss_iou": 0.54296875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 769015160, + "step": 4479 + }, + { + "epoch": 50.88385269121813, + "grad_norm": 4.0812761461182205, + "learning_rate": 5e-06, + "loss": 0.1113, + "num_input_tokens_seen": 769185676, + "step": 4480 + }, + { + "epoch": 50.88385269121813, + "loss": 0.11874992400407791, + "loss_ce": 0.0010131086455658078, + "loss_iou": 0.36328125, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 769185676, + "step": 4480 + }, + { + "epoch": 50.89518413597734, + "grad_norm": 3.750528923218562, + "learning_rate": 5e-06, + "loss": 0.1024, + "num_input_tokens_seen": 769356776, + "step": 4481 + }, + { + "epoch": 50.89518413597734, + "loss": 0.07799747586250305, + "loss_ce": 0.00039127099444158375, + "loss_iou": 0.37890625, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 769356776, + "step": 4481 + }, + { + "epoch": 50.906515580736546, + "grad_norm": 7.689864593323736, + "learning_rate": 5e-06, + "loss": 0.0854, + "num_input_tokens_seen": 769528424, + "step": 4482 + }, + { + "epoch": 50.906515580736546, + "loss": 0.06997948884963989, + "loss_ce": 6.371815106831491e-05, + "loss_iou": 0.373046875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 769528424, + "step": 4482 + }, + { + "epoch": 50.91784702549575, + "grad_norm": 4.425615390648746, + "learning_rate": 5e-06, + "loss": 0.0792, + "num_input_tokens_seen": 769700304, + "step": 4483 + }, + { + "epoch": 50.91784702549575, + "loss": 0.05891703814268112, + "loss_ce": 0.0003385467571206391, + "loss_iou": 0.390625, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 769700304, + "step": 4483 + }, + { + "epoch": 50.92917847025496, + "grad_norm": 3.4634851903456187, + "learning_rate": 5e-06, + "loss": 0.0978, + "num_input_tokens_seen": 769870796, + "step": 4484 + }, + { + "epoch": 50.92917847025496, + "loss": 0.10744870454072952, + "loss_ce": 0.0008508105529472232, + "loss_iou": 0.48046875, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 769870796, + "step": 4484 + }, + { + "epoch": 50.94050991501416, + "grad_norm": 3.316613958922671, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 770042420, + "step": 4485 + }, + { + "epoch": 50.94050991501416, + "loss": 0.06894853711128235, + "loss_ce": 0.001642022281885147, + "loss_iou": 0.53515625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 770042420, + "step": 4485 + }, + { + "epoch": 50.95184135977337, + "grad_norm": 2.991785197317611, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 770213472, + "step": 4486 + }, + { + "epoch": 50.95184135977337, + "loss": 0.14712882041931152, + "loss_ce": 0.00012564245844259858, + "loss_iou": 0.458984375, + "loss_num": 0.0294189453125, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 770213472, + "step": 4486 + }, + { + "epoch": 50.963172804532576, + "grad_norm": 4.353569033815165, + "learning_rate": 5e-06, + "loss": 0.0844, + "num_input_tokens_seen": 770384272, + "step": 4487 + }, + { + "epoch": 50.963172804532576, + "loss": 0.05455201119184494, + "loss_ce": 0.00015443185111507773, + "loss_iou": 0.427734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 770384272, + "step": 4487 + }, + { + "epoch": 50.97450424929178, + "grad_norm": 5.072560782369086, + "learning_rate": 5e-06, + "loss": 0.066, + "num_input_tokens_seen": 770555772, + "step": 4488 + }, + { + "epoch": 50.97450424929178, + "loss": 0.05929575115442276, + "loss_ce": 0.0007325211772695184, + "loss_iou": 0.546875, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 770555772, + "step": 4488 + }, + { + "epoch": 50.98583569405099, + "grad_norm": 3.98101773270491, + "learning_rate": 5e-06, + "loss": 0.0962, + "num_input_tokens_seen": 770725732, + "step": 4489 + }, + { + "epoch": 50.98583569405099, + "loss": 0.10329563915729523, + "loss_ce": 0.0006650229915976524, + "loss_iou": 0.6640625, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 770725732, + "step": 4489 + }, + { + "epoch": 50.9971671388102, + "grad_norm": 3.543801989019078, + "learning_rate": 5e-06, + "loss": 0.1083, + "num_input_tokens_seen": 770897584, + "step": 4490 + }, + { + "epoch": 50.9971671388102, + "loss": 0.07445745915174484, + "loss_ce": 0.0006354345241561532, + "loss_iou": 0.42578125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 770897584, + "step": 4490 + }, + { + "epoch": 50.9971671388102, + "loss": 0.09697633236646652, + "loss_ce": 0.0008612187812104821, + "loss_iou": 0.431640625, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 770940412, + "step": 4490 + }, + { + "epoch": 51.008498583569406, + "grad_norm": 11.696029333854522, + "learning_rate": 5e-06, + "loss": 0.0933, + "num_input_tokens_seen": 771068864, + "step": 4491 + }, + { + "epoch": 51.008498583569406, + "loss": 0.06903669238090515, + "loss_ce": 0.001249522203579545, + "loss_iou": 0.09765625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 771068864, + "step": 4491 + }, + { + "epoch": 51.01983002832861, + "grad_norm": 2.944264058756464, + "learning_rate": 5e-06, + "loss": 0.0587, + "num_input_tokens_seen": 771240468, + "step": 4492 + }, + { + "epoch": 51.01983002832861, + "loss": 0.07568216323852539, + "loss_ce": 0.00032663423917256296, + "loss_iou": 0.388671875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 771240468, + "step": 4492 + }, + { + "epoch": 51.03116147308782, + "grad_norm": 4.038955340697767, + "learning_rate": 5e-06, + "loss": 0.0787, + "num_input_tokens_seen": 771412288, + "step": 4493 + }, + { + "epoch": 51.03116147308782, + "loss": 0.06470480561256409, + "loss_ce": 0.0003279761876910925, + "loss_iou": 0.419921875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 771412288, + "step": 4493 + }, + { + "epoch": 51.04249291784703, + "grad_norm": 4.118764173577385, + "learning_rate": 5e-06, + "loss": 0.079, + "num_input_tokens_seen": 771581744, + "step": 4494 + }, + { + "epoch": 51.04249291784703, + "loss": 0.06250278651714325, + "loss_ce": 6.38273122604005e-05, + "loss_iou": 0.443359375, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 771581744, + "step": 4494 + }, + { + "epoch": 51.053824362606235, + "grad_norm": 4.094204475591344, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 771753200, + "step": 4495 + }, + { + "epoch": 51.053824362606235, + "loss": 0.06505470722913742, + "loss_ce": 0.0005100309499539435, + "loss_iou": 0.578125, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 771753200, + "step": 4495 + }, + { + "epoch": 51.065155807365436, + "grad_norm": 3.955992170010095, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 771923520, + "step": 4496 + }, + { + "epoch": 51.065155807365436, + "loss": 0.047629572451114655, + "loss_ce": 0.00025103078223764896, + "loss_iou": 0.5546875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 771923520, + "step": 4496 + }, + { + "epoch": 51.07648725212464, + "grad_norm": 3.9878613105217866, + "learning_rate": 5e-06, + "loss": 0.0819, + "num_input_tokens_seen": 772095020, + "step": 4497 + }, + { + "epoch": 51.07648725212464, + "loss": 0.05086749047040939, + "loss_ce": 0.0003914149710908532, + "loss_iou": 0.5703125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 772095020, + "step": 4497 + }, + { + "epoch": 51.08781869688385, + "grad_norm": 4.2614251442128035, + "learning_rate": 5e-06, + "loss": 0.0695, + "num_input_tokens_seen": 772265236, + "step": 4498 + }, + { + "epoch": 51.08781869688385, + "loss": 0.10085298120975494, + "loss_ce": 0.0005417039501480758, + "loss_iou": 0.236328125, + "loss_num": 0.0201416015625, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 772265236, + "step": 4498 + }, + { + "epoch": 51.09915014164306, + "grad_norm": 4.71628385813899, + "learning_rate": 5e-06, + "loss": 0.057, + "num_input_tokens_seen": 772436780, + "step": 4499 + }, + { + "epoch": 51.09915014164306, + "loss": 0.05079628527164459, + "loss_ce": 0.00028969591949135065, + "loss_iou": 0.6171875, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 772436780, + "step": 4499 + }, + { + "epoch": 51.110481586402265, + "grad_norm": 4.168961813978523, + "learning_rate": 5e-06, + "loss": 0.0902, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.110481586402265, + "eval_seeclick_CIoU": 0.5465849936008453, + "eval_seeclick_GIoU": 0.5389738380908966, + "eval_seeclick_IoU": 0.5810232162475586, + "eval_seeclick_MAE_all": 0.06306532770395279, + "eval_seeclick_MAE_h": 0.03525807522237301, + "eval_seeclick_MAE_w": 0.08735432475805283, + "eval_seeclick_MAE_x": 0.0965481661260128, + "eval_seeclick_MAE_y": 0.03310075309127569, + "eval_seeclick_NUM_probability": 0.9998623430728912, + "eval_seeclick_inside_bbox": 0.8920454680919647, + "eval_seeclick_loss": 0.7668238878250122, + "eval_seeclick_loss_ce": 0.5261051058769226, + "eval_seeclick_loss_iou": 0.5706787109375, + "eval_seeclick_loss_num": 0.047580718994140625, + "eval_seeclick_loss_xval": 0.237640380859375, + "eval_seeclick_runtime": 68.9924, + "eval_seeclick_samples_per_second": 0.623, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.110481586402265, + "eval_icons_CIoU": 0.7577144801616669, + "eval_icons_GIoU": 0.7579652965068817, + "eval_icons_IoU": 0.7701258361339569, + "eval_icons_MAE_all": 0.032606909051537514, + "eval_icons_MAE_h": 0.031240517273545265, + "eval_icons_MAE_w": 0.03600175678730011, + "eval_icons_MAE_x": 0.028579371981322765, + "eval_icons_MAE_y": 0.03460598364472389, + "eval_icons_NUM_probability": 0.9976474046707153, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.11088309437036514, + "eval_icons_loss_ce": 0.001976564759388566, + "eval_icons_loss_iou": 0.5400390625, + "eval_icons_loss_num": 0.01996612548828125, + "eval_icons_loss_xval": 0.099884033203125, + "eval_icons_runtime": 84.5, + "eval_icons_samples_per_second": 0.592, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.110481586402265, + "eval_screenspot_CIoU": 0.6291436950365702, + "eval_screenspot_GIoU": 0.6315093835194906, + "eval_screenspot_IoU": 0.6529751420021057, + "eval_screenspot_MAE_all": 0.06452302634716034, + "eval_screenspot_MAE_h": 0.039918248231212296, + "eval_screenspot_MAE_w": 0.11111702273289363, + "eval_screenspot_MAE_x": 0.0706788773338, + "eval_screenspot_MAE_y": 0.0363779521236817, + "eval_screenspot_NUM_probability": 0.9998601078987122, + "eval_screenspot_inside_bbox": 0.8945833245913187, + "eval_screenspot_loss": 0.2845422625541687, + "eval_screenspot_loss_ce": 0.014367691474035382, + "eval_screenspot_loss_iou": 0.5061848958333334, + "eval_screenspot_loss_num": 0.053690592447916664, + "eval_screenspot_loss_xval": 0.2685139973958333, + "eval_screenspot_runtime": 137.4318, + "eval_screenspot_samples_per_second": 0.648, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.110481586402265, + "eval_compot_CIoU": 0.8065867125988007, + "eval_compot_GIoU": 0.8006526827812195, + "eval_compot_IoU": 0.8227367103099823, + "eval_compot_MAE_all": 0.029521239921450615, + "eval_compot_MAE_h": 0.022162611596286297, + "eval_compot_MAE_w": 0.03618418239057064, + "eval_compot_MAE_x": 0.03599496930837631, + "eval_compot_MAE_y": 0.02374320011585951, + "eval_compot_NUM_probability": 0.9999399483203888, + "eval_compot_inside_bbox": 0.9131944477558136, + "eval_compot_loss": 0.09009036421775818, + "eval_compot_loss_ce": 9.890348155749962e-05, + "eval_compot_loss_iou": 0.465087890625, + "eval_compot_loss_num": 0.015417098999023438, + "eval_compot_loss_xval": 0.07709503173828125, + "eval_compot_runtime": 79.8502, + "eval_compot_samples_per_second": 0.626, + "eval_compot_steps_per_second": 0.025, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.110481586402265, + "eval_custom_ui_MAE_all": 0.021320768631994724, + "eval_custom_ui_MAE_x": 0.0345577672123909, + "eval_custom_ui_MAE_y": 0.008083771681413054, + "eval_custom_ui_NUM_probability": 0.999765545129776, + "eval_custom_ui_loss": 0.20550864934921265, + "eval_custom_ui_loss_ce": 0.09546193853020668, + "eval_custom_ui_loss_num": 0.0206756591796875, + "eval_custom_ui_loss_xval": 0.1033477783203125, + "eval_custom_ui_runtime": 58.8978, + "eval_custom_ui_samples_per_second": 0.849, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.110481586402265, + "loss": 0.24092400074005127, + "loss_ce": 0.11543572694063187, + "loss_iou": 0.0, + "loss_num": 0.025146484375, + "loss_xval": 0.125, + "num_input_tokens_seen": 772608204, + "step": 4500 + }, + { + "epoch": 51.12181303116147, + "grad_norm": 3.0836833964427015, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 772778084, + "step": 4501 + }, + { + "epoch": 51.12181303116147, + "loss": 0.044696711003780365, + "loss_ce": 0.0007666591554880142, + "loss_iou": 0.09765625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 772778084, + "step": 4501 + }, + { + "epoch": 51.13314447592068, + "grad_norm": 3.704876455578892, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 772949496, + "step": 4502 + }, + { + "epoch": 51.13314447592068, + "loss": 0.07046029716730118, + "loss_ce": 0.00042245464283041656, + "loss_iou": 0.27734375, + "loss_num": 0.0140380859375, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 772949496, + "step": 4502 + }, + { + "epoch": 51.14447592067989, + "grad_norm": 3.444899198827256, + "learning_rate": 5e-06, + "loss": 0.0823, + "num_input_tokens_seen": 773119972, + "step": 4503 + }, + { + "epoch": 51.14447592067989, + "loss": 0.040962524712085724, + "loss_ce": 0.00023681898892391473, + "loss_iou": 0.306640625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 773119972, + "step": 4503 + }, + { + "epoch": 51.155807365439095, + "grad_norm": 3.7466415073475305, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 773291744, + "step": 4504 + }, + { + "epoch": 51.155807365439095, + "loss": 0.1048392727971077, + "loss_ce": 0.00040812097722664475, + "loss_iou": 0.408203125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 773291744, + "step": 4504 + }, + { + "epoch": 51.1671388101983, + "grad_norm": 3.0811271862134704, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 773463464, + "step": 4505 + }, + { + "epoch": 51.1671388101983, + "loss": 0.04541376233100891, + "loss_ce": 0.00023248909565154463, + "loss_iou": 0.37890625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 773463464, + "step": 4505 + }, + { + "epoch": 51.17847025495751, + "grad_norm": 3.2550722332963824, + "learning_rate": 5e-06, + "loss": 0.0938, + "num_input_tokens_seen": 773634660, + "step": 4506 + }, + { + "epoch": 51.17847025495751, + "loss": 0.08185526728630066, + "loss_ce": 0.0003122958296444267, + "loss_iou": 0.482421875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 773634660, + "step": 4506 + }, + { + "epoch": 51.18980169971671, + "grad_norm": 4.050945261020096, + "learning_rate": 5e-06, + "loss": 0.0654, + "num_input_tokens_seen": 773804896, + "step": 4507 + }, + { + "epoch": 51.18980169971671, + "loss": 0.06780797243118286, + "loss_ce": 0.00022679413086734712, + "loss_iou": 0.3671875, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 773804896, + "step": 4507 + }, + { + "epoch": 51.20113314447592, + "grad_norm": 4.021506772788571, + "learning_rate": 5e-06, + "loss": 0.0669, + "num_input_tokens_seen": 773976180, + "step": 4508 + }, + { + "epoch": 51.20113314447592, + "loss": 0.059205252677202225, + "loss_ce": 0.00027580931782722473, + "loss_iou": 0.47265625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 773976180, + "step": 4508 + }, + { + "epoch": 51.212464589235125, + "grad_norm": 3.8404061210170335, + "learning_rate": 5e-06, + "loss": 0.0795, + "num_input_tokens_seen": 774147332, + "step": 4509 + }, + { + "epoch": 51.212464589235125, + "loss": 0.10334266722202301, + "loss_ce": 0.0006510112434625626, + "loss_iou": 0.3125, + "loss_num": 0.0205078125, + "loss_xval": 0.1025390625, + "num_input_tokens_seen": 774147332, + "step": 4509 + }, + { + "epoch": 51.22379603399433, + "grad_norm": 3.5631100162159806, + "learning_rate": 5e-06, + "loss": 0.1239, + "num_input_tokens_seen": 774319096, + "step": 4510 + }, + { + "epoch": 51.22379603399433, + "loss": 0.2650887966156006, + "loss_ce": 0.0001657061802688986, + "loss_iou": 0.31640625, + "loss_num": 0.052978515625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 774319096, + "step": 4510 + }, + { + "epoch": 51.23512747875354, + "grad_norm": 3.568186650477262, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 774489288, + "step": 4511 + }, + { + "epoch": 51.23512747875354, + "loss": 0.05329609289765358, + "loss_ce": 0.0003175779711455107, + "loss_iou": 0.380859375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 774489288, + "step": 4511 + }, + { + "epoch": 51.24645892351275, + "grad_norm": 4.063399714688638, + "learning_rate": 5e-06, + "loss": 0.1022, + "num_input_tokens_seen": 774660856, + "step": 4512 + }, + { + "epoch": 51.24645892351275, + "loss": 0.11217355728149414, + "loss_ce": 0.0005097426474094391, + "loss_iou": 0.53125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 774660856, + "step": 4512 + }, + { + "epoch": 51.257790368271955, + "grad_norm": 5.245471536020726, + "learning_rate": 5e-06, + "loss": 0.0786, + "num_input_tokens_seen": 774832468, + "step": 4513 + }, + { + "epoch": 51.257790368271955, + "loss": 0.07258614897727966, + "loss_ce": 0.00027474723174236715, + "loss_iou": 0.50390625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 774832468, + "step": 4513 + }, + { + "epoch": 51.26912181303116, + "grad_norm": 3.388952625301186, + "learning_rate": 5e-06, + "loss": 0.0694, + "num_input_tokens_seen": 775004024, + "step": 4514 + }, + { + "epoch": 51.26912181303116, + "loss": 0.06552299857139587, + "loss_ce": 0.0005663403426297009, + "loss_iou": 0.3984375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 775004024, + "step": 4514 + }, + { + "epoch": 51.28045325779037, + "grad_norm": 3.541188551834626, + "learning_rate": 5e-06, + "loss": 0.0627, + "num_input_tokens_seen": 775176000, + "step": 4515 + }, + { + "epoch": 51.28045325779037, + "loss": 0.039313673973083496, + "loss_ce": 0.00025117231416516006, + "loss_iou": 0.466796875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 775176000, + "step": 4515 + }, + { + "epoch": 51.29178470254958, + "grad_norm": 4.00918964159367, + "learning_rate": 5e-06, + "loss": 0.0726, + "num_input_tokens_seen": 775347296, + "step": 4516 + }, + { + "epoch": 51.29178470254958, + "loss": 0.05828949064016342, + "loss_ce": 0.00027557372231967747, + "loss_iou": 0.60546875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 775347296, + "step": 4516 + }, + { + "epoch": 51.303116147308785, + "grad_norm": 3.9411707848265465, + "learning_rate": 5e-06, + "loss": 0.0873, + "num_input_tokens_seen": 775518092, + "step": 4517 + }, + { + "epoch": 51.303116147308785, + "loss": 0.10470222681760788, + "loss_ce": 0.008114089258015156, + "loss_iou": 0.54296875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 775518092, + "step": 4517 + }, + { + "epoch": 51.314447592067985, + "grad_norm": 3.2787565227625812, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 775689896, + "step": 4518 + }, + { + "epoch": 51.314447592067985, + "loss": 0.06945203244686127, + "loss_ce": 0.00022290655761025846, + "loss_iou": 0.41015625, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 775689896, + "step": 4518 + }, + { + "epoch": 51.32577903682719, + "grad_norm": 3.592584442269907, + "learning_rate": 5e-06, + "loss": 0.0827, + "num_input_tokens_seen": 775859404, + "step": 4519 + }, + { + "epoch": 51.32577903682719, + "loss": 0.08434545993804932, + "loss_ce": 0.000300046696793288, + "loss_iou": 0.62890625, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 775859404, + "step": 4519 + }, + { + "epoch": 51.3371104815864, + "grad_norm": 3.0736497148178725, + "learning_rate": 5e-06, + "loss": 0.0718, + "num_input_tokens_seen": 776031524, + "step": 4520 + }, + { + "epoch": 51.3371104815864, + "loss": 0.06985723972320557, + "loss_ce": 0.00020086189033463597, + "loss_iou": 0.4375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 776031524, + "step": 4520 + }, + { + "epoch": 51.34844192634561, + "grad_norm": 3.4876066097664657, + "learning_rate": 5e-06, + "loss": 0.0715, + "num_input_tokens_seen": 776203388, + "step": 4521 + }, + { + "epoch": 51.34844192634561, + "loss": 0.09579696506261826, + "loss_ce": 0.001833343063481152, + "loss_iou": 0.5078125, + "loss_num": 0.018798828125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 776203388, + "step": 4521 + }, + { + "epoch": 51.359773371104815, + "grad_norm": 6.06725668325794, + "learning_rate": 5e-06, + "loss": 0.0733, + "num_input_tokens_seen": 776374720, + "step": 4522 + }, + { + "epoch": 51.359773371104815, + "loss": 0.10557658970355988, + "loss_ce": 0.0007639627438038588, + "loss_iou": 0.453125, + "loss_num": 0.02099609375, + "loss_xval": 0.10498046875, + "num_input_tokens_seen": 776374720, + "step": 4522 + }, + { + "epoch": 51.37110481586402, + "grad_norm": 4.304705218594438, + "learning_rate": 5e-06, + "loss": 0.1072, + "num_input_tokens_seen": 776546292, + "step": 4523 + }, + { + "epoch": 51.37110481586402, + "loss": 0.04815925285220146, + "loss_ce": 0.0002619122969917953, + "loss_iou": 0.4140625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 776546292, + "step": 4523 + }, + { + "epoch": 51.38243626062323, + "grad_norm": 2.9298417830938344, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 776717500, + "step": 4524 + }, + { + "epoch": 51.38243626062323, + "loss": 0.05452612787485123, + "loss_ce": 0.00011328543041599914, + "loss_iou": 0.4765625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 776717500, + "step": 4524 + }, + { + "epoch": 51.39376770538244, + "grad_norm": 2.941561889973824, + "learning_rate": 5e-06, + "loss": 0.061, + "num_input_tokens_seen": 776889140, + "step": 4525 + }, + { + "epoch": 51.39376770538244, + "loss": 0.03288891538977623, + "loss_ce": 0.0002808834542520344, + "loss_iou": 0.474609375, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 776889140, + "step": 4525 + }, + { + "epoch": 51.405099150141645, + "grad_norm": 4.11941007446635, + "learning_rate": 5e-06, + "loss": 0.0626, + "num_input_tokens_seen": 777061364, + "step": 4526 + }, + { + "epoch": 51.405099150141645, + "loss": 0.08032729476690292, + "loss_ce": 0.00023391506692860276, + "loss_iou": 0.53125, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 777061364, + "step": 4526 + }, + { + "epoch": 51.41643059490085, + "grad_norm": 5.254428987401125, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 777232936, + "step": 4527 + }, + { + "epoch": 51.41643059490085, + "loss": 0.05055055767297745, + "loss_ce": 0.00045595254050567746, + "loss_iou": 0.6484375, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 777232936, + "step": 4527 + }, + { + "epoch": 51.42776203966006, + "grad_norm": 3.7166516772707, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 777404436, + "step": 4528 + }, + { + "epoch": 51.42776203966006, + "loss": 0.07817244529724121, + "loss_ce": 0.00032210606150329113, + "loss_iou": 0.482421875, + "loss_num": 0.01556396484375, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 777404436, + "step": 4528 + }, + { + "epoch": 51.43909348441926, + "grad_norm": 4.941066003490131, + "learning_rate": 5e-06, + "loss": 0.0701, + "num_input_tokens_seen": 777576300, + "step": 4529 + }, + { + "epoch": 51.43909348441926, + "loss": 0.06884098052978516, + "loss_ce": 0.0003900496813002974, + "loss_iou": 0.3984375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 777576300, + "step": 4529 + }, + { + "epoch": 51.45042492917847, + "grad_norm": 4.477123869474375, + "learning_rate": 5e-06, + "loss": 0.0682, + "num_input_tokens_seen": 777745720, + "step": 4530 + }, + { + "epoch": 51.45042492917847, + "loss": 0.05033246800303459, + "loss_ce": 0.00028363786987029016, + "loss_iou": 0.44921875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 777745720, + "step": 4530 + }, + { + "epoch": 51.461756373937675, + "grad_norm": 3.9073355502327325, + "learning_rate": 5e-06, + "loss": 0.1094, + "num_input_tokens_seen": 777916876, + "step": 4531 + }, + { + "epoch": 51.461756373937675, + "loss": 0.07175743579864502, + "loss_ce": 0.0012160517508164048, + "loss_iou": 0.640625, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 777916876, + "step": 4531 + }, + { + "epoch": 51.47308781869688, + "grad_norm": 4.938120073212937, + "learning_rate": 5e-06, + "loss": 0.0849, + "num_input_tokens_seen": 778088448, + "step": 4532 + }, + { + "epoch": 51.47308781869688, + "loss": 0.04724579676985741, + "loss_ce": 6.56223128316924e-05, + "loss_iou": 0.62109375, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 778088448, + "step": 4532 + }, + { + "epoch": 51.48441926345609, + "grad_norm": 4.323858802474057, + "learning_rate": 5e-06, + "loss": 0.0773, + "num_input_tokens_seen": 778258496, + "step": 4533 + }, + { + "epoch": 51.48441926345609, + "loss": 0.07330875098705292, + "loss_ce": 0.00021915065008215606, + "loss_iou": 0.482421875, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 778258496, + "step": 4533 + }, + { + "epoch": 51.4957507082153, + "grad_norm": 3.755605542475779, + "learning_rate": 5e-06, + "loss": 0.0768, + "num_input_tokens_seen": 778428840, + "step": 4534 + }, + { + "epoch": 51.4957507082153, + "loss": 0.12589004635810852, + "loss_ce": 0.0003102092305198312, + "loss_iou": 0.462890625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 778428840, + "step": 4534 + }, + { + "epoch": 51.507082152974505, + "grad_norm": 8.384106220042213, + "learning_rate": 5e-06, + "loss": 0.0738, + "num_input_tokens_seen": 778600936, + "step": 4535 + }, + { + "epoch": 51.507082152974505, + "loss": 0.039587028324604034, + "loss_ce": 0.00020409331773407757, + "loss_iou": 0.412109375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 778600936, + "step": 4535 + }, + { + "epoch": 51.51841359773371, + "grad_norm": 3.2738725288189743, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 778769396, + "step": 4536 + }, + { + "epoch": 51.51841359773371, + "loss": 0.04192512109875679, + "loss_ce": 0.0002686251245904714, + "loss_iou": 0.345703125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 778769396, + "step": 4536 + }, + { + "epoch": 51.52974504249292, + "grad_norm": 4.698153922818547, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 778941060, + "step": 4537 + }, + { + "epoch": 51.52974504249292, + "loss": 0.12478635460138321, + "loss_ce": 0.0004272240912541747, + "loss_iou": 0.3515625, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 778941060, + "step": 4537 + }, + { + "epoch": 51.54107648725213, + "grad_norm": 4.4347826307340155, + "learning_rate": 5e-06, + "loss": 0.1199, + "num_input_tokens_seen": 779112156, + "step": 4538 + }, + { + "epoch": 51.54107648725213, + "loss": 0.06575482338666916, + "loss_ce": 0.00020305917132645845, + "loss_iou": 0.478515625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 779112156, + "step": 4538 + }, + { + "epoch": 51.552407932011334, + "grad_norm": 5.724012122037744, + "learning_rate": 5e-06, + "loss": 0.0953, + "num_input_tokens_seen": 779284308, + "step": 4539 + }, + { + "epoch": 51.552407932011334, + "loss": 0.08919038623571396, + "loss_ce": 0.0003689788281917572, + "loss_iou": 0.65234375, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 779284308, + "step": 4539 + }, + { + "epoch": 51.563739376770535, + "grad_norm": 3.523108598541958, + "learning_rate": 5e-06, + "loss": 0.0817, + "num_input_tokens_seen": 779454308, + "step": 4540 + }, + { + "epoch": 51.563739376770535, + "loss": 0.05216085538268089, + "loss_ce": 0.00020467647118493915, + "loss_iou": 0.37890625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 779454308, + "step": 4540 + }, + { + "epoch": 51.57507082152974, + "grad_norm": 2.788066414864014, + "learning_rate": 5e-06, + "loss": 0.0548, + "num_input_tokens_seen": 779624456, + "step": 4541 + }, + { + "epoch": 51.57507082152974, + "loss": 0.0518072135746479, + "loss_ce": 0.0002935428055934608, + "loss_iou": 0.2353515625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 779624456, + "step": 4541 + }, + { + "epoch": 51.58640226628895, + "grad_norm": 5.1969517557770235, + "learning_rate": 5e-06, + "loss": 0.0604, + "num_input_tokens_seen": 779794708, + "step": 4542 + }, + { + "epoch": 51.58640226628895, + "loss": 0.04793885722756386, + "loss_ce": 0.00030091824010014534, + "loss_iou": 0.3671875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 779794708, + "step": 4542 + }, + { + "epoch": 51.59773371104816, + "grad_norm": 3.967935626798619, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 779966024, + "step": 4543 + }, + { + "epoch": 51.59773371104816, + "loss": 0.17677834630012512, + "loss_ce": 0.00020362407667562366, + "loss_iou": 0.392578125, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 779966024, + "step": 4543 + }, + { + "epoch": 51.609065155807365, + "grad_norm": 3.8957132359016216, + "learning_rate": 5e-06, + "loss": 0.0956, + "num_input_tokens_seen": 780137820, + "step": 4544 + }, + { + "epoch": 51.609065155807365, + "loss": 0.12237225472927094, + "loss_ce": 0.00024091021623462439, + "loss_iou": 0.453125, + "loss_num": 0.0244140625, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 780137820, + "step": 4544 + }, + { + "epoch": 51.62039660056657, + "grad_norm": 3.185600408235008, + "learning_rate": 5e-06, + "loss": 0.0777, + "num_input_tokens_seen": 780309576, + "step": 4545 + }, + { + "epoch": 51.62039660056657, + "loss": 0.05723777785897255, + "loss_ce": 0.0002004253037739545, + "loss_iou": 0.255859375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 780309576, + "step": 4545 + }, + { + "epoch": 51.63172804532578, + "grad_norm": 4.995323500215864, + "learning_rate": 5e-06, + "loss": 0.0714, + "num_input_tokens_seen": 780481248, + "step": 4546 + }, + { + "epoch": 51.63172804532578, + "loss": 0.058180827647447586, + "loss_ce": 0.000288982642814517, + "loss_iou": 0.56640625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 780481248, + "step": 4546 + }, + { + "epoch": 51.64305949008499, + "grad_norm": 4.228666600667304, + "learning_rate": 5e-06, + "loss": 0.0767, + "num_input_tokens_seen": 780653384, + "step": 4547 + }, + { + "epoch": 51.64305949008499, + "loss": 0.057006604969501495, + "loss_ce": 0.00036597985308617353, + "loss_iou": 0.609375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 780653384, + "step": 4547 + }, + { + "epoch": 51.654390934844194, + "grad_norm": 3.747763916070439, + "learning_rate": 5e-06, + "loss": 0.0925, + "num_input_tokens_seen": 780824620, + "step": 4548 + }, + { + "epoch": 51.654390934844194, + "loss": 0.1148025393486023, + "loss_ce": 0.0010635234648361802, + "loss_iou": 0.45703125, + "loss_num": 0.022705078125, + "loss_xval": 0.11376953125, + "num_input_tokens_seen": 780824620, + "step": 4548 + }, + { + "epoch": 51.6657223796034, + "grad_norm": 3.6317036214257423, + "learning_rate": 5e-06, + "loss": 0.098, + "num_input_tokens_seen": 780995548, + "step": 4549 + }, + { + "epoch": 51.6657223796034, + "loss": 0.09171932935714722, + "loss_ce": 0.0002886661095544696, + "loss_iou": 0.4296875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 780995548, + "step": 4549 + }, + { + "epoch": 51.67705382436261, + "grad_norm": 3.916847526133361, + "learning_rate": 5e-06, + "loss": 0.1051, + "num_input_tokens_seen": 781167400, + "step": 4550 + }, + { + "epoch": 51.67705382436261, + "loss": 0.06322573125362396, + "loss_ce": 0.0002984790480695665, + "loss_iou": 0.349609375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 781167400, + "step": 4550 + }, + { + "epoch": 51.68838526912181, + "grad_norm": 3.796101313892827, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 781338704, + "step": 4551 + }, + { + "epoch": 51.68838526912181, + "loss": 0.04833993688225746, + "loss_ce": 0.0007172580226324499, + "loss_iou": 0.5234375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 781338704, + "step": 4551 + }, + { + "epoch": 51.69971671388102, + "grad_norm": 5.437545814560452, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 781510388, + "step": 4552 + }, + { + "epoch": 51.69971671388102, + "loss": 0.044218339025974274, + "loss_ce": 0.000517164240591228, + "loss_iou": 0.345703125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 781510388, + "step": 4552 + }, + { + "epoch": 51.711048158640224, + "grad_norm": 5.024072145983963, + "learning_rate": 5e-06, + "loss": 0.0864, + "num_input_tokens_seen": 781682224, + "step": 4553 + }, + { + "epoch": 51.711048158640224, + "loss": 0.0491611622273922, + "loss_ce": 0.0002720020420383662, + "loss_iou": 0.5234375, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 781682224, + "step": 4553 + }, + { + "epoch": 51.72237960339943, + "grad_norm": 3.147355411062974, + "learning_rate": 5e-06, + "loss": 0.0513, + "num_input_tokens_seen": 781854112, + "step": 4554 + }, + { + "epoch": 51.72237960339943, + "loss": 0.04576756805181503, + "loss_ce": 0.00044896139297634363, + "loss_iou": 0.16015625, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 781854112, + "step": 4554 + }, + { + "epoch": 51.73371104815864, + "grad_norm": 4.3830538468801095, + "learning_rate": 5e-06, + "loss": 0.0754, + "num_input_tokens_seen": 782023732, + "step": 4555 + }, + { + "epoch": 51.73371104815864, + "loss": 0.11079318076372147, + "loss_ce": 0.0001822151243686676, + "loss_iou": 0.32421875, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 782023732, + "step": 4555 + }, + { + "epoch": 51.74504249291785, + "grad_norm": 3.1154138772315147, + "learning_rate": 5e-06, + "loss": 0.0518, + "num_input_tokens_seen": 782193224, + "step": 4556 + }, + { + "epoch": 51.74504249291785, + "loss": 0.05634358897805214, + "loss_ce": 0.00032857467886060476, + "loss_iou": 0.306640625, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 782193224, + "step": 4556 + }, + { + "epoch": 51.756373937677054, + "grad_norm": 3.3295835669807796, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 782364844, + "step": 4557 + }, + { + "epoch": 51.756373937677054, + "loss": 0.16247408092021942, + "loss_ce": 0.00027314884937368333, + "loss_iou": 0.431640625, + "loss_num": 0.032470703125, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 782364844, + "step": 4557 + }, + { + "epoch": 51.76770538243626, + "grad_norm": 4.795776728751136, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 782536636, + "step": 4558 + }, + { + "epoch": 51.76770538243626, + "loss": 0.05933544412255287, + "loss_ce": 0.00034496455918997526, + "loss_iou": 0.28125, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 782536636, + "step": 4558 + }, + { + "epoch": 51.77903682719547, + "grad_norm": 3.456902876735184, + "learning_rate": 5e-06, + "loss": 0.0641, + "num_input_tokens_seen": 782708580, + "step": 4559 + }, + { + "epoch": 51.77903682719547, + "loss": 0.07327303290367126, + "loss_ce": 0.00015291766612790525, + "loss_iou": 0.341796875, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 782708580, + "step": 4559 + }, + { + "epoch": 51.79036827195468, + "grad_norm": 3.2982085404406645, + "learning_rate": 5e-06, + "loss": 0.1124, + "num_input_tokens_seen": 782880456, + "step": 4560 + }, + { + "epoch": 51.79036827195468, + "loss": 0.09759436547756195, + "loss_ce": 0.00015173680731095374, + "loss_iou": 0.33203125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 782880456, + "step": 4560 + }, + { + "epoch": 51.801699716713884, + "grad_norm": 3.386624785449222, + "learning_rate": 5e-06, + "loss": 0.0698, + "num_input_tokens_seen": 783052612, + "step": 4561 + }, + { + "epoch": 51.801699716713884, + "loss": 0.05019538849592209, + "loss_ce": 0.00031440702150575817, + "loss_iou": 0.52734375, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 783052612, + "step": 4561 + }, + { + "epoch": 51.81303116147309, + "grad_norm": 3.6256909214450594, + "learning_rate": 5e-06, + "loss": 0.0694, + "num_input_tokens_seen": 783223928, + "step": 4562 + }, + { + "epoch": 51.81303116147309, + "loss": 0.05543501302599907, + "loss_ce": 0.0008543243166059256, + "loss_iou": 0.408203125, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 783223928, + "step": 4562 + }, + { + "epoch": 51.82436260623229, + "grad_norm": 3.43870692204241, + "learning_rate": 5e-06, + "loss": 0.0812, + "num_input_tokens_seen": 783395124, + "step": 4563 + }, + { + "epoch": 51.82436260623229, + "loss": 0.11391442269086838, + "loss_ce": 0.00048058529500849545, + "loss_iou": 0.421875, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 783395124, + "step": 4563 + }, + { + "epoch": 51.8356940509915, + "grad_norm": 3.8587525631730872, + "learning_rate": 5e-06, + "loss": 0.0915, + "num_input_tokens_seen": 783566948, + "step": 4564 + }, + { + "epoch": 51.8356940509915, + "loss": 0.047966137528419495, + "loss_ce": 0.0002824224065989256, + "loss_iou": 0.58203125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 783566948, + "step": 4564 + }, + { + "epoch": 51.84702549575071, + "grad_norm": 5.27205274234332, + "learning_rate": 5e-06, + "loss": 0.1211, + "num_input_tokens_seen": 783736480, + "step": 4565 + }, + { + "epoch": 51.84702549575071, + "loss": 0.17447087168693542, + "loss_ce": 0.0005511859781108797, + "loss_iou": 0.494140625, + "loss_num": 0.03466796875, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 783736480, + "step": 4565 + }, + { + "epoch": 51.858356940509914, + "grad_norm": 3.9523070232026734, + "learning_rate": 5e-06, + "loss": 0.0946, + "num_input_tokens_seen": 783907644, + "step": 4566 + }, + { + "epoch": 51.858356940509914, + "loss": 0.06779085099697113, + "loss_ce": 0.000133380715851672, + "loss_iou": 0.431640625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 783907644, + "step": 4566 + }, + { + "epoch": 51.86968838526912, + "grad_norm": 3.332731983165318, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 784078368, + "step": 4567 + }, + { + "epoch": 51.86968838526912, + "loss": 0.07595255225896835, + "loss_ce": 0.00016214708739425987, + "loss_iou": 0.279296875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 784078368, + "step": 4567 + }, + { + "epoch": 51.88101983002833, + "grad_norm": 2.8011320176660166, + "learning_rate": 5e-06, + "loss": 0.0456, + "num_input_tokens_seen": 784250188, + "step": 4568 + }, + { + "epoch": 51.88101983002833, + "loss": 0.05351635441184044, + "loss_ce": 0.0003547338128555566, + "loss_iou": 0.515625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 784250188, + "step": 4568 + }, + { + "epoch": 51.892351274787536, + "grad_norm": 3.4640752341995116, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 784422232, + "step": 4569 + }, + { + "epoch": 51.892351274787536, + "loss": 0.08177356421947479, + "loss_ce": 0.0004899952909909189, + "loss_iou": 0.609375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 784422232, + "step": 4569 + }, + { + "epoch": 51.903682719546744, + "grad_norm": 3.9566960634442965, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 784593788, + "step": 4570 + }, + { + "epoch": 51.903682719546744, + "loss": 0.03977704420685768, + "loss_ce": 0.0003788497415371239, + "loss_iou": 0.328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 784593788, + "step": 4570 + }, + { + "epoch": 51.91501416430595, + "grad_norm": 4.176445520410173, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 784765404, + "step": 4571 + }, + { + "epoch": 51.91501416430595, + "loss": 0.09650145471096039, + "loss_ce": 0.0002490076585672796, + "loss_iou": 0.296875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 784765404, + "step": 4571 + }, + { + "epoch": 51.92634560906516, + "grad_norm": 3.6836944026912506, + "learning_rate": 5e-06, + "loss": 0.0708, + "num_input_tokens_seen": 784937108, + "step": 4572 + }, + { + "epoch": 51.92634560906516, + "loss": 0.10871314257383347, + "loss_ce": 0.00025367323542013764, + "loss_iou": 0.2421875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 784937108, + "step": 4572 + }, + { + "epoch": 51.93767705382436, + "grad_norm": 3.322656807451097, + "learning_rate": 5e-06, + "loss": 0.0779, + "num_input_tokens_seen": 785108972, + "step": 4573 + }, + { + "epoch": 51.93767705382436, + "loss": 0.08650954067707062, + "loss_ce": 0.00020583285368047655, + "loss_iou": 0.328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 785108972, + "step": 4573 + }, + { + "epoch": 51.94900849858357, + "grad_norm": 3.626391420245067, + "learning_rate": 5e-06, + "loss": 0.0856, + "num_input_tokens_seen": 785280928, + "step": 4574 + }, + { + "epoch": 51.94900849858357, + "loss": 0.14615091681480408, + "loss_ce": 0.00021584474598057568, + "loss_iou": 0.365234375, + "loss_num": 0.0291748046875, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 785280928, + "step": 4574 + }, + { + "epoch": 51.960339943342774, + "grad_norm": 3.74228068445649, + "learning_rate": 5e-06, + "loss": 0.062, + "num_input_tokens_seen": 785452408, + "step": 4575 + }, + { + "epoch": 51.960339943342774, + "loss": 0.040177762508392334, + "loss_ce": 0.00019973621238023043, + "loss_iou": 0.3515625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 785452408, + "step": 4575 + }, + { + "epoch": 51.97167138810198, + "grad_norm": 4.381000466982566, + "learning_rate": 5e-06, + "loss": 0.0764, + "num_input_tokens_seen": 785623312, + "step": 4576 + }, + { + "epoch": 51.97167138810198, + "loss": 0.08159258216619492, + "loss_ce": 0.000278495776001364, + "loss_iou": 0.45703125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 785623312, + "step": 4576 + }, + { + "epoch": 51.98300283286119, + "grad_norm": 4.443961495423953, + "learning_rate": 5e-06, + "loss": 0.093, + "num_input_tokens_seen": 785795032, + "step": 4577 + }, + { + "epoch": 51.98300283286119, + "loss": 0.06167113408446312, + "loss_ce": 0.0026196218095719814, + "loss_iou": 0.51171875, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 785795032, + "step": 4577 + }, + { + "epoch": 51.994334277620396, + "grad_norm": 5.640772146673305, + "learning_rate": 5e-06, + "loss": 0.1204, + "num_input_tokens_seen": 785964900, + "step": 4578 + }, + { + "epoch": 51.994334277620396, + "loss": 0.1554902344942093, + "loss_ce": 9.472258534515277e-05, + "loss_iou": 0.384765625, + "loss_num": 0.031005859375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 785964900, + "step": 4578 + }, + { + "epoch": 51.994334277620396, + "loss": 0.08137950301170349, + "loss_ce": 0.0002027432492468506, + "loss_iou": 0.2734375, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 786050860, + "step": 4578 + }, + { + "epoch": 52.005665722379604, + "grad_norm": 4.303144423608778, + "learning_rate": 5e-06, + "loss": 0.0804, + "num_input_tokens_seen": 786136924, + "step": 4579 + }, + { + "epoch": 52.005665722379604, + "loss": 0.07516370713710785, + "loss_ce": 0.0003193450393155217, + "loss_iou": 0.404296875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 786136924, + "step": 4579 + }, + { + "epoch": 52.01699716713881, + "grad_norm": 3.921834133399401, + "learning_rate": 5e-06, + "loss": 0.0661, + "num_input_tokens_seen": 786308744, + "step": 4580 + }, + { + "epoch": 52.01699716713881, + "loss": 0.06335844844579697, + "loss_ce": 0.0002175786648876965, + "loss_iou": 0.56640625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 786308744, + "step": 4580 + }, + { + "epoch": 52.02832861189802, + "grad_norm": 5.2965494335021, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 786480196, + "step": 4581 + }, + { + "epoch": 52.02832861189802, + "loss": 0.04880340397357941, + "loss_ce": 0.00014312355779111385, + "loss_iou": 0.40625, + "loss_num": 0.009765625, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 786480196, + "step": 4581 + }, + { + "epoch": 52.039660056657226, + "grad_norm": 3.8913015508272104, + "learning_rate": 5e-06, + "loss": 0.0632, + "num_input_tokens_seen": 786650312, + "step": 4582 + }, + { + "epoch": 52.039660056657226, + "loss": 0.06473094969987869, + "loss_ce": 0.0002167891652788967, + "loss_iou": 0.0, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 786650312, + "step": 4582 + }, + { + "epoch": 52.05099150141643, + "grad_norm": 3.239706300114128, + "learning_rate": 5e-06, + "loss": 0.0634, + "num_input_tokens_seen": 786822288, + "step": 4583 + }, + { + "epoch": 52.05099150141643, + "loss": 0.06472610682249069, + "loss_ce": 0.0001203926804009825, + "loss_iou": 0.42578125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 786822288, + "step": 4583 + }, + { + "epoch": 52.06232294617564, + "grad_norm": 8.126112690511478, + "learning_rate": 5e-06, + "loss": 0.0586, + "num_input_tokens_seen": 786992440, + "step": 4584 + }, + { + "epoch": 52.06232294617564, + "loss": 0.04443705454468727, + "loss_ce": 0.0002476015652064234, + "loss_iou": 0.30078125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 786992440, + "step": 4584 + }, + { + "epoch": 52.07365439093484, + "grad_norm": 2.8604273328543655, + "learning_rate": 5e-06, + "loss": 0.0931, + "num_input_tokens_seen": 787164384, + "step": 4585 + }, + { + "epoch": 52.07365439093484, + "loss": 0.1946127712726593, + "loss_ce": 0.00018528368673287332, + "loss_iou": 0.099609375, + "loss_num": 0.038818359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 787164384, + "step": 4585 + }, + { + "epoch": 52.08498583569405, + "grad_norm": 3.781306563416115, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 787336264, + "step": 4586 + }, + { + "epoch": 52.08498583569405, + "loss": 0.05890447646379471, + "loss_ce": 0.00020391664293128997, + "loss_iou": 0.359375, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 787336264, + "step": 4586 + }, + { + "epoch": 52.096317280453256, + "grad_norm": 4.026259780403755, + "learning_rate": 5e-06, + "loss": 0.071, + "num_input_tokens_seen": 787507268, + "step": 4587 + }, + { + "epoch": 52.096317280453256, + "loss": 0.06553702801465988, + "loss_ce": 0.0017858067294582725, + "loss_iou": 0.61328125, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 787507268, + "step": 4587 + }, + { + "epoch": 52.107648725212464, + "grad_norm": 4.201046247893455, + "learning_rate": 5e-06, + "loss": 0.0931, + "num_input_tokens_seen": 787677964, + "step": 4588 + }, + { + "epoch": 52.107648725212464, + "loss": 0.12023103982210159, + "loss_ce": 0.00035798695171251893, + "loss_iou": 0.3828125, + "loss_num": 0.0240478515625, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 787677964, + "step": 4588 + }, + { + "epoch": 52.11898016997167, + "grad_norm": 4.302483079826222, + "learning_rate": 5e-06, + "loss": 0.0619, + "num_input_tokens_seen": 787848408, + "step": 4589 + }, + { + "epoch": 52.11898016997167, + "loss": 0.04813995957374573, + "loss_ce": 0.0005783134838566184, + "loss_iou": 0.416015625, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 787848408, + "step": 4589 + }, + { + "epoch": 52.13031161473088, + "grad_norm": 3.9119184280850714, + "learning_rate": 5e-06, + "loss": 0.0719, + "num_input_tokens_seen": 788018700, + "step": 4590 + }, + { + "epoch": 52.13031161473088, + "loss": 0.08638627827167511, + "loss_ce": 0.00031145173124969006, + "loss_iou": 0.447265625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 788018700, + "step": 4590 + }, + { + "epoch": 52.141643059490086, + "grad_norm": 4.143548816134912, + "learning_rate": 5e-06, + "loss": 0.0631, + "num_input_tokens_seen": 788190580, + "step": 4591 + }, + { + "epoch": 52.141643059490086, + "loss": 0.06912362575531006, + "loss_ce": 0.00035226656473241746, + "loss_iou": 0.45703125, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 788190580, + "step": 4591 + }, + { + "epoch": 52.15297450424929, + "grad_norm": 3.4365427981048535, + "learning_rate": 5e-06, + "loss": 0.0815, + "num_input_tokens_seen": 788362272, + "step": 4592 + }, + { + "epoch": 52.15297450424929, + "loss": 0.07606668770313263, + "loss_ce": 0.0003983561182394624, + "loss_iou": 0.494140625, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 788362272, + "step": 4592 + }, + { + "epoch": 52.1643059490085, + "grad_norm": 2.283329325977374, + "learning_rate": 5e-06, + "loss": 0.0692, + "num_input_tokens_seen": 788534284, + "step": 4593 + }, + { + "epoch": 52.1643059490085, + "loss": 0.11275344341993332, + "loss_ce": 0.0007691902574151754, + "loss_iou": 0.333984375, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 788534284, + "step": 4593 + }, + { + "epoch": 52.17563739376771, + "grad_norm": 25.004455015184295, + "learning_rate": 5e-06, + "loss": 0.1291, + "num_input_tokens_seen": 788704576, + "step": 4594 + }, + { + "epoch": 52.17563739376771, + "loss": 0.06830897182226181, + "loss_ce": 0.0002089940244331956, + "loss_iou": 0.45703125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 788704576, + "step": 4594 + }, + { + "epoch": 52.186968838526916, + "grad_norm": 3.933231076233221, + "learning_rate": 5e-06, + "loss": 0.1091, + "num_input_tokens_seen": 788876128, + "step": 4595 + }, + { + "epoch": 52.186968838526916, + "loss": 0.12454564869403839, + "loss_ce": 0.00024756102357059717, + "loss_iou": 0.380859375, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 788876128, + "step": 4595 + }, + { + "epoch": 52.198300283286116, + "grad_norm": 3.882650550469579, + "learning_rate": 5e-06, + "loss": 0.0765, + "num_input_tokens_seen": 789047916, + "step": 4596 + }, + { + "epoch": 52.198300283286116, + "loss": 0.07191193848848343, + "loss_ce": 0.0002566642942838371, + "loss_iou": 0.5078125, + "loss_num": 0.01434326171875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 789047916, + "step": 4596 + }, + { + "epoch": 52.20963172804532, + "grad_norm": 3.8972527625328928, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 789219720, + "step": 4597 + }, + { + "epoch": 52.20963172804532, + "loss": 0.060532379895448685, + "loss_ce": 0.00015335108037106693, + "loss_iou": 0.1962890625, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 789219720, + "step": 4597 + }, + { + "epoch": 52.22096317280453, + "grad_norm": 6.019367548412412, + "learning_rate": 5e-06, + "loss": 0.058, + "num_input_tokens_seen": 789390824, + "step": 4598 + }, + { + "epoch": 52.22096317280453, + "loss": 0.0595117025077343, + "loss_ce": 0.00018553137488197535, + "loss_iou": 0.23046875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 789390824, + "step": 4598 + }, + { + "epoch": 52.23229461756374, + "grad_norm": 5.295395724390834, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 789561784, + "step": 4599 + }, + { + "epoch": 52.23229461756374, + "loss": 0.07566888630390167, + "loss_ce": 0.00030573003459721804, + "loss_iou": 0.5546875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 789561784, + "step": 4599 + }, + { + "epoch": 52.243626062322946, + "grad_norm": 4.334072642231794, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 789732644, + "step": 4600 + }, + { + "epoch": 52.243626062322946, + "loss": 0.08430413901805878, + "loss_ce": 0.0003502837789710611, + "loss_iou": 0.4609375, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 789732644, + "step": 4600 + }, + { + "epoch": 52.25495750708215, + "grad_norm": 5.6747098431544005, + "learning_rate": 5e-06, + "loss": 0.115, + "num_input_tokens_seen": 789902708, + "step": 4601 + }, + { + "epoch": 52.25495750708215, + "loss": 0.0486568883061409, + "loss_ce": 0.00019497414177749306, + "loss_iou": 0.294921875, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 789902708, + "step": 4601 + }, + { + "epoch": 52.26628895184136, + "grad_norm": 3.0795139691449607, + "learning_rate": 5e-06, + "loss": 0.0859, + "num_input_tokens_seen": 790074916, + "step": 4602 + }, + { + "epoch": 52.26628895184136, + "loss": 0.10147304832935333, + "loss_ce": 0.00018520957382861525, + "loss_iou": 0.2890625, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 790074916, + "step": 4602 + }, + { + "epoch": 52.27762039660057, + "grad_norm": 2.5756460073002128, + "learning_rate": 5e-06, + "loss": 0.0577, + "num_input_tokens_seen": 790244832, + "step": 4603 + }, + { + "epoch": 52.27762039660057, + "loss": 0.04581351578235626, + "loss_ce": 6.766716978745535e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 790244832, + "step": 4603 + }, + { + "epoch": 52.288951841359776, + "grad_norm": 3.059930235493123, + "learning_rate": 5e-06, + "loss": 0.0764, + "num_input_tokens_seen": 790416556, + "step": 4604 + }, + { + "epoch": 52.288951841359776, + "loss": 0.04115888103842735, + "loss_ce": 0.00018903279851656407, + "loss_iou": 0.46484375, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 790416556, + "step": 4604 + }, + { + "epoch": 52.30028328611898, + "grad_norm": 3.311641786260434, + "learning_rate": 5e-06, + "loss": 0.105, + "num_input_tokens_seen": 790585940, + "step": 4605 + }, + { + "epoch": 52.30028328611898, + "loss": 0.044871728867292404, + "loss_ce": 0.0004686516767833382, + "loss_iou": 0.45703125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 790585940, + "step": 4605 + }, + { + "epoch": 52.31161473087819, + "grad_norm": 4.606099171997949, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 790757840, + "step": 4606 + }, + { + "epoch": 52.31161473087819, + "loss": 0.07226390391588211, + "loss_ce": 0.0003950106038246304, + "loss_iou": 0.396484375, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 790757840, + "step": 4606 + }, + { + "epoch": 52.32294617563739, + "grad_norm": 4.691173116312316, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 790929496, + "step": 4607 + }, + { + "epoch": 52.32294617563739, + "loss": 0.038962364196777344, + "loss_ce": 0.00041866640094667673, + "loss_iou": 0.44140625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 790929496, + "step": 4607 + }, + { + "epoch": 52.3342776203966, + "grad_norm": 5.360875976558649, + "learning_rate": 5e-06, + "loss": 0.116, + "num_input_tokens_seen": 791100772, + "step": 4608 + }, + { + "epoch": 52.3342776203966, + "loss": 0.18516650795936584, + "loss_ce": 0.00044359132880344987, + "loss_iou": 0.384765625, + "loss_num": 0.036865234375, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 791100772, + "step": 4608 + }, + { + "epoch": 52.345609065155806, + "grad_norm": 7.852135784214818, + "learning_rate": 5e-06, + "loss": 0.0797, + "num_input_tokens_seen": 791271060, + "step": 4609 + }, + { + "epoch": 52.345609065155806, + "loss": 0.14633364975452423, + "loss_ce": 0.00012392696226015687, + "loss_iou": 0.4609375, + "loss_num": 0.0291748046875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 791271060, + "step": 4609 + }, + { + "epoch": 52.35694050991501, + "grad_norm": 4.353403091836125, + "learning_rate": 5e-06, + "loss": 0.0721, + "num_input_tokens_seen": 791442700, + "step": 4610 + }, + { + "epoch": 52.35694050991501, + "loss": 0.04589192941784859, + "loss_ce": 0.00013082032091915607, + "loss_iou": 0.47265625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 791442700, + "step": 4610 + }, + { + "epoch": 52.36827195467422, + "grad_norm": 5.7207745324118, + "learning_rate": 5e-06, + "loss": 0.0874, + "num_input_tokens_seen": 791613768, + "step": 4611 + }, + { + "epoch": 52.36827195467422, + "loss": 0.08321557939052582, + "loss_ce": 0.00029931924655102193, + "loss_iou": 0.396484375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 791613768, + "step": 4611 + }, + { + "epoch": 52.37960339943343, + "grad_norm": 3.1196219284948743, + "learning_rate": 5e-06, + "loss": 0.0585, + "num_input_tokens_seen": 791785176, + "step": 4612 + }, + { + "epoch": 52.37960339943343, + "loss": 0.04471583664417267, + "loss_ce": 0.00023646561021450907, + "loss_iou": 0.44921875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 791785176, + "step": 4612 + }, + { + "epoch": 52.390934844192635, + "grad_norm": 3.0644902369411224, + "learning_rate": 5e-06, + "loss": 0.0871, + "num_input_tokens_seen": 791957076, + "step": 4613 + }, + { + "epoch": 52.390934844192635, + "loss": 0.047982584685087204, + "loss_ce": 0.0002836083003785461, + "loss_iou": 0.486328125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 791957076, + "step": 4613 + }, + { + "epoch": 52.40226628895184, + "grad_norm": 3.562853539455721, + "learning_rate": 5e-06, + "loss": 0.078, + "num_input_tokens_seen": 792127860, + "step": 4614 + }, + { + "epoch": 52.40226628895184, + "loss": 0.1062203198671341, + "loss_ce": 0.0005074322107248008, + "loss_iou": 0.51953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 792127860, + "step": 4614 + }, + { + "epoch": 52.41359773371105, + "grad_norm": 3.8692498889392914, + "learning_rate": 5e-06, + "loss": 0.0827, + "num_input_tokens_seen": 792297432, + "step": 4615 + }, + { + "epoch": 52.41359773371105, + "loss": 0.07992237061262131, + "loss_ce": 0.00027149339439347386, + "loss_iou": 0.5625, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 792297432, + "step": 4615 + }, + { + "epoch": 52.42492917847026, + "grad_norm": 3.234229716298798, + "learning_rate": 5e-06, + "loss": 0.061, + "num_input_tokens_seen": 792469232, + "step": 4616 + }, + { + "epoch": 52.42492917847026, + "loss": 0.04069376736879349, + "loss_ce": 0.00013590327580459416, + "loss_iou": 0.486328125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 792469232, + "step": 4616 + }, + { + "epoch": 52.436260623229465, + "grad_norm": 3.7335701325658466, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 792641164, + "step": 4617 + }, + { + "epoch": 52.436260623229465, + "loss": 0.04785492271184921, + "loss_ce": 0.00017120649863500148, + "loss_iou": 0.484375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 792641164, + "step": 4617 + }, + { + "epoch": 52.447592067988666, + "grad_norm": 4.038699857916988, + "learning_rate": 5e-06, + "loss": 0.1106, + "num_input_tokens_seen": 792812784, + "step": 4618 + }, + { + "epoch": 52.447592067988666, + "loss": 0.19159315526485443, + "loss_ce": 0.0002784566313493997, + "loss_iou": 0.498046875, + "loss_num": 0.038330078125, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 792812784, + "step": 4618 + }, + { + "epoch": 52.45892351274787, + "grad_norm": 10.070736555907908, + "learning_rate": 5e-06, + "loss": 0.1421, + "num_input_tokens_seen": 792983924, + "step": 4619 + }, + { + "epoch": 52.45892351274787, + "loss": 0.10200847685337067, + "loss_ce": 0.00023235470871441066, + "loss_iou": 0.5078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 792983924, + "step": 4619 + }, + { + "epoch": 52.47025495750708, + "grad_norm": 4.21185622423892, + "learning_rate": 5e-06, + "loss": 0.0943, + "num_input_tokens_seen": 793155532, + "step": 4620 + }, + { + "epoch": 52.47025495750708, + "loss": 0.08301263302564621, + "loss_ce": 0.0002184435143135488, + "loss_iou": 0.58984375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 793155532, + "step": 4620 + }, + { + "epoch": 52.48158640226629, + "grad_norm": 3.489045365033888, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 793327388, + "step": 4621 + }, + { + "epoch": 52.48158640226629, + "loss": 0.13099710643291473, + "loss_ce": 9.1952046204824e-05, + "loss_iou": 0.5234375, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 793327388, + "step": 4621 + }, + { + "epoch": 52.492917847025495, + "grad_norm": 3.290680055827727, + "learning_rate": 5e-06, + "loss": 0.0613, + "num_input_tokens_seen": 793499516, + "step": 4622 + }, + { + "epoch": 52.492917847025495, + "loss": 0.06874236464500427, + "loss_ce": 0.0002151418593712151, + "loss_iou": 0.40625, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 793499516, + "step": 4622 + }, + { + "epoch": 52.5042492917847, + "grad_norm": 3.676025515682793, + "learning_rate": 5e-06, + "loss": 0.0747, + "num_input_tokens_seen": 793669372, + "step": 4623 + }, + { + "epoch": 52.5042492917847, + "loss": 0.06475893408060074, + "loss_ce": 0.0003668441786430776, + "loss_iou": 0.625, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 793669372, + "step": 4623 + }, + { + "epoch": 52.51558073654391, + "grad_norm": 4.138553728613425, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 793841280, + "step": 4624 + }, + { + "epoch": 52.51558073654391, + "loss": 0.06110313534736633, + "loss_ce": 0.00012901413720101118, + "loss_iou": 0.486328125, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 793841280, + "step": 4624 + }, + { + "epoch": 52.52691218130312, + "grad_norm": 3.7479659378994348, + "learning_rate": 5e-06, + "loss": 0.1136, + "num_input_tokens_seen": 794012904, + "step": 4625 + }, + { + "epoch": 52.52691218130312, + "loss": 0.15677732229232788, + "loss_ce": 0.00010007420496549457, + "loss_iou": 0.24609375, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 794012904, + "step": 4625 + }, + { + "epoch": 52.538243626062325, + "grad_norm": 3.9256908910914308, + "learning_rate": 5e-06, + "loss": 0.077, + "num_input_tokens_seen": 794182000, + "step": 4626 + }, + { + "epoch": 52.538243626062325, + "loss": 0.04677785187959671, + "loss_ce": 0.00033009928301908076, + "loss_iou": 0.36328125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 794182000, + "step": 4626 + }, + { + "epoch": 52.54957507082153, + "grad_norm": 2.7862732774895265, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 794353776, + "step": 4627 + }, + { + "epoch": 52.54957507082153, + "loss": 0.0538012720644474, + "loss_ce": 0.00022766315669286996, + "loss_iou": 0.49609375, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 794353776, + "step": 4627 + }, + { + "epoch": 52.56090651558074, + "grad_norm": 3.1180152230786446, + "learning_rate": 5e-06, + "loss": 0.076, + "num_input_tokens_seen": 794525756, + "step": 4628 + }, + { + "epoch": 52.56090651558074, + "loss": 0.09337963163852692, + "loss_ce": 0.0001484311796957627, + "loss_iou": 0.376953125, + "loss_num": 0.0186767578125, + "loss_xval": 0.09326171875, + "num_input_tokens_seen": 794525756, + "step": 4628 + }, + { + "epoch": 52.57223796033994, + "grad_norm": 4.366194404392181, + "learning_rate": 5e-06, + "loss": 0.0748, + "num_input_tokens_seen": 794695428, + "step": 4629 + }, + { + "epoch": 52.57223796033994, + "loss": 0.07714550942182541, + "loss_ce": 0.003918584436178207, + "loss_iou": 0.37890625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 794695428, + "step": 4629 + }, + { + "epoch": 52.58356940509915, + "grad_norm": 3.69099901403253, + "learning_rate": 5e-06, + "loss": 0.085, + "num_input_tokens_seen": 794867264, + "step": 4630 + }, + { + "epoch": 52.58356940509915, + "loss": 0.05563777685165405, + "loss_ce": 9.578681056154892e-05, + "loss_iou": 0.51171875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 794867264, + "step": 4630 + }, + { + "epoch": 52.594900849858355, + "grad_norm": 5.17556408270218, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 795039292, + "step": 4631 + }, + { + "epoch": 52.594900849858355, + "loss": 0.060076937079429626, + "loss_ce": 0.00044558741501532495, + "loss_iou": 0.4375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 795039292, + "step": 4631 + }, + { + "epoch": 52.60623229461756, + "grad_norm": 4.477108568470924, + "learning_rate": 5e-06, + "loss": 0.0551, + "num_input_tokens_seen": 795211032, + "step": 4632 + }, + { + "epoch": 52.60623229461756, + "loss": 0.06861227750778198, + "loss_ce": 0.0006954035488888621, + "loss_iou": 0.474609375, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 795211032, + "step": 4632 + }, + { + "epoch": 52.61756373937677, + "grad_norm": 4.047173777819922, + "learning_rate": 5e-06, + "loss": 0.0814, + "num_input_tokens_seen": 795382672, + "step": 4633 + }, + { + "epoch": 52.61756373937677, + "loss": 0.07145858556032181, + "loss_ce": 7.797317812219262e-05, + "loss_iou": 0.59765625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 795382672, + "step": 4633 + }, + { + "epoch": 52.62889518413598, + "grad_norm": 3.823208299584292, + "learning_rate": 5e-06, + "loss": 0.0857, + "num_input_tokens_seen": 795553456, + "step": 4634 + }, + { + "epoch": 52.62889518413598, + "loss": 0.06402266770601273, + "loss_ce": 0.0003019628638867289, + "loss_iou": 0.423828125, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 795553456, + "step": 4634 + }, + { + "epoch": 52.640226628895185, + "grad_norm": 4.516449227754517, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 795722164, + "step": 4635 + }, + { + "epoch": 52.640226628895185, + "loss": 0.05346234142780304, + "loss_ce": 0.00034650013549253345, + "loss_iou": 0.40234375, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 795722164, + "step": 4635 + }, + { + "epoch": 52.65155807365439, + "grad_norm": 3.675764736433771, + "learning_rate": 5e-06, + "loss": 0.0775, + "num_input_tokens_seen": 795893256, + "step": 4636 + }, + { + "epoch": 52.65155807365439, + "loss": 0.07161383330821991, + "loss_ce": 5.011321263737045e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0142822265625, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 795893256, + "step": 4636 + }, + { + "epoch": 52.6628895184136, + "grad_norm": 3.8623752727817084, + "learning_rate": 5e-06, + "loss": 0.0532, + "num_input_tokens_seen": 796065108, + "step": 4637 + }, + { + "epoch": 52.6628895184136, + "loss": 0.050797734409570694, + "loss_ce": 9.277938806917518e-05, + "loss_iou": 0.5859375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 796065108, + "step": 4637 + }, + { + "epoch": 52.67422096317281, + "grad_norm": 3.3647622613023693, + "learning_rate": 5e-06, + "loss": 0.0596, + "num_input_tokens_seen": 796237156, + "step": 4638 + }, + { + "epoch": 52.67422096317281, + "loss": 0.051536764949560165, + "loss_ce": 0.0001146470385720022, + "loss_iou": 0.37109375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 796237156, + "step": 4638 + }, + { + "epoch": 52.685552407932015, + "grad_norm": 3.463757725684601, + "learning_rate": 5e-06, + "loss": 0.0555, + "num_input_tokens_seen": 796408116, + "step": 4639 + }, + { + "epoch": 52.685552407932015, + "loss": 0.07609277963638306, + "loss_ce": 0.00016504891391377896, + "loss_iou": 0.57421875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 796408116, + "step": 4639 + }, + { + "epoch": 52.696883852691215, + "grad_norm": 3.6596971834167484, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 796578348, + "step": 4640 + }, + { + "epoch": 52.696883852691215, + "loss": 0.0635182112455368, + "loss_ce": 0.00037734012585133314, + "loss_iou": 0.390625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 796578348, + "step": 4640 + }, + { + "epoch": 52.70821529745042, + "grad_norm": 3.6799844599966494, + "learning_rate": 5e-06, + "loss": 0.0734, + "num_input_tokens_seen": 796749216, + "step": 4641 + }, + { + "epoch": 52.70821529745042, + "loss": 0.05242349952459335, + "loss_ce": 0.00013162585673853755, + "loss_iou": 0.4453125, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 796749216, + "step": 4641 + }, + { + "epoch": 52.71954674220963, + "grad_norm": 3.100191065797761, + "learning_rate": 5e-06, + "loss": 0.0688, + "num_input_tokens_seen": 796921008, + "step": 4642 + }, + { + "epoch": 52.71954674220963, + "loss": 0.10974981635808945, + "loss_ce": 5.437968502519652e-05, + "loss_iou": 0.51171875, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 796921008, + "step": 4642 + }, + { + "epoch": 52.73087818696884, + "grad_norm": 3.731808353896168, + "learning_rate": 5e-06, + "loss": 0.0577, + "num_input_tokens_seen": 797092816, + "step": 4643 + }, + { + "epoch": 52.73087818696884, + "loss": 0.05236506834626198, + "loss_ce": 0.0005004440317861736, + "loss_iou": 0.455078125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 797092816, + "step": 4643 + }, + { + "epoch": 52.742209631728045, + "grad_norm": 4.16604243397578, + "learning_rate": 5e-06, + "loss": 0.0644, + "num_input_tokens_seen": 797264816, + "step": 4644 + }, + { + "epoch": 52.742209631728045, + "loss": 0.08338037133216858, + "loss_ce": 0.00026574498042464256, + "loss_iou": 0.54296875, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 797264816, + "step": 4644 + }, + { + "epoch": 52.75354107648725, + "grad_norm": 3.3287556650560353, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 797436136, + "step": 4645 + }, + { + "epoch": 52.75354107648725, + "loss": 0.0746200680732727, + "loss_ce": 0.00046235433546826243, + "loss_iou": 0.515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 797436136, + "step": 4645 + }, + { + "epoch": 52.76487252124646, + "grad_norm": 3.1747378055032045, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 797607076, + "step": 4646 + }, + { + "epoch": 52.76487252124646, + "loss": 0.13902829587459564, + "loss_ce": 0.00012754701310768723, + "loss_iou": 0.375, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 797607076, + "step": 4646 + }, + { + "epoch": 52.77620396600567, + "grad_norm": 3.1970781674124735, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 797778688, + "step": 4647 + }, + { + "epoch": 52.77620396600567, + "loss": 0.056480772793293, + "loss_ce": 0.0002216164139099419, + "loss_iou": 0.484375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 797778688, + "step": 4647 + }, + { + "epoch": 52.787535410764875, + "grad_norm": 3.6266024786709887, + "learning_rate": 5e-06, + "loss": 0.0485, + "num_input_tokens_seen": 797950376, + "step": 4648 + }, + { + "epoch": 52.787535410764875, + "loss": 0.04782555252313614, + "loss_ce": 8.079847611952573e-05, + "loss_iou": 0.416015625, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 797950376, + "step": 4648 + }, + { + "epoch": 52.79886685552408, + "grad_norm": 3.8045115743989366, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 798121512, + "step": 4649 + }, + { + "epoch": 52.79886685552408, + "loss": 0.03150452673435211, + "loss_ce": 0.00019348980276845396, + "loss_iou": 0.57421875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 798121512, + "step": 4649 + }, + { + "epoch": 52.81019830028329, + "grad_norm": 9.042861900432891, + "learning_rate": 5e-06, + "loss": 0.0896, + "num_input_tokens_seen": 798293176, + "step": 4650 + }, + { + "epoch": 52.81019830028329, + "loss": 0.0683264285326004, + "loss_ce": 0.00015015466487966478, + "loss_iou": 0.43359375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 798293176, + "step": 4650 + }, + { + "epoch": 52.82152974504249, + "grad_norm": 3.094945140900843, + "learning_rate": 5e-06, + "loss": 0.0741, + "num_input_tokens_seen": 798464928, + "step": 4651 + }, + { + "epoch": 52.82152974504249, + "loss": 0.063038170337677, + "loss_ce": 0.00021773495245724916, + "loss_iou": 0.447265625, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 798464928, + "step": 4651 + }, + { + "epoch": 52.8328611898017, + "grad_norm": 4.842813103805323, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 798636896, + "step": 4652 + }, + { + "epoch": 52.8328611898017, + "loss": 0.05820214003324509, + "loss_ce": 0.0001271892833756283, + "loss_iou": 0.51953125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 798636896, + "step": 4652 + }, + { + "epoch": 52.844192634560905, + "grad_norm": 3.99634349426944, + "learning_rate": 5e-06, + "loss": 0.0972, + "num_input_tokens_seen": 798808904, + "step": 4653 + }, + { + "epoch": 52.844192634560905, + "loss": 0.12208408862352371, + "loss_ce": 0.000563089910428971, + "loss_iou": 0.53125, + "loss_num": 0.0242919921875, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 798808904, + "step": 4653 + }, + { + "epoch": 52.85552407932011, + "grad_norm": 3.7155214222231163, + "learning_rate": 5e-06, + "loss": 0.1171, + "num_input_tokens_seen": 798980428, + "step": 4654 + }, + { + "epoch": 52.85552407932011, + "loss": 0.1009514182806015, + "loss_ce": 0.0002434083289699629, + "loss_iou": 0.4765625, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 798980428, + "step": 4654 + }, + { + "epoch": 52.86685552407932, + "grad_norm": 3.8699393792472097, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 799151540, + "step": 4655 + }, + { + "epoch": 52.86685552407932, + "loss": 0.03530982509255409, + "loss_ce": 0.00021460725110955536, + "loss_iou": 0.4609375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 799151540, + "step": 4655 + }, + { + "epoch": 52.87818696883853, + "grad_norm": 3.8444299838952127, + "learning_rate": 5e-06, + "loss": 0.1094, + "num_input_tokens_seen": 799322788, + "step": 4656 + }, + { + "epoch": 52.87818696883853, + "loss": 0.06876468658447266, + "loss_ce": 0.00010013507562689483, + "loss_iou": 0.5, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 799322788, + "step": 4656 + }, + { + "epoch": 52.889518413597735, + "grad_norm": 3.245113541170879, + "learning_rate": 5e-06, + "loss": 0.0706, + "num_input_tokens_seen": 799494620, + "step": 4657 + }, + { + "epoch": 52.889518413597735, + "loss": 0.06102427467703819, + "loss_ce": 0.00023325718939304352, + "loss_iou": 0.421875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 799494620, + "step": 4657 + }, + { + "epoch": 52.90084985835694, + "grad_norm": 3.3406453498419255, + "learning_rate": 5e-06, + "loss": 0.1012, + "num_input_tokens_seen": 799666216, + "step": 4658 + }, + { + "epoch": 52.90084985835694, + "loss": 0.1429997831583023, + "loss_ce": 0.00010122729872819036, + "loss_iou": 0.455078125, + "loss_num": 0.028564453125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 799666216, + "step": 4658 + }, + { + "epoch": 52.91218130311615, + "grad_norm": 3.5451157154569657, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 799837884, + "step": 4659 + }, + { + "epoch": 52.91218130311615, + "loss": 0.05081760138273239, + "loss_ce": 0.00031101072090677917, + "loss_iou": 0.30078125, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 799837884, + "step": 4659 + }, + { + "epoch": 52.92351274787536, + "grad_norm": 2.9378663856248286, + "learning_rate": 5e-06, + "loss": 0.0869, + "num_input_tokens_seen": 800008636, + "step": 4660 + }, + { + "epoch": 52.92351274787536, + "loss": 0.09843829274177551, + "loss_ce": 0.000141167503898032, + "loss_iou": 0.310546875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 800008636, + "step": 4660 + }, + { + "epoch": 52.934844192634564, + "grad_norm": 3.0602509644733886, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 800180580, + "step": 4661 + }, + { + "epoch": 52.934844192634564, + "loss": 0.043142370879650116, + "loss_ce": 0.00021939734870102257, + "loss_iou": 0.498046875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 800180580, + "step": 4661 + }, + { + "epoch": 52.946175637393765, + "grad_norm": 3.2067073291695167, + "learning_rate": 5e-06, + "loss": 0.0601, + "num_input_tokens_seen": 800352152, + "step": 4662 + }, + { + "epoch": 52.946175637393765, + "loss": 0.05809144675731659, + "loss_ce": 7.753477257210761e-05, + "loss_iou": 0.53125, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 800352152, + "step": 4662 + }, + { + "epoch": 52.95750708215297, + "grad_norm": 4.853015698655401, + "learning_rate": 5e-06, + "loss": 0.0982, + "num_input_tokens_seen": 800523876, + "step": 4663 + }, + { + "epoch": 52.95750708215297, + "loss": 0.05939960479736328, + "loss_ce": 0.00021076375560369343, + "loss_iou": 0.2314453125, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 800523876, + "step": 4663 + }, + { + "epoch": 52.96883852691218, + "grad_norm": 3.8007122424660493, + "learning_rate": 5e-06, + "loss": 0.0632, + "num_input_tokens_seen": 800695456, + "step": 4664 + }, + { + "epoch": 52.96883852691218, + "loss": 0.06708081811666489, + "loss_ce": 0.00036939175333827734, + "loss_iou": 0.4140625, + "loss_num": 0.0133056640625, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 800695456, + "step": 4664 + }, + { + "epoch": 52.98016997167139, + "grad_norm": 3.5606164941183156, + "learning_rate": 5e-06, + "loss": 0.0697, + "num_input_tokens_seen": 800865964, + "step": 4665 + }, + { + "epoch": 52.98016997167139, + "loss": 0.04869307577610016, + "loss_ce": 0.00023116446391213685, + "loss_iou": 0.455078125, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 800865964, + "step": 4665 + }, + { + "epoch": 52.991501416430594, + "grad_norm": 5.552846294414013, + "learning_rate": 5e-06, + "loss": 0.0551, + "num_input_tokens_seen": 801037732, + "step": 4666 + }, + { + "epoch": 52.991501416430594, + "loss": 0.04968515783548355, + "loss_ce": 0.00025431145331822336, + "loss_iou": 0.48828125, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 801037732, + "step": 4666 + }, + { + "epoch": 52.991501416430594, + "loss": 0.08807050436735153, + "loss_ce": 0.0001188477617688477, + "loss_iou": 0.05029296875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 801166568, + "step": 4666 + }, + { + "epoch": 53.0028328611898, + "grad_norm": 3.6183639870460675, + "learning_rate": 5e-06, + "loss": 0.1127, + "num_input_tokens_seen": 801209384, + "step": 4667 + }, + { + "epoch": 53.0028328611898, + "loss": 0.1584804654121399, + "loss_ce": 0.00015527110372204334, + "loss_iou": 0.7265625, + "loss_num": 0.03173828125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 801209384, + "step": 4667 + }, + { + "epoch": 53.01416430594901, + "grad_norm": 4.737952616452396, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 801381352, + "step": 4668 + }, + { + "epoch": 53.01416430594901, + "loss": 0.0649605393409729, + "loss_ce": 0.00023275637067854404, + "loss_iou": 0.4140625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 801381352, + "step": 4668 + }, + { + "epoch": 53.02549575070822, + "grad_norm": 5.7467922276591175, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 801551968, + "step": 4669 + }, + { + "epoch": 53.02549575070822, + "loss": 0.11760212481021881, + "loss_ce": 0.00020100100664421916, + "loss_iou": 0.5078125, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 801551968, + "step": 4669 + }, + { + "epoch": 53.036827195467424, + "grad_norm": 6.994592180657559, + "learning_rate": 5e-06, + "loss": 0.055, + "num_input_tokens_seen": 801720804, + "step": 4670 + }, + { + "epoch": 53.036827195467424, + "loss": 0.06153670325875282, + "loss_ce": 0.00019637009245343506, + "loss_iou": 0.484375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 801720804, + "step": 4670 + }, + { + "epoch": 53.04815864022663, + "grad_norm": 3.581475052913095, + "learning_rate": 5e-06, + "loss": 0.0472, + "num_input_tokens_seen": 801892200, + "step": 4671 + }, + { + "epoch": 53.04815864022663, + "loss": 0.04765965789556503, + "loss_ce": 0.000250599579885602, + "loss_iou": 0.384765625, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 801892200, + "step": 4671 + }, + { + "epoch": 53.05949008498584, + "grad_norm": 4.216413776684863, + "learning_rate": 5e-06, + "loss": 0.0511, + "num_input_tokens_seen": 802064272, + "step": 4672 + }, + { + "epoch": 53.05949008498584, + "loss": 0.03942747414112091, + "loss_ce": 9.03129912330769e-05, + "loss_iou": 0.45703125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 802064272, + "step": 4672 + }, + { + "epoch": 53.07082152974504, + "grad_norm": 3.534813613488904, + "learning_rate": 5e-06, + "loss": 0.1207, + "num_input_tokens_seen": 802232532, + "step": 4673 + }, + { + "epoch": 53.07082152974504, + "loss": 0.07966461032629013, + "loss_ce": 0.00016632223560009152, + "loss_iou": 0.1787109375, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 802232532, + "step": 4673 + }, + { + "epoch": 53.08215297450425, + "grad_norm": 3.1714678883847984, + "learning_rate": 5e-06, + "loss": 0.0493, + "num_input_tokens_seen": 802404136, + "step": 4674 + }, + { + "epoch": 53.08215297450425, + "loss": 0.05684254318475723, + "loss_ce": 0.00023243525356519967, + "loss_iou": 0.07421875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 802404136, + "step": 4674 + }, + { + "epoch": 53.093484419263454, + "grad_norm": 2.940926377124737, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 802574764, + "step": 4675 + }, + { + "epoch": 53.093484419263454, + "loss": 0.03710676729679108, + "loss_ce": 0.00036360337981022894, + "loss_iou": 0.435546875, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 802574764, + "step": 4675 + }, + { + "epoch": 53.10481586402266, + "grad_norm": 2.8071621213057893, + "learning_rate": 5e-06, + "loss": 0.0789, + "num_input_tokens_seen": 802746548, + "step": 4676 + }, + { + "epoch": 53.10481586402266, + "loss": 0.039691343903541565, + "loss_ce": 0.00017107937310356647, + "loss_iou": 0.4921875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 802746548, + "step": 4676 + }, + { + "epoch": 53.11614730878187, + "grad_norm": 4.215744155765197, + "learning_rate": 5e-06, + "loss": 0.0716, + "num_input_tokens_seen": 802918028, + "step": 4677 + }, + { + "epoch": 53.11614730878187, + "loss": 0.08283141255378723, + "loss_ce": 0.00018980893946718425, + "loss_iou": 0.330078125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 802918028, + "step": 4677 + }, + { + "epoch": 53.12747875354108, + "grad_norm": 3.748771855538572, + "learning_rate": 5e-06, + "loss": 0.089, + "num_input_tokens_seen": 803089556, + "step": 4678 + }, + { + "epoch": 53.12747875354108, + "loss": 0.05010093003511429, + "loss_ce": 0.00023520737886428833, + "loss_iou": 0.2734375, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 803089556, + "step": 4678 + }, + { + "epoch": 53.138810198300284, + "grad_norm": 3.3817925588098174, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 803261660, + "step": 4679 + }, + { + "epoch": 53.138810198300284, + "loss": 0.0462978333234787, + "loss_ce": 7.896107126725838e-05, + "loss_iou": 0.47265625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 803261660, + "step": 4679 + }, + { + "epoch": 53.15014164305949, + "grad_norm": 3.210287937189868, + "learning_rate": 5e-06, + "loss": 0.0712, + "num_input_tokens_seen": 803431452, + "step": 4680 + }, + { + "epoch": 53.15014164305949, + "loss": 0.03854640573263168, + "loss_ce": 0.00014003329852130264, + "loss_iou": 0.458984375, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 803431452, + "step": 4680 + }, + { + "epoch": 53.1614730878187, + "grad_norm": 3.4720640179637225, + "learning_rate": 5e-06, + "loss": 0.0444, + "num_input_tokens_seen": 803601552, + "step": 4681 + }, + { + "epoch": 53.1614730878187, + "loss": 0.04625174030661583, + "loss_ce": 0.0002159738796763122, + "loss_iou": 0.4609375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 803601552, + "step": 4681 + }, + { + "epoch": 53.172804532577906, + "grad_norm": 3.6961143800626117, + "learning_rate": 5e-06, + "loss": 0.0891, + "num_input_tokens_seen": 803771352, + "step": 4682 + }, + { + "epoch": 53.172804532577906, + "loss": 0.07767904549837112, + "loss_ce": 0.00013387876970227808, + "loss_iou": 0.396484375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 803771352, + "step": 4682 + }, + { + "epoch": 53.184135977337114, + "grad_norm": 3.282811122322535, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 803943016, + "step": 4683 + }, + { + "epoch": 53.184135977337114, + "loss": 0.06307847797870636, + "loss_ce": 0.0001970052398974076, + "loss_iou": 0.3359375, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 803943016, + "step": 4683 + }, + { + "epoch": 53.195467422096314, + "grad_norm": 5.781785936193978, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 804114688, + "step": 4684 + }, + { + "epoch": 53.195467422096314, + "loss": 0.052209023386240005, + "loss_ce": 0.000100260331237223, + "loss_iou": 0.4921875, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 804114688, + "step": 4684 + }, + { + "epoch": 53.20679886685552, + "grad_norm": 4.025053206139338, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 804286568, + "step": 4685 + }, + { + "epoch": 53.20679886685552, + "loss": 0.03820769861340523, + "loss_ce": 0.00010650143667589873, + "loss_iou": 0.5234375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 804286568, + "step": 4685 + }, + { + "epoch": 53.21813031161473, + "grad_norm": 3.574836476854667, + "learning_rate": 5e-06, + "loss": 0.0574, + "num_input_tokens_seen": 804458068, + "step": 4686 + }, + { + "epoch": 53.21813031161473, + "loss": 0.07929275184869766, + "loss_ce": 0.0001606728765182197, + "loss_iou": 0.447265625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 804458068, + "step": 4686 + }, + { + "epoch": 53.22946175637394, + "grad_norm": 2.9335493824511785, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 804629840, + "step": 4687 + }, + { + "epoch": 53.22946175637394, + "loss": 0.033859480172395706, + "loss_ce": 6.88940635882318e-05, + "loss_iou": 0.404296875, + "loss_num": 0.00677490234375, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 804629840, + "step": 4687 + }, + { + "epoch": 53.240793201133144, + "grad_norm": 3.4803271643586617, + "learning_rate": 5e-06, + "loss": 0.1138, + "num_input_tokens_seen": 804801292, + "step": 4688 + }, + { + "epoch": 53.240793201133144, + "loss": 0.21241691708564758, + "loss_ce": 4.5087228500051424e-05, + "loss_iou": 0.390625, + "loss_num": 0.04248046875, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 804801292, + "step": 4688 + }, + { + "epoch": 53.25212464589235, + "grad_norm": 3.339061998052338, + "learning_rate": 5e-06, + "loss": 0.0701, + "num_input_tokens_seen": 804972080, + "step": 4689 + }, + { + "epoch": 53.25212464589235, + "loss": 0.036198146641254425, + "loss_ce": 0.00033999019069597125, + "loss_iou": 0.486328125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 804972080, + "step": 4689 + }, + { + "epoch": 53.26345609065156, + "grad_norm": 3.4130839081378106, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 805144104, + "step": 4690 + }, + { + "epoch": 53.26345609065156, + "loss": 0.06861278414726257, + "loss_ce": 0.0001466000103391707, + "loss_iou": 0.44921875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 805144104, + "step": 4690 + }, + { + "epoch": 53.274787535410766, + "grad_norm": 4.101209750153936, + "learning_rate": 5e-06, + "loss": 0.0731, + "num_input_tokens_seen": 805316084, + "step": 4691 + }, + { + "epoch": 53.274787535410766, + "loss": 0.09842968732118607, + "loss_ce": 0.00025463569909334183, + "loss_iou": 0.423828125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 805316084, + "step": 4691 + }, + { + "epoch": 53.286118980169974, + "grad_norm": 4.216783410876979, + "learning_rate": 5e-06, + "loss": 0.0822, + "num_input_tokens_seen": 805488052, + "step": 4692 + }, + { + "epoch": 53.286118980169974, + "loss": 0.10868196189403534, + "loss_ce": 0.00013093504821881652, + "loss_iou": 0.4921875, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 805488052, + "step": 4692 + }, + { + "epoch": 53.29745042492918, + "grad_norm": 3.709801538829773, + "learning_rate": 5e-06, + "loss": 0.0893, + "num_input_tokens_seen": 805659744, + "step": 4693 + }, + { + "epoch": 53.29745042492918, + "loss": 0.08964075893163681, + "loss_ce": 7.166880823206156e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 805659744, + "step": 4693 + }, + { + "epoch": 53.30878186968839, + "grad_norm": 3.5890266602641114, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 805828452, + "step": 4694 + }, + { + "epoch": 53.30878186968839, + "loss": 0.05880039557814598, + "loss_ce": 5.405877891462296e-05, + "loss_iou": 0.314453125, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 805828452, + "step": 4694 + }, + { + "epoch": 53.32011331444759, + "grad_norm": 3.184967900399177, + "learning_rate": 5e-06, + "loss": 0.0633, + "num_input_tokens_seen": 806000476, + "step": 4695 + }, + { + "epoch": 53.32011331444759, + "loss": 0.05769744887948036, + "loss_ce": 0.00011078018724219874, + "loss_iou": 0.302734375, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 806000476, + "step": 4695 + }, + { + "epoch": 53.331444759206796, + "grad_norm": 3.7570691194562627, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 806172588, + "step": 4696 + }, + { + "epoch": 53.331444759206796, + "loss": 0.10235042870044708, + "loss_ce": 0.0007268953486345708, + "loss_iou": 0.361328125, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 806172588, + "step": 4696 + }, + { + "epoch": 53.342776203966004, + "grad_norm": 3.362923314806938, + "learning_rate": 5e-06, + "loss": 0.054, + "num_input_tokens_seen": 806344556, + "step": 4697 + }, + { + "epoch": 53.342776203966004, + "loss": 0.049817316234111786, + "loss_ce": 0.0001652156061027199, + "loss_iou": 0.5390625, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 806344556, + "step": 4697 + }, + { + "epoch": 53.35410764872521, + "grad_norm": 3.1716614065854, + "learning_rate": 5e-06, + "loss": 0.0533, + "num_input_tokens_seen": 806515664, + "step": 4698 + }, + { + "epoch": 53.35410764872521, + "loss": 0.05348663777112961, + "loss_ce": 0.00017242977628484368, + "loss_iou": 0.34375, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 806515664, + "step": 4698 + }, + { + "epoch": 53.36543909348442, + "grad_norm": 3.115464620621151, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 806686296, + "step": 4699 + }, + { + "epoch": 53.36543909348442, + "loss": 0.08509091287851334, + "loss_ce": 0.0002673053531907499, + "loss_iou": 0.46875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 806686296, + "step": 4699 + }, + { + "epoch": 53.376770538243626, + "grad_norm": 3.920512052086018, + "learning_rate": 5e-06, + "loss": 0.0486, + "num_input_tokens_seen": 806858380, + "step": 4700 + }, + { + "epoch": 53.376770538243626, + "loss": 0.04476987570524216, + "loss_ce": 6.162213685456663e-05, + "loss_iou": 0.4375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 806858380, + "step": 4700 + }, + { + "epoch": 53.388101983002834, + "grad_norm": 3.8555820734061257, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 807030260, + "step": 4701 + }, + { + "epoch": 53.388101983002834, + "loss": 0.043867938220500946, + "loss_ce": 7.52117921365425e-05, + "loss_iou": 0.62109375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 807030260, + "step": 4701 + }, + { + "epoch": 53.39943342776204, + "grad_norm": 2.724184268662446, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 807201372, + "step": 4702 + }, + { + "epoch": 53.39943342776204, + "loss": 0.04772461578249931, + "loss_ce": 0.0002697823219932616, + "loss_iou": 0.28515625, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 807201372, + "step": 4702 + }, + { + "epoch": 53.41076487252125, + "grad_norm": 2.8315897322615924, + "learning_rate": 5e-06, + "loss": 0.0483, + "num_input_tokens_seen": 807372492, + "step": 4703 + }, + { + "epoch": 53.41076487252125, + "loss": 0.03447873890399933, + "loss_ce": 8.542610157746822e-05, + "loss_iou": 0.421875, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 807372492, + "step": 4703 + }, + { + "epoch": 53.422096317280456, + "grad_norm": 5.854905697449762, + "learning_rate": 5e-06, + "loss": 0.0896, + "num_input_tokens_seen": 807543508, + "step": 4704 + }, + { + "epoch": 53.422096317280456, + "loss": 0.11171075701713562, + "loss_ce": 7.745045877527446e-05, + "loss_iou": 0.28125, + "loss_num": 0.0223388671875, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 807543508, + "step": 4704 + }, + { + "epoch": 53.43342776203966, + "grad_norm": 4.204693793117543, + "learning_rate": 5e-06, + "loss": 0.0585, + "num_input_tokens_seen": 807715544, + "step": 4705 + }, + { + "epoch": 53.43342776203966, + "loss": 0.05382070690393448, + "loss_ce": 0.00024709681747481227, + "loss_iou": 0.49609375, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 807715544, + "step": 4705 + }, + { + "epoch": 53.444759206798864, + "grad_norm": 3.9195419395884, + "learning_rate": 5e-06, + "loss": 0.103, + "num_input_tokens_seen": 807887324, + "step": 4706 + }, + { + "epoch": 53.444759206798864, + "loss": 0.061490319669246674, + "loss_ce": 0.00010421065235277638, + "loss_iou": 0.349609375, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 807887324, + "step": 4706 + }, + { + "epoch": 53.45609065155807, + "grad_norm": 4.3423556259341884, + "learning_rate": 5e-06, + "loss": 0.0472, + "num_input_tokens_seen": 808059196, + "step": 4707 + }, + { + "epoch": 53.45609065155807, + "loss": 0.04491560906171799, + "loss_ce": 0.00013106442929711193, + "loss_iou": 0.482421875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 808059196, + "step": 4707 + }, + { + "epoch": 53.46742209631728, + "grad_norm": 3.9545664113392722, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 808230340, + "step": 4708 + }, + { + "epoch": 53.46742209631728, + "loss": 0.07541818171739578, + "loss_ce": 0.00010080445645144209, + "loss_iou": 0.28125, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 808230340, + "step": 4708 + }, + { + "epoch": 53.478753541076486, + "grad_norm": 3.844563904266706, + "learning_rate": 5e-06, + "loss": 0.0567, + "num_input_tokens_seen": 808402108, + "step": 4709 + }, + { + "epoch": 53.478753541076486, + "loss": 0.05846604332327843, + "loss_ce": 0.00014695180288981646, + "loss_iou": 0.3046875, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 808402108, + "step": 4709 + }, + { + "epoch": 53.49008498583569, + "grad_norm": 3.8091888115848085, + "learning_rate": 5e-06, + "loss": 0.0842, + "num_input_tokens_seen": 808573440, + "step": 4710 + }, + { + "epoch": 53.49008498583569, + "loss": 0.03224262222647667, + "loss_ce": 0.0001533899048808962, + "loss_iou": 0.466796875, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 808573440, + "step": 4710 + }, + { + "epoch": 53.5014164305949, + "grad_norm": 3.288774773185425, + "learning_rate": 5e-06, + "loss": 0.0711, + "num_input_tokens_seen": 808745364, + "step": 4711 + }, + { + "epoch": 53.5014164305949, + "loss": 0.0679798498749733, + "loss_ce": 6.297829531831667e-05, + "loss_iou": 0.39453125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 808745364, + "step": 4711 + }, + { + "epoch": 53.51274787535411, + "grad_norm": 3.467909028755271, + "learning_rate": 5e-06, + "loss": 0.0959, + "num_input_tokens_seen": 808916396, + "step": 4712 + }, + { + "epoch": 53.51274787535411, + "loss": 0.15797144174575806, + "loss_ce": 0.0001955716434167698, + "loss_iou": 0.37890625, + "loss_num": 0.031494140625, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 808916396, + "step": 4712 + }, + { + "epoch": 53.524079320113316, + "grad_norm": 3.083361360268773, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 809087332, + "step": 4713 + }, + { + "epoch": 53.524079320113316, + "loss": 0.04382530599832535, + "loss_ce": 3.258429205743596e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 809087332, + "step": 4713 + }, + { + "epoch": 53.53541076487252, + "grad_norm": 3.4923296446901952, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 809258808, + "step": 4714 + }, + { + "epoch": 53.53541076487252, + "loss": 0.05811478942632675, + "loss_ce": 8.561462163925171e-05, + "loss_iou": 0.4375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 809258808, + "step": 4714 + }, + { + "epoch": 53.54674220963173, + "grad_norm": 3.8627771173012437, + "learning_rate": 5e-06, + "loss": 0.083, + "num_input_tokens_seen": 809430676, + "step": 4715 + }, + { + "epoch": 53.54674220963173, + "loss": 0.05545062571763992, + "loss_ce": 9.174172737402841e-05, + "loss_iou": 0.5859375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 809430676, + "step": 4715 + }, + { + "epoch": 53.55807365439094, + "grad_norm": 3.298441864008281, + "learning_rate": 5e-06, + "loss": 0.0466, + "num_input_tokens_seen": 809601628, + "step": 4716 + }, + { + "epoch": 53.55807365439094, + "loss": 0.038653187453746796, + "loss_ce": 0.00030784669797867537, + "loss_iou": 0.294921875, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 809601628, + "step": 4716 + }, + { + "epoch": 53.56940509915014, + "grad_norm": 3.812308088539133, + "learning_rate": 5e-06, + "loss": 0.1122, + "num_input_tokens_seen": 809772656, + "step": 4717 + }, + { + "epoch": 53.56940509915014, + "loss": 0.08617973327636719, + "loss_ce": 5.913038330618292e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 809772656, + "step": 4717 + }, + { + "epoch": 53.580736543909346, + "grad_norm": 2.896232334976172, + "learning_rate": 5e-06, + "loss": 0.0617, + "num_input_tokens_seen": 809944568, + "step": 4718 + }, + { + "epoch": 53.580736543909346, + "loss": 0.030894268304109573, + "loss_ce": 0.0001172898628283292, + "loss_iou": 0.3046875, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 809944568, + "step": 4718 + }, + { + "epoch": 53.59206798866855, + "grad_norm": 3.0005585140462996, + "learning_rate": 5e-06, + "loss": 0.0523, + "num_input_tokens_seen": 810116176, + "step": 4719 + }, + { + "epoch": 53.59206798866855, + "loss": 0.0554022379219532, + "loss_ce": 8.912795601645485e-05, + "loss_iou": 0.50390625, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 810116176, + "step": 4719 + }, + { + "epoch": 53.60339943342776, + "grad_norm": 3.6880261578395483, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 810285660, + "step": 4720 + }, + { + "epoch": 53.60339943342776, + "loss": 0.1042427197098732, + "loss_ce": 0.00016252268687821925, + "loss_iou": 0.447265625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 810285660, + "step": 4720 + }, + { + "epoch": 53.61473087818697, + "grad_norm": 3.639951122224193, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 810457344, + "step": 4721 + }, + { + "epoch": 53.61473087818697, + "loss": 0.0848560705780983, + "loss_ce": 0.00020030523592140526, + "loss_iou": 0.451171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 810457344, + "step": 4721 + }, + { + "epoch": 53.626062322946176, + "grad_norm": 3.7371813028541077, + "learning_rate": 5e-06, + "loss": 0.0927, + "num_input_tokens_seen": 810628152, + "step": 4722 + }, + { + "epoch": 53.626062322946176, + "loss": 0.09531824290752411, + "loss_ce": 0.00016442826017737389, + "loss_iou": 0.32421875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 810628152, + "step": 4722 + }, + { + "epoch": 53.63739376770538, + "grad_norm": 3.6873669335555217, + "learning_rate": 5e-06, + "loss": 0.0804, + "num_input_tokens_seen": 810799664, + "step": 4723 + }, + { + "epoch": 53.63739376770538, + "loss": 0.08019520342350006, + "loss_ce": 0.00014759181067347527, + "loss_iou": 0.59375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 810799664, + "step": 4723 + }, + { + "epoch": 53.64872521246459, + "grad_norm": 5.009013222489445, + "learning_rate": 5e-06, + "loss": 0.0759, + "num_input_tokens_seen": 810969668, + "step": 4724 + }, + { + "epoch": 53.64872521246459, + "loss": 0.05545923113822937, + "loss_ce": 5.456890357891098e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 810969668, + "step": 4724 + }, + { + "epoch": 53.6600566572238, + "grad_norm": 3.669644026865973, + "learning_rate": 5e-06, + "loss": 0.0786, + "num_input_tokens_seen": 811140488, + "step": 4725 + }, + { + "epoch": 53.6600566572238, + "loss": 0.07152142375707626, + "loss_ce": 0.00035443459637463093, + "loss_iou": 0.6171875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 811140488, + "step": 4725 + }, + { + "epoch": 53.671388101983005, + "grad_norm": 5.499783017178718, + "learning_rate": 5e-06, + "loss": 0.066, + "num_input_tokens_seen": 811311344, + "step": 4726 + }, + { + "epoch": 53.671388101983005, + "loss": 0.06037700176239014, + "loss_ce": 0.0003184127854183316, + "loss_iou": 0.41796875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 811311344, + "step": 4726 + }, + { + "epoch": 53.68271954674221, + "grad_norm": 3.781040851411071, + "learning_rate": 5e-06, + "loss": 0.0785, + "num_input_tokens_seen": 811483052, + "step": 4727 + }, + { + "epoch": 53.68271954674221, + "loss": 0.06411369144916534, + "loss_ce": 7.255301170516759e-05, + "loss_iou": 0.546875, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 811483052, + "step": 4727 + }, + { + "epoch": 53.69405099150141, + "grad_norm": 3.9217344741463758, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 811655120, + "step": 4728 + }, + { + "epoch": 53.69405099150141, + "loss": 0.1315111666917801, + "loss_ce": 8.722052007215098e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 811655120, + "step": 4728 + }, + { + "epoch": 53.70538243626062, + "grad_norm": 3.7764690095459628, + "learning_rate": 5e-06, + "loss": 0.0649, + "num_input_tokens_seen": 811826176, + "step": 4729 + }, + { + "epoch": 53.70538243626062, + "loss": 0.10992484539747238, + "loss_ce": 0.00016837533621583134, + "loss_iou": 0.416015625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 811826176, + "step": 4729 + }, + { + "epoch": 53.71671388101983, + "grad_norm": 5.493801959527736, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 811996400, + "step": 4730 + }, + { + "epoch": 53.71671388101983, + "loss": 0.07168786227703094, + "loss_ce": 0.00024620932526886463, + "loss_iou": 0.462890625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 811996400, + "step": 4730 + }, + { + "epoch": 53.728045325779036, + "grad_norm": 3.504280176538522, + "learning_rate": 5e-06, + "loss": 0.0646, + "num_input_tokens_seen": 812168080, + "step": 4731 + }, + { + "epoch": 53.728045325779036, + "loss": 0.0738058090209961, + "loss_ce": 9.05952911125496e-05, + "loss_iou": 0.49609375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 812168080, + "step": 4731 + }, + { + "epoch": 53.73937677053824, + "grad_norm": 3.3562273849600746, + "learning_rate": 5e-06, + "loss": 0.0795, + "num_input_tokens_seen": 812339848, + "step": 4732 + }, + { + "epoch": 53.73937677053824, + "loss": 0.0745634213089943, + "loss_ce": 0.0009855427779257298, + "loss_iou": 0.41015625, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 812339848, + "step": 4732 + }, + { + "epoch": 53.75070821529745, + "grad_norm": 2.992186732060685, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 812511904, + "step": 4733 + }, + { + "epoch": 53.75070821529745, + "loss": 0.04918941110372543, + "loss_ce": 0.00022395931591745466, + "loss_iou": 0.17578125, + "loss_num": 0.009765625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 812511904, + "step": 4733 + }, + { + "epoch": 53.76203966005666, + "grad_norm": 3.476255917572014, + "learning_rate": 5e-06, + "loss": 0.0601, + "num_input_tokens_seen": 812683476, + "step": 4734 + }, + { + "epoch": 53.76203966005666, + "loss": 0.040006931871175766, + "loss_ce": 0.0001509758731117472, + "loss_iou": 0.47265625, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 812683476, + "step": 4734 + }, + { + "epoch": 53.773371104815865, + "grad_norm": 4.74225071552281, + "learning_rate": 5e-06, + "loss": 0.0479, + "num_input_tokens_seen": 812855144, + "step": 4735 + }, + { + "epoch": 53.773371104815865, + "loss": 0.058695729821920395, + "loss_ce": 8.671992691233754e-05, + "loss_iou": 0.330078125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 812855144, + "step": 4735 + }, + { + "epoch": 53.78470254957507, + "grad_norm": 4.317753439416477, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 813026816, + "step": 4736 + }, + { + "epoch": 53.78470254957507, + "loss": 0.13734766840934753, + "loss_ce": 7.959892536746338e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0274658203125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 813026816, + "step": 4736 + }, + { + "epoch": 53.79603399433428, + "grad_norm": 4.06933961291216, + "learning_rate": 5e-06, + "loss": 0.0618, + "num_input_tokens_seen": 813198868, + "step": 4737 + }, + { + "epoch": 53.79603399433428, + "loss": 0.06954001635313034, + "loss_ce": 9.726545977173373e-05, + "loss_iou": 0.6640625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 813198868, + "step": 4737 + }, + { + "epoch": 53.80736543909349, + "grad_norm": 4.732088376055132, + "learning_rate": 5e-06, + "loss": 0.0738, + "num_input_tokens_seen": 813369232, + "step": 4738 + }, + { + "epoch": 53.80736543909349, + "loss": 0.09732776880264282, + "loss_ce": 6.825188029324636e-05, + "loss_iou": 0.490234375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 813369232, + "step": 4738 + }, + { + "epoch": 53.81869688385269, + "grad_norm": 4.965105353213792, + "learning_rate": 5e-06, + "loss": 0.0914, + "num_input_tokens_seen": 813540132, + "step": 4739 + }, + { + "epoch": 53.81869688385269, + "loss": 0.06733009219169617, + "loss_ce": 9.986882650991902e-05, + "loss_iou": 0.50390625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 813540132, + "step": 4739 + }, + { + "epoch": 53.830028328611895, + "grad_norm": 3.440272746120115, + "learning_rate": 5e-06, + "loss": 0.1133, + "num_input_tokens_seen": 813710804, + "step": 4740 + }, + { + "epoch": 53.830028328611895, + "loss": 0.13867655396461487, + "loss_ce": 0.00020304112695157528, + "loss_iou": 0.435546875, + "loss_num": 0.0277099609375, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 813710804, + "step": 4740 + }, + { + "epoch": 53.8413597733711, + "grad_norm": 3.241899024244398, + "learning_rate": 5e-06, + "loss": 0.0793, + "num_input_tokens_seen": 813881780, + "step": 4741 + }, + { + "epoch": 53.8413597733711, + "loss": 0.04989277571439743, + "loss_ce": 2.7049361960962415e-05, + "loss_iou": 0.48046875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 813881780, + "step": 4741 + }, + { + "epoch": 53.85269121813031, + "grad_norm": 3.3182526774776653, + "learning_rate": 5e-06, + "loss": 0.0727, + "num_input_tokens_seen": 814053592, + "step": 4742 + }, + { + "epoch": 53.85269121813031, + "loss": 0.12495151907205582, + "loss_ce": 8.885106217348948e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 814053592, + "step": 4742 + }, + { + "epoch": 53.86402266288952, + "grad_norm": 3.300922566199744, + "learning_rate": 5e-06, + "loss": 0.1011, + "num_input_tokens_seen": 814225508, + "step": 4743 + }, + { + "epoch": 53.86402266288952, + "loss": 0.1144113689661026, + "loss_ce": 4.674070078181103e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 814225508, + "step": 4743 + }, + { + "epoch": 53.875354107648725, + "grad_norm": 6.433502017316352, + "learning_rate": 5e-06, + "loss": 0.0799, + "num_input_tokens_seen": 814395736, + "step": 4744 + }, + { + "epoch": 53.875354107648725, + "loss": 0.10933315008878708, + "loss_ce": 0.00015652200090698898, + "loss_iou": 0.443359375, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 814395736, + "step": 4744 + }, + { + "epoch": 53.88668555240793, + "grad_norm": 3.1704300427332606, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 814567484, + "step": 4745 + }, + { + "epoch": 53.88668555240793, + "loss": 0.044446833431720734, + "loss_ce": 7.427643140545115e-05, + "loss_iou": 0.490234375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 814567484, + "step": 4745 + }, + { + "epoch": 53.89801699716714, + "grad_norm": 3.0519088979500175, + "learning_rate": 5e-06, + "loss": 0.0564, + "num_input_tokens_seen": 814739064, + "step": 4746 + }, + { + "epoch": 53.89801699716714, + "loss": 0.04025079309940338, + "loss_ce": 0.00012017632980132475, + "loss_iou": 0.181640625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 814739064, + "step": 4746 + }, + { + "epoch": 53.90934844192635, + "grad_norm": 2.7669753392707284, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 814909564, + "step": 4747 + }, + { + "epoch": 53.90934844192635, + "loss": 0.05583912879228592, + "loss_ce": 8.351239375770092e-05, + "loss_iou": 0.5625, + "loss_num": 0.01116943359375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 814909564, + "step": 4747 + }, + { + "epoch": 53.920679886685555, + "grad_norm": 3.690798693609719, + "learning_rate": 5e-06, + "loss": 0.0601, + "num_input_tokens_seen": 815080640, + "step": 4748 + }, + { + "epoch": 53.920679886685555, + "loss": 0.044106900691986084, + "loss_ce": 0.00011581404396565631, + "loss_iou": 0.546875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 815080640, + "step": 4748 + }, + { + "epoch": 53.93201133144476, + "grad_norm": 3.709204693984996, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 815252568, + "step": 4749 + }, + { + "epoch": 53.93201133144476, + "loss": 0.08503993600606918, + "loss_ce": 0.00017055007629096508, + "loss_iou": 0.52734375, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 815252568, + "step": 4749 + }, + { + "epoch": 53.94334277620396, + "grad_norm": 3.6783407282604093, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.94334277620396, + "eval_seeclick_CIoU": 0.5522235333919525, + "eval_seeclick_GIoU": 0.5461505651473999, + "eval_seeclick_IoU": 0.5867660045623779, + "eval_seeclick_MAE_all": 0.06227055564522743, + "eval_seeclick_MAE_h": 0.03112515900284052, + "eval_seeclick_MAE_w": 0.09046434238553047, + "eval_seeclick_MAE_x": 0.09699121490120888, + "eval_seeclick_MAE_y": 0.03050148393958807, + "eval_seeclick_NUM_probability": 0.9999385178089142, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 0.8090967535972595, + "eval_seeclick_loss_ce": 0.5721153616905212, + "eval_seeclick_loss_iou": 0.54638671875, + "eval_seeclick_loss_num": 0.0470733642578125, + "eval_seeclick_loss_xval": 0.2353057861328125, + "eval_seeclick_runtime": 68.9236, + "eval_seeclick_samples_per_second": 0.624, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.94334277620396, + "eval_icons_CIoU": 0.7505115568637848, + "eval_icons_GIoU": 0.7503921985626221, + "eval_icons_IoU": 0.7632699310779572, + "eval_icons_MAE_all": 0.03318015206605196, + "eval_icons_MAE_h": 0.030470607802271843, + "eval_icons_MAE_w": 0.035882056690752506, + "eval_icons_MAE_x": 0.030328705441206694, + "eval_icons_MAE_y": 0.03603922761976719, + "eval_icons_NUM_probability": 0.998185783624649, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.11329381912946701, + "eval_icons_loss_ce": 0.002961780352052301, + "eval_icons_loss_iou": 0.53125, + "eval_icons_loss_num": 0.020236968994140625, + "eval_icons_loss_xval": 0.101165771484375, + "eval_icons_runtime": 78.9791, + "eval_icons_samples_per_second": 0.633, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.94334277620396, + "eval_screenspot_CIoU": 0.6188857158025106, + "eval_screenspot_GIoU": 0.6189959645271301, + "eval_screenspot_IoU": 0.6486753424008688, + "eval_screenspot_MAE_all": 0.06953854113817215, + "eval_screenspot_MAE_h": 0.04198170329133669, + "eval_screenspot_MAE_w": 0.12024158984422684, + "eval_screenspot_MAE_x": 0.08158182352781296, + "eval_screenspot_MAE_y": 0.03434905533989271, + "eval_screenspot_NUM_probability": 0.9999154011408488, + "eval_screenspot_inside_bbox": 0.8974999984105428, + "eval_screenspot_loss": 0.29695162177085876, + "eval_screenspot_loss_ce": 0.014681711016843716, + "eval_screenspot_loss_iou": 0.5032958984375, + "eval_screenspot_loss_num": 0.05596923828125, + "eval_screenspot_loss_xval": 0.2797444661458333, + "eval_screenspot_runtime": 147.3545, + "eval_screenspot_samples_per_second": 0.604, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.94334277620396, + "eval_compot_CIoU": 0.847744882106781, + "eval_compot_GIoU": 0.842746913433075, + "eval_compot_IoU": 0.8632494211196899, + "eval_compot_MAE_all": 0.028249988332390785, + "eval_compot_MAE_h": 0.022975671105086803, + "eval_compot_MAE_w": 0.034658296033740044, + "eval_compot_MAE_x": 0.03308155573904514, + "eval_compot_MAE_y": 0.022284426260739565, + "eval_compot_NUM_probability": 0.9999487102031708, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.08661291003227234, + "eval_compot_loss_ce": 5.562421210925095e-05, + "eval_compot_loss_iou": 0.4775390625, + "eval_compot_loss_num": 0.014847755432128906, + "eval_compot_loss_xval": 0.07421875, + "eval_compot_runtime": 90.0244, + "eval_compot_samples_per_second": 0.555, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.94334277620396, + "eval_custom_ui_MAE_all": 0.020098551642149687, + "eval_custom_ui_MAE_x": 0.031903672963380814, + "eval_custom_ui_MAE_y": 0.00829342845827341, + "eval_custom_ui_NUM_probability": 0.999820202589035, + "eval_custom_ui_loss": 0.20859397947788239, + "eval_custom_ui_loss_ce": 0.10381979122757912, + "eval_custom_ui_loss_num": 0.01966094970703125, + "eval_custom_ui_loss_xval": 0.0983123779296875, + "eval_custom_ui_runtime": 58.7917, + "eval_custom_ui_samples_per_second": 0.85, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.94334277620396, + "loss": 0.2475331723690033, + "loss_ce": 0.1268971860408783, + "loss_iou": 0.0, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 815424420, + "step": 4750 + }, + { + "epoch": 53.95467422096317, + "grad_norm": 3.0741142778363035, + "learning_rate": 5e-06, + "loss": 0.0921, + "num_input_tokens_seen": 815596440, + "step": 4751 + }, + { + "epoch": 53.95467422096317, + "loss": 0.04185118153691292, + "loss_ce": 0.00011839439684990793, + "loss_iou": 0.328125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 815596440, + "step": 4751 + }, + { + "epoch": 53.96600566572238, + "grad_norm": 3.299156308551903, + "learning_rate": 5e-06, + "loss": 0.0932, + "num_input_tokens_seen": 815767592, + "step": 4752 + }, + { + "epoch": 53.96600566572238, + "loss": 0.04948686435818672, + "loss_ce": 7.890626147855073e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 815767592, + "step": 4752 + }, + { + "epoch": 53.977337110481585, + "grad_norm": 3.5150675242539853, + "learning_rate": 5e-06, + "loss": 0.0921, + "num_input_tokens_seen": 815938556, + "step": 4753 + }, + { + "epoch": 53.977337110481585, + "loss": 0.10413302481174469, + "loss_ce": 0.00014437444042414427, + "loss_iou": 0.47265625, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 815938556, + "step": 4753 + }, + { + "epoch": 53.98866855524079, + "grad_norm": 3.897100742484984, + "learning_rate": 5e-06, + "loss": 0.0906, + "num_input_tokens_seen": 816110692, + "step": 4754 + }, + { + "epoch": 53.98866855524079, + "loss": 0.168490469455719, + "loss_ce": 0.00014026153075974435, + "loss_iou": 0.34375, + "loss_num": 0.03369140625, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 816110692, + "step": 4754 + }, + { + "epoch": 54.0, + "grad_norm": 3.992371962658389, + "learning_rate": 5e-06, + "loss": 0.1294, + "num_input_tokens_seen": 816282776, + "step": 4755 + }, + { + "epoch": 54.0, + "loss": 0.21594902873039246, + "loss_ce": 0.00014396820915862918, + "loss_iou": 0.546875, + "loss_num": 0.043212890625, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 816282776, + "step": 4755 + }, + { + "epoch": 54.01133144475921, + "grad_norm": 4.798275610049018, + "learning_rate": 5e-06, + "loss": 0.0704, + "num_input_tokens_seen": 816454904, + "step": 4756 + }, + { + "epoch": 54.01133144475921, + "loss": 0.11086331307888031, + "loss_ce": 8.450080349575728e-05, + "loss_iou": 0.53125, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 816454904, + "step": 4756 + }, + { + "epoch": 54.022662889518415, + "grad_norm": 4.0198215592283, + "learning_rate": 5e-06, + "loss": 0.0632, + "num_input_tokens_seen": 816626276, + "step": 4757 + }, + { + "epoch": 54.022662889518415, + "loss": 0.07552963495254517, + "loss_ce": 0.00012069502554368228, + "loss_iou": 0.58984375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 816626276, + "step": 4757 + }, + { + "epoch": 54.03399433427762, + "grad_norm": 5.4977803106136145, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 816796748, + "step": 4758 + }, + { + "epoch": 54.03399433427762, + "loss": 0.062033962458372116, + "loss_ce": 0.00026638322742655873, + "loss_iou": 0.5703125, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 816796748, + "step": 4758 + }, + { + "epoch": 54.04532577903683, + "grad_norm": 4.215787564761496, + "learning_rate": 5e-06, + "loss": 0.0855, + "num_input_tokens_seen": 816967408, + "step": 4759 + }, + { + "epoch": 54.04532577903683, + "loss": 0.048968251794576645, + "loss_ce": 0.00010961154475808144, + "loss_iou": 0.0673828125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 816967408, + "step": 4759 + }, + { + "epoch": 54.05665722379604, + "grad_norm": 2.854494039973909, + "learning_rate": 5e-06, + "loss": 0.0457, + "num_input_tokens_seen": 817139676, + "step": 4760 + }, + { + "epoch": 54.05665722379604, + "loss": 0.07032491266727448, + "loss_ce": 0.00011922520934604108, + "loss_iou": 0.46875, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 817139676, + "step": 4760 + }, + { + "epoch": 54.06798866855524, + "grad_norm": 3.8089038966549555, + "learning_rate": 5e-06, + "loss": 0.0515, + "num_input_tokens_seen": 817311660, + "step": 4761 + }, + { + "epoch": 54.06798866855524, + "loss": 0.060257233679294586, + "loss_ce": 7.656498928554356e-05, + "loss_iou": 0.46875, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 817311660, + "step": 4761 + }, + { + "epoch": 54.079320113314445, + "grad_norm": 4.2336133692485145, + "learning_rate": 5e-06, + "loss": 0.0919, + "num_input_tokens_seen": 817483628, + "step": 4762 + }, + { + "epoch": 54.079320113314445, + "loss": 0.054187577217817307, + "loss_ce": 0.00011043060658266768, + "loss_iou": 0.5234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 817483628, + "step": 4762 + }, + { + "epoch": 54.09065155807365, + "grad_norm": 4.09759752186007, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 817654044, + "step": 4763 + }, + { + "epoch": 54.09065155807365, + "loss": 0.06972815841436386, + "loss_ce": 0.00020911383035127074, + "loss_iou": 0.6171875, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 817654044, + "step": 4763 + }, + { + "epoch": 54.10198300283286, + "grad_norm": 4.304100044369193, + "learning_rate": 5e-06, + "loss": 0.0981, + "num_input_tokens_seen": 817825252, + "step": 4764 + }, + { + "epoch": 54.10198300283286, + "loss": 0.07415296137332916, + "loss_ce": 0.0001173118653241545, + "loss_iou": 0.33984375, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 817825252, + "step": 4764 + }, + { + "epoch": 54.11331444759207, + "grad_norm": 3.3664759933973154, + "learning_rate": 5e-06, + "loss": 0.0487, + "num_input_tokens_seen": 817996288, + "step": 4765 + }, + { + "epoch": 54.11331444759207, + "loss": 0.043957240879535675, + "loss_ce": 7.296632247744128e-05, + "loss_iou": 0.4375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 817996288, + "step": 4765 + }, + { + "epoch": 54.124645892351275, + "grad_norm": 2.6768072844900317, + "learning_rate": 5e-06, + "loss": 0.095, + "num_input_tokens_seen": 818168404, + "step": 4766 + }, + { + "epoch": 54.124645892351275, + "loss": 0.08594170212745667, + "loss_ce": 4.9977148592006415e-05, + "loss_iou": 0.55078125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 818168404, + "step": 4766 + }, + { + "epoch": 54.13597733711048, + "grad_norm": 3.0232286716919394, + "learning_rate": 5e-06, + "loss": 0.0887, + "num_input_tokens_seen": 818338656, + "step": 4767 + }, + { + "epoch": 54.13597733711048, + "loss": 0.13524626195430756, + "loss_ce": 0.00016019925533328205, + "loss_iou": 0.306640625, + "loss_num": 0.0269775390625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 818338656, + "step": 4767 + }, + { + "epoch": 54.14730878186969, + "grad_norm": 3.923562074053229, + "learning_rate": 5e-06, + "loss": 0.1022, + "num_input_tokens_seen": 818510120, + "step": 4768 + }, + { + "epoch": 54.14730878186969, + "loss": 0.132354736328125, + "loss_ce": 0.00010681435378501192, + "loss_iou": 0.3671875, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 818510120, + "step": 4768 + }, + { + "epoch": 54.1586402266289, + "grad_norm": 4.721469361154377, + "learning_rate": 5e-06, + "loss": 0.0848, + "num_input_tokens_seen": 818681576, + "step": 4769 + }, + { + "epoch": 54.1586402266289, + "loss": 0.07277920097112656, + "loss_ce": 0.00013210689940024167, + "loss_iou": 0.640625, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 818681576, + "step": 4769 + }, + { + "epoch": 54.169971671388105, + "grad_norm": 3.110943331057615, + "learning_rate": 5e-06, + "loss": 0.0424, + "num_input_tokens_seen": 818852528, + "step": 4770 + }, + { + "epoch": 54.169971671388105, + "loss": 0.03584219142794609, + "loss_ce": 2.9813183573423885e-05, + "loss_iou": 0.34375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 818852528, + "step": 4770 + }, + { + "epoch": 54.18130311614731, + "grad_norm": 4.983396811961203, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 819022796, + "step": 4771 + }, + { + "epoch": 54.18130311614731, + "loss": 0.05537346005439758, + "loss_ce": 6.03479020355735e-05, + "loss_iou": 0.4296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 819022796, + "step": 4771 + }, + { + "epoch": 54.19263456090651, + "grad_norm": 4.125428968528424, + "learning_rate": 5e-06, + "loss": 0.0545, + "num_input_tokens_seen": 819192240, + "step": 4772 + }, + { + "epoch": 54.19263456090651, + "loss": 0.054470986127853394, + "loss_ce": 0.00018021788855548948, + "loss_iou": 0.404296875, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 819192240, + "step": 4772 + }, + { + "epoch": 54.20396600566572, + "grad_norm": 3.196326019502915, + "learning_rate": 5e-06, + "loss": 0.0629, + "num_input_tokens_seen": 819363680, + "step": 4773 + }, + { + "epoch": 54.20396600566572, + "loss": 0.06213907524943352, + "loss_ce": 6.63199316477403e-05, + "loss_iou": 0.65234375, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 819363680, + "step": 4773 + }, + { + "epoch": 54.21529745042493, + "grad_norm": 3.220228013607888, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 819535452, + "step": 4774 + }, + { + "epoch": 54.21529745042493, + "loss": 0.059016890823841095, + "loss_ce": 0.00022477943275589496, + "loss_iou": 0.419921875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 819535452, + "step": 4774 + }, + { + "epoch": 54.226628895184135, + "grad_norm": 3.777664569826417, + "learning_rate": 5e-06, + "loss": 0.0769, + "num_input_tokens_seen": 819706504, + "step": 4775 + }, + { + "epoch": 54.226628895184135, + "loss": 0.03237161785364151, + "loss_ce": 8.401776722166687e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 819706504, + "step": 4775 + }, + { + "epoch": 54.23796033994334, + "grad_norm": 3.3608117550637724, + "learning_rate": 5e-06, + "loss": 0.0672, + "num_input_tokens_seen": 819877244, + "step": 4776 + }, + { + "epoch": 54.23796033994334, + "loss": 0.04349207133054733, + "loss_ce": 0.00024866312742233276, + "loss_iou": 0.3671875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 819877244, + "step": 4776 + }, + { + "epoch": 54.24929178470255, + "grad_norm": 3.905544656208704, + "learning_rate": 5e-06, + "loss": 0.0671, + "num_input_tokens_seen": 820048000, + "step": 4777 + }, + { + "epoch": 54.24929178470255, + "loss": 0.04804790019989014, + "loss_ce": 7.426677620969713e-05, + "loss_iou": 0.462890625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 820048000, + "step": 4777 + }, + { + "epoch": 54.26062322946176, + "grad_norm": 4.476319027336858, + "learning_rate": 5e-06, + "loss": 0.0555, + "num_input_tokens_seen": 820219732, + "step": 4778 + }, + { + "epoch": 54.26062322946176, + "loss": 0.05025298893451691, + "loss_ce": 8.208947838284075e-05, + "loss_iou": 0.498046875, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 820219732, + "step": 4778 + }, + { + "epoch": 54.271954674220964, + "grad_norm": 4.020817647951555, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 820391716, + "step": 4779 + }, + { + "epoch": 54.271954674220964, + "loss": 0.05510406196117401, + "loss_ce": 6.560588371939957e-05, + "loss_iou": 0.45703125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 820391716, + "step": 4779 + }, + { + "epoch": 54.28328611898017, + "grad_norm": 3.3421576503910453, + "learning_rate": 5e-06, + "loss": 0.0616, + "num_input_tokens_seen": 820563260, + "step": 4780 + }, + { + "epoch": 54.28328611898017, + "loss": 0.07333078980445862, + "loss_ce": 0.00027171504916623235, + "loss_iou": 0.228515625, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 820563260, + "step": 4780 + }, + { + "epoch": 54.29461756373938, + "grad_norm": 3.928710372330599, + "learning_rate": 5e-06, + "loss": 0.0523, + "num_input_tokens_seen": 820734788, + "step": 4781 + }, + { + "epoch": 54.29461756373938, + "loss": 0.04864327237010002, + "loss_ce": 5.9288166085025296e-05, + "loss_iou": 0.462890625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 820734788, + "step": 4781 + }, + { + "epoch": 54.30594900849859, + "grad_norm": 4.136087283622599, + "learning_rate": 5e-06, + "loss": 0.047, + "num_input_tokens_seen": 820905200, + "step": 4782 + }, + { + "epoch": 54.30594900849859, + "loss": 0.04021323844790459, + "loss_ce": 9.788366151042283e-05, + "loss_iou": 0.47265625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 820905200, + "step": 4782 + }, + { + "epoch": 54.31728045325779, + "grad_norm": 4.100370047611949, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 821076488, + "step": 4783 + }, + { + "epoch": 54.31728045325779, + "loss": 0.051346998661756516, + "loss_ce": 0.0001385059003951028, + "loss_iou": 0.42578125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 821076488, + "step": 4783 + }, + { + "epoch": 54.328611898016995, + "grad_norm": 4.355560878216246, + "learning_rate": 5e-06, + "loss": 0.0566, + "num_input_tokens_seen": 821247328, + "step": 4784 + }, + { + "epoch": 54.328611898016995, + "loss": 0.03923734650015831, + "loss_ce": 8.329332194989547e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 821247328, + "step": 4784 + }, + { + "epoch": 54.3399433427762, + "grad_norm": 3.6971697671546213, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 821419012, + "step": 4785 + }, + { + "epoch": 54.3399433427762, + "loss": 0.08891374617815018, + "loss_ce": 7.707713666604832e-05, + "loss_iou": 0.48828125, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 821419012, + "step": 4785 + }, + { + "epoch": 54.35127478753541, + "grad_norm": 5.102105084891512, + "learning_rate": 5e-06, + "loss": 0.0793, + "num_input_tokens_seen": 821588980, + "step": 4786 + }, + { + "epoch": 54.35127478753541, + "loss": 0.12474104762077332, + "loss_ce": 3.0967781640356407e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0250244140625, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 821588980, + "step": 4786 + }, + { + "epoch": 54.36260623229462, + "grad_norm": 2.9079707183029484, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 821761080, + "step": 4787 + }, + { + "epoch": 54.36260623229462, + "loss": 0.03385075181722641, + "loss_ce": 0.00013645895523950458, + "loss_iou": 0.486328125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 821761080, + "step": 4787 + }, + { + "epoch": 54.373937677053824, + "grad_norm": 4.197997791580651, + "learning_rate": 5e-06, + "loss": 0.0623, + "num_input_tokens_seen": 821931384, + "step": 4788 + }, + { + "epoch": 54.373937677053824, + "loss": 0.07166539132595062, + "loss_ce": 0.0001474460877943784, + "loss_iou": 0.359375, + "loss_num": 0.01434326171875, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 821931384, + "step": 4788 + }, + { + "epoch": 54.38526912181303, + "grad_norm": 4.318176769670441, + "learning_rate": 5e-06, + "loss": 0.056, + "num_input_tokens_seen": 822102316, + "step": 4789 + }, + { + "epoch": 54.38526912181303, + "loss": 0.05376601964235306, + "loss_ce": 7.034085865598172e-05, + "loss_iou": 0.578125, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 822102316, + "step": 4789 + }, + { + "epoch": 54.39660056657224, + "grad_norm": 4.164191010859506, + "learning_rate": 5e-06, + "loss": 0.11, + "num_input_tokens_seen": 822273496, + "step": 4790 + }, + { + "epoch": 54.39660056657224, + "loss": 0.18763966858386993, + "loss_ce": 0.00012442198931239545, + "loss_iou": 0.455078125, + "loss_num": 0.03759765625, + "loss_xval": 0.1875, + "num_input_tokens_seen": 822273496, + "step": 4790 + }, + { + "epoch": 54.40793201133145, + "grad_norm": 4.5370911849606665, + "learning_rate": 5e-06, + "loss": 0.0728, + "num_input_tokens_seen": 822445404, + "step": 4791 + }, + { + "epoch": 54.40793201133145, + "loss": 0.0644446611404419, + "loss_ce": 0.0001746386697050184, + "loss_iou": 0.427734375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 822445404, + "step": 4791 + }, + { + "epoch": 54.419263456090654, + "grad_norm": 4.603239099516089, + "learning_rate": 5e-06, + "loss": 0.1049, + "num_input_tokens_seen": 822615796, + "step": 4792 + }, + { + "epoch": 54.419263456090654, + "loss": 0.09498606622219086, + "loss_ce": 0.00010690891940612346, + "loss_iou": 0.4140625, + "loss_num": 0.01904296875, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 822615796, + "step": 4792 + }, + { + "epoch": 54.43059490084986, + "grad_norm": 4.175609154193794, + "learning_rate": 5e-06, + "loss": 0.0701, + "num_input_tokens_seen": 822787900, + "step": 4793 + }, + { + "epoch": 54.43059490084986, + "loss": 0.038354288786649704, + "loss_ce": 0.00016153889009729028, + "loss_iou": 0.4765625, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 822787900, + "step": 4793 + }, + { + "epoch": 54.44192634560906, + "grad_norm": 3.385730142833818, + "learning_rate": 5e-06, + "loss": 0.0719, + "num_input_tokens_seen": 822959392, + "step": 4794 + }, + { + "epoch": 54.44192634560906, + "loss": 0.06145177036523819, + "loss_ce": 3.514348281896673e-05, + "loss_iou": 0.51953125, + "loss_num": 0.01226806640625, + "loss_xval": 0.0615234375, + "num_input_tokens_seen": 822959392, + "step": 4794 + }, + { + "epoch": 54.45325779036827, + "grad_norm": 4.109561322232012, + "learning_rate": 5e-06, + "loss": 0.0595, + "num_input_tokens_seen": 823131144, + "step": 4795 + }, + { + "epoch": 54.45325779036827, + "loss": 0.06852729618549347, + "loss_ce": 7.637016824446619e-05, + "loss_iou": 0.5859375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 823131144, + "step": 4795 + }, + { + "epoch": 54.46458923512748, + "grad_norm": 3.3794133644466093, + "learning_rate": 5e-06, + "loss": 0.0574, + "num_input_tokens_seen": 823301424, + "step": 4796 + }, + { + "epoch": 54.46458923512748, + "loss": 0.06543462723493576, + "loss_ce": 5.071881241747178e-05, + "loss_iou": 0.439453125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 823301424, + "step": 4796 + }, + { + "epoch": 54.475920679886684, + "grad_norm": 3.5252525999746678, + "learning_rate": 5e-06, + "loss": 0.0861, + "num_input_tokens_seen": 823473456, + "step": 4797 + }, + { + "epoch": 54.475920679886684, + "loss": 0.1188625916838646, + "loss_ce": 0.0002712833811528981, + "loss_iou": 0.126953125, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 823473456, + "step": 4797 + }, + { + "epoch": 54.48725212464589, + "grad_norm": 2.5652412742463855, + "learning_rate": 5e-06, + "loss": 0.0603, + "num_input_tokens_seen": 823645064, + "step": 4798 + }, + { + "epoch": 54.48725212464589, + "loss": 0.09908372908830643, + "loss_ce": 0.00010759633732959628, + "loss_iou": 0.15234375, + "loss_num": 0.019775390625, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 823645064, + "step": 4798 + }, + { + "epoch": 54.4985835694051, + "grad_norm": 2.946514585062964, + "learning_rate": 5e-06, + "loss": 0.0532, + "num_input_tokens_seen": 823814164, + "step": 4799 + }, + { + "epoch": 54.4985835694051, + "loss": 0.07510152459144592, + "loss_ce": 0.0001656093809287995, + "loss_iou": 0.265625, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 823814164, + "step": 4799 + }, + { + "epoch": 54.50991501416431, + "grad_norm": 3.559158584586017, + "learning_rate": 5e-06, + "loss": 0.0579, + "num_input_tokens_seen": 823984380, + "step": 4800 + }, + { + "epoch": 54.50991501416431, + "loss": 0.04265569522976875, + "loss_ce": 0.0001294516696361825, + "loss_iou": 0.34765625, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 823984380, + "step": 4800 + }, + { + "epoch": 54.521246458923514, + "grad_norm": 2.2563742430502542, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 824154940, + "step": 4801 + }, + { + "epoch": 54.521246458923514, + "loss": 0.11463603377342224, + "loss_ce": 0.0001417134189978242, + "loss_iou": 0.421875, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 824154940, + "step": 4801 + }, + { + "epoch": 54.53257790368272, + "grad_norm": 3.9751600473707454, + "learning_rate": 5e-06, + "loss": 0.0567, + "num_input_tokens_seen": 824326540, + "step": 4802 + }, + { + "epoch": 54.53257790368272, + "loss": 0.04263674467802048, + "loss_ce": 0.00011049696331610903, + "loss_iou": 0.43359375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 824326540, + "step": 4802 + }, + { + "epoch": 54.54390934844193, + "grad_norm": 3.293858694691053, + "learning_rate": 5e-06, + "loss": 0.0888, + "num_input_tokens_seen": 824497492, + "step": 4803 + }, + { + "epoch": 54.54390934844193, + "loss": 0.10928931832313538, + "loss_ce": 2.112583206326235e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0218505859375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 824497492, + "step": 4803 + }, + { + "epoch": 54.555240793201136, + "grad_norm": 2.866017122208915, + "learning_rate": 5e-06, + "loss": 0.0548, + "num_input_tokens_seen": 824668488, + "step": 4804 + }, + { + "epoch": 54.555240793201136, + "loss": 0.05335213243961334, + "loss_ce": 0.0001294738322030753, + "loss_iou": 0.38671875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 824668488, + "step": 4804 + }, + { + "epoch": 54.56657223796034, + "grad_norm": 3.6457935048700274, + "learning_rate": 5e-06, + "loss": 0.0743, + "num_input_tokens_seen": 824840116, + "step": 4805 + }, + { + "epoch": 54.56657223796034, + "loss": 0.08859752118587494, + "loss_ce": 8.128270565066487e-05, + "loss_iou": 0.51171875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 824840116, + "step": 4805 + }, + { + "epoch": 54.577903682719544, + "grad_norm": 3.7502007928505567, + "learning_rate": 5e-06, + "loss": 0.0779, + "num_input_tokens_seen": 825012052, + "step": 4806 + }, + { + "epoch": 54.577903682719544, + "loss": 0.0466158390045166, + "loss_ce": 4.601352702593431e-05, + "loss_iou": 0.5546875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 825012052, + "step": 4806 + }, + { + "epoch": 54.58923512747875, + "grad_norm": 3.7225774111623346, + "learning_rate": 5e-06, + "loss": 0.0859, + "num_input_tokens_seen": 825184044, + "step": 4807 + }, + { + "epoch": 54.58923512747875, + "loss": 0.04037688672542572, + "loss_ce": 0.00012420277926139534, + "loss_iou": 0.4765625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 825184044, + "step": 4807 + }, + { + "epoch": 54.60056657223796, + "grad_norm": 3.889638768464499, + "learning_rate": 5e-06, + "loss": 0.1017, + "num_input_tokens_seen": 825352924, + "step": 4808 + }, + { + "epoch": 54.60056657223796, + "loss": 0.06766480952501297, + "loss_ce": 5.311829590937123e-05, + "loss_iou": 0.494140625, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 825352924, + "step": 4808 + }, + { + "epoch": 54.611898016997166, + "grad_norm": 4.49605031857285, + "learning_rate": 5e-06, + "loss": 0.0794, + "num_input_tokens_seen": 825525080, + "step": 4809 + }, + { + "epoch": 54.611898016997166, + "loss": 0.052674490958452225, + "loss_ce": 0.00015373813221231103, + "loss_iou": 0.390625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 825525080, + "step": 4809 + }, + { + "epoch": 54.623229461756374, + "grad_norm": 3.9350984044921895, + "learning_rate": 5e-06, + "loss": 0.1058, + "num_input_tokens_seen": 825697280, + "step": 4810 + }, + { + "epoch": 54.623229461756374, + "loss": 0.09486401826143265, + "loss_ce": 0.00016797560965642333, + "loss_iou": 0.53515625, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 825697280, + "step": 4810 + }, + { + "epoch": 54.63456090651558, + "grad_norm": 3.927121727963861, + "learning_rate": 5e-06, + "loss": 0.0883, + "num_input_tokens_seen": 825866944, + "step": 4811 + }, + { + "epoch": 54.63456090651558, + "loss": 0.05210106074810028, + "loss_ce": 0.00010673302313080058, + "loss_iou": 0.515625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 825866944, + "step": 4811 + }, + { + "epoch": 54.64589235127479, + "grad_norm": 3.6655260560736753, + "learning_rate": 5e-06, + "loss": 0.0764, + "num_input_tokens_seen": 826037992, + "step": 4812 + }, + { + "epoch": 54.64589235127479, + "loss": 0.14142560958862305, + "loss_ce": 6.81814708514139e-05, + "loss_iou": 0.490234375, + "loss_num": 0.0281982421875, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 826037992, + "step": 4812 + }, + { + "epoch": 54.657223796033996, + "grad_norm": 3.7347683776950245, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 826209604, + "step": 4813 + }, + { + "epoch": 54.657223796033996, + "loss": 0.0608188733458519, + "loss_ce": 0.00016518283518962562, + "loss_iou": 0.5, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 826209604, + "step": 4813 + }, + { + "epoch": 54.668555240793204, + "grad_norm": 3.895897289203231, + "learning_rate": 5e-06, + "loss": 0.075, + "num_input_tokens_seen": 826381536, + "step": 4814 + }, + { + "epoch": 54.668555240793204, + "loss": 0.0637035071849823, + "loss_ce": 5.9096742916153744e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 826381536, + "step": 4814 + }, + { + "epoch": 54.67988668555241, + "grad_norm": 3.3592646523617717, + "learning_rate": 5e-06, + "loss": 0.0449, + "num_input_tokens_seen": 826553720, + "step": 4815 + }, + { + "epoch": 54.67988668555241, + "loss": 0.034071944653987885, + "loss_ce": 0.0001211362105095759, + "loss_iou": 0.203125, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 826553720, + "step": 4815 + }, + { + "epoch": 54.69121813031161, + "grad_norm": 3.540193329895928, + "learning_rate": 5e-06, + "loss": 0.0803, + "num_input_tokens_seen": 826725536, + "step": 4816 + }, + { + "epoch": 54.69121813031161, + "loss": 0.04260273277759552, + "loss_ce": 3.071070386795327e-05, + "loss_iou": 0.27734375, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 826725536, + "step": 4816 + }, + { + "epoch": 54.70254957507082, + "grad_norm": 3.380776251926493, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 826897596, + "step": 4817 + }, + { + "epoch": 54.70254957507082, + "loss": 0.050521381199359894, + "loss_ce": 0.0001521162485005334, + "loss_iou": 0.330078125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 826897596, + "step": 4817 + }, + { + "epoch": 54.713881019830026, + "grad_norm": 2.8592284379065123, + "learning_rate": 5e-06, + "loss": 0.0776, + "num_input_tokens_seen": 827069444, + "step": 4818 + }, + { + "epoch": 54.713881019830026, + "loss": 0.045377492904663086, + "loss_ce": 0.000303027278278023, + "loss_iou": 0.33984375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 827069444, + "step": 4818 + }, + { + "epoch": 54.725212464589234, + "grad_norm": 3.703633199262127, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 827241208, + "step": 4819 + }, + { + "epoch": 54.725212464589234, + "loss": 0.0570884607732296, + "loss_ce": 0.0001884365192381665, + "loss_iou": 0.3125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 827241208, + "step": 4819 + }, + { + "epoch": 54.73654390934844, + "grad_norm": 12.420864520741524, + "learning_rate": 5e-06, + "loss": 0.1041, + "num_input_tokens_seen": 827412980, + "step": 4820 + }, + { + "epoch": 54.73654390934844, + "loss": 0.11870403587818146, + "loss_ce": 5.168938514543697e-05, + "loss_iou": 0.48046875, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 827412980, + "step": 4820 + }, + { + "epoch": 54.74787535410765, + "grad_norm": 3.8378961402377056, + "learning_rate": 5e-06, + "loss": 0.0619, + "num_input_tokens_seen": 827584712, + "step": 4821 + }, + { + "epoch": 54.74787535410765, + "loss": 0.04571519419550896, + "loss_ce": 4.5638960727956146e-05, + "loss_iou": 0.62109375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 827584712, + "step": 4821 + }, + { + "epoch": 54.759206798866856, + "grad_norm": 3.449814975254727, + "learning_rate": 5e-06, + "loss": 0.0818, + "num_input_tokens_seen": 827756648, + "step": 4822 + }, + { + "epoch": 54.759206798866856, + "loss": 0.07373366504907608, + "loss_ce": 3.371223283465952e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 827756648, + "step": 4822 + }, + { + "epoch": 54.77053824362606, + "grad_norm": 3.4363965785193296, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 827928464, + "step": 4823 + }, + { + "epoch": 54.77053824362606, + "loss": 0.05223904922604561, + "loss_ce": 0.00032864813692867756, + "loss_iou": 0.494140625, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 827928464, + "step": 4823 + }, + { + "epoch": 54.78186968838527, + "grad_norm": 4.089368975478395, + "learning_rate": 5e-06, + "loss": 0.0743, + "num_input_tokens_seen": 828100456, + "step": 4824 + }, + { + "epoch": 54.78186968838527, + "loss": 0.1433524787425995, + "loss_ce": 0.00013347614731173962, + "loss_iou": 0.458984375, + "loss_num": 0.028564453125, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 828100456, + "step": 4824 + }, + { + "epoch": 54.79320113314448, + "grad_norm": 4.224494235415107, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 828271344, + "step": 4825 + }, + { + "epoch": 54.79320113314448, + "loss": 0.08384694159030914, + "loss_ce": 0.00013722653966397047, + "loss_iou": 0.314453125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 828271344, + "step": 4825 + }, + { + "epoch": 54.804532577903686, + "grad_norm": 4.132696797184806, + "learning_rate": 5e-06, + "loss": 0.068, + "num_input_tokens_seen": 828443304, + "step": 4826 + }, + { + "epoch": 54.804532577903686, + "loss": 0.060608796775341034, + "loss_ce": 0.00016873680579010397, + "loss_iou": 0.37109375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 828443304, + "step": 4826 + }, + { + "epoch": 54.815864022662886, + "grad_norm": 5.715146873866257, + "learning_rate": 5e-06, + "loss": 0.0745, + "num_input_tokens_seen": 828614272, + "step": 4827 + }, + { + "epoch": 54.815864022662886, + "loss": 0.15620847046375275, + "loss_ce": 0.00030941443401388824, + "loss_iou": 0.453125, + "loss_num": 0.0311279296875, + "loss_xval": 0.15625, + "num_input_tokens_seen": 828614272, + "step": 4827 + }, + { + "epoch": 54.827195467422094, + "grad_norm": 3.5342595585611107, + "learning_rate": 5e-06, + "loss": 0.0584, + "num_input_tokens_seen": 828785468, + "step": 4828 + }, + { + "epoch": 54.827195467422094, + "loss": 0.07759758830070496, + "loss_ce": 0.0001744925684761256, + "loss_iou": 0.51171875, + "loss_num": 0.01544189453125, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 828785468, + "step": 4828 + }, + { + "epoch": 54.8385269121813, + "grad_norm": 3.113392249446793, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 828957024, + "step": 4829 + }, + { + "epoch": 54.8385269121813, + "loss": 0.059903040528297424, + "loss_ce": 0.00011910706234630197, + "loss_iou": 0.369140625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 828957024, + "step": 4829 + }, + { + "epoch": 54.84985835694051, + "grad_norm": 3.376017094281157, + "learning_rate": 5e-06, + "loss": 0.0649, + "num_input_tokens_seen": 829129308, + "step": 4830 + }, + { + "epoch": 54.84985835694051, + "loss": 0.08225090056657791, + "loss_ce": 0.00012809839972760528, + "loss_iou": 0.310546875, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 829129308, + "step": 4830 + }, + { + "epoch": 54.861189801699716, + "grad_norm": 3.367628310907048, + "learning_rate": 5e-06, + "loss": 0.0599, + "num_input_tokens_seen": 829299544, + "step": 4831 + }, + { + "epoch": 54.861189801699716, + "loss": 0.08068462461233139, + "loss_ce": 0.00016399394371546805, + "loss_iou": 0.59375, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 829299544, + "step": 4831 + }, + { + "epoch": 54.87252124645892, + "grad_norm": 4.490877263604367, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 829471672, + "step": 4832 + }, + { + "epoch": 54.87252124645892, + "loss": 0.0562872551381588, + "loss_ce": 0.0001043960073729977, + "loss_iou": 0.3828125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 829471672, + "step": 4832 + }, + { + "epoch": 54.88385269121813, + "grad_norm": 3.5382005892734947, + "learning_rate": 5e-06, + "loss": 0.0556, + "num_input_tokens_seen": 829643252, + "step": 4833 + }, + { + "epoch": 54.88385269121813, + "loss": 0.085013747215271, + "loss_ce": 0.0001291025837417692, + "loss_iou": 0.515625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 829643252, + "step": 4833 + }, + { + "epoch": 54.89518413597734, + "grad_norm": 3.604868679760141, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 829815276, + "step": 4834 + }, + { + "epoch": 54.89518413597734, + "loss": 0.0672743022441864, + "loss_ce": 0.00012036891712341458, + "loss_iou": 0.4140625, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 829815276, + "step": 4834 + }, + { + "epoch": 54.906515580736546, + "grad_norm": 4.158991697365566, + "learning_rate": 5e-06, + "loss": 0.0817, + "num_input_tokens_seen": 829986936, + "step": 4835 + }, + { + "epoch": 54.906515580736546, + "loss": 0.03940323740243912, + "loss_ce": 9.659463103162125e-05, + "loss_iou": 0.47265625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 829986936, + "step": 4835 + }, + { + "epoch": 54.91784702549575, + "grad_norm": 4.790648861877688, + "learning_rate": 5e-06, + "loss": 0.069, + "num_input_tokens_seen": 830158848, + "step": 4836 + }, + { + "epoch": 54.91784702549575, + "loss": 0.03385428339242935, + "loss_ce": 0.00014761844067834318, + "loss_iou": 0.427734375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 830158848, + "step": 4836 + }, + { + "epoch": 54.92917847025496, + "grad_norm": 3.774530388782342, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 830330192, + "step": 4837 + }, + { + "epoch": 54.92917847025496, + "loss": 0.07731056958436966, + "loss_ce": 0.00013160944217815995, + "loss_iou": 0.69921875, + "loss_num": 0.01544189453125, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 830330192, + "step": 4837 + }, + { + "epoch": 54.94050991501416, + "grad_norm": 3.100728104340838, + "learning_rate": 5e-06, + "loss": 0.058, + "num_input_tokens_seen": 830502468, + "step": 4838 + }, + { + "epoch": 54.94050991501416, + "loss": 0.06617963314056396, + "loss_ce": 0.00020063022384420037, + "loss_iou": 0.443359375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 830502468, + "step": 4838 + }, + { + "epoch": 54.95184135977337, + "grad_norm": 4.123130030874466, + "learning_rate": 5e-06, + "loss": 0.0859, + "num_input_tokens_seen": 830674192, + "step": 4839 + }, + { + "epoch": 54.95184135977337, + "loss": 0.06947401165962219, + "loss_ce": 0.00016859086463227868, + "loss_iou": 0.53125, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 830674192, + "step": 4839 + }, + { + "epoch": 54.963172804532576, + "grad_norm": 4.231864916395142, + "learning_rate": 5e-06, + "loss": 0.0897, + "num_input_tokens_seen": 830844016, + "step": 4840 + }, + { + "epoch": 54.963172804532576, + "loss": 0.13347838819026947, + "loss_ce": 0.00014708723756484687, + "loss_iou": 0.546875, + "loss_num": 0.026611328125, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 830844016, + "step": 4840 + }, + { + "epoch": 54.97450424929178, + "grad_norm": 3.291507432454373, + "learning_rate": 5e-06, + "loss": 0.0479, + "num_input_tokens_seen": 831014900, + "step": 4841 + }, + { + "epoch": 54.97450424929178, + "loss": 0.0711250975728035, + "loss_ce": 0.00011069620086345822, + "loss_iou": 0.2255859375, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 831014900, + "step": 4841 + }, + { + "epoch": 54.98583569405099, + "grad_norm": 3.754441083026507, + "learning_rate": 5e-06, + "loss": 0.0934, + "num_input_tokens_seen": 831186668, + "step": 4842 + }, + { + "epoch": 54.98583569405099, + "loss": 0.044196151196956635, + "loss_ce": 5.247210356174037e-05, + "loss_iou": 0.3515625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 831186668, + "step": 4842 + }, + { + "epoch": 54.9971671388102, + "grad_norm": 4.812387360169728, + "learning_rate": 5e-06, + "loss": 0.088, + "num_input_tokens_seen": 831358496, + "step": 4843 + }, + { + "epoch": 54.9971671388102, + "loss": 0.12149156630039215, + "loss_ce": 0.0001841913181124255, + "loss_iou": 0.5234375, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 831358496, + "step": 4843 + }, + { + "epoch": 54.9971671388102, + "loss": 0.05885420739650726, + "loss_ce": 0.000367268017726019, + "loss_iou": 0.314453125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 831401396, + "step": 4843 + }, + { + "epoch": 55.008498583569406, + "grad_norm": 3.08002992962033, + "learning_rate": 5e-06, + "loss": 0.0794, + "num_input_tokens_seen": 831530268, + "step": 4844 + }, + { + "epoch": 55.008498583569406, + "loss": 0.049395687878131866, + "loss_ce": 7.928359264042228e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 831530268, + "step": 4844 + }, + { + "epoch": 55.01983002832861, + "grad_norm": 3.3385109741113417, + "learning_rate": 5e-06, + "loss": 0.0767, + "num_input_tokens_seen": 831701532, + "step": 4845 + }, + { + "epoch": 55.01983002832861, + "loss": 0.08859927952289581, + "loss_ce": 0.00015934000839479268, + "loss_iou": 0.345703125, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 831701532, + "step": 4845 + }, + { + "epoch": 55.03116147308782, + "grad_norm": 3.591264079351339, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 831873124, + "step": 4846 + }, + { + "epoch": 55.03116147308782, + "loss": 0.14700305461883545, + "loss_ce": 6.090020542615093e-05, + "loss_iou": 0.44140625, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 831873124, + "step": 4846 + }, + { + "epoch": 55.04249291784703, + "grad_norm": 3.751691612207764, + "learning_rate": 5e-06, + "loss": 0.1108, + "num_input_tokens_seen": 832045180, + "step": 4847 + }, + { + "epoch": 55.04249291784703, + "loss": 0.040723513811826706, + "loss_ce": 7.409861427731812e-05, + "loss_iou": 0.458984375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 832045180, + "step": 4847 + }, + { + "epoch": 55.053824362606235, + "grad_norm": 3.5488526355455243, + "learning_rate": 5e-06, + "loss": 0.0571, + "num_input_tokens_seen": 832217084, + "step": 4848 + }, + { + "epoch": 55.053824362606235, + "loss": 0.06552620232105255, + "loss_ce": 2.0215240510879084e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 832217084, + "step": 4848 + }, + { + "epoch": 55.065155807365436, + "grad_norm": 3.451855976311762, + "learning_rate": 5e-06, + "loss": 0.052, + "num_input_tokens_seen": 832387800, + "step": 4849 + }, + { + "epoch": 55.065155807365436, + "loss": 0.050868839025497437, + "loss_ce": 0.00010284706513630226, + "loss_iou": 0.375, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 832387800, + "step": 4849 + }, + { + "epoch": 55.07648725212464, + "grad_norm": 3.4960358945510177, + "learning_rate": 5e-06, + "loss": 0.0628, + "num_input_tokens_seen": 832559164, + "step": 4850 + }, + { + "epoch": 55.07648725212464, + "loss": 0.04941345751285553, + "loss_ce": 5.127799522597343e-05, + "loss_iou": 0.462890625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 832559164, + "step": 4850 + }, + { + "epoch": 55.08781869688385, + "grad_norm": 3.019830567129509, + "learning_rate": 5e-06, + "loss": 0.0732, + "num_input_tokens_seen": 832729004, + "step": 4851 + }, + { + "epoch": 55.08781869688385, + "loss": 0.10447005927562714, + "loss_ce": 0.0003135710139758885, + "loss_iou": 0.40234375, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 832729004, + "step": 4851 + }, + { + "epoch": 55.09915014164306, + "grad_norm": 5.171988480875635, + "learning_rate": 5e-06, + "loss": 0.0449, + "num_input_tokens_seen": 832901252, + "step": 4852 + }, + { + "epoch": 55.09915014164306, + "loss": 0.04791152477264404, + "loss_ce": 0.00016677156963851303, + "loss_iou": 0.404296875, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 832901252, + "step": 4852 + }, + { + "epoch": 55.110481586402265, + "grad_norm": 3.7103334345427466, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 833072592, + "step": 4853 + }, + { + "epoch": 55.110481586402265, + "loss": 0.05243786796927452, + "loss_ce": 5.44440554222092e-05, + "loss_iou": 0.55078125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 833072592, + "step": 4853 + }, + { + "epoch": 55.12181303116147, + "grad_norm": 5.34505010837871, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 833244000, + "step": 4854 + }, + { + "epoch": 55.12181303116147, + "loss": 0.1246090978384018, + "loss_ce": 6.685879634460434e-05, + "loss_iou": 0.07421875, + "loss_num": 0.02490234375, + "loss_xval": 0.12451171875, + "num_input_tokens_seen": 833244000, + "step": 4854 + }, + { + "epoch": 55.13314447592068, + "grad_norm": 3.600249735058, + "learning_rate": 5e-06, + "loss": 0.0573, + "num_input_tokens_seen": 833414296, + "step": 4855 + }, + { + "epoch": 55.13314447592068, + "loss": 0.08738004416227341, + "loss_ce": 0.00013028204557485878, + "loss_iou": 0.53125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 833414296, + "step": 4855 + }, + { + "epoch": 55.14447592067989, + "grad_norm": 3.9518795251806416, + "learning_rate": 5e-06, + "loss": 0.0703, + "num_input_tokens_seen": 833585820, + "step": 4856 + }, + { + "epoch": 55.14447592067989, + "loss": 0.03788881376385689, + "loss_ce": 4.70182130811736e-05, + "loss_iou": 0.44921875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 833585820, + "step": 4856 + }, + { + "epoch": 55.155807365439095, + "grad_norm": 3.250529452619723, + "learning_rate": 5e-06, + "loss": 0.0396, + "num_input_tokens_seen": 833757340, + "step": 4857 + }, + { + "epoch": 55.155807365439095, + "loss": 0.04628312587738037, + "loss_ce": 4.899408304481767e-05, + "loss_iou": 0.67578125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 833757340, + "step": 4857 + }, + { + "epoch": 55.1671388101983, + "grad_norm": 3.6477704130554094, + "learning_rate": 5e-06, + "loss": 0.099, + "num_input_tokens_seen": 833928936, + "step": 4858 + }, + { + "epoch": 55.1671388101983, + "loss": 0.11140298843383789, + "loss_ce": 7.486053073080257e-05, + "loss_iou": 0.16015625, + "loss_num": 0.0223388671875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 833928936, + "step": 4858 + }, + { + "epoch": 55.17847025495751, + "grad_norm": 4.049437142282047, + "learning_rate": 5e-06, + "loss": 0.0448, + "num_input_tokens_seen": 834100300, + "step": 4859 + }, + { + "epoch": 55.17847025495751, + "loss": 0.0394081249833107, + "loss_ce": 0.00010148172441404313, + "loss_iou": 0.474609375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 834100300, + "step": 4859 + }, + { + "epoch": 55.18980169971671, + "grad_norm": 3.7360462053462715, + "learning_rate": 5e-06, + "loss": 0.0567, + "num_input_tokens_seen": 834270868, + "step": 4860 + }, + { + "epoch": 55.18980169971671, + "loss": 0.05600510165095329, + "loss_ce": 0.00017319328617304564, + "loss_iou": 0.392578125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 834270868, + "step": 4860 + }, + { + "epoch": 55.20113314447592, + "grad_norm": 3.5912981043900216, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 834441052, + "step": 4861 + }, + { + "epoch": 55.20113314447592, + "loss": 0.04095540568232536, + "loss_ce": 0.0001686612085904926, + "loss_iou": 0.5859375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 834441052, + "step": 4861 + }, + { + "epoch": 55.212464589235125, + "grad_norm": 5.312096689896347, + "learning_rate": 5e-06, + "loss": 0.0666, + "num_input_tokens_seen": 834612156, + "step": 4862 + }, + { + "epoch": 55.212464589235125, + "loss": 0.08147785067558289, + "loss_ce": 0.0001484984823036939, + "loss_iou": 0.439453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 834612156, + "step": 4862 + }, + { + "epoch": 55.22379603399433, + "grad_norm": 5.652013947056333, + "learning_rate": 5e-06, + "loss": 0.0902, + "num_input_tokens_seen": 834783564, + "step": 4863 + }, + { + "epoch": 55.22379603399433, + "loss": 0.08377085626125336, + "loss_ce": 0.00018320902017876506, + "loss_iou": 0.62109375, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 834783564, + "step": 4863 + }, + { + "epoch": 55.23512747875354, + "grad_norm": 2.9174302224508923, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 834955048, + "step": 4864 + }, + { + "epoch": 55.23512747875354, + "loss": 0.03336211293935776, + "loss_ce": 5.217511352384463e-05, + "loss_iou": 0.302734375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 834955048, + "step": 4864 + }, + { + "epoch": 55.24645892351275, + "grad_norm": 3.183846767327532, + "learning_rate": 5e-06, + "loss": 0.0735, + "num_input_tokens_seen": 835125456, + "step": 4865 + }, + { + "epoch": 55.24645892351275, + "loss": 0.12082712352275848, + "loss_ce": 6.90634551574476e-05, + "loss_iou": 0.5703125, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 835125456, + "step": 4865 + }, + { + "epoch": 55.257790368271955, + "grad_norm": 3.6693916962039057, + "learning_rate": 5e-06, + "loss": 0.0808, + "num_input_tokens_seen": 835297028, + "step": 4866 + }, + { + "epoch": 55.257790368271955, + "loss": 0.09813405573368073, + "loss_ce": 0.00011159271525684744, + "loss_iou": 0.326171875, + "loss_num": 0.01953125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 835297028, + "step": 4866 + }, + { + "epoch": 55.26912181303116, + "grad_norm": 3.6166673366715525, + "learning_rate": 5e-06, + "loss": 0.0702, + "num_input_tokens_seen": 835468056, + "step": 4867 + }, + { + "epoch": 55.26912181303116, + "loss": 0.06933483481407166, + "loss_ce": 9.045004844665527e-05, + "loss_iou": 0.490234375, + "loss_num": 0.01385498046875, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 835468056, + "step": 4867 + }, + { + "epoch": 55.28045325779037, + "grad_norm": 3.859604724392222, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 835640024, + "step": 4868 + }, + { + "epoch": 55.28045325779037, + "loss": 0.06038837134838104, + "loss_ce": 3.986154479207471e-05, + "loss_iou": 0.453125, + "loss_num": 0.01202392578125, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 835640024, + "step": 4868 + }, + { + "epoch": 55.29178470254958, + "grad_norm": 4.137460084964757, + "learning_rate": 5e-06, + "loss": 0.0521, + "num_input_tokens_seen": 835812208, + "step": 4869 + }, + { + "epoch": 55.29178470254958, + "loss": 0.04981578141450882, + "loss_ce": 0.00013316590047907084, + "loss_iou": 0.5, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 835812208, + "step": 4869 + }, + { + "epoch": 55.303116147308785, + "grad_norm": 4.994204068695308, + "learning_rate": 5e-06, + "loss": 0.0607, + "num_input_tokens_seen": 835984376, + "step": 4870 + }, + { + "epoch": 55.303116147308785, + "loss": 0.07546278834342957, + "loss_ce": 0.0001759215083438903, + "loss_iou": 0.498046875, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 835984376, + "step": 4870 + }, + { + "epoch": 55.314447592067985, + "grad_norm": 4.783889326640654, + "learning_rate": 5e-06, + "loss": 0.0696, + "num_input_tokens_seen": 836153268, + "step": 4871 + }, + { + "epoch": 55.314447592067985, + "loss": 0.09268462657928467, + "loss_ce": 0.0008267149678431451, + "loss_iou": 0.26953125, + "loss_num": 0.0184326171875, + "loss_xval": 0.091796875, + "num_input_tokens_seen": 836153268, + "step": 4871 + }, + { + "epoch": 55.32577903682719, + "grad_norm": 3.2990718542812116, + "learning_rate": 5e-06, + "loss": 0.056, + "num_input_tokens_seen": 836325396, + "step": 4872 + }, + { + "epoch": 55.32577903682719, + "loss": 0.038956448435783386, + "loss_ce": 6.179563933983445e-05, + "loss_iou": 0.4453125, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 836325396, + "step": 4872 + }, + { + "epoch": 55.3371104815864, + "grad_norm": 3.2999850297171736, + "learning_rate": 5e-06, + "loss": 0.0735, + "num_input_tokens_seen": 836497336, + "step": 4873 + }, + { + "epoch": 55.3371104815864, + "loss": 0.08170707523822784, + "loss_ce": 0.00019462211639620364, + "loss_iou": 0.4921875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 836497336, + "step": 4873 + }, + { + "epoch": 55.34844192634561, + "grad_norm": 4.309098550220041, + "learning_rate": 5e-06, + "loss": 0.0777, + "num_input_tokens_seen": 836667720, + "step": 4874 + }, + { + "epoch": 55.34844192634561, + "loss": 0.09926150739192963, + "loss_ce": 7.937539339764044e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 836667720, + "step": 4874 + }, + { + "epoch": 55.359773371104815, + "grad_norm": 4.740192279373646, + "learning_rate": 5e-06, + "loss": 0.0819, + "num_input_tokens_seen": 836837976, + "step": 4875 + }, + { + "epoch": 55.359773371104815, + "loss": 0.055474989116191864, + "loss_ce": 7.032898429315537e-05, + "loss_iou": 0.294921875, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 836837976, + "step": 4875 + }, + { + "epoch": 55.37110481586402, + "grad_norm": 3.8193839405818504, + "learning_rate": 5e-06, + "loss": 0.0599, + "num_input_tokens_seen": 837009992, + "step": 4876 + }, + { + "epoch": 55.37110481586402, + "loss": 0.0744498074054718, + "loss_ce": 0.00015476541011594236, + "loss_iou": 0.353515625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 837009992, + "step": 4876 + }, + { + "epoch": 55.38243626062323, + "grad_norm": 5.282860737524898, + "learning_rate": 5e-06, + "loss": 0.0529, + "num_input_tokens_seen": 837181552, + "step": 4877 + }, + { + "epoch": 55.38243626062323, + "loss": 0.031676217913627625, + "loss_ce": 9.052344830706716e-05, + "loss_iou": 0.359375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 837181552, + "step": 4877 + }, + { + "epoch": 55.39376770538244, + "grad_norm": 68.58655013328075, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 837353896, + "step": 4878 + }, + { + "epoch": 55.39376770538244, + "loss": 0.05661048740148544, + "loss_ce": 6.141711492091417e-05, + "loss_iou": 0.41796875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 837353896, + "step": 4878 + }, + { + "epoch": 55.405099150141645, + "grad_norm": 4.794209646326477, + "learning_rate": 5e-06, + "loss": 0.077, + "num_input_tokens_seen": 837526036, + "step": 4879 + }, + { + "epoch": 55.405099150141645, + "loss": 0.04730220139026642, + "loss_ce": 0.00010676590318325907, + "loss_iou": 0.267578125, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 837526036, + "step": 4879 + }, + { + "epoch": 55.41643059490085, + "grad_norm": 3.983621763867161, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 837697824, + "step": 4880 + }, + { + "epoch": 55.41643059490085, + "loss": 0.03588370233774185, + "loss_ce": 0.00013235759979579598, + "loss_iou": 0.466796875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 837697824, + "step": 4880 + }, + { + "epoch": 55.42776203966006, + "grad_norm": 2.556705177928445, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 837868728, + "step": 4881 + }, + { + "epoch": 55.42776203966006, + "loss": 0.06567414104938507, + "loss_ce": 0.00016816367860883474, + "loss_iou": 0.4296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 837868728, + "step": 4881 + }, + { + "epoch": 55.43909348441926, + "grad_norm": 2.214142232023204, + "learning_rate": 5e-06, + "loss": 0.0785, + "num_input_tokens_seen": 838040748, + "step": 4882 + }, + { + "epoch": 55.43909348441926, + "loss": 0.05788538604974747, + "loss_ce": 5.457275256048888e-05, + "loss_iou": 0.271484375, + "loss_num": 0.01153564453125, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 838040748, + "step": 4882 + }, + { + "epoch": 55.45042492917847, + "grad_norm": 2.668098887875958, + "learning_rate": 5e-06, + "loss": 0.065, + "num_input_tokens_seen": 838210412, + "step": 4883 + }, + { + "epoch": 55.45042492917847, + "loss": 0.09227945655584335, + "loss_ce": 8.585143950767815e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 838210412, + "step": 4883 + }, + { + "epoch": 55.461756373937675, + "grad_norm": 2.324162073048407, + "learning_rate": 5e-06, + "loss": 0.0609, + "num_input_tokens_seen": 838381936, + "step": 4884 + }, + { + "epoch": 55.461756373937675, + "loss": 0.07587769627571106, + "loss_ce": 0.0003314313944429159, + "loss_iou": 0.3046875, + "loss_num": 0.01507568359375, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 838381936, + "step": 4884 + }, + { + "epoch": 55.47308781869688, + "grad_norm": 3.295053985404213, + "learning_rate": 5e-06, + "loss": 0.0881, + "num_input_tokens_seen": 838553800, + "step": 4885 + }, + { + "epoch": 55.47308781869688, + "loss": 0.11731519550085068, + "loss_ce": 0.00011243583867326379, + "loss_iou": 0.40625, + "loss_num": 0.0234375, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 838553800, + "step": 4885 + }, + { + "epoch": 55.48441926345609, + "grad_norm": 2.091044967669853, + "learning_rate": 5e-06, + "loss": 0.075, + "num_input_tokens_seen": 838725868, + "step": 4886 + }, + { + "epoch": 55.48441926345609, + "loss": 0.052465178072452545, + "loss_ce": 0.00018093621474690735, + "loss_iou": 0.33984375, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 838725868, + "step": 4886 + }, + { + "epoch": 55.4957507082153, + "grad_norm": 5.63296129853245, + "learning_rate": 5e-06, + "loss": 0.0523, + "num_input_tokens_seen": 838897860, + "step": 4887 + }, + { + "epoch": 55.4957507082153, + "loss": 0.09221188724040985, + "loss_ce": 0.00010983859829138964, + "loss_iou": 0.3359375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 838897860, + "step": 4887 + }, + { + "epoch": 55.507082152974505, + "grad_norm": 3.6102645727168037, + "learning_rate": 5e-06, + "loss": 0.0426, + "num_input_tokens_seen": 839069112, + "step": 4888 + }, + { + "epoch": 55.507082152974505, + "loss": 0.044318750500679016, + "loss_ce": 5.300229895510711e-05, + "loss_iou": 0.482421875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 839069112, + "step": 4888 + }, + { + "epoch": 55.51841359773371, + "grad_norm": 4.135972129451464, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 839239864, + "step": 4889 + }, + { + "epoch": 55.51841359773371, + "loss": 0.10798877477645874, + "loss_ce": 0.0002617257705423981, + "loss_iou": 0.546875, + "loss_num": 0.021484375, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 839239864, + "step": 4889 + }, + { + "epoch": 55.52974504249292, + "grad_norm": 4.650632633301889, + "learning_rate": 5e-06, + "loss": 0.0674, + "num_input_tokens_seen": 839411936, + "step": 4890 + }, + { + "epoch": 55.52974504249292, + "loss": 0.10204128921031952, + "loss_ce": 0.00023463831166736782, + "loss_iou": 0.388671875, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 839411936, + "step": 4890 + }, + { + "epoch": 55.54107648725213, + "grad_norm": 3.5102093662092635, + "learning_rate": 5e-06, + "loss": 0.0504, + "num_input_tokens_seen": 839583056, + "step": 4891 + }, + { + "epoch": 55.54107648725213, + "loss": 0.038575440645217896, + "loss_ce": 0.00019958653138019145, + "loss_iou": 0.63671875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 839583056, + "step": 4891 + }, + { + "epoch": 55.552407932011334, + "grad_norm": 5.496362906048299, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 839753408, + "step": 4892 + }, + { + "epoch": 55.552407932011334, + "loss": 0.0450739860534668, + "loss_ce": 9.107418009079993e-05, + "loss_iou": 0.44921875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 839753408, + "step": 4892 + }, + { + "epoch": 55.563739376770535, + "grad_norm": 4.258196540781646, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 839925240, + "step": 4893 + }, + { + "epoch": 55.563739376770535, + "loss": 0.10558530688285828, + "loss_ce": 8.604592585470527e-05, + "loss_iou": 0.486328125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 839925240, + "step": 4893 + }, + { + "epoch": 55.57507082152974, + "grad_norm": 4.403422192478105, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 840096608, + "step": 4894 + }, + { + "epoch": 55.57507082152974, + "loss": 0.033832140266895294, + "loss_ce": 0.00011021541286027059, + "loss_iou": 0.5, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 840096608, + "step": 4894 + }, + { + "epoch": 55.58640226628895, + "grad_norm": 3.4135292072406562, + "learning_rate": 5e-06, + "loss": 0.0846, + "num_input_tokens_seen": 840266596, + "step": 4895 + }, + { + "epoch": 55.58640226628895, + "loss": 0.17693571746349335, + "loss_ce": 0.00014738901518285275, + "loss_iou": 0.2412109375, + "loss_num": 0.035400390625, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 840266596, + "step": 4895 + }, + { + "epoch": 55.59773371104816, + "grad_norm": 2.9171624453334646, + "learning_rate": 5e-06, + "loss": 0.0732, + "num_input_tokens_seen": 840438620, + "step": 4896 + }, + { + "epoch": 55.59773371104816, + "loss": 0.06255581974983215, + "loss_ce": 7.10822205292061e-05, + "loss_iou": 0.41015625, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 840438620, + "step": 4896 + }, + { + "epoch": 55.609065155807365, + "grad_norm": 2.7276004161517036, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 840608808, + "step": 4897 + }, + { + "epoch": 55.609065155807365, + "loss": 0.028982864692807198, + "loss_ce": 6.74590264679864e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 840608808, + "step": 4897 + }, + { + "epoch": 55.62039660056657, + "grad_norm": 3.6868888325986107, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 840779572, + "step": 4898 + }, + { + "epoch": 55.62039660056657, + "loss": 0.092755526304245, + "loss_ce": 7.364529301412404e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 840779572, + "step": 4898 + }, + { + "epoch": 55.63172804532578, + "grad_norm": 6.472887507062862, + "learning_rate": 5e-06, + "loss": 0.1001, + "num_input_tokens_seen": 840950552, + "step": 4899 + }, + { + "epoch": 55.63172804532578, + "loss": 0.05668313801288605, + "loss_ce": 8.82900640135631e-05, + "loss_iou": 0.45703125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 840950552, + "step": 4899 + }, + { + "epoch": 55.64305949008499, + "grad_norm": 3.384872877294457, + "learning_rate": 5e-06, + "loss": 0.0573, + "num_input_tokens_seen": 841122804, + "step": 4900 + }, + { + "epoch": 55.64305949008499, + "loss": 0.07760881632566452, + "loss_ce": 0.0001246868196176365, + "loss_iou": 0.3828125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 841122804, + "step": 4900 + }, + { + "epoch": 55.654390934844194, + "grad_norm": 4.040554348783327, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 841292968, + "step": 4901 + }, + { + "epoch": 55.654390934844194, + "loss": 0.04645824432373047, + "loss_ce": 8.678375161252916e-05, + "loss_iou": 0.51171875, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 841292968, + "step": 4901 + }, + { + "epoch": 55.6657223796034, + "grad_norm": 13.51059684780058, + "learning_rate": 5e-06, + "loss": 0.09, + "num_input_tokens_seen": 841463308, + "step": 4902 + }, + { + "epoch": 55.6657223796034, + "loss": 0.07271280884742737, + "loss_ce": 8.09723132988438e-05, + "loss_iou": 0.453125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 841463308, + "step": 4902 + }, + { + "epoch": 55.67705382436261, + "grad_norm": 3.770452936213153, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 841635424, + "step": 4903 + }, + { + "epoch": 55.67705382436261, + "loss": 0.04690537601709366, + "loss_ce": 0.0002134811511496082, + "loss_iou": 0.56640625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 841635424, + "step": 4903 + }, + { + "epoch": 55.68838526912181, + "grad_norm": 4.028212939468325, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 841807568, + "step": 4904 + }, + { + "epoch": 55.68838526912181, + "loss": 0.032003916800022125, + "loss_ce": 0.0002198598231188953, + "loss_iou": 0.4609375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 841807568, + "step": 4904 + }, + { + "epoch": 55.69971671388102, + "grad_norm": 5.4954994365558205, + "learning_rate": 5e-06, + "loss": 0.0781, + "num_input_tokens_seen": 841979408, + "step": 4905 + }, + { + "epoch": 55.69971671388102, + "loss": 0.07235094904899597, + "loss_ce": 0.00016161345411092043, + "loss_iou": 0.5390625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 841979408, + "step": 4905 + }, + { + "epoch": 55.711048158640224, + "grad_norm": 3.7626026912339396, + "learning_rate": 5e-06, + "loss": 0.0538, + "num_input_tokens_seen": 842151028, + "step": 4906 + }, + { + "epoch": 55.711048158640224, + "loss": 0.039993807673454285, + "loss_ce": 4.6300912799779326e-05, + "loss_iou": 0.44921875, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 842151028, + "step": 4906 + }, + { + "epoch": 55.72237960339943, + "grad_norm": 3.406112451312235, + "learning_rate": 5e-06, + "loss": 0.0696, + "num_input_tokens_seen": 842323228, + "step": 4907 + }, + { + "epoch": 55.72237960339943, + "loss": 0.09761609137058258, + "loss_ce": 0.00011242822074564174, + "loss_iou": 0.62109375, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 842323228, + "step": 4907 + }, + { + "epoch": 55.73371104815864, + "grad_norm": 3.9424127173175454, + "learning_rate": 5e-06, + "loss": 0.0886, + "num_input_tokens_seen": 842495116, + "step": 4908 + }, + { + "epoch": 55.73371104815864, + "loss": 0.043002791702747345, + "loss_ce": 9.507859067525715e-05, + "loss_iou": 0.421875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 842495116, + "step": 4908 + }, + { + "epoch": 55.74504249291785, + "grad_norm": 4.796925740731655, + "learning_rate": 5e-06, + "loss": 0.0903, + "num_input_tokens_seen": 842665972, + "step": 4909 + }, + { + "epoch": 55.74504249291785, + "loss": 0.10123119503259659, + "loss_ce": 0.00018749914306681603, + "loss_iou": 0.546875, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 842665972, + "step": 4909 + }, + { + "epoch": 55.756373937677054, + "grad_norm": 3.8954137003505065, + "learning_rate": 5e-06, + "loss": 0.092, + "num_input_tokens_seen": 842836000, + "step": 4910 + }, + { + "epoch": 55.756373937677054, + "loss": 0.060989804565906525, + "loss_ce": 0.00018353178165853024, + "loss_iou": 0.326171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 842836000, + "step": 4910 + }, + { + "epoch": 55.76770538243626, + "grad_norm": 3.424275686345672, + "learning_rate": 5e-06, + "loss": 0.0741, + "num_input_tokens_seen": 843007948, + "step": 4911 + }, + { + "epoch": 55.76770538243626, + "loss": 0.1410728096961975, + "loss_ce": 6.634854071307927e-05, + "loss_iou": 0.453125, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 843007948, + "step": 4911 + }, + { + "epoch": 55.77903682719547, + "grad_norm": 3.9696601319207447, + "learning_rate": 5e-06, + "loss": 0.0578, + "num_input_tokens_seen": 843179012, + "step": 4912 + }, + { + "epoch": 55.77903682719547, + "loss": 0.05479143187403679, + "loss_ce": 0.0001497048360761255, + "loss_iou": 0.4140625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 843179012, + "step": 4912 + }, + { + "epoch": 55.79036827195468, + "grad_norm": 4.418036715516445, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 843349240, + "step": 4913 + }, + { + "epoch": 55.79036827195468, + "loss": 0.0803055465221405, + "loss_ce": 0.00012060943117830902, + "loss_iou": 0.3828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 843349240, + "step": 4913 + }, + { + "epoch": 55.801699716713884, + "grad_norm": 3.85633454409002, + "learning_rate": 5e-06, + "loss": 0.0575, + "num_input_tokens_seen": 843520008, + "step": 4914 + }, + { + "epoch": 55.801699716713884, + "loss": 0.04541558027267456, + "loss_ce": 5.1203820476075634e-05, + "loss_iou": 0.51171875, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 843520008, + "step": 4914 + }, + { + "epoch": 55.81303116147309, + "grad_norm": 4.506391250644138, + "learning_rate": 5e-06, + "loss": 0.1072, + "num_input_tokens_seen": 843689300, + "step": 4915 + }, + { + "epoch": 55.81303116147309, + "loss": 0.06844595074653625, + "loss_ce": 0.0001323487376794219, + "loss_iou": 0.185546875, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 843689300, + "step": 4915 + }, + { + "epoch": 55.82436260623229, + "grad_norm": 2.9060615384027937, + "learning_rate": 5e-06, + "loss": 0.054, + "num_input_tokens_seen": 843859248, + "step": 4916 + }, + { + "epoch": 55.82436260623229, + "loss": 0.03144977241754532, + "loss_ce": 0.00010821959585882723, + "loss_iou": 0.40625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 843859248, + "step": 4916 + }, + { + "epoch": 55.8356940509915, + "grad_norm": 3.625623667593636, + "learning_rate": 5e-06, + "loss": 0.0621, + "num_input_tokens_seen": 844029808, + "step": 4917 + }, + { + "epoch": 55.8356940509915, + "loss": 0.06820227205753326, + "loss_ce": 5.6525452237110585e-05, + "loss_iou": 0.4375, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 844029808, + "step": 4917 + }, + { + "epoch": 55.84702549575071, + "grad_norm": 3.3309251540068745, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 844201496, + "step": 4918 + }, + { + "epoch": 55.84702549575071, + "loss": 0.05710037052631378, + "loss_ce": 0.00016982998931780457, + "loss_iou": 0.244140625, + "loss_num": 0.01141357421875, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 844201496, + "step": 4918 + }, + { + "epoch": 55.858356940509914, + "grad_norm": 3.278909537406917, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 844373532, + "step": 4919 + }, + { + "epoch": 55.858356940509914, + "loss": 0.052809786051511765, + "loss_ce": 6.0152528021717444e-05, + "loss_iou": 0.404296875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 844373532, + "step": 4919 + }, + { + "epoch": 55.86968838526912, + "grad_norm": 3.2660163344739854, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 844544464, + "step": 4920 + }, + { + "epoch": 55.86968838526912, + "loss": 0.059388354420661926, + "loss_ce": 7.74409927544184e-05, + "loss_iou": 0.515625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 844544464, + "step": 4920 + }, + { + "epoch": 55.88101983002833, + "grad_norm": 3.6625839864904806, + "learning_rate": 5e-06, + "loss": 0.0671, + "num_input_tokens_seen": 844716164, + "step": 4921 + }, + { + "epoch": 55.88101983002833, + "loss": 0.03279288113117218, + "loss_ce": 0.0002153678797185421, + "loss_iou": 0.41015625, + "loss_num": 0.00653076171875, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 844716164, + "step": 4921 + }, + { + "epoch": 55.892351274787536, + "grad_norm": 4.579621288372402, + "learning_rate": 5e-06, + "loss": 0.0958, + "num_input_tokens_seen": 844887928, + "step": 4922 + }, + { + "epoch": 55.892351274787536, + "loss": 0.0599120631814003, + "loss_ce": 9.760991088114679e-05, + "loss_iou": 0.53125, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 844887928, + "step": 4922 + }, + { + "epoch": 55.903682719546744, + "grad_norm": 3.7953442271450806, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 845059536, + "step": 4923 + }, + { + "epoch": 55.903682719546744, + "loss": 0.10065902769565582, + "loss_ce": 4.2575193219818175e-05, + "loss_iou": 0.51171875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 845059536, + "step": 4923 + }, + { + "epoch": 55.91501416430595, + "grad_norm": 4.039896304998453, + "learning_rate": 5e-06, + "loss": 0.0646, + "num_input_tokens_seen": 845230272, + "step": 4924 + }, + { + "epoch": 55.91501416430595, + "loss": 0.0445399172604084, + "loss_ce": 0.00010632298653945327, + "loss_iou": 0.4140625, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 845230272, + "step": 4924 + }, + { + "epoch": 55.92634560906516, + "grad_norm": 3.7475278478992404, + "learning_rate": 5e-06, + "loss": 0.0518, + "num_input_tokens_seen": 845400328, + "step": 4925 + }, + { + "epoch": 55.92634560906516, + "loss": 0.04840108007192612, + "loss_ce": 0.0006410708301700652, + "loss_iou": 0.5546875, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 845400328, + "step": 4925 + }, + { + "epoch": 55.93767705382436, + "grad_norm": 3.631795058849131, + "learning_rate": 5e-06, + "loss": 0.0661, + "num_input_tokens_seen": 845571388, + "step": 4926 + }, + { + "epoch": 55.93767705382436, + "loss": 0.05695248767733574, + "loss_ce": 6.772139749955386e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 845571388, + "step": 4926 + }, + { + "epoch": 55.94900849858357, + "grad_norm": 3.6403936228206923, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 845742144, + "step": 4927 + }, + { + "epoch": 55.94900849858357, + "loss": 0.0556352436542511, + "loss_ce": 0.00024584238417446613, + "loss_iou": 0.349609375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 845742144, + "step": 4927 + }, + { + "epoch": 55.960339943342774, + "grad_norm": 3.3421383799749287, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 845912708, + "step": 4928 + }, + { + "epoch": 55.960339943342774, + "loss": 0.03565491735935211, + "loss_ce": 5.615890404442325e-05, + "loss_iou": 0.44921875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 845912708, + "step": 4928 + }, + { + "epoch": 55.97167138810198, + "grad_norm": 3.5515769376924964, + "learning_rate": 5e-06, + "loss": 0.0472, + "num_input_tokens_seen": 846081776, + "step": 4929 + }, + { + "epoch": 55.97167138810198, + "loss": 0.03801915794610977, + "loss_ce": 5.529045301955193e-05, + "loss_iou": 0.435546875, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 846081776, + "step": 4929 + }, + { + "epoch": 55.98300283286119, + "grad_norm": 3.38914912074444, + "learning_rate": 5e-06, + "loss": 0.0713, + "num_input_tokens_seen": 846253676, + "step": 4930 + }, + { + "epoch": 55.98300283286119, + "loss": 0.06063020974397659, + "loss_ce": 0.0002054050419246778, + "loss_iou": 0.357421875, + "loss_num": 0.0120849609375, + "loss_xval": 0.060546875, + "num_input_tokens_seen": 846253676, + "step": 4930 + }, + { + "epoch": 55.994334277620396, + "grad_norm": 2.964755871467462, + "learning_rate": 5e-06, + "loss": 0.0669, + "num_input_tokens_seen": 846424632, + "step": 4931 + }, + { + "epoch": 55.994334277620396, + "loss": 0.11273901909589767, + "loss_ce": 8.337547478731722e-05, + "loss_iou": 0.43359375, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 846424632, + "step": 4931 + }, + { + "epoch": 55.994334277620396, + "loss": 0.0826747789978981, + "loss_ce": 4.843393253395334e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0166015625, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 846509616, + "step": 4931 + }, + { + "epoch": 56.005665722379604, + "grad_norm": 3.3536091692495864, + "learning_rate": 5e-06, + "loss": 0.0781, + "num_input_tokens_seen": 846595100, + "step": 4932 + }, + { + "epoch": 56.005665722379604, + "loss": 0.05322595313191414, + "loss_ce": 0.00015588558744639158, + "loss_iou": 0.384765625, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 846595100, + "step": 4932 + }, + { + "epoch": 56.01699716713881, + "grad_norm": 3.7673356202276587, + "learning_rate": 5e-06, + "loss": 0.0774, + "num_input_tokens_seen": 846766744, + "step": 4933 + }, + { + "epoch": 56.01699716713881, + "loss": 0.05581562966108322, + "loss_ce": 0.0002888973103836179, + "loss_iou": 0.546875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 846766744, + "step": 4933 + }, + { + "epoch": 56.02832861189802, + "grad_norm": 4.289372405513039, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 846936064, + "step": 4934 + }, + { + "epoch": 56.02832861189802, + "loss": 0.07163050025701523, + "loss_ce": 9.729446901474148e-05, + "loss_iou": 0.3515625, + "loss_num": 0.01434326171875, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 846936064, + "step": 4934 + }, + { + "epoch": 56.039660056657226, + "grad_norm": 4.189385506432874, + "learning_rate": 5e-06, + "loss": 0.0931, + "num_input_tokens_seen": 847106620, + "step": 4935 + }, + { + "epoch": 56.039660056657226, + "loss": 0.10359985381364822, + "loss_ce": 8.422660903306678e-05, + "loss_iou": 0.4140625, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 847106620, + "step": 4935 + }, + { + "epoch": 56.05099150141643, + "grad_norm": 5.283206833997654, + "learning_rate": 5e-06, + "loss": 0.0695, + "num_input_tokens_seen": 847278548, + "step": 4936 + }, + { + "epoch": 56.05099150141643, + "loss": 0.08981213718652725, + "loss_ce": 0.00012097464059479535, + "loss_iou": 0.396484375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 847278548, + "step": 4936 + }, + { + "epoch": 56.06232294617564, + "grad_norm": 3.5717911775909035, + "learning_rate": 5e-06, + "loss": 0.0626, + "num_input_tokens_seen": 847450156, + "step": 4937 + }, + { + "epoch": 56.06232294617564, + "loss": 0.050839874893426895, + "loss_ce": 0.00011966139572905377, + "loss_iou": 0.408203125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 847450156, + "step": 4937 + }, + { + "epoch": 56.07365439093484, + "grad_norm": 3.3854271070409374, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 847622068, + "step": 4938 + }, + { + "epoch": 56.07365439093484, + "loss": 0.09919365495443344, + "loss_ce": 7.255817763507366e-05, + "loss_iou": 0.431640625, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 847622068, + "step": 4938 + }, + { + "epoch": 56.08498583569405, + "grad_norm": 4.114400871868835, + "learning_rate": 5e-06, + "loss": 0.0557, + "num_input_tokens_seen": 847793616, + "step": 4939 + }, + { + "epoch": 56.08498583569405, + "loss": 0.06629049777984619, + "loss_ce": 0.00011313016875647008, + "loss_iou": 0.4453125, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 847793616, + "step": 4939 + }, + { + "epoch": 56.096317280453256, + "grad_norm": 3.5425123253729733, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 847962836, + "step": 4940 + }, + { + "epoch": 56.096317280453256, + "loss": 0.09083575010299683, + "loss_ce": 0.00025957508478313684, + "loss_iou": 0.375, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 847962836, + "step": 4940 + }, + { + "epoch": 56.107648725212464, + "grad_norm": 2.996206786367124, + "learning_rate": 5e-06, + "loss": 0.0509, + "num_input_tokens_seen": 848134244, + "step": 4941 + }, + { + "epoch": 56.107648725212464, + "loss": 0.031099818646907806, + "loss_ce": 7.869962428230792e-05, + "loss_iou": 0.3359375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 848134244, + "step": 4941 + }, + { + "epoch": 56.11898016997167, + "grad_norm": 3.6048026164947617, + "learning_rate": 5e-06, + "loss": 0.0824, + "num_input_tokens_seen": 848305708, + "step": 4942 + }, + { + "epoch": 56.11898016997167, + "loss": 0.07235158234834671, + "loss_ce": 8.595604595029727e-05, + "loss_iou": 0.498046875, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 848305708, + "step": 4942 + }, + { + "epoch": 56.13031161473088, + "grad_norm": 3.328234090233558, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 848475716, + "step": 4943 + }, + { + "epoch": 56.13031161473088, + "loss": 0.04021462798118591, + "loss_ce": 5.349469211068936e-05, + "loss_iou": 0.07861328125, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 848475716, + "step": 4943 + }, + { + "epoch": 56.141643059490086, + "grad_norm": 4.080033657337501, + "learning_rate": 5e-06, + "loss": 0.0612, + "num_input_tokens_seen": 848646964, + "step": 4944 + }, + { + "epoch": 56.141643059490086, + "loss": 0.07201793789863586, + "loss_ce": 8.800456998869777e-05, + "loss_iou": 0.470703125, + "loss_num": 0.014404296875, + "loss_xval": 0.07177734375, + "num_input_tokens_seen": 848646964, + "step": 4944 + }, + { + "epoch": 56.15297450424929, + "grad_norm": 3.3278663990506483, + "learning_rate": 5e-06, + "loss": 0.0509, + "num_input_tokens_seen": 848818064, + "step": 4945 + }, + { + "epoch": 56.15297450424929, + "loss": 0.05643495172262192, + "loss_ce": 6.898325227666646e-05, + "loss_iou": 0.5625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 848818064, + "step": 4945 + }, + { + "epoch": 56.1643059490085, + "grad_norm": 2.662876391110361, + "learning_rate": 5e-06, + "loss": 0.0507, + "num_input_tokens_seen": 848990168, + "step": 4946 + }, + { + "epoch": 56.1643059490085, + "loss": 0.046533383429050446, + "loss_ce": 0.00011614571121754125, + "loss_iou": 0.41015625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 848990168, + "step": 4946 + }, + { + "epoch": 56.17563739376771, + "grad_norm": 3.098204345612474, + "learning_rate": 5e-06, + "loss": 0.0334, + "num_input_tokens_seen": 849162136, + "step": 4947 + }, + { + "epoch": 56.17563739376771, + "loss": 0.03562714904546738, + "loss_ce": 7.41688854759559e-05, + "loss_iou": 0.2255859375, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 849162136, + "step": 4947 + }, + { + "epoch": 56.186968838526916, + "grad_norm": 4.799208198369888, + "learning_rate": 5e-06, + "loss": 0.0835, + "num_input_tokens_seen": 849332956, + "step": 4948 + }, + { + "epoch": 56.186968838526916, + "loss": 0.13538876175880432, + "loss_ce": 7.38299495424144e-05, + "loss_iou": 0.44140625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 849332956, + "step": 4948 + }, + { + "epoch": 56.198300283286116, + "grad_norm": 3.9671172987701877, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 849505204, + "step": 4949 + }, + { + "epoch": 56.198300283286116, + "loss": 0.0647658109664917, + "loss_ce": 5.328988845576532e-05, + "loss_iou": 0.515625, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 849505204, + "step": 4949 + }, + { + "epoch": 56.20963172804532, + "grad_norm": 3.900793327298166, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 849677564, + "step": 4950 + }, + { + "epoch": 56.20963172804532, + "loss": 0.042843788862228394, + "loss_ce": 8.866139978636056e-05, + "loss_iou": 0.3671875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 849677564, + "step": 4950 + }, + { + "epoch": 56.22096317280453, + "grad_norm": 3.839341613292569, + "learning_rate": 5e-06, + "loss": 0.0584, + "num_input_tokens_seen": 849849180, + "step": 4951 + }, + { + "epoch": 56.22096317280453, + "loss": 0.059122323989868164, + "loss_ce": 0.0001013303262880072, + "loss_iou": 0.4140625, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 849849180, + "step": 4951 + }, + { + "epoch": 56.23229461756374, + "grad_norm": 3.8769282289676754, + "learning_rate": 5e-06, + "loss": 0.0507, + "num_input_tokens_seen": 850019312, + "step": 4952 + }, + { + "epoch": 56.23229461756374, + "loss": 0.03659261763095856, + "loss_ce": 0.0001241089921677485, + "loss_iou": 0.345703125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 850019312, + "step": 4952 + }, + { + "epoch": 56.243626062322946, + "grad_norm": 3.4578204485483766, + "learning_rate": 5e-06, + "loss": 0.0577, + "num_input_tokens_seen": 850190856, + "step": 4953 + }, + { + "epoch": 56.243626062322946, + "loss": 0.10320930182933807, + "loss_ce": 4.4626533053815365e-05, + "loss_iou": 0.6640625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 850190856, + "step": 4953 + }, + { + "epoch": 56.25495750708215, + "grad_norm": 4.7885427532956575, + "learning_rate": 5e-06, + "loss": 0.0526, + "num_input_tokens_seen": 850363280, + "step": 4954 + }, + { + "epoch": 56.25495750708215, + "loss": 0.058364663273096085, + "loss_ce": 4.5572338422061875e-05, + "loss_iou": 0.3984375, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 850363280, + "step": 4954 + }, + { + "epoch": 56.26628895184136, + "grad_norm": 3.9535189843192984, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 850533712, + "step": 4955 + }, + { + "epoch": 56.26628895184136, + "loss": 0.13784638047218323, + "loss_ce": 2.901221341744531e-05, + "loss_iou": 0.4296875, + "loss_num": 0.027587890625, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 850533712, + "step": 4955 + }, + { + "epoch": 56.27762039660057, + "grad_norm": 4.733945619596806, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 850705532, + "step": 4956 + }, + { + "epoch": 56.27762039660057, + "loss": 0.07580488920211792, + "loss_ce": 6.0261842008912936e-05, + "loss_iou": 0.369140625, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 850705532, + "step": 4956 + }, + { + "epoch": 56.288951841359776, + "grad_norm": 4.100497366778174, + "learning_rate": 5e-06, + "loss": 0.0698, + "num_input_tokens_seen": 850877292, + "step": 4957 + }, + { + "epoch": 56.288951841359776, + "loss": 0.04398246854543686, + "loss_ce": 9.819120168685913e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 850877292, + "step": 4957 + }, + { + "epoch": 56.30028328611898, + "grad_norm": 3.62200045473798, + "learning_rate": 5e-06, + "loss": 0.0659, + "num_input_tokens_seen": 851049008, + "step": 4958 + }, + { + "epoch": 56.30028328611898, + "loss": 0.05289608985185623, + "loss_ce": 7.016229210421443e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 851049008, + "step": 4958 + }, + { + "epoch": 56.31161473087819, + "grad_norm": 3.8524813387577854, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 851219144, + "step": 4959 + }, + { + "epoch": 56.31161473087819, + "loss": 0.07554268091917038, + "loss_ce": 0.00013374886475503445, + "loss_iou": 0.5, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 851219144, + "step": 4959 + }, + { + "epoch": 56.32294617563739, + "grad_norm": 3.4014691755529403, + "learning_rate": 5e-06, + "loss": 0.0592, + "num_input_tokens_seen": 851390180, + "step": 4960 + }, + { + "epoch": 56.32294617563739, + "loss": 0.041571419686079025, + "loss_ce": 0.00014380962238647044, + "loss_iou": 0.30078125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 851390180, + "step": 4960 + }, + { + "epoch": 56.3342776203966, + "grad_norm": 3.3987210190568984, + "learning_rate": 5e-06, + "loss": 0.0551, + "num_input_tokens_seen": 851561384, + "step": 4961 + }, + { + "epoch": 56.3342776203966, + "loss": 0.05235926806926727, + "loss_ce": 6.739788659615442e-05, + "loss_iou": 0.53515625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 851561384, + "step": 4961 + }, + { + "epoch": 56.345609065155806, + "grad_norm": 4.12563978245002, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 851732804, + "step": 4962 + }, + { + "epoch": 56.345609065155806, + "loss": 0.1039101779460907, + "loss_ce": 7.411520346067846e-05, + "loss_iou": 0.31640625, + "loss_num": 0.020751953125, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 851732804, + "step": 4962 + }, + { + "epoch": 56.35694050991501, + "grad_norm": 6.658687136813664, + "learning_rate": 5e-06, + "loss": 0.0672, + "num_input_tokens_seen": 851904452, + "step": 4963 + }, + { + "epoch": 56.35694050991501, + "loss": 0.10025132447481155, + "loss_ce": 9.26262655411847e-05, + "loss_iou": 0.43359375, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 851904452, + "step": 4963 + }, + { + "epoch": 56.36827195467422, + "grad_norm": 3.8907108610126087, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 852074612, + "step": 4964 + }, + { + "epoch": 56.36827195467422, + "loss": 0.05475816875696182, + "loss_ce": 4.0155638998840004e-05, + "loss_iou": 0.369140625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 852074612, + "step": 4964 + }, + { + "epoch": 56.37960339943343, + "grad_norm": 3.7545912140779767, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 852245048, + "step": 4965 + }, + { + "epoch": 56.37960339943343, + "loss": 0.1148814857006073, + "loss_ce": 0.00013538844359572977, + "loss_iou": 0.3984375, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 852245048, + "step": 4965 + }, + { + "epoch": 56.390934844192635, + "grad_norm": 3.1588256694801804, + "learning_rate": 5e-06, + "loss": 0.0661, + "num_input_tokens_seen": 852415672, + "step": 4966 + }, + { + "epoch": 56.390934844192635, + "loss": 0.042692095041275024, + "loss_ce": 8.955708472058177e-05, + "loss_iou": 0.3984375, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 852415672, + "step": 4966 + }, + { + "epoch": 56.40226628895184, + "grad_norm": 5.321244010620065, + "learning_rate": 5e-06, + "loss": 0.0682, + "num_input_tokens_seen": 852586424, + "step": 4967 + }, + { + "epoch": 56.40226628895184, + "loss": 0.036578815430402756, + "loss_ce": 7.979491783771664e-05, + "loss_iou": 0.189453125, + "loss_num": 0.007293701171875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 852586424, + "step": 4967 + }, + { + "epoch": 56.41359773371105, + "grad_norm": 5.584262332178128, + "learning_rate": 5e-06, + "loss": 0.0806, + "num_input_tokens_seen": 852757076, + "step": 4968 + }, + { + "epoch": 56.41359773371105, + "loss": 0.03489917516708374, + "loss_ce": 4.8100184358190745e-05, + "loss_iou": 0.255859375, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 852757076, + "step": 4968 + }, + { + "epoch": 56.42492917847026, + "grad_norm": 3.3172669578355714, + "learning_rate": 5e-06, + "loss": 0.0629, + "num_input_tokens_seen": 852928820, + "step": 4969 + }, + { + "epoch": 56.42492917847026, + "loss": 0.03755845129489899, + "loss_ce": 8.286676165880635e-05, + "loss_iou": 0.3828125, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 852928820, + "step": 4969 + }, + { + "epoch": 56.436260623229465, + "grad_norm": 3.3188128448066143, + "learning_rate": 5e-06, + "loss": 0.064, + "num_input_tokens_seen": 853098516, + "step": 4970 + }, + { + "epoch": 56.436260623229465, + "loss": 0.06092609092593193, + "loss_ce": 5.8779402024811134e-05, + "loss_iou": 0.404296875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 853098516, + "step": 4970 + }, + { + "epoch": 56.447592067988666, + "grad_norm": 3.4500553346177925, + "learning_rate": 5e-06, + "loss": 0.0969, + "num_input_tokens_seen": 853268508, + "step": 4971 + }, + { + "epoch": 56.447592067988666, + "loss": 0.0780632346868515, + "loss_ce": 2.9783921490889043e-05, + "loss_iou": 0.306640625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 853268508, + "step": 4971 + }, + { + "epoch": 56.45892351274787, + "grad_norm": 2.636503485244163, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 853439300, + "step": 4972 + }, + { + "epoch": 56.45892351274787, + "loss": 0.050229206681251526, + "loss_ce": 4.305325273890048e-05, + "loss_iou": 0.349609375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 853439300, + "step": 4972 + }, + { + "epoch": 56.47025495750708, + "grad_norm": 3.111984902578889, + "learning_rate": 5e-06, + "loss": 0.0622, + "num_input_tokens_seen": 853611144, + "step": 4973 + }, + { + "epoch": 56.47025495750708, + "loss": 0.08155134320259094, + "loss_ce": 8.466400322504342e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0162353515625, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 853611144, + "step": 4973 + }, + { + "epoch": 56.48158640226629, + "grad_norm": 3.624977827208369, + "learning_rate": 5e-06, + "loss": 0.0491, + "num_input_tokens_seen": 853783024, + "step": 4974 + }, + { + "epoch": 56.48158640226629, + "loss": 0.03524838387966156, + "loss_ce": 9.213176235789433e-05, + "loss_iou": 0.5078125, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 853783024, + "step": 4974 + }, + { + "epoch": 56.492917847025495, + "grad_norm": 2.9029984017162103, + "learning_rate": 5e-06, + "loss": 0.0444, + "num_input_tokens_seen": 853954680, + "step": 4975 + }, + { + "epoch": 56.492917847025495, + "loss": 0.035742372274398804, + "loss_ce": 6.732695328537375e-05, + "loss_iou": 0.51953125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 853954680, + "step": 4975 + }, + { + "epoch": 56.5042492917847, + "grad_norm": 3.3800031713130623, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 854122288, + "step": 4976 + }, + { + "epoch": 56.5042492917847, + "loss": 0.03584957867860794, + "loss_ce": 9.82389465207234e-05, + "loss_iou": 0.328125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 854122288, + "step": 4976 + }, + { + "epoch": 56.51558073654391, + "grad_norm": 4.575082251332415, + "learning_rate": 5e-06, + "loss": 0.0592, + "num_input_tokens_seen": 854294344, + "step": 4977 + }, + { + "epoch": 56.51558073654391, + "loss": 0.05696126073598862, + "loss_ce": 4.597748556989245e-05, + "loss_iou": 0.48046875, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 854294344, + "step": 4977 + }, + { + "epoch": 56.52691218130312, + "grad_norm": 3.288662489137845, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 854466328, + "step": 4978 + }, + { + "epoch": 56.52691218130312, + "loss": 0.047406382858753204, + "loss_ce": 0.0005161237204447389, + "loss_iou": 0.4296875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 854466328, + "step": 4978 + }, + { + "epoch": 56.538243626062325, + "grad_norm": 3.7791332314263353, + "learning_rate": 5e-06, + "loss": 0.0843, + "num_input_tokens_seen": 854638396, + "step": 4979 + }, + { + "epoch": 56.538243626062325, + "loss": 0.05304059758782387, + "loss_ce": 4.682278085965663e-05, + "loss_iou": 0.515625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 854638396, + "step": 4979 + }, + { + "epoch": 56.54957507082153, + "grad_norm": 3.3139414531224483, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 854809012, + "step": 4980 + }, + { + "epoch": 56.54957507082153, + "loss": 0.08319170027971268, + "loss_ce": 0.0002906979061663151, + "loss_iou": 0.30859375, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 854809012, + "step": 4980 + }, + { + "epoch": 56.56090651558074, + "grad_norm": 2.6589242281008043, + "learning_rate": 5e-06, + "loss": 0.1096, + "num_input_tokens_seen": 854979212, + "step": 4981 + }, + { + "epoch": 56.56090651558074, + "loss": 0.11985158175230026, + "loss_ce": 8.534132211934775e-05, + "loss_iou": 0.435546875, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 854979212, + "step": 4981 + }, + { + "epoch": 56.57223796033994, + "grad_norm": 2.946845232078595, + "learning_rate": 5e-06, + "loss": 0.0545, + "num_input_tokens_seen": 855151068, + "step": 4982 + }, + { + "epoch": 56.57223796033994, + "loss": 0.07540497928857803, + "loss_ce": 5.7077835663221776e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 855151068, + "step": 4982 + }, + { + "epoch": 56.58356940509915, + "grad_norm": 3.252890628692685, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 855322920, + "step": 4983 + }, + { + "epoch": 56.58356940509915, + "loss": 0.03005708009004593, + "loss_ce": 7.356132118729874e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0059814453125, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 855322920, + "step": 4983 + }, + { + "epoch": 56.594900849858355, + "grad_norm": 3.4605470952618327, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 855494476, + "step": 4984 + }, + { + "epoch": 56.594900849858355, + "loss": 0.040171485394239426, + "loss_ce": 7.138719956856221e-05, + "loss_iou": 0.546875, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 855494476, + "step": 4984 + }, + { + "epoch": 56.60623229461756, + "grad_norm": 2.695984834480556, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 855665256, + "step": 4985 + }, + { + "epoch": 56.60623229461756, + "loss": 0.033694181591272354, + "loss_ce": 0.0002011396427405998, + "loss_iou": 0.51953125, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 855665256, + "step": 4985 + }, + { + "epoch": 56.61756373937677, + "grad_norm": 3.1628748981828, + "learning_rate": 5e-06, + "loss": 0.0866, + "num_input_tokens_seen": 855836092, + "step": 4986 + }, + { + "epoch": 56.61756373937677, + "loss": 0.06583873182535172, + "loss_ce": 7.335042755585164e-05, + "loss_iou": 0.4609375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 855836092, + "step": 4986 + }, + { + "epoch": 56.62889518413598, + "grad_norm": 5.016588449832622, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 856008108, + "step": 4987 + }, + { + "epoch": 56.62889518413598, + "loss": 0.05184105411171913, + "loss_ce": 6.798441609134898e-05, + "loss_iou": 0.15625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 856008108, + "step": 4987 + }, + { + "epoch": 56.640226628895185, + "grad_norm": 3.2625502096198398, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 856180012, + "step": 4988 + }, + { + "epoch": 56.640226628895185, + "loss": 0.043545953929424286, + "loss_ce": 7.366194040514529e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 856180012, + "step": 4988 + }, + { + "epoch": 56.65155807365439, + "grad_norm": 3.1762156173856084, + "learning_rate": 5e-06, + "loss": 0.0642, + "num_input_tokens_seen": 856351032, + "step": 4989 + }, + { + "epoch": 56.65155807365439, + "loss": 0.10385333001613617, + "loss_ce": 9.355971997138113e-05, + "loss_iou": 0.5546875, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 856351032, + "step": 4989 + }, + { + "epoch": 56.6628895184136, + "grad_norm": 3.367729780434394, + "learning_rate": 5e-06, + "loss": 0.0609, + "num_input_tokens_seen": 856520400, + "step": 4990 + }, + { + "epoch": 56.6628895184136, + "loss": 0.04593687877058983, + "loss_ce": 0.0001299942086916417, + "loss_iou": 0.3046875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 856520400, + "step": 4990 + }, + { + "epoch": 56.67422096317281, + "grad_norm": 3.493860391116628, + "learning_rate": 5e-06, + "loss": 0.0725, + "num_input_tokens_seen": 856692220, + "step": 4991 + }, + { + "epoch": 56.67422096317281, + "loss": 0.06501106917858124, + "loss_ce": 5.4410804295912385e-05, + "loss_iou": 0.232421875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 856692220, + "step": 4991 + }, + { + "epoch": 56.685552407932015, + "grad_norm": 3.489201211541105, + "learning_rate": 5e-06, + "loss": 0.0544, + "num_input_tokens_seen": 856864100, + "step": 4992 + }, + { + "epoch": 56.685552407932015, + "loss": 0.0858091190457344, + "loss_ce": 6.998020398896188e-05, + "loss_iou": 0.3671875, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 856864100, + "step": 4992 + }, + { + "epoch": 56.696883852691215, + "grad_norm": 4.204124544672858, + "learning_rate": 5e-06, + "loss": 0.0788, + "num_input_tokens_seen": 857035852, + "step": 4993 + }, + { + "epoch": 56.696883852691215, + "loss": 0.053366005420684814, + "loss_ce": 0.00023490253079216927, + "loss_iou": 0.478515625, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 857035852, + "step": 4993 + }, + { + "epoch": 56.70821529745042, + "grad_norm": 3.886155176632275, + "learning_rate": 5e-06, + "loss": 0.0669, + "num_input_tokens_seen": 857205796, + "step": 4994 + }, + { + "epoch": 56.70821529745042, + "loss": 0.05728646367788315, + "loss_ce": 0.00014230137458071113, + "loss_iou": 0.296875, + "loss_num": 0.011474609375, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 857205796, + "step": 4994 + }, + { + "epoch": 56.71954674220963, + "grad_norm": 4.2344686707047305, + "learning_rate": 5e-06, + "loss": 0.0575, + "num_input_tokens_seen": 857376164, + "step": 4995 + }, + { + "epoch": 56.71954674220963, + "loss": 0.07924749702215195, + "loss_ce": 2.386427513556555e-05, + "loss_iou": 0.5859375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 857376164, + "step": 4995 + }, + { + "epoch": 56.73087818696884, + "grad_norm": 10.997457627545906, + "learning_rate": 5e-06, + "loss": 0.0941, + "num_input_tokens_seen": 857548108, + "step": 4996 + }, + { + "epoch": 56.73087818696884, + "loss": 0.06188258156180382, + "loss_ce": 0.00011500447726575658, + "loss_iou": 0.380859375, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 857548108, + "step": 4996 + }, + { + "epoch": 56.742209631728045, + "grad_norm": 3.6828098030553496, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 857719680, + "step": 4997 + }, + { + "epoch": 56.742209631728045, + "loss": 0.12832649052143097, + "loss_ce": 0.0003662837261799723, + "loss_iou": 0.45703125, + "loss_num": 0.025634765625, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 857719680, + "step": 4997 + }, + { + "epoch": 56.75354107648725, + "grad_norm": 3.3609040150071428, + "learning_rate": 5e-06, + "loss": 0.0749, + "num_input_tokens_seen": 857890152, + "step": 4998 + }, + { + "epoch": 56.75354107648725, + "loss": 0.06360742449760437, + "loss_ce": 3.9312657463597134e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 857890152, + "step": 4998 + }, + { + "epoch": 56.76487252124646, + "grad_norm": 3.610086961053706, + "learning_rate": 5e-06, + "loss": 0.0613, + "num_input_tokens_seen": 858061716, + "step": 4999 + }, + { + "epoch": 56.76487252124646, + "loss": 0.05188732594251633, + "loss_ce": 8.373636228498071e-05, + "loss_iou": 0.4140625, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 858061716, + "step": 4999 + }, + { + "epoch": 56.77620396600567, + "grad_norm": 3.2722959488567205, + "learning_rate": 5e-06, + "loss": 0.0599, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.77620396600567, + "eval_seeclick_CIoU": 0.5374038815498352, + "eval_seeclick_GIoU": 0.5343401730060577, + "eval_seeclick_IoU": 0.5729995667934418, + "eval_seeclick_MAE_all": 0.06275784783065319, + "eval_seeclick_MAE_h": 0.029513271525502205, + "eval_seeclick_MAE_w": 0.0937967300415039, + "eval_seeclick_MAE_x": 0.09877407923340797, + "eval_seeclick_MAE_y": 0.028947304002940655, + "eval_seeclick_NUM_probability": 0.999929815530777, + "eval_seeclick_inside_bbox": 0.8920454680919647, + "eval_seeclick_loss": 0.8652315139770508, + "eval_seeclick_loss_ce": 0.6316386461257935, + "eval_seeclick_loss_iou": 0.5211181640625, + "eval_seeclick_loss_num": 0.04671478271484375, + "eval_seeclick_loss_xval": 0.23345947265625, + "eval_seeclick_runtime": 68.9432, + "eval_seeclick_samples_per_second": 0.624, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.77620396600567, + "eval_icons_CIoU": 0.7602474093437195, + "eval_icons_GIoU": 0.7598395049571991, + "eval_icons_IoU": 0.772760659456253, + "eval_icons_MAE_all": 0.03311178460717201, + "eval_icons_MAE_h": 0.030518805608153343, + "eval_icons_MAE_w": 0.03753254935145378, + "eval_icons_MAE_x": 0.031239313073456287, + "eval_icons_MAE_y": 0.033156465739011765, + "eval_icons_NUM_probability": 0.9983026385307312, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.11245551705360413, + "eval_icons_loss_ce": 0.0024013028014451265, + "eval_icons_loss_iou": 0.5394287109375, + "eval_icons_loss_num": 0.020328521728515625, + "eval_icons_loss_xval": 0.1016693115234375, + "eval_icons_runtime": 78.7876, + "eval_icons_samples_per_second": 0.635, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.77620396600567, + "eval_screenspot_CIoU": 0.6211823423703512, + "eval_screenspot_GIoU": 0.6262733538945516, + "eval_screenspot_IoU": 0.6511431932449341, + "eval_screenspot_MAE_all": 0.06840701152880986, + "eval_screenspot_MAE_h": 0.033884188160300255, + "eval_screenspot_MAE_w": 0.12381209433078766, + "eval_screenspot_MAE_x": 0.08469116439421971, + "eval_screenspot_MAE_y": 0.031240610716243584, + "eval_screenspot_NUM_probability": 0.9999233881632487, + "eval_screenspot_inside_bbox": 0.8974999984105428, + "eval_screenspot_loss": 0.28809013962745667, + "eval_screenspot_loss_ce": 0.013613161475708088, + "eval_screenspot_loss_iou": 0.498046875, + "eval_screenspot_loss_num": 0.054570515950520836, + "eval_screenspot_loss_xval": 0.2729085286458333, + "eval_screenspot_runtime": 147.1347, + "eval_screenspot_samples_per_second": 0.605, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.77620396600567, + "eval_compot_CIoU": 0.8435192108154297, + "eval_compot_GIoU": 0.8384089767932892, + "eval_compot_IoU": 0.8579079210758209, + "eval_compot_MAE_all": 0.02599179372191429, + "eval_compot_MAE_h": 0.02002739906311035, + "eval_compot_MAE_w": 0.03220875933766365, + "eval_compot_MAE_x": 0.029916545376181602, + "eval_compot_MAE_y": 0.02181447669863701, + "eval_compot_NUM_probability": 0.9999608099460602, + "eval_compot_inside_bbox": 0.9565972089767456, + "eval_compot_loss": 0.08126524835824966, + "eval_compot_loss_ce": 5.167060953681357e-05, + "eval_compot_loss_iou": 0.50213623046875, + "eval_compot_loss_num": 0.014265060424804688, + "eval_compot_loss_xval": 0.07131195068359375, + "eval_compot_runtime": 82.3608, + "eval_compot_samples_per_second": 0.607, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.77620396600567, + "eval_custom_ui_MAE_all": 0.019572661723941565, + "eval_custom_ui_MAE_x": 0.031017981469631195, + "eval_custom_ui_MAE_y": 0.008127343142405152, + "eval_custom_ui_NUM_probability": 0.9998641312122345, + "eval_custom_ui_loss": 0.2092946618795395, + "eval_custom_ui_loss_ce": 0.11008366197347641, + "eval_custom_ui_loss_num": 0.018358230590820312, + "eval_custom_ui_loss_xval": 0.091827392578125, + "eval_custom_ui_runtime": 64.8486, + "eval_custom_ui_samples_per_second": 0.771, + "eval_custom_ui_steps_per_second": 0.031, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.77620396600567, + "loss": 0.25011128187179565, + "loss_ce": 0.13557879626750946, + "loss_iou": 0.0, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 858233480, + "step": 5000 + }, + { + "epoch": 56.787535410764875, + "grad_norm": 3.531880139320847, + "learning_rate": 5e-06, + "loss": 0.0727, + "num_input_tokens_seen": 858404320, + "step": 5001 + }, + { + "epoch": 56.787535410764875, + "loss": 0.06346552073955536, + "loss_ce": 6.525822391267866e-05, + "loss_iou": 0.328125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 858404320, + "step": 5001 + }, + { + "epoch": 56.79886685552408, + "grad_norm": 5.595290723932635, + "learning_rate": 5e-06, + "loss": 0.0723, + "num_input_tokens_seen": 858575740, + "step": 5002 + }, + { + "epoch": 56.79886685552408, + "loss": 0.06861157715320587, + "loss_ce": 9.96118105831556e-05, + "loss_iou": 0.369140625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 858575740, + "step": 5002 + }, + { + "epoch": 56.81019830028329, + "grad_norm": 3.731199623088894, + "learning_rate": 5e-06, + "loss": 0.0501, + "num_input_tokens_seen": 858745836, + "step": 5003 + }, + { + "epoch": 56.81019830028329, + "loss": 0.04174460470676422, + "loss_ce": 0.00010337021376471967, + "loss_iou": 0.546875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 858745836, + "step": 5003 + }, + { + "epoch": 56.82152974504249, + "grad_norm": 4.050315692478664, + "learning_rate": 5e-06, + "loss": 0.0516, + "num_input_tokens_seen": 858917428, + "step": 5004 + }, + { + "epoch": 56.82152974504249, + "loss": 0.04483303427696228, + "loss_ce": 0.00015530255041085184, + "loss_iou": 0.302734375, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 858917428, + "step": 5004 + }, + { + "epoch": 56.8328611898017, + "grad_norm": 4.079478613238688, + "learning_rate": 5e-06, + "loss": 0.0627, + "num_input_tokens_seen": 859086580, + "step": 5005 + }, + { + "epoch": 56.8328611898017, + "loss": 0.0678957849740982, + "loss_ce": 7.046588871162385e-05, + "loss_iou": 0.51953125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 859086580, + "step": 5005 + }, + { + "epoch": 56.844192634560905, + "grad_norm": 3.8407059310069003, + "learning_rate": 5e-06, + "loss": 0.053, + "num_input_tokens_seen": 859258396, + "step": 5006 + }, + { + "epoch": 56.844192634560905, + "loss": 0.07439401745796204, + "loss_ce": 5.3192015911918133e-05, + "loss_iou": 0.55859375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 859258396, + "step": 5006 + }, + { + "epoch": 56.85552407932011, + "grad_norm": 3.875403017964247, + "learning_rate": 5e-06, + "loss": 0.0965, + "num_input_tokens_seen": 859429420, + "step": 5007 + }, + { + "epoch": 56.85552407932011, + "loss": 0.10895470529794693, + "loss_ce": 8.324792725034058e-05, + "loss_iou": 0.439453125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 859429420, + "step": 5007 + }, + { + "epoch": 56.86685552407932, + "grad_norm": 2.989740407858414, + "learning_rate": 5e-06, + "loss": 0.0511, + "num_input_tokens_seen": 859601012, + "step": 5008 + }, + { + "epoch": 56.86685552407932, + "loss": 0.038098469376564026, + "loss_ce": 2.7790672902483493e-05, + "loss_iou": 0.37890625, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 859601012, + "step": 5008 + }, + { + "epoch": 56.87818696883853, + "grad_norm": 2.907420240435011, + "learning_rate": 5e-06, + "loss": 0.0892, + "num_input_tokens_seen": 859771720, + "step": 5009 + }, + { + "epoch": 56.87818696883853, + "loss": 0.060057006776332855, + "loss_ce": 0.0009597192984074354, + "loss_iou": 0.380859375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 859771720, + "step": 5009 + }, + { + "epoch": 56.889518413597735, + "grad_norm": 3.421985480487567, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 859943672, + "step": 5010 + }, + { + "epoch": 56.889518413597735, + "loss": 0.024871021509170532, + "loss_ce": 6.022902380209416e-05, + "loss_iou": 0.365234375, + "loss_num": 0.004974365234375, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 859943672, + "step": 5010 + }, + { + "epoch": 56.90084985835694, + "grad_norm": 6.224721474708366, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 860114784, + "step": 5011 + }, + { + "epoch": 56.90084985835694, + "loss": 0.09442857652902603, + "loss_ce": 9.873868839349598e-05, + "loss_iou": 0.392578125, + "loss_num": 0.018798828125, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 860114784, + "step": 5011 + }, + { + "epoch": 56.91218130311615, + "grad_norm": 3.0724953723064012, + "learning_rate": 5e-06, + "loss": 0.0865, + "num_input_tokens_seen": 860286620, + "step": 5012 + }, + { + "epoch": 56.91218130311615, + "loss": 0.07430398464202881, + "loss_ce": 5.4709242249373347e-05, + "loss_iou": 0.30859375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 860286620, + "step": 5012 + }, + { + "epoch": 56.92351274787536, + "grad_norm": 3.575510818823419, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 860458152, + "step": 5013 + }, + { + "epoch": 56.92351274787536, + "loss": 0.03822827711701393, + "loss_ce": 3.5529570595826954e-05, + "loss_iou": 0.42578125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 860458152, + "step": 5013 + }, + { + "epoch": 56.934844192634564, + "grad_norm": 3.9125080639132617, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 860630088, + "step": 5014 + }, + { + "epoch": 56.934844192634564, + "loss": 0.0815986916422844, + "loss_ce": 0.00016253037028945982, + "loss_iou": 0.380859375, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 860630088, + "step": 5014 + }, + { + "epoch": 56.946175637393765, + "grad_norm": 3.9089289880742832, + "learning_rate": 5e-06, + "loss": 0.0641, + "num_input_tokens_seen": 860800904, + "step": 5015 + }, + { + "epoch": 56.946175637393765, + "loss": 0.05884288251399994, + "loss_ce": 3.5510980524122715e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 860800904, + "step": 5015 + }, + { + "epoch": 56.95750708215297, + "grad_norm": 4.312881125873777, + "learning_rate": 5e-06, + "loss": 0.063, + "num_input_tokens_seen": 860972328, + "step": 5016 + }, + { + "epoch": 56.95750708215297, + "loss": 0.03629220277070999, + "loss_ce": 0.000983365927822888, + "loss_iou": 0.39453125, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 860972328, + "step": 5016 + }, + { + "epoch": 56.96883852691218, + "grad_norm": 3.6306028847334093, + "learning_rate": 5e-06, + "loss": 0.0411, + "num_input_tokens_seen": 861144208, + "step": 5017 + }, + { + "epoch": 56.96883852691218, + "loss": 0.03210487216711044, + "loss_ce": 0.00012245196558069438, + "loss_iou": 0.43359375, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 861144208, + "step": 5017 + }, + { + "epoch": 56.98016997167139, + "grad_norm": 3.7858792747750267, + "learning_rate": 5e-06, + "loss": 0.0868, + "num_input_tokens_seen": 861315960, + "step": 5018 + }, + { + "epoch": 56.98016997167139, + "loss": 0.06369799375534058, + "loss_ce": 0.0001451359858037904, + "loss_iou": 0.31640625, + "loss_num": 0.01275634765625, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 861315960, + "step": 5018 + }, + { + "epoch": 56.991501416430594, + "grad_norm": 3.4509099325672494, + "learning_rate": 5e-06, + "loss": 0.073, + "num_input_tokens_seen": 861487568, + "step": 5019 + }, + { + "epoch": 56.991501416430594, + "loss": 0.09958165884017944, + "loss_ce": 9.43559207371436e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 861487568, + "step": 5019 + }, + { + "epoch": 56.991501416430594, + "loss": 0.09639353305101395, + "loss_ce": 0.00011057269148295745, + "loss_iou": 0.44921875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 861616320, + "step": 5019 + }, + { + "epoch": 57.0028328611898, + "grad_norm": 4.231669477564399, + "learning_rate": 5e-06, + "loss": 0.0665, + "num_input_tokens_seen": 861658344, + "step": 5020 + }, + { + "epoch": 57.0028328611898, + "loss": 0.06899544596672058, + "loss_ce": 5.623399920295924e-05, + "loss_iou": 0.328125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 861658344, + "step": 5020 + }, + { + "epoch": 57.01416430594901, + "grad_norm": 3.537246798207537, + "learning_rate": 5e-06, + "loss": 0.0449, + "num_input_tokens_seen": 861830260, + "step": 5021 + }, + { + "epoch": 57.01416430594901, + "loss": 0.0476437509059906, + "loss_ce": 0.0001583982666488737, + "loss_iou": 0.53515625, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 861830260, + "step": 5021 + }, + { + "epoch": 57.02549575070822, + "grad_norm": 3.5774645402285126, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 862002040, + "step": 5022 + }, + { + "epoch": 57.02549575070822, + "loss": 0.05057121813297272, + "loss_ce": 5.6996053899638355e-05, + "loss_iou": 0.375, + "loss_num": 0.01007080078125, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 862002040, + "step": 5022 + }, + { + "epoch": 57.036827195467424, + "grad_norm": 3.257504467543487, + "learning_rate": 5e-06, + "loss": 0.0546, + "num_input_tokens_seen": 862172640, + "step": 5023 + }, + { + "epoch": 57.036827195467424, + "loss": 0.07824724167585373, + "loss_ce": 0.0003663810493890196, + "loss_iou": 0.02001953125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 862172640, + "step": 5023 + }, + { + "epoch": 57.04815864022663, + "grad_norm": 3.65946973525709, + "learning_rate": 5e-06, + "loss": 0.0582, + "num_input_tokens_seen": 862344820, + "step": 5024 + }, + { + "epoch": 57.04815864022663, + "loss": 0.1127917617559433, + "loss_ce": 5.981946742394939e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0224609375, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 862344820, + "step": 5024 + }, + { + "epoch": 57.05949008498584, + "grad_norm": 3.803995886183146, + "learning_rate": 5e-06, + "loss": 0.0603, + "num_input_tokens_seen": 862516640, + "step": 5025 + }, + { + "epoch": 57.05949008498584, + "loss": 0.05670784413814545, + "loss_ce": 8.247532969107851e-05, + "loss_iou": 0.4453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 862516640, + "step": 5025 + }, + { + "epoch": 57.07082152974504, + "grad_norm": 3.931559119462654, + "learning_rate": 5e-06, + "loss": 0.0607, + "num_input_tokens_seen": 862688216, + "step": 5026 + }, + { + "epoch": 57.07082152974504, + "loss": 0.08011346310377121, + "loss_ce": 6.5854394051712e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 862688216, + "step": 5026 + }, + { + "epoch": 57.08215297450425, + "grad_norm": 3.3828929426001366, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 862860044, + "step": 5027 + }, + { + "epoch": 57.08215297450425, + "loss": 0.05361152067780495, + "loss_ce": 0.00015998186427168548, + "loss_iou": 0.32421875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 862860044, + "step": 5027 + }, + { + "epoch": 57.093484419263454, + "grad_norm": 3.6592721193873565, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 863031836, + "step": 5028 + }, + { + "epoch": 57.093484419263454, + "loss": 0.03794557601213455, + "loss_ce": 8.852001337800175e-05, + "loss_iou": 0.265625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 863031836, + "step": 5028 + }, + { + "epoch": 57.10481586402266, + "grad_norm": 3.238188279584168, + "learning_rate": 5e-06, + "loss": 0.0386, + "num_input_tokens_seen": 863203528, + "step": 5029 + }, + { + "epoch": 57.10481586402266, + "loss": 0.05411809682846069, + "loss_ce": 7.146866119001061e-05, + "loss_iou": 0.328125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 863203528, + "step": 5029 + }, + { + "epoch": 57.11614730878187, + "grad_norm": 2.993312155786794, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 863374556, + "step": 5030 + }, + { + "epoch": 57.11614730878187, + "loss": 0.03923165798187256, + "loss_ce": 0.0001538988435640931, + "loss_iou": 0.44921875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 863374556, + "step": 5030 + }, + { + "epoch": 57.12747875354108, + "grad_norm": 7.261858447850296, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 863546088, + "step": 5031 + }, + { + "epoch": 57.12747875354108, + "loss": 0.08519823849201202, + "loss_ce": 5.41940753464587e-05, + "loss_iou": 0.1943359375, + "loss_num": 0.01708984375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 863546088, + "step": 5031 + }, + { + "epoch": 57.138810198300284, + "grad_norm": 3.2578107247106916, + "learning_rate": 5e-06, + "loss": 0.0472, + "num_input_tokens_seen": 863715504, + "step": 5032 + }, + { + "epoch": 57.138810198300284, + "loss": 0.038077641278505325, + "loss_ce": 0.0034401891753077507, + "loss_iou": 0.4140625, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 863715504, + "step": 5032 + }, + { + "epoch": 57.15014164305949, + "grad_norm": 3.212333335213238, + "learning_rate": 5e-06, + "loss": 0.0878, + "num_input_tokens_seen": 863886188, + "step": 5033 + }, + { + "epoch": 57.15014164305949, + "loss": 0.1206144168972969, + "loss_ce": 0.00011575574171729386, + "loss_iou": 0.40625, + "loss_num": 0.0240478515625, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 863886188, + "step": 5033 + }, + { + "epoch": 57.1614730878187, + "grad_norm": 2.941922916003128, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 864058052, + "step": 5034 + }, + { + "epoch": 57.1614730878187, + "loss": 0.03937336057424545, + "loss_ce": 0.00011249412636971101, + "loss_iou": 0.21484375, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 864058052, + "step": 5034 + }, + { + "epoch": 57.172804532577906, + "grad_norm": 2.6937968117440767, + "learning_rate": 5e-06, + "loss": 0.0548, + "num_input_tokens_seen": 864229764, + "step": 5035 + }, + { + "epoch": 57.172804532577906, + "loss": 0.05294235050678253, + "loss_ce": 4.012823774246499e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 864229764, + "step": 5035 + }, + { + "epoch": 57.184135977337114, + "grad_norm": 3.672482680881072, + "learning_rate": 5e-06, + "loss": 0.0626, + "num_input_tokens_seen": 864401652, + "step": 5036 + }, + { + "epoch": 57.184135977337114, + "loss": 0.035760682076215744, + "loss_ce": 8.56327751534991e-05, + "loss_iou": 0.49609375, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 864401652, + "step": 5036 + }, + { + "epoch": 57.195467422096314, + "grad_norm": 3.7672993398283823, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 864573348, + "step": 5037 + }, + { + "epoch": 57.195467422096314, + "loss": 0.04081244766712189, + "loss_ce": 5.621928721666336e-05, + "loss_iou": 0.5703125, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 864573348, + "step": 5037 + }, + { + "epoch": 57.20679886685552, + "grad_norm": 4.266719969087465, + "learning_rate": 5e-06, + "loss": 0.1007, + "num_input_tokens_seen": 864745652, + "step": 5038 + }, + { + "epoch": 57.20679886685552, + "loss": 0.10175144672393799, + "loss_ce": 0.00012790832261089236, + "loss_iou": 0.5234375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 864745652, + "step": 5038 + }, + { + "epoch": 57.21813031161473, + "grad_norm": 4.058714396900332, + "learning_rate": 5e-06, + "loss": 0.081, + "num_input_tokens_seen": 864916432, + "step": 5039 + }, + { + "epoch": 57.21813031161473, + "loss": 0.041860103607177734, + "loss_ce": 0.0002188688813475892, + "loss_iou": 0.359375, + "loss_num": 0.00830078125, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 864916432, + "step": 5039 + }, + { + "epoch": 57.22946175637394, + "grad_norm": 2.9693412464792606, + "learning_rate": 5e-06, + "loss": 0.0881, + "num_input_tokens_seen": 865088232, + "step": 5040 + }, + { + "epoch": 57.22946175637394, + "loss": 0.047646526247262955, + "loss_ce": 2.3845044779591262e-05, + "loss_iou": 0.189453125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 865088232, + "step": 5040 + }, + { + "epoch": 57.240793201133144, + "grad_norm": 3.1168266226033805, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 865260188, + "step": 5041 + }, + { + "epoch": 57.240793201133144, + "loss": 0.03999623283743858, + "loss_ce": 0.00012501695891842246, + "loss_iou": 0.41796875, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 865260188, + "step": 5041 + }, + { + "epoch": 57.25212464589235, + "grad_norm": 3.4797757072260778, + "learning_rate": 5e-06, + "loss": 0.0667, + "num_input_tokens_seen": 865430652, + "step": 5042 + }, + { + "epoch": 57.25212464589235, + "loss": 0.09753690659999847, + "loss_ce": 7.90227422839962e-05, + "loss_iou": 0.31640625, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 865430652, + "step": 5042 + }, + { + "epoch": 57.26345609065156, + "grad_norm": 4.862666830760718, + "learning_rate": 5e-06, + "loss": 0.0637, + "num_input_tokens_seen": 865602488, + "step": 5043 + }, + { + "epoch": 57.26345609065156, + "loss": 0.05271115154027939, + "loss_ce": 0.00012936675921082497, + "loss_iou": 0.416015625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 865602488, + "step": 5043 + }, + { + "epoch": 57.274787535410766, + "grad_norm": 4.020972927791236, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 865773428, + "step": 5044 + }, + { + "epoch": 57.274787535410766, + "loss": 0.06453670561313629, + "loss_ce": 2.254248465760611e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 865773428, + "step": 5044 + }, + { + "epoch": 57.286118980169974, + "grad_norm": 6.731758623376648, + "learning_rate": 5e-06, + "loss": 0.1125, + "num_input_tokens_seen": 865942904, + "step": 5045 + }, + { + "epoch": 57.286118980169974, + "loss": 0.12917166948318481, + "loss_ce": 0.00012808524479623884, + "loss_iou": 0.38671875, + "loss_num": 0.02587890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 865942904, + "step": 5045 + }, + { + "epoch": 57.29745042492918, + "grad_norm": 3.4722130684953743, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 866114776, + "step": 5046 + }, + { + "epoch": 57.29745042492918, + "loss": 0.044525355100631714, + "loss_ce": 9.175902232527733e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 866114776, + "step": 5046 + }, + { + "epoch": 57.30878186968839, + "grad_norm": 3.54394128499371, + "learning_rate": 5e-06, + "loss": 0.0813, + "num_input_tokens_seen": 866286588, + "step": 5047 + }, + { + "epoch": 57.30878186968839, + "loss": 0.06828071177005768, + "loss_ce": 7.392392581095919e-05, + "loss_iou": 0.37890625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 866286588, + "step": 5047 + }, + { + "epoch": 57.32011331444759, + "grad_norm": 4.8540153175909735, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 866457340, + "step": 5048 + }, + { + "epoch": 57.32011331444759, + "loss": 0.038827527314424515, + "loss_ce": 7.020324846962467e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 866457340, + "step": 5048 + }, + { + "epoch": 57.331444759206796, + "grad_norm": 3.5363898314808067, + "learning_rate": 5e-06, + "loss": 0.0521, + "num_input_tokens_seen": 866627524, + "step": 5049 + }, + { + "epoch": 57.331444759206796, + "loss": 0.05021118372678757, + "loss_ce": 5.5545671784784645e-05, + "loss_iou": 0.5625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 866627524, + "step": 5049 + }, + { + "epoch": 57.342776203966004, + "grad_norm": 3.2470222610965322, + "learning_rate": 5e-06, + "loss": 0.0504, + "num_input_tokens_seen": 866799256, + "step": 5050 + }, + { + "epoch": 57.342776203966004, + "loss": 0.03605237603187561, + "loss_ce": 8.740978228161111e-05, + "loss_iou": 0.484375, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 866799256, + "step": 5050 + }, + { + "epoch": 57.35410764872521, + "grad_norm": 3.4114460330260985, + "learning_rate": 5e-06, + "loss": 0.0552, + "num_input_tokens_seen": 866969368, + "step": 5051 + }, + { + "epoch": 57.35410764872521, + "loss": 0.05440719425678253, + "loss_ce": 4.01291654270608e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 866969368, + "step": 5051 + }, + { + "epoch": 57.36543909348442, + "grad_norm": 5.446001217826163, + "learning_rate": 5e-06, + "loss": 0.064, + "num_input_tokens_seen": 867140612, + "step": 5052 + }, + { + "epoch": 57.36543909348442, + "loss": 0.06678560376167297, + "loss_ce": 0.00016573088942095637, + "loss_iou": 0.3984375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 867140612, + "step": 5052 + }, + { + "epoch": 57.376770538243626, + "grad_norm": 4.087480069299749, + "learning_rate": 5e-06, + "loss": 0.0912, + "num_input_tokens_seen": 867312368, + "step": 5053 + }, + { + "epoch": 57.376770538243626, + "loss": 0.08000503480434418, + "loss_ce": 6.423363811336458e-05, + "loss_iou": 0.54296875, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 867312368, + "step": 5053 + }, + { + "epoch": 57.388101983002834, + "grad_norm": 3.598545889710196, + "learning_rate": 5e-06, + "loss": 0.0918, + "num_input_tokens_seen": 867482772, + "step": 5054 + }, + { + "epoch": 57.388101983002834, + "loss": 0.13171756267547607, + "loss_ce": 0.00015627549146302044, + "loss_iou": 0.30078125, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 867482772, + "step": 5054 + }, + { + "epoch": 57.39943342776204, + "grad_norm": 3.1050709183802456, + "learning_rate": 5e-06, + "loss": 0.0559, + "num_input_tokens_seen": 867654648, + "step": 5055 + }, + { + "epoch": 57.39943342776204, + "loss": 0.046764541417360306, + "loss_ce": 0.00013367910287342966, + "loss_iou": 0.482421875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 867654648, + "step": 5055 + }, + { + "epoch": 57.41076487252125, + "grad_norm": 35.77623341430084, + "learning_rate": 5e-06, + "loss": 0.0834, + "num_input_tokens_seen": 867823208, + "step": 5056 + }, + { + "epoch": 57.41076487252125, + "loss": 0.03876463323831558, + "loss_ce": 6.834713713033125e-05, + "loss_iou": 0.40234375, + "loss_num": 0.00775146484375, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 867823208, + "step": 5056 + }, + { + "epoch": 57.422096317280456, + "grad_norm": 3.996609357243811, + "learning_rate": 5e-06, + "loss": 0.0788, + "num_input_tokens_seen": 867994520, + "step": 5057 + }, + { + "epoch": 57.422096317280456, + "loss": 0.10653401911258698, + "loss_ce": 2.766936995612923e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0213623046875, + "loss_xval": 0.1064453125, + "num_input_tokens_seen": 867994520, + "step": 5057 + }, + { + "epoch": 57.43342776203966, + "grad_norm": 3.6658206174973396, + "learning_rate": 5e-06, + "loss": 0.0692, + "num_input_tokens_seen": 868165596, + "step": 5058 + }, + { + "epoch": 57.43342776203966, + "loss": 0.1198868453502655, + "loss_ce": 9.008888446260244e-05, + "loss_iou": 0.369140625, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 868165596, + "step": 5058 + }, + { + "epoch": 57.444759206798864, + "grad_norm": 8.13883596178892, + "learning_rate": 5e-06, + "loss": 0.0757, + "num_input_tokens_seen": 868337372, + "step": 5059 + }, + { + "epoch": 57.444759206798864, + "loss": 0.04465852305293083, + "loss_ce": 0.0001333790278295055, + "loss_iou": 0.54296875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 868337372, + "step": 5059 + }, + { + "epoch": 57.45609065155807, + "grad_norm": 3.621310775553928, + "learning_rate": 5e-06, + "loss": 0.0533, + "num_input_tokens_seen": 868509060, + "step": 5060 + }, + { + "epoch": 57.45609065155807, + "loss": 0.05548835173249245, + "loss_ce": 0.0002973129157908261, + "loss_iou": 0.388671875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 868509060, + "step": 5060 + }, + { + "epoch": 57.46742209631728, + "grad_norm": 3.7753393288950763, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 868681164, + "step": 5061 + }, + { + "epoch": 57.46742209631728, + "loss": 0.03601599112153053, + "loss_ce": 3.576567905838601e-05, + "loss_iou": 0.2236328125, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 868681164, + "step": 5061 + }, + { + "epoch": 57.478753541076486, + "grad_norm": 4.391557164235902, + "learning_rate": 5e-06, + "loss": 0.0439, + "num_input_tokens_seen": 868852404, + "step": 5062 + }, + { + "epoch": 57.478753541076486, + "loss": 0.050505250692367554, + "loss_ce": 0.0001359869638690725, + "loss_iou": 0.404296875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 868852404, + "step": 5062 + }, + { + "epoch": 57.49008498583569, + "grad_norm": 3.9887814992177333, + "learning_rate": 5e-06, + "loss": 0.0824, + "num_input_tokens_seen": 869024084, + "step": 5063 + }, + { + "epoch": 57.49008498583569, + "loss": 0.06514575332403183, + "loss_ce": 5.17590124218259e-05, + "loss_iou": 0.671875, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 869024084, + "step": 5063 + }, + { + "epoch": 57.5014164305949, + "grad_norm": 4.071979292489725, + "learning_rate": 5e-06, + "loss": 0.0664, + "num_input_tokens_seen": 869194608, + "step": 5064 + }, + { + "epoch": 57.5014164305949, + "loss": 0.05516326054930687, + "loss_ce": 7.903205550974235e-05, + "loss_iou": 0.408203125, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 869194608, + "step": 5064 + }, + { + "epoch": 57.51274787535411, + "grad_norm": 4.116623607204772, + "learning_rate": 5e-06, + "loss": 0.0747, + "num_input_tokens_seen": 869366760, + "step": 5065 + }, + { + "epoch": 57.51274787535411, + "loss": 0.10826753079891205, + "loss_ce": 5.219535159994848e-05, + "loss_iou": 0.27734375, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 869366760, + "step": 5065 + }, + { + "epoch": 57.524079320113316, + "grad_norm": 2.839576688607693, + "learning_rate": 5e-06, + "loss": 0.0651, + "num_input_tokens_seen": 869538644, + "step": 5066 + }, + { + "epoch": 57.524079320113316, + "loss": 0.06913868337869644, + "loss_ce": 6.214836321305484e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01385498046875, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 869538644, + "step": 5066 + }, + { + "epoch": 57.53541076487252, + "grad_norm": 3.0964737999170335, + "learning_rate": 5e-06, + "loss": 0.0677, + "num_input_tokens_seen": 869710768, + "step": 5067 + }, + { + "epoch": 57.53541076487252, + "loss": 0.0796612873673439, + "loss_ce": 0.00010196147923124954, + "loss_iou": 0.470703125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 869710768, + "step": 5067 + }, + { + "epoch": 57.54674220963173, + "grad_norm": 3.1266950009736094, + "learning_rate": 5e-06, + "loss": 0.0496, + "num_input_tokens_seen": 869882580, + "step": 5068 + }, + { + "epoch": 57.54674220963173, + "loss": 0.04041702300310135, + "loss_ce": 0.0001033049775287509, + "loss_iou": 0.5390625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 869882580, + "step": 5068 + }, + { + "epoch": 57.55807365439094, + "grad_norm": 3.3143194949148764, + "learning_rate": 5e-06, + "loss": 0.0582, + "num_input_tokens_seen": 870051768, + "step": 5069 + }, + { + "epoch": 57.55807365439094, + "loss": 0.035957276821136475, + "loss_ce": 3.808567998930812e-05, + "loss_iou": 0.431640625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 870051768, + "step": 5069 + }, + { + "epoch": 57.56940509915014, + "grad_norm": 4.617062231752367, + "learning_rate": 5e-06, + "loss": 0.0633, + "num_input_tokens_seen": 870223740, + "step": 5070 + }, + { + "epoch": 57.56940509915014, + "loss": 0.10611091554164886, + "loss_ce": 7.759387517580763e-05, + "loss_iou": 0.404296875, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 870223740, + "step": 5070 + }, + { + "epoch": 57.580736543909346, + "grad_norm": 3.9206413164352734, + "learning_rate": 5e-06, + "loss": 0.0501, + "num_input_tokens_seen": 870395288, + "step": 5071 + }, + { + "epoch": 57.580736543909346, + "loss": 0.04212199151515961, + "loss_ce": 2.29953984671738e-05, + "loss_iou": 0.5390625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 870395288, + "step": 5071 + }, + { + "epoch": 57.59206798866855, + "grad_norm": 3.72554947154378, + "learning_rate": 5e-06, + "loss": 0.0561, + "num_input_tokens_seen": 870564132, + "step": 5072 + }, + { + "epoch": 57.59206798866855, + "loss": 0.05641186982393265, + "loss_ce": 9.168143151327968e-05, + "loss_iou": 0.39453125, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 870564132, + "step": 5072 + }, + { + "epoch": 57.60339943342776, + "grad_norm": 3.217229627382938, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 870735996, + "step": 5073 + }, + { + "epoch": 57.60339943342776, + "loss": 0.10920092463493347, + "loss_ce": 0.00010058139741886407, + "loss_iou": 0.5, + "loss_num": 0.0218505859375, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 870735996, + "step": 5073 + }, + { + "epoch": 57.61473087818697, + "grad_norm": 3.198346507225924, + "learning_rate": 5e-06, + "loss": 0.0462, + "num_input_tokens_seen": 870906760, + "step": 5074 + }, + { + "epoch": 57.61473087818697, + "loss": 0.06260886788368225, + "loss_ce": 0.00015464352327398956, + "loss_iou": 0.0703125, + "loss_num": 0.01251220703125, + "loss_xval": 0.0625, + "num_input_tokens_seen": 870906760, + "step": 5074 + }, + { + "epoch": 57.626062322946176, + "grad_norm": 3.597125684633523, + "learning_rate": 5e-06, + "loss": 0.0648, + "num_input_tokens_seen": 871078044, + "step": 5075 + }, + { + "epoch": 57.626062322946176, + "loss": 0.08610302209854126, + "loss_ce": 7.396174623863772e-05, + "loss_iou": 0.58203125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 871078044, + "step": 5075 + }, + { + "epoch": 57.63739376770538, + "grad_norm": 3.3973737608801913, + "learning_rate": 5e-06, + "loss": 0.0744, + "num_input_tokens_seen": 871249904, + "step": 5076 + }, + { + "epoch": 57.63739376770538, + "loss": 0.10708586871623993, + "loss_ce": 7.597856892971322e-05, + "loss_iou": 0.43359375, + "loss_num": 0.021484375, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 871249904, + "step": 5076 + }, + { + "epoch": 57.64872521246459, + "grad_norm": 14.15498967655936, + "learning_rate": 5e-06, + "loss": 0.0655, + "num_input_tokens_seen": 871421708, + "step": 5077 + }, + { + "epoch": 57.64872521246459, + "loss": 0.12140925973653793, + "loss_ce": 0.0018719020299613476, + "loss_iou": 0.330078125, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 871421708, + "step": 5077 + }, + { + "epoch": 57.6600566572238, + "grad_norm": 2.3009264601009862, + "learning_rate": 5e-06, + "loss": 0.0448, + "num_input_tokens_seen": 871593536, + "step": 5078 + }, + { + "epoch": 57.6600566572238, + "loss": 0.03933015465736389, + "loss_ce": 6.166232196846977e-05, + "loss_iou": 0.41796875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 871593536, + "step": 5078 + }, + { + "epoch": 57.671388101983005, + "grad_norm": 2.719883639560166, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 871765224, + "step": 5079 + }, + { + "epoch": 57.671388101983005, + "loss": 0.02782721072435379, + "loss_ce": 7.147227006498724e-05, + "loss_iou": 0.5078125, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 871765224, + "step": 5079 + }, + { + "epoch": 57.68271954674221, + "grad_norm": 5.392435032151135, + "learning_rate": 5e-06, + "loss": 0.0729, + "num_input_tokens_seen": 871935504, + "step": 5080 + }, + { + "epoch": 57.68271954674221, + "loss": 0.12589111924171448, + "loss_ce": 0.0001281767472391948, + "loss_iou": 0.306640625, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 871935504, + "step": 5080 + }, + { + "epoch": 57.69405099150141, + "grad_norm": 3.6050758135481114, + "learning_rate": 5e-06, + "loss": 0.0551, + "num_input_tokens_seen": 872106924, + "step": 5081 + }, + { + "epoch": 57.69405099150141, + "loss": 0.06396076083183289, + "loss_ce": 8.746745879761875e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 872106924, + "step": 5081 + }, + { + "epoch": 57.70538243626062, + "grad_norm": 4.204151675840715, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 872278996, + "step": 5082 + }, + { + "epoch": 57.70538243626062, + "loss": 0.045700009912252426, + "loss_ce": 5.3342999308370054e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 872278996, + "step": 5082 + }, + { + "epoch": 57.71671388101983, + "grad_norm": 4.280723193083514, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 872449260, + "step": 5083 + }, + { + "epoch": 57.71671388101983, + "loss": 0.04482072964310646, + "loss_ce": 8.196244016289711e-05, + "loss_iou": 0.47265625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 872449260, + "step": 5083 + }, + { + "epoch": 57.728045325779036, + "grad_norm": 3.682391061868857, + "learning_rate": 5e-06, + "loss": 0.0593, + "num_input_tokens_seen": 872620680, + "step": 5084 + }, + { + "epoch": 57.728045325779036, + "loss": 0.04363110288977623, + "loss_ce": 5.963179864920676e-05, + "loss_iou": 0.3984375, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 872620680, + "step": 5084 + }, + { + "epoch": 57.73937677053824, + "grad_norm": 3.2672101962535125, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 872792372, + "step": 5085 + }, + { + "epoch": 57.73937677053824, + "loss": 0.04827268421649933, + "loss_ce": 0.00010068750270875171, + "loss_iou": 0.234375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 872792372, + "step": 5085 + }, + { + "epoch": 57.75070821529745, + "grad_norm": 3.8539558843221764, + "learning_rate": 5e-06, + "loss": 0.0741, + "num_input_tokens_seen": 872964324, + "step": 5086 + }, + { + "epoch": 57.75070821529745, + "loss": 0.07103857398033142, + "loss_ce": 3.9427824958693236e-05, + "loss_iou": 0.4140625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 872964324, + "step": 5086 + }, + { + "epoch": 57.76203966005666, + "grad_norm": 3.6034569761092516, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 873136036, + "step": 5087 + }, + { + "epoch": 57.76203966005666, + "loss": 0.05179614573717117, + "loss_ce": 2.3074911950971e-05, + "loss_iou": 0.470703125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 873136036, + "step": 5087 + }, + { + "epoch": 57.773371104815865, + "grad_norm": 6.347541657907292, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 873307896, + "step": 5088 + }, + { + "epoch": 57.773371104815865, + "loss": 0.06276465207338333, + "loss_ce": 0.0008902625413611531, + "loss_iou": 0.45703125, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 873307896, + "step": 5088 + }, + { + "epoch": 57.78470254957507, + "grad_norm": 3.2255177959465247, + "learning_rate": 5e-06, + "loss": 0.0588, + "num_input_tokens_seen": 873479704, + "step": 5089 + }, + { + "epoch": 57.78470254957507, + "loss": 0.039910003542900085, + "loss_ce": 5.4046304285293445e-05, + "loss_iou": 0.427734375, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 873479704, + "step": 5089 + }, + { + "epoch": 57.79603399433428, + "grad_norm": 5.388892145701267, + "learning_rate": 5e-06, + "loss": 0.1116, + "num_input_tokens_seen": 873651572, + "step": 5090 + }, + { + "epoch": 57.79603399433428, + "loss": 0.07917419075965881, + "loss_ce": 2.685383878997527e-05, + "loss_iou": 0.49609375, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 873651572, + "step": 5090 + }, + { + "epoch": 57.80736543909349, + "grad_norm": 4.090374408229897, + "learning_rate": 5e-06, + "loss": 0.0714, + "num_input_tokens_seen": 873822572, + "step": 5091 + }, + { + "epoch": 57.80736543909349, + "loss": 0.0569564551115036, + "loss_ce": 0.00010220690455753356, + "loss_iou": 0.3828125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 873822572, + "step": 5091 + }, + { + "epoch": 57.81869688385269, + "grad_norm": 3.07757802750749, + "learning_rate": 5e-06, + "loss": 0.0599, + "num_input_tokens_seen": 873993632, + "step": 5092 + }, + { + "epoch": 57.81869688385269, + "loss": 0.08146277070045471, + "loss_ce": 0.004558474291115999, + "loss_iou": 0.53125, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 873993632, + "step": 5092 + }, + { + "epoch": 57.830028328611895, + "grad_norm": 3.6462578497896283, + "learning_rate": 5e-06, + "loss": 0.0365, + "num_input_tokens_seen": 874165400, + "step": 5093 + }, + { + "epoch": 57.830028328611895, + "loss": 0.04115299880504608, + "loss_ce": 0.00016789112123660743, + "loss_iou": 0.349609375, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 874165400, + "step": 5093 + }, + { + "epoch": 57.8413597733711, + "grad_norm": 3.5431815528857205, + "learning_rate": 5e-06, + "loss": 0.0731, + "num_input_tokens_seen": 874336344, + "step": 5094 + }, + { + "epoch": 57.8413597733711, + "loss": 0.06220860034227371, + "loss_ce": 0.00012058853462804109, + "loss_iou": 0.3125, + "loss_num": 0.012451171875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 874336344, + "step": 5094 + }, + { + "epoch": 57.85269121813031, + "grad_norm": 3.541857387568089, + "learning_rate": 5e-06, + "loss": 0.0773, + "num_input_tokens_seen": 874508336, + "step": 5095 + }, + { + "epoch": 57.85269121813031, + "loss": 0.11195708811283112, + "loss_ce": 3.386967728147283e-05, + "loss_iou": 0.4375, + "loss_num": 0.0224609375, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 874508336, + "step": 5095 + }, + { + "epoch": 57.86402266288952, + "grad_norm": 4.784857267031445, + "learning_rate": 5e-06, + "loss": 0.063, + "num_input_tokens_seen": 874679984, + "step": 5096 + }, + { + "epoch": 57.86402266288952, + "loss": 0.03885423392057419, + "loss_ce": 6.639276398345828e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 874679984, + "step": 5096 + }, + { + "epoch": 57.875354107648725, + "grad_norm": 3.4170915279287724, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 874851824, + "step": 5097 + }, + { + "epoch": 57.875354107648725, + "loss": 0.037616003304719925, + "loss_ce": 0.00014041727990843356, + "loss_iou": 0.353515625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 874851824, + "step": 5097 + }, + { + "epoch": 57.88668555240793, + "grad_norm": 4.184896382903822, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 875020924, + "step": 5098 + }, + { + "epoch": 57.88668555240793, + "loss": 0.04111591726541519, + "loss_ce": 8.503426215611398e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 875020924, + "step": 5098 + }, + { + "epoch": 57.89801699716714, + "grad_norm": 3.352523644292825, + "learning_rate": 5e-06, + "loss": 0.0664, + "num_input_tokens_seen": 875192168, + "step": 5099 + }, + { + "epoch": 57.89801699716714, + "loss": 0.05032181739807129, + "loss_ce": 8.988452464109287e-05, + "loss_iou": 0.486328125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 875192168, + "step": 5099 + }, + { + "epoch": 57.90934844192635, + "grad_norm": 8.64567669140172, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 875362972, + "step": 5100 + }, + { + "epoch": 57.90934844192635, + "loss": 0.05239830166101456, + "loss_ce": 3.0138548027025536e-05, + "loss_iou": 0.419921875, + "loss_num": 0.010498046875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 875362972, + "step": 5100 + }, + { + "epoch": 57.920679886685555, + "grad_norm": 11.855779680858916, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 875532388, + "step": 5101 + }, + { + "epoch": 57.920679886685555, + "loss": 0.05897153541445732, + "loss_ce": 7.261127757374197e-05, + "loss_iou": 0.4375, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 875532388, + "step": 5101 + }, + { + "epoch": 57.93201133144476, + "grad_norm": 4.5639913925062405, + "learning_rate": 5e-06, + "loss": 0.0561, + "num_input_tokens_seen": 875704220, + "step": 5102 + }, + { + "epoch": 57.93201133144476, + "loss": 0.04093395918607712, + "loss_ce": 5.5663207604084164e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 875704220, + "step": 5102 + }, + { + "epoch": 57.94334277620396, + "grad_norm": 3.467964592377589, + "learning_rate": 5e-06, + "loss": 0.0792, + "num_input_tokens_seen": 875876036, + "step": 5103 + }, + { + "epoch": 57.94334277620396, + "loss": 0.09246046841144562, + "loss_ce": 3.798675970756449e-05, + "loss_iou": 0.5546875, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 875876036, + "step": 5103 + }, + { + "epoch": 57.95467422096317, + "grad_norm": 4.771503567332697, + "learning_rate": 5e-06, + "loss": 0.0885, + "num_input_tokens_seen": 876047956, + "step": 5104 + }, + { + "epoch": 57.95467422096317, + "loss": 0.1134958416223526, + "loss_ce": 3.148883115500212e-05, + "loss_iou": 0.50390625, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 876047956, + "step": 5104 + }, + { + "epoch": 57.96600566572238, + "grad_norm": 4.364680975526661, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 876218100, + "step": 5105 + }, + { + "epoch": 57.96600566572238, + "loss": 0.043542273342609406, + "loss_ce": 6.998371100053191e-05, + "loss_iou": 0.43359375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 876218100, + "step": 5105 + }, + { + "epoch": 57.977337110481585, + "grad_norm": 4.424021672230856, + "learning_rate": 5e-06, + "loss": 0.0608, + "num_input_tokens_seen": 876389104, + "step": 5106 + }, + { + "epoch": 57.977337110481585, + "loss": 0.05820219963788986, + "loss_ce": 3.569648106349632e-05, + "loss_iou": 0.48828125, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 876389104, + "step": 5106 + }, + { + "epoch": 57.98866855524079, + "grad_norm": 4.141340864324125, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 876561396, + "step": 5107 + }, + { + "epoch": 57.98866855524079, + "loss": 0.03864654526114464, + "loss_ce": 4.1810115362750366e-05, + "loss_iou": 0.34375, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 876561396, + "step": 5107 + }, + { + "epoch": 58.0, + "grad_norm": 4.19898040942282, + "learning_rate": 5e-06, + "loss": 0.064, + "num_input_tokens_seen": 876732192, + "step": 5108 + }, + { + "epoch": 58.0, + "loss": 0.03873483091592789, + "loss_ce": 6.905860936967656e-05, + "loss_iou": 0.3671875, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 876732192, + "step": 5108 + }, + { + "epoch": 58.01133144475921, + "grad_norm": 3.199857446432168, + "learning_rate": 5e-06, + "loss": 0.0617, + "num_input_tokens_seen": 876903308, + "step": 5109 + }, + { + "epoch": 58.01133144475921, + "loss": 0.03641393035650253, + "loss_ce": 6.749634485458955e-05, + "loss_iou": 0.52734375, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 876903308, + "step": 5109 + }, + { + "epoch": 58.022662889518415, + "grad_norm": 3.3669725716022456, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 877075500, + "step": 5110 + }, + { + "epoch": 58.022662889518415, + "loss": 0.04136184975504875, + "loss_ce": 0.00034622280509211123, + "loss_iou": 0.42578125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 877075500, + "step": 5110 + }, + { + "epoch": 58.03399433427762, + "grad_norm": 2.8820647740383163, + "learning_rate": 5e-06, + "loss": 0.0427, + "num_input_tokens_seen": 877247104, + "step": 5111 + }, + { + "epoch": 58.03399433427762, + "loss": 0.025820832699537277, + "loss_ce": 6.399747508112341e-05, + "loss_iou": 0.0, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 877247104, + "step": 5111 + }, + { + "epoch": 58.04532577903683, + "grad_norm": 2.3984366774928483, + "learning_rate": 5e-06, + "loss": 0.0742, + "num_input_tokens_seen": 877418828, + "step": 5112 + }, + { + "epoch": 58.04532577903683, + "loss": 0.04250626638531685, + "loss_ce": 0.00010208995081484318, + "loss_iou": 0.27734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 877418828, + "step": 5112 + }, + { + "epoch": 58.05665722379604, + "grad_norm": 2.885038635840427, + "learning_rate": 5e-06, + "loss": 0.0474, + "num_input_tokens_seen": 877591060, + "step": 5113 + }, + { + "epoch": 58.05665722379604, + "loss": 0.06191824749112129, + "loss_ce": 3.622656367952004e-05, + "loss_iou": 0.47265625, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 877591060, + "step": 5113 + }, + { + "epoch": 58.06798866855524, + "grad_norm": 11.275130982169811, + "learning_rate": 5e-06, + "loss": 0.0671, + "num_input_tokens_seen": 877761884, + "step": 5114 + }, + { + "epoch": 58.06798866855524, + "loss": 0.07542091608047485, + "loss_ce": 2.7240759663982317e-05, + "loss_iou": 0.140625, + "loss_num": 0.01507568359375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 877761884, + "step": 5114 + }, + { + "epoch": 58.079320113314445, + "grad_norm": 3.8428706585620542, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 877934304, + "step": 5115 + }, + { + "epoch": 58.079320113314445, + "loss": 0.05714516341686249, + "loss_ce": 3.91458670492284e-05, + "loss_iou": 0.62109375, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 877934304, + "step": 5115 + }, + { + "epoch": 58.09065155807365, + "grad_norm": 4.277177756148496, + "learning_rate": 5e-06, + "loss": 0.0996, + "num_input_tokens_seen": 878105080, + "step": 5116 + }, + { + "epoch": 58.09065155807365, + "loss": 0.1516241729259491, + "loss_ce": 0.0002112043439410627, + "loss_iou": 0.41015625, + "loss_num": 0.0302734375, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 878105080, + "step": 5116 + }, + { + "epoch": 58.10198300283286, + "grad_norm": 3.4166333894226355, + "learning_rate": 5e-06, + "loss": 0.0691, + "num_input_tokens_seen": 878276888, + "step": 5117 + }, + { + "epoch": 58.10198300283286, + "loss": 0.033880945295095444, + "loss_ce": 3.695099803735502e-05, + "loss_iou": 0.498046875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 878276888, + "step": 5117 + }, + { + "epoch": 58.11331444759207, + "grad_norm": 3.9675535702788474, + "learning_rate": 5e-06, + "loss": 0.0715, + "num_input_tokens_seen": 878448492, + "step": 5118 + }, + { + "epoch": 58.11331444759207, + "loss": 0.09718040376901627, + "loss_ce": 7.346836355281994e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 878448492, + "step": 5118 + }, + { + "epoch": 58.124645892351275, + "grad_norm": 3.089661459899956, + "learning_rate": 5e-06, + "loss": 0.0526, + "num_input_tokens_seen": 878620404, + "step": 5119 + }, + { + "epoch": 58.124645892351275, + "loss": 0.043347954750061035, + "loss_ce": 4.3514530261745676e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 878620404, + "step": 5119 + }, + { + "epoch": 58.13597733711048, + "grad_norm": 2.9783339010224408, + "learning_rate": 5e-06, + "loss": 0.0515, + "num_input_tokens_seen": 878792252, + "step": 5120 + }, + { + "epoch": 58.13597733711048, + "loss": 0.057560212910175323, + "loss_ce": 4.98374029120896e-05, + "loss_iou": 0.42578125, + "loss_num": 0.01153564453125, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 878792252, + "step": 5120 + }, + { + "epoch": 58.14730878186969, + "grad_norm": 3.6784884208077897, + "learning_rate": 5e-06, + "loss": 0.0427, + "num_input_tokens_seen": 878962572, + "step": 5121 + }, + { + "epoch": 58.14730878186969, + "loss": 0.040791139006614685, + "loss_ce": 1.9656603399198502e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 878962572, + "step": 5121 + }, + { + "epoch": 58.1586402266289, + "grad_norm": 4.574891394109308, + "learning_rate": 5e-06, + "loss": 0.0766, + "num_input_tokens_seen": 879134364, + "step": 5122 + }, + { + "epoch": 58.1586402266289, + "loss": 0.048217467963695526, + "loss_ce": 1.49508414324373e-05, + "loss_iou": 0.53125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 879134364, + "step": 5122 + }, + { + "epoch": 58.169971671388105, + "grad_norm": 3.5322630962007424, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 879305944, + "step": 5123 + }, + { + "epoch": 58.169971671388105, + "loss": 0.04073029011487961, + "loss_ce": 1.984263872145675e-05, + "loss_iou": 0.435546875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 879305944, + "step": 5123 + }, + { + "epoch": 58.18130311614731, + "grad_norm": 3.9595663092352056, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 879478580, + "step": 5124 + }, + { + "epoch": 58.18130311614731, + "loss": 0.08592826128005981, + "loss_ce": 8.231262472691014e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 879478580, + "step": 5124 + }, + { + "epoch": 58.19263456090651, + "grad_norm": 3.6069704515292176, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 879647576, + "step": 5125 + }, + { + "epoch": 58.19263456090651, + "loss": 0.0516824796795845, + "loss_ce": 7.725724572082981e-05, + "loss_iou": 0.458984375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 879647576, + "step": 5125 + }, + { + "epoch": 58.20396600566572, + "grad_norm": 3.045683902802767, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 879819492, + "step": 5126 + }, + { + "epoch": 58.20396600566572, + "loss": 0.05950441583991051, + "loss_ce": 0.00011721032205969095, + "loss_iou": 0.48046875, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 879819492, + "step": 5126 + }, + { + "epoch": 58.21529745042493, + "grad_norm": 2.6772214076046112, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 879990924, + "step": 5127 + }, + { + "epoch": 58.21529745042493, + "loss": 0.03356531262397766, + "loss_ce": 4.93829429615289e-05, + "loss_iou": 0.05126953125, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 879990924, + "step": 5127 + }, + { + "epoch": 58.226628895184135, + "grad_norm": 2.6516104702028374, + "learning_rate": 5e-06, + "loss": 0.0303, + "num_input_tokens_seen": 880163040, + "step": 5128 + }, + { + "epoch": 58.226628895184135, + "loss": 0.03838310390710831, + "loss_ce": 6.828515324741602e-05, + "loss_iou": 0.033203125, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 880163040, + "step": 5128 + }, + { + "epoch": 58.23796033994334, + "grad_norm": 3.4412722254487997, + "learning_rate": 5e-06, + "loss": 0.076, + "num_input_tokens_seen": 880334264, + "step": 5129 + }, + { + "epoch": 58.23796033994334, + "loss": 0.03586754947900772, + "loss_ce": 0.00010094626486534253, + "loss_iou": 0.5234375, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 880334264, + "step": 5129 + }, + { + "epoch": 58.24929178470255, + "grad_norm": 4.118062442913695, + "learning_rate": 5e-06, + "loss": 0.0513, + "num_input_tokens_seen": 880505100, + "step": 5130 + }, + { + "epoch": 58.24929178470255, + "loss": 0.05262747406959534, + "loss_ce": 0.00010672125790733844, + "loss_iou": 0.45703125, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 880505100, + "step": 5130 + }, + { + "epoch": 58.26062322946176, + "grad_norm": 3.615844659676223, + "learning_rate": 5e-06, + "loss": 0.0666, + "num_input_tokens_seen": 880675108, + "step": 5131 + }, + { + "epoch": 58.26062322946176, + "loss": 0.09601166099309921, + "loss_ce": 3.38781246682629e-05, + "loss_iou": 0.412109375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 880675108, + "step": 5131 + }, + { + "epoch": 58.271954674220964, + "grad_norm": 4.891314659232577, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 880846780, + "step": 5132 + }, + { + "epoch": 58.271954674220964, + "loss": 0.10980711132287979, + "loss_ce": 2.01282455236651e-05, + "loss_iou": 0.15625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 880846780, + "step": 5132 + }, + { + "epoch": 58.28328611898017, + "grad_norm": 2.8925024487179694, + "learning_rate": 5e-06, + "loss": 0.0563, + "num_input_tokens_seen": 881017888, + "step": 5133 + }, + { + "epoch": 58.28328611898017, + "loss": 0.10723724216222763, + "loss_ce": 7.476101745851338e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 881017888, + "step": 5133 + }, + { + "epoch": 58.29461756373938, + "grad_norm": 3.2468648900083186, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 881188444, + "step": 5134 + }, + { + "epoch": 58.29461756373938, + "loss": 0.09116239845752716, + "loss_ce": 6.743149424437433e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0181884765625, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 881188444, + "step": 5134 + }, + { + "epoch": 58.30594900849859, + "grad_norm": 3.7878268786147817, + "learning_rate": 5e-06, + "loss": 0.0568, + "num_input_tokens_seen": 881360560, + "step": 5135 + }, + { + "epoch": 58.30594900849859, + "loss": 0.0402548685669899, + "loss_ce": 6.321677938103676e-05, + "loss_iou": 0.5, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 881360560, + "step": 5135 + }, + { + "epoch": 58.31728045325779, + "grad_norm": 3.3404631393697835, + "learning_rate": 5e-06, + "loss": 0.0436, + "num_input_tokens_seen": 881531232, + "step": 5136 + }, + { + "epoch": 58.31728045325779, + "loss": 0.042157139629125595, + "loss_ce": 4.288295895094052e-05, + "loss_iou": 0.453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 881531232, + "step": 5136 + }, + { + "epoch": 58.328611898016995, + "grad_norm": 3.090252804263699, + "learning_rate": 5e-06, + "loss": 0.0496, + "num_input_tokens_seen": 881702996, + "step": 5137 + }, + { + "epoch": 58.328611898016995, + "loss": 0.05525463446974754, + "loss_ce": 3.307961014797911e-05, + "loss_iou": 0.3125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 881702996, + "step": 5137 + }, + { + "epoch": 58.3399433427762, + "grad_norm": 3.604927215276543, + "learning_rate": 5e-06, + "loss": 0.0699, + "num_input_tokens_seen": 881874200, + "step": 5138 + }, + { + "epoch": 58.3399433427762, + "loss": 0.0328078530728817, + "loss_ce": 6.249104626476765e-05, + "loss_iou": 0.37890625, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 881874200, + "step": 5138 + }, + { + "epoch": 58.35127478753541, + "grad_norm": 4.0498817920183345, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 882046348, + "step": 5139 + }, + { + "epoch": 58.35127478753541, + "loss": 0.05541135370731354, + "loss_ce": 9.824558947002515e-05, + "loss_iou": 0.45703125, + "loss_num": 0.01104736328125, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 882046348, + "step": 5139 + }, + { + "epoch": 58.36260623229462, + "grad_norm": 3.263545120224719, + "learning_rate": 5e-06, + "loss": 0.0603, + "num_input_tokens_seen": 882216184, + "step": 5140 + }, + { + "epoch": 58.36260623229462, + "loss": 0.06498118489980698, + "loss_ce": 5.5038894060999155e-05, + "loss_iou": 0.15234375, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 882216184, + "step": 5140 + }, + { + "epoch": 58.373937677053824, + "grad_norm": 3.594097870683096, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 882387708, + "step": 5141 + }, + { + "epoch": 58.373937677053824, + "loss": 0.05924582853913307, + "loss_ce": 7.224692672025412e-05, + "loss_iou": 0.267578125, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 882387708, + "step": 5141 + }, + { + "epoch": 58.38526912181303, + "grad_norm": 3.0042299427516297, + "learning_rate": 5e-06, + "loss": 0.0682, + "num_input_tokens_seen": 882559580, + "step": 5142 + }, + { + "epoch": 58.38526912181303, + "loss": 0.03157265484333038, + "loss_ce": 4.799876478500664e-05, + "loss_iou": 0.3359375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 882559580, + "step": 5142 + }, + { + "epoch": 58.39660056657224, + "grad_norm": 3.41089944767485, + "learning_rate": 5e-06, + "loss": 0.0944, + "num_input_tokens_seen": 882731532, + "step": 5143 + }, + { + "epoch": 58.39660056657224, + "loss": 0.11940383911132812, + "loss_ce": 3.434465907048434e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0238037109375, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 882731532, + "step": 5143 + }, + { + "epoch": 58.40793201133145, + "grad_norm": 2.842345429791182, + "learning_rate": 5e-06, + "loss": 0.0715, + "num_input_tokens_seen": 882902512, + "step": 5144 + }, + { + "epoch": 58.40793201133145, + "loss": 0.10355561971664429, + "loss_ce": 8.57689228723757e-05, + "loss_iou": 0.3984375, + "loss_num": 0.0206298828125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 882902512, + "step": 5144 + }, + { + "epoch": 58.419263456090654, + "grad_norm": 2.5457588820237618, + "learning_rate": 5e-06, + "loss": 0.0577, + "num_input_tokens_seen": 883072764, + "step": 5145 + }, + { + "epoch": 58.419263456090654, + "loss": 0.10356974601745605, + "loss_ce": 3.8865608075866476e-05, + "loss_iou": 0.376953125, + "loss_num": 0.020751953125, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 883072764, + "step": 5145 + }, + { + "epoch": 58.43059490084986, + "grad_norm": 2.2274371942951032, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 883244632, + "step": 5146 + }, + { + "epoch": 58.43059490084986, + "loss": 0.02465543895959854, + "loss_ce": 5.82701395615004e-05, + "loss_iou": 0.3515625, + "loss_num": 0.004913330078125, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 883244632, + "step": 5146 + }, + { + "epoch": 58.44192634560906, + "grad_norm": 2.425816204474589, + "learning_rate": 5e-06, + "loss": 0.064, + "num_input_tokens_seen": 883416764, + "step": 5147 + }, + { + "epoch": 58.44192634560906, + "loss": 0.06607544422149658, + "loss_ce": 7.355008710874245e-05, + "loss_iou": 0.466796875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 883416764, + "step": 5147 + }, + { + "epoch": 58.45325779036827, + "grad_norm": 3.0425603181572747, + "learning_rate": 5e-06, + "loss": 0.0735, + "num_input_tokens_seen": 883588312, + "step": 5148 + }, + { + "epoch": 58.45325779036827, + "loss": 0.05393122881650925, + "loss_ce": 6.770497566321865e-05, + "loss_iou": 0.3203125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 883588312, + "step": 5148 + }, + { + "epoch": 58.46458923512748, + "grad_norm": 3.0562728917512296, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 883759892, + "step": 5149 + }, + { + "epoch": 58.46458923512748, + "loss": 0.06445635855197906, + "loss_ce": 1.8499664292903617e-05, + "loss_iou": 0.4296875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 883759892, + "step": 5149 + }, + { + "epoch": 58.475920679886684, + "grad_norm": 3.227847404238047, + "learning_rate": 5e-06, + "loss": 0.1027, + "num_input_tokens_seen": 883931692, + "step": 5150 + }, + { + "epoch": 58.475920679886684, + "loss": 0.07732360810041428, + "loss_ce": 0.00015991358668543398, + "loss_iou": 0.263671875, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 883931692, + "step": 5150 + }, + { + "epoch": 58.48725212464589, + "grad_norm": 3.377039550405065, + "learning_rate": 5e-06, + "loss": 0.0518, + "num_input_tokens_seen": 884102024, + "step": 5151 + }, + { + "epoch": 58.48725212464589, + "loss": 0.06637821346521378, + "loss_ce": 7.877300959080458e-05, + "loss_iou": 0.53515625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 884102024, + "step": 5151 + }, + { + "epoch": 58.4985835694051, + "grad_norm": 4.0823108696331865, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 884272964, + "step": 5152 + }, + { + "epoch": 58.4985835694051, + "loss": 0.03338304162025452, + "loss_ce": 4.2590116208884865e-05, + "loss_iou": 0.431640625, + "loss_num": 0.00665283203125, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 884272964, + "step": 5152 + }, + { + "epoch": 58.50991501416431, + "grad_norm": 3.5536594124255125, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 884444676, + "step": 5153 + }, + { + "epoch": 58.50991501416431, + "loss": 0.04385187476873398, + "loss_ce": 7.440572517225519e-05, + "loss_iou": 0.55078125, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 884444676, + "step": 5153 + }, + { + "epoch": 58.521246458923514, + "grad_norm": 3.993798626008827, + "learning_rate": 5e-06, + "loss": 0.0645, + "num_input_tokens_seen": 884614212, + "step": 5154 + }, + { + "epoch": 58.521246458923514, + "loss": 0.13052082061767578, + "loss_ce": 5.818325371365063e-05, + "loss_iou": 0.359375, + "loss_num": 0.026123046875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 884614212, + "step": 5154 + }, + { + "epoch": 58.53257790368272, + "grad_norm": 3.857913453011595, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 884786108, + "step": 5155 + }, + { + "epoch": 58.53257790368272, + "loss": 0.05422041565179825, + "loss_ce": 0.00025008045486174524, + "loss_iou": 0.5078125, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 884786108, + "step": 5155 + }, + { + "epoch": 58.54390934844193, + "grad_norm": 4.665716681310218, + "learning_rate": 5e-06, + "loss": 0.052, + "num_input_tokens_seen": 884956924, + "step": 5156 + }, + { + "epoch": 58.54390934844193, + "loss": 0.07115911692380905, + "loss_ce": 6.841756112407893e-05, + "loss_iou": 0.44921875, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 884956924, + "step": 5156 + }, + { + "epoch": 58.555240793201136, + "grad_norm": 3.0631782360908972, + "learning_rate": 5e-06, + "loss": 0.0784, + "num_input_tokens_seen": 885128788, + "step": 5157 + }, + { + "epoch": 58.555240793201136, + "loss": 0.06514810770750046, + "loss_ce": 4.648451431421563e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0130615234375, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 885128788, + "step": 5157 + }, + { + "epoch": 58.56657223796034, + "grad_norm": 3.6429955683677684, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 885300476, + "step": 5158 + }, + { + "epoch": 58.56657223796034, + "loss": 0.038397423923015594, + "loss_ce": 0.0002657122095115483, + "loss_iou": 0.37109375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 885300476, + "step": 5158 + }, + { + "epoch": 58.577903682719544, + "grad_norm": 3.832394927390381, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 885472168, + "step": 5159 + }, + { + "epoch": 58.577903682719544, + "loss": 0.04183327034115791, + "loss_ce": 3.9447215385735035e-05, + "loss_iou": 0.462890625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 885472168, + "step": 5159 + }, + { + "epoch": 58.58923512747875, + "grad_norm": 3.7582317401369183, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 885644112, + "step": 5160 + }, + { + "epoch": 58.58923512747875, + "loss": 0.07095743715763092, + "loss_ce": 6.51002919767052e-05, + "loss_iou": 0.4296875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 885644112, + "step": 5160 + }, + { + "epoch": 58.60056657223796, + "grad_norm": 3.948321749939692, + "learning_rate": 5e-06, + "loss": 0.0514, + "num_input_tokens_seen": 885815256, + "step": 5161 + }, + { + "epoch": 58.60056657223796, + "loss": 0.05903956666588783, + "loss_ce": 0.00014063986600376666, + "loss_iou": 0.5703125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 885815256, + "step": 5161 + }, + { + "epoch": 58.611898016997166, + "grad_norm": 4.667675028409224, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 885986980, + "step": 5162 + }, + { + "epoch": 58.611898016997166, + "loss": 0.041775137186050415, + "loss_ce": 0.00010338243009755388, + "loss_iou": 0.498046875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 885986980, + "step": 5162 + }, + { + "epoch": 58.623229461756374, + "grad_norm": 3.509228240218512, + "learning_rate": 5e-06, + "loss": 0.0671, + "num_input_tokens_seen": 886158724, + "step": 5163 + }, + { + "epoch": 58.623229461756374, + "loss": 0.06796180456876755, + "loss_ce": 7.545328844571486e-05, + "loss_iou": 0.53125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 886158724, + "step": 5163 + }, + { + "epoch": 58.63456090651558, + "grad_norm": 4.0130861192513345, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 886330920, + "step": 5164 + }, + { + "epoch": 58.63456090651558, + "loss": 0.0440477654337883, + "loss_ce": 4.141558747505769e-05, + "loss_iou": 0.453125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 886330920, + "step": 5164 + }, + { + "epoch": 58.64589235127479, + "grad_norm": 3.934199158970189, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 886502768, + "step": 5165 + }, + { + "epoch": 58.64589235127479, + "loss": 0.03873293846845627, + "loss_ce": 6.716544885421172e-05, + "loss_iou": 0.416015625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 886502768, + "step": 5165 + }, + { + "epoch": 58.657223796033996, + "grad_norm": 3.856637103531575, + "learning_rate": 5e-06, + "loss": 0.0441, + "num_input_tokens_seen": 886673020, + "step": 5166 + }, + { + "epoch": 58.657223796033996, + "loss": 0.03267926722764969, + "loss_ce": 2.545849565649405e-05, + "loss_iou": 0.5546875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 886673020, + "step": 5166 + }, + { + "epoch": 58.668555240793204, + "grad_norm": 4.324162407777541, + "learning_rate": 5e-06, + "loss": 0.0518, + "num_input_tokens_seen": 886844684, + "step": 5167 + }, + { + "epoch": 58.668555240793204, + "loss": 0.05115539953112602, + "loss_ce": 3.845775790978223e-05, + "loss_iou": 0.44921875, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 886844684, + "step": 5167 + }, + { + "epoch": 58.67988668555241, + "grad_norm": 2.951253382292852, + "learning_rate": 5e-06, + "loss": 0.073, + "num_input_tokens_seen": 887016588, + "step": 5168 + }, + { + "epoch": 58.67988668555241, + "loss": 0.06818795204162598, + "loss_ce": 5.745126327383332e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01361083984375, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 887016588, + "step": 5168 + }, + { + "epoch": 58.69121813031161, + "grad_norm": 3.5326689245700233, + "learning_rate": 5e-06, + "loss": 0.0627, + "num_input_tokens_seen": 887186476, + "step": 5169 + }, + { + "epoch": 58.69121813031161, + "loss": 0.04181846231222153, + "loss_ce": 3.989890683442354e-05, + "loss_iou": 0.53125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 887186476, + "step": 5169 + }, + { + "epoch": 58.70254957507082, + "grad_norm": 3.5115756826015962, + "learning_rate": 5e-06, + "loss": 0.0676, + "num_input_tokens_seen": 887356768, + "step": 5170 + }, + { + "epoch": 58.70254957507082, + "loss": 0.056678298860788345, + "loss_ce": 0.00011397062917239964, + "loss_iou": 0.427734375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 887356768, + "step": 5170 + }, + { + "epoch": 58.713881019830026, + "grad_norm": 3.445329232365934, + "learning_rate": 5e-06, + "loss": 0.0546, + "num_input_tokens_seen": 887526332, + "step": 5171 + }, + { + "epoch": 58.713881019830026, + "loss": 0.07594342529773712, + "loss_ce": 4.620824256562628e-05, + "loss_iou": 0.408203125, + "loss_num": 0.01519775390625, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 887526332, + "step": 5171 + }, + { + "epoch": 58.725212464589234, + "grad_norm": 3.8084227247381373, + "learning_rate": 5e-06, + "loss": 0.0786, + "num_input_tokens_seen": 887698288, + "step": 5172 + }, + { + "epoch": 58.725212464589234, + "loss": 0.059559352695941925, + "loss_ce": 3.481722160358913e-05, + "loss_iou": 0.3828125, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 887698288, + "step": 5172 + }, + { + "epoch": 58.73654390934844, + "grad_norm": 6.1021537043841825, + "learning_rate": 5e-06, + "loss": 0.0999, + "num_input_tokens_seen": 887870352, + "step": 5173 + }, + { + "epoch": 58.73654390934844, + "loss": 0.03974830359220505, + "loss_ce": 6.019495049258694e-05, + "loss_iou": 0.455078125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 887870352, + "step": 5173 + }, + { + "epoch": 58.74787535410765, + "grad_norm": 24.118149560993597, + "learning_rate": 5e-06, + "loss": 0.0582, + "num_input_tokens_seen": 888042324, + "step": 5174 + }, + { + "epoch": 58.74787535410765, + "loss": 0.05540543794631958, + "loss_ce": 0.00012284755939617753, + "loss_iou": 0.47265625, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 888042324, + "step": 5174 + }, + { + "epoch": 58.759206798866856, + "grad_norm": 3.5913669121689704, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 888213448, + "step": 5175 + }, + { + "epoch": 58.759206798866856, + "loss": 0.05376358702778816, + "loss_ce": 6.79100994602777e-05, + "loss_iou": 0.55859375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 888213448, + "step": 5175 + }, + { + "epoch": 58.77053824362606, + "grad_norm": 3.125276980338516, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 888384316, + "step": 5176 + }, + { + "epoch": 58.77053824362606, + "loss": 0.07048118859529495, + "loss_ce": 0.00015342731785494834, + "loss_iou": 0.27734375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 888384316, + "step": 5176 + }, + { + "epoch": 58.78186968838527, + "grad_norm": 3.2676573211304083, + "learning_rate": 5e-06, + "loss": 0.0552, + "num_input_tokens_seen": 888555352, + "step": 5177 + }, + { + "epoch": 58.78186968838527, + "loss": 0.027033204212784767, + "loss_ce": 8.618322317488492e-05, + "loss_iou": 0.470703125, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 888555352, + "step": 5177 + }, + { + "epoch": 58.79320113314448, + "grad_norm": 3.8466464162662635, + "learning_rate": 5e-06, + "loss": 0.0514, + "num_input_tokens_seen": 888727452, + "step": 5178 + }, + { + "epoch": 58.79320113314448, + "loss": 0.06564351916313171, + "loss_ce": 6.124786159489304e-05, + "loss_iou": 0.5390625, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 888727452, + "step": 5178 + }, + { + "epoch": 58.804532577903686, + "grad_norm": 3.4246407041435205, + "learning_rate": 5e-06, + "loss": 0.0672, + "num_input_tokens_seen": 888898764, + "step": 5179 + }, + { + "epoch": 58.804532577903686, + "loss": 0.048203542828559875, + "loss_ce": 4.68058860860765e-05, + "loss_iou": 0.478515625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 888898764, + "step": 5179 + }, + { + "epoch": 58.815864022662886, + "grad_norm": 3.1318405152470103, + "learning_rate": 5e-06, + "loss": 0.0626, + "num_input_tokens_seen": 889069668, + "step": 5180 + }, + { + "epoch": 58.815864022662886, + "loss": 0.12884651124477386, + "loss_ce": 0.000382764614187181, + "loss_iou": 0.1806640625, + "loss_num": 0.0257568359375, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 889069668, + "step": 5180 + }, + { + "epoch": 58.827195467422094, + "grad_norm": 3.7127060891935946, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 889241380, + "step": 5181 + }, + { + "epoch": 58.827195467422094, + "loss": 0.03743062913417816, + "loss_ce": 7.71126797189936e-05, + "loss_iou": 0.58984375, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 889241380, + "step": 5181 + }, + { + "epoch": 58.8385269121813, + "grad_norm": 3.32036666150757, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 889412572, + "step": 5182 + }, + { + "epoch": 58.8385269121813, + "loss": 0.029161836951971054, + "loss_ce": 6.332558405119926e-05, + "loss_iou": 0.376953125, + "loss_num": 0.005828857421875, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 889412572, + "step": 5182 + }, + { + "epoch": 58.84985835694051, + "grad_norm": 3.1431802741623733, + "learning_rate": 5e-06, + "loss": 0.0461, + "num_input_tokens_seen": 889584372, + "step": 5183 + }, + { + "epoch": 58.84985835694051, + "loss": 0.04124344140291214, + "loss_ce": 2.1819667381350882e-05, + "loss_iou": 0.390625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 889584372, + "step": 5183 + }, + { + "epoch": 58.861189801699716, + "grad_norm": 3.1697984764414184, + "learning_rate": 5e-06, + "loss": 0.0563, + "num_input_tokens_seen": 889755696, + "step": 5184 + }, + { + "epoch": 58.861189801699716, + "loss": 0.09251611679792404, + "loss_ce": 3.259401273680851e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 889755696, + "step": 5184 + }, + { + "epoch": 58.87252124645892, + "grad_norm": 3.2758392657934152, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 889927732, + "step": 5185 + }, + { + "epoch": 58.87252124645892, + "loss": 0.08032210171222687, + "loss_ce": 0.00012190362031105906, + "loss_iou": 0.462890625, + "loss_num": 0.01611328125, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 889927732, + "step": 5185 + }, + { + "epoch": 58.88385269121813, + "grad_norm": 3.5441915976507445, + "learning_rate": 5e-06, + "loss": 0.0439, + "num_input_tokens_seen": 890099496, + "step": 5186 + }, + { + "epoch": 58.88385269121813, + "loss": 0.036501601338386536, + "loss_ce": 0.0002467167214490473, + "loss_iou": 0.41796875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 890099496, + "step": 5186 + }, + { + "epoch": 58.89518413597734, + "grad_norm": 8.553759434547434, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 890271624, + "step": 5187 + }, + { + "epoch": 58.89518413597734, + "loss": 0.05966384336352348, + "loss_ce": 9.353039786219597e-05, + "loss_iou": 0.431640625, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 890271624, + "step": 5187 + }, + { + "epoch": 58.906515580736546, + "grad_norm": 4.349920012260608, + "learning_rate": 5e-06, + "loss": 0.0557, + "num_input_tokens_seen": 890442888, + "step": 5188 + }, + { + "epoch": 58.906515580736546, + "loss": 0.04077528044581413, + "loss_ce": 4.957479177392088e-05, + "loss_iou": 0.0, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 890442888, + "step": 5188 + }, + { + "epoch": 58.91784702549575, + "grad_norm": 2.59131701536466, + "learning_rate": 5e-06, + "loss": 0.0416, + "num_input_tokens_seen": 890614716, + "step": 5189 + }, + { + "epoch": 58.91784702549575, + "loss": 0.04095759987831116, + "loss_ce": 9.456434054300189e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 890614716, + "step": 5189 + }, + { + "epoch": 58.92917847025496, + "grad_norm": 4.765012906974223, + "learning_rate": 5e-06, + "loss": 0.0333, + "num_input_tokens_seen": 890786476, + "step": 5190 + }, + { + "epoch": 58.92917847025496, + "loss": 0.027431871742010117, + "loss_ce": 4.234358493704349e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0054931640625, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 890786476, + "step": 5190 + }, + { + "epoch": 58.94050991501416, + "grad_norm": 3.8250754101430284, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 890957028, + "step": 5191 + }, + { + "epoch": 58.94050991501416, + "loss": 0.030107181519269943, + "loss_ce": 7.788444054313004e-05, + "loss_iou": 0.56640625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 890957028, + "step": 5191 + }, + { + "epoch": 58.95184135977337, + "grad_norm": 3.492034853285189, + "learning_rate": 5e-06, + "loss": 0.0763, + "num_input_tokens_seen": 891128872, + "step": 5192 + }, + { + "epoch": 58.95184135977337, + "loss": 0.0569198802113533, + "loss_ce": 5.037500523030758e-05, + "loss_iou": 0.28515625, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 891128872, + "step": 5192 + }, + { + "epoch": 58.963172804532576, + "grad_norm": 4.031746483819515, + "learning_rate": 5e-06, + "loss": 0.0534, + "num_input_tokens_seen": 891299784, + "step": 5193 + }, + { + "epoch": 58.963172804532576, + "loss": 0.052659302949905396, + "loss_ce": 4.700115096056834e-05, + "loss_iou": 0.625, + "loss_num": 0.010498046875, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 891299784, + "step": 5193 + }, + { + "epoch": 58.97450424929178, + "grad_norm": 3.5584400734985246, + "learning_rate": 5e-06, + "loss": 0.0527, + "num_input_tokens_seen": 891471652, + "step": 5194 + }, + { + "epoch": 58.97450424929178, + "loss": 0.05989007651805878, + "loss_ce": 7.562487007817253e-05, + "loss_iou": 0.34375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 891471652, + "step": 5194 + }, + { + "epoch": 58.98583569405099, + "grad_norm": 3.56007655017724, + "learning_rate": 5e-06, + "loss": 0.0606, + "num_input_tokens_seen": 891642620, + "step": 5195 + }, + { + "epoch": 58.98583569405099, + "loss": 0.034561313688755035, + "loss_ce": 7.644950528629124e-05, + "loss_iou": 0.443359375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 891642620, + "step": 5195 + }, + { + "epoch": 58.9971671388102, + "grad_norm": 3.3759239239938763, + "learning_rate": 5e-06, + "loss": 0.0479, + "num_input_tokens_seen": 891814196, + "step": 5196 + }, + { + "epoch": 58.9971671388102, + "loss": 0.046778351068496704, + "loss_ce": 5.59394211450126e-05, + "loss_iou": 0.3125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 891814196, + "step": 5196 + }, + { + "epoch": 58.9971671388102, + "loss": 0.03991572558879852, + "loss_ce": 5.9768499340862036e-05, + "loss_iou": 0.369140625, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 891857332, + "step": 5196 + }, + { + "epoch": 59.008498583569406, + "grad_norm": 2.9512422980350177, + "learning_rate": 5e-06, + "loss": 0.0519, + "num_input_tokens_seen": 891986148, + "step": 5197 + }, + { + "epoch": 59.008498583569406, + "loss": 0.07488942891359329, + "loss_ce": 7.558612560387701e-05, + "loss_iou": 0.328125, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 891986148, + "step": 5197 + }, + { + "epoch": 59.01983002832861, + "grad_norm": 3.000726492663069, + "learning_rate": 5e-06, + "loss": 0.0452, + "num_input_tokens_seen": 892157096, + "step": 5198 + }, + { + "epoch": 59.01983002832861, + "loss": 0.03526885062456131, + "loss_ce": 5.156617407919839e-05, + "loss_iou": 0.52734375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 892157096, + "step": 5198 + }, + { + "epoch": 59.03116147308782, + "grad_norm": 2.8948697206372023, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 892327480, + "step": 5199 + }, + { + "epoch": 59.03116147308782, + "loss": 0.036264244467020035, + "loss_ce": 5.5136340961325914e-05, + "loss_iou": 0.54296875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 892327480, + "step": 5199 + }, + { + "epoch": 59.04249291784703, + "grad_norm": 3.0108412687307133, + "learning_rate": 5e-06, + "loss": 0.0435, + "num_input_tokens_seen": 892495460, + "step": 5200 + }, + { + "epoch": 59.04249291784703, + "loss": 0.038533806800842285, + "loss_ce": 5.113949009682983e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 892495460, + "step": 5200 + }, + { + "epoch": 59.053824362606235, + "grad_norm": 3.4335686742362515, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 892667304, + "step": 5201 + }, + { + "epoch": 59.053824362606235, + "loss": 0.057238124310970306, + "loss_ce": 4.818146044271998e-05, + "loss_iou": 0.36328125, + "loss_num": 0.01141357421875, + "loss_xval": 0.05712890625, + "num_input_tokens_seen": 892667304, + "step": 5201 + }, + { + "epoch": 59.065155807365436, + "grad_norm": 3.737309829311134, + "learning_rate": 5e-06, + "loss": 0.0529, + "num_input_tokens_seen": 892839164, + "step": 5202 + }, + { + "epoch": 59.065155807365436, + "loss": 0.03735791891813278, + "loss_ce": 0.00041638925904408097, + "loss_iou": 0.46484375, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 892839164, + "step": 5202 + }, + { + "epoch": 59.07648725212464, + "grad_norm": 4.897844191340123, + "learning_rate": 5e-06, + "loss": 0.0786, + "num_input_tokens_seen": 893009204, + "step": 5203 + }, + { + "epoch": 59.07648725212464, + "loss": 0.08405373990535736, + "loss_ce": 6.936582212802023e-05, + "loss_iou": 0.2001953125, + "loss_num": 0.016845703125, + "loss_xval": 0.083984375, + "num_input_tokens_seen": 893009204, + "step": 5203 + }, + { + "epoch": 59.08781869688385, + "grad_norm": 3.048425046986099, + "learning_rate": 5e-06, + "loss": 0.0387, + "num_input_tokens_seen": 893180392, + "step": 5204 + }, + { + "epoch": 59.08781869688385, + "loss": 0.033896736800670624, + "loss_ce": 3.748424569494091e-05, + "loss_iou": 0.388671875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 893180392, + "step": 5204 + }, + { + "epoch": 59.09915014164306, + "grad_norm": 3.8873056091322047, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 893352384, + "step": 5205 + }, + { + "epoch": 59.09915014164306, + "loss": 0.036965228617191315, + "loss_ce": 8.473488560412079e-05, + "loss_iou": 0.4453125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 893352384, + "step": 5205 + }, + { + "epoch": 59.110481586402265, + "grad_norm": 3.578444002014332, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 893523704, + "step": 5206 + }, + { + "epoch": 59.110481586402265, + "loss": 0.09712836146354675, + "loss_ce": 9.772434714250267e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0194091796875, + "loss_xval": 0.09716796875, + "num_input_tokens_seen": 893523704, + "step": 5206 + }, + { + "epoch": 59.12181303116147, + "grad_norm": 3.0985569110304, + "learning_rate": 5e-06, + "loss": 0.065, + "num_input_tokens_seen": 893695352, + "step": 5207 + }, + { + "epoch": 59.12181303116147, + "loss": 0.03881976008415222, + "loss_ce": 3.1915416911942884e-05, + "loss_iou": 0.251953125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 893695352, + "step": 5207 + }, + { + "epoch": 59.13314447592068, + "grad_norm": 8.824055311547783, + "learning_rate": 5e-06, + "loss": 0.0714, + "num_input_tokens_seen": 893867392, + "step": 5208 + }, + { + "epoch": 59.13314447592068, + "loss": 0.07464145123958588, + "loss_ce": 2.5969651687773876e-05, + "loss_iou": 0.38671875, + "loss_num": 0.01495361328125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 893867392, + "step": 5208 + }, + { + "epoch": 59.14447592067989, + "grad_norm": 3.500292296272771, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 894038532, + "step": 5209 + }, + { + "epoch": 59.14447592067989, + "loss": 0.031523656100034714, + "loss_ce": 4.4773987610824406e-05, + "loss_iou": 0.640625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 894038532, + "step": 5209 + }, + { + "epoch": 59.155807365439095, + "grad_norm": 2.9003618968807303, + "learning_rate": 5e-06, + "loss": 0.0594, + "num_input_tokens_seen": 894210628, + "step": 5210 + }, + { + "epoch": 59.155807365439095, + "loss": 0.04914901405572891, + "loss_ce": 6.148905958980322e-05, + "loss_iou": 0.34765625, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 894210628, + "step": 5210 + }, + { + "epoch": 59.1671388101983, + "grad_norm": 3.661745755725081, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 894381088, + "step": 5211 + }, + { + "epoch": 59.1671388101983, + "loss": 0.031463753432035446, + "loss_ce": 4.590559910866432e-05, + "loss_iou": 0.318359375, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 894381088, + "step": 5211 + }, + { + "epoch": 59.17847025495751, + "grad_norm": 3.6140270793788654, + "learning_rate": 5e-06, + "loss": 0.0665, + "num_input_tokens_seen": 894552936, + "step": 5212 + }, + { + "epoch": 59.17847025495751, + "loss": 0.06581361591815948, + "loss_ce": 7.875532901380211e-05, + "loss_iou": 0.46875, + "loss_num": 0.01312255859375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 894552936, + "step": 5212 + }, + { + "epoch": 59.18980169971671, + "grad_norm": 3.0262770442006057, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 894724828, + "step": 5213 + }, + { + "epoch": 59.18980169971671, + "loss": 0.040536098182201385, + "loss_ce": 3.927417128579691e-05, + "loss_iou": 0.400390625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 894724828, + "step": 5213 + }, + { + "epoch": 59.20113314447592, + "grad_norm": 3.8619316168824627, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 894896716, + "step": 5214 + }, + { + "epoch": 59.20113314447592, + "loss": 0.061334170401096344, + "loss_ce": 2.436098839098122e-05, + "loss_iou": 0.5546875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 894896716, + "step": 5214 + }, + { + "epoch": 59.212464589235125, + "grad_norm": 3.5741880313937777, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 895068848, + "step": 5215 + }, + { + "epoch": 59.212464589235125, + "loss": 0.031410180032253265, + "loss_ce": 2.2852716938359663e-05, + "loss_iou": 0.423828125, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 895068848, + "step": 5215 + }, + { + "epoch": 59.22379603399433, + "grad_norm": 3.3876071852006344, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 895240408, + "step": 5216 + }, + { + "epoch": 59.22379603399433, + "loss": 0.0444064661860466, + "loss_ce": 5.6797223805915564e-05, + "loss_iou": 0.390625, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 895240408, + "step": 5216 + }, + { + "epoch": 59.23512747875354, + "grad_norm": 3.6376875567123848, + "learning_rate": 5e-06, + "loss": 0.0644, + "num_input_tokens_seen": 895412100, + "step": 5217 + }, + { + "epoch": 59.23512747875354, + "loss": 0.058473944664001465, + "loss_ce": 3.278174699516967e-05, + "loss_iou": 0.298828125, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 895412100, + "step": 5217 + }, + { + "epoch": 59.24645892351275, + "grad_norm": 3.5705671608171876, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 895583924, + "step": 5218 + }, + { + "epoch": 59.24645892351275, + "loss": 0.03600358963012695, + "loss_ce": 6.91401437507011e-05, + "loss_iou": 0.4609375, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 895583924, + "step": 5218 + }, + { + "epoch": 59.257790368271955, + "grad_norm": 3.2717295880101074, + "learning_rate": 5e-06, + "loss": 0.0729, + "num_input_tokens_seen": 895755580, + "step": 5219 + }, + { + "epoch": 59.257790368271955, + "loss": 0.16085468232631683, + "loss_ce": 2.703846439544577e-05, + "loss_iou": 0.359375, + "loss_num": 0.0322265625, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 895755580, + "step": 5219 + }, + { + "epoch": 59.26912181303116, + "grad_norm": 3.8188275027294445, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 895927500, + "step": 5220 + }, + { + "epoch": 59.26912181303116, + "loss": 0.05126499384641647, + "loss_ce": 7.175881182774901e-05, + "loss_iou": 0.412109375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 895927500, + "step": 5220 + }, + { + "epoch": 59.28045325779037, + "grad_norm": 3.348527184903433, + "learning_rate": 5e-06, + "loss": 0.0736, + "num_input_tokens_seen": 896098612, + "step": 5221 + }, + { + "epoch": 59.28045325779037, + "loss": 0.08834994584321976, + "loss_ce": 6.259631481952965e-05, + "loss_iou": 0.421875, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 896098612, + "step": 5221 + }, + { + "epoch": 59.29178470254958, + "grad_norm": 3.6352127779974093, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 896269664, + "step": 5222 + }, + { + "epoch": 59.29178470254958, + "loss": 0.042771488428115845, + "loss_ce": 7.739839202258736e-05, + "loss_iou": 0.279296875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 896269664, + "step": 5222 + }, + { + "epoch": 59.303116147308785, + "grad_norm": 5.631886452199878, + "learning_rate": 5e-06, + "loss": 0.0882, + "num_input_tokens_seen": 896441164, + "step": 5223 + }, + { + "epoch": 59.303116147308785, + "loss": 0.14781562983989716, + "loss_ce": 0.0005072755157016218, + "loss_iou": 0.484375, + "loss_num": 0.029541015625, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 896441164, + "step": 5223 + }, + { + "epoch": 59.314447592067985, + "grad_norm": 19.52152245455981, + "learning_rate": 5e-06, + "loss": 0.0579, + "num_input_tokens_seen": 896612956, + "step": 5224 + }, + { + "epoch": 59.314447592067985, + "loss": 0.026554375886917114, + "loss_ce": 8.037681982386857e-05, + "loss_iou": 0.2890625, + "loss_num": 0.005279541015625, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 896612956, + "step": 5224 + }, + { + "epoch": 59.32577903682719, + "grad_norm": 2.491045027064484, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 896784588, + "step": 5225 + }, + { + "epoch": 59.32577903682719, + "loss": 0.045461613684892654, + "loss_ce": 2.0940171452821232e-05, + "loss_iou": 0.40625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 896784588, + "step": 5225 + }, + { + "epoch": 59.3371104815864, + "grad_norm": 2.6585516503788598, + "learning_rate": 5e-06, + "loss": 0.0397, + "num_input_tokens_seen": 896956444, + "step": 5226 + }, + { + "epoch": 59.3371104815864, + "loss": 0.034350357949733734, + "loss_ce": 4.8599016736261547e-05, + "loss_iou": 0.390625, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 896956444, + "step": 5226 + }, + { + "epoch": 59.34844192634561, + "grad_norm": 3.215248566430737, + "learning_rate": 5e-06, + "loss": 0.0638, + "num_input_tokens_seen": 897125964, + "step": 5227 + }, + { + "epoch": 59.34844192634561, + "loss": 0.09650050848722458, + "loss_ce": 9.547673835186288e-05, + "loss_iou": 0.4375, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 897125964, + "step": 5227 + }, + { + "epoch": 59.359773371104815, + "grad_norm": 3.5119163950093584, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 897297824, + "step": 5228 + }, + { + "epoch": 59.359773371104815, + "loss": 0.07572523504495621, + "loss_ce": 4.164027632214129e-05, + "loss_iou": 0.349609375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 897297824, + "step": 5228 + }, + { + "epoch": 59.37110481586402, + "grad_norm": 5.764402596600396, + "learning_rate": 5e-06, + "loss": 0.1035, + "num_input_tokens_seen": 897467512, + "step": 5229 + }, + { + "epoch": 59.37110481586402, + "loss": 0.14661215245723724, + "loss_ce": 6.673127063550055e-05, + "loss_iou": 0.287109375, + "loss_num": 0.029296875, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 897467512, + "step": 5229 + }, + { + "epoch": 59.38243626062323, + "grad_norm": 4.043773771106399, + "learning_rate": 5e-06, + "loss": 0.0702, + "num_input_tokens_seen": 897639088, + "step": 5230 + }, + { + "epoch": 59.38243626062323, + "loss": 0.12071627378463745, + "loss_ce": 4.977144271833822e-05, + "loss_iou": 0.40234375, + "loss_num": 0.024169921875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 897639088, + "step": 5230 + }, + { + "epoch": 59.39376770538244, + "grad_norm": 3.3223077903414184, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 897810984, + "step": 5231 + }, + { + "epoch": 59.39376770538244, + "loss": 0.044003166258335114, + "loss_ce": 2.7336178391124122e-05, + "loss_iou": 0.47265625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 897810984, + "step": 5231 + }, + { + "epoch": 59.405099150141645, + "grad_norm": 3.795589652525898, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 897981576, + "step": 5232 + }, + { + "epoch": 59.405099150141645, + "loss": 0.04296860471367836, + "loss_ce": 6.0891936300322413e-05, + "loss_iou": 0.28515625, + "loss_num": 0.008544921875, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 897981576, + "step": 5232 + }, + { + "epoch": 59.41643059490085, + "grad_norm": 3.152132433951108, + "learning_rate": 5e-06, + "loss": 0.0457, + "num_input_tokens_seen": 898153336, + "step": 5233 + }, + { + "epoch": 59.41643059490085, + "loss": 0.044174984097480774, + "loss_ce": 6.182699144119397e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0087890625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 898153336, + "step": 5233 + }, + { + "epoch": 59.42776203966006, + "grad_norm": 3.530469751948677, + "learning_rate": 5e-06, + "loss": 0.0509, + "num_input_tokens_seen": 898324248, + "step": 5234 + }, + { + "epoch": 59.42776203966006, + "loss": 0.08770953118801117, + "loss_ce": 4.7785593778826296e-05, + "loss_iou": 0.390625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 898324248, + "step": 5234 + }, + { + "epoch": 59.43909348441926, + "grad_norm": 3.1972254841473653, + "learning_rate": 5e-06, + "loss": 0.038, + "num_input_tokens_seen": 898495984, + "step": 5235 + }, + { + "epoch": 59.43909348441926, + "loss": 0.03576774150133133, + "loss_ce": 0.000817483349237591, + "loss_iou": 0.388671875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 898495984, + "step": 5235 + }, + { + "epoch": 59.45042492917847, + "grad_norm": 3.6598191103957824, + "learning_rate": 5e-06, + "loss": 0.0518, + "num_input_tokens_seen": 898667248, + "step": 5236 + }, + { + "epoch": 59.45042492917847, + "loss": 0.046595871448516846, + "loss_ce": 0.00014811850269325078, + "loss_iou": 0.478515625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 898667248, + "step": 5236 + }, + { + "epoch": 59.461756373937675, + "grad_norm": 3.9673486446916817, + "learning_rate": 5e-06, + "loss": 0.053, + "num_input_tokens_seen": 898838312, + "step": 5237 + }, + { + "epoch": 59.461756373937675, + "loss": 0.05976229906082153, + "loss_ce": 6.991454574745148e-05, + "loss_iou": 0.33984375, + "loss_num": 0.011962890625, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 898838312, + "step": 5237 + }, + { + "epoch": 59.47308781869688, + "grad_norm": 4.6056600842978845, + "learning_rate": 5e-06, + "loss": 0.0564, + "num_input_tokens_seen": 899009204, + "step": 5238 + }, + { + "epoch": 59.47308781869688, + "loss": 0.08491309732198715, + "loss_ce": 8.948677714215592e-05, + "loss_iou": 0.50390625, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 899009204, + "step": 5238 + }, + { + "epoch": 59.48441926345609, + "grad_norm": 8.445198532074523, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 899181288, + "step": 5239 + }, + { + "epoch": 59.48441926345609, + "loss": 0.07698958367109299, + "loss_ce": 5.476978185470216e-05, + "loss_iou": 0.462890625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 899181288, + "step": 5239 + }, + { + "epoch": 59.4957507082153, + "grad_norm": 4.764378479825633, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 899352900, + "step": 5240 + }, + { + "epoch": 59.4957507082153, + "loss": 0.05390908196568489, + "loss_ce": 4.555407213047147e-05, + "loss_iou": 0.49609375, + "loss_num": 0.01080322265625, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 899352900, + "step": 5240 + }, + { + "epoch": 59.507082152974505, + "grad_norm": 3.1921177712850275, + "learning_rate": 5e-06, + "loss": 0.0557, + "num_input_tokens_seen": 899524552, + "step": 5241 + }, + { + "epoch": 59.507082152974505, + "loss": 0.04019215330481529, + "loss_ce": 7.679581176489592e-05, + "loss_iou": 0.5, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 899524552, + "step": 5241 + }, + { + "epoch": 59.51841359773371, + "grad_norm": 3.0275404441557745, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 899695716, + "step": 5242 + }, + { + "epoch": 59.51841359773371, + "loss": 0.036375295370817184, + "loss_ce": 0.002149831270799041, + "loss_iou": 0.359375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 899695716, + "step": 5242 + }, + { + "epoch": 59.52974504249292, + "grad_norm": 4.720293188026689, + "learning_rate": 5e-06, + "loss": 0.0847, + "num_input_tokens_seen": 899868096, + "step": 5243 + }, + { + "epoch": 59.52974504249292, + "loss": 0.04349971190094948, + "loss_ce": 0.00014949182514101267, + "loss_iou": 0.42578125, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 899868096, + "step": 5243 + }, + { + "epoch": 59.54107648725213, + "grad_norm": 3.7776323495494184, + "learning_rate": 5e-06, + "loss": 0.0423, + "num_input_tokens_seen": 900040268, + "step": 5244 + }, + { + "epoch": 59.54107648725213, + "loss": 0.04592818766832352, + "loss_ce": 0.00010604485578369349, + "loss_iou": 0.439453125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 900040268, + "step": 5244 + }, + { + "epoch": 59.552407932011334, + "grad_norm": 3.279990577561121, + "learning_rate": 5e-06, + "loss": 0.0593, + "num_input_tokens_seen": 900212156, + "step": 5245 + }, + { + "epoch": 59.552407932011334, + "loss": 0.03187783807516098, + "loss_ce": 3.27452908095438e-05, + "loss_iou": 0.365234375, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 900212156, + "step": 5245 + }, + { + "epoch": 59.563739376770535, + "grad_norm": 3.411208085270571, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 900384188, + "step": 5246 + }, + { + "epoch": 59.563739376770535, + "loss": 0.05028306692838669, + "loss_ce": 5.1135484682163224e-05, + "loss_iou": 0.484375, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 900384188, + "step": 5246 + }, + { + "epoch": 59.57507082152974, + "grad_norm": 3.999552577502403, + "learning_rate": 5e-06, + "loss": 0.0425, + "num_input_tokens_seen": 900556476, + "step": 5247 + }, + { + "epoch": 59.57507082152974, + "loss": 0.03434481471776962, + "loss_ce": 4.305379115976393e-05, + "loss_iou": 0.65234375, + "loss_num": 0.006866455078125, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 900556476, + "step": 5247 + }, + { + "epoch": 59.58640226628895, + "grad_norm": 3.6926286319582684, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 900728344, + "step": 5248 + }, + { + "epoch": 59.58640226628895, + "loss": 0.06600113213062286, + "loss_ce": 5.264939318294637e-05, + "loss_iou": 0.46875, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 900728344, + "step": 5248 + }, + { + "epoch": 59.59773371104816, + "grad_norm": 3.3382681546609305, + "learning_rate": 5e-06, + "loss": 0.0549, + "num_input_tokens_seen": 900900272, + "step": 5249 + }, + { + "epoch": 59.59773371104816, + "loss": 0.06957980245351791, + "loss_ce": 3.0236533348215744e-05, + "loss_iou": 0.134765625, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 900900272, + "step": 5249 + }, + { + "epoch": 59.609065155807365, + "grad_norm": 3.1123895424490073, + "learning_rate": 5e-06, + "loss": 0.0379, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.609065155807365, + "eval_seeclick_CIoU": 0.5399854183197021, + "eval_seeclick_GIoU": 0.5371536016464233, + "eval_seeclick_IoU": 0.5758621692657471, + "eval_seeclick_MAE_all": 0.0638058539479971, + "eval_seeclick_MAE_h": 0.03312877286225557, + "eval_seeclick_MAE_w": 0.0970098078250885, + "eval_seeclick_MAE_x": 0.09429314360022545, + "eval_seeclick_MAE_y": 0.03079168125987053, + "eval_seeclick_NUM_probability": 0.9999721050262451, + "eval_seeclick_inside_bbox": 0.9232954680919647, + "eval_seeclick_loss": 0.9146884679794312, + "eval_seeclick_loss_ce": 0.663954883813858, + "eval_seeclick_loss_iou": 0.5419921875, + "eval_seeclick_loss_num": 0.050067901611328125, + "eval_seeclick_loss_xval": 0.250457763671875, + "eval_seeclick_runtime": 69.1178, + "eval_seeclick_samples_per_second": 0.622, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.609065155807365, + "eval_icons_CIoU": 0.7491226494312286, + "eval_icons_GIoU": 0.7482729554176331, + "eval_icons_IoU": 0.7629626095294952, + "eval_icons_MAE_all": 0.03358728997409344, + "eval_icons_MAE_h": 0.030434874817728996, + "eval_icons_MAE_w": 0.03648512065410614, + "eval_icons_MAE_x": 0.033146674279123545, + "eval_icons_MAE_y": 0.03428248316049576, + "eval_icons_NUM_probability": 0.9996849894523621, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.12134695798158646, + "eval_icons_loss_ce": 0.005554302595555782, + "eval_icons_loss_iou": 0.50933837890625, + "eval_icons_loss_num": 0.020902633666992188, + "eval_icons_loss_xval": 0.1045379638671875, + "eval_icons_runtime": 86.7469, + "eval_icons_samples_per_second": 0.576, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.609065155807365, + "eval_screenspot_CIoU": 0.6173333923021952, + "eval_screenspot_GIoU": 0.6209428906440735, + "eval_screenspot_IoU": 0.6434433460235596, + "eval_screenspot_MAE_all": 0.06658324226737022, + "eval_screenspot_MAE_h": 0.03863982359568278, + "eval_screenspot_MAE_w": 0.11977884421745937, + "eval_screenspot_MAE_x": 0.07868607218066852, + "eval_screenspot_MAE_y": 0.02922821355362733, + "eval_screenspot_NUM_probability": 0.9999502499898275, + "eval_screenspot_inside_bbox": 0.8974999984105428, + "eval_screenspot_loss": 0.28868868947029114, + "eval_screenspot_loss_ce": 0.013192149965713421, + "eval_screenspot_loss_iou": 0.5154622395833334, + "eval_screenspot_loss_num": 0.054911295572916664, + "eval_screenspot_loss_xval": 0.27447509765625, + "eval_screenspot_runtime": 137.1174, + "eval_screenspot_samples_per_second": 0.649, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.609065155807365, + "eval_compot_CIoU": 0.8520741164684296, + "eval_compot_GIoU": 0.8481800258159637, + "eval_compot_IoU": 0.8633108735084534, + "eval_compot_MAE_all": 0.02417416125535965, + "eval_compot_MAE_h": 0.018058547750115395, + "eval_compot_MAE_w": 0.03456522338092327, + "eval_compot_MAE_x": 0.02924160286784172, + "eval_compot_MAE_y": 0.01483126450330019, + "eval_compot_NUM_probability": 0.9999614357948303, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.0808219239115715, + "eval_compot_loss_ce": 3.6658964745583944e-05, + "eval_compot_loss_iou": 0.520263671875, + "eval_compot_loss_num": 0.01422882080078125, + "eval_compot_loss_xval": 0.07110595703125, + "eval_compot_runtime": 89.3881, + "eval_compot_samples_per_second": 0.559, + "eval_compot_steps_per_second": 0.022, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.609065155807365, + "eval_custom_ui_MAE_all": 0.020768867805600166, + "eval_custom_ui_MAE_x": 0.03414425626397133, + "eval_custom_ui_MAE_y": 0.0073934795800596476, + "eval_custom_ui_NUM_probability": 0.9998302757740021, + "eval_custom_ui_loss": 0.2251778095960617, + "eval_custom_ui_loss_ce": 0.11784247308969498, + "eval_custom_ui_loss_num": 0.020610809326171875, + "eval_custom_ui_loss_xval": 0.102996826171875, + "eval_custom_ui_runtime": 59.5926, + "eval_custom_ui_samples_per_second": 0.839, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.609065155807365, + "loss": 0.2747749090194702, + "loss_ce": 0.14831003546714783, + "loss_iou": 0.0, + "loss_num": 0.0252685546875, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 901072064, + "step": 5250 + }, + { + "epoch": 59.62039660056657, + "grad_norm": 3.7087665501915996, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 901244140, + "step": 5251 + }, + { + "epoch": 59.62039660056657, + "loss": 0.029397062957286835, + "loss_ce": 0.000893645454198122, + "loss_iou": 0.435546875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 901244140, + "step": 5251 + }, + { + "epoch": 59.63172804532578, + "grad_norm": 3.283835748950128, + "learning_rate": 5e-06, + "loss": 0.0552, + "num_input_tokens_seen": 901415972, + "step": 5252 + }, + { + "epoch": 59.63172804532578, + "loss": 0.05694088712334633, + "loss_ce": 7.138182263588533e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 901415972, + "step": 5252 + }, + { + "epoch": 59.64305949008499, + "grad_norm": 3.06353840442789, + "learning_rate": 5e-06, + "loss": 0.0475, + "num_input_tokens_seen": 901587804, + "step": 5253 + }, + { + "epoch": 59.64305949008499, + "loss": 0.06405947357416153, + "loss_ce": 6.411396316252649e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 901587804, + "step": 5253 + }, + { + "epoch": 59.654390934844194, + "grad_norm": 2.9528373098068994, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 901758880, + "step": 5254 + }, + { + "epoch": 59.654390934844194, + "loss": 0.05358899384737015, + "loss_ce": 6.116217991802841e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 901758880, + "step": 5254 + }, + { + "epoch": 59.6657223796034, + "grad_norm": 3.2228902111648075, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 901930576, + "step": 5255 + }, + { + "epoch": 59.6657223796034, + "loss": 0.11216942965984344, + "loss_ce": 4.784986231243238e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0224609375, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 901930576, + "step": 5255 + }, + { + "epoch": 59.67705382436261, + "grad_norm": 3.584780983850795, + "learning_rate": 5e-06, + "loss": 0.0564, + "num_input_tokens_seen": 902101944, + "step": 5256 + }, + { + "epoch": 59.67705382436261, + "loss": 0.10403447598218918, + "loss_ce": 4.58312060800381e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 902101944, + "step": 5256 + }, + { + "epoch": 59.68838526912181, + "grad_norm": 3.7482166240001744, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 902273520, + "step": 5257 + }, + { + "epoch": 59.68838526912181, + "loss": 0.03523241728544235, + "loss_ce": 4.565247581922449e-05, + "loss_iou": 0.3359375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 902273520, + "step": 5257 + }, + { + "epoch": 59.69971671388102, + "grad_norm": 3.209212969824453, + "learning_rate": 5e-06, + "loss": 0.0449, + "num_input_tokens_seen": 902445200, + "step": 5258 + }, + { + "epoch": 59.69971671388102, + "loss": 0.050422340631484985, + "loss_ce": 8.359376806765795e-05, + "loss_iou": 0.337890625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 902445200, + "step": 5258 + }, + { + "epoch": 59.711048158640224, + "grad_norm": 2.9227306083120066, + "learning_rate": 5e-06, + "loss": 0.0439, + "num_input_tokens_seen": 902616932, + "step": 5259 + }, + { + "epoch": 59.711048158640224, + "loss": 0.046733736991882324, + "loss_ce": 0.00011813243327196687, + "loss_iou": 0.40625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 902616932, + "step": 5259 + }, + { + "epoch": 59.72237960339943, + "grad_norm": 2.5986721588811226, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 902788236, + "step": 5260 + }, + { + "epoch": 59.72237960339943, + "loss": 0.0629258081316948, + "loss_ce": 0.00021981516329105943, + "loss_iou": 0.369140625, + "loss_num": 0.0125732421875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 902788236, + "step": 5260 + }, + { + "epoch": 59.73371104815864, + "grad_norm": 2.7199419352021823, + "learning_rate": 5e-06, + "loss": 0.0312, + "num_input_tokens_seen": 902958860, + "step": 5261 + }, + { + "epoch": 59.73371104815864, + "loss": 0.03853485360741615, + "loss_ce": 2.167032471334096e-05, + "loss_iou": 0.328125, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 902958860, + "step": 5261 + }, + { + "epoch": 59.74504249291785, + "grad_norm": 2.445189939451211, + "learning_rate": 5e-06, + "loss": 0.0616, + "num_input_tokens_seen": 903127396, + "step": 5262 + }, + { + "epoch": 59.74504249291785, + "loss": 0.029985571280121803, + "loss_ce": 7.834483403712511e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 903127396, + "step": 5262 + }, + { + "epoch": 59.756373937677054, + "grad_norm": 2.2026778973020398, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 903299152, + "step": 5263 + }, + { + "epoch": 59.756373937677054, + "loss": 0.04307733476161957, + "loss_ce": 9.332680201623589e-05, + "loss_iou": 0.287109375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 903299152, + "step": 5263 + }, + { + "epoch": 59.76770538243626, + "grad_norm": 1.933058244066133, + "learning_rate": 5e-06, + "loss": 0.048, + "num_input_tokens_seen": 903470880, + "step": 5264 + }, + { + "epoch": 59.76770538243626, + "loss": 0.0282057486474514, + "loss_ce": 0.00011431855091359466, + "loss_iou": 0.2578125, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 903470880, + "step": 5264 + }, + { + "epoch": 59.77903682719547, + "grad_norm": 2.370442082220845, + "learning_rate": 5e-06, + "loss": 0.0692, + "num_input_tokens_seen": 903642872, + "step": 5265 + }, + { + "epoch": 59.77903682719547, + "loss": 0.0547727569937706, + "loss_ce": 5.473729470395483e-05, + "loss_iou": 0.0810546875, + "loss_num": 0.010986328125, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 903642872, + "step": 5265 + }, + { + "epoch": 59.79036827195468, + "grad_norm": 4.494736490360524, + "learning_rate": 5e-06, + "loss": 0.0641, + "num_input_tokens_seen": 903814820, + "step": 5266 + }, + { + "epoch": 59.79036827195468, + "loss": 0.11013898253440857, + "loss_ce": 0.00017652772658038884, + "loss_iou": 0.265625, + "loss_num": 0.02197265625, + "loss_xval": 0.10986328125, + "num_input_tokens_seen": 903814820, + "step": 5266 + }, + { + "epoch": 59.801699716713884, + "grad_norm": 3.984608772692165, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 903986920, + "step": 5267 + }, + { + "epoch": 59.801699716713884, + "loss": 0.050803352147340775, + "loss_ce": 0.004447150509804487, + "loss_iou": 0.4765625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 903986920, + "step": 5267 + }, + { + "epoch": 59.81303116147309, + "grad_norm": 4.291915277877288, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 904155872, + "step": 5268 + }, + { + "epoch": 59.81303116147309, + "loss": 0.03763923421502113, + "loss_ce": 5.683595372829586e-05, + "loss_iou": 0.337890625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 904155872, + "step": 5268 + }, + { + "epoch": 59.82436260623229, + "grad_norm": 3.291402256615581, + "learning_rate": 5e-06, + "loss": 0.0787, + "num_input_tokens_seen": 904325228, + "step": 5269 + }, + { + "epoch": 59.82436260623229, + "loss": 0.04456984996795654, + "loss_ce": 7.522162195527926e-05, + "loss_iou": 0.28125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 904325228, + "step": 5269 + }, + { + "epoch": 59.8356940509915, + "grad_norm": 2.919817446506232, + "learning_rate": 5e-06, + "loss": 0.0638, + "num_input_tokens_seen": 904496380, + "step": 5270 + }, + { + "epoch": 59.8356940509915, + "loss": 0.0895550325512886, + "loss_ce": 9.274869807995856e-05, + "loss_iou": 0.177734375, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 904496380, + "step": 5270 + }, + { + "epoch": 59.84702549575071, + "grad_norm": 3.499973683797222, + "learning_rate": 5e-06, + "loss": 0.0585, + "num_input_tokens_seen": 904665796, + "step": 5271 + }, + { + "epoch": 59.84702549575071, + "loss": 0.03309710696339607, + "loss_ce": 3.894151814165525e-05, + "loss_iou": 0.427734375, + "loss_num": 0.006622314453125, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 904665796, + "step": 5271 + }, + { + "epoch": 59.858356940509914, + "grad_norm": 4.9206719201400455, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 904836296, + "step": 5272 + }, + { + "epoch": 59.858356940509914, + "loss": 0.052924949675798416, + "loss_ce": 6.850512727396563e-05, + "loss_iou": 0.4140625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 904836296, + "step": 5272 + }, + { + "epoch": 59.86968838526912, + "grad_norm": 4.0376322760594325, + "learning_rate": 5e-06, + "loss": 0.0603, + "num_input_tokens_seen": 905008360, + "step": 5273 + }, + { + "epoch": 59.86968838526912, + "loss": 0.03543814271688461, + "loss_ce": 0.00019034132128581405, + "loss_iou": 0.6328125, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 905008360, + "step": 5273 + }, + { + "epoch": 59.88101983002833, + "grad_norm": 4.674511053048756, + "learning_rate": 5e-06, + "loss": 0.0696, + "num_input_tokens_seen": 905178776, + "step": 5274 + }, + { + "epoch": 59.88101983002833, + "loss": 0.09513377398252487, + "loss_ce": 0.00011729764810297638, + "loss_iou": 0.306640625, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 905178776, + "step": 5274 + }, + { + "epoch": 59.892351274787536, + "grad_norm": 5.714946555958703, + "learning_rate": 5e-06, + "loss": 0.1176, + "num_input_tokens_seen": 905350724, + "step": 5275 + }, + { + "epoch": 59.892351274787536, + "loss": 0.10863351821899414, + "loss_ce": 0.00015879135753493756, + "loss_iou": 0.416015625, + "loss_num": 0.021728515625, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 905350724, + "step": 5275 + }, + { + "epoch": 59.903682719546744, + "grad_norm": 3.1390227953572136, + "learning_rate": 5e-06, + "loss": 0.0646, + "num_input_tokens_seen": 905521936, + "step": 5276 + }, + { + "epoch": 59.903682719546744, + "loss": 0.10067965090274811, + "loss_ce": 1.741788764775265e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0201416015625, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 905521936, + "step": 5276 + }, + { + "epoch": 59.91501416430595, + "grad_norm": 2.9155259803237623, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 905693844, + "step": 5277 + }, + { + "epoch": 59.91501416430595, + "loss": 0.047902293503284454, + "loss_ce": 3.546860898495652e-05, + "loss_iou": 0.3828125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 905693844, + "step": 5277 + }, + { + "epoch": 59.92634560906516, + "grad_norm": 3.7189901168004087, + "learning_rate": 5e-06, + "loss": 0.0733, + "num_input_tokens_seen": 905864384, + "step": 5278 + }, + { + "epoch": 59.92634560906516, + "loss": 0.06744060665369034, + "loss_ce": 7.305352482944727e-05, + "loss_iou": 0.455078125, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 905864384, + "step": 5278 + }, + { + "epoch": 59.93767705382436, + "grad_norm": 3.336170011931883, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 906036168, + "step": 5279 + }, + { + "epoch": 59.93767705382436, + "loss": 0.03950412571430206, + "loss_ce": 2.9637725674547255e-05, + "loss_iou": 0.466796875, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 906036168, + "step": 5279 + }, + { + "epoch": 59.94900849858357, + "grad_norm": 3.600718726467341, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 906207388, + "step": 5280 + }, + { + "epoch": 59.94900849858357, + "loss": 0.03594730421900749, + "loss_ce": 7.389260281343013e-05, + "loss_iou": 0.4765625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 906207388, + "step": 5280 + }, + { + "epoch": 59.960339943342774, + "grad_norm": 3.683771661353578, + "learning_rate": 5e-06, + "loss": 0.0683, + "num_input_tokens_seen": 906378016, + "step": 5281 + }, + { + "epoch": 59.960339943342774, + "loss": 0.10890936851501465, + "loss_ce": 2.265237708343193e-05, + "loss_iou": 0.71484375, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 906378016, + "step": 5281 + }, + { + "epoch": 59.97167138810198, + "grad_norm": 3.2901430966928698, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 906549748, + "step": 5282 + }, + { + "epoch": 59.97167138810198, + "loss": 0.031769223511219025, + "loss_ce": 6.146270607132465e-05, + "loss_iou": 0.287109375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 906549748, + "step": 5282 + }, + { + "epoch": 59.98300283286119, + "grad_norm": 2.7343532032088333, + "learning_rate": 5e-06, + "loss": 0.0521, + "num_input_tokens_seen": 906719144, + "step": 5283 + }, + { + "epoch": 59.98300283286119, + "loss": 0.06259878724813461, + "loss_ce": 8.352575241588056e-05, + "loss_iou": 0.203125, + "loss_num": 0.012451171875, + "loss_xval": 0.0625, + "num_input_tokens_seen": 906719144, + "step": 5283 + }, + { + "epoch": 59.994334277620396, + "grad_norm": 3.524429963076134, + "learning_rate": 5e-06, + "loss": 0.0806, + "num_input_tokens_seen": 906891176, + "step": 5284 + }, + { + "epoch": 59.994334277620396, + "loss": 0.12320826202630997, + "loss_ce": 2.404916813247837e-05, + "loss_iou": 0.33984375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 906891176, + "step": 5284 + }, + { + "epoch": 59.994334277620396, + "loss": 0.06530926376581192, + "loss_ce": 4.7420820919796824e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0130615234375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 906976940, + "step": 5284 + }, + { + "epoch": 60.005665722379604, + "grad_norm": 9.944597566678567, + "learning_rate": 5e-06, + "loss": 0.0659, + "num_input_tokens_seen": 907063212, + "step": 5285 + }, + { + "epoch": 60.005665722379604, + "loss": 0.07469999045133591, + "loss_ce": 6.16243269178085e-05, + "loss_iou": 0.2890625, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 907063212, + "step": 5285 + }, + { + "epoch": 60.01699716713881, + "grad_norm": 3.233336838936552, + "learning_rate": 5e-06, + "loss": 0.0625, + "num_input_tokens_seen": 907235352, + "step": 5286 + }, + { + "epoch": 60.01699716713881, + "loss": 0.11833537369966507, + "loss_ce": 0.0033298793714493513, + "loss_iou": 0.42578125, + "loss_num": 0.02294921875, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 907235352, + "step": 5286 + }, + { + "epoch": 60.02832861189802, + "grad_norm": 3.155411714739412, + "learning_rate": 5e-06, + "loss": 0.0534, + "num_input_tokens_seen": 907405296, + "step": 5287 + }, + { + "epoch": 60.02832861189802, + "loss": 0.08510617166757584, + "loss_ce": 5.368136044126004e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 907405296, + "step": 5287 + }, + { + "epoch": 60.039660056657226, + "grad_norm": 3.7161595690155895, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 907576600, + "step": 5288 + }, + { + "epoch": 60.039660056657226, + "loss": 0.03138658404350281, + "loss_ce": 9.080891322810203e-05, + "loss_iou": 0.345703125, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 907576600, + "step": 5288 + }, + { + "epoch": 60.05099150141643, + "grad_norm": 4.321721882023258, + "learning_rate": 5e-06, + "loss": 0.0744, + "num_input_tokens_seen": 907744644, + "step": 5289 + }, + { + "epoch": 60.05099150141643, + "loss": 0.044658586382865906, + "loss_ce": 0.00010292250226484612, + "loss_iou": 0.4609375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 907744644, + "step": 5289 + }, + { + "epoch": 60.06232294617564, + "grad_norm": 5.325421575238035, + "learning_rate": 5e-06, + "loss": 0.0491, + "num_input_tokens_seen": 907916728, + "step": 5290 + }, + { + "epoch": 60.06232294617564, + "loss": 0.028624074533581734, + "loss_ce": 4.436278686625883e-05, + "loss_iou": 0.43359375, + "loss_num": 0.0057373046875, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 907916728, + "step": 5290 + }, + { + "epoch": 60.07365439093484, + "grad_norm": 3.3122030502063566, + "learning_rate": 5e-06, + "loss": 0.044, + "num_input_tokens_seen": 908088780, + "step": 5291 + }, + { + "epoch": 60.07365439093484, + "loss": 0.034543078392744064, + "loss_ce": 7.347343489527702e-05, + "loss_iou": 0.50390625, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 908088780, + "step": 5291 + }, + { + "epoch": 60.08498583569405, + "grad_norm": 4.7144702983211975, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 908260860, + "step": 5292 + }, + { + "epoch": 60.08498583569405, + "loss": 0.06500456482172012, + "loss_ce": 0.0008108344045467675, + "loss_iou": 0.26953125, + "loss_num": 0.01287841796875, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 908260860, + "step": 5292 + }, + { + "epoch": 60.096317280453256, + "grad_norm": 3.0513645968633516, + "learning_rate": 5e-06, + "loss": 0.0722, + "num_input_tokens_seen": 908431884, + "step": 5293 + }, + { + "epoch": 60.096317280453256, + "loss": 0.05299338698387146, + "loss_ce": 9.11634269868955e-05, + "loss_iou": 0.44140625, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 908431884, + "step": 5293 + }, + { + "epoch": 60.107648725212464, + "grad_norm": 3.4499735471378012, + "learning_rate": 5e-06, + "loss": 0.0448, + "num_input_tokens_seen": 908602524, + "step": 5294 + }, + { + "epoch": 60.107648725212464, + "loss": 0.04691363126039505, + "loss_ce": 9.966388461180031e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0093994140625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 908602524, + "step": 5294 + }, + { + "epoch": 60.11898016997167, + "grad_norm": 3.4328010263247335, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 908773032, + "step": 5295 + }, + { + "epoch": 60.11898016997167, + "loss": 0.04888949170708656, + "loss_ce": 0.0011142236180603504, + "loss_iou": 0.1796875, + "loss_num": 0.009521484375, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 908773032, + "step": 5295 + }, + { + "epoch": 60.13031161473088, + "grad_norm": 4.543315143245784, + "learning_rate": 5e-06, + "loss": 0.0677, + "num_input_tokens_seen": 908940228, + "step": 5296 + }, + { + "epoch": 60.13031161473088, + "loss": 0.037237007170915604, + "loss_ce": 2.082024366245605e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 908940228, + "step": 5296 + }, + { + "epoch": 60.141643059490086, + "grad_norm": 4.005370055505921, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 909110940, + "step": 5297 + }, + { + "epoch": 60.141643059490086, + "loss": 0.02901434898376465, + "loss_ce": 0.0002515295927878469, + "loss_iou": 0.4296875, + "loss_num": 0.0057373046875, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 909110940, + "step": 5297 + }, + { + "epoch": 60.15297450424929, + "grad_norm": 2.996543023093513, + "learning_rate": 5e-06, + "loss": 0.0537, + "num_input_tokens_seen": 909281900, + "step": 5298 + }, + { + "epoch": 60.15297450424929, + "loss": 0.043112967163324356, + "loss_ce": 5.266504012979567e-05, + "loss_iou": 0.42578125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 909281900, + "step": 5298 + }, + { + "epoch": 60.1643059490085, + "grad_norm": 3.2434793767078345, + "learning_rate": 5e-06, + "loss": 0.0398, + "num_input_tokens_seen": 909453544, + "step": 5299 + }, + { + "epoch": 60.1643059490085, + "loss": 0.03535942733287811, + "loss_ce": 5.0591705075930804e-05, + "loss_iou": 0.439453125, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 909453544, + "step": 5299 + }, + { + "epoch": 60.17563739376771, + "grad_norm": 4.44120916068292, + "learning_rate": 5e-06, + "loss": 0.0606, + "num_input_tokens_seen": 909624588, + "step": 5300 + }, + { + "epoch": 60.17563739376771, + "loss": 0.042779356241226196, + "loss_ce": 7.000465848250315e-05, + "loss_iou": 0.357421875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 909624588, + "step": 5300 + }, + { + "epoch": 60.186968838526916, + "grad_norm": 3.5546218501379356, + "learning_rate": 5e-06, + "loss": 0.0426, + "num_input_tokens_seen": 909795992, + "step": 5301 + }, + { + "epoch": 60.186968838526916, + "loss": 0.02905501052737236, + "loss_ce": 4.8052814236143604e-05, + "loss_iou": 0.41796875, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 909795992, + "step": 5301 + }, + { + "epoch": 60.198300283286116, + "grad_norm": 3.6992703774051408, + "learning_rate": 5e-06, + "loss": 0.0538, + "num_input_tokens_seen": 909967688, + "step": 5302 + }, + { + "epoch": 60.198300283286116, + "loss": 0.03620871156454086, + "loss_ce": 4.538108260021545e-05, + "loss_iou": 0.3984375, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 909967688, + "step": 5302 + }, + { + "epoch": 60.20963172804532, + "grad_norm": 3.7704361175605765, + "learning_rate": 5e-06, + "loss": 0.1169, + "num_input_tokens_seen": 910138364, + "step": 5303 + }, + { + "epoch": 60.20963172804532, + "loss": 0.2508850693702698, + "loss_ce": 9.162837523035705e-05, + "loss_iou": 0.404296875, + "loss_num": 0.050048828125, + "loss_xval": 0.25, + "num_input_tokens_seen": 910138364, + "step": 5303 + }, + { + "epoch": 60.22096317280453, + "grad_norm": 4.082134267957282, + "learning_rate": 5e-06, + "loss": 0.075, + "num_input_tokens_seen": 910310248, + "step": 5304 + }, + { + "epoch": 60.22096317280453, + "loss": 0.04449412226676941, + "loss_ce": 7.578609802294523e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 910310248, + "step": 5304 + }, + { + "epoch": 60.23229461756374, + "grad_norm": 3.2077888659000227, + "learning_rate": 5e-06, + "loss": 0.0443, + "num_input_tokens_seen": 910481940, + "step": 5305 + }, + { + "epoch": 60.23229461756374, + "loss": 0.041572362184524536, + "loss_ce": 0.00011423211981309578, + "loss_iou": 0.4140625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 910481940, + "step": 5305 + }, + { + "epoch": 60.243626062322946, + "grad_norm": 52.62992156400096, + "learning_rate": 5e-06, + "loss": 0.0625, + "num_input_tokens_seen": 910650624, + "step": 5306 + }, + { + "epoch": 60.243626062322946, + "loss": 0.03883575648069382, + "loss_ce": 7.843222556402907e-05, + "loss_iou": 0.4375, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 910650624, + "step": 5306 + }, + { + "epoch": 60.25495750708215, + "grad_norm": 60.626483052214766, + "learning_rate": 5e-06, + "loss": 0.0499, + "num_input_tokens_seen": 910822336, + "step": 5307 + }, + { + "epoch": 60.25495750708215, + "loss": 0.04873540252447128, + "loss_ce": 0.00021245228708721697, + "loss_iou": 0.5859375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 910822336, + "step": 5307 + }, + { + "epoch": 60.26628895184136, + "grad_norm": 3.7409132307538333, + "learning_rate": 5e-06, + "loss": 0.0543, + "num_input_tokens_seen": 910994308, + "step": 5308 + }, + { + "epoch": 60.26628895184136, + "loss": 0.08972421288490295, + "loss_ce": 4.830794205190614e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 910994308, + "step": 5308 + }, + { + "epoch": 60.27762039660057, + "grad_norm": 4.172696941141727, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 911166440, + "step": 5309 + }, + { + "epoch": 60.27762039660057, + "loss": 0.0516594834625721, + "loss_ce": 5.4256794101092964e-05, + "loss_iou": 0.486328125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 911166440, + "step": 5309 + }, + { + "epoch": 60.288951841359776, + "grad_norm": 4.892729380505663, + "learning_rate": 5e-06, + "loss": 0.0622, + "num_input_tokens_seen": 911338324, + "step": 5310 + }, + { + "epoch": 60.288951841359776, + "loss": 0.041725482791662216, + "loss_ce": 0.00019106024410575628, + "loss_iou": 0.515625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 911338324, + "step": 5310 + }, + { + "epoch": 60.30028328611898, + "grad_norm": 4.534346362503417, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 911509600, + "step": 5311 + }, + { + "epoch": 60.30028328611898, + "loss": 0.059051379561424255, + "loss_ce": 0.00013719552953261882, + "loss_iou": 0.466796875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 911509600, + "step": 5311 + }, + { + "epoch": 60.31161473087819, + "grad_norm": 3.552033149940994, + "learning_rate": 5e-06, + "loss": 0.0754, + "num_input_tokens_seen": 911681232, + "step": 5312 + }, + { + "epoch": 60.31161473087819, + "loss": 0.10575592517852783, + "loss_ce": 5.8291636378271505e-05, + "loss_iou": 0.08251953125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 911681232, + "step": 5312 + }, + { + "epoch": 60.32294617563739, + "grad_norm": 2.938119773227793, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 911853084, + "step": 5313 + }, + { + "epoch": 60.32294617563739, + "loss": 0.07814521342515945, + "loss_ce": 0.00014227983774617314, + "loss_iou": 0.41015625, + "loss_num": 0.015625, + "loss_xval": 0.078125, + "num_input_tokens_seen": 911853084, + "step": 5313 + }, + { + "epoch": 60.3342776203966, + "grad_norm": 2.9562902498742267, + "learning_rate": 5e-06, + "loss": 0.0377, + "num_input_tokens_seen": 912023336, + "step": 5314 + }, + { + "epoch": 60.3342776203966, + "loss": 0.04107864946126938, + "loss_ce": 7.828190427972004e-05, + "loss_iou": 0.294921875, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 912023336, + "step": 5314 + }, + { + "epoch": 60.345609065155806, + "grad_norm": 7.32597120194911, + "learning_rate": 5e-06, + "loss": 0.0718, + "num_input_tokens_seen": 912195080, + "step": 5315 + }, + { + "epoch": 60.345609065155806, + "loss": 0.03294748812913895, + "loss_ce": 4.953733514412306e-05, + "loss_iou": 0.486328125, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 912195080, + "step": 5315 + }, + { + "epoch": 60.35694050991501, + "grad_norm": 3.6730861898071594, + "learning_rate": 5e-06, + "loss": 0.0438, + "num_input_tokens_seen": 912366752, + "step": 5316 + }, + { + "epoch": 60.35694050991501, + "loss": 0.05385345220565796, + "loss_ce": 6.622144428547472e-05, + "loss_iou": 0.11865234375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 912366752, + "step": 5316 + }, + { + "epoch": 60.36827195467422, + "grad_norm": 3.6236791811335443, + "learning_rate": 5e-06, + "loss": 0.0421, + "num_input_tokens_seen": 912538200, + "step": 5317 + }, + { + "epoch": 60.36827195467422, + "loss": 0.04094957932829857, + "loss_ce": 0.00013231889170128852, + "loss_iou": 0.55859375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 912538200, + "step": 5317 + }, + { + "epoch": 60.37960339943343, + "grad_norm": 6.647115198108427, + "learning_rate": 5e-06, + "loss": 0.0575, + "num_input_tokens_seen": 912709472, + "step": 5318 + }, + { + "epoch": 60.37960339943343, + "loss": 0.04388439282774925, + "loss_ce": 7.6408134191297e-05, + "loss_iou": 0.1220703125, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 912709472, + "step": 5318 + }, + { + "epoch": 60.390934844192635, + "grad_norm": 3.0380153621279327, + "learning_rate": 5e-06, + "loss": 0.0577, + "num_input_tokens_seen": 912881332, + "step": 5319 + }, + { + "epoch": 60.390934844192635, + "loss": 0.03983393311500549, + "loss_ce": 0.00010004534851759672, + "loss_iou": 0.51171875, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 912881332, + "step": 5319 + }, + { + "epoch": 60.40226628895184, + "grad_norm": 3.9709569958688324, + "learning_rate": 5e-06, + "loss": 0.0633, + "num_input_tokens_seen": 913052384, + "step": 5320 + }, + { + "epoch": 60.40226628895184, + "loss": 0.06085922196507454, + "loss_ce": 5.294629227137193e-05, + "loss_iou": 0.326171875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 913052384, + "step": 5320 + }, + { + "epoch": 60.41359773371105, + "grad_norm": 3.9302525097261847, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 913223456, + "step": 5321 + }, + { + "epoch": 60.41359773371105, + "loss": 0.04768989607691765, + "loss_ce": 3.669855868793093e-05, + "loss_iou": 0.54296875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 913223456, + "step": 5321 + }, + { + "epoch": 60.42492917847026, + "grad_norm": 3.5528166998725377, + "learning_rate": 5e-06, + "loss": 0.0702, + "num_input_tokens_seen": 913394428, + "step": 5322 + }, + { + "epoch": 60.42492917847026, + "loss": 0.0415625125169754, + "loss_ce": 0.00010437953460495919, + "loss_iou": 0.279296875, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 913394428, + "step": 5322 + }, + { + "epoch": 60.436260623229465, + "grad_norm": 3.7763049516416567, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 913566468, + "step": 5323 + }, + { + "epoch": 60.436260623229465, + "loss": 0.06610424071550369, + "loss_ce": 0.001162838307209313, + "loss_iou": 0.451171875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 913566468, + "step": 5323 + }, + { + "epoch": 60.447592067988666, + "grad_norm": 4.832197538193646, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 913738448, + "step": 5324 + }, + { + "epoch": 60.447592067988666, + "loss": 0.03582945838570595, + "loss_ce": 0.00013915127783548087, + "loss_iou": 0.46875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 913738448, + "step": 5324 + }, + { + "epoch": 60.45892351274787, + "grad_norm": 3.9103831486990317, + "learning_rate": 5e-06, + "loss": 0.0801, + "num_input_tokens_seen": 913910044, + "step": 5325 + }, + { + "epoch": 60.45892351274787, + "loss": 0.056947849690914154, + "loss_ce": 0.0001698917185422033, + "loss_iou": 0.39453125, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 913910044, + "step": 5325 + }, + { + "epoch": 60.47025495750708, + "grad_norm": 4.331391604625984, + "learning_rate": 5e-06, + "loss": 0.0435, + "num_input_tokens_seen": 914080700, + "step": 5326 + }, + { + "epoch": 60.47025495750708, + "loss": 0.050633084028959274, + "loss_ce": 5.019925447413698e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 914080700, + "step": 5326 + }, + { + "epoch": 60.48158640226629, + "grad_norm": 3.0465676268319557, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 914252472, + "step": 5327 + }, + { + "epoch": 60.48158640226629, + "loss": 0.039646558463573456, + "loss_ce": 6.526013748953119e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 914252472, + "step": 5327 + }, + { + "epoch": 60.492917847025495, + "grad_norm": 4.63397528210791, + "learning_rate": 5e-06, + "loss": 0.0722, + "num_input_tokens_seen": 914422816, + "step": 5328 + }, + { + "epoch": 60.492917847025495, + "loss": 0.07953919470310211, + "loss_ce": 5.616456473944709e-05, + "loss_iou": 0.3828125, + "loss_num": 0.015869140625, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 914422816, + "step": 5328 + }, + { + "epoch": 60.5042492917847, + "grad_norm": 3.417403666928378, + "learning_rate": 5e-06, + "loss": 0.0857, + "num_input_tokens_seen": 914594468, + "step": 5329 + }, + { + "epoch": 60.5042492917847, + "loss": 0.12426836043596268, + "loss_ce": 0.001282525365240872, + "loss_iou": 0.3125, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 914594468, + "step": 5329 + }, + { + "epoch": 60.51558073654391, + "grad_norm": 3.6345513073395743, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 914765384, + "step": 5330 + }, + { + "epoch": 60.51558073654391, + "loss": 0.09104451537132263, + "loss_ce": 7.161663961596787e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0181884765625, + "loss_xval": 0.0908203125, + "num_input_tokens_seen": 914765384, + "step": 5330 + }, + { + "epoch": 60.52691218130312, + "grad_norm": 3.428790681170709, + "learning_rate": 5e-06, + "loss": 0.0579, + "num_input_tokens_seen": 914936652, + "step": 5331 + }, + { + "epoch": 60.52691218130312, + "loss": 0.033823419362306595, + "loss_ce": 4.0460443415213376e-05, + "loss_iou": 0.333984375, + "loss_num": 0.00677490234375, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 914936652, + "step": 5331 + }, + { + "epoch": 60.538243626062325, + "grad_norm": 3.3356038372650394, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 915107168, + "step": 5332 + }, + { + "epoch": 60.538243626062325, + "loss": 0.045993655920028687, + "loss_ce": 0.00023254757979884744, + "loss_iou": 0.271484375, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 915107168, + "step": 5332 + }, + { + "epoch": 60.54957507082153, + "grad_norm": 3.844161256637789, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 915279200, + "step": 5333 + }, + { + "epoch": 60.54957507082153, + "loss": 0.06420257687568665, + "loss_ce": 0.00013092387234792113, + "loss_iou": 0.44140625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 915279200, + "step": 5333 + }, + { + "epoch": 60.56090651558074, + "grad_norm": 3.5094774430612423, + "learning_rate": 5e-06, + "loss": 0.0466, + "num_input_tokens_seen": 915450712, + "step": 5334 + }, + { + "epoch": 60.56090651558074, + "loss": 0.039232831448316574, + "loss_ce": 0.00018559047020971775, + "loss_iou": 0.50390625, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 915450712, + "step": 5334 + }, + { + "epoch": 60.57223796033994, + "grad_norm": 3.6725382736535677, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 915622488, + "step": 5335 + }, + { + "epoch": 60.57223796033994, + "loss": 0.05054699629545212, + "loss_ce": 2.5146975531242788e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 915622488, + "step": 5335 + }, + { + "epoch": 60.58356940509915, + "grad_norm": 3.1941661988372863, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 915794368, + "step": 5336 + }, + { + "epoch": 60.58356940509915, + "loss": 0.05514456331729889, + "loss_ce": 9.085034253075719e-05, + "loss_iou": 0.37890625, + "loss_num": 0.010986328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 915794368, + "step": 5336 + }, + { + "epoch": 60.594900849858355, + "grad_norm": 2.985224376866991, + "learning_rate": 5e-06, + "loss": 0.0637, + "num_input_tokens_seen": 915965932, + "step": 5337 + }, + { + "epoch": 60.594900849858355, + "loss": 0.06457200646400452, + "loss_ce": 8.836544293444604e-05, + "loss_iou": 0.341796875, + "loss_num": 0.01287841796875, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 915965932, + "step": 5337 + }, + { + "epoch": 60.60623229461756, + "grad_norm": 3.1195290302707748, + "learning_rate": 5e-06, + "loss": 0.0592, + "num_input_tokens_seen": 916137716, + "step": 5338 + }, + { + "epoch": 60.60623229461756, + "loss": 0.03704243153333664, + "loss_ce": 7.038851617835462e-05, + "loss_iou": 0.41796875, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 916137716, + "step": 5338 + }, + { + "epoch": 60.61756373937677, + "grad_norm": 3.144355509678711, + "learning_rate": 5e-06, + "loss": 0.0389, + "num_input_tokens_seen": 916308984, + "step": 5339 + }, + { + "epoch": 60.61756373937677, + "loss": 0.041377291083335876, + "loss_ce": 1.0714086783991661e-05, + "loss_iou": 0.388671875, + "loss_num": 0.00830078125, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 916308984, + "step": 5339 + }, + { + "epoch": 60.62889518413598, + "grad_norm": 3.9728630362030253, + "learning_rate": 5e-06, + "loss": 0.0644, + "num_input_tokens_seen": 916480872, + "step": 5340 + }, + { + "epoch": 60.62889518413598, + "loss": 0.06372785568237305, + "loss_ce": 3.766483496292494e-05, + "loss_iou": 0.490234375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 916480872, + "step": 5340 + }, + { + "epoch": 60.640226628895185, + "grad_norm": 3.434787622281573, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 916651212, + "step": 5341 + }, + { + "epoch": 60.640226628895185, + "loss": 0.07078741490840912, + "loss_ce": 7.818300946382806e-05, + "loss_iou": 0.478515625, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 916651212, + "step": 5341 + }, + { + "epoch": 60.65155807365439, + "grad_norm": 2.99494251309212, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 916822764, + "step": 5342 + }, + { + "epoch": 60.65155807365439, + "loss": 0.05739685520529747, + "loss_ce": 5.432831676444039e-05, + "loss_iou": 0.328125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 916822764, + "step": 5342 + }, + { + "epoch": 60.6628895184136, + "grad_norm": 2.866935456467115, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 916994532, + "step": 5343 + }, + { + "epoch": 60.6628895184136, + "loss": 0.05307865887880325, + "loss_ce": 0.0002527314645703882, + "loss_iou": 0.392578125, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 916994532, + "step": 5343 + }, + { + "epoch": 60.67422096317281, + "grad_norm": 2.8775239754470103, + "learning_rate": 5e-06, + "loss": 0.0542, + "num_input_tokens_seen": 917164824, + "step": 5344 + }, + { + "epoch": 60.67422096317281, + "loss": 0.11749410629272461, + "loss_ce": 3.1954506994225085e-05, + "loss_iou": 0.23046875, + "loss_num": 0.0234375, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 917164824, + "step": 5344 + }, + { + "epoch": 60.685552407932015, + "grad_norm": 4.672608535448603, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 917336516, + "step": 5345 + }, + { + "epoch": 60.685552407932015, + "loss": 0.044775884598493576, + "loss_ce": 6.763250712538138e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 917336516, + "step": 5345 + }, + { + "epoch": 60.696883852691215, + "grad_norm": 3.436934965950403, + "learning_rate": 5e-06, + "loss": 0.0528, + "num_input_tokens_seen": 917507284, + "step": 5346 + }, + { + "epoch": 60.696883852691215, + "loss": 0.0323917530477047, + "loss_ce": 7.36360889277421e-05, + "loss_iou": 0.458984375, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 917507284, + "step": 5346 + }, + { + "epoch": 60.70821529745042, + "grad_norm": 2.7588964267339633, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 917679244, + "step": 5347 + }, + { + "epoch": 60.70821529745042, + "loss": 0.02689727395772934, + "loss_ce": 6.46931875962764e-05, + "loss_iou": 0.171875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 917679244, + "step": 5347 + }, + { + "epoch": 60.71954674220963, + "grad_norm": 3.896795808320474, + "learning_rate": 5e-06, + "loss": 0.0548, + "num_input_tokens_seen": 917850096, + "step": 5348 + }, + { + "epoch": 60.71954674220963, + "loss": 0.06668774783611298, + "loss_ce": 6.78746000630781e-05, + "loss_iou": 0.234375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 917850096, + "step": 5348 + }, + { + "epoch": 60.73087818696884, + "grad_norm": 3.5464775819922627, + "learning_rate": 5e-06, + "loss": 0.0971, + "num_input_tokens_seen": 918021984, + "step": 5349 + }, + { + "epoch": 60.73087818696884, + "loss": 0.04377872869372368, + "loss_ce": 4.703795275418088e-05, + "loss_iou": 0.384765625, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 918021984, + "step": 5349 + }, + { + "epoch": 60.742209631728045, + "grad_norm": 3.5358960923237963, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 918193020, + "step": 5350 + }, + { + "epoch": 60.742209631728045, + "loss": 0.04534516483545303, + "loss_ce": 4.1820010665105656e-05, + "loss_iou": 0.48828125, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 918193020, + "step": 5350 + }, + { + "epoch": 60.75354107648725, + "grad_norm": 3.459368774951292, + "learning_rate": 5e-06, + "loss": 0.0609, + "num_input_tokens_seen": 918363096, + "step": 5351 + }, + { + "epoch": 60.75354107648725, + "loss": 0.08535854518413544, + "loss_ce": 6.191064312588423e-05, + "loss_iou": 0.462890625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 918363096, + "step": 5351 + }, + { + "epoch": 60.76487252124646, + "grad_norm": 3.6421239954789297, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 918534720, + "step": 5352 + }, + { + "epoch": 60.76487252124646, + "loss": 0.04928767308592796, + "loss_ce": 1.704102396615781e-05, + "loss_iou": 0.3359375, + "loss_num": 0.00982666015625, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 918534720, + "step": 5352 + }, + { + "epoch": 60.77620396600567, + "grad_norm": 8.577733510934467, + "learning_rate": 5e-06, + "loss": 0.0743, + "num_input_tokens_seen": 918704856, + "step": 5353 + }, + { + "epoch": 60.77620396600567, + "loss": 0.09034101665019989, + "loss_ce": 0.00010054362792288885, + "loss_iou": 0.3984375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 918704856, + "step": 5353 + }, + { + "epoch": 60.787535410764875, + "grad_norm": 4.279460044950278, + "learning_rate": 5e-06, + "loss": 0.0667, + "num_input_tokens_seen": 918875896, + "step": 5354 + }, + { + "epoch": 60.787535410764875, + "loss": 0.11280453205108643, + "loss_ce": 2.682180274860002e-05, + "loss_iou": 0.2578125, + "loss_num": 0.0225830078125, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 918875896, + "step": 5354 + }, + { + "epoch": 60.79886685552408, + "grad_norm": 3.1644655289872343, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 919046916, + "step": 5355 + }, + { + "epoch": 60.79886685552408, + "loss": 0.02776140347123146, + "loss_ce": 0.0001124780610552989, + "loss_iou": 0.45703125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 919046916, + "step": 5355 + }, + { + "epoch": 60.81019830028329, + "grad_norm": 3.4591929620229958, + "learning_rate": 5e-06, + "loss": 0.0514, + "num_input_tokens_seen": 919218324, + "step": 5356 + }, + { + "epoch": 60.81019830028329, + "loss": 0.027796726673841476, + "loss_ce": 2.5732038920978084e-05, + "loss_iou": 0.255859375, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 919218324, + "step": 5356 + }, + { + "epoch": 60.82152974504249, + "grad_norm": 3.8068347489283454, + "learning_rate": 5e-06, + "loss": 0.0688, + "num_input_tokens_seen": 919389136, + "step": 5357 + }, + { + "epoch": 60.82152974504249, + "loss": 0.08701546490192413, + "loss_ce": 4.0369253838434815e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0174560546875, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 919389136, + "step": 5357 + }, + { + "epoch": 60.8328611898017, + "grad_norm": 3.838767399604408, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 919561256, + "step": 5358 + }, + { + "epoch": 60.8328611898017, + "loss": 0.04236666113138199, + "loss_ce": 3.8779722672188655e-05, + "loss_iou": 0.345703125, + "loss_num": 0.00848388671875, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 919561256, + "step": 5358 + }, + { + "epoch": 60.844192634560905, + "grad_norm": 3.5630442421661446, + "learning_rate": 5e-06, + "loss": 0.0635, + "num_input_tokens_seen": 919733196, + "step": 5359 + }, + { + "epoch": 60.844192634560905, + "loss": 0.043702252209186554, + "loss_ce": 1.634146974538453e-05, + "loss_iou": 0.6015625, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 919733196, + "step": 5359 + }, + { + "epoch": 60.85552407932011, + "grad_norm": 3.5274798459111123, + "learning_rate": 5e-06, + "loss": 0.0366, + "num_input_tokens_seen": 919903784, + "step": 5360 + }, + { + "epoch": 60.85552407932011, + "loss": 0.029463166370987892, + "loss_ce": 8.999784768093377e-05, + "loss_iou": 0.421875, + "loss_num": 0.005859375, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 919903784, + "step": 5360 + }, + { + "epoch": 60.86685552407932, + "grad_norm": 3.065690085216179, + "learning_rate": 5e-06, + "loss": 0.0596, + "num_input_tokens_seen": 920075336, + "step": 5361 + }, + { + "epoch": 60.86685552407932, + "loss": 0.05938621237874031, + "loss_ce": 2.9526741855079308e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 920075336, + "step": 5361 + }, + { + "epoch": 60.87818696883853, + "grad_norm": 3.6584399703373247, + "learning_rate": 5e-06, + "loss": 0.0543, + "num_input_tokens_seen": 920247412, + "step": 5362 + }, + { + "epoch": 60.87818696883853, + "loss": 0.04484322667121887, + "loss_ce": 2.81646462099161e-05, + "loss_iou": 0.380859375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 920247412, + "step": 5362 + }, + { + "epoch": 60.889518413597735, + "grad_norm": 3.4592669651519206, + "learning_rate": 5e-06, + "loss": 0.0628, + "num_input_tokens_seen": 920418924, + "step": 5363 + }, + { + "epoch": 60.889518413597735, + "loss": 0.038255900144577026, + "loss_ce": 9.366725134896114e-05, + "loss_iou": 0.427734375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 920418924, + "step": 5363 + }, + { + "epoch": 60.90084985835694, + "grad_norm": 3.2966777464399364, + "learning_rate": 5e-06, + "loss": 0.0486, + "num_input_tokens_seen": 920589732, + "step": 5364 + }, + { + "epoch": 60.90084985835694, + "loss": 0.03835352510213852, + "loss_ce": 2.3445025362889282e-05, + "loss_iou": 0.498046875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 920589732, + "step": 5364 + }, + { + "epoch": 60.91218130311615, + "grad_norm": 3.3396297880577026, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 920760848, + "step": 5365 + }, + { + "epoch": 60.91218130311615, + "loss": 0.04530102759599686, + "loss_ce": 8.923888526624069e-05, + "loss_iou": 0.326171875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 920760848, + "step": 5365 + }, + { + "epoch": 60.92351274787536, + "grad_norm": 3.6937501855135495, + "learning_rate": 5e-06, + "loss": 0.0435, + "num_input_tokens_seen": 920932928, + "step": 5366 + }, + { + "epoch": 60.92351274787536, + "loss": 0.051009755581617355, + "loss_ce": 2.25132389459759e-05, + "loss_iou": 0.40234375, + "loss_num": 0.01019287109375, + "loss_xval": 0.051025390625, + "num_input_tokens_seen": 920932928, + "step": 5366 + }, + { + "epoch": 60.934844192634564, + "grad_norm": 3.669876031423358, + "learning_rate": 5e-06, + "loss": 0.0424, + "num_input_tokens_seen": 921104200, + "step": 5367 + }, + { + "epoch": 60.934844192634564, + "loss": 0.033807940781116486, + "loss_ce": 4.0241549868369475e-05, + "loss_iou": 0.51171875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 921104200, + "step": 5367 + }, + { + "epoch": 60.946175637393765, + "grad_norm": 3.578982783201232, + "learning_rate": 5e-06, + "loss": 0.0527, + "num_input_tokens_seen": 921276148, + "step": 5368 + }, + { + "epoch": 60.946175637393765, + "loss": 0.06229458749294281, + "loss_ce": 0.0010000327602028847, + "loss_iou": 0.28125, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 921276148, + "step": 5368 + }, + { + "epoch": 60.95750708215297, + "grad_norm": 3.676618433402416, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 921448072, + "step": 5369 + }, + { + "epoch": 60.95750708215297, + "loss": 0.04133474826812744, + "loss_ce": 5.9723417507484555e-05, + "loss_iou": 0.494140625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 921448072, + "step": 5369 + }, + { + "epoch": 60.96883852691218, + "grad_norm": 3.895485292129811, + "learning_rate": 5e-06, + "loss": 0.0924, + "num_input_tokens_seen": 921619728, + "step": 5370 + }, + { + "epoch": 60.96883852691218, + "loss": 0.14817720651626587, + "loss_ce": 7.540077058365569e-05, + "loss_iou": 0.484375, + "loss_num": 0.029541015625, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 921619728, + "step": 5370 + }, + { + "epoch": 60.98016997167139, + "grad_norm": 3.331073725054409, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 921790584, + "step": 5371 + }, + { + "epoch": 60.98016997167139, + "loss": 0.03188817575573921, + "loss_ce": 0.00011937714589294046, + "loss_iou": 0.380859375, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 921790584, + "step": 5371 + }, + { + "epoch": 60.991501416430594, + "grad_norm": 2.958003639440145, + "learning_rate": 5e-06, + "loss": 0.1034, + "num_input_tokens_seen": 921962312, + "step": 5372 + }, + { + "epoch": 60.991501416430594, + "loss": 0.11879308521747589, + "loss_ce": 9.496501297689974e-05, + "loss_iou": 0.04638671875, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 921962312, + "step": 5372 + }, + { + "epoch": 60.991501416430594, + "loss": 0.06609611213207245, + "loss_ce": 7.133104372769594e-05, + "loss_iou": 0.333984375, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 922091072, + "step": 5372 + }, + { + "epoch": 61.0028328611898, + "grad_norm": 2.618421301790613, + "learning_rate": 5e-06, + "loss": 0.0759, + "num_input_tokens_seen": 922132364, + "step": 5373 + }, + { + "epoch": 61.0028328611898, + "loss": 0.05547528713941574, + "loss_ce": 0.00025372812524437904, + "loss_iou": 0.154296875, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 922132364, + "step": 5373 + }, + { + "epoch": 61.01416430594901, + "grad_norm": 2.7079544177949773, + "learning_rate": 5e-06, + "loss": 0.0514, + "num_input_tokens_seen": 922303744, + "step": 5374 + }, + { + "epoch": 61.01416430594901, + "loss": 0.08013202995061874, + "loss_ce": 2.3384365704259835e-05, + "loss_iou": 0.361328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 922303744, + "step": 5374 + }, + { + "epoch": 61.02549575070822, + "grad_norm": 3.094933194899561, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 922474460, + "step": 5375 + }, + { + "epoch": 61.02549575070822, + "loss": 0.04491021856665611, + "loss_ce": 6.463790487032384e-05, + "loss_iou": 0.232421875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 922474460, + "step": 5375 + }, + { + "epoch": 61.036827195467424, + "grad_norm": 3.970055090364111, + "learning_rate": 5e-06, + "loss": 0.0608, + "num_input_tokens_seen": 922646220, + "step": 5376 + }, + { + "epoch": 61.036827195467424, + "loss": 0.03952575474977493, + "loss_ce": 8.178333519026637e-05, + "loss_iou": 0.328125, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 922646220, + "step": 5376 + }, + { + "epoch": 61.04815864022663, + "grad_norm": 3.8113416756828795, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 922816960, + "step": 5377 + }, + { + "epoch": 61.04815864022663, + "loss": 0.05297220125794411, + "loss_ce": 3.946200740756467e-05, + "loss_iou": 0.4921875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 922816960, + "step": 5377 + }, + { + "epoch": 61.05949008498584, + "grad_norm": 4.287461186986569, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 922988924, + "step": 5378 + }, + { + "epoch": 61.05949008498584, + "loss": 0.08201511204242706, + "loss_ce": 4.490065111895092e-05, + "loss_iou": 0.0, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 922988924, + "step": 5378 + }, + { + "epoch": 61.07082152974504, + "grad_norm": 2.99957455436037, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 923160468, + "step": 5379 + }, + { + "epoch": 61.07082152974504, + "loss": 0.0688992589712143, + "loss_ce": 8.212377724703401e-05, + "loss_iou": 0.50390625, + "loss_num": 0.01373291015625, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 923160468, + "step": 5379 + }, + { + "epoch": 61.08215297450425, + "grad_norm": 4.947147071926928, + "learning_rate": 5e-06, + "loss": 0.0621, + "num_input_tokens_seen": 923332612, + "step": 5380 + }, + { + "epoch": 61.08215297450425, + "loss": 0.0749097466468811, + "loss_ce": 7.301415462279692e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0150146484375, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 923332612, + "step": 5380 + }, + { + "epoch": 61.093484419263454, + "grad_norm": 2.9007247427808136, + "learning_rate": 5e-06, + "loss": 0.063, + "num_input_tokens_seen": 923504372, + "step": 5381 + }, + { + "epoch": 61.093484419263454, + "loss": 0.043455302715301514, + "loss_ce": 2.8789188945665956e-05, + "loss_iou": 0.4765625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 923504372, + "step": 5381 + }, + { + "epoch": 61.10481586402266, + "grad_norm": 8.553309062017176, + "learning_rate": 5e-06, + "loss": 0.0808, + "num_input_tokens_seen": 923675068, + "step": 5382 + }, + { + "epoch": 61.10481586402266, + "loss": 0.08925221860408783, + "loss_ce": 3.407484109629877e-05, + "loss_iou": 0.212890625, + "loss_num": 0.017822265625, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 923675068, + "step": 5382 + }, + { + "epoch": 61.11614730878187, + "grad_norm": 2.7025777183912227, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 923845204, + "step": 5383 + }, + { + "epoch": 61.11614730878187, + "loss": 0.03930659964680672, + "loss_ce": 0.00012202812649775296, + "loss_iou": 0.357421875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 923845204, + "step": 5383 + }, + { + "epoch": 61.12747875354108, + "grad_norm": 2.4315243336572014, + "learning_rate": 5e-06, + "loss": 0.0515, + "num_input_tokens_seen": 924017264, + "step": 5384 + }, + { + "epoch": 61.12747875354108, + "loss": 0.037681084126234055, + "loss_ce": 2.2392483515432104e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 924017264, + "step": 5384 + }, + { + "epoch": 61.138810198300284, + "grad_norm": 2.9323200141645396, + "learning_rate": 5e-06, + "loss": 0.0382, + "num_input_tokens_seen": 924187328, + "step": 5385 + }, + { + "epoch": 61.138810198300284, + "loss": 0.029375692829489708, + "loss_ce": 4.8299727495759726e-05, + "loss_iou": 0.490234375, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 924187328, + "step": 5385 + }, + { + "epoch": 61.15014164305949, + "grad_norm": 3.8294529155714025, + "learning_rate": 5e-06, + "loss": 0.051, + "num_input_tokens_seen": 924358196, + "step": 5386 + }, + { + "epoch": 61.15014164305949, + "loss": 0.0677463710308075, + "loss_ce": 2.7861919079441577e-05, + "loss_iou": 0.53515625, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 924358196, + "step": 5386 + }, + { + "epoch": 61.1614730878187, + "grad_norm": 3.535512680210106, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 924530108, + "step": 5387 + }, + { + "epoch": 61.1614730878187, + "loss": 0.0360972099006176, + "loss_ce": 2.5431319954805076e-05, + "loss_iou": 0.4921875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 924530108, + "step": 5387 + }, + { + "epoch": 61.172804532577906, + "grad_norm": 3.659544842438588, + "learning_rate": 5e-06, + "loss": 0.068, + "num_input_tokens_seen": 924700868, + "step": 5388 + }, + { + "epoch": 61.172804532577906, + "loss": 0.04517899453639984, + "loss_ce": 4.3497555452631786e-05, + "loss_iou": 0.228515625, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 924700868, + "step": 5388 + }, + { + "epoch": 61.184135977337114, + "grad_norm": 3.5042503761228523, + "learning_rate": 5e-06, + "loss": 0.0578, + "num_input_tokens_seen": 924871528, + "step": 5389 + }, + { + "epoch": 61.184135977337114, + "loss": 0.07018998265266418, + "loss_ce": 1.4817609553574584e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 924871528, + "step": 5389 + }, + { + "epoch": 61.195467422096314, + "grad_norm": 3.7407213081920694, + "learning_rate": 5e-06, + "loss": 0.04, + "num_input_tokens_seen": 925043544, + "step": 5390 + }, + { + "epoch": 61.195467422096314, + "loss": 0.03325444832444191, + "loss_ce": 8.184273610822856e-05, + "loss_iou": 0.490234375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 925043544, + "step": 5390 + }, + { + "epoch": 61.20679886685552, + "grad_norm": 3.372763482189903, + "learning_rate": 5e-06, + "loss": 0.0618, + "num_input_tokens_seen": 925213444, + "step": 5391 + }, + { + "epoch": 61.20679886685552, + "loss": 0.04542173072695732, + "loss_ce": 0.0001641612616367638, + "loss_iou": 0.1748046875, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 925213444, + "step": 5391 + }, + { + "epoch": 61.21813031161473, + "grad_norm": 3.4511652855575896, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 925383564, + "step": 5392 + }, + { + "epoch": 61.21813031161473, + "loss": 0.03346575051546097, + "loss_ce": 6.425984611269087e-05, + "loss_iou": 0.416015625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 925383564, + "step": 5392 + }, + { + "epoch": 61.22946175637394, + "grad_norm": 4.378422567240052, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 925555040, + "step": 5393 + }, + { + "epoch": 61.22946175637394, + "loss": 0.04979715496301651, + "loss_ce": 3.8242134905885905e-05, + "loss_iou": 0.53125, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 925555040, + "step": 5393 + }, + { + "epoch": 61.240793201133144, + "grad_norm": 9.225634993019883, + "learning_rate": 5e-06, + "loss": 0.065, + "num_input_tokens_seen": 925725252, + "step": 5394 + }, + { + "epoch": 61.240793201133144, + "loss": 0.04012008756399155, + "loss_ce": 6.576583837158978e-05, + "loss_iou": 0.36328125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 925725252, + "step": 5394 + }, + { + "epoch": 61.25212464589235, + "grad_norm": 4.097715928365581, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 925897652, + "step": 5395 + }, + { + "epoch": 61.25212464589235, + "loss": 0.08319907635450363, + "loss_ce": 3.867210398311727e-05, + "loss_iou": 0.6015625, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 925897652, + "step": 5395 + }, + { + "epoch": 61.26345609065156, + "grad_norm": 7.551747478651342, + "learning_rate": 5e-06, + "loss": 0.0519, + "num_input_tokens_seen": 926069616, + "step": 5396 + }, + { + "epoch": 61.26345609065156, + "loss": 0.07369707524776459, + "loss_ce": 0.00025651822215877473, + "loss_iou": 0.28125, + "loss_num": 0.01470947265625, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 926069616, + "step": 5396 + }, + { + "epoch": 61.274787535410766, + "grad_norm": 3.6511756459188347, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 926241012, + "step": 5397 + }, + { + "epoch": 61.274787535410766, + "loss": 0.04547043517231941, + "loss_ce": 6.027755807735957e-05, + "loss_iou": 0.47265625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 926241012, + "step": 5397 + }, + { + "epoch": 61.286118980169974, + "grad_norm": 4.382312423281042, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 926412948, + "step": 5398 + }, + { + "epoch": 61.286118980169974, + "loss": 0.030807040631771088, + "loss_ce": 4.5323533413466066e-05, + "loss_iou": 0.46484375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 926412948, + "step": 5398 + }, + { + "epoch": 61.29745042492918, + "grad_norm": 3.5702317902278486, + "learning_rate": 5e-06, + "loss": 0.0466, + "num_input_tokens_seen": 926584352, + "step": 5399 + }, + { + "epoch": 61.29745042492918, + "loss": 0.07534250617027283, + "loss_ce": 7.09006781107746e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0150146484375, + "loss_xval": 0.0751953125, + "num_input_tokens_seen": 926584352, + "step": 5399 + }, + { + "epoch": 61.30878186968839, + "grad_norm": 4.353963779393881, + "learning_rate": 5e-06, + "loss": 0.0425, + "num_input_tokens_seen": 926756256, + "step": 5400 + }, + { + "epoch": 61.30878186968839, + "loss": 0.03362511098384857, + "loss_ce": 4.0518236346542835e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 926756256, + "step": 5400 + }, + { + "epoch": 61.32011331444759, + "grad_norm": 4.2806917431010145, + "learning_rate": 5e-06, + "loss": 0.0415, + "num_input_tokens_seen": 926925480, + "step": 5401 + }, + { + "epoch": 61.32011331444759, + "loss": 0.06202850490808487, + "loss_ce": 0.00019989105931017548, + "loss_iou": 0.490234375, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 926925480, + "step": 5401 + }, + { + "epoch": 61.331444759206796, + "grad_norm": 4.184087435412052, + "learning_rate": 5e-06, + "loss": 0.0637, + "num_input_tokens_seen": 927096704, + "step": 5402 + }, + { + "epoch": 61.331444759206796, + "loss": 0.03770472854375839, + "loss_ce": 4.6038785512791947e-05, + "loss_iou": 0.5625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 927096704, + "step": 5402 + }, + { + "epoch": 61.342776203966004, + "grad_norm": 4.6483500108472295, + "learning_rate": 5e-06, + "loss": 0.0625, + "num_input_tokens_seen": 927268792, + "step": 5403 + }, + { + "epoch": 61.342776203966004, + "loss": 0.041017964482307434, + "loss_ce": 1.7597076293895952e-05, + "loss_iou": 0.462890625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 927268792, + "step": 5403 + }, + { + "epoch": 61.35410764872521, + "grad_norm": 3.541180097637101, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 927439980, + "step": 5404 + }, + { + "epoch": 61.35410764872521, + "loss": 0.03730998933315277, + "loss_ce": 6.328700692392886e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 927439980, + "step": 5404 + }, + { + "epoch": 61.36543909348442, + "grad_norm": 4.06912400471719, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 927611752, + "step": 5405 + }, + { + "epoch": 61.36543909348442, + "loss": 0.09848879277706146, + "loss_ce": 0.00017641550221014768, + "loss_iou": 0.31640625, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 927611752, + "step": 5405 + }, + { + "epoch": 61.376770538243626, + "grad_norm": 6.7557225539952155, + "learning_rate": 5e-06, + "loss": 0.103, + "num_input_tokens_seen": 927783696, + "step": 5406 + }, + { + "epoch": 61.376770538243626, + "loss": 0.08808835595846176, + "loss_ce": 6.039925938239321e-05, + "loss_iou": 0.44140625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 927783696, + "step": 5406 + }, + { + "epoch": 61.388101983002834, + "grad_norm": 2.5775862143684343, + "learning_rate": 5e-06, + "loss": 0.0421, + "num_input_tokens_seen": 927955968, + "step": 5407 + }, + { + "epoch": 61.388101983002834, + "loss": 0.03690294921398163, + "loss_ce": 3.771438787225634e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 927955968, + "step": 5407 + }, + { + "epoch": 61.39943342776204, + "grad_norm": 3.1407236906164826, + "learning_rate": 5e-06, + "loss": 0.0421, + "num_input_tokens_seen": 928127184, + "step": 5408 + }, + { + "epoch": 61.39943342776204, + "loss": 0.05802961811423302, + "loss_ce": 3.096037107752636e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 928127184, + "step": 5408 + }, + { + "epoch": 61.41076487252125, + "grad_norm": 2.3814743568995707, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 928298580, + "step": 5409 + }, + { + "epoch": 61.41076487252125, + "loss": 0.02485290728509426, + "loss_ce": 2.6857247576117516e-05, + "loss_iou": 0.279296875, + "loss_num": 0.004974365234375, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 928298580, + "step": 5409 + }, + { + "epoch": 61.422096317280456, + "grad_norm": 5.518554237820242, + "learning_rate": 5e-06, + "loss": 0.0779, + "num_input_tokens_seen": 928470272, + "step": 5410 + }, + { + "epoch": 61.422096317280456, + "loss": 0.034210801124572754, + "loss_ce": 1.5853276636335067e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 928470272, + "step": 5410 + }, + { + "epoch": 61.43342776203966, + "grad_norm": 8.528453867087968, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 928641288, + "step": 5411 + }, + { + "epoch": 61.43342776203966, + "loss": 0.03900233283638954, + "loss_ce": 1.6126345144584775e-05, + "loss_iou": 0.64453125, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 928641288, + "step": 5411 + }, + { + "epoch": 61.444759206798864, + "grad_norm": 3.084682555867012, + "learning_rate": 5e-06, + "loss": 0.0656, + "num_input_tokens_seen": 928812436, + "step": 5412 + }, + { + "epoch": 61.444759206798864, + "loss": 0.0273964311927557, + "loss_ce": 5.267925007501617e-05, + "loss_iou": 0.359375, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 928812436, + "step": 5412 + }, + { + "epoch": 61.45609065155807, + "grad_norm": 3.9639327219346874, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 928983856, + "step": 5413 + }, + { + "epoch": 61.45609065155807, + "loss": 0.029340803623199463, + "loss_ce": 0.00010496339382370934, + "loss_iou": 0.46875, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 928983856, + "step": 5413 + }, + { + "epoch": 61.46742209631728, + "grad_norm": 3.6539222112964658, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 929155308, + "step": 5414 + }, + { + "epoch": 61.46742209631728, + "loss": 0.04240777716040611, + "loss_ce": 0.00024011486675590277, + "loss_iou": 0.4140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 929155308, + "step": 5414 + }, + { + "epoch": 61.478753541076486, + "grad_norm": 3.5068252122486903, + "learning_rate": 5e-06, + "loss": 0.077, + "num_input_tokens_seen": 929325240, + "step": 5415 + }, + { + "epoch": 61.478753541076486, + "loss": 0.126019686460495, + "loss_ce": 4.312150122132152e-05, + "loss_iou": 0.205078125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 929325240, + "step": 5415 + }, + { + "epoch": 61.49008498583569, + "grad_norm": 3.2674185502066058, + "learning_rate": 5e-06, + "loss": 0.0393, + "num_input_tokens_seen": 929497020, + "step": 5416 + }, + { + "epoch": 61.49008498583569, + "loss": 0.027205945923924446, + "loss_ce": 0.0001368547382298857, + "loss_iou": 0.4765625, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 929497020, + "step": 5416 + }, + { + "epoch": 61.5014164305949, + "grad_norm": 5.000935789033505, + "learning_rate": 5e-06, + "loss": 0.0596, + "num_input_tokens_seen": 929667324, + "step": 5417 + }, + { + "epoch": 61.5014164305949, + "loss": 0.034466326236724854, + "loss_ce": 1.1979979717580136e-05, + "loss_iou": 0.48046875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 929667324, + "step": 5417 + }, + { + "epoch": 61.51274787535411, + "grad_norm": 4.028097059557899, + "learning_rate": 5e-06, + "loss": 0.0556, + "num_input_tokens_seen": 929838736, + "step": 5418 + }, + { + "epoch": 61.51274787535411, + "loss": 0.05556664243340492, + "loss_ce": 5.516869350685738e-05, + "loss_iou": 0.48046875, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 929838736, + "step": 5418 + }, + { + "epoch": 61.524079320113316, + "grad_norm": 3.6396761912297717, + "learning_rate": 5e-06, + "loss": 0.0359, + "num_input_tokens_seen": 930010732, + "step": 5419 + }, + { + "epoch": 61.524079320113316, + "loss": 0.0308145210146904, + "loss_ce": 7.568874571006745e-05, + "loss_iou": 0.392578125, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 930010732, + "step": 5419 + }, + { + "epoch": 61.53541076487252, + "grad_norm": 3.989282732550078, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 930182116, + "step": 5420 + }, + { + "epoch": 61.53541076487252, + "loss": 0.0341818705201149, + "loss_ce": 4.796050052391365e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 930182116, + "step": 5420 + }, + { + "epoch": 61.54674220963173, + "grad_norm": 3.4325899624936826, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 930353272, + "step": 5421 + }, + { + "epoch": 61.54674220963173, + "loss": 0.08812987804412842, + "loss_ce": 4.088549758307636e-05, + "loss_iou": 0.33203125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 930353272, + "step": 5421 + }, + { + "epoch": 61.55807365439094, + "grad_norm": 3.190805977217918, + "learning_rate": 5e-06, + "loss": 0.0429, + "num_input_tokens_seen": 930523944, + "step": 5422 + }, + { + "epoch": 61.55807365439094, + "loss": 0.05038332939147949, + "loss_ce": 6.747568841092288e-05, + "loss_iou": 0.42578125, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 930523944, + "step": 5422 + }, + { + "epoch": 61.56940509915014, + "grad_norm": 3.28246754590637, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 930695872, + "step": 5423 + }, + { + "epoch": 61.56940509915014, + "loss": 0.03591139242053032, + "loss_ce": 3.797807221417315e-05, + "loss_iou": 0.326171875, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 930695872, + "step": 5423 + }, + { + "epoch": 61.580736543909346, + "grad_norm": 2.9441316974884915, + "learning_rate": 5e-06, + "loss": 0.0585, + "num_input_tokens_seen": 930867124, + "step": 5424 + }, + { + "epoch": 61.580736543909346, + "loss": 0.07019766420125961, + "loss_ce": 5.3008283430244774e-05, + "loss_iou": 0.45703125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 930867124, + "step": 5424 + }, + { + "epoch": 61.59206798866855, + "grad_norm": 18.563942399602713, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 931039048, + "step": 5425 + }, + { + "epoch": 61.59206798866855, + "loss": 0.02848649024963379, + "loss_ce": 2.8848489819210954e-05, + "loss_iou": 0.39453125, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 931039048, + "step": 5425 + }, + { + "epoch": 61.60339943342776, + "grad_norm": 3.589626787427431, + "learning_rate": 5e-06, + "loss": 0.0461, + "num_input_tokens_seen": 931210696, + "step": 5426 + }, + { + "epoch": 61.60339943342776, + "loss": 0.050541266798973083, + "loss_ce": 4.993191396351904e-05, + "loss_iou": 0.52734375, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 931210696, + "step": 5426 + }, + { + "epoch": 61.61473087818697, + "grad_norm": 3.473028753065151, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 931381840, + "step": 5427 + }, + { + "epoch": 61.61473087818697, + "loss": 0.07088002562522888, + "loss_ce": 3.3468684705439955e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 931381840, + "step": 5427 + }, + { + "epoch": 61.626062322946176, + "grad_norm": 4.658931952452781, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 931553444, + "step": 5428 + }, + { + "epoch": 61.626062322946176, + "loss": 0.0438983254134655, + "loss_ce": 4.456478200154379e-05, + "loss_iou": 0.51171875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 931553444, + "step": 5428 + }, + { + "epoch": 61.63739376770538, + "grad_norm": 4.018127900473281, + "learning_rate": 5e-06, + "loss": 0.0589, + "num_input_tokens_seen": 931725036, + "step": 5429 + }, + { + "epoch": 61.63739376770538, + "loss": 0.0372612327337265, + "loss_ce": 4.5044809667160735e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 931725036, + "step": 5429 + }, + { + "epoch": 61.64872521246459, + "grad_norm": 4.453898987378134, + "learning_rate": 5e-06, + "loss": 0.0586, + "num_input_tokens_seen": 931897016, + "step": 5430 + }, + { + "epoch": 61.64872521246459, + "loss": 0.04600450396537781, + "loss_ce": 0.0004265017923898995, + "loss_iou": 0.4609375, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 931897016, + "step": 5430 + }, + { + "epoch": 61.6600566572238, + "grad_norm": 3.216479772630932, + "learning_rate": 5e-06, + "loss": 0.0499, + "num_input_tokens_seen": 932068764, + "step": 5431 + }, + { + "epoch": 61.6600566572238, + "loss": 0.04468665271997452, + "loss_ce": 2.4175857106456533e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 932068764, + "step": 5431 + }, + { + "epoch": 61.671388101983005, + "grad_norm": 3.9802162349397903, + "learning_rate": 5e-06, + "loss": 0.0676, + "num_input_tokens_seen": 932238280, + "step": 5432 + }, + { + "epoch": 61.671388101983005, + "loss": 0.030636565759778023, + "loss_ce": 4.269458440830931e-05, + "loss_iou": 0.5078125, + "loss_num": 0.006103515625, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 932238280, + "step": 5432 + }, + { + "epoch": 61.68271954674221, + "grad_norm": 2.986396616669738, + "learning_rate": 5e-06, + "loss": 0.0698, + "num_input_tokens_seen": 932409984, + "step": 5433 + }, + { + "epoch": 61.68271954674221, + "loss": 0.0468275249004364, + "loss_ce": 2.8818783903261647e-05, + "loss_iou": 0.44140625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 932409984, + "step": 5433 + }, + { + "epoch": 61.69405099150141, + "grad_norm": 3.0210660949127752, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 932581864, + "step": 5434 + }, + { + "epoch": 61.69405099150141, + "loss": 0.031948283314704895, + "loss_ce": 4.2154802940785885e-05, + "loss_iou": 0.349609375, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 932581864, + "step": 5434 + }, + { + "epoch": 61.70538243626062, + "grad_norm": 3.501685231665008, + "learning_rate": 5e-06, + "loss": 0.0464, + "num_input_tokens_seen": 932753596, + "step": 5435 + }, + { + "epoch": 61.70538243626062, + "loss": 0.057972513139247894, + "loss_ce": 3.4894284908659756e-05, + "loss_iou": 0.447265625, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 932753596, + "step": 5435 + }, + { + "epoch": 61.71671388101983, + "grad_norm": 4.665618159130267, + "learning_rate": 5e-06, + "loss": 0.0642, + "num_input_tokens_seen": 932925300, + "step": 5436 + }, + { + "epoch": 61.71671388101983, + "loss": 0.05014738440513611, + "loss_ce": 2.226119249826297e-05, + "loss_iou": 0.4296875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 932925300, + "step": 5436 + }, + { + "epoch": 61.728045325779036, + "grad_norm": 3.6334451061987476, + "learning_rate": 5e-06, + "loss": 0.0462, + "num_input_tokens_seen": 933097384, + "step": 5437 + }, + { + "epoch": 61.728045325779036, + "loss": 0.03699394688010216, + "loss_ce": 3.716022183652967e-05, + "loss_iou": 0.640625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 933097384, + "step": 5437 + }, + { + "epoch": 61.73937677053824, + "grad_norm": 3.460290664743299, + "learning_rate": 5e-06, + "loss": 0.0806, + "num_input_tokens_seen": 933269376, + "step": 5438 + }, + { + "epoch": 61.73937677053824, + "loss": 0.06431925296783447, + "loss_ce": 0.00011027281288988888, + "loss_iou": 0.470703125, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 933269376, + "step": 5438 + }, + { + "epoch": 61.75070821529745, + "grad_norm": 3.217820309660579, + "learning_rate": 5e-06, + "loss": 0.0668, + "num_input_tokens_seen": 933441000, + "step": 5439 + }, + { + "epoch": 61.75070821529745, + "loss": 0.06744620203971863, + "loss_ce": 3.286795617896132e-05, + "loss_iou": 0.419921875, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 933441000, + "step": 5439 + }, + { + "epoch": 61.76203966005666, + "grad_norm": 11.72594330252447, + "learning_rate": 5e-06, + "loss": 0.0571, + "num_input_tokens_seen": 933611336, + "step": 5440 + }, + { + "epoch": 61.76203966005666, + "loss": 0.06040842831134796, + "loss_ce": 2.939722799055744e-05, + "loss_iou": 0.283203125, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 933611336, + "step": 5440 + }, + { + "epoch": 61.773371104815865, + "grad_norm": 3.0682254511155818, + "learning_rate": 5e-06, + "loss": 0.0496, + "num_input_tokens_seen": 933783524, + "step": 5441 + }, + { + "epoch": 61.773371104815865, + "loss": 0.040234826505184174, + "loss_ce": 7.369305239990354e-05, + "loss_iou": 0.4609375, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 933783524, + "step": 5441 + }, + { + "epoch": 61.78470254957507, + "grad_norm": 3.0250770804577334, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 933955356, + "step": 5442 + }, + { + "epoch": 61.78470254957507, + "loss": 0.04656115174293518, + "loss_ce": 2.184302320529241e-05, + "loss_iou": 0.546875, + "loss_num": 0.00927734375, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 933955356, + "step": 5442 + }, + { + "epoch": 61.79603399433428, + "grad_norm": 2.544432106404559, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 934125340, + "step": 5443 + }, + { + "epoch": 61.79603399433428, + "loss": 0.14097346365451813, + "loss_ce": 7.380479655694216e-05, + "loss_iou": 0.234375, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 934125340, + "step": 5443 + }, + { + "epoch": 61.80736543909349, + "grad_norm": 2.51052929600517, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 934294652, + "step": 5444 + }, + { + "epoch": 61.80736543909349, + "loss": 0.037977270781993866, + "loss_ce": 5.918265014770441e-05, + "loss_iou": 0.2890625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 934294652, + "step": 5444 + }, + { + "epoch": 61.81869688385269, + "grad_norm": 2.930109162078187, + "learning_rate": 5e-06, + "loss": 0.0327, + "num_input_tokens_seen": 934465460, + "step": 5445 + }, + { + "epoch": 61.81869688385269, + "loss": 0.03072969615459442, + "loss_ce": 5.952788342256099e-05, + "loss_iou": 0.3515625, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 934465460, + "step": 5445 + }, + { + "epoch": 61.830028328611895, + "grad_norm": 3.5571746383023672, + "learning_rate": 5e-06, + "loss": 0.0568, + "num_input_tokens_seen": 934637528, + "step": 5446 + }, + { + "epoch": 61.830028328611895, + "loss": 0.03334224969148636, + "loss_ce": 4.757177157443948e-05, + "loss_iou": 0.3984375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 934637528, + "step": 5446 + }, + { + "epoch": 61.8413597733711, + "grad_norm": 3.990280030304733, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 934809404, + "step": 5447 + }, + { + "epoch": 61.8413597733711, + "loss": 0.05740930512547493, + "loss_ce": 3.626099351095036e-05, + "loss_iou": 0.59375, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 934809404, + "step": 5447 + }, + { + "epoch": 61.85269121813031, + "grad_norm": 3.718655360816984, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 934980428, + "step": 5448 + }, + { + "epoch": 61.85269121813031, + "loss": 0.03991135209798813, + "loss_ce": 2.4881439458113164e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 934980428, + "step": 5448 + }, + { + "epoch": 61.86402266288952, + "grad_norm": 3.374143159873171, + "learning_rate": 5e-06, + "loss": 0.0623, + "num_input_tokens_seen": 935152444, + "step": 5449 + }, + { + "epoch": 61.86402266288952, + "loss": 0.038515567779541016, + "loss_ce": 0.00013971282169222832, + "loss_iou": 0.40625, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 935152444, + "step": 5449 + }, + { + "epoch": 61.875354107648725, + "grad_norm": 3.887096494678402, + "learning_rate": 5e-06, + "loss": 0.0558, + "num_input_tokens_seen": 935322656, + "step": 5450 + }, + { + "epoch": 61.875354107648725, + "loss": 0.05174320936203003, + "loss_ce": 9.220830543199554e-05, + "loss_iou": 0.326171875, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 935322656, + "step": 5450 + }, + { + "epoch": 61.88668555240793, + "grad_norm": 3.2053296501397277, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 935491536, + "step": 5451 + }, + { + "epoch": 61.88668555240793, + "loss": 0.03982760012149811, + "loss_ce": 4.79383124911692e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 935491536, + "step": 5451 + }, + { + "epoch": 61.89801699716714, + "grad_norm": 3.5288835585458127, + "learning_rate": 5e-06, + "loss": 0.0375, + "num_input_tokens_seen": 935663660, + "step": 5452 + }, + { + "epoch": 61.89801699716714, + "loss": 0.03726642578840256, + "loss_ce": 1.9720599084394053e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 935663660, + "step": 5452 + }, + { + "epoch": 61.90934844192635, + "grad_norm": 4.201719011974456, + "learning_rate": 5e-06, + "loss": 0.0497, + "num_input_tokens_seen": 935834640, + "step": 5453 + }, + { + "epoch": 61.90934844192635, + "loss": 0.048681341111660004, + "loss_ce": 3.632364678196609e-05, + "loss_iou": 0.44921875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 935834640, + "step": 5453 + }, + { + "epoch": 61.920679886685555, + "grad_norm": 3.7062454895882597, + "learning_rate": 5e-06, + "loss": 0.0555, + "num_input_tokens_seen": 936004300, + "step": 5454 + }, + { + "epoch": 61.920679886685555, + "loss": 0.05486631765961647, + "loss_ce": 2.6231682568322867e-05, + "loss_iou": 0.33203125, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 936004300, + "step": 5454 + }, + { + "epoch": 61.93201133144476, + "grad_norm": 3.482678929249557, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 936175564, + "step": 5455 + }, + { + "epoch": 61.93201133144476, + "loss": 0.051267560571432114, + "loss_ce": 7.432389247696847e-05, + "loss_iou": 0.470703125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 936175564, + "step": 5455 + }, + { + "epoch": 61.94334277620396, + "grad_norm": 3.435541656780739, + "learning_rate": 5e-06, + "loss": 0.0362, + "num_input_tokens_seen": 936343884, + "step": 5456 + }, + { + "epoch": 61.94334277620396, + "loss": 0.029880985617637634, + "loss_ce": 3.479361112113111e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0059814453125, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 936343884, + "step": 5456 + }, + { + "epoch": 61.95467422096317, + "grad_norm": 2.5019860673659955, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 936515464, + "step": 5457 + }, + { + "epoch": 61.95467422096317, + "loss": 0.04240552335977554, + "loss_ce": 4.712356167146936e-05, + "loss_iou": 0.26953125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 936515464, + "step": 5457 + }, + { + "epoch": 61.96600566572238, + "grad_norm": 2.820645150690986, + "learning_rate": 5e-06, + "loss": 0.0596, + "num_input_tokens_seen": 936686680, + "step": 5458 + }, + { + "epoch": 61.96600566572238, + "loss": 0.08181725442409515, + "loss_ce": 4.540765075944364e-05, + "loss_iou": 0.3671875, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 936686680, + "step": 5458 + }, + { + "epoch": 61.977337110481585, + "grad_norm": 3.1310054560317986, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 936858448, + "step": 5459 + }, + { + "epoch": 61.977337110481585, + "loss": 0.03853434696793556, + "loss_ce": 2.1163112251088023e-05, + "loss_iou": 0.47265625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 936858448, + "step": 5459 + }, + { + "epoch": 61.98866855524079, + "grad_norm": 3.7846895611219433, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 937030340, + "step": 5460 + }, + { + "epoch": 61.98866855524079, + "loss": 0.031084464862942696, + "loss_ce": 3.2829411793500185e-05, + "loss_iou": 0.48046875, + "loss_num": 0.0062255859375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 937030340, + "step": 5460 + }, + { + "epoch": 62.0, + "grad_norm": 3.417153388904375, + "learning_rate": 5e-06, + "loss": 0.0508, + "num_input_tokens_seen": 937201416, + "step": 5461 + }, + { + "epoch": 62.0, + "loss": 0.062279924750328064, + "loss_ce": 0.00011561553401406854, + "loss_iou": 0.392578125, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 937201416, + "step": 5461 + }, + { + "epoch": 62.01133144475921, + "grad_norm": 3.2218375417976497, + "learning_rate": 5e-06, + "loss": 0.0568, + "num_input_tokens_seen": 937372452, + "step": 5462 + }, + { + "epoch": 62.01133144475921, + "loss": 0.06710394471883774, + "loss_ce": 4.156710201641545e-05, + "loss_iou": 0.439453125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 937372452, + "step": 5462 + }, + { + "epoch": 62.022662889518415, + "grad_norm": 3.061302474910644, + "learning_rate": 5e-06, + "loss": 0.048, + "num_input_tokens_seen": 937542480, + "step": 5463 + }, + { + "epoch": 62.022662889518415, + "loss": 0.04750329256057739, + "loss_ce": 3.319942334201187e-05, + "loss_iou": 0.0830078125, + "loss_num": 0.009521484375, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 937542480, + "step": 5463 + }, + { + "epoch": 62.03399433427762, + "grad_norm": 4.791495385985432, + "learning_rate": 5e-06, + "loss": 0.0586, + "num_input_tokens_seen": 937713684, + "step": 5464 + }, + { + "epoch": 62.03399433427762, + "loss": 0.05480201169848442, + "loss_ce": 3.8218680856516585e-05, + "loss_iou": 0.34765625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 937713684, + "step": 5464 + }, + { + "epoch": 62.04532577903683, + "grad_norm": 4.773380508106855, + "learning_rate": 5e-06, + "loss": 0.0811, + "num_input_tokens_seen": 937885840, + "step": 5465 + }, + { + "epoch": 62.04532577903683, + "loss": 0.11668197810649872, + "loss_ce": 4.379051097203046e-05, + "loss_iou": 0.369140625, + "loss_num": 0.0233154296875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 937885840, + "step": 5465 + }, + { + "epoch": 62.05665722379604, + "grad_norm": 3.748227812173109, + "learning_rate": 5e-06, + "loss": 0.0515, + "num_input_tokens_seen": 938057976, + "step": 5466 + }, + { + "epoch": 62.05665722379604, + "loss": 0.048764150589704514, + "loss_ce": 2.7580414098338224e-05, + "loss_iou": 0.515625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 938057976, + "step": 5466 + }, + { + "epoch": 62.06798866855524, + "grad_norm": 5.729863651225758, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 938229656, + "step": 5467 + }, + { + "epoch": 62.06798866855524, + "loss": 0.06794430315494537, + "loss_ce": 4.269663622835651e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0135498046875, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 938229656, + "step": 5467 + }, + { + "epoch": 62.079320113314445, + "grad_norm": 3.465089578231126, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 938401440, + "step": 5468 + }, + { + "epoch": 62.079320113314445, + "loss": 0.0363333523273468, + "loss_ce": 7.847221422707662e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 938401440, + "step": 5468 + }, + { + "epoch": 62.09065155807365, + "grad_norm": 3.636549317837481, + "learning_rate": 5e-06, + "loss": 0.0493, + "num_input_tokens_seen": 938572824, + "step": 5469 + }, + { + "epoch": 62.09065155807365, + "loss": 0.04511266201734543, + "loss_ce": 0.00014501233818009496, + "loss_iou": 0.458984375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 938572824, + "step": 5469 + }, + { + "epoch": 62.10198300283286, + "grad_norm": 3.3519042758105075, + "learning_rate": 5e-06, + "loss": 0.0724, + "num_input_tokens_seen": 938744636, + "step": 5470 + }, + { + "epoch": 62.10198300283286, + "loss": 0.031431086361408234, + "loss_ce": 5.9018824686063454e-05, + "loss_iou": 0.376953125, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 938744636, + "step": 5470 + }, + { + "epoch": 62.11331444759207, + "grad_norm": 2.659883634851869, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 938916256, + "step": 5471 + }, + { + "epoch": 62.11331444759207, + "loss": 0.03059856966137886, + "loss_ce": 5.810384755022824e-05, + "loss_iou": 0.0283203125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 938916256, + "step": 5471 + }, + { + "epoch": 62.124645892351275, + "grad_norm": 2.855979792314044, + "learning_rate": 5e-06, + "loss": 0.0825, + "num_input_tokens_seen": 939087900, + "step": 5472 + }, + { + "epoch": 62.124645892351275, + "loss": 0.11063708364963531, + "loss_ce": 4.138428266742267e-05, + "loss_iou": 0.328125, + "loss_num": 0.0220947265625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 939087900, + "step": 5472 + }, + { + "epoch": 62.13597733711048, + "grad_norm": 2.1534098796645025, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 939257536, + "step": 5473 + }, + { + "epoch": 62.13597733711048, + "loss": 0.028540601953864098, + "loss_ce": 8.296081068692729e-05, + "loss_iou": 0.33203125, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 939257536, + "step": 5473 + }, + { + "epoch": 62.14730878186969, + "grad_norm": 3.2350856560883776, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 939427492, + "step": 5474 + }, + { + "epoch": 62.14730878186969, + "loss": 0.03937472403049469, + "loss_ce": 4.5195130951469764e-05, + "loss_iou": 0.41796875, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 939427492, + "step": 5474 + }, + { + "epoch": 62.1586402266289, + "grad_norm": 4.100332152486143, + "learning_rate": 5e-06, + "loss": 0.0381, + "num_input_tokens_seen": 939599192, + "step": 5475 + }, + { + "epoch": 62.1586402266289, + "loss": 0.027330735698342323, + "loss_ce": 1.7503924027550966e-05, + "loss_iou": 0.35546875, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 939599192, + "step": 5475 + }, + { + "epoch": 62.169971671388105, + "grad_norm": 3.507712436120644, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 939770736, + "step": 5476 + }, + { + "epoch": 62.169971671388105, + "loss": 0.040435679256916046, + "loss_ce": 4.566271309158765e-05, + "loss_iou": 0.47265625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 939770736, + "step": 5476 + }, + { + "epoch": 62.18130311614731, + "grad_norm": 3.345223746675795, + "learning_rate": 5e-06, + "loss": 0.0419, + "num_input_tokens_seen": 939940736, + "step": 5477 + }, + { + "epoch": 62.18130311614731, + "loss": 0.03670191019773483, + "loss_ce": 0.0003554762515705079, + "loss_iou": 0.451171875, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 939940736, + "step": 5477 + }, + { + "epoch": 62.19263456090651, + "grad_norm": 3.634020319367164, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 940112220, + "step": 5478 + }, + { + "epoch": 62.19263456090651, + "loss": 0.03957882523536682, + "loss_ce": 2.804466384986881e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 940112220, + "step": 5478 + }, + { + "epoch": 62.20396600566572, + "grad_norm": 3.734574033117681, + "learning_rate": 5e-06, + "loss": 0.0529, + "num_input_tokens_seen": 940281772, + "step": 5479 + }, + { + "epoch": 62.20396600566572, + "loss": 0.04302915185689926, + "loss_ce": 4.514193278737366e-05, + "loss_iou": 0.57421875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 940281772, + "step": 5479 + }, + { + "epoch": 62.21529745042493, + "grad_norm": 3.3620158978122068, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 940453712, + "step": 5480 + }, + { + "epoch": 62.21529745042493, + "loss": 0.0472903847694397, + "loss_ce": 6.44295578240417e-05, + "loss_iou": 0.498046875, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 940453712, + "step": 5480 + }, + { + "epoch": 62.226628895184135, + "grad_norm": 3.4598195091750634, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 940625912, + "step": 5481 + }, + { + "epoch": 62.226628895184135, + "loss": 0.050176411867141724, + "loss_ce": 5.128864722792059e-05, + "loss_iou": 0.3515625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 940625912, + "step": 5481 + }, + { + "epoch": 62.23796033994334, + "grad_norm": 3.638312621458632, + "learning_rate": 5e-06, + "loss": 0.1026, + "num_input_tokens_seen": 940797020, + "step": 5482 + }, + { + "epoch": 62.23796033994334, + "loss": 0.06003417819738388, + "loss_ce": 3.662324888864532e-05, + "loss_iou": 0.4921875, + "loss_num": 0.011962890625, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 940797020, + "step": 5482 + }, + { + "epoch": 62.24929178470255, + "grad_norm": 3.6491319345394113, + "learning_rate": 5e-06, + "loss": 0.0398, + "num_input_tokens_seen": 940968596, + "step": 5483 + }, + { + "epoch": 62.24929178470255, + "loss": 0.03468136861920357, + "loss_ce": 5.917662929277867e-05, + "loss_iou": 0.5859375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 940968596, + "step": 5483 + }, + { + "epoch": 62.26062322946176, + "grad_norm": 3.4337978968828584, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 941140248, + "step": 5484 + }, + { + "epoch": 62.26062322946176, + "loss": 0.052647706121206284, + "loss_ce": 2.014332130784169e-05, + "loss_iou": 0.1044921875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 941140248, + "step": 5484 + }, + { + "epoch": 62.271954674220964, + "grad_norm": 3.0039884288887255, + "learning_rate": 5e-06, + "loss": 0.048, + "num_input_tokens_seen": 941312176, + "step": 5485 + }, + { + "epoch": 62.271954674220964, + "loss": 0.0806206688284874, + "loss_ce": 0.00013055690214969218, + "loss_iou": 0.47265625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 941312176, + "step": 5485 + }, + { + "epoch": 62.28328611898017, + "grad_norm": 3.2068092276286273, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 941483908, + "step": 5486 + }, + { + "epoch": 62.28328611898017, + "loss": 0.03270648419857025, + "loss_ce": 5.267691449262202e-05, + "loss_iou": 0.2734375, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 941483908, + "step": 5486 + }, + { + "epoch": 62.29461756373938, + "grad_norm": 3.054246704007408, + "learning_rate": 5e-06, + "loss": 0.0571, + "num_input_tokens_seen": 941654524, + "step": 5487 + }, + { + "epoch": 62.29461756373938, + "loss": 0.0500912144780159, + "loss_ce": 2.7128768124384806e-05, + "loss_iou": 0.44140625, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 941654524, + "step": 5487 + }, + { + "epoch": 62.30594900849859, + "grad_norm": 2.8691608604965206, + "learning_rate": 5e-06, + "loss": 0.0356, + "num_input_tokens_seen": 941826104, + "step": 5488 + }, + { + "epoch": 62.30594900849859, + "loss": 0.029837291687726974, + "loss_ce": 2.1616673620883375e-05, + "loss_iou": 0.1552734375, + "loss_num": 0.0059814453125, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 941826104, + "step": 5488 + }, + { + "epoch": 62.31728045325779, + "grad_norm": 4.056745157682888, + "learning_rate": 5e-06, + "loss": 0.0531, + "num_input_tokens_seen": 941996984, + "step": 5489 + }, + { + "epoch": 62.31728045325779, + "loss": 0.027645211666822433, + "loss_ce": 5.7322162319906056e-05, + "loss_iou": 0.4765625, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 941996984, + "step": 5489 + }, + { + "epoch": 62.328611898016995, + "grad_norm": 3.71155986718287, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 942167972, + "step": 5490 + }, + { + "epoch": 62.328611898016995, + "loss": 0.034503042697906494, + "loss_ce": 0.00010973081225529313, + "loss_iou": 0.1943359375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 942167972, + "step": 5490 + }, + { + "epoch": 62.3399433427762, + "grad_norm": 3.2239677368004456, + "learning_rate": 5e-06, + "loss": 0.0542, + "num_input_tokens_seen": 942339772, + "step": 5491 + }, + { + "epoch": 62.3399433427762, + "loss": 0.036976248025894165, + "loss_ce": 1.9460418116068467e-05, + "loss_iou": 0.4765625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 942339772, + "step": 5491 + }, + { + "epoch": 62.35127478753541, + "grad_norm": 3.4934915350009472, + "learning_rate": 5e-06, + "loss": 0.0792, + "num_input_tokens_seen": 942511676, + "step": 5492 + }, + { + "epoch": 62.35127478753541, + "loss": 0.07291790843009949, + "loss_ce": 4.193507265881635e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 942511676, + "step": 5492 + }, + { + "epoch": 62.36260623229462, + "grad_norm": 2.95771765077045, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 942683112, + "step": 5493 + }, + { + "epoch": 62.36260623229462, + "loss": 0.05229192227125168, + "loss_ce": 0.0002747105318121612, + "loss_iou": 0.2490234375, + "loss_num": 0.01043701171875, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 942683112, + "step": 5493 + }, + { + "epoch": 62.373937677053824, + "grad_norm": 71.51418829355738, + "learning_rate": 5e-06, + "loss": 0.0416, + "num_input_tokens_seen": 942855304, + "step": 5494 + }, + { + "epoch": 62.373937677053824, + "loss": 0.03032710961997509, + "loss_ce": 2.3153936126618646e-05, + "loss_iou": 0.46875, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 942855304, + "step": 5494 + }, + { + "epoch": 62.38526912181303, + "grad_norm": 3.4614384727158227, + "learning_rate": 5e-06, + "loss": 0.0461, + "num_input_tokens_seen": 943027188, + "step": 5495 + }, + { + "epoch": 62.38526912181303, + "loss": 0.03571552410721779, + "loss_ce": 2.5214503693860024e-05, + "loss_iou": 0.5390625, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 943027188, + "step": 5495 + }, + { + "epoch": 62.39660056657224, + "grad_norm": 3.5745985231228183, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 943199220, + "step": 5496 + }, + { + "epoch": 62.39660056657224, + "loss": 0.036427609622478485, + "loss_ce": 5.065386358182877e-05, + "loss_iou": 0.2578125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 943199220, + "step": 5496 + }, + { + "epoch": 62.40793201133145, + "grad_norm": 3.731658545464063, + "learning_rate": 5e-06, + "loss": 0.0761, + "num_input_tokens_seen": 943370800, + "step": 5497 + }, + { + "epoch": 62.40793201133145, + "loss": 0.06790690124034882, + "loss_ce": 2.054810647678096e-05, + "loss_iou": 0.486328125, + "loss_num": 0.01361083984375, + "loss_xval": 0.06787109375, + "num_input_tokens_seen": 943370800, + "step": 5497 + }, + { + "epoch": 62.419263456090654, + "grad_norm": 3.4609450095495955, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 943542468, + "step": 5498 + }, + { + "epoch": 62.419263456090654, + "loss": 0.043365925550460815, + "loss_ce": 4.622228880180046e-05, + "loss_iou": 0.421875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 943542468, + "step": 5498 + }, + { + "epoch": 62.43059490084986, + "grad_norm": 2.917480575959165, + "learning_rate": 5e-06, + "loss": 0.0386, + "num_input_tokens_seen": 943712368, + "step": 5499 + }, + { + "epoch": 62.43059490084986, + "loss": 0.02478640154004097, + "loss_ce": 6.716401549056172e-05, + "loss_iou": 0.490234375, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 943712368, + "step": 5499 + }, + { + "epoch": 62.44192634560906, + "grad_norm": 3.639346077728529, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.44192634560906, + "eval_seeclick_CIoU": 0.5294385850429535, + "eval_seeclick_GIoU": 0.5267061293125153, + "eval_seeclick_IoU": 0.5655473172664642, + "eval_seeclick_MAE_all": 0.0660373866558075, + "eval_seeclick_MAE_h": 0.03382791392505169, + "eval_seeclick_MAE_w": 0.0966530367732048, + "eval_seeclick_MAE_x": 0.10127667337656021, + "eval_seeclick_MAE_y": 0.03239190950989723, + "eval_seeclick_NUM_probability": 0.9999770522117615, + "eval_seeclick_inside_bbox": 0.9232954680919647, + "eval_seeclick_loss": 0.9397633671760559, + "eval_seeclick_loss_ce": 0.6926635801792145, + "eval_seeclick_loss_iou": 0.5479736328125, + "eval_seeclick_loss_num": 0.049381256103515625, + "eval_seeclick_loss_xval": 0.2469482421875, + "eval_seeclick_runtime": 68.7854, + "eval_seeclick_samples_per_second": 0.625, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.44192634560906, + "eval_icons_CIoU": 0.7322515547275543, + "eval_icons_GIoU": 0.7308847606182098, + "eval_icons_IoU": 0.7460240721702576, + "eval_icons_MAE_all": 0.03638903237879276, + "eval_icons_MAE_h": 0.03358772397041321, + "eval_icons_MAE_w": 0.042488014325499535, + "eval_icons_MAE_x": 0.03324949275702238, + "eval_icons_MAE_y": 0.03623090125620365, + "eval_icons_NUM_probability": 0.9991548657417297, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.13571447134017944, + "eval_icons_loss_ce": 0.00491101061925292, + "eval_icons_loss_iou": 0.5789794921875, + "eval_icons_loss_num": 0.02399444580078125, + "eval_icons_loss_xval": 0.1199951171875, + "eval_icons_runtime": 78.55, + "eval_icons_samples_per_second": 0.637, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.44192634560906, + "eval_screenspot_CIoU": 0.6412812868754069, + "eval_screenspot_GIoU": 0.6449100772539774, + "eval_screenspot_IoU": 0.6669768492380778, + "eval_screenspot_MAE_all": 0.06233343109488487, + "eval_screenspot_MAE_h": 0.030469733600815136, + "eval_screenspot_MAE_w": 0.11368518508970737, + "eval_screenspot_MAE_x": 0.07766263497372468, + "eval_screenspot_MAE_y": 0.027516182512044907, + "eval_screenspot_NUM_probability": 0.9999338785807291, + "eval_screenspot_inside_bbox": 0.9079166650772095, + "eval_screenspot_loss": 0.2680523097515106, + "eval_screenspot_loss_ce": 0.01657482422888279, + "eval_screenspot_loss_iou": 0.4791666666666667, + "eval_screenspot_loss_num": 0.0505218505859375, + "eval_screenspot_loss_xval": 0.2526448567708333, + "eval_screenspot_runtime": 149.5419, + "eval_screenspot_samples_per_second": 0.595, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.44192634560906, + "eval_compot_CIoU": 0.8782232403755188, + "eval_compot_GIoU": 0.8780380487442017, + "eval_compot_IoU": 0.8868246972560883, + "eval_compot_MAE_all": 0.018250909633934498, + "eval_compot_MAE_h": 0.008052447810769081, + "eval_compot_MAE_w": 0.028046160470694304, + "eval_compot_MAE_x": 0.02766910195350647, + "eval_compot_MAE_y": 0.009235929697751999, + "eval_compot_NUM_probability": 0.9999649822711945, + "eval_compot_inside_bbox": 0.96875, + "eval_compot_loss": 0.06666712462902069, + "eval_compot_loss_ce": 3.294740054116119e-05, + "eval_compot_loss_iou": 0.4637451171875, + "eval_compot_loss_num": 0.011798858642578125, + "eval_compot_loss_xval": 0.0589752197265625, + "eval_compot_runtime": 83.2066, + "eval_compot_samples_per_second": 0.601, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.44192634560906, + "eval_custom_ui_MAE_all": 0.019400109071284533, + "eval_custom_ui_MAE_x": 0.03052842989563942, + "eval_custom_ui_MAE_y": 0.008271789643913507, + "eval_custom_ui_NUM_probability": 0.9998880922794342, + "eval_custom_ui_loss": 0.21498550474643707, + "eval_custom_ui_loss_ce": 0.11672582477331161, + "eval_custom_ui_loss_num": 0.018308639526367188, + "eval_custom_ui_loss_xval": 0.0915069580078125, + "eval_custom_ui_runtime": 58.2084, + "eval_custom_ui_samples_per_second": 0.859, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.44192634560906, + "loss": 0.259590744972229, + "loss_ce": 0.1462026834487915, + "loss_iou": 0.0, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 943884380, + "step": 5500 + }, + { + "epoch": 62.45325779036827, + "grad_norm": 2.8989652265084795, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 944056040, + "step": 5501 + }, + { + "epoch": 62.45325779036827, + "loss": 0.05497933179140091, + "loss_ce": 1.7175698303617537e-05, + "loss_iou": 0.49609375, + "loss_num": 0.010986328125, + "loss_xval": 0.054931640625, + "num_input_tokens_seen": 944056040, + "step": 5501 + }, + { + "epoch": 62.46458923512748, + "grad_norm": 3.886888631254645, + "learning_rate": 5e-06, + "loss": 0.037, + "num_input_tokens_seen": 944227688, + "step": 5502 + }, + { + "epoch": 62.46458923512748, + "loss": 0.031036769971251488, + "loss_ce": 3.0910578061593696e-05, + "loss_iou": 0.337890625, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 944227688, + "step": 5502 + }, + { + "epoch": 62.475920679886684, + "grad_norm": 4.631258754104667, + "learning_rate": 5e-06, + "loss": 0.0678, + "num_input_tokens_seen": 944399352, + "step": 5503 + }, + { + "epoch": 62.475920679886684, + "loss": 0.09276417642831802, + "loss_ce": 2.1252646547509357e-05, + "loss_iou": 0.6171875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 944399352, + "step": 5503 + }, + { + "epoch": 62.48725212464589, + "grad_norm": 3.233491652720797, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 944569148, + "step": 5504 + }, + { + "epoch": 62.48725212464589, + "loss": 0.05277272313833237, + "loss_ce": 2.3091110051609576e-05, + "loss_iou": 0.34375, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 944569148, + "step": 5504 + }, + { + "epoch": 62.4985835694051, + "grad_norm": 3.012208039440752, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 944740044, + "step": 5505 + }, + { + "epoch": 62.4985835694051, + "loss": 0.04457823932170868, + "loss_ce": 5.309501648298465e-05, + "loss_iou": 0.546875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 944740044, + "step": 5505 + }, + { + "epoch": 62.50991501416431, + "grad_norm": 3.183093027172892, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 944911944, + "step": 5506 + }, + { + "epoch": 62.50991501416431, + "loss": 0.038533765822649, + "loss_ce": 3.5839791962644085e-05, + "loss_iou": 0.240234375, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 944911944, + "step": 5506 + }, + { + "epoch": 62.521246458923514, + "grad_norm": 3.2110582643211774, + "learning_rate": 5e-06, + "loss": 0.0538, + "num_input_tokens_seen": 945083412, + "step": 5507 + }, + { + "epoch": 62.521246458923514, + "loss": 0.036099180579185486, + "loss_ce": 4.266292671673e-05, + "loss_iou": 0.025390625, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 945083412, + "step": 5507 + }, + { + "epoch": 62.53257790368272, + "grad_norm": 3.4422862511589054, + "learning_rate": 5e-06, + "loss": 0.044, + "num_input_tokens_seen": 945255504, + "step": 5508 + }, + { + "epoch": 62.53257790368272, + "loss": 0.04410179704427719, + "loss_ce": 3.441364970058203e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 945255504, + "step": 5508 + }, + { + "epoch": 62.54390934844193, + "grad_norm": 3.0136358965578594, + "learning_rate": 5e-06, + "loss": 0.0443, + "num_input_tokens_seen": 945427568, + "step": 5509 + }, + { + "epoch": 62.54390934844193, + "loss": 0.026692409068346024, + "loss_ce": 3.530406320351176e-05, + "loss_iou": 0.296875, + "loss_num": 0.005340576171875, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 945427568, + "step": 5509 + }, + { + "epoch": 62.555240793201136, + "grad_norm": 2.9336353248864806, + "learning_rate": 5e-06, + "loss": 0.0374, + "num_input_tokens_seen": 945598324, + "step": 5510 + }, + { + "epoch": 62.555240793201136, + "loss": 0.041790224611759186, + "loss_ce": 5.743668953073211e-05, + "loss_iou": 0.34765625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 945598324, + "step": 5510 + }, + { + "epoch": 62.56657223796034, + "grad_norm": 4.196889928738389, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 945769936, + "step": 5511 + }, + { + "epoch": 62.56657223796034, + "loss": 0.031056372448801994, + "loss_ce": 5.051364132668823e-05, + "loss_iou": 0.318359375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 945769936, + "step": 5511 + }, + { + "epoch": 62.577903682719544, + "grad_norm": 3.2909186270744875, + "learning_rate": 5e-06, + "loss": 0.055, + "num_input_tokens_seen": 945939532, + "step": 5512 + }, + { + "epoch": 62.577903682719544, + "loss": 0.035352952778339386, + "loss_ce": 4.411242844071239e-05, + "loss_iou": 0.42578125, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 945939532, + "step": 5512 + }, + { + "epoch": 62.58923512747875, + "grad_norm": 3.7816456933003115, + "learning_rate": 5e-06, + "loss": 0.042, + "num_input_tokens_seen": 946111400, + "step": 5513 + }, + { + "epoch": 62.58923512747875, + "loss": 0.04019385948777199, + "loss_ce": 4.798402369488031e-05, + "loss_iou": 0.61328125, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 946111400, + "step": 5513 + }, + { + "epoch": 62.60056657223796, + "grad_norm": 4.076746931196385, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 946280812, + "step": 5514 + }, + { + "epoch": 62.60056657223796, + "loss": 0.035268209874629974, + "loss_ce": 2.041032348643057e-05, + "loss_iou": 0.36328125, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 946280812, + "step": 5514 + }, + { + "epoch": 62.611898016997166, + "grad_norm": 10.867864889594042, + "learning_rate": 5e-06, + "loss": 0.0916, + "num_input_tokens_seen": 946452328, + "step": 5515 + }, + { + "epoch": 62.611898016997166, + "loss": 0.05627760291099548, + "loss_ce": 0.0004151762695983052, + "loss_iou": 0.2578125, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 946452328, + "step": 5515 + }, + { + "epoch": 62.623229461756374, + "grad_norm": 2.649761157902133, + "learning_rate": 5e-06, + "loss": 0.0379, + "num_input_tokens_seen": 946624120, + "step": 5516 + }, + { + "epoch": 62.623229461756374, + "loss": 0.03688281029462814, + "loss_ce": 4.80938506370876e-05, + "loss_iou": 0.408203125, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 946624120, + "step": 5516 + }, + { + "epoch": 62.63456090651558, + "grad_norm": 3.051612995672834, + "learning_rate": 5e-06, + "loss": 0.04, + "num_input_tokens_seen": 946796408, + "step": 5517 + }, + { + "epoch": 62.63456090651558, + "loss": 0.05551644042134285, + "loss_ce": 3.5485689295455813e-05, + "loss_iou": 0.484375, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 946796408, + "step": 5517 + }, + { + "epoch": 62.64589235127479, + "grad_norm": 3.5373073118656984, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 946966452, + "step": 5518 + }, + { + "epoch": 62.64589235127479, + "loss": 0.04456561431288719, + "loss_ce": 4.046925823786296e-05, + "loss_iou": 0.42578125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 946966452, + "step": 5518 + }, + { + "epoch": 62.657223796033996, + "grad_norm": 3.907036335038993, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 947137888, + "step": 5519 + }, + { + "epoch": 62.657223796033996, + "loss": 0.05694613233208656, + "loss_ce": 3.84789273084607e-05, + "loss_iou": 0.224609375, + "loss_num": 0.0113525390625, + "loss_xval": 0.056884765625, + "num_input_tokens_seen": 947137888, + "step": 5519 + }, + { + "epoch": 62.668555240793204, + "grad_norm": 3.2000291273963035, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 947309924, + "step": 5520 + }, + { + "epoch": 62.668555240793204, + "loss": 0.03819970414042473, + "loss_ce": 3.747270966414362e-05, + "loss_iou": 0.27734375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 947309924, + "step": 5520 + }, + { + "epoch": 62.67988668555241, + "grad_norm": 3.5812497904922735, + "learning_rate": 5e-06, + "loss": 0.0351, + "num_input_tokens_seen": 947482232, + "step": 5521 + }, + { + "epoch": 62.67988668555241, + "loss": 0.032056666910648346, + "loss_ce": 4.3724794522859156e-05, + "loss_iou": 0.62109375, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 947482232, + "step": 5521 + }, + { + "epoch": 62.69121813031161, + "grad_norm": 3.546502527072104, + "learning_rate": 5e-06, + "loss": 0.0735, + "num_input_tokens_seen": 947653988, + "step": 5522 + }, + { + "epoch": 62.69121813031161, + "loss": 0.10170534998178482, + "loss_ce": 0.0003412201476749033, + "loss_iou": 0.3125, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 947653988, + "step": 5522 + }, + { + "epoch": 62.70254957507082, + "grad_norm": 2.9130508687657453, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 947825392, + "step": 5523 + }, + { + "epoch": 62.70254957507082, + "loss": 0.03968769684433937, + "loss_ce": 4.5361222873907536e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 947825392, + "step": 5523 + }, + { + "epoch": 62.713881019830026, + "grad_norm": 2.7849107269109816, + "learning_rate": 5e-06, + "loss": 0.0366, + "num_input_tokens_seen": 947997548, + "step": 5524 + }, + { + "epoch": 62.713881019830026, + "loss": 0.038623061031103134, + "loss_ce": 7.936110341688618e-05, + "loss_iou": 0.31640625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 947997548, + "step": 5524 + }, + { + "epoch": 62.725212464589234, + "grad_norm": 2.7275279847315534, + "learning_rate": 5e-06, + "loss": 0.0528, + "num_input_tokens_seen": 948169724, + "step": 5525 + }, + { + "epoch": 62.725212464589234, + "loss": 0.08988450467586517, + "loss_ce": 2.5498316972516477e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 948169724, + "step": 5525 + }, + { + "epoch": 62.73654390934844, + "grad_norm": 3.580844911917427, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 948340084, + "step": 5526 + }, + { + "epoch": 62.73654390934844, + "loss": 0.0536431148648262, + "loss_ce": 0.00025260995607823133, + "loss_iou": 0.296875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 948340084, + "step": 5526 + }, + { + "epoch": 62.74787535410765, + "grad_norm": 4.0624235856668065, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 948510764, + "step": 5527 + }, + { + "epoch": 62.74787535410765, + "loss": 0.05362735688686371, + "loss_ce": 6.900899461470544e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 948510764, + "step": 5527 + }, + { + "epoch": 62.759206798866856, + "grad_norm": 4.225018529027337, + "learning_rate": 5e-06, + "loss": 0.0742, + "num_input_tokens_seen": 948681964, + "step": 5528 + }, + { + "epoch": 62.759206798866856, + "loss": 0.07007186114788055, + "loss_ce": 7.979686051839963e-05, + "loss_iou": 0.408203125, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 948681964, + "step": 5528 + }, + { + "epoch": 62.77053824362606, + "grad_norm": 2.6253563894603347, + "learning_rate": 5e-06, + "loss": 0.0551, + "num_input_tokens_seen": 948853932, + "step": 5529 + }, + { + "epoch": 62.77053824362606, + "loss": 0.043707218021154404, + "loss_ce": 5.1821691158693284e-05, + "loss_iou": 0.4609375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 948853932, + "step": 5529 + }, + { + "epoch": 62.78186968838527, + "grad_norm": 2.7099694501533813, + "learning_rate": 5e-06, + "loss": 0.0679, + "num_input_tokens_seen": 949025976, + "step": 5530 + }, + { + "epoch": 62.78186968838527, + "loss": 0.09038908779621124, + "loss_ce": 4.942140367347747e-05, + "loss_iou": 0.05078125, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 949025976, + "step": 5530 + }, + { + "epoch": 62.79320113314448, + "grad_norm": 3.5593713543248726, + "learning_rate": 5e-06, + "loss": 0.0561, + "num_input_tokens_seen": 949195152, + "step": 5531 + }, + { + "epoch": 62.79320113314448, + "loss": 0.047094546258449554, + "loss_ce": 6.695705815218389e-05, + "loss_iou": 0.455078125, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 949195152, + "step": 5531 + }, + { + "epoch": 62.804532577903686, + "grad_norm": 3.626068397511478, + "learning_rate": 5e-06, + "loss": 0.0689, + "num_input_tokens_seen": 949366200, + "step": 5532 + }, + { + "epoch": 62.804532577903686, + "loss": 0.03735312819480896, + "loss_ce": 3.012992601725273e-05, + "loss_iou": 0.462890625, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 949366200, + "step": 5532 + }, + { + "epoch": 62.815864022662886, + "grad_norm": 3.4964932596175315, + "learning_rate": 5e-06, + "loss": 0.0553, + "num_input_tokens_seen": 949538252, + "step": 5533 + }, + { + "epoch": 62.815864022662886, + "loss": 0.0271298848092556, + "loss_ce": 1.5017081750556827e-05, + "loss_iou": 0.35546875, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 949538252, + "step": 5533 + }, + { + "epoch": 62.827195467422094, + "grad_norm": 3.563074364395963, + "learning_rate": 5e-06, + "loss": 0.0616, + "num_input_tokens_seen": 949708288, + "step": 5534 + }, + { + "epoch": 62.827195467422094, + "loss": 0.09923435002565384, + "loss_ce": 6.74857001286e-05, + "loss_iou": 0.1328125, + "loss_num": 0.0198974609375, + "loss_xval": 0.09912109375, + "num_input_tokens_seen": 949708288, + "step": 5534 + }, + { + "epoch": 62.8385269121813, + "grad_norm": 7.6493647738468, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 949879900, + "step": 5535 + }, + { + "epoch": 62.8385269121813, + "loss": 0.08445686101913452, + "loss_ce": 4.5232605771161616e-05, + "loss_iou": 0.451171875, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 949879900, + "step": 5535 + }, + { + "epoch": 62.84985835694051, + "grad_norm": 3.6836378734952415, + "learning_rate": 5e-06, + "loss": 0.0576, + "num_input_tokens_seen": 950051596, + "step": 5536 + }, + { + "epoch": 62.84985835694051, + "loss": 0.04199357330799103, + "loss_ce": 3.190220013493672e-05, + "loss_iou": 0.185546875, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 950051596, + "step": 5536 + }, + { + "epoch": 62.861189801699716, + "grad_norm": 4.1354539553392105, + "learning_rate": 5e-06, + "loss": 0.0723, + "num_input_tokens_seen": 950222536, + "step": 5537 + }, + { + "epoch": 62.861189801699716, + "loss": 0.08515620976686478, + "loss_ce": 0.00013423789641819894, + "loss_iou": 0.439453125, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 950222536, + "step": 5537 + }, + { + "epoch": 62.87252124645892, + "grad_norm": 3.330818098883854, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 950392056, + "step": 5538 + }, + { + "epoch": 62.87252124645892, + "loss": 0.0704437643289566, + "loss_ce": 2.4455370294163004e-05, + "loss_iou": 0.359375, + "loss_num": 0.01409912109375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 950392056, + "step": 5538 + }, + { + "epoch": 62.88385269121813, + "grad_norm": 46.86637353936134, + "learning_rate": 5e-06, + "loss": 0.0663, + "num_input_tokens_seen": 950562524, + "step": 5539 + }, + { + "epoch": 62.88385269121813, + "loss": 0.034051500260829926, + "loss_ce": 2.440443859086372e-05, + "loss_iou": 0.26953125, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 950562524, + "step": 5539 + }, + { + "epoch": 62.89518413597734, + "grad_norm": 4.284276290861703, + "learning_rate": 5e-06, + "loss": 0.0543, + "num_input_tokens_seen": 950734480, + "step": 5540 + }, + { + "epoch": 62.89518413597734, + "loss": 0.04192068427801132, + "loss_ce": 5.056730515207164e-05, + "loss_iou": 0.48046875, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 950734480, + "step": 5540 + }, + { + "epoch": 62.906515580736546, + "grad_norm": 3.747367498548348, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 950906020, + "step": 5541 + }, + { + "epoch": 62.906515580736546, + "loss": 0.02904198318719864, + "loss_ce": 5.028243322158232e-05, + "loss_iou": 0.4921875, + "loss_num": 0.00579833984375, + "loss_xval": 0.029052734375, + "num_input_tokens_seen": 950906020, + "step": 5541 + }, + { + "epoch": 62.91784702549575, + "grad_norm": 4.109480185749345, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 951077940, + "step": 5542 + }, + { + "epoch": 62.91784702549575, + "loss": 0.02756473794579506, + "loss_ce": 2.2624190023634583e-05, + "loss_iou": 0.4765625, + "loss_num": 0.0054931640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 951077940, + "step": 5542 + }, + { + "epoch": 62.92917847025496, + "grad_norm": 2.967862491880175, + "learning_rate": 5e-06, + "loss": 0.062, + "num_input_tokens_seen": 951249684, + "step": 5543 + }, + { + "epoch": 62.92917847025496, + "loss": 0.048396818339824677, + "loss_ce": 5.697394226444885e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0096435546875, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 951249684, + "step": 5543 + }, + { + "epoch": 62.94050991501416, + "grad_norm": 2.757708236252594, + "learning_rate": 5e-06, + "loss": 0.0497, + "num_input_tokens_seen": 951421544, + "step": 5544 + }, + { + "epoch": 62.94050991501416, + "loss": 0.08457933366298676, + "loss_ce": 6.090298847993836e-05, + "loss_iou": 0.330078125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 951421544, + "step": 5544 + }, + { + "epoch": 62.95184135977337, + "grad_norm": 3.1680387240008447, + "learning_rate": 5e-06, + "loss": 0.0686, + "num_input_tokens_seen": 951591828, + "step": 5545 + }, + { + "epoch": 62.95184135977337, + "loss": 0.047847069799900055, + "loss_ce": 2.602555468911305e-05, + "loss_iou": 0.45703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 951591828, + "step": 5545 + }, + { + "epoch": 62.963172804532576, + "grad_norm": 3.49766816083566, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 951762252, + "step": 5546 + }, + { + "epoch": 62.963172804532576, + "loss": 0.04163876175880432, + "loss_ce": 4.3301130062900484e-05, + "loss_iou": 0.3515625, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 951762252, + "step": 5546 + }, + { + "epoch": 62.97450424929178, + "grad_norm": 3.5518985165046186, + "learning_rate": 5e-06, + "loss": 0.0474, + "num_input_tokens_seen": 951933848, + "step": 5547 + }, + { + "epoch": 62.97450424929178, + "loss": 0.0535704642534256, + "loss_ce": 2.736984970397316e-05, + "loss_iou": 0.478515625, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 951933848, + "step": 5547 + }, + { + "epoch": 62.98583569405099, + "grad_norm": 5.483441912411955, + "learning_rate": 5e-06, + "loss": 0.0537, + "num_input_tokens_seen": 952105424, + "step": 5548 + }, + { + "epoch": 62.98583569405099, + "loss": 0.03674546256661415, + "loss_ce": 4.807391087524593e-05, + "loss_iou": 0.419921875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 952105424, + "step": 5548 + }, + { + "epoch": 62.9971671388102, + "grad_norm": 3.222507785108215, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 952275744, + "step": 5549 + }, + { + "epoch": 62.9971671388102, + "loss": 0.03499796986579895, + "loss_ce": 4.008049290860072e-05, + "loss_iou": 0.349609375, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 952275744, + "step": 5549 + }, + { + "epoch": 62.9971671388102, + "loss": 0.022512448951601982, + "loss_ce": 4.388198794913478e-05, + "loss_iou": 0.328125, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 952318660, + "step": 5549 + }, + { + "epoch": 63.008498583569406, + "grad_norm": 3.0726285121250587, + "learning_rate": 5e-06, + "loss": 0.0434, + "num_input_tokens_seen": 952446708, + "step": 5550 + }, + { + "epoch": 63.008498583569406, + "loss": 0.0453701987862587, + "loss_ce": 3.633755841292441e-05, + "loss_iou": 0.447265625, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 952446708, + "step": 5550 + }, + { + "epoch": 63.01983002832861, + "grad_norm": 8.066609775491115, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 952618676, + "step": 5551 + }, + { + "epoch": 63.01983002832861, + "loss": 0.07753059267997742, + "loss_ce": 1.5940637240419164e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 952618676, + "step": 5551 + }, + { + "epoch": 63.03116147308782, + "grad_norm": 3.4605414647529296, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 952789532, + "step": 5552 + }, + { + "epoch": 63.03116147308782, + "loss": 0.03073236159980297, + "loss_ce": 3.1678409868618473e-05, + "loss_iou": 0.072265625, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 952789532, + "step": 5552 + }, + { + "epoch": 63.04249291784703, + "grad_norm": 3.516790101144911, + "learning_rate": 5e-06, + "loss": 0.0456, + "num_input_tokens_seen": 952960124, + "step": 5553 + }, + { + "epoch": 63.04249291784703, + "loss": 0.04897896945476532, + "loss_ce": 8.980993879958987e-05, + "loss_iou": 0.5078125, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 952960124, + "step": 5553 + }, + { + "epoch": 63.053824362606235, + "grad_norm": 3.1040856488604938, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 953131736, + "step": 5554 + }, + { + "epoch": 63.053824362606235, + "loss": 0.026958251371979713, + "loss_ce": 2.648955342010595e-05, + "loss_iou": 0.09130859375, + "loss_num": 0.00537109375, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 953131736, + "step": 5554 + }, + { + "epoch": 63.065155807365436, + "grad_norm": 2.2572768312065623, + "learning_rate": 5e-06, + "loss": 0.0838, + "num_input_tokens_seen": 953302820, + "step": 5555 + }, + { + "epoch": 63.065155807365436, + "loss": 0.11343389749526978, + "loss_ce": 1.53264518303331e-05, + "loss_iou": 0.453125, + "loss_num": 0.022705078125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 953302820, + "step": 5555 + }, + { + "epoch": 63.07648725212464, + "grad_norm": 1.6245283851401011, + "learning_rate": 5e-06, + "loss": 0.0629, + "num_input_tokens_seen": 953472924, + "step": 5556 + }, + { + "epoch": 63.07648725212464, + "loss": 0.09962118417024612, + "loss_ce": 2.7067460905527696e-05, + "loss_iou": 0.375, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 953472924, + "step": 5556 + }, + { + "epoch": 63.08781869688385, + "grad_norm": 1.845728616668121, + "learning_rate": 5e-06, + "loss": 0.0331, + "num_input_tokens_seen": 953642624, + "step": 5557 + }, + { + "epoch": 63.08781869688385, + "loss": 0.05160154402256012, + "loss_ce": 4.2095598473679274e-05, + "loss_iou": 0.166015625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 953642624, + "step": 5557 + }, + { + "epoch": 63.09915014164306, + "grad_norm": 2.983284150019853, + "learning_rate": 5e-06, + "loss": 0.0317, + "num_input_tokens_seen": 953813464, + "step": 5558 + }, + { + "epoch": 63.09915014164306, + "loss": 0.04296734929084778, + "loss_ce": 2.9116959922248498e-05, + "loss_iou": 0.30078125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 953813464, + "step": 5558 + }, + { + "epoch": 63.110481586402265, + "grad_norm": 3.4295234938795858, + "learning_rate": 5e-06, + "loss": 0.0765, + "num_input_tokens_seen": 953985092, + "step": 5559 + }, + { + "epoch": 63.110481586402265, + "loss": 0.04968930780887604, + "loss_ce": 2.195028901041951e-05, + "loss_iou": 0.55078125, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 953985092, + "step": 5559 + }, + { + "epoch": 63.12181303116147, + "grad_norm": 3.1002552986878786, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 954156588, + "step": 5560 + }, + { + "epoch": 63.12181303116147, + "loss": 0.05151050165295601, + "loss_ce": 4.260712012182921e-05, + "loss_iou": 0.359375, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 954156588, + "step": 5560 + }, + { + "epoch": 63.13314447592068, + "grad_norm": 3.0713185503779683, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 954328908, + "step": 5561 + }, + { + "epoch": 63.13314447592068, + "loss": 0.05751762539148331, + "loss_ce": 5.3026731620775536e-05, + "loss_iou": 0.376953125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 954328908, + "step": 5561 + }, + { + "epoch": 63.14447592067989, + "grad_norm": 3.7568378889500935, + "learning_rate": 5e-06, + "loss": 0.0474, + "num_input_tokens_seen": 954501452, + "step": 5562 + }, + { + "epoch": 63.14447592067989, + "loss": 0.05231466516852379, + "loss_ce": 3.80574056180194e-05, + "loss_iou": 0.482421875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 954501452, + "step": 5562 + }, + { + "epoch": 63.155807365439095, + "grad_norm": 3.879245840564028, + "learning_rate": 5e-06, + "loss": 0.0369, + "num_input_tokens_seen": 954672428, + "step": 5563 + }, + { + "epoch": 63.155807365439095, + "loss": 0.03197059407830238, + "loss_ce": 4.920885112369433e-05, + "loss_iou": 0.453125, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 954672428, + "step": 5563 + }, + { + "epoch": 63.1671388101983, + "grad_norm": 4.029669843159996, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 954842956, + "step": 5564 + }, + { + "epoch": 63.1671388101983, + "loss": 0.0496329739689827, + "loss_ce": 4.1908337152563035e-05, + "loss_iou": 0.4375, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 954842956, + "step": 5564 + }, + { + "epoch": 63.17847025495751, + "grad_norm": 3.405893534186781, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 955014900, + "step": 5565 + }, + { + "epoch": 63.17847025495751, + "loss": 0.06293866783380508, + "loss_ce": 4.193765198579058e-05, + "loss_iou": 0.58203125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 955014900, + "step": 5565 + }, + { + "epoch": 63.18980169971671, + "grad_norm": 3.6858397617786784, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 955185804, + "step": 5566 + }, + { + "epoch": 63.18980169971671, + "loss": 0.03252874314785004, + "loss_ce": 2.7520247385837138e-05, + "loss_iou": 0.3984375, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 955185804, + "step": 5566 + }, + { + "epoch": 63.20113314447592, + "grad_norm": 3.4877211384962377, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 955357280, + "step": 5567 + }, + { + "epoch": 63.20113314447592, + "loss": 0.0366751104593277, + "loss_ce": 2.3499407689087093e-05, + "loss_iou": 0.35546875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 955357280, + "step": 5567 + }, + { + "epoch": 63.212464589235125, + "grad_norm": 2.789096633978577, + "learning_rate": 5e-06, + "loss": 0.0356, + "num_input_tokens_seen": 955528236, + "step": 5568 + }, + { + "epoch": 63.212464589235125, + "loss": 0.03407943993806839, + "loss_ce": 2.1822495909873396e-05, + "loss_iou": 0.19921875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 955528236, + "step": 5568 + }, + { + "epoch": 63.22379603399433, + "grad_norm": 3.1484394802352003, + "learning_rate": 5e-06, + "loss": 0.0453, + "num_input_tokens_seen": 955699884, + "step": 5569 + }, + { + "epoch": 63.22379603399433, + "loss": 0.09011122584342957, + "loss_ce": 0.001205886946991086, + "loss_iou": 0.32421875, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 955699884, + "step": 5569 + }, + { + "epoch": 63.23512747875354, + "grad_norm": 3.5739299590467675, + "learning_rate": 5e-06, + "loss": 0.033, + "num_input_tokens_seen": 955871648, + "step": 5570 + }, + { + "epoch": 63.23512747875354, + "loss": 0.036277756094932556, + "loss_ce": 3.8133817724883556e-05, + "loss_iou": 0.28515625, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 955871648, + "step": 5570 + }, + { + "epoch": 63.24645892351275, + "grad_norm": 3.2392655078018016, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 956042996, + "step": 5571 + }, + { + "epoch": 63.24645892351275, + "loss": 0.029277481138706207, + "loss_ce": 2.6382684154668823e-05, + "loss_iou": 0.6171875, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 956042996, + "step": 5571 + }, + { + "epoch": 63.257790368271955, + "grad_norm": 2.5369535907591034, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 956215116, + "step": 5572 + }, + { + "epoch": 63.257790368271955, + "loss": 0.030111264437437057, + "loss_ce": 2.093338844133541e-05, + "loss_iou": 0.2392578125, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 956215116, + "step": 5572 + }, + { + "epoch": 63.26912181303116, + "grad_norm": 17.48850390691409, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 956385932, + "step": 5573 + }, + { + "epoch": 63.26912181303116, + "loss": 0.04315242916345596, + "loss_ce": 4.6352201025001705e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 956385932, + "step": 5573 + }, + { + "epoch": 63.28045325779037, + "grad_norm": 3.780119713259165, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 956557840, + "step": 5574 + }, + { + "epoch": 63.28045325779037, + "loss": 0.06662124395370483, + "loss_ce": 1.663565490162e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 956557840, + "step": 5574 + }, + { + "epoch": 63.29178470254958, + "grad_norm": 3.856453084559645, + "learning_rate": 5e-06, + "loss": 0.0586, + "num_input_tokens_seen": 956728512, + "step": 5575 + }, + { + "epoch": 63.29178470254958, + "loss": 0.11761099100112915, + "loss_ce": 7.253909279825166e-05, + "loss_iou": 0.46484375, + "loss_num": 0.0235595703125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 956728512, + "step": 5575 + }, + { + "epoch": 63.303116147308785, + "grad_norm": 4.096762063490481, + "learning_rate": 5e-06, + "loss": 0.0822, + "num_input_tokens_seen": 956900412, + "step": 5576 + }, + { + "epoch": 63.303116147308785, + "loss": 0.03702767938375473, + "loss_ce": 4.037451435578987e-05, + "loss_iou": 0.365234375, + "loss_num": 0.00738525390625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 956900412, + "step": 5576 + }, + { + "epoch": 63.314447592067985, + "grad_norm": 3.464112655615027, + "learning_rate": 5e-06, + "loss": 0.0663, + "num_input_tokens_seen": 957071180, + "step": 5577 + }, + { + "epoch": 63.314447592067985, + "loss": 0.03874988108873367, + "loss_ce": 5.3590476454701275e-05, + "loss_iou": 0.23828125, + "loss_num": 0.00775146484375, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 957071180, + "step": 5577 + }, + { + "epoch": 63.32577903682719, + "grad_norm": 3.7135803523094784, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 957239820, + "step": 5578 + }, + { + "epoch": 63.32577903682719, + "loss": 0.0559682622551918, + "loss_ce": 7.531804294558242e-05, + "loss_iou": 0.380859375, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 957239820, + "step": 5578 + }, + { + "epoch": 63.3371104815864, + "grad_norm": 3.674811826499546, + "learning_rate": 5e-06, + "loss": 0.0851, + "num_input_tokens_seen": 957409972, + "step": 5579 + }, + { + "epoch": 63.3371104815864, + "loss": 0.10021825134754181, + "loss_ce": 2.904361826949753e-05, + "loss_iou": 0.498046875, + "loss_num": 0.02001953125, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 957409972, + "step": 5579 + }, + { + "epoch": 63.34844192634561, + "grad_norm": 3.0014446453364187, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 957581456, + "step": 5580 + }, + { + "epoch": 63.34844192634561, + "loss": 0.13484829664230347, + "loss_ce": 5.214768316363916e-05, + "loss_iou": 0.271484375, + "loss_num": 0.02685546875, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 957581456, + "step": 5580 + }, + { + "epoch": 63.359773371104815, + "grad_norm": 4.104451018042735, + "learning_rate": 5e-06, + "loss": 0.0415, + "num_input_tokens_seen": 957752524, + "step": 5581 + }, + { + "epoch": 63.359773371104815, + "loss": 0.03547465801239014, + "loss_ce": 2.8488504540291615e-05, + "loss_iou": 0.34375, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 957752524, + "step": 5581 + }, + { + "epoch": 63.37110481586402, + "grad_norm": 3.572085229707181, + "learning_rate": 5e-06, + "loss": 0.0747, + "num_input_tokens_seen": 957923436, + "step": 5582 + }, + { + "epoch": 63.37110481586402, + "loss": 0.04619760066270828, + "loss_ce": 2.450751344440505e-05, + "loss_iou": 0.3359375, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 957923436, + "step": 5582 + }, + { + "epoch": 63.38243626062323, + "grad_norm": 4.316242948746455, + "learning_rate": 5e-06, + "loss": 0.0704, + "num_input_tokens_seen": 958095076, + "step": 5583 + }, + { + "epoch": 63.38243626062323, + "loss": 0.077049620449543, + "loss_ce": 3.088476660195738e-05, + "loss_iou": 0.30859375, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 958095076, + "step": 5583 + }, + { + "epoch": 63.39376770538244, + "grad_norm": 3.1183898338672473, + "learning_rate": 5e-06, + "loss": 0.0513, + "num_input_tokens_seen": 958265652, + "step": 5584 + }, + { + "epoch": 63.39376770538244, + "loss": 0.026530340313911438, + "loss_ce": 5.6342134485021234e-05, + "loss_iou": 0.4453125, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 958265652, + "step": 5584 + }, + { + "epoch": 63.405099150141645, + "grad_norm": 3.245622560958497, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 958436604, + "step": 5585 + }, + { + "epoch": 63.405099150141645, + "loss": 0.03134666383266449, + "loss_ce": 2.0372652215883136e-05, + "loss_iou": 0.5859375, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 958436604, + "step": 5585 + }, + { + "epoch": 63.41643059490085, + "grad_norm": 3.511396698237713, + "learning_rate": 5e-06, + "loss": 0.0763, + "num_input_tokens_seen": 958608500, + "step": 5586 + }, + { + "epoch": 63.41643059490085, + "loss": 0.09213487803936005, + "loss_ce": 3.281996032455936e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 958608500, + "step": 5586 + }, + { + "epoch": 63.42776203966006, + "grad_norm": 2.7535135754958016, + "learning_rate": 5e-06, + "loss": 0.0327, + "num_input_tokens_seen": 958780004, + "step": 5587 + }, + { + "epoch": 63.42776203966006, + "loss": 0.024539779871702194, + "loss_ce": 3.416433901293203e-05, + "loss_iou": 0.359375, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 958780004, + "step": 5587 + }, + { + "epoch": 63.43909348441926, + "grad_norm": 2.2488181785794845, + "learning_rate": 5e-06, + "loss": 0.0345, + "num_input_tokens_seen": 958951728, + "step": 5588 + }, + { + "epoch": 63.43909348441926, + "loss": 0.03219490498304367, + "loss_ce": 1.411808352713706e-05, + "loss_iou": 0.4765625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 958951728, + "step": 5588 + }, + { + "epoch": 63.45042492917847, + "grad_norm": 4.044006306081819, + "learning_rate": 5e-06, + "loss": 0.0566, + "num_input_tokens_seen": 959123764, + "step": 5589 + }, + { + "epoch": 63.45042492917847, + "loss": 0.027514085173606873, + "loss_ce": 0.0017343595391139388, + "loss_iou": 0.2890625, + "loss_num": 0.005157470703125, + "loss_xval": 0.0257568359375, + "num_input_tokens_seen": 959123764, + "step": 5589 + }, + { + "epoch": 63.461756373937675, + "grad_norm": 2.0575514676075994, + "learning_rate": 5e-06, + "loss": 0.0407, + "num_input_tokens_seen": 959294788, + "step": 5590 + }, + { + "epoch": 63.461756373937675, + "loss": 0.04627084732055664, + "loss_ce": 2.1456908143591136e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00927734375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 959294788, + "step": 5590 + }, + { + "epoch": 63.47308781869688, + "grad_norm": 2.3870953048577026, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 959466308, + "step": 5591 + }, + { + "epoch": 63.47308781869688, + "loss": 0.1199498176574707, + "loss_ce": 7.677252870053053e-05, + "loss_iou": 0.234375, + "loss_num": 0.02392578125, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 959466308, + "step": 5591 + }, + { + "epoch": 63.48441926345609, + "grad_norm": 4.139935714053537, + "learning_rate": 5e-06, + "loss": 0.0628, + "num_input_tokens_seen": 959637912, + "step": 5592 + }, + { + "epoch": 63.48441926345609, + "loss": 0.05224212259054184, + "loss_ce": 2.6541882107267156e-05, + "loss_iou": 0.28515625, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 959637912, + "step": 5592 + }, + { + "epoch": 63.4957507082153, + "grad_norm": 4.056118180792055, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 959809196, + "step": 5593 + }, + { + "epoch": 63.4957507082153, + "loss": 0.03815827891230583, + "loss_ce": 2.6566141968942247e-05, + "loss_iou": 0.55078125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 959809196, + "step": 5593 + }, + { + "epoch": 63.507082152974505, + "grad_norm": 3.423991928067274, + "learning_rate": 5e-06, + "loss": 0.0585, + "num_input_tokens_seen": 959978684, + "step": 5594 + }, + { + "epoch": 63.507082152974505, + "loss": 0.04080747812986374, + "loss_ce": 0.00018858144176192582, + "loss_iou": 0.392578125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 959978684, + "step": 5594 + }, + { + "epoch": 63.51841359773371, + "grad_norm": 3.04059163856433, + "learning_rate": 5e-06, + "loss": 0.0493, + "num_input_tokens_seen": 960150596, + "step": 5595 + }, + { + "epoch": 63.51841359773371, + "loss": 0.06325870752334595, + "loss_ce": 7.206252485048026e-05, + "loss_iou": 0.38671875, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 960150596, + "step": 5595 + }, + { + "epoch": 63.52974504249292, + "grad_norm": 3.2057911955594505, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 960322312, + "step": 5596 + }, + { + "epoch": 63.52974504249292, + "loss": 0.03003763034939766, + "loss_ce": 3.885219484800473e-05, + "loss_iou": 0.051513671875, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 960322312, + "step": 5596 + }, + { + "epoch": 63.54107648725213, + "grad_norm": 3.0136179664321525, + "learning_rate": 5e-06, + "loss": 0.0544, + "num_input_tokens_seen": 960494360, + "step": 5597 + }, + { + "epoch": 63.54107648725213, + "loss": 0.04219797998666763, + "loss_ce": 2.268829121021554e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 960494360, + "step": 5597 + }, + { + "epoch": 63.552407932011334, + "grad_norm": 3.304524073190295, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 960665812, + "step": 5598 + }, + { + "epoch": 63.552407932011334, + "loss": 0.05926670506596565, + "loss_ce": 0.00016941546346060932, + "loss_iou": 0.388671875, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 960665812, + "step": 5598 + }, + { + "epoch": 63.563739376770535, + "grad_norm": 3.08255476666748, + "learning_rate": 5e-06, + "loss": 0.0643, + "num_input_tokens_seen": 960836720, + "step": 5599 + }, + { + "epoch": 63.563739376770535, + "loss": 0.09824319183826447, + "loss_ce": 3.7625391996698454e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 960836720, + "step": 5599 + }, + { + "epoch": 63.57507082152974, + "grad_norm": 3.0751660740568574, + "learning_rate": 5e-06, + "loss": 0.0507, + "num_input_tokens_seen": 961007856, + "step": 5600 + }, + { + "epoch": 63.57507082152974, + "loss": 0.03729415684938431, + "loss_ce": 3.2191150239668787e-05, + "loss_iou": 0.169921875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 961007856, + "step": 5600 + }, + { + "epoch": 63.58640226628895, + "grad_norm": 3.238049387634643, + "learning_rate": 5e-06, + "loss": 0.0716, + "num_input_tokens_seen": 961179468, + "step": 5601 + }, + { + "epoch": 63.58640226628895, + "loss": 0.1232166439294815, + "loss_ce": 6.296264473348856e-05, + "loss_iou": 0.375, + "loss_num": 0.024658203125, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 961179468, + "step": 5601 + }, + { + "epoch": 63.59773371104816, + "grad_norm": 3.559327683167962, + "learning_rate": 5e-06, + "loss": 0.0604, + "num_input_tokens_seen": 961349880, + "step": 5602 + }, + { + "epoch": 63.59773371104816, + "loss": 0.0860898345708847, + "loss_ce": 6.0780505009461194e-05, + "loss_iou": 0.50390625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 961349880, + "step": 5602 + }, + { + "epoch": 63.609065155807365, + "grad_norm": 3.319702796243184, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 961521876, + "step": 5603 + }, + { + "epoch": 63.609065155807365, + "loss": 0.035224370658397675, + "loss_ce": 6.81184756103903e-05, + "loss_iou": 0.2890625, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 961521876, + "step": 5603 + }, + { + "epoch": 63.62039660056657, + "grad_norm": 4.349612899861339, + "learning_rate": 5e-06, + "loss": 0.0483, + "num_input_tokens_seen": 961693552, + "step": 5604 + }, + { + "epoch": 63.62039660056657, + "loss": 0.0656881183385849, + "loss_ce": 0.00013636643416248262, + "loss_iou": 0.29296875, + "loss_num": 0.01312255859375, + "loss_xval": 0.0654296875, + "num_input_tokens_seen": 961693552, + "step": 5604 + }, + { + "epoch": 63.63172804532578, + "grad_norm": 3.4759568008340995, + "learning_rate": 5e-06, + "loss": 0.0659, + "num_input_tokens_seen": 961864420, + "step": 5605 + }, + { + "epoch": 63.63172804532578, + "loss": 0.091413713991642, + "loss_ce": 1.356533721263986e-05, + "loss_iou": 0.314453125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 961864420, + "step": 5605 + }, + { + "epoch": 63.64305949008499, + "grad_norm": 3.291281051661945, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 962036372, + "step": 5606 + }, + { + "epoch": 63.64305949008499, + "loss": 0.04150788113474846, + "loss_ce": 1.9234055798733607e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 962036372, + "step": 5606 + }, + { + "epoch": 63.654390934844194, + "grad_norm": 3.307902196414728, + "learning_rate": 5e-06, + "loss": 0.0637, + "num_input_tokens_seen": 962208460, + "step": 5607 + }, + { + "epoch": 63.654390934844194, + "loss": 0.05206012725830078, + "loss_ce": 5.817593773826957e-05, + "loss_iou": 0.486328125, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 962208460, + "step": 5607 + }, + { + "epoch": 63.6657223796034, + "grad_norm": 3.84998803033237, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 962379968, + "step": 5608 + }, + { + "epoch": 63.6657223796034, + "loss": 0.0449778214097023, + "loss_ce": 4.06864273827523e-05, + "loss_iou": 0.39453125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 962379968, + "step": 5608 + }, + { + "epoch": 63.67705382436261, + "grad_norm": 3.390590423753857, + "learning_rate": 5e-06, + "loss": 0.0486, + "num_input_tokens_seen": 962550852, + "step": 5609 + }, + { + "epoch": 63.67705382436261, + "loss": 0.044905029237270355, + "loss_ce": 5.944907752564177e-05, + "loss_iou": 0.431640625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 962550852, + "step": 5609 + }, + { + "epoch": 63.68838526912181, + "grad_norm": 3.242737635489924, + "learning_rate": 5e-06, + "loss": 0.054, + "num_input_tokens_seen": 962721656, + "step": 5610 + }, + { + "epoch": 63.68838526912181, + "loss": 0.03350004553794861, + "loss_ce": 5.2782394050154835e-05, + "loss_iou": 0.2177734375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 962721656, + "step": 5610 + }, + { + "epoch": 63.69971671388102, + "grad_norm": 3.5341141303856554, + "learning_rate": 5e-06, + "loss": 0.0312, + "num_input_tokens_seen": 962893592, + "step": 5611 + }, + { + "epoch": 63.69971671388102, + "loss": 0.02900099754333496, + "loss_ce": 7.033486326690763e-05, + "loss_iou": 0.337890625, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 962893592, + "step": 5611 + }, + { + "epoch": 63.711048158640224, + "grad_norm": 3.648832531736882, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 963065260, + "step": 5612 + }, + { + "epoch": 63.711048158640224, + "loss": 0.03863013535737991, + "loss_ce": 2.5400549930054694e-05, + "loss_iou": 0.431640625, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 963065260, + "step": 5612 + }, + { + "epoch": 63.72237960339943, + "grad_norm": 3.963323910051783, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 963236236, + "step": 5613 + }, + { + "epoch": 63.72237960339943, + "loss": 0.08686865866184235, + "loss_ce": 1.56275782501325e-05, + "loss_iou": 0.2421875, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 963236236, + "step": 5613 + }, + { + "epoch": 63.73371104815864, + "grad_norm": 3.0671449103716277, + "learning_rate": 5e-06, + "loss": 0.0558, + "num_input_tokens_seen": 963406420, + "step": 5614 + }, + { + "epoch": 63.73371104815864, + "loss": 0.04987305402755737, + "loss_ce": 3.7846475606784225e-05, + "loss_iou": 0.201171875, + "loss_num": 0.00994873046875, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 963406420, + "step": 5614 + }, + { + "epoch": 63.74504249291785, + "grad_norm": 2.853354158370971, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 963578108, + "step": 5615 + }, + { + "epoch": 63.74504249291785, + "loss": 0.03520198538899422, + "loss_ce": 3.047668724320829e-05, + "loss_iou": 0.29296875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 963578108, + "step": 5615 + }, + { + "epoch": 63.756373937677054, + "grad_norm": 7.058838399571512, + "learning_rate": 5e-06, + "loss": 0.0724, + "num_input_tokens_seen": 963749856, + "step": 5616 + }, + { + "epoch": 63.756373937677054, + "loss": 0.13913656771183014, + "loss_ce": 3.744324931176379e-05, + "loss_iou": 0.421875, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 963749856, + "step": 5616 + }, + { + "epoch": 63.76770538243626, + "grad_norm": 3.105532113430941, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 963921884, + "step": 5617 + }, + { + "epoch": 63.76770538243626, + "loss": 0.04394109547138214, + "loss_ce": 1.1040867320843972e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 963921884, + "step": 5617 + }, + { + "epoch": 63.77903682719547, + "grad_norm": 4.326486056073228, + "learning_rate": 5e-06, + "loss": 0.047, + "num_input_tokens_seen": 964093564, + "step": 5618 + }, + { + "epoch": 63.77903682719547, + "loss": 0.03333599492907524, + "loss_ce": 2.605864210636355e-05, + "loss_iou": 0.142578125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 964093564, + "step": 5618 + }, + { + "epoch": 63.79036827195468, + "grad_norm": 3.1073288459533672, + "learning_rate": 5e-06, + "loss": 0.0554, + "num_input_tokens_seen": 964265312, + "step": 5619 + }, + { + "epoch": 63.79036827195468, + "loss": 0.08995333313941956, + "loss_ce": 0.00010958635539282113, + "loss_iou": 0.3515625, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 964265312, + "step": 5619 + }, + { + "epoch": 63.801699716713884, + "grad_norm": 2.665485201457284, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 964433932, + "step": 5620 + }, + { + "epoch": 63.801699716713884, + "loss": 0.03465600311756134, + "loss_ce": 4.90689417347312e-05, + "loss_iou": 0.5390625, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 964433932, + "step": 5620 + }, + { + "epoch": 63.81303116147309, + "grad_norm": 3.001507436378676, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 964605876, + "step": 5621 + }, + { + "epoch": 63.81303116147309, + "loss": 0.028206557035446167, + "loss_ce": 3.883427052642219e-05, + "loss_iou": 0.341796875, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 964605876, + "step": 5621 + }, + { + "epoch": 63.82436260623229, + "grad_norm": 3.592585108157437, + "learning_rate": 5e-06, + "loss": 0.0531, + "num_input_tokens_seen": 964776820, + "step": 5622 + }, + { + "epoch": 63.82436260623229, + "loss": 0.06478553265333176, + "loss_ce": 5.774579040007666e-05, + "loss_iou": 0.408203125, + "loss_num": 0.012939453125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 964776820, + "step": 5622 + }, + { + "epoch": 63.8356940509915, + "grad_norm": 3.322501747688564, + "learning_rate": 5e-06, + "loss": 0.0622, + "num_input_tokens_seen": 964947572, + "step": 5623 + }, + { + "epoch": 63.8356940509915, + "loss": 0.10199524462223053, + "loss_ce": 3.6022509448230267e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0203857421875, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 964947572, + "step": 5623 + }, + { + "epoch": 63.84702549575071, + "grad_norm": 2.790117742190208, + "learning_rate": 5e-06, + "loss": 0.0684, + "num_input_tokens_seen": 965119232, + "step": 5624 + }, + { + "epoch": 63.84702549575071, + "loss": 0.03153211623430252, + "loss_ce": 5.323695586412214e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 965119232, + "step": 5624 + }, + { + "epoch": 63.858356940509914, + "grad_norm": 3.495267003240254, + "learning_rate": 5e-06, + "loss": 0.0421, + "num_input_tokens_seen": 965291060, + "step": 5625 + }, + { + "epoch": 63.858356940509914, + "loss": 0.0597134605050087, + "loss_ce": 4.3967294914182276e-05, + "loss_iou": 0.365234375, + "loss_num": 0.01190185546875, + "loss_xval": 0.0595703125, + "num_input_tokens_seen": 965291060, + "step": 5625 + }, + { + "epoch": 63.86968838526912, + "grad_norm": 3.5538715811434285, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 965461892, + "step": 5626 + }, + { + "epoch": 63.86968838526912, + "loss": 0.04024675861001015, + "loss_ce": 2.4590355678810738e-05, + "loss_iou": 0.328125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 965461892, + "step": 5626 + }, + { + "epoch": 63.88101983002833, + "grad_norm": 3.342196276751497, + "learning_rate": 5e-06, + "loss": 0.0456, + "num_input_tokens_seen": 965633768, + "step": 5627 + }, + { + "epoch": 63.88101983002833, + "loss": 0.04424550384283066, + "loss_ce": 5.605052865575999e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 965633768, + "step": 5627 + }, + { + "epoch": 63.892351274787536, + "grad_norm": 3.9672739991304624, + "learning_rate": 5e-06, + "loss": 0.0569, + "num_input_tokens_seen": 965804404, + "step": 5628 + }, + { + "epoch": 63.892351274787536, + "loss": 0.055883850902318954, + "loss_ce": 2.1421208657557145e-05, + "loss_iou": 0.482421875, + "loss_num": 0.01116943359375, + "loss_xval": 0.055908203125, + "num_input_tokens_seen": 965804404, + "step": 5628 + }, + { + "epoch": 63.903682719546744, + "grad_norm": 3.8018861919290634, + "learning_rate": 5e-06, + "loss": 0.0601, + "num_input_tokens_seen": 965974528, + "step": 5629 + }, + { + "epoch": 63.903682719546744, + "loss": 0.07362597435712814, + "loss_ce": 4.8089077608892694e-05, + "loss_iou": 0.412109375, + "loss_num": 0.01470947265625, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 965974528, + "step": 5629 + }, + { + "epoch": 63.91501416430595, + "grad_norm": 3.8577425551825084, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 966146368, + "step": 5630 + }, + { + "epoch": 63.91501416430595, + "loss": 0.057474687695503235, + "loss_ce": 4.0604638343211263e-05, + "loss_iou": 0.5703125, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 966146368, + "step": 5630 + }, + { + "epoch": 63.92634560906516, + "grad_norm": 3.0837934175541104, + "learning_rate": 5e-06, + "loss": 0.061, + "num_input_tokens_seen": 966318108, + "step": 5631 + }, + { + "epoch": 63.92634560906516, + "loss": 0.08532983064651489, + "loss_ce": 4.8456768126925454e-05, + "loss_iou": 0.47265625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 966318108, + "step": 5631 + }, + { + "epoch": 63.93767705382436, + "grad_norm": 2.5832236290080757, + "learning_rate": 5e-06, + "loss": 0.0529, + "num_input_tokens_seen": 966488820, + "step": 5632 + }, + { + "epoch": 63.93767705382436, + "loss": 0.03581484034657478, + "loss_ce": 3.2980107789626345e-05, + "loss_iou": 0.4765625, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 966488820, + "step": 5632 + }, + { + "epoch": 63.94900849858357, + "grad_norm": 3.659230430175558, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 966660548, + "step": 5633 + }, + { + "epoch": 63.94900849858357, + "loss": 0.029485203325748444, + "loss_ce": 3.574071160983294e-05, + "loss_iou": 0.294921875, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 966660548, + "step": 5633 + }, + { + "epoch": 63.960339943342774, + "grad_norm": 3.420272409614578, + "learning_rate": 5e-06, + "loss": 0.0525, + "num_input_tokens_seen": 966832440, + "step": 5634 + }, + { + "epoch": 63.960339943342774, + "loss": 0.05185551568865776, + "loss_ce": 2.1407164240372367e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 966832440, + "step": 5634 + }, + { + "epoch": 63.97167138810198, + "grad_norm": 3.0803276120719647, + "learning_rate": 5e-06, + "loss": 0.0508, + "num_input_tokens_seen": 967004168, + "step": 5635 + }, + { + "epoch": 63.97167138810198, + "loss": 0.040978141129016876, + "loss_ce": 3.881048542098142e-05, + "loss_iou": 0.462890625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 967004168, + "step": 5635 + }, + { + "epoch": 63.98300283286119, + "grad_norm": 3.1702988538022328, + "learning_rate": 5e-06, + "loss": 0.0428, + "num_input_tokens_seen": 967175868, + "step": 5636 + }, + { + "epoch": 63.98300283286119, + "loss": 0.029574017971754074, + "loss_ce": 0.0001092967067961581, + "loss_iou": 0.30859375, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 967175868, + "step": 5636 + }, + { + "epoch": 63.994334277620396, + "grad_norm": 3.194526194787991, + "learning_rate": 5e-06, + "loss": 0.0405, + "num_input_tokens_seen": 967345240, + "step": 5637 + }, + { + "epoch": 63.994334277620396, + "loss": 0.03083980828523636, + "loss_ce": 4.757327405968681e-05, + "loss_iou": 0.345703125, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 967345240, + "step": 5637 + }, + { + "epoch": 63.994334277620396, + "loss": 0.06418736279010773, + "loss_ce": 0.00016148097347468138, + "loss_iou": 0.443359375, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 967431080, + "step": 5637 + }, + { + "epoch": 64.0056657223796, + "grad_norm": 3.4043921666610912, + "learning_rate": 5e-06, + "loss": 0.0534, + "num_input_tokens_seen": 967517088, + "step": 5638 + }, + { + "epoch": 64.0056657223796, + "loss": 0.05650923401117325, + "loss_ce": 0.00011274752614554018, + "loss_iou": 0.462890625, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 967517088, + "step": 5638 + }, + { + "epoch": 64.01699716713881, + "grad_norm": 3.3673918855447624, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 967688416, + "step": 5639 + }, + { + "epoch": 64.01699716713881, + "loss": 0.04392354190349579, + "loss_ce": 3.926271165255457e-05, + "loss_iou": 0.0, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 967688416, + "step": 5639 + }, + { + "epoch": 64.02832861189802, + "grad_norm": 2.7462258795519543, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 967857656, + "step": 5640 + }, + { + "epoch": 64.02832861189802, + "loss": 0.053232863545417786, + "loss_ce": 4.072372757946141e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 967857656, + "step": 5640 + }, + { + "epoch": 64.03966005665723, + "grad_norm": 3.321336390282397, + "learning_rate": 5e-06, + "loss": 0.0526, + "num_input_tokens_seen": 968029768, + "step": 5641 + }, + { + "epoch": 64.03966005665723, + "loss": 0.0696585476398468, + "loss_ce": 1.74314554897137e-05, + "loss_iou": 0.427734375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 968029768, + "step": 5641 + }, + { + "epoch": 64.05099150141643, + "grad_norm": 3.45511423659337, + "learning_rate": 5e-06, + "loss": 0.0429, + "num_input_tokens_seen": 968201456, + "step": 5642 + }, + { + "epoch": 64.05099150141643, + "loss": 0.04316634684801102, + "loss_ce": 1.4490167814074084e-05, + "loss_iou": 0.447265625, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 968201456, + "step": 5642 + }, + { + "epoch": 64.06232294617564, + "grad_norm": 3.660624579416374, + "learning_rate": 5e-06, + "loss": 0.0486, + "num_input_tokens_seen": 968373120, + "step": 5643 + }, + { + "epoch": 64.06232294617564, + "loss": 0.05655057728290558, + "loss_ce": 1.6763298845035024e-05, + "loss_iou": 0.6171875, + "loss_num": 0.01129150390625, + "loss_xval": 0.056640625, + "num_input_tokens_seen": 968373120, + "step": 5643 + }, + { + "epoch": 64.07365439093485, + "grad_norm": 3.1247914860741974, + "learning_rate": 5e-06, + "loss": 0.0382, + "num_input_tokens_seen": 968542272, + "step": 5644 + }, + { + "epoch": 64.07365439093485, + "loss": 0.03748254477977753, + "loss_ce": 2.22196031245403e-05, + "loss_iou": 0.33203125, + "loss_num": 0.00750732421875, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 968542272, + "step": 5644 + }, + { + "epoch": 64.08498583569406, + "grad_norm": 2.7439084129009568, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 968712796, + "step": 5645 + }, + { + "epoch": 64.08498583569406, + "loss": 0.0821637213230133, + "loss_ce": 2.5654127966845408e-05, + "loss_iou": 0.30078125, + "loss_num": 0.016357421875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 968712796, + "step": 5645 + }, + { + "epoch": 64.09631728045326, + "grad_norm": 2.269957941464663, + "learning_rate": 5e-06, + "loss": 0.0283, + "num_input_tokens_seen": 968884624, + "step": 5646 + }, + { + "epoch": 64.09631728045326, + "loss": 0.023347310721874237, + "loss_ce": 2.4251186914625578e-05, + "loss_iou": 0.248046875, + "loss_num": 0.004669189453125, + "loss_xval": 0.0233154296875, + "num_input_tokens_seen": 968884624, + "step": 5646 + }, + { + "epoch": 64.10764872521247, + "grad_norm": 2.556019841203874, + "learning_rate": 5e-06, + "loss": 0.0423, + "num_input_tokens_seen": 969056416, + "step": 5647 + }, + { + "epoch": 64.10764872521247, + "loss": 0.03283655643463135, + "loss_ce": 1.4902190741850063e-05, + "loss_iou": 0.333984375, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 969056416, + "step": 5647 + }, + { + "epoch": 64.11898016997168, + "grad_norm": 2.652508352726603, + "learning_rate": 5e-06, + "loss": 0.0368, + "num_input_tokens_seen": 969227468, + "step": 5648 + }, + { + "epoch": 64.11898016997168, + "loss": 0.0322101004421711, + "loss_ce": 2.9315209758351557e-05, + "loss_iou": 0.455078125, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 969227468, + "step": 5648 + }, + { + "epoch": 64.13031161473087, + "grad_norm": 3.314436439791771, + "learning_rate": 5e-06, + "loss": 0.0783, + "num_input_tokens_seen": 969397608, + "step": 5649 + }, + { + "epoch": 64.13031161473087, + "loss": 0.09146112203598022, + "loss_ce": 4.571592580759898e-05, + "loss_iou": 0.3515625, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 969397608, + "step": 5649 + }, + { + "epoch": 64.14164305949008, + "grad_norm": 3.292731792346954, + "learning_rate": 5e-06, + "loss": 0.0533, + "num_input_tokens_seen": 969569360, + "step": 5650 + }, + { + "epoch": 64.14164305949008, + "loss": 0.04446880519390106, + "loss_ce": 5.0470534915803e-05, + "loss_iou": 0.44140625, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 969569360, + "step": 5650 + }, + { + "epoch": 64.15297450424929, + "grad_norm": 3.1997910092185937, + "learning_rate": 5e-06, + "loss": 0.0566, + "num_input_tokens_seen": 969740256, + "step": 5651 + }, + { + "epoch": 64.15297450424929, + "loss": 0.040051646530628204, + "loss_ce": 1.2582495401147753e-05, + "loss_iou": 0.32421875, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 969740256, + "step": 5651 + }, + { + "epoch": 64.1643059490085, + "grad_norm": 2.833640324178782, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 969911480, + "step": 5652 + }, + { + "epoch": 64.1643059490085, + "loss": 0.05397683382034302, + "loss_ce": 3.701582318171859e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0107421875, + "loss_xval": 0.053955078125, + "num_input_tokens_seen": 969911480, + "step": 5652 + }, + { + "epoch": 64.1756373937677, + "grad_norm": 3.052946854182395, + "learning_rate": 5e-06, + "loss": 0.0568, + "num_input_tokens_seen": 970083080, + "step": 5653 + }, + { + "epoch": 64.1756373937677, + "loss": 0.07143557071685791, + "loss_ce": 5.496014273376204e-05, + "loss_iou": 0.328125, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 970083080, + "step": 5653 + }, + { + "epoch": 64.18696883852691, + "grad_norm": 3.574714947392389, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 970255200, + "step": 5654 + }, + { + "epoch": 64.18696883852691, + "loss": 0.055493105202913284, + "loss_ce": 2.740791933319997e-05, + "loss_iou": 0.26953125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 970255200, + "step": 5654 + }, + { + "epoch": 64.19830028328612, + "grad_norm": 8.874588734579559, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 970426736, + "step": 5655 + }, + { + "epoch": 64.19830028328612, + "loss": 0.0505923330783844, + "loss_ce": 3.996703162556514e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 970426736, + "step": 5655 + }, + { + "epoch": 64.20963172804532, + "grad_norm": 4.4667537098046886, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 970598584, + "step": 5656 + }, + { + "epoch": 64.20963172804532, + "loss": 0.0637560784816742, + "loss_ce": 4.300387081457302e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 970598584, + "step": 5656 + }, + { + "epoch": 64.22096317280453, + "grad_norm": 3.9314811588230287, + "learning_rate": 5e-06, + "loss": 0.0407, + "num_input_tokens_seen": 970770300, + "step": 5657 + }, + { + "epoch": 64.22096317280453, + "loss": 0.029440617188811302, + "loss_ce": 3.6931080103386194e-05, + "loss_iou": 0.52734375, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 970770300, + "step": 5657 + }, + { + "epoch": 64.23229461756374, + "grad_norm": 4.7000634688927345, + "learning_rate": 5e-06, + "loss": 0.0475, + "num_input_tokens_seen": 970941796, + "step": 5658 + }, + { + "epoch": 64.23229461756374, + "loss": 0.04870712012052536, + "loss_ce": 3.158202889608219e-05, + "loss_iou": 0.296875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 970941796, + "step": 5658 + }, + { + "epoch": 64.24362606232295, + "grad_norm": 3.3144629295899684, + "learning_rate": 5e-06, + "loss": 0.0545, + "num_input_tokens_seen": 971112680, + "step": 5659 + }, + { + "epoch": 64.24362606232295, + "loss": 0.03153107315301895, + "loss_ce": 2.1673440642189234e-05, + "loss_iou": 0.55859375, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 971112680, + "step": 5659 + }, + { + "epoch": 64.25495750708215, + "grad_norm": 3.7107457610403656, + "learning_rate": 5e-06, + "loss": 0.0372, + "num_input_tokens_seen": 971283676, + "step": 5660 + }, + { + "epoch": 64.25495750708215, + "loss": 0.03189505636692047, + "loss_ce": 1.9446108126430772e-05, + "loss_iou": 0.400390625, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 971283676, + "step": 5660 + }, + { + "epoch": 64.26628895184136, + "grad_norm": 3.3812816437690736, + "learning_rate": 5e-06, + "loss": 0.0496, + "num_input_tokens_seen": 971455240, + "step": 5661 + }, + { + "epoch": 64.26628895184136, + "loss": 0.03896826505661011, + "loss_ce": 5.835268530063331e-05, + "loss_iou": 0.5390625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 971455240, + "step": 5661 + }, + { + "epoch": 64.27762039660057, + "grad_norm": 3.5732360849154614, + "learning_rate": 5e-06, + "loss": 0.0574, + "num_input_tokens_seen": 971626224, + "step": 5662 + }, + { + "epoch": 64.27762039660057, + "loss": 0.05453784763813019, + "loss_ce": 1.8197564713773318e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0108642578125, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 971626224, + "step": 5662 + }, + { + "epoch": 64.28895184135978, + "grad_norm": 3.20346346620919, + "learning_rate": 5e-06, + "loss": 0.0514, + "num_input_tokens_seen": 971796696, + "step": 5663 + }, + { + "epoch": 64.28895184135978, + "loss": 0.027630165219306946, + "loss_ce": 4.2273662984371185e-05, + "loss_iou": 0.48046875, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 971796696, + "step": 5663 + }, + { + "epoch": 64.30028328611898, + "grad_norm": 3.9955071759580933, + "learning_rate": 5e-06, + "loss": 0.0416, + "num_input_tokens_seen": 971966764, + "step": 5664 + }, + { + "epoch": 64.30028328611898, + "loss": 0.04953522980213165, + "loss_ce": 5.097504617879167e-05, + "loss_iou": 0.419921875, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 971966764, + "step": 5664 + }, + { + "epoch": 64.31161473087819, + "grad_norm": 3.1604054124686374, + "learning_rate": 5e-06, + "loss": 0.0472, + "num_input_tokens_seen": 972138996, + "step": 5665 + }, + { + "epoch": 64.31161473087819, + "loss": 0.030261635780334473, + "loss_ce": 3.397395630599931e-05, + "loss_iou": 0.46484375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 972138996, + "step": 5665 + }, + { + "epoch": 64.3229461756374, + "grad_norm": 3.0790661118265463, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 972310740, + "step": 5666 + }, + { + "epoch": 64.3229461756374, + "loss": 0.036920733749866486, + "loss_ce": 2.4980761736514978e-05, + "loss_iou": 0.345703125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 972310740, + "step": 5666 + }, + { + "epoch": 64.3342776203966, + "grad_norm": 3.63271527234115, + "learning_rate": 5e-06, + "loss": 0.068, + "num_input_tokens_seen": 972482284, + "step": 5667 + }, + { + "epoch": 64.3342776203966, + "loss": 0.065069779753685, + "loss_ce": 3.6813093174714595e-05, + "loss_iou": 0.486328125, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 972482284, + "step": 5667 + }, + { + "epoch": 64.34560906515581, + "grad_norm": 5.398248023016888, + "learning_rate": 5e-06, + "loss": 0.0479, + "num_input_tokens_seen": 972653332, + "step": 5668 + }, + { + "epoch": 64.34560906515581, + "loss": 0.0391923226416111, + "loss_ce": 5.3527455747826025e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 972653332, + "step": 5668 + }, + { + "epoch": 64.35694050991502, + "grad_norm": 3.6619567478599806, + "learning_rate": 5e-06, + "loss": 0.0752, + "num_input_tokens_seen": 972825500, + "step": 5669 + }, + { + "epoch": 64.35694050991502, + "loss": 0.03753100335597992, + "loss_ce": 4.015903323306702e-05, + "loss_iou": 0.462890625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 972825500, + "step": 5669 + }, + { + "epoch": 64.36827195467423, + "grad_norm": 3.72495940807384, + "learning_rate": 5e-06, + "loss": 0.0419, + "num_input_tokens_seen": 972997324, + "step": 5670 + }, + { + "epoch": 64.36827195467423, + "loss": 0.033766455948352814, + "loss_ce": 2.9272661777213216e-05, + "loss_iou": 0.458984375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 972997324, + "step": 5670 + }, + { + "epoch": 64.37960339943342, + "grad_norm": 3.608396455096228, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 973167204, + "step": 5671 + }, + { + "epoch": 64.37960339943342, + "loss": 0.03851725906133652, + "loss_ce": 1.9336674085934646e-05, + "loss_iou": 0.388671875, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 973167204, + "step": 5671 + }, + { + "epoch": 64.39093484419263, + "grad_norm": 3.486497386678485, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 973338916, + "step": 5672 + }, + { + "epoch": 64.39093484419263, + "loss": 0.03376801311969757, + "loss_ce": 4.608709423337132e-05, + "loss_iou": 0.53125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 973338916, + "step": 5672 + }, + { + "epoch": 64.40226628895184, + "grad_norm": 3.539497225739517, + "learning_rate": 5e-06, + "loss": 0.0491, + "num_input_tokens_seen": 973508200, + "step": 5673 + }, + { + "epoch": 64.40226628895184, + "loss": 0.03594028577208519, + "loss_ce": 2.109771958203055e-05, + "loss_iou": 0.5234375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 973508200, + "step": 5673 + }, + { + "epoch": 64.41359773371104, + "grad_norm": 3.514136880019449, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 973680328, + "step": 5674 + }, + { + "epoch": 64.41359773371104, + "loss": 0.030760731548070908, + "loss_ce": 0.0013112699380144477, + "loss_iou": 0.396484375, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 973680328, + "step": 5674 + }, + { + "epoch": 64.42492917847025, + "grad_norm": 3.686446307197577, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 973850304, + "step": 5675 + }, + { + "epoch": 64.42492917847025, + "loss": 0.03783392906188965, + "loss_ce": 2.2651593098999e-05, + "loss_iou": 0.6015625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 973850304, + "step": 5675 + }, + { + "epoch": 64.43626062322946, + "grad_norm": 3.458810633124514, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 974022192, + "step": 5676 + }, + { + "epoch": 64.43626062322946, + "loss": 0.047905053943395615, + "loss_ce": 2.297363789693918e-05, + "loss_iou": 0.3515625, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 974022192, + "step": 5676 + }, + { + "epoch": 64.44759206798867, + "grad_norm": 3.559519439300565, + "learning_rate": 5e-06, + "loss": 0.089, + "num_input_tokens_seen": 974193944, + "step": 5677 + }, + { + "epoch": 64.44759206798867, + "loss": 0.1032581552863121, + "loss_ce": 1.71900937857572e-05, + "loss_iou": 0.50390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 974193944, + "step": 5677 + }, + { + "epoch": 64.45892351274787, + "grad_norm": 3.520029139844324, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 974365960, + "step": 5678 + }, + { + "epoch": 64.45892351274787, + "loss": 0.049592651426792145, + "loss_ce": 4.736337359645404e-05, + "loss_iou": 0.5625, + "loss_num": 0.0098876953125, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 974365960, + "step": 5678 + }, + { + "epoch": 64.47025495750708, + "grad_norm": 3.7315271964070154, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 974537520, + "step": 5679 + }, + { + "epoch": 64.47025495750708, + "loss": 0.03711051493883133, + "loss_ce": 3.1655843486078084e-05, + "loss_iou": 0.470703125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 974537520, + "step": 5679 + }, + { + "epoch": 64.48158640226629, + "grad_norm": 4.648282599937621, + "learning_rate": 5e-06, + "loss": 0.0465, + "num_input_tokens_seen": 974707524, + "step": 5680 + }, + { + "epoch": 64.48158640226629, + "loss": 0.03814130276441574, + "loss_ce": 2.4845245206961408e-05, + "loss_iou": 0.423828125, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 974707524, + "step": 5680 + }, + { + "epoch": 64.4929178470255, + "grad_norm": 3.2678062672198256, + "learning_rate": 5e-06, + "loss": 0.0558, + "num_input_tokens_seen": 974879300, + "step": 5681 + }, + { + "epoch": 64.4929178470255, + "loss": 0.07701968401670456, + "loss_ce": 0.00013827778457198292, + "loss_iou": 0.45703125, + "loss_num": 0.015380859375, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 974879300, + "step": 5681 + }, + { + "epoch": 64.5042492917847, + "grad_norm": 3.2989228814855935, + "learning_rate": 5e-06, + "loss": 0.0342, + "num_input_tokens_seen": 975051216, + "step": 5682 + }, + { + "epoch": 64.5042492917847, + "loss": 0.029823914170265198, + "loss_ce": 3.875717084156349e-05, + "loss_iou": 0.400390625, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 975051216, + "step": 5682 + }, + { + "epoch": 64.51558073654391, + "grad_norm": 3.2641555983620685, + "learning_rate": 5e-06, + "loss": 0.0553, + "num_input_tokens_seen": 975221584, + "step": 5683 + }, + { + "epoch": 64.51558073654391, + "loss": 0.03420019894838333, + "loss_ce": 3.5772707633441314e-05, + "loss_iou": 0.46875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 975221584, + "step": 5683 + }, + { + "epoch": 64.52691218130312, + "grad_norm": 3.1877772493264906, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 975393476, + "step": 5684 + }, + { + "epoch": 64.52691218130312, + "loss": 0.058207154273986816, + "loss_ce": 7.117014320101589e-05, + "loss_iou": 0.40625, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 975393476, + "step": 5684 + }, + { + "epoch": 64.53824362606233, + "grad_norm": 3.915987478606102, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 975565764, + "step": 5685 + }, + { + "epoch": 64.53824362606233, + "loss": 0.04126343876123428, + "loss_ce": 3.4191343729617074e-05, + "loss_iou": 0.421875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 975565764, + "step": 5685 + }, + { + "epoch": 64.54957507082153, + "grad_norm": 3.5377990606580147, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 975737520, + "step": 5686 + }, + { + "epoch": 64.54957507082153, + "loss": 0.035324983298778534, + "loss_ce": 7.718152482993901e-05, + "loss_iou": 0.5078125, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 975737520, + "step": 5686 + }, + { + "epoch": 64.56090651558074, + "grad_norm": 3.490277098444185, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 975908960, + "step": 5687 + }, + { + "epoch": 64.56090651558074, + "loss": 0.04415760934352875, + "loss_ce": 1.3927259715273976e-05, + "loss_iou": 0.474609375, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 975908960, + "step": 5687 + }, + { + "epoch": 64.57223796033995, + "grad_norm": 5.138649997461146, + "learning_rate": 5e-06, + "loss": 0.0801, + "num_input_tokens_seen": 976080856, + "step": 5688 + }, + { + "epoch": 64.57223796033995, + "loss": 0.03360157459974289, + "loss_ce": 1.6977472114376724e-05, + "loss_iou": 0.380859375, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 976080856, + "step": 5688 + }, + { + "epoch": 64.58356940509915, + "grad_norm": 2.928120706613792, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 976251688, + "step": 5689 + }, + { + "epoch": 64.58356940509915, + "loss": 0.026877131313085556, + "loss_ce": 6.4029891291284e-06, + "loss_iou": 0.1015625, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 976251688, + "step": 5689 + }, + { + "epoch": 64.59490084985836, + "grad_norm": 2.6874185357359237, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 976422544, + "step": 5690 + }, + { + "epoch": 64.59490084985836, + "loss": 0.03900842368602753, + "loss_ce": 6.799708353355527e-05, + "loss_iou": 0.357421875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 976422544, + "step": 5690 + }, + { + "epoch": 64.60623229461757, + "grad_norm": 3.1095773453465476, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 976594244, + "step": 5691 + }, + { + "epoch": 64.60623229461757, + "loss": 0.043078623712062836, + "loss_ce": 1.832183261285536e-05, + "loss_iou": 0.53125, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 976594244, + "step": 5691 + }, + { + "epoch": 64.61756373937678, + "grad_norm": 2.9965862760586988, + "learning_rate": 5e-06, + "loss": 0.0441, + "num_input_tokens_seen": 976766444, + "step": 5692 + }, + { + "epoch": 64.61756373937678, + "loss": 0.07904849946498871, + "loss_ce": 0.00011478344822535291, + "loss_iou": 0.4140625, + "loss_num": 0.0157470703125, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 976766444, + "step": 5692 + }, + { + "epoch": 64.62889518413597, + "grad_norm": 6.873534064891267, + "learning_rate": 5e-06, + "loss": 0.0529, + "num_input_tokens_seen": 976937280, + "step": 5693 + }, + { + "epoch": 64.62889518413597, + "loss": 0.05382588505744934, + "loss_ce": 5.391139711719006e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 976937280, + "step": 5693 + }, + { + "epoch": 64.64022662889518, + "grad_norm": 3.5559464002296726, + "learning_rate": 5e-06, + "loss": 0.0801, + "num_input_tokens_seen": 977105888, + "step": 5694 + }, + { + "epoch": 64.64022662889518, + "loss": 0.04109952971339226, + "loss_ce": 2.2868898668093607e-05, + "loss_iou": 0.279296875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 977105888, + "step": 5694 + }, + { + "epoch": 64.65155807365439, + "grad_norm": 4.000632100500444, + "learning_rate": 5e-06, + "loss": 0.0595, + "num_input_tokens_seen": 977277820, + "step": 5695 + }, + { + "epoch": 64.65155807365439, + "loss": 0.03450049087405205, + "loss_ce": 7.666256715310737e-05, + "loss_iou": 0.44921875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 977277820, + "step": 5695 + }, + { + "epoch": 64.66288951841359, + "grad_norm": 4.133296833399541, + "learning_rate": 5e-06, + "loss": 0.0544, + "num_input_tokens_seen": 977449952, + "step": 5696 + }, + { + "epoch": 64.66288951841359, + "loss": 0.08129125833511353, + "loss_ce": 3.820850542979315e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0162353515625, + "loss_xval": 0.0810546875, + "num_input_tokens_seen": 977449952, + "step": 5696 + }, + { + "epoch": 64.6742209631728, + "grad_norm": 4.627647126351046, + "learning_rate": 5e-06, + "loss": 0.0434, + "num_input_tokens_seen": 977621140, + "step": 5697 + }, + { + "epoch": 64.6742209631728, + "loss": 0.04074341058731079, + "loss_ce": 1.770470043993555e-05, + "loss_iou": 0.3125, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 977621140, + "step": 5697 + }, + { + "epoch": 64.68555240793201, + "grad_norm": 3.50543315635961, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 977792744, + "step": 5698 + }, + { + "epoch": 64.68555240793201, + "loss": 0.0386078767478466, + "loss_ce": 6.417420809157193e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 977792744, + "step": 5698 + }, + { + "epoch": 64.69688385269122, + "grad_norm": 2.686637750687325, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 977963340, + "step": 5699 + }, + { + "epoch": 64.69688385269122, + "loss": 0.04813689738512039, + "loss_ce": 2.5937277314369567e-05, + "loss_iou": 0.4921875, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 977963340, + "step": 5699 + }, + { + "epoch": 64.70821529745042, + "grad_norm": 2.720560968972478, + "learning_rate": 5e-06, + "loss": 0.0566, + "num_input_tokens_seen": 978134064, + "step": 5700 + }, + { + "epoch": 64.70821529745042, + "loss": 0.044515762478113174, + "loss_ce": 3.6394052585819736e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 978134064, + "step": 5700 + }, + { + "epoch": 64.71954674220963, + "grad_norm": 51.96241600684382, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 978306100, + "step": 5701 + }, + { + "epoch": 64.71954674220963, + "loss": 0.028195591643452644, + "loss_ce": 2.786701952572912e-05, + "loss_iou": 0.1455078125, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 978306100, + "step": 5701 + }, + { + "epoch": 64.73087818696884, + "grad_norm": 3.0046149853037902, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 978477960, + "step": 5702 + }, + { + "epoch": 64.73087818696884, + "loss": 0.04333553463220596, + "loss_ce": 4.6350498450919986e-05, + "loss_iou": 0.390625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 978477960, + "step": 5702 + }, + { + "epoch": 64.74220963172804, + "grad_norm": 6.296504067866645, + "learning_rate": 5e-06, + "loss": 0.0631, + "num_input_tokens_seen": 978649964, + "step": 5703 + }, + { + "epoch": 64.74220963172804, + "loss": 0.05528460443019867, + "loss_ce": 0.000993833295069635, + "loss_iou": 0.53125, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 978649964, + "step": 5703 + }, + { + "epoch": 64.75354107648725, + "grad_norm": 3.576862711492624, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 978821816, + "step": 5704 + }, + { + "epoch": 64.75354107648725, + "loss": 0.06209405139088631, + "loss_ce": 3.6554796679411083e-05, + "loss_iou": 0.408203125, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 978821816, + "step": 5704 + }, + { + "epoch": 64.76487252124646, + "grad_norm": 2.8915771054261503, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 978993128, + "step": 5705 + }, + { + "epoch": 64.76487252124646, + "loss": 0.05067801475524902, + "loss_ce": 3.409415512578562e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 978993128, + "step": 5705 + }, + { + "epoch": 64.77620396600567, + "grad_norm": 3.3214370040191206, + "learning_rate": 5e-06, + "loss": 0.0531, + "num_input_tokens_seen": 979164708, + "step": 5706 + }, + { + "epoch": 64.77620396600567, + "loss": 0.05327049642801285, + "loss_ce": 3.257872594986111e-05, + "loss_iou": 0.30859375, + "loss_num": 0.01068115234375, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 979164708, + "step": 5706 + }, + { + "epoch": 64.78753541076487, + "grad_norm": 3.8183336050826213, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 979336676, + "step": 5707 + }, + { + "epoch": 64.78753541076487, + "loss": 0.03260376676917076, + "loss_ce": 2.6249990696669556e-05, + "loss_iou": 0.392578125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 979336676, + "step": 5707 + }, + { + "epoch": 64.79886685552408, + "grad_norm": 3.939627741630551, + "learning_rate": 5e-06, + "loss": 0.0652, + "num_input_tokens_seen": 979506688, + "step": 5708 + }, + { + "epoch": 64.79886685552408, + "loss": 0.040414802730083466, + "loss_ce": 2.4786560970824212e-05, + "loss_iou": 0.482421875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 979506688, + "step": 5708 + }, + { + "epoch": 64.81019830028329, + "grad_norm": 5.572343923519142, + "learning_rate": 5e-06, + "loss": 0.0545, + "num_input_tokens_seen": 979678420, + "step": 5709 + }, + { + "epoch": 64.81019830028329, + "loss": 0.08625856041908264, + "loss_ce": 4.6400498831644654e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0172119140625, + "loss_xval": 0.08642578125, + "num_input_tokens_seen": 979678420, + "step": 5709 + }, + { + "epoch": 64.8215297450425, + "grad_norm": 2.9141808544941203, + "learning_rate": 5e-06, + "loss": 0.0382, + "num_input_tokens_seen": 979849404, + "step": 5710 + }, + { + "epoch": 64.8215297450425, + "loss": 0.05351022630929947, + "loss_ce": 4.3428146454971284e-05, + "loss_iou": 0.388671875, + "loss_num": 0.01068115234375, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 979849404, + "step": 5710 + }, + { + "epoch": 64.8328611898017, + "grad_norm": 3.3921465956807886, + "learning_rate": 5e-06, + "loss": 0.034, + "num_input_tokens_seen": 980021316, + "step": 5711 + }, + { + "epoch": 64.8328611898017, + "loss": 0.04531057924032211, + "loss_ce": 5.300892371451482e-05, + "loss_iou": 0.130859375, + "loss_num": 0.009033203125, + "loss_xval": 0.045166015625, + "num_input_tokens_seen": 980021316, + "step": 5711 + }, + { + "epoch": 64.84419263456091, + "grad_norm": 3.6504093743421926, + "learning_rate": 5e-06, + "loss": 0.0608, + "num_input_tokens_seen": 980193032, + "step": 5712 + }, + { + "epoch": 64.84419263456091, + "loss": 0.13299715518951416, + "loss_ce": 4.7332356189144775e-05, + "loss_iou": 0.37890625, + "loss_num": 0.026611328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 980193032, + "step": 5712 + }, + { + "epoch": 64.85552407932012, + "grad_norm": 3.666979601813156, + "learning_rate": 5e-06, + "loss": 0.0464, + "num_input_tokens_seen": 980363416, + "step": 5713 + }, + { + "epoch": 64.85552407932012, + "loss": 0.03740091994404793, + "loss_ce": 1.6887715901248157e-05, + "loss_iou": 0.408203125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 980363416, + "step": 5713 + }, + { + "epoch": 64.86685552407933, + "grad_norm": 3.5495682185182016, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 980534876, + "step": 5714 + }, + { + "epoch": 64.86685552407933, + "loss": 0.037795357406139374, + "loss_ce": 2.9854807507945225e-05, + "loss_iou": 0.287109375, + "loss_num": 0.007537841796875, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 980534876, + "step": 5714 + }, + { + "epoch": 64.87818696883852, + "grad_norm": 2.8497783315438867, + "learning_rate": 5e-06, + "loss": 0.0622, + "num_input_tokens_seen": 980706424, + "step": 5715 + }, + { + "epoch": 64.87818696883852, + "loss": 0.031192386522889137, + "loss_ce": 4.9198362830793485e-05, + "loss_iou": 0.12353515625, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 980706424, + "step": 5715 + }, + { + "epoch": 64.88951841359773, + "grad_norm": 2.287255791980991, + "learning_rate": 5e-06, + "loss": 0.0708, + "num_input_tokens_seen": 980878004, + "step": 5716 + }, + { + "epoch": 64.88951841359773, + "loss": 0.04274478554725647, + "loss_ce": 2.0179269995423965e-05, + "loss_iou": 0.30859375, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 980878004, + "step": 5716 + }, + { + "epoch": 64.90084985835693, + "grad_norm": 4.263638228683565, + "learning_rate": 5e-06, + "loss": 0.0908, + "num_input_tokens_seen": 981049860, + "step": 5717 + }, + { + "epoch": 64.90084985835693, + "loss": 0.1389244645833969, + "loss_ce": 5.422658796305768e-05, + "loss_iou": 0.1689453125, + "loss_num": 0.02783203125, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 981049860, + "step": 5717 + }, + { + "epoch": 64.91218130311614, + "grad_norm": 3.803421017811128, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 981221956, + "step": 5718 + }, + { + "epoch": 64.91218130311614, + "loss": 0.04325833544135094, + "loss_ce": 4.544557668850757e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 981221956, + "step": 5718 + }, + { + "epoch": 64.92351274787535, + "grad_norm": 4.0302904918822575, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 981388288, + "step": 5719 + }, + { + "epoch": 64.92351274787535, + "loss": 0.0562330037355423, + "loss_ce": 9.591891284799203e-05, + "loss_iou": 0.64453125, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 981388288, + "step": 5719 + }, + { + "epoch": 64.93484419263456, + "grad_norm": 3.566501978859216, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 981560188, + "step": 5720 + }, + { + "epoch": 64.93484419263456, + "loss": 0.08308477699756622, + "loss_ce": 3.118738459306769e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0166015625, + "loss_xval": 0.0830078125, + "num_input_tokens_seen": 981560188, + "step": 5720 + }, + { + "epoch": 64.94617563739376, + "grad_norm": 3.584624047164368, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 981731952, + "step": 5721 + }, + { + "epoch": 64.94617563739376, + "loss": 0.06078848987817764, + "loss_ce": 2.7986985514871776e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 981731952, + "step": 5721 + }, + { + "epoch": 64.95750708215297, + "grad_norm": 3.354499966578575, + "learning_rate": 5e-06, + "loss": 0.0523, + "num_input_tokens_seen": 981904072, + "step": 5722 + }, + { + "epoch": 64.95750708215297, + "loss": 0.07813683152198792, + "loss_ce": 4.2348918213974684e-05, + "loss_iou": 0.33203125, + "loss_num": 0.01556396484375, + "loss_xval": 0.078125, + "num_input_tokens_seen": 981904072, + "step": 5722 + }, + { + "epoch": 64.96883852691218, + "grad_norm": 3.627788052273909, + "learning_rate": 5e-06, + "loss": 0.0559, + "num_input_tokens_seen": 982074112, + "step": 5723 + }, + { + "epoch": 64.96883852691218, + "loss": 0.04120307043194771, + "loss_ce": 4.24871759605594e-05, + "loss_iou": 0.41796875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 982074112, + "step": 5723 + }, + { + "epoch": 64.98016997167139, + "grad_norm": 3.8320040003143294, + "learning_rate": 5e-06, + "loss": 0.038, + "num_input_tokens_seen": 982242872, + "step": 5724 + }, + { + "epoch": 64.98016997167139, + "loss": 0.04043585807085037, + "loss_ce": 3.058449146919884e-05, + "loss_iou": 0.34765625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 982242872, + "step": 5724 + }, + { + "epoch": 64.9915014164306, + "grad_norm": 3.908828546511955, + "learning_rate": 5e-06, + "loss": 0.0543, + "num_input_tokens_seen": 982414804, + "step": 5725 + }, + { + "epoch": 64.9915014164306, + "loss": 0.09888176620006561, + "loss_ce": 2.0067374862264842e-05, + "loss_iou": 0.47265625, + "loss_num": 0.019775390625, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 982414804, + "step": 5725 + }, + { + "epoch": 64.9915014164306, + "loss": 0.03593401610851288, + "loss_ce": 3.008566091011744e-05, + "loss_iou": 0.49609375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 982543972, + "step": 5725 + }, + { + "epoch": 65.0028328611898, + "grad_norm": 4.278739973485845, + "learning_rate": 5e-06, + "loss": 0.0723, + "num_input_tokens_seen": 982587144, + "step": 5726 + }, + { + "epoch": 65.0028328611898, + "loss": 0.04389403015375137, + "loss_ce": 4.027242539450526e-05, + "loss_iou": 0.474609375, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 982587144, + "step": 5726 + }, + { + "epoch": 65.01416430594901, + "grad_norm": 3.344390384944367, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 982758360, + "step": 5727 + }, + { + "epoch": 65.01416430594901, + "loss": 0.03912629932165146, + "loss_ce": 1.80219085450517e-05, + "loss_iou": 0.0, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 982758360, + "step": 5727 + }, + { + "epoch": 65.02549575070822, + "grad_norm": 5.0895530611280995, + "learning_rate": 5e-06, + "loss": 0.0588, + "num_input_tokens_seen": 982930100, + "step": 5728 + }, + { + "epoch": 65.02549575070822, + "loss": 0.02324196696281433, + "loss_ce": 7.149645534809679e-05, + "loss_iou": 0.33203125, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 982930100, + "step": 5728 + }, + { + "epoch": 65.03682719546742, + "grad_norm": 3.244496383794755, + "learning_rate": 5e-06, + "loss": 0.0393, + "num_input_tokens_seen": 983101820, + "step": 5729 + }, + { + "epoch": 65.03682719546742, + "loss": 0.043377332389354706, + "loss_ce": 4.237061148160137e-05, + "loss_iou": 0.0, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 983101820, + "step": 5729 + }, + { + "epoch": 65.04815864022663, + "grad_norm": 3.1005707560814937, + "learning_rate": 5e-06, + "loss": 0.0573, + "num_input_tokens_seen": 983273340, + "step": 5730 + }, + { + "epoch": 65.04815864022663, + "loss": 0.0363239049911499, + "loss_ce": 3.0871364288032055e-05, + "loss_iou": 0.40234375, + "loss_num": 0.00726318359375, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 983273340, + "step": 5730 + }, + { + "epoch": 65.05949008498584, + "grad_norm": 3.1630157186545462, + "learning_rate": 5e-06, + "loss": 0.0463, + "num_input_tokens_seen": 983444220, + "step": 5731 + }, + { + "epoch": 65.05949008498584, + "loss": 0.05001368746161461, + "loss_ce": 4.115112096769735e-05, + "loss_iou": 0.423828125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 983444220, + "step": 5731 + }, + { + "epoch": 65.07082152974505, + "grad_norm": 2.601181032677839, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 983616740, + "step": 5732 + }, + { + "epoch": 65.07082152974505, + "loss": 0.029378995299339294, + "loss_ce": 3.634267341112718e-05, + "loss_iou": 0.38671875, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 983616740, + "step": 5732 + }, + { + "epoch": 65.08215297450425, + "grad_norm": 3.265062334190223, + "learning_rate": 5e-06, + "loss": 0.0351, + "num_input_tokens_seen": 983788724, + "step": 5733 + }, + { + "epoch": 65.08215297450425, + "loss": 0.03413556516170502, + "loss_ce": 3.217053017579019e-05, + "loss_iou": 0.42578125, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 983788724, + "step": 5733 + }, + { + "epoch": 65.09348441926346, + "grad_norm": 3.9142167297086194, + "learning_rate": 5e-06, + "loss": 0.0519, + "num_input_tokens_seen": 983959888, + "step": 5734 + }, + { + "epoch": 65.09348441926346, + "loss": 0.0876886248588562, + "loss_ce": 4.2142957681789994e-05, + "loss_iou": 0.470703125, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 983959888, + "step": 5734 + }, + { + "epoch": 65.10481586402267, + "grad_norm": 3.731325356539566, + "learning_rate": 5e-06, + "loss": 0.0707, + "num_input_tokens_seen": 984131464, + "step": 5735 + }, + { + "epoch": 65.10481586402267, + "loss": 0.13541018962860107, + "loss_ce": 6.472597306128591e-05, + "loss_iou": 0.50390625, + "loss_num": 0.027099609375, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 984131464, + "step": 5735 + }, + { + "epoch": 65.11614730878188, + "grad_norm": 2.9642035457082985, + "learning_rate": 5e-06, + "loss": 0.0395, + "num_input_tokens_seen": 984303212, + "step": 5736 + }, + { + "epoch": 65.11614730878188, + "loss": 0.058792926371097565, + "loss_ce": 0.00016866040823515505, + "loss_iou": 0.078125, + "loss_num": 0.01171875, + "loss_xval": 0.05859375, + "num_input_tokens_seen": 984303212, + "step": 5736 + }, + { + "epoch": 65.12747875354107, + "grad_norm": 3.14199247012568, + "learning_rate": 5e-06, + "loss": 0.0629, + "num_input_tokens_seen": 984473684, + "step": 5737 + }, + { + "epoch": 65.12747875354107, + "loss": 0.12547801434993744, + "loss_ce": 2.0248595319571905e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0250244140625, + "loss_xval": 0.125, + "num_input_tokens_seen": 984473684, + "step": 5737 + }, + { + "epoch": 65.13881019830028, + "grad_norm": 3.810742643843869, + "learning_rate": 5e-06, + "loss": 0.0523, + "num_input_tokens_seen": 984645292, + "step": 5738 + }, + { + "epoch": 65.13881019830028, + "loss": 0.03057882748544216, + "loss_ce": 4.599188105203211e-05, + "loss_iou": 0.494140625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 984645292, + "step": 5738 + }, + { + "epoch": 65.15014164305948, + "grad_norm": 3.6153608489384146, + "learning_rate": 5e-06, + "loss": 0.0609, + "num_input_tokens_seen": 984817140, + "step": 5739 + }, + { + "epoch": 65.15014164305948, + "loss": 0.05022459849715233, + "loss_ce": 2.3182514269137755e-05, + "loss_iou": 0.51171875, + "loss_num": 0.010009765625, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 984817140, + "step": 5739 + }, + { + "epoch": 65.16147308781869, + "grad_norm": 3.2427250268432157, + "learning_rate": 5e-06, + "loss": 0.0363, + "num_input_tokens_seen": 984989060, + "step": 5740 + }, + { + "epoch": 65.16147308781869, + "loss": 0.04532986879348755, + "loss_ce": 2.652549846970942e-05, + "loss_iou": 0.3359375, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 984989060, + "step": 5740 + }, + { + "epoch": 65.1728045325779, + "grad_norm": 2.8996045963031767, + "learning_rate": 5e-06, + "loss": 0.0542, + "num_input_tokens_seen": 985160680, + "step": 5741 + }, + { + "epoch": 65.1728045325779, + "loss": 0.04038078337907791, + "loss_ce": 3.654730608104728e-05, + "loss_iou": 0.5078125, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 985160680, + "step": 5741 + }, + { + "epoch": 65.1841359773371, + "grad_norm": 2.7577053711303443, + "learning_rate": 5e-06, + "loss": 0.0463, + "num_input_tokens_seen": 985332472, + "step": 5742 + }, + { + "epoch": 65.1841359773371, + "loss": 0.03965802863240242, + "loss_ce": 6.146983650978655e-05, + "loss_iou": 0.37890625, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 985332472, + "step": 5742 + }, + { + "epoch": 65.19546742209631, + "grad_norm": 3.132942549717698, + "learning_rate": 5e-06, + "loss": 0.0347, + "num_input_tokens_seen": 985502956, + "step": 5743 + }, + { + "epoch": 65.19546742209631, + "loss": 0.0280813779681921, + "loss_ce": 1.283594156120671e-05, + "loss_iou": 0.41796875, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 985502956, + "step": 5743 + }, + { + "epoch": 65.20679886685552, + "grad_norm": 2.745008962198351, + "learning_rate": 5e-06, + "loss": 0.0561, + "num_input_tokens_seen": 985674840, + "step": 5744 + }, + { + "epoch": 65.20679886685552, + "loss": 0.025214754045009613, + "loss_ce": 2.2494146833196282e-05, + "loss_iou": 0.24609375, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 985674840, + "step": 5744 + }, + { + "epoch": 65.21813031161473, + "grad_norm": 2.9213145978826693, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 985846348, + "step": 5745 + }, + { + "epoch": 65.21813031161473, + "loss": 0.03277846798300743, + "loss_ce": 4.8366287956014276e-05, + "loss_iou": 0.29296875, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 985846348, + "step": 5745 + }, + { + "epoch": 65.22946175637394, + "grad_norm": 3.0008774580000157, + "learning_rate": 5e-06, + "loss": 0.0526, + "num_input_tokens_seen": 986016048, + "step": 5746 + }, + { + "epoch": 65.22946175637394, + "loss": 0.030263621360063553, + "loss_ce": 2.0701359972008504e-05, + "loss_iou": 0.34375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 986016048, + "step": 5746 + }, + { + "epoch": 65.24079320113314, + "grad_norm": 3.593550432236033, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 986187112, + "step": 5747 + }, + { + "epoch": 65.24079320113314, + "loss": 0.03626236319541931, + "loss_ce": 3.799736441578716e-05, + "loss_iou": 0.40625, + "loss_num": 0.00726318359375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 986187112, + "step": 5747 + }, + { + "epoch": 65.25212464589235, + "grad_norm": 4.1807311587745595, + "learning_rate": 5e-06, + "loss": 0.0358, + "num_input_tokens_seen": 986359172, + "step": 5748 + }, + { + "epoch": 65.25212464589235, + "loss": 0.03177310526371002, + "loss_ce": 3.482650572550483e-05, + "loss_iou": 0.3125, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 986359172, + "step": 5748 + }, + { + "epoch": 65.26345609065156, + "grad_norm": 3.1281716946654208, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 986531024, + "step": 5749 + }, + { + "epoch": 65.26345609065156, + "loss": 0.035262130200862885, + "loss_ce": 3.7216508644632995e-05, + "loss_iou": 0.3671875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 986531024, + "step": 5749 + }, + { + "epoch": 65.27478753541077, + "grad_norm": 3.853304110181165, + "learning_rate": 5e-06, + "loss": 0.0396, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.27478753541077, + "eval_seeclick_CIoU": 0.5279392600059509, + "eval_seeclick_GIoU": 0.5279070436954498, + "eval_seeclick_IoU": 0.5653687417507172, + "eval_seeclick_MAE_all": 0.0654074177145958, + "eval_seeclick_MAE_h": 0.032020159997045994, + "eval_seeclick_MAE_w": 0.09991196915507317, + "eval_seeclick_MAE_x": 0.1016213670372963, + "eval_seeclick_MAE_y": 0.028076169081032276, + "eval_seeclick_NUM_probability": 0.9999834299087524, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 0.9440920352935791, + "eval_seeclick_loss_ce": 0.6966625452041626, + "eval_seeclick_loss_iou": 0.5435791015625, + "eval_seeclick_loss_num": 0.049468994140625, + "eval_seeclick_loss_xval": 0.247283935546875, + "eval_seeclick_runtime": 69.1799, + "eval_seeclick_samples_per_second": 0.622, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.27478753541077, + "eval_icons_CIoU": 0.7469396889209747, + "eval_icons_GIoU": 0.7457391917705536, + "eval_icons_IoU": 0.7601137757301331, + "eval_icons_MAE_all": 0.03410924784839153, + "eval_icons_MAE_h": 0.031143520027399063, + "eval_icons_MAE_w": 0.038319991901516914, + "eval_icons_MAE_x": 0.033546026796102524, + "eval_icons_MAE_y": 0.033427443355321884, + "eval_icons_NUM_probability": 0.9996732473373413, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.11560887098312378, + "eval_icons_loss_ce": 0.003657496767118573, + "eval_icons_loss_iou": 0.54168701171875, + "eval_icons_loss_num": 0.020341873168945312, + "eval_icons_loss_xval": 0.101776123046875, + "eval_icons_runtime": 79.6161, + "eval_icons_samples_per_second": 0.628, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.27478753541077, + "eval_screenspot_CIoU": 0.6210390130678812, + "eval_screenspot_GIoU": 0.617551843325297, + "eval_screenspot_IoU": 0.6457635164260864, + "eval_screenspot_MAE_all": 0.0647940809528033, + "eval_screenspot_MAE_h": 0.04260993810991446, + "eval_screenspot_MAE_w": 0.11057011783123016, + "eval_screenspot_MAE_x": 0.07184754063685735, + "eval_screenspot_MAE_y": 0.0341487160573403, + "eval_screenspot_NUM_probability": 0.9999564091364542, + "eval_screenspot_inside_bbox": 0.9183333317438761, + "eval_screenspot_loss": 0.29010704159736633, + "eval_screenspot_loss_ce": 0.0171851241029799, + "eval_screenspot_loss_iou": 0.5324300130208334, + "eval_screenspot_loss_num": 0.054255167643229164, + "eval_screenspot_loss_xval": 0.2713623046875, + "eval_screenspot_runtime": 136.7024, + "eval_screenspot_samples_per_second": 0.651, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.27478753541077, + "eval_compot_CIoU": 0.8379127681255341, + "eval_compot_GIoU": 0.8338889479637146, + "eval_compot_IoU": 0.8521289229393005, + "eval_compot_MAE_all": 0.026224092580378056, + "eval_compot_MAE_h": 0.020950539968907833, + "eval_compot_MAE_w": 0.032208927907049656, + "eval_compot_MAE_x": 0.030544021166861057, + "eval_compot_MAE_y": 0.02119287569075823, + "eval_compot_NUM_probability": 0.9999770820140839, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.08464618772268295, + "eval_compot_loss_ce": 2.4417404347332194e-05, + "eval_compot_loss_iou": 0.5771484375, + "eval_compot_loss_num": 0.014888763427734375, + "eval_compot_loss_xval": 0.0743865966796875, + "eval_compot_runtime": 87.2073, + "eval_compot_samples_per_second": 0.573, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.27478753541077, + "eval_custom_ui_MAE_all": 0.0198229500092566, + "eval_custom_ui_MAE_x": 0.03207499347627163, + "eval_custom_ui_MAE_y": 0.007570905378088355, + "eval_custom_ui_NUM_probability": 0.9998688995838165, + "eval_custom_ui_loss": 0.22339992225170135, + "eval_custom_ui_loss_ce": 0.12163322418928146, + "eval_custom_ui_loss_num": 0.019514083862304688, + "eval_custom_ui_loss_xval": 0.09759521484375, + "eval_custom_ui_runtime": 65.9456, + "eval_custom_ui_samples_per_second": 0.758, + "eval_custom_ui_steps_per_second": 0.03, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.27478753541077, + "loss": 0.2739031910896301, + "loss_ce": 0.15262635052204132, + "loss_iou": 0.0, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 986702852, + "step": 5750 + }, + { + "epoch": 65.28611898016997, + "grad_norm": 3.5189714360163653, + "learning_rate": 5e-06, + "loss": 0.0525, + "num_input_tokens_seen": 986875044, + "step": 5751 + }, + { + "epoch": 65.28611898016997, + "loss": 0.040284451097249985, + "loss_ce": 3.176531754434109e-05, + "loss_iou": 0.435546875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 986875044, + "step": 5751 + }, + { + "epoch": 65.29745042492918, + "grad_norm": 2.918074328603341, + "learning_rate": 5e-06, + "loss": 0.0525, + "num_input_tokens_seen": 987045164, + "step": 5752 + }, + { + "epoch": 65.29745042492918, + "loss": 0.03508681431412697, + "loss_ce": 2.2118176275398582e-05, + "loss_iou": 0.365234375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 987045164, + "step": 5752 + }, + { + "epoch": 65.30878186968839, + "grad_norm": 2.7106561879634805, + "learning_rate": 5e-06, + "loss": 0.0356, + "num_input_tokens_seen": 987216812, + "step": 5753 + }, + { + "epoch": 65.30878186968839, + "loss": 0.05032968521118164, + "loss_ce": 2.1456542526721023e-05, + "loss_iou": 0.52734375, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 987216812, + "step": 5753 + }, + { + "epoch": 65.3201133144476, + "grad_norm": 2.8396150977942467, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 987388264, + "step": 5754 + }, + { + "epoch": 65.3201133144476, + "loss": 0.029339594766497612, + "loss_ce": 4.2719671910163015e-05, + "loss_iou": 0.453125, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 987388264, + "step": 5754 + }, + { + "epoch": 65.3314447592068, + "grad_norm": 3.563886772336644, + "learning_rate": 5e-06, + "loss": 0.0693, + "num_input_tokens_seen": 987560316, + "step": 5755 + }, + { + "epoch": 65.3314447592068, + "loss": 0.03773288428783417, + "loss_ce": 1.3158956789993681e-05, + "loss_iou": 0.5390625, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 987560316, + "step": 5755 + }, + { + "epoch": 65.34277620396601, + "grad_norm": 3.188420084164555, + "learning_rate": 5e-06, + "loss": 0.0785, + "num_input_tokens_seen": 987732324, + "step": 5756 + }, + { + "epoch": 65.34277620396601, + "loss": 0.07110798358917236, + "loss_ce": 1.7283775378018618e-05, + "loss_iou": 0.37890625, + "loss_num": 0.01422119140625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 987732324, + "step": 5756 + }, + { + "epoch": 65.35410764872522, + "grad_norm": 3.3629373273694183, + "learning_rate": 5e-06, + "loss": 0.0409, + "num_input_tokens_seen": 987902472, + "step": 5757 + }, + { + "epoch": 65.35410764872522, + "loss": 0.03837970271706581, + "loss_ce": 1.9107495973003097e-05, + "loss_iou": 0.2578125, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 987902472, + "step": 5757 + }, + { + "epoch": 65.36543909348443, + "grad_norm": 3.7747000969198683, + "learning_rate": 5e-06, + "loss": 0.0507, + "num_input_tokens_seen": 988073228, + "step": 5758 + }, + { + "epoch": 65.36543909348443, + "loss": 0.033994633704423904, + "loss_ce": 1.3310223948792554e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 988073228, + "step": 5758 + }, + { + "epoch": 65.37677053824362, + "grad_norm": 2.8496583040792607, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 988241896, + "step": 5759 + }, + { + "epoch": 65.37677053824362, + "loss": 0.026833347976207733, + "loss_ce": 3.8915659388294443e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 988241896, + "step": 5759 + }, + { + "epoch": 65.38810198300283, + "grad_norm": 2.848140053213339, + "learning_rate": 5e-06, + "loss": 0.0378, + "num_input_tokens_seen": 988413896, + "step": 5760 + }, + { + "epoch": 65.38810198300283, + "loss": 0.040435243397951126, + "loss_ce": 1.470960251026554e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 988413896, + "step": 5760 + }, + { + "epoch": 65.39943342776203, + "grad_norm": 3.2017273348398727, + "learning_rate": 5e-06, + "loss": 0.0579, + "num_input_tokens_seen": 988585760, + "step": 5761 + }, + { + "epoch": 65.39943342776203, + "loss": 0.08214667439460754, + "loss_ce": 2.3867934942245483e-05, + "loss_iou": 0.439453125, + "loss_num": 0.0164794921875, + "loss_xval": 0.08203125, + "num_input_tokens_seen": 988585760, + "step": 5761 + }, + { + "epoch": 65.41076487252124, + "grad_norm": 2.515146429526058, + "learning_rate": 5e-06, + "loss": 0.0645, + "num_input_tokens_seen": 988757560, + "step": 5762 + }, + { + "epoch": 65.41076487252124, + "loss": 0.09435395151376724, + "loss_ce": 2.4117889552144334e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0189208984375, + "loss_xval": 0.09423828125, + "num_input_tokens_seen": 988757560, + "step": 5762 + }, + { + "epoch": 65.42209631728045, + "grad_norm": 3.169178426645544, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 988929536, + "step": 5763 + }, + { + "epoch": 65.42209631728045, + "loss": 0.04145709052681923, + "loss_ce": 1.4219082004274242e-05, + "loss_iou": 0.61328125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 988929536, + "step": 5763 + }, + { + "epoch": 65.43342776203966, + "grad_norm": 3.4570929116556885, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 989100740, + "step": 5764 + }, + { + "epoch": 65.43342776203966, + "loss": 0.06846779584884644, + "loss_ce": 3.212683805031702e-05, + "loss_iou": 0.390625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 989100740, + "step": 5764 + }, + { + "epoch": 65.44475920679886, + "grad_norm": 3.65635693285648, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 989272332, + "step": 5765 + }, + { + "epoch": 65.44475920679886, + "loss": 0.03286335617303848, + "loss_ce": 1.1184663890162483e-05, + "loss_iou": 0.369140625, + "loss_num": 0.006561279296875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 989272332, + "step": 5765 + }, + { + "epoch": 65.45609065155807, + "grad_norm": 14.478673610780442, + "learning_rate": 5e-06, + "loss": 0.0432, + "num_input_tokens_seen": 989441612, + "step": 5766 + }, + { + "epoch": 65.45609065155807, + "loss": 0.02933388203382492, + "loss_ce": 3.70066991308704e-05, + "loss_iou": 0.390625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 989441612, + "step": 5766 + }, + { + "epoch": 65.46742209631728, + "grad_norm": 3.3031396609222394, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 989613176, + "step": 5767 + }, + { + "epoch": 65.46742209631728, + "loss": 0.02758553810417652, + "loss_ce": 2.8164911782369018e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0054931640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 989613176, + "step": 5767 + }, + { + "epoch": 65.47875354107649, + "grad_norm": 3.2221096835054857, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 989785412, + "step": 5768 + }, + { + "epoch": 65.47875354107649, + "loss": 0.08748716115951538, + "loss_ce": 2.377748205617536e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 989785412, + "step": 5768 + }, + { + "epoch": 65.4900849858357, + "grad_norm": 3.1833275123196416, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 989957212, + "step": 5769 + }, + { + "epoch": 65.4900849858357, + "loss": 0.030664196237921715, + "loss_ce": 3.980598921771161e-05, + "loss_iou": 0.193359375, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 989957212, + "step": 5769 + }, + { + "epoch": 65.5014164305949, + "grad_norm": 2.8106543301552502, + "learning_rate": 5e-06, + "loss": 0.0521, + "num_input_tokens_seen": 990127676, + "step": 5770 + }, + { + "epoch": 65.5014164305949, + "loss": 0.07636883854866028, + "loss_ce": 2.9118898964952677e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 990127676, + "step": 5770 + }, + { + "epoch": 65.51274787535411, + "grad_norm": 3.0117490827801823, + "learning_rate": 5e-06, + "loss": 0.0499, + "num_input_tokens_seen": 990299612, + "step": 5771 + }, + { + "epoch": 65.51274787535411, + "loss": 0.03499307483434677, + "loss_ce": 0.00016488679102621973, + "loss_iou": 0.392578125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 990299612, + "step": 5771 + }, + { + "epoch": 65.52407932011332, + "grad_norm": 3.481769144821567, + "learning_rate": 5e-06, + "loss": 0.0321, + "num_input_tokens_seen": 990471752, + "step": 5772 + }, + { + "epoch": 65.52407932011332, + "loss": 0.0201481394469738, + "loss_ce": 3.705561539391056e-05, + "loss_iou": 0.33203125, + "loss_num": 0.0040283203125, + "loss_xval": 0.0201416015625, + "num_input_tokens_seen": 990471752, + "step": 5772 + }, + { + "epoch": 65.53541076487252, + "grad_norm": 3.808287545096577, + "learning_rate": 5e-06, + "loss": 0.0358, + "num_input_tokens_seen": 990643820, + "step": 5773 + }, + { + "epoch": 65.53541076487252, + "loss": 0.036998577415943146, + "loss_ce": 1.127464565797709e-05, + "loss_iou": 0.51171875, + "loss_num": 0.00738525390625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 990643820, + "step": 5773 + }, + { + "epoch": 65.54674220963173, + "grad_norm": 3.557916191116322, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 990814096, + "step": 5774 + }, + { + "epoch": 65.54674220963173, + "loss": 0.04732809215784073, + "loss_ce": 2.5847934011835605e-05, + "loss_iou": 0.423828125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 990814096, + "step": 5774 + }, + { + "epoch": 65.55807365439094, + "grad_norm": 3.0852354078520934, + "learning_rate": 5e-06, + "loss": 0.0482, + "num_input_tokens_seen": 990986084, + "step": 5775 + }, + { + "epoch": 65.55807365439094, + "loss": 0.05569403991103172, + "loss_ce": 4.5235283323563635e-05, + "loss_iou": 0.546875, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 990986084, + "step": 5775 + }, + { + "epoch": 65.56940509915015, + "grad_norm": 4.03868127531456, + "learning_rate": 5e-06, + "loss": 0.062, + "num_input_tokens_seen": 991158164, + "step": 5776 + }, + { + "epoch": 65.56940509915015, + "loss": 0.042034778743982315, + "loss_ce": 4.259090928826481e-05, + "loss_iou": 0.328125, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 991158164, + "step": 5776 + }, + { + "epoch": 65.58073654390935, + "grad_norm": 4.921513598889633, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 991329256, + "step": 5777 + }, + { + "epoch": 65.58073654390935, + "loss": 0.04119446873664856, + "loss_ce": 2.6259327569277957e-05, + "loss_iou": 0.57421875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 991329256, + "step": 5777 + }, + { + "epoch": 65.59206798866856, + "grad_norm": 3.858369560899777, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 991499740, + "step": 5778 + }, + { + "epoch": 65.59206798866856, + "loss": 0.0308595709502697, + "loss_ce": 3.681503949337639e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 991499740, + "step": 5778 + }, + { + "epoch": 65.60339943342777, + "grad_norm": 3.219289885126241, + "learning_rate": 5e-06, + "loss": 0.0397, + "num_input_tokens_seen": 991671760, + "step": 5779 + }, + { + "epoch": 65.60339943342777, + "loss": 0.04247557371854782, + "loss_ce": 2.5621367967687547e-05, + "loss_iou": 0.52734375, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 991671760, + "step": 5779 + }, + { + "epoch": 65.61473087818698, + "grad_norm": 3.3675850115156463, + "learning_rate": 5e-06, + "loss": 0.0453, + "num_input_tokens_seen": 991843572, + "step": 5780 + }, + { + "epoch": 65.61473087818698, + "loss": 0.04054023325443268, + "loss_ce": 0.00010444331564940512, + "loss_iou": 0.2578125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 991843572, + "step": 5780 + }, + { + "epoch": 65.62606232294617, + "grad_norm": 3.2036837375601754, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 992014720, + "step": 5781 + }, + { + "epoch": 65.62606232294617, + "loss": 0.037506699562072754, + "loss_ce": 0.00030577252618968487, + "loss_iou": 0.46875, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 992014720, + "step": 5781 + }, + { + "epoch": 65.63739376770538, + "grad_norm": 5.414410987094799, + "learning_rate": 5e-06, + "loss": 0.0647, + "num_input_tokens_seen": 992185968, + "step": 5782 + }, + { + "epoch": 65.63739376770538, + "loss": 0.032044366002082825, + "loss_ce": 1.616914596525021e-05, + "loss_iou": 0.470703125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 992185968, + "step": 5782 + }, + { + "epoch": 65.64872521246458, + "grad_norm": 4.543922422501434, + "learning_rate": 5e-06, + "loss": 0.0667, + "num_input_tokens_seen": 992355448, + "step": 5783 + }, + { + "epoch": 65.64872521246458, + "loss": 0.08454081416130066, + "loss_ce": 2.238055458292365e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0169677734375, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 992355448, + "step": 5783 + }, + { + "epoch": 65.66005665722379, + "grad_norm": 2.7286435562118303, + "learning_rate": 5e-06, + "loss": 0.0324, + "num_input_tokens_seen": 992527064, + "step": 5784 + }, + { + "epoch": 65.66005665722379, + "loss": 0.028632130473852158, + "loss_ce": 2.1901407308178023e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0057373046875, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 992527064, + "step": 5784 + }, + { + "epoch": 65.671388101983, + "grad_norm": 3.118595815003266, + "learning_rate": 5e-06, + "loss": 0.061, + "num_input_tokens_seen": 992698188, + "step": 5785 + }, + { + "epoch": 65.671388101983, + "loss": 0.030240904539823532, + "loss_ce": 0.00018871987413149327, + "loss_iou": 0.416015625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 992698188, + "step": 5785 + }, + { + "epoch": 65.6827195467422, + "grad_norm": 3.233464806094293, + "learning_rate": 5e-06, + "loss": 0.0328, + "num_input_tokens_seen": 992869924, + "step": 5786 + }, + { + "epoch": 65.6827195467422, + "loss": 0.03187212720513344, + "loss_ce": 4.229322075843811e-05, + "loss_iou": 0.3515625, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 992869924, + "step": 5786 + }, + { + "epoch": 65.69405099150141, + "grad_norm": 3.5123250941426436, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 993040912, + "step": 5787 + }, + { + "epoch": 65.69405099150141, + "loss": 0.03946268558502197, + "loss_ce": 3.397458203835413e-05, + "loss_iou": 0.3125, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 993040912, + "step": 5787 + }, + { + "epoch": 65.70538243626062, + "grad_norm": 3.3410576426367093, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 993212884, + "step": 5788 + }, + { + "epoch": 65.70538243626062, + "loss": 0.026257412508130074, + "loss_ce": 5.807226261822507e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 993212884, + "step": 5788 + }, + { + "epoch": 65.71671388101983, + "grad_norm": 4.329024998225538, + "learning_rate": 5e-06, + "loss": 0.0528, + "num_input_tokens_seen": 993383984, + "step": 5789 + }, + { + "epoch": 65.71671388101983, + "loss": 0.10554572194814682, + "loss_ce": 1.59339870151598e-05, + "loss_iou": 0.486328125, + "loss_num": 0.0211181640625, + "loss_xval": 0.10546875, + "num_input_tokens_seen": 993383984, + "step": 5789 + }, + { + "epoch": 65.72804532577904, + "grad_norm": 3.3322995804244333, + "learning_rate": 5e-06, + "loss": 0.0586, + "num_input_tokens_seen": 993555656, + "step": 5790 + }, + { + "epoch": 65.72804532577904, + "loss": 0.041612058877944946, + "loss_ce": 1.659831832512282e-05, + "loss_iou": 0.55078125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 993555656, + "step": 5790 + }, + { + "epoch": 65.73937677053824, + "grad_norm": 3.1685012912060966, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 993727760, + "step": 5791 + }, + { + "epoch": 65.73937677053824, + "loss": 0.04165208339691162, + "loss_ce": 2.610619048937224e-05, + "loss_iou": 0.2080078125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 993727760, + "step": 5791 + }, + { + "epoch": 65.75070821529745, + "grad_norm": 3.3555801751964274, + "learning_rate": 5e-06, + "loss": 0.0631, + "num_input_tokens_seen": 993898768, + "step": 5792 + }, + { + "epoch": 65.75070821529745, + "loss": 0.03631053492426872, + "loss_ce": 3.276379356975667e-05, + "loss_iou": 0.2890625, + "loss_num": 0.007232666015625, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 993898768, + "step": 5792 + }, + { + "epoch": 65.76203966005666, + "grad_norm": 3.4643511357462073, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 994070592, + "step": 5793 + }, + { + "epoch": 65.76203966005666, + "loss": 0.052137747406959534, + "loss_ce": 1.372660062770592e-05, + "loss_iou": 0.373046875, + "loss_num": 0.01043701171875, + "loss_xval": 0.05224609375, + "num_input_tokens_seen": 994070592, + "step": 5793 + }, + { + "epoch": 65.77337110481587, + "grad_norm": 4.011102195483074, + "learning_rate": 5e-06, + "loss": 0.084, + "num_input_tokens_seen": 994240024, + "step": 5794 + }, + { + "epoch": 65.77337110481587, + "loss": 0.0688139796257019, + "loss_ce": 2.7361627871869132e-05, + "loss_iou": 0.3671875, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 994240024, + "step": 5794 + }, + { + "epoch": 65.78470254957507, + "grad_norm": 3.676975177737194, + "learning_rate": 5e-06, + "loss": 0.0467, + "num_input_tokens_seen": 994410604, + "step": 5795 + }, + { + "epoch": 65.78470254957507, + "loss": 0.03442037105560303, + "loss_ce": 7.283472223207355e-05, + "loss_iou": 0.423828125, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 994410604, + "step": 5795 + }, + { + "epoch": 65.79603399433428, + "grad_norm": 3.958102057118438, + "learning_rate": 5e-06, + "loss": 0.0647, + "num_input_tokens_seen": 994580852, + "step": 5796 + }, + { + "epoch": 65.79603399433428, + "loss": 0.05996246263384819, + "loss_ce": 5.6456676247762516e-05, + "loss_iou": 0.322265625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 994580852, + "step": 5796 + }, + { + "epoch": 65.80736543909349, + "grad_norm": 3.6256979084322625, + "learning_rate": 5e-06, + "loss": 0.0877, + "num_input_tokens_seen": 994752808, + "step": 5797 + }, + { + "epoch": 65.80736543909349, + "loss": 0.04044720157980919, + "loss_ce": 7.244764128699899e-05, + "loss_iou": 0.373046875, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 994752808, + "step": 5797 + }, + { + "epoch": 65.8186968838527, + "grad_norm": 3.6506547708510024, + "learning_rate": 5e-06, + "loss": 0.051, + "num_input_tokens_seen": 994921444, + "step": 5798 + }, + { + "epoch": 65.8186968838527, + "loss": 0.05915772542357445, + "loss_ce": 7.569455192424357e-05, + "loss_iou": 0.220703125, + "loss_num": 0.01177978515625, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 994921444, + "step": 5798 + }, + { + "epoch": 65.8300283286119, + "grad_norm": 4.0529672243563475, + "learning_rate": 5e-06, + "loss": 0.0534, + "num_input_tokens_seen": 995093628, + "step": 5799 + }, + { + "epoch": 65.8300283286119, + "loss": 0.09639350324869156, + "loss_ce": 1.8988373994943686e-05, + "loss_iou": 0.3671875, + "loss_num": 0.019287109375, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 995093628, + "step": 5799 + }, + { + "epoch": 65.84135977337111, + "grad_norm": 2.760159503163887, + "learning_rate": 5e-06, + "loss": 0.0329, + "num_input_tokens_seen": 995265672, + "step": 5800 + }, + { + "epoch": 65.84135977337111, + "loss": 0.027931788936257362, + "loss_ce": 2.346429755561985e-05, + "loss_iou": 0.435546875, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 995265672, + "step": 5800 + }, + { + "epoch": 65.85269121813032, + "grad_norm": 2.0083589946602425, + "learning_rate": 5e-06, + "loss": 0.0396, + "num_input_tokens_seen": 995436604, + "step": 5801 + }, + { + "epoch": 65.85269121813032, + "loss": 0.038099292665719986, + "loss_ce": 1.3354522707231808e-05, + "loss_iou": 0.271484375, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 995436604, + "step": 5801 + }, + { + "epoch": 65.86402266288952, + "grad_norm": 3.1245451031496283, + "learning_rate": 5e-06, + "loss": 0.0475, + "num_input_tokens_seen": 995608516, + "step": 5802 + }, + { + "epoch": 65.86402266288952, + "loss": 0.043090157210826874, + "loss_ce": 0.00021295633632689714, + "loss_iou": 0.32421875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 995608516, + "step": 5802 + }, + { + "epoch": 65.87535410764872, + "grad_norm": 3.583962028136142, + "learning_rate": 5e-06, + "loss": 0.0391, + "num_input_tokens_seen": 995779672, + "step": 5803 + }, + { + "epoch": 65.87535410764872, + "loss": 0.04351666197180748, + "loss_ce": 2.91143442154862e-05, + "loss_iou": 0.26171875, + "loss_num": 0.00872802734375, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 995779672, + "step": 5803 + }, + { + "epoch": 65.88668555240793, + "grad_norm": 3.7307450487507774, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 995950140, + "step": 5804 + }, + { + "epoch": 65.88668555240793, + "loss": 0.04242198169231415, + "loss_ce": 1.780654929461889e-05, + "loss_iou": 0.453125, + "loss_num": 0.00848388671875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 995950140, + "step": 5804 + }, + { + "epoch": 65.89801699716713, + "grad_norm": 3.333835225797573, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 996122144, + "step": 5805 + }, + { + "epoch": 65.89801699716713, + "loss": 0.03899308294057846, + "loss_ce": 2.213602783740498e-05, + "loss_iou": 0.34765625, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 996122144, + "step": 5805 + }, + { + "epoch": 65.90934844192634, + "grad_norm": 3.088832301858489, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 996293124, + "step": 5806 + }, + { + "epoch": 65.90934844192634, + "loss": 0.04271329194307327, + "loss_ce": 1.919774877023883e-05, + "loss_iou": 0.36328125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 996293124, + "step": 5806 + }, + { + "epoch": 65.92067988668555, + "grad_norm": 3.1918272762212734, + "learning_rate": 5e-06, + "loss": 0.0461, + "num_input_tokens_seen": 996465304, + "step": 5807 + }, + { + "epoch": 65.92067988668555, + "loss": 0.06512096524238586, + "loss_ce": 7.274914969457313e-05, + "loss_iou": 0.388671875, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 996465304, + "step": 5807 + }, + { + "epoch": 65.93201133144476, + "grad_norm": 3.3837897582855914, + "learning_rate": 5e-06, + "loss": 0.0482, + "num_input_tokens_seen": 996637152, + "step": 5808 + }, + { + "epoch": 65.93201133144476, + "loss": 0.03420586884021759, + "loss_ce": 5.6695262173889205e-05, + "loss_iou": 0.54296875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 996637152, + "step": 5808 + }, + { + "epoch": 65.94334277620396, + "grad_norm": 3.5474891351220488, + "learning_rate": 5e-06, + "loss": 0.0654, + "num_input_tokens_seen": 996809196, + "step": 5809 + }, + { + "epoch": 65.94334277620396, + "loss": 0.06417332589626312, + "loss_ce": 2.5378167265444063e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 996809196, + "step": 5809 + }, + { + "epoch": 65.95467422096317, + "grad_norm": 3.358706827509532, + "learning_rate": 5e-06, + "loss": 0.0501, + "num_input_tokens_seen": 996980928, + "step": 5810 + }, + { + "epoch": 65.95467422096317, + "loss": 0.028481513261795044, + "loss_ce": 2.3870634322520345e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 996980928, + "step": 5810 + }, + { + "epoch": 65.96600566572238, + "grad_norm": 3.570636376145596, + "learning_rate": 5e-06, + "loss": 0.0519, + "num_input_tokens_seen": 997152636, + "step": 5811 + }, + { + "epoch": 65.96600566572238, + "loss": 0.09663854539394379, + "loss_ce": 0.00020299361494835466, + "loss_iou": 0.412109375, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 997152636, + "step": 5811 + }, + { + "epoch": 65.97733711048159, + "grad_norm": 3.5896584375412366, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 997322024, + "step": 5812 + }, + { + "epoch": 65.97733711048159, + "loss": 0.03467239439487457, + "loss_ce": 3.49411930073984e-05, + "loss_iou": 0.36328125, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 997322024, + "step": 5812 + }, + { + "epoch": 65.98866855524079, + "grad_norm": 3.19433135796106, + "learning_rate": 5e-06, + "loss": 0.0415, + "num_input_tokens_seen": 997493992, + "step": 5813 + }, + { + "epoch": 65.98866855524079, + "loss": 0.061878662556409836, + "loss_ce": 3.479280348983593e-05, + "loss_iou": 0.4453125, + "loss_num": 0.01239013671875, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 997493992, + "step": 5813 + }, + { + "epoch": 66.0, + "grad_norm": 3.2508703732801822, + "learning_rate": 5e-06, + "loss": 0.036, + "num_input_tokens_seen": 997665704, + "step": 5814 + }, + { + "epoch": 66.0, + "loss": 0.029385238885879517, + "loss_ce": 1.2070284356013872e-05, + "loss_iou": 0.412109375, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 997665704, + "step": 5814 + }, + { + "epoch": 66.01133144475921, + "grad_norm": 2.982087293680888, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 997836092, + "step": 5815 + }, + { + "epoch": 66.01133144475921, + "loss": 0.04796624928712845, + "loss_ce": 2.3131056877900846e-05, + "loss_iou": 0.55078125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 997836092, + "step": 5815 + }, + { + "epoch": 66.02266288951841, + "grad_norm": 2.9031186038490406, + "learning_rate": 5e-06, + "loss": 0.0401, + "num_input_tokens_seen": 998007956, + "step": 5816 + }, + { + "epoch": 66.02266288951841, + "loss": 0.044926710426807404, + "loss_ce": 3.5350502002984285e-05, + "loss_iou": 0.43359375, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 998007956, + "step": 5816 + }, + { + "epoch": 66.03399433427762, + "grad_norm": 2.879831266219969, + "learning_rate": 5e-06, + "loss": 0.0389, + "num_input_tokens_seen": 998177812, + "step": 5817 + }, + { + "epoch": 66.03399433427762, + "loss": 0.040174782276153564, + "loss_ce": 1.3646911611431278e-05, + "loss_iou": 0.3125, + "loss_num": 0.008056640625, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 998177812, + "step": 5817 + }, + { + "epoch": 66.04532577903683, + "grad_norm": 3.080295728619409, + "learning_rate": 5e-06, + "loss": 0.0299, + "num_input_tokens_seen": 998349064, + "step": 5818 + }, + { + "epoch": 66.04532577903683, + "loss": 0.022834401577711105, + "loss_ce": 2.2511003407998942e-05, + "loss_iou": 0.0, + "loss_num": 0.00457763671875, + "loss_xval": 0.0228271484375, + "num_input_tokens_seen": 998349064, + "step": 5818 + }, + { + "epoch": 66.05665722379604, + "grad_norm": 3.173499252796179, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 998520968, + "step": 5819 + }, + { + "epoch": 66.05665722379604, + "loss": 0.038249991834163666, + "loss_ce": 1.1462408110674005e-05, + "loss_iou": 0.478515625, + "loss_num": 0.00762939453125, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 998520968, + "step": 5819 + }, + { + "epoch": 66.06798866855524, + "grad_norm": 2.6150134399018246, + "learning_rate": 5e-06, + "loss": 0.027, + "num_input_tokens_seen": 998692948, + "step": 5820 + }, + { + "epoch": 66.06798866855524, + "loss": 0.029565272852778435, + "loss_ce": 5.4774878663010895e-05, + "loss_iou": 0.435546875, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 998692948, + "step": 5820 + }, + { + "epoch": 66.07932011331445, + "grad_norm": 3.025580036903358, + "learning_rate": 5e-06, + "loss": 0.0593, + "num_input_tokens_seen": 998863436, + "step": 5821 + }, + { + "epoch": 66.07932011331445, + "loss": 0.026316750794649124, + "loss_ce": 1.8229262423119508e-05, + "loss_iou": 0.44140625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 998863436, + "step": 5821 + }, + { + "epoch": 66.09065155807366, + "grad_norm": 4.718583219379033, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 999034668, + "step": 5822 + }, + { + "epoch": 66.09065155807366, + "loss": 0.09526698291301727, + "loss_ce": 2.161873271688819e-05, + "loss_iou": 0.361328125, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 999034668, + "step": 5822 + }, + { + "epoch": 66.10198300283287, + "grad_norm": 3.260550952286194, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 999206140, + "step": 5823 + }, + { + "epoch": 66.10198300283287, + "loss": 0.066210076212883, + "loss_ce": 1.7453430700697936e-05, + "loss_iou": 0.498046875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 999206140, + "step": 5823 + }, + { + "epoch": 66.11331444759207, + "grad_norm": 3.7022550385730697, + "learning_rate": 5e-06, + "loss": 0.0375, + "num_input_tokens_seen": 999378324, + "step": 5824 + }, + { + "epoch": 66.11331444759207, + "loss": 0.03564482182264328, + "loss_ce": 0.001808458473533392, + "loss_iou": 0.369140625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 999378324, + "step": 5824 + }, + { + "epoch": 66.12464589235128, + "grad_norm": 4.090211646471778, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 999549100, + "step": 5825 + }, + { + "epoch": 66.12464589235128, + "loss": 0.04278557002544403, + "loss_ce": 3.044380537176039e-05, + "loss_iou": 0.51953125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 999549100, + "step": 5825 + }, + { + "epoch": 66.13597733711048, + "grad_norm": 4.372960864223768, + "learning_rate": 5e-06, + "loss": 0.0574, + "num_input_tokens_seen": 999721040, + "step": 5826 + }, + { + "epoch": 66.13597733711048, + "loss": 0.04125019162893295, + "loss_ce": 2.0940937247360125e-05, + "loss_iou": 0.4765625, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 999721040, + "step": 5826 + }, + { + "epoch": 66.14730878186968, + "grad_norm": 3.346314970089101, + "learning_rate": 5e-06, + "loss": 0.0616, + "num_input_tokens_seen": 999893296, + "step": 5827 + }, + { + "epoch": 66.14730878186968, + "loss": 0.033320240676403046, + "loss_ce": 2.556304752943106e-05, + "loss_iou": 0.4765625, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 999893296, + "step": 5827 + }, + { + "epoch": 66.15864022662889, + "grad_norm": 3.5593086325218426, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 1000065396, + "step": 5828 + }, + { + "epoch": 66.15864022662889, + "loss": 0.06338623911142349, + "loss_ce": 4.7003570216475055e-05, + "loss_iou": 0.25, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 1000065396, + "step": 5828 + }, + { + "epoch": 66.1699716713881, + "grad_norm": 3.659081421388046, + "learning_rate": 5e-06, + "loss": 0.0604, + "num_input_tokens_seen": 1000235576, + "step": 5829 + }, + { + "epoch": 66.1699716713881, + "loss": 0.09226740151643753, + "loss_ce": 4.327727583586238e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 1000235576, + "step": 5829 + }, + { + "epoch": 66.1813031161473, + "grad_norm": 3.053870960199708, + "learning_rate": 5e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1000407280, + "step": 5830 + }, + { + "epoch": 66.1813031161473, + "loss": 0.03429713100194931, + "loss_ce": 2.5887809897540137e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1000407280, + "step": 5830 + }, + { + "epoch": 66.19263456090651, + "grad_norm": 3.547001296451918, + "learning_rate": 5e-06, + "loss": 0.033, + "num_input_tokens_seen": 1000579428, + "step": 5831 + }, + { + "epoch": 66.19263456090651, + "loss": 0.028814319521188736, + "loss_ce": 2.0983618014724925e-05, + "loss_iou": 0.55859375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 1000579428, + "step": 5831 + }, + { + "epoch": 66.20396600566572, + "grad_norm": 3.5174642503992857, + "learning_rate": 5e-06, + "loss": 0.0584, + "num_input_tokens_seen": 1000751072, + "step": 5832 + }, + { + "epoch": 66.20396600566572, + "loss": 0.030492212623357773, + "loss_ce": 2.0411091099958867e-05, + "loss_iou": 0.25390625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1000751072, + "step": 5832 + }, + { + "epoch": 66.21529745042493, + "grad_norm": 3.403598196342013, + "learning_rate": 5e-06, + "loss": 0.0335, + "num_input_tokens_seen": 1000922844, + "step": 5833 + }, + { + "epoch": 66.21529745042493, + "loss": 0.03076525591313839, + "loss_ce": 8.74602046678774e-05, + "loss_iou": 0.56640625, + "loss_num": 0.006134033203125, + "loss_xval": 0.0306396484375, + "num_input_tokens_seen": 1000922844, + "step": 5833 + }, + { + "epoch": 66.22662889518413, + "grad_norm": 3.667321037271246, + "learning_rate": 5e-06, + "loss": 0.038, + "num_input_tokens_seen": 1001094860, + "step": 5834 + }, + { + "epoch": 66.22662889518413, + "loss": 0.04786504805088043, + "loss_ce": 2.874444362532813e-05, + "loss_iou": 0.470703125, + "loss_num": 0.00958251953125, + "loss_xval": 0.0478515625, + "num_input_tokens_seen": 1001094860, + "step": 5834 + }, + { + "epoch": 66.23796033994334, + "grad_norm": 3.0871553739144177, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 1001266512, + "step": 5835 + }, + { + "epoch": 66.23796033994334, + "loss": 0.04659239575266838, + "loss_ce": 2.2572769012185745e-05, + "loss_iou": 0.357421875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 1001266512, + "step": 5835 + }, + { + "epoch": 66.24929178470255, + "grad_norm": 2.9355038930060147, + "learning_rate": 5e-06, + "loss": 0.0479, + "num_input_tokens_seen": 1001438652, + "step": 5836 + }, + { + "epoch": 66.24929178470255, + "loss": 0.0538221038877964, + "loss_ce": 1.9614308257587254e-05, + "loss_iou": 0.25390625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 1001438652, + "step": 5836 + }, + { + "epoch": 66.26062322946176, + "grad_norm": 3.0633631698576447, + "learning_rate": 5e-06, + "loss": 0.0393, + "num_input_tokens_seen": 1001609572, + "step": 5837 + }, + { + "epoch": 66.26062322946176, + "loss": 0.03917674720287323, + "loss_ce": 4.5582084567286074e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 1001609572, + "step": 5837 + }, + { + "epoch": 66.27195467422096, + "grad_norm": 3.196188132065316, + "learning_rate": 5e-06, + "loss": 0.0552, + "num_input_tokens_seen": 1001780432, + "step": 5838 + }, + { + "epoch": 66.27195467422096, + "loss": 0.08353699743747711, + "loss_ce": 2.5648923838161863e-05, + "loss_iou": 0.3125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 1001780432, + "step": 5838 + }, + { + "epoch": 66.28328611898017, + "grad_norm": 3.1527980281563592, + "learning_rate": 5e-06, + "loss": 0.036, + "num_input_tokens_seen": 1001952348, + "step": 5839 + }, + { + "epoch": 66.28328611898017, + "loss": 0.04768617823719978, + "loss_ce": 4.8238645831588656e-05, + "loss_iou": 0.27734375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 1001952348, + "step": 5839 + }, + { + "epoch": 66.29461756373938, + "grad_norm": 3.027742268942204, + "learning_rate": 5e-06, + "loss": 0.0381, + "num_input_tokens_seen": 1002124200, + "step": 5840 + }, + { + "epoch": 66.29461756373938, + "loss": 0.028493424877524376, + "loss_ce": 2.0525785657810047e-05, + "loss_iou": 0.3515625, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 1002124200, + "step": 5840 + }, + { + "epoch": 66.30594900849859, + "grad_norm": 2.396562150462242, + "learning_rate": 5e-06, + "loss": 0.0469, + "num_input_tokens_seen": 1002295348, + "step": 5841 + }, + { + "epoch": 66.30594900849859, + "loss": 0.030366579070687294, + "loss_ce": 4.736658956971951e-05, + "loss_iou": 0.52734375, + "loss_num": 0.006072998046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1002295348, + "step": 5841 + }, + { + "epoch": 66.3172804532578, + "grad_norm": 2.464358527557364, + "learning_rate": 5e-06, + "loss": 0.0317, + "num_input_tokens_seen": 1002465672, + "step": 5842 + }, + { + "epoch": 66.3172804532578, + "loss": 0.03490632772445679, + "loss_ce": 3.999529144493863e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1002465672, + "step": 5842 + }, + { + "epoch": 66.328611898017, + "grad_norm": 3.0308350853691026, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 1002636504, + "step": 5843 + }, + { + "epoch": 66.328611898017, + "loss": 0.047676317393779755, + "loss_ce": 5.3635478252545e-05, + "loss_iou": 0.427734375, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 1002636504, + "step": 5843 + }, + { + "epoch": 66.33994334277621, + "grad_norm": 3.519476984782888, + "learning_rate": 5e-06, + "loss": 0.0411, + "num_input_tokens_seen": 1002807508, + "step": 5844 + }, + { + "epoch": 66.33994334277621, + "loss": 0.028714781627058983, + "loss_ce": 4.351743700681254e-05, + "loss_iou": 0.46875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 1002807508, + "step": 5844 + }, + { + "epoch": 66.35127478753542, + "grad_norm": 3.6832435047484644, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 1002979232, + "step": 5845 + }, + { + "epoch": 66.35127478753542, + "loss": 0.058221690356731415, + "loss_ce": 2.4667113393661566e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01165771484375, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 1002979232, + "step": 5845 + }, + { + "epoch": 66.36260623229462, + "grad_norm": 3.2213159575847463, + "learning_rate": 5e-06, + "loss": 0.0342, + "num_input_tokens_seen": 1003150108, + "step": 5846 + }, + { + "epoch": 66.36260623229462, + "loss": 0.03850887715816498, + "loss_ce": 2.6208439521724358e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1003150108, + "step": 5846 + }, + { + "epoch": 66.37393767705383, + "grad_norm": 3.0780758587924186, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 1003322116, + "step": 5847 + }, + { + "epoch": 66.37393767705383, + "loss": 0.1336781233549118, + "loss_ce": 2.6381814677733928e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0267333984375, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 1003322116, + "step": 5847 + }, + { + "epoch": 66.38526912181302, + "grad_norm": 3.0114202711595164, + "learning_rate": 5e-06, + "loss": 0.0368, + "num_input_tokens_seen": 1003493668, + "step": 5848 + }, + { + "epoch": 66.38526912181302, + "loss": 0.03140972927212715, + "loss_ce": 6.817728717578575e-05, + "loss_iou": 0.38671875, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1003493668, + "step": 5848 + }, + { + "epoch": 66.39660056657223, + "grad_norm": 3.0600816238815436, + "learning_rate": 5e-06, + "loss": 0.0533, + "num_input_tokens_seen": 1003664036, + "step": 5849 + }, + { + "epoch": 66.39660056657223, + "loss": 0.05011823773384094, + "loss_ce": 9.992759441956878e-05, + "loss_iou": 0.54296875, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 1003664036, + "step": 5849 + }, + { + "epoch": 66.40793201133144, + "grad_norm": 3.1787078847168337, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 1003835124, + "step": 5850 + }, + { + "epoch": 66.40793201133144, + "loss": 0.045853085815906525, + "loss_ce": 1.568091283843387e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 1003835124, + "step": 5850 + }, + { + "epoch": 66.41926345609065, + "grad_norm": 2.8499363990472206, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 1004006716, + "step": 5851 + }, + { + "epoch": 66.41926345609065, + "loss": 0.02343943528831005, + "loss_ce": 3.245188054279424e-05, + "loss_iou": 0.361328125, + "loss_num": 0.004669189453125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 1004006716, + "step": 5851 + }, + { + "epoch": 66.43059490084985, + "grad_norm": 2.90798252892652, + "learning_rate": 5e-06, + "loss": 0.0365, + "num_input_tokens_seen": 1004178412, + "step": 5852 + }, + { + "epoch": 66.43059490084985, + "loss": 0.03418688476085663, + "loss_ce": 3.771146293729544e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1004178412, + "step": 5852 + }, + { + "epoch": 66.44192634560906, + "grad_norm": 3.7013428476250754, + "learning_rate": 5e-06, + "loss": 0.0695, + "num_input_tokens_seen": 1004350224, + "step": 5853 + }, + { + "epoch": 66.44192634560906, + "loss": 0.06857158988714218, + "loss_ce": 1.3844116438122e-05, + "loss_iou": 0.466796875, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 1004350224, + "step": 5853 + }, + { + "epoch": 66.45325779036827, + "grad_norm": 3.6578450552378907, + "learning_rate": 5e-06, + "loss": 0.0634, + "num_input_tokens_seen": 1004521616, + "step": 5854 + }, + { + "epoch": 66.45325779036827, + "loss": 0.03935757279396057, + "loss_ce": 2.0414892787812278e-05, + "loss_iou": 0.56640625, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 1004521616, + "step": 5854 + }, + { + "epoch": 66.46458923512748, + "grad_norm": 3.5320625754576453, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 1004693676, + "step": 5855 + }, + { + "epoch": 66.46458923512748, + "loss": 0.03869687393307686, + "loss_ce": 1.5844016161281615e-05, + "loss_iou": 0.37109375, + "loss_num": 0.00775146484375, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1004693676, + "step": 5855 + }, + { + "epoch": 66.47592067988668, + "grad_norm": 3.3561761166729176, + "learning_rate": 5e-06, + "loss": 0.0438, + "num_input_tokens_seen": 1004863876, + "step": 5856 + }, + { + "epoch": 66.47592067988668, + "loss": 0.034067802131175995, + "loss_ce": 2.544432572904043e-05, + "loss_iou": 0.396484375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1004863876, + "step": 5856 + }, + { + "epoch": 66.48725212464589, + "grad_norm": 3.0898957578953783, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 1005035548, + "step": 5857 + }, + { + "epoch": 66.48725212464589, + "loss": 0.0312093123793602, + "loss_ce": 2.0346453311503865e-05, + "loss_iou": 0.53125, + "loss_num": 0.0062255859375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1005035548, + "step": 5857 + }, + { + "epoch": 66.4985835694051, + "grad_norm": 5.9490567310327345, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 1005206636, + "step": 5858 + }, + { + "epoch": 66.4985835694051, + "loss": 0.07717415690422058, + "loss_ce": 0.0008725836523808539, + "loss_iou": 0.265625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 1005206636, + "step": 5858 + }, + { + "epoch": 66.5099150141643, + "grad_norm": 3.587159520297805, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1005377448, + "step": 5859 + }, + { + "epoch": 66.5099150141643, + "loss": 0.0374593585729599, + "loss_ce": 4.480699135456234e-05, + "loss_iou": 0.55078125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 1005377448, + "step": 5859 + }, + { + "epoch": 66.52124645892351, + "grad_norm": 3.221606922633493, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 1005548260, + "step": 5860 + }, + { + "epoch": 66.52124645892351, + "loss": 0.07446883618831635, + "loss_ce": 3.6456749512581155e-05, + "loss_iou": 0.369140625, + "loss_num": 0.01483154296875, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 1005548260, + "step": 5860 + }, + { + "epoch": 66.53257790368272, + "grad_norm": 2.9724617301496763, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 1005718152, + "step": 5861 + }, + { + "epoch": 66.53257790368272, + "loss": 0.08820773661136627, + "loss_ce": 7.29785897419788e-05, + "loss_iou": 0.421875, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 1005718152, + "step": 5861 + }, + { + "epoch": 66.54390934844193, + "grad_norm": 2.2221241336411763, + "learning_rate": 5e-06, + "loss": 0.0389, + "num_input_tokens_seen": 1005889012, + "step": 5862 + }, + { + "epoch": 66.54390934844193, + "loss": 0.02373218536376953, + "loss_ce": 2.002680230361875e-05, + "loss_iou": 0.361328125, + "loss_num": 0.004730224609375, + "loss_xval": 0.023681640625, + "num_input_tokens_seen": 1005889012, + "step": 5862 + }, + { + "epoch": 66.55524079320114, + "grad_norm": 2.2178180780421948, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1006059780, + "step": 5863 + }, + { + "epoch": 66.55524079320114, + "loss": 0.029174644500017166, + "loss_ce": 0.001701194210909307, + "loss_iou": 0.376953125, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 1006059780, + "step": 5863 + }, + { + "epoch": 66.56657223796034, + "grad_norm": 2.4287566193503576, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 1006231408, + "step": 5864 + }, + { + "epoch": 66.56657223796034, + "loss": 0.05618676915764809, + "loss_ce": 1.916783003252931e-05, + "loss_iou": 0.3984375, + "loss_num": 0.01123046875, + "loss_xval": 0.05615234375, + "num_input_tokens_seen": 1006231408, + "step": 5864 + }, + { + "epoch": 66.57790368271955, + "grad_norm": 2.8344261082103213, + "learning_rate": 5e-06, + "loss": 0.0385, + "num_input_tokens_seen": 1006402524, + "step": 5865 + }, + { + "epoch": 66.57790368271955, + "loss": 0.026119187474250793, + "loss_ce": 2.665856300154701e-05, + "loss_iou": 0.443359375, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 1006402524, + "step": 5865 + }, + { + "epoch": 66.58923512747876, + "grad_norm": 3.8200780796833893, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 1006573884, + "step": 5866 + }, + { + "epoch": 66.58923512747876, + "loss": 0.03293679282069206, + "loss_ce": 2.3586859242641367e-05, + "loss_iou": 0.322265625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 1006573884, + "step": 5866 + }, + { + "epoch": 66.60056657223797, + "grad_norm": 4.397566459606763, + "learning_rate": 5e-06, + "loss": 0.0621, + "num_input_tokens_seen": 1006745588, + "step": 5867 + }, + { + "epoch": 66.60056657223797, + "loss": 0.03528425097465515, + "loss_ce": 3.645121250883676e-05, + "loss_iou": 0.5078125, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1006745588, + "step": 5867 + }, + { + "epoch": 66.61189801699717, + "grad_norm": 3.5239528076772406, + "learning_rate": 5e-06, + "loss": 0.0519, + "num_input_tokens_seen": 1006917580, + "step": 5868 + }, + { + "epoch": 66.61189801699717, + "loss": 0.03473358973860741, + "loss_ce": 3.510230453684926e-05, + "loss_iou": 0.515625, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1006917580, + "step": 5868 + }, + { + "epoch": 66.62322946175638, + "grad_norm": 3.080813349138261, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 1007087732, + "step": 5869 + }, + { + "epoch": 66.62322946175638, + "loss": 0.034828707575798035, + "loss_ce": 2.3409364075632766e-05, + "loss_iou": 0.439453125, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1007087732, + "step": 5869 + }, + { + "epoch": 66.63456090651557, + "grad_norm": 3.083926074968894, + "learning_rate": 5e-06, + "loss": 0.0746, + "num_input_tokens_seen": 1007259412, + "step": 5870 + }, + { + "epoch": 66.63456090651557, + "loss": 0.09153534471988678, + "loss_ce": 1.3125488294463139e-05, + "loss_iou": 0.435546875, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 1007259412, + "step": 5870 + }, + { + "epoch": 66.64589235127478, + "grad_norm": 3.3350508360299043, + "learning_rate": 5e-06, + "loss": 0.0722, + "num_input_tokens_seen": 1007430116, + "step": 5871 + }, + { + "epoch": 66.64589235127478, + "loss": 0.029761623591184616, + "loss_ce": 3.750433825189248e-05, + "loss_iou": 0.482421875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 1007430116, + "step": 5871 + }, + { + "epoch": 66.65722379603399, + "grad_norm": 3.482152951698697, + "learning_rate": 5e-06, + "loss": 0.0404, + "num_input_tokens_seen": 1007602320, + "step": 5872 + }, + { + "epoch": 66.65722379603399, + "loss": 0.04584764689207077, + "loss_ce": 4.0761849959380925e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 1007602320, + "step": 5872 + }, + { + "epoch": 66.6685552407932, + "grad_norm": 3.8432140256900147, + "learning_rate": 5e-06, + "loss": 0.0674, + "num_input_tokens_seen": 1007774248, + "step": 5873 + }, + { + "epoch": 66.6685552407932, + "loss": 0.06982676684856415, + "loss_ce": 1.7805064999265596e-05, + "loss_iou": 0.263671875, + "loss_num": 0.01397705078125, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 1007774248, + "step": 5873 + }, + { + "epoch": 66.6798866855524, + "grad_norm": 3.0378203476707664, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1007945096, + "step": 5874 + }, + { + "epoch": 66.6798866855524, + "loss": 0.028539368882775307, + "loss_ce": 9.698656504042447e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 1007945096, + "step": 5874 + }, + { + "epoch": 66.69121813031161, + "grad_norm": 3.340986507941867, + "learning_rate": 5e-06, + "loss": 0.0556, + "num_input_tokens_seen": 1008116756, + "step": 5875 + }, + { + "epoch": 66.69121813031161, + "loss": 0.03618955239653587, + "loss_ce": 2.6222069209325127e-05, + "loss_iou": 0.44921875, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1008116756, + "step": 5875 + }, + { + "epoch": 66.70254957507082, + "grad_norm": 3.7052307835458067, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 1008288816, + "step": 5876 + }, + { + "epoch": 66.70254957507082, + "loss": 0.03358769416809082, + "loss_ce": 1.8356109649175778e-05, + "loss_iou": 0.4609375, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1008288816, + "step": 5876 + }, + { + "epoch": 66.71388101983003, + "grad_norm": 3.5810112715152584, + "learning_rate": 5e-06, + "loss": 0.0593, + "num_input_tokens_seen": 1008459372, + "step": 5877 + }, + { + "epoch": 66.71388101983003, + "loss": 0.0730247050523758, + "loss_ce": 1.1404594260966405e-05, + "loss_iou": 0.49609375, + "loss_num": 0.01458740234375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 1008459372, + "step": 5877 + }, + { + "epoch": 66.72521246458923, + "grad_norm": 3.7009198375560497, + "learning_rate": 5e-06, + "loss": 0.0425, + "num_input_tokens_seen": 1008631252, + "step": 5878 + }, + { + "epoch": 66.72521246458923, + "loss": 0.054692186415195465, + "loss_ce": 1.9942915969295427e-05, + "loss_iou": 0.47265625, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 1008631252, + "step": 5878 + }, + { + "epoch": 66.73654390934844, + "grad_norm": 3.5753158759883266, + "learning_rate": 5e-06, + "loss": 0.0426, + "num_input_tokens_seen": 1008803048, + "step": 5879 + }, + { + "epoch": 66.73654390934844, + "loss": 0.06965494900941849, + "loss_ce": 2.9097438527969643e-05, + "loss_iou": 0.40234375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 1008803048, + "step": 5879 + }, + { + "epoch": 66.74787535410765, + "grad_norm": 4.073487495975577, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 1008973744, + "step": 5880 + }, + { + "epoch": 66.74787535410765, + "loss": 0.04496660828590393, + "loss_ce": 7.524955435656011e-05, + "loss_iou": 0.296875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 1008973744, + "step": 5880 + }, + { + "epoch": 66.75920679886686, + "grad_norm": 4.021521978359537, + "learning_rate": 5e-06, + "loss": 0.0709, + "num_input_tokens_seen": 1009144380, + "step": 5881 + }, + { + "epoch": 66.75920679886686, + "loss": 0.037859391421079636, + "loss_ce": 3.285380080342293e-05, + "loss_iou": 0.447265625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 1009144380, + "step": 5881 + }, + { + "epoch": 66.77053824362606, + "grad_norm": 3.495021723455332, + "learning_rate": 5e-06, + "loss": 0.0574, + "num_input_tokens_seen": 1009315944, + "step": 5882 + }, + { + "epoch": 66.77053824362606, + "loss": 0.03854941576719284, + "loss_ce": 5.149029311724007e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1009315944, + "step": 5882 + }, + { + "epoch": 66.78186968838527, + "grad_norm": 3.2580233992851557, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 1009485472, + "step": 5883 + }, + { + "epoch": 66.78186968838527, + "loss": 0.04334977641701698, + "loss_ce": 1.4816567272646353e-05, + "loss_iou": 0.373046875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 1009485472, + "step": 5883 + }, + { + "epoch": 66.79320113314448, + "grad_norm": 3.5475812380333536, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 1009657520, + "step": 5884 + }, + { + "epoch": 66.79320113314448, + "loss": 0.07149433344602585, + "loss_ce": 6.911292075528763e-06, + "loss_iou": 0.515625, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 1009657520, + "step": 5884 + }, + { + "epoch": 66.80453257790369, + "grad_norm": 3.9318048365168767, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1009829400, + "step": 5885 + }, + { + "epoch": 66.80453257790369, + "loss": 0.04538831114768982, + "loss_ce": 2.3933404008857906e-05, + "loss_iou": 0.578125, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 1009829400, + "step": 5885 + }, + { + "epoch": 66.8158640226629, + "grad_norm": 3.9569411520948616, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 1010001112, + "step": 5886 + }, + { + "epoch": 66.8158640226629, + "loss": 0.04939759895205498, + "loss_ce": 9.645023965276778e-05, + "loss_iou": 0.40625, + "loss_num": 0.0098876953125, + "loss_xval": 0.04931640625, + "num_input_tokens_seen": 1010001112, + "step": 5886 + }, + { + "epoch": 66.8271954674221, + "grad_norm": 3.267906343917386, + "learning_rate": 5e-06, + "loss": 0.0506, + "num_input_tokens_seen": 1010173532, + "step": 5887 + }, + { + "epoch": 66.8271954674221, + "loss": 0.06323733925819397, + "loss_ce": 2.017248516494874e-05, + "loss_iou": 0.4765625, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 1010173532, + "step": 5887 + }, + { + "epoch": 66.83852691218131, + "grad_norm": 3.4780046126140705, + "learning_rate": 5e-06, + "loss": 0.0556, + "num_input_tokens_seen": 1010345404, + "step": 5888 + }, + { + "epoch": 66.83852691218131, + "loss": 0.07771638035774231, + "loss_ce": 3.38833051500842e-05, + "loss_iou": 0.380859375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 1010345404, + "step": 5888 + }, + { + "epoch": 66.84985835694052, + "grad_norm": 3.846023019233165, + "learning_rate": 5e-06, + "loss": 0.0382, + "num_input_tokens_seen": 1010516620, + "step": 5889 + }, + { + "epoch": 66.84985835694052, + "loss": 0.03444063663482666, + "loss_ce": 3.206765541108325e-05, + "loss_iou": 0.53515625, + "loss_num": 0.006866455078125, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1010516620, + "step": 5889 + }, + { + "epoch": 66.86118980169972, + "grad_norm": 3.638996902667991, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 1010688608, + "step": 5890 + }, + { + "epoch": 66.86118980169972, + "loss": 0.04404052719473839, + "loss_ce": 1.8919668946182355e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 1010688608, + "step": 5890 + }, + { + "epoch": 66.87252124645893, + "grad_norm": 3.6262065352479524, + "learning_rate": 5e-06, + "loss": 0.0666, + "num_input_tokens_seen": 1010860780, + "step": 5891 + }, + { + "epoch": 66.87252124645893, + "loss": 0.03483825922012329, + "loss_ce": 1.7704283891362138e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1010860780, + "step": 5891 + }, + { + "epoch": 66.88385269121812, + "grad_norm": 3.7192873429292064, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 1011032796, + "step": 5892 + }, + { + "epoch": 66.88385269121812, + "loss": 0.040547676384449005, + "loss_ce": 2.033288728853222e-05, + "loss_iou": 0.37109375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1011032796, + "step": 5892 + }, + { + "epoch": 66.89518413597733, + "grad_norm": 3.230528540400533, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 1011203144, + "step": 5893 + }, + { + "epoch": 66.89518413597733, + "loss": 0.04866734892129898, + "loss_ce": 0.00826207548379898, + "loss_iou": 0.435546875, + "loss_num": 0.008056640625, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1011203144, + "step": 5893 + }, + { + "epoch": 66.90651558073654, + "grad_norm": 3.128661556011401, + "learning_rate": 5e-06, + "loss": 0.0452, + "num_input_tokens_seen": 1011375216, + "step": 5894 + }, + { + "epoch": 66.90651558073654, + "loss": 0.03125831484794617, + "loss_ce": 5.40924520464614e-05, + "loss_iou": 0.26171875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1011375216, + "step": 5894 + }, + { + "epoch": 66.91784702549575, + "grad_norm": 3.4362861421303217, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 1011547284, + "step": 5895 + }, + { + "epoch": 66.91784702549575, + "loss": 0.037881843745708466, + "loss_ce": 2.478659553162288e-05, + "loss_iou": 0.48046875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 1011547284, + "step": 5895 + }, + { + "epoch": 66.92917847025495, + "grad_norm": 4.216842845805076, + "learning_rate": 5e-06, + "loss": 0.0589, + "num_input_tokens_seen": 1011719284, + "step": 5896 + }, + { + "epoch": 66.92917847025495, + "loss": 0.029956012964248657, + "loss_ce": 1.8267914128955454e-05, + "loss_iou": 0.51953125, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 1011719284, + "step": 5896 + }, + { + "epoch": 66.94050991501416, + "grad_norm": 3.3632649210293906, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 1011890616, + "step": 5897 + }, + { + "epoch": 66.94050991501416, + "loss": 0.06394051015377045, + "loss_ce": 2.1438916519400664e-05, + "loss_iou": 0.4765625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 1011890616, + "step": 5897 + }, + { + "epoch": 66.95184135977337, + "grad_norm": 3.5603245536371477, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 1012062656, + "step": 5898 + }, + { + "epoch": 66.95184135977337, + "loss": 0.0314258374273777, + "loss_ce": 2.325181230844464e-05, + "loss_iou": 0.2734375, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 1012062656, + "step": 5898 + }, + { + "epoch": 66.96317280453258, + "grad_norm": 3.295416702105483, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 1012233696, + "step": 5899 + }, + { + "epoch": 66.96317280453258, + "loss": 0.06347407400608063, + "loss_ce": 0.0002569097268860787, + "loss_iou": 0.51953125, + "loss_num": 0.01263427734375, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 1012233696, + "step": 5899 + }, + { + "epoch": 66.97450424929178, + "grad_norm": 2.776204452955552, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 1012405572, + "step": 5900 + }, + { + "epoch": 66.97450424929178, + "loss": 0.032131895422935486, + "loss_ce": 4.266363612259738e-05, + "loss_iou": 0.427734375, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1012405572, + "step": 5900 + }, + { + "epoch": 66.98583569405099, + "grad_norm": 2.4209620667450964, + "learning_rate": 5e-06, + "loss": 0.0396, + "num_input_tokens_seen": 1012577560, + "step": 5901 + }, + { + "epoch": 66.98583569405099, + "loss": 0.03457998111844063, + "loss_ce": 2.6453566533746198e-05, + "loss_iou": 0.392578125, + "loss_num": 0.00689697265625, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1012577560, + "step": 5901 + }, + { + "epoch": 66.9971671388102, + "grad_norm": 2.9366039666022847, + "learning_rate": 5e-06, + "loss": 0.0755, + "num_input_tokens_seen": 1012748648, + "step": 5902 + }, + { + "epoch": 66.9971671388102, + "loss": 0.03843795508146286, + "loss_ce": 3.158029721817002e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1012748648, + "step": 5902 + }, + { + "epoch": 66.9971671388102, + "loss": 0.032693568617105484, + "loss_ce": 9.243114618584514e-06, + "loss_iou": 0.41015625, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 1012791668, + "step": 5902 + }, + { + "epoch": 67.0084985835694, + "grad_norm": 3.410598001843645, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 1012921052, + "step": 5903 + }, + { + "epoch": 67.0084985835694, + "loss": 0.0994865819811821, + "loss_ce": 1.453044023946859e-05, + "loss_iou": 0.51171875, + "loss_num": 0.0198974609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 1012921052, + "step": 5903 + }, + { + "epoch": 67.01983002832861, + "grad_norm": 3.139367601624398, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1013091632, + "step": 5904 + }, + { + "epoch": 67.01983002832861, + "loss": 0.07225705683231354, + "loss_ce": 2.194325134041719e-05, + "loss_iou": 0.4765625, + "loss_num": 0.01446533203125, + "loss_xval": 0.072265625, + "num_input_tokens_seen": 1013091632, + "step": 5904 + }, + { + "epoch": 67.03116147308782, + "grad_norm": 3.170914475670413, + "learning_rate": 5e-06, + "loss": 0.0306, + "num_input_tokens_seen": 1013263640, + "step": 5905 + }, + { + "epoch": 67.03116147308782, + "loss": 0.028303295373916626, + "loss_ce": 4.401723708724603e-05, + "loss_iou": 0.0, + "loss_num": 0.005645751953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 1013263640, + "step": 5905 + }, + { + "epoch": 67.04249291784703, + "grad_norm": 3.4233038333469907, + "learning_rate": 5e-06, + "loss": 0.0479, + "num_input_tokens_seen": 1013435128, + "step": 5906 + }, + { + "epoch": 67.04249291784703, + "loss": 0.06967861205339432, + "loss_ce": 6.983099865465192e-06, + "loss_iou": 0.40234375, + "loss_num": 0.013916015625, + "loss_xval": 0.06982421875, + "num_input_tokens_seen": 1013435128, + "step": 5906 + }, + { + "epoch": 67.05382436260624, + "grad_norm": 2.9230778941112296, + "learning_rate": 5e-06, + "loss": 0.0651, + "num_input_tokens_seen": 1013606892, + "step": 5907 + }, + { + "epoch": 67.05382436260624, + "loss": 0.03308812528848648, + "loss_ce": 3.7587400584015995e-05, + "loss_iou": 0.41015625, + "loss_num": 0.006591796875, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 1013606892, + "step": 5907 + }, + { + "epoch": 67.06515580736544, + "grad_norm": 3.149083378186109, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 1013777608, + "step": 5908 + }, + { + "epoch": 67.06515580736544, + "loss": 0.03846941143274307, + "loss_ce": 1.7260059394175187e-05, + "loss_iou": 0.115234375, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1013777608, + "step": 5908 + }, + { + "epoch": 67.07648725212465, + "grad_norm": 3.822322036963989, + "learning_rate": 5e-06, + "loss": 0.0372, + "num_input_tokens_seen": 1013949232, + "step": 5909 + }, + { + "epoch": 67.07648725212465, + "loss": 0.0286090187728405, + "loss_ce": 5.98249607719481e-05, + "loss_iou": 0.1201171875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1013949232, + "step": 5909 + }, + { + "epoch": 67.08781869688386, + "grad_norm": 3.059393793797116, + "learning_rate": 5e-06, + "loss": 0.054, + "num_input_tokens_seen": 1014120344, + "step": 5910 + }, + { + "epoch": 67.08781869688386, + "loss": 0.07710748165845871, + "loss_ce": 0.00017267190560232848, + "loss_iou": 0.41015625, + "loss_num": 0.015380859375, + "loss_xval": 0.0771484375, + "num_input_tokens_seen": 1014120344, + "step": 5910 + }, + { + "epoch": 67.09915014164307, + "grad_norm": 3.357287027752127, + "learning_rate": 5e-06, + "loss": 0.0493, + "num_input_tokens_seen": 1014292544, + "step": 5911 + }, + { + "epoch": 67.09915014164307, + "loss": 0.026154689490795135, + "loss_ce": 3.164316149195656e-05, + "loss_iou": 0.546875, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 1014292544, + "step": 5911 + }, + { + "epoch": 67.11048158640227, + "grad_norm": 3.8670329388988627, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 1014464684, + "step": 5912 + }, + { + "epoch": 67.11048158640227, + "loss": 0.026670530438423157, + "loss_ce": 2.8684116841759533e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00531005859375, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 1014464684, + "step": 5912 + }, + { + "epoch": 67.12181303116148, + "grad_norm": 4.266413390132541, + "learning_rate": 5e-06, + "loss": 0.0575, + "num_input_tokens_seen": 1014635884, + "step": 5913 + }, + { + "epoch": 67.12181303116148, + "loss": 0.04703632742166519, + "loss_ce": 2.399746881565079e-05, + "loss_iou": 0.27734375, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 1014635884, + "step": 5913 + }, + { + "epoch": 67.13314447592067, + "grad_norm": 3.56594202447463, + "learning_rate": 5e-06, + "loss": 0.0523, + "num_input_tokens_seen": 1014806688, + "step": 5914 + }, + { + "epoch": 67.13314447592067, + "loss": 0.034070972353219986, + "loss_ce": 4.387211083667353e-05, + "loss_iou": 0.4375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1014806688, + "step": 5914 + }, + { + "epoch": 67.14447592067988, + "grad_norm": 3.8889687679784086, + "learning_rate": 5e-06, + "loss": 0.0407, + "num_input_tokens_seen": 1014977536, + "step": 5915 + }, + { + "epoch": 67.14447592067988, + "loss": 0.0505722351372242, + "loss_ce": 1.9869721654686145e-05, + "loss_iou": 0.470703125, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 1014977536, + "step": 5915 + }, + { + "epoch": 67.15580736543909, + "grad_norm": 4.063078445855036, + "learning_rate": 5e-06, + "loss": 0.06, + "num_input_tokens_seen": 1015148744, + "step": 5916 + }, + { + "epoch": 67.15580736543909, + "loss": 0.040352530777454376, + "loss_ce": 2.3553355276817456e-05, + "loss_iou": 0.4765625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 1015148744, + "step": 5916 + }, + { + "epoch": 67.1671388101983, + "grad_norm": 3.6187104370536463, + "learning_rate": 5e-06, + "loss": 0.0541, + "num_input_tokens_seen": 1015321004, + "step": 5917 + }, + { + "epoch": 67.1671388101983, + "loss": 0.03494476526975632, + "loss_ce": 1.7398597265128046e-05, + "loss_iou": 0.263671875, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1015321004, + "step": 5917 + }, + { + "epoch": 67.1784702549575, + "grad_norm": 3.874309356516632, + "learning_rate": 5e-06, + "loss": 0.0546, + "num_input_tokens_seen": 1015492936, + "step": 5918 + }, + { + "epoch": 67.1784702549575, + "loss": 0.05753310024738312, + "loss_ce": 2.2725695089320652e-05, + "loss_iou": 0.4375, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 1015492936, + "step": 5918 + }, + { + "epoch": 67.18980169971671, + "grad_norm": 3.495275834336859, + "learning_rate": 5e-06, + "loss": 0.0563, + "num_input_tokens_seen": 1015664848, + "step": 5919 + }, + { + "epoch": 67.18980169971671, + "loss": 0.03310427442193031, + "loss_ce": 2.322067302884534e-05, + "loss_iou": 0.55078125, + "loss_num": 0.006591796875, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1015664848, + "step": 5919 + }, + { + "epoch": 67.20113314447592, + "grad_norm": 5.61487747988574, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 1015835460, + "step": 5920 + }, + { + "epoch": 67.20113314447592, + "loss": 0.057879336178302765, + "loss_ce": 1.8009741324931383e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 1015835460, + "step": 5920 + }, + { + "epoch": 67.21246458923513, + "grad_norm": 3.28088184153324, + "learning_rate": 5e-06, + "loss": 0.0362, + "num_input_tokens_seen": 1016006280, + "step": 5921 + }, + { + "epoch": 67.21246458923513, + "loss": 0.03148248791694641, + "loss_ce": 1.8867329345084727e-05, + "loss_iou": 0.1640625, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 1016006280, + "step": 5921 + }, + { + "epoch": 67.22379603399433, + "grad_norm": 3.1613892748712806, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 1016176752, + "step": 5922 + }, + { + "epoch": 67.22379603399433, + "loss": 0.033312857151031494, + "loss_ce": 1.818059172364883e-05, + "loss_iou": 0.40234375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1016176752, + "step": 5922 + }, + { + "epoch": 67.23512747875354, + "grad_norm": 3.2289602236315087, + "learning_rate": 5e-06, + "loss": 0.0506, + "num_input_tokens_seen": 1016348216, + "step": 5923 + }, + { + "epoch": 67.23512747875354, + "loss": 0.0476088672876358, + "loss_ce": 1.6705838788766414e-05, + "loss_iou": 0.404296875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 1016348216, + "step": 5923 + }, + { + "epoch": 67.24645892351275, + "grad_norm": 4.005952449048984, + "learning_rate": 5e-06, + "loss": 0.0504, + "num_input_tokens_seen": 1016520284, + "step": 5924 + }, + { + "epoch": 67.24645892351275, + "loss": 0.03556230291724205, + "loss_ce": 9.32509919948643e-06, + "loss_iou": 0.369140625, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 1016520284, + "step": 5924 + }, + { + "epoch": 67.25779036827196, + "grad_norm": 3.807551435154973, + "learning_rate": 5e-06, + "loss": 0.047, + "num_input_tokens_seen": 1016691804, + "step": 5925 + }, + { + "epoch": 67.25779036827196, + "loss": 0.027643997222185135, + "loss_ce": 2.5588775315554813e-05, + "loss_iou": 0.181640625, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 1016691804, + "step": 5925 + }, + { + "epoch": 67.26912181303116, + "grad_norm": 2.1898374351273358, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 1016861324, + "step": 5926 + }, + { + "epoch": 67.26912181303116, + "loss": 0.027906589210033417, + "loss_ce": 4.4037507905159146e-05, + "loss_iou": 0.349609375, + "loss_num": 0.005584716796875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 1016861324, + "step": 5926 + }, + { + "epoch": 67.28045325779037, + "grad_norm": 2.6943783282324074, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 1017033324, + "step": 5927 + }, + { + "epoch": 67.28045325779037, + "loss": 0.03989099711179733, + "loss_ce": 5.030314059695229e-05, + "loss_iou": 0.2412109375, + "loss_num": 0.00799560546875, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 1017033324, + "step": 5927 + }, + { + "epoch": 67.29178470254958, + "grad_norm": 3.8670128059336544, + "learning_rate": 5e-06, + "loss": 0.0683, + "num_input_tokens_seen": 1017204996, + "step": 5928 + }, + { + "epoch": 67.29178470254958, + "loss": 0.0622432604432106, + "loss_ce": 1.791580689314287e-05, + "loss_iou": 0.443359375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 1017204996, + "step": 5928 + }, + { + "epoch": 67.30311614730878, + "grad_norm": 3.4525496258784427, + "learning_rate": 5e-06, + "loss": 0.0438, + "num_input_tokens_seen": 1017375868, + "step": 5929 + }, + { + "epoch": 67.30311614730878, + "loss": 0.028353175148367882, + "loss_ce": 3.286193896201439e-05, + "loss_iou": 0.330078125, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 1017375868, + "step": 5929 + }, + { + "epoch": 67.31444759206799, + "grad_norm": 3.3125732237198084, + "learning_rate": 5e-06, + "loss": 0.052, + "num_input_tokens_seen": 1017547372, + "step": 5930 + }, + { + "epoch": 67.31444759206799, + "loss": 0.03600083291530609, + "loss_ce": 2.0605415556929074e-05, + "loss_iou": 0.41796875, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1017547372, + "step": 5930 + }, + { + "epoch": 67.3257790368272, + "grad_norm": 2.6714252256092736, + "learning_rate": 5e-06, + "loss": 0.0346, + "num_input_tokens_seen": 1017719404, + "step": 5931 + }, + { + "epoch": 67.3257790368272, + "loss": 0.04466455802321434, + "loss_ce": 4.785820055985823e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 1017719404, + "step": 5931 + }, + { + "epoch": 67.33711048158641, + "grad_norm": 2.62488654195433, + "learning_rate": 5e-06, + "loss": 0.0437, + "num_input_tokens_seen": 1017889532, + "step": 5932 + }, + { + "epoch": 67.33711048158641, + "loss": 0.07380382716655731, + "loss_ce": 1.2323328519414645e-05, + "loss_iou": 0.1923828125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 1017889532, + "step": 5932 + }, + { + "epoch": 67.34844192634561, + "grad_norm": 2.9472113088538143, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 1018061060, + "step": 5933 + }, + { + "epoch": 67.34844192634561, + "loss": 0.02997974306344986, + "loss_ce": 2.6739504392025992e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 1018061060, + "step": 5933 + }, + { + "epoch": 67.35977337110482, + "grad_norm": 3.2866108667328433, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 1018233152, + "step": 5934 + }, + { + "epoch": 67.35977337110482, + "loss": 0.04820363596081734, + "loss_ce": 1.6378886357415468e-05, + "loss_iou": 0.291015625, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 1018233152, + "step": 5934 + }, + { + "epoch": 67.37110481586403, + "grad_norm": 3.776423617043664, + "learning_rate": 5e-06, + "loss": 0.0593, + "num_input_tokens_seen": 1018404816, + "step": 5935 + }, + { + "epoch": 67.37110481586403, + "loss": 0.032211679965257645, + "loss_ce": 1.5637106116628274e-05, + "loss_iou": 0.49609375, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1018404816, + "step": 5935 + }, + { + "epoch": 67.38243626062322, + "grad_norm": 3.1408827241364103, + "learning_rate": 5e-06, + "loss": 0.0365, + "num_input_tokens_seen": 1018575060, + "step": 5936 + }, + { + "epoch": 67.38243626062322, + "loss": 0.029682811349630356, + "loss_ce": 3.498440491966903e-05, + "loss_iou": 0.05419921875, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 1018575060, + "step": 5936 + }, + { + "epoch": 67.39376770538243, + "grad_norm": 3.1382655961215646, + "learning_rate": 5e-06, + "loss": 0.0736, + "num_input_tokens_seen": 1018746740, + "step": 5937 + }, + { + "epoch": 67.39376770538243, + "loss": 0.09658536314964294, + "loss_ce": 1.248701119038742e-05, + "loss_iou": 0.4921875, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 1018746740, + "step": 5937 + }, + { + "epoch": 67.40509915014164, + "grad_norm": 3.2910981273672024, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 1018918468, + "step": 5938 + }, + { + "epoch": 67.40509915014164, + "loss": 0.023494010791182518, + "loss_ce": 5.651146057061851e-05, + "loss_iou": 0.205078125, + "loss_num": 0.00469970703125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 1018918468, + "step": 5938 + }, + { + "epoch": 67.41643059490085, + "grad_norm": 3.689264062316093, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 1019089508, + "step": 5939 + }, + { + "epoch": 67.41643059490085, + "loss": 0.03473558649420738, + "loss_ce": 5.235806020209566e-05, + "loss_iou": 0.546875, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1019089508, + "step": 5939 + }, + { + "epoch": 67.42776203966005, + "grad_norm": 3.3120753030258636, + "learning_rate": 5e-06, + "loss": 0.0573, + "num_input_tokens_seen": 1019261244, + "step": 5940 + }, + { + "epoch": 67.42776203966005, + "loss": 0.034588318318128586, + "loss_ce": 2.7160000172443688e-05, + "loss_iou": 0.2421875, + "loss_num": 0.00689697265625, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1019261244, + "step": 5940 + }, + { + "epoch": 67.43909348441926, + "grad_norm": 2.014337567173122, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 1019431908, + "step": 5941 + }, + { + "epoch": 67.43909348441926, + "loss": 0.08788033574819565, + "loss_ce": 2.0221694285282865e-05, + "loss_iou": 0.115234375, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 1019431908, + "step": 5941 + }, + { + "epoch": 67.45042492917847, + "grad_norm": 1.8326181119829843, + "learning_rate": 5e-06, + "loss": 0.0351, + "num_input_tokens_seen": 1019602880, + "step": 5942 + }, + { + "epoch": 67.45042492917847, + "loss": 0.05754459649324417, + "loss_ce": 3.421810470172204e-05, + "loss_iou": 0.2734375, + "loss_num": 0.011474609375, + "loss_xval": 0.0576171875, + "num_input_tokens_seen": 1019602880, + "step": 5942 + }, + { + "epoch": 67.46175637393767, + "grad_norm": 2.4943084113623866, + "learning_rate": 5e-06, + "loss": 0.0356, + "num_input_tokens_seen": 1019773856, + "step": 5943 + }, + { + "epoch": 67.46175637393767, + "loss": 0.03813718259334564, + "loss_ce": 2.835897248587571e-05, + "loss_iou": 0.26171875, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 1019773856, + "step": 5943 + }, + { + "epoch": 67.47308781869688, + "grad_norm": 3.3729050981254973, + "learning_rate": 5e-06, + "loss": 0.0401, + "num_input_tokens_seen": 1019945988, + "step": 5944 + }, + { + "epoch": 67.47308781869688, + "loss": 0.031304746866226196, + "loss_ce": 8.967769645096269e-06, + "loss_iou": 0.39453125, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1019945988, + "step": 5944 + }, + { + "epoch": 67.48441926345609, + "grad_norm": 3.6145136190826728, + "learning_rate": 5e-06, + "loss": 0.0452, + "num_input_tokens_seen": 1020117512, + "step": 5945 + }, + { + "epoch": 67.48441926345609, + "loss": 0.06070343777537346, + "loss_ce": 1.923059426189866e-05, + "loss_iou": 0.431640625, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 1020117512, + "step": 5945 + }, + { + "epoch": 67.4957507082153, + "grad_norm": 3.7382029337158165, + "learning_rate": 5e-06, + "loss": 0.0645, + "num_input_tokens_seen": 1020289056, + "step": 5946 + }, + { + "epoch": 67.4957507082153, + "loss": 0.08486691117286682, + "loss_ce": 1.278760282730218e-05, + "loss_iou": 0.2060546875, + "loss_num": 0.0169677734375, + "loss_xval": 0.0849609375, + "num_input_tokens_seen": 1020289056, + "step": 5946 + }, + { + "epoch": 67.5070821529745, + "grad_norm": 3.9162870187893954, + "learning_rate": 5e-06, + "loss": 0.0621, + "num_input_tokens_seen": 1020460748, + "step": 5947 + }, + { + "epoch": 67.5070821529745, + "loss": 0.03843928873538971, + "loss_ce": 3.2913085306063294e-05, + "loss_iou": 0.0546875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1020460748, + "step": 5947 + }, + { + "epoch": 67.51841359773371, + "grad_norm": 3.178981981993921, + "learning_rate": 5e-06, + "loss": 0.0313, + "num_input_tokens_seen": 1020629756, + "step": 5948 + }, + { + "epoch": 67.51841359773371, + "loss": 0.03202046826481819, + "loss_ce": 2.2787029593018815e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1020629756, + "step": 5948 + }, + { + "epoch": 67.52974504249292, + "grad_norm": 4.49468163895135, + "learning_rate": 5e-06, + "loss": 0.0354, + "num_input_tokens_seen": 1020801676, + "step": 5949 + }, + { + "epoch": 67.52974504249292, + "loss": 0.03027253784239292, + "loss_ce": 1.435909689462278e-05, + "loss_iou": 0.400390625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1020801676, + "step": 5949 + }, + { + "epoch": 67.54107648725213, + "grad_norm": 2.870764415016361, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1020973340, + "step": 5950 + }, + { + "epoch": 67.54107648725213, + "loss": 0.04656238853931427, + "loss_ce": 2.308426337549463e-05, + "loss_iou": 0.392578125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 1020973340, + "step": 5950 + }, + { + "epoch": 67.55240793201133, + "grad_norm": 2.152731324318681, + "learning_rate": 5e-06, + "loss": 0.0544, + "num_input_tokens_seen": 1021144032, + "step": 5951 + }, + { + "epoch": 67.55240793201133, + "loss": 0.09358450025320053, + "loss_ce": 1.760596023814287e-05, + "loss_iou": 0.169921875, + "loss_num": 0.0186767578125, + "loss_xval": 0.09375, + "num_input_tokens_seen": 1021144032, + "step": 5951 + }, + { + "epoch": 67.56373937677054, + "grad_norm": 2.614658031229655, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 1021315680, + "step": 5952 + }, + { + "epoch": 67.56373937677054, + "loss": 0.029144007712602615, + "loss_ce": 1.4978529179643374e-05, + "loss_iou": 0.482421875, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 1021315680, + "step": 5952 + }, + { + "epoch": 67.57507082152975, + "grad_norm": 2.455817125054252, + "learning_rate": 5e-06, + "loss": 0.0781, + "num_input_tokens_seen": 1021487188, + "step": 5953 + }, + { + "epoch": 67.57507082152975, + "loss": 0.04054902493953705, + "loss_ce": 6.425943865906447e-06, + "loss_iou": 0.3984375, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1021487188, + "step": 5953 + }, + { + "epoch": 67.58640226628896, + "grad_norm": 2.7790801756106007, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 1021657144, + "step": 5954 + }, + { + "epoch": 67.58640226628896, + "loss": 0.05249221622943878, + "loss_ce": 2.4869255867088214e-05, + "loss_iou": 0.431640625, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 1021657144, + "step": 5954 + }, + { + "epoch": 67.59773371104816, + "grad_norm": 3.990036489675684, + "learning_rate": 5e-06, + "loss": 0.0739, + "num_input_tokens_seen": 1021828840, + "step": 5955 + }, + { + "epoch": 67.59773371104816, + "loss": 0.11182501912117004, + "loss_ce": 0.0006342322449199855, + "loss_iou": 0.4765625, + "loss_num": 0.022216796875, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 1021828840, + "step": 5955 + }, + { + "epoch": 67.60906515580737, + "grad_norm": 3.3139858742748847, + "learning_rate": 5e-06, + "loss": 0.0362, + "num_input_tokens_seen": 1022000628, + "step": 5956 + }, + { + "epoch": 67.60906515580737, + "loss": 0.024956900626420975, + "loss_ce": 2.4039345589699224e-05, + "loss_iou": 0.404296875, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 1022000628, + "step": 5956 + }, + { + "epoch": 67.62039660056658, + "grad_norm": 3.325913767312133, + "learning_rate": 5e-06, + "loss": 0.0474, + "num_input_tokens_seen": 1022171756, + "step": 5957 + }, + { + "epoch": 67.62039660056658, + "loss": 0.03688191995024681, + "loss_ce": 1.6683925423421897e-05, + "loss_iou": 0.53515625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1022171756, + "step": 5957 + }, + { + "epoch": 67.63172804532577, + "grad_norm": 3.9717894907036566, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 1022341876, + "step": 5958 + }, + { + "epoch": 67.63172804532577, + "loss": 0.0380752868950367, + "loss_ce": 1.9868448362103663e-05, + "loss_iou": 0.34375, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 1022341876, + "step": 5958 + }, + { + "epoch": 67.64305949008498, + "grad_norm": 3.0809299599576145, + "learning_rate": 5e-06, + "loss": 0.0405, + "num_input_tokens_seen": 1022511024, + "step": 5959 + }, + { + "epoch": 67.64305949008498, + "loss": 0.03251413628458977, + "loss_ce": 1.2914847502543125e-05, + "loss_iou": 0.47265625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 1022511024, + "step": 5959 + }, + { + "epoch": 67.65439093484419, + "grad_norm": 2.9675342226452934, + "learning_rate": 5e-06, + "loss": 0.037, + "num_input_tokens_seen": 1022683032, + "step": 5960 + }, + { + "epoch": 67.65439093484419, + "loss": 0.04448346048593521, + "loss_ce": 0.011158263310790062, + "loss_iou": 0.34375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1022683032, + "step": 5960 + }, + { + "epoch": 67.6657223796034, + "grad_norm": 3.3878513421076475, + "learning_rate": 5e-06, + "loss": 0.0365, + "num_input_tokens_seen": 1022854104, + "step": 5961 + }, + { + "epoch": 67.6657223796034, + "loss": 0.04200320318341255, + "loss_ce": 4.1534469346515834e-05, + "loss_iou": 0.3203125, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1022854104, + "step": 5961 + }, + { + "epoch": 67.6770538243626, + "grad_norm": 2.931597839566919, + "learning_rate": 5e-06, + "loss": 0.062, + "num_input_tokens_seen": 1023026068, + "step": 5962 + }, + { + "epoch": 67.6770538243626, + "loss": 0.09601771831512451, + "loss_ce": 9.412933650310151e-06, + "loss_iou": 0.427734375, + "loss_num": 0.0191650390625, + "loss_xval": 0.09619140625, + "num_input_tokens_seen": 1023026068, + "step": 5962 + }, + { + "epoch": 67.68838526912181, + "grad_norm": 2.9075194371976374, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 1023197708, + "step": 5963 + }, + { + "epoch": 67.68838526912181, + "loss": 0.027083203196525574, + "loss_ce": 2.173969551222399e-05, + "loss_iou": 0.37890625, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1023197708, + "step": 5963 + }, + { + "epoch": 67.69971671388102, + "grad_norm": 3.06052980489186, + "learning_rate": 5e-06, + "loss": 0.0483, + "num_input_tokens_seen": 1023369932, + "step": 5964 + }, + { + "epoch": 67.69971671388102, + "loss": 0.047607045620679855, + "loss_ce": 3.0140927265165374e-05, + "loss_iou": 0.23828125, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 1023369932, + "step": 5964 + }, + { + "epoch": 67.71104815864022, + "grad_norm": 2.233890716038348, + "learning_rate": 5e-06, + "loss": 0.0283, + "num_input_tokens_seen": 1023541852, + "step": 5965 + }, + { + "epoch": 67.71104815864022, + "loss": 0.020587986335158348, + "loss_ce": 2.676712756510824e-05, + "loss_iou": 0.359375, + "loss_num": 0.004119873046875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 1023541852, + "step": 5965 + }, + { + "epoch": 67.72237960339943, + "grad_norm": 2.5856713842759476, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 1023712040, + "step": 5966 + }, + { + "epoch": 67.72237960339943, + "loss": 0.04573599249124527, + "loss_ce": 2.066115484922193e-05, + "loss_iou": 0.376953125, + "loss_num": 0.0091552734375, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 1023712040, + "step": 5966 + }, + { + "epoch": 67.73371104815864, + "grad_norm": 2.9594529508452845, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 1023883848, + "step": 5967 + }, + { + "epoch": 67.73371104815864, + "loss": 0.038973450660705566, + "loss_ce": 2.5389812435605563e-05, + "loss_iou": 0.435546875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 1023883848, + "step": 5967 + }, + { + "epoch": 67.74504249291785, + "grad_norm": 4.5825447113779205, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 1024055500, + "step": 5968 + }, + { + "epoch": 67.74504249291785, + "loss": 0.028818221762776375, + "loss_ce": 3.251616726629436e-05, + "loss_iou": 0.189453125, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 1024055500, + "step": 5968 + }, + { + "epoch": 67.75637393767705, + "grad_norm": 3.553929893449383, + "learning_rate": 5e-06, + "loss": 0.0833, + "num_input_tokens_seen": 1024226744, + "step": 5969 + }, + { + "epoch": 67.75637393767705, + "loss": 0.08989392220973969, + "loss_ce": 3.491756797302514e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08984375, + "num_input_tokens_seen": 1024226744, + "step": 5969 + }, + { + "epoch": 67.76770538243626, + "grad_norm": 3.081348734401079, + "learning_rate": 5e-06, + "loss": 0.0291, + "num_input_tokens_seen": 1024398684, + "step": 5970 + }, + { + "epoch": 67.76770538243626, + "loss": 0.02868819236755371, + "loss_ce": 3.218708297936246e-05, + "loss_iou": 0.296875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 1024398684, + "step": 5970 + }, + { + "epoch": 67.77903682719547, + "grad_norm": 3.6556474210840806, + "learning_rate": 5e-06, + "loss": 0.0303, + "num_input_tokens_seen": 1024570544, + "step": 5971 + }, + { + "epoch": 67.77903682719547, + "loss": 0.03137829899787903, + "loss_ce": 2.148860221495852e-05, + "loss_iou": 0.384765625, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1024570544, + "step": 5971 + }, + { + "epoch": 67.79036827195468, + "grad_norm": 4.246381103833915, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 1024742620, + "step": 5972 + }, + { + "epoch": 67.79036827195468, + "loss": 0.04452531412243843, + "loss_ce": 3.068404839723371e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 1024742620, + "step": 5972 + }, + { + "epoch": 67.80169971671388, + "grad_norm": 3.622694740462554, + "learning_rate": 5e-06, + "loss": 0.0532, + "num_input_tokens_seen": 1024914520, + "step": 5973 + }, + { + "epoch": 67.80169971671388, + "loss": 0.03686343505978584, + "loss_ce": 2.8719499823637307e-05, + "loss_iou": 0.388671875, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1024914520, + "step": 5973 + }, + { + "epoch": 67.81303116147309, + "grad_norm": 4.308399468186588, + "learning_rate": 5e-06, + "loss": 0.07, + "num_input_tokens_seen": 1025082984, + "step": 5974 + }, + { + "epoch": 67.81303116147309, + "loss": 0.09234784543514252, + "loss_ce": 0.0019852956756949425, + "loss_iou": 0.326171875, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 1025082984, + "step": 5974 + }, + { + "epoch": 67.8243626062323, + "grad_norm": 3.6874468633881823, + "learning_rate": 5e-06, + "loss": 0.0398, + "num_input_tokens_seen": 1025254168, + "step": 5975 + }, + { + "epoch": 67.8243626062323, + "loss": 0.04916127026081085, + "loss_ce": 0.00010426143853692338, + "loss_iou": 0.404296875, + "loss_num": 0.00982666015625, + "loss_xval": 0.049072265625, + "num_input_tokens_seen": 1025254168, + "step": 5975 + }, + { + "epoch": 67.8356940509915, + "grad_norm": 3.1187827386990836, + "learning_rate": 5e-06, + "loss": 0.0337, + "num_input_tokens_seen": 1025425440, + "step": 5976 + }, + { + "epoch": 67.8356940509915, + "loss": 0.0417618528008461, + "loss_ce": 2.9063037800369784e-05, + "loss_iou": 0.44921875, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 1025425440, + "step": 5976 + }, + { + "epoch": 67.84702549575071, + "grad_norm": 3.2357237380576946, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 1025597704, + "step": 5977 + }, + { + "epoch": 67.84702549575071, + "loss": 0.02934318408370018, + "loss_ce": 3.1050603865878657e-05, + "loss_iou": 0.353515625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 1025597704, + "step": 5977 + }, + { + "epoch": 67.85835694050992, + "grad_norm": 3.711116833290631, + "learning_rate": 5e-06, + "loss": 0.0578, + "num_input_tokens_seen": 1025767168, + "step": 5978 + }, + { + "epoch": 67.85835694050992, + "loss": 0.0322192944586277, + "loss_ce": 3.85077582905069e-05, + "loss_iou": 0.3984375, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1025767168, + "step": 5978 + }, + { + "epoch": 67.86968838526913, + "grad_norm": 3.4783556474564588, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 1025939408, + "step": 5979 + }, + { + "epoch": 67.86968838526913, + "loss": 0.13107489049434662, + "loss_ce": 1.7151229258161038e-05, + "loss_iou": 0.578125, + "loss_num": 0.0262451171875, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 1025939408, + "step": 5979 + }, + { + "epoch": 67.88101983002832, + "grad_norm": 3.548318530409953, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 1026111376, + "step": 5980 + }, + { + "epoch": 67.88101983002832, + "loss": 0.03401817008852959, + "loss_ce": 2.1587828086921945e-05, + "loss_iou": 0.396484375, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1026111376, + "step": 5980 + }, + { + "epoch": 67.89235127478753, + "grad_norm": 3.207230953688128, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 1026282652, + "step": 5981 + }, + { + "epoch": 67.89235127478753, + "loss": 0.03093486651778221, + "loss_ce": 2.0560566554195248e-05, + "loss_iou": 0.671875, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 1026282652, + "step": 5981 + }, + { + "epoch": 67.90368271954674, + "grad_norm": 3.64447854110498, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 1026454576, + "step": 5982 + }, + { + "epoch": 67.90368271954674, + "loss": 0.02514209970831871, + "loss_ce": 2.613234391901642e-05, + "loss_iou": 0.5078125, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 1026454576, + "step": 5982 + }, + { + "epoch": 67.91501416430594, + "grad_norm": 3.8475526626672796, + "learning_rate": 5e-06, + "loss": 0.062, + "num_input_tokens_seen": 1026625752, + "step": 5983 + }, + { + "epoch": 67.91501416430594, + "loss": 0.09264905005693436, + "loss_ce": 0.0014014916960150003, + "loss_iou": 0.453125, + "loss_num": 0.018310546875, + "loss_xval": 0.09130859375, + "num_input_tokens_seen": 1026625752, + "step": 5983 + }, + { + "epoch": 67.92634560906515, + "grad_norm": 3.9230716331848154, + "learning_rate": 5e-06, + "loss": 0.0492, + "num_input_tokens_seen": 1026797564, + "step": 5984 + }, + { + "epoch": 67.92634560906515, + "loss": 0.04877150431275368, + "loss_ce": 0.0004011422861367464, + "loss_iou": 0.2734375, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 1026797564, + "step": 5984 + }, + { + "epoch": 67.93767705382436, + "grad_norm": 3.0421690646658455, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 1026969224, + "step": 5985 + }, + { + "epoch": 67.93767705382436, + "loss": 0.04318626597523689, + "loss_ce": 3.441072112764232e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1026969224, + "step": 5985 + }, + { + "epoch": 67.94900849858357, + "grad_norm": 2.6075163387190443, + "learning_rate": 5e-06, + "loss": 0.0325, + "num_input_tokens_seen": 1027141024, + "step": 5986 + }, + { + "epoch": 67.94900849858357, + "loss": 0.024318058043718338, + "loss_ce": 0.00027020624838769436, + "loss_iou": 0.390625, + "loss_num": 0.00482177734375, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 1027141024, + "step": 5986 + }, + { + "epoch": 67.96033994334277, + "grad_norm": 2.949921975660462, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 1027311160, + "step": 5987 + }, + { + "epoch": 67.96033994334277, + "loss": 0.04512227326631546, + "loss_ce": 0.0001241056015715003, + "loss_iou": 0.470703125, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 1027311160, + "step": 5987 + }, + { + "epoch": 67.97167138810198, + "grad_norm": 3.4021908133242165, + "learning_rate": 5e-06, + "loss": 0.0395, + "num_input_tokens_seen": 1027481832, + "step": 5988 + }, + { + "epoch": 67.97167138810198, + "loss": 0.024713270366191864, + "loss_ce": 3.980904148193076e-05, + "loss_iou": 0.3125, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 1027481832, + "step": 5988 + }, + { + "epoch": 67.98300283286119, + "grad_norm": 3.6481015501735263, + "learning_rate": 5e-06, + "loss": 0.0652, + "num_input_tokens_seen": 1027653396, + "step": 5989 + }, + { + "epoch": 67.98300283286119, + "loss": 0.036867640912532806, + "loss_ce": 0.00012447308108676225, + "loss_iou": 0.37109375, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1027653396, + "step": 5989 + }, + { + "epoch": 67.9943342776204, + "grad_norm": 3.1972775382174485, + "learning_rate": 5e-06, + "loss": 0.0356, + "num_input_tokens_seen": 1027824828, + "step": 5990 + }, + { + "epoch": 67.9943342776204, + "loss": 0.031084056943655014, + "loss_ce": 9.345810394734144e-05, + "loss_iou": 0.330078125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1027824828, + "step": 5990 + }, + { + "epoch": 67.9943342776204, + "loss": 0.044801052659749985, + "loss_ce": 0.00045901184785179794, + "loss_iou": 0.337890625, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 1027910640, + "step": 5990 + }, + { + "epoch": 68.0056657223796, + "grad_norm": 3.3392750182851, + "learning_rate": 5e-06, + "loss": 0.0441, + "num_input_tokens_seen": 1027996508, + "step": 5991 + }, + { + "epoch": 68.0056657223796, + "loss": 0.02234051190316677, + "loss_ce": 4.742142846225761e-05, + "loss_iou": 0.376953125, + "loss_num": 0.00445556640625, + "loss_xval": 0.0223388671875, + "num_input_tokens_seen": 1027996508, + "step": 5991 + }, + { + "epoch": 68.01699716713881, + "grad_norm": 3.717837503755839, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 1028167196, + "step": 5992 + }, + { + "epoch": 68.01699716713881, + "loss": 0.040140435099601746, + "loss_ce": 0.005914973560720682, + "loss_iou": 0.08642578125, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1028167196, + "step": 5992 + }, + { + "epoch": 68.02832861189802, + "grad_norm": 3.76317119270108, + "learning_rate": 5e-06, + "loss": 0.0327, + "num_input_tokens_seen": 1028339244, + "step": 5993 + }, + { + "epoch": 68.02832861189802, + "loss": 0.03492102399468422, + "loss_ce": 0.0001615022192709148, + "loss_iou": 0.443359375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1028339244, + "step": 5993 + }, + { + "epoch": 68.03966005665723, + "grad_norm": 3.6665597602083944, + "learning_rate": 5e-06, + "loss": 0.0634, + "num_input_tokens_seen": 1028511300, + "step": 5994 + }, + { + "epoch": 68.03966005665723, + "loss": 0.03729729726910591, + "loss_ce": 0.003819514298811555, + "loss_iou": 0.59375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1028511300, + "step": 5994 + }, + { + "epoch": 68.05099150141643, + "grad_norm": 3.3119987573277734, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 1028683484, + "step": 5995 + }, + { + "epoch": 68.05099150141643, + "loss": 0.03806358575820923, + "loss_ce": 2.3425462131854147e-05, + "loss_iou": 0.259765625, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 1028683484, + "step": 5995 + }, + { + "epoch": 68.06232294617564, + "grad_norm": 3.5869846460505164, + "learning_rate": 5e-06, + "loss": 0.0439, + "num_input_tokens_seen": 1028855680, + "step": 5996 + }, + { + "epoch": 68.06232294617564, + "loss": 0.058862023055553436, + "loss_ce": 6.990783731453121e-05, + "loss_iou": 0.45703125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 1028855680, + "step": 5996 + }, + { + "epoch": 68.07365439093485, + "grad_norm": 4.217078139634235, + "learning_rate": 5e-06, + "loss": 0.0457, + "num_input_tokens_seen": 1029027400, + "step": 5997 + }, + { + "epoch": 68.07365439093485, + "loss": 0.06039019301533699, + "loss_ce": 2.642503022798337e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0120849609375, + "loss_xval": 0.060302734375, + "num_input_tokens_seen": 1029027400, + "step": 5997 + }, + { + "epoch": 68.08498583569406, + "grad_norm": 3.9664111072443577, + "learning_rate": 5e-06, + "loss": 0.0604, + "num_input_tokens_seen": 1029199416, + "step": 5998 + }, + { + "epoch": 68.08498583569406, + "loss": 0.0984577089548111, + "loss_ce": 2.325842433492653e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0196533203125, + "loss_xval": 0.0986328125, + "num_input_tokens_seen": 1029199416, + "step": 5998 + }, + { + "epoch": 68.09631728045326, + "grad_norm": 3.3733826022513007, + "learning_rate": 5e-06, + "loss": 0.031, + "num_input_tokens_seen": 1029371352, + "step": 5999 + }, + { + "epoch": 68.09631728045326, + "loss": 0.035213347524404526, + "loss_ce": 0.0036123963072896004, + "loss_iou": 0.46875, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 1029371352, + "step": 5999 + }, + { + "epoch": 68.10764872521247, + "grad_norm": 3.252815059622613, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.10764872521247, + "eval_seeclick_CIoU": 0.5355914235115051, + "eval_seeclick_GIoU": 0.5389807820320129, + "eval_seeclick_IoU": 0.5723366141319275, + "eval_seeclick_MAE_all": 0.06617512553930283, + "eval_seeclick_MAE_h": 0.030604134313762188, + "eval_seeclick_MAE_w": 0.10251282155513763, + "eval_seeclick_MAE_x": 0.10159862786531448, + "eval_seeclick_MAE_y": 0.02998490259051323, + "eval_seeclick_NUM_probability": 0.9999902844429016, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 0.9886494874954224, + "eval_seeclick_loss_ce": 0.7358945608139038, + "eval_seeclick_loss_iou": 0.53857421875, + "eval_seeclick_loss_num": 0.05078125, + "eval_seeclick_loss_xval": 0.25390625, + "eval_seeclick_runtime": 68.2118, + "eval_seeclick_samples_per_second": 0.63, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.10764872521247, + "eval_icons_CIoU": 0.7497312128543854, + "eval_icons_GIoU": 0.7520346641540527, + "eval_icons_IoU": 0.7618784606456757, + "eval_icons_MAE_all": 0.03181108087301254, + "eval_icons_MAE_h": 0.02700522541999817, + "eval_icons_MAE_w": 0.03983544930815697, + "eval_icons_MAE_x": 0.031000439543277025, + "eval_icons_MAE_y": 0.02940320409834385, + "eval_icons_NUM_probability": 0.9996767938137054, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.12723614275455475, + "eval_icons_loss_ce": 0.008824684657156467, + "eval_icons_loss_iou": 0.5606689453125, + "eval_icons_loss_num": 0.02147674560546875, + "eval_icons_loss_xval": 0.107391357421875, + "eval_icons_runtime": 90.7617, + "eval_icons_samples_per_second": 0.551, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.10764872521247, + "eval_screenspot_CIoU": 0.650463879108429, + "eval_screenspot_GIoU": 0.6525246898333231, + "eval_screenspot_IoU": 0.6760209004084269, + "eval_screenspot_MAE_all": 0.061041269451379776, + "eval_screenspot_MAE_h": 0.033623721450567245, + "eval_screenspot_MAE_w": 0.10974500266214211, + "eval_screenspot_MAE_x": 0.07187512516975403, + "eval_screenspot_MAE_y": 0.02892123783628146, + "eval_screenspot_NUM_probability": 0.9999533494313558, + "eval_screenspot_inside_bbox": 0.9287499984105428, + "eval_screenspot_loss": 0.2678432762622833, + "eval_screenspot_loss_ce": 0.015532740236570438, + "eval_screenspot_loss_iou": 0.503173828125, + "eval_screenspot_loss_num": 0.050409952799479164, + "eval_screenspot_loss_xval": 0.2519938151041667, + "eval_screenspot_runtime": 143.366, + "eval_screenspot_samples_per_second": 0.621, + "eval_screenspot_steps_per_second": 0.021, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.10764872521247, + "eval_compot_CIoU": 0.8502991795539856, + "eval_compot_GIoU": 0.844149112701416, + "eval_compot_IoU": 0.8653054535388947, + "eval_compot_MAE_all": 0.027687517926096916, + "eval_compot_MAE_h": 0.0234012259170413, + "eval_compot_MAE_w": 0.03197004646062851, + "eval_compot_MAE_x": 0.03141937404870987, + "eval_compot_MAE_y": 0.02395942062139511, + "eval_compot_NUM_probability": 0.9999739825725555, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.08844759315252304, + "eval_compot_loss_ce": 1.9227887605666183e-05, + "eval_compot_loss_iou": 0.5478515625, + "eval_compot_loss_num": 0.0153656005859375, + "eval_compot_loss_xval": 0.0768280029296875, + "eval_compot_runtime": 81.9061, + "eval_compot_samples_per_second": 0.61, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.10764872521247, + "eval_custom_ui_MAE_all": 0.019856389611959457, + "eval_custom_ui_MAE_x": 0.03132188878953457, + "eval_custom_ui_MAE_y": 0.00839089136570692, + "eval_custom_ui_NUM_probability": 0.9998889267444611, + "eval_custom_ui_loss": 0.21336433291435242, + "eval_custom_ui_loss_ce": 0.11450638249516487, + "eval_custom_ui_loss_num": 0.019094467163085938, + "eval_custom_ui_loss_xval": 0.0953521728515625, + "eval_custom_ui_runtime": 59.4084, + "eval_custom_ui_samples_per_second": 0.842, + "eval_custom_ui_steps_per_second": 0.034, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.10764872521247, + "loss": 0.2626144289970398, + "loss_ce": 0.1441451907157898, + "loss_iou": 0.0, + "loss_num": 0.023681640625, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 1029543136, + "step": 6000 + }, + { + "epoch": 68.11898016997168, + "grad_norm": 3.5271743295168365, + "learning_rate": 5e-06, + "loss": 0.0438, + "num_input_tokens_seen": 1029713776, + "step": 6001 + }, + { + "epoch": 68.11898016997168, + "loss": 0.07258082181215286, + "loss_ce": 5.579826029133983e-05, + "loss_iou": 0.33984375, + "loss_num": 0.01446533203125, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 1029713776, + "step": 6001 + }, + { + "epoch": 68.13031161473087, + "grad_norm": 3.7402497158029813, + "learning_rate": 5e-06, + "loss": 0.0398, + "num_input_tokens_seen": 1029884776, + "step": 6002 + }, + { + "epoch": 68.13031161473087, + "loss": 0.05135255306959152, + "loss_ce": 3.724159978446551e-05, + "loss_iou": 0.376953125, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 1029884776, + "step": 6002 + }, + { + "epoch": 68.14164305949008, + "grad_norm": 4.126340138413504, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 1030055976, + "step": 6003 + }, + { + "epoch": 68.14164305949008, + "loss": 0.05936101824045181, + "loss_ce": 0.006351982709020376, + "loss_iou": 0.48828125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 1030055976, + "step": 6003 + }, + { + "epoch": 68.15297450424929, + "grad_norm": 3.428165531831982, + "learning_rate": 5e-06, + "loss": 0.0501, + "num_input_tokens_seen": 1030226216, + "step": 6004 + }, + { + "epoch": 68.15297450424929, + "loss": 0.05081775784492493, + "loss_ce": 0.0006468593492172658, + "loss_iou": 0.400390625, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 1030226216, + "step": 6004 + }, + { + "epoch": 68.1643059490085, + "grad_norm": 3.043971150622534, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 1030397028, + "step": 6005 + }, + { + "epoch": 68.1643059490085, + "loss": 0.03183009475469589, + "loss_ce": 0.0040896194986999035, + "loss_iou": 0.50390625, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 1030397028, + "step": 6005 + }, + { + "epoch": 68.1756373937677, + "grad_norm": 3.081223206313506, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 1030568936, + "step": 6006 + }, + { + "epoch": 68.1756373937677, + "loss": 0.03006034716963768, + "loss_ce": 0.00068717694375664, + "loss_iou": 0.447265625, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 1030568936, + "step": 6006 + }, + { + "epoch": 68.18696883852691, + "grad_norm": 3.4644385077962907, + "learning_rate": 5e-06, + "loss": 0.0526, + "num_input_tokens_seen": 1030740296, + "step": 6007 + }, + { + "epoch": 68.18696883852691, + "loss": 0.030875112861394882, + "loss_ce": 0.001700307708233595, + "loss_iou": 0.3515625, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 1030740296, + "step": 6007 + }, + { + "epoch": 68.19830028328612, + "grad_norm": 3.693014341918968, + "learning_rate": 5e-06, + "loss": 0.0513, + "num_input_tokens_seen": 1030911176, + "step": 6008 + }, + { + "epoch": 68.19830028328612, + "loss": 0.06895988434553146, + "loss_ce": 3.5931345337303355e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 1030911176, + "step": 6008 + }, + { + "epoch": 68.20963172804532, + "grad_norm": 3.5069177145608217, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 1031082608, + "step": 6009 + }, + { + "epoch": 68.20963172804532, + "loss": 0.05237337946891785, + "loss_ce": 0.000707118830177933, + "loss_iou": 0.439453125, + "loss_num": 0.0103759765625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 1031082608, + "step": 6009 + }, + { + "epoch": 68.22096317280453, + "grad_norm": 3.771492340230315, + "learning_rate": 5e-06, + "loss": 0.0528, + "num_input_tokens_seen": 1031253728, + "step": 6010 + }, + { + "epoch": 68.22096317280453, + "loss": 0.05987983196973801, + "loss_ce": 0.0001264110323973, + "loss_iou": 0.33984375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 1031253728, + "step": 6010 + }, + { + "epoch": 68.23229461756374, + "grad_norm": 3.518149160080209, + "learning_rate": 5e-06, + "loss": 0.0376, + "num_input_tokens_seen": 1031425328, + "step": 6011 + }, + { + "epoch": 68.23229461756374, + "loss": 0.04251479730010033, + "loss_ce": 0.0012397740501910448, + "loss_iou": 0.54296875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041259765625, + "num_input_tokens_seen": 1031425328, + "step": 6011 + }, + { + "epoch": 68.24362606232295, + "grad_norm": 3.318930889226568, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 1031595404, + "step": 6012 + }, + { + "epoch": 68.24362606232295, + "loss": 0.022058993577957153, + "loss_ce": 8.633681864012033e-05, + "loss_iou": 0.3515625, + "loss_num": 0.00439453125, + "loss_xval": 0.02197265625, + "num_input_tokens_seen": 1031595404, + "step": 6012 + }, + { + "epoch": 68.25495750708215, + "grad_norm": 3.3077254795503803, + "learning_rate": 5e-06, + "loss": 0.0364, + "num_input_tokens_seen": 1031767040, + "step": 6013 + }, + { + "epoch": 68.25495750708215, + "loss": 0.03557591885328293, + "loss_ce": 0.00011449439625721425, + "loss_iou": 0.494140625, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 1031767040, + "step": 6013 + }, + { + "epoch": 68.26628895184136, + "grad_norm": 3.3666912902008925, + "learning_rate": 5e-06, + "loss": 0.0378, + "num_input_tokens_seen": 1031938692, + "step": 6014 + }, + { + "epoch": 68.26628895184136, + "loss": 0.030247028917074203, + "loss_ce": 0.0002329906856175512, + "loss_iou": 0.1728515625, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1031938692, + "step": 6014 + }, + { + "epoch": 68.27762039660057, + "grad_norm": 3.4014988637377868, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 1032110584, + "step": 6015 + }, + { + "epoch": 68.27762039660057, + "loss": 0.05519992485642433, + "loss_ce": 4.7031175199663267e-05, + "loss_iou": 0.26953125, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 1032110584, + "step": 6015 + }, + { + "epoch": 68.28895184135978, + "grad_norm": 3.3531783133478994, + "learning_rate": 5e-06, + "loss": 0.057, + "num_input_tokens_seen": 1032281612, + "step": 6016 + }, + { + "epoch": 68.28895184135978, + "loss": 0.06379108130931854, + "loss_ce": 5.511831113835797e-05, + "loss_iou": 0.32421875, + "loss_num": 0.01275634765625, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 1032281612, + "step": 6016 + }, + { + "epoch": 68.30028328611898, + "grad_norm": 3.305365183510959, + "learning_rate": 5e-06, + "loss": 0.0348, + "num_input_tokens_seen": 1032453304, + "step": 6017 + }, + { + "epoch": 68.30028328611898, + "loss": 0.02550710365176201, + "loss_ce": 2.492560633982066e-05, + "loss_iou": 0.44140625, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 1032453304, + "step": 6017 + }, + { + "epoch": 68.31161473087819, + "grad_norm": 2.913452668035862, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 1032624204, + "step": 6018 + }, + { + "epoch": 68.31161473087819, + "loss": 0.044340990483760834, + "loss_ce": 0.00015153891581576318, + "loss_iou": 0.267578125, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 1032624204, + "step": 6018 + }, + { + "epoch": 68.3229461756374, + "grad_norm": 3.2352085898190484, + "learning_rate": 5e-06, + "loss": 0.0491, + "num_input_tokens_seen": 1032794584, + "step": 6019 + }, + { + "epoch": 68.3229461756374, + "loss": 0.0685746893286705, + "loss_ce": 9.324547863798216e-05, + "loss_iou": 0.416015625, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 1032794584, + "step": 6019 + }, + { + "epoch": 68.3342776203966, + "grad_norm": 3.6128382610473855, + "learning_rate": 5e-06, + "loss": 0.0334, + "num_input_tokens_seen": 1032966740, + "step": 6020 + }, + { + "epoch": 68.3342776203966, + "loss": 0.03370404243469238, + "loss_ce": 1.2636179235414602e-05, + "loss_iou": 0.484375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1032966740, + "step": 6020 + }, + { + "epoch": 68.34560906515581, + "grad_norm": 3.4112636185759064, + "learning_rate": 5e-06, + "loss": 0.038, + "num_input_tokens_seen": 1033138656, + "step": 6021 + }, + { + "epoch": 68.34560906515581, + "loss": 0.042105741798877716, + "loss_ce": 9.829607733990997e-05, + "loss_iou": 0.61328125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1033138656, + "step": 6021 + }, + { + "epoch": 68.35694050991502, + "grad_norm": 3.4430701279976796, + "learning_rate": 5e-06, + "loss": 0.045, + "num_input_tokens_seen": 1033310520, + "step": 6022 + }, + { + "epoch": 68.35694050991502, + "loss": 0.033855095505714417, + "loss_ce": 5.687963493983261e-05, + "loss_iou": 0.345703125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1033310520, + "step": 6022 + }, + { + "epoch": 68.36827195467423, + "grad_norm": 3.694661922592041, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 1033481756, + "step": 6023 + }, + { + "epoch": 68.36827195467423, + "loss": 0.04631166160106659, + "loss_ce": 1.6495927411597222e-05, + "loss_iou": 0.2431640625, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 1033481756, + "step": 6023 + }, + { + "epoch": 68.37960339943342, + "grad_norm": 3.6295488126088777, + "learning_rate": 5e-06, + "loss": 0.072, + "num_input_tokens_seen": 1033653796, + "step": 6024 + }, + { + "epoch": 68.37960339943342, + "loss": 0.09208717197179794, + "loss_ce": 1.563353362143971e-05, + "loss_iou": 0.486328125, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 1033653796, + "step": 6024 + }, + { + "epoch": 68.39093484419263, + "grad_norm": 4.276016949571233, + "learning_rate": 5e-06, + "loss": 0.0417, + "num_input_tokens_seen": 1033825684, + "step": 6025 + }, + { + "epoch": 68.39093484419263, + "loss": 0.028254598379135132, + "loss_ce": 4.1098268411587924e-05, + "loss_iou": 0.45703125, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 1033825684, + "step": 6025 + }, + { + "epoch": 68.40226628895184, + "grad_norm": 3.3259976906337756, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 1033996880, + "step": 6026 + }, + { + "epoch": 68.40226628895184, + "loss": 0.061343878507614136, + "loss_ce": 0.0004918276681564748, + "loss_iou": 0.35546875, + "loss_num": 0.01214599609375, + "loss_xval": 0.060791015625, + "num_input_tokens_seen": 1033996880, + "step": 6026 + }, + { + "epoch": 68.41359773371104, + "grad_norm": 3.474480196527888, + "learning_rate": 5e-06, + "loss": 0.0546, + "num_input_tokens_seen": 1034168664, + "step": 6027 + }, + { + "epoch": 68.41359773371104, + "loss": 0.041671402752399445, + "loss_ce": 7.594195631099865e-05, + "loss_iou": 0.49609375, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 1034168664, + "step": 6027 + }, + { + "epoch": 68.42492917847025, + "grad_norm": 3.5426219987918084, + "learning_rate": 5e-06, + "loss": 0.0587, + "num_input_tokens_seen": 1034340060, + "step": 6028 + }, + { + "epoch": 68.42492917847025, + "loss": 0.02955089882016182, + "loss_ce": 2.514255902497098e-05, + "loss_iou": 0.419921875, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 1034340060, + "step": 6028 + }, + { + "epoch": 68.43626062322946, + "grad_norm": 3.1110351160121583, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 1034510928, + "step": 6029 + }, + { + "epoch": 68.43626062322946, + "loss": 0.026297051459550858, + "loss_ce": 2.9046279450994916e-05, + "loss_iou": 0.232421875, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 1034510928, + "step": 6029 + }, + { + "epoch": 68.44759206798867, + "grad_norm": 3.886738075438796, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 1034681744, + "step": 6030 + }, + { + "epoch": 68.44759206798867, + "loss": 0.0462028905749321, + "loss_ce": 5.268330642138608e-05, + "loss_iou": 0.431640625, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 1034681744, + "step": 6030 + }, + { + "epoch": 68.45892351274787, + "grad_norm": 3.5529192312233326, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 1034852356, + "step": 6031 + }, + { + "epoch": 68.45892351274787, + "loss": 0.039018433541059494, + "loss_ce": 0.0005968009936623275, + "loss_iou": 0.51953125, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1034852356, + "step": 6031 + }, + { + "epoch": 68.47025495750708, + "grad_norm": 2.371103549395048, + "learning_rate": 5e-06, + "loss": 0.0473, + "num_input_tokens_seen": 1035024596, + "step": 6032 + }, + { + "epoch": 68.47025495750708, + "loss": 0.033117782324552536, + "loss_ce": 2.146774932043627e-05, + "loss_iou": 0.306640625, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1035024596, + "step": 6032 + }, + { + "epoch": 68.48158640226629, + "grad_norm": 2.7509865723152487, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 1035196280, + "step": 6033 + }, + { + "epoch": 68.48158640226629, + "loss": 0.04057038575410843, + "loss_ce": 1.2522734323283657e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1035196280, + "step": 6033 + }, + { + "epoch": 68.4929178470255, + "grad_norm": 2.7816715261384037, + "learning_rate": 5e-06, + "loss": 0.0521, + "num_input_tokens_seen": 1035365140, + "step": 6034 + }, + { + "epoch": 68.4929178470255, + "loss": 0.028911568224430084, + "loss_ce": 0.0002479329996276647, + "loss_iou": 0.38671875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 1035365140, + "step": 6034 + }, + { + "epoch": 68.5042492917847, + "grad_norm": 2.8451632858893956, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 1035536688, + "step": 6035 + }, + { + "epoch": 68.5042492917847, + "loss": 0.04116325080394745, + "loss_ce": 5.607565981335938e-05, + "loss_iou": 0.404296875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1035536688, + "step": 6035 + }, + { + "epoch": 68.51558073654391, + "grad_norm": 4.862706201586702, + "learning_rate": 5e-06, + "loss": 0.0623, + "num_input_tokens_seen": 1035708404, + "step": 6036 + }, + { + "epoch": 68.51558073654391, + "loss": 0.031239386647939682, + "loss_ce": 0.004773015156388283, + "loss_iou": 0.458984375, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 1035708404, + "step": 6036 + }, + { + "epoch": 68.52691218130312, + "grad_norm": 3.2819575596366137, + "learning_rate": 5e-06, + "loss": 0.0688, + "num_input_tokens_seen": 1035877140, + "step": 6037 + }, + { + "epoch": 68.52691218130312, + "loss": 0.08378823101520538, + "loss_ce": 0.00030739742214791477, + "loss_iou": 0.453125, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 1035877140, + "step": 6037 + }, + { + "epoch": 68.53824362606233, + "grad_norm": 2.968974868863053, + "learning_rate": 5e-06, + "loss": 0.0318, + "num_input_tokens_seen": 1036047680, + "step": 6038 + }, + { + "epoch": 68.53824362606233, + "loss": 0.03451596945524216, + "loss_ce": 9.214033343596384e-05, + "loss_iou": 0.271484375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1036047680, + "step": 6038 + }, + { + "epoch": 68.54957507082153, + "grad_norm": 3.528375599277384, + "learning_rate": 5e-06, + "loss": 0.0573, + "num_input_tokens_seen": 1036219264, + "step": 6039 + }, + { + "epoch": 68.54957507082153, + "loss": 0.026953136548399925, + "loss_ce": 3.663220559246838e-05, + "loss_iou": 0.482421875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 1036219264, + "step": 6039 + }, + { + "epoch": 68.56090651558074, + "grad_norm": 3.956795338038998, + "learning_rate": 5e-06, + "loss": 0.0452, + "num_input_tokens_seen": 1036391036, + "step": 6040 + }, + { + "epoch": 68.56090651558074, + "loss": 0.03081545978784561, + "loss_ce": 9.95180889731273e-05, + "loss_iou": 0.46484375, + "loss_num": 0.006134033203125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 1036391036, + "step": 6040 + }, + { + "epoch": 68.57223796033995, + "grad_norm": 4.597195948699677, + "learning_rate": 5e-06, + "loss": 0.0592, + "num_input_tokens_seen": 1036562056, + "step": 6041 + }, + { + "epoch": 68.57223796033995, + "loss": 0.042367078363895416, + "loss_ce": 8.497034286847338e-05, + "loss_iou": 0.44140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 1036562056, + "step": 6041 + }, + { + "epoch": 68.58356940509915, + "grad_norm": 4.148584366220655, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 1036732424, + "step": 6042 + }, + { + "epoch": 68.58356940509915, + "loss": 0.036121152341365814, + "loss_ce": 1.885701931314543e-05, + "loss_iou": 0.484375, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1036732424, + "step": 6042 + }, + { + "epoch": 68.59490084985836, + "grad_norm": 3.700407683064666, + "learning_rate": 5e-06, + "loss": 0.0473, + "num_input_tokens_seen": 1036900404, + "step": 6043 + }, + { + "epoch": 68.59490084985836, + "loss": 0.04376261681318283, + "loss_ce": 1.5666475519537926e-05, + "loss_iou": 0.51953125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 1036900404, + "step": 6043 + }, + { + "epoch": 68.60623229461757, + "grad_norm": 3.6403288671672596, + "learning_rate": 5e-06, + "loss": 0.05, + "num_input_tokens_seen": 1037070436, + "step": 6044 + }, + { + "epoch": 68.60623229461757, + "loss": 0.04341422766447067, + "loss_ce": 1.8233891751151532e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 1037070436, + "step": 6044 + }, + { + "epoch": 68.61756373937678, + "grad_norm": 3.518003676297302, + "learning_rate": 5e-06, + "loss": 0.0409, + "num_input_tokens_seen": 1037240848, + "step": 6045 + }, + { + "epoch": 68.61756373937678, + "loss": 0.0429302416741848, + "loss_ce": 2.2526497559738345e-05, + "loss_iou": 0.66015625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 1037240848, + "step": 6045 + }, + { + "epoch": 68.62889518413597, + "grad_norm": 3.0585796312948714, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1037412604, + "step": 6046 + }, + { + "epoch": 68.62889518413597, + "loss": 0.03132706135511398, + "loss_ce": 3.128579191979952e-05, + "loss_iou": 0.41796875, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1037412604, + "step": 6046 + }, + { + "epoch": 68.64022662889518, + "grad_norm": 3.275455915419227, + "learning_rate": 5e-06, + "loss": 0.0471, + "num_input_tokens_seen": 1037584524, + "step": 6047 + }, + { + "epoch": 68.64022662889518, + "loss": 0.034866221249103546, + "loss_ce": 0.00024402780400123447, + "loss_iou": 0.37109375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1037584524, + "step": 6047 + }, + { + "epoch": 68.65155807365439, + "grad_norm": 3.4777212867909446, + "learning_rate": 5e-06, + "loss": 0.0509, + "num_input_tokens_seen": 1037756352, + "step": 6048 + }, + { + "epoch": 68.65155807365439, + "loss": 0.04108915105462074, + "loss_ce": 2.7749953005695716e-05, + "loss_iou": 0.3984375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1037756352, + "step": 6048 + }, + { + "epoch": 68.66288951841359, + "grad_norm": 3.7213501227500942, + "learning_rate": 5e-06, + "loss": 0.0405, + "num_input_tokens_seen": 1037928264, + "step": 6049 + }, + { + "epoch": 68.66288951841359, + "loss": 0.030276209115982056, + "loss_ce": 3.3289456041529775e-05, + "loss_iou": 0.474609375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1037928264, + "step": 6049 + }, + { + "epoch": 68.6742209631728, + "grad_norm": 3.6707126448892713, + "learning_rate": 5e-06, + "loss": 0.0461, + "num_input_tokens_seen": 1038099788, + "step": 6050 + }, + { + "epoch": 68.6742209631728, + "loss": 0.04867839068174362, + "loss_ce": 7.914793968666345e-05, + "loss_iou": 0.431640625, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 1038099788, + "step": 6050 + }, + { + "epoch": 68.68555240793201, + "grad_norm": 2.854682568962777, + "learning_rate": 5e-06, + "loss": 0.0434, + "num_input_tokens_seen": 1038271472, + "step": 6051 + }, + { + "epoch": 68.68555240793201, + "loss": 0.04150331765413284, + "loss_ce": 0.0005029530730098486, + "loss_iou": 0.40625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1038271472, + "step": 6051 + }, + { + "epoch": 68.69688385269122, + "grad_norm": 2.9646780319420847, + "learning_rate": 5e-06, + "loss": 0.0373, + "num_input_tokens_seen": 1038443476, + "step": 6052 + }, + { + "epoch": 68.69688385269122, + "loss": 0.043342575430870056, + "loss_ce": 2.2872329282108694e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1038443476, + "step": 6052 + }, + { + "epoch": 68.70821529745042, + "grad_norm": 3.3077502423371548, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 1038613584, + "step": 6053 + }, + { + "epoch": 68.70821529745042, + "loss": 0.038875751197338104, + "loss_ce": 4.2135252442676574e-05, + "loss_iou": 0.455078125, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 1038613584, + "step": 6053 + }, + { + "epoch": 68.71954674220963, + "grad_norm": 3.131680385838785, + "learning_rate": 5e-06, + "loss": 0.0416, + "num_input_tokens_seen": 1038785480, + "step": 6054 + }, + { + "epoch": 68.71954674220963, + "loss": 0.030011728405952454, + "loss_ce": 2.0577959730871953e-05, + "loss_iou": 0.314453125, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1038785480, + "step": 6054 + }, + { + "epoch": 68.73087818696884, + "grad_norm": 3.1863758737240397, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 1038956232, + "step": 6055 + }, + { + "epoch": 68.73087818696884, + "loss": 0.03920143470168114, + "loss_ce": 3.2123578421305865e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 1038956232, + "step": 6055 + }, + { + "epoch": 68.74220963172804, + "grad_norm": 3.3607580232360257, + "learning_rate": 5e-06, + "loss": 0.0368, + "num_input_tokens_seen": 1039128224, + "step": 6056 + }, + { + "epoch": 68.74220963172804, + "loss": 0.06432376801967621, + "loss_ce": 4.612480552168563e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 1039128224, + "step": 6056 + }, + { + "epoch": 68.75354107648725, + "grad_norm": 3.7702577525963137, + "learning_rate": 5e-06, + "loss": 0.0544, + "num_input_tokens_seen": 1039299376, + "step": 6057 + }, + { + "epoch": 68.75354107648725, + "loss": 0.03325468301773071, + "loss_ce": 2.1041156287537888e-05, + "loss_iou": 0.46484375, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1039299376, + "step": 6057 + }, + { + "epoch": 68.76487252124646, + "grad_norm": 3.1813173480590247, + "learning_rate": 5e-06, + "loss": 0.0622, + "num_input_tokens_seen": 1039470276, + "step": 6058 + }, + { + "epoch": 68.76487252124646, + "loss": 0.0703522115945816, + "loss_ce": 0.0001770367380231619, + "loss_iou": 0.41015625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 1039470276, + "step": 6058 + }, + { + "epoch": 68.77620396600567, + "grad_norm": 2.77467348214069, + "learning_rate": 5e-06, + "loss": 0.0301, + "num_input_tokens_seen": 1039641980, + "step": 6059 + }, + { + "epoch": 68.77620396600567, + "loss": 0.030268246307969093, + "loss_ce": 2.5325391106889583e-05, + "loss_iou": 0.48828125, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1039641980, + "step": 6059 + }, + { + "epoch": 68.78753541076487, + "grad_norm": 2.8478234165742937, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 1039813768, + "step": 6060 + }, + { + "epoch": 68.78753541076487, + "loss": 0.06594222784042358, + "loss_ce": 7.003456266829744e-05, + "loss_iou": 0.42578125, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 1039813768, + "step": 6060 + }, + { + "epoch": 68.79886685552408, + "grad_norm": 3.4791135533331805, + "learning_rate": 5e-06, + "loss": 0.0664, + "num_input_tokens_seen": 1039985460, + "step": 6061 + }, + { + "epoch": 68.79886685552408, + "loss": 0.05560876056551933, + "loss_ce": 8.202873868867755e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0111083984375, + "loss_xval": 0.055419921875, + "num_input_tokens_seen": 1039985460, + "step": 6061 + }, + { + "epoch": 68.81019830028329, + "grad_norm": 3.430999117936259, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 1040156356, + "step": 6062 + }, + { + "epoch": 68.81019830028329, + "loss": 0.070872962474823, + "loss_ce": 0.0006672763847745955, + "loss_iou": 0.30078125, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 1040156356, + "step": 6062 + }, + { + "epoch": 68.8215297450425, + "grad_norm": 3.3756290846917683, + "learning_rate": 5e-06, + "loss": 0.0348, + "num_input_tokens_seen": 1040328108, + "step": 6063 + }, + { + "epoch": 68.8215297450425, + "loss": 0.03579780459403992, + "loss_ce": 1.594540663063526e-05, + "loss_iou": 0.251953125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1040328108, + "step": 6063 + }, + { + "epoch": 68.8328611898017, + "grad_norm": 3.1679235989591086, + "learning_rate": 5e-06, + "loss": 0.071, + "num_input_tokens_seen": 1040499692, + "step": 6064 + }, + { + "epoch": 68.8328611898017, + "loss": 0.0366927795112133, + "loss_ce": 2.5910276235663332e-05, + "loss_iou": 0.326171875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1040499692, + "step": 6064 + }, + { + "epoch": 68.84419263456091, + "grad_norm": 4.155807624272627, + "learning_rate": 5e-06, + "loss": 0.0516, + "num_input_tokens_seen": 1040670536, + "step": 6065 + }, + { + "epoch": 68.84419263456091, + "loss": 0.03323636204004288, + "loss_ce": 4.849334800383076e-05, + "loss_iou": 0.416015625, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1040670536, + "step": 6065 + }, + { + "epoch": 68.85552407932012, + "grad_norm": 3.208425084220272, + "learning_rate": 5e-06, + "loss": 0.0733, + "num_input_tokens_seen": 1040841752, + "step": 6066 + }, + { + "epoch": 68.85552407932012, + "loss": 0.1210227832198143, + "loss_ce": 3.584111254895106e-05, + "loss_iou": 0.314453125, + "loss_num": 0.024169921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 1040841752, + "step": 6066 + }, + { + "epoch": 68.86685552407933, + "grad_norm": 4.031176393493782, + "learning_rate": 5e-06, + "loss": 0.0538, + "num_input_tokens_seen": 1041013608, + "step": 6067 + }, + { + "epoch": 68.86685552407933, + "loss": 0.03931416571140289, + "loss_ce": 0.0010451258858665824, + "loss_iou": 0.5234375, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1041013608, + "step": 6067 + }, + { + "epoch": 68.87818696883852, + "grad_norm": 3.8254088541235234, + "learning_rate": 5e-06, + "loss": 0.0428, + "num_input_tokens_seen": 1041183664, + "step": 6068 + }, + { + "epoch": 68.87818696883852, + "loss": 0.042976222932338715, + "loss_ce": 0.0006788604077883065, + "loss_iou": 0.294921875, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 1041183664, + "step": 6068 + }, + { + "epoch": 68.88951841359773, + "grad_norm": 3.5545591987673903, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 1041355312, + "step": 6069 + }, + { + "epoch": 68.88951841359773, + "loss": 0.07258428633213043, + "loss_ce": 2.874880374292843e-05, + "loss_iou": 0.5, + "loss_num": 0.01446533203125, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 1041355312, + "step": 6069 + }, + { + "epoch": 68.90084985835693, + "grad_norm": 3.7588090899489215, + "learning_rate": 5e-06, + "loss": 0.0546, + "num_input_tokens_seen": 1041526628, + "step": 6070 + }, + { + "epoch": 68.90084985835693, + "loss": 0.08877602219581604, + "loss_ce": 0.0004734083195216954, + "loss_iou": 0.4609375, + "loss_num": 0.0177001953125, + "loss_xval": 0.08837890625, + "num_input_tokens_seen": 1041526628, + "step": 6070 + }, + { + "epoch": 68.91218130311614, + "grad_norm": 2.922613969510062, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1041696912, + "step": 6071 + }, + { + "epoch": 68.91218130311614, + "loss": 0.030344445258378983, + "loss_ce": 0.0001167829759651795, + "loss_iou": 0.416015625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1041696912, + "step": 6071 + }, + { + "epoch": 68.92351274787535, + "grad_norm": 2.7330679061523746, + "learning_rate": 5e-06, + "loss": 0.0318, + "num_input_tokens_seen": 1041868040, + "step": 6072 + }, + { + "epoch": 68.92351274787535, + "loss": 0.02817266434431076, + "loss_ce": 0.00018041461589746177, + "loss_iou": 0.373046875, + "loss_num": 0.005584716796875, + "loss_xval": 0.0279541015625, + "num_input_tokens_seen": 1041868040, + "step": 6072 + }, + { + "epoch": 68.93484419263456, + "grad_norm": 3.009534152062361, + "learning_rate": 5e-06, + "loss": 0.0579, + "num_input_tokens_seen": 1042039836, + "step": 6073 + }, + { + "epoch": 68.93484419263456, + "loss": 0.023865772411227226, + "loss_ce": 7.732024823781103e-05, + "loss_iou": 0.396484375, + "loss_num": 0.0047607421875, + "loss_xval": 0.0238037109375, + "num_input_tokens_seen": 1042039836, + "step": 6073 + }, + { + "epoch": 68.94617563739376, + "grad_norm": 3.2542066267616097, + "learning_rate": 5e-06, + "loss": 0.0589, + "num_input_tokens_seen": 1042211748, + "step": 6074 + }, + { + "epoch": 68.94617563739376, + "loss": 0.09823054820299149, + "loss_ce": 7.075207395246252e-05, + "loss_iou": 0.376953125, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 1042211748, + "step": 6074 + }, + { + "epoch": 68.95750708215297, + "grad_norm": 2.7210391719931013, + "learning_rate": 5e-06, + "loss": 0.0303, + "num_input_tokens_seen": 1042382716, + "step": 6075 + }, + { + "epoch": 68.95750708215297, + "loss": 0.02861340343952179, + "loss_ce": 4.8950609198072925e-05, + "loss_iou": 0.326171875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1042382716, + "step": 6075 + }, + { + "epoch": 68.96883852691218, + "grad_norm": 3.784531080526908, + "learning_rate": 5e-06, + "loss": 0.0634, + "num_input_tokens_seen": 1042553516, + "step": 6076 + }, + { + "epoch": 68.96883852691218, + "loss": 0.03051602467894554, + "loss_ce": 9.000018326332793e-05, + "loss_iou": 0.16015625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1042553516, + "step": 6076 + }, + { + "epoch": 68.98016997167139, + "grad_norm": 3.4738884577869746, + "learning_rate": 5e-06, + "loss": 0.031, + "num_input_tokens_seen": 1042724344, + "step": 6077 + }, + { + "epoch": 68.98016997167139, + "loss": 0.03250674903392792, + "loss_ce": 6.656155892414972e-05, + "loss_iou": 0.59765625, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 1042724344, + "step": 6077 + }, + { + "epoch": 68.9915014164306, + "grad_norm": 3.6709092426138783, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 1042896016, + "step": 6078 + }, + { + "epoch": 68.9915014164306, + "loss": 0.032034341245889664, + "loss_ce": 2.1402305719675496e-05, + "loss_iou": 0.53125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1042896016, + "step": 6078 + }, + { + "epoch": 68.9915014164306, + "loss": 0.04184401407837868, + "loss_ce": 8.070863987086341e-05, + "loss_iou": 0.302734375, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 1043023892, + "step": 6078 + }, + { + "epoch": 69.0028328611898, + "grad_norm": 3.566578336891107, + "learning_rate": 5e-06, + "loss": 0.0487, + "num_input_tokens_seen": 1043067080, + "step": 6079 + }, + { + "epoch": 69.0028328611898, + "loss": 0.04661869630217552, + "loss_ce": 1.8353712221141905e-05, + "loss_iou": 0.5078125, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 1043067080, + "step": 6079 + }, + { + "epoch": 69.01416430594901, + "grad_norm": 2.9009560791276146, + "learning_rate": 5e-06, + "loss": 0.0326, + "num_input_tokens_seen": 1043238072, + "step": 6080 + }, + { + "epoch": 69.01416430594901, + "loss": 0.027608666568994522, + "loss_ce": 2.8403321266523562e-05, + "loss_iou": 0.4296875, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 1043238072, + "step": 6080 + }, + { + "epoch": 69.02549575070822, + "grad_norm": 2.8979708147926844, + "learning_rate": 5e-06, + "loss": 0.0399, + "num_input_tokens_seen": 1043408260, + "step": 6081 + }, + { + "epoch": 69.02549575070822, + "loss": 0.023163681849837303, + "loss_ce": 1.6099183994811028e-05, + "loss_iou": 0.421875, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 1043408260, + "step": 6081 + }, + { + "epoch": 69.03682719546742, + "grad_norm": 3.3045306706829276, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 1043576916, + "step": 6082 + }, + { + "epoch": 69.03682719546742, + "loss": 0.036885086447000504, + "loss_ce": 3.5111792385578156e-05, + "loss_iou": 0.275390625, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1043576916, + "step": 6082 + }, + { + "epoch": 69.04815864022663, + "grad_norm": 4.533060458499613, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 1043747496, + "step": 6083 + }, + { + "epoch": 69.04815864022663, + "loss": 0.07012467086315155, + "loss_ce": 2.579538158897776e-05, + "loss_iou": 0.1728515625, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 1043747496, + "step": 6083 + }, + { + "epoch": 69.05949008498584, + "grad_norm": 2.925154068098337, + "learning_rate": 5e-06, + "loss": 0.0291, + "num_input_tokens_seen": 1043918968, + "step": 6084 + }, + { + "epoch": 69.05949008498584, + "loss": 0.032461017370224, + "loss_ce": 0.0001123846450354904, + "loss_iou": 0.46875, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1043918968, + "step": 6084 + }, + { + "epoch": 69.07082152974505, + "grad_norm": 2.4533193899115484, + "learning_rate": 5e-06, + "loss": 0.032, + "num_input_tokens_seen": 1044090704, + "step": 6085 + }, + { + "epoch": 69.07082152974505, + "loss": 0.03167867660522461, + "loss_ce": 0.00013875971490051597, + "loss_iou": 0.328125, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 1044090704, + "step": 6085 + }, + { + "epoch": 69.08215297450425, + "grad_norm": 2.782693400322336, + "learning_rate": 5e-06, + "loss": 0.0424, + "num_input_tokens_seen": 1044261496, + "step": 6086 + }, + { + "epoch": 69.08215297450425, + "loss": 0.02466420829296112, + "loss_ce": 3.652416489785537e-05, + "loss_iou": 0.125, + "loss_num": 0.004913330078125, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 1044261496, + "step": 6086 + }, + { + "epoch": 69.09348441926346, + "grad_norm": 2.4640254531508496, + "learning_rate": 5e-06, + "loss": 0.0315, + "num_input_tokens_seen": 1044433476, + "step": 6087 + }, + { + "epoch": 69.09348441926346, + "loss": 0.04142580181360245, + "loss_ce": 1.3446344382828102e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 1044433476, + "step": 6087 + }, + { + "epoch": 69.10481586402267, + "grad_norm": 3.8157631205734632, + "learning_rate": 5e-06, + "loss": 0.0506, + "num_input_tokens_seen": 1044605324, + "step": 6088 + }, + { + "epoch": 69.10481586402267, + "loss": 0.04222281277179718, + "loss_ce": 1.7004045730573125e-05, + "loss_iou": 0.1962890625, + "loss_num": 0.0084228515625, + "loss_xval": 0.042236328125, + "num_input_tokens_seen": 1044605324, + "step": 6088 + }, + { + "epoch": 69.11614730878188, + "grad_norm": 3.564928900768224, + "learning_rate": 5e-06, + "loss": 0.0515, + "num_input_tokens_seen": 1044776076, + "step": 6089 + }, + { + "epoch": 69.11614730878188, + "loss": 0.031930938363075256, + "loss_ce": 1.7182930605486035e-05, + "loss_iou": 0.408203125, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1044776076, + "step": 6089 + }, + { + "epoch": 69.12747875354107, + "grad_norm": 3.1182661273176424, + "learning_rate": 5e-06, + "loss": 0.0417, + "num_input_tokens_seen": 1044947544, + "step": 6090 + }, + { + "epoch": 69.12747875354107, + "loss": 0.03167203441262245, + "loss_ce": 4.8195892304647714e-05, + "loss_iou": 0.2578125, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 1044947544, + "step": 6090 + }, + { + "epoch": 69.13881019830028, + "grad_norm": 3.1791290479697523, + "learning_rate": 5e-06, + "loss": 0.0738, + "num_input_tokens_seen": 1045119312, + "step": 6091 + }, + { + "epoch": 69.13881019830028, + "loss": 0.02324497140944004, + "loss_ce": 2.109505658154376e-05, + "loss_iou": 0.53515625, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 1045119312, + "step": 6091 + }, + { + "epoch": 69.15014164305948, + "grad_norm": 3.860080170310789, + "learning_rate": 5e-06, + "loss": 0.0547, + "num_input_tokens_seen": 1045288128, + "step": 6092 + }, + { + "epoch": 69.15014164305948, + "loss": 0.10384989529848099, + "loss_ce": 1.3841778127243742e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0208740234375, + "loss_xval": 0.10400390625, + "num_input_tokens_seen": 1045288128, + "step": 6092 + }, + { + "epoch": 69.16147308781869, + "grad_norm": 4.252370975724673, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 1045458920, + "step": 6093 + }, + { + "epoch": 69.16147308781869, + "loss": 0.059459540992975235, + "loss_ce": 2.6554151190794073e-05, + "loss_iou": 0.306640625, + "loss_num": 0.01190185546875, + "loss_xval": 0.059326171875, + "num_input_tokens_seen": 1045458920, + "step": 6093 + }, + { + "epoch": 69.1728045325779, + "grad_norm": 10.401991581393569, + "learning_rate": 5e-06, + "loss": 0.0484, + "num_input_tokens_seen": 1045630384, + "step": 6094 + }, + { + "epoch": 69.1728045325779, + "loss": 0.041744448244571686, + "loss_ce": 2.691516056074761e-05, + "loss_iou": 0.39453125, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 1045630384, + "step": 6094 + }, + { + "epoch": 69.1841359773371, + "grad_norm": 3.379250977751393, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 1045802072, + "step": 6095 + }, + { + "epoch": 69.1841359773371, + "loss": 0.03261017054319382, + "loss_ce": 6.317358202068135e-05, + "loss_iou": 0.53125, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 1045802072, + "step": 6095 + }, + { + "epoch": 69.19546742209631, + "grad_norm": 2.907778734033868, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 1045974228, + "step": 6096 + }, + { + "epoch": 69.19546742209631, + "loss": 0.03515955060720444, + "loss_ce": 2.6189010895905085e-05, + "loss_iou": 0.09228515625, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1045974228, + "step": 6096 + }, + { + "epoch": 69.20679886685552, + "grad_norm": 2.401889926366466, + "learning_rate": 5e-06, + "loss": 0.0324, + "num_input_tokens_seen": 1046145732, + "step": 6097 + }, + { + "epoch": 69.20679886685552, + "loss": 0.03377799689769745, + "loss_ce": 1.0294672392774373e-05, + "loss_iou": 0.40234375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1046145732, + "step": 6097 + }, + { + "epoch": 69.21813031161473, + "grad_norm": 2.516323955612636, + "learning_rate": 5e-06, + "loss": 0.0727, + "num_input_tokens_seen": 1046315712, + "step": 6098 + }, + { + "epoch": 69.21813031161473, + "loss": 0.1046159565448761, + "loss_ce": 6.273268081713468e-05, + "loss_iou": 0.14453125, + "loss_num": 0.0208740234375, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 1046315712, + "step": 6098 + }, + { + "epoch": 69.22946175637394, + "grad_norm": 2.8023313680286677, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 1046487624, + "step": 6099 + }, + { + "epoch": 69.22946175637394, + "loss": 0.06589950621128082, + "loss_ce": 4.2571951780701056e-05, + "loss_iou": 0.4140625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 1046487624, + "step": 6099 + }, + { + "epoch": 69.24079320113314, + "grad_norm": 3.1140863974854156, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 1046658000, + "step": 6100 + }, + { + "epoch": 69.24079320113314, + "loss": 0.026163093745708466, + "loss_ce": 2.4787481379462406e-05, + "loss_iou": 0.2294921875, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 1046658000, + "step": 6100 + }, + { + "epoch": 69.25212464589235, + "grad_norm": 3.690686521105264, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 1046829060, + "step": 6101 + }, + { + "epoch": 69.25212464589235, + "loss": 0.09821905195713043, + "loss_ce": 2.874925485230051e-05, + "loss_iou": 0.43359375, + "loss_num": 0.0196533203125, + "loss_xval": 0.09814453125, + "num_input_tokens_seen": 1046829060, + "step": 6101 + }, + { + "epoch": 69.26345609065156, + "grad_norm": 3.663949545387579, + "learning_rate": 5e-06, + "loss": 0.0509, + "num_input_tokens_seen": 1047000952, + "step": 6102 + }, + { + "epoch": 69.26345609065156, + "loss": 0.03768531233072281, + "loss_ce": 2.6622426958056167e-05, + "loss_iou": 0.52734375, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 1047000952, + "step": 6102 + }, + { + "epoch": 69.27478753541077, + "grad_norm": 3.480404452511672, + "learning_rate": 5e-06, + "loss": 0.0456, + "num_input_tokens_seen": 1047172624, + "step": 6103 + }, + { + "epoch": 69.27478753541077, + "loss": 0.031147504225373268, + "loss_ce": 0.00017216356354765594, + "loss_iou": 0.51953125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1047172624, + "step": 6103 + }, + { + "epoch": 69.28611898016997, + "grad_norm": 3.8079999909703814, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 1047344452, + "step": 6104 + }, + { + "epoch": 69.28611898016997, + "loss": 0.052981358021497726, + "loss_ce": 4.861808702116832e-05, + "loss_iou": 0.404296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 1047344452, + "step": 6104 + }, + { + "epoch": 69.29745042492918, + "grad_norm": 3.725031553337833, + "learning_rate": 5e-06, + "loss": 0.0577, + "num_input_tokens_seen": 1047516296, + "step": 6105 + }, + { + "epoch": 69.29745042492918, + "loss": 0.06311427056789398, + "loss_ce": 0.00015650886052753776, + "loss_iou": 0.421875, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 1047516296, + "step": 6105 + }, + { + "epoch": 69.30878186968839, + "grad_norm": 3.6927872218784086, + "learning_rate": 5e-06, + "loss": 0.0424, + "num_input_tokens_seen": 1047687796, + "step": 6106 + }, + { + "epoch": 69.30878186968839, + "loss": 0.054484352469444275, + "loss_ce": 4.0995531890075654e-05, + "loss_iou": 0.45703125, + "loss_num": 0.01092529296875, + "loss_xval": 0.054443359375, + "num_input_tokens_seen": 1047687796, + "step": 6106 + }, + { + "epoch": 69.3201133144476, + "grad_norm": 5.54216898432565, + "learning_rate": 5e-06, + "loss": 0.0621, + "num_input_tokens_seen": 1047857392, + "step": 6107 + }, + { + "epoch": 69.3201133144476, + "loss": 0.07165655493736267, + "loss_ce": 0.0010846630902960896, + "loss_iou": 0.1455078125, + "loss_num": 0.01409912109375, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 1047857392, + "step": 6107 + }, + { + "epoch": 69.3314447592068, + "grad_norm": 2.7671178100971905, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1048028528, + "step": 6108 + }, + { + "epoch": 69.3314447592068, + "loss": 0.03650432825088501, + "loss_ce": 1.2933372090628836e-05, + "loss_iou": 0.45703125, + "loss_num": 0.00732421875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 1048028528, + "step": 6108 + }, + { + "epoch": 69.34277620396601, + "grad_norm": 3.26740394776147, + "learning_rate": 5e-06, + "loss": 0.0582, + "num_input_tokens_seen": 1048199580, + "step": 6109 + }, + { + "epoch": 69.34277620396601, + "loss": 0.040207166224718094, + "loss_ce": 3.0773429898545146e-05, + "loss_iou": 0.43359375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 1048199580, + "step": 6109 + }, + { + "epoch": 69.35410764872522, + "grad_norm": 3.3191409920318256, + "learning_rate": 5e-06, + "loss": 0.0332, + "num_input_tokens_seen": 1048370948, + "step": 6110 + }, + { + "epoch": 69.35410764872522, + "loss": 0.03141346946358681, + "loss_ce": 4.139923112234101e-05, + "loss_iou": 0.39453125, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1048370948, + "step": 6110 + }, + { + "epoch": 69.36543909348443, + "grad_norm": 3.4791550491549947, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 1048542868, + "step": 6111 + }, + { + "epoch": 69.36543909348443, + "loss": 0.02632526308298111, + "loss_ce": 2.674159622984007e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 1048542868, + "step": 6111 + }, + { + "epoch": 69.37677053824362, + "grad_norm": 4.522326384274253, + "learning_rate": 5e-06, + "loss": 0.055, + "num_input_tokens_seen": 1048714876, + "step": 6112 + }, + { + "epoch": 69.37677053824362, + "loss": 0.09521997720003128, + "loss_ce": 0.009602908976376057, + "loss_iou": 0.416015625, + "loss_num": 0.01708984375, + "loss_xval": 0.08544921875, + "num_input_tokens_seen": 1048714876, + "step": 6112 + }, + { + "epoch": 69.38810198300283, + "grad_norm": 2.906494423595446, + "learning_rate": 5e-06, + "loss": 0.0605, + "num_input_tokens_seen": 1048886344, + "step": 6113 + }, + { + "epoch": 69.38810198300283, + "loss": 0.03917405381798744, + "loss_ce": 8.103667641989887e-05, + "loss_iou": 0.412109375, + "loss_num": 0.0078125, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 1048886344, + "step": 6113 + }, + { + "epoch": 69.39943342776203, + "grad_norm": 2.1402710245133654, + "learning_rate": 5e-06, + "loss": 0.0578, + "num_input_tokens_seen": 1049058144, + "step": 6114 + }, + { + "epoch": 69.39943342776203, + "loss": 0.028720121830701828, + "loss_ce": 1.834042814152781e-05, + "loss_iou": 0.263671875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 1049058144, + "step": 6114 + }, + { + "epoch": 69.41076487252124, + "grad_norm": 2.815526873758044, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1049229860, + "step": 6115 + }, + { + "epoch": 69.41076487252124, + "loss": 0.045954931527376175, + "loss_ce": 0.0001480501814512536, + "loss_iou": 0.427734375, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 1049229860, + "step": 6115 + }, + { + "epoch": 69.42209631728045, + "grad_norm": 3.9225679743618254, + "learning_rate": 5e-06, + "loss": 0.065, + "num_input_tokens_seen": 1049400552, + "step": 6116 + }, + { + "epoch": 69.42209631728045, + "loss": 0.03415348753333092, + "loss_ce": 9.586734813638031e-05, + "loss_iou": 0.478515625, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1049400552, + "step": 6116 + }, + { + "epoch": 69.43342776203966, + "grad_norm": 3.918168041140486, + "learning_rate": 5e-06, + "loss": 0.049, + "num_input_tokens_seen": 1049572756, + "step": 6117 + }, + { + "epoch": 69.43342776203966, + "loss": 0.08863445371389389, + "loss_ce": 1.140386302722618e-05, + "loss_iou": 0.55078125, + "loss_num": 0.0177001953125, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 1049572756, + "step": 6117 + }, + { + "epoch": 69.44475920679886, + "grad_norm": 4.933180234363589, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 1049744256, + "step": 6118 + }, + { + "epoch": 69.44475920679886, + "loss": 0.03854266554117203, + "loss_ce": 2.9484068363672122e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1049744256, + "step": 6118 + }, + { + "epoch": 69.45609065155807, + "grad_norm": 2.5139675123367624, + "learning_rate": 5e-06, + "loss": 0.0653, + "num_input_tokens_seen": 1049915984, + "step": 6119 + }, + { + "epoch": 69.45609065155807, + "loss": 0.08057437092065811, + "loss_ce": 3.848466803901829e-05, + "loss_iou": 0.478515625, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 1049915984, + "step": 6119 + }, + { + "epoch": 69.46742209631728, + "grad_norm": 2.682071752666328, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1050086952, + "step": 6120 + }, + { + "epoch": 69.46742209631728, + "loss": 0.047244325280189514, + "loss_ce": 3.362995630595833e-05, + "loss_iou": 0.4375, + "loss_num": 0.00946044921875, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 1050086952, + "step": 6120 + }, + { + "epoch": 69.47875354107649, + "grad_norm": 3.2324562057725745, + "learning_rate": 5e-06, + "loss": 0.0336, + "num_input_tokens_seen": 1050259208, + "step": 6121 + }, + { + "epoch": 69.47875354107649, + "loss": 0.033484216779470444, + "loss_ce": 2.169225990655832e-05, + "loss_iou": 0.46484375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1050259208, + "step": 6121 + }, + { + "epoch": 69.4900849858357, + "grad_norm": 3.343413611918484, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1050431172, + "step": 6122 + }, + { + "epoch": 69.4900849858357, + "loss": 0.02710139751434326, + "loss_ce": 1.704711394268088e-05, + "loss_iou": 0.427734375, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1050431172, + "step": 6122 + }, + { + "epoch": 69.5014164305949, + "grad_norm": 2.8679378961770907, + "learning_rate": 5e-06, + "loss": 0.0474, + "num_input_tokens_seen": 1050601156, + "step": 6123 + }, + { + "epoch": 69.5014164305949, + "loss": 0.023616516962647438, + "loss_ce": 4.1689767385832965e-05, + "loss_iou": 0.203125, + "loss_num": 0.004730224609375, + "loss_xval": 0.0235595703125, + "num_input_tokens_seen": 1050601156, + "step": 6123 + }, + { + "epoch": 69.51274787535411, + "grad_norm": 2.8652092682585577, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1050772208, + "step": 6124 + }, + { + "epoch": 69.51274787535411, + "loss": 0.030777039006352425, + "loss_ce": 3.057961293961853e-05, + "loss_iou": 0.173828125, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 1050772208, + "step": 6124 + }, + { + "epoch": 69.52407932011332, + "grad_norm": 3.6032131609225653, + "learning_rate": 5e-06, + "loss": 0.0525, + "num_input_tokens_seen": 1050943468, + "step": 6125 + }, + { + "epoch": 69.52407932011332, + "loss": 0.031950563192367554, + "loss_ce": 1.3917691831011325e-05, + "loss_iou": 0.58984375, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1050943468, + "step": 6125 + }, + { + "epoch": 69.53541076487252, + "grad_norm": 3.7051864058376247, + "learning_rate": 5e-06, + "loss": 0.0542, + "num_input_tokens_seen": 1051111868, + "step": 6126 + }, + { + "epoch": 69.53541076487252, + "loss": 0.045482244342565536, + "loss_ce": 2.631138158903923e-05, + "loss_iou": 0.421875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 1051111868, + "step": 6126 + }, + { + "epoch": 69.54674220963173, + "grad_norm": 3.2277313792402915, + "learning_rate": 5e-06, + "loss": 0.0335, + "num_input_tokens_seen": 1051282032, + "step": 6127 + }, + { + "epoch": 69.54674220963173, + "loss": 0.031909506767988205, + "loss_ce": 3.389604898984544e-05, + "loss_iou": 0.38671875, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1051282032, + "step": 6127 + }, + { + "epoch": 69.55807365439094, + "grad_norm": 3.516043378714864, + "learning_rate": 5e-06, + "loss": 0.036, + "num_input_tokens_seen": 1051453444, + "step": 6128 + }, + { + "epoch": 69.55807365439094, + "loss": 0.03425643965601921, + "loss_ce": 0.00019882083870470524, + "loss_iou": 0.35546875, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1051453444, + "step": 6128 + }, + { + "epoch": 69.56940509915015, + "grad_norm": 3.814911899148198, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 1051625236, + "step": 6129 + }, + { + "epoch": 69.56940509915015, + "loss": 0.03843668848276138, + "loss_ce": 0.0009458417189307511, + "loss_iou": 0.52734375, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 1051625236, + "step": 6129 + }, + { + "epoch": 69.58073654390935, + "grad_norm": 3.506933612733227, + "learning_rate": 5e-06, + "loss": 0.0491, + "num_input_tokens_seen": 1051797336, + "step": 6130 + }, + { + "epoch": 69.58073654390935, + "loss": 0.07583128660917282, + "loss_ce": 2.562140070949681e-05, + "loss_iou": 0.40234375, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 1051797336, + "step": 6130 + }, + { + "epoch": 69.59206798866856, + "grad_norm": 3.050175570988111, + "learning_rate": 5e-06, + "loss": 0.0503, + "num_input_tokens_seen": 1051969108, + "step": 6131 + }, + { + "epoch": 69.59206798866856, + "loss": 0.07978345453739166, + "loss_ce": 5.627945211017504e-05, + "loss_iou": 0.4765625, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 1051969108, + "step": 6131 + }, + { + "epoch": 69.60339943342777, + "grad_norm": 3.292593363396408, + "learning_rate": 5e-06, + "loss": 0.0341, + "num_input_tokens_seen": 1052140812, + "step": 6132 + }, + { + "epoch": 69.60339943342777, + "loss": 0.027203774079680443, + "loss_ce": 2.7870884878211655e-05, + "loss_iou": 0.33984375, + "loss_num": 0.00543212890625, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 1052140812, + "step": 6132 + }, + { + "epoch": 69.61473087818698, + "grad_norm": 3.75680394740765, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 1052311980, + "step": 6133 + }, + { + "epoch": 69.61473087818698, + "loss": 0.05178377404808998, + "loss_ce": 4.1219303966499865e-05, + "loss_iou": 0.412109375, + "loss_num": 0.01031494140625, + "loss_xval": 0.0517578125, + "num_input_tokens_seen": 1052311980, + "step": 6133 + }, + { + "epoch": 69.62606232294617, + "grad_norm": 3.413427238896476, + "learning_rate": 5e-06, + "loss": 0.0537, + "num_input_tokens_seen": 1052483728, + "step": 6134 + }, + { + "epoch": 69.62606232294617, + "loss": 0.08037745952606201, + "loss_ce": 0.0006350207841023803, + "loss_iou": 0.3828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 1052483728, + "step": 6134 + }, + { + "epoch": 69.63739376770538, + "grad_norm": 2.861953296818038, + "learning_rate": 5e-06, + "loss": 0.0334, + "num_input_tokens_seen": 1052654588, + "step": 6135 + }, + { + "epoch": 69.63739376770538, + "loss": 0.03404511511325836, + "loss_ce": 0.00044526217970997095, + "loss_iou": 0.486328125, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1052654588, + "step": 6135 + }, + { + "epoch": 69.64872521246458, + "grad_norm": 3.1769476984958165, + "learning_rate": 5e-06, + "loss": 0.0463, + "num_input_tokens_seen": 1052826496, + "step": 6136 + }, + { + "epoch": 69.64872521246458, + "loss": 0.03207669034600258, + "loss_ce": 1.797280492610298e-05, + "loss_iou": 0.318359375, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1052826496, + "step": 6136 + }, + { + "epoch": 69.66005665722379, + "grad_norm": 2.9367353324832517, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 1052998300, + "step": 6137 + }, + { + "epoch": 69.66005665722379, + "loss": 0.022776728495955467, + "loss_ce": 1.824560240493156e-05, + "loss_iou": 0.31640625, + "loss_num": 0.004547119140625, + "loss_xval": 0.022705078125, + "num_input_tokens_seen": 1052998300, + "step": 6137 + }, + { + "epoch": 69.671388101983, + "grad_norm": 3.1937218268913146, + "learning_rate": 5e-06, + "loss": 0.0444, + "num_input_tokens_seen": 1053170284, + "step": 6138 + }, + { + "epoch": 69.671388101983, + "loss": 0.08735427260398865, + "loss_ce": 2.822013630066067e-05, + "loss_iou": 0.32421875, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 1053170284, + "step": 6138 + }, + { + "epoch": 69.6827195467422, + "grad_norm": 3.0279243998692658, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1053341504, + "step": 6139 + }, + { + "epoch": 69.6827195467422, + "loss": 0.05386228859424591, + "loss_ce": 2.9281665774760768e-05, + "loss_iou": 0.275390625, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 1053341504, + "step": 6139 + }, + { + "epoch": 69.69405099150141, + "grad_norm": 3.3349301100090076, + "learning_rate": 5e-06, + "loss": 0.079, + "num_input_tokens_seen": 1053512476, + "step": 6140 + }, + { + "epoch": 69.69405099150141, + "loss": 0.114628367125988, + "loss_ce": 1.960219378815964e-05, + "loss_iou": 0.37890625, + "loss_num": 0.02294921875, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 1053512476, + "step": 6140 + }, + { + "epoch": 69.70538243626062, + "grad_norm": 3.1141144975063, + "learning_rate": 5e-06, + "loss": 0.0257, + "num_input_tokens_seen": 1053684084, + "step": 6141 + }, + { + "epoch": 69.70538243626062, + "loss": 0.02537723071873188, + "loss_ce": 0.00013919416232965887, + "loss_iou": 0.29296875, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 1053684084, + "step": 6141 + }, + { + "epoch": 69.71671388101983, + "grad_norm": 3.0929758989747924, + "learning_rate": 5e-06, + "loss": 0.0419, + "num_input_tokens_seen": 1053856048, + "step": 6142 + }, + { + "epoch": 69.71671388101983, + "loss": 0.026500869542360306, + "loss_ce": 1.9242288544774055e-05, + "loss_iou": 0.478515625, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 1053856048, + "step": 6142 + }, + { + "epoch": 69.72804532577904, + "grad_norm": 3.5319484659013742, + "learning_rate": 5e-06, + "loss": 0.0424, + "num_input_tokens_seen": 1054026288, + "step": 6143 + }, + { + "epoch": 69.72804532577904, + "loss": 0.030305106192827225, + "loss_ce": 6.218641647137702e-05, + "loss_iou": 0.2578125, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1054026288, + "step": 6143 + }, + { + "epoch": 69.73937677053824, + "grad_norm": 3.3771881888549613, + "learning_rate": 5e-06, + "loss": 0.0387, + "num_input_tokens_seen": 1054193228, + "step": 6144 + }, + { + "epoch": 69.73937677053824, + "loss": 0.03407318890094757, + "loss_ce": 0.00015289938892237842, + "loss_iou": 0.416015625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1054193228, + "step": 6144 + }, + { + "epoch": 69.75070821529745, + "grad_norm": 3.049368731658451, + "learning_rate": 5e-06, + "loss": 0.0255, + "num_input_tokens_seen": 1054365340, + "step": 6145 + }, + { + "epoch": 69.75070821529745, + "loss": 0.026761401444673538, + "loss_ce": 2.800256697810255e-05, + "loss_iou": 0.265625, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 1054365340, + "step": 6145 + }, + { + "epoch": 69.76203966005666, + "grad_norm": 3.1954434838993593, + "learning_rate": 5e-06, + "loss": 0.0553, + "num_input_tokens_seen": 1054537048, + "step": 6146 + }, + { + "epoch": 69.76203966005666, + "loss": 0.040646135807037354, + "loss_ce": 4.2498337279539555e-05, + "loss_iou": 0.3125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1054537048, + "step": 6146 + }, + { + "epoch": 69.77337110481587, + "grad_norm": 3.467828717186582, + "learning_rate": 5e-06, + "loss": 0.0326, + "num_input_tokens_seen": 1054708616, + "step": 6147 + }, + { + "epoch": 69.77337110481587, + "loss": 0.02807961031794548, + "loss_ce": 0.0005832723691128194, + "loss_iou": 0.154296875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 1054708616, + "step": 6147 + }, + { + "epoch": 69.78470254957507, + "grad_norm": 3.2862520091862293, + "learning_rate": 5e-06, + "loss": 0.0606, + "num_input_tokens_seen": 1054880828, + "step": 6148 + }, + { + "epoch": 69.78470254957507, + "loss": 0.06495119631290436, + "loss_ce": 4.030777563457377e-05, + "loss_iou": 0.365234375, + "loss_num": 0.01300048828125, + "loss_xval": 0.06494140625, + "num_input_tokens_seen": 1054880828, + "step": 6148 + }, + { + "epoch": 69.79603399433428, + "grad_norm": 3.3807713252664224, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 1055052056, + "step": 6149 + }, + { + "epoch": 69.79603399433428, + "loss": 0.035884223878383636, + "loss_ce": 1.0808475053636357e-05, + "loss_iou": 0.53125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1055052056, + "step": 6149 + }, + { + "epoch": 69.80736543909349, + "grad_norm": 3.416896191566224, + "learning_rate": 5e-06, + "loss": 0.0325, + "num_input_tokens_seen": 1055223968, + "step": 6150 + }, + { + "epoch": 69.80736543909349, + "loss": 0.03593236207962036, + "loss_ce": 0.002881825203076005, + "loss_iou": 0.34765625, + "loss_num": 0.006622314453125, + "loss_xval": 0.032958984375, + "num_input_tokens_seen": 1055223968, + "step": 6150 + }, + { + "epoch": 69.8186968838527, + "grad_norm": 3.273938069814868, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 1055394816, + "step": 6151 + }, + { + "epoch": 69.8186968838527, + "loss": 0.03881031274795532, + "loss_ce": 2.2470598196377978e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 1055394816, + "step": 6151 + }, + { + "epoch": 69.8300283286119, + "grad_norm": 3.1745237028102844, + "learning_rate": 5e-06, + "loss": 0.0464, + "num_input_tokens_seen": 1055566528, + "step": 6152 + }, + { + "epoch": 69.8300283286119, + "loss": 0.051968708634376526, + "loss_ce": 2.778843554551713e-05, + "loss_iou": 0.341796875, + "loss_num": 0.0103759765625, + "loss_xval": 0.052001953125, + "num_input_tokens_seen": 1055566528, + "step": 6152 + }, + { + "epoch": 69.84135977337111, + "grad_norm": 3.076305780753445, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1055736848, + "step": 6153 + }, + { + "epoch": 69.84135977337111, + "loss": 0.037253763526678085, + "loss_ce": 0.00020542369748000056, + "loss_iou": 0.421875, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 1055736848, + "step": 6153 + }, + { + "epoch": 69.85269121813032, + "grad_norm": 3.7851088014842476, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1055908736, + "step": 6154 + }, + { + "epoch": 69.85269121813032, + "loss": 0.044917941093444824, + "loss_ce": 5.710304685635492e-05, + "loss_iou": 0.337890625, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 1055908736, + "step": 6154 + }, + { + "epoch": 69.86402266288952, + "grad_norm": 3.490260886009078, + "learning_rate": 5e-06, + "loss": 0.0532, + "num_input_tokens_seen": 1056080536, + "step": 6155 + }, + { + "epoch": 69.86402266288952, + "loss": 0.08909451216459274, + "loss_ce": 0.0021194161381572485, + "loss_iou": 0.39453125, + "loss_num": 0.017333984375, + "loss_xval": 0.0869140625, + "num_input_tokens_seen": 1056080536, + "step": 6155 + }, + { + "epoch": 69.87535410764872, + "grad_norm": 3.109214370172217, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 1056251592, + "step": 6156 + }, + { + "epoch": 69.87535410764872, + "loss": 0.05837704613804817, + "loss_ce": 7.32170301489532e-05, + "loss_iou": 0.28125, + "loss_num": 0.01165771484375, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 1056251592, + "step": 6156 + }, + { + "epoch": 69.88668555240793, + "grad_norm": 3.2973669778192445, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 1056422588, + "step": 6157 + }, + { + "epoch": 69.88668555240793, + "loss": 0.03679753839969635, + "loss_ce": 5.437579602585174e-05, + "loss_iou": 0.39453125, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1056422588, + "step": 6157 + }, + { + "epoch": 69.89801699716713, + "grad_norm": 2.832491080343421, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 1056592052, + "step": 6158 + }, + { + "epoch": 69.89801699716713, + "loss": 0.02813900262117386, + "loss_ce": 3.2312989787897095e-05, + "loss_iou": 0.408203125, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 1056592052, + "step": 6158 + }, + { + "epoch": 69.90934844192634, + "grad_norm": 3.1540004610022834, + "learning_rate": 5e-06, + "loss": 0.0675, + "num_input_tokens_seen": 1056763104, + "step": 6159 + }, + { + "epoch": 69.90934844192634, + "loss": 0.03876408934593201, + "loss_ce": 6.767540980945341e-06, + "loss_iou": 0.515625, + "loss_num": 0.00775146484375, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 1056763104, + "step": 6159 + }, + { + "epoch": 69.92067988668555, + "grad_norm": 3.540570572833432, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 1056934912, + "step": 6160 + }, + { + "epoch": 69.92067988668555, + "loss": 0.03748191148042679, + "loss_ce": 3.684240800794214e-05, + "loss_iou": 0.41796875, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 1056934912, + "step": 6160 + }, + { + "epoch": 69.93201133144476, + "grad_norm": 3.555012099284406, + "learning_rate": 5e-06, + "loss": 0.0377, + "num_input_tokens_seen": 1057106784, + "step": 6161 + }, + { + "epoch": 69.93201133144476, + "loss": 0.035901911556720734, + "loss_ce": 2.8501468477770686e-05, + "loss_iou": 0.26953125, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1057106784, + "step": 6161 + }, + { + "epoch": 69.94334277620396, + "grad_norm": 2.911021570874157, + "learning_rate": 5e-06, + "loss": 0.0532, + "num_input_tokens_seen": 1057278580, + "step": 6162 + }, + { + "epoch": 69.94334277620396, + "loss": 0.047409527003765106, + "loss_ce": 0.00010728441702667624, + "loss_iou": 0.294921875, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 1057278580, + "step": 6162 + }, + { + "epoch": 69.95467422096317, + "grad_norm": 4.711453198508495, + "learning_rate": 5e-06, + "loss": 0.0278, + "num_input_tokens_seen": 1057449572, + "step": 6163 + }, + { + "epoch": 69.95467422096317, + "loss": 0.03106820210814476, + "loss_ce": 0.0005124761955812573, + "loss_iou": 0.330078125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1057449572, + "step": 6163 + }, + { + "epoch": 69.96600566572238, + "grad_norm": 3.3138851567082996, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1057621108, + "step": 6164 + }, + { + "epoch": 69.96600566572238, + "loss": 0.07658272236585617, + "loss_ce": 3.319292954984121e-05, + "loss_iou": 0.390625, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 1057621108, + "step": 6164 + }, + { + "epoch": 69.97733711048159, + "grad_norm": 3.5273757360648847, + "learning_rate": 5e-06, + "loss": 0.04, + "num_input_tokens_seen": 1057791844, + "step": 6165 + }, + { + "epoch": 69.97733711048159, + "loss": 0.03083958476781845, + "loss_ce": 4.73485270049423e-05, + "loss_iou": 0.365234375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 1057791844, + "step": 6165 + }, + { + "epoch": 69.98866855524079, + "grad_norm": 3.2129878766061672, + "learning_rate": 5e-06, + "loss": 0.037, + "num_input_tokens_seen": 1057962708, + "step": 6166 + }, + { + "epoch": 69.98866855524079, + "loss": 0.029260292649269104, + "loss_ce": 2.4452669094898738e-05, + "loss_iou": 0.369140625, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 1057962708, + "step": 6166 + }, + { + "epoch": 70.0, + "grad_norm": 3.4529212212668274, + "learning_rate": 5e-06, + "loss": 0.0385, + "num_input_tokens_seen": 1058134280, + "step": 6167 + }, + { + "epoch": 70.0, + "loss": 0.03967504948377609, + "loss_ce": 1.745303234201856e-05, + "loss_iou": 0.421875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 1058134280, + "step": 6167 + }, + { + "epoch": 70.01133144475921, + "grad_norm": 4.509035215887961, + "learning_rate": 5e-06, + "loss": 0.0575, + "num_input_tokens_seen": 1058306384, + "step": 6168 + }, + { + "epoch": 70.01133144475921, + "loss": 0.08040237426757812, + "loss_ce": 6.484663754235953e-05, + "loss_iou": 0.455078125, + "loss_num": 0.01611328125, + "loss_xval": 0.08056640625, + "num_input_tokens_seen": 1058306384, + "step": 6168 + }, + { + "epoch": 70.02266288951841, + "grad_norm": 3.088021551782971, + "learning_rate": 5e-06, + "loss": 0.0439, + "num_input_tokens_seen": 1058478524, + "step": 6169 + }, + { + "epoch": 70.02266288951841, + "loss": 0.03102484717965126, + "loss_ce": 3.424622991587967e-05, + "loss_iou": 0.59375, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1058478524, + "step": 6169 + }, + { + "epoch": 70.03399433427762, + "grad_norm": 2.911710590305498, + "learning_rate": 5e-06, + "loss": 0.0272, + "num_input_tokens_seen": 1058648464, + "step": 6170 + }, + { + "epoch": 70.03399433427762, + "loss": 0.024744171649217606, + "loss_ce": 2.4934066459536552e-05, + "loss_iou": 0.35546875, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 1058648464, + "step": 6170 + }, + { + "epoch": 70.04532577903683, + "grad_norm": 3.307225420282024, + "learning_rate": 5e-06, + "loss": 0.0362, + "num_input_tokens_seen": 1058820208, + "step": 6171 + }, + { + "epoch": 70.04532577903683, + "loss": 0.035976119339466095, + "loss_ce": 1.1152664228575304e-05, + "loss_iou": 0.59765625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1058820208, + "step": 6171 + }, + { + "epoch": 70.05665722379604, + "grad_norm": 3.253539414051452, + "learning_rate": 5e-06, + "loss": 0.048, + "num_input_tokens_seen": 1058991988, + "step": 6172 + }, + { + "epoch": 70.05665722379604, + "loss": 0.04432141035795212, + "loss_ce": 1.7517057131044567e-05, + "loss_iou": 0.06103515625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 1058991988, + "step": 6172 + }, + { + "epoch": 70.06798866855524, + "grad_norm": 3.7577538459775837, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 1059163612, + "step": 6173 + }, + { + "epoch": 70.06798866855524, + "loss": 0.041993699967861176, + "loss_ce": 4.728862768388353e-05, + "loss_iou": 0.5859375, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1059163612, + "step": 6173 + }, + { + "epoch": 70.07932011331445, + "grad_norm": 3.2451374050671786, + "learning_rate": 5e-06, + "loss": 0.0749, + "num_input_tokens_seen": 1059335328, + "step": 6174 + }, + { + "epoch": 70.07932011331445, + "loss": 0.03083517961204052, + "loss_ce": 2.7682533982442692e-05, + "loss_iou": 0.49609375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 1059335328, + "step": 6174 + }, + { + "epoch": 70.09065155807366, + "grad_norm": 2.7313674203319174, + "learning_rate": 5e-06, + "loss": 0.0441, + "num_input_tokens_seen": 1059507120, + "step": 6175 + }, + { + "epoch": 70.09065155807366, + "loss": 0.06758061051368713, + "loss_ce": 6.0467391449492425e-05, + "loss_iou": 0.515625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 1059507120, + "step": 6175 + }, + { + "epoch": 70.10198300283287, + "grad_norm": 2.959345007315852, + "learning_rate": 5e-06, + "loss": 0.0378, + "num_input_tokens_seen": 1059678680, + "step": 6176 + }, + { + "epoch": 70.10198300283287, + "loss": 0.03468804061412811, + "loss_ce": 2.0072940969839692e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1059678680, + "step": 6176 + }, + { + "epoch": 70.11331444759207, + "grad_norm": 3.4026638160561733, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 1059850668, + "step": 6177 + }, + { + "epoch": 70.11331444759207, + "loss": 0.06843946129083633, + "loss_ce": 4.957188502885401e-05, + "loss_iou": 0.33984375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 1059850668, + "step": 6177 + }, + { + "epoch": 70.12464589235128, + "grad_norm": 3.6583013106266984, + "learning_rate": 5e-06, + "loss": 0.0837, + "num_input_tokens_seen": 1060022348, + "step": 6178 + }, + { + "epoch": 70.12464589235128, + "loss": 0.10209149122238159, + "loss_ce": 5.5965760111575946e-05, + "loss_iou": 0.30078125, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 1060022348, + "step": 6178 + }, + { + "epoch": 70.13597733711048, + "grad_norm": 3.714873490747718, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 1060191120, + "step": 6179 + }, + { + "epoch": 70.13597733711048, + "loss": 0.03477904945611954, + "loss_ce": 6.530592509079725e-05, + "loss_iou": 0.5, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1060191120, + "step": 6179 + }, + { + "epoch": 70.14730878186968, + "grad_norm": 4.0095924521720345, + "learning_rate": 5e-06, + "loss": 0.0518, + "num_input_tokens_seen": 1060362696, + "step": 6180 + }, + { + "epoch": 70.14730878186968, + "loss": 0.03978504613041878, + "loss_ce": 2.0641280570998788e-05, + "loss_iou": 0.328125, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 1060362696, + "step": 6180 + }, + { + "epoch": 70.15864022662889, + "grad_norm": 3.6641240301162243, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 1060532088, + "step": 6181 + }, + { + "epoch": 70.15864022662889, + "loss": 0.025972293689846992, + "loss_ce": 3.235208714613691e-05, + "loss_iou": 0.39453125, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 1060532088, + "step": 6181 + }, + { + "epoch": 70.1699716713881, + "grad_norm": 3.7563183727561134, + "learning_rate": 5e-06, + "loss": 0.0353, + "num_input_tokens_seen": 1060704268, + "step": 6182 + }, + { + "epoch": 70.1699716713881, + "loss": 0.03175430744886398, + "loss_ce": 4.654276926885359e-05, + "loss_iou": 0.275390625, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 1060704268, + "step": 6182 + }, + { + "epoch": 70.1813031161473, + "grad_norm": 3.885898202831319, + "learning_rate": 5e-06, + "loss": 0.0424, + "num_input_tokens_seen": 1060876196, + "step": 6183 + }, + { + "epoch": 70.1813031161473, + "loss": 0.027663579210639, + "loss_ce": 2.991233122884296e-05, + "loss_iou": 0.515625, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 1060876196, + "step": 6183 + }, + { + "epoch": 70.19263456090651, + "grad_norm": 4.256689402027802, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 1061047692, + "step": 6184 + }, + { + "epoch": 70.19263456090651, + "loss": 0.06845005601644516, + "loss_ce": 4.4908549170941114e-05, + "loss_iou": 0.333984375, + "loss_num": 0.013671875, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 1061047692, + "step": 6184 + }, + { + "epoch": 70.20396600566572, + "grad_norm": 3.109819554699469, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 1061217068, + "step": 6185 + }, + { + "epoch": 70.20396600566572, + "loss": 0.02164578251540661, + "loss_ce": 3.9335609471891075e-05, + "loss_iou": 0.546875, + "loss_num": 0.00433349609375, + "loss_xval": 0.0216064453125, + "num_input_tokens_seen": 1061217068, + "step": 6185 + }, + { + "epoch": 70.21529745042493, + "grad_norm": 3.5351829257635243, + "learning_rate": 5e-06, + "loss": 0.0308, + "num_input_tokens_seen": 1061388988, + "step": 6186 + }, + { + "epoch": 70.21529745042493, + "loss": 0.028955310583114624, + "loss_ce": 2.4647413738421164e-05, + "loss_iou": 0.49609375, + "loss_num": 0.00579833984375, + "loss_xval": 0.0289306640625, + "num_input_tokens_seen": 1061388988, + "step": 6186 + }, + { + "epoch": 70.22662889518413, + "grad_norm": 3.6217002103751734, + "learning_rate": 5e-06, + "loss": 0.0473, + "num_input_tokens_seen": 1061560576, + "step": 6187 + }, + { + "epoch": 70.22662889518413, + "loss": 0.03518831729888916, + "loss_ce": 1.6808910004328936e-05, + "loss_iou": 0.41796875, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1061560576, + "step": 6187 + }, + { + "epoch": 70.23796033994334, + "grad_norm": 3.2643411458164104, + "learning_rate": 5e-06, + "loss": 0.0348, + "num_input_tokens_seen": 1061732068, + "step": 6188 + }, + { + "epoch": 70.23796033994334, + "loss": 0.0447344109416008, + "loss_ce": 5.6677912652958184e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 1061732068, + "step": 6188 + }, + { + "epoch": 70.24929178470255, + "grad_norm": 3.0986623092849768, + "learning_rate": 5e-06, + "loss": 0.0373, + "num_input_tokens_seen": 1061903412, + "step": 6189 + }, + { + "epoch": 70.24929178470255, + "loss": 0.033926405012607574, + "loss_ce": 9.766806761035696e-05, + "loss_iou": 0.28515625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1061903412, + "step": 6189 + }, + { + "epoch": 70.26062322946176, + "grad_norm": 2.7389124235081392, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 1062075140, + "step": 6190 + }, + { + "epoch": 70.26062322946176, + "loss": 0.03864557296037674, + "loss_ce": 4.084153624717146e-05, + "loss_iou": 0.25, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1062075140, + "step": 6190 + }, + { + "epoch": 70.27195467422096, + "grad_norm": 2.3240647748971157, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1062244632, + "step": 6191 + }, + { + "epoch": 70.27195467422096, + "loss": 0.030351605266332626, + "loss_ce": 0.00021549731900449842, + "loss_iou": 0.34375, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 1062244632, + "step": 6191 + }, + { + "epoch": 70.28328611898017, + "grad_norm": 3.2711494664173673, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 1062416580, + "step": 6192 + }, + { + "epoch": 70.28328611898017, + "loss": 0.04531951993703842, + "loss_ce": 3.143498906865716e-05, + "loss_iou": 0.423828125, + "loss_num": 0.009033203125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 1062416580, + "step": 6192 + }, + { + "epoch": 70.29461756373938, + "grad_norm": 3.0356899201502197, + "learning_rate": 5e-06, + "loss": 0.0342, + "num_input_tokens_seen": 1062587472, + "step": 6193 + }, + { + "epoch": 70.29461756373938, + "loss": 0.03430301323533058, + "loss_ce": 7.755000842735171e-05, + "loss_iou": 0.26171875, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1062587472, + "step": 6193 + }, + { + "epoch": 70.30594900849859, + "grad_norm": 2.687015101448378, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 1062757956, + "step": 6194 + }, + { + "epoch": 70.30594900849859, + "loss": 0.028414569795131683, + "loss_ce": 1.796483229554724e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 1062757956, + "step": 6194 + }, + { + "epoch": 70.3172804532578, + "grad_norm": 3.270507006458377, + "learning_rate": 5e-06, + "loss": 0.033, + "num_input_tokens_seen": 1062929548, + "step": 6195 + }, + { + "epoch": 70.3172804532578, + "loss": 0.03262282907962799, + "loss_ce": 0.0016780063742771745, + "loss_iou": 0.341796875, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1062929548, + "step": 6195 + }, + { + "epoch": 70.328611898017, + "grad_norm": 3.420441285138797, + "learning_rate": 5e-06, + "loss": 0.0632, + "num_input_tokens_seen": 1063101376, + "step": 6196 + }, + { + "epoch": 70.328611898017, + "loss": 0.050790995359420776, + "loss_ce": 4.026099486509338e-05, + "loss_iou": 0.3203125, + "loss_num": 0.0101318359375, + "loss_xval": 0.05078125, + "num_input_tokens_seen": 1063101376, + "step": 6196 + }, + { + "epoch": 70.33994334277621, + "grad_norm": 3.021693814612, + "learning_rate": 5e-06, + "loss": 0.0318, + "num_input_tokens_seen": 1063272232, + "step": 6197 + }, + { + "epoch": 70.33994334277621, + "loss": 0.025040145963430405, + "loss_ce": 0.00013780235894955695, + "loss_iou": 0.25390625, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 1063272232, + "step": 6197 + }, + { + "epoch": 70.35127478753542, + "grad_norm": 2.777533714470306, + "learning_rate": 5e-06, + "loss": 0.0581, + "num_input_tokens_seen": 1063442568, + "step": 6198 + }, + { + "epoch": 70.35127478753542, + "loss": 0.07694695889949799, + "loss_ce": 0.00021050882060080767, + "loss_iou": 0.341796875, + "loss_num": 0.01531982421875, + "loss_xval": 0.07666015625, + "num_input_tokens_seen": 1063442568, + "step": 6198 + }, + { + "epoch": 70.36260623229462, + "grad_norm": 3.0098235443338974, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 1063614480, + "step": 6199 + }, + { + "epoch": 70.36260623229462, + "loss": 0.0431799441576004, + "loss_ce": 1.282903667743085e-05, + "loss_iou": 0.46875, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1063614480, + "step": 6199 + }, + { + "epoch": 70.37393767705383, + "grad_norm": 3.5299715853278415, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 1063786596, + "step": 6200 + }, + { + "epoch": 70.37393767705383, + "loss": 0.0365288183093071, + "loss_ce": 1.4537613424181473e-05, + "loss_iou": 0.4375, + "loss_num": 0.007293701171875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1063786596, + "step": 6200 + }, + { + "epoch": 70.38526912181302, + "grad_norm": 3.2553877384854326, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 1063958432, + "step": 6201 + }, + { + "epoch": 70.38526912181302, + "loss": 0.030552154406905174, + "loss_ce": 4.983360122423619e-05, + "loss_iou": 0.34765625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1063958432, + "step": 6201 + }, + { + "epoch": 70.39660056657223, + "grad_norm": 3.0266139359761626, + "learning_rate": 5e-06, + "loss": 0.0529, + "num_input_tokens_seen": 1064129860, + "step": 6202 + }, + { + "epoch": 70.39660056657223, + "loss": 0.09253968298435211, + "loss_ce": 4.090438596904278e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0185546875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 1064129860, + "step": 6202 + }, + { + "epoch": 70.40793201133144, + "grad_norm": 3.556413640662341, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 1064301736, + "step": 6203 + }, + { + "epoch": 70.40793201133144, + "loss": 0.09668193757534027, + "loss_ce": 3.2764088246040046e-05, + "loss_iou": 0.4140625, + "loss_num": 0.019287109375, + "loss_xval": 0.0966796875, + "num_input_tokens_seen": 1064301736, + "step": 6203 + }, + { + "epoch": 70.41926345609065, + "grad_norm": 3.690919878043717, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 1064473712, + "step": 6204 + }, + { + "epoch": 70.41926345609065, + "loss": 0.03813214600086212, + "loss_ce": 1.569224514241796e-05, + "loss_iou": 0.0625, + "loss_num": 0.00762939453125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 1064473712, + "step": 6204 + }, + { + "epoch": 70.43059490084985, + "grad_norm": 3.4682080094381607, + "learning_rate": 5e-06, + "loss": 0.0464, + "num_input_tokens_seen": 1064643888, + "step": 6205 + }, + { + "epoch": 70.43059490084985, + "loss": 0.0844365656375885, + "loss_ce": 4.020178676000796e-05, + "loss_iou": 0.390625, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 1064643888, + "step": 6205 + }, + { + "epoch": 70.44192634560906, + "grad_norm": 2.621481060362728, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1064814664, + "step": 6206 + }, + { + "epoch": 70.44192634560906, + "loss": 0.07125361263751984, + "loss_ce": 4.084207466803491e-05, + "loss_iou": 0.216796875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 1064814664, + "step": 6206 + }, + { + "epoch": 70.45325779036827, + "grad_norm": 2.4895973992645364, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1064986300, + "step": 6207 + }, + { + "epoch": 70.45325779036827, + "loss": 0.07286182045936584, + "loss_ce": 1.6363295799237676e-05, + "loss_iou": 0.318359375, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 1064986300, + "step": 6207 + }, + { + "epoch": 70.46458923512748, + "grad_norm": 3.4498795981142663, + "learning_rate": 5e-06, + "loss": 0.0659, + "num_input_tokens_seen": 1065158052, + "step": 6208 + }, + { + "epoch": 70.46458923512748, + "loss": 0.09581515192985535, + "loss_ce": 2.0473917174967937e-05, + "loss_iou": 0.1396484375, + "loss_num": 0.0191650390625, + "loss_xval": 0.095703125, + "num_input_tokens_seen": 1065158052, + "step": 6208 + }, + { + "epoch": 70.47592067988668, + "grad_norm": 3.687708636754502, + "learning_rate": 5e-06, + "loss": 0.0449, + "num_input_tokens_seen": 1065328704, + "step": 6209 + }, + { + "epoch": 70.47592067988668, + "loss": 0.04114636778831482, + "loss_ce": 6.970532558625564e-05, + "loss_iou": 0.341796875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1065328704, + "step": 6209 + }, + { + "epoch": 70.48725212464589, + "grad_norm": 3.4492124862625517, + "learning_rate": 5e-06, + "loss": 0.051, + "num_input_tokens_seen": 1065500172, + "step": 6210 + }, + { + "epoch": 70.48725212464589, + "loss": 0.02557147666811943, + "loss_ce": 2.8261265470064245e-05, + "loss_iou": 0.2890625, + "loss_num": 0.005096435546875, + "loss_xval": 0.0255126953125, + "num_input_tokens_seen": 1065500172, + "step": 6210 + }, + { + "epoch": 70.4985835694051, + "grad_norm": 3.253813081175315, + "learning_rate": 5e-06, + "loss": 0.0553, + "num_input_tokens_seen": 1065671412, + "step": 6211 + }, + { + "epoch": 70.4985835694051, + "loss": 0.028385695070028305, + "loss_ce": 3.486586865619756e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00567626953125, + "loss_xval": 0.0283203125, + "num_input_tokens_seen": 1065671412, + "step": 6211 + }, + { + "epoch": 70.5099150141643, + "grad_norm": 2.851093667591735, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 1065842528, + "step": 6212 + }, + { + "epoch": 70.5099150141643, + "loss": 0.03827948495745659, + "loss_ce": 4.096064003533684e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00762939453125, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1065842528, + "step": 6212 + }, + { + "epoch": 70.52124645892351, + "grad_norm": 2.738830959647769, + "learning_rate": 5e-06, + "loss": 0.0704, + "num_input_tokens_seen": 1066013720, + "step": 6213 + }, + { + "epoch": 70.52124645892351, + "loss": 0.027378059923648834, + "loss_ce": 4.9569291149964556e-05, + "loss_iou": 0.4375, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 1066013720, + "step": 6213 + }, + { + "epoch": 70.53257790368272, + "grad_norm": 2.63645023513762, + "learning_rate": 5e-06, + "loss": 0.0354, + "num_input_tokens_seen": 1066184636, + "step": 6214 + }, + { + "epoch": 70.53257790368272, + "loss": 0.033459797501564026, + "loss_ce": 2.7791174943558872e-05, + "loss_iou": 0.4453125, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1066184636, + "step": 6214 + }, + { + "epoch": 70.54390934844193, + "grad_norm": 2.893925016911087, + "learning_rate": 5e-06, + "loss": 0.052, + "num_input_tokens_seen": 1066357252, + "step": 6215 + }, + { + "epoch": 70.54390934844193, + "loss": 0.09516491740942001, + "loss_ce": 2.6371020794613287e-05, + "loss_iou": 0.375, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 1066357252, + "step": 6215 + }, + { + "epoch": 70.55524079320114, + "grad_norm": 2.5061209317037147, + "learning_rate": 5e-06, + "loss": 0.0364, + "num_input_tokens_seen": 1066528296, + "step": 6216 + }, + { + "epoch": 70.55524079320114, + "loss": 0.03342249616980553, + "loss_ce": 2.1009296688134782e-05, + "loss_iou": 0.25390625, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1066528296, + "step": 6216 + }, + { + "epoch": 70.56657223796034, + "grad_norm": 2.7913668419555995, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 1066699960, + "step": 6217 + }, + { + "epoch": 70.56657223796034, + "loss": 0.023264724761247635, + "loss_ce": 4.084629108547233e-05, + "loss_iou": 0.46875, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 1066699960, + "step": 6217 + }, + { + "epoch": 70.57790368271955, + "grad_norm": 3.243231601320792, + "learning_rate": 5e-06, + "loss": 0.0387, + "num_input_tokens_seen": 1066871824, + "step": 6218 + }, + { + "epoch": 70.57790368271955, + "loss": 0.04991789162158966, + "loss_ce": 5.217043508309871e-05, + "loss_iou": 0.33984375, + "loss_num": 0.010009765625, + "loss_xval": 0.0498046875, + "num_input_tokens_seen": 1066871824, + "step": 6218 + }, + { + "epoch": 70.58923512747876, + "grad_norm": 3.1959772068900536, + "learning_rate": 5e-06, + "loss": 0.0432, + "num_input_tokens_seen": 1067042872, + "step": 6219 + }, + { + "epoch": 70.58923512747876, + "loss": 0.06666183471679688, + "loss_ce": 4.19584393966943e-05, + "loss_iou": 0.609375, + "loss_num": 0.0133056640625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 1067042872, + "step": 6219 + }, + { + "epoch": 70.60056657223797, + "grad_norm": 3.7139893822689927, + "learning_rate": 5e-06, + "loss": 0.0369, + "num_input_tokens_seen": 1067213744, + "step": 6220 + }, + { + "epoch": 70.60056657223797, + "loss": 0.04193349927663803, + "loss_ce": 3.2866824767552316e-05, + "loss_iou": 0.380859375, + "loss_num": 0.00836181640625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1067213744, + "step": 6220 + }, + { + "epoch": 70.61189801699717, + "grad_norm": 2.879566284772091, + "learning_rate": 5e-06, + "loss": 0.038, + "num_input_tokens_seen": 1067385320, + "step": 6221 + }, + { + "epoch": 70.61189801699717, + "loss": 0.030481917783617973, + "loss_ce": 5.589351712842472e-05, + "loss_iou": 0.58203125, + "loss_num": 0.006103515625, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1067385320, + "step": 6221 + }, + { + "epoch": 70.62322946175638, + "grad_norm": 2.9003876610040393, + "learning_rate": 5e-06, + "loss": 0.0338, + "num_input_tokens_seen": 1067557464, + "step": 6222 + }, + { + "epoch": 70.62322946175638, + "loss": 0.02531801536679268, + "loss_ce": 1.8942057067761198e-05, + "loss_iou": 0.4375, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 1067557464, + "step": 6222 + }, + { + "epoch": 70.63456090651557, + "grad_norm": 3.1934085265570475, + "learning_rate": 5e-06, + "loss": 0.0512, + "num_input_tokens_seen": 1067728976, + "step": 6223 + }, + { + "epoch": 70.63456090651557, + "loss": 0.03895484656095505, + "loss_ce": 1.4415631085284986e-05, + "loss_iou": 0.69921875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 1067728976, + "step": 6223 + }, + { + "epoch": 70.64589235127478, + "grad_norm": 3.215257014153987, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 1067900632, + "step": 6224 + }, + { + "epoch": 70.64589235127478, + "loss": 0.024823594838380814, + "loss_ce": 5.857882206328213e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00494384765625, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 1067900632, + "step": 6224 + }, + { + "epoch": 70.65722379603399, + "grad_norm": 3.3511728802599636, + "learning_rate": 5e-06, + "loss": 0.0375, + "num_input_tokens_seen": 1068072332, + "step": 6225 + }, + { + "epoch": 70.65722379603399, + "loss": 0.0377066433429718, + "loss_ce": 4.795142012881115e-05, + "loss_iou": 0.44921875, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 1068072332, + "step": 6225 + }, + { + "epoch": 70.6685552407932, + "grad_norm": 3.134878021635956, + "learning_rate": 5e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1068243588, + "step": 6226 + }, + { + "epoch": 70.6685552407932, + "loss": 0.03613574802875519, + "loss_ce": 1.8194015865447e-05, + "loss_iou": 0.2890625, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1068243588, + "step": 6226 + }, + { + "epoch": 70.6798866855524, + "grad_norm": 2.243108998814011, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 1068415116, + "step": 6227 + }, + { + "epoch": 70.6798866855524, + "loss": 0.02113475650548935, + "loss_ce": 1.659293775446713e-05, + "loss_iou": 0.24609375, + "loss_num": 0.00421142578125, + "loss_xval": 0.0211181640625, + "num_input_tokens_seen": 1068415116, + "step": 6227 + }, + { + "epoch": 70.69121813031161, + "grad_norm": 2.949245700762008, + "learning_rate": 5e-06, + "loss": 0.0337, + "num_input_tokens_seen": 1068586076, + "step": 6228 + }, + { + "epoch": 70.69121813031161, + "loss": 0.03588490933179855, + "loss_ce": 2.675283030839637e-05, + "loss_iou": 0.349609375, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1068586076, + "step": 6228 + }, + { + "epoch": 70.70254957507082, + "grad_norm": 3.5533962202450087, + "learning_rate": 5e-06, + "loss": 0.0287, + "num_input_tokens_seen": 1068757740, + "step": 6229 + }, + { + "epoch": 70.70254957507082, + "loss": 0.03083064593374729, + "loss_ce": 7.891836503404193e-06, + "loss_iou": 0.578125, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 1068757740, + "step": 6229 + }, + { + "epoch": 70.71388101983003, + "grad_norm": 3.391818726883472, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 1068929096, + "step": 6230 + }, + { + "epoch": 70.71388101983003, + "loss": 0.04288551211357117, + "loss_ce": 5.409139703260735e-05, + "loss_iou": 0.296875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 1068929096, + "step": 6230 + }, + { + "epoch": 70.72521246458923, + "grad_norm": 3.249833817117871, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 1069100744, + "step": 6231 + }, + { + "epoch": 70.72521246458923, + "loss": 0.03534594550728798, + "loss_ce": 8.28854099381715e-05, + "loss_iou": 0.365234375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1069100744, + "step": 6231 + }, + { + "epoch": 70.73654390934844, + "grad_norm": 3.3667022486172273, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 1069272520, + "step": 6232 + }, + { + "epoch": 70.73654390934844, + "loss": 0.027780842036008835, + "loss_ce": 2.5106468456215225e-05, + "loss_iou": 0.359375, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 1069272520, + "step": 6232 + }, + { + "epoch": 70.74787535410765, + "grad_norm": 3.240958815165247, + "learning_rate": 5e-06, + "loss": 0.0641, + "num_input_tokens_seen": 1069443876, + "step": 6233 + }, + { + "epoch": 70.74787535410765, + "loss": 0.043085820972919464, + "loss_ce": 7.12929613655433e-05, + "loss_iou": 0.2421875, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 1069443876, + "step": 6233 + }, + { + "epoch": 70.75920679886686, + "grad_norm": 3.2056680044434955, + "learning_rate": 5e-06, + "loss": 0.0486, + "num_input_tokens_seen": 1069614980, + "step": 6234 + }, + { + "epoch": 70.75920679886686, + "loss": 0.08273632824420929, + "loss_ce": 9.47221415117383e-05, + "loss_iou": 0.138671875, + "loss_num": 0.0164794921875, + "loss_xval": 0.08251953125, + "num_input_tokens_seen": 1069614980, + "step": 6234 + }, + { + "epoch": 70.77053824362606, + "grad_norm": 3.260752791093577, + "learning_rate": 5e-06, + "loss": 0.0362, + "num_input_tokens_seen": 1069785840, + "step": 6235 + }, + { + "epoch": 70.77053824362606, + "loss": 0.04071637988090515, + "loss_ce": 5.928438895352883e-06, + "loss_iou": 0.388671875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1069785840, + "step": 6235 + }, + { + "epoch": 70.78186968838527, + "grad_norm": 4.037914205742755, + "learning_rate": 5e-06, + "loss": 0.0469, + "num_input_tokens_seen": 1069957884, + "step": 6236 + }, + { + "epoch": 70.78186968838527, + "loss": 0.03360344097018242, + "loss_ce": 0.00011039771197829396, + "loss_iou": 0.478515625, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1069957884, + "step": 6236 + }, + { + "epoch": 70.79320113314448, + "grad_norm": 2.606737444567429, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 1070128984, + "step": 6237 + }, + { + "epoch": 70.79320113314448, + "loss": 0.03347121924161911, + "loss_ce": 5.446997965918854e-05, + "loss_iou": 0.55859375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1070128984, + "step": 6237 + }, + { + "epoch": 70.80453257790369, + "grad_norm": 2.4293053177601283, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 1070300848, + "step": 6238 + }, + { + "epoch": 70.80453257790369, + "loss": 0.0309780091047287, + "loss_ce": 4.844508657697588e-05, + "loss_iou": 0.2578125, + "loss_num": 0.006195068359375, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 1070300848, + "step": 6238 + }, + { + "epoch": 70.8158640226629, + "grad_norm": 2.8708135379197763, + "learning_rate": 5e-06, + "loss": 0.0516, + "num_input_tokens_seen": 1070471432, + "step": 6239 + }, + { + "epoch": 70.8158640226629, + "loss": 0.04072948172688484, + "loss_ce": 3.429186108405702e-05, + "loss_iou": 0.32421875, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1070471432, + "step": 6239 + }, + { + "epoch": 70.8271954674221, + "grad_norm": 3.569295927151639, + "learning_rate": 5e-06, + "loss": 0.0809, + "num_input_tokens_seen": 1070643176, + "step": 6240 + }, + { + "epoch": 70.8271954674221, + "loss": 0.07012691348791122, + "loss_ce": 4.329534567659721e-05, + "loss_iou": 0.4375, + "loss_num": 0.0140380859375, + "loss_xval": 0.0703125, + "num_input_tokens_seen": 1070643176, + "step": 6240 + }, + { + "epoch": 70.83852691218131, + "grad_norm": 3.2310263945865003, + "learning_rate": 5e-06, + "loss": 0.0293, + "num_input_tokens_seen": 1070814120, + "step": 6241 + }, + { + "epoch": 70.83852691218131, + "loss": 0.029400072991847992, + "loss_ce": 2.6904177502729e-05, + "loss_iou": 0.47265625, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 1070814120, + "step": 6241 + }, + { + "epoch": 70.84985835694052, + "grad_norm": 3.6011500650156387, + "learning_rate": 5e-06, + "loss": 0.0638, + "num_input_tokens_seen": 1070985284, + "step": 6242 + }, + { + "epoch": 70.84985835694052, + "loss": 0.03525716811418533, + "loss_ce": 0.00020772943389602005, + "loss_iou": 0.3046875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1070985284, + "step": 6242 + }, + { + "epoch": 70.86118980169972, + "grad_norm": 3.28721032710094, + "learning_rate": 5e-06, + "loss": 0.0487, + "num_input_tokens_seen": 1071157168, + "step": 6243 + }, + { + "epoch": 70.86118980169972, + "loss": 0.05845300480723381, + "loss_ce": 1.1842974345199764e-05, + "loss_iou": 0.390625, + "loss_num": 0.01171875, + "loss_xval": 0.058349609375, + "num_input_tokens_seen": 1071157168, + "step": 6243 + }, + { + "epoch": 70.87252124645893, + "grad_norm": 3.649765057288374, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1071329260, + "step": 6244 + }, + { + "epoch": 70.87252124645893, + "loss": 0.05465655401349068, + "loss_ce": 1.4832143278908916e-05, + "loss_iou": 0.51171875, + "loss_num": 0.01092529296875, + "loss_xval": 0.0546875, + "num_input_tokens_seen": 1071329260, + "step": 6244 + }, + { + "epoch": 70.88385269121812, + "grad_norm": 3.344622770963368, + "learning_rate": 5e-06, + "loss": 0.0382, + "num_input_tokens_seen": 1071500976, + "step": 6245 + }, + { + "epoch": 70.88385269121812, + "loss": 0.03332934528589249, + "loss_ce": 1.9410406821407378e-05, + "loss_iou": 0.328125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1071500976, + "step": 6245 + }, + { + "epoch": 70.89518413597733, + "grad_norm": 3.415172816286984, + "learning_rate": 5e-06, + "loss": 0.0703, + "num_input_tokens_seen": 1071672856, + "step": 6246 + }, + { + "epoch": 70.89518413597733, + "loss": 0.04589034616947174, + "loss_ce": 2.2425159841077402e-05, + "loss_iou": 0.171875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 1071672856, + "step": 6246 + }, + { + "epoch": 70.90651558073654, + "grad_norm": 3.3976247946749623, + "learning_rate": 5e-06, + "loss": 0.0533, + "num_input_tokens_seen": 1071844692, + "step": 6247 + }, + { + "epoch": 70.90651558073654, + "loss": 0.10620458424091339, + "loss_ce": 1.8668979464564472e-05, + "loss_iou": 0.32421875, + "loss_num": 0.021240234375, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 1071844692, + "step": 6247 + }, + { + "epoch": 70.91784702549575, + "grad_norm": 3.409508605869081, + "learning_rate": 5e-06, + "loss": 0.0298, + "num_input_tokens_seen": 1072016720, + "step": 6248 + }, + { + "epoch": 70.91784702549575, + "loss": 0.03481808304786682, + "loss_ce": 2.804333416861482e-05, + "loss_iou": 0.4453125, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1072016720, + "step": 6248 + }, + { + "epoch": 70.92917847025495, + "grad_norm": 3.3234102062615842, + "learning_rate": 5e-06, + "loss": 0.0616, + "num_input_tokens_seen": 1072187036, + "step": 6249 + }, + { + "epoch": 70.92917847025495, + "loss": 0.04293372482061386, + "loss_ce": 7.178887608461082e-05, + "loss_iou": 0.46875, + "loss_num": 0.008544921875, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 1072187036, + "step": 6249 + }, + { + "epoch": 70.94050991501416, + "grad_norm": 3.2652082636012816, + "learning_rate": 5e-06, + "loss": 0.0463, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.94050991501416, + "eval_seeclick_CIoU": 0.5332036912441254, + "eval_seeclick_GIoU": 0.537353515625, + "eval_seeclick_IoU": 0.5715103447437286, + "eval_seeclick_MAE_all": 0.06658566743135452, + "eval_seeclick_MAE_h": 0.03053866233676672, + "eval_seeclick_MAE_w": 0.10175251960754395, + "eval_seeclick_MAE_x": 0.10434956476092339, + "eval_seeclick_MAE_y": 0.029701927676796913, + "eval_seeclick_NUM_probability": 0.999990314245224, + "eval_seeclick_inside_bbox": 0.8778409063816071, + "eval_seeclick_loss": 1.0003641843795776, + "eval_seeclick_loss_ce": 0.7595674097537994, + "eval_seeclick_loss_iou": 0.527099609375, + "eval_seeclick_loss_num": 0.0487518310546875, + "eval_seeclick_loss_xval": 0.24383544921875, + "eval_seeclick_runtime": 69.491, + "eval_seeclick_samples_per_second": 0.619, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.94050991501416, + "eval_icons_CIoU": 0.7184293568134308, + "eval_icons_GIoU": 0.7168136835098267, + "eval_icons_IoU": 0.7327116429805756, + "eval_icons_MAE_all": 0.0377997811883688, + "eval_icons_MAE_h": 0.033207567408680916, + "eval_icons_MAE_w": 0.04584744665771723, + "eval_icons_MAE_x": 0.036356305703520775, + "eval_icons_MAE_y": 0.0357878003269434, + "eval_icons_NUM_probability": 0.9997987747192383, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.14739839732646942, + "eval_icons_loss_ce": 0.005564313847571611, + "eval_icons_loss_iou": 0.6092529296875, + "eval_icons_loss_num": 0.0262298583984375, + "eval_icons_loss_xval": 0.131103515625, + "eval_icons_runtime": 79.2524, + "eval_icons_samples_per_second": 0.631, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.94050991501416, + "eval_screenspot_CIoU": 0.6451834638913473, + "eval_screenspot_GIoU": 0.6446922222773234, + "eval_screenspot_IoU": 0.6738998492558798, + "eval_screenspot_MAE_all": 0.0641996053357919, + "eval_screenspot_MAE_h": 0.03544317185878754, + "eval_screenspot_MAE_w": 0.11415816657245159, + "eval_screenspot_MAE_x": 0.0754590214540561, + "eval_screenspot_MAE_y": 0.031738064251840115, + "eval_screenspot_NUM_probability": 0.9999789396921793, + "eval_screenspot_inside_bbox": 0.9183333317438761, + "eval_screenspot_loss": 0.2720736265182495, + "eval_screenspot_loss_ce": 0.015694058500230312, + "eval_screenspot_loss_iou": 0.458251953125, + "eval_screenspot_loss_num": 0.051142374674479164, + "eval_screenspot_loss_xval": 0.2555033365885417, + "eval_screenspot_runtime": 154.6758, + "eval_screenspot_samples_per_second": 0.575, + "eval_screenspot_steps_per_second": 0.019, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.94050991501416, + "eval_compot_CIoU": 0.8608261346817017, + "eval_compot_GIoU": 0.8591076731681824, + "eval_compot_IoU": 0.8748774826526642, + "eval_compot_MAE_all": 0.026053864508867264, + "eval_compot_MAE_h": 0.02242235280573368, + "eval_compot_MAE_w": 0.030955156311392784, + "eval_compot_MAE_x": 0.028336072340607643, + "eval_compot_MAE_y": 0.02250187285244465, + "eval_compot_NUM_probability": 0.99997878074646, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.08311405032873154, + "eval_compot_loss_ce": 1.6617660548945423e-05, + "eval_compot_loss_iou": 0.4324951171875, + "eval_compot_loss_num": 0.014416694641113281, + "eval_compot_loss_xval": 0.07208251953125, + "eval_compot_runtime": 82.8768, + "eval_compot_samples_per_second": 0.603, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.94050991501416, + "eval_custom_ui_MAE_all": 0.018017619382590055, + "eval_custom_ui_MAE_x": 0.029217696748673916, + "eval_custom_ui_MAE_y": 0.00681754294782877, + "eval_custom_ui_NUM_probability": 0.9999310374259949, + "eval_custom_ui_loss": 0.20993298292160034, + "eval_custom_ui_loss_ce": 0.1193423680961132, + "eval_custom_ui_loss_num": 0.017267227172851562, + "eval_custom_ui_loss_xval": 0.08635711669921875, + "eval_custom_ui_runtime": 60.0096, + "eval_custom_ui_samples_per_second": 0.833, + "eval_custom_ui_steps_per_second": 0.033, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.94050991501416, + "loss": 0.25332245230674744, + "loss_ce": 0.14626678824424744, + "loss_iou": 0.0, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 1072358776, + "step": 6250 + }, + { + "epoch": 70.95184135977337, + "grad_norm": 2.6022644252433853, + "learning_rate": 5e-06, + "loss": 0.0398, + "num_input_tokens_seen": 1072531044, + "step": 6251 + }, + { + "epoch": 70.95184135977337, + "loss": 0.028813624754548073, + "loss_ce": 2.792044688249007e-05, + "loss_iou": 0.3515625, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 1072531044, + "step": 6251 + }, + { + "epoch": 70.96317280453258, + "grad_norm": 3.6093790969367756, + "learning_rate": 5e-06, + "loss": 0.0331, + "num_input_tokens_seen": 1072702736, + "step": 6252 + }, + { + "epoch": 70.96317280453258, + "loss": 0.03342428058385849, + "loss_ce": 3.805007872870192e-05, + "loss_iou": 0.5, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1072702736, + "step": 6252 + }, + { + "epoch": 70.97450424929178, + "grad_norm": 3.51340177743106, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 1072874292, + "step": 6253 + }, + { + "epoch": 70.97450424929178, + "loss": 0.05357645824551582, + "loss_ce": 3.336972076795064e-05, + "loss_iou": 0.212890625, + "loss_num": 0.0107421875, + "loss_xval": 0.053466796875, + "num_input_tokens_seen": 1072874292, + "step": 6253 + }, + { + "epoch": 70.98583569405099, + "grad_norm": 3.7240316323212315, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 1073045900, + "step": 6254 + }, + { + "epoch": 70.98583569405099, + "loss": 0.03787202015519142, + "loss_ce": 4.548008291749284e-05, + "loss_iou": 0.294921875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 1073045900, + "step": 6254 + }, + { + "epoch": 70.9971671388102, + "grad_norm": 3.090858616541715, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 1073216328, + "step": 6255 + }, + { + "epoch": 70.9971671388102, + "loss": 0.0370880588889122, + "loss_ce": 9.20179627428297e-06, + "loss_iou": 0.439453125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 1073216328, + "step": 6255 + }, + { + "epoch": 70.9971671388102, + "loss": 0.07131224125623703, + "loss_ce": 2.318295810255222e-05, + "loss_iou": 0.466796875, + "loss_num": 0.0142822265625, + "loss_xval": 0.0712890625, + "num_input_tokens_seen": 1073259208, + "step": 6255 + }, + { + "epoch": 71.0084985835694, + "grad_norm": 3.8275486719057987, + "learning_rate": 5e-06, + "loss": 0.0525, + "num_input_tokens_seen": 1073387672, + "step": 6256 + }, + { + "epoch": 71.0084985835694, + "loss": 0.040035270154476166, + "loss_ce": 1.9094208255410194e-05, + "loss_iou": 0.0, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 1073387672, + "step": 6256 + }, + { + "epoch": 71.01983002832861, + "grad_norm": 3.981111185185952, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 1073559764, + "step": 6257 + }, + { + "epoch": 71.01983002832861, + "loss": 0.02991669252514839, + "loss_ce": 2.4726070478209294e-05, + "loss_iou": 0.46875, + "loss_num": 0.0059814453125, + "loss_xval": 0.0299072265625, + "num_input_tokens_seen": 1073559764, + "step": 6257 + }, + { + "epoch": 71.03116147308782, + "grad_norm": 5.3028674476185165, + "learning_rate": 5e-06, + "loss": 0.047, + "num_input_tokens_seen": 1073731528, + "step": 6258 + }, + { + "epoch": 71.03116147308782, + "loss": 0.03628016263246536, + "loss_ce": 5.57987586944364e-05, + "loss_iou": 0.28125, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1073731528, + "step": 6258 + }, + { + "epoch": 71.04249291784703, + "grad_norm": 3.6641537516413427, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 1073902840, + "step": 6259 + }, + { + "epoch": 71.04249291784703, + "loss": 0.041697751730680466, + "loss_ce": 1.073991552402731e-05, + "loss_iou": 0.515625, + "loss_num": 0.00836181640625, + "loss_xval": 0.041748046875, + "num_input_tokens_seen": 1073902840, + "step": 6259 + }, + { + "epoch": 71.05382436260624, + "grad_norm": 3.650230366892165, + "learning_rate": 5e-06, + "loss": 0.0417, + "num_input_tokens_seen": 1074074256, + "step": 6260 + }, + { + "epoch": 71.05382436260624, + "loss": 0.05637975037097931, + "loss_ce": 2.9039892979199067e-05, + "loss_iou": 0.365234375, + "loss_num": 0.01129150390625, + "loss_xval": 0.056396484375, + "num_input_tokens_seen": 1074074256, + "step": 6260 + }, + { + "epoch": 71.06515580736544, + "grad_norm": 3.377017852906457, + "learning_rate": 5e-06, + "loss": 0.0771, + "num_input_tokens_seen": 1074244400, + "step": 6261 + }, + { + "epoch": 71.06515580736544, + "loss": 0.05887112393975258, + "loss_ce": 1.7974434740608558e-05, + "loss_iou": 0.1865234375, + "loss_num": 0.01171875, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 1074244400, + "step": 6261 + }, + { + "epoch": 71.07648725212465, + "grad_norm": 3.519817402950062, + "learning_rate": 5e-06, + "loss": 0.0386, + "num_input_tokens_seen": 1074416888, + "step": 6262 + }, + { + "epoch": 71.07648725212465, + "loss": 0.04325985908508301, + "loss_ce": 3.171001299051568e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1074416888, + "step": 6262 + }, + { + "epoch": 71.08781869688386, + "grad_norm": 3.730597520861243, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1074589212, + "step": 6263 + }, + { + "epoch": 71.08781869688386, + "loss": 0.030119143426418304, + "loss_ce": 2.881279942812398e-05, + "loss_iou": 0.51953125, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1074589212, + "step": 6263 + }, + { + "epoch": 71.09915014164307, + "grad_norm": 3.427052563276431, + "learning_rate": 5e-06, + "loss": 0.0337, + "num_input_tokens_seen": 1074759160, + "step": 6264 + }, + { + "epoch": 71.09915014164307, + "loss": 0.029640046879649162, + "loss_ce": 3.7994970625732094e-05, + "loss_iou": 0.376953125, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 1074759160, + "step": 6264 + }, + { + "epoch": 71.11048158640227, + "grad_norm": 2.8740436245657053, + "learning_rate": 5e-06, + "loss": 0.0497, + "num_input_tokens_seen": 1074929984, + "step": 6265 + }, + { + "epoch": 71.11048158640227, + "loss": 0.043136321008205414, + "loss_ce": 3.024235411430709e-05, + "loss_iou": 0.3984375, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1074929984, + "step": 6265 + }, + { + "epoch": 71.12181303116148, + "grad_norm": 2.695746382285947, + "learning_rate": 5e-06, + "loss": 0.0366, + "num_input_tokens_seen": 1075102372, + "step": 6266 + }, + { + "epoch": 71.12181303116148, + "loss": 0.026429925113916397, + "loss_ce": 3.221843144274317e-05, + "loss_iou": 0.0, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 1075102372, + "step": 6266 + }, + { + "epoch": 71.13314447592067, + "grad_norm": 2.9772407460159775, + "learning_rate": 5e-06, + "loss": 0.0306, + "num_input_tokens_seen": 1075274072, + "step": 6267 + }, + { + "epoch": 71.13314447592067, + "loss": 0.025593260303139687, + "loss_ce": 1.9531062207533978e-05, + "loss_iou": 0.67578125, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 1075274072, + "step": 6267 + }, + { + "epoch": 71.14447592067988, + "grad_norm": 2.8037580673440234, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 1075445940, + "step": 6268 + }, + { + "epoch": 71.14447592067988, + "loss": 0.026813849806785583, + "loss_ce": 1.941429582075216e-05, + "loss_iou": 0.1357421875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 1075445940, + "step": 6268 + }, + { + "epoch": 71.15580736543909, + "grad_norm": 2.5285215725299155, + "learning_rate": 5e-06, + "loss": 0.037, + "num_input_tokens_seen": 1075617656, + "step": 6269 + }, + { + "epoch": 71.15580736543909, + "loss": 0.031994231045246124, + "loss_ce": 2.7069992938777432e-05, + "loss_iou": 0.6328125, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1075617656, + "step": 6269 + }, + { + "epoch": 71.1671388101983, + "grad_norm": 3.0378333829680857, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 1075789716, + "step": 6270 + }, + { + "epoch": 71.1671388101983, + "loss": 0.022927870973944664, + "loss_ce": 1.679891829553526e-05, + "loss_iou": 0.28125, + "loss_num": 0.00457763671875, + "loss_xval": 0.02294921875, + "num_input_tokens_seen": 1075789716, + "step": 6270 + }, + { + "epoch": 71.1784702549575, + "grad_norm": 2.9116909751112456, + "learning_rate": 5e-06, + "loss": 0.064, + "num_input_tokens_seen": 1075961440, + "step": 6271 + }, + { + "epoch": 71.1784702549575, + "loss": 0.02443828620016575, + "loss_ce": 1.6594827684457414e-05, + "loss_iou": 0.1416015625, + "loss_num": 0.0048828125, + "loss_xval": 0.0244140625, + "num_input_tokens_seen": 1075961440, + "step": 6271 + }, + { + "epoch": 71.18980169971671, + "grad_norm": 3.0447241125512035, + "learning_rate": 5e-06, + "loss": 0.0687, + "num_input_tokens_seen": 1076131520, + "step": 6272 + }, + { + "epoch": 71.18980169971671, + "loss": 0.12112133949995041, + "loss_ce": 2.7589607270783745e-05, + "loss_iou": 0.0, + "loss_num": 0.0242919921875, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 1076131520, + "step": 6272 + }, + { + "epoch": 71.20113314447592, + "grad_norm": 3.0092335967304904, + "learning_rate": 5e-06, + "loss": 0.0397, + "num_input_tokens_seen": 1076300164, + "step": 6273 + }, + { + "epoch": 71.20113314447592, + "loss": 0.039823926985263824, + "loss_ce": 5.952330684522167e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 1076300164, + "step": 6273 + }, + { + "epoch": 71.21246458923513, + "grad_norm": 3.7401573267971195, + "learning_rate": 5e-06, + "loss": 0.0409, + "num_input_tokens_seen": 1076472136, + "step": 6274 + }, + { + "epoch": 71.21246458923513, + "loss": 0.050376009196043015, + "loss_ce": 2.200378366978839e-05, + "loss_iou": 0.44921875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 1076472136, + "step": 6274 + }, + { + "epoch": 71.22379603399433, + "grad_norm": 4.310655285148014, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1076642556, + "step": 6275 + }, + { + "epoch": 71.22379603399433, + "loss": 0.032685473561286926, + "loss_ce": 3.1665284041082487e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 1076642556, + "step": 6275 + }, + { + "epoch": 71.23512747875354, + "grad_norm": 2.9037329739437503, + "learning_rate": 5e-06, + "loss": 0.044, + "num_input_tokens_seen": 1076814060, + "step": 6276 + }, + { + "epoch": 71.23512747875354, + "loss": 0.0578373521566391, + "loss_ce": 2.1802028641104698e-05, + "loss_iou": 0.08544921875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 1076814060, + "step": 6276 + }, + { + "epoch": 71.24645892351275, + "grad_norm": 2.839188700892403, + "learning_rate": 5e-06, + "loss": 0.0371, + "num_input_tokens_seen": 1076985036, + "step": 6277 + }, + { + "epoch": 71.24645892351275, + "loss": 0.04363429546356201, + "loss_ce": 2.4674371161381714e-05, + "loss_iou": 0.443359375, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 1076985036, + "step": 6277 + }, + { + "epoch": 71.25779036827196, + "grad_norm": 3.8292807486423244, + "learning_rate": 5e-06, + "loss": 0.0478, + "num_input_tokens_seen": 1077156980, + "step": 6278 + }, + { + "epoch": 71.25779036827196, + "loss": 0.0277617909014225, + "loss_ce": 2.1310916054062545e-05, + "loss_iou": 0.421875, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 1077156980, + "step": 6278 + }, + { + "epoch": 71.26912181303116, + "grad_norm": 4.019725194955129, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 1077328852, + "step": 6279 + }, + { + "epoch": 71.26912181303116, + "loss": 0.05417455732822418, + "loss_ce": 2.1115471099619754e-05, + "loss_iou": 0.40234375, + "loss_num": 0.01080322265625, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 1077328852, + "step": 6279 + }, + { + "epoch": 71.28045325779037, + "grad_norm": 3.2862892866898465, + "learning_rate": 5e-06, + "loss": 0.0658, + "num_input_tokens_seen": 1077498664, + "step": 6280 + }, + { + "epoch": 71.28045325779037, + "loss": 0.11885473132133484, + "loss_ce": 4.980745870852843e-05, + "loss_iou": 0.2109375, + "loss_num": 0.0238037109375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 1077498664, + "step": 6280 + }, + { + "epoch": 71.29178470254958, + "grad_norm": 2.9884669215106188, + "learning_rate": 5e-06, + "loss": 0.0368, + "num_input_tokens_seen": 1077669776, + "step": 6281 + }, + { + "epoch": 71.29178470254958, + "loss": 0.03479103744029999, + "loss_ce": 2.388862230873201e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1077669776, + "step": 6281 + }, + { + "epoch": 71.30311614730878, + "grad_norm": 2.472156314154236, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 1077841404, + "step": 6282 + }, + { + "epoch": 71.30311614730878, + "loss": 0.02248840592801571, + "loss_ce": 5.79866609768942e-05, + "loss_iou": 0.185546875, + "loss_num": 0.004486083984375, + "loss_xval": 0.0224609375, + "num_input_tokens_seen": 1077841404, + "step": 6282 + }, + { + "epoch": 71.31444759206799, + "grad_norm": 2.4002984493697572, + "learning_rate": 5e-06, + "loss": 0.0284, + "num_input_tokens_seen": 1078013028, + "step": 6283 + }, + { + "epoch": 71.31444759206799, + "loss": 0.02115441858768463, + "loss_ce": 2.099564517266117e-05, + "loss_iou": 0.35546875, + "loss_num": 0.00421142578125, + "loss_xval": 0.0211181640625, + "num_input_tokens_seen": 1078013028, + "step": 6283 + }, + { + "epoch": 71.3257790368272, + "grad_norm": 2.7621721933387104, + "learning_rate": 5e-06, + "loss": 0.0387, + "num_input_tokens_seen": 1078184604, + "step": 6284 + }, + { + "epoch": 71.3257790368272, + "loss": 0.043080884963274, + "loss_ce": 5.110111305839382e-05, + "loss_iou": 0.443359375, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 1078184604, + "step": 6284 + }, + { + "epoch": 71.33711048158641, + "grad_norm": 2.806500798875873, + "learning_rate": 5e-06, + "loss": 0.0335, + "num_input_tokens_seen": 1078356264, + "step": 6285 + }, + { + "epoch": 71.33711048158641, + "loss": 0.03789878264069557, + "loss_ce": 2.6467809220775962e-05, + "loss_iou": 0.294921875, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 1078356264, + "step": 6285 + }, + { + "epoch": 71.34844192634561, + "grad_norm": 2.671688308401394, + "learning_rate": 5e-06, + "loss": 0.0415, + "num_input_tokens_seen": 1078528144, + "step": 6286 + }, + { + "epoch": 71.34844192634561, + "loss": 0.024138683453202248, + "loss_ce": 2.9797256502206437e-05, + "loss_iou": 0.31640625, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 1078528144, + "step": 6286 + }, + { + "epoch": 71.35977337110482, + "grad_norm": 3.112345052585686, + "learning_rate": 5e-06, + "loss": 0.0732, + "num_input_tokens_seen": 1078698068, + "step": 6287 + }, + { + "epoch": 71.35977337110482, + "loss": 0.08573763817548752, + "loss_ce": 1.3763445167569444e-05, + "loss_iou": 0.421875, + "loss_num": 0.01708984375, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 1078698068, + "step": 6287 + }, + { + "epoch": 71.37110481586403, + "grad_norm": 3.1752111271016727, + "learning_rate": 5e-06, + "loss": 0.0594, + "num_input_tokens_seen": 1078869732, + "step": 6288 + }, + { + "epoch": 71.37110481586403, + "loss": 0.051510654389858246, + "loss_ce": 1.2237802366144024e-05, + "loss_iou": 0.322265625, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 1078869732, + "step": 6288 + }, + { + "epoch": 71.38243626062322, + "grad_norm": 3.397525029542879, + "learning_rate": 5e-06, + "loss": 0.0375, + "num_input_tokens_seen": 1079040728, + "step": 6289 + }, + { + "epoch": 71.38243626062322, + "loss": 0.0289616659283638, + "loss_ce": 0.0004887655377388, + "loss_iou": 0.333984375, + "loss_num": 0.005706787109375, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 1079040728, + "step": 6289 + }, + { + "epoch": 71.39376770538243, + "grad_norm": 3.0440196126369647, + "learning_rate": 5e-06, + "loss": 0.063, + "num_input_tokens_seen": 1079212388, + "step": 6290 + }, + { + "epoch": 71.39376770538243, + "loss": 0.029235374182462692, + "loss_ce": 3.0051163776079193e-05, + "loss_iou": 0.28125, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 1079212388, + "step": 6290 + }, + { + "epoch": 71.40509915014164, + "grad_norm": 3.155561962055097, + "learning_rate": 5e-06, + "loss": 0.0333, + "num_input_tokens_seen": 1079384088, + "step": 6291 + }, + { + "epoch": 71.40509915014164, + "loss": 0.0399649553000927, + "loss_ce": 1.7446262063458562e-05, + "loss_iou": 0.5, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 1079384088, + "step": 6291 + }, + { + "epoch": 71.41643059490085, + "grad_norm": 3.6339630246857952, + "learning_rate": 5e-06, + "loss": 0.061, + "num_input_tokens_seen": 1079556112, + "step": 6292 + }, + { + "epoch": 71.41643059490085, + "loss": 0.08425532281398773, + "loss_ce": 1.1549636838026345e-05, + "loss_iou": 0.3828125, + "loss_num": 0.016845703125, + "loss_xval": 0.08447265625, + "num_input_tokens_seen": 1079556112, + "step": 6292 + }, + { + "epoch": 71.42776203966005, + "grad_norm": 3.1195031790617396, + "learning_rate": 5e-06, + "loss": 0.0356, + "num_input_tokens_seen": 1079728104, + "step": 6293 + }, + { + "epoch": 71.42776203966005, + "loss": 0.03666820749640465, + "loss_ce": 4.711440487881191e-05, + "loss_iou": 0.357421875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1079728104, + "step": 6293 + }, + { + "epoch": 71.43909348441926, + "grad_norm": 2.52210980621293, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 1079898968, + "step": 6294 + }, + { + "epoch": 71.43909348441926, + "loss": 0.04158343747258186, + "loss_ce": 1.849461295932997e-05, + "loss_iou": 0.392578125, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 1079898968, + "step": 6294 + }, + { + "epoch": 71.45042492917847, + "grad_norm": 2.592978632615565, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 1080067824, + "step": 6295 + }, + { + "epoch": 71.45042492917847, + "loss": 0.05258346349000931, + "loss_ce": 3.219364589313045e-05, + "loss_iou": 0.248046875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 1080067824, + "step": 6295 + }, + { + "epoch": 71.46175637393767, + "grad_norm": 3.4400110356131206, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1080239864, + "step": 6296 + }, + { + "epoch": 71.46175637393767, + "loss": 0.04466085135936737, + "loss_ce": 1.3634474271384533e-05, + "loss_iou": 0.408203125, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 1080239864, + "step": 6296 + }, + { + "epoch": 71.47308781869688, + "grad_norm": 3.4743187480599094, + "learning_rate": 5e-06, + "loss": 0.0492, + "num_input_tokens_seen": 1080411512, + "step": 6297 + }, + { + "epoch": 71.47308781869688, + "loss": 0.06579624116420746, + "loss_ce": 1.560635973874014e-05, + "loss_iou": 0.1640625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 1080411512, + "step": 6297 + }, + { + "epoch": 71.48441926345609, + "grad_norm": 3.3283175783501693, + "learning_rate": 5e-06, + "loss": 0.0441, + "num_input_tokens_seen": 1080583192, + "step": 6298 + }, + { + "epoch": 71.48441926345609, + "loss": 0.03355266526341438, + "loss_ce": 1.3849156857759226e-05, + "loss_iou": 0.54296875, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1080583192, + "step": 6298 + }, + { + "epoch": 71.4957507082153, + "grad_norm": 3.848911347362622, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1080753388, + "step": 6299 + }, + { + "epoch": 71.4957507082153, + "loss": 0.029399413615465164, + "loss_ce": 2.6242038074997254e-05, + "loss_iou": 0.43359375, + "loss_num": 0.005859375, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 1080753388, + "step": 6299 + }, + { + "epoch": 71.5070821529745, + "grad_norm": 7.918903226565688, + "learning_rate": 5e-06, + "loss": 0.043, + "num_input_tokens_seen": 1080925680, + "step": 6300 + }, + { + "epoch": 71.5070821529745, + "loss": 0.044917866587638855, + "loss_ce": 2.6508409064263105e-05, + "loss_iou": 0.294921875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 1080925680, + "step": 6300 + }, + { + "epoch": 71.51841359773371, + "grad_norm": 3.2990587227078425, + "learning_rate": 5e-06, + "loss": 0.0553, + "num_input_tokens_seen": 1081097496, + "step": 6301 + }, + { + "epoch": 71.51841359773371, + "loss": 0.12951254844665527, + "loss_ce": 1.1201225788681768e-05, + "loss_iou": 0.349609375, + "loss_num": 0.02587890625, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 1081097496, + "step": 6301 + }, + { + "epoch": 71.52974504249292, + "grad_norm": 3.2439060139701823, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1081269304, + "step": 6302 + }, + { + "epoch": 71.52974504249292, + "loss": 0.04331529513001442, + "loss_ce": 4.137136784265749e-05, + "loss_iou": 0.390625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1081269304, + "step": 6302 + }, + { + "epoch": 71.54107648725213, + "grad_norm": 3.060696665030482, + "learning_rate": 5e-06, + "loss": 0.0636, + "num_input_tokens_seen": 1081440292, + "step": 6303 + }, + { + "epoch": 71.54107648725213, + "loss": 0.06857012212276459, + "loss_ce": 8.868220902513713e-05, + "loss_iou": 0.30859375, + "loss_num": 0.01373291015625, + "loss_xval": 0.068359375, + "num_input_tokens_seen": 1081440292, + "step": 6303 + }, + { + "epoch": 71.55240793201133, + "grad_norm": 3.340584040354602, + "learning_rate": 5e-06, + "loss": 0.067, + "num_input_tokens_seen": 1081610408, + "step": 6304 + }, + { + "epoch": 71.55240793201133, + "loss": 0.1014086976647377, + "loss_ce": 9.033540118252859e-05, + "loss_iou": 0.515625, + "loss_num": 0.020263671875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 1081610408, + "step": 6304 + }, + { + "epoch": 71.56373937677054, + "grad_norm": 3.4659722910630024, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 1081782428, + "step": 6305 + }, + { + "epoch": 71.56373937677054, + "loss": 0.04283825308084488, + "loss_ce": 2.208867226727307e-05, + "loss_iou": 0.45703125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 1081782428, + "step": 6305 + }, + { + "epoch": 71.57507082152975, + "grad_norm": 3.580784678437838, + "learning_rate": 5e-06, + "loss": 0.0444, + "num_input_tokens_seen": 1081954660, + "step": 6306 + }, + { + "epoch": 71.57507082152975, + "loss": 0.07088121771812439, + "loss_ce": 3.465483314357698e-05, + "loss_iou": 0.39453125, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 1081954660, + "step": 6306 + }, + { + "epoch": 71.58640226628896, + "grad_norm": 3.5295961317650923, + "learning_rate": 5e-06, + "loss": 0.0432, + "num_input_tokens_seen": 1082124756, + "step": 6307 + }, + { + "epoch": 71.58640226628896, + "loss": 0.036429621279239655, + "loss_ce": 2.2147392883198336e-05, + "loss_iou": 0.6484375, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 1082124756, + "step": 6307 + }, + { + "epoch": 71.59773371104816, + "grad_norm": 3.700651667499769, + "learning_rate": 5e-06, + "loss": 0.0584, + "num_input_tokens_seen": 1082295712, + "step": 6308 + }, + { + "epoch": 71.59773371104816, + "loss": 0.04259459674358368, + "loss_ce": 2.257281812489964e-05, + "loss_iou": 0.478515625, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 1082295712, + "step": 6308 + }, + { + "epoch": 71.60906515580737, + "grad_norm": 3.2547996891083355, + "learning_rate": 5e-06, + "loss": 0.0391, + "num_input_tokens_seen": 1082467384, + "step": 6309 + }, + { + "epoch": 71.60906515580737, + "loss": 0.03559117391705513, + "loss_ce": 2.2936623281566426e-05, + "loss_iou": 0.5390625, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 1082467384, + "step": 6309 + }, + { + "epoch": 71.62039660056658, + "grad_norm": 3.0060754687615217, + "learning_rate": 5e-06, + "loss": 0.0449, + "num_input_tokens_seen": 1082639748, + "step": 6310 + }, + { + "epoch": 71.62039660056658, + "loss": 0.04740023985505104, + "loss_ce": 2.1699372155126184e-05, + "loss_iou": 0.359375, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 1082639748, + "step": 6310 + }, + { + "epoch": 71.63172804532577, + "grad_norm": 3.1697521022581125, + "learning_rate": 5e-06, + "loss": 0.0434, + "num_input_tokens_seen": 1082811936, + "step": 6311 + }, + { + "epoch": 71.63172804532577, + "loss": 0.07100522518157959, + "loss_ce": 2.1335294150048867e-05, + "loss_iou": 0.404296875, + "loss_num": 0.01416015625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 1082811936, + "step": 6311 + }, + { + "epoch": 71.64305949008498, + "grad_norm": 3.2591715848672007, + "learning_rate": 5e-06, + "loss": 0.0377, + "num_input_tokens_seen": 1082982144, + "step": 6312 + }, + { + "epoch": 71.64305949008498, + "loss": 0.02785702608525753, + "loss_ce": 1.7365549865644425e-05, + "loss_iou": 0.38671875, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 1082982144, + "step": 6312 + }, + { + "epoch": 71.65439093484419, + "grad_norm": 3.3285965882845083, + "learning_rate": 5e-06, + "loss": 0.0308, + "num_input_tokens_seen": 1083152036, + "step": 6313 + }, + { + "epoch": 71.65439093484419, + "loss": 0.029422510415315628, + "loss_ce": 3.4084452636307105e-05, + "loss_iou": 0.484375, + "loss_num": 0.005859375, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 1083152036, + "step": 6313 + }, + { + "epoch": 71.6657223796034, + "grad_norm": 3.2564390359104487, + "learning_rate": 5e-06, + "loss": 0.0608, + "num_input_tokens_seen": 1083323460, + "step": 6314 + }, + { + "epoch": 71.6657223796034, + "loss": 0.038522012531757355, + "loss_ce": 8.828123100101948e-06, + "loss_iou": 0.349609375, + "loss_num": 0.0076904296875, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1083323460, + "step": 6314 + }, + { + "epoch": 71.6770538243626, + "grad_norm": 2.7396137946480557, + "learning_rate": 5e-06, + "loss": 0.0494, + "num_input_tokens_seen": 1083495424, + "step": 6315 + }, + { + "epoch": 71.6770538243626, + "loss": 0.05260499194264412, + "loss_ce": 3.84659506380558e-05, + "loss_iou": 0.51171875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 1083495424, + "step": 6315 + }, + { + "epoch": 71.68838526912181, + "grad_norm": 2.266224632014876, + "learning_rate": 5e-06, + "loss": 0.0338, + "num_input_tokens_seen": 1083667124, + "step": 6316 + }, + { + "epoch": 71.68838526912181, + "loss": 0.030978474766016006, + "loss_ce": 2.6021647499874234e-05, + "loss_iou": 0.3828125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1083667124, + "step": 6316 + }, + { + "epoch": 71.69971671388102, + "grad_norm": 2.4898532380264133, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 1083838976, + "step": 6317 + }, + { + "epoch": 71.69971671388102, + "loss": 0.024016916751861572, + "loss_ce": 2.2471380361821502e-05, + "loss_iou": 0.45703125, + "loss_num": 0.004791259765625, + "loss_xval": 0.0240478515625, + "num_input_tokens_seen": 1083838976, + "step": 6317 + }, + { + "epoch": 71.71104815864022, + "grad_norm": 3.105282815254955, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 1084010628, + "step": 6318 + }, + { + "epoch": 71.71104815864022, + "loss": 0.029156381264328957, + "loss_ce": 1.2093717487005051e-05, + "loss_iou": 0.5, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 1084010628, + "step": 6318 + }, + { + "epoch": 71.72237960339943, + "grad_norm": 3.18531159092337, + "learning_rate": 5e-06, + "loss": 0.0386, + "num_input_tokens_seen": 1084182672, + "step": 6319 + }, + { + "epoch": 71.72237960339943, + "loss": 0.023192856460809708, + "loss_ce": 3.0014671210665256e-05, + "loss_iou": 0.345703125, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 1084182672, + "step": 6319 + }, + { + "epoch": 71.73371104815864, + "grad_norm": 3.3128075410602986, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 1084354528, + "step": 6320 + }, + { + "epoch": 71.73371104815864, + "loss": 0.08003723621368408, + "loss_ce": 1.2516074093582574e-05, + "loss_iou": 0.423828125, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 1084354528, + "step": 6320 + }, + { + "epoch": 71.74504249291785, + "grad_norm": 3.3971306092465654, + "learning_rate": 5e-06, + "loss": 0.0604, + "num_input_tokens_seen": 1084526304, + "step": 6321 + }, + { + "epoch": 71.74504249291785, + "loss": 0.04324670508503914, + "loss_ce": 1.855498157965485e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1084526304, + "step": 6321 + }, + { + "epoch": 71.75637393767705, + "grad_norm": 3.389335234027946, + "learning_rate": 5e-06, + "loss": 0.0308, + "num_input_tokens_seen": 1084696772, + "step": 6322 + }, + { + "epoch": 71.75637393767705, + "loss": 0.03128332644701004, + "loss_ce": 1.8069695215672255e-05, + "loss_iou": 0.41015625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1084696772, + "step": 6322 + }, + { + "epoch": 71.76770538243626, + "grad_norm": 4.15401820229542, + "learning_rate": 5e-06, + "loss": 0.0444, + "num_input_tokens_seen": 1084868796, + "step": 6323 + }, + { + "epoch": 71.76770538243626, + "loss": 0.04818715155124664, + "loss_ce": 1.5153706044657156e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 1084868796, + "step": 6323 + }, + { + "epoch": 71.77903682719547, + "grad_norm": 3.825831923996868, + "learning_rate": 5e-06, + "loss": 0.0633, + "num_input_tokens_seen": 1085040796, + "step": 6324 + }, + { + "epoch": 71.77903682719547, + "loss": 0.08951297402381897, + "loss_ce": 3.5430595744401217e-05, + "loss_iou": 0.37109375, + "loss_num": 0.0179443359375, + "loss_xval": 0.08935546875, + "num_input_tokens_seen": 1085040796, + "step": 6324 + }, + { + "epoch": 71.79036827195468, + "grad_norm": 3.210139623039176, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1085212396, + "step": 6325 + }, + { + "epoch": 71.79036827195468, + "loss": 0.0444817915558815, + "loss_ce": 6.345735164359212e-05, + "loss_iou": 0.51171875, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 1085212396, + "step": 6325 + }, + { + "epoch": 71.80169971671388, + "grad_norm": 2.9837470222283824, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 1085380304, + "step": 6326 + }, + { + "epoch": 71.80169971671388, + "loss": 0.058926261961460114, + "loss_ce": 4.259422712493688e-05, + "loss_iou": 0.17578125, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 1085380304, + "step": 6326 + }, + { + "epoch": 71.81303116147309, + "grad_norm": 3.0765421742915606, + "learning_rate": 5e-06, + "loss": 0.0382, + "num_input_tokens_seen": 1085552212, + "step": 6327 + }, + { + "epoch": 71.81303116147309, + "loss": 0.03482535108923912, + "loss_ce": 2.0052239051437937e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0069580078125, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1085552212, + "step": 6327 + }, + { + "epoch": 71.8243626062323, + "grad_norm": 3.1333489212861947, + "learning_rate": 5e-06, + "loss": 0.0509, + "num_input_tokens_seen": 1085723968, + "step": 6328 + }, + { + "epoch": 71.8243626062323, + "loss": 0.031688421964645386, + "loss_ce": 1.1178676686540712e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00634765625, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 1085723968, + "step": 6328 + }, + { + "epoch": 71.8356940509915, + "grad_norm": 3.5824008093267814, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1085894568, + "step": 6329 + }, + { + "epoch": 71.8356940509915, + "loss": 0.03835016116499901, + "loss_ce": 2.008280534937512e-05, + "loss_iou": 0.5546875, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1085894568, + "step": 6329 + }, + { + "epoch": 71.84702549575071, + "grad_norm": 3.9530717543199807, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 1086066124, + "step": 6330 + }, + { + "epoch": 71.84702549575071, + "loss": 0.034444116055965424, + "loss_ce": 2.02841947611887e-05, + "loss_iou": 0.314453125, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1086066124, + "step": 6330 + }, + { + "epoch": 71.85835694050992, + "grad_norm": 3.0279630139462412, + "learning_rate": 5e-06, + "loss": 0.0364, + "num_input_tokens_seen": 1086238232, + "step": 6331 + }, + { + "epoch": 71.85835694050992, + "loss": 0.02691538631916046, + "loss_ce": 1.4142431609798223e-05, + "loss_iou": 0.466796875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 1086238232, + "step": 6331 + }, + { + "epoch": 71.86968838526913, + "grad_norm": 2.759156970980906, + "learning_rate": 5e-06, + "loss": 0.035, + "num_input_tokens_seen": 1086409028, + "step": 6332 + }, + { + "epoch": 71.86968838526913, + "loss": 0.040620580315589905, + "loss_ce": 1.6944686649367213e-05, + "loss_iou": 0.486328125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1086409028, + "step": 6332 + }, + { + "epoch": 71.88101983002832, + "grad_norm": 3.123038518789632, + "learning_rate": 5e-06, + "loss": 0.0242, + "num_input_tokens_seen": 1086579040, + "step": 6333 + }, + { + "epoch": 71.88101983002832, + "loss": 0.027104049921035767, + "loss_ce": 1.9699866243172437e-05, + "loss_iou": 0.44921875, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1086579040, + "step": 6333 + }, + { + "epoch": 71.89235127478753, + "grad_norm": 3.0824204519206835, + "learning_rate": 5e-06, + "loss": 0.0324, + "num_input_tokens_seen": 1086750344, + "step": 6334 + }, + { + "epoch": 71.89235127478753, + "loss": 0.04207644611597061, + "loss_ce": 7.965642907947768e-06, + "loss_iou": 0.2255859375, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1086750344, + "step": 6334 + }, + { + "epoch": 71.90368271954674, + "grad_norm": 3.155146531260981, + "learning_rate": 5e-06, + "loss": 0.0487, + "num_input_tokens_seen": 1086921492, + "step": 6335 + }, + { + "epoch": 71.90368271954674, + "loss": 0.029619436711072922, + "loss_ce": 5.5533313570776954e-05, + "loss_iou": 0.40625, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 1086921492, + "step": 6335 + }, + { + "epoch": 71.91501416430594, + "grad_norm": 3.419832680079542, + "learning_rate": 5e-06, + "loss": 0.0417, + "num_input_tokens_seen": 1087092360, + "step": 6336 + }, + { + "epoch": 71.91501416430594, + "loss": 0.026729252189397812, + "loss_ce": 1.1113129403383937e-05, + "loss_iou": 0.337890625, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 1087092360, + "step": 6336 + }, + { + "epoch": 71.92634560906515, + "grad_norm": 2.609690859679022, + "learning_rate": 5e-06, + "loss": 0.0348, + "num_input_tokens_seen": 1087263292, + "step": 6337 + }, + { + "epoch": 71.92634560906515, + "loss": 0.024927429854869843, + "loss_ce": 4.7973782784538344e-05, + "loss_iou": 0.359375, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 1087263292, + "step": 6337 + }, + { + "epoch": 71.93767705382436, + "grad_norm": 6.157433000399018, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 1087434068, + "step": 6338 + }, + { + "epoch": 71.93767705382436, + "loss": 0.03840063512325287, + "loss_ce": 3.2410272979177535e-05, + "loss_iou": 0.208984375, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1087434068, + "step": 6338 + }, + { + "epoch": 71.94900849858357, + "grad_norm": 3.7948587291065867, + "learning_rate": 5e-06, + "loss": 0.0772, + "num_input_tokens_seen": 1087605904, + "step": 6339 + }, + { + "epoch": 71.94900849858357, + "loss": 0.12592989206314087, + "loss_ce": 1.4367837138706818e-05, + "loss_iou": 0.318359375, + "loss_num": 0.0252685546875, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 1087605904, + "step": 6339 + }, + { + "epoch": 71.96033994334277, + "grad_norm": 2.9600195851625672, + "learning_rate": 5e-06, + "loss": 0.0444, + "num_input_tokens_seen": 1087777780, + "step": 6340 + }, + { + "epoch": 71.96033994334277, + "loss": 0.08376624435186386, + "loss_ce": 4.1264436731580645e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 1087777780, + "step": 6340 + }, + { + "epoch": 71.97167138810198, + "grad_norm": 3.0581412759152875, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 1087949588, + "step": 6341 + }, + { + "epoch": 71.97167138810198, + "loss": 0.03042149543762207, + "loss_ce": 2.598770151962526e-05, + "loss_iou": 0.51953125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1087949588, + "step": 6341 + }, + { + "epoch": 71.98300283286119, + "grad_norm": 3.0108472863819835, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 1088121404, + "step": 6342 + }, + { + "epoch": 71.98300283286119, + "loss": 0.039963483810424805, + "loss_ce": 3.123074566246942e-05, + "loss_iou": 0.1650390625, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 1088121404, + "step": 6342 + }, + { + "epoch": 71.9943342776204, + "grad_norm": 2.9866481980623267, + "learning_rate": 5e-06, + "loss": 0.0459, + "num_input_tokens_seen": 1088290780, + "step": 6343 + }, + { + "epoch": 71.9943342776204, + "loss": 0.07404462993144989, + "loss_ce": 8.989324669528287e-06, + "loss_iou": 0.392578125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 1088290780, + "step": 6343 + }, + { + "epoch": 71.9943342776204, + "loss": 0.03748292475938797, + "loss_ce": 0.00014466721040662378, + "loss_iou": 0.4453125, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 1088376584, + "step": 6343 + }, + { + "epoch": 72.0056657223796, + "grad_norm": 2.6230742453159093, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 1088461620, + "step": 6344 + }, + { + "epoch": 72.0056657223796, + "loss": 0.021838389337062836, + "loss_ce": 2.5949284463422373e-05, + "loss_iou": 0.427734375, + "loss_num": 0.004364013671875, + "loss_xval": 0.0218505859375, + "num_input_tokens_seen": 1088461620, + "step": 6344 + }, + { + "epoch": 72.01699716713881, + "grad_norm": 2.5743993105138983, + "learning_rate": 5e-06, + "loss": 0.034, + "num_input_tokens_seen": 1088632324, + "step": 6345 + }, + { + "epoch": 72.01699716713881, + "loss": 0.02753208950161934, + "loss_ce": 1.286371480091475e-05, + "loss_iou": 0.06201171875, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 1088632324, + "step": 6345 + }, + { + "epoch": 72.02832861189802, + "grad_norm": 4.0657011342861855, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 1088804568, + "step": 6346 + }, + { + "epoch": 72.02832861189802, + "loss": 0.061970219016075134, + "loss_ce": 1.95354332390707e-05, + "loss_iou": 0.41015625, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 1088804568, + "step": 6346 + }, + { + "epoch": 72.03966005665723, + "grad_norm": 3.697535219229695, + "learning_rate": 5e-06, + "loss": 0.0747, + "num_input_tokens_seen": 1088975984, + "step": 6347 + }, + { + "epoch": 72.03966005665723, + "loss": 0.05731949955224991, + "loss_ce": 2.274712096550502e-05, + "loss_iou": 0.462890625, + "loss_num": 0.011474609375, + "loss_xval": 0.057373046875, + "num_input_tokens_seen": 1088975984, + "step": 6347 + }, + { + "epoch": 72.05099150141643, + "grad_norm": 3.057934850127661, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 1089148072, + "step": 6348 + }, + { + "epoch": 72.05099150141643, + "loss": 0.035268183797597885, + "loss_ce": 1.2753221199091058e-05, + "loss_iou": 0.474609375, + "loss_num": 0.007049560546875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1089148072, + "step": 6348 + }, + { + "epoch": 72.06232294617564, + "grad_norm": 3.1599802504278465, + "learning_rate": 5e-06, + "loss": 0.0436, + "num_input_tokens_seen": 1089319728, + "step": 6349 + }, + { + "epoch": 72.06232294617564, + "loss": 0.058867134153842926, + "loss_ce": 2.924150430771988e-05, + "loss_iou": 0.50390625, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 1089319728, + "step": 6349 + }, + { + "epoch": 72.07365439093485, + "grad_norm": 3.0273473143408074, + "learning_rate": 5e-06, + "loss": 0.0654, + "num_input_tokens_seen": 1089491660, + "step": 6350 + }, + { + "epoch": 72.07365439093485, + "loss": 0.027279410511255264, + "loss_ce": 1.1955982699873857e-05, + "loss_iou": 0.482421875, + "loss_num": 0.005462646484375, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 1089491660, + "step": 6350 + }, + { + "epoch": 72.08498583569406, + "grad_norm": 3.073425291741844, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 1089663228, + "step": 6351 + }, + { + "epoch": 72.08498583569406, + "loss": 0.053060173988342285, + "loss_ce": 6.640057836193591e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 1089663228, + "step": 6351 + }, + { + "epoch": 72.09631728045326, + "grad_norm": 3.7254015656290926, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 1089834216, + "step": 6352 + }, + { + "epoch": 72.09631728045326, + "loss": 0.03060598112642765, + "loss_ce": 2.7369242161512375e-05, + "loss_iou": 0.28515625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1089834216, + "step": 6352 + }, + { + "epoch": 72.10764872521247, + "grad_norm": 3.1308690975300855, + "learning_rate": 5e-06, + "loss": 0.0366, + "num_input_tokens_seen": 1090005696, + "step": 6353 + }, + { + "epoch": 72.10764872521247, + "loss": 0.025884132832288742, + "loss_ce": 2.0485880668275058e-05, + "loss_iou": 0.421875, + "loss_num": 0.00518798828125, + "loss_xval": 0.02587890625, + "num_input_tokens_seen": 1090005696, + "step": 6353 + }, + { + "epoch": 72.11898016997168, + "grad_norm": 3.1659734315883097, + "learning_rate": 5e-06, + "loss": 0.0393, + "num_input_tokens_seen": 1090177840, + "step": 6354 + }, + { + "epoch": 72.11898016997168, + "loss": 0.049676500260829926, + "loss_ce": 3.965653741033748e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00994873046875, + "loss_xval": 0.049560546875, + "num_input_tokens_seen": 1090177840, + "step": 6354 + }, + { + "epoch": 72.13031161473087, + "grad_norm": 3.0206836599688476, + "learning_rate": 5e-06, + "loss": 0.0537, + "num_input_tokens_seen": 1090348972, + "step": 6355 + }, + { + "epoch": 72.13031161473087, + "loss": 0.022258980199694633, + "loss_ce": 1.1666428690659814e-05, + "loss_iou": 0.51171875, + "loss_num": 0.00445556640625, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 1090348972, + "step": 6355 + }, + { + "epoch": 72.14164305949008, + "grad_norm": 2.8412842413214427, + "learning_rate": 5e-06, + "loss": 0.0507, + "num_input_tokens_seen": 1090520284, + "step": 6356 + }, + { + "epoch": 72.14164305949008, + "loss": 0.08763731271028519, + "loss_ce": 0.00028074689907953143, + "loss_iou": 0.51953125, + "loss_num": 0.0174560546875, + "loss_xval": 0.08740234375, + "num_input_tokens_seen": 1090520284, + "step": 6356 + }, + { + "epoch": 72.15297450424929, + "grad_norm": 3.9974403631731033, + "learning_rate": 5e-06, + "loss": 0.0816, + "num_input_tokens_seen": 1090692048, + "step": 6357 + }, + { + "epoch": 72.15297450424929, + "loss": 0.100965216755867, + "loss_ce": 1.307215279666707e-05, + "loss_iou": 0.45703125, + "loss_num": 0.020263671875, + "loss_xval": 0.10107421875, + "num_input_tokens_seen": 1090692048, + "step": 6357 + }, + { + "epoch": 72.1643059490085, + "grad_norm": 3.5454203929180657, + "learning_rate": 5e-06, + "loss": 0.0556, + "num_input_tokens_seen": 1090862808, + "step": 6358 + }, + { + "epoch": 72.1643059490085, + "loss": 0.03559213876724243, + "loss_ce": 2.390237023064401e-05, + "loss_iou": 0.41796875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 1090862808, + "step": 6358 + }, + { + "epoch": 72.1756373937677, + "grad_norm": 3.8905664284263115, + "learning_rate": 5e-06, + "loss": 0.0506, + "num_input_tokens_seen": 1091034328, + "step": 6359 + }, + { + "epoch": 72.1756373937677, + "loss": 0.08154311776161194, + "loss_ce": 1.540574521641247e-05, + "loss_iou": 0.4765625, + "loss_num": 0.016357421875, + "loss_xval": 0.08154296875, + "num_input_tokens_seen": 1091034328, + "step": 6359 + }, + { + "epoch": 72.18696883852691, + "grad_norm": 3.601051258444796, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 1091206096, + "step": 6360 + }, + { + "epoch": 72.18696883852691, + "loss": 0.029772158712148666, + "loss_ce": 1.7518334061605856e-05, + "loss_iou": 0.388671875, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 1091206096, + "step": 6360 + }, + { + "epoch": 72.19830028328612, + "grad_norm": 4.007055157856261, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 1091378120, + "step": 6361 + }, + { + "epoch": 72.19830028328612, + "loss": 0.043407365679740906, + "loss_ce": 5.714629151043482e-05, + "loss_iou": 0.40234375, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 1091378120, + "step": 6361 + }, + { + "epoch": 72.20963172804532, + "grad_norm": 2.8065246633678336, + "learning_rate": 5e-06, + "loss": 0.0375, + "num_input_tokens_seen": 1091549844, + "step": 6362 + }, + { + "epoch": 72.20963172804532, + "loss": 0.027665428817272186, + "loss_ce": 4.702120349975303e-05, + "loss_iou": 0.39453125, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 1091549844, + "step": 6362 + }, + { + "epoch": 72.22096317280453, + "grad_norm": 2.691046088896478, + "learning_rate": 5e-06, + "loss": 0.0267, + "num_input_tokens_seen": 1091721780, + "step": 6363 + }, + { + "epoch": 72.22096317280453, + "loss": 0.029430441558361053, + "loss_ce": 2.6752926714834757e-05, + "loss_iou": 0.3515625, + "loss_num": 0.005889892578125, + "loss_xval": 0.0294189453125, + "num_input_tokens_seen": 1091721780, + "step": 6363 + }, + { + "epoch": 72.23229461756374, + "grad_norm": 3.1806246620934266, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 1091893040, + "step": 6364 + }, + { + "epoch": 72.23229461756374, + "loss": 0.06900182366371155, + "loss_ce": 1.683681330177933e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 1091893040, + "step": 6364 + }, + { + "epoch": 72.24362606232295, + "grad_norm": 2.8295989546131324, + "learning_rate": 5e-06, + "loss": 0.0451, + "num_input_tokens_seen": 1092064772, + "step": 6365 + }, + { + "epoch": 72.24362606232295, + "loss": 0.027438584715127945, + "loss_ce": 2.617051541164983e-05, + "loss_iou": 0.30859375, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 1092064772, + "step": 6365 + }, + { + "epoch": 72.25495750708215, + "grad_norm": 2.788002037871266, + "learning_rate": 5e-06, + "loss": 0.0331, + "num_input_tokens_seen": 1092236716, + "step": 6366 + }, + { + "epoch": 72.25495750708215, + "loss": 0.04069644585251808, + "loss_ce": 1.6512391084688716e-05, + "loss_iou": 0.375, + "loss_num": 0.00811767578125, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1092236716, + "step": 6366 + }, + { + "epoch": 72.26628895184136, + "grad_norm": 2.593735249886273, + "learning_rate": 5e-06, + "loss": 0.0324, + "num_input_tokens_seen": 1092408476, + "step": 6367 + }, + { + "epoch": 72.26628895184136, + "loss": 0.02480863593518734, + "loss_ce": 1.3103700439387467e-05, + "loss_iou": 0.28515625, + "loss_num": 0.004974365234375, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 1092408476, + "step": 6367 + }, + { + "epoch": 72.27762039660057, + "grad_norm": 2.994600128926469, + "learning_rate": 5e-06, + "loss": 0.0545, + "num_input_tokens_seen": 1092579344, + "step": 6368 + }, + { + "epoch": 72.27762039660057, + "loss": 0.13209834694862366, + "loss_ce": 1.8278251445735805e-05, + "loss_iou": 0.447265625, + "loss_num": 0.0263671875, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 1092579344, + "step": 6368 + }, + { + "epoch": 72.28895184135978, + "grad_norm": 3.356586619114031, + "learning_rate": 5e-06, + "loss": 0.061, + "num_input_tokens_seen": 1092751416, + "step": 6369 + }, + { + "epoch": 72.28895184135978, + "loss": 0.05163809284567833, + "loss_ce": 1.7609203496249393e-05, + "loss_iou": 0.42578125, + "loss_num": 0.01031494140625, + "loss_xval": 0.051513671875, + "num_input_tokens_seen": 1092751416, + "step": 6369 + }, + { + "epoch": 72.30028328611898, + "grad_norm": 3.3015117766675948, + "learning_rate": 5e-06, + "loss": 0.0295, + "num_input_tokens_seen": 1092922696, + "step": 6370 + }, + { + "epoch": 72.30028328611898, + "loss": 0.03412819281220436, + "loss_ce": 2.4800161554594524e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0068359375, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1092922696, + "step": 6370 + }, + { + "epoch": 72.31161473087819, + "grad_norm": 3.163256542833529, + "learning_rate": 5e-06, + "loss": 0.0329, + "num_input_tokens_seen": 1093094364, + "step": 6371 + }, + { + "epoch": 72.31161473087819, + "loss": 0.040805984288454056, + "loss_ce": 1.924399839481339e-05, + "loss_iou": 0.4609375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1093094364, + "step": 6371 + }, + { + "epoch": 72.3229461756374, + "grad_norm": 2.966859222890234, + "learning_rate": 5e-06, + "loss": 0.0313, + "num_input_tokens_seen": 1093265244, + "step": 6372 + }, + { + "epoch": 72.3229461756374, + "loss": 0.030220340937376022, + "loss_ce": 7.93865092418855e-06, + "loss_iou": 0.41015625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1093265244, + "step": 6372 + }, + { + "epoch": 72.3342776203966, + "grad_norm": 2.881963139230349, + "learning_rate": 5e-06, + "loss": 0.0522, + "num_input_tokens_seen": 1093436892, + "step": 6373 + }, + { + "epoch": 72.3342776203966, + "loss": 0.02472321316599846, + "loss_ce": 3.449180439929478e-05, + "loss_iou": 0.326171875, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 1093436892, + "step": 6373 + }, + { + "epoch": 72.34560906515581, + "grad_norm": 4.050290191683477, + "learning_rate": 5e-06, + "loss": 0.0542, + "num_input_tokens_seen": 1093606436, + "step": 6374 + }, + { + "epoch": 72.34560906515581, + "loss": 0.09272155910730362, + "loss_ce": 9.157141903415322e-06, + "loss_iou": 0.466796875, + "loss_num": 0.0185546875, + "loss_xval": 0.0927734375, + "num_input_tokens_seen": 1093606436, + "step": 6374 + }, + { + "epoch": 72.35694050991502, + "grad_norm": 3.673134995852733, + "learning_rate": 5e-06, + "loss": 0.0409, + "num_input_tokens_seen": 1093777372, + "step": 6375 + }, + { + "epoch": 72.35694050991502, + "loss": 0.03644733875989914, + "loss_ce": 3.986688534496352e-05, + "loss_iou": 0.64453125, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 1093777372, + "step": 6375 + }, + { + "epoch": 72.36827195467423, + "grad_norm": 3.026457567967279, + "learning_rate": 5e-06, + "loss": 0.0387, + "num_input_tokens_seen": 1093948476, + "step": 6376 + }, + { + "epoch": 72.36827195467423, + "loss": 0.036039236932992935, + "loss_ce": 1.3235137885203585e-05, + "loss_iou": 0.39453125, + "loss_num": 0.0072021484375, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1093948476, + "step": 6376 + }, + { + "epoch": 72.37960339943342, + "grad_norm": 3.2663509329947313, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 1094120208, + "step": 6377 + }, + { + "epoch": 72.37960339943342, + "loss": 0.040823910385370255, + "loss_ce": 6.649356237176107e-06, + "loss_iou": 0.3359375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1094120208, + "step": 6377 + }, + { + "epoch": 72.39093484419263, + "grad_norm": 3.7452499230689535, + "learning_rate": 5e-06, + "loss": 0.0492, + "num_input_tokens_seen": 1094290888, + "step": 6378 + }, + { + "epoch": 72.39093484419263, + "loss": 0.03225569427013397, + "loss_ce": 2.9134240321582183e-05, + "loss_iou": 0.49609375, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1094290888, + "step": 6378 + }, + { + "epoch": 72.40226628895184, + "grad_norm": 3.6645911498181, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 1094462640, + "step": 6379 + }, + { + "epoch": 72.40226628895184, + "loss": 0.02852441370487213, + "loss_ce": 2.099523408105597e-05, + "loss_iou": 0.63671875, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1094462640, + "step": 6379 + }, + { + "epoch": 72.41359773371104, + "grad_norm": 3.2392320312589855, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 1094634252, + "step": 6380 + }, + { + "epoch": 72.41359773371104, + "loss": 0.05362439528107643, + "loss_ce": 2.0267836589482613e-05, + "loss_iou": 0.248046875, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 1094634252, + "step": 6380 + }, + { + "epoch": 72.42492917847025, + "grad_norm": 2.637737527351458, + "learning_rate": 5e-06, + "loss": 0.0359, + "num_input_tokens_seen": 1094803536, + "step": 6381 + }, + { + "epoch": 72.42492917847025, + "loss": 0.028267571702599525, + "loss_ce": 5.406998752732761e-05, + "loss_iou": 0.279296875, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 1094803536, + "step": 6381 + }, + { + "epoch": 72.43626062322946, + "grad_norm": 3.0048239657208, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 1094975200, + "step": 6382 + }, + { + "epoch": 72.43626062322946, + "loss": 0.03899305313825607, + "loss_ce": 6.84635597281158e-06, + "loss_iou": 0.4921875, + "loss_num": 0.007781982421875, + "loss_xval": 0.0390625, + "num_input_tokens_seen": 1094975200, + "step": 6382 + }, + { + "epoch": 72.44759206798867, + "grad_norm": 2.7316506971804104, + "learning_rate": 5e-06, + "loss": 0.0713, + "num_input_tokens_seen": 1095147272, + "step": 6383 + }, + { + "epoch": 72.44759206798867, + "loss": 0.07629137486219406, + "loss_ce": 2.795118598442059e-05, + "loss_iou": 0.2890625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 1095147272, + "step": 6383 + }, + { + "epoch": 72.45892351274787, + "grad_norm": 3.363695395337474, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 1095318468, + "step": 6384 + }, + { + "epoch": 72.45892351274787, + "loss": 0.036173634231090546, + "loss_ce": 1.030434032145422e-05, + "loss_iou": 0.22265625, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1095318468, + "step": 6384 + }, + { + "epoch": 72.47025495750708, + "grad_norm": 3.096272130763295, + "learning_rate": 5e-06, + "loss": 0.0326, + "num_input_tokens_seen": 1095489368, + "step": 6385 + }, + { + "epoch": 72.47025495750708, + "loss": 0.025361312553286552, + "loss_ce": 0.00011564575834199786, + "loss_iou": 0.40234375, + "loss_num": 0.005035400390625, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 1095489368, + "step": 6385 + }, + { + "epoch": 72.48158640226629, + "grad_norm": 2.8884396188120927, + "learning_rate": 5e-06, + "loss": 0.077, + "num_input_tokens_seen": 1095660904, + "step": 6386 + }, + { + "epoch": 72.48158640226629, + "loss": 0.040947325527668, + "loss_ce": 0.00013006487279199064, + "loss_iou": 0.2734375, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1095660904, + "step": 6386 + }, + { + "epoch": 72.4929178470255, + "grad_norm": 3.23385595715523, + "learning_rate": 5e-06, + "loss": 0.0318, + "num_input_tokens_seen": 1095832840, + "step": 6387 + }, + { + "epoch": 72.4929178470255, + "loss": 0.02521863952279091, + "loss_ce": 1.1119105693069287e-05, + "loss_iou": 0.43359375, + "loss_num": 0.005035400390625, + "loss_xval": 0.025146484375, + "num_input_tokens_seen": 1095832840, + "step": 6387 + }, + { + "epoch": 72.5042492917847, + "grad_norm": 3.8777402460745356, + "learning_rate": 5e-06, + "loss": 0.0414, + "num_input_tokens_seen": 1096004648, + "step": 6388 + }, + { + "epoch": 72.5042492917847, + "loss": 0.03499479219317436, + "loss_ce": 3.6908062611473724e-05, + "loss_iou": 0.31640625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1096004648, + "step": 6388 + }, + { + "epoch": 72.51558073654391, + "grad_norm": 3.3672274204661914, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 1096176320, + "step": 6389 + }, + { + "epoch": 72.51558073654391, + "loss": 0.07335026562213898, + "loss_ce": 1.6526042600162327e-05, + "loss_iou": 0.4453125, + "loss_num": 0.0146484375, + "loss_xval": 0.0732421875, + "num_input_tokens_seen": 1096176320, + "step": 6389 + }, + { + "epoch": 72.52691218130312, + "grad_norm": 3.0684152286121025, + "learning_rate": 5e-06, + "loss": 0.0565, + "num_input_tokens_seen": 1096346712, + "step": 6390 + }, + { + "epoch": 72.52691218130312, + "loss": 0.05328311771154404, + "loss_ce": 2.994591704919003e-05, + "loss_iou": 0.224609375, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 1096346712, + "step": 6390 + }, + { + "epoch": 72.53824362606233, + "grad_norm": 2.809210817791173, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 1096518388, + "step": 6391 + }, + { + "epoch": 72.53824362606233, + "loss": 0.038925543427467346, + "loss_ce": 3.0889827030478045e-05, + "loss_iou": 0.3125, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 1096518388, + "step": 6391 + }, + { + "epoch": 72.54957507082153, + "grad_norm": 2.70614106490957, + "learning_rate": 5e-06, + "loss": 0.0345, + "num_input_tokens_seen": 1096690044, + "step": 6392 + }, + { + "epoch": 72.54957507082153, + "loss": 0.03132588788866997, + "loss_ce": 1.4854080291115679e-05, + "loss_iou": 0.42578125, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1096690044, + "step": 6392 + }, + { + "epoch": 72.56090651558074, + "grad_norm": 3.662964555163521, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 1096860176, + "step": 6393 + }, + { + "epoch": 72.56090651558074, + "loss": 0.03108474239706993, + "loss_ce": 1.7848155039246194e-05, + "loss_iou": 0.427734375, + "loss_num": 0.0062255859375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1096860176, + "step": 6393 + }, + { + "epoch": 72.57223796033995, + "grad_norm": 2.9861319328524516, + "learning_rate": 5e-06, + "loss": 0.0527, + "num_input_tokens_seen": 1097030844, + "step": 6394 + }, + { + "epoch": 72.57223796033995, + "loss": 0.059183087199926376, + "loss_ce": 2.4766082788119093e-05, + "loss_iou": 0.333984375, + "loss_num": 0.0118408203125, + "loss_xval": 0.05908203125, + "num_input_tokens_seen": 1097030844, + "step": 6394 + }, + { + "epoch": 72.58356940509915, + "grad_norm": 3.126706457541378, + "learning_rate": 5e-06, + "loss": 0.0363, + "num_input_tokens_seen": 1097202252, + "step": 6395 + }, + { + "epoch": 72.58356940509915, + "loss": 0.02511187270283699, + "loss_ce": 3.405200186534785e-05, + "loss_iou": 0.412109375, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 1097202252, + "step": 6395 + }, + { + "epoch": 72.59490084985836, + "grad_norm": 3.3284161292466363, + "learning_rate": 5e-06, + "loss": 0.0559, + "num_input_tokens_seen": 1097374284, + "step": 6396 + }, + { + "epoch": 72.59490084985836, + "loss": 0.12613177299499512, + "loss_ce": 3.313073830213398e-05, + "loss_iou": 0.376953125, + "loss_num": 0.025146484375, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 1097374284, + "step": 6396 + }, + { + "epoch": 72.60623229461757, + "grad_norm": 3.3783026259225273, + "learning_rate": 5e-06, + "loss": 0.0455, + "num_input_tokens_seen": 1097544512, + "step": 6397 + }, + { + "epoch": 72.60623229461757, + "loss": 0.03002443164587021, + "loss_ce": 1.0393116099294275e-05, + "loss_iou": 0.4609375, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1097544512, + "step": 6397 + }, + { + "epoch": 72.61756373937678, + "grad_norm": 3.3631935856148014, + "learning_rate": 5e-06, + "loss": 0.0393, + "num_input_tokens_seen": 1097715356, + "step": 6398 + }, + { + "epoch": 72.61756373937678, + "loss": 0.0634152889251709, + "loss_ce": 4.5536198740592226e-05, + "loss_iou": 0.345703125, + "loss_num": 0.0126953125, + "loss_xval": 0.0634765625, + "num_input_tokens_seen": 1097715356, + "step": 6398 + }, + { + "epoch": 72.62889518413597, + "grad_norm": 2.880927828933064, + "learning_rate": 5e-06, + "loss": 0.0315, + "num_input_tokens_seen": 1097885468, + "step": 6399 + }, + { + "epoch": 72.62889518413597, + "loss": 0.025341372936964035, + "loss_ce": 1.1783835361711681e-05, + "loss_iou": 0.2119140625, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 1097885468, + "step": 6399 + }, + { + "epoch": 72.64022662889518, + "grad_norm": 2.705033329201722, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 1098057020, + "step": 6400 + }, + { + "epoch": 72.64022662889518, + "loss": 0.05277795344591141, + "loss_ce": 1.3056976058578584e-05, + "loss_iou": 0.466796875, + "loss_num": 0.01055908203125, + "loss_xval": 0.052734375, + "num_input_tokens_seen": 1098057020, + "step": 6400 + }, + { + "epoch": 72.65155807365439, + "grad_norm": 3.162785202256377, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 1098227424, + "step": 6401 + }, + { + "epoch": 72.65155807365439, + "loss": 0.027142629027366638, + "loss_ce": 2.77605122391833e-05, + "loss_iou": 0.427734375, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1098227424, + "step": 6401 + }, + { + "epoch": 72.66288951841359, + "grad_norm": 3.156325189735493, + "learning_rate": 5e-06, + "loss": 0.0358, + "num_input_tokens_seen": 1098398900, + "step": 6402 + }, + { + "epoch": 72.66288951841359, + "loss": 0.03227756917476654, + "loss_ce": 1.2857719411840662e-05, + "loss_iou": 0.33984375, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1098398900, + "step": 6402 + }, + { + "epoch": 72.6742209631728, + "grad_norm": 2.5847939039886376, + "learning_rate": 5e-06, + "loss": 0.0271, + "num_input_tokens_seen": 1098570032, + "step": 6403 + }, + { + "epoch": 72.6742209631728, + "loss": 0.020491670817136765, + "loss_ce": 1.4374811144080013e-05, + "loss_iou": 0.40625, + "loss_num": 0.00408935546875, + "loss_xval": 0.0205078125, + "num_input_tokens_seen": 1098570032, + "step": 6403 + }, + { + "epoch": 72.68555240793201, + "grad_norm": 2.8781350671666925, + "learning_rate": 5e-06, + "loss": 0.0423, + "num_input_tokens_seen": 1098741204, + "step": 6404 + }, + { + "epoch": 72.68555240793201, + "loss": 0.0322367399930954, + "loss_ce": 2.5438566808588803e-05, + "loss_iou": 0.3828125, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1098741204, + "step": 6404 + }, + { + "epoch": 72.69688385269122, + "grad_norm": 3.0884883214756598, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 1098913076, + "step": 6405 + }, + { + "epoch": 72.69688385269122, + "loss": 0.07983721047639847, + "loss_ce": 3.374271545908414e-05, + "loss_iou": 0.36328125, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 1098913076, + "step": 6405 + }, + { + "epoch": 72.70821529745042, + "grad_norm": 3.5041441826181363, + "learning_rate": 5e-06, + "loss": 0.057, + "num_input_tokens_seen": 1099084620, + "step": 6406 + }, + { + "epoch": 72.70821529745042, + "loss": 0.0833563581109047, + "loss_ce": 1.2848513506469317e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0167236328125, + "loss_xval": 0.08349609375, + "num_input_tokens_seen": 1099084620, + "step": 6406 + }, + { + "epoch": 72.71954674220963, + "grad_norm": 3.381943111102298, + "learning_rate": 5e-06, + "loss": 0.0618, + "num_input_tokens_seen": 1099254128, + "step": 6407 + }, + { + "epoch": 72.71954674220963, + "loss": 0.04095417633652687, + "loss_ce": 1.4843945791653823e-05, + "loss_iou": 0.486328125, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1099254128, + "step": 6407 + }, + { + "epoch": 72.73087818696884, + "grad_norm": 3.0092830599290936, + "learning_rate": 5e-06, + "loss": 0.0327, + "num_input_tokens_seen": 1099424932, + "step": 6408 + }, + { + "epoch": 72.73087818696884, + "loss": 0.03320252522826195, + "loss_ce": 4.5176191633800045e-05, + "loss_iou": 0.373046875, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1099424932, + "step": 6408 + }, + { + "epoch": 72.74220963172804, + "grad_norm": 3.048968242766737, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 1099597220, + "step": 6409 + }, + { + "epoch": 72.74220963172804, + "loss": 0.03239120915532112, + "loss_ce": 1.9688148313434795e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0064697265625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 1099597220, + "step": 6409 + }, + { + "epoch": 72.75354107648725, + "grad_norm": 3.1656607363930127, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 1099768968, + "step": 6410 + }, + { + "epoch": 72.75354107648725, + "loss": 0.040279995650053024, + "loss_ce": 1.20499844342703e-05, + "loss_iou": 0.4375, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 1099768968, + "step": 6410 + }, + { + "epoch": 72.76487252124646, + "grad_norm": 3.3913853820335262, + "learning_rate": 5e-06, + "loss": 0.0306, + "num_input_tokens_seen": 1099938672, + "step": 6411 + }, + { + "epoch": 72.76487252124646, + "loss": 0.028848998248577118, + "loss_ce": 5.5662807426415384e-05, + "loss_iou": 0.462890625, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 1099938672, + "step": 6411 + }, + { + "epoch": 72.77620396600567, + "grad_norm": 2.9403937630499297, + "learning_rate": 5e-06, + "loss": 0.0421, + "num_input_tokens_seen": 1100110368, + "step": 6412 + }, + { + "epoch": 72.77620396600567, + "loss": 0.048872239887714386, + "loss_ce": 1.3594909432868008e-05, + "loss_iou": 0.369140625, + "loss_num": 0.009765625, + "loss_xval": 0.048828125, + "num_input_tokens_seen": 1100110368, + "step": 6412 + }, + { + "epoch": 72.78753541076487, + "grad_norm": 2.385304148353764, + "learning_rate": 5e-06, + "loss": 0.042, + "num_input_tokens_seen": 1100282236, + "step": 6413 + }, + { + "epoch": 72.78753541076487, + "loss": 0.03129076957702637, + "loss_ce": 1.0252973879687488e-05, + "loss_iou": 0.396484375, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1100282236, + "step": 6413 + }, + { + "epoch": 72.79886685552408, + "grad_norm": 3.1936189494910256, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 1100454052, + "step": 6414 + }, + { + "epoch": 72.79886685552408, + "loss": 0.04210112988948822, + "loss_ce": 1.7386093531968072e-05, + "loss_iou": 0.4453125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1100454052, + "step": 6414 + }, + { + "epoch": 72.81019830028329, + "grad_norm": 3.9225931916393053, + "learning_rate": 5e-06, + "loss": 0.0506, + "num_input_tokens_seen": 1100625908, + "step": 6415 + }, + { + "epoch": 72.81019830028329, + "loss": 0.03038671612739563, + "loss_ce": 3.698662112583406e-05, + "loss_iou": 0.3515625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1100625908, + "step": 6415 + }, + { + "epoch": 72.8215297450425, + "grad_norm": 3.6949332877671903, + "learning_rate": 5e-06, + "loss": 0.0493, + "num_input_tokens_seen": 1100797744, + "step": 6416 + }, + { + "epoch": 72.8215297450425, + "loss": 0.06131046265363693, + "loss_ce": 1.5906749467831105e-05, + "loss_iou": 0.35546875, + "loss_num": 0.01226806640625, + "loss_xval": 0.061279296875, + "num_input_tokens_seen": 1100797744, + "step": 6416 + }, + { + "epoch": 72.8328611898017, + "grad_norm": 3.1794617188706633, + "learning_rate": 5e-06, + "loss": 0.0354, + "num_input_tokens_seen": 1100969404, + "step": 6417 + }, + { + "epoch": 72.8328611898017, + "loss": 0.02156449295580387, + "loss_ce": 1.9082250219071284e-05, + "loss_iou": 0.23046875, + "loss_num": 0.004302978515625, + "loss_xval": 0.021484375, + "num_input_tokens_seen": 1100969404, + "step": 6417 + }, + { + "epoch": 72.84419263456091, + "grad_norm": 2.7363584770403735, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1101141208, + "step": 6418 + }, + { + "epoch": 72.84419263456091, + "loss": 0.03769973665475845, + "loss_ce": 8.682251063873991e-05, + "loss_iou": 0.322265625, + "loss_num": 0.00750732421875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 1101141208, + "step": 6418 + }, + { + "epoch": 72.85552407932012, + "grad_norm": 3.0160878846217303, + "learning_rate": 5e-06, + "loss": 0.055, + "num_input_tokens_seen": 1101313280, + "step": 6419 + }, + { + "epoch": 72.85552407932012, + "loss": 0.033302415162324905, + "loss_ce": 2.2996295228949748e-05, + "loss_iou": 0.578125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1101313280, + "step": 6419 + }, + { + "epoch": 72.86685552407933, + "grad_norm": 2.5464081025820207, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1101484928, + "step": 6420 + }, + { + "epoch": 72.86685552407933, + "loss": 0.0377458892762661, + "loss_ce": 7.193784404080361e-05, + "loss_iou": 0.39453125, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 1101484928, + "step": 6420 + }, + { + "epoch": 72.87818696883852, + "grad_norm": 2.1841767032141584, + "learning_rate": 5e-06, + "loss": 0.0291, + "num_input_tokens_seen": 1101655796, + "step": 6421 + }, + { + "epoch": 72.87818696883852, + "loss": 0.04816635698080063, + "loss_ce": 0.00011643294419627637, + "loss_iou": 0.2265625, + "loss_num": 0.00958251953125, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 1101655796, + "step": 6421 + }, + { + "epoch": 72.88951841359773, + "grad_norm": 2.3475046303012523, + "learning_rate": 5e-06, + "loss": 0.0448, + "num_input_tokens_seen": 1101826844, + "step": 6422 + }, + { + "epoch": 72.88951841359773, + "loss": 0.0436820313334465, + "loss_ce": 1.137554590968648e-05, + "loss_iou": 0.33203125, + "loss_num": 0.00872802734375, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 1101826844, + "step": 6422 + }, + { + "epoch": 72.90084985835693, + "grad_norm": 3.3492603447476252, + "learning_rate": 5e-06, + "loss": 0.0395, + "num_input_tokens_seen": 1101997848, + "step": 6423 + }, + { + "epoch": 72.90084985835693, + "loss": 0.06725621223449707, + "loss_ce": 4.124757106183097e-05, + "loss_iou": 0.296875, + "loss_num": 0.013427734375, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 1101997848, + "step": 6423 + }, + { + "epoch": 72.91218130311614, + "grad_norm": 3.4347234155492328, + "learning_rate": 5e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1102169952, + "step": 6424 + }, + { + "epoch": 72.91218130311614, + "loss": 0.04631930589675903, + "loss_ce": 2.4137785658240318e-05, + "loss_iou": 0.30078125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 1102169952, + "step": 6424 + }, + { + "epoch": 72.92351274787535, + "grad_norm": 3.022699629832468, + "learning_rate": 5e-06, + "loss": 0.0336, + "num_input_tokens_seen": 1102340788, + "step": 6425 + }, + { + "epoch": 72.92351274787535, + "loss": 0.041486747562885284, + "loss_ce": 4.3875479605048895e-05, + "loss_iou": 0.1875, + "loss_num": 0.00830078125, + "loss_xval": 0.04150390625, + "num_input_tokens_seen": 1102340788, + "step": 6425 + }, + { + "epoch": 72.93484419263456, + "grad_norm": 3.1689008663761293, + "learning_rate": 5e-06, + "loss": 0.0681, + "num_input_tokens_seen": 1102511728, + "step": 6426 + }, + { + "epoch": 72.93484419263456, + "loss": 0.0775311291217804, + "loss_ce": 1.6485340893268585e-05, + "loss_iou": 0.2734375, + "loss_num": 0.0155029296875, + "loss_xval": 0.07763671875, + "num_input_tokens_seen": 1102511728, + "step": 6426 + }, + { + "epoch": 72.94617563739376, + "grad_norm": 3.467805795385136, + "learning_rate": 5e-06, + "loss": 0.0487, + "num_input_tokens_seen": 1102682212, + "step": 6427 + }, + { + "epoch": 72.94617563739376, + "loss": 0.030984679237008095, + "loss_ce": 9.336401490145363e-06, + "loss_iou": 0.470703125, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1102682212, + "step": 6427 + }, + { + "epoch": 72.95750708215297, + "grad_norm": 3.4446084932003944, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 1102854436, + "step": 6428 + }, + { + "epoch": 72.95750708215297, + "loss": 0.046675343066453934, + "loss_ce": 4.448468826012686e-05, + "loss_iou": 0.294921875, + "loss_num": 0.00933837890625, + "loss_xval": 0.046630859375, + "num_input_tokens_seen": 1102854436, + "step": 6428 + }, + { + "epoch": 72.96883852691218, + "grad_norm": 3.106152647942425, + "learning_rate": 5e-06, + "loss": 0.0702, + "num_input_tokens_seen": 1103026072, + "step": 6429 + }, + { + "epoch": 72.96883852691218, + "loss": 0.08778481185436249, + "loss_ce": 4.677658944274299e-05, + "loss_iou": 0.2890625, + "loss_num": 0.017578125, + "loss_xval": 0.087890625, + "num_input_tokens_seen": 1103026072, + "step": 6429 + }, + { + "epoch": 72.98016997167139, + "grad_norm": 2.910520697482817, + "learning_rate": 5e-06, + "loss": 0.0375, + "num_input_tokens_seen": 1103195616, + "step": 6430 + }, + { + "epoch": 72.98016997167139, + "loss": 0.05527765303850174, + "loss_ce": 4.083516250830144e-05, + "loss_iou": 0.333984375, + "loss_num": 0.01104736328125, + "loss_xval": 0.05517578125, + "num_input_tokens_seen": 1103195616, + "step": 6430 + }, + { + "epoch": 72.9915014164306, + "grad_norm": 2.981832397353389, + "learning_rate": 5e-06, + "loss": 0.0366, + "num_input_tokens_seen": 1103366048, + "step": 6431 + }, + { + "epoch": 72.9915014164306, + "loss": 0.04713235795497894, + "loss_ce": 1.3217547348176595e-05, + "loss_iou": 0.296875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 1103366048, + "step": 6431 + }, + { + "epoch": 72.9915014164306, + "loss": 0.10227605700492859, + "loss_ce": 2.691092959139496e-05, + "loss_iou": 0.279296875, + "loss_num": 0.0205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 1103495140, + "step": 6431 + }, + { + "epoch": 73.0028328611898, + "grad_norm": 3.6994591782662516, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 1103537952, + "step": 6432 + }, + { + "epoch": 73.0028328611898, + "loss": 0.027685847133398056, + "loss_ce": 6.402898634405574e-06, + "loss_iou": 0.55078125, + "loss_num": 0.00555419921875, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 1103537952, + "step": 6432 + }, + { + "epoch": 73.01416430594901, + "grad_norm": 3.4648379887529073, + "learning_rate": 5e-06, + "loss": 0.0406, + "num_input_tokens_seen": 1103708580, + "step": 6433 + }, + { + "epoch": 73.01416430594901, + "loss": 0.03935055807232857, + "loss_ce": 1.3397737347986549e-05, + "loss_iou": 0.64453125, + "loss_num": 0.00787353515625, + "loss_xval": 0.039306640625, + "num_input_tokens_seen": 1103708580, + "step": 6433 + }, + { + "epoch": 73.02549575070822, + "grad_norm": 2.9207199662965877, + "learning_rate": 5e-06, + "loss": 0.0337, + "num_input_tokens_seen": 1103880156, + "step": 6434 + }, + { + "epoch": 73.02549575070822, + "loss": 0.03269508481025696, + "loss_ce": 4.1278435674030334e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 1103880156, + "step": 6434 + }, + { + "epoch": 73.03682719546742, + "grad_norm": 2.905982254364295, + "learning_rate": 5e-06, + "loss": 0.0435, + "num_input_tokens_seen": 1104052064, + "step": 6435 + }, + { + "epoch": 73.03682719546742, + "loss": 0.036076027899980545, + "loss_ce": 0.000141577998874709, + "loss_iou": 0.6953125, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1104052064, + "step": 6435 + }, + { + "epoch": 73.04815864022663, + "grad_norm": 2.640863974585965, + "learning_rate": 5e-06, + "loss": 0.0443, + "num_input_tokens_seen": 1104223684, + "step": 6436 + }, + { + "epoch": 73.04815864022663, + "loss": 0.048409804701805115, + "loss_ce": 2.418357689748518e-05, + "loss_iou": 0.287109375, + "loss_num": 0.00970458984375, + "loss_xval": 0.04833984375, + "num_input_tokens_seen": 1104223684, + "step": 6436 + }, + { + "epoch": 73.05949008498584, + "grad_norm": 3.313332227660754, + "learning_rate": 5e-06, + "loss": 0.0588, + "num_input_tokens_seen": 1104393612, + "step": 6437 + }, + { + "epoch": 73.05949008498584, + "loss": 0.05031883716583252, + "loss_ce": 6.401594146154821e-05, + "loss_iou": 0.3046875, + "loss_num": 0.01007080078125, + "loss_xval": 0.05029296875, + "num_input_tokens_seen": 1104393612, + "step": 6437 + }, + { + "epoch": 73.07082152974505, + "grad_norm": 3.450378233183144, + "learning_rate": 5e-06, + "loss": 0.0634, + "num_input_tokens_seen": 1104563424, + "step": 6438 + }, + { + "epoch": 73.07082152974505, + "loss": 0.03137843310832977, + "loss_ce": 2.1618816390400752e-05, + "loss_iou": 0.416015625, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1104563424, + "step": 6438 + }, + { + "epoch": 73.08215297450425, + "grad_norm": 2.8020120825118693, + "learning_rate": 5e-06, + "loss": 0.0432, + "num_input_tokens_seen": 1104735500, + "step": 6439 + }, + { + "epoch": 73.08215297450425, + "loss": 0.026850799098610878, + "loss_ce": 1.0589408702799119e-05, + "loss_iou": 0.47265625, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 1104735500, + "step": 6439 + }, + { + "epoch": 73.09348441926346, + "grad_norm": 2.6340272115492276, + "learning_rate": 5e-06, + "loss": 0.0405, + "num_input_tokens_seen": 1104906872, + "step": 6440 + }, + { + "epoch": 73.09348441926346, + "loss": 0.07926710695028305, + "loss_ce": 2.0583902369253337e-05, + "loss_iou": 0.494140625, + "loss_num": 0.015869140625, + "loss_xval": 0.0791015625, + "num_input_tokens_seen": 1104906872, + "step": 6440 + }, + { + "epoch": 73.10481586402267, + "grad_norm": 2.584400808954087, + "learning_rate": 5e-06, + "loss": 0.0319, + "num_input_tokens_seen": 1105077888, + "step": 6441 + }, + { + "epoch": 73.10481586402267, + "loss": 0.020703855901956558, + "loss_ce": 2.8195639970363118e-05, + "loss_iou": 0.41015625, + "loss_num": 0.004150390625, + "loss_xval": 0.0206298828125, + "num_input_tokens_seen": 1105077888, + "step": 6441 + }, + { + "epoch": 73.11614730878188, + "grad_norm": 2.0846174342460033, + "learning_rate": 5e-06, + "loss": 0.0292, + "num_input_tokens_seen": 1105249828, + "step": 6442 + }, + { + "epoch": 73.11614730878188, + "loss": 0.030573632568120956, + "loss_ce": 7.894216105341911e-05, + "loss_iou": 0.392578125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1105249828, + "step": 6442 + }, + { + "epoch": 73.12747875354107, + "grad_norm": 2.696051878153036, + "learning_rate": 5e-06, + "loss": 0.0391, + "num_input_tokens_seen": 1105418924, + "step": 6443 + }, + { + "epoch": 73.12747875354107, + "loss": 0.04201575368642807, + "loss_ce": 2.356819823035039e-05, + "loss_iou": 0.298828125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1105418924, + "step": 6443 + }, + { + "epoch": 73.13881019830028, + "grad_norm": 3.58811776792575, + "learning_rate": 5e-06, + "loss": 0.0657, + "num_input_tokens_seen": 1105589376, + "step": 6444 + }, + { + "epoch": 73.13881019830028, + "loss": 0.03687825798988342, + "loss_ce": 1.30264616018394e-05, + "loss_iou": 0.5078125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1105589376, + "step": 6444 + }, + { + "epoch": 73.15014164305948, + "grad_norm": 3.749253289314688, + "learning_rate": 5e-06, + "loss": 0.0389, + "num_input_tokens_seen": 1105760624, + "step": 6445 + }, + { + "epoch": 73.15014164305948, + "loss": 0.042613156139850616, + "loss_ce": 1.0616137842589524e-05, + "loss_iou": 0.51171875, + "loss_num": 0.008544921875, + "loss_xval": 0.04248046875, + "num_input_tokens_seen": 1105760624, + "step": 6445 + }, + { + "epoch": 73.16147308781869, + "grad_norm": 3.877064264441872, + "learning_rate": 5e-06, + "loss": 0.0407, + "num_input_tokens_seen": 1105932508, + "step": 6446 + }, + { + "epoch": 73.16147308781869, + "loss": 0.04025866091251373, + "loss_ce": 2.12349186767824e-05, + "loss_iou": 0.41015625, + "loss_num": 0.008056640625, + "loss_xval": 0.040283203125, + "num_input_tokens_seen": 1105932508, + "step": 6446 + }, + { + "epoch": 73.1728045325779, + "grad_norm": 3.8991843133533117, + "learning_rate": 5e-06, + "loss": 0.0469, + "num_input_tokens_seen": 1106103540, + "step": 6447 + }, + { + "epoch": 73.1728045325779, + "loss": 0.07381882518529892, + "loss_ce": 1.2061761481163558e-05, + "loss_iou": 0.1298828125, + "loss_num": 0.0147705078125, + "loss_xval": 0.07373046875, + "num_input_tokens_seen": 1106103540, + "step": 6447 + }, + { + "epoch": 73.1841359773371, + "grad_norm": 3.500376497813227, + "learning_rate": 5e-06, + "loss": 0.0291, + "num_input_tokens_seen": 1106275008, + "step": 6448 + }, + { + "epoch": 73.1841359773371, + "loss": 0.03309686481952667, + "loss_ce": 1.58109196490841e-05, + "loss_iou": 0.47265625, + "loss_num": 0.006622314453125, + "loss_xval": 0.033203125, + "num_input_tokens_seen": 1106275008, + "step": 6448 + }, + { + "epoch": 73.19546742209631, + "grad_norm": 2.6903158899230797, + "learning_rate": 5e-06, + "loss": 0.0457, + "num_input_tokens_seen": 1106446248, + "step": 6449 + }, + { + "epoch": 73.19546742209631, + "loss": 0.08586322516202927, + "loss_ce": 1.7282774933846667e-05, + "loss_iou": 0.640625, + "loss_num": 0.0172119140625, + "loss_xval": 0.0859375, + "num_input_tokens_seen": 1106446248, + "step": 6449 + }, + { + "epoch": 73.20679886685552, + "grad_norm": 2.436789809227241, + "learning_rate": 5e-06, + "loss": 0.0561, + "num_input_tokens_seen": 1106618140, + "step": 6450 + }, + { + "epoch": 73.20679886685552, + "loss": 0.03357922285795212, + "loss_ce": 2.5147364794975147e-05, + "loss_iou": 0.326171875, + "loss_num": 0.0067138671875, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1106618140, + "step": 6450 + }, + { + "epoch": 73.21813031161473, + "grad_norm": 2.848381549558379, + "learning_rate": 5e-06, + "loss": 0.032, + "num_input_tokens_seen": 1106789616, + "step": 6451 + }, + { + "epoch": 73.21813031161473, + "loss": 0.04093830659985542, + "loss_ce": 1.4233784895623103e-05, + "loss_iou": 0.22265625, + "loss_num": 0.0081787109375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1106789616, + "step": 6451 + }, + { + "epoch": 73.22946175637394, + "grad_norm": 3.1241714664854245, + "learning_rate": 5e-06, + "loss": 0.041, + "num_input_tokens_seen": 1106959604, + "step": 6452 + }, + { + "epoch": 73.22946175637394, + "loss": 0.03366551548242569, + "loss_ce": 3.514260606607422e-05, + "loss_iou": 0.3359375, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1106959604, + "step": 6452 + }, + { + "epoch": 73.24079320113314, + "grad_norm": 3.0840479155024445, + "learning_rate": 5e-06, + "loss": 0.037, + "num_input_tokens_seen": 1107131352, + "step": 6453 + }, + { + "epoch": 73.24079320113314, + "loss": 0.030013732612133026, + "loss_ce": 1.49521238199668e-05, + "loss_iou": 0.451171875, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1107131352, + "step": 6453 + }, + { + "epoch": 73.25212464589235, + "grad_norm": 3.5992573840646767, + "learning_rate": 5e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1107303088, + "step": 6454 + }, + { + "epoch": 73.25212464589235, + "loss": 0.04085526615381241, + "loss_ce": 2.2747990442439914e-05, + "loss_iou": 0.35546875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1107303088, + "step": 6454 + }, + { + "epoch": 73.26345609065156, + "grad_norm": 3.8252708912968534, + "learning_rate": 5e-06, + "loss": 0.0559, + "num_input_tokens_seen": 1107474824, + "step": 6455 + }, + { + "epoch": 73.26345609065156, + "loss": 0.0406356155872345, + "loss_ce": 1.6718397091608495e-05, + "loss_iou": 0.4921875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1107474824, + "step": 6455 + }, + { + "epoch": 73.27478753541077, + "grad_norm": 4.017300771050497, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 1107646632, + "step": 6456 + }, + { + "epoch": 73.27478753541077, + "loss": 0.034910544753074646, + "loss_ce": 1.3692781067220494e-05, + "loss_iou": 0.4375, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1107646632, + "step": 6456 + }, + { + "epoch": 73.28611898016997, + "grad_norm": 2.989494704346733, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1107818212, + "step": 6457 + }, + { + "epoch": 73.28611898016997, + "loss": 0.03674791380763054, + "loss_ce": 2.0007406419608742e-05, + "loss_iou": 0.427734375, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1107818212, + "step": 6457 + }, + { + "epoch": 73.29745042492918, + "grad_norm": 3.1306413996893503, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 1107986928, + "step": 6458 + }, + { + "epoch": 73.29745042492918, + "loss": 0.031121093779802322, + "loss_ce": 2.368139939790126e-05, + "loss_iou": 0.337890625, + "loss_num": 0.0062255859375, + "loss_xval": 0.0311279296875, + "num_input_tokens_seen": 1107986928, + "step": 6458 + }, + { + "epoch": 73.30878186968839, + "grad_norm": 3.0920400310808733, + "learning_rate": 5e-06, + "loss": 0.0487, + "num_input_tokens_seen": 1108158608, + "step": 6459 + }, + { + "epoch": 73.30878186968839, + "loss": 0.04605382680892944, + "loss_ce": 1.8060127331409603e-05, + "loss_iou": 0.55078125, + "loss_num": 0.00921630859375, + "loss_xval": 0.046142578125, + "num_input_tokens_seen": 1108158608, + "step": 6459 + }, + { + "epoch": 73.3201133144476, + "grad_norm": 5.88684457277583, + "learning_rate": 5e-06, + "loss": 0.0429, + "num_input_tokens_seen": 1108330740, + "step": 6460 + }, + { + "epoch": 73.3201133144476, + "loss": 0.032196275889873505, + "loss_ce": 1.5490251826122403e-05, + "loss_iou": 0.31640625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1108330740, + "step": 6460 + }, + { + "epoch": 73.3314447592068, + "grad_norm": 3.0814263094685934, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1108502448, + "step": 6461 + }, + { + "epoch": 73.3314447592068, + "loss": 0.0262615904211998, + "loss_ce": 1.6473162759211846e-05, + "loss_iou": 0.431640625, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 1108502448, + "step": 6461 + }, + { + "epoch": 73.34277620396601, + "grad_norm": 2.751703744798392, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 1108673192, + "step": 6462 + }, + { + "epoch": 73.34277620396601, + "loss": 0.023171426728367805, + "loss_ce": 1.6213718481594697e-05, + "loss_iou": 0.220703125, + "loss_num": 0.004638671875, + "loss_xval": 0.023193359375, + "num_input_tokens_seen": 1108673192, + "step": 6462 + }, + { + "epoch": 73.35410764872522, + "grad_norm": 3.094501974301736, + "learning_rate": 5e-06, + "loss": 0.0364, + "num_input_tokens_seen": 1108842956, + "step": 6463 + }, + { + "epoch": 73.35410764872522, + "loss": 0.028249815106391907, + "loss_ce": 2.105399835272692e-05, + "loss_iou": 0.4453125, + "loss_num": 0.005645751953125, + "loss_xval": 0.0281982421875, + "num_input_tokens_seen": 1108842956, + "step": 6463 + }, + { + "epoch": 73.36543909348443, + "grad_norm": 3.5133096138128637, + "learning_rate": 5e-06, + "loss": 0.0405, + "num_input_tokens_seen": 1109014732, + "step": 6464 + }, + { + "epoch": 73.36543909348443, + "loss": 0.03406064584851265, + "loss_ce": 0.0035278077702969313, + "loss_iou": 0.50390625, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1109014732, + "step": 6464 + }, + { + "epoch": 73.37677053824362, + "grad_norm": 3.1754817078113504, + "learning_rate": 5e-06, + "loss": 0.0443, + "num_input_tokens_seen": 1109184920, + "step": 6465 + }, + { + "epoch": 73.37677053824362, + "loss": 0.05430317670106888, + "loss_ce": 2.7667218091664836e-05, + "loss_iou": 0.302734375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 1109184920, + "step": 6465 + }, + { + "epoch": 73.38810198300283, + "grad_norm": 2.9395621782941648, + "learning_rate": 5e-06, + "loss": 0.0353, + "num_input_tokens_seen": 1109356608, + "step": 6466 + }, + { + "epoch": 73.38810198300283, + "loss": 0.031933143734931946, + "loss_ce": 1.1758004802686628e-05, + "loss_iou": 0.283203125, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1109356608, + "step": 6466 + }, + { + "epoch": 73.39943342776203, + "grad_norm": 3.5523172752121637, + "learning_rate": 5e-06, + "loss": 0.0538, + "num_input_tokens_seen": 1109526300, + "step": 6467 + }, + { + "epoch": 73.39943342776203, + "loss": 0.05787474662065506, + "loss_ce": 1.341482038696995e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0115966796875, + "loss_xval": 0.057861328125, + "num_input_tokens_seen": 1109526300, + "step": 6467 + }, + { + "epoch": 73.41076487252124, + "grad_norm": 2.9660119784878916, + "learning_rate": 5e-06, + "loss": 0.0463, + "num_input_tokens_seen": 1109698084, + "step": 6468 + }, + { + "epoch": 73.41076487252124, + "loss": 0.079731285572052, + "loss_ce": 1.9377464923309162e-05, + "loss_iou": 0.349609375, + "loss_num": 0.0159912109375, + "loss_xval": 0.07958984375, + "num_input_tokens_seen": 1109698084, + "step": 6468 + }, + { + "epoch": 73.42209631728045, + "grad_norm": 2.586709458627176, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1109868764, + "step": 6469 + }, + { + "epoch": 73.42209631728045, + "loss": 0.05429849773645401, + "loss_ce": 2.2983678718446754e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0108642578125, + "loss_xval": 0.05419921875, + "num_input_tokens_seen": 1109868764, + "step": 6469 + }, + { + "epoch": 73.43342776203966, + "grad_norm": 3.306600132488779, + "learning_rate": 5e-06, + "loss": 0.0419, + "num_input_tokens_seen": 1110040360, + "step": 6470 + }, + { + "epoch": 73.43342776203966, + "loss": 0.06186573952436447, + "loss_ce": 3.712794205057435e-05, + "loss_iou": 0.306640625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 1110040360, + "step": 6470 + }, + { + "epoch": 73.44475920679886, + "grad_norm": 3.312855548906545, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 1110212048, + "step": 6471 + }, + { + "epoch": 73.44475920679886, + "loss": 0.035165928304195404, + "loss_ce": 9.676637091615703e-06, + "loss_iou": 0.34375, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1110212048, + "step": 6471 + }, + { + "epoch": 73.45609065155807, + "grad_norm": 3.28920872797524, + "learning_rate": 5e-06, + "loss": 0.0461, + "num_input_tokens_seen": 1110383036, + "step": 6472 + }, + { + "epoch": 73.45609065155807, + "loss": 0.03040408156812191, + "loss_ce": 8.572325896238908e-06, + "loss_iou": 0.447265625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1110383036, + "step": 6472 + }, + { + "epoch": 73.46742209631728, + "grad_norm": 3.2592469296490307, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 1110553644, + "step": 6473 + }, + { + "epoch": 73.46742209631728, + "loss": 0.043555401265621185, + "loss_ce": 3.733702396857552e-05, + "loss_iou": 0.3046875, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 1110553644, + "step": 6473 + }, + { + "epoch": 73.47875354107649, + "grad_norm": 2.6549269921249867, + "learning_rate": 5e-06, + "loss": 0.0336, + "num_input_tokens_seen": 1110725256, + "step": 6474 + }, + { + "epoch": 73.47875354107649, + "loss": 0.026280013844370842, + "loss_ce": 1.963696922757663e-05, + "loss_iou": 0.208984375, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 1110725256, + "step": 6474 + }, + { + "epoch": 73.4900849858357, + "grad_norm": 2.8951843138895166, + "learning_rate": 5e-06, + "loss": 0.0527, + "num_input_tokens_seen": 1110896804, + "step": 6475 + }, + { + "epoch": 73.4900849858357, + "loss": 0.11417710036039352, + "loss_ce": 5.661951581714675e-05, + "loss_iou": 0.03662109375, + "loss_num": 0.0228271484375, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 1110896804, + "step": 6475 + }, + { + "epoch": 73.5014164305949, + "grad_norm": 3.7301456964179827, + "learning_rate": 5e-06, + "loss": 0.0817, + "num_input_tokens_seen": 1111068760, + "step": 6476 + }, + { + "epoch": 73.5014164305949, + "loss": 0.06310199201107025, + "loss_ce": 8.319588960148394e-05, + "loss_iou": 0.392578125, + "loss_num": 0.0125732421875, + "loss_xval": 0.06298828125, + "num_input_tokens_seen": 1111068760, + "step": 6476 + }, + { + "epoch": 73.51274787535411, + "grad_norm": 3.498005795239106, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 1111240540, + "step": 6477 + }, + { + "epoch": 73.51274787535411, + "loss": 0.04817181080579758, + "loss_ce": 1.5074914699653164e-05, + "loss_iou": 0.458984375, + "loss_num": 0.0096435546875, + "loss_xval": 0.048095703125, + "num_input_tokens_seen": 1111240540, + "step": 6477 + }, + { + "epoch": 73.52407932011332, + "grad_norm": 3.055170153768895, + "learning_rate": 5e-06, + "loss": 0.0358, + "num_input_tokens_seen": 1111412424, + "step": 6478 + }, + { + "epoch": 73.52407932011332, + "loss": 0.04850431904196739, + "loss_ce": 2.714867878239602e-05, + "loss_iou": 0.375, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 1111412424, + "step": 6478 + }, + { + "epoch": 73.53541076487252, + "grad_norm": 3.352723256610269, + "learning_rate": 5e-06, + "loss": 0.0457, + "num_input_tokens_seen": 1111584168, + "step": 6479 + }, + { + "epoch": 73.53541076487252, + "loss": 0.04059762507677078, + "loss_ce": 9.247715752280783e-06, + "loss_iou": 0.515625, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1111584168, + "step": 6479 + }, + { + "epoch": 73.54674220963173, + "grad_norm": 4.009041258935215, + "learning_rate": 5e-06, + "loss": 0.0695, + "num_input_tokens_seen": 1111754700, + "step": 6480 + }, + { + "epoch": 73.54674220963173, + "loss": 0.04311896860599518, + "loss_ce": 1.2887299817521125e-05, + "loss_iou": 0.41015625, + "loss_num": 0.00860595703125, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1111754700, + "step": 6480 + }, + { + "epoch": 73.55807365439094, + "grad_norm": 3.1537946213291375, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 1111925588, + "step": 6481 + }, + { + "epoch": 73.55807365439094, + "loss": 0.031039375811815262, + "loss_ce": 1.825577419367619e-05, + "loss_iou": 0.5546875, + "loss_num": 0.006195068359375, + "loss_xval": 0.031005859375, + "num_input_tokens_seen": 1111925588, + "step": 6481 + }, + { + "epoch": 73.56940509915015, + "grad_norm": 2.2943597392871053, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 1112097356, + "step": 6482 + }, + { + "epoch": 73.56940509915015, + "loss": 0.03549940139055252, + "loss_ce": 7.459177140844986e-06, + "loss_iou": 0.37890625, + "loss_num": 0.007110595703125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 1112097356, + "step": 6482 + }, + { + "epoch": 73.58073654390935, + "grad_norm": 2.090884541047774, + "learning_rate": 5e-06, + "loss": 0.0492, + "num_input_tokens_seen": 1112269016, + "step": 6483 + }, + { + "epoch": 73.58073654390935, + "loss": 0.07406432181596756, + "loss_ce": 4.3938751332461834e-05, + "loss_iou": 0.494140625, + "loss_num": 0.0147705078125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 1112269016, + "step": 6483 + }, + { + "epoch": 73.59206798866856, + "grad_norm": 3.1708341647785434, + "learning_rate": 5e-06, + "loss": 0.0326, + "num_input_tokens_seen": 1112440168, + "step": 6484 + }, + { + "epoch": 73.59206798866856, + "loss": 0.03127949312329292, + "loss_ce": 1.4234448826755397e-05, + "loss_iou": 0.453125, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1112440168, + "step": 6484 + }, + { + "epoch": 73.60339943342777, + "grad_norm": 3.8607870766871435, + "learning_rate": 5e-06, + "loss": 0.0462, + "num_input_tokens_seen": 1112609292, + "step": 6485 + }, + { + "epoch": 73.60339943342777, + "loss": 0.03704557567834854, + "loss_ce": 2.7751655579777434e-05, + "loss_iou": 0.55859375, + "loss_num": 0.00738525390625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 1112609292, + "step": 6485 + }, + { + "epoch": 73.61473087818698, + "grad_norm": 2.2399730241428277, + "learning_rate": 5e-06, + "loss": 0.0259, + "num_input_tokens_seen": 1112779556, + "step": 6486 + }, + { + "epoch": 73.61473087818698, + "loss": 0.02919117361307144, + "loss_ce": 2.3999054974410683e-05, + "loss_iou": 0.357421875, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 1112779556, + "step": 6486 + }, + { + "epoch": 73.62606232294617, + "grad_norm": 3.8101749347443814, + "learning_rate": 5e-06, + "loss": 0.0668, + "num_input_tokens_seen": 1112950660, + "step": 6487 + }, + { + "epoch": 73.62606232294617, + "loss": 0.022443873807787895, + "loss_ce": 8.97480858839117e-05, + "loss_iou": 0.341796875, + "loss_num": 0.00445556640625, + "loss_xval": 0.0223388671875, + "num_input_tokens_seen": 1112950660, + "step": 6487 + }, + { + "epoch": 73.63739376770538, + "grad_norm": 3.894889633043946, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 1113120940, + "step": 6488 + }, + { + "epoch": 73.63739376770538, + "loss": 0.03205784782767296, + "loss_ce": 1.4391994227480609e-05, + "loss_iou": 0.46875, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1113120940, + "step": 6488 + }, + { + "epoch": 73.64872521246458, + "grad_norm": 3.120787083301744, + "learning_rate": 5e-06, + "loss": 0.0361, + "num_input_tokens_seen": 1113292748, + "step": 6489 + }, + { + "epoch": 73.64872521246458, + "loss": 0.02735951915383339, + "loss_ce": 3.1029641831992194e-05, + "loss_iou": 0.447265625, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 1113292748, + "step": 6489 + }, + { + "epoch": 73.66005665722379, + "grad_norm": 3.221535969834847, + "learning_rate": 5e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1113464820, + "step": 6490 + }, + { + "epoch": 73.66005665722379, + "loss": 0.03254421055316925, + "loss_ce": 4.2991916416212916e-05, + "loss_iou": 0.38671875, + "loss_num": 0.006500244140625, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 1113464820, + "step": 6490 + }, + { + "epoch": 73.671388101983, + "grad_norm": 4.151994683365662, + "learning_rate": 5e-06, + "loss": 0.0578, + "num_input_tokens_seen": 1113636328, + "step": 6491 + }, + { + "epoch": 73.671388101983, + "loss": 0.03663531690835953, + "loss_ce": 2.9481716410373338e-05, + "loss_iou": 0.546875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1113636328, + "step": 6491 + }, + { + "epoch": 73.6827195467422, + "grad_norm": 3.2698866394695214, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 1113807864, + "step": 6492 + }, + { + "epoch": 73.6827195467422, + "loss": 0.02959362044930458, + "loss_ce": 2.2087617253419012e-05, + "loss_iou": 0.51171875, + "loss_num": 0.00592041015625, + "loss_xval": 0.029541015625, + "num_input_tokens_seen": 1113807864, + "step": 6492 + }, + { + "epoch": 73.69405099150141, + "grad_norm": 3.244195012469785, + "learning_rate": 5e-06, + "loss": 0.0346, + "num_input_tokens_seen": 1113978272, + "step": 6493 + }, + { + "epoch": 73.69405099150141, + "loss": 0.035467371344566345, + "loss_ce": 2.120135832228698e-05, + "loss_iou": 0.4140625, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 1113978272, + "step": 6493 + }, + { + "epoch": 73.70538243626062, + "grad_norm": 3.655037573224194, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 1114149392, + "step": 6494 + }, + { + "epoch": 73.70538243626062, + "loss": 0.044869210571050644, + "loss_ce": 8.370522664336022e-06, + "loss_iou": 0.482421875, + "loss_num": 0.00897216796875, + "loss_xval": 0.044921875, + "num_input_tokens_seen": 1114149392, + "step": 6494 + }, + { + "epoch": 73.71671388101983, + "grad_norm": 3.7782426891480454, + "learning_rate": 5e-06, + "loss": 0.0395, + "num_input_tokens_seen": 1114318948, + "step": 6495 + }, + { + "epoch": 73.71671388101983, + "loss": 0.03719808906316757, + "loss_ce": 4.293832171242684e-05, + "loss_iou": 0.5859375, + "loss_num": 0.0074462890625, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 1114318948, + "step": 6495 + }, + { + "epoch": 73.72804532577904, + "grad_norm": 3.499634486668161, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 1114489916, + "step": 6496 + }, + { + "epoch": 73.72804532577904, + "loss": 0.030175620689988136, + "loss_ce": 2.425476850476116e-05, + "loss_iou": 0.46484375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 1114489916, + "step": 6496 + }, + { + "epoch": 73.73937677053824, + "grad_norm": 2.8426321920918576, + "learning_rate": 5e-06, + "loss": 0.0335, + "num_input_tokens_seen": 1114661624, + "step": 6497 + }, + { + "epoch": 73.73937677053824, + "loss": 0.026565423235297203, + "loss_ce": 2.2759606508770958e-05, + "loss_iou": 0.294921875, + "loss_num": 0.00531005859375, + "loss_xval": 0.0264892578125, + "num_input_tokens_seen": 1114661624, + "step": 6497 + }, + { + "epoch": 73.75070821529745, + "grad_norm": 3.121434993905259, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 1114833784, + "step": 6498 + }, + { + "epoch": 73.75070821529745, + "loss": 0.07860241830348969, + "loss_ce": 1.9653536583064124e-05, + "loss_iou": 0.380859375, + "loss_num": 0.0157470703125, + "loss_xval": 0.07861328125, + "num_input_tokens_seen": 1114833784, + "step": 6498 + }, + { + "epoch": 73.76203966005666, + "grad_norm": 3.0212671657824584, + "learning_rate": 5e-06, + "loss": 0.0498, + "num_input_tokens_seen": 1115005612, + "step": 6499 + }, + { + "epoch": 73.76203966005666, + "loss": 0.026860613375902176, + "loss_ce": 3.566223676898517e-05, + "loss_iou": 0.54296875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 1115005612, + "step": 6499 + }, + { + "epoch": 73.77337110481587, + "grad_norm": 3.305158194069835, + "learning_rate": 5e-06, + "loss": 0.0331, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.77337110481587, + "eval_seeclick_CIoU": 0.5174305289983749, + "eval_seeclick_GIoU": 0.5203062295913696, + "eval_seeclick_IoU": 0.5562175512313843, + "eval_seeclick_MAE_all": 0.06904058903455734, + "eval_seeclick_MAE_h": 0.034702371805906296, + "eval_seeclick_MAE_w": 0.1056600883603096, + "eval_seeclick_MAE_x": 0.10088222846388817, + "eval_seeclick_MAE_y": 0.03491765260696411, + "eval_seeclick_NUM_probability": 0.9999951124191284, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 1.0270713567733765, + "eval_seeclick_loss_ce": 0.7699527740478516, + "eval_seeclick_loss_iou": 0.560791015625, + "eval_seeclick_loss_num": 0.052215576171875, + "eval_seeclick_loss_xval": 0.2611083984375, + "eval_seeclick_runtime": 70.9732, + "eval_seeclick_samples_per_second": 0.606, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.77337110481587, + "eval_icons_CIoU": 0.7356639802455902, + "eval_icons_GIoU": 0.7380691468715668, + "eval_icons_IoU": 0.7486778497695923, + "eval_icons_MAE_all": 0.03320657089352608, + "eval_icons_MAE_h": 0.02907582465559244, + "eval_icons_MAE_w": 0.039673191495239735, + "eval_icons_MAE_x": 0.03304527560248971, + "eval_icons_MAE_y": 0.031031984835863113, + "eval_icons_NUM_probability": 0.9998223185539246, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.13039979338645935, + "eval_icons_loss_ce": 0.004626711364835501, + "eval_icons_loss_iou": 0.56982421875, + "eval_icons_loss_num": 0.0230712890625, + "eval_icons_loss_xval": 0.115386962890625, + "eval_icons_runtime": 80.9779, + "eval_icons_samples_per_second": 0.617, + "eval_icons_steps_per_second": 0.025, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.77337110481587, + "eval_screenspot_CIoU": 0.6431978742281595, + "eval_screenspot_GIoU": 0.6427284081776937, + "eval_screenspot_IoU": 0.6707419554392496, + "eval_screenspot_MAE_all": 0.06348163510362308, + "eval_screenspot_MAE_h": 0.03672616680463155, + "eval_screenspot_MAE_w": 0.1126464003076156, + "eval_screenspot_MAE_x": 0.07129109464585781, + "eval_screenspot_MAE_y": 0.033262877725064754, + "eval_screenspot_NUM_probability": 0.9999578396479288, + "eval_screenspot_inside_bbox": 0.9183333317438761, + "eval_screenspot_loss": 0.2698996067047119, + "eval_screenspot_loss_ce": 0.014380348846316338, + "eval_screenspot_loss_iou": 0.4943033854166667, + "eval_screenspot_loss_num": 0.050984700520833336, + "eval_screenspot_loss_xval": 0.2549641927083333, + "eval_screenspot_runtime": 152.8165, + "eval_screenspot_samples_per_second": 0.582, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.77337110481587, + "eval_compot_CIoU": 0.8599472343921661, + "eval_compot_GIoU": 0.8595451414585114, + "eval_compot_IoU": 0.8748769760131836, + "eval_compot_MAE_all": 0.024935631081461906, + "eval_compot_MAE_h": 0.022948280908167362, + "eval_compot_MAE_w": 0.02818493079394102, + "eval_compot_MAE_x": 0.02519217785447836, + "eval_compot_MAE_y": 0.023417134769260883, + "eval_compot_NUM_probability": 0.9999797642230988, + "eval_compot_inside_bbox": 0.9409722089767456, + "eval_compot_loss": 0.08053144812583923, + "eval_compot_loss_ce": 1.2657139905059012e-05, + "eval_compot_loss_iou": 0.522216796875, + "eval_compot_loss_num": 0.013930320739746094, + "eval_compot_loss_xval": 0.069610595703125, + "eval_compot_runtime": 86.145, + "eval_compot_samples_per_second": 0.58, + "eval_compot_steps_per_second": 0.023, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.77337110481587, + "eval_custom_ui_MAE_all": 0.020075481850653887, + "eval_custom_ui_MAE_x": 0.03277571313083172, + "eval_custom_ui_MAE_y": 0.007375251268967986, + "eval_custom_ui_NUM_probability": 0.999919593334198, + "eval_custom_ui_loss": 0.22876690328121185, + "eval_custom_ui_loss_ce": 0.1282486617565155, + "eval_custom_ui_loss_num": 0.019617080688476562, + "eval_custom_ui_loss_xval": 0.0980987548828125, + "eval_custom_ui_runtime": 64.9955, + "eval_custom_ui_samples_per_second": 0.769, + "eval_custom_ui_steps_per_second": 0.031, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.77337110481587, + "loss": 0.27542316913604736, + "loss_ce": 0.15579429268836975, + "loss_iou": 0.0, + "loss_num": 0.02392578125, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 1115176432, + "step": 6500 + }, + { + "epoch": 73.78470254957507, + "grad_norm": 3.074138302570329, + "learning_rate": 5e-06, + "loss": 0.036, + "num_input_tokens_seen": 1115346552, + "step": 6501 + }, + { + "epoch": 73.78470254957507, + "loss": 0.03765908628702164, + "loss_ce": 1.5651125067961402e-05, + "loss_iou": 0.5078125, + "loss_num": 0.007537841796875, + "loss_xval": 0.03759765625, + "num_input_tokens_seen": 1115346552, + "step": 6501 + }, + { + "epoch": 73.79603399433428, + "grad_norm": 4.332527683362927, + "learning_rate": 5e-06, + "loss": 0.0689, + "num_input_tokens_seen": 1115517988, + "step": 6502 + }, + { + "epoch": 73.79603399433428, + "loss": 0.1721544861793518, + "loss_ce": 2.7715950636775233e-05, + "loss_iou": 0.37890625, + "loss_num": 0.034423828125, + "loss_xval": 0.171875, + "num_input_tokens_seen": 1115517988, + "step": 6502 + }, + { + "epoch": 73.80736543909349, + "grad_norm": 4.103666190926778, + "learning_rate": 5e-06, + "loss": 0.0348, + "num_input_tokens_seen": 1115689260, + "step": 6503 + }, + { + "epoch": 73.80736543909349, + "loss": 0.03792804479598999, + "loss_ce": 9.954062988981605e-06, + "loss_iou": 0.447265625, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 1115689260, + "step": 6503 + }, + { + "epoch": 73.8186968838527, + "grad_norm": 3.183386145933696, + "learning_rate": 5e-06, + "loss": 0.0423, + "num_input_tokens_seen": 1115860240, + "step": 6504 + }, + { + "epoch": 73.8186968838527, + "loss": 0.06231614574790001, + "loss_ce": 1.4506962543237023e-05, + "loss_iou": 0.375, + "loss_num": 0.012451171875, + "loss_xval": 0.062255859375, + "num_input_tokens_seen": 1115860240, + "step": 6504 + }, + { + "epoch": 73.8300283286119, + "grad_norm": 2.6778214579147934, + "learning_rate": 5e-06, + "loss": 0.0411, + "num_input_tokens_seen": 1116029880, + "step": 6505 + }, + { + "epoch": 73.8300283286119, + "loss": 0.03555383160710335, + "loss_ce": 6.951671093702316e-05, + "loss_iou": 0.431640625, + "loss_num": 0.007110595703125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 1116029880, + "step": 6505 + }, + { + "epoch": 73.84135977337111, + "grad_norm": 3.2073385362650293, + "learning_rate": 5e-06, + "loss": 0.0481, + "num_input_tokens_seen": 1116201856, + "step": 6506 + }, + { + "epoch": 73.84135977337111, + "loss": 0.032043490558862686, + "loss_ce": 3.0551869713235646e-05, + "loss_iou": 0.3984375, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1116201856, + "step": 6506 + }, + { + "epoch": 73.85269121813032, + "grad_norm": 3.370755909312059, + "learning_rate": 5e-06, + "loss": 0.0476, + "num_input_tokens_seen": 1116373880, + "step": 6507 + }, + { + "epoch": 73.85269121813032, + "loss": 0.035537585616111755, + "loss_ce": 1.5125366189749911e-05, + "loss_iou": 0.35546875, + "loss_num": 0.007110595703125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 1116373880, + "step": 6507 + }, + { + "epoch": 73.86402266288952, + "grad_norm": 3.708180200731744, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1116545764, + "step": 6508 + }, + { + "epoch": 73.86402266288952, + "loss": 0.035140153020620346, + "loss_ce": 1.4421272680920083e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1116545764, + "step": 6508 + }, + { + "epoch": 73.87535410764872, + "grad_norm": 3.2301353830037516, + "learning_rate": 5e-06, + "loss": 0.0427, + "num_input_tokens_seen": 1116717468, + "step": 6509 + }, + { + "epoch": 73.87535410764872, + "loss": 0.043307363986968994, + "loss_ce": 1.8178719983552583e-05, + "loss_iou": 0.625, + "loss_num": 0.0086669921875, + "loss_xval": 0.043212890625, + "num_input_tokens_seen": 1116717468, + "step": 6509 + }, + { + "epoch": 73.88668555240793, + "grad_norm": 4.004722300627511, + "learning_rate": 5e-06, + "loss": 0.0417, + "num_input_tokens_seen": 1116888892, + "step": 6510 + }, + { + "epoch": 73.88668555240793, + "loss": 0.06704437732696533, + "loss_ce": 2.777582085400354e-05, + "loss_iou": 0.40625, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 1116888892, + "step": 6510 + }, + { + "epoch": 73.89801699716713, + "grad_norm": 4.163893794625848, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1117060236, + "step": 6511 + }, + { + "epoch": 73.89801699716713, + "loss": 0.043467551469802856, + "loss_ce": 2.5777859264053404e-05, + "loss_iou": 0.515625, + "loss_num": 0.0086669921875, + "loss_xval": 0.04345703125, + "num_input_tokens_seen": 1117060236, + "step": 6511 + }, + { + "epoch": 73.90934844192634, + "grad_norm": 3.7593655545216818, + "learning_rate": 5e-06, + "loss": 0.0415, + "num_input_tokens_seen": 1117231864, + "step": 6512 + }, + { + "epoch": 73.90934844192634, + "loss": 0.03475530445575714, + "loss_ce": 7.207525050034747e-05, + "loss_iou": 0.55078125, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1117231864, + "step": 6512 + }, + { + "epoch": 73.92067988668555, + "grad_norm": 3.0722646414303862, + "learning_rate": 5e-06, + "loss": 0.0312, + "num_input_tokens_seen": 1117402148, + "step": 6513 + }, + { + "epoch": 73.92067988668555, + "loss": 0.02711726725101471, + "loss_ce": 1.765920387697406e-05, + "loss_iou": 0.453125, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1117402148, + "step": 6513 + }, + { + "epoch": 73.93201133144476, + "grad_norm": 3.0359591532116337, + "learning_rate": 5e-06, + "loss": 0.0646, + "num_input_tokens_seen": 1117573852, + "step": 6514 + }, + { + "epoch": 73.93201133144476, + "loss": 0.059854790568351746, + "loss_ce": 2.5079119950532913e-05, + "loss_iou": 0.412109375, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 1117573852, + "step": 6514 + }, + { + "epoch": 73.94334277620396, + "grad_norm": 2.897343816417393, + "learning_rate": 5e-06, + "loss": 0.0327, + "num_input_tokens_seen": 1117745864, + "step": 6515 + }, + { + "epoch": 73.94334277620396, + "loss": 0.03228772059082985, + "loss_ce": 3.826897227554582e-05, + "loss_iou": 0.40625, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1117745864, + "step": 6515 + }, + { + "epoch": 73.95467422096317, + "grad_norm": 4.028246615971039, + "learning_rate": 5e-06, + "loss": 0.0573, + "num_input_tokens_seen": 1117917648, + "step": 6516 + }, + { + "epoch": 73.95467422096317, + "loss": 0.06714378297328949, + "loss_ce": 2.0364366719149984e-05, + "loss_iou": 0.310546875, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 1117917648, + "step": 6516 + }, + { + "epoch": 73.96600566572238, + "grad_norm": 3.9567889367108693, + "learning_rate": 5e-06, + "loss": 0.0535, + "num_input_tokens_seen": 1118089424, + "step": 6517 + }, + { + "epoch": 73.96600566572238, + "loss": 0.03859155997633934, + "loss_ce": 1.7340553313260898e-05, + "loss_iou": 0.14453125, + "loss_num": 0.007720947265625, + "loss_xval": 0.03857421875, + "num_input_tokens_seen": 1118089424, + "step": 6517 + }, + { + "epoch": 73.97733711048159, + "grad_norm": 2.9648063966099025, + "learning_rate": 5e-06, + "loss": 0.0549, + "num_input_tokens_seen": 1118261220, + "step": 6518 + }, + { + "epoch": 73.97733711048159, + "loss": 0.055747825652360916, + "loss_ce": 3.798906254814938e-05, + "loss_iou": 0.458984375, + "loss_num": 0.0111083984375, + "loss_xval": 0.0556640625, + "num_input_tokens_seen": 1118261220, + "step": 6518 + }, + { + "epoch": 73.98866855524079, + "grad_norm": 2.9961977730138494, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1118433176, + "step": 6519 + }, + { + "epoch": 73.98866855524079, + "loss": 0.04088219255208969, + "loss_ce": 1.9152939785271883e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1118433176, + "step": 6519 + }, + { + "epoch": 74.0, + "grad_norm": 3.3755395370940406, + "learning_rate": 5e-06, + "loss": 0.0597, + "num_input_tokens_seen": 1118602448, + "step": 6520 + }, + { + "epoch": 74.0, + "loss": 0.032127220183610916, + "loss_ce": 7.469081538147293e-06, + "loss_iou": 0.421875, + "loss_num": 0.00640869140625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1118602448, + "step": 6520 + }, + { + "epoch": 74.01133144475921, + "grad_norm": 3.092078476524234, + "learning_rate": 5e-06, + "loss": 0.0293, + "num_input_tokens_seen": 1118773860, + "step": 6521 + }, + { + "epoch": 74.01133144475921, + "loss": 0.036174528300762177, + "loss_ce": 1.120026263379259e-05, + "loss_iou": 0.0, + "loss_num": 0.007232666015625, + "loss_xval": 0.0361328125, + "num_input_tokens_seen": 1118773860, + "step": 6521 + }, + { + "epoch": 74.02266288951841, + "grad_norm": 2.971499851196099, + "learning_rate": 5e-06, + "loss": 0.053, + "num_input_tokens_seen": 1118945720, + "step": 6522 + }, + { + "epoch": 74.02266288951841, + "loss": 0.07449185103178024, + "loss_ce": 1.3699782357434742e-05, + "loss_iou": 0.4453125, + "loss_num": 0.014892578125, + "loss_xval": 0.07470703125, + "num_input_tokens_seen": 1118945720, + "step": 6522 + }, + { + "epoch": 74.03399433427762, + "grad_norm": 5.598040763860579, + "learning_rate": 5e-06, + "loss": 0.0544, + "num_input_tokens_seen": 1119117352, + "step": 6523 + }, + { + "epoch": 74.03399433427762, + "loss": 0.025059839710593224, + "loss_ce": 2.0166649846942164e-05, + "loss_iou": 0.47265625, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 1119117352, + "step": 6523 + }, + { + "epoch": 74.04532577903683, + "grad_norm": 20.947211042536978, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 1119287792, + "step": 6524 + }, + { + "epoch": 74.04532577903683, + "loss": 0.03474951535463333, + "loss_ce": 3.5771379771176726e-05, + "loss_iou": 0.41015625, + "loss_num": 0.0069580078125, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1119287792, + "step": 6524 + }, + { + "epoch": 74.05665722379604, + "grad_norm": 3.3105554612824744, + "learning_rate": 5e-06, + "loss": 0.0388, + "num_input_tokens_seen": 1119459256, + "step": 6525 + }, + { + "epoch": 74.05665722379604, + "loss": 0.06944504380226135, + "loss_ce": 1.755080302245915e-05, + "loss_iou": 0.4375, + "loss_num": 0.013916015625, + "loss_xval": 0.0693359375, + "num_input_tokens_seen": 1119459256, + "step": 6525 + }, + { + "epoch": 74.06798866855524, + "grad_norm": 2.67025463899964, + "learning_rate": 5e-06, + "loss": 0.0438, + "num_input_tokens_seen": 1119630180, + "step": 6526 + }, + { + "epoch": 74.06798866855524, + "loss": 0.040065426379442215, + "loss_ce": 1.1105581506853923e-05, + "loss_iou": 0.453125, + "loss_num": 0.00799560546875, + "loss_xval": 0.0400390625, + "num_input_tokens_seen": 1119630180, + "step": 6526 + }, + { + "epoch": 74.07932011331445, + "grad_norm": 2.9205934349374885, + "learning_rate": 5e-06, + "loss": 0.0377, + "num_input_tokens_seen": 1119802516, + "step": 6527 + }, + { + "epoch": 74.07932011331445, + "loss": 0.04559585079550743, + "loss_ce": 1.0219937394140288e-05, + "loss_iou": 0.4296875, + "loss_num": 0.00909423828125, + "loss_xval": 0.045654296875, + "num_input_tokens_seen": 1119802516, + "step": 6527 + }, + { + "epoch": 74.09065155807366, + "grad_norm": 2.9843199474765485, + "learning_rate": 5e-06, + "loss": 0.0376, + "num_input_tokens_seen": 1119974392, + "step": 6528 + }, + { + "epoch": 74.09065155807366, + "loss": 0.026788223534822464, + "loss_ce": 9.048206266015768e-06, + "loss_iou": 0.5703125, + "loss_num": 0.00537109375, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 1119974392, + "step": 6528 + }, + { + "epoch": 74.10198300283287, + "grad_norm": 3.04783184834557, + "learning_rate": 5e-06, + "loss": 0.042, + "num_input_tokens_seen": 1120146056, + "step": 6529 + }, + { + "epoch": 74.10198300283287, + "loss": 0.030398353934288025, + "loss_ce": 1.8105823983205482e-05, + "loss_iou": 0.439453125, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1120146056, + "step": 6529 + }, + { + "epoch": 74.11331444759207, + "grad_norm": 3.2375599818467187, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1120317720, + "step": 6530 + }, + { + "epoch": 74.11331444759207, + "loss": 0.06411470472812653, + "loss_ce": 1.2532809705589898e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0128173828125, + "loss_xval": 0.06396484375, + "num_input_tokens_seen": 1120317720, + "step": 6530 + }, + { + "epoch": 74.12464589235128, + "grad_norm": 3.345333962402265, + "learning_rate": 5e-06, + "loss": 0.0531, + "num_input_tokens_seen": 1120489804, + "step": 6531 + }, + { + "epoch": 74.12464589235128, + "loss": 0.026301445439457893, + "loss_ce": 1.0550304978096392e-05, + "loss_iou": 0.271484375, + "loss_num": 0.0052490234375, + "loss_xval": 0.0262451171875, + "num_input_tokens_seen": 1120489804, + "step": 6531 + }, + { + "epoch": 74.13597733711048, + "grad_norm": 3.3609352826898613, + "learning_rate": 5e-06, + "loss": 0.0536, + "num_input_tokens_seen": 1120661268, + "step": 6532 + }, + { + "epoch": 74.13597733711048, + "loss": 0.10038816183805466, + "loss_ce": 1.5849176634219475e-05, + "loss_iou": 0.345703125, + "loss_num": 0.02001953125, + "loss_xval": 0.1005859375, + "num_input_tokens_seen": 1120661268, + "step": 6532 + }, + { + "epoch": 74.14730878186968, + "grad_norm": 3.426619580685186, + "learning_rate": 5e-06, + "loss": 0.0385, + "num_input_tokens_seen": 1120833440, + "step": 6533 + }, + { + "epoch": 74.14730878186968, + "loss": 0.027686811983585358, + "loss_ce": 2.262873385916464e-05, + "loss_iou": 0.326171875, + "loss_num": 0.005523681640625, + "loss_xval": 0.0277099609375, + "num_input_tokens_seen": 1120833440, + "step": 6533 + }, + { + "epoch": 74.15864022662889, + "grad_norm": 3.5166757680499914, + "learning_rate": 5e-06, + "loss": 0.0404, + "num_input_tokens_seen": 1121005280, + "step": 6534 + }, + { + "epoch": 74.15864022662889, + "loss": 0.03233487904071808, + "loss_ce": 1.6764171959948726e-05, + "loss_iou": 0.44140625, + "loss_num": 0.0064697265625, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1121005280, + "step": 6534 + }, + { + "epoch": 74.1699716713881, + "grad_norm": 4.6375297336868355, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 1121176436, + "step": 6535 + }, + { + "epoch": 74.1699716713881, + "loss": 0.06014683470129967, + "loss_ce": 2.7205438527744263e-05, + "loss_iou": 0.486328125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 1121176436, + "step": 6535 + }, + { + "epoch": 74.1813031161473, + "grad_norm": 3.0243636504944513, + "learning_rate": 5e-06, + "loss": 0.0552, + "num_input_tokens_seen": 1121348124, + "step": 6536 + }, + { + "epoch": 74.1813031161473, + "loss": 0.026662033051252365, + "loss_ce": 1.255888128071092e-05, + "loss_iou": 0.396484375, + "loss_num": 0.005340576171875, + "loss_xval": 0.026611328125, + "num_input_tokens_seen": 1121348124, + "step": 6536 + }, + { + "epoch": 74.19263456090651, + "grad_norm": 2.600093201754161, + "learning_rate": 5e-06, + "loss": 0.0608, + "num_input_tokens_seen": 1121519948, + "step": 6537 + }, + { + "epoch": 74.19263456090651, + "loss": 0.033793188631534576, + "loss_ce": 2.549035161791835e-05, + "loss_iou": 0.51953125, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1121519948, + "step": 6537 + }, + { + "epoch": 74.20396600566572, + "grad_norm": 2.6841947473673113, + "learning_rate": 5e-06, + "loss": 0.0517, + "num_input_tokens_seen": 1121691796, + "step": 6538 + }, + { + "epoch": 74.20396600566572, + "loss": 0.022242574021220207, + "loss_ce": 1.0517702321521938e-05, + "loss_iou": 0.34375, + "loss_num": 0.00445556640625, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 1121691796, + "step": 6538 + }, + { + "epoch": 74.21529745042493, + "grad_norm": 2.9045826869659304, + "learning_rate": 5e-06, + "loss": 0.059, + "num_input_tokens_seen": 1121863312, + "step": 6539 + }, + { + "epoch": 74.21529745042493, + "loss": 0.053002290427684784, + "loss_ce": 2.377333294134587e-05, + "loss_iou": 0.287109375, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 1121863312, + "step": 6539 + }, + { + "epoch": 74.22662889518413, + "grad_norm": 2.644822881678392, + "learning_rate": 5e-06, + "loss": 0.0344, + "num_input_tokens_seen": 1122035228, + "step": 6540 + }, + { + "epoch": 74.22662889518413, + "loss": 0.06761403381824493, + "loss_ce": 1.7599053535377607e-05, + "loss_iou": 0.365234375, + "loss_num": 0.0135498046875, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 1122035228, + "step": 6540 + }, + { + "epoch": 74.23796033994334, + "grad_norm": 2.866182283587977, + "learning_rate": 5e-06, + "loss": 0.0301, + "num_input_tokens_seen": 1122206848, + "step": 6541 + }, + { + "epoch": 74.23796033994334, + "loss": 0.034088488668203354, + "loss_ce": 1.56136757141212e-05, + "loss_iou": 0.345703125, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1122206848, + "step": 6541 + }, + { + "epoch": 74.24929178470255, + "grad_norm": 3.092252378469409, + "learning_rate": 5e-06, + "loss": 0.0304, + "num_input_tokens_seen": 1122378008, + "step": 6542 + }, + { + "epoch": 74.24929178470255, + "loss": 0.0221841000020504, + "loss_ce": 2.070872506010346e-05, + "loss_iou": 0.412109375, + "loss_num": 0.004425048828125, + "loss_xval": 0.022216796875, + "num_input_tokens_seen": 1122378008, + "step": 6542 + }, + { + "epoch": 74.26062322946176, + "grad_norm": 3.3128737241389437, + "learning_rate": 5e-06, + "loss": 0.034, + "num_input_tokens_seen": 1122548344, + "step": 6543 + }, + { + "epoch": 74.26062322946176, + "loss": 0.036753054708242416, + "loss_ce": 9.889366992865689e-06, + "loss_iou": 0.38671875, + "loss_num": 0.007354736328125, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1122548344, + "step": 6543 + }, + { + "epoch": 74.27195467422096, + "grad_norm": 3.0350620237385955, + "learning_rate": 5e-06, + "loss": 0.0372, + "num_input_tokens_seen": 1122720252, + "step": 6544 + }, + { + "epoch": 74.27195467422096, + "loss": 0.03843335062265396, + "loss_ce": 1.9351271475898102e-05, + "loss_iou": 0.48828125, + "loss_num": 0.0076904296875, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1122720252, + "step": 6544 + }, + { + "epoch": 74.28328611898017, + "grad_norm": 2.880081483896375, + "learning_rate": 5e-06, + "loss": 0.048, + "num_input_tokens_seen": 1122890520, + "step": 6545 + }, + { + "epoch": 74.28328611898017, + "loss": 0.09767642617225647, + "loss_ce": 2.0175570170977153e-05, + "loss_iou": 0.267578125, + "loss_num": 0.01953125, + "loss_xval": 0.09765625, + "num_input_tokens_seen": 1122890520, + "step": 6545 + }, + { + "epoch": 74.29461756373938, + "grad_norm": 5.350790177112331, + "learning_rate": 5e-06, + "loss": 0.047, + "num_input_tokens_seen": 1123061460, + "step": 6546 + }, + { + "epoch": 74.29461756373938, + "loss": 0.024730706587433815, + "loss_ce": 1.1468640877865255e-05, + "loss_iou": 0.265625, + "loss_num": 0.00494384765625, + "loss_xval": 0.024658203125, + "num_input_tokens_seen": 1123061460, + "step": 6546 + }, + { + "epoch": 74.30594900849859, + "grad_norm": 2.9657035232887274, + "learning_rate": 5e-06, + "loss": 0.0493, + "num_input_tokens_seen": 1123233092, + "step": 6547 + }, + { + "epoch": 74.30594900849859, + "loss": 0.039753399789333344, + "loss_ce": 1.951216472662054e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 1123233092, + "step": 6547 + }, + { + "epoch": 74.3172804532578, + "grad_norm": 3.127766017975965, + "learning_rate": 5e-06, + "loss": 0.0495, + "num_input_tokens_seen": 1123401852, + "step": 6548 + }, + { + "epoch": 74.3172804532578, + "loss": 0.03144387528300285, + "loss_ce": 1.076921216736082e-05, + "loss_iou": 0.46875, + "loss_num": 0.00628662109375, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 1123401852, + "step": 6548 + }, + { + "epoch": 74.328611898017, + "grad_norm": 3.0473684714921467, + "learning_rate": 5e-06, + "loss": 0.0418, + "num_input_tokens_seen": 1123573480, + "step": 6549 + }, + { + "epoch": 74.328611898017, + "loss": 0.02534647099673748, + "loss_ce": 1.6881414921954274e-05, + "loss_iou": 0.392578125, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 1123573480, + "step": 6549 + }, + { + "epoch": 74.33994334277621, + "grad_norm": 2.8736710243541985, + "learning_rate": 5e-06, + "loss": 0.0613, + "num_input_tokens_seen": 1123745096, + "step": 6550 + }, + { + "epoch": 74.33994334277621, + "loss": 0.06466744095087051, + "loss_ce": 1.5952380636008456e-05, + "loss_iou": 0.173828125, + "loss_num": 0.012939453125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 1123745096, + "step": 6550 + }, + { + "epoch": 74.35127478753542, + "grad_norm": 2.1977943765302212, + "learning_rate": 5e-06, + "loss": 0.027, + "num_input_tokens_seen": 1123916736, + "step": 6551 + }, + { + "epoch": 74.35127478753542, + "loss": 0.01735638827085495, + "loss_ce": 3.0034945666557178e-05, + "loss_iou": 0.353515625, + "loss_num": 0.0034637451171875, + "loss_xval": 0.017333984375, + "num_input_tokens_seen": 1123916736, + "step": 6551 + }, + { + "epoch": 74.36260623229462, + "grad_norm": 2.1753931528727954, + "learning_rate": 5e-06, + "loss": 0.0376, + "num_input_tokens_seen": 1124086696, + "step": 6552 + }, + { + "epoch": 74.36260623229462, + "loss": 0.03582029789686203, + "loss_ce": 7.922475560917519e-06, + "loss_iou": 0.3671875, + "loss_num": 0.007171630859375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1124086696, + "step": 6552 + }, + { + "epoch": 74.37393767705383, + "grad_norm": 6.772045594611746, + "learning_rate": 5e-06, + "loss": 0.04, + "num_input_tokens_seen": 1124258948, + "step": 6553 + }, + { + "epoch": 74.37393767705383, + "loss": 0.030590107664465904, + "loss_ce": 1.912384141178336e-05, + "loss_iou": 0.1279296875, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1124258948, + "step": 6553 + }, + { + "epoch": 74.38526912181302, + "grad_norm": 3.1339157702441343, + "learning_rate": 5e-06, + "loss": 0.042, + "num_input_tokens_seen": 1124430820, + "step": 6554 + }, + { + "epoch": 74.38526912181302, + "loss": 0.07429294288158417, + "loss_ce": 1.3157720786693972e-05, + "loss_iou": 0.32421875, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 1124430820, + "step": 6554 + }, + { + "epoch": 74.39660056657223, + "grad_norm": 3.1952496038681786, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1124602920, + "step": 6555 + }, + { + "epoch": 74.39660056657223, + "loss": 0.03208663687109947, + "loss_ce": 1.2664571841014549e-05, + "loss_iou": 0.62109375, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1124602920, + "step": 6555 + }, + { + "epoch": 74.40793201133144, + "grad_norm": 4.043248135548104, + "learning_rate": 5e-06, + "loss": 0.053, + "num_input_tokens_seen": 1124773948, + "step": 6556 + }, + { + "epoch": 74.40793201133144, + "loss": 0.03340116888284683, + "loss_ce": 7.309532975341426e-06, + "loss_iou": 0.271484375, + "loss_num": 0.006683349609375, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1124773948, + "step": 6556 + }, + { + "epoch": 74.41926345609065, + "grad_norm": 2.9120655542665035, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1124946244, + "step": 6557 + }, + { + "epoch": 74.41926345609065, + "loss": 0.03089434839785099, + "loss_ce": 1.0557387213339098e-05, + "loss_iou": 0.345703125, + "loss_num": 0.00616455078125, + "loss_xval": 0.0308837890625, + "num_input_tokens_seen": 1124946244, + "step": 6557 + }, + { + "epoch": 74.43059490084985, + "grad_norm": 3.376758922847797, + "learning_rate": 5e-06, + "loss": 0.0261, + "num_input_tokens_seen": 1125118024, + "step": 6558 + }, + { + "epoch": 74.43059490084985, + "loss": 0.027084695175290108, + "loss_ce": 1.5603432984789833e-05, + "loss_iou": 0.37890625, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1125118024, + "step": 6558 + }, + { + "epoch": 74.44192634560906, + "grad_norm": 3.3811898333494916, + "learning_rate": 5e-06, + "loss": 0.0378, + "num_input_tokens_seen": 1125287312, + "step": 6559 + }, + { + "epoch": 74.44192634560906, + "loss": 0.04278423264622688, + "loss_ce": 1.3848143680661451e-05, + "loss_iou": 0.357421875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 1125287312, + "step": 6559 + }, + { + "epoch": 74.45325779036827, + "grad_norm": 3.295039162291036, + "learning_rate": 5e-06, + "loss": 0.0403, + "num_input_tokens_seen": 1125456656, + "step": 6560 + }, + { + "epoch": 74.45325779036827, + "loss": 0.048660967499017715, + "loss_ce": 1.5948513464536518e-05, + "loss_iou": 0.419921875, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 1125456656, + "step": 6560 + }, + { + "epoch": 74.46458923512748, + "grad_norm": 4.141489323193328, + "learning_rate": 5e-06, + "loss": 0.0419, + "num_input_tokens_seen": 1125627848, + "step": 6561 + }, + { + "epoch": 74.46458923512748, + "loss": 0.028617870062589645, + "loss_ce": 7.641958291060291e-06, + "loss_iou": 0.515625, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1125627848, + "step": 6561 + }, + { + "epoch": 74.47592067988668, + "grad_norm": 2.987924891331927, + "learning_rate": 5e-06, + "loss": 0.0342, + "num_input_tokens_seen": 1125797816, + "step": 6562 + }, + { + "epoch": 74.47592067988668, + "loss": 0.028473205864429474, + "loss_ce": 1.5564743080176413e-05, + "loss_iou": 0.45703125, + "loss_num": 0.00567626953125, + "loss_xval": 0.0284423828125, + "num_input_tokens_seen": 1125797816, + "step": 6562 + }, + { + "epoch": 74.48725212464589, + "grad_norm": 2.881362799091236, + "learning_rate": 5e-06, + "loss": 0.03, + "num_input_tokens_seen": 1125968112, + "step": 6563 + }, + { + "epoch": 74.48725212464589, + "loss": 0.04204528033733368, + "loss_ce": 2.2578688003704883e-05, + "loss_iou": 0.328125, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1125968112, + "step": 6563 + }, + { + "epoch": 74.4985835694051, + "grad_norm": 2.5457582393322116, + "learning_rate": 5e-06, + "loss": 0.0346, + "num_input_tokens_seen": 1126139936, + "step": 6564 + }, + { + "epoch": 74.4985835694051, + "loss": 0.03218161314725876, + "loss_ce": 8.456287105218507e-06, + "loss_iou": 0.318359375, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1126139936, + "step": 6564 + }, + { + "epoch": 74.5099150141643, + "grad_norm": 3.242619201626592, + "learning_rate": 5e-06, + "loss": 0.0317, + "num_input_tokens_seen": 1126311664, + "step": 6565 + }, + { + "epoch": 74.5099150141643, + "loss": 0.03974199295043945, + "loss_ce": 2.336677789571695e-05, + "loss_iou": 0.259765625, + "loss_num": 0.0079345703125, + "loss_xval": 0.039794921875, + "num_input_tokens_seen": 1126311664, + "step": 6565 + }, + { + "epoch": 74.52124645892351, + "grad_norm": 3.8942997766225345, + "learning_rate": 5e-06, + "loss": 0.0708, + "num_input_tokens_seen": 1126483416, + "step": 6566 + }, + { + "epoch": 74.52124645892351, + "loss": 0.10694268345832825, + "loss_ce": 9.09206755750347e-06, + "loss_iou": 0.435546875, + "loss_num": 0.0213623046875, + "loss_xval": 0.10693359375, + "num_input_tokens_seen": 1126483416, + "step": 6566 + }, + { + "epoch": 74.53257790368272, + "grad_norm": 2.8869488214410244, + "learning_rate": 5e-06, + "loss": 0.026, + "num_input_tokens_seen": 1126654220, + "step": 6567 + }, + { + "epoch": 74.53257790368272, + "loss": 0.021920349448919296, + "loss_ce": 8.728484317543916e-06, + "loss_iou": 0.37890625, + "loss_num": 0.00439453125, + "loss_xval": 0.02197265625, + "num_input_tokens_seen": 1126654220, + "step": 6567 + }, + { + "epoch": 74.54390934844193, + "grad_norm": 2.5256706389271173, + "learning_rate": 5e-06, + "loss": 0.06, + "num_input_tokens_seen": 1126825992, + "step": 6568 + }, + { + "epoch": 74.54390934844193, + "loss": 0.10881047695875168, + "loss_ce": 4.583359259413555e-05, + "loss_iou": 0.12158203125, + "loss_num": 0.021728515625, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 1126825992, + "step": 6568 + }, + { + "epoch": 74.55524079320114, + "grad_norm": 2.2031247186967446, + "learning_rate": 5e-06, + "loss": 0.0268, + "num_input_tokens_seen": 1126997916, + "step": 6569 + }, + { + "epoch": 74.55524079320114, + "loss": 0.03448734059929848, + "loss_ce": 5.5885753681650385e-05, + "loss_iou": 0.359375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1126997916, + "step": 6569 + }, + { + "epoch": 74.56657223796034, + "grad_norm": 2.623623901419336, + "learning_rate": 5e-06, + "loss": 0.0574, + "num_input_tokens_seen": 1127169720, + "step": 6570 + }, + { + "epoch": 74.56657223796034, + "loss": 0.09014096111059189, + "loss_ce": 2.2551414076588117e-05, + "loss_iou": 0.3359375, + "loss_num": 0.01806640625, + "loss_xval": 0.09033203125, + "num_input_tokens_seen": 1127169720, + "step": 6570 + }, + { + "epoch": 74.57790368271955, + "grad_norm": 7.897089597829697, + "learning_rate": 5e-06, + "loss": 0.0689, + "num_input_tokens_seen": 1127341224, + "step": 6571 + }, + { + "epoch": 74.57790368271955, + "loss": 0.03404213488101959, + "loss_ce": 1.5037266166473273e-05, + "loss_iou": 0.279296875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1127341224, + "step": 6571 + }, + { + "epoch": 74.58923512747876, + "grad_norm": 3.261585871586935, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1127512132, + "step": 6572 + }, + { + "epoch": 74.58923512747876, + "loss": 0.03402257338166237, + "loss_ce": 1.0732976079452783e-05, + "loss_iou": 0.38671875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1127512132, + "step": 6572 + }, + { + "epoch": 74.60056657223797, + "grad_norm": 2.919091564399831, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 1127683784, + "step": 6573 + }, + { + "epoch": 74.60056657223797, + "loss": 0.028045453131198883, + "loss_ce": 3.031630149052944e-05, + "loss_iou": 0.50390625, + "loss_num": 0.005615234375, + "loss_xval": 0.028076171875, + "num_input_tokens_seen": 1127683784, + "step": 6573 + }, + { + "epoch": 74.61189801699717, + "grad_norm": 3.067986514972103, + "learning_rate": 5e-06, + "loss": 0.0524, + "num_input_tokens_seen": 1127854752, + "step": 6574 + }, + { + "epoch": 74.61189801699717, + "loss": 0.08889612555503845, + "loss_ce": 2.8932770874234848e-05, + "loss_iou": 0.25390625, + "loss_num": 0.017822265625, + "loss_xval": 0.0888671875, + "num_input_tokens_seen": 1127854752, + "step": 6574 + }, + { + "epoch": 74.62322946175638, + "grad_norm": 3.4098896002097594, + "learning_rate": 5e-06, + "loss": 0.0631, + "num_input_tokens_seen": 1128026192, + "step": 6575 + }, + { + "epoch": 74.62322946175638, + "loss": 0.11072533577680588, + "loss_ce": 7.562517566839233e-06, + "loss_iou": 0.4375, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 1128026192, + "step": 6575 + }, + { + "epoch": 74.63456090651557, + "grad_norm": 3.129265554680959, + "learning_rate": 5e-06, + "loss": 0.048, + "num_input_tokens_seen": 1128198076, + "step": 6576 + }, + { + "epoch": 74.63456090651557, + "loss": 0.07285108417272568, + "loss_ce": 5.140165376360528e-05, + "loss_iou": 0.3828125, + "loss_num": 0.0145263671875, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 1128198076, + "step": 6576 + }, + { + "epoch": 74.64589235127478, + "grad_norm": 6.5740103890432335, + "learning_rate": 5e-06, + "loss": 0.0531, + "num_input_tokens_seen": 1128368572, + "step": 6577 + }, + { + "epoch": 74.64589235127478, + "loss": 0.025021247565746307, + "loss_ce": 2.7350888558430597e-05, + "loss_iou": 0.46875, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 1128368572, + "step": 6577 + }, + { + "epoch": 74.65722379603399, + "grad_norm": 3.0016955243098815, + "learning_rate": 5e-06, + "loss": 0.0496, + "num_input_tokens_seen": 1128540320, + "step": 6578 + }, + { + "epoch": 74.65722379603399, + "loss": 0.06106710806488991, + "loss_ce": 6.246630800887942e-05, + "loss_iou": 0.359375, + "loss_num": 0.01220703125, + "loss_xval": 0.06103515625, + "num_input_tokens_seen": 1128540320, + "step": 6578 + }, + { + "epoch": 74.6685552407932, + "grad_norm": 2.9110242886428654, + "learning_rate": 5e-06, + "loss": 0.0408, + "num_input_tokens_seen": 1128712092, + "step": 6579 + }, + { + "epoch": 74.6685552407932, + "loss": 0.03408673405647278, + "loss_ce": 1.3858853890269529e-05, + "loss_iou": 0.34765625, + "loss_num": 0.006805419921875, + "loss_xval": 0.0341796875, + "num_input_tokens_seen": 1128712092, + "step": 6579 + }, + { + "epoch": 74.6798866855524, + "grad_norm": 3.5661725253959573, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 1128882144, + "step": 6580 + }, + { + "epoch": 74.6798866855524, + "loss": 0.04359672963619232, + "loss_ce": 0.0007195360958576202, + "loss_iou": 0.291015625, + "loss_num": 0.00860595703125, + "loss_xval": 0.04296875, + "num_input_tokens_seen": 1128882144, + "step": 6580 + }, + { + "epoch": 74.69121813031161, + "grad_norm": 2.731680176846638, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 1129053908, + "step": 6581 + }, + { + "epoch": 74.69121813031161, + "loss": 0.06613113731145859, + "loss_ce": 1.4803251360717695e-05, + "loss_iou": 0.341796875, + "loss_num": 0.01324462890625, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 1129053908, + "step": 6581 + }, + { + "epoch": 74.70254957507082, + "grad_norm": 2.4624581703642607, + "learning_rate": 5e-06, + "loss": 0.052, + "num_input_tokens_seen": 1129224084, + "step": 6582 + }, + { + "epoch": 74.70254957507082, + "loss": 0.04896687716245651, + "loss_ce": 0.002076619304716587, + "loss_iou": 0.1640625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 1129224084, + "step": 6582 + }, + { + "epoch": 74.71388101983003, + "grad_norm": 2.2279051896986664, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1129395740, + "step": 6583 + }, + { + "epoch": 74.71388101983003, + "loss": 0.02393813617527485, + "loss_ce": 1.2354178579698782e-05, + "loss_iou": 0.50390625, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 1129395740, + "step": 6583 + }, + { + "epoch": 74.72521246458923, + "grad_norm": 2.214563796199562, + "learning_rate": 5e-06, + "loss": 0.0303, + "num_input_tokens_seen": 1129567948, + "step": 6584 + }, + { + "epoch": 74.72521246458923, + "loss": 0.03841949999332428, + "loss_ce": 1.3128978025633842e-05, + "loss_iou": 0.421875, + "loss_num": 0.007659912109375, + "loss_xval": 0.038330078125, + "num_input_tokens_seen": 1129567948, + "step": 6584 + }, + { + "epoch": 74.73654390934844, + "grad_norm": 2.4723858598221367, + "learning_rate": 5e-06, + "loss": 0.0248, + "num_input_tokens_seen": 1129738824, + "step": 6585 + }, + { + "epoch": 74.73654390934844, + "loss": 0.02932172268629074, + "loss_ce": 2.484569813532289e-05, + "loss_iou": 0.466796875, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 1129738824, + "step": 6585 + }, + { + "epoch": 74.74787535410765, + "grad_norm": 2.852778627022531, + "learning_rate": 5e-06, + "loss": 0.0417, + "num_input_tokens_seen": 1129910312, + "step": 6586 + }, + { + "epoch": 74.74787535410765, + "loss": 0.03491400182247162, + "loss_ce": 3.240866135456599e-05, + "loss_iou": 0.150390625, + "loss_num": 0.006988525390625, + "loss_xval": 0.034912109375, + "num_input_tokens_seen": 1129910312, + "step": 6586 + }, + { + "epoch": 74.75920679886686, + "grad_norm": 2.9994155111322427, + "learning_rate": 5e-06, + "loss": 0.0469, + "num_input_tokens_seen": 1130080956, + "step": 6587 + }, + { + "epoch": 74.75920679886686, + "loss": 0.07636642456054688, + "loss_ce": 1.1439266017987393e-05, + "loss_iou": 0.34765625, + "loss_num": 0.0152587890625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 1130080956, + "step": 6587 + }, + { + "epoch": 74.77053824362606, + "grad_norm": 3.705925377055238, + "learning_rate": 5e-06, + "loss": 0.0486, + "num_input_tokens_seen": 1130251972, + "step": 6588 + }, + { + "epoch": 74.77053824362606, + "loss": 0.07284902036190033, + "loss_ce": 1.8822984202415682e-05, + "loss_iou": 0.375, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 1130251972, + "step": 6588 + }, + { + "epoch": 74.78186968838527, + "grad_norm": 3.104827330766222, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 1130422384, + "step": 6589 + }, + { + "epoch": 74.78186968838527, + "loss": 0.033627867698669434, + "loss_ce": 1.2753389455610886e-05, + "loss_iou": 0.197265625, + "loss_num": 0.0067138671875, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1130422384, + "step": 6589 + }, + { + "epoch": 74.79320113314448, + "grad_norm": 3.38921178910297, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 1130594036, + "step": 6590 + }, + { + "epoch": 74.79320113314448, + "loss": 0.05058871954679489, + "loss_ce": 2.1094274416100234e-05, + "loss_iou": 0.1435546875, + "loss_num": 0.0101318359375, + "loss_xval": 0.050537109375, + "num_input_tokens_seen": 1130594036, + "step": 6590 + }, + { + "epoch": 74.80453257790369, + "grad_norm": 3.2946389524645587, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 1130764112, + "step": 6591 + }, + { + "epoch": 74.80453257790369, + "loss": 0.026711052283644676, + "loss_ce": 8.171422450686805e-06, + "loss_iou": 0.5, + "loss_num": 0.005340576171875, + "loss_xval": 0.0267333984375, + "num_input_tokens_seen": 1130764112, + "step": 6591 + }, + { + "epoch": 74.8158640226629, + "grad_norm": 3.5085739993113663, + "learning_rate": 5e-06, + "loss": 0.035, + "num_input_tokens_seen": 1130934424, + "step": 6592 + }, + { + "epoch": 74.8158640226629, + "loss": 0.03191527724266052, + "loss_ce": 9.152002348855603e-06, + "loss_iou": 0.486328125, + "loss_num": 0.006378173828125, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1130934424, + "step": 6592 + }, + { + "epoch": 74.8271954674221, + "grad_norm": 2.9625154039273123, + "learning_rate": 5e-06, + "loss": 0.032, + "num_input_tokens_seen": 1131106348, + "step": 6593 + }, + { + "epoch": 74.8271954674221, + "loss": 0.03468397259712219, + "loss_ce": 3.126372757833451e-05, + "loss_iou": 0.37890625, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1131106348, + "step": 6593 + }, + { + "epoch": 74.83852691218131, + "grad_norm": 2.238460167599019, + "learning_rate": 5e-06, + "loss": 0.0608, + "num_input_tokens_seen": 1131277076, + "step": 6594 + }, + { + "epoch": 74.83852691218131, + "loss": 0.03013141080737114, + "loss_ce": 2.581985609140247e-05, + "loss_iou": 0.265625, + "loss_num": 0.006011962890625, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 1131277076, + "step": 6594 + }, + { + "epoch": 74.84985835694052, + "grad_norm": 2.745086066254167, + "learning_rate": 5e-06, + "loss": 0.0468, + "num_input_tokens_seen": 1131448800, + "step": 6595 + }, + { + "epoch": 74.84985835694052, + "loss": 0.0662766546010971, + "loss_ce": 0.0001145425922004506, + "loss_iou": 0.259765625, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 1131448800, + "step": 6595 + }, + { + "epoch": 74.86118980169972, + "grad_norm": 4.161388633340837, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 1131620540, + "step": 6596 + }, + { + "epoch": 74.86118980169972, + "loss": 0.03462936729192734, + "loss_ce": 7.173584890551865e-06, + "loss_iou": 0.5234375, + "loss_num": 0.006927490234375, + "loss_xval": 0.03466796875, + "num_input_tokens_seen": 1131620540, + "step": 6596 + }, + { + "epoch": 74.87252124645893, + "grad_norm": 3.942770467044283, + "learning_rate": 5e-06, + "loss": 0.0372, + "num_input_tokens_seen": 1131792352, + "step": 6597 + }, + { + "epoch": 74.87252124645893, + "loss": 0.030213255435228348, + "loss_ce": 1.6111145669128746e-05, + "loss_iou": 0.59375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 1131792352, + "step": 6597 + }, + { + "epoch": 74.88385269121812, + "grad_norm": 6.28398597284707, + "learning_rate": 5e-06, + "loss": 0.0688, + "num_input_tokens_seen": 1131963328, + "step": 6598 + }, + { + "epoch": 74.88385269121812, + "loss": 0.04276979714632034, + "loss_ce": 1.4671622921014205e-05, + "loss_iou": 0.2216796875, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 1131963328, + "step": 6598 + }, + { + "epoch": 74.89518413597733, + "grad_norm": 3.117787571982213, + "learning_rate": 5e-06, + "loss": 0.0404, + "num_input_tokens_seen": 1132134912, + "step": 6599 + }, + { + "epoch": 74.89518413597733, + "loss": 0.02718474715948105, + "loss_ce": 8.842358511174098e-06, + "loss_iou": 0.333984375, + "loss_num": 0.00543212890625, + "loss_xval": 0.0272216796875, + "num_input_tokens_seen": 1132134912, + "step": 6599 + }, + { + "epoch": 74.90651558073654, + "grad_norm": 3.377153752117538, + "learning_rate": 5e-06, + "loss": 0.0391, + "num_input_tokens_seen": 1132306592, + "step": 6600 + }, + { + "epoch": 74.90651558073654, + "loss": 0.04378780722618103, + "loss_ce": 1.0341590495954733e-05, + "loss_iou": 0.462890625, + "loss_num": 0.0087890625, + "loss_xval": 0.043701171875, + "num_input_tokens_seen": 1132306592, + "step": 6600 + }, + { + "epoch": 74.91784702549575, + "grad_norm": 3.2312934907367965, + "learning_rate": 5e-06, + "loss": 0.0304, + "num_input_tokens_seen": 1132477732, + "step": 6601 + }, + { + "epoch": 74.91784702549575, + "loss": 0.02760060504078865, + "loss_ce": 1.2714146578218788e-05, + "loss_iou": 0.419921875, + "loss_num": 0.005523681640625, + "loss_xval": 0.027587890625, + "num_input_tokens_seen": 1132477732, + "step": 6601 + }, + { + "epoch": 74.92917847025495, + "grad_norm": 2.9390795376256085, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 1132648924, + "step": 6602 + }, + { + "epoch": 74.92917847025495, + "loss": 0.11069358885288239, + "loss_ce": 2.158638380933553e-05, + "loss_iou": 0.31640625, + "loss_num": 0.0220947265625, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 1132648924, + "step": 6602 + }, + { + "epoch": 74.94050991501416, + "grad_norm": 2.675223345899106, + "learning_rate": 5e-06, + "loss": 0.046, + "num_input_tokens_seen": 1132820768, + "step": 6603 + }, + { + "epoch": 74.94050991501416, + "loss": 0.06753931939601898, + "loss_ce": 1.1552236173884012e-05, + "loss_iou": 0.259765625, + "loss_num": 0.01348876953125, + "loss_xval": 0.0673828125, + "num_input_tokens_seen": 1132820768, + "step": 6603 + }, + { + "epoch": 74.95184135977337, + "grad_norm": 3.6814864893805255, + "learning_rate": 5e-06, + "loss": 0.035, + "num_input_tokens_seen": 1132992448, + "step": 6604 + }, + { + "epoch": 74.95184135977337, + "loss": 0.05241741985082626, + "loss_ce": 3.399672277737409e-05, + "loss_iou": 0.435546875, + "loss_num": 0.010498046875, + "loss_xval": 0.052490234375, + "num_input_tokens_seen": 1132992448, + "step": 6604 + }, + { + "epoch": 74.96317280453258, + "grad_norm": 3.4656831250717595, + "learning_rate": 5e-06, + "loss": 0.0453, + "num_input_tokens_seen": 1133163448, + "step": 6605 + }, + { + "epoch": 74.96317280453258, + "loss": 0.044289443641901016, + "loss_ce": 8.437264114036225e-06, + "loss_iou": 0.390625, + "loss_num": 0.00885009765625, + "loss_xval": 0.044189453125, + "num_input_tokens_seen": 1133163448, + "step": 6605 + }, + { + "epoch": 74.97450424929178, + "grad_norm": 3.4958870464672467, + "learning_rate": 5e-06, + "loss": 0.0397, + "num_input_tokens_seen": 1133334400, + "step": 6606 + }, + { + "epoch": 74.97450424929178, + "loss": 0.04712475836277008, + "loss_ce": 2.087727261823602e-05, + "loss_iou": 0.48046875, + "loss_num": 0.0093994140625, + "loss_xval": 0.047119140625, + "num_input_tokens_seen": 1133334400, + "step": 6606 + }, + { + "epoch": 74.98583569405099, + "grad_norm": 3.188127732940439, + "learning_rate": 5e-06, + "loss": 0.051, + "num_input_tokens_seen": 1133506188, + "step": 6607 + }, + { + "epoch": 74.98583569405099, + "loss": 0.043926019221544266, + "loss_ce": 1.1226005881326273e-05, + "loss_iou": 0.431640625, + "loss_num": 0.0087890625, + "loss_xval": 0.0439453125, + "num_input_tokens_seen": 1133506188, + "step": 6607 + }, + { + "epoch": 74.9971671388102, + "grad_norm": 3.032565899470877, + "learning_rate": 5e-06, + "loss": 0.0588, + "num_input_tokens_seen": 1133674920, + "step": 6608 + }, + { + "epoch": 74.9971671388102, + "loss": 0.058987658470869064, + "loss_ce": 4.2952389776473865e-05, + "loss_iou": 0.357421875, + "loss_num": 0.01177978515625, + "loss_xval": 0.058837890625, + "num_input_tokens_seen": 1133674920, + "step": 6608 + }, + { + "epoch": 74.9971671388102, + "loss": 0.040857791900634766, + "loss_ce": 1.0014868166763335e-05, + "loss_iou": 0.447265625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1133717820, + "step": 6608 + }, + { + "epoch": 75.0084985835694, + "grad_norm": 2.758072465777096, + "learning_rate": 5e-06, + "loss": 0.0294, + "num_input_tokens_seen": 1133846548, + "step": 6609 + }, + { + "epoch": 75.0084985835694, + "loss": 0.02699139714241028, + "loss_ce": 2.9117847589077428e-05, + "loss_iou": 0.0, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 1133846548, + "step": 6609 + }, + { + "epoch": 75.01983002832861, + "grad_norm": 2.7561372054264255, + "learning_rate": 5e-06, + "loss": 0.0352, + "num_input_tokens_seen": 1134017688, + "step": 6610 + }, + { + "epoch": 75.01983002832861, + "loss": 0.029750000685453415, + "loss_ce": 1.0622742593113799e-05, + "loss_iou": 0.0, + "loss_num": 0.005950927734375, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 1134017688, + "step": 6610 + }, + { + "epoch": 75.03116147308782, + "grad_norm": 2.7277961566773232, + "learning_rate": 5e-06, + "loss": 0.0502, + "num_input_tokens_seen": 1134189520, + "step": 6611 + }, + { + "epoch": 75.03116147308782, + "loss": 0.03339288383722305, + "loss_ce": 5.243175837676972e-05, + "loss_iou": 0.486328125, + "loss_num": 0.00665283203125, + "loss_xval": 0.033447265625, + "num_input_tokens_seen": 1134189520, + "step": 6611 + }, + { + "epoch": 75.04249291784703, + "grad_norm": 3.3672931179972703, + "learning_rate": 5e-06, + "loss": 0.0434, + "num_input_tokens_seen": 1134360152, + "step": 6612 + }, + { + "epoch": 75.04249291784703, + "loss": 0.024752387776970863, + "loss_ce": 1.026110567181604e-05, + "loss_iou": 0.388671875, + "loss_num": 0.00494384765625, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 1134360152, + "step": 6612 + }, + { + "epoch": 75.05382436260624, + "grad_norm": 2.8869852001999785, + "learning_rate": 5e-06, + "loss": 0.0615, + "num_input_tokens_seen": 1134531532, + "step": 6613 + }, + { + "epoch": 75.05382436260624, + "loss": 0.027328990399837494, + "loss_ce": 3.1015144486445934e-05, + "loss_iou": 0.6875, + "loss_num": 0.005462646484375, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 1134531532, + "step": 6613 + }, + { + "epoch": 75.06515580736544, + "grad_norm": 2.999026999517069, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 1134703164, + "step": 6614 + }, + { + "epoch": 75.06515580736544, + "loss": 0.14103229343891144, + "loss_ce": 1.0569621736067347e-05, + "loss_iou": 0.23828125, + "loss_num": 0.0281982421875, + "loss_xval": 0.140625, + "num_input_tokens_seen": 1134703164, + "step": 6614 + }, + { + "epoch": 75.07648725212465, + "grad_norm": 6.40104962066838, + "learning_rate": 5e-06, + "loss": 0.0445, + "num_input_tokens_seen": 1134875064, + "step": 6615 + }, + { + "epoch": 75.07648725212465, + "loss": 0.030785927549004555, + "loss_ce": 8.950310984801035e-06, + "loss_iou": 0.58984375, + "loss_num": 0.00616455078125, + "loss_xval": 0.03076171875, + "num_input_tokens_seen": 1134875064, + "step": 6615 + }, + { + "epoch": 75.08781869688386, + "grad_norm": 3.9704458714465303, + "learning_rate": 5e-06, + "loss": 0.0402, + "num_input_tokens_seen": 1135046852, + "step": 6616 + }, + { + "epoch": 75.08781869688386, + "loss": 0.041131727397441864, + "loss_ce": 9.290861271438189e-06, + "loss_iou": 0.423828125, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1135046852, + "step": 6616 + }, + { + "epoch": 75.09915014164307, + "grad_norm": 3.6034962388222778, + "learning_rate": 5e-06, + "loss": 0.0475, + "num_input_tokens_seen": 1135219272, + "step": 6617 + }, + { + "epoch": 75.09915014164307, + "loss": 0.03451300039887428, + "loss_ce": 1.2876675100415014e-05, + "loss_iou": 0.490234375, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1135219272, + "step": 6617 + }, + { + "epoch": 75.11048158640227, + "grad_norm": 3.054477993280501, + "learning_rate": 5e-06, + "loss": 0.0533, + "num_input_tokens_seen": 1135389292, + "step": 6618 + }, + { + "epoch": 75.11048158640227, + "loss": 0.11907505989074707, + "loss_ce": 1.072060149454046e-05, + "loss_iou": 0.375, + "loss_num": 0.02392578125, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 1135389292, + "step": 6618 + }, + { + "epoch": 75.12181303116148, + "grad_norm": 3.0063760272545856, + "learning_rate": 5e-06, + "loss": 0.0293, + "num_input_tokens_seen": 1135560732, + "step": 6619 + }, + { + "epoch": 75.12181303116148, + "loss": 0.0369563102722168, + "loss_ce": 2.2411819372791797e-05, + "loss_iou": 0.39453125, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1135560732, + "step": 6619 + }, + { + "epoch": 75.13314447592067, + "grad_norm": 3.349644319625914, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 1135731944, + "step": 6620 + }, + { + "epoch": 75.13314447592067, + "loss": 0.026946287602186203, + "loss_ce": 1.4525796359521337e-05, + "loss_iou": 0.34375, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 1135731944, + "step": 6620 + }, + { + "epoch": 75.14447592067988, + "grad_norm": 3.4479739877604243, + "learning_rate": 5e-06, + "loss": 0.0372, + "num_input_tokens_seen": 1135903928, + "step": 6621 + }, + { + "epoch": 75.14447592067988, + "loss": 0.06711386889219284, + "loss_ce": 2.097502874676138e-05, + "loss_iou": 0.51953125, + "loss_num": 0.013427734375, + "loss_xval": 0.06689453125, + "num_input_tokens_seen": 1135903928, + "step": 6621 + }, + { + "epoch": 75.15580736543909, + "grad_norm": 3.7447745089891695, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 1136075736, + "step": 6622 + }, + { + "epoch": 75.15580736543909, + "loss": 0.02858702465891838, + "loss_ce": 1.4943962924007792e-05, + "loss_iou": 0.51953125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1136075736, + "step": 6622 + }, + { + "epoch": 75.1671388101983, + "grad_norm": 4.059687802559946, + "learning_rate": 5e-06, + "loss": 0.0314, + "num_input_tokens_seen": 1136244004, + "step": 6623 + }, + { + "epoch": 75.1671388101983, + "loss": 0.027831776067614555, + "loss_ce": 7.375477707682876e-06, + "loss_iou": 0.43359375, + "loss_num": 0.00555419921875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 1136244004, + "step": 6623 + }, + { + "epoch": 75.1784702549575, + "grad_norm": 3.3001793878709997, + "learning_rate": 5e-06, + "loss": 0.0311, + "num_input_tokens_seen": 1136415752, + "step": 6624 + }, + { + "epoch": 75.1784702549575, + "loss": 0.028650036081671715, + "loss_ce": 9.288190994993784e-06, + "loss_iou": 0.3046875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 1136415752, + "step": 6624 + }, + { + "epoch": 75.18980169971671, + "grad_norm": 2.9926771387093587, + "learning_rate": 5e-06, + "loss": 0.0531, + "num_input_tokens_seen": 1136587840, + "step": 6625 + }, + { + "epoch": 75.18980169971671, + "loss": 0.02985565736889839, + "loss_ce": 1.7094267604989e-05, + "loss_iou": 0.44921875, + "loss_num": 0.0059814453125, + "loss_xval": 0.02978515625, + "num_input_tokens_seen": 1136587840, + "step": 6625 + }, + { + "epoch": 75.20113314447592, + "grad_norm": 3.011683975089285, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 1136759296, + "step": 6626 + }, + { + "epoch": 75.20113314447592, + "loss": 0.036683566868305206, + "loss_ce": 1.6696771126589738e-05, + "loss_iou": 0.435546875, + "loss_num": 0.00732421875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1136759296, + "step": 6626 + }, + { + "epoch": 75.21246458923513, + "grad_norm": 2.589423643540145, + "learning_rate": 5e-06, + "loss": 0.0411, + "num_input_tokens_seen": 1136930908, + "step": 6627 + }, + { + "epoch": 75.21246458923513, + "loss": 0.028652256354689598, + "loss_ce": 2.6767382223624736e-05, + "loss_iou": 0.5078125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1136930908, + "step": 6627 + }, + { + "epoch": 75.22379603399433, + "grad_norm": 3.0961173867377507, + "learning_rate": 5e-06, + "loss": 0.0383, + "num_input_tokens_seen": 1137102796, + "step": 6628 + }, + { + "epoch": 75.22379603399433, + "loss": 0.05981450155377388, + "loss_ce": 1.5307865396607667e-05, + "loss_iou": 0.384765625, + "loss_num": 0.011962890625, + "loss_xval": 0.059814453125, + "num_input_tokens_seen": 1137102796, + "step": 6628 + }, + { + "epoch": 75.23512747875354, + "grad_norm": 3.2605992830967643, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 1137274736, + "step": 6629 + }, + { + "epoch": 75.23512747875354, + "loss": 0.04676087200641632, + "loss_ce": 7.94199058873346e-06, + "loss_iou": 0.3515625, + "loss_num": 0.00933837890625, + "loss_xval": 0.046875, + "num_input_tokens_seen": 1137274736, + "step": 6629 + }, + { + "epoch": 75.24645892351275, + "grad_norm": 3.5075056754683884, + "learning_rate": 5e-06, + "loss": 0.0368, + "num_input_tokens_seen": 1137446040, + "step": 6630 + }, + { + "epoch": 75.24645892351275, + "loss": 0.0396280474960804, + "loss_ce": 1.623117714188993e-05, + "loss_iou": 0.318359375, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 1137446040, + "step": 6630 + }, + { + "epoch": 75.25779036827196, + "grad_norm": 3.203424984960221, + "learning_rate": 5e-06, + "loss": 0.0394, + "num_input_tokens_seen": 1137617756, + "step": 6631 + }, + { + "epoch": 75.25779036827196, + "loss": 0.03802847862243652, + "loss_ce": 1.883343793451786e-05, + "loss_iou": 0.498046875, + "loss_num": 0.007598876953125, + "loss_xval": 0.0380859375, + "num_input_tokens_seen": 1137617756, + "step": 6631 + }, + { + "epoch": 75.26912181303116, + "grad_norm": 3.59352516117109, + "learning_rate": 5e-06, + "loss": 0.0447, + "num_input_tokens_seen": 1137788248, + "step": 6632 + }, + { + "epoch": 75.26912181303116, + "loss": 0.061858586966991425, + "loss_ce": 1.4712988559040241e-05, + "loss_iou": 0.478515625, + "loss_num": 0.0123291015625, + "loss_xval": 0.061767578125, + "num_input_tokens_seen": 1137788248, + "step": 6632 + }, + { + "epoch": 75.28045325779037, + "grad_norm": 3.7772396063241342, + "learning_rate": 5e-06, + "loss": 0.0583, + "num_input_tokens_seen": 1137959916, + "step": 6633 + }, + { + "epoch": 75.28045325779037, + "loss": 0.03259987384080887, + "loss_ce": 2.2356794943334535e-05, + "loss_iou": 0.5, + "loss_num": 0.00653076171875, + "loss_xval": 0.032470703125, + "num_input_tokens_seen": 1137959916, + "step": 6633 + }, + { + "epoch": 75.29178470254958, + "grad_norm": 2.8745239306164696, + "learning_rate": 5e-06, + "loss": 0.0297, + "num_input_tokens_seen": 1138130020, + "step": 6634 + }, + { + "epoch": 75.29178470254958, + "loss": 0.034460075199604034, + "loss_ce": 5.150468132342212e-05, + "loss_iou": 0.369140625, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1138130020, + "step": 6634 + }, + { + "epoch": 75.30311614730878, + "grad_norm": 3.3563284691669337, + "learning_rate": 5e-06, + "loss": 0.0492, + "num_input_tokens_seen": 1138302012, + "step": 6635 + }, + { + "epoch": 75.30311614730878, + "loss": 0.04271052032709122, + "loss_ce": 1.6429548850283027e-05, + "loss_iou": 0.53125, + "loss_num": 0.008544921875, + "loss_xval": 0.042724609375, + "num_input_tokens_seen": 1138302012, + "step": 6635 + }, + { + "epoch": 75.31444759206799, + "grad_norm": 3.803427072091682, + "learning_rate": 5e-06, + "loss": 0.0545, + "num_input_tokens_seen": 1138474204, + "step": 6636 + }, + { + "epoch": 75.31444759206799, + "loss": 0.040617987513542175, + "loss_ce": 1.4352742255141493e-05, + "loss_iou": 0.46875, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1138474204, + "step": 6636 + }, + { + "epoch": 75.3257790368272, + "grad_norm": 3.6134514059485423, + "learning_rate": 5e-06, + "loss": 0.0323, + "num_input_tokens_seen": 1138646060, + "step": 6637 + }, + { + "epoch": 75.3257790368272, + "loss": 0.02564416080713272, + "loss_ce": 9.39374240260804e-06, + "loss_iou": 0.58203125, + "loss_num": 0.005126953125, + "loss_xval": 0.025634765625, + "num_input_tokens_seen": 1138646060, + "step": 6637 + }, + { + "epoch": 75.33711048158641, + "grad_norm": 3.6706366778505313, + "learning_rate": 5e-06, + "loss": 0.0602, + "num_input_tokens_seen": 1138816984, + "step": 6638 + }, + { + "epoch": 75.33711048158641, + "loss": 0.044599637389183044, + "loss_ce": 1.3456691704050172e-05, + "loss_iou": 0.451171875, + "loss_num": 0.0089111328125, + "loss_xval": 0.044677734375, + "num_input_tokens_seen": 1138816984, + "step": 6638 + }, + { + "epoch": 75.34844192634561, + "grad_norm": 2.9328785578990813, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 1138989100, + "step": 6639 + }, + { + "epoch": 75.34844192634561, + "loss": 0.026982787996530533, + "loss_ce": 3.5766301152762026e-05, + "loss_iou": 0.474609375, + "loss_num": 0.005401611328125, + "loss_xval": 0.0269775390625, + "num_input_tokens_seen": 1138989100, + "step": 6639 + }, + { + "epoch": 75.35977337110482, + "grad_norm": 2.7000132793099856, + "learning_rate": 5e-06, + "loss": 0.0555, + "num_input_tokens_seen": 1139160796, + "step": 6640 + }, + { + "epoch": 75.35977337110482, + "loss": 0.03656289726495743, + "loss_ce": 1.8098895452567376e-05, + "loss_iou": 0.44921875, + "loss_num": 0.007293701171875, + "loss_xval": 0.03662109375, + "num_input_tokens_seen": 1139160796, + "step": 6640 + }, + { + "epoch": 75.37110481586403, + "grad_norm": 2.7058270581428956, + "learning_rate": 5e-06, + "loss": 0.0287, + "num_input_tokens_seen": 1139332176, + "step": 6641 + }, + { + "epoch": 75.37110481586403, + "loss": 0.025388851761817932, + "loss_ce": 1.3486058378475718e-05, + "loss_iou": 0.43359375, + "loss_num": 0.00506591796875, + "loss_xval": 0.025390625, + "num_input_tokens_seen": 1139332176, + "step": 6641 + }, + { + "epoch": 75.38243626062322, + "grad_norm": 3.201883920930135, + "learning_rate": 5e-06, + "loss": 0.0387, + "num_input_tokens_seen": 1139503828, + "step": 6642 + }, + { + "epoch": 75.38243626062322, + "loss": 0.02916206791996956, + "loss_ce": 3.304060373920947e-05, + "loss_iou": 0.34765625, + "loss_num": 0.005828857421875, + "loss_xval": 0.0291748046875, + "num_input_tokens_seen": 1139503828, + "step": 6642 + }, + { + "epoch": 75.39376770538243, + "grad_norm": 2.5807513155674044, + "learning_rate": 5e-06, + "loss": 0.0423, + "num_input_tokens_seen": 1139674364, + "step": 6643 + }, + { + "epoch": 75.39376770538243, + "loss": 0.07299471646547318, + "loss_ce": 3.4817956475308165e-05, + "loss_iou": 0.203125, + "loss_num": 0.01458740234375, + "loss_xval": 0.07275390625, + "num_input_tokens_seen": 1139674364, + "step": 6643 + }, + { + "epoch": 75.40509915014164, + "grad_norm": 2.3856992311643794, + "learning_rate": 5e-06, + "loss": 0.063, + "num_input_tokens_seen": 1139845776, + "step": 6644 + }, + { + "epoch": 75.40509915014164, + "loss": 0.10186062008142471, + "loss_ce": 5.397787026595324e-05, + "loss_iou": 0.12109375, + "loss_num": 0.0203857421875, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 1139845776, + "step": 6644 + }, + { + "epoch": 75.41643059490085, + "grad_norm": 1.9521959466451755, + "learning_rate": 5e-06, + "loss": 0.0302, + "num_input_tokens_seen": 1140017744, + "step": 6645 + }, + { + "epoch": 75.41643059490085, + "loss": 0.02789829671382904, + "loss_ce": 2.048969327006489e-05, + "loss_iou": 0.439453125, + "loss_num": 0.005584716796875, + "loss_xval": 0.02783203125, + "num_input_tokens_seen": 1140017744, + "step": 6645 + }, + { + "epoch": 75.42776203966005, + "grad_norm": 2.3466580034481934, + "learning_rate": 5e-06, + "loss": 0.0412, + "num_input_tokens_seen": 1140189228, + "step": 6646 + }, + { + "epoch": 75.42776203966005, + "loss": 0.030522137880325317, + "loss_ce": 1.9818293367279693e-05, + "loss_iou": 0.376953125, + "loss_num": 0.006103515625, + "loss_xval": 0.030517578125, + "num_input_tokens_seen": 1140189228, + "step": 6646 + }, + { + "epoch": 75.43909348441926, + "grad_norm": 2.8872076815482277, + "learning_rate": 5e-06, + "loss": 0.0283, + "num_input_tokens_seen": 1140359360, + "step": 6647 + }, + { + "epoch": 75.43909348441926, + "loss": 0.024180948734283447, + "loss_ce": 1.1026722859242e-05, + "loss_iou": 0.333984375, + "loss_num": 0.00482177734375, + "loss_xval": 0.024169921875, + "num_input_tokens_seen": 1140359360, + "step": 6647 + }, + { + "epoch": 75.45042492917847, + "grad_norm": 3.031967190159842, + "learning_rate": 5e-06, + "loss": 0.0442, + "num_input_tokens_seen": 1140531128, + "step": 6648 + }, + { + "epoch": 75.45042492917847, + "loss": 0.03687503561377525, + "loss_ce": 2.5059895051526837e-05, + "loss_iou": 0.390625, + "loss_num": 0.00738525390625, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1140531128, + "step": 6648 + }, + { + "epoch": 75.46175637393767, + "grad_norm": 2.9603860164549545, + "learning_rate": 5e-06, + "loss": 0.033, + "num_input_tokens_seen": 1140702992, + "step": 6649 + }, + { + "epoch": 75.46175637393767, + "loss": 0.029325708746910095, + "loss_ce": 2.12053710129112e-05, + "loss_iou": 0.60546875, + "loss_num": 0.005859375, + "loss_xval": 0.029296875, + "num_input_tokens_seen": 1140702992, + "step": 6649 + }, + { + "epoch": 75.47308781869688, + "grad_norm": 3.2910846177482047, + "learning_rate": 5e-06, + "loss": 0.0346, + "num_input_tokens_seen": 1140874604, + "step": 6650 + }, + { + "epoch": 75.47308781869688, + "loss": 0.028882550075650215, + "loss_ce": 2.8181069865240715e-05, + "loss_iou": 0.43359375, + "loss_num": 0.005767822265625, + "loss_xval": 0.02880859375, + "num_input_tokens_seen": 1140874604, + "step": 6650 + }, + { + "epoch": 75.48441926345609, + "grad_norm": 3.5063045319901307, + "learning_rate": 5e-06, + "loss": 0.0377, + "num_input_tokens_seen": 1141044580, + "step": 6651 + }, + { + "epoch": 75.48441926345609, + "loss": 0.03537952899932861, + "loss_ce": 2.49135719059268e-05, + "loss_iou": 0.37109375, + "loss_num": 0.007080078125, + "loss_xval": 0.035400390625, + "num_input_tokens_seen": 1141044580, + "step": 6651 + }, + { + "epoch": 75.4957507082153, + "grad_norm": 4.529549886685166, + "learning_rate": 5e-06, + "loss": 0.0528, + "num_input_tokens_seen": 1141216568, + "step": 6652 + }, + { + "epoch": 75.4957507082153, + "loss": 0.0756077691912651, + "loss_ce": 4.624552093446255e-05, + "loss_iou": 0.3671875, + "loss_num": 0.01513671875, + "loss_xval": 0.07568359375, + "num_input_tokens_seen": 1141216568, + "step": 6652 + }, + { + "epoch": 75.5070821529745, + "grad_norm": 5.702531039173888, + "learning_rate": 5e-06, + "loss": 0.0433, + "num_input_tokens_seen": 1141388436, + "step": 6653 + }, + { + "epoch": 75.5070821529745, + "loss": 0.041098494082689285, + "loss_ce": 2.1833089704159647e-05, + "loss_iou": 0.60546875, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1141388436, + "step": 6653 + }, + { + "epoch": 75.51841359773371, + "grad_norm": 4.173235326074557, + "learning_rate": 5e-06, + "loss": 0.0483, + "num_input_tokens_seen": 1141559964, + "step": 6654 + }, + { + "epoch": 75.51841359773371, + "loss": 0.06591375172138214, + "loss_ce": 1.1045767678297125e-05, + "loss_iou": 0.3515625, + "loss_num": 0.01318359375, + "loss_xval": 0.06591796875, + "num_input_tokens_seen": 1141559964, + "step": 6654 + }, + { + "epoch": 75.52974504249292, + "grad_norm": 3.272257811938323, + "learning_rate": 5e-06, + "loss": 0.0416, + "num_input_tokens_seen": 1141730152, + "step": 6655 + }, + { + "epoch": 75.52974504249292, + "loss": 0.03391244262456894, + "loss_ce": 7.413479579554405e-06, + "loss_iou": 0.44921875, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1141730152, + "step": 6655 + }, + { + "epoch": 75.54107648725213, + "grad_norm": 3.8810884567390467, + "learning_rate": 5e-06, + "loss": 0.051, + "num_input_tokens_seen": 1141902064, + "step": 6656 + }, + { + "epoch": 75.54107648725213, + "loss": 0.032190896570682526, + "loss_ce": 1.0111092706210911e-05, + "loss_iou": 0.38671875, + "loss_num": 0.006439208984375, + "loss_xval": 0.0322265625, + "num_input_tokens_seen": 1141902064, + "step": 6656 + }, + { + "epoch": 75.55240793201133, + "grad_norm": 3.7569852476853343, + "learning_rate": 5e-06, + "loss": 0.0319, + "num_input_tokens_seen": 1142073736, + "step": 6657 + }, + { + "epoch": 75.55240793201133, + "loss": 0.03570391237735748, + "loss_ce": 1.3606133506982587e-05, + "loss_iou": 0.54296875, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 1142073736, + "step": 6657 + }, + { + "epoch": 75.56373937677054, + "grad_norm": 3.2002252008567087, + "learning_rate": 5e-06, + "loss": 0.039, + "num_input_tokens_seen": 1142244908, + "step": 6658 + }, + { + "epoch": 75.56373937677054, + "loss": 0.053096137940883636, + "loss_ce": 1.0810408639372326e-05, + "loss_iou": 0.330078125, + "loss_num": 0.0106201171875, + "loss_xval": 0.052978515625, + "num_input_tokens_seen": 1142244908, + "step": 6658 + }, + { + "epoch": 75.57507082152975, + "grad_norm": 3.2672547875452085, + "learning_rate": 5e-06, + "loss": 0.0559, + "num_input_tokens_seen": 1142416592, + "step": 6659 + }, + { + "epoch": 75.57507082152975, + "loss": 0.06007232889533043, + "loss_ce": 2.8991431463509798e-05, + "loss_iou": 0.45703125, + "loss_num": 0.01202392578125, + "loss_xval": 0.06005859375, + "num_input_tokens_seen": 1142416592, + "step": 6659 + }, + { + "epoch": 75.58640226628896, + "grad_norm": 2.2120304026232414, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1142588436, + "step": 6660 + }, + { + "epoch": 75.58640226628896, + "loss": 0.03027614951133728, + "loss_ce": 7.900712080299854e-05, + "loss_iou": 0.25390625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 1142588436, + "step": 6660 + }, + { + "epoch": 75.59773371104816, + "grad_norm": 2.119201606880293, + "learning_rate": 5e-06, + "loss": 0.0293, + "num_input_tokens_seen": 1142760220, + "step": 6661 + }, + { + "epoch": 75.59773371104816, + "loss": 0.027178173884749413, + "loss_ce": 3.278976146248169e-05, + "loss_iou": 0.4140625, + "loss_num": 0.00543212890625, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1142760220, + "step": 6661 + }, + { + "epoch": 75.60906515580737, + "grad_norm": 2.403888445968798, + "learning_rate": 5e-06, + "loss": 0.0221, + "num_input_tokens_seen": 1142932264, + "step": 6662 + }, + { + "epoch": 75.60906515580737, + "loss": 0.03135679289698601, + "loss_ce": 3.049948463740293e-05, + "loss_iou": 0.3984375, + "loss_num": 0.006256103515625, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1142932264, + "step": 6662 + }, + { + "epoch": 75.62039660056658, + "grad_norm": 3.39996957938274, + "learning_rate": 5e-06, + "loss": 0.0282, + "num_input_tokens_seen": 1143104196, + "step": 6663 + }, + { + "epoch": 75.62039660056658, + "loss": 0.027434950694441795, + "loss_ce": 1.4907400327501819e-05, + "loss_iou": 0.4609375, + "loss_num": 0.0054931640625, + "loss_xval": 0.0274658203125, + "num_input_tokens_seen": 1143104196, + "step": 6663 + }, + { + "epoch": 75.63172804532577, + "grad_norm": 3.437044465632727, + "learning_rate": 5e-06, + "loss": 0.0474, + "num_input_tokens_seen": 1143276156, + "step": 6664 + }, + { + "epoch": 75.63172804532577, + "loss": 0.0396089032292366, + "loss_ce": 4.2866828152909875e-05, + "loss_iou": 0.435546875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 1143276156, + "step": 6664 + }, + { + "epoch": 75.64305949008498, + "grad_norm": 2.812134809677353, + "learning_rate": 5e-06, + "loss": 0.0431, + "num_input_tokens_seen": 1143446468, + "step": 6665 + }, + { + "epoch": 75.64305949008498, + "loss": 0.03706753998994827, + "loss_ce": 3.4461074392311275e-05, + "loss_iou": 0.3125, + "loss_num": 0.007415771484375, + "loss_xval": 0.037109375, + "num_input_tokens_seen": 1143446468, + "step": 6665 + }, + { + "epoch": 75.65439093484419, + "grad_norm": 2.9006621397520758, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1143618708, + "step": 6666 + }, + { + "epoch": 75.65439093484419, + "loss": 0.04202726483345032, + "loss_ce": 1.9816994608845562e-05, + "loss_iou": 0.4140625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1143618708, + "step": 6666 + }, + { + "epoch": 75.6657223796034, + "grad_norm": 3.3625161917795054, + "learning_rate": 5e-06, + "loss": 0.0421, + "num_input_tokens_seen": 1143789444, + "step": 6667 + }, + { + "epoch": 75.6657223796034, + "loss": 0.05386757850646973, + "loss_ce": 0.00023293087724596262, + "loss_iou": 0.474609375, + "loss_num": 0.0107421875, + "loss_xval": 0.0537109375, + "num_input_tokens_seen": 1143789444, + "step": 6667 + }, + { + "epoch": 75.6770538243626, + "grad_norm": 3.7325123012343937, + "learning_rate": 5e-06, + "loss": 0.0668, + "num_input_tokens_seen": 1143960568, + "step": 6668 + }, + { + "epoch": 75.6770538243626, + "loss": 0.04736229404807091, + "loss_ce": 1.427180086466251e-05, + "loss_iou": 0.345703125, + "loss_num": 0.00946044921875, + "loss_xval": 0.04736328125, + "num_input_tokens_seen": 1143960568, + "step": 6668 + }, + { + "epoch": 75.68838526912181, + "grad_norm": 5.078434440900139, + "learning_rate": 5e-06, + "loss": 0.0591, + "num_input_tokens_seen": 1144132432, + "step": 6669 + }, + { + "epoch": 75.68838526912181, + "loss": 0.0710034891963005, + "loss_ce": 1.9605802663136274e-05, + "loss_iou": 0.42578125, + "loss_num": 0.01422119140625, + "loss_xval": 0.07080078125, + "num_input_tokens_seen": 1144132432, + "step": 6669 + }, + { + "epoch": 75.69971671388102, + "grad_norm": 4.1809444971230505, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1144302040, + "step": 6670 + }, + { + "epoch": 75.69971671388102, + "loss": 0.030401837080717087, + "loss_ce": 2.1587624360108748e-05, + "loss_iou": 0.390625, + "loss_num": 0.006072998046875, + "loss_xval": 0.0303955078125, + "num_input_tokens_seen": 1144302040, + "step": 6670 + }, + { + "epoch": 75.71104815864022, + "grad_norm": 3.175617142640843, + "learning_rate": 5e-06, + "loss": 0.0362, + "num_input_tokens_seen": 1144473412, + "step": 6671 + }, + { + "epoch": 75.71104815864022, + "loss": 0.02501879446208477, + "loss_ce": 9.640024472901132e-06, + "loss_iou": 0.5078125, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 1144473412, + "step": 6671 + }, + { + "epoch": 75.72237960339943, + "grad_norm": 3.2636468707945774, + "learning_rate": 5e-06, + "loss": 0.0567, + "num_input_tokens_seen": 1144645004, + "step": 6672 + }, + { + "epoch": 75.72237960339943, + "loss": 0.07613909244537354, + "loss_ce": 5.8768029703060165e-05, + "loss_iou": 0.38671875, + "loss_num": 0.01519775390625, + "loss_xval": 0.076171875, + "num_input_tokens_seen": 1144645004, + "step": 6672 + }, + { + "epoch": 75.73371104815864, + "grad_norm": 2.173765635881127, + "learning_rate": 5e-06, + "loss": 0.0505, + "num_input_tokens_seen": 1144816820, + "step": 6673 + }, + { + "epoch": 75.73371104815864, + "loss": 0.027086056768894196, + "loss_ce": 2.459430106682703e-05, + "loss_iou": 0.265625, + "loss_num": 0.005401611328125, + "loss_xval": 0.027099609375, + "num_input_tokens_seen": 1144816820, + "step": 6673 + }, + { + "epoch": 75.74504249291785, + "grad_norm": 2.5653880769482424, + "learning_rate": 5e-06, + "loss": 0.0336, + "num_input_tokens_seen": 1144988400, + "step": 6674 + }, + { + "epoch": 75.74504249291785, + "loss": 0.04108980670571327, + "loss_ce": 1.3146077435521875e-05, + "loss_iou": 0.427734375, + "loss_num": 0.00823974609375, + "loss_xval": 0.041015625, + "num_input_tokens_seen": 1144988400, + "step": 6674 + }, + { + "epoch": 75.75637393767705, + "grad_norm": 3.112950775261105, + "learning_rate": 5e-06, + "loss": 0.0392, + "num_input_tokens_seen": 1145160336, + "step": 6675 + }, + { + "epoch": 75.75637393767705, + "loss": 0.03883012756705284, + "loss_ce": 1.1768001058953814e-05, + "loss_iou": 0.390625, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 1145160336, + "step": 6675 + }, + { + "epoch": 75.76770538243626, + "grad_norm": 4.058973674720043, + "learning_rate": 5e-06, + "loss": 0.0681, + "num_input_tokens_seen": 1145332772, + "step": 6676 + }, + { + "epoch": 75.76770538243626, + "loss": 0.15601879358291626, + "loss_ce": 1.2941011846123729e-05, + "loss_iou": 0.357421875, + "loss_num": 0.03125, + "loss_xval": 0.15625, + "num_input_tokens_seen": 1145332772, + "step": 6676 + }, + { + "epoch": 75.77903682719547, + "grad_norm": 3.4316266732291565, + "learning_rate": 5e-06, + "loss": 0.0425, + "num_input_tokens_seen": 1145503140, + "step": 6677 + }, + { + "epoch": 75.77903682719547, + "loss": 0.028674378991127014, + "loss_ce": 1.0742652193584945e-05, + "loss_iou": 0.38671875, + "loss_num": 0.0057373046875, + "loss_xval": 0.0286865234375, + "num_input_tokens_seen": 1145503140, + "step": 6677 + }, + { + "epoch": 75.79036827195468, + "grad_norm": 2.6864191523547776, + "learning_rate": 5e-06, + "loss": 0.0306, + "num_input_tokens_seen": 1145674124, + "step": 6678 + }, + { + "epoch": 75.79036827195468, + "loss": 0.03138051554560661, + "loss_ce": 8.442595571978018e-06, + "loss_iou": 0.384765625, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1145674124, + "step": 6678 + }, + { + "epoch": 75.80169971671388, + "grad_norm": 2.585479455315279, + "learning_rate": 5e-06, + "loss": 0.0254, + "num_input_tokens_seen": 1145845716, + "step": 6679 + }, + { + "epoch": 75.80169971671388, + "loss": 0.024859212338924408, + "loss_ce": 7.893960719229653e-05, + "loss_iou": 0.46875, + "loss_num": 0.00494384765625, + "loss_xval": 0.0247802734375, + "num_input_tokens_seen": 1145845716, + "step": 6679 + }, + { + "epoch": 75.81303116147309, + "grad_norm": 2.9421066973364973, + "learning_rate": 5e-06, + "loss": 0.0534, + "num_input_tokens_seen": 1146016148, + "step": 6680 + }, + { + "epoch": 75.81303116147309, + "loss": 0.06626441329717636, + "loss_ce": 1.0752217349363491e-05, + "loss_iou": 0.30859375, + "loss_num": 0.01324462890625, + "loss_xval": 0.06640625, + "num_input_tokens_seen": 1146016148, + "step": 6680 + }, + { + "epoch": 75.8243626062323, + "grad_norm": 3.2144953504060023, + "learning_rate": 5e-06, + "loss": 0.0389, + "num_input_tokens_seen": 1146188196, + "step": 6681 + }, + { + "epoch": 75.8243626062323, + "loss": 0.024401476606726646, + "loss_ce": 7.896759052528068e-05, + "loss_iou": 0.427734375, + "loss_num": 0.004852294921875, + "loss_xval": 0.0242919921875, + "num_input_tokens_seen": 1146188196, + "step": 6681 + }, + { + "epoch": 75.8356940509915, + "grad_norm": 3.259132412895314, + "learning_rate": 5e-06, + "loss": 0.0384, + "num_input_tokens_seen": 1146359360, + "step": 6682 + }, + { + "epoch": 75.8356940509915, + "loss": 0.02609214186668396, + "loss_ce": 1.4871469829813577e-05, + "loss_iou": 0.40625, + "loss_num": 0.005218505859375, + "loss_xval": 0.026123046875, + "num_input_tokens_seen": 1146359360, + "step": 6682 + }, + { + "epoch": 75.84702549575071, + "grad_norm": 3.5376500694855406, + "learning_rate": 5e-06, + "loss": 0.0537, + "num_input_tokens_seen": 1146531176, + "step": 6683 + }, + { + "epoch": 75.84702549575071, + "loss": 0.04434838145971298, + "loss_ce": 2.1600109903374687e-05, + "loss_iou": 0.466796875, + "loss_num": 0.00885009765625, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 1146531176, + "step": 6683 + }, + { + "epoch": 75.85835694050992, + "grad_norm": 3.174695679723189, + "learning_rate": 5e-06, + "loss": 0.03, + "num_input_tokens_seen": 1146701748, + "step": 6684 + }, + { + "epoch": 75.85835694050992, + "loss": 0.035643965005874634, + "loss_ce": 1.4690532225358766e-05, + "loss_iou": 0.455078125, + "loss_num": 0.00714111328125, + "loss_xval": 0.03564453125, + "num_input_tokens_seen": 1146701748, + "step": 6684 + }, + { + "epoch": 75.86968838526913, + "grad_norm": 3.469891437466592, + "learning_rate": 5e-06, + "loss": 0.0422, + "num_input_tokens_seen": 1146873696, + "step": 6685 + }, + { + "epoch": 75.86968838526913, + "loss": 0.03279561549425125, + "loss_ce": 4.4745111154043116e-06, + "loss_iou": 0.54296875, + "loss_num": 0.006561279296875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 1146873696, + "step": 6685 + }, + { + "epoch": 75.88101983002832, + "grad_norm": 3.64185882665407, + "learning_rate": 5e-06, + "loss": 0.0477, + "num_input_tokens_seen": 1147044404, + "step": 6686 + }, + { + "epoch": 75.88101983002832, + "loss": 0.06192648783326149, + "loss_ce": 6.3236630012397654e-06, + "loss_iou": 0.451171875, + "loss_num": 0.01239013671875, + "loss_xval": 0.06201171875, + "num_input_tokens_seen": 1147044404, + "step": 6686 + }, + { + "epoch": 75.89235127478753, + "grad_norm": 3.2899063958324746, + "learning_rate": 5e-06, + "loss": 0.0319, + "num_input_tokens_seen": 1147216440, + "step": 6687 + }, + { + "epoch": 75.89235127478753, + "loss": 0.030222224071621895, + "loss_ce": 2.508055513317231e-05, + "loss_iou": 0.50390625, + "loss_num": 0.00604248046875, + "loss_xval": 0.0301513671875, + "num_input_tokens_seen": 1147216440, + "step": 6687 + }, + { + "epoch": 75.90368271954674, + "grad_norm": 3.514346245156699, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 1147387340, + "step": 6688 + }, + { + "epoch": 75.90368271954674, + "loss": 0.03265424072742462, + "loss_ce": 1.5691683074692264e-05, + "loss_iou": 0.49609375, + "loss_num": 0.00653076171875, + "loss_xval": 0.03271484375, + "num_input_tokens_seen": 1147387340, + "step": 6688 + }, + { + "epoch": 75.91501416430594, + "grad_norm": 3.3733432012211915, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 1147558264, + "step": 6689 + }, + { + "epoch": 75.91501416430594, + "loss": 0.04083312302827835, + "loss_ce": 1.586188045621384e-05, + "loss_iou": 0.640625, + "loss_num": 0.0081787109375, + "loss_xval": 0.040771484375, + "num_input_tokens_seen": 1147558264, + "step": 6689 + }, + { + "epoch": 75.92634560906515, + "grad_norm": 3.6304490685557522, + "learning_rate": 5e-06, + "loss": 0.0501, + "num_input_tokens_seen": 1147729988, + "step": 6690 + }, + { + "epoch": 75.92634560906515, + "loss": 0.09520262479782104, + "loss_ce": 1.8297483620699495e-05, + "loss_iou": 0.29296875, + "loss_num": 0.01904296875, + "loss_xval": 0.09521484375, + "num_input_tokens_seen": 1147729988, + "step": 6690 + }, + { + "epoch": 75.93767705382436, + "grad_norm": 3.504647030603883, + "learning_rate": 5e-06, + "loss": 0.0333, + "num_input_tokens_seen": 1147902024, + "step": 6691 + }, + { + "epoch": 75.93767705382436, + "loss": 0.03515082225203514, + "loss_ce": 3.271982495789416e-05, + "loss_iou": 0.390625, + "loss_num": 0.00701904296875, + "loss_xval": 0.03515625, + "num_input_tokens_seen": 1147902024, + "step": 6691 + }, + { + "epoch": 75.94900849858357, + "grad_norm": 3.6373306013619557, + "learning_rate": 5e-06, + "loss": 0.0458, + "num_input_tokens_seen": 1148073580, + "step": 6692 + }, + { + "epoch": 75.94900849858357, + "loss": 0.033994030207395554, + "loss_ce": 1.2707678251899779e-05, + "loss_iou": 0.41796875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1148073580, + "step": 6692 + }, + { + "epoch": 75.96033994334277, + "grad_norm": 3.623874562672049, + "learning_rate": 5e-06, + "loss": 0.0405, + "num_input_tokens_seen": 1148245624, + "step": 6693 + }, + { + "epoch": 75.96033994334277, + "loss": 0.029669811949133873, + "loss_ce": 2.1983621991239488e-05, + "loss_iou": 0.478515625, + "loss_num": 0.00592041015625, + "loss_xval": 0.0296630859375, + "num_input_tokens_seen": 1148245624, + "step": 6693 + }, + { + "epoch": 75.97167138810198, + "grad_norm": 3.423271052882423, + "learning_rate": 5e-06, + "loss": 0.042, + "num_input_tokens_seen": 1148416304, + "step": 6694 + }, + { + "epoch": 75.97167138810198, + "loss": 0.033641472458839417, + "loss_ce": 1.1105205885542091e-05, + "loss_iou": 0.43359375, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1148416304, + "step": 6694 + }, + { + "epoch": 75.98300283286119, + "grad_norm": 3.4081389156869983, + "learning_rate": 5e-06, + "loss": 0.04, + "num_input_tokens_seen": 1148588124, + "step": 6695 + }, + { + "epoch": 75.98300283286119, + "loss": 0.07447037100791931, + "loss_ce": 3.800016929744743e-05, + "loss_iou": 0.37109375, + "loss_num": 0.014892578125, + "loss_xval": 0.07421875, + "num_input_tokens_seen": 1148588124, + "step": 6695 + }, + { + "epoch": 75.9943342776204, + "grad_norm": 2.8942014844608597, + "learning_rate": 5e-06, + "loss": 0.0532, + "num_input_tokens_seen": 1148758284, + "step": 6696 + }, + { + "epoch": 75.9943342776204, + "loss": 0.03029724769294262, + "loss_ce": 8.551374776288867e-06, + "loss_iou": 0.375, + "loss_num": 0.00604248046875, + "loss_xval": 0.0302734375, + "num_input_tokens_seen": 1148758284, + "step": 6696 + }, + { + "epoch": 75.9943342776204, + "loss": 0.020227624103426933, + "loss_ce": 9.727918040880468e-06, + "loss_iou": 0.333984375, + "loss_num": 0.0040283203125, + "loss_xval": 0.020263671875, + "num_input_tokens_seen": 1148844128, + "step": 6696 + }, + { + "epoch": 76.0056657223796, + "grad_norm": 2.142813520793836, + "learning_rate": 5e-06, + "loss": 0.0472, + "num_input_tokens_seen": 1148930216, + "step": 6697 + }, + { + "epoch": 76.0056657223796, + "loss": 0.023491788655519485, + "loss_ce": 8.513385182595812e-06, + "loss_iou": 0.3515625, + "loss_num": 0.00469970703125, + "loss_xval": 0.0234375, + "num_input_tokens_seen": 1148930216, + "step": 6697 + }, + { + "epoch": 76.01699716713881, + "grad_norm": 2.364388857413835, + "learning_rate": 5e-06, + "loss": 0.0339, + "num_input_tokens_seen": 1149100288, + "step": 6698 + }, + { + "epoch": 76.01699716713881, + "loss": 0.04649578034877777, + "loss_ce": 1.7512007616460323e-05, + "loss_iou": 0.408203125, + "loss_num": 0.00927734375, + "loss_xval": 0.04638671875, + "num_input_tokens_seen": 1149100288, + "step": 6698 + }, + { + "epoch": 76.02832861189802, + "grad_norm": 2.1706255465948185, + "learning_rate": 5e-06, + "loss": 0.0297, + "num_input_tokens_seen": 1149272144, + "step": 6699 + }, + { + "epoch": 76.02832861189802, + "loss": 0.03590993583202362, + "loss_ce": 2.12640061363345e-05, + "loss_iou": 0.44140625, + "loss_num": 0.0072021484375, + "loss_xval": 0.035888671875, + "num_input_tokens_seen": 1149272144, + "step": 6699 + }, + { + "epoch": 76.03966005665723, + "grad_norm": 2.378750537462879, + "learning_rate": 5e-06, + "loss": 0.0277, + "num_input_tokens_seen": 1149443848, + "step": 6700 + }, + { + "epoch": 76.03966005665723, + "loss": 0.038892585784196854, + "loss_ce": 1.3190378012950532e-05, + "loss_iou": 0.388671875, + "loss_num": 0.007781982421875, + "loss_xval": 0.038818359375, + "num_input_tokens_seen": 1149443848, + "step": 6700 + }, + { + "epoch": 76.05099150141643, + "grad_norm": 3.51565247921851, + "learning_rate": 5e-06, + "loss": 0.04, + "num_input_tokens_seen": 1149615740, + "step": 6701 + }, + { + "epoch": 76.05099150141643, + "loss": 0.018701083958148956, + "loss_ce": 1.6695030353730544e-05, + "loss_iou": 0.380859375, + "loss_num": 0.0037384033203125, + "loss_xval": 0.0186767578125, + "num_input_tokens_seen": 1149615740, + "step": 6701 + }, + { + "epoch": 76.06232294617564, + "grad_norm": 2.74667367170532, + "learning_rate": 5e-06, + "loss": 0.0614, + "num_input_tokens_seen": 1149787284, + "step": 6702 + }, + { + "epoch": 76.06232294617564, + "loss": 0.027399137616157532, + "loss_ce": 1.7241462046513334e-05, + "loss_iou": 0.40625, + "loss_num": 0.0054931640625, + "loss_xval": 0.02734375, + "num_input_tokens_seen": 1149787284, + "step": 6702 + }, + { + "epoch": 76.07365439093485, + "grad_norm": 6.632180810782623, + "learning_rate": 5e-06, + "loss": 0.0324, + "num_input_tokens_seen": 1149959316, + "step": 6703 + }, + { + "epoch": 76.07365439093485, + "loss": 0.024576185271143913, + "loss_ce": 9.535842764307745e-06, + "loss_iou": 0.431640625, + "loss_num": 0.004913330078125, + "loss_xval": 0.0245361328125, + "num_input_tokens_seen": 1149959316, + "step": 6703 + }, + { + "epoch": 76.08498583569406, + "grad_norm": 3.796454105620394, + "learning_rate": 5e-06, + "loss": 0.0489, + "num_input_tokens_seen": 1150130608, + "step": 6704 + }, + { + "epoch": 76.08498583569406, + "loss": 0.09224532544612885, + "loss_ce": 2.1197325622779317e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0184326171875, + "loss_xval": 0.09228515625, + "num_input_tokens_seen": 1150130608, + "step": 6704 + }, + { + "epoch": 76.09631728045326, + "grad_norm": 3.3551287482854293, + "learning_rate": 5e-06, + "loss": 0.0348, + "num_input_tokens_seen": 1150302596, + "step": 6705 + }, + { + "epoch": 76.09631728045326, + "loss": 0.03742213919758797, + "loss_ce": 7.587786967633292e-06, + "loss_iou": 0.447265625, + "loss_num": 0.007476806640625, + "loss_xval": 0.037353515625, + "num_input_tokens_seen": 1150302596, + "step": 6705 + }, + { + "epoch": 76.10764872521247, + "grad_norm": 8.126262764623439, + "learning_rate": 5e-06, + "loss": 0.0413, + "num_input_tokens_seen": 1150473404, + "step": 6706 + }, + { + "epoch": 76.10764872521247, + "loss": 0.034440137445926666, + "loss_ce": 3.156549428240396e-05, + "loss_iou": 0.38671875, + "loss_num": 0.00689697265625, + "loss_xval": 0.034423828125, + "num_input_tokens_seen": 1150473404, + "step": 6706 + }, + { + "epoch": 76.11898016997168, + "grad_norm": 4.338004033444712, + "learning_rate": 5e-06, + "loss": 0.0385, + "num_input_tokens_seen": 1150645052, + "step": 6707 + }, + { + "epoch": 76.11898016997168, + "loss": 0.03648258000612259, + "loss_ce": 2.933320138254203e-05, + "loss_iou": 0.5390625, + "loss_num": 0.007293701171875, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 1150645052, + "step": 6707 + }, + { + "epoch": 76.13031161473087, + "grad_norm": 3.158468614834595, + "learning_rate": 5e-06, + "loss": 0.0386, + "num_input_tokens_seen": 1150816712, + "step": 6708 + }, + { + "epoch": 76.13031161473087, + "loss": 0.03373480588197708, + "loss_ce": 1.2880462236353196e-05, + "loss_iou": 0.185546875, + "loss_num": 0.006744384765625, + "loss_xval": 0.03369140625, + "num_input_tokens_seen": 1150816712, + "step": 6708 + }, + { + "epoch": 76.14164305949008, + "grad_norm": 4.015923163663293, + "learning_rate": 5e-06, + "loss": 0.0584, + "num_input_tokens_seen": 1150987940, + "step": 6709 + }, + { + "epoch": 76.14164305949008, + "loss": 0.03205908462405205, + "loss_ce": 1.562702891533263e-05, + "loss_iou": 0.302734375, + "loss_num": 0.00640869140625, + "loss_xval": 0.031982421875, + "num_input_tokens_seen": 1150987940, + "step": 6709 + }, + { + "epoch": 76.15297450424929, + "grad_norm": 4.982810922612806, + "learning_rate": 5e-06, + "loss": 0.0446, + "num_input_tokens_seen": 1151156912, + "step": 6710 + }, + { + "epoch": 76.15297450424929, + "loss": 0.026900295168161392, + "loss_ce": 6.678551471850369e-06, + "loss_iou": 0.373046875, + "loss_num": 0.00537109375, + "loss_xval": 0.02685546875, + "num_input_tokens_seen": 1151156912, + "step": 6710 + }, + { + "epoch": 76.1643059490085, + "grad_norm": 3.186404904076927, + "learning_rate": 5e-06, + "loss": 0.0618, + "num_input_tokens_seen": 1151328616, + "step": 6711 + }, + { + "epoch": 76.1643059490085, + "loss": 0.03951674699783325, + "loss_ce": 4.226102464599535e-05, + "loss_iou": 0.55859375, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 1151328616, + "step": 6711 + }, + { + "epoch": 76.1756373937677, + "grad_norm": 2.986152407553737, + "learning_rate": 5e-06, + "loss": 0.0546, + "num_input_tokens_seen": 1151500404, + "step": 6712 + }, + { + "epoch": 76.1756373937677, + "loss": 0.09450555592775345, + "loss_ce": 7.873508366174065e-06, + "loss_iou": 0.4453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.0947265625, + "num_input_tokens_seen": 1151500404, + "step": 6712 + }, + { + "epoch": 76.18696883852691, + "grad_norm": 2.7997377199803486, + "learning_rate": 5e-06, + "loss": 0.0329, + "num_input_tokens_seen": 1151671476, + "step": 6713 + }, + { + "epoch": 76.18696883852691, + "loss": 0.024938397109508514, + "loss_ce": 1.3164338270144071e-05, + "loss_iou": 0.41015625, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 1151671476, + "step": 6713 + }, + { + "epoch": 76.19830028328612, + "grad_norm": 2.7542865452134406, + "learning_rate": 5e-06, + "loss": 0.0364, + "num_input_tokens_seen": 1151841756, + "step": 6714 + }, + { + "epoch": 76.19830028328612, + "loss": 0.0285926666110754, + "loss_ce": 1.295272886636667e-05, + "loss_iou": 0.328125, + "loss_num": 0.005706787109375, + "loss_xval": 0.028564453125, + "num_input_tokens_seen": 1151841756, + "step": 6714 + }, + { + "epoch": 76.20963172804532, + "grad_norm": 2.850649834079787, + "learning_rate": 5e-06, + "loss": 0.0277, + "num_input_tokens_seen": 1152012624, + "step": 6715 + }, + { + "epoch": 76.20963172804532, + "loss": 0.030105112120509148, + "loss_ce": 1.4781064237467945e-05, + "loss_iou": 0.49609375, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1152012624, + "step": 6715 + }, + { + "epoch": 76.22096317280453, + "grad_norm": 2.7709607289535336, + "learning_rate": 5e-06, + "loss": 0.0336, + "num_input_tokens_seen": 1152182652, + "step": 6716 + }, + { + "epoch": 76.22096317280453, + "loss": 0.03405708819627762, + "loss_ce": 1.4731094779563136e-05, + "loss_iou": 0.310546875, + "loss_num": 0.006805419921875, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1152182652, + "step": 6716 + }, + { + "epoch": 76.23229461756374, + "grad_norm": 2.7755466240660986, + "learning_rate": 5e-06, + "loss": 0.0366, + "num_input_tokens_seen": 1152352324, + "step": 6717 + }, + { + "epoch": 76.23229461756374, + "loss": 0.053192902356386185, + "loss_ce": 1.602433985681273e-05, + "loss_iou": 0.54296875, + "loss_num": 0.0106201171875, + "loss_xval": 0.05322265625, + "num_input_tokens_seen": 1152352324, + "step": 6717 + }, + { + "epoch": 76.24362606232295, + "grad_norm": 2.6765404393720997, + "learning_rate": 5e-06, + "loss": 0.0572, + "num_input_tokens_seen": 1152524428, + "step": 6718 + }, + { + "epoch": 76.24362606232295, + "loss": 0.025339476764202118, + "loss_ce": 4.040456406073645e-05, + "loss_iou": 0.44140625, + "loss_num": 0.00506591796875, + "loss_xval": 0.0252685546875, + "num_input_tokens_seen": 1152524428, + "step": 6718 + }, + { + "epoch": 76.25495750708215, + "grad_norm": 2.778012442088198, + "learning_rate": 5e-06, + "loss": 0.0663, + "num_input_tokens_seen": 1152695868, + "step": 6719 + }, + { + "epoch": 76.25495750708215, + "loss": 0.04451717436313629, + "loss_ce": 2.254739956697449e-05, + "loss_iou": 0.5078125, + "loss_num": 0.0089111328125, + "loss_xval": 0.04443359375, + "num_input_tokens_seen": 1152695868, + "step": 6719 + }, + { + "epoch": 76.26628895184136, + "grad_norm": 2.581844634505008, + "learning_rate": 5e-06, + "loss": 0.0357, + "num_input_tokens_seen": 1152867256, + "step": 6720 + }, + { + "epoch": 76.26628895184136, + "loss": 0.03184906765818596, + "loss_ce": 1.9235512809245847e-05, + "loss_iou": 0.423828125, + "loss_num": 0.006378173828125, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 1152867256, + "step": 6720 + }, + { + "epoch": 76.27762039660057, + "grad_norm": 2.959147014006936, + "learning_rate": 5e-06, + "loss": 0.0285, + "num_input_tokens_seen": 1153039436, + "step": 6721 + }, + { + "epoch": 76.27762039660057, + "loss": 0.023907314985990524, + "loss_ce": 1.205211810884066e-05, + "loss_iou": 0.439453125, + "loss_num": 0.004791259765625, + "loss_xval": 0.02392578125, + "num_input_tokens_seen": 1153039436, + "step": 6721 + }, + { + "epoch": 76.28895184135978, + "grad_norm": 3.259455427741046, + "learning_rate": 5e-06, + "loss": 0.0409, + "num_input_tokens_seen": 1153210448, + "step": 6722 + }, + { + "epoch": 76.28895184135978, + "loss": 0.02499602735042572, + "loss_ce": 1.7390973880537786e-05, + "loss_iou": 0.357421875, + "loss_num": 0.0050048828125, + "loss_xval": 0.0250244140625, + "num_input_tokens_seen": 1153210448, + "step": 6722 + }, + { + "epoch": 76.30028328611898, + "grad_norm": 3.7055552445953435, + "learning_rate": 5e-06, + "loss": 0.0539, + "num_input_tokens_seen": 1153381396, + "step": 6723 + }, + { + "epoch": 76.30028328611898, + "loss": 0.039533935487270355, + "loss_ce": 2.8930055123055354e-05, + "loss_iou": 0.291015625, + "loss_num": 0.00787353515625, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 1153381396, + "step": 6723 + }, + { + "epoch": 76.31161473087819, + "grad_norm": 2.980235199301115, + "learning_rate": 5e-06, + "loss": 0.0324, + "num_input_tokens_seen": 1153552208, + "step": 6724 + }, + { + "epoch": 76.31161473087819, + "loss": 0.03134167566895485, + "loss_ce": 1.5381949197035283e-05, + "loss_iou": 0.3125, + "loss_num": 0.00628662109375, + "loss_xval": 0.03125, + "num_input_tokens_seen": 1153552208, + "step": 6724 + }, + { + "epoch": 76.3229461756374, + "grad_norm": 3.451790865187317, + "learning_rate": 5e-06, + "loss": 0.0425, + "num_input_tokens_seen": 1153723456, + "step": 6725 + }, + { + "epoch": 76.3229461756374, + "loss": 0.03680173307657242, + "loss_ce": 2.8052258130628616e-05, + "loss_iou": 0.26953125, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1153723456, + "step": 6725 + }, + { + "epoch": 76.3342776203966, + "grad_norm": 3.882913298644648, + "learning_rate": 5e-06, + "loss": 0.0454, + "num_input_tokens_seen": 1153895360, + "step": 6726 + }, + { + "epoch": 76.3342776203966, + "loss": 0.06896818429231644, + "loss_ce": 1.3716375178773887e-05, + "loss_iou": 0.3515625, + "loss_num": 0.0137939453125, + "loss_xval": 0.06884765625, + "num_input_tokens_seen": 1153895360, + "step": 6726 + }, + { + "epoch": 76.34560906515581, + "grad_norm": 3.5824310438613276, + "learning_rate": 5e-06, + "loss": 0.0313, + "num_input_tokens_seen": 1154065480, + "step": 6727 + }, + { + "epoch": 76.34560906515581, + "loss": 0.031630292534828186, + "loss_ce": 1.4083432688494213e-05, + "loss_iou": 0.29296875, + "loss_num": 0.006317138671875, + "loss_xval": 0.03173828125, + "num_input_tokens_seen": 1154065480, + "step": 6727 + }, + { + "epoch": 76.35694050991502, + "grad_norm": 3.5855519577328705, + "learning_rate": 5e-06, + "loss": 0.0349, + "num_input_tokens_seen": 1154237336, + "step": 6728 + }, + { + "epoch": 76.35694050991502, + "loss": 0.03963451087474823, + "loss_ce": 2.2692787752021104e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0079345703125, + "loss_xval": 0.03955078125, + "num_input_tokens_seen": 1154237336, + "step": 6728 + }, + { + "epoch": 76.36827195467423, + "grad_norm": 3.107590296644265, + "learning_rate": 5e-06, + "loss": 0.036, + "num_input_tokens_seen": 1154408496, + "step": 6729 + }, + { + "epoch": 76.36827195467423, + "loss": 0.037957750260829926, + "loss_ce": 2.4401826522080228e-05, + "loss_iou": 0.3828125, + "loss_num": 0.007568359375, + "loss_xval": 0.037841796875, + "num_input_tokens_seen": 1154408496, + "step": 6729 + }, + { + "epoch": 76.37960339943342, + "grad_norm": 4.057694296249576, + "learning_rate": 5e-06, + "loss": 0.0807, + "num_input_tokens_seen": 1154580376, + "step": 6730 + }, + { + "epoch": 76.37960339943342, + "loss": 0.050042614340782166, + "loss_ce": 2.4300499717355706e-05, + "loss_iou": 0.4453125, + "loss_num": 0.010009765625, + "loss_xval": 0.050048828125, + "num_input_tokens_seen": 1154580376, + "step": 6730 + }, + { + "epoch": 76.39093484419263, + "grad_norm": 3.8431018046852685, + "learning_rate": 5e-06, + "loss": 0.0601, + "num_input_tokens_seen": 1154749188, + "step": 6731 + }, + { + "epoch": 76.39093484419263, + "loss": 0.04194425791501999, + "loss_ce": 0.005170574877411127, + "loss_iou": 0.458984375, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1154749188, + "step": 6731 + }, + { + "epoch": 76.40226628895184, + "grad_norm": 3.222343304965176, + "learning_rate": 5e-06, + "loss": 0.0343, + "num_input_tokens_seen": 1154920744, + "step": 6732 + }, + { + "epoch": 76.40226628895184, + "loss": 0.0338994562625885, + "loss_ce": 2.494702857802622e-05, + "loss_iou": 0.447265625, + "loss_num": 0.00677490234375, + "loss_xval": 0.033935546875, + "num_input_tokens_seen": 1154920744, + "step": 6732 + }, + { + "epoch": 76.41359773371104, + "grad_norm": 3.254521229914388, + "learning_rate": 5e-06, + "loss": 0.036, + "num_input_tokens_seen": 1155092700, + "step": 6733 + }, + { + "epoch": 76.41359773371104, + "loss": 0.04597647860646248, + "loss_ce": 4.7525096306344494e-05, + "loss_iou": 0.310546875, + "loss_num": 0.0091552734375, + "loss_xval": 0.0458984375, + "num_input_tokens_seen": 1155092700, + "step": 6733 + }, + { + "epoch": 76.42492917847025, + "grad_norm": 3.5812796985408597, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 1155264796, + "step": 6734 + }, + { + "epoch": 76.42492917847025, + "loss": 0.04866945743560791, + "loss_ce": 2.444002529955469e-05, + "loss_iou": 0.439453125, + "loss_num": 0.00970458984375, + "loss_xval": 0.048583984375, + "num_input_tokens_seen": 1155264796, + "step": 6734 + }, + { + "epoch": 76.43626062322946, + "grad_norm": 3.1875034200454926, + "learning_rate": 5e-06, + "loss": 0.0482, + "num_input_tokens_seen": 1155436384, + "step": 6735 + }, + { + "epoch": 76.43626062322946, + "loss": 0.06429353356361389, + "loss_ce": 2.3516251530963928e-05, + "loss_iou": 0.390625, + "loss_num": 0.0128173828125, + "loss_xval": 0.064453125, + "num_input_tokens_seen": 1155436384, + "step": 6735 + }, + { + "epoch": 76.44759206798867, + "grad_norm": 3.1861560871577215, + "learning_rate": 5e-06, + "loss": 0.0322, + "num_input_tokens_seen": 1155607288, + "step": 6736 + }, + { + "epoch": 76.44759206798867, + "loss": 0.030070578679442406, + "loss_ce": 4.1281000449089333e-05, + "loss_iou": 0.314453125, + "loss_num": 0.0059814453125, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1155607288, + "step": 6736 + }, + { + "epoch": 76.45892351274787, + "grad_norm": 3.3144382378426442, + "learning_rate": 5e-06, + "loss": 0.0432, + "num_input_tokens_seen": 1155779228, + "step": 6737 + }, + { + "epoch": 76.45892351274787, + "loss": 0.08017963916063309, + "loss_ce": 9.964962373487651e-06, + "loss_iou": 0.46484375, + "loss_num": 0.0159912109375, + "loss_xval": 0.080078125, + "num_input_tokens_seen": 1155779228, + "step": 6737 + }, + { + "epoch": 76.47025495750708, + "grad_norm": 3.383780044199627, + "learning_rate": 5e-06, + "loss": 0.0469, + "num_input_tokens_seen": 1155950196, + "step": 6738 + }, + { + "epoch": 76.47025495750708, + "loss": 0.04567316919565201, + "loss_ce": 0.00014094336074776947, + "loss_iou": 0.451171875, + "loss_num": 0.00909423828125, + "loss_xval": 0.04541015625, + "num_input_tokens_seen": 1155950196, + "step": 6738 + }, + { + "epoch": 76.48158640226629, + "grad_norm": 3.622071716453547, + "learning_rate": 5e-06, + "loss": 0.0571, + "num_input_tokens_seen": 1156121144, + "step": 6739 + }, + { + "epoch": 76.48158640226629, + "loss": 0.05220004916191101, + "loss_ce": 0.0009152608108706772, + "loss_iou": 0.365234375, + "loss_num": 0.01025390625, + "loss_xval": 0.05126953125, + "num_input_tokens_seen": 1156121144, + "step": 6739 + }, + { + "epoch": 76.4929178470255, + "grad_norm": 3.4324059357214742, + "learning_rate": 5e-06, + "loss": 0.0527, + "num_input_tokens_seen": 1156292092, + "step": 6740 + }, + { + "epoch": 76.4929178470255, + "loss": 0.05812738835811615, + "loss_ce": 8.295338193420321e-05, + "loss_iou": 0.4296875, + "loss_num": 0.0115966796875, + "loss_xval": 0.05810546875, + "num_input_tokens_seen": 1156292092, + "step": 6740 + }, + { + "epoch": 76.5042492917847, + "grad_norm": 3.112818253550665, + "learning_rate": 5e-06, + "loss": 0.0369, + "num_input_tokens_seen": 1156463808, + "step": 6741 + }, + { + "epoch": 76.5042492917847, + "loss": 0.03158782422542572, + "loss_ce": 1.7389553249813616e-05, + "loss_iou": 0.5703125, + "loss_num": 0.006317138671875, + "loss_xval": 0.031494140625, + "num_input_tokens_seen": 1156463808, + "step": 6741 + }, + { + "epoch": 76.51558073654391, + "grad_norm": 2.4148675084999134, + "learning_rate": 5e-06, + "loss": 0.0415, + "num_input_tokens_seen": 1156635544, + "step": 6742 + }, + { + "epoch": 76.51558073654391, + "loss": 0.036333635449409485, + "loss_ce": 5.586558472714387e-05, + "loss_iou": 0.375, + "loss_num": 0.007232666015625, + "loss_xval": 0.036376953125, + "num_input_tokens_seen": 1156635544, + "step": 6742 + }, + { + "epoch": 76.52691218130312, + "grad_norm": 2.1242609993958737, + "learning_rate": 5e-06, + "loss": 0.0432, + "num_input_tokens_seen": 1156807580, + "step": 6743 + }, + { + "epoch": 76.52691218130312, + "loss": 0.03686174005270004, + "loss_ce": 7.279974670382217e-05, + "loss_iou": 0.490234375, + "loss_num": 0.007354736328125, + "loss_xval": 0.036865234375, + "num_input_tokens_seen": 1156807580, + "step": 6743 + }, + { + "epoch": 76.53824362606233, + "grad_norm": 3.1722154420360895, + "learning_rate": 5e-06, + "loss": 0.0363, + "num_input_tokens_seen": 1156978424, + "step": 6744 + }, + { + "epoch": 76.53824362606233, + "loss": 0.040512923151254654, + "loss_ce": 7.713256491115317e-05, + "loss_iou": 0.3125, + "loss_num": 0.00811767578125, + "loss_xval": 0.04052734375, + "num_input_tokens_seen": 1156978424, + "step": 6744 + }, + { + "epoch": 76.54957507082153, + "grad_norm": 2.7252823904909125, + "learning_rate": 5e-06, + "loss": 0.0488, + "num_input_tokens_seen": 1157150032, + "step": 6745 + }, + { + "epoch": 76.54957507082153, + "loss": 0.047656770795583725, + "loss_ce": 1.1202017958567012e-05, + "loss_iou": 0.296875, + "loss_num": 0.009521484375, + "loss_xval": 0.047607421875, + "num_input_tokens_seen": 1157150032, + "step": 6745 + }, + { + "epoch": 76.56090651558074, + "grad_norm": 3.6473200451005217, + "learning_rate": 5e-06, + "loss": 0.032, + "num_input_tokens_seen": 1157322088, + "step": 6746 + }, + { + "epoch": 76.56090651558074, + "loss": 0.024936577305197716, + "loss_ce": 3.423343878239393e-05, + "loss_iou": 0.44140625, + "loss_num": 0.004974365234375, + "loss_xval": 0.02490234375, + "num_input_tokens_seen": 1157322088, + "step": 6746 + }, + { + "epoch": 76.57223796033995, + "grad_norm": 3.2244182499202094, + "learning_rate": 5e-06, + "loss": 0.0475, + "num_input_tokens_seen": 1157493908, + "step": 6747 + }, + { + "epoch": 76.57223796033995, + "loss": 0.04200422018766403, + "loss_ce": 1.2032525773975067e-05, + "loss_iou": 0.384765625, + "loss_num": 0.0084228515625, + "loss_xval": 0.0419921875, + "num_input_tokens_seen": 1157493908, + "step": 6747 + }, + { + "epoch": 76.58356940509915, + "grad_norm": 2.812770553290911, + "learning_rate": 5e-06, + "loss": 0.0367, + "num_input_tokens_seen": 1157664756, + "step": 6748 + }, + { + "epoch": 76.58356940509915, + "loss": 0.026366740465164185, + "loss_ce": 1.4810613720328547e-05, + "loss_iou": 0.384765625, + "loss_num": 0.005279541015625, + "loss_xval": 0.0263671875, + "num_input_tokens_seen": 1157664756, + "step": 6748 + }, + { + "epoch": 76.59490084985836, + "grad_norm": 4.0153110644259264, + "learning_rate": 5e-06, + "loss": 0.0562, + "num_input_tokens_seen": 1157836356, + "step": 6749 + }, + { + "epoch": 76.59490084985836, + "loss": 0.03164807707071304, + "loss_ce": 0.001649297191761434, + "loss_iou": 0.41796875, + "loss_num": 0.006011962890625, + "loss_xval": 0.030029296875, + "num_input_tokens_seen": 1157836356, + "step": 6749 + }, + { + "epoch": 76.60623229461757, + "grad_norm": 3.387376251531648, + "learning_rate": 5e-06, + "loss": 0.0286, + "num_input_tokens_seen": 1158008468, + "step": 6750 + }, + { + "epoch": 76.60623229461757, + "eval_seeclick_CIoU": 0.5175180286169052, + "eval_seeclick_GIoU": 0.5201136469841003, + "eval_seeclick_IoU": 0.5568788349628448, + "eval_seeclick_MAE_all": 0.06802032887935638, + "eval_seeclick_MAE_h": 0.03438388183712959, + "eval_seeclick_MAE_w": 0.10334005951881409, + "eval_seeclick_MAE_x": 0.10081024095416069, + "eval_seeclick_MAE_y": 0.033547136932611465, + "eval_seeclick_NUM_probability": 0.9999966025352478, + "eval_seeclick_inside_bbox": 0.9076704680919647, + "eval_seeclick_loss": 1.0442501306533813, + "eval_seeclick_loss_ce": 0.7959894239902496, + "eval_seeclick_loss_iou": 0.5408935546875, + "eval_seeclick_loss_num": 0.050487518310546875, + "eval_seeclick_loss_xval": 0.252197265625, + "eval_seeclick_runtime": 73.7402, + "eval_seeclick_samples_per_second": 0.583, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 1158008468, + "step": 6750 + }, + { + "epoch": 76.60623229461757, + "eval_icons_CIoU": 0.7266184091567993, + "eval_icons_GIoU": 0.7282552719116211, + "eval_icons_IoU": 0.7413917481899261, + "eval_icons_MAE_all": 0.035040028393268585, + "eval_icons_MAE_h": 0.02840566076338291, + "eval_icons_MAE_w": 0.04360402841120958, + "eval_icons_MAE_x": 0.03670475818216801, + "eval_icons_MAE_y": 0.03144566807895899, + "eval_icons_NUM_probability": 0.9998024106025696, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 0.13592630624771118, + "eval_icons_loss_ce": 0.005310217617079616, + "eval_icons_loss_iou": 0.569580078125, + "eval_icons_loss_num": 0.02378082275390625, + "eval_icons_loss_xval": 0.1189117431640625, + "eval_icons_runtime": 84.2962, + "eval_icons_samples_per_second": 0.593, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 1158008468, + "step": 6750 + }, + { + "epoch": 76.60623229461757, + "eval_screenspot_CIoU": 0.6396422783533732, + "eval_screenspot_GIoU": 0.6394848624865214, + "eval_screenspot_IoU": 0.6678770184516907, + "eval_screenspot_MAE_all": 0.06549978007872899, + "eval_screenspot_MAE_h": 0.0410217580695947, + "eval_screenspot_MAE_w": 0.11402194077769916, + "eval_screenspot_MAE_x": 0.073229289924105, + "eval_screenspot_MAE_y": 0.03372615793099006, + "eval_screenspot_NUM_probability": 0.9999787012736002, + "eval_screenspot_inside_bbox": 0.9287499984105428, + "eval_screenspot_loss": 0.277182012796402, + "eval_screenspot_loss_ce": 0.015680634416639805, + "eval_screenspot_loss_iou": 0.49853515625, + "eval_screenspot_loss_num": 0.051724751790364586, + "eval_screenspot_loss_xval": 0.258544921875, + "eval_screenspot_runtime": 148.4232, + "eval_screenspot_samples_per_second": 0.6, + "eval_screenspot_steps_per_second": 0.02, + "num_input_tokens_seen": 1158008468, + "step": 6750 + }, + { + "epoch": 76.60623229461757, + "eval_compot_CIoU": 0.8883132338523865, + "eval_compot_GIoU": 0.8871381878852844, + "eval_compot_IoU": 0.8967113792896271, + "eval_compot_MAE_all": 0.017100587021559477, + "eval_compot_MAE_h": 0.00995214655995369, + "eval_compot_MAE_w": 0.02563900128006935, + "eval_compot_MAE_x": 0.023243038915097713, + "eval_compot_MAE_y": 0.009568159701302648, + "eval_compot_NUM_probability": 0.9999822676181793, + "eval_compot_inside_bbox": 0.96875, + "eval_compot_loss": 0.06645932793617249, + "eval_compot_loss_ce": 1.2750736914313165e-05, + "eval_compot_loss_iou": 0.4945068359375, + "eval_compot_loss_num": 0.011624336242675781, + "eval_compot_loss_xval": 0.05809783935546875, + "eval_compot_runtime": 84.7839, + "eval_compot_samples_per_second": 0.59, + "eval_compot_steps_per_second": 0.024, + "num_input_tokens_seen": 1158008468, + "step": 6750 + }, + { + "epoch": 76.60623229461757, + "eval_custom_ui_MAE_all": 0.019065924920141697, + "eval_custom_ui_MAE_x": 0.03074595332145691, + "eval_custom_ui_MAE_y": 0.007385896518826485, + "eval_custom_ui_NUM_probability": 0.9998641610145569, + "eval_custom_ui_loss": 0.21941247582435608, + "eval_custom_ui_loss_ce": 0.12301512807607651, + "eval_custom_ui_loss_num": 0.0182952880859375, + "eval_custom_ui_loss_xval": 0.0915069580078125, + "eval_custom_ui_runtime": 65.79, + "eval_custom_ui_samples_per_second": 0.76, + "eval_custom_ui_steps_per_second": 0.03, + "num_input_tokens_seen": 1158008468, + "step": 6750 + } + ], + "logging_steps": 1.0, + "max_steps": 8096, + "num_input_tokens_seen": 1158008468, + "num_train_epochs": 92, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8354828616859648.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}