{ "best_metric": 0.8672510958046337, "best_model_checkpoint": "convnextv2-tiny-1k-224-finetuned-galaxy10-decals/checkpoint-785", "epoch": 9.977728285077951, "eval_steps": 500, "global_step": 1120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08908685968819599, "grad_norm": 2.9069173336029053, "learning_rate": 4.464285714285715e-06, "loss": 2.3159, "step": 10 }, { "epoch": 0.17817371937639198, "grad_norm": 4.669096946716309, "learning_rate": 8.92857142857143e-06, "loss": 2.2657, "step": 20 }, { "epoch": 0.267260579064588, "grad_norm": 5.892533302307129, "learning_rate": 1.3392857142857144e-05, "loss": 2.1852, "step": 30 }, { "epoch": 0.35634743875278396, "grad_norm": 4.657855033874512, "learning_rate": 1.785714285714286e-05, "loss": 2.0843, "step": 40 }, { "epoch": 0.44543429844098, "grad_norm": 4.414278030395508, "learning_rate": 2.2321428571428575e-05, "loss": 1.9644, "step": 50 }, { "epoch": 0.534521158129176, "grad_norm": 6.150153636932373, "learning_rate": 2.6785714285714288e-05, "loss": 1.7921, "step": 60 }, { "epoch": 0.623608017817372, "grad_norm": 7.54302978515625, "learning_rate": 3.125e-05, "loss": 1.5743, "step": 70 }, { "epoch": 0.7126948775055679, "grad_norm": 11.624669075012207, "learning_rate": 3.571428571428572e-05, "loss": 1.42, "step": 80 }, { "epoch": 0.8017817371937639, "grad_norm": 11.175118446350098, "learning_rate": 4.017857142857143e-05, "loss": 1.301, "step": 90 }, { "epoch": 0.89086859688196, "grad_norm": 13.484392166137695, "learning_rate": 4.464285714285715e-05, "loss": 1.2246, "step": 100 }, { "epoch": 0.9799554565701559, "grad_norm": 22.607799530029297, "learning_rate": 4.910714285714286e-05, "loss": 1.0664, "step": 110 }, { "epoch": 0.9977728285077951, "eval_accuracy": 0.6725109580463369, "eval_loss": 0.9818494915962219, "eval_runtime": 12.2808, "eval_samples_per_second": 130.041, "eval_steps_per_second": 4.071, "step": 112 }, { "epoch": 1.069042316258352, "grad_norm": 11.895453453063965, "learning_rate": 4.960317460317461e-05, "loss": 1.0415, "step": 120 }, { "epoch": 1.158129175946548, "grad_norm": 15.956673622131348, "learning_rate": 4.910714285714286e-05, "loss": 1.0021, "step": 130 }, { "epoch": 1.247216035634744, "grad_norm": 12.564276695251465, "learning_rate": 4.8611111111111115e-05, "loss": 0.9518, "step": 140 }, { "epoch": 1.3363028953229399, "grad_norm": 15.347941398620605, "learning_rate": 4.811507936507937e-05, "loss": 0.9316, "step": 150 }, { "epoch": 1.4253897550111359, "grad_norm": 13.298521041870117, "learning_rate": 4.761904761904762e-05, "loss": 0.8316, "step": 160 }, { "epoch": 1.5144766146993318, "grad_norm": 12.444368362426758, "learning_rate": 4.7123015873015876e-05, "loss": 0.8863, "step": 170 }, { "epoch": 1.6035634743875278, "grad_norm": 10.18455982208252, "learning_rate": 4.662698412698413e-05, "loss": 0.8304, "step": 180 }, { "epoch": 1.692650334075724, "grad_norm": 13.635807991027832, "learning_rate": 4.613095238095239e-05, "loss": 0.7748, "step": 190 }, { "epoch": 1.7817371937639197, "grad_norm": 9.778557777404785, "learning_rate": 4.563492063492064e-05, "loss": 0.7561, "step": 200 }, { "epoch": 1.8708240534521159, "grad_norm": 16.08139419555664, "learning_rate": 4.5138888888888894e-05, "loss": 0.8051, "step": 210 }, { "epoch": 1.9599109131403119, "grad_norm": 12.761980056762695, "learning_rate": 4.464285714285715e-05, "loss": 0.8019, "step": 220 }, { "epoch": 1.9955456570155903, "eval_accuracy": 0.7989981214777708, "eval_loss": 0.6332681775093079, "eval_runtime": 12.3035, "eval_samples_per_second": 129.8, "eval_steps_per_second": 4.064, "step": 224 }, { "epoch": 2.048997772828508, "grad_norm": 10.136615753173828, "learning_rate": 4.41468253968254e-05, "loss": 0.7221, "step": 230 }, { "epoch": 2.138084632516704, "grad_norm": 13.145553588867188, "learning_rate": 4.3650793650793655e-05, "loss": 0.6877, "step": 240 }, { "epoch": 2.2271714922048997, "grad_norm": 14.941648483276367, "learning_rate": 4.315476190476191e-05, "loss": 0.7462, "step": 250 }, { "epoch": 2.316258351893096, "grad_norm": 9.842031478881836, "learning_rate": 4.265873015873016e-05, "loss": 0.6886, "step": 260 }, { "epoch": 2.4053452115812917, "grad_norm": 11.978471755981445, "learning_rate": 4.2162698412698416e-05, "loss": 0.6843, "step": 270 }, { "epoch": 2.494432071269488, "grad_norm": 12.24606990814209, "learning_rate": 4.166666666666667e-05, "loss": 0.7038, "step": 280 }, { "epoch": 2.5835189309576836, "grad_norm": 12.357331275939941, "learning_rate": 4.117063492063492e-05, "loss": 0.696, "step": 290 }, { "epoch": 2.6726057906458798, "grad_norm": 16.765913009643555, "learning_rate": 4.067460317460318e-05, "loss": 0.7174, "step": 300 }, { "epoch": 2.7616926503340755, "grad_norm": 13.57442569732666, "learning_rate": 4.017857142857143e-05, "loss": 0.7305, "step": 310 }, { "epoch": 2.8507795100222717, "grad_norm": 13.48181438446045, "learning_rate": 3.968253968253968e-05, "loss": 0.6957, "step": 320 }, { "epoch": 2.939866369710468, "grad_norm": 9.809232711791992, "learning_rate": 3.918650793650794e-05, "loss": 0.6524, "step": 330 }, { "epoch": 2.9933184855233854, "eval_accuracy": 0.8340638697557922, "eval_loss": 0.5247990489006042, "eval_runtime": 12.2883, "eval_samples_per_second": 129.961, "eval_steps_per_second": 4.069, "step": 336 }, { "epoch": 3.0289532293986636, "grad_norm": 16.003482818603516, "learning_rate": 3.8690476190476195e-05, "loss": 0.6928, "step": 340 }, { "epoch": 3.11804008908686, "grad_norm": 23.448598861694336, "learning_rate": 3.8194444444444444e-05, "loss": 0.6696, "step": 350 }, { "epoch": 3.2071269487750556, "grad_norm": 12.516254425048828, "learning_rate": 3.76984126984127e-05, "loss": 0.6665, "step": 360 }, { "epoch": 3.2962138084632517, "grad_norm": 13.503238677978516, "learning_rate": 3.7202380952380956e-05, "loss": 0.6189, "step": 370 }, { "epoch": 3.3853006681514475, "grad_norm": 14.721902847290039, "learning_rate": 3.6706349206349205e-05, "loss": 0.6405, "step": 380 }, { "epoch": 3.4743875278396437, "grad_norm": 11.428637504577637, "learning_rate": 3.621031746031746e-05, "loss": 0.6502, "step": 390 }, { "epoch": 3.5634743875278394, "grad_norm": 8.628026008605957, "learning_rate": 3.571428571428572e-05, "loss": 0.6335, "step": 400 }, { "epoch": 3.6525612472160356, "grad_norm": 12.637211799621582, "learning_rate": 3.521825396825397e-05, "loss": 0.6457, "step": 410 }, { "epoch": 3.7416481069042318, "grad_norm": 13.72917652130127, "learning_rate": 3.472222222222222e-05, "loss": 0.6338, "step": 420 }, { "epoch": 3.8307349665924275, "grad_norm": 14.159635543823242, "learning_rate": 3.422619047619048e-05, "loss": 0.6318, "step": 430 }, { "epoch": 3.9198218262806237, "grad_norm": 12.676724433898926, "learning_rate": 3.3730158730158734e-05, "loss": 0.6339, "step": 440 }, { "epoch": 4.0, "eval_accuracy": 0.8447088290544772, "eval_loss": 0.4730662703514099, "eval_runtime": 12.4926, "eval_samples_per_second": 127.835, "eval_steps_per_second": 4.002, "step": 449 }, { "epoch": 4.008908685968819, "grad_norm": 11.317361831665039, "learning_rate": 3.3234126984126983e-05, "loss": 0.613, "step": 450 }, { "epoch": 4.097995545657016, "grad_norm": 14.402922630310059, "learning_rate": 3.273809523809524e-05, "loss": 0.6124, "step": 460 }, { "epoch": 4.187082405345212, "grad_norm": 9.939033508300781, "learning_rate": 3.2242063492063495e-05, "loss": 0.5868, "step": 470 }, { "epoch": 4.276169265033408, "grad_norm": 10.611005783081055, "learning_rate": 3.1746031746031745e-05, "loss": 0.5786, "step": 480 }, { "epoch": 4.365256124721603, "grad_norm": 11.104296684265137, "learning_rate": 3.125e-05, "loss": 0.544, "step": 490 }, { "epoch": 4.4543429844097995, "grad_norm": 14.008048057556152, "learning_rate": 3.075396825396826e-05, "loss": 0.6175, "step": 500 }, { "epoch": 4.543429844097996, "grad_norm": 9.320144653320312, "learning_rate": 3.0257936507936506e-05, "loss": 0.5999, "step": 510 }, { "epoch": 4.632516703786192, "grad_norm": 9.274946212768555, "learning_rate": 2.9761904761904762e-05, "loss": 0.5709, "step": 520 }, { "epoch": 4.721603563474387, "grad_norm": 12.640064239501953, "learning_rate": 2.9265873015873018e-05, "loss": 0.6231, "step": 530 }, { "epoch": 4.810690423162583, "grad_norm": 11.968724250793457, "learning_rate": 2.876984126984127e-05, "loss": 0.6206, "step": 540 }, { "epoch": 4.8997772828507795, "grad_norm": 11.681157112121582, "learning_rate": 2.8273809523809523e-05, "loss": 0.6031, "step": 550 }, { "epoch": 4.988864142538976, "grad_norm": 13.320256233215332, "learning_rate": 2.777777777777778e-05, "loss": 0.5178, "step": 560 }, { "epoch": 4.997772828507795, "eval_accuracy": 0.8503443957420163, "eval_loss": 0.4537416100502014, "eval_runtime": 12.2913, "eval_samples_per_second": 129.93, "eval_steps_per_second": 4.068, "step": 561 }, { "epoch": 5.077951002227172, "grad_norm": 15.332996368408203, "learning_rate": 2.7281746031746032e-05, "loss": 0.5617, "step": 570 }, { "epoch": 5.167037861915367, "grad_norm": 14.994087219238281, "learning_rate": 2.6785714285714288e-05, "loss": 0.5797, "step": 580 }, { "epoch": 5.256124721603563, "grad_norm": 13.461969375610352, "learning_rate": 2.628968253968254e-05, "loss": 0.5524, "step": 590 }, { "epoch": 5.3452115812917596, "grad_norm": 12.29080581665039, "learning_rate": 2.5793650793650796e-05, "loss": 0.5824, "step": 600 }, { "epoch": 5.434298440979956, "grad_norm": 11.07197380065918, "learning_rate": 2.529761904761905e-05, "loss": 0.554, "step": 610 }, { "epoch": 5.523385300668151, "grad_norm": 9.797560691833496, "learning_rate": 2.4801587301587305e-05, "loss": 0.5108, "step": 620 }, { "epoch": 5.612472160356347, "grad_norm": 10.469209671020508, "learning_rate": 2.4305555555555558e-05, "loss": 0.5586, "step": 630 }, { "epoch": 5.701559020044543, "grad_norm": 13.22735595703125, "learning_rate": 2.380952380952381e-05, "loss": 0.5358, "step": 640 }, { "epoch": 5.79064587973274, "grad_norm": 8.305063247680664, "learning_rate": 2.3313492063492066e-05, "loss": 0.5295, "step": 650 }, { "epoch": 5.879732739420936, "grad_norm": 18.399051666259766, "learning_rate": 2.281746031746032e-05, "loss": 0.5442, "step": 660 }, { "epoch": 5.968819599109131, "grad_norm": 8.103595733642578, "learning_rate": 2.2321428571428575e-05, "loss": 0.5907, "step": 670 }, { "epoch": 5.99554565701559, "eval_accuracy": 0.8472135253600501, "eval_loss": 0.4555535316467285, "eval_runtime": 12.2927, "eval_samples_per_second": 129.914, "eval_steps_per_second": 4.067, "step": 673 }, { "epoch": 6.057906458797327, "grad_norm": 10.681763648986816, "learning_rate": 2.1825396825396827e-05, "loss": 0.5332, "step": 680 }, { "epoch": 6.146993318485523, "grad_norm": 10.129424095153809, "learning_rate": 2.132936507936508e-05, "loss": 0.4747, "step": 690 }, { "epoch": 6.23608017817372, "grad_norm": 16.834814071655273, "learning_rate": 2.0833333333333336e-05, "loss": 0.5576, "step": 700 }, { "epoch": 6.325167037861915, "grad_norm": 11.258397102355957, "learning_rate": 2.033730158730159e-05, "loss": 0.5063, "step": 710 }, { "epoch": 6.414253897550111, "grad_norm": 15.159914016723633, "learning_rate": 1.984126984126984e-05, "loss": 0.5385, "step": 720 }, { "epoch": 6.503340757238307, "grad_norm": 10.242027282714844, "learning_rate": 1.9345238095238097e-05, "loss": 0.5046, "step": 730 }, { "epoch": 6.5924276169265035, "grad_norm": 10.377813339233398, "learning_rate": 1.884920634920635e-05, "loss": 0.5247, "step": 740 }, { "epoch": 6.6815144766147, "grad_norm": 12.55459213256836, "learning_rate": 1.8353174603174602e-05, "loss": 0.529, "step": 750 }, { "epoch": 6.770601336302895, "grad_norm": 16.02656364440918, "learning_rate": 1.785714285714286e-05, "loss": 0.5073, "step": 760 }, { "epoch": 6.859688195991091, "grad_norm": 16.140487670898438, "learning_rate": 1.736111111111111e-05, "loss": 0.5414, "step": 770 }, { "epoch": 6.948775055679287, "grad_norm": 17.321931838989258, "learning_rate": 1.6865079365079367e-05, "loss": 0.5292, "step": 780 }, { "epoch": 6.993318485523385, "eval_accuracy": 0.8672510958046337, "eval_loss": 0.41685062646865845, "eval_runtime": 12.3673, "eval_samples_per_second": 129.131, "eval_steps_per_second": 4.043, "step": 785 }, { "epoch": 7.0378619153674835, "grad_norm": 9.479693412780762, "learning_rate": 1.636904761904762e-05, "loss": 0.4586, "step": 790 }, { "epoch": 7.12694877505568, "grad_norm": 11.711000442504883, "learning_rate": 1.5873015873015872e-05, "loss": 0.5188, "step": 800 }, { "epoch": 7.216035634743875, "grad_norm": 11.616864204406738, "learning_rate": 1.537698412698413e-05, "loss": 0.5024, "step": 810 }, { "epoch": 7.305122494432071, "grad_norm": 10.370725631713867, "learning_rate": 1.4880952380952381e-05, "loss": 0.4902, "step": 820 }, { "epoch": 7.394209354120267, "grad_norm": 14.04218864440918, "learning_rate": 1.4384920634920635e-05, "loss": 0.5149, "step": 830 }, { "epoch": 7.4832962138084635, "grad_norm": 13.194646835327148, "learning_rate": 1.388888888888889e-05, "loss": 0.5562, "step": 840 }, { "epoch": 7.57238307349666, "grad_norm": 9.960190773010254, "learning_rate": 1.3392857142857144e-05, "loss": 0.4921, "step": 850 }, { "epoch": 7.661469933184855, "grad_norm": 15.14493465423584, "learning_rate": 1.2896825396825398e-05, "loss": 0.471, "step": 860 }, { "epoch": 7.750556792873051, "grad_norm": 11.185235977172852, "learning_rate": 1.2400793650793652e-05, "loss": 0.4963, "step": 870 }, { "epoch": 7.839643652561247, "grad_norm": 12.782095909118652, "learning_rate": 1.1904761904761905e-05, "loss": 0.4915, "step": 880 }, { "epoch": 7.928730512249444, "grad_norm": 11.89919376373291, "learning_rate": 1.140873015873016e-05, "loss": 0.5017, "step": 890 }, { "epoch": 8.0, "eval_accuracy": 0.8597370068879149, "eval_loss": 0.4106651544570923, "eval_runtime": 12.3902, "eval_samples_per_second": 128.892, "eval_steps_per_second": 4.035, "step": 898 }, { "epoch": 8.017817371937639, "grad_norm": 12.601805686950684, "learning_rate": 1.0912698412698414e-05, "loss": 0.5064, "step": 900 }, { "epoch": 8.106904231625835, "grad_norm": 8.723831176757812, "learning_rate": 1.0416666666666668e-05, "loss": 0.4181, "step": 910 }, { "epoch": 8.195991091314031, "grad_norm": 12.781538963317871, "learning_rate": 9.92063492063492e-06, "loss": 0.4427, "step": 920 }, { "epoch": 8.285077951002227, "grad_norm": 12.263012886047363, "learning_rate": 9.424603174603175e-06, "loss": 0.5087, "step": 930 }, { "epoch": 8.374164810690424, "grad_norm": 17.41984748840332, "learning_rate": 8.92857142857143e-06, "loss": 0.5301, "step": 940 }, { "epoch": 8.46325167037862, "grad_norm": 10.731024742126465, "learning_rate": 8.432539682539684e-06, "loss": 0.4987, "step": 950 }, { "epoch": 8.552338530066816, "grad_norm": 15.722013473510742, "learning_rate": 7.936507936507936e-06, "loss": 0.4613, "step": 960 }, { "epoch": 8.64142538975501, "grad_norm": 11.301126480102539, "learning_rate": 7.4404761904761905e-06, "loss": 0.5136, "step": 970 }, { "epoch": 8.730512249443207, "grad_norm": 21.23493194580078, "learning_rate": 6.944444444444445e-06, "loss": 0.494, "step": 980 }, { "epoch": 8.819599109131403, "grad_norm": 10.211363792419434, "learning_rate": 6.448412698412699e-06, "loss": 0.4619, "step": 990 }, { "epoch": 8.908685968819599, "grad_norm": 12.277856826782227, "learning_rate": 5.9523809523809525e-06, "loss": 0.4361, "step": 1000 }, { "epoch": 8.997772828507795, "grad_norm": 12.28085708618164, "learning_rate": 5.456349206349207e-06, "loss": 0.4605, "step": 1010 }, { "epoch": 8.997772828507795, "eval_accuracy": 0.8634940513462742, "eval_loss": 0.40621063113212585, "eval_runtime": 12.3051, "eval_samples_per_second": 129.783, "eval_steps_per_second": 4.063, "step": 1010 }, { "epoch": 9.086859688195991, "grad_norm": 8.358485221862793, "learning_rate": 4.96031746031746e-06, "loss": 0.4403, "step": 1020 }, { "epoch": 9.175946547884188, "grad_norm": 8.35409164428711, "learning_rate": 4.464285714285715e-06, "loss": 0.4514, "step": 1030 }, { "epoch": 9.265033407572384, "grad_norm": 10.057600021362305, "learning_rate": 3.968253968253968e-06, "loss": 0.427, "step": 1040 }, { "epoch": 9.35412026726058, "grad_norm": 7.57137393951416, "learning_rate": 3.4722222222222224e-06, "loss": 0.4619, "step": 1050 }, { "epoch": 9.443207126948774, "grad_norm": 9.249728202819824, "learning_rate": 2.9761904761904763e-06, "loss": 0.4606, "step": 1060 }, { "epoch": 9.53229398663697, "grad_norm": 10.303194046020508, "learning_rate": 2.48015873015873e-06, "loss": 0.508, "step": 1070 }, { "epoch": 9.621380846325167, "grad_norm": 11.307740211486816, "learning_rate": 1.984126984126984e-06, "loss": 0.4667, "step": 1080 }, { "epoch": 9.710467706013363, "grad_norm": 11.34073543548584, "learning_rate": 1.4880952380952381e-06, "loss": 0.4803, "step": 1090 }, { "epoch": 9.799554565701559, "grad_norm": 12.684574127197266, "learning_rate": 9.92063492063492e-07, "loss": 0.4613, "step": 1100 }, { "epoch": 9.888641425389755, "grad_norm": 12.430156707763672, "learning_rate": 4.96031746031746e-07, "loss": 0.434, "step": 1110 }, { "epoch": 9.977728285077951, "grad_norm": 19.223121643066406, "learning_rate": 0.0, "loss": 0.4765, "step": 1120 }, { "epoch": 9.977728285077951, "eval_accuracy": 0.8647463994990607, "eval_loss": 0.3980247676372528, "eval_runtime": 12.312, "eval_samples_per_second": 129.711, "eval_steps_per_second": 4.061, "step": 1120 }, { "epoch": 9.977728285077951, "step": 1120, "total_flos": 3.6084187126879396e+18, "train_loss": 0.7042633635657174, "train_runtime": 2366.3213, "train_samples_per_second": 60.706, "train_steps_per_second": 0.473 } ], "logging_steps": 10, "max_steps": 1120, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.6084187126879396e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }