|
{ |
|
"best_metric": 0.8425624321389794, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-Mid-NonMidMarket-Classification/checkpoint-453", |
|
"epoch": 9.884169884169884, |
|
"eval_steps": 500, |
|
"global_step": 640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15444015444015444, |
|
"grad_norm": 6.145582675933838, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 1.1143, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3088803088803089, |
|
"grad_norm": 4.804866313934326, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.9049, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.46332046332046334, |
|
"grad_norm": 4.023855686187744, |
|
"learning_rate": 2.34375e-05, |
|
"loss": 0.8321, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6177606177606177, |
|
"grad_norm": 5.751364707946777, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.7323, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7722007722007722, |
|
"grad_norm": 4.711376667022705, |
|
"learning_rate": 3.90625e-05, |
|
"loss": 0.6296, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9266409266409267, |
|
"grad_norm": 5.55043363571167, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.5809, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9884169884169884, |
|
"eval_accuracy": 0.7937024972855592, |
|
"eval_loss": 0.5023894309997559, |
|
"eval_runtime": 107.4922, |
|
"eval_samples_per_second": 8.568, |
|
"eval_steps_per_second": 0.27, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 4.052460670471191, |
|
"learning_rate": 4.947916666666667e-05, |
|
"loss": 0.5564, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.2355212355212355, |
|
"grad_norm": 4.5774359703063965, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.5529, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3899613899613898, |
|
"grad_norm": 7.030020713806152, |
|
"learning_rate": 4.774305555555556e-05, |
|
"loss": 0.5762, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.5444015444015444, |
|
"grad_norm": 4.231429100036621, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.5017, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.698841698841699, |
|
"grad_norm": 3.0762908458709717, |
|
"learning_rate": 4.6006944444444444e-05, |
|
"loss": 0.5173, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.8532818532818531, |
|
"grad_norm": 3.419095039367676, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.5326, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.9922779922779923, |
|
"eval_accuracy": 0.8132464712269273, |
|
"eval_loss": 0.4402076005935669, |
|
"eval_runtime": 106.2614, |
|
"eval_samples_per_second": 8.667, |
|
"eval_steps_per_second": 0.273, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.0077220077220077, |
|
"grad_norm": 4.168376445770264, |
|
"learning_rate": 4.4270833333333337e-05, |
|
"loss": 0.5082, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 4.488511562347412, |
|
"learning_rate": 4.340277777777778e-05, |
|
"loss": 0.4642, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.3166023166023164, |
|
"grad_norm": 3.359255313873291, |
|
"learning_rate": 4.253472222222222e-05, |
|
"loss": 0.4952, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.471042471042471, |
|
"grad_norm": 4.701355457305908, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4925, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.6254826254826256, |
|
"grad_norm": 4.631283283233643, |
|
"learning_rate": 4.0798611111111115e-05, |
|
"loss": 0.4795, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.7799227799227797, |
|
"grad_norm": 4.823609352111816, |
|
"learning_rate": 3.993055555555556e-05, |
|
"loss": 0.4878, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.9343629343629343, |
|
"grad_norm": 3.890035390853882, |
|
"learning_rate": 3.90625e-05, |
|
"loss": 0.4626, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.9961389961389964, |
|
"eval_accuracy": 0.8284473398479913, |
|
"eval_loss": 0.4243987202644348, |
|
"eval_runtime": 107.2966, |
|
"eval_samples_per_second": 8.584, |
|
"eval_steps_per_second": 0.27, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 3.088803088803089, |
|
"grad_norm": 3.165153741836548, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 0.4434, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.2432432432432434, |
|
"grad_norm": 3.860111713409424, |
|
"learning_rate": 3.7326388888888893e-05, |
|
"loss": 0.4383, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.3976833976833976, |
|
"grad_norm": 7.453273296356201, |
|
"learning_rate": 3.6458333333333336e-05, |
|
"loss": 0.4432, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.552123552123552, |
|
"grad_norm": 5.200356483459473, |
|
"learning_rate": 3.559027777777778e-05, |
|
"loss": 0.4766, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.7065637065637067, |
|
"grad_norm": 4.4838433265686035, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.4428, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.861003861003861, |
|
"grad_norm": 3.7621071338653564, |
|
"learning_rate": 3.385416666666667e-05, |
|
"loss": 0.4778, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8273615635179153, |
|
"eval_loss": 0.42337119579315186, |
|
"eval_runtime": 106.8233, |
|
"eval_samples_per_second": 8.622, |
|
"eval_steps_per_second": 0.271, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 4.015444015444015, |
|
"grad_norm": 4.093021869659424, |
|
"learning_rate": 3.2986111111111115e-05, |
|
"loss": 0.4306, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.1698841698841695, |
|
"grad_norm": 3.669459581375122, |
|
"learning_rate": 3.211805555555556e-05, |
|
"loss": 0.4606, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.324324324324325, |
|
"grad_norm": 4.8231892585754395, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.3992, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.478764478764479, |
|
"grad_norm": 4.587674617767334, |
|
"learning_rate": 3.0381944444444444e-05, |
|
"loss": 0.4024, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.633204633204633, |
|
"grad_norm": 9.029142379760742, |
|
"learning_rate": 2.951388888888889e-05, |
|
"loss": 0.4408, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.787644787644788, |
|
"grad_norm": 3.7836337089538574, |
|
"learning_rate": 2.8645833333333333e-05, |
|
"loss": 0.4332, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.942084942084942, |
|
"grad_norm": 5.781239032745361, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.4109, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.988416988416988, |
|
"eval_accuracy": 0.8306188925081434, |
|
"eval_loss": 0.4197309613227844, |
|
"eval_runtime": 106.1724, |
|
"eval_samples_per_second": 8.675, |
|
"eval_steps_per_second": 0.273, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 5.096525096525096, |
|
"grad_norm": 4.933718204498291, |
|
"learning_rate": 2.6909722222222222e-05, |
|
"loss": 0.4144, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.250965250965251, |
|
"grad_norm": 4.699091911315918, |
|
"learning_rate": 2.604166666666667e-05, |
|
"loss": 0.4191, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.405405405405405, |
|
"grad_norm": 5.412081718444824, |
|
"learning_rate": 2.517361111111111e-05, |
|
"loss": 0.4314, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.559845559845559, |
|
"grad_norm": 3.4998362064361572, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.3793, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 3.954893112182617, |
|
"learning_rate": 2.34375e-05, |
|
"loss": 0.3815, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.8687258687258685, |
|
"grad_norm": 4.797443866729736, |
|
"learning_rate": 2.2569444444444447e-05, |
|
"loss": 0.3764, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.992277992277993, |
|
"eval_accuracy": 0.8295331161780674, |
|
"eval_loss": 0.4095376133918762, |
|
"eval_runtime": 106.8074, |
|
"eval_samples_per_second": 8.623, |
|
"eval_steps_per_second": 0.272, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 6.023166023166024, |
|
"grad_norm": 6.05122709274292, |
|
"learning_rate": 2.170138888888889e-05, |
|
"loss": 0.3947, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.177606177606178, |
|
"grad_norm": 4.6088666915893555, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.3712, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.332046332046332, |
|
"grad_norm": 3.9029107093811035, |
|
"learning_rate": 1.996527777777778e-05, |
|
"loss": 0.359, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.486486486486487, |
|
"grad_norm": 4.032750129699707, |
|
"learning_rate": 1.9097222222222222e-05, |
|
"loss": 0.4075, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.640926640926641, |
|
"grad_norm": 4.65377140045166, |
|
"learning_rate": 1.8229166666666668e-05, |
|
"loss": 0.3921, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.795366795366795, |
|
"grad_norm": 5.404110908508301, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 0.3905, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.94980694980695, |
|
"grad_norm": 4.541908264160156, |
|
"learning_rate": 1.6493055555555557e-05, |
|
"loss": 0.3725, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.996138996138996, |
|
"eval_accuracy": 0.8425624321389794, |
|
"eval_loss": 0.4046495258808136, |
|
"eval_runtime": 105.8848, |
|
"eval_samples_per_second": 8.698, |
|
"eval_steps_per_second": 0.274, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 7.104247104247104, |
|
"grad_norm": 4.969923496246338, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.375, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.258687258687258, |
|
"grad_norm": 4.746829986572266, |
|
"learning_rate": 1.4756944444444445e-05, |
|
"loss": 0.3536, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.413127413127413, |
|
"grad_norm": 6.098570823669434, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.3418, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.5675675675675675, |
|
"grad_norm": 9.79119873046875, |
|
"learning_rate": 1.3020833333333334e-05, |
|
"loss": 0.3769, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.722007722007722, |
|
"grad_norm": 7.424502849578857, |
|
"learning_rate": 1.2152777777777779e-05, |
|
"loss": 0.3598, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.876447876447877, |
|
"grad_norm": 3.9304542541503906, |
|
"learning_rate": 1.1284722222222223e-05, |
|
"loss": 0.3583, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8371335504885994, |
|
"eval_loss": 0.4108859896659851, |
|
"eval_runtime": 107.4011, |
|
"eval_samples_per_second": 8.575, |
|
"eval_steps_per_second": 0.27, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 8.03088803088803, |
|
"grad_norm": 4.0118632316589355, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.36, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.185328185328185, |
|
"grad_norm": 6.711050510406494, |
|
"learning_rate": 9.548611111111111e-06, |
|
"loss": 0.3427, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.339768339768339, |
|
"grad_norm": 4.516994476318359, |
|
"learning_rate": 8.680555555555556e-06, |
|
"loss": 0.3335, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.494208494208495, |
|
"grad_norm": 7.737695217132568, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 0.3658, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.64864864864865, |
|
"grad_norm": 5.0886759757995605, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.3635, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.803088803088803, |
|
"grad_norm": 3.6143264770507812, |
|
"learning_rate": 6.076388888888889e-06, |
|
"loss": 0.3493, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.957528957528957, |
|
"grad_norm": 4.734116554260254, |
|
"learning_rate": 5.208333333333334e-06, |
|
"loss": 0.3451, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.988416988416988, |
|
"eval_accuracy": 0.8349619978284474, |
|
"eval_loss": 0.4170722961425781, |
|
"eval_runtime": 108.6012, |
|
"eval_samples_per_second": 8.481, |
|
"eval_steps_per_second": 0.267, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 9.111969111969112, |
|
"grad_norm": 4.281556129455566, |
|
"learning_rate": 4.340277777777778e-06, |
|
"loss": 0.3268, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.266409266409266, |
|
"grad_norm": 3.9609580039978027, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.371, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.420849420849422, |
|
"grad_norm": 5.559605598449707, |
|
"learning_rate": 2.604166666666667e-06, |
|
"loss": 0.3446, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.575289575289576, |
|
"grad_norm": 4.212625980377197, |
|
"learning_rate": 1.7361111111111112e-06, |
|
"loss": 0.3206, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.72972972972973, |
|
"grad_norm": 3.9171366691589355, |
|
"learning_rate": 8.680555555555556e-07, |
|
"loss": 0.3654, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.884169884169884, |
|
"grad_norm": 5.055610179901123, |
|
"learning_rate": 0.0, |
|
"loss": 0.3351, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.884169884169884, |
|
"eval_accuracy": 0.8403908794788274, |
|
"eval_loss": 0.41527804732322693, |
|
"eval_runtime": 106.36, |
|
"eval_samples_per_second": 8.659, |
|
"eval_steps_per_second": 0.273, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.884169884169884, |
|
"step": 640, |
|
"total_flos": 2.0360358039744737e+18, |
|
"train_loss": 0.45154881179332734, |
|
"train_runtime": 9581.2043, |
|
"train_samples_per_second": 8.649, |
|
"train_steps_per_second": 0.067 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0360358039744737e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|