|
{ |
|
"best_metric": 0.6882572227399814, |
|
"best_model_checkpoint": "microsoft/swin-base-patch4-window7-224-finetuned-dsc/checkpoint-336", |
|
"epoch": 6.973977695167286, |
|
"eval_steps": 500, |
|
"global_step": 469, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14869888475836432, |
|
"grad_norm": 3.660515308380127, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"loss": 0.5513, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.29739776951672864, |
|
"grad_norm": 6.621876239776611, |
|
"learning_rate": 4.255319148936171e-06, |
|
"loss": 0.5441, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44609665427509293, |
|
"grad_norm": 3.9983763694763184, |
|
"learning_rate": 6.382978723404256e-06, |
|
"loss": 0.514, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5947955390334573, |
|
"grad_norm": 3.805126905441284, |
|
"learning_rate": 8.510638297872341e-06, |
|
"loss": 0.5214, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7434944237918215, |
|
"grad_norm": 22.76973533630371, |
|
"learning_rate": 9.928909952606636e-06, |
|
"loss": 0.5192, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8921933085501859, |
|
"grad_norm": 4.288576126098633, |
|
"learning_rate": 9.691943127962086e-06, |
|
"loss": 0.516, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9962825278810409, |
|
"eval_f1": 0.6873252562907736, |
|
"eval_loss": 0.596564769744873, |
|
"eval_runtime": 41.8919, |
|
"eval_samples_per_second": 51.227, |
|
"eval_steps_per_second": 1.623, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0408921933085502, |
|
"grad_norm": 5.175764083862305, |
|
"learning_rate": 9.454976303317538e-06, |
|
"loss": 0.5326, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1895910780669146, |
|
"grad_norm": 6.2289018630981445, |
|
"learning_rate": 9.218009478672988e-06, |
|
"loss": 0.5109, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.3382899628252787, |
|
"grad_norm": 4.065168857574463, |
|
"learning_rate": 8.981042654028437e-06, |
|
"loss": 0.4875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.486988847583643, |
|
"grad_norm": 6.063797950744629, |
|
"learning_rate": 8.744075829383887e-06, |
|
"loss": 0.5308, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.6356877323420074, |
|
"grad_norm": 7.436602592468262, |
|
"learning_rate": 8.507109004739337e-06, |
|
"loss": 0.5143, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.7843866171003717, |
|
"grad_norm": 6.663632869720459, |
|
"learning_rate": 8.270142180094787e-06, |
|
"loss": 0.4992, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.933085501858736, |
|
"grad_norm": 5.46168327331543, |
|
"learning_rate": 8.033175355450237e-06, |
|
"loss": 0.5235, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9925650557620818, |
|
"eval_f1": 0.684995340167754, |
|
"eval_loss": 0.5976402163505554, |
|
"eval_runtime": 42.0403, |
|
"eval_samples_per_second": 51.046, |
|
"eval_steps_per_second": 1.617, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.0817843866171004, |
|
"grad_norm": 15.794987678527832, |
|
"learning_rate": 7.796208530805689e-06, |
|
"loss": 0.5037, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.2304832713754648, |
|
"grad_norm": 5.5622968673706055, |
|
"learning_rate": 7.559241706161138e-06, |
|
"loss": 0.5009, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.379182156133829, |
|
"grad_norm": 7.298517227172852, |
|
"learning_rate": 7.322274881516588e-06, |
|
"loss": 0.4899, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.5278810408921935, |
|
"grad_norm": 8.912810325622559, |
|
"learning_rate": 7.085308056872039e-06, |
|
"loss": 0.5268, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.6765799256505574, |
|
"grad_norm": 6.775835037231445, |
|
"learning_rate": 6.848341232227489e-06, |
|
"loss": 0.5132, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.825278810408922, |
|
"grad_norm": 5.441810131072998, |
|
"learning_rate": 6.611374407582939e-06, |
|
"loss": 0.495, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.973977695167286, |
|
"grad_norm": 16.180599212646484, |
|
"learning_rate": 6.374407582938389e-06, |
|
"loss": 0.5327, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9888475836431225, |
|
"eval_f1": 0.6873252562907736, |
|
"eval_loss": 0.5961228609085083, |
|
"eval_runtime": 41.9926, |
|
"eval_samples_per_second": 51.104, |
|
"eval_steps_per_second": 1.619, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.1226765799256504, |
|
"grad_norm": 6.712996482849121, |
|
"learning_rate": 6.137440758293839e-06, |
|
"loss": 0.4957, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.2713754646840147, |
|
"grad_norm": 5.289604663848877, |
|
"learning_rate": 5.90047393364929e-06, |
|
"loss": 0.4931, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.420074349442379, |
|
"grad_norm": 5.065057754516602, |
|
"learning_rate": 5.66350710900474e-06, |
|
"loss": 0.5049, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.5687732342007434, |
|
"grad_norm": 5.3732757568359375, |
|
"learning_rate": 5.42654028436019e-06, |
|
"loss": 0.484, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.717472118959108, |
|
"grad_norm": 5.8500800132751465, |
|
"learning_rate": 5.18957345971564e-06, |
|
"loss": 0.5327, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.866171003717472, |
|
"grad_norm": 5.2958831787109375, |
|
"learning_rate": 4.952606635071091e-06, |
|
"loss": 0.5494, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.684995340167754, |
|
"eval_loss": 0.5995984673500061, |
|
"eval_runtime": 42.2701, |
|
"eval_samples_per_second": 50.769, |
|
"eval_steps_per_second": 1.609, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 4.014869888475836, |
|
"grad_norm": 5.300852298736572, |
|
"learning_rate": 4.715639810426541e-06, |
|
"loss": 0.5221, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.163568773234201, |
|
"grad_norm": 9.723475456237793, |
|
"learning_rate": 4.478672985781991e-06, |
|
"loss": 0.5011, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.312267657992565, |
|
"grad_norm": 6.0906782150268555, |
|
"learning_rate": 4.2417061611374415e-06, |
|
"loss": 0.5186, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.4609665427509295, |
|
"grad_norm": 9.154439926147461, |
|
"learning_rate": 4.004739336492891e-06, |
|
"loss": 0.5182, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.609665427509293, |
|
"grad_norm": 8.661027908325195, |
|
"learning_rate": 3.7677725118483417e-06, |
|
"loss": 0.5084, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.758364312267658, |
|
"grad_norm": 4.503110885620117, |
|
"learning_rate": 3.5308056872037916e-06, |
|
"loss": 0.5281, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.907063197026022, |
|
"grad_norm": 9.175127983093262, |
|
"learning_rate": 3.293838862559242e-06, |
|
"loss": 0.5238, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.996282527881041, |
|
"eval_f1": 0.6882572227399814, |
|
"eval_loss": 0.5969865322113037, |
|
"eval_runtime": 42.0429, |
|
"eval_samples_per_second": 51.043, |
|
"eval_steps_per_second": 1.617, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 5.055762081784387, |
|
"grad_norm": 5.675656795501709, |
|
"learning_rate": 3.0568720379146923e-06, |
|
"loss": 0.5225, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.204460966542751, |
|
"grad_norm": 8.187640190124512, |
|
"learning_rate": 2.8199052132701426e-06, |
|
"loss": 0.5238, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.353159851301116, |
|
"grad_norm": 9.604748725891113, |
|
"learning_rate": 2.5829383886255925e-06, |
|
"loss": 0.524, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.5018587360594795, |
|
"grad_norm": 10.201473236083984, |
|
"learning_rate": 2.345971563981043e-06, |
|
"loss": 0.5271, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.650557620817844, |
|
"grad_norm": 5.156704902648926, |
|
"learning_rate": 2.109004739336493e-06, |
|
"loss": 0.524, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.799256505576208, |
|
"grad_norm": 4.2334818840026855, |
|
"learning_rate": 1.8720379146919433e-06, |
|
"loss": 0.5219, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.947955390334572, |
|
"grad_norm": 10.407471656799316, |
|
"learning_rate": 1.6350710900473934e-06, |
|
"loss": 0.522, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.992565055762082, |
|
"eval_f1": 0.6812674743709226, |
|
"eval_loss": 0.6023790240287781, |
|
"eval_runtime": 41.9596, |
|
"eval_samples_per_second": 51.144, |
|
"eval_steps_per_second": 1.621, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 6.096654275092937, |
|
"grad_norm": 5.082653045654297, |
|
"learning_rate": 1.3981042654028437e-06, |
|
"loss": 0.508, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.245353159851301, |
|
"grad_norm": 5.41875696182251, |
|
"learning_rate": 1.161137440758294e-06, |
|
"loss": 0.5053, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.394052044609666, |
|
"grad_norm": 6.110033988952637, |
|
"learning_rate": 9.241706161137441e-07, |
|
"loss": 0.5157, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.5427509293680295, |
|
"grad_norm": 8.269786834716797, |
|
"learning_rate": 6.872037914691944e-07, |
|
"loss": 0.5157, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.691449814126394, |
|
"grad_norm": 6.4608588218688965, |
|
"learning_rate": 4.502369668246446e-07, |
|
"loss": 0.5176, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.840148698884758, |
|
"grad_norm": 7.629857063293457, |
|
"learning_rate": 2.132701421800948e-07, |
|
"loss": 0.5134, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.973977695167286, |
|
"eval_f1": 0.6868592730661697, |
|
"eval_loss": 0.5974318981170654, |
|
"eval_runtime": 42.3025, |
|
"eval_samples_per_second": 50.73, |
|
"eval_steps_per_second": 1.607, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 6.973977695167286, |
|
"step": 469, |
|
"total_flos": 4.691018488784044e+18, |
|
"train_loss": 0.5164371167164622, |
|
"train_runtime": 2212.4689, |
|
"train_samples_per_second": 27.152, |
|
"train_steps_per_second": 0.212 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 469, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.691018488784044e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|