{
  "best_metric": null,
  "best_model_checkpoint": null,
  "best_supernet_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 41175,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "compression_loss": 0.0,
      "epoch": 0.06,
      "learning_rate": 2.9989346402731773e-05,
      "loss": 5.5867,
      "step": 500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.12,
      "learning_rate": 2.9956882021999416e-05,
      "loss": 3.1338,
      "step": 1000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.18,
      "learning_rate": 2.990265252786665e-05,
      "loss": 2.5776,
      "step": 1500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.24,
      "learning_rate": 2.982673683458095e-05,
      "loss": 2.3185,
      "step": 2000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.3,
      "learning_rate": 2.972924541394191e-05,
      "loss": 2.0948,
      "step": 2500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.36,
      "learning_rate": 2.9610320134543718e-05,
      "loss": 1.9616,
      "step": 3000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.43,
      "learning_rate": 2.9470134055329297e-05,
      "loss": 1.8149,
      "step": 3500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.49,
      "learning_rate": 2.9308891173756593e-05,
      "loss": 1.7187,
      "step": 4000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.55,
      "learning_rate": 2.9126826128943387e-05,
      "loss": 1.6408,
      "step": 4500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.61,
      "learning_rate": 2.892420386022268e-05,
      "loss": 1.5817,
      "step": 5000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.67,
      "learning_rate": 2.8701319221605467e-05,
      "loss": 1.4504,
      "step": 5500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.73,
      "learning_rate": 2.8458496552711964e-05,
      "loss": 1.4507,
      "step": 6000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.79,
      "learning_rate": 2.819608920679567e-05,
      "loss": 1.4081,
      "step": 6500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.85,
      "learning_rate": 2.7915061153522062e-05,
      "loss": 1.3659,
      "step": 7000
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.91,
      "learning_rate": 2.7614695114198718e-05,
      "loss": 1.3084,
      "step": 7500
    },
    {
      "compression_loss": 0.0,
      "epoch": 0.97,
      "learning_rate": 2.7295972289733377e-05,
      "loss": 1.2903,
      "step": 8000
    },
    {
      "Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])",
      "epoch": 1.0,
      "eval_HasAns_exact": 67.15587044534414,
      "eval_HasAns_f1": 73.70381219321077,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 76.23212783851976,
      "eval_NoAns_f1": 76.23212783851976,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 71.70049692579803,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 74.96978006244028,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 71.70049692579803,
      "eval_f1": 74.96978006244046,
      "eval_runtime": 23.6796,
      "eval_samples_per_second": 512.425,
      "eval_steps_per_second": 4.012,
      "eval_total": 11873,
      "step": 8235
    },
    {
      "SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
      "epoch": 1.0,
      "eval_HasAns_exact": 70.73211875843455,
      "eval_HasAns_f1": 76.86664314060151,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 77.19091673675358,
      "eval_NoAns_f1": 77.19091673675358,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 73.9661416659648,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 77.02901208940321,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 73.9661416659648,
      "eval_f1": 77.02901208940332,
      "eval_runtime": 33.7937,
      "eval_samples_per_second": 359.061,
      "eval_steps_per_second": 2.811,
      "eval_total": 11873,
      "step": 8235
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.03,
      "learning_rate": 2.6960047250064117e-05,
      "loss": 1.047,
      "step": 8500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.09,
      "learning_rate": 2.6606062600812005e-05,
      "loss": 0.8362,
      "step": 9000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.15,
      "learning_rate": 2.6235947166838656e-05,
      "loss": 0.8438,
      "step": 9500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.21,
      "learning_rate": 2.5848756288308777e-05,
      "loss": 0.8345,
      "step": 10000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.28,
      "learning_rate": 2.5445778402131093e-05,
      "loss": 0.7877,
      "step": 10500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.34,
      "learning_rate": 2.502759991791811e-05,
      "loss": 0.8156,
      "step": 11000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.4,
      "learning_rate": 2.4595709053900474e-05,
      "loss": 0.7898,
      "step": 11500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.46,
      "learning_rate": 2.4149003478440486e-05,
      "loss": 0.8215,
      "step": 12000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.52,
      "learning_rate": 2.3688984359561745e-05,
      "loss": 0.7959,
      "step": 12500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.58,
      "learning_rate": 2.321632111273962e-05,
      "loss": 0.7867,
      "step": 13000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.64,
      "learning_rate": 2.2731701553064993e-05,
      "loss": 0.7743,
      "step": 13500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.7,
      "learning_rate": 2.2235830894342245e-05,
      "loss": 0.7451,
      "step": 14000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.76,
      "learning_rate": 2.172943072286878e-05,
      "loss": 0.7544,
      "step": 14500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.82,
      "learning_rate": 2.1213237947389485e-05,
      "loss": 0.7433,
      "step": 15000
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.88,
      "learning_rate": 2.0688003726754053e-05,
      "loss": 0.7204,
      "step": 15500
    },
    {
      "compression_loss": 0.0,
      "epoch": 1.94,
      "learning_rate": 2.015556714598373e-05,
      "loss": 0.7353,
      "step": 16000
    },
    {
      "Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])",
      "epoch": 2.0,
      "eval_HasAns_exact": 70.8502024291498,
      "eval_HasAns_f1": 78.09609572680118,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 73.8099243061396,
      "eval_NoAns_f1": 73.8099243061396,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 72.33218226227575,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 75.94994150328284,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 72.33218226227575,
      "eval_f1": 75.94994150328287,
      "eval_runtime": 23.4965,
      "eval_samples_per_second": 516.418,
      "eval_steps_per_second": 4.043,
      "eval_total": 11873,
      "step": 16470
    },
    {
      "SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
      "epoch": 2.0,
      "eval_HasAns_exact": 73.65047233468286,
      "eval_HasAns_f1": 80.28842836522877,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 75.57611438183348,
      "eval_NoAns_f1": 75.57611438183348,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 74.6146719447486,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 77.92889778060096,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 74.6146719447486,
      "eval_f1": 77.92889778060105,
      "eval_runtime": 33.5553,
      "eval_samples_per_second": 361.612,
      "eval_steps_per_second": 2.831,
      "eval_total": 11873,
      "step": 16470
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.0,
      "learning_rate": 1.961674714301095e-05,
      "loss": 0.711,
      "step": 16500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.06,
      "learning_rate": 1.907016099695466e-05,
      "loss": 0.4295,
      "step": 17000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.13,
      "learning_rate": 1.851656308325107e-05,
      "loss": 0.4419,
      "step": 17500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.19,
      "learning_rate": 1.795784790017448e-05,
      "loss": 0.4418,
      "step": 18000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.25,
      "learning_rate": 1.7394828484780373e-05,
      "loss": 0.4349,
      "step": 18500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.31,
      "learning_rate": 1.682832413760241e-05,
      "loss": 0.4178,
      "step": 19000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.37,
      "learning_rate": 1.6259159230414073e-05,
      "loss": 0.4274,
      "step": 19500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.43,
      "learning_rate": 1.5688162006610627e-05,
      "loss": 0.4216,
      "step": 20000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.49,
      "learning_rate": 1.5116163375957171e-05,
      "loss": 0.3889,
      "step": 20500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.55,
      "learning_rate": 1.4543995705456567e-05,
      "loss": 0.4117,
      "step": 21000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.61,
      "learning_rate": 1.3973633400994553e-05,
      "loss": 0.4197,
      "step": 21500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.67,
      "learning_rate": 1.3404758687969215e-05,
      "loss": 0.3958,
      "step": 22000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.73,
      "learning_rate": 1.283706355615611e-05,
      "loss": 0.4128,
      "step": 22500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.79,
      "learning_rate": 1.2272515909019886e-05,
      "loss": 0.4129,
      "step": 23000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.85,
      "learning_rate": 1.1711937270962604e-05,
      "loss": 0.3932,
      "step": 23500
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.91,
      "learning_rate": 1.1156143390721824e-05,
      "loss": 0.3946,
      "step": 24000
    },
    {
      "compression_loss": 0.0,
      "epoch": 2.98,
      "learning_rate": 1.0605943054300711e-05,
      "loss": 0.3972,
      "step": 24500
    },
    {
      "Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])",
      "epoch": 3.0,
      "eval_HasAns_exact": 72.48650472334683,
      "eval_HasAns_f1": 79.20784888444516,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 74.4659377628259,
      "eval_NoAns_f1": 74.4659377628259,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 73.47763833908868,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 76.83349854181672,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 73.47763833908868,
      "eval_f1": 76.83349854181674,
      "eval_runtime": 23.7252,
      "eval_samples_per_second": 511.439,
      "eval_steps_per_second": 4.004,
      "eval_total": 11873,
      "step": 24705
    },
    {
      "SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
      "epoch": 3.0,
      "eval_HasAns_exact": 74.61201079622133,
      "eval_HasAns_f1": 81.01977814822307,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 77.17409587888983,
      "eval_NoAns_f1": 77.17409587888983,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 75.8948875600101,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 79.09418385097837,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 75.8948875600101,
      "eval_f1": 79.09418385097838,
      "eval_runtime": 33.7285,
      "eval_samples_per_second": 359.755,
      "eval_steps_per_second": 2.817,
      "eval_total": 11873,
      "step": 24705
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.04,
      "learning_rate": 1.0063217611277036e-05,
      "loss": 0.3297,
      "step": 25000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.1,
      "learning_rate": 9.5276474237082e-06,
      "loss": 0.2814,
      "step": 25500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.16,
      "learning_rate": 8.998959819917635e-06,
      "loss": 0.2885,
      "step": 26000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.22,
      "learning_rate": 8.479004873097141e-06,
      "loss": 0.2824,
      "step": 26500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.28,
      "learning_rate": 7.968539216772597e-06,
      "loss": 0.2794,
      "step": 27000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.34,
      "learning_rate": 7.468305675744049e-06,
      "loss": 0.279,
      "step": 27500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.4,
      "learning_rate": 6.979032185133999e-06,
      "loss": 0.2758,
      "step": 28000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.46,
      "learning_rate": 6.501430731103145e-06,
      "loss": 0.2828,
      "step": 28500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.52,
      "learning_rate": 6.036196314774858e-06,
      "loss": 0.2727,
      "step": 29000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.58,
      "learning_rate": 5.584005940876061e-06,
      "loss": 0.2761,
      "step": 29500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.64,
      "learning_rate": 5.145517632566403e-06,
      "loss": 0.2683,
      "step": 30000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.7,
      "learning_rate": 4.721369473889145e-06,
      "loss": 0.2731,
      "step": 30500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.76,
      "learning_rate": 4.3129817358827764e-06,
      "loss": 0.2653,
      "step": 31000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.83,
      "learning_rate": 3.919312074840663e-06,
      "loss": 0.2557,
      "step": 31500
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.89,
      "learning_rate": 3.541766926172923e-06,
      "loss": 0.2702,
      "step": 32000
    },
    {
      "compression_loss": 0.0,
      "epoch": 3.95,
      "learning_rate": 3.18089569001271e-06,
      "loss": 0.2674,
      "step": 32500
    },
    {
      "Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])",
      "epoch": 4.0,
      "eval_HasAns_exact": 73.6336032388664,
      "eval_HasAns_f1": 80.4921220873396,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 73.4735071488646,
      "eval_NoAns_f1": 73.4735071488646,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 73.55344057946601,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 76.97778992114449,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 73.55344057946601,
      "eval_f1": 76.97778992114453,
      "eval_runtime": 23.6627,
      "eval_samples_per_second": 512.79,
      "eval_steps_per_second": 4.015,
      "eval_total": 11873,
      "step": 32940
    },
    {
      "SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
      "epoch": 4.0,
      "eval_HasAns_exact": 75.94466936572199,
      "eval_HasAns_f1": 82.29694376223831,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 75.20605550883096,
      "eval_NoAns_f1": 75.20605550883096,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 75.57483365619473,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 78.74642319738464,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 75.57483365619473,
      "eval_f1": 78.7464231973847,
      "eval_runtime": 33.7341,
      "eval_samples_per_second": 359.695,
      "eval_steps_per_second": 2.816,
      "eval_total": 11873,
      "step": 32940
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.01,
      "learning_rate": 2.83856325972758e-06,
      "loss": 0.2597,
      "step": 33000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.07,
      "learning_rate": 2.5125184709830478e-06,
      "loss": 0.2285,
      "step": 33500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.13,
      "learning_rate": 2.204645347373324e-06,
      "loss": 0.2257,
      "step": 34000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.19,
      "learning_rate": 1.9153919029589925e-06,
      "loss": 0.2191,
      "step": 34500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.25,
      "learning_rate": 1.6457002137685011e-06,
      "loss": 0.2288,
      "step": 35000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.31,
      "learning_rate": 1.394881935346834e-06,
      "loss": 0.2239,
      "step": 35500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.37,
      "learning_rate": 1.1638616961868014e-06,
      "loss": 0.2249,
      "step": 36000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.43,
      "learning_rate": 9.529756747576662e-07,
      "loss": 0.2158,
      "step": 36500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.49,
      "learning_rate": 7.625307504052814e-07,
      "loss": 0.2238,
      "step": 37000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.55,
      "learning_rate": 5.928040567841786e-07,
      "loss": 0.2208,
      "step": 37500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.61,
      "learning_rate": 4.4404257857527e-07,
      "loss": 0.2302,
      "step": 38000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.68,
      "learning_rate": 3.1646279207609816e-07,
      "loss": 0.2286,
      "step": 38500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.74,
      "learning_rate": 2.1025035018655492e-07,
      "loss": 0.2334,
      "step": 39000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.8,
      "learning_rate": 1.2555981224854353e-07,
      "loss": 0.2191,
      "step": 39500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.86,
      "learning_rate": 6.251441913268763e-08,
      "loss": 0.2244,
      "step": 40000
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.92,
      "learning_rate": 2.1205913899372543e-08,
      "loss": 0.2174,
      "step": 40500
    },
    {
      "compression_loss": 0.0,
      "epoch": 4.98,
      "learning_rate": 1.7116537419809008e-09,
      "loss": 0.2197,
      "step": 41000
    },
    {
      "Minimum SubNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 576, 1: 448, 2: 576, 3: 576, 4: 576, 5: 448, 6: 576, 7: 512, 8: 576, 9: 320, 10: 384, 11: 384, 12: 966, 13: 1052, 14: 1040, 15: 1007, 16: 989, 17: 938, 18: 789, 19: 693, 20: 517, 21: 280, 22: 332, 23: 505})])",
      "epoch": 5.0,
      "eval_HasAns_exact": 73.36369770580296,
      "eval_HasAns_f1": 80.20962919069306,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 74.26408746846089,
      "eval_NoAns_f1": 74.26408746846089,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 73.81453718521014,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 77.23260185651709,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 73.81453718521014,
      "eval_f1": 77.23260185651719,
      "eval_runtime": 23.6601,
      "eval_samples_per_second": 512.846,
      "eval_steps_per_second": 4.015,
      "eval_total": 11873,
      "step": 41175
    },
    {
      "SuperNet": "OrderedDict([(<ElasticityDim.WIDTH: 'width'>, {0: 768, 1: 768, 2: 768, 3: 768, 4: 768, 5: 768, 6: 768, 7: 768, 8: 768, 9: 768, 10: 768, 11: 768, 12: 3072, 13: 3072, 14: 3072, 15: 3072, 16: 3072, 17: 3072, 18: 3072, 19: 3072, 20: 3072, 21: 3072, 22: 3072, 23: 3072})])",
      "epoch": 5.0,
      "eval_HasAns_exact": 75.42172739541161,
      "eval_HasAns_f1": 81.86740867460124,
      "eval_HasAns_total": 5928,
      "eval_NoAns_exact": 75.34062237174096,
      "eval_NoAns_f1": 75.34062237174096,
      "eval_NoAns_total": 5945,
      "eval_best_exact": 75.38111681967489,
      "eval_best_exact_thresh": 0.0,
      "eval_best_f1": 78.59934293127556,
      "eval_best_f1_thresh": 0.0,
      "eval_exact": 75.38111681967489,
      "eval_f1": 78.59934293127563,
      "eval_runtime": 33.7058,
      "eval_samples_per_second": 359.997,
      "eval_steps_per_second": 2.819,
      "eval_total": 11873,
      "step": 41175
    },
    {
      "epoch": 5.0,
      "step": 41175,
      "total_flos": 1.2910084832623104e+17,
      "train_loss": 0.7438544612808089,
      "train_runtime": 64410.4338,
      "train_samples_per_second": 10.228,
      "train_steps_per_second": 0.639
    }
  ],
  "max_steps": 41175,
  "min_subnet_acc": null,
  "min_subnet_best_acc": null,
  "num_train_epochs": 5,
  "supernet_acc": null,
  "supernet_best_acc": null,
  "total_flos": 1.2910084832623104e+17,
  "trial_name": null,
  "trial_params": null
}