{
  "best_metric": 0.9616444395459054,
  "best_model_checkpoint": "drive/MyDrive/albertina/checkpoint-3000",
  "epoch": 0.26749888542131073,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 0.1336, "step": 10},
    {"epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.1124, "step": 20},
    {"epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 0.1444, "step": 30},
    {"epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.0761, "step": 40},
    {"epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.1128, "step": 50},
    {"epoch": 0.01, "learning_rate": 1.2000000000000002e-06, "loss": 0.0611, "step": 60},
    {"epoch": 0.01, "learning_rate": 1.4000000000000001e-06, "loss": 0.1892, "step": 70},
    {"epoch": 0.01, "learning_rate": 1.6000000000000001e-06, "loss": 0.0343, "step": 80},
    {"epoch": 0.01, "learning_rate": 1.8000000000000001e-06, "loss": 0.0828, "step": 90},
    {"epoch": 0.01, "learning_rate": 2.0000000000000003e-06, "loss": 0.1669, "step": 100},
    {"epoch": 0.01, "learning_rate": 2.2e-06, "loss": 0.092, "step": 110},
    {"epoch": 0.01, "learning_rate": 2.4000000000000003e-06, "loss": 0.0867, "step": 120},
    {"epoch": 0.01, "learning_rate": 2.6e-06, "loss": 0.0552, "step": 130},
    {"epoch": 0.01, "learning_rate": 2.8000000000000003e-06, "loss": 0.0261, "step": 140},
    {"epoch": 0.01, "learning_rate": 3e-06, "loss": 0.0743, "step": 150},
    {"epoch": 0.01, "learning_rate": 3.2000000000000003e-06, "loss": 0.0904, "step": 160},
    {"epoch": 0.02, "learning_rate": 3.4000000000000005e-06, "loss": 0.0438, "step": 170},
    {"epoch": 0.02, "learning_rate": 3.6000000000000003e-06, "loss": 0.0075, "step": 180},
    {"epoch": 0.02, "learning_rate": 3.8000000000000005e-06, "loss": 0.0208, "step": 190},
    {"epoch": 0.02, "learning_rate": 4.000000000000001e-06, "loss": 0.0486, "step": 200},
    {"epoch": 0.02, "learning_rate": 4.2000000000000004e-06, "loss": 0.2147, "step": 210},
    {"epoch": 0.02, "learning_rate": 4.4e-06, "loss": 0.0606, "step": 220},
    {"epoch": 0.02, "learning_rate": 4.600000000000001e-06, "loss": 0.1771, "step": 230},
    {"epoch": 0.02, "learning_rate": 4.800000000000001e-06, "loss": 0.1196, "step": 240},
    {"epoch": 0.02, "learning_rate": 5e-06, "loss": 0.1299, "step": 250},
    {"epoch": 0.02, "learning_rate": 5.2e-06, "loss": 0.0107, "step": 260},
    {"epoch": 0.02, "learning_rate": 5.400000000000001e-06, "loss": 0.1268, "step": 270},
    {"epoch": 0.02, "learning_rate": 5.600000000000001e-06, "loss": 0.1448, "step": 280},
    {"epoch": 0.03, "learning_rate": 5.8e-06, "loss": 0.0086, "step": 290},
    {"epoch": 0.03, "learning_rate": 6e-06, "loss": 0.0789, "step": 300},
    {"epoch": 0.03, "learning_rate": 6.200000000000001e-06, "loss": 0.0931, "step": 310},
    {"epoch": 0.03, "learning_rate": 6.4000000000000006e-06, "loss": 0.0277, "step": 320},
    {"epoch": 0.03, "learning_rate": 6.600000000000001e-06, "loss": 0.0705, "step": 330},
    {"epoch": 0.03, "learning_rate": 6.800000000000001e-06, "loss": 0.0323, "step": 340},
    {"epoch": 0.03, "learning_rate": 7e-06, "loss": 0.1415, "step": 350},
    {"epoch": 0.03, "learning_rate": 7.2000000000000005e-06, "loss": 0.0234, "step": 360},
    {"epoch": 0.03, "learning_rate": 7.4e-06, "loss": 0.0493, "step": 370},
    {"epoch": 0.03, "learning_rate": 7.600000000000001e-06, "loss": 0.0803, "step": 380},
    {"epoch": 0.03, "learning_rate": 7.800000000000002e-06, "loss": 0.0166, "step": 390},
    {"epoch": 0.04, "learning_rate": 8.000000000000001e-06, "loss": 0.0832, "step": 400},
    {"epoch": 0.04, "learning_rate": 8.2e-06, "loss": 0.0722, "step": 410},
    {"epoch": 0.04, "learning_rate": 8.400000000000001e-06, "loss": 0.0077, "step": 420},
    {"epoch": 0.04, "learning_rate": 8.6e-06, "loss": 0.082, "step": 430},
    {"epoch": 0.04, "learning_rate": 8.8e-06, "loss": 0.0458, "step": 440},
    {"epoch": 0.04, "learning_rate": 9e-06, "loss": 0.0319, "step": 450},
    {"epoch": 0.04, "learning_rate": 9.200000000000002e-06, "loss": 0.096, "step": 460},
    {"epoch": 0.04, "learning_rate": 9.4e-06, "loss": 0.0713, "step": 470},
    {"epoch": 0.04, "learning_rate": 9.600000000000001e-06, "loss": 0.1074, "step": 480},
    {"epoch": 0.04, "learning_rate": 9.800000000000001e-06, "loss": 0.0695, "step": 490},
    {"epoch": 0.04, "learning_rate": 1e-05, "loss": 0.0408, "step": 500},
    {"epoch": 0.05, "learning_rate": 9.998200629779578e-06, "loss": 0.0412, "step": 510},
    {"epoch": 0.05, "learning_rate": 9.996401259559155e-06, "loss": 0.092, "step": 520},
    {"epoch": 0.05, "learning_rate": 9.994601889338731e-06, "loss": 0.0777, "step": 530},
    {"epoch": 0.05, "learning_rate": 9.99280251911831e-06, "loss": 0.0442, "step": 540},
    {"epoch": 0.05, "learning_rate": 9.991003148897887e-06, "loss": 0.1633, "step": 550},
    {"epoch": 0.05, "learning_rate": 9.989203778677464e-06, "loss": 0.0985, "step": 560},
    {"epoch": 0.05, "learning_rate": 9.98740440845704e-06, "loss": 0.0819, "step": 570},
    {"epoch": 0.05, "learning_rate": 9.985605038236617e-06, "loss": 0.1122, "step": 580},
    {"epoch": 0.05, "learning_rate": 9.983805668016196e-06, "loss": 0.0936, "step": 590},
    {"epoch": 0.05, "learning_rate": 9.982006297795773e-06, "loss": 0.0693, "step": 600},
    {"epoch": 0.05, "learning_rate": 9.98020692757535e-06, "loss": 0.0854, "step": 610},
    {"epoch": 0.06, "learning_rate": 9.978407557354927e-06, "loss": 0.137, "step": 620},
    {"epoch": 0.06, "learning_rate": 9.976608187134503e-06, "loss": 0.0019, "step": 630},
    {"epoch": 0.06, "learning_rate": 9.97480881691408e-06, "loss": 0.1362, "step": 640},
    {"epoch": 0.06, "learning_rate": 9.973009446693657e-06, "loss": 0.0923, "step": 650},
    {"epoch": 0.06, "learning_rate": 9.971210076473236e-06, "loss": 0.0557, "step": 660},
    {"epoch": 0.06, "learning_rate": 9.969410706252813e-06, "loss": 0.0505, "step": 670},
    {"epoch": 0.06, "learning_rate": 9.96761133603239e-06, "loss": 0.0414, "step": 680},
    {"epoch": 0.06, "learning_rate": 9.965811965811966e-06, "loss": 0.074, "step": 690},
    {"epoch": 0.06, "learning_rate": 9.964012595591543e-06, "loss": 0.1012, "step": 700},
    {"epoch": 0.06, "learning_rate": 9.962213225371122e-06, "loss": 0.0792, "step": 710},
    {"epoch": 0.06, "learning_rate": 9.960413855150699e-06, "loss": 0.0928, "step": 720},
    {"epoch": 0.07, "learning_rate": 9.958614484930275e-06, "loss": 0.0312, "step": 730},
    {"epoch": 0.07, "learning_rate": 9.956815114709852e-06, "loss": 0.0529, "step": 740},
    {"epoch": 0.07, "learning_rate": 9.955015744489429e-06, "loss": 0.0793, "step": 750},
    {"epoch": 0.07, "learning_rate": 9.953216374269008e-06, "loss": 0.0597, "step": 760},
    {"epoch": 0.07, "learning_rate": 9.951417004048583e-06, "loss": 0.1541, "step": 770},
    {"epoch": 0.07, "learning_rate": 9.949617633828161e-06, "loss": 0.087, "step": 780},
    {"epoch": 0.07, "learning_rate": 9.947818263607738e-06, "loss": 0.034, "step": 790},
    {"epoch": 0.07, "learning_rate": 9.946018893387315e-06, "loss": 0.0948, "step": 800},
    {"epoch": 0.07, "learning_rate": 9.944219523166892e-06, "loss": 0.0078, "step": 810},
    {"epoch": 0.07, "learning_rate": 9.942420152946469e-06, "loss": 0.0797, "step": 820},
    {"epoch": 0.07, "learning_rate": 9.940620782726047e-06, "loss": 0.0859, "step": 830},
    {"epoch": 0.07, "learning_rate": 9.938821412505624e-06, "loss": 0.1256, "step": 840},
    {"epoch": 0.08, "learning_rate": 9.937022042285201e-06, "loss": 0.0775, "step": 850},
    {"epoch": 0.08, "learning_rate": 9.935222672064778e-06, "loss": 0.0539, "step": 860},
    {"epoch": 0.08, "learning_rate": 9.933423301844355e-06, "loss": 0.0795, "step": 870},
    {"epoch": 0.08, "learning_rate": 9.931623931623933e-06, "loss": 0.0099, "step": 880},
    {"epoch": 0.08, "learning_rate": 9.929824561403509e-06, "loss": 0.0253, "step": 890},
    {"epoch": 0.08, "learning_rate": 9.928025191183087e-06, "loss": 0.0442, "step": 900},
    {"epoch": 0.08, "learning_rate": 9.926225820962664e-06, "loss": 0.1785, "step": 910},
    {"epoch": 0.08, "learning_rate": 9.92442645074224e-06, "loss": 0.1241, "step": 920},
    {"epoch": 0.08, "learning_rate": 9.922627080521818e-06, "loss": 0.1332, "step": 930},
    {"epoch": 0.08, "learning_rate": 9.920827710301395e-06, "loss": 0.0407, "step": 940},
    {"epoch": 0.08, "learning_rate": 9.919028340080973e-06, "loss": 0.0546, "step": 950},
    {"epoch": 0.09, "learning_rate": 9.91722896986055e-06, "loss": 0.1157, "step": 960},
    {"epoch": 0.09, "learning_rate": 9.915429599640127e-06, "loss": 0.0697, "step": 970},
    {"epoch": 0.09, "learning_rate": 9.913630229419704e-06, "loss": 0.1253, "step": 980},
    {"epoch": 0.09, "learning_rate": 9.91183085919928e-06, "loss": 0.0205, "step": 990},
    {"epoch": 0.09, "learning_rate": 9.910031488978859e-06, "loss": 0.1289, "step": 1000},
    {"epoch": 0.09, "learning_rate": 9.908232118758436e-06, "loss": 0.0956, "step": 1010},
    {"epoch": 0.09, "learning_rate": 9.906432748538013e-06, "loss": 0.0972, "step": 1020},
    {"epoch": 0.09, "learning_rate": 9.90463337831759e-06, "loss": 0.0199, "step": 1030},
    {"epoch": 0.09, "learning_rate": 9.902834008097167e-06, "loss": 0.0263, "step": 1040},
    {"epoch": 0.09, "learning_rate": 9.901034637876743e-06, "loss": 0.0388, "step": 1050},
    {"epoch": 0.09, "learning_rate": 9.89923526765632e-06, "loss": 0.119, "step": 1060},
    {"epoch": 0.1, "learning_rate": 9.897435897435899e-06, "loss": 0.032, "step": 1070},
    {"epoch": 0.1, "learning_rate": 9.895636527215476e-06, "loss": 0.0877, "step": 1080},
    {"epoch": 0.1, "learning_rate": 9.893837156995053e-06, "loss": 0.0679, "step": 1090},
    {"epoch": 0.1, "learning_rate": 9.89203778677463e-06, "loss": 0.0273, "step": 1100},
    {"epoch": 0.1, "learning_rate": 9.890238416554206e-06, "loss": 0.048, "step": 1110},
    {"epoch": 0.1, "learning_rate": 9.888439046333785e-06, "loss": 0.0054, "step": 1120},
    {"epoch": 0.1, "learning_rate": 9.886639676113362e-06, "loss": 0.0812, "step": 1130},
    {"epoch": 0.1, "learning_rate": 9.884840305892939e-06, "loss": 0.0903, "step": 1140},
    {"epoch": 0.1, "learning_rate": 9.883040935672515e-06, "loss": 0.0092, "step": 1150},
    {"epoch": 0.1, "learning_rate": 9.881241565452092e-06, "loss": 0.0013, "step": 1160},
    {"epoch": 0.1, "learning_rate": 9.879442195231669e-06, "loss": 0.165, "step": 1170},
    {"epoch": 0.11, "learning_rate": 9.877642825011246e-06, "loss": 0.0509, "step": 1180},
    {"epoch": 0.11, "learning_rate": 9.875843454790825e-06, "loss": 0.0653, "step": 1190},
    {"epoch": 0.11, "learning_rate": 9.874044084570401e-06, "loss": 0.0253, "step": 1200},
    {"epoch": 0.11, "learning_rate": 9.872244714349978e-06, "loss": 0.0375, "step": 1210},
    {"epoch": 0.11, "learning_rate": 9.870445344129555e-06, "loss": 0.0689, "step": 1220},
    {"epoch": 0.11, "learning_rate": 9.868645973909132e-06, "loss": 0.0674, "step": 1230},
    {"epoch": 0.11, "learning_rate": 9.86684660368871e-06, "loss": 0.1405, "step": 1240},
    {"epoch": 0.11, "learning_rate": 9.865047233468287e-06, "loss": 0.0604, "step": 1250},
    {"epoch": 0.11, "learning_rate": 9.863247863247864e-06, "loss": 0.0388, "step": 1260},
    {"epoch": 0.11, "learning_rate": 9.861448493027441e-06, "loss": 0.0627, "step": 1270},
    {"epoch": 0.11, "learning_rate": 9.859649122807018e-06, "loss": 0.0909, "step": 1280},
    {"epoch": 0.12, "learning_rate": 9.857849752586597e-06, "loss": 0.0419, "step": 1290},
    {"epoch": 0.12, "learning_rate": 9.856050382366172e-06, "loss": 0.0019, "step": 1300},
    {"epoch": 0.12, "learning_rate": 9.85425101214575e-06, "loss": 0.1776, "step": 1310},
    {"epoch": 0.12, "learning_rate": 9.852451641925327e-06, "loss": 0.003, "step": 1320},
    {"epoch": 0.12, "learning_rate": 9.850652271704904e-06, "loss": 0.0764, "step": 1330},
    {"epoch": 0.12, "learning_rate": 9.84885290148448e-06, "loss": 0.0753, "step": 1340},
    {"epoch": 0.12, "learning_rate": 9.847053531264058e-06, "loss": 0.0831, "step": 1350},
    {"epoch": 0.12, "learning_rate": 9.845254161043636e-06, "loss": 0.1177, "step": 1360},
    {"epoch": 0.12, "learning_rate": 9.843454790823213e-06, "loss": 0.1527, "step": 1370},
    {"epoch": 0.12, "learning_rate": 9.84165542060279e-06, "loss": 0.1304, "step": 1380},
    {"epoch": 0.12, "learning_rate": 9.839856050382367e-06, "loss": 0.0945, "step": 1390},
    {"epoch": 0.12, "learning_rate": 9.838056680161944e-06, "loss": 0.0516, "step": 1400},
    {"epoch": 0.13, "learning_rate": 9.836257309941522e-06, "loss": 0.0216, "step": 1410},
    {"epoch": 0.13, "learning_rate": 9.834457939721097e-06, "loss": 0.1259, "step": 1420},
    {"epoch": 0.13, "learning_rate": 9.832658569500676e-06, "loss": 0.0558, "step": 1430},
    {"epoch": 0.13, "learning_rate": 9.830859199280253e-06, "loss": 0.0711, "step": 1440},
    {"epoch": 0.13, "learning_rate": 9.82905982905983e-06, "loss": 0.1041, "step": 1450},
    {"epoch": 0.13, "learning_rate": 9.827260458839407e-06, "loss": 0.1173, "step": 1460},
    {"epoch": 0.13, "learning_rate": 9.825461088618983e-06, "loss": 0.1194, "step": 1470},
    {"epoch": 0.13, "learning_rate": 9.823661718398562e-06, "loss": 0.1103, "step": 1480},
    {"epoch": 0.13, "learning_rate": 9.821862348178139e-06, "loss": 0.0481, "step": 1490},
    {"epoch": 0.13, "learning_rate": 9.820062977957716e-06, "loss": 0.0413, "step": 1500},
    {
      "epoch": 0.13,
      "eval_accuracy": 0.9769145744512869,
      "eval_f1": 0.9595305320267951,
      "eval_loss": 0.07930105179548264,
      "eval_precision": 0.9639599555061179,
      "eval_recall": 0.955141629009148,
      "eval_runtime": 436.6474,
      "eval_samples_per_second": 72.518,
      "eval_steps_per_second": 4.535,
      "step": 1500
    },
    {"epoch": 0.13, "learning_rate": 9.818263607737293e-06, "loss": 0.0825, "step": 1510},
    {"epoch": 0.14, "learning_rate": 9.81646423751687e-06, "loss": 0.0769, "step": 1520},
    {"epoch": 0.14, "learning_rate": 9.814664867296448e-06, "loss": 0.0901, "step": 1530},
    {"epoch": 0.14, "learning_rate": 9.812865497076025e-06, "loss": 0.0237, "step": 1540},
    {"epoch": 0.14, "learning_rate": 9.811066126855602e-06, "loss": 0.1067, "step": 1550},
    {"epoch": 0.14, "learning_rate": 9.809266756635179e-06, "loss": 0.0536, "step": 1560},
    {"epoch": 0.14, "learning_rate": 9.807467386414755e-06, "loss": 0.0372, "step": 1570},
    {"epoch": 0.14, "learning_rate": 9.805668016194332e-06, "loss": 0.075, "step": 1580},
    {"epoch": 0.14, "learning_rate": 9.803868645973909e-06, "loss": 0.0022, "step": 1590},
    {"epoch": 0.14, "learning_rate": 9.802069275753488e-06, "loss": 0.1077, "step": 1600},
    {"epoch": 0.14, "learning_rate": 9.800269905533065e-06, "loss": 0.019, "step": 1610},
    {"epoch": 0.14, "learning_rate": 9.798470535312641e-06, "loss": 0.0456, "step": 1620},
    {"epoch": 0.15, "learning_rate": 9.796671165092218e-06, "loss": 0.0074, "step": 1630},
    {"epoch": 0.15, "learning_rate": 9.794871794871795e-06, "loss": 0.0944, "step": 1640},
    {"epoch": 0.15, "learning_rate": 9.793072424651374e-06, "loss": 0.0055, "step": 1650},
    {"epoch": 0.15, "learning_rate": 9.79127305443095e-06, "loss": 0.0979, "step": 1660},
    {"epoch": 0.15, "learning_rate": 9.789473684210527e-06, "loss": 0.2072, "step": 1670},
    {"epoch": 0.15, "learning_rate": 9.787674313990104e-06, "loss": 0.1238, "step": 1680},
    {"epoch": 0.15, "learning_rate": 9.785874943769681e-06, "loss": 0.0366, "step": 1690},
    {"epoch": 0.15, "learning_rate": 9.784075573549258e-06, "loss": 0.0754, "step": 1700},
    {"epoch": 0.15, "learning_rate": 9.782276203328835e-06, "loss": 0.042, "step": 1710},
    {"epoch": 0.15, "learning_rate": 9.780476833108413e-06, "loss": 0.0614, "step": 1720},
    {"epoch": 0.15, "learning_rate": 9.77867746288799e-06, "loss": 0.0572, "step": 1730},
    {"epoch": 0.16, "learning_rate": 9.776878092667567e-06, "loss": 0.0323, "step": 1740},
    {"epoch": 0.16, "learning_rate": 9.775078722447144e-06, "loss": 0.058, "step": 1750},
    {"epoch": 0.16, "learning_rate": 9.77327935222672e-06, "loss": 0.0715, "step": 1760},
    {"epoch": 0.16, "learning_rate": 9.7714799820063e-06, "loss": 0.0783, "step": 1770},
    {"epoch": 0.16, "learning_rate": 9.769680611785876e-06, "loss": 0.0982, "step": 1780},
    {"epoch": 0.16, "learning_rate": 9.767881241565453e-06, "loss": 0.0858, "step": 1790},
    {"epoch": 0.16, "learning_rate": 9.76608187134503e-06, "loss": 0.0535, "step": 1800},
    {"epoch": 0.16, "learning_rate": 9.764282501124607e-06, "loss": 0.0447, "step": 1810},
    {"epoch": 0.16, "learning_rate": 9.762483130904185e-06, "loss": 0.0819, "step": 1820},
    {"epoch": 0.16, "learning_rate": 9.76068376068376e-06, "loss": 0.0565, "step": 1830},
    {"epoch": 0.16, "learning_rate": 9.758884390463339e-06, "loss": 0.0829, "step": 1840},
    {"epoch": 0.16, "learning_rate": 9.757085020242916e-06, "loss": 0.1505, "step": 1850},
    {"epoch": 0.17, "learning_rate": 9.755285650022493e-06, "loss": 0.1586, "step": 1860},
    {"epoch": 0.17, "learning_rate": 9.75348627980207e-06, "loss": 0.0098, "step": 1870},
    {"epoch": 0.17, "learning_rate": 9.751686909581647e-06, "loss": 0.1513, "step": 1880},
    {"epoch": 0.17, "learning_rate": 9.749887539361225e-06, "loss": 0.136, "step": 1890},
    {"epoch": 0.17, "learning_rate": 9.748088169140802e-06, "loss": 0.1072, "step": 1900},
    {"epoch": 0.17, "learning_rate": 9.746288798920379e-06, "loss": 0.0829, "step": 1910},
    {"epoch": 0.17, "learning_rate": 9.744489428699956e-06, "loss": 0.0647, "step": 1920},
    {"epoch": 0.17, "learning_rate": 9.742690058479533e-06, "loss": 0.035, "step": 1930},
    {"epoch": 0.17, "learning_rate": 9.740890688259111e-06, "loss": 0.077, "step": 1940},
    {"epoch": 0.17, "learning_rate": 9.739091318038686e-06, "loss": 0.0325, "step": 1950},
    {"epoch": 0.17, "learning_rate": 9.737291947818265e-06, "loss": 0.0155, "step": 1960},
    {"epoch": 0.18, "learning_rate": 9.735492577597842e-06, "loss": 0.041, "step": 1970},
    {"epoch": 0.18, "learning_rate": 9.733693207377419e-06, "loss": 0.1311, "step": 1980},
    {"epoch": 0.18, "learning_rate": 9.731893837156995e-06, "loss": 0.1448, "step": 1990},
    {"epoch": 0.18, "learning_rate": 9.730094466936572e-06, "loss": 0.1902, "step": 2000},
    {"epoch": 0.18, "learning_rate": 9.72829509671615e-06, "loss": 0.1521, "step": 2010},
    {"epoch": 0.18, "learning_rate": 9.726495726495728e-06, "loss": 0.2189, "step": 2020},
    {"epoch": 0.18, "learning_rate": 9.724696356275305e-06, "loss": 0.0697, "step": 2030},
    {"epoch": 0.18, "learning_rate": 9.722896986054881e-06, "loss": 0.0175, "step": 2040},
    {"epoch": 0.18, "learning_rate": 9.721097615834458e-06, "loss": 0.0511, "step": 2050},
    {"epoch": 0.18, "learning_rate": 9.719298245614037e-06, "loss": 0.1222, "step": 2060},
    {"epoch": 0.18, "learning_rate": 9.717498875393614e-06, "loss": 0.0637, "step": 2070},
    {"epoch": 0.19, "learning_rate": 9.71569950517319e-06, "loss": 0.0311, "step": 2080},
    {"epoch": 0.19, "learning_rate": 9.713900134952767e-06, "loss": 0.0493, "step": 2090},
    {"epoch": 0.19, "learning_rate": 9.712100764732344e-06, "loss": 0.0671, "step": 2100},
    {"epoch": 0.19, "learning_rate": 9.710301394511921e-06, "loss": 0.0948, "step": 2110},
    {"epoch": 0.19, "learning_rate": 9.708502024291498e-06, "loss": 0.0475, "step": 2120},
    {"epoch": 0.19, "learning_rate": 9.706702654071076e-06, "loss": 0.042, "step": 2130},
    {"epoch": 0.19, "learning_rate": 9.704903283850653e-06, "loss": 0.0755, "step": 2140},
    {"epoch": 0.19, "learning_rate": 9.70310391363023e-06, "loss": 0.0164, "step": 2150},
    {"epoch": 0.19, "learning_rate": 9.701304543409807e-06, "loss": 0.0568, "step": 2160},
    {"epoch": 0.19, "learning_rate": 9.699505173189384e-06, "loss": 0.0074, "step": 2170},
    {"epoch": 0.19, "learning_rate": 9.697705802968962e-06, "loss": 0.2288, "step": 2180},
    {"epoch": 0.2, "learning_rate": 9.69590643274854e-06, "loss": 0.1608, "step": 2190},
    {"epoch": 0.2, "learning_rate": 9.694107062528116e-06, "loss": 0.0774, "step": 2200},
    {"epoch": 0.2, "learning_rate": 9.692307692307693e-06, "loss": 0.1041, "step": 2210},
    {"epoch": 0.2, "learning_rate": 9.69050832208727e-06, "loss": 0.0561, "step": 2220},
    {"epoch": 0.2, "learning_rate": 9.688708951866847e-06, "loss": 0.1579, "step": 2230},
    {"epoch": 0.2, "learning_rate": 9.686909581646424e-06, "loss": 0.0396, "step": 2240},
    {"epoch": 0.2, "learning_rate": 9.685110211426002e-06, "loss": 0.0969, "step": 2250},
    {"epoch": 0.2, "learning_rate": 9.683310841205579e-06, "loss": 0.0862, "step": 2260},
    {"epoch": 0.2, "learning_rate": 9.681511470985156e-06, "loss": 0.1806, "step": 2270},
    {"epoch": 0.2, "learning_rate": 9.679712100764733e-06, "loss": 0.0316, "step": 2280},
    {"epoch": 0.2, "learning_rate": 9.67791273054431e-06, "loss": 0.059, "step": 2290},
    {"epoch": 0.21, "learning_rate": 9.676113360323888e-06, "loss": 0.0042, "step": 2300},
    {"epoch": 0.21, "learning_rate": 9.674313990103465e-06, "loss": 0.0331, "step": 2310},
    {"epoch": 0.21, "learning_rate": 9.672514619883042e-06, "loss": 0.0941, "step": 2320},
    {"epoch": 0.21, "learning_rate": 9.670715249662619e-06, "loss": 0.0765, "step": 2330},
    {"epoch": 0.21, "learning_rate": 9.668915879442196e-06, "loss": 0.1153, "step": 2340},
    {"epoch": 0.21, "learning_rate": 9.667116509221774e-06, "loss": 0.208, "step": 2350},
    {"epoch": 0.21, "learning_rate": 9.66531713900135e-06, "loss": 0.1071, "step": 2360},
    {"epoch": 0.21, "learning_rate": 9.663517768780928e-06, "loss": 0.0316, "step": 2370},
    {"epoch": 0.21, "learning_rate": 9.661718398560505e-06, "loss": 0.0204, "step": 2380},
    {"epoch": 0.21, "learning_rate": 9.659919028340082e-06, "loss": 0.0503, "step": 2390},
    {"epoch": 0.21, "learning_rate": 9.658119658119659e-06, "loss": 0.1018, "step": 2400},
    {"epoch": 0.21, "learning_rate": 9.656320287899235e-06, "loss": 0.0993, "step": 2410},
    {"epoch": 0.22, "learning_rate": 9.654520917678814e-06, "loss": 0.0304, "step": 2420},
    {"epoch": 0.22, "learning_rate": 9.65272154745839e-06, "loss": 0.0567, "step": 2430},
    {"epoch": 0.22, "learning_rate": 9.650922177237968e-06, "loss": 0.0308, "step": 2440},
    {"epoch": 0.22, "learning_rate": 9.649122807017545e-06, "loss": 0.0647, "step": 2450},
    {"epoch": 0.22, "learning_rate": 9.647323436797121e-06, "loss": 0.1115, "step": 2460},
    {"epoch": 0.22, "learning_rate": 9.6455240665767e-06, "loss": 0.0392, "step": 2470},
    {"epoch": 0.22, "learning_rate": 9.643724696356275e-06, "loss": 0.1018, "step": 2480},
    {"epoch": 0.22, "learning_rate": 9.641925326135854e-06, "loss": 0.0271, "step": 2490},
    {"epoch": 0.22, "learning_rate": 9.64012595591543e-06, "loss": 0.1548, "step": 2500},
    {"epoch": 0.22, "learning_rate": 9.638326585695007e-06, "loss": 0.0135, "step": 2510},
    {"epoch": 0.22, "learning_rate": 9.636527215474584e-06, "loss": 0.0794, "step": 2520},
    {"epoch": 0.23, "learning_rate": 9.634727845254161e-06, "loss": 0.0612, "step": 2530},
    {"epoch": 0.23, "learning_rate": 9.63292847503374e-06, "loss": 0.0375, "step": 2540},
    {"epoch": 0.23, "learning_rate": 9.631129104813316e-06, "loss": 0.0702, "step": 2550},
    {"epoch": 0.23, "learning_rate": 9.629329734592893e-06, "loss": 0.0353, "step": 2560},
    {"epoch": 0.23, "learning_rate": 9.62753036437247e-06, "loss": 0.0771, "step": 2570},
    {"epoch": 0.23, "learning_rate": 9.625730994152047e-06, "loss": 0.0078, "step": 2580},
    {"epoch": 0.23, "learning_rate": 9.623931623931626e-06, "loss": 0.0934, "step": 2590},
    {"epoch": 0.23, "learning_rate": 9.6221322537112e-06, "loss": 0.0922, "step": 2600},
    {"epoch": 0.23, "learning_rate": 9.62033288349078e-06, "loss": 0.0676, "step": 2610},
    {"epoch": 0.23, "learning_rate": 9.618533513270356e-06, "loss": 0.1065, "step": 2620},
    {"epoch": 0.23, "learning_rate": 9.616734143049933e-06, "loss": 0.0321, "step": 2630},
    {"epoch": 0.24, "learning_rate": 9.61493477282951e-06, "loss": 0.0563, "step": 2640},
    {"epoch": 0.24, "learning_rate": 9.613135402609087e-06, "loss": 0.1313, "step": 2650},
    {"epoch": 0.24, "learning_rate": 9.611336032388665e-06, "loss": 0.0676, "step": 2660},
    {"epoch": 0.24, "learning_rate": 9.609536662168242e-06, "loss": 0.0306, "step": 2670},
    {"epoch": 0.24, "learning_rate": 9.607737291947819e-06, "loss": 0.0298, "step": 2680},
    {"epoch": 0.24, "learning_rate": 9.605937921727396e-06, "loss": 0.0494, "step": 2690},
    {"epoch": 0.24, "learning_rate": 9.604138551506973e-06, "loss": 0.024, "step": 2700},
    {"epoch": 0.24, "learning_rate": 9.602339181286551e-06, "loss": 0.0462, "step": 2710},
    {"epoch": 0.24, "learning_rate": 9.600539811066128e-06, "loss": 0.0931, "step": 2720},
    {"epoch": 0.24, "learning_rate": 9.598740440845705e-06, "loss": 0.1424, "step": 2730},
    {"epoch": 0.24, "learning_rate": 9.596941070625282e-06, "loss": 0.0485, "step": 2740},
    {"epoch": 0.25, "learning_rate": 9.595141700404859e-06, "loss": 0.1279, "step": 2750},
    {"epoch": 0.25, "learning_rate": 9.593342330184436e-06, "loss": 0.0226, "step": 2760},
    {"epoch": 0.25, "learning_rate": 9.591542959964013e-06, "loss": 0.0528, "step": 2770},
    {"epoch": 0.25, "learning_rate": 9.589743589743591e-06, "loss": 0.0527, "step": 2780},
    {"epoch": 0.25, "learning_rate": 9.587944219523168e-06, "loss": 0.0817, "step": 2790},
    {"epoch": 0.25, "learning_rate": 9.586144849302745e-06, "loss": 0.0079, "step": 2800},
    {"epoch": 0.25, "learning_rate": 9.584345479082322e-06, "loss": 0.1174, "step": 2810},
    {"epoch": 0.25, "learning_rate": 9.582546108861898e-06, "loss": 0.0392, "step": 2820},
    {"epoch": 0.25, "learning_rate": 9.580746738641477e-06, "loss": 0.0268, "step": 2830},
    {"epoch": 0.25, "learning_rate": 9.578947368421054e-06, "loss": 0.0946, "step": 2840},
    {"epoch": 0.25, "learning_rate": 9.57714799820063e-06, "loss": 0.0524, "step": 2850},
    {"epoch": 0.26, "learning_rate": 9.575348627980208e-06, "loss": 0.0382, "step": 2860},
    {"epoch": 0.26, "learning_rate": 9.573549257759784e-06, "loss": 0.0014, "step": 2870},
    {"epoch": 0.26, "learning_rate": 9.571749887539361e-06, "loss": 0.0695, "step": 2880},
    {"epoch": 0.26, "learning_rate": 9.569950517318938e-06, "loss": 0.0376, "step": 2890},
    {"epoch": 0.26, "learning_rate": 9.568151147098517e-06, "loss": 0.04, "step": 2900},
    {"epoch": 0.26, "learning_rate": 9.566351776878094e-06, "loss": 0.0388, "step": 2910},
    {"epoch": 0.26, "learning_rate": 9.56455240665767e-06, "loss": 0.0649, "step": 2920},
    {"epoch": 0.26, "learning_rate": 9.562753036437247e-06, "loss": 0.1104, "step": 2930},
    {"epoch": 0.26, "learning_rate": 9.560953666216824e-06, "loss": 0.1071, "step": 2940},
    {"epoch": 0.26, "learning_rate": 9.559154295996403e-06, "loss": 0.1165, "step": 2950},
    {"epoch": 0.26, "learning_rate": 9.55735492577598e-06, "loss": 0.0218, "step": 2960},
    {"epoch": 0.26, "learning_rate": 9.555555555555556e-06, "loss": 0.0353, "step": 2970},
    {"epoch": 0.27, "learning_rate": 9.553756185335133e-06, "loss": 0.0902, "step": 2980},
    {"epoch": 0.27, "learning_rate": 9.55195681511471e-06, "loss": 0.0277, "step": 2990},
    {"epoch": 0.27, "learning_rate": 9.550157444894289e-06, "loss": 0.0377, "step": 3000},
    {
      "epoch": 0.27,
      "eval_accuracy": 0.9766303489657351,
      "eval_f1": 0.9593183067619572,
      "eval_loss": 0.09248499572277069,
      "eval_precision": 0.9570034002413075,
      "eval_recall": 0.9616444395459054,
      "eval_runtime": 436.5505,
      "eval_samples_per_second": 72.535,
      "eval_steps_per_second": 4.536,
      "step": 3000
    }
  ],
  "max_steps": 56075,
  "num_train_epochs": 5,
  "total_flos": 2.5414374334464e+16,
  "trial_name": null,
  "trial_params": null
}