|
{ |
|
"best_metric": 0.4984971284866333, |
|
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_small_twitter/checkpoint-200", |
|
"epoch": 3.0, |
|
"eval_steps": 50, |
|
"global_step": 816, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.320362091064453, |
|
"learning_rate": 0.0004938725490196079, |
|
"loss": 0.6107, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.2164921760559082, |
|
"learning_rate": 0.0004877450980392157, |
|
"loss": 0.585, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.7246904373168945, |
|
"learning_rate": 0.00048161764705882356, |
|
"loss": 0.5643, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.0919380187988281, |
|
"learning_rate": 0.00047549019607843134, |
|
"loss": 0.5213, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.6304311752319336, |
|
"learning_rate": 0.0004693627450980392, |
|
"loss": 0.5055, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.7536764705882353, |
|
"eval_f1_macro": 0.7230132041417308, |
|
"eval_f1_micro": 0.7536764705882353, |
|
"eval_loss": 0.5209915041923523, |
|
"eval_runtime": 1.0764, |
|
"eval_samples_per_second": 1010.761, |
|
"eval_steps_per_second": 31.586, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.424161195755005, |
|
"learning_rate": 0.0004632352941176471, |
|
"loss": 0.5007, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.3720046281814575, |
|
"learning_rate": 0.0004571078431372549, |
|
"loss": 0.5132, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.943393588066101, |
|
"learning_rate": 0.0004509803921568628, |
|
"loss": 0.4845, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.9219595193862915, |
|
"learning_rate": 0.00044485294117647056, |
|
"loss": 0.4548, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.8596079349517822, |
|
"learning_rate": 0.00043872549019607844, |
|
"loss": 0.5045, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.7444852941176471, |
|
"eval_f1_macro": 0.6934304201871463, |
|
"eval_f1_micro": 0.7444852941176471, |
|
"eval_loss": 0.501828134059906, |
|
"eval_runtime": 1.1287, |
|
"eval_samples_per_second": 963.908, |
|
"eval_steps_per_second": 30.122, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.045985460281372, |
|
"learning_rate": 0.0004325980392156863, |
|
"loss": 0.4759, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.124852180480957, |
|
"learning_rate": 0.0004264705882352941, |
|
"loss": 0.509, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.3049920797348022, |
|
"learning_rate": 0.000420343137254902, |
|
"loss": 0.4681, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.5385198593139648, |
|
"learning_rate": 0.0004142156862745098, |
|
"loss": 0.51, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.8772047162055969, |
|
"learning_rate": 0.00040808823529411766, |
|
"loss": 0.4727, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.7242647058823529, |
|
"eval_f1_macro": 0.6002488646773823, |
|
"eval_f1_micro": 0.7242647058823529, |
|
"eval_loss": 0.5356110334396362, |
|
"eval_runtime": 1.1278, |
|
"eval_samples_per_second": 964.717, |
|
"eval_steps_per_second": 30.147, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.149582028388977, |
|
"learning_rate": 0.0004019607843137255, |
|
"loss": 0.4921, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.1237943172454834, |
|
"learning_rate": 0.0003958333333333333, |
|
"loss": 0.5149, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.629279613494873, |
|
"learning_rate": 0.0003897058823529412, |
|
"loss": 0.5094, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.2666165828704834, |
|
"learning_rate": 0.00038357843137254904, |
|
"loss": 0.4891, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 1.3141692876815796, |
|
"learning_rate": 0.0003774509803921569, |
|
"loss": 0.4924, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7472426470588235, |
|
"eval_f1_macro": 0.6953600443112663, |
|
"eval_f1_micro": 0.7472426470588235, |
|
"eval_loss": 0.4984971284866333, |
|
"eval_runtime": 1.078, |
|
"eval_samples_per_second": 1009.235, |
|
"eval_steps_per_second": 31.539, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.6602263450622559, |
|
"learning_rate": 0.0003713235294117647, |
|
"loss": 0.4878, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.3005311489105225, |
|
"learning_rate": 0.00036519607843137254, |
|
"loss": 0.5078, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 1.2845886945724487, |
|
"learning_rate": 0.0003590686274509804, |
|
"loss": 0.4773, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.7171024084091187, |
|
"learning_rate": 0.00035294117647058826, |
|
"loss": 0.4326, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.302473783493042, |
|
"learning_rate": 0.0003468137254901961, |
|
"loss": 0.4847, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.7527573529411765, |
|
"eval_f1_macro": 0.7106608971260056, |
|
"eval_f1_micro": 0.7527573529411765, |
|
"eval_loss": 0.4992017149925232, |
|
"eval_runtime": 1.1293, |
|
"eval_samples_per_second": 963.438, |
|
"eval_steps_per_second": 30.107, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.1889142990112305, |
|
"learning_rate": 0.0003406862745098039, |
|
"loss": 0.4215, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.0543372631072998, |
|
"learning_rate": 0.00033455882352941176, |
|
"loss": 0.5115, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 2.2345387935638428, |
|
"learning_rate": 0.0003284313725490196, |
|
"loss": 0.4543, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.7095710039138794, |
|
"learning_rate": 0.0003223039215686275, |
|
"loss": 0.3815, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.091468334197998, |
|
"learning_rate": 0.0003161764705882353, |
|
"loss": 0.4107, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.75, |
|
"eval_f1_macro": 0.7086223125425839, |
|
"eval_f1_micro": 0.75, |
|
"eval_loss": 0.5264343023300171, |
|
"eval_runtime": 1.1324, |
|
"eval_samples_per_second": 960.833, |
|
"eval_steps_per_second": 30.026, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 1.4771665334701538, |
|
"learning_rate": 0.00031004901960784314, |
|
"loss": 0.4181, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.9134888648986816, |
|
"learning_rate": 0.00030392156862745097, |
|
"loss": 0.378, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.9835084676742554, |
|
"learning_rate": 0.0002977941176470588, |
|
"loss": 0.4827, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 2.08085036277771, |
|
"learning_rate": 0.0002916666666666667, |
|
"loss": 0.3999, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.597136378288269, |
|
"learning_rate": 0.0002855392156862745, |
|
"loss": 0.4197, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7435661764705882, |
|
"eval_f1_macro": 0.7126257342882434, |
|
"eval_f1_micro": 0.7435661764705882, |
|
"eval_loss": 0.5231310725212097, |
|
"eval_runtime": 1.0822, |
|
"eval_samples_per_second": 1005.332, |
|
"eval_steps_per_second": 31.417, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.3496707677841187, |
|
"learning_rate": 0.00027941176470588236, |
|
"loss": 0.4006, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 2.2473416328430176, |
|
"learning_rate": 0.0002732843137254902, |
|
"loss": 0.3715, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.1425981521606445, |
|
"learning_rate": 0.000267156862745098, |
|
"loss": 0.502, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 1.080175757408142, |
|
"learning_rate": 0.0002610294117647059, |
|
"loss": 0.3921, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 1.7369410991668701, |
|
"learning_rate": 0.00025490196078431374, |
|
"loss": 0.4002, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.7509191176470589, |
|
"eval_f1_macro": 0.7008802711523263, |
|
"eval_f1_micro": 0.7509191176470589, |
|
"eval_loss": 0.5311830043792725, |
|
"eval_runtime": 1.1348, |
|
"eval_samples_per_second": 958.773, |
|
"eval_steps_per_second": 29.962, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.4130823612213135, |
|
"learning_rate": 0.00024877450980392157, |
|
"loss": 0.4369, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 1.7928345203399658, |
|
"learning_rate": 0.0002426470588235294, |
|
"loss": 0.3897, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.4081470966339111, |
|
"learning_rate": 0.00023651960784313726, |
|
"loss": 0.3966, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.4095189571380615, |
|
"learning_rate": 0.0002303921568627451, |
|
"loss": 0.3822, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 2.2179651260375977, |
|
"learning_rate": 0.00022426470588235296, |
|
"loss": 0.4381, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_accuracy": 0.7481617647058824, |
|
"eval_f1_macro": 0.6999822874901374, |
|
"eval_f1_micro": 0.7481617647058824, |
|
"eval_loss": 0.5215727686882019, |
|
"eval_runtime": 1.1354, |
|
"eval_samples_per_second": 958.294, |
|
"eval_steps_per_second": 29.947, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 1.261864185333252, |
|
"learning_rate": 0.0002181372549019608, |
|
"loss": 0.3909, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.3953834772109985, |
|
"learning_rate": 0.00021200980392156862, |
|
"loss": 0.4049, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.1786818504333496, |
|
"learning_rate": 0.00020588235294117645, |
|
"loss": 0.3903, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.1706407070159912, |
|
"learning_rate": 0.00019975490196078434, |
|
"loss": 0.3724, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.9597140550613403, |
|
"learning_rate": 0.00019362745098039217, |
|
"loss": 0.4125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_accuracy": 0.7509191176470589, |
|
"eval_f1_macro": 0.716105501438017, |
|
"eval_f1_micro": 0.7509191176470589, |
|
"eval_loss": 0.5261591672897339, |
|
"eval_runtime": 1.1364, |
|
"eval_samples_per_second": 957.372, |
|
"eval_steps_per_second": 29.918, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.382961630821228, |
|
"learning_rate": 0.0001875, |
|
"loss": 0.4511, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 1.5272341966629028, |
|
"learning_rate": 0.00018137254901960784, |
|
"loss": 0.4201, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 1.3144079446792603, |
|
"learning_rate": 0.00017524509803921567, |
|
"loss": 0.4401, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 1.6088247299194336, |
|
"learning_rate": 0.00016911764705882356, |
|
"loss": 0.4102, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 1.457602858543396, |
|
"learning_rate": 0.0001629901960784314, |
|
"loss": 0.3665, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.7545955882352942, |
|
"eval_f1_macro": 0.7190493000314321, |
|
"eval_f1_micro": 0.7545955882352942, |
|
"eval_loss": 0.5205144286155701, |
|
"eval_runtime": 1.1386, |
|
"eval_samples_per_second": 955.59, |
|
"eval_steps_per_second": 29.862, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.6489365100860596, |
|
"learning_rate": 0.00015686274509803922, |
|
"loss": 0.387, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 1.551331877708435, |
|
"learning_rate": 0.00015073529411764705, |
|
"loss": 0.3103, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 2.288947582244873, |
|
"learning_rate": 0.0001446078431372549, |
|
"loss": 0.3649, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 1.4870082139968872, |
|
"learning_rate": 0.00013848039215686274, |
|
"loss": 0.3382, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 1.9287152290344238, |
|
"learning_rate": 0.0001323529411764706, |
|
"loss": 0.3855, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.7536764705882353, |
|
"eval_f1_macro": 0.7160618855460168, |
|
"eval_f1_micro": 0.7536764705882353, |
|
"eval_loss": 0.5672034025192261, |
|
"eval_runtime": 1.1377, |
|
"eval_samples_per_second": 956.304, |
|
"eval_steps_per_second": 29.884, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 1.6499133110046387, |
|
"learning_rate": 0.00012622549019607844, |
|
"loss": 0.3595, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 1.7214213609695435, |
|
"learning_rate": 0.00012009803921568628, |
|
"loss": 0.2884, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 2.0640528202056885, |
|
"learning_rate": 0.00011397058823529411, |
|
"loss": 0.3366, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 2.1024091243743896, |
|
"learning_rate": 0.00010784313725490197, |
|
"loss": 0.3589, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 1.5444495677947998, |
|
"learning_rate": 0.0001017156862745098, |
|
"loss": 0.3125, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_accuracy": 0.7509191176470589, |
|
"eval_f1_macro": 0.7065648900543882, |
|
"eval_f1_micro": 0.7509191176470589, |
|
"eval_loss": 0.5732391476631165, |
|
"eval_runtime": 1.0847, |
|
"eval_samples_per_second": 1003.018, |
|
"eval_steps_per_second": 31.344, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 2.2737483978271484, |
|
"learning_rate": 9.558823529411764e-05, |
|
"loss": 0.352, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 2.3418612480163574, |
|
"learning_rate": 8.94607843137255e-05, |
|
"loss": 0.3647, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.5297809839248657, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.3322, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 2.407879590988159, |
|
"learning_rate": 7.720588235294118e-05, |
|
"loss": 0.2971, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 6.948758125305176, |
|
"learning_rate": 7.107843137254902e-05, |
|
"loss": 0.2955, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.7555147058823529, |
|
"eval_f1_macro": 0.7122208301016248, |
|
"eval_f1_micro": 0.7555147058823529, |
|
"eval_loss": 0.5928493738174438, |
|
"eval_runtime": 1.1387, |
|
"eval_samples_per_second": 955.456, |
|
"eval_steps_per_second": 29.858, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 1.9339895248413086, |
|
"learning_rate": 6.495098039215687e-05, |
|
"loss": 0.3605, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 1.7344573736190796, |
|
"learning_rate": 5.882352941176471e-05, |
|
"loss": 0.3154, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 2.3221147060394287, |
|
"learning_rate": 5.2696078431372553e-05, |
|
"loss": 0.3483, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"grad_norm": 2.296344518661499, |
|
"learning_rate": 4.656862745098039e-05, |
|
"loss": 0.3768, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 1.7548387050628662, |
|
"learning_rate": 4.044117647058824e-05, |
|
"loss": 0.3556, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.7536764705882353, |
|
"eval_f1_macro": 0.7173598644493644, |
|
"eval_f1_micro": 0.7536764705882353, |
|
"eval_loss": 0.5704131126403809, |
|
"eval_runtime": 1.1365, |
|
"eval_samples_per_second": 957.351, |
|
"eval_steps_per_second": 29.917, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 1.6045734882354736, |
|
"learning_rate": 3.4313725490196084e-05, |
|
"loss": 0.3194, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.3206483125686646, |
|
"learning_rate": 2.8186274509803924e-05, |
|
"loss": 0.3289, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 1.6699401140213013, |
|
"learning_rate": 2.2058823529411766e-05, |
|
"loss": 0.32, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 1.2462468147277832, |
|
"learning_rate": 1.593137254901961e-05, |
|
"loss": 0.3742, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 1.7137000560760498, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.3578, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.7518382352941176, |
|
"eval_f1_macro": 0.7069047400495676, |
|
"eval_f1_micro": 0.7518382352941176, |
|
"eval_loss": 0.5706028938293457, |
|
"eval_runtime": 1.0853, |
|
"eval_samples_per_second": 1002.521, |
|
"eval_steps_per_second": 31.329, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.6963423490524292, |
|
"learning_rate": 3.6764705882352942e-06, |
|
"loss": 0.3367, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 816, |
|
"total_flos": 888901800034304.0, |
|
"train_loss": 0.4176732173152998, |
|
"train_runtime": 109.5646, |
|
"train_samples_per_second": 238.216, |
|
"train_steps_per_second": 7.448 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 816, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 50, |
|
"total_flos": 888901800034304.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|