{ "best_metric": 0.4984971284866333, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_small_twitter/checkpoint-200", "epoch": 3.0, "eval_steps": 50, "global_step": 816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 2.320362091064453, "learning_rate": 0.0004938725490196079, "loss": 0.6107, "step": 10 }, { "epoch": 0.07, "grad_norm": 1.2164921760559082, "learning_rate": 0.0004877450980392157, "loss": 0.585, "step": 20 }, { "epoch": 0.11, "grad_norm": 1.7246904373168945, "learning_rate": 0.00048161764705882356, "loss": 0.5643, "step": 30 }, { "epoch": 0.15, "grad_norm": 1.0919380187988281, "learning_rate": 0.00047549019607843134, "loss": 0.5213, "step": 40 }, { "epoch": 0.18, "grad_norm": 1.6304311752319336, "learning_rate": 0.0004693627450980392, "loss": 0.5055, "step": 50 }, { "epoch": 0.18, "eval_accuracy": 0.7536764705882353, "eval_f1_macro": 0.7230132041417308, "eval_f1_micro": 0.7536764705882353, "eval_loss": 0.5209915041923523, "eval_runtime": 1.0764, "eval_samples_per_second": 1010.761, "eval_steps_per_second": 31.586, "step": 50 }, { "epoch": 0.22, "grad_norm": 3.424161195755005, "learning_rate": 0.0004632352941176471, "loss": 0.5007, "step": 60 }, { "epoch": 0.26, "grad_norm": 1.3720046281814575, "learning_rate": 0.0004571078431372549, "loss": 0.5132, "step": 70 }, { "epoch": 0.29, "grad_norm": 1.943393588066101, "learning_rate": 0.0004509803921568628, "loss": 0.4845, "step": 80 }, { "epoch": 0.33, "grad_norm": 1.9219595193862915, "learning_rate": 0.00044485294117647056, "loss": 0.4548, "step": 90 }, { "epoch": 0.37, "grad_norm": 2.8596079349517822, "learning_rate": 0.00043872549019607844, "loss": 0.5045, "step": 100 }, { "epoch": 0.37, "eval_accuracy": 0.7444852941176471, "eval_f1_macro": 0.6934304201871463, "eval_f1_micro": 0.7444852941176471, "eval_loss": 0.501828134059906, "eval_runtime": 1.1287, "eval_samples_per_second": 963.908, "eval_steps_per_second": 30.122, "step": 100 }, { "epoch": 0.4, "grad_norm": 2.045985460281372, "learning_rate": 0.0004325980392156863, "loss": 0.4759, "step": 110 }, { "epoch": 0.44, "grad_norm": 2.124852180480957, "learning_rate": 0.0004264705882352941, "loss": 0.509, "step": 120 }, { "epoch": 0.48, "grad_norm": 1.3049920797348022, "learning_rate": 0.000420343137254902, "loss": 0.4681, "step": 130 }, { "epoch": 0.51, "grad_norm": 1.5385198593139648, "learning_rate": 0.0004142156862745098, "loss": 0.51, "step": 140 }, { "epoch": 0.55, "grad_norm": 0.8772047162055969, "learning_rate": 0.00040808823529411766, "loss": 0.4727, "step": 150 }, { "epoch": 0.55, "eval_accuracy": 0.7242647058823529, "eval_f1_macro": 0.6002488646773823, "eval_f1_micro": 0.7242647058823529, "eval_loss": 0.5356110334396362, "eval_runtime": 1.1278, "eval_samples_per_second": 964.717, "eval_steps_per_second": 30.147, "step": 150 }, { "epoch": 0.59, "grad_norm": 1.149582028388977, "learning_rate": 0.0004019607843137255, "loss": 0.4921, "step": 160 }, { "epoch": 0.62, "grad_norm": 2.1237943172454834, "learning_rate": 0.0003958333333333333, "loss": 0.5149, "step": 170 }, { "epoch": 0.66, "grad_norm": 1.629279613494873, "learning_rate": 0.0003897058823529412, "loss": 0.5094, "step": 180 }, { "epoch": 0.7, "grad_norm": 1.2666165828704834, "learning_rate": 0.00038357843137254904, "loss": 0.4891, "step": 190 }, { "epoch": 0.74, "grad_norm": 1.3141692876815796, "learning_rate": 0.0003774509803921569, "loss": 0.4924, "step": 200 }, { "epoch": 0.74, "eval_accuracy": 0.7472426470588235, "eval_f1_macro": 0.6953600443112663, "eval_f1_micro": 0.7472426470588235, "eval_loss": 0.4984971284866333, "eval_runtime": 1.078, "eval_samples_per_second": 1009.235, "eval_steps_per_second": 31.539, "step": 200 }, { "epoch": 0.77, "grad_norm": 1.6602263450622559, "learning_rate": 0.0003713235294117647, "loss": 0.4878, "step": 210 }, { "epoch": 0.81, "grad_norm": 3.3005311489105225, "learning_rate": 0.00036519607843137254, "loss": 0.5078, "step": 220 }, { "epoch": 0.85, "grad_norm": 1.2845886945724487, "learning_rate": 0.0003590686274509804, "loss": 0.4773, "step": 230 }, { "epoch": 0.88, "grad_norm": 0.7171024084091187, "learning_rate": 0.00035294117647058826, "loss": 0.4326, "step": 240 }, { "epoch": 0.92, "grad_norm": 1.302473783493042, "learning_rate": 0.0003468137254901961, "loss": 0.4847, "step": 250 }, { "epoch": 0.92, "eval_accuracy": 0.7527573529411765, "eval_f1_macro": 0.7106608971260056, "eval_f1_micro": 0.7527573529411765, "eval_loss": 0.4992017149925232, "eval_runtime": 1.1293, "eval_samples_per_second": 963.438, "eval_steps_per_second": 30.107, "step": 250 }, { "epoch": 0.96, "grad_norm": 1.1889142990112305, "learning_rate": 0.0003406862745098039, "loss": 0.4215, "step": 260 }, { "epoch": 0.99, "grad_norm": 1.0543372631072998, "learning_rate": 0.00033455882352941176, "loss": 0.5115, "step": 270 }, { "epoch": 1.03, "grad_norm": 2.2345387935638428, "learning_rate": 0.0003284313725490196, "loss": 0.4543, "step": 280 }, { "epoch": 1.07, "grad_norm": 1.7095710039138794, "learning_rate": 0.0003223039215686275, "loss": 0.3815, "step": 290 }, { "epoch": 1.1, "grad_norm": 1.091468334197998, "learning_rate": 0.0003161764705882353, "loss": 0.4107, "step": 300 }, { "epoch": 1.1, "eval_accuracy": 0.75, "eval_f1_macro": 0.7086223125425839, "eval_f1_micro": 0.75, "eval_loss": 0.5264343023300171, "eval_runtime": 1.1324, "eval_samples_per_second": 960.833, "eval_steps_per_second": 30.026, "step": 300 }, { "epoch": 1.14, "grad_norm": 1.4771665334701538, "learning_rate": 0.00031004901960784314, "loss": 0.4181, "step": 310 }, { "epoch": 1.18, "grad_norm": 0.9134888648986816, "learning_rate": 0.00030392156862745097, "loss": 0.378, "step": 320 }, { "epoch": 1.21, "grad_norm": 0.9835084676742554, "learning_rate": 0.0002977941176470588, "loss": 0.4827, "step": 330 }, { "epoch": 1.25, "grad_norm": 2.08085036277771, "learning_rate": 0.0002916666666666667, "loss": 0.3999, "step": 340 }, { "epoch": 1.29, "grad_norm": 1.597136378288269, "learning_rate": 0.0002855392156862745, "loss": 0.4197, "step": 350 }, { "epoch": 1.29, "eval_accuracy": 0.7435661764705882, "eval_f1_macro": 0.7126257342882434, "eval_f1_micro": 0.7435661764705882, "eval_loss": 0.5231310725212097, "eval_runtime": 1.0822, "eval_samples_per_second": 1005.332, "eval_steps_per_second": 31.417, "step": 350 }, { "epoch": 1.32, "grad_norm": 1.3496707677841187, "learning_rate": 0.00027941176470588236, "loss": 0.4006, "step": 360 }, { "epoch": 1.36, "grad_norm": 2.2473416328430176, "learning_rate": 0.0002732843137254902, "loss": 0.3715, "step": 370 }, { "epoch": 1.4, "grad_norm": 1.1425981521606445, "learning_rate": 0.000267156862745098, "loss": 0.502, "step": 380 }, { "epoch": 1.43, "grad_norm": 1.080175757408142, "learning_rate": 0.0002610294117647059, "loss": 0.3921, "step": 390 }, { "epoch": 1.47, "grad_norm": 1.7369410991668701, "learning_rate": 0.00025490196078431374, "loss": 0.4002, "step": 400 }, { "epoch": 1.47, "eval_accuracy": 0.7509191176470589, "eval_f1_macro": 0.7008802711523263, "eval_f1_micro": 0.7509191176470589, "eval_loss": 0.5311830043792725, "eval_runtime": 1.1348, "eval_samples_per_second": 958.773, "eval_steps_per_second": 29.962, "step": 400 }, { "epoch": 1.51, "grad_norm": 1.4130823612213135, "learning_rate": 0.00024877450980392157, "loss": 0.4369, "step": 410 }, { "epoch": 1.54, "grad_norm": 1.7928345203399658, "learning_rate": 0.0002426470588235294, "loss": 0.3897, "step": 420 }, { "epoch": 1.58, "grad_norm": 1.4081470966339111, "learning_rate": 0.00023651960784313726, "loss": 0.3966, "step": 430 }, { "epoch": 1.62, "grad_norm": 1.4095189571380615, "learning_rate": 0.0002303921568627451, "loss": 0.3822, "step": 440 }, { "epoch": 1.65, "grad_norm": 2.2179651260375977, "learning_rate": 0.00022426470588235296, "loss": 0.4381, "step": 450 }, { "epoch": 1.65, "eval_accuracy": 0.7481617647058824, "eval_f1_macro": 0.6999822874901374, "eval_f1_micro": 0.7481617647058824, "eval_loss": 0.5215727686882019, "eval_runtime": 1.1354, "eval_samples_per_second": 958.294, "eval_steps_per_second": 29.947, "step": 450 }, { "epoch": 1.69, "grad_norm": 1.261864185333252, "learning_rate": 0.0002181372549019608, "loss": 0.3909, "step": 460 }, { "epoch": 1.73, "grad_norm": 1.3953834772109985, "learning_rate": 0.00021200980392156862, "loss": 0.4049, "step": 470 }, { "epoch": 1.76, "grad_norm": 1.1786818504333496, "learning_rate": 0.00020588235294117645, "loss": 0.3903, "step": 480 }, { "epoch": 1.8, "grad_norm": 1.1706407070159912, "learning_rate": 0.00019975490196078434, "loss": 0.3724, "step": 490 }, { "epoch": 1.84, "grad_norm": 1.9597140550613403, "learning_rate": 0.00019362745098039217, "loss": 0.4125, "step": 500 }, { "epoch": 1.84, "eval_accuracy": 0.7509191176470589, "eval_f1_macro": 0.716105501438017, "eval_f1_micro": 0.7509191176470589, "eval_loss": 0.5261591672897339, "eval_runtime": 1.1364, "eval_samples_per_second": 957.372, "eval_steps_per_second": 29.918, "step": 500 }, { "epoch": 1.88, "grad_norm": 1.382961630821228, "learning_rate": 0.0001875, "loss": 0.4511, "step": 510 }, { "epoch": 1.91, "grad_norm": 1.5272341966629028, "learning_rate": 0.00018137254901960784, "loss": 0.4201, "step": 520 }, { "epoch": 1.95, "grad_norm": 1.3144079446792603, "learning_rate": 0.00017524509803921567, "loss": 0.4401, "step": 530 }, { "epoch": 1.99, "grad_norm": 1.6088247299194336, "learning_rate": 0.00016911764705882356, "loss": 0.4102, "step": 540 }, { "epoch": 2.02, "grad_norm": 1.457602858543396, "learning_rate": 0.0001629901960784314, "loss": 0.3665, "step": 550 }, { "epoch": 2.02, "eval_accuracy": 0.7545955882352942, "eval_f1_macro": 0.7190493000314321, "eval_f1_micro": 0.7545955882352942, "eval_loss": 0.5205144286155701, "eval_runtime": 1.1386, "eval_samples_per_second": 955.59, "eval_steps_per_second": 29.862, "step": 550 }, { "epoch": 2.06, "grad_norm": 1.6489365100860596, "learning_rate": 0.00015686274509803922, "loss": 0.387, "step": 560 }, { "epoch": 2.1, "grad_norm": 1.551331877708435, "learning_rate": 0.00015073529411764705, "loss": 0.3103, "step": 570 }, { "epoch": 2.13, "grad_norm": 2.288947582244873, "learning_rate": 0.0001446078431372549, "loss": 0.3649, "step": 580 }, { "epoch": 2.17, "grad_norm": 1.4870082139968872, "learning_rate": 0.00013848039215686274, "loss": 0.3382, "step": 590 }, { "epoch": 2.21, "grad_norm": 1.9287152290344238, "learning_rate": 0.0001323529411764706, "loss": 0.3855, "step": 600 }, { "epoch": 2.21, "eval_accuracy": 0.7536764705882353, "eval_f1_macro": 0.7160618855460168, "eval_f1_micro": 0.7536764705882353, "eval_loss": 0.5672034025192261, "eval_runtime": 1.1377, "eval_samples_per_second": 956.304, "eval_steps_per_second": 29.884, "step": 600 }, { "epoch": 2.24, "grad_norm": 1.6499133110046387, "learning_rate": 0.00012622549019607844, "loss": 0.3595, "step": 610 }, { "epoch": 2.28, "grad_norm": 1.7214213609695435, "learning_rate": 0.00012009803921568628, "loss": 0.2884, "step": 620 }, { "epoch": 2.32, "grad_norm": 2.0640528202056885, "learning_rate": 0.00011397058823529411, "loss": 0.3366, "step": 630 }, { "epoch": 2.35, "grad_norm": 2.1024091243743896, "learning_rate": 0.00010784313725490197, "loss": 0.3589, "step": 640 }, { "epoch": 2.39, "grad_norm": 1.5444495677947998, "learning_rate": 0.0001017156862745098, "loss": 0.3125, "step": 650 }, { "epoch": 2.39, "eval_accuracy": 0.7509191176470589, "eval_f1_macro": 0.7065648900543882, "eval_f1_micro": 0.7509191176470589, "eval_loss": 0.5732391476631165, "eval_runtime": 1.0847, "eval_samples_per_second": 1003.018, "eval_steps_per_second": 31.344, "step": 650 }, { "epoch": 2.43, "grad_norm": 2.2737483978271484, "learning_rate": 9.558823529411764e-05, "loss": 0.352, "step": 660 }, { "epoch": 2.46, "grad_norm": 2.3418612480163574, "learning_rate": 8.94607843137255e-05, "loss": 0.3647, "step": 670 }, { "epoch": 2.5, "grad_norm": 1.5297809839248657, "learning_rate": 8.333333333333333e-05, "loss": 0.3322, "step": 680 }, { "epoch": 2.54, "grad_norm": 2.407879590988159, "learning_rate": 7.720588235294118e-05, "loss": 0.2971, "step": 690 }, { "epoch": 2.57, "grad_norm": 6.948758125305176, "learning_rate": 7.107843137254902e-05, "loss": 0.2955, "step": 700 }, { "epoch": 2.57, "eval_accuracy": 0.7555147058823529, "eval_f1_macro": 0.7122208301016248, "eval_f1_micro": 0.7555147058823529, "eval_loss": 0.5928493738174438, "eval_runtime": 1.1387, "eval_samples_per_second": 955.456, "eval_steps_per_second": 29.858, "step": 700 }, { "epoch": 2.61, "grad_norm": 1.9339895248413086, "learning_rate": 6.495098039215687e-05, "loss": 0.3605, "step": 710 }, { "epoch": 2.65, "grad_norm": 1.7344573736190796, "learning_rate": 5.882352941176471e-05, "loss": 0.3154, "step": 720 }, { "epoch": 2.68, "grad_norm": 2.3221147060394287, "learning_rate": 5.2696078431372553e-05, "loss": 0.3483, "step": 730 }, { "epoch": 2.72, "grad_norm": 2.296344518661499, "learning_rate": 4.656862745098039e-05, "loss": 0.3768, "step": 740 }, { "epoch": 2.76, "grad_norm": 1.7548387050628662, "learning_rate": 4.044117647058824e-05, "loss": 0.3556, "step": 750 }, { "epoch": 2.76, "eval_accuracy": 0.7536764705882353, "eval_f1_macro": 0.7173598644493644, "eval_f1_micro": 0.7536764705882353, "eval_loss": 0.5704131126403809, "eval_runtime": 1.1365, "eval_samples_per_second": 957.351, "eval_steps_per_second": 29.917, "step": 750 }, { "epoch": 2.79, "grad_norm": 1.6045734882354736, "learning_rate": 3.4313725490196084e-05, "loss": 0.3194, "step": 760 }, { "epoch": 2.83, "grad_norm": 1.3206483125686646, "learning_rate": 2.8186274509803924e-05, "loss": 0.3289, "step": 770 }, { "epoch": 2.87, "grad_norm": 1.6699401140213013, "learning_rate": 2.2058823529411766e-05, "loss": 0.32, "step": 780 }, { "epoch": 2.9, "grad_norm": 1.2462468147277832, "learning_rate": 1.593137254901961e-05, "loss": 0.3742, "step": 790 }, { "epoch": 2.94, "grad_norm": 1.7137000560760498, "learning_rate": 9.803921568627451e-06, "loss": 0.3578, "step": 800 }, { "epoch": 2.94, "eval_accuracy": 0.7518382352941176, "eval_f1_macro": 0.7069047400495676, "eval_f1_micro": 0.7518382352941176, "eval_loss": 0.5706028938293457, "eval_runtime": 1.0853, "eval_samples_per_second": 1002.521, "eval_steps_per_second": 31.329, "step": 800 }, { "epoch": 2.98, "grad_norm": 1.6963423490524292, "learning_rate": 3.6764705882352942e-06, "loss": 0.3367, "step": 810 }, { "epoch": 3.0, "step": 816, "total_flos": 888901800034304.0, "train_loss": 0.4176732173152998, "train_runtime": 109.5646, "train_samples_per_second": 238.216, "train_steps_per_second": 7.448 } ], "logging_steps": 10, "max_steps": 816, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 888901800034304.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }