akkky02's picture
Upload folder using huggingface_hub
aa5b5e2 verified
raw
history blame
18.9 kB
{
"best_metric": 0.4984971284866333,
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_small_twitter/checkpoint-200",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 816,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 2.320362091064453,
"learning_rate": 0.0004938725490196079,
"loss": 0.6107,
"step": 10
},
{
"epoch": 0.07,
"grad_norm": 1.2164921760559082,
"learning_rate": 0.0004877450980392157,
"loss": 0.585,
"step": 20
},
{
"epoch": 0.11,
"grad_norm": 1.7246904373168945,
"learning_rate": 0.00048161764705882356,
"loss": 0.5643,
"step": 30
},
{
"epoch": 0.15,
"grad_norm": 1.0919380187988281,
"learning_rate": 0.00047549019607843134,
"loss": 0.5213,
"step": 40
},
{
"epoch": 0.18,
"grad_norm": 1.6304311752319336,
"learning_rate": 0.0004693627450980392,
"loss": 0.5055,
"step": 50
},
{
"epoch": 0.18,
"eval_accuracy": 0.7536764705882353,
"eval_f1_macro": 0.7230132041417308,
"eval_f1_micro": 0.7536764705882353,
"eval_loss": 0.5209915041923523,
"eval_runtime": 1.0764,
"eval_samples_per_second": 1010.761,
"eval_steps_per_second": 31.586,
"step": 50
},
{
"epoch": 0.22,
"grad_norm": 3.424161195755005,
"learning_rate": 0.0004632352941176471,
"loss": 0.5007,
"step": 60
},
{
"epoch": 0.26,
"grad_norm": 1.3720046281814575,
"learning_rate": 0.0004571078431372549,
"loss": 0.5132,
"step": 70
},
{
"epoch": 0.29,
"grad_norm": 1.943393588066101,
"learning_rate": 0.0004509803921568628,
"loss": 0.4845,
"step": 80
},
{
"epoch": 0.33,
"grad_norm": 1.9219595193862915,
"learning_rate": 0.00044485294117647056,
"loss": 0.4548,
"step": 90
},
{
"epoch": 0.37,
"grad_norm": 2.8596079349517822,
"learning_rate": 0.00043872549019607844,
"loss": 0.5045,
"step": 100
},
{
"epoch": 0.37,
"eval_accuracy": 0.7444852941176471,
"eval_f1_macro": 0.6934304201871463,
"eval_f1_micro": 0.7444852941176471,
"eval_loss": 0.501828134059906,
"eval_runtime": 1.1287,
"eval_samples_per_second": 963.908,
"eval_steps_per_second": 30.122,
"step": 100
},
{
"epoch": 0.4,
"grad_norm": 2.045985460281372,
"learning_rate": 0.0004325980392156863,
"loss": 0.4759,
"step": 110
},
{
"epoch": 0.44,
"grad_norm": 2.124852180480957,
"learning_rate": 0.0004264705882352941,
"loss": 0.509,
"step": 120
},
{
"epoch": 0.48,
"grad_norm": 1.3049920797348022,
"learning_rate": 0.000420343137254902,
"loss": 0.4681,
"step": 130
},
{
"epoch": 0.51,
"grad_norm": 1.5385198593139648,
"learning_rate": 0.0004142156862745098,
"loss": 0.51,
"step": 140
},
{
"epoch": 0.55,
"grad_norm": 0.8772047162055969,
"learning_rate": 0.00040808823529411766,
"loss": 0.4727,
"step": 150
},
{
"epoch": 0.55,
"eval_accuracy": 0.7242647058823529,
"eval_f1_macro": 0.6002488646773823,
"eval_f1_micro": 0.7242647058823529,
"eval_loss": 0.5356110334396362,
"eval_runtime": 1.1278,
"eval_samples_per_second": 964.717,
"eval_steps_per_second": 30.147,
"step": 150
},
{
"epoch": 0.59,
"grad_norm": 1.149582028388977,
"learning_rate": 0.0004019607843137255,
"loss": 0.4921,
"step": 160
},
{
"epoch": 0.62,
"grad_norm": 2.1237943172454834,
"learning_rate": 0.0003958333333333333,
"loss": 0.5149,
"step": 170
},
{
"epoch": 0.66,
"grad_norm": 1.629279613494873,
"learning_rate": 0.0003897058823529412,
"loss": 0.5094,
"step": 180
},
{
"epoch": 0.7,
"grad_norm": 1.2666165828704834,
"learning_rate": 0.00038357843137254904,
"loss": 0.4891,
"step": 190
},
{
"epoch": 0.74,
"grad_norm": 1.3141692876815796,
"learning_rate": 0.0003774509803921569,
"loss": 0.4924,
"step": 200
},
{
"epoch": 0.74,
"eval_accuracy": 0.7472426470588235,
"eval_f1_macro": 0.6953600443112663,
"eval_f1_micro": 0.7472426470588235,
"eval_loss": 0.4984971284866333,
"eval_runtime": 1.078,
"eval_samples_per_second": 1009.235,
"eval_steps_per_second": 31.539,
"step": 200
},
{
"epoch": 0.77,
"grad_norm": 1.6602263450622559,
"learning_rate": 0.0003713235294117647,
"loss": 0.4878,
"step": 210
},
{
"epoch": 0.81,
"grad_norm": 3.3005311489105225,
"learning_rate": 0.00036519607843137254,
"loss": 0.5078,
"step": 220
},
{
"epoch": 0.85,
"grad_norm": 1.2845886945724487,
"learning_rate": 0.0003590686274509804,
"loss": 0.4773,
"step": 230
},
{
"epoch": 0.88,
"grad_norm": 0.7171024084091187,
"learning_rate": 0.00035294117647058826,
"loss": 0.4326,
"step": 240
},
{
"epoch": 0.92,
"grad_norm": 1.302473783493042,
"learning_rate": 0.0003468137254901961,
"loss": 0.4847,
"step": 250
},
{
"epoch": 0.92,
"eval_accuracy": 0.7527573529411765,
"eval_f1_macro": 0.7106608971260056,
"eval_f1_micro": 0.7527573529411765,
"eval_loss": 0.4992017149925232,
"eval_runtime": 1.1293,
"eval_samples_per_second": 963.438,
"eval_steps_per_second": 30.107,
"step": 250
},
{
"epoch": 0.96,
"grad_norm": 1.1889142990112305,
"learning_rate": 0.0003406862745098039,
"loss": 0.4215,
"step": 260
},
{
"epoch": 0.99,
"grad_norm": 1.0543372631072998,
"learning_rate": 0.00033455882352941176,
"loss": 0.5115,
"step": 270
},
{
"epoch": 1.03,
"grad_norm": 2.2345387935638428,
"learning_rate": 0.0003284313725490196,
"loss": 0.4543,
"step": 280
},
{
"epoch": 1.07,
"grad_norm": 1.7095710039138794,
"learning_rate": 0.0003223039215686275,
"loss": 0.3815,
"step": 290
},
{
"epoch": 1.1,
"grad_norm": 1.091468334197998,
"learning_rate": 0.0003161764705882353,
"loss": 0.4107,
"step": 300
},
{
"epoch": 1.1,
"eval_accuracy": 0.75,
"eval_f1_macro": 0.7086223125425839,
"eval_f1_micro": 0.75,
"eval_loss": 0.5264343023300171,
"eval_runtime": 1.1324,
"eval_samples_per_second": 960.833,
"eval_steps_per_second": 30.026,
"step": 300
},
{
"epoch": 1.14,
"grad_norm": 1.4771665334701538,
"learning_rate": 0.00031004901960784314,
"loss": 0.4181,
"step": 310
},
{
"epoch": 1.18,
"grad_norm": 0.9134888648986816,
"learning_rate": 0.00030392156862745097,
"loss": 0.378,
"step": 320
},
{
"epoch": 1.21,
"grad_norm": 0.9835084676742554,
"learning_rate": 0.0002977941176470588,
"loss": 0.4827,
"step": 330
},
{
"epoch": 1.25,
"grad_norm": 2.08085036277771,
"learning_rate": 0.0002916666666666667,
"loss": 0.3999,
"step": 340
},
{
"epoch": 1.29,
"grad_norm": 1.597136378288269,
"learning_rate": 0.0002855392156862745,
"loss": 0.4197,
"step": 350
},
{
"epoch": 1.29,
"eval_accuracy": 0.7435661764705882,
"eval_f1_macro": 0.7126257342882434,
"eval_f1_micro": 0.7435661764705882,
"eval_loss": 0.5231310725212097,
"eval_runtime": 1.0822,
"eval_samples_per_second": 1005.332,
"eval_steps_per_second": 31.417,
"step": 350
},
{
"epoch": 1.32,
"grad_norm": 1.3496707677841187,
"learning_rate": 0.00027941176470588236,
"loss": 0.4006,
"step": 360
},
{
"epoch": 1.36,
"grad_norm": 2.2473416328430176,
"learning_rate": 0.0002732843137254902,
"loss": 0.3715,
"step": 370
},
{
"epoch": 1.4,
"grad_norm": 1.1425981521606445,
"learning_rate": 0.000267156862745098,
"loss": 0.502,
"step": 380
},
{
"epoch": 1.43,
"grad_norm": 1.080175757408142,
"learning_rate": 0.0002610294117647059,
"loss": 0.3921,
"step": 390
},
{
"epoch": 1.47,
"grad_norm": 1.7369410991668701,
"learning_rate": 0.00025490196078431374,
"loss": 0.4002,
"step": 400
},
{
"epoch": 1.47,
"eval_accuracy": 0.7509191176470589,
"eval_f1_macro": 0.7008802711523263,
"eval_f1_micro": 0.7509191176470589,
"eval_loss": 0.5311830043792725,
"eval_runtime": 1.1348,
"eval_samples_per_second": 958.773,
"eval_steps_per_second": 29.962,
"step": 400
},
{
"epoch": 1.51,
"grad_norm": 1.4130823612213135,
"learning_rate": 0.00024877450980392157,
"loss": 0.4369,
"step": 410
},
{
"epoch": 1.54,
"grad_norm": 1.7928345203399658,
"learning_rate": 0.0002426470588235294,
"loss": 0.3897,
"step": 420
},
{
"epoch": 1.58,
"grad_norm": 1.4081470966339111,
"learning_rate": 0.00023651960784313726,
"loss": 0.3966,
"step": 430
},
{
"epoch": 1.62,
"grad_norm": 1.4095189571380615,
"learning_rate": 0.0002303921568627451,
"loss": 0.3822,
"step": 440
},
{
"epoch": 1.65,
"grad_norm": 2.2179651260375977,
"learning_rate": 0.00022426470588235296,
"loss": 0.4381,
"step": 450
},
{
"epoch": 1.65,
"eval_accuracy": 0.7481617647058824,
"eval_f1_macro": 0.6999822874901374,
"eval_f1_micro": 0.7481617647058824,
"eval_loss": 0.5215727686882019,
"eval_runtime": 1.1354,
"eval_samples_per_second": 958.294,
"eval_steps_per_second": 29.947,
"step": 450
},
{
"epoch": 1.69,
"grad_norm": 1.261864185333252,
"learning_rate": 0.0002181372549019608,
"loss": 0.3909,
"step": 460
},
{
"epoch": 1.73,
"grad_norm": 1.3953834772109985,
"learning_rate": 0.00021200980392156862,
"loss": 0.4049,
"step": 470
},
{
"epoch": 1.76,
"grad_norm": 1.1786818504333496,
"learning_rate": 0.00020588235294117645,
"loss": 0.3903,
"step": 480
},
{
"epoch": 1.8,
"grad_norm": 1.1706407070159912,
"learning_rate": 0.00019975490196078434,
"loss": 0.3724,
"step": 490
},
{
"epoch": 1.84,
"grad_norm": 1.9597140550613403,
"learning_rate": 0.00019362745098039217,
"loss": 0.4125,
"step": 500
},
{
"epoch": 1.84,
"eval_accuracy": 0.7509191176470589,
"eval_f1_macro": 0.716105501438017,
"eval_f1_micro": 0.7509191176470589,
"eval_loss": 0.5261591672897339,
"eval_runtime": 1.1364,
"eval_samples_per_second": 957.372,
"eval_steps_per_second": 29.918,
"step": 500
},
{
"epoch": 1.88,
"grad_norm": 1.382961630821228,
"learning_rate": 0.0001875,
"loss": 0.4511,
"step": 510
},
{
"epoch": 1.91,
"grad_norm": 1.5272341966629028,
"learning_rate": 0.00018137254901960784,
"loss": 0.4201,
"step": 520
},
{
"epoch": 1.95,
"grad_norm": 1.3144079446792603,
"learning_rate": 0.00017524509803921567,
"loss": 0.4401,
"step": 530
},
{
"epoch": 1.99,
"grad_norm": 1.6088247299194336,
"learning_rate": 0.00016911764705882356,
"loss": 0.4102,
"step": 540
},
{
"epoch": 2.02,
"grad_norm": 1.457602858543396,
"learning_rate": 0.0001629901960784314,
"loss": 0.3665,
"step": 550
},
{
"epoch": 2.02,
"eval_accuracy": 0.7545955882352942,
"eval_f1_macro": 0.7190493000314321,
"eval_f1_micro": 0.7545955882352942,
"eval_loss": 0.5205144286155701,
"eval_runtime": 1.1386,
"eval_samples_per_second": 955.59,
"eval_steps_per_second": 29.862,
"step": 550
},
{
"epoch": 2.06,
"grad_norm": 1.6489365100860596,
"learning_rate": 0.00015686274509803922,
"loss": 0.387,
"step": 560
},
{
"epoch": 2.1,
"grad_norm": 1.551331877708435,
"learning_rate": 0.00015073529411764705,
"loss": 0.3103,
"step": 570
},
{
"epoch": 2.13,
"grad_norm": 2.288947582244873,
"learning_rate": 0.0001446078431372549,
"loss": 0.3649,
"step": 580
},
{
"epoch": 2.17,
"grad_norm": 1.4870082139968872,
"learning_rate": 0.00013848039215686274,
"loss": 0.3382,
"step": 590
},
{
"epoch": 2.21,
"grad_norm": 1.9287152290344238,
"learning_rate": 0.0001323529411764706,
"loss": 0.3855,
"step": 600
},
{
"epoch": 2.21,
"eval_accuracy": 0.7536764705882353,
"eval_f1_macro": 0.7160618855460168,
"eval_f1_micro": 0.7536764705882353,
"eval_loss": 0.5672034025192261,
"eval_runtime": 1.1377,
"eval_samples_per_second": 956.304,
"eval_steps_per_second": 29.884,
"step": 600
},
{
"epoch": 2.24,
"grad_norm": 1.6499133110046387,
"learning_rate": 0.00012622549019607844,
"loss": 0.3595,
"step": 610
},
{
"epoch": 2.28,
"grad_norm": 1.7214213609695435,
"learning_rate": 0.00012009803921568628,
"loss": 0.2884,
"step": 620
},
{
"epoch": 2.32,
"grad_norm": 2.0640528202056885,
"learning_rate": 0.00011397058823529411,
"loss": 0.3366,
"step": 630
},
{
"epoch": 2.35,
"grad_norm": 2.1024091243743896,
"learning_rate": 0.00010784313725490197,
"loss": 0.3589,
"step": 640
},
{
"epoch": 2.39,
"grad_norm": 1.5444495677947998,
"learning_rate": 0.0001017156862745098,
"loss": 0.3125,
"step": 650
},
{
"epoch": 2.39,
"eval_accuracy": 0.7509191176470589,
"eval_f1_macro": 0.7065648900543882,
"eval_f1_micro": 0.7509191176470589,
"eval_loss": 0.5732391476631165,
"eval_runtime": 1.0847,
"eval_samples_per_second": 1003.018,
"eval_steps_per_second": 31.344,
"step": 650
},
{
"epoch": 2.43,
"grad_norm": 2.2737483978271484,
"learning_rate": 9.558823529411764e-05,
"loss": 0.352,
"step": 660
},
{
"epoch": 2.46,
"grad_norm": 2.3418612480163574,
"learning_rate": 8.94607843137255e-05,
"loss": 0.3647,
"step": 670
},
{
"epoch": 2.5,
"grad_norm": 1.5297809839248657,
"learning_rate": 8.333333333333333e-05,
"loss": 0.3322,
"step": 680
},
{
"epoch": 2.54,
"grad_norm": 2.407879590988159,
"learning_rate": 7.720588235294118e-05,
"loss": 0.2971,
"step": 690
},
{
"epoch": 2.57,
"grad_norm": 6.948758125305176,
"learning_rate": 7.107843137254902e-05,
"loss": 0.2955,
"step": 700
},
{
"epoch": 2.57,
"eval_accuracy": 0.7555147058823529,
"eval_f1_macro": 0.7122208301016248,
"eval_f1_micro": 0.7555147058823529,
"eval_loss": 0.5928493738174438,
"eval_runtime": 1.1387,
"eval_samples_per_second": 955.456,
"eval_steps_per_second": 29.858,
"step": 700
},
{
"epoch": 2.61,
"grad_norm": 1.9339895248413086,
"learning_rate": 6.495098039215687e-05,
"loss": 0.3605,
"step": 710
},
{
"epoch": 2.65,
"grad_norm": 1.7344573736190796,
"learning_rate": 5.882352941176471e-05,
"loss": 0.3154,
"step": 720
},
{
"epoch": 2.68,
"grad_norm": 2.3221147060394287,
"learning_rate": 5.2696078431372553e-05,
"loss": 0.3483,
"step": 730
},
{
"epoch": 2.72,
"grad_norm": 2.296344518661499,
"learning_rate": 4.656862745098039e-05,
"loss": 0.3768,
"step": 740
},
{
"epoch": 2.76,
"grad_norm": 1.7548387050628662,
"learning_rate": 4.044117647058824e-05,
"loss": 0.3556,
"step": 750
},
{
"epoch": 2.76,
"eval_accuracy": 0.7536764705882353,
"eval_f1_macro": 0.7173598644493644,
"eval_f1_micro": 0.7536764705882353,
"eval_loss": 0.5704131126403809,
"eval_runtime": 1.1365,
"eval_samples_per_second": 957.351,
"eval_steps_per_second": 29.917,
"step": 750
},
{
"epoch": 2.79,
"grad_norm": 1.6045734882354736,
"learning_rate": 3.4313725490196084e-05,
"loss": 0.3194,
"step": 760
},
{
"epoch": 2.83,
"grad_norm": 1.3206483125686646,
"learning_rate": 2.8186274509803924e-05,
"loss": 0.3289,
"step": 770
},
{
"epoch": 2.87,
"grad_norm": 1.6699401140213013,
"learning_rate": 2.2058823529411766e-05,
"loss": 0.32,
"step": 780
},
{
"epoch": 2.9,
"grad_norm": 1.2462468147277832,
"learning_rate": 1.593137254901961e-05,
"loss": 0.3742,
"step": 790
},
{
"epoch": 2.94,
"grad_norm": 1.7137000560760498,
"learning_rate": 9.803921568627451e-06,
"loss": 0.3578,
"step": 800
},
{
"epoch": 2.94,
"eval_accuracy": 0.7518382352941176,
"eval_f1_macro": 0.7069047400495676,
"eval_f1_micro": 0.7518382352941176,
"eval_loss": 0.5706028938293457,
"eval_runtime": 1.0853,
"eval_samples_per_second": 1002.521,
"eval_steps_per_second": 31.329,
"step": 800
},
{
"epoch": 2.98,
"grad_norm": 1.6963423490524292,
"learning_rate": 3.6764705882352942e-06,
"loss": 0.3367,
"step": 810
},
{
"epoch": 3.0,
"step": 816,
"total_flos": 888901800034304.0,
"train_loss": 0.4176732173152998,
"train_runtime": 109.5646,
"train_samples_per_second": 238.216,
"train_steps_per_second": 7.448
}
],
"logging_steps": 10,
"max_steps": 816,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 888901800034304.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}