akkky02's picture
Upload folder using huggingface_hub
aa5b5e2 verified
raw
history blame
18.9 kB
{
"best_metric": 0.4980033338069916,
"best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/google/flan_t5_base_twitter/checkpoint-50",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 816,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 1.5092765092849731,
"learning_rate": 0.0004938725490196079,
"loss": 0.5853,
"step": 10
},
{
"epoch": 0.07,
"grad_norm": 2.050435781478882,
"learning_rate": 0.0004877450980392157,
"loss": 0.5303,
"step": 20
},
{
"epoch": 0.11,
"grad_norm": 2.564333200454712,
"learning_rate": 0.00048161764705882356,
"loss": 0.5528,
"step": 30
},
{
"epoch": 0.15,
"grad_norm": 1.0580081939697266,
"learning_rate": 0.00047549019607843134,
"loss": 0.5186,
"step": 40
},
{
"epoch": 0.18,
"grad_norm": 1.274806261062622,
"learning_rate": 0.0004693627450980392,
"loss": 0.4683,
"step": 50
},
{
"epoch": 0.18,
"eval_accuracy": 0.765625,
"eval_f1_macro": 0.7274910494600209,
"eval_f1_micro": 0.765625,
"eval_loss": 0.4980033338069916,
"eval_runtime": 3.0123,
"eval_samples_per_second": 361.184,
"eval_steps_per_second": 11.287,
"step": 50
},
{
"epoch": 0.22,
"grad_norm": 2.2350311279296875,
"learning_rate": 0.0004632352941176471,
"loss": 0.4985,
"step": 60
},
{
"epoch": 0.26,
"grad_norm": 1.238688349723816,
"learning_rate": 0.0004571078431372549,
"loss": 0.4864,
"step": 70
},
{
"epoch": 0.29,
"grad_norm": 2.0093843936920166,
"learning_rate": 0.0004509803921568628,
"loss": 0.4836,
"step": 80
},
{
"epoch": 0.33,
"grad_norm": 1.2151559591293335,
"learning_rate": 0.00044485294117647056,
"loss": 0.458,
"step": 90
},
{
"epoch": 0.37,
"grad_norm": 1.6234703063964844,
"learning_rate": 0.00043872549019607844,
"loss": 0.5241,
"step": 100
},
{
"epoch": 0.37,
"eval_accuracy": 0.7582720588235294,
"eval_f1_macro": 0.7287382342541164,
"eval_f1_micro": 0.7582720588235294,
"eval_loss": 0.5018450617790222,
"eval_runtime": 3.0237,
"eval_samples_per_second": 359.823,
"eval_steps_per_second": 11.244,
"step": 100
},
{
"epoch": 0.4,
"grad_norm": 1.0740729570388794,
"learning_rate": 0.0004325980392156863,
"loss": 0.5105,
"step": 110
},
{
"epoch": 0.44,
"grad_norm": 1.214935541152954,
"learning_rate": 0.0004264705882352941,
"loss": 0.4946,
"step": 120
},
{
"epoch": 0.48,
"grad_norm": 1.2495511770248413,
"learning_rate": 0.000420343137254902,
"loss": 0.462,
"step": 130
},
{
"epoch": 0.51,
"grad_norm": 1.5260485410690308,
"learning_rate": 0.0004142156862745098,
"loss": 0.5227,
"step": 140
},
{
"epoch": 0.55,
"grad_norm": 1.3698198795318604,
"learning_rate": 0.00040808823529411766,
"loss": 0.4367,
"step": 150
},
{
"epoch": 0.55,
"eval_accuracy": 0.7564338235294118,
"eval_f1_macro": 0.6810277208948305,
"eval_f1_micro": 0.7564338235294118,
"eval_loss": 0.5173827409744263,
"eval_runtime": 3.033,
"eval_samples_per_second": 358.725,
"eval_steps_per_second": 11.21,
"step": 150
},
{
"epoch": 0.59,
"grad_norm": 1.0891311168670654,
"learning_rate": 0.0004019607843137255,
"loss": 0.4669,
"step": 160
},
{
"epoch": 0.62,
"grad_norm": 1.4805020093917847,
"learning_rate": 0.0003958333333333333,
"loss": 0.5305,
"step": 170
},
{
"epoch": 0.66,
"grad_norm": 1.3479547500610352,
"learning_rate": 0.0003897058823529412,
"loss": 0.5154,
"step": 180
},
{
"epoch": 0.7,
"grad_norm": 1.1003795862197876,
"learning_rate": 0.00038357843137254904,
"loss": 0.4962,
"step": 190
},
{
"epoch": 0.74,
"grad_norm": 1.0502173900604248,
"learning_rate": 0.0003774509803921569,
"loss": 0.4868,
"step": 200
},
{
"epoch": 0.74,
"eval_accuracy": 0.7490808823529411,
"eval_f1_macro": 0.695327189081901,
"eval_f1_micro": 0.7490808823529411,
"eval_loss": 0.5048360228538513,
"eval_runtime": 3.0382,
"eval_samples_per_second": 358.107,
"eval_steps_per_second": 11.191,
"step": 200
},
{
"epoch": 0.77,
"grad_norm": 0.9742279052734375,
"learning_rate": 0.0003713235294117647,
"loss": 0.4764,
"step": 210
},
{
"epoch": 0.81,
"grad_norm": 2.2106645107269287,
"learning_rate": 0.00036519607843137254,
"loss": 0.5063,
"step": 220
},
{
"epoch": 0.85,
"grad_norm": 0.7889574766159058,
"learning_rate": 0.0003590686274509804,
"loss": 0.4689,
"step": 230
},
{
"epoch": 0.88,
"grad_norm": 0.676556408405304,
"learning_rate": 0.00035294117647058826,
"loss": 0.4483,
"step": 240
},
{
"epoch": 0.92,
"grad_norm": 1.182094693183899,
"learning_rate": 0.0003468137254901961,
"loss": 0.4824,
"step": 250
},
{
"epoch": 0.92,
"eval_accuracy": 0.7545955882352942,
"eval_f1_macro": 0.7132815160263528,
"eval_f1_micro": 0.7545955882352942,
"eval_loss": 0.5126115083694458,
"eval_runtime": 3.0384,
"eval_samples_per_second": 358.088,
"eval_steps_per_second": 11.19,
"step": 250
},
{
"epoch": 0.96,
"grad_norm": 0.6538687944412231,
"learning_rate": 0.0003406862745098039,
"loss": 0.4162,
"step": 260
},
{
"epoch": 0.99,
"grad_norm": 0.750243604183197,
"learning_rate": 0.00033455882352941176,
"loss": 0.5175,
"step": 270
},
{
"epoch": 1.03,
"grad_norm": 1.4954441785812378,
"learning_rate": 0.0003284313725490196,
"loss": 0.441,
"step": 280
},
{
"epoch": 1.07,
"grad_norm": 1.2219609022140503,
"learning_rate": 0.0003223039215686275,
"loss": 0.354,
"step": 290
},
{
"epoch": 1.1,
"grad_norm": 0.8662141561508179,
"learning_rate": 0.0003161764705882353,
"loss": 0.4008,
"step": 300
},
{
"epoch": 1.1,
"eval_accuracy": 0.75,
"eval_f1_macro": 0.7067094804860161,
"eval_f1_micro": 0.75,
"eval_loss": 0.5365704894065857,
"eval_runtime": 3.0443,
"eval_samples_per_second": 357.39,
"eval_steps_per_second": 11.168,
"step": 300
},
{
"epoch": 1.14,
"grad_norm": 0.8905866146087646,
"learning_rate": 0.00031004901960784314,
"loss": 0.3744,
"step": 310
},
{
"epoch": 1.18,
"grad_norm": 1.7313600778579712,
"learning_rate": 0.00030392156862745097,
"loss": 0.3296,
"step": 320
},
{
"epoch": 1.21,
"grad_norm": 0.8115060925483704,
"learning_rate": 0.0002977941176470588,
"loss": 0.4732,
"step": 330
},
{
"epoch": 1.25,
"grad_norm": 1.17222261428833,
"learning_rate": 0.0002916666666666667,
"loss": 0.3701,
"step": 340
},
{
"epoch": 1.29,
"grad_norm": 0.9669880867004395,
"learning_rate": 0.0002855392156862745,
"loss": 0.3972,
"step": 350
},
{
"epoch": 1.29,
"eval_accuracy": 0.7279411764705882,
"eval_f1_macro": 0.6977233135099418,
"eval_f1_micro": 0.7279411764705882,
"eval_loss": 0.5493308901786804,
"eval_runtime": 3.0428,
"eval_samples_per_second": 357.566,
"eval_steps_per_second": 11.174,
"step": 350
},
{
"epoch": 1.32,
"grad_norm": 1.2415670156478882,
"learning_rate": 0.00027941176470588236,
"loss": 0.3653,
"step": 360
},
{
"epoch": 1.36,
"grad_norm": 2.286729574203491,
"learning_rate": 0.0002732843137254902,
"loss": 0.3603,
"step": 370
},
{
"epoch": 1.4,
"grad_norm": 1.094009280204773,
"learning_rate": 0.000267156862745098,
"loss": 0.4615,
"step": 380
},
{
"epoch": 1.43,
"grad_norm": 0.9113813042640686,
"learning_rate": 0.0002610294117647059,
"loss": 0.3903,
"step": 390
},
{
"epoch": 1.47,
"grad_norm": 1.8263704776763916,
"learning_rate": 0.00025490196078431374,
"loss": 0.3889,
"step": 400
},
{
"epoch": 1.47,
"eval_accuracy": 0.7637867647058824,
"eval_f1_macro": 0.7163329508713944,
"eval_f1_micro": 0.7637867647058824,
"eval_loss": 0.5429306030273438,
"eval_runtime": 3.0433,
"eval_samples_per_second": 357.509,
"eval_steps_per_second": 11.172,
"step": 400
},
{
"epoch": 1.51,
"grad_norm": 1.1813833713531494,
"learning_rate": 0.00024877450980392157,
"loss": 0.4215,
"step": 410
},
{
"epoch": 1.54,
"grad_norm": 1.8237628936767578,
"learning_rate": 0.0002426470588235294,
"loss": 0.3536,
"step": 420
},
{
"epoch": 1.58,
"grad_norm": 1.337441086769104,
"learning_rate": 0.00023651960784313726,
"loss": 0.3962,
"step": 430
},
{
"epoch": 1.62,
"grad_norm": 1.8698618412017822,
"learning_rate": 0.0002303921568627451,
"loss": 0.3342,
"step": 440
},
{
"epoch": 1.65,
"grad_norm": 1.4117869138717651,
"learning_rate": 0.00022426470588235296,
"loss": 0.4151,
"step": 450
},
{
"epoch": 1.65,
"eval_accuracy": 0.7490808823529411,
"eval_f1_macro": 0.7160661999822198,
"eval_f1_micro": 0.7490808823529411,
"eval_loss": 0.5364983677864075,
"eval_runtime": 3.0413,
"eval_samples_per_second": 357.743,
"eval_steps_per_second": 11.179,
"step": 450
},
{
"epoch": 1.69,
"grad_norm": 1.9636564254760742,
"learning_rate": 0.0002181372549019608,
"loss": 0.373,
"step": 460
},
{
"epoch": 1.73,
"grad_norm": 1.8975811004638672,
"learning_rate": 0.00021200980392156862,
"loss": 0.3889,
"step": 470
},
{
"epoch": 1.76,
"grad_norm": 1.7080246210098267,
"learning_rate": 0.00020588235294117645,
"loss": 0.3495,
"step": 480
},
{
"epoch": 1.8,
"grad_norm": 1.1755714416503906,
"learning_rate": 0.00019975490196078434,
"loss": 0.3661,
"step": 490
},
{
"epoch": 1.84,
"grad_norm": 1.639256238937378,
"learning_rate": 0.00019362745098039217,
"loss": 0.3842,
"step": 500
},
{
"epoch": 1.84,
"eval_accuracy": 0.75,
"eval_f1_macro": 0.7184907842868666,
"eval_f1_micro": 0.75,
"eval_loss": 0.5349798798561096,
"eval_runtime": 3.0415,
"eval_samples_per_second": 357.716,
"eval_steps_per_second": 11.179,
"step": 500
},
{
"epoch": 1.88,
"grad_norm": 1.3755793571472168,
"learning_rate": 0.0001875,
"loss": 0.4437,
"step": 510
},
{
"epoch": 1.91,
"grad_norm": 1.4269018173217773,
"learning_rate": 0.00018137254901960784,
"loss": 0.4263,
"step": 520
},
{
"epoch": 1.95,
"grad_norm": 1.2029131650924683,
"learning_rate": 0.00017524509803921567,
"loss": 0.4145,
"step": 530
},
{
"epoch": 1.99,
"grad_norm": 1.048883080482483,
"learning_rate": 0.00016911764705882356,
"loss": 0.3648,
"step": 540
},
{
"epoch": 2.02,
"grad_norm": 0.9863032102584839,
"learning_rate": 0.0001629901960784314,
"loss": 0.3276,
"step": 550
},
{
"epoch": 2.02,
"eval_accuracy": 0.7564338235294118,
"eval_f1_macro": 0.7223909884910498,
"eval_f1_micro": 0.7564338235294118,
"eval_loss": 0.5359387397766113,
"eval_runtime": 3.039,
"eval_samples_per_second": 358.017,
"eval_steps_per_second": 11.188,
"step": 550
},
{
"epoch": 2.06,
"grad_norm": 1.1191493272781372,
"learning_rate": 0.00015686274509803922,
"loss": 0.3376,
"step": 560
},
{
"epoch": 2.1,
"grad_norm": 1.004733681678772,
"learning_rate": 0.00015073529411764705,
"loss": 0.26,
"step": 570
},
{
"epoch": 2.13,
"grad_norm": 2.2562036514282227,
"learning_rate": 0.0001446078431372549,
"loss": 0.2736,
"step": 580
},
{
"epoch": 2.17,
"grad_norm": 1.4733184576034546,
"learning_rate": 0.00013848039215686274,
"loss": 0.2466,
"step": 590
},
{
"epoch": 2.21,
"grad_norm": 1.0705807209014893,
"learning_rate": 0.0001323529411764706,
"loss": 0.3135,
"step": 600
},
{
"epoch": 2.21,
"eval_accuracy": 0.7564338235294118,
"eval_f1_macro": 0.7120873963341872,
"eval_f1_micro": 0.7564338235294118,
"eval_loss": 0.6310513615608215,
"eval_runtime": 3.0409,
"eval_samples_per_second": 357.789,
"eval_steps_per_second": 11.181,
"step": 600
},
{
"epoch": 2.24,
"grad_norm": 1.9386248588562012,
"learning_rate": 0.00012622549019607844,
"loss": 0.2654,
"step": 610
},
{
"epoch": 2.28,
"grad_norm": 1.9174495935440063,
"learning_rate": 0.00012009803921568628,
"loss": 0.2348,
"step": 620
},
{
"epoch": 2.32,
"grad_norm": 4.020888328552246,
"learning_rate": 0.00011397058823529411,
"loss": 0.2742,
"step": 630
},
{
"epoch": 2.35,
"grad_norm": 2.1899900436401367,
"learning_rate": 0.00010784313725490197,
"loss": 0.31,
"step": 640
},
{
"epoch": 2.39,
"grad_norm": 1.9852439165115356,
"learning_rate": 0.0001017156862745098,
"loss": 0.2369,
"step": 650
},
{
"epoch": 2.39,
"eval_accuracy": 0.7490808823529411,
"eval_f1_macro": 0.7048950673768484,
"eval_f1_micro": 0.7490808823529411,
"eval_loss": 0.6068652868270874,
"eval_runtime": 3.0376,
"eval_samples_per_second": 358.18,
"eval_steps_per_second": 11.193,
"step": 650
},
{
"epoch": 2.43,
"grad_norm": 2.2759952545166016,
"learning_rate": 9.558823529411764e-05,
"loss": 0.2773,
"step": 660
},
{
"epoch": 2.46,
"grad_norm": 1.9017343521118164,
"learning_rate": 8.94607843137255e-05,
"loss": 0.2468,
"step": 670
},
{
"epoch": 2.5,
"grad_norm": 1.6225106716156006,
"learning_rate": 8.333333333333333e-05,
"loss": 0.268,
"step": 680
},
{
"epoch": 2.54,
"grad_norm": 1.8427222967147827,
"learning_rate": 7.720588235294118e-05,
"loss": 0.2501,
"step": 690
},
{
"epoch": 2.57,
"grad_norm": 1.5418189764022827,
"learning_rate": 7.107843137254902e-05,
"loss": 0.208,
"step": 700
},
{
"epoch": 2.57,
"eval_accuracy": 0.7536764705882353,
"eval_f1_macro": 0.7090758160492004,
"eval_f1_micro": 0.7536764705882353,
"eval_loss": 0.6517214179039001,
"eval_runtime": 3.0424,
"eval_samples_per_second": 357.618,
"eval_steps_per_second": 11.176,
"step": 700
},
{
"epoch": 2.61,
"grad_norm": 2.459115743637085,
"learning_rate": 6.495098039215687e-05,
"loss": 0.2884,
"step": 710
},
{
"epoch": 2.65,
"grad_norm": 1.4037036895751953,
"learning_rate": 5.882352941176471e-05,
"loss": 0.2304,
"step": 720
},
{
"epoch": 2.68,
"grad_norm": 1.80465567111969,
"learning_rate": 5.2696078431372553e-05,
"loss": 0.2145,
"step": 730
},
{
"epoch": 2.72,
"grad_norm": 4.031944751739502,
"learning_rate": 4.656862745098039e-05,
"loss": 0.2753,
"step": 740
},
{
"epoch": 2.76,
"grad_norm": 2.259483814239502,
"learning_rate": 4.044117647058824e-05,
"loss": 0.2667,
"step": 750
},
{
"epoch": 2.76,
"eval_accuracy": 0.7545955882352942,
"eval_f1_macro": 0.7151215187713478,
"eval_f1_micro": 0.7545955882352942,
"eval_loss": 0.6563965678215027,
"eval_runtime": 3.0386,
"eval_samples_per_second": 358.058,
"eval_steps_per_second": 11.189,
"step": 750
},
{
"epoch": 2.79,
"grad_norm": 1.8323283195495605,
"learning_rate": 3.4313725490196084e-05,
"loss": 0.2648,
"step": 760
},
{
"epoch": 2.83,
"grad_norm": 2.718439817428589,
"learning_rate": 2.8186274509803924e-05,
"loss": 0.2613,
"step": 770
},
{
"epoch": 2.87,
"grad_norm": 1.7432453632354736,
"learning_rate": 2.2058823529411766e-05,
"loss": 0.2136,
"step": 780
},
{
"epoch": 2.9,
"grad_norm": 1.44099760055542,
"learning_rate": 1.593137254901961e-05,
"loss": 0.278,
"step": 790
},
{
"epoch": 2.94,
"grad_norm": 1.8403719663619995,
"learning_rate": 9.803921568627451e-06,
"loss": 0.3232,
"step": 800
},
{
"epoch": 2.94,
"eval_accuracy": 0.7628676470588235,
"eval_f1_macro": 0.7184852374839538,
"eval_f1_micro": 0.7628676470588235,
"eval_loss": 0.651547908782959,
"eval_runtime": 3.0405,
"eval_samples_per_second": 357.841,
"eval_steps_per_second": 11.183,
"step": 800
},
{
"epoch": 2.98,
"grad_norm": 3.050947666168213,
"learning_rate": 3.6764705882352942e-06,
"loss": 0.2703,
"step": 810
},
{
"epoch": 3.0,
"step": 816,
"total_flos": 3987151653961728.0,
"train_loss": 0.38329169475564767,
"train_runtime": 337.4804,
"train_samples_per_second": 77.338,
"train_steps_per_second": 2.418
}
],
"logging_steps": 10,
"max_steps": 816,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"total_flos": 3987151653961728.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}