{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 17.77459749552773,
"eval_steps": 500,
"global_step": 1242,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14311270125223613,
"grad_norm": 7.146514892578125,
"learning_rate": 0.00039677938808373593,
"loss": 3.5751,
"step": 10
},
{
"epoch": 0.28622540250447226,
"grad_norm": 2.1325223445892334,
"learning_rate": 0.00039355877616747184,
"loss": 0.939,
"step": 20
},
{
"epoch": 0.4293381037567084,
"grad_norm": 0.8520782589912415,
"learning_rate": 0.00039033816425120774,
"loss": 0.2653,
"step": 30
},
{
"epoch": 0.5724508050089445,
"grad_norm": 0.7653748393058777,
"learning_rate": 0.00038711755233494365,
"loss": 0.1603,
"step": 40
},
{
"epoch": 0.7155635062611807,
"grad_norm": 0.661469578742981,
"learning_rate": 0.00038389694041867956,
"loss": 0.1886,
"step": 50
},
{
"epoch": 0.8586762075134168,
"grad_norm": 0.39610955119132996,
"learning_rate": 0.00038067632850241547,
"loss": 0.1859,
"step": 60
},
{
"epoch": 1.0017889087656529,
"grad_norm": 0.4488755464553833,
"learning_rate": 0.0003774557165861514,
"loss": 0.1538,
"step": 70
},
{
"epoch": 1.144901610017889,
"grad_norm": 0.2944377362728119,
"learning_rate": 0.00037423510466988734,
"loss": 0.1195,
"step": 80
},
{
"epoch": 1.2880143112701252,
"grad_norm": 0.29124024510383606,
"learning_rate": 0.0003710144927536232,
"loss": 0.1271,
"step": 90
},
{
"epoch": 1.4311270125223614,
"grad_norm": 0.42328736186027527,
"learning_rate": 0.0003677938808373591,
"loss": 0.1018,
"step": 100
},
{
"epoch": 1.5742397137745976,
"grad_norm": 0.3259565234184265,
"learning_rate": 0.00036457326892109506,
"loss": 0.0848,
"step": 110
},
{
"epoch": 1.7173524150268338,
"grad_norm": 0.479124516248703,
"learning_rate": 0.0003613526570048309,
"loss": 0.106,
"step": 120
},
{
"epoch": 1.8604651162790697,
"grad_norm": 0.40788090229034424,
"learning_rate": 0.0003581320450885668,
"loss": 0.0969,
"step": 130
},
{
"epoch": 2.0035778175313057,
"grad_norm": 0.3574964106082916,
"learning_rate": 0.0003549114331723028,
"loss": 0.124,
"step": 140
},
{
"epoch": 2.146690518783542,
"grad_norm": 0.37805065512657166,
"learning_rate": 0.0003516908212560387,
"loss": 0.0491,
"step": 150
},
{
"epoch": 2.289803220035778,
"grad_norm": 0.25937220454216003,
"learning_rate": 0.00034847020933977455,
"loss": 0.0669,
"step": 160
},
{
"epoch": 2.4329159212880143,
"grad_norm": 0.34056201577186584,
"learning_rate": 0.00034524959742351046,
"loss": 0.0595,
"step": 170
},
{
"epoch": 2.5760286225402504,
"grad_norm": 0.30211707949638367,
"learning_rate": 0.0003420289855072464,
"loss": 0.0648,
"step": 180
},
{
"epoch": 2.7191413237924866,
"grad_norm": 0.18458786606788635,
"learning_rate": 0.0003388083735909823,
"loss": 0.0545,
"step": 190
},
{
"epoch": 2.862254025044723,
"grad_norm": 0.27384912967681885,
"learning_rate": 0.0003355877616747182,
"loss": 0.0684,
"step": 200
},
{
"epoch": 3.005366726296959,
"grad_norm": 0.16877304017543793,
"learning_rate": 0.00033236714975845414,
"loss": 0.0695,
"step": 210
},
{
"epoch": 3.148479427549195,
"grad_norm": 0.07739146798849106,
"learning_rate": 0.00032914653784219005,
"loss": 0.0256,
"step": 220
},
{
"epoch": 3.2915921288014314,
"grad_norm": 0.2832132577896118,
"learning_rate": 0.0003259259259259259,
"loss": 0.0263,
"step": 230
},
{
"epoch": 3.434704830053667,
"grad_norm": 0.21412289142608643,
"learning_rate": 0.00032270531400966187,
"loss": 0.0287,
"step": 240
},
{
"epoch": 3.5778175313059033,
"grad_norm": 0.1840696483850479,
"learning_rate": 0.0003194847020933978,
"loss": 0.0469,
"step": 250
},
{
"epoch": 3.7209302325581395,
"grad_norm": 0.34246236085891724,
"learning_rate": 0.00031626409017713363,
"loss": 0.0243,
"step": 260
},
{
"epoch": 3.8640429338103757,
"grad_norm": 0.056173525750637054,
"learning_rate": 0.0003130434782608696,
"loss": 0.0252,
"step": 270
},
{
"epoch": 4.007155635062611,
"grad_norm": 0.09256428480148315,
"learning_rate": 0.0003098228663446055,
"loss": 0.0216,
"step": 280
},
{
"epoch": 4.150268336314848,
"grad_norm": 0.20085078477859497,
"learning_rate": 0.0003066022544283414,
"loss": 0.0102,
"step": 290
},
{
"epoch": 4.293381037567084,
"grad_norm": 0.021982286125421524,
"learning_rate": 0.0003033816425120773,
"loss": 0.0131,
"step": 300
},
{
"epoch": 4.43649373881932,
"grad_norm": 0.054368916898965836,
"learning_rate": 0.0003001610305958132,
"loss": 0.0145,
"step": 310
},
{
"epoch": 4.579606440071556,
"grad_norm": 0.0868581086397171,
"learning_rate": 0.00029694041867954913,
"loss": 0.0181,
"step": 320
},
{
"epoch": 4.722719141323792,
"grad_norm": 0.24308475852012634,
"learning_rate": 0.00029371980676328504,
"loss": 0.0125,
"step": 330
},
{
"epoch": 4.8658318425760285,
"grad_norm": 0.14394602179527283,
"learning_rate": 0.00029049919484702095,
"loss": 0.0149,
"step": 340
},
{
"epoch": 5.008944543828265,
"grad_norm": 0.05040862783789635,
"learning_rate": 0.00028727858293075686,
"loss": 0.0096,
"step": 350
},
{
"epoch": 5.152057245080501,
"grad_norm": 0.28047820925712585,
"learning_rate": 0.00028405797101449276,
"loss": 0.0032,
"step": 360
},
{
"epoch": 5.295169946332737,
"grad_norm": 0.07502233237028122,
"learning_rate": 0.0002808373590982287,
"loss": 0.0038,
"step": 370
},
{
"epoch": 5.438282647584973,
"grad_norm": 0.8537871837615967,
"learning_rate": 0.0002776167471819646,
"loss": 0.0073,
"step": 380
},
{
"epoch": 5.5813953488372094,
"grad_norm": 0.005727715790271759,
"learning_rate": 0.0002743961352657005,
"loss": 0.0106,
"step": 390
},
{
"epoch": 5.724508050089446,
"grad_norm": 0.04042937234044075,
"learning_rate": 0.0002711755233494364,
"loss": 0.0041,
"step": 400
},
{
"epoch": 5.867620751341682,
"grad_norm": 0.11248348653316498,
"learning_rate": 0.0002679549114331723,
"loss": 0.003,
"step": 410
},
{
"epoch": 6.010733452593918,
"grad_norm": 0.014976495876908302,
"learning_rate": 0.00026473429951690827,
"loss": 0.0067,
"step": 420
},
{
"epoch": 6.153846153846154,
"grad_norm": 0.15070898830890656,
"learning_rate": 0.0002615136876006441,
"loss": 0.0035,
"step": 430
},
{
"epoch": 6.29695885509839,
"grad_norm": 0.0066925715655088425,
"learning_rate": 0.00025829307568438003,
"loss": 0.0027,
"step": 440
},
{
"epoch": 6.440071556350626,
"grad_norm": 0.015314973890781403,
"learning_rate": 0.00025507246376811594,
"loss": 0.0045,
"step": 450
},
{
"epoch": 6.583184257602863,
"grad_norm": 0.030470581725239754,
"learning_rate": 0.00025185185185185185,
"loss": 0.0029,
"step": 460
},
{
"epoch": 6.726296958855098,
"grad_norm": 0.0092542115598917,
"learning_rate": 0.00024863123993558775,
"loss": 0.0043,
"step": 470
},
{
"epoch": 6.869409660107334,
"grad_norm": 0.016118695959448814,
"learning_rate": 0.00024541062801932366,
"loss": 0.0011,
"step": 480
},
{
"epoch": 7.01252236135957,
"grad_norm": 0.01760493591427803,
"learning_rate": 0.0002421900161030596,
"loss": 0.004,
"step": 490
},
{
"epoch": 7.155635062611807,
"grad_norm": 0.0077838534489274025,
"learning_rate": 0.0002389694041867955,
"loss": 0.0004,
"step": 500
},
{
"epoch": 7.298747763864043,
"grad_norm": 0.006766254547983408,
"learning_rate": 0.00023574879227053139,
"loss": 0.0015,
"step": 510
},
{
"epoch": 7.441860465116279,
"grad_norm": 0.009164445102214813,
"learning_rate": 0.00023252818035426732,
"loss": 0.0006,
"step": 520
},
{
"epoch": 7.584973166368515,
"grad_norm": 0.002783432835713029,
"learning_rate": 0.00022930756843800323,
"loss": 0.0005,
"step": 530
},
{
"epoch": 7.728085867620751,
"grad_norm": 0.15486985445022583,
"learning_rate": 0.0002260869565217391,
"loss": 0.0036,
"step": 540
},
{
"epoch": 7.8711985688729875,
"grad_norm": 0.042650897055864334,
"learning_rate": 0.00022286634460547507,
"loss": 0.0006,
"step": 550
},
{
"epoch": 8.014311270125223,
"grad_norm": 0.0018309111474081874,
"learning_rate": 0.00021964573268921095,
"loss": 0.0011,
"step": 560
},
{
"epoch": 8.15742397137746,
"grad_norm": 0.005560223013162613,
"learning_rate": 0.00021642512077294686,
"loss": 0.001,
"step": 570
},
{
"epoch": 8.300536672629695,
"grad_norm": 0.03727242350578308,
"learning_rate": 0.0002132045088566828,
"loss": 0.0015,
"step": 580
},
{
"epoch": 8.443649373881932,
"grad_norm": 0.34321093559265137,
"learning_rate": 0.00020998389694041868,
"loss": 0.0031,
"step": 590
},
{
"epoch": 8.586762075134168,
"grad_norm": 0.020942572504281998,
"learning_rate": 0.00020676328502415459,
"loss": 0.0022,
"step": 600
},
{
"epoch": 8.729874776386405,
"grad_norm": 0.010578208602964878,
"learning_rate": 0.00020354267310789052,
"loss": 0.0006,
"step": 610
},
{
"epoch": 8.87298747763864,
"grad_norm": 0.004323468543589115,
"learning_rate": 0.00020032206119162643,
"loss": 0.0005,
"step": 620
},
{
"epoch": 9.016100178890877,
"grad_norm": 0.1614646017551422,
"learning_rate": 0.00019710144927536234,
"loss": 0.0011,
"step": 630
},
{
"epoch": 9.159212880143112,
"grad_norm": 0.0012326347641646862,
"learning_rate": 0.00019388083735909825,
"loss": 0.0005,
"step": 640
},
{
"epoch": 9.30232558139535,
"grad_norm": 0.006543063558638096,
"learning_rate": 0.00019066022544283415,
"loss": 0.0002,
"step": 650
},
{
"epoch": 9.445438282647585,
"grad_norm": 0.21280421316623688,
"learning_rate": 0.00018743961352657006,
"loss": 0.0003,
"step": 660
},
{
"epoch": 9.588550983899822,
"grad_norm": 0.006327577400952578,
"learning_rate": 0.00018421900161030597,
"loss": 0.0002,
"step": 670
},
{
"epoch": 9.731663685152057,
"grad_norm": 0.0025285291485488415,
"learning_rate": 0.00018099838969404188,
"loss": 0.0001,
"step": 680
},
{
"epoch": 9.874776386404294,
"grad_norm": 0.0014309959951788187,
"learning_rate": 0.00017777777777777779,
"loss": 0.0001,
"step": 690
},
{
"epoch": 10.01788908765653,
"grad_norm": 0.0024150668177753687,
"learning_rate": 0.0001745571658615137,
"loss": 0.0001,
"step": 700
},
{
"epoch": 10.161001788908766,
"grad_norm": 0.0025761763099581003,
"learning_rate": 0.0001713365539452496,
"loss": 0.0001,
"step": 710
},
{
"epoch": 10.304114490161002,
"grad_norm": 0.0017020882805809379,
"learning_rate": 0.0001681159420289855,
"loss": 0.0001,
"step": 720
},
{
"epoch": 10.447227191413237,
"grad_norm": 0.0028596080373972654,
"learning_rate": 0.00016489533011272142,
"loss": 0.0001,
"step": 730
},
{
"epoch": 10.590339892665474,
"grad_norm": 0.0019378801807761192,
"learning_rate": 0.00016167471819645735,
"loss": 0.0001,
"step": 740
},
{
"epoch": 10.73345259391771,
"grad_norm": 0.001211544731631875,
"learning_rate": 0.00015845410628019323,
"loss": 0.0001,
"step": 750
},
{
"epoch": 10.876565295169947,
"grad_norm": 0.0033484594896435738,
"learning_rate": 0.00015523349436392914,
"loss": 0.0001,
"step": 760
},
{
"epoch": 11.019677996422182,
"grad_norm": 0.001493943389505148,
"learning_rate": 0.00015201288244766508,
"loss": 0.0001,
"step": 770
},
{
"epoch": 11.162790697674419,
"grad_norm": 0.0019909776747226715,
"learning_rate": 0.00014879227053140096,
"loss": 0.0001,
"step": 780
},
{
"epoch": 11.305903398926654,
"grad_norm": 0.0011982638388872147,
"learning_rate": 0.0001455716586151369,
"loss": 0.0001,
"step": 790
},
{
"epoch": 11.449016100178891,
"grad_norm": 0.0015958467265591025,
"learning_rate": 0.0001423510466988728,
"loss": 0.0001,
"step": 800
},
{
"epoch": 11.592128801431127,
"grad_norm": 0.0008461058023385704,
"learning_rate": 0.0001391304347826087,
"loss": 0.0001,
"step": 810
},
{
"epoch": 11.735241502683364,
"grad_norm": 0.0005576548865064979,
"learning_rate": 0.00013590982286634462,
"loss": 0.0001,
"step": 820
},
{
"epoch": 11.878354203935599,
"grad_norm": 0.0017713948618620634,
"learning_rate": 0.0001326892109500805,
"loss": 0.0001,
"step": 830
},
{
"epoch": 12.021466905187836,
"grad_norm": 0.001206880551762879,
"learning_rate": 0.00012946859903381643,
"loss": 0.0001,
"step": 840
},
{
"epoch": 12.164579606440071,
"grad_norm": 0.0013083606027066708,
"learning_rate": 0.00012624798711755234,
"loss": 0.0001,
"step": 850
},
{
"epoch": 12.307692307692308,
"grad_norm": 0.0008201482123695314,
"learning_rate": 0.00012302737520128825,
"loss": 0.0001,
"step": 860
},
{
"epoch": 12.450805008944544,
"grad_norm": 0.0006802495336160064,
"learning_rate": 0.00011980676328502416,
"loss": 0.0001,
"step": 870
},
{
"epoch": 12.59391771019678,
"grad_norm": 0.0017911783652380109,
"learning_rate": 0.00011658615136876008,
"loss": 0.0001,
"step": 880
},
{
"epoch": 12.737030411449016,
"grad_norm": 0.0007388959056697786,
"learning_rate": 0.00011336553945249598,
"loss": 0.0,
"step": 890
},
{
"epoch": 12.880143112701251,
"grad_norm": 0.0007727427291683853,
"learning_rate": 0.00011014492753623188,
"loss": 0.0001,
"step": 900
},
{
"epoch": 13.023255813953488,
"grad_norm": 0.0008818788919597864,
"learning_rate": 0.0001069243156199678,
"loss": 0.0001,
"step": 910
},
{
"epoch": 13.166368515205724,
"grad_norm": 0.0005572364898398519,
"learning_rate": 0.0001037037037037037,
"loss": 0.0,
"step": 920
},
{
"epoch": 13.30948121645796,
"grad_norm": 0.0009758470696397126,
"learning_rate": 0.00010048309178743962,
"loss": 0.0001,
"step": 930
},
{
"epoch": 13.452593917710196,
"grad_norm": 0.0003166435344610363,
"learning_rate": 9.726247987117553e-05,
"loss": 0.0001,
"step": 940
},
{
"epoch": 13.595706618962433,
"grad_norm": 0.0005005749990232289,
"learning_rate": 9.404186795491144e-05,
"loss": 0.0,
"step": 950
},
{
"epoch": 13.738819320214668,
"grad_norm": 0.0003304154670331627,
"learning_rate": 9.082125603864735e-05,
"loss": 0.0001,
"step": 960
},
{
"epoch": 13.881932021466906,
"grad_norm": 0.0005377003108151257,
"learning_rate": 8.760064412238325e-05,
"loss": 0.0001,
"step": 970
},
{
"epoch": 14.02504472271914,
"grad_norm": 0.0015913191018626094,
"learning_rate": 8.438003220611916e-05,
"loss": 0.0001,
"step": 980
},
{
"epoch": 14.168157423971378,
"grad_norm": 0.000676720985211432,
"learning_rate": 8.115942028985508e-05,
"loss": 0.0001,
"step": 990
},
{
"epoch": 14.311270125223613,
"grad_norm": 0.0007494900492019951,
"learning_rate": 7.793880837359099e-05,
"loss": 0.0001,
"step": 1000
},
{
"epoch": 14.45438282647585,
"grad_norm": 0.0015422647120431066,
"learning_rate": 7.47181964573269e-05,
"loss": 0.0,
"step": 1010
},
{
"epoch": 14.597495527728086,
"grad_norm": 0.0005012313486076891,
"learning_rate": 7.14975845410628e-05,
"loss": 0.0,
"step": 1020
},
{
"epoch": 14.740608228980323,
"grad_norm": 0.0008338551269844174,
"learning_rate": 6.827697262479872e-05,
"loss": 0.0,
"step": 1030
},
{
"epoch": 14.883720930232558,
"grad_norm": 0.0006810138584114611,
"learning_rate": 6.505636070853462e-05,
"loss": 0.0001,
"step": 1040
},
{
"epoch": 15.026833631484795,
"grad_norm": 0.00043299293611198664,
"learning_rate": 6.183574879227053e-05,
"loss": 0.0,
"step": 1050
},
{
"epoch": 15.16994633273703,
"grad_norm": 0.0005277034360915422,
"learning_rate": 5.861513687600645e-05,
"loss": 0.0,
"step": 1060
},
{
"epoch": 15.313059033989267,
"grad_norm": 0.0006858156993985176,
"learning_rate": 5.5394524959742355e-05,
"loss": 0.0001,
"step": 1070
},
{
"epoch": 15.456171735241503,
"grad_norm": 0.0008438636432401836,
"learning_rate": 5.217391304347826e-05,
"loss": 0.0,
"step": 1080
},
{
"epoch": 15.59928443649374,
"grad_norm": 0.0012173138093203306,
"learning_rate": 4.895330112721417e-05,
"loss": 0.0001,
"step": 1090
},
{
"epoch": 15.742397137745975,
"grad_norm": 0.002290137577801943,
"learning_rate": 4.573268921095008e-05,
"loss": 0.0,
"step": 1100
},
{
"epoch": 15.88550983899821,
"grad_norm": 0.0005496228695847094,
"learning_rate": 4.2512077294685994e-05,
"loss": 0.0,
"step": 1110
},
{
"epoch": 16.028622540250446,
"grad_norm": 0.0018827420426532626,
"learning_rate": 3.92914653784219e-05,
"loss": 0.0,
"step": 1120
},
{
"epoch": 16.171735241502684,
"grad_norm": 0.00045006562140770257,
"learning_rate": 3.607085346215781e-05,
"loss": 0.0001,
"step": 1130
},
{
"epoch": 16.31484794275492,
"grad_norm": 0.0005126325413584709,
"learning_rate": 3.2850241545893725e-05,
"loss": 0.0,
"step": 1140
},
{
"epoch": 16.457960644007155,
"grad_norm": 0.00035093360929749906,
"learning_rate": 2.962962962962963e-05,
"loss": 0.0,
"step": 1150
},
{
"epoch": 16.60107334525939,
"grad_norm": 0.0010109692811965942,
"learning_rate": 2.640901771336554e-05,
"loss": 0.0001,
"step": 1160
},
{
"epoch": 16.74418604651163,
"grad_norm": 0.0006910230731591582,
"learning_rate": 2.318840579710145e-05,
"loss": 0.0,
"step": 1170
},
{
"epoch": 16.887298747763865,
"grad_norm": 0.0004351095121819526,
"learning_rate": 1.996779388083736e-05,
"loss": 0.0,
"step": 1180
},
{
"epoch": 17.0304114490161,
"grad_norm": 0.0006468660430982709,
"learning_rate": 1.674718196457327e-05,
"loss": 0.0,
"step": 1190
},
{
"epoch": 17.173524150268335,
"grad_norm": 0.0002576902334112674,
"learning_rate": 1.3526570048309179e-05,
"loss": 0.0,
"step": 1200
},
{
"epoch": 17.316636851520574,
"grad_norm": 0.0010522498050704598,
"learning_rate": 1.0305958132045089e-05,
"loss": 0.0,
"step": 1210
},
{
"epoch": 17.45974955277281,
"grad_norm": 0.0007789513911120594,
"learning_rate": 7.0853462157809985e-06,
"loss": 0.0001,
"step": 1220
},
{
"epoch": 17.602862254025045,
"grad_norm": 0.0009570368565618992,
"learning_rate": 3.864734299516908e-06,
"loss": 0.0,
"step": 1230
},
{
"epoch": 17.74597495527728,
"grad_norm": 0.0009920781012624502,
"learning_rate": 6.44122383252818e-07,
"loss": 0.0,
"step": 1240
},
{
"epoch": 17.77459749552773,
"step": 1242,
"total_flos": 6.807580263736934e+16,
"train_loss": 0.056659956144777014,
"train_runtime": 9770.217,
"train_samples_per_second": 6.177,
"train_steps_per_second": 0.127
}
],
"logging_steps": 10,
"max_steps": 1242,
"num_input_tokens_seen": 0,
"num_train_epochs": 18,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.807580263736934e+16,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}