zephyr-1.1b-sft-full / trainer_state.json
amazingvince's picture
Model save
40291a8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7025997267496681,
"eval_steps": 500,
"global_step": 2282,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9999995319361092e-05,
"loss": 1.7428,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.999988298424633e-05,
"loss": 1.5853,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 1.9999531939723856e-05,
"loss": 1.4404,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.9998946874648124e-05,
"loss": 1.4037,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 1.9998127802711502e-05,
"loss": 1.3591,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 1.9997074743082852e-05,
"loss": 1.3367,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 1.9995787720407095e-05,
"loss": 1.3399,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 1.9994266764804605e-05,
"loss": 1.3106,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 1.999251191187054e-05,
"loss": 1.3077,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 1.9990523202673992e-05,
"loss": 1.2856,
"step": 45
},
{
"epoch": 0.02,
"learning_rate": 1.9988300683757016e-05,
"loss": 1.3115,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 1.998584440713356e-05,
"loss": 1.3046,
"step": 55
},
{
"epoch": 0.02,
"learning_rate": 1.9983154430288237e-05,
"loss": 1.3149,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 1.9980230816174982e-05,
"loss": 1.3038,
"step": 65
},
{
"epoch": 0.02,
"learning_rate": 1.997707363321557e-05,
"loss": 1.266,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 1.9973682955298038e-05,
"loss": 1.2732,
"step": 75
},
{
"epoch": 0.02,
"learning_rate": 1.9970058861774924e-05,
"loss": 1.2475,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 1.996620143746144e-05,
"loss": 1.2846,
"step": 85
},
{
"epoch": 0.03,
"learning_rate": 1.996211077263347e-05,
"loss": 1.2978,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 1.995778696302546e-05,
"loss": 1.2706,
"step": 95
},
{
"epoch": 0.03,
"learning_rate": 1.995323010982817e-05,
"loss": 1.2472,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 1.9948440319686334e-05,
"loss": 1.2557,
"step": 105
},
{
"epoch": 0.03,
"learning_rate": 1.994341770469612e-05,
"loss": 1.2501,
"step": 110
},
{
"epoch": 0.04,
"learning_rate": 1.9938162382402556e-05,
"loss": 1.2599,
"step": 115
},
{
"epoch": 0.04,
"learning_rate": 1.993267447579673e-05,
"loss": 1.2891,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 1.992695411331296e-05,
"loss": 1.2394,
"step": 125
},
{
"epoch": 0.04,
"learning_rate": 1.992100142882574e-05,
"loss": 1.2639,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 1.9914816561646648e-05,
"loss": 1.2778,
"step": 135
},
{
"epoch": 0.04,
"learning_rate": 1.9908399656521066e-05,
"loss": 1.2688,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 1.9901750863624784e-05,
"loss": 1.2677,
"step": 145
},
{
"epoch": 0.05,
"learning_rate": 1.9894870338560512e-05,
"loss": 1.2821,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 1.988775824235421e-05,
"loss": 1.2956,
"step": 155
},
{
"epoch": 0.05,
"learning_rate": 1.9880414741451336e-05,
"loss": 1.2875,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 1.9872840007712953e-05,
"loss": 1.2634,
"step": 165
},
{
"epoch": 0.05,
"learning_rate": 1.9865034218411698e-05,
"loss": 1.2588,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 1.9856997556227632e-05,
"loss": 1.2692,
"step": 175
},
{
"epoch": 0.06,
"learning_rate": 1.984873020924397e-05,
"loss": 1.2452,
"step": 180
},
{
"epoch": 0.06,
"learning_rate": 1.984023237094268e-05,
"loss": 1.2333,
"step": 185
},
{
"epoch": 0.06,
"learning_rate": 1.9831504240199956e-05,
"loss": 1.251,
"step": 190
},
{
"epoch": 0.06,
"learning_rate": 1.982254602128155e-05,
"loss": 1.2642,
"step": 195
},
{
"epoch": 0.06,
"learning_rate": 1.981335792383802e-05,
"loss": 1.2163,
"step": 200
},
{
"epoch": 0.06,
"learning_rate": 1.9803940162899785e-05,
"loss": 1.2599,
"step": 205
},
{
"epoch": 0.06,
"learning_rate": 1.979429295887213e-05,
"loss": 1.2397,
"step": 210
},
{
"epoch": 0.07,
"learning_rate": 1.978441653753003e-05,
"loss": 1.2563,
"step": 215
},
{
"epoch": 0.07,
"learning_rate": 1.977431113001285e-05,
"loss": 1.2619,
"step": 220
},
{
"epoch": 0.07,
"learning_rate": 1.9763976972818973e-05,
"loss": 1.2359,
"step": 225
},
{
"epoch": 0.07,
"learning_rate": 1.9753414307800238e-05,
"loss": 1.2392,
"step": 230
},
{
"epoch": 0.07,
"learning_rate": 1.9742623382156284e-05,
"loss": 1.2283,
"step": 235
},
{
"epoch": 0.07,
"learning_rate": 1.9731604448428773e-05,
"loss": 1.225,
"step": 240
},
{
"epoch": 0.08,
"learning_rate": 1.9720357764495466e-05,
"loss": 1.2274,
"step": 245
},
{
"epoch": 0.08,
"learning_rate": 1.9708883593564207e-05,
"loss": 1.2501,
"step": 250
},
{
"epoch": 0.08,
"learning_rate": 1.969718220416675e-05,
"loss": 1.234,
"step": 255
},
{
"epoch": 0.08,
"learning_rate": 1.9685253870152472e-05,
"loss": 1.2111,
"step": 260
},
{
"epoch": 0.08,
"learning_rate": 1.967309887068197e-05,
"loss": 1.2351,
"step": 265
},
{
"epoch": 0.08,
"learning_rate": 1.9660717490220535e-05,
"loss": 1.2288,
"step": 270
},
{
"epoch": 0.08,
"learning_rate": 1.964811001853147e-05,
"loss": 1.2718,
"step": 275
},
{
"epoch": 0.09,
"learning_rate": 1.9635276750669345e-05,
"loss": 1.2506,
"step": 280
},
{
"epoch": 0.09,
"learning_rate": 1.962221798697306e-05,
"loss": 1.2577,
"step": 285
},
{
"epoch": 0.09,
"learning_rate": 1.9608934033058826e-05,
"loss": 1.2609,
"step": 290
},
{
"epoch": 0.09,
"learning_rate": 1.9595425199813026e-05,
"loss": 1.2402,
"step": 295
},
{
"epoch": 0.09,
"learning_rate": 1.9581691803384914e-05,
"loss": 1.21,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 1.9567734165179236e-05,
"loss": 1.2323,
"step": 305
},
{
"epoch": 0.1,
"learning_rate": 1.955355261184871e-05,
"loss": 1.2391,
"step": 310
},
{
"epoch": 0.1,
"learning_rate": 1.953914747528636e-05,
"loss": 1.236,
"step": 315
},
{
"epoch": 0.1,
"learning_rate": 1.9524519092617772e-05,
"loss": 1.2183,
"step": 320
},
{
"epoch": 0.1,
"learning_rate": 1.9509667806193186e-05,
"loss": 1.194,
"step": 325
},
{
"epoch": 0.1,
"learning_rate": 1.9494593963579503e-05,
"loss": 1.2314,
"step": 330
},
{
"epoch": 0.1,
"learning_rate": 1.947929791755213e-05,
"loss": 1.2567,
"step": 335
},
{
"epoch": 0.1,
"learning_rate": 1.9463780026086735e-05,
"loss": 1.2213,
"step": 340
},
{
"epoch": 0.11,
"learning_rate": 1.9448040652350878e-05,
"loss": 1.2281,
"step": 345
},
{
"epoch": 0.11,
"learning_rate": 1.9432080164695492e-05,
"loss": 1.2395,
"step": 350
},
{
"epoch": 0.11,
"learning_rate": 1.9415898936646275e-05,
"loss": 1.2553,
"step": 355
},
{
"epoch": 0.11,
"learning_rate": 1.9399497346894942e-05,
"loss": 1.2489,
"step": 360
},
{
"epoch": 0.11,
"learning_rate": 1.9382875779290378e-05,
"loss": 1.1973,
"step": 365
},
{
"epoch": 0.11,
"learning_rate": 1.936603462282963e-05,
"loss": 1.1874,
"step": 370
},
{
"epoch": 0.12,
"learning_rate": 1.9348974271648822e-05,
"loss": 1.2221,
"step": 375
},
{
"epoch": 0.12,
"learning_rate": 1.9331695125013928e-05,
"loss": 1.2517,
"step": 380
},
{
"epoch": 0.12,
"learning_rate": 1.9314197587311413e-05,
"loss": 1.2187,
"step": 385
},
{
"epoch": 0.12,
"learning_rate": 1.9296482068038797e-05,
"loss": 1.2315,
"step": 390
},
{
"epoch": 0.12,
"learning_rate": 1.9278548981795045e-05,
"loss": 1.2588,
"step": 395
},
{
"epoch": 0.12,
"learning_rate": 1.9260398748270878e-05,
"loss": 1.2061,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 1.924203179223895e-05,
"loss": 1.2329,
"step": 405
},
{
"epoch": 0.13,
"learning_rate": 1.9223448543543893e-05,
"loss": 1.2245,
"step": 410
},
{
"epoch": 0.13,
"learning_rate": 1.9204649437092284e-05,
"loss": 1.2233,
"step": 415
},
{
"epoch": 0.13,
"learning_rate": 1.9185634912842445e-05,
"loss": 1.2197,
"step": 420
},
{
"epoch": 0.13,
"learning_rate": 1.916640541579415e-05,
"loss": 1.2045,
"step": 425
},
{
"epoch": 0.13,
"learning_rate": 1.9146961395978218e-05,
"loss": 1.2194,
"step": 430
},
{
"epoch": 0.13,
"learning_rate": 1.9127303308445978e-05,
"loss": 1.234,
"step": 435
},
{
"epoch": 0.14,
"learning_rate": 1.9107431613258614e-05,
"loss": 1.2175,
"step": 440
},
{
"epoch": 0.14,
"learning_rate": 1.9087346775476402e-05,
"loss": 1.2364,
"step": 445
},
{
"epoch": 0.14,
"learning_rate": 1.906704926514783e-05,
"loss": 1.238,
"step": 450
},
{
"epoch": 0.14,
"learning_rate": 1.9046539557298597e-05,
"loss": 1.223,
"step": 455
},
{
"epoch": 0.14,
"learning_rate": 1.9025818131920478e-05,
"loss": 1.2248,
"step": 460
},
{
"epoch": 0.14,
"learning_rate": 1.900488547396012e-05,
"loss": 1.2044,
"step": 465
},
{
"epoch": 0.14,
"learning_rate": 1.8983742073307667e-05,
"loss": 1.216,
"step": 470
},
{
"epoch": 0.15,
"learning_rate": 1.896238842478532e-05,
"loss": 1.2172,
"step": 475
},
{
"epoch": 0.15,
"learning_rate": 1.8940825028135726e-05,
"loss": 1.2224,
"step": 480
},
{
"epoch": 0.15,
"learning_rate": 1.891905238801031e-05,
"loss": 1.1959,
"step": 485
},
{
"epoch": 0.15,
"learning_rate": 1.8897071013957456e-05,
"loss": 1.2134,
"step": 490
},
{
"epoch": 0.15,
"learning_rate": 1.8874881420410566e-05,
"loss": 1.2168,
"step": 495
},
{
"epoch": 0.15,
"learning_rate": 1.885248412667605e-05,
"loss": 1.2193,
"step": 500
},
{
"epoch": 0.16,
"learning_rate": 1.8829879656921146e-05,
"loss": 1.1973,
"step": 505
},
{
"epoch": 0.16,
"learning_rate": 1.880706854016166e-05,
"loss": 1.2266,
"step": 510
},
{
"epoch": 0.16,
"learning_rate": 1.8784051310249606e-05,
"loss": 1.2047,
"step": 515
},
{
"epoch": 0.16,
"learning_rate": 1.876082850586068e-05,
"loss": 1.2237,
"step": 520
},
{
"epoch": 0.16,
"learning_rate": 1.873740067048168e-05,
"loss": 1.2192,
"step": 525
},
{
"epoch": 0.16,
"learning_rate": 1.871376835239776e-05,
"loss": 1.206,
"step": 530
},
{
"epoch": 0.16,
"learning_rate": 1.8689932104679628e-05,
"loss": 1.2098,
"step": 535
},
{
"epoch": 0.17,
"learning_rate": 1.866589248517058e-05,
"loss": 1.2125,
"step": 540
},
{
"epoch": 0.17,
"learning_rate": 1.8641650056473457e-05,
"loss": 1.1842,
"step": 545
},
{
"epoch": 0.17,
"learning_rate": 1.8617205385937466e-05,
"loss": 1.2151,
"step": 550
},
{
"epoch": 0.17,
"learning_rate": 1.8592559045644925e-05,
"loss": 1.2003,
"step": 555
},
{
"epoch": 0.17,
"learning_rate": 1.8567711612397847e-05,
"loss": 1.201,
"step": 560
},
{
"epoch": 0.17,
"learning_rate": 1.854266366770445e-05,
"loss": 1.2158,
"step": 565
},
{
"epoch": 0.18,
"learning_rate": 1.8517415797765575e-05,
"loss": 1.1945,
"step": 570
},
{
"epoch": 0.18,
"learning_rate": 1.8491968593460913e-05,
"loss": 1.1924,
"step": 575
},
{
"epoch": 0.18,
"learning_rate": 1.8466322650335234e-05,
"loss": 1.1939,
"step": 580
},
{
"epoch": 0.18,
"learning_rate": 1.8440478568584402e-05,
"loss": 1.1886,
"step": 585
},
{
"epoch": 0.18,
"learning_rate": 1.841443695304136e-05,
"loss": 1.2121,
"step": 590
},
{
"epoch": 0.18,
"learning_rate": 1.8388198413161962e-05,
"loss": 1.2061,
"step": 595
},
{
"epoch": 0.18,
"learning_rate": 1.836176356301072e-05,
"loss": 1.2154,
"step": 600
},
{
"epoch": 0.19,
"learning_rate": 1.8335133021246402e-05,
"loss": 1.1947,
"step": 605
},
{
"epoch": 0.19,
"learning_rate": 1.83083074111076e-05,
"loss": 1.1973,
"step": 610
},
{
"epoch": 0.19,
"learning_rate": 1.828128736039811e-05,
"loss": 1.2221,
"step": 615
},
{
"epoch": 0.19,
"learning_rate": 1.8254073501472255e-05,
"loss": 1.1999,
"step": 620
},
{
"epoch": 0.19,
"learning_rate": 1.822666647122007e-05,
"loss": 1.1843,
"step": 625
},
{
"epoch": 0.19,
"learning_rate": 1.819906691105242e-05,
"loss": 1.2095,
"step": 630
},
{
"epoch": 0.2,
"learning_rate": 1.8171275466885975e-05,
"loss": 1.2207,
"step": 635
},
{
"epoch": 0.2,
"learning_rate": 1.8143292789128085e-05,
"loss": 1.2243,
"step": 640
},
{
"epoch": 0.2,
"learning_rate": 1.8115119532661576e-05,
"loss": 1.2123,
"step": 645
},
{
"epoch": 0.2,
"learning_rate": 1.808675635682942e-05,
"loss": 1.2292,
"step": 650
},
{
"epoch": 0.2,
"learning_rate": 1.8058203925419298e-05,
"loss": 1.2219,
"step": 655
},
{
"epoch": 0.2,
"learning_rate": 1.802946290664806e-05,
"loss": 1.1849,
"step": 660
},
{
"epoch": 0.2,
"learning_rate": 1.8000533973146102e-05,
"loss": 1.2105,
"step": 665
},
{
"epoch": 0.21,
"learning_rate": 1.797141780194162e-05,
"loss": 1.1909,
"step": 670
},
{
"epoch": 0.21,
"learning_rate": 1.7942115074444753e-05,
"loss": 1.1912,
"step": 675
},
{
"epoch": 0.21,
"learning_rate": 1.7912626476431648e-05,
"loss": 1.1956,
"step": 680
},
{
"epoch": 0.21,
"learning_rate": 1.7882952698028414e-05,
"loss": 1.2026,
"step": 685
},
{
"epoch": 0.21,
"learning_rate": 1.785309443369496e-05,
"loss": 1.2417,
"step": 690
},
{
"epoch": 0.21,
"learning_rate": 1.7823052382208744e-05,
"loss": 1.1953,
"step": 695
},
{
"epoch": 0.22,
"learning_rate": 1.7792827246648422e-05,
"loss": 1.2126,
"step": 700
},
{
"epoch": 0.22,
"learning_rate": 1.7762419734377406e-05,
"loss": 1.2442,
"step": 705
},
{
"epoch": 0.22,
"learning_rate": 1.7731830557027278e-05,
"loss": 1.1959,
"step": 710
},
{
"epoch": 0.22,
"learning_rate": 1.7701060430481178e-05,
"loss": 1.1956,
"step": 715
},
{
"epoch": 0.22,
"learning_rate": 1.767011007485701e-05,
"loss": 1.2106,
"step": 720
},
{
"epoch": 0.22,
"learning_rate": 1.763898021449061e-05,
"loss": 1.191,
"step": 725
},
{
"epoch": 0.22,
"learning_rate": 1.7607671577918798e-05,
"loss": 1.1803,
"step": 730
},
{
"epoch": 0.23,
"learning_rate": 1.7576184897862313e-05,
"loss": 1.1951,
"step": 735
},
{
"epoch": 0.23,
"learning_rate": 1.7544520911208672e-05,
"loss": 1.2031,
"step": 740
},
{
"epoch": 0.23,
"learning_rate": 1.7512680358994926e-05,
"loss": 1.1791,
"step": 745
},
{
"epoch": 0.23,
"learning_rate": 1.7480663986390324e-05,
"loss": 1.2023,
"step": 750
},
{
"epoch": 0.23,
"learning_rate": 1.7448472542678855e-05,
"loss": 1.2087,
"step": 755
},
{
"epoch": 0.23,
"learning_rate": 1.7416106781241734e-05,
"loss": 1.2153,
"step": 760
},
{
"epoch": 0.24,
"learning_rate": 1.738356745953975e-05,
"loss": 1.1961,
"step": 765
},
{
"epoch": 0.24,
"learning_rate": 1.735085533909555e-05,
"loss": 1.1993,
"step": 770
},
{
"epoch": 0.24,
"learning_rate": 1.7317971185475828e-05,
"loss": 1.193,
"step": 775
},
{
"epoch": 0.24,
"learning_rate": 1.728491576827338e-05,
"loss": 1.1551,
"step": 780
},
{
"epoch": 0.24,
"learning_rate": 1.7251689861089126e-05,
"loss": 1.1809,
"step": 785
},
{
"epoch": 0.24,
"learning_rate": 1.7218294241513973e-05,
"loss": 1.1911,
"step": 790
},
{
"epoch": 0.24,
"learning_rate": 1.7184729691110642e-05,
"loss": 1.1837,
"step": 795
},
{
"epoch": 0.25,
"learning_rate": 1.7150996995395367e-05,
"loss": 1.1815,
"step": 800
},
{
"epoch": 0.25,
"learning_rate": 1.711709694381951e-05,
"loss": 1.1964,
"step": 805
},
{
"epoch": 0.25,
"learning_rate": 1.7083030329751086e-05,
"loss": 1.1985,
"step": 810
},
{
"epoch": 0.25,
"learning_rate": 1.70487979504562e-05,
"loss": 1.2122,
"step": 815
},
{
"epoch": 0.25,
"learning_rate": 1.7014400607080386e-05,
"loss": 1.2333,
"step": 820
},
{
"epoch": 0.25,
"learning_rate": 1.6979839104629853e-05,
"loss": 1.1995,
"step": 825
},
{
"epoch": 0.26,
"learning_rate": 1.6945114251952654e-05,
"loss": 1.1991,
"step": 830
},
{
"epoch": 0.26,
"learning_rate": 1.691022686171975e-05,
"loss": 1.1733,
"step": 835
},
{
"epoch": 0.26,
"learning_rate": 1.6875177750405998e-05,
"loss": 1.1853,
"step": 840
},
{
"epoch": 0.26,
"learning_rate": 1.6839967738271024e-05,
"loss": 1.1959,
"step": 845
},
{
"epoch": 0.26,
"learning_rate": 1.680459764934006e-05,
"loss": 1.2479,
"step": 850
},
{
"epoch": 0.26,
"learning_rate": 1.676906831138462e-05,
"loss": 1.2328,
"step": 855
},
{
"epoch": 0.26,
"learning_rate": 1.6733380555903156e-05,
"loss": 1.2156,
"step": 860
},
{
"epoch": 0.27,
"learning_rate": 1.6697535218101595e-05,
"loss": 1.2296,
"step": 865
},
{
"epoch": 0.27,
"learning_rate": 1.666153313687378e-05,
"loss": 1.1847,
"step": 870
},
{
"epoch": 0.27,
"learning_rate": 1.6625375154781834e-05,
"loss": 1.2071,
"step": 875
},
{
"epoch": 0.27,
"learning_rate": 1.6589062118036477e-05,
"loss": 1.2149,
"step": 880
},
{
"epoch": 0.27,
"learning_rate": 1.6552594876477173e-05,
"loss": 1.2063,
"step": 885
},
{
"epoch": 0.27,
"learning_rate": 1.6515974283552273e-05,
"loss": 1.191,
"step": 890
},
{
"epoch": 0.28,
"learning_rate": 1.647920119629904e-05,
"loss": 1.1844,
"step": 895
},
{
"epoch": 0.28,
"learning_rate": 1.6442276475323568e-05,
"loss": 1.2153,
"step": 900
},
{
"epoch": 0.28,
"learning_rate": 1.6405200984780677e-05,
"loss": 1.185,
"step": 905
},
{
"epoch": 0.28,
"learning_rate": 1.6367975592353652e-05,
"loss": 1.1784,
"step": 910
},
{
"epoch": 0.28,
"learning_rate": 1.633060116923397e-05,
"loss": 1.2095,
"step": 915
},
{
"epoch": 0.28,
"learning_rate": 1.6293078590100886e-05,
"loss": 1.2006,
"step": 920
},
{
"epoch": 0.28,
"learning_rate": 1.6255408733100977e-05,
"loss": 1.1608,
"step": 925
},
{
"epoch": 0.29,
"learning_rate": 1.6217592479827583e-05,
"loss": 1.1822,
"step": 930
},
{
"epoch": 0.29,
"learning_rate": 1.617963071530018e-05,
"loss": 1.1893,
"step": 935
},
{
"epoch": 0.29,
"learning_rate": 1.6141524327943666e-05,
"loss": 1.1719,
"step": 940
},
{
"epoch": 0.29,
"learning_rate": 1.6103274209567567e-05,
"loss": 1.1842,
"step": 945
},
{
"epoch": 0.29,
"learning_rate": 1.6064881255345167e-05,
"loss": 1.1935,
"step": 950
},
{
"epoch": 0.29,
"learning_rate": 1.6026346363792565e-05,
"loss": 1.1797,
"step": 955
},
{
"epoch": 0.3,
"learning_rate": 1.5987670436747634e-05,
"loss": 1.2015,
"step": 960
},
{
"epoch": 0.3,
"learning_rate": 1.594885437934893e-05,
"loss": 1.1996,
"step": 965
},
{
"epoch": 0.3,
"learning_rate": 1.5909899100014485e-05,
"loss": 1.1926,
"step": 970
},
{
"epoch": 0.3,
"learning_rate": 1.587080551042058e-05,
"loss": 1.1997,
"step": 975
},
{
"epoch": 0.3,
"learning_rate": 1.5831574525480387e-05,
"loss": 1.1826,
"step": 980
},
{
"epoch": 0.3,
"learning_rate": 1.5792207063322554e-05,
"loss": 1.2177,
"step": 985
},
{
"epoch": 0.3,
"learning_rate": 1.5752704045269737e-05,
"loss": 1.1831,
"step": 990
},
{
"epoch": 0.31,
"learning_rate": 1.5713066395817013e-05,
"loss": 1.1852,
"step": 995
},
{
"epoch": 0.31,
"learning_rate": 1.5673295042610278e-05,
"loss": 1.1694,
"step": 1000
},
{
"epoch": 0.31,
"learning_rate": 1.5633390916424503e-05,
"loss": 1.1886,
"step": 1005
},
{
"epoch": 0.31,
"learning_rate": 1.5593354951141963e-05,
"loss": 1.1711,
"step": 1010
},
{
"epoch": 0.31,
"learning_rate": 1.5553188083730392e-05,
"loss": 1.2088,
"step": 1015
},
{
"epoch": 0.31,
"learning_rate": 1.5512891254221046e-05,
"loss": 1.1905,
"step": 1020
},
{
"epoch": 0.32,
"learning_rate": 1.5472465405686693e-05,
"loss": 1.1856,
"step": 1025
},
{
"epoch": 0.32,
"learning_rate": 1.5431911484219565e-05,
"loss": 1.1962,
"step": 1030
},
{
"epoch": 0.32,
"learning_rate": 1.5391230438909195e-05,
"loss": 1.1836,
"step": 1035
},
{
"epoch": 0.32,
"learning_rate": 1.5350423221820223e-05,
"loss": 1.1906,
"step": 1040
},
{
"epoch": 0.32,
"learning_rate": 1.5309490787970094e-05,
"loss": 1.2194,
"step": 1045
},
{
"epoch": 0.32,
"learning_rate": 1.526843409530674e-05,
"loss": 1.1873,
"step": 1050
},
{
"epoch": 0.32,
"learning_rate": 1.5227254104686115e-05,
"loss": 1.1763,
"step": 1055
},
{
"epoch": 0.33,
"learning_rate": 1.5185951779849754e-05,
"loss": 1.198,
"step": 1060
},
{
"epoch": 0.33,
"learning_rate": 1.5144528087402191e-05,
"loss": 1.1885,
"step": 1065
},
{
"epoch": 0.33,
"learning_rate": 1.5102983996788341e-05,
"loss": 1.2233,
"step": 1070
},
{
"epoch": 0.33,
"learning_rate": 1.5061320480270817e-05,
"loss": 1.173,
"step": 1075
},
{
"epoch": 0.33,
"learning_rate": 1.5019538512907184e-05,
"loss": 1.1813,
"step": 1080
},
{
"epoch": 0.33,
"learning_rate": 1.4977639072527115e-05,
"loss": 1.1783,
"step": 1085
},
{
"epoch": 0.34,
"learning_rate": 1.4935623139709532e-05,
"loss": 1.2148,
"step": 1090
},
{
"epoch": 0.34,
"learning_rate": 1.4893491697759642e-05,
"loss": 1.2125,
"step": 1095
},
{
"epoch": 0.34,
"learning_rate": 1.4851245732685935e-05,
"loss": 1.2219,
"step": 1100
},
{
"epoch": 0.34,
"learning_rate": 1.4808886233177096e-05,
"loss": 1.1876,
"step": 1105
},
{
"epoch": 0.34,
"learning_rate": 1.4766414190578879e-05,
"loss": 1.2137,
"step": 1110
},
{
"epoch": 0.34,
"learning_rate": 1.4723830598870898e-05,
"loss": 1.2021,
"step": 1115
},
{
"epoch": 0.34,
"learning_rate": 1.468113645464337e-05,
"loss": 1.1969,
"step": 1120
},
{
"epoch": 0.35,
"learning_rate": 1.4638332757073785e-05,
"loss": 1.1855,
"step": 1125
},
{
"epoch": 0.35,
"learning_rate": 1.4595420507903534e-05,
"loss": 1.1714,
"step": 1130
},
{
"epoch": 0.35,
"learning_rate": 1.455240071141445e-05,
"loss": 1.1907,
"step": 1135
},
{
"epoch": 0.35,
"learning_rate": 1.450927437440531e-05,
"loss": 1.2007,
"step": 1140
},
{
"epoch": 0.35,
"learning_rate": 1.4466042506168285e-05,
"loss": 1.1712,
"step": 1145
},
{
"epoch": 0.35,
"learning_rate": 1.4422706118465306e-05,
"loss": 1.1598,
"step": 1150
},
{
"epoch": 0.36,
"learning_rate": 1.437926622550438e-05,
"loss": 1.1676,
"step": 1155
},
{
"epoch": 0.36,
"learning_rate": 1.4335723843915876e-05,
"loss": 1.1773,
"step": 1160
},
{
"epoch": 0.36,
"learning_rate": 1.4292079992728712e-05,
"loss": 1.1894,
"step": 1165
},
{
"epoch": 0.36,
"learning_rate": 1.4248335693346513e-05,
"loss": 1.1996,
"step": 1170
},
{
"epoch": 0.36,
"learning_rate": 1.4204491969523715e-05,
"loss": 1.2128,
"step": 1175
},
{
"epoch": 0.36,
"learning_rate": 1.4160549847341591e-05,
"loss": 1.1819,
"step": 1180
},
{
"epoch": 0.36,
"learning_rate": 1.4116510355184256e-05,
"loss": 1.1785,
"step": 1185
},
{
"epoch": 0.37,
"learning_rate": 1.4072374523714577e-05,
"loss": 1.186,
"step": 1190
},
{
"epoch": 0.37,
"learning_rate": 1.4028143385850077e-05,
"loss": 1.1752,
"step": 1195
},
{
"epoch": 0.37,
"learning_rate": 1.3983817976738736e-05,
"loss": 1.1863,
"step": 1200
},
{
"epoch": 0.37,
"learning_rate": 1.3939399333734789e-05,
"loss": 1.1947,
"step": 1205
},
{
"epoch": 0.37,
"learning_rate": 1.3894888496374433e-05,
"loss": 1.1798,
"step": 1210
},
{
"epoch": 0.37,
"learning_rate": 1.38502865063515e-05,
"loss": 1.1756,
"step": 1215
},
{
"epoch": 0.38,
"learning_rate": 1.3805594407493093e-05,
"loss": 1.1631,
"step": 1220
},
{
"epoch": 0.38,
"learning_rate": 1.3760813245735133e-05,
"loss": 1.2141,
"step": 1225
},
{
"epoch": 0.38,
"learning_rate": 1.3715944069097899e-05,
"loss": 1.1619,
"step": 1230
},
{
"epoch": 0.38,
"learning_rate": 1.3670987927661498e-05,
"loss": 1.1761,
"step": 1235
},
{
"epoch": 0.38,
"learning_rate": 1.3625945873541285e-05,
"loss": 1.1849,
"step": 1240
},
{
"epoch": 0.38,
"learning_rate": 1.3580818960863233e-05,
"loss": 1.1654,
"step": 1245
},
{
"epoch": 0.38,
"learning_rate": 1.3535608245739293e-05,
"loss": 1.1971,
"step": 1250
},
{
"epoch": 0.39,
"learning_rate": 1.349031478624264e-05,
"loss": 1.1807,
"step": 1255
},
{
"epoch": 0.39,
"learning_rate": 1.3444939642382932e-05,
"loss": 1.199,
"step": 1260
},
{
"epoch": 0.39,
"learning_rate": 1.3399483876081506e-05,
"loss": 1.1758,
"step": 1265
},
{
"epoch": 0.39,
"learning_rate": 1.3353948551146511e-05,
"loss": 1.1622,
"step": 1270
},
{
"epoch": 0.39,
"learning_rate": 1.3308334733248019e-05,
"loss": 1.1608,
"step": 1275
},
{
"epoch": 0.39,
"learning_rate": 1.3262643489893084e-05,
"loss": 1.2018,
"step": 1280
},
{
"epoch": 0.4,
"learning_rate": 1.3216875890400762e-05,
"loss": 1.1874,
"step": 1285
},
{
"epoch": 0.4,
"learning_rate": 1.3171033005877086e-05,
"loss": 1.1819,
"step": 1290
},
{
"epoch": 0.4,
"learning_rate": 1.3125115909189989e-05,
"loss": 1.1533,
"step": 1295
},
{
"epoch": 0.4,
"learning_rate": 1.307912567494421e-05,
"loss": 1.1781,
"step": 1300
},
{
"epoch": 0.4,
"learning_rate": 1.3033063379456128e-05,
"loss": 1.1568,
"step": 1305
},
{
"epoch": 0.4,
"learning_rate": 1.298693010072859e-05,
"loss": 1.1713,
"step": 1310
},
{
"epoch": 0.4,
"learning_rate": 1.2940726918425674e-05,
"loss": 1.164,
"step": 1315
},
{
"epoch": 0.41,
"learning_rate": 1.2894454913847419e-05,
"loss": 1.1971,
"step": 1320
},
{
"epoch": 0.41,
"learning_rate": 1.2848115169904519e-05,
"loss": 1.1826,
"step": 1325
},
{
"epoch": 0.41,
"learning_rate": 1.2801708771092991e-05,
"loss": 1.1468,
"step": 1330
},
{
"epoch": 0.41,
"learning_rate": 1.275523680346878e-05,
"loss": 1.1665,
"step": 1335
},
{
"epoch": 0.41,
"learning_rate": 1.270870035462235e-05,
"loss": 1.1604,
"step": 1340
},
{
"epoch": 0.41,
"learning_rate": 1.2662100513653226e-05,
"loss": 1.1805,
"step": 1345
},
{
"epoch": 0.42,
"learning_rate": 1.2615438371144511e-05,
"loss": 1.1966,
"step": 1350
},
{
"epoch": 0.42,
"learning_rate": 1.256871501913736e-05,
"loss": 1.1789,
"step": 1355
},
{
"epoch": 0.42,
"learning_rate": 1.2521931551105427e-05,
"loss": 1.1968,
"step": 1360
},
{
"epoch": 0.42,
"learning_rate": 1.2475089061929257e-05,
"loss": 1.2045,
"step": 1365
},
{
"epoch": 0.42,
"learning_rate": 1.2428188647870691e-05,
"loss": 1.2092,
"step": 1370
},
{
"epoch": 0.42,
"learning_rate": 1.2381231406547189e-05,
"loss": 1.1558,
"step": 1375
},
{
"epoch": 0.42,
"learning_rate": 1.2334218436906149e-05,
"loss": 1.1617,
"step": 1380
},
{
"epoch": 0.43,
"learning_rate": 1.228715083919918e-05,
"loss": 1.1606,
"step": 1385
},
{
"epoch": 0.43,
"learning_rate": 1.224002971495637e-05,
"loss": 1.1966,
"step": 1390
},
{
"epoch": 0.43,
"learning_rate": 1.2192856166960493e-05,
"loss": 1.1851,
"step": 1395
},
{
"epoch": 0.43,
"learning_rate": 1.2145631299221201e-05,
"loss": 1.1774,
"step": 1400
},
{
"epoch": 0.43,
"learning_rate": 1.2098356216949196e-05,
"loss": 1.1549,
"step": 1405
},
{
"epoch": 0.43,
"learning_rate": 1.2051032026530349e-05,
"loss": 1.1633,
"step": 1410
},
{
"epoch": 0.44,
"learning_rate": 1.2003659835499826e-05,
"loss": 1.2149,
"step": 1415
},
{
"epoch": 0.44,
"learning_rate": 1.1956240752516153e-05,
"loss": 1.1954,
"step": 1420
},
{
"epoch": 0.44,
"learning_rate": 1.1908775887335275e-05,
"loss": 1.1987,
"step": 1425
},
{
"epoch": 0.44,
"learning_rate": 1.1861266350784588e-05,
"loss": 1.1619,
"step": 1430
},
{
"epoch": 0.44,
"learning_rate": 1.1813713254736938e-05,
"loss": 1.151,
"step": 1435
},
{
"epoch": 0.44,
"learning_rate": 1.1766117712084599e-05,
"loss": 1.1601,
"step": 1440
},
{
"epoch": 0.44,
"learning_rate": 1.1718480836713228e-05,
"loss": 1.1541,
"step": 1445
},
{
"epoch": 0.45,
"learning_rate": 1.1670803743475805e-05,
"loss": 1.1893,
"step": 1450
},
{
"epoch": 0.45,
"learning_rate": 1.1623087548166525e-05,
"loss": 1.18,
"step": 1455
},
{
"epoch": 0.45,
"learning_rate": 1.1575333367494698e-05,
"loss": 1.1749,
"step": 1460
},
{
"epoch": 0.45,
"learning_rate": 1.1527542319058618e-05,
"loss": 1.1631,
"step": 1465
},
{
"epoch": 0.45,
"learning_rate": 1.1479715521319393e-05,
"loss": 1.1945,
"step": 1470
},
{
"epoch": 0.45,
"learning_rate": 1.1431854093574776e-05,
"loss": 1.1445,
"step": 1475
},
{
"epoch": 0.46,
"learning_rate": 1.1383959155932978e-05,
"loss": 1.1709,
"step": 1480
},
{
"epoch": 0.46,
"learning_rate": 1.1336031829286444e-05,
"loss": 1.1953,
"step": 1485
},
{
"epoch": 0.46,
"learning_rate": 1.1288073235285626e-05,
"loss": 1.1635,
"step": 1490
},
{
"epoch": 0.46,
"learning_rate": 1.1240084496312724e-05,
"loss": 1.1638,
"step": 1495
},
{
"epoch": 0.46,
"learning_rate": 1.1192066735455431e-05,
"loss": 1.2029,
"step": 1500
},
{
"epoch": 0.46,
"learning_rate": 1.1144021076480644e-05,
"loss": 1.2041,
"step": 1505
},
{
"epoch": 0.46,
"learning_rate": 1.109594864380816e-05,
"loss": 1.1587,
"step": 1510
},
{
"epoch": 0.47,
"learning_rate": 1.104785056248437e-05,
"loss": 1.2077,
"step": 1515
},
{
"epoch": 0.47,
"learning_rate": 1.099972795815592e-05,
"loss": 1.171,
"step": 1520
},
{
"epoch": 0.47,
"learning_rate": 1.0951581957043369e-05,
"loss": 1.168,
"step": 1525
},
{
"epoch": 0.47,
"learning_rate": 1.0903413685914843e-05,
"loss": 1.1637,
"step": 1530
},
{
"epoch": 0.47,
"learning_rate": 1.085522427205965e-05,
"loss": 1.1865,
"step": 1535
},
{
"epoch": 0.47,
"learning_rate": 1.0807014843261904e-05,
"loss": 1.1895,
"step": 1540
},
{
"epoch": 0.48,
"learning_rate": 1.0758786527774142e-05,
"loss": 1.1966,
"step": 1545
},
{
"epoch": 0.48,
"learning_rate": 1.0710540454290891e-05,
"loss": 1.1916,
"step": 1550
},
{
"epoch": 0.48,
"learning_rate": 1.0662277751922286e-05,
"loss": 1.1827,
"step": 1555
},
{
"epoch": 0.48,
"learning_rate": 1.0613999550167626e-05,
"loss": 1.2037,
"step": 1560
},
{
"epoch": 0.48,
"learning_rate": 1.0565706978888942e-05,
"loss": 1.1763,
"step": 1565
},
{
"epoch": 0.48,
"learning_rate": 1.051740116828456e-05,
"loss": 1.1784,
"step": 1570
},
{
"epoch": 0.48,
"learning_rate": 1.0469083248862645e-05,
"loss": 1.1698,
"step": 1575
},
{
"epoch": 0.49,
"learning_rate": 1.0420754351414755e-05,
"loss": 1.1708,
"step": 1580
},
{
"epoch": 0.49,
"learning_rate": 1.0372415606989351e-05,
"loss": 1.16,
"step": 1585
},
{
"epoch": 0.49,
"learning_rate": 1.0324068146865365e-05,
"loss": 1.2111,
"step": 1590
},
{
"epoch": 0.49,
"learning_rate": 1.0275713102525689e-05,
"loss": 1.1751,
"step": 1595
},
{
"epoch": 0.49,
"learning_rate": 1.0227351605630711e-05,
"loss": 1.1805,
"step": 1600
},
{
"epoch": 0.49,
"learning_rate": 1.0178984787991842e-05,
"loss": 1.2023,
"step": 1605
},
{
"epoch": 0.5,
"learning_rate": 1.0130613781544998e-05,
"loss": 1.1633,
"step": 1610
},
{
"epoch": 0.5,
"learning_rate": 1.0082239718324136e-05,
"loss": 1.1667,
"step": 1615
},
{
"epoch": 0.5,
"learning_rate": 1.0033863730434752e-05,
"loss": 1.1676,
"step": 1620
},
{
"epoch": 0.5,
"learning_rate": 9.98548695002738e-06,
"loss": 1.1934,
"step": 1625
},
{
"epoch": 0.5,
"learning_rate": 9.937110509271101e-06,
"loss": 1.1881,
"step": 1630
},
{
"epoch": 0.5,
"learning_rate": 9.888735540327059e-06,
"loss": 1.1607,
"step": 1635
},
{
"epoch": 0.5,
"learning_rate": 9.840363175321932e-06,
"loss": 1.159,
"step": 1640
},
{
"epoch": 0.51,
"learning_rate": 9.791994546321478e-06,
"loss": 1.1442,
"step": 1645
},
{
"epoch": 0.51,
"learning_rate": 9.743630785304012e-06,
"loss": 1.1564,
"step": 1650
},
{
"epoch": 0.51,
"learning_rate": 9.695273024133916e-06,
"loss": 1.1934,
"step": 1655
},
{
"epoch": 0.51,
"learning_rate": 9.646922394535173e-06,
"loss": 1.168,
"step": 1660
},
{
"epoch": 0.51,
"learning_rate": 9.598580028064851e-06,
"loss": 1.1991,
"step": 1665
},
{
"epoch": 0.51,
"learning_rate": 9.550247056086641e-06,
"loss": 1.1808,
"step": 1670
},
{
"epoch": 0.52,
"learning_rate": 9.501924609744367e-06,
"loss": 1.1679,
"step": 1675
},
{
"epoch": 0.52,
"learning_rate": 9.45361381993553e-06,
"loss": 1.1802,
"step": 1680
},
{
"epoch": 0.52,
"learning_rate": 9.405315817284825e-06,
"loss": 1.1756,
"step": 1685
},
{
"epoch": 0.52,
"learning_rate": 9.357031732117687e-06,
"loss": 1.1793,
"step": 1690
},
{
"epoch": 0.52,
"learning_rate": 9.308762694433842e-06,
"loss": 1.1719,
"step": 1695
},
{
"epoch": 0.52,
"learning_rate": 9.260509833880848e-06,
"loss": 1.1508,
"step": 1700
},
{
"epoch": 0.52,
"learning_rate": 9.212274279727676e-06,
"loss": 1.146,
"step": 1705
},
{
"epoch": 0.53,
"learning_rate": 9.16405716083828e-06,
"loss": 1.2294,
"step": 1710
},
{
"epoch": 0.53,
"learning_rate": 9.115859605645149e-06,
"loss": 1.1678,
"step": 1715
},
{
"epoch": 0.53,
"learning_rate": 9.067682742122937e-06,
"loss": 1.1477,
"step": 1720
},
{
"epoch": 0.53,
"learning_rate": 9.019527697762047e-06,
"loss": 1.1584,
"step": 1725
},
{
"epoch": 0.53,
"learning_rate": 8.971395599542232e-06,
"loss": 1.2014,
"step": 1730
},
{
"epoch": 0.53,
"learning_rate": 8.92328757390625e-06,
"loss": 1.1781,
"step": 1735
},
{
"epoch": 0.54,
"learning_rate": 8.875204746733474e-06,
"loss": 1.1736,
"step": 1740
},
{
"epoch": 0.54,
"learning_rate": 8.827148243313553e-06,
"loss": 1.1895,
"step": 1745
},
{
"epoch": 0.54,
"learning_rate": 8.779119188320082e-06,
"loss": 1.151,
"step": 1750
},
{
"epoch": 0.54,
"learning_rate": 8.73111870578428e-06,
"loss": 1.1404,
"step": 1755
},
{
"epoch": 0.54,
"learning_rate": 8.683147919068665e-06,
"loss": 1.1952,
"step": 1760
},
{
"epoch": 0.54,
"learning_rate": 8.635207950840795e-06,
"loss": 1.1963,
"step": 1765
},
{
"epoch": 0.54,
"learning_rate": 8.587299923046977e-06,
"loss": 1.1789,
"step": 1770
},
{
"epoch": 0.55,
"learning_rate": 8.539424956885998e-06,
"loss": 1.1806,
"step": 1775
},
{
"epoch": 0.55,
"learning_rate": 8.49158417278291e-06,
"loss": 1.1747,
"step": 1780
},
{
"epoch": 0.55,
"learning_rate": 8.443778690362801e-06,
"loss": 1.1907,
"step": 1785
},
{
"epoch": 0.55,
"learning_rate": 8.396009628424574e-06,
"loss": 1.1948,
"step": 1790
},
{
"epoch": 0.55,
"learning_rate": 8.348278104914791e-06,
"loss": 1.1802,
"step": 1795
},
{
"epoch": 0.55,
"learning_rate": 8.300585236901489e-06,
"loss": 1.1969,
"step": 1800
},
{
"epoch": 0.56,
"learning_rate": 8.252932140548046e-06,
"loss": 1.1954,
"step": 1805
},
{
"epoch": 0.56,
"learning_rate": 8.205319931087062e-06,
"loss": 1.1884,
"step": 1810
},
{
"epoch": 0.56,
"learning_rate": 8.15774972279425e-06,
"loss": 1.1829,
"step": 1815
},
{
"epoch": 0.56,
"learning_rate": 8.110222628962366e-06,
"loss": 1.1676,
"step": 1820
},
{
"epoch": 0.56,
"learning_rate": 8.06273976187515e-06,
"loss": 1.1486,
"step": 1825
},
{
"epoch": 0.56,
"learning_rate": 8.0153022327813e-06,
"loss": 1.1814,
"step": 1830
},
{
"epoch": 0.56,
"learning_rate": 7.967911151868454e-06,
"loss": 1.1653,
"step": 1835
},
{
"epoch": 0.57,
"learning_rate": 7.920567628237228e-06,
"loss": 1.1542,
"step": 1840
},
{
"epoch": 0.57,
"learning_rate": 7.873272769875244e-06,
"loss": 1.182,
"step": 1845
},
{
"epoch": 0.57,
"learning_rate": 7.82602768363119e-06,
"loss": 1.1686,
"step": 1850
},
{
"epoch": 0.57,
"learning_rate": 7.778833475188949e-06,
"loss": 1.1805,
"step": 1855
},
{
"epoch": 0.57,
"learning_rate": 7.7316912490417e-06,
"loss": 1.1775,
"step": 1860
},
{
"epoch": 0.57,
"learning_rate": 7.684602108466055e-06,
"loss": 1.2037,
"step": 1865
},
{
"epoch": 0.58,
"learning_rate": 7.637567155496277e-06,
"loss": 1.155,
"step": 1870
},
{
"epoch": 0.58,
"learning_rate": 7.59058749089846e-06,
"loss": 1.1766,
"step": 1875
},
{
"epoch": 0.58,
"learning_rate": 7.543664214144773e-06,
"loss": 1.1813,
"step": 1880
},
{
"epoch": 0.58,
"learning_rate": 7.496798423387733e-06,
"loss": 1.1761,
"step": 1885
},
{
"epoch": 0.58,
"learning_rate": 7.449991215434514e-06,
"loss": 1.176,
"step": 1890
},
{
"epoch": 0.58,
"learning_rate": 7.403243685721251e-06,
"loss": 1.1658,
"step": 1895
},
{
"epoch": 0.58,
"learning_rate": 7.356556928287433e-06,
"loss": 1.1525,
"step": 1900
},
{
"epoch": 0.59,
"learning_rate": 7.309932035750276e-06,
"loss": 1.1843,
"step": 1905
},
{
"epoch": 0.59,
"learning_rate": 7.263370099279173e-06,
"loss": 1.1704,
"step": 1910
},
{
"epoch": 0.59,
"learning_rate": 7.2168722085701416e-06,
"loss": 1.1924,
"step": 1915
},
{
"epoch": 0.59,
"learning_rate": 7.170439451820323e-06,
"loss": 1.1638,
"step": 1920
},
{
"epoch": 0.59,
"learning_rate": 7.124072915702524e-06,
"loss": 1.1604,
"step": 1925
},
{
"epoch": 0.59,
"learning_rate": 7.077773685339783e-06,
"loss": 1.1719,
"step": 1930
},
{
"epoch": 0.6,
"learning_rate": 7.031542844279962e-06,
"loss": 1.1768,
"step": 1935
},
{
"epoch": 0.6,
"learning_rate": 6.985381474470402e-06,
"loss": 1.1596,
"step": 1940
},
{
"epoch": 0.6,
"learning_rate": 6.939290656232604e-06,
"loss": 1.1894,
"step": 1945
},
{
"epoch": 0.6,
"learning_rate": 6.89327146823693e-06,
"loss": 1.1764,
"step": 1950
},
{
"epoch": 0.6,
"learning_rate": 6.847324987477375e-06,
"loss": 1.1855,
"step": 1955
},
{
"epoch": 0.6,
"learning_rate": 6.801452289246356e-06,
"loss": 1.187,
"step": 1960
},
{
"epoch": 0.6,
"learning_rate": 6.755654447109538e-06,
"loss": 1.1709,
"step": 1965
},
{
"epoch": 0.61,
"learning_rate": 6.709932532880732e-06,
"loss": 1.1836,
"step": 1970
},
{
"epoch": 0.61,
"learning_rate": 6.664287616596785e-06,
"loss": 1.1602,
"step": 1975
},
{
"epoch": 0.61,
"learning_rate": 6.6187207664925505e-06,
"loss": 1.1752,
"step": 1980
},
{
"epoch": 0.61,
"learning_rate": 6.573233048975891e-06,
"loss": 1.16,
"step": 1985
},
{
"epoch": 0.61,
"learning_rate": 6.527825528602719e-06,
"loss": 1.1683,
"step": 1990
},
{
"epoch": 0.61,
"learning_rate": 6.482499268052077e-06,
"loss": 1.1981,
"step": 1995
},
{
"epoch": 0.62,
"learning_rate": 6.4372553281012695e-06,
"loss": 1.1877,
"step": 2000
},
{
"epoch": 0.62,
"learning_rate": 6.392094767601051e-06,
"loss": 1.1774,
"step": 2005
},
{
"epoch": 0.62,
"learning_rate": 6.347018643450815e-06,
"loss": 1.1758,
"step": 2010
},
{
"epoch": 0.62,
"learning_rate": 6.302028010573898e-06,
"loss": 1.1841,
"step": 2015
},
{
"epoch": 0.62,
"learning_rate": 6.257123921892862e-06,
"loss": 1.1538,
"step": 2020
},
{
"epoch": 0.62,
"learning_rate": 6.212307428304858e-06,
"loss": 1.1824,
"step": 2025
},
{
"epoch": 0.63,
"learning_rate": 6.167579578657044e-06,
"loss": 1.1608,
"step": 2030
},
{
"epoch": 0.63,
"learning_rate": 6.122941419722032e-06,
"loss": 1.1857,
"step": 2035
},
{
"epoch": 0.63,
"learning_rate": 6.078393996173375e-06,
"loss": 1.1987,
"step": 2040
},
{
"epoch": 0.63,
"learning_rate": 6.0339383505611444e-06,
"loss": 1.1816,
"step": 2045
},
{
"epoch": 0.63,
"learning_rate": 5.9895755232875216e-06,
"loss": 1.1852,
"step": 2050
},
{
"epoch": 0.63,
"learning_rate": 5.945306552582431e-06,
"loss": 1.1918,
"step": 2055
},
{
"epoch": 0.63,
"learning_rate": 5.9011324744792716e-06,
"loss": 1.1628,
"step": 2060
},
{
"epoch": 0.64,
"learning_rate": 5.8570543227906565e-06,
"loss": 1.1497,
"step": 2065
},
{
"epoch": 0.64,
"learning_rate": 5.813073129084206e-06,
"loss": 1.1987,
"step": 2070
},
{
"epoch": 0.64,
"learning_rate": 5.769189922658423e-06,
"loss": 1.183,
"step": 2075
},
{
"epoch": 0.64,
"learning_rate": 5.725405730518616e-06,
"loss": 1.1487,
"step": 2080
},
{
"epoch": 0.64,
"learning_rate": 5.681721577352818e-06,
"loss": 1.1558,
"step": 2085
},
{
"epoch": 0.64,
"learning_rate": 5.638138485507861e-06,
"loss": 1.186,
"step": 2090
},
{
"epoch": 0.65,
"learning_rate": 5.594657474965408e-06,
"loss": 1.1615,
"step": 2095
},
{
"epoch": 0.65,
"learning_rate": 5.551279563318106e-06,
"loss": 1.1436,
"step": 2100
},
{
"epoch": 0.65,
"learning_rate": 5.508005765745753e-06,
"loss": 1.162,
"step": 2105
},
{
"epoch": 0.65,
"learning_rate": 5.464837094991569e-06,
"loss": 1.1526,
"step": 2110
},
{
"epoch": 0.65,
"learning_rate": 5.421774561338446e-06,
"loss": 1.1535,
"step": 2115
},
{
"epoch": 0.65,
"learning_rate": 5.3788191725853656e-06,
"loss": 1.1648,
"step": 2120
},
{
"epoch": 0.65,
"learning_rate": 5.335971934023757e-06,
"loss": 1.1607,
"step": 2125
},
{
"epoch": 0.66,
"learning_rate": 5.2932338484140055e-06,
"loss": 1.1613,
"step": 2130
},
{
"epoch": 0.66,
"learning_rate": 5.250605915961966e-06,
"loss": 1.1767,
"step": 2135
},
{
"epoch": 0.66,
"learning_rate": 5.20808913429558e-06,
"loss": 1.1886,
"step": 2140
},
{
"epoch": 0.66,
"learning_rate": 5.1656844984414785e-06,
"loss": 1.1761,
"step": 2145
},
{
"epoch": 0.66,
"learning_rate": 5.1233930008017615e-06,
"loss": 1.1817,
"step": 2150
},
{
"epoch": 0.66,
"learning_rate": 5.081215631130719e-06,
"loss": 1.1521,
"step": 2155
},
{
"epoch": 0.67,
"learning_rate": 5.0391533765116895e-06,
"loss": 1.1811,
"step": 2160
},
{
"epoch": 0.67,
"learning_rate": 4.9972072213339555e-06,
"loss": 1.1719,
"step": 2165
},
{
"epoch": 0.67,
"learning_rate": 4.955378147269719e-06,
"loss": 1.1766,
"step": 2170
},
{
"epoch": 0.67,
"learning_rate": 4.913667133251091e-06,
"loss": 1.1764,
"step": 2175
},
{
"epoch": 0.67,
"learning_rate": 4.872075155447231e-06,
"loss": 1.1636,
"step": 2180
},
{
"epoch": 0.67,
"learning_rate": 4.830603187241463e-06,
"loss": 1.178,
"step": 2185
},
{
"epoch": 0.67,
"learning_rate": 4.789252199208508e-06,
"loss": 1.1748,
"step": 2190
},
{
"epoch": 0.68,
"learning_rate": 4.748023159091773e-06,
"loss": 1.1494,
"step": 2195
},
{
"epoch": 0.68,
"learning_rate": 4.706917031780698e-06,
"loss": 1.1737,
"step": 2200
},
{
"epoch": 0.68,
"learning_rate": 4.6659347792881756e-06,
"loss": 1.1757,
"step": 2205
},
{
"epoch": 0.68,
"learning_rate": 4.6250773607280375e-06,
"loss": 1.1464,
"step": 2210
},
{
"epoch": 0.68,
"learning_rate": 4.584345732292613e-06,
"loss": 1.2013,
"step": 2215
},
{
"epoch": 0.68,
"learning_rate": 4.543740847230341e-06,
"loss": 1.1716,
"step": 2220
},
{
"epoch": 0.69,
"learning_rate": 4.503263655823466e-06,
"loss": 1.2016,
"step": 2225
},
{
"epoch": 0.69,
"learning_rate": 4.462915105365798e-06,
"loss": 1.1922,
"step": 2230
},
{
"epoch": 0.69,
"learning_rate": 4.422696140140547e-06,
"loss": 1.1874,
"step": 2235
},
{
"epoch": 0.69,
"learning_rate": 4.382607701398217e-06,
"loss": 1.1498,
"step": 2240
},
{
"epoch": 0.69,
"learning_rate": 4.342650727334591e-06,
"loss": 1.1924,
"step": 2245
},
{
"epoch": 0.69,
"learning_rate": 4.302826153068741e-06,
"loss": 1.168,
"step": 2250
},
{
"epoch": 0.69,
"learning_rate": 4.263134910621196e-06,
"loss": 1.1673,
"step": 2255
},
{
"epoch": 0.7,
"learning_rate": 4.223577928892073e-06,
"loss": 1.163,
"step": 2260
},
{
"epoch": 0.7,
"learning_rate": 4.184156133639388e-06,
"loss": 1.146,
"step": 2265
},
{
"epoch": 0.7,
"learning_rate": 4.1448704474573546e-06,
"loss": 1.1513,
"step": 2270
},
{
"epoch": 0.7,
"learning_rate": 4.105721789754805e-06,
"loss": 1.1913,
"step": 2275
},
{
"epoch": 0.7,
"learning_rate": 4.066711076733677e-06,
"loss": 1.1826,
"step": 2280
},
{
"epoch": 0.7,
"eval_loss": 1.1736973524093628,
"eval_runtime": 1794.5729,
"eval_samples_per_second": 12.878,
"eval_steps_per_second": 3.22,
"step": 2282
},
{
"epoch": 0.7,
"step": 2282,
"total_flos": 165398351708160.0,
"train_loss": 1.200063390965842,
"train_runtime": 41297.3635,
"train_samples_per_second": 5.033,
"train_steps_per_second": 0.079
}
],
"logging_steps": 5,
"max_steps": 3247,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 165398351708160.0,
"trial_name": null,
"trial_params": null
}