{ "best_metric": 3.0452535152435303, "best_model_checkpoint": "output/the-notorious-big/checkpoint-2101", "epoch": 11.0, "global_step": 2101, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.318581390145182e-07, "loss": 3.6069, "step": 5 }, { "epoch": 0.05, "learning_rate": 9.258652616957599e-07, "loss": 3.6646, "step": 10 }, { "epoch": 0.08, "learning_rate": 2.0773300794160956e-06, "loss": 3.5544, "step": 15 }, { "epoch": 0.1, "learning_rate": 3.6784690210450094e-06, "loss": 3.8578, "step": 20 }, { "epoch": 0.13, "learning_rate": 5.718458847652147e-06, "loss": 3.5273, "step": 25 }, { "epoch": 0.16, "learning_rate": 8.183509814491012e-06, "loss": 3.6649, "step": 30 }, { "epoch": 0.18, "learning_rate": 1.1056958885708937e-05, "loss": 3.4757, "step": 35 }, { "epoch": 0.21, "learning_rate": 1.4319382371678895e-05, "loss": 3.4231, "step": 40 }, { "epoch": 0.24, "learning_rate": 1.7948727227557336e-05, "loss": 3.4855, "step": 45 }, { "epoch": 0.26, "learning_rate": 2.1920460125527185e-05, "loss": 3.7272, "step": 50 }, { "epoch": 0.29, "learning_rate": 2.6207733293040745e-05, "loss": 3.6966, "step": 55 }, { "epoch": 0.31, "learning_rate": 3.078156599604364e-05, "loss": 3.5331, "step": 60 }, { "epoch": 0.34, "learning_rate": 3.561104044040511e-05, "loss": 3.5794, "step": 65 }, { "epoch": 0.37, "learning_rate": 4.0663510767318035e-05, "loss": 3.5518, "step": 70 }, { "epoch": 0.39, "learning_rate": 4.590482372991862e-05, "loss": 3.5769, "step": 75 }, { "epoch": 0.42, "learning_rate": 5.12995495594168e-05, "loss": 3.7063, "step": 80 }, { "epoch": 0.45, "learning_rate": 5.681122146014723e-05, "loss": 3.5721, "step": 85 }, { "epoch": 0.47, "learning_rate": 6.240258211462417e-05, "loss": 3.45, "step": 90 }, { "epoch": 0.5, "learning_rate": 6.803583553229638e-05, "loss": 3.5444, "step": 95 }, { "epoch": 0.52, "learning_rate": 7.36729025395774e-05, "loss": 3.5872, "step": 100 }, { "epoch": 0.55, "learning_rate": 7.927567818411118e-05, "loss": 3.5725, "step": 105 }, { "epoch": 0.58, "learning_rate": 8.480628931329552e-05, "loss": 3.5443, "step": 110 }, { "epoch": 0.6, "learning_rate": 9.022735058590523e-05, "loss": 3.4685, "step": 115 }, { "epoch": 0.63, "learning_rate": 9.550221718625067e-05, "loss": 3.5604, "step": 120 }, { "epoch": 0.65, "learning_rate": 0.00010059523253259703, "loss": 3.5, "step": 125 }, { "epoch": 0.68, "learning_rate": 0.00010547196930540564, "loss": 3.4513, "step": 130 }, { "epoch": 0.71, "learning_rate": 0.00011009946216611652, "loss": 3.6782, "step": 135 }, { "epoch": 0.73, "learning_rate": 0.00011444643059335986, "loss": 3.7351, "step": 140 }, { "epoch": 0.76, "learning_rate": 0.00011848349033028726, "loss": 3.4672, "step": 145 }, { "epoch": 0.79, "learning_rate": 0.0001221833520137007, "loss": 3.4939, "step": 150 }, { "epoch": 0.81, "learning_rate": 0.00012552100564230202, "loss": 3.3836, "step": 155 }, { "epoch": 0.84, "learning_rate": 0.00012847388963711152, "loss": 3.5585, "step": 160 }, { "epoch": 0.86, "learning_rate": 0.00013102204335125564, "loss": 3.6885, "step": 165 }, { "epoch": 0.89, "learning_rate": 0.00013314824199820138, "loss": 3.4539, "step": 170 }, { "epoch": 0.92, "learning_rate": 0.00013483811308636105, "loss": 3.44, "step": 175 }, { "epoch": 0.94, "learning_rate": 0.00013608023357300362, "loss": 3.2787, "step": 180 }, { "epoch": 0.97, "learning_rate": 0.00013686620708073944, "loss": 3.4844, "step": 185 }, { "epoch": 0.99, "learning_rate": 0.0001371907206546175, "loss": 3.4819, "step": 190 }, { "epoch": 1.0, "eval_loss": 3.4188125133514404, "eval_runtime": 3.4458, "eval_samples_per_second": 76.904, "eval_steps_per_second": 9.867, "step": 191 }, { "epoch": 1.02, "learning_rate": 0.0001370515806761736, "loss": 3.3397, "step": 195 }, { "epoch": 1.05, "learning_rate": 0.00013644972769166031, "loss": 3.3572, "step": 200 }, { "epoch": 1.07, "learning_rate": 0.00013538923005422486, "loss": 3.262, "step": 205 }, { "epoch": 1.1, "learning_rate": 0.00013387725642301142, "loss": 3.1682, "step": 210 }, { "epoch": 1.13, "learning_rate": 0.00013192402730508626, "loss": 3.4747, "step": 215 }, { "epoch": 1.15, "learning_rate": 0.00012954274596774817, "loss": 3.4567, "step": 220 }, { "epoch": 1.18, "learning_rate": 0.0001267495091882376, "loss": 3.2241, "step": 225 }, { "epoch": 1.2, "learning_rate": 0.00012356319844415004, "loss": 3.104, "step": 230 }, { "epoch": 1.23, "learning_rate": 0.00012000535228007566, "loss": 3.0082, "step": 235 }, { "epoch": 1.26, "learning_rate": 0.0001161000207132303, "loss": 3.2858, "step": 240 }, { "epoch": 1.28, "learning_rate": 0.00011187360266225318, "loss": 3.2368, "step": 245 }, { "epoch": 1.31, "learning_rate": 0.00010735466749810626, "loss": 3.2569, "step": 250 }, { "epoch": 1.34, "learning_rate": 0.0001025737619233402, "loss": 3.1777, "step": 255 }, { "epoch": 1.36, "learning_rate": 9.756320348516688e-05, "loss": 3.3335, "step": 260 }, { "epoch": 1.39, "learning_rate": 9.23568621181315e-05, "loss": 3.0882, "step": 265 }, { "epoch": 1.41, "learning_rate": 8.69899311930922e-05, "loss": 3.2659, "step": 270 }, { "epoch": 1.44, "learning_rate": 8.149868962015093e-05, "loss": 3.2573, "step": 275 }, { "epoch": 1.47, "learning_rate": 7.592025661365123e-05, "loss": 3.1257, "step": 280 }, { "epoch": 1.49, "learning_rate": 7.029234077696063e-05, "loss": 3.4355, "step": 285 }, { "epoch": 1.52, "learning_rate": 6.465298520315264e-05, "loss": 3.089, "step": 290 }, { "epoch": 1.54, "learning_rate": 5.9040310314635076e-05, "loss": 3.0155, "step": 295 }, { "epoch": 1.57, "learning_rate": 5.349225618005358e-05, "loss": 3.098, "step": 300 }, { "epoch": 1.6, "learning_rate": 4.804632605033502e-05, "loss": 3.0511, "step": 305 }, { "epoch": 1.62, "learning_rate": 4.273933284749334e-05, "loss": 3.254, "step": 310 }, { "epoch": 1.65, "learning_rate": 3.760715031986035e-05, "loss": 3.4357, "step": 315 }, { "epoch": 1.68, "learning_rate": 3.268447054585981e-05, "loss": 3.1706, "step": 320 }, { "epoch": 1.7, "learning_rate": 2.8004569425531536e-05, "loss": 3.2949, "step": 325 }, { "epoch": 1.73, "learning_rate": 2.359908174501344e-05, "loss": 3.0581, "step": 330 }, { "epoch": 1.75, "learning_rate": 1.949778733448209e-05, "loss": 2.9815, "step": 335 }, { "epoch": 1.78, "learning_rate": 1.572840976506218e-05, "loss": 3.1285, "step": 340 }, { "epoch": 1.81, "learning_rate": 1.2316428945455985e-05, "loss": 3.2872, "step": 345 }, { "epoch": 1.83, "learning_rate": 9.284908885085886e-06, "loss": 3.3075, "step": 350 }, { "epoch": 1.86, "learning_rate": 6.654341788021345e-06, "loss": 3.1916, "step": 355 }, { "epoch": 1.88, "learning_rate": 4.442509531570712e-06, "loss": 3.2904, "step": 360 }, { "epoch": 1.91, "learning_rate": 2.6643634659029754e-06, "loss": 3.1763, "step": 365 }, { "epoch": 1.94, "learning_rate": 1.3319233472187158e-06, "loss": 3.2474, "step": 370 }, { "epoch": 1.96, "learning_rate": 4.541960876539865e-07, "loss": 3.295, "step": 375 }, { "epoch": 1.99, "learning_rate": 3.711487114362266e-08, "loss": 2.9591, "step": 380 }, { "epoch": 2.0, "eval_loss": 3.276902914047241, "eval_runtime": 3.4584, "eval_samples_per_second": 76.624, "eval_steps_per_second": 9.831, "step": 382 }, { "epoch": 2.02, "learning_rate": 8.349904680360514e-08, "loss": 3.0232, "step": 385 }, { "epoch": 2.04, "learning_rate": 5.930350709415567e-07, "loss": 3.027, "step": 390 }, { "epoch": 2.07, "learning_rate": 1.5622786265220133e-06, "loss": 3.0191, "step": 395 }, { "epoch": 2.09, "learning_rate": 2.9846779057592496e-06, "loss": 2.7702, "step": 400 }, { "epoch": 2.12, "learning_rate": 4.850617898454477e-06, "loss": 2.9301, "step": 405 }, { "epoch": 2.15, "learning_rate": 7.147485386700417e-06, "loss": 2.9283, "step": 410 }, { "epoch": 2.17, "learning_rate": 9.859754206608232e-06, "loss": 3.039, "step": 415 }, { "epoch": 2.2, "learning_rate": 1.2969090200711697e-05, "loss": 3.1938, "step": 420 }, { "epoch": 2.23, "learning_rate": 1.645447515160081e-05, "loss": 2.8778, "step": 425 }, { "epoch": 2.25, "learning_rate": 2.0292348859028838e-05, "loss": 3.1301, "step": 430 }, { "epoch": 2.28, "learning_rate": 2.4456768400092677e-05, "loss": 3.2, "step": 435 }, { "epoch": 2.3, "learning_rate": 2.891958349593333e-05, "loss": 3.2341, "step": 440 }, { "epoch": 2.33, "learning_rate": 3.3650626799527644e-05, "loss": 3.0918, "step": 445 }, { "epoch": 2.36, "learning_rate": 3.861791781828188e-05, "loss": 3.0801, "step": 450 }, { "epoch": 2.38, "learning_rate": 4.378787909296712e-05, "loss": 3.1323, "step": 455 }, { "epoch": 2.41, "learning_rate": 4.91255631716905e-05, "loss": 2.9001, "step": 460 }, { "epoch": 2.43, "learning_rate": 5.4594888844622835e-05, "loss": 2.9404, "step": 465 }, { "epoch": 2.46, "learning_rate": 6.0158885042604066e-05, "loss": 2.8547, "step": 470 }, { "epoch": 2.49, "learning_rate": 6.577994075094185e-05, "loss": 3.023, "step": 475 }, { "epoch": 2.51, "learning_rate": 7.14200592490581e-05, "loss": 2.8448, "step": 480 }, { "epoch": 2.54, "learning_rate": 7.704111495739588e-05, "loss": 2.933, "step": 485 }, { "epoch": 2.57, "learning_rate": 8.260511115537712e-05, "loss": 2.9129, "step": 490 }, { "epoch": 2.59, "learning_rate": 8.807443682830946e-05, "loss": 2.9149, "step": 495 }, { "epoch": 2.62, "learning_rate": 9.341212090703282e-05, "loss": 3.051, "step": 500 }, { "epoch": 2.64, "learning_rate": 9.858208218171807e-05, "loss": 2.9097, "step": 505 }, { "epoch": 2.67, "learning_rate": 0.00010354937320047231, "loss": 3.0169, "step": 510 }, { "epoch": 2.7, "learning_rate": 0.00010828041650406663, "loss": 2.9528, "step": 515 }, { "epoch": 2.72, "learning_rate": 0.00011274323159990728, "loss": 3.1967, "step": 520 }, { "epoch": 2.75, "learning_rate": 0.00011690765114097114, "loss": 3.1819, "step": 525 }, { "epoch": 2.77, "learning_rate": 0.00012074552484839915, "loss": 2.9504, "step": 530 }, { "epoch": 2.8, "learning_rate": 0.00012423090979928828, "loss": 2.9404, "step": 535 }, { "epoch": 2.83, "learning_rate": 0.00012734024579339175, "loss": 3.0818, "step": 540 }, { "epoch": 2.85, "learning_rate": 0.00013005251461329956, "loss": 3.14, "step": 545 }, { "epoch": 2.88, "learning_rate": 0.0001323493821015455, "loss": 2.9756, "step": 550 }, { "epoch": 2.91, "learning_rate": 0.00013421532209424074, "loss": 3.1237, "step": 555 }, { "epoch": 2.93, "learning_rate": 0.000135637721373478, "loss": 2.7926, "step": 560 }, { "epoch": 2.96, "learning_rate": 0.00013660696492905842, "loss": 3.0619, "step": 565 }, { "epoch": 2.98, "learning_rate": 0.00013711650095319638, "loss": 3.0946, "step": 570 }, { "epoch": 3.0, "eval_loss": 3.266069173812866, "eval_runtime": 3.4464, "eval_samples_per_second": 76.891, "eval_steps_per_second": 9.865, "step": 573 }, { "epoch": 3.01, "learning_rate": 0.00013716288512885638, "loss": 3.1661, "step": 575 }, { "epoch": 3.04, "learning_rate": 0.000136745803912346, "loss": 2.6779, "step": 580 }, { "epoch": 3.06, "learning_rate": 0.00013586807665278127, "loss": 2.6821, "step": 585 }, { "epoch": 3.09, "learning_rate": 0.00013453563653409702, "loss": 2.7079, "step": 590 }, { "epoch": 3.12, "learning_rate": 0.00013275749046842927, "loss": 2.7825, "step": 595 }, { "epoch": 3.14, "learning_rate": 0.0001305456582119787, "loss": 2.8057, "step": 600 }, { "epoch": 3.17, "learning_rate": 0.00012791509111491415, "loss": 2.9291, "step": 605 }, { "epoch": 3.19, "learning_rate": 0.00012488357105454404, "loss": 2.9353, "step": 610 }, { "epoch": 3.22, "learning_rate": 0.00012147159023493787, "loss": 2.8316, "step": 615 }, { "epoch": 3.25, "learning_rate": 0.00011770221266551796, "loss": 2.9357, "step": 620 }, { "epoch": 3.27, "learning_rate": 0.00011360091825498661, "loss": 2.9269, "step": 625 }, { "epoch": 3.3, "learning_rate": 0.00010919543057446852, "loss": 2.8717, "step": 630 }, { "epoch": 3.32, "learning_rate": 0.00010451552945414024, "loss": 2.8045, "step": 635 }, { "epoch": 3.35, "learning_rate": 9.959284968013971e-05, "loss": 2.9829, "step": 640 }, { "epoch": 3.38, "learning_rate": 9.446066715250671e-05, "loss": 2.7457, "step": 645 }, { "epoch": 3.4, "learning_rate": 8.915367394966503e-05, "loss": 2.7768, "step": 650 }, { "epoch": 3.43, "learning_rate": 8.370774381994648e-05, "loss": 2.5404, "step": 655 }, { "epoch": 3.46, "learning_rate": 7.815968968536493e-05, "loss": 2.6558, "step": 660 }, { "epoch": 3.48, "learning_rate": 7.254701479684736e-05, "loss": 2.8658, "step": 665 }, { "epoch": 3.51, "learning_rate": 6.690765922303943e-05, "loss": 3.0811, "step": 670 }, { "epoch": 3.53, "learning_rate": 6.127974338634884e-05, "loss": 2.7066, "step": 675 }, { "epoch": 3.56, "learning_rate": 5.570131037984919e-05, "loss": 2.8037, "step": 680 }, { "epoch": 3.59, "learning_rate": 5.0210068806907975e-05, "loss": 2.5833, "step": 685 }, { "epoch": 3.61, "learning_rate": 4.484313788186866e-05, "loss": 2.8664, "step": 690 }, { "epoch": 3.64, "learning_rate": 3.963679651483327e-05, "loss": 2.7661, "step": 695 }, { "epoch": 3.66, "learning_rate": 3.462623807665996e-05, "loss": 2.6497, "step": 700 }, { "epoch": 3.69, "learning_rate": 2.9845332501893884e-05, "loss": 2.6264, "step": 705 }, { "epoch": 3.72, "learning_rate": 2.5326397337746965e-05, "loss": 2.7237, "step": 710 }, { "epoch": 3.74, "learning_rate": 2.109997928676983e-05, "loss": 2.8732, "step": 715 }, { "epoch": 3.77, "learning_rate": 1.719464771992433e-05, "loss": 2.9159, "step": 720 }, { "epoch": 3.8, "learning_rate": 1.3636801555849966e-05, "loss": 2.9253, "step": 725 }, { "epoch": 3.82, "learning_rate": 1.0450490811762374e-05, "loss": 2.6746, "step": 730 }, { "epoch": 3.85, "learning_rate": 7.657254032251816e-06, "loss": 2.7104, "step": 735 }, { "epoch": 3.87, "learning_rate": 5.275972694913755e-06, "loss": 2.8526, "step": 740 }, { "epoch": 3.9, "learning_rate": 3.3227435769885726e-06, "loss": 2.8344, "step": 745 }, { "epoch": 3.93, "learning_rate": 1.8107699457751103e-06, "loss": 2.7425, "step": 750 }, { "epoch": 3.95, "learning_rate": 7.502723083396683e-07, "loss": 2.8643, "step": 755 }, { "epoch": 3.98, "learning_rate": 1.4841932382638991e-07, "loss": 2.8953, "step": 760 }, { "epoch": 4.0, "eval_loss": 3.1601226329803467, "eval_runtime": 3.449, "eval_samples_per_second": 76.834, "eval_steps_per_second": 9.858, "step": 764 }, { "epoch": 4.01, "learning_rate": 9.279345382488647e-09, "loss": 2.6508, "step": 765 }, { "epoch": 4.03, "learning_rate": 3.3379291926057076e-07, "loss": 2.4879, "step": 770 }, { "epoch": 4.06, "learning_rate": 1.119766426996385e-06, "loss": 2.5634, "step": 775 }, { "epoch": 4.08, "learning_rate": 2.361886913638944e-06, "loss": 2.6256, "step": 780 }, { "epoch": 4.11, "learning_rate": 4.051758001798611e-06, "loss": 2.7326, "step": 785 }, { "epoch": 4.14, "learning_rate": 6.1779566487443804e-06, "loss": 2.6579, "step": 790 }, { "epoch": 4.16, "learning_rate": 8.726110362888466e-06, "loss": 2.5027, "step": 795 }, { "epoch": 4.19, "learning_rate": 1.167899435769802e-05, "loss": 2.9039, "step": 800 }, { "epoch": 4.21, "learning_rate": 1.5016647986299231e-05, "loss": 2.4959, "step": 805 }, { "epoch": 4.24, "learning_rate": 1.8716509669712747e-05, "loss": 2.5862, "step": 810 }, { "epoch": 4.27, "learning_rate": 2.2753569406640054e-05, "loss": 2.5506, "step": 815 }, { "epoch": 4.29, "learning_rate": 2.7100537833883422e-05, "loss": 2.3108, "step": 820 }, { "epoch": 4.32, "learning_rate": 3.172803069459421e-05, "loss": 2.536, "step": 825 }, { "epoch": 4.35, "learning_rate": 3.660476746740293e-05, "loss": 2.5062, "step": 830 }, { "epoch": 4.37, "learning_rate": 4.169778281374917e-05, "loss": 2.4466, "step": 835 }, { "epoch": 4.4, "learning_rate": 4.697264941409472e-05, "loss": 2.4376, "step": 840 }, { "epoch": 4.42, "learning_rate": 5.2393710686704254e-05, "loss": 2.8146, "step": 845 }, { "epoch": 4.45, "learning_rate": 5.79243218158887e-05, "loss": 2.6586, "step": 850 }, { "epoch": 4.48, "learning_rate": 6.35270974604226e-05, "loss": 2.5297, "step": 855 }, { "epoch": 4.5, "learning_rate": 6.916416446770344e-05, "loss": 2.33, "step": 860 }, { "epoch": 4.53, "learning_rate": 7.47974178853758e-05, "loss": 2.5292, "step": 865 }, { "epoch": 4.55, "learning_rate": 8.038877853985258e-05, "loss": 2.5357, "step": 870 }, { "epoch": 4.58, "learning_rate": 8.590045044058314e-05, "loss": 2.6811, "step": 875 }, { "epoch": 4.61, "learning_rate": 9.129517627008144e-05, "loss": 2.5603, "step": 880 }, { "epoch": 4.63, "learning_rate": 9.653648923268214e-05, "loss": 2.716, "step": 885 }, { "epoch": 4.66, "learning_rate": 0.00010158895955959489, "loss": 2.7213, "step": 890 }, { "epoch": 4.69, "learning_rate": 0.00010641843400395645, "loss": 2.4893, "step": 895 }, { "epoch": 4.71, "learning_rate": 0.0001109922667069592, "loss": 2.6099, "step": 900 }, { "epoch": 4.74, "learning_rate": 0.00011527953987447286, "loss": 2.7473, "step": 905 }, { "epoch": 4.76, "learning_rate": 0.00011925127277244263, "loss": 2.7703, "step": 910 }, { "epoch": 4.79, "learning_rate": 0.00012288061762832114, "loss": 2.75, "step": 915 }, { "epoch": 4.82, "learning_rate": 0.00012614304111429104, "loss": 2.7698, "step": 920 }, { "epoch": 4.84, "learning_rate": 0.000129016490185509, "loss": 2.7411, "step": 925 }, { "epoch": 4.87, "learning_rate": 0.0001314815411523478, "loss": 2.6514, "step": 930 }, { "epoch": 4.9, "learning_rate": 0.000133521530978955, "loss": 2.7788, "step": 935 }, { "epoch": 4.92, "learning_rate": 0.00013512266992058388, "loss": 2.5737, "step": 940 }, { "epoch": 4.95, "learning_rate": 0.00013627413473830423, "loss": 2.3355, "step": 945 }, { "epoch": 4.97, "learning_rate": 0.00013696814186098548, "loss": 2.7536, "step": 950 }, { "epoch": 5.0, "learning_rate": 0.0001372, "loss": 2.8008, "step": 955 }, { "epoch": 5.0, "eval_loss": 3.157210350036621, "eval_runtime": 3.4457, "eval_samples_per_second": 76.906, "eval_steps_per_second": 9.867, "step": 955 }, { "epoch": 5.03, "learning_rate": 0.00013696814186098548, "loss": 2.3276, "step": 960 }, { "epoch": 5.05, "learning_rate": 0.00013627413473830426, "loss": 2.5804, "step": 965 }, { "epoch": 5.08, "learning_rate": 0.0001351226699205839, "loss": 2.554, "step": 970 }, { "epoch": 5.1, "learning_rate": 0.00013352153097895503, "loss": 2.5025, "step": 975 }, { "epoch": 5.13, "learning_rate": 0.00013148154115234786, "loss": 2.4069, "step": 980 }, { "epoch": 5.16, "learning_rate": 0.00012901649018550904, "loss": 2.6704, "step": 985 }, { "epoch": 5.18, "learning_rate": 0.0001261430411142911, "loss": 2.2362, "step": 990 }, { "epoch": 5.21, "learning_rate": 0.00012288061762832122, "loss": 2.3459, "step": 995 }, { "epoch": 5.24, "learning_rate": 0.0001192512727724427, "loss": 2.2415, "step": 1000 }, { "epoch": 5.26, "learning_rate": 0.00011527953987447295, "loss": 2.5082, "step": 1005 }, { "epoch": 5.29, "learning_rate": 0.00011099226670695929, "loss": 2.3534, "step": 1010 }, { "epoch": 5.31, "learning_rate": 0.00010641843400395655, "loss": 2.5017, "step": 1015 }, { "epoch": 5.34, "learning_rate": 0.00010158895955959498, "loss": 2.4172, "step": 1020 }, { "epoch": 5.37, "learning_rate": 9.653648923268222e-05, "loss": 2.5386, "step": 1025 }, { "epoch": 5.39, "learning_rate": 9.129517627008154e-05, "loss": 2.4429, "step": 1030 }, { "epoch": 5.42, "learning_rate": 8.590045044058324e-05, "loss": 2.3742, "step": 1035 }, { "epoch": 5.45, "learning_rate": 8.038877853985269e-05, "loss": 2.4793, "step": 1040 }, { "epoch": 5.47, "learning_rate": 7.479741788537588e-05, "loss": 2.6054, "step": 1045 }, { "epoch": 5.5, "learning_rate": 6.916416446770354e-05, "loss": 2.4472, "step": 1050 }, { "epoch": 5.52, "learning_rate": 6.35270974604227e-05, "loss": 2.5238, "step": 1055 }, { "epoch": 5.55, "learning_rate": 5.792432181588881e-05, "loss": 2.2441, "step": 1060 }, { "epoch": 5.58, "learning_rate": 5.239371068670435e-05, "loss": 2.5048, "step": 1065 }, { "epoch": 5.6, "learning_rate": 4.6972649414094817e-05, "loss": 2.4761, "step": 1070 }, { "epoch": 5.63, "learning_rate": 4.1697782813749265e-05, "loss": 2.5068, "step": 1075 }, { "epoch": 5.65, "learning_rate": 3.6604767467403014e-05, "loss": 2.4981, "step": 1080 }, { "epoch": 5.68, "learning_rate": 3.17280306945943e-05, "loss": 2.3925, "step": 1085 }, { "epoch": 5.71, "learning_rate": 2.7100537833883503e-05, "loss": 2.271, "step": 1090 }, { "epoch": 5.73, "learning_rate": 2.275356940664013e-05, "loss": 2.5484, "step": 1095 }, { "epoch": 5.76, "learning_rate": 1.8716509669712814e-05, "loss": 2.3203, "step": 1100 }, { "epoch": 5.79, "learning_rate": 1.50166479862993e-05, "loss": 2.4399, "step": 1105 }, { "epoch": 5.81, "learning_rate": 1.1678994357698073e-05, "loss": 2.5271, "step": 1110 }, { "epoch": 5.84, "learning_rate": 8.726110362888518e-06, "loss": 2.535, "step": 1115 }, { "epoch": 5.86, "learning_rate": 6.177956648744426e-06, "loss": 2.3464, "step": 1120 }, { "epoch": 5.89, "learning_rate": 4.0517580017986414e-06, "loss": 2.2768, "step": 1125 }, { "epoch": 5.92, "learning_rate": 2.3618869136389365e-06, "loss": 2.5173, "step": 1130 }, { "epoch": 5.94, "learning_rate": 1.1197664269964003e-06, "loss": 2.4667, "step": 1135 }, { "epoch": 5.97, "learning_rate": 3.3379291926057076e-07, "loss": 2.4227, "step": 1140 }, { "epoch": 5.99, "learning_rate": 9.279345382488647e-09, "loss": 2.5067, "step": 1145 }, { "epoch": 6.0, "eval_loss": 3.07564377784729, "eval_runtime": 3.4511, "eval_samples_per_second": 76.787, "eval_steps_per_second": 9.852, "step": 1146 }, { "epoch": 6.02, "learning_rate": 1.4841932382638991e-07, "loss": 2.2549, "step": 1150 }, { "epoch": 6.05, "learning_rate": 7.502723083396531e-07, "loss": 2.2228, "step": 1155 }, { "epoch": 6.07, "learning_rate": 1.8107699457751103e-06, "loss": 2.1061, "step": 1160 }, { "epoch": 6.1, "learning_rate": 3.322743576988504e-06, "loss": 2.1815, "step": 1165 }, { "epoch": 6.13, "learning_rate": 5.275972694913717e-06, "loss": 2.4782, "step": 1170 }, { "epoch": 6.15, "learning_rate": 7.657254032251716e-06, "loss": 2.1363, "step": 1175 }, { "epoch": 6.18, "learning_rate": 1.045049081176232e-05, "loss": 2.2368, "step": 1180 }, { "epoch": 6.2, "learning_rate": 1.3636801555849831e-05, "loss": 2.2992, "step": 1185 }, { "epoch": 6.23, "learning_rate": 1.719464771992426e-05, "loss": 2.3285, "step": 1190 }, { "epoch": 6.26, "learning_rate": 2.1099979286769666e-05, "loss": 2.2711, "step": 1195 }, { "epoch": 6.28, "learning_rate": 2.5326397337746883e-05, "loss": 2.3187, "step": 1200 }, { "epoch": 6.31, "learning_rate": 2.9845332501893694e-05, "loss": 2.0419, "step": 1205 }, { "epoch": 6.34, "learning_rate": 3.462623807665987e-05, "loss": 2.0977, "step": 1210 }, { "epoch": 6.36, "learning_rate": 3.9636796514833065e-05, "loss": 2.2842, "step": 1215 }, { "epoch": 6.39, "learning_rate": 4.484313788186857e-05, "loss": 2.3898, "step": 1220 }, { "epoch": 6.41, "learning_rate": 5.021006880690775e-05, "loss": 2.3513, "step": 1225 }, { "epoch": 6.44, "learning_rate": 5.570131037984909e-05, "loss": 1.947, "step": 1230 }, { "epoch": 6.47, "learning_rate": 6.127974338634885e-05, "loss": 2.2545, "step": 1235 }, { "epoch": 6.49, "learning_rate": 6.690765922303933e-05, "loss": 2.07, "step": 1240 }, { "epoch": 6.52, "learning_rate": 7.254701479684739e-05, "loss": 2.1936, "step": 1245 }, { "epoch": 6.54, "learning_rate": 7.815968968536482e-05, "loss": 2.4149, "step": 1250 }, { "epoch": 6.57, "learning_rate": 8.37077438199465e-05, "loss": 2.3692, "step": 1255 }, { "epoch": 6.6, "learning_rate": 8.915367394966493e-05, "loss": 2.3514, "step": 1260 }, { "epoch": 6.62, "learning_rate": 9.446066715250662e-05, "loss": 1.9397, "step": 1265 }, { "epoch": 6.65, "learning_rate": 9.959284968013953e-05, "loss": 2.3535, "step": 1270 }, { "epoch": 6.68, "learning_rate": 0.00010451552945414016, "loss": 2.111, "step": 1275 }, { "epoch": 6.7, "learning_rate": 0.00010919543057446833, "loss": 2.2016, "step": 1280 }, { "epoch": 6.73, "learning_rate": 0.00011360091825498652, "loss": 2.4254, "step": 1285 }, { "epoch": 6.75, "learning_rate": 0.00011770221266551781, "loss": 2.4963, "step": 1290 }, { "epoch": 6.78, "learning_rate": 0.0001214715902349378, "loss": 2.3937, "step": 1295 }, { "epoch": 6.81, "learning_rate": 0.00012488357105454407, "loss": 2.2234, "step": 1300 }, { "epoch": 6.83, "learning_rate": 0.0001279150911149141, "loss": 2.3214, "step": 1305 }, { "epoch": 6.86, "learning_rate": 0.0001305456582119787, "loss": 2.3266, "step": 1310 }, { "epoch": 6.88, "learning_rate": 0.00013275749046842922, "loss": 2.3065, "step": 1315 }, { "epoch": 6.91, "learning_rate": 0.00013453563653409702, "loss": 2.4139, "step": 1320 }, { "epoch": 6.94, "learning_rate": 0.00013586807665278124, "loss": 2.38, "step": 1325 }, { "epoch": 6.96, "learning_rate": 0.000136745803912346, "loss": 2.3551, "step": 1330 }, { "epoch": 6.99, "learning_rate": 0.00013716288512885636, "loss": 2.3378, "step": 1335 }, { "epoch": 7.0, "eval_loss": 3.0805811882019043, "eval_runtime": 3.449, "eval_samples_per_second": 76.834, "eval_steps_per_second": 9.858, "step": 1337 }, { "epoch": 7.02, "learning_rate": 0.0001371165009531964, "loss": 2.2552, "step": 1340 }, { "epoch": 7.04, "learning_rate": 0.00013660696492905845, "loss": 2.1842, "step": 1345 }, { "epoch": 7.07, "learning_rate": 0.00013563772137347802, "loss": 2.097, "step": 1350 }, { "epoch": 7.09, "learning_rate": 0.00013421532209424082, "loss": 1.7536, "step": 1355 }, { "epoch": 7.12, "learning_rate": 0.00013234938210154558, "loss": 2.2293, "step": 1360 }, { "epoch": 7.15, "learning_rate": 0.0001300525146132997, "loss": 2.0082, "step": 1365 }, { "epoch": 7.17, "learning_rate": 0.00012734024579339186, "loss": 1.951, "step": 1370 }, { "epoch": 7.2, "learning_rate": 0.00012423090979928834, "loss": 2.3563, "step": 1375 }, { "epoch": 7.23, "learning_rate": 0.00012074552484839929, "loss": 2.0015, "step": 1380 }, { "epoch": 7.25, "learning_rate": 0.00011690765114097119, "loss": 2.3068, "step": 1385 }, { "epoch": 7.28, "learning_rate": 0.00011274323159990744, "loss": 2.2242, "step": 1390 }, { "epoch": 7.3, "learning_rate": 0.0001082804165040668, "loss": 2.1933, "step": 1395 }, { "epoch": 7.33, "learning_rate": 0.00010354937320047261, "loss": 2.0466, "step": 1400 }, { "epoch": 7.36, "learning_rate": 9.858208218171827e-05, "loss": 2.1267, "step": 1405 }, { "epoch": 7.38, "learning_rate": 9.341212090703316e-05, "loss": 2.08, "step": 1410 }, { "epoch": 7.41, "learning_rate": 8.807443682830967e-05, "loss": 2.1487, "step": 1415 }, { "epoch": 7.43, "learning_rate": 8.260511115537746e-05, "loss": 2.0919, "step": 1420 }, { "epoch": 7.46, "learning_rate": 7.70411149573961e-05, "loss": 2.2303, "step": 1425 }, { "epoch": 7.49, "learning_rate": 7.142005924905844e-05, "loss": 2.2703, "step": 1430 }, { "epoch": 7.51, "learning_rate": 6.57799407509417e-05, "loss": 2.1199, "step": 1435 }, { "epoch": 7.54, "learning_rate": 6.015888504260404e-05, "loss": 2.1918, "step": 1440 }, { "epoch": 7.57, "learning_rate": 5.4594888844622686e-05, "loss": 2.1859, "step": 1445 }, { "epoch": 7.59, "learning_rate": 4.912556317169047e-05, "loss": 2.2244, "step": 1450 }, { "epoch": 7.62, "learning_rate": 4.378787909296698e-05, "loss": 2.0414, "step": 1455 }, { "epoch": 7.64, "learning_rate": 3.861791781828185e-05, "loss": 2.2903, "step": 1460 }, { "epoch": 7.67, "learning_rate": 3.365062679952752e-05, "loss": 2.1773, "step": 1465 }, { "epoch": 7.7, "learning_rate": 2.8919583495933312e-05, "loss": 2.0957, "step": 1470 }, { "epoch": 7.72, "learning_rate": 2.4456768400092664e-05, "loss": 2.1373, "step": 1475 }, { "epoch": 7.75, "learning_rate": 2.029234885902891e-05, "loss": 2.2073, "step": 1480 }, { "epoch": 7.77, "learning_rate": 1.6454475151600793e-05, "loss": 2.0692, "step": 1485 }, { "epoch": 7.8, "learning_rate": 1.2969090200711758e-05, "loss": 2.1887, "step": 1490 }, { "epoch": 7.83, "learning_rate": 9.859754206608216e-06, "loss": 2.0833, "step": 1495 }, { "epoch": 7.85, "learning_rate": 7.147485386700356e-06, "loss": 2.3646, "step": 1500 }, { "epoch": 7.88, "learning_rate": 4.850617898454462e-06, "loss": 2.2123, "step": 1505 }, { "epoch": 7.91, "learning_rate": 2.9846779057592115e-06, "loss": 2.0273, "step": 1510 }, { "epoch": 7.93, "learning_rate": 1.5622786265220057e-06, "loss": 1.9322, "step": 1515 }, { "epoch": 7.96, "learning_rate": 5.93035070941549e-07, "loss": 2.0401, "step": 1520 }, { "epoch": 7.98, "learning_rate": 8.349904680360514e-08, "loss": 2.0603, "step": 1525 }, { "epoch": 8.0, "eval_loss": 3.059950828552246, "eval_runtime": 3.4514, "eval_samples_per_second": 76.78, "eval_steps_per_second": 9.851, "step": 1528 }, { "epoch": 8.01, "learning_rate": 3.711487114362266e-08, "loss": 2.0764, "step": 1530 }, { "epoch": 8.04, "learning_rate": 4.5419608765397886e-07, "loss": 1.9304, "step": 1535 }, { "epoch": 8.06, "learning_rate": 1.3319233472187234e-06, "loss": 1.8933, "step": 1540 }, { "epoch": 8.09, "learning_rate": 2.664363465902945e-06, "loss": 2.2206, "step": 1545 }, { "epoch": 8.12, "learning_rate": 4.44250953157072e-06, "loss": 2.0486, "step": 1550 }, { "epoch": 8.14, "learning_rate": 6.654341788021246e-06, "loss": 1.8876, "step": 1555 }, { "epoch": 8.17, "learning_rate": 9.28490888508584e-06, "loss": 1.7926, "step": 1560 }, { "epoch": 8.19, "learning_rate": 1.2316428945455863e-05, "loss": 1.7425, "step": 1565 }, { "epoch": 8.22, "learning_rate": 1.572840976506211e-05, "loss": 1.9577, "step": 1570 }, { "epoch": 8.25, "learning_rate": 1.9497787334482106e-05, "loss": 2.0153, "step": 1575 }, { "epoch": 8.27, "learning_rate": 2.3599081745013365e-05, "loss": 1.9377, "step": 1580 }, { "epoch": 8.3, "learning_rate": 2.800456942553155e-05, "loss": 2.2322, "step": 1585 }, { "epoch": 8.32, "learning_rate": 3.268447054585973e-05, "loss": 1.7475, "step": 1590 }, { "epoch": 8.35, "learning_rate": 3.760715031986036e-05, "loss": 1.8289, "step": 1595 }, { "epoch": 8.38, "learning_rate": 4.273933284749324e-05, "loss": 1.8663, "step": 1600 }, { "epoch": 8.4, "learning_rate": 4.804632605033493e-05, "loss": 2.0319, "step": 1605 }, { "epoch": 8.43, "learning_rate": 5.349225618005337e-05, "loss": 2.1028, "step": 1610 }, { "epoch": 8.46, "learning_rate": 5.9040310314635035e-05, "loss": 1.9332, "step": 1615 }, { "epoch": 8.48, "learning_rate": 6.465298520315248e-05, "loss": 1.8289, "step": 1620 }, { "epoch": 8.51, "learning_rate": 7.029234077696052e-05, "loss": 1.8439, "step": 1625 }, { "epoch": 8.53, "learning_rate": 7.5920256613651e-05, "loss": 1.7968, "step": 1630 }, { "epoch": 8.56, "learning_rate": 8.149868962015078e-05, "loss": 2.0554, "step": 1635 }, { "epoch": 8.59, "learning_rate": 8.698993119309211e-05, "loss": 2.0439, "step": 1640 }, { "epoch": 8.61, "learning_rate": 9.235686211813131e-05, "loss": 2.1254, "step": 1645 }, { "epoch": 8.64, "learning_rate": 9.75632034851668e-05, "loss": 1.8997, "step": 1650 }, { "epoch": 8.66, "learning_rate": 0.00010257376192334, "loss": 1.8282, "step": 1655 }, { "epoch": 8.69, "learning_rate": 0.00010735466749810619, "loss": 1.977, "step": 1660 }, { "epoch": 8.72, "learning_rate": 0.000111873602662253, "loss": 1.9521, "step": 1665 }, { "epoch": 8.74, "learning_rate": 0.00011610002071323023, "loss": 2.227, "step": 1670 }, { "epoch": 8.77, "learning_rate": 0.00012000535228007549, "loss": 1.9954, "step": 1675 }, { "epoch": 8.8, "learning_rate": 0.00012356319844414993, "loss": 2.1384, "step": 1680 }, { "epoch": 8.82, "learning_rate": 0.00012674950918823747, "loss": 1.9971, "step": 1685 }, { "epoch": 8.85, "learning_rate": 0.0001295427459677481, "loss": 1.9247, "step": 1690 }, { "epoch": 8.87, "learning_rate": 0.00013192402730508612, "loss": 2.0361, "step": 1695 }, { "epoch": 8.9, "learning_rate": 0.00013387725642301137, "loss": 1.9686, "step": 1700 }, { "epoch": 8.93, "learning_rate": 0.0001353892300542248, "loss": 2.1605, "step": 1705 }, { "epoch": 8.95, "learning_rate": 0.0001364497276916603, "loss": 2.12, "step": 1710 }, { "epoch": 8.98, "learning_rate": 0.0001370515806761736, "loss": 2.0884, "step": 1715 }, { "epoch": 9.0, "eval_loss": 3.1394710540771484, "eval_runtime": 3.4497, "eval_samples_per_second": 76.819, "eval_steps_per_second": 9.856, "step": 1719 }, { "epoch": 9.01, "learning_rate": 0.0001371907206546175, "loss": 2.0551, "step": 1720 }, { "epoch": 9.03, "learning_rate": 0.00013686620708073944, "loss": 1.8553, "step": 1725 }, { "epoch": 9.06, "learning_rate": 0.00013608023357300367, "loss": 1.9159, "step": 1730 }, { "epoch": 9.08, "learning_rate": 0.0001348381130863611, "loss": 1.7198, "step": 1735 }, { "epoch": 9.11, "learning_rate": 0.0001331482419982015, "loss": 1.8409, "step": 1740 }, { "epoch": 9.14, "learning_rate": 0.00013102204335125575, "loss": 1.8302, "step": 1745 }, { "epoch": 9.16, "learning_rate": 0.00012847388963711166, "loss": 1.8527, "step": 1750 }, { "epoch": 9.19, "learning_rate": 0.00012552100564230202, "loss": 2.1229, "step": 1755 }, { "epoch": 9.21, "learning_rate": 0.0001221833520137008, "loss": 1.8018, "step": 1760 }, { "epoch": 9.24, "learning_rate": 0.00011848349033028711, "loss": 2.0999, "step": 1765 }, { "epoch": 9.27, "learning_rate": 0.00011444643059335979, "loss": 1.8556, "step": 1770 }, { "epoch": 9.29, "learning_rate": 0.00011009946216611662, "loss": 1.918, "step": 1775 }, { "epoch": 9.32, "learning_rate": 0.00010547196930540541, "loss": 1.8345, "step": 1780 }, { "epoch": 9.35, "learning_rate": 0.0001005952325325969, "loss": 1.9419, "step": 1785 }, { "epoch": 9.37, "learning_rate": 9.550221718625064e-05, "loss": 1.7719, "step": 1790 }, { "epoch": 9.4, "learning_rate": 9.022735058590532e-05, "loss": 1.6571, "step": 1795 }, { "epoch": 9.42, "learning_rate": 8.480628931329531e-05, "loss": 1.7121, "step": 1800 }, { "epoch": 9.45, "learning_rate": 7.92756781841111e-05, "loss": 1.8914, "step": 1805 }, { "epoch": 9.48, "learning_rate": 7.367290253957744e-05, "loss": 2.0196, "step": 1810 }, { "epoch": 9.5, "learning_rate": 6.80358355322966e-05, "loss": 1.7844, "step": 1815 }, { "epoch": 9.53, "learning_rate": 6.240258211462402e-05, "loss": 1.9618, "step": 1820 }, { "epoch": 9.55, "learning_rate": 5.681122146014721e-05, "loss": 1.9428, "step": 1825 }, { "epoch": 9.58, "learning_rate": 5.129954955941689e-05, "loss": 1.7754, "step": 1830 }, { "epoch": 9.61, "learning_rate": 4.590482372991883e-05, "loss": 1.9829, "step": 1835 }, { "epoch": 9.63, "learning_rate": 4.066351076731789e-05, "loss": 1.8845, "step": 1840 }, { "epoch": 9.66, "learning_rate": 3.561104044040515e-05, "loss": 1.9994, "step": 1845 }, { "epoch": 9.69, "learning_rate": 3.078156599604378e-05, "loss": 1.826, "step": 1850 }, { "epoch": 9.71, "learning_rate": 2.6207733293040633e-05, "loss": 1.8695, "step": 1855 }, { "epoch": 9.74, "learning_rate": 2.1920460125527168e-05, "loss": 1.9426, "step": 1860 }, { "epoch": 9.76, "learning_rate": 1.7948727227557404e-05, "loss": 1.7794, "step": 1865 }, { "epoch": 9.79, "learning_rate": 1.4319382371679026e-05, "loss": 1.5958, "step": 1870 }, { "epoch": 9.82, "learning_rate": 1.105695888570886e-05, "loss": 1.9165, "step": 1875 }, { "epoch": 9.84, "learning_rate": 8.18350981449102e-06, "loss": 1.8373, "step": 1880 }, { "epoch": 9.87, "learning_rate": 5.7184588476522e-06, "loss": 1.9335, "step": 1885 }, { "epoch": 9.9, "learning_rate": 3.6784690210450856e-06, "loss": 1.7418, "step": 1890 }, { "epoch": 9.92, "learning_rate": 2.0773300794160803e-06, "loss": 2.0059, "step": 1895 }, { "epoch": 9.95, "learning_rate": 9.258652616957751e-07, "loss": 1.7969, "step": 1900 }, { "epoch": 9.97, "learning_rate": 2.3185813901453346e-07, "loss": 1.9667, "step": 1905 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 2.0133, "step": 1910 }, { "epoch": 10.0, "eval_loss": 3.0807313919067383, "eval_runtime": 3.4521, "eval_samples_per_second": 76.764, "eval_steps_per_second": 9.849, "step": 1910 }, { "epoch": 10.03, "learning_rate": 2.318581390145182e-07, "loss": 1.8296, "step": 1915 }, { "epoch": 10.05, "learning_rate": 9.258652616957447e-07, "loss": 1.6133, "step": 1920 }, { "epoch": 10.08, "learning_rate": 2.0773300794160346e-06, "loss": 1.7176, "step": 1925 }, { "epoch": 10.1, "learning_rate": 3.6784690210450323e-06, "loss": 1.7257, "step": 1930 }, { "epoch": 10.13, "learning_rate": 5.718458847652131e-06, "loss": 1.7978, "step": 1935 }, { "epoch": 10.16, "learning_rate": 8.183509814490936e-06, "loss": 1.798, "step": 1940 }, { "epoch": 10.18, "learning_rate": 1.1056958885708754e-05, "loss": 1.6063, "step": 1945 }, { "epoch": 10.21, "learning_rate": 1.4319382371678919e-05, "loss": 1.5478, "step": 1950 }, { "epoch": 10.24, "learning_rate": 1.7948727227557282e-05, "loss": 1.6348, "step": 1955 }, { "epoch": 10.26, "learning_rate": 2.1920460125527032e-05, "loss": 1.7918, "step": 1960 }, { "epoch": 10.29, "learning_rate": 2.6207733293040488e-05, "loss": 1.7877, "step": 1965 }, { "epoch": 10.31, "learning_rate": 3.078156599604362e-05, "loss": 1.7034, "step": 1970 }, { "epoch": 10.34, "learning_rate": 3.561104044040498e-05, "loss": 1.6054, "step": 1975 }, { "epoch": 10.37, "learning_rate": 4.066351076731773e-05, "loss": 1.7727, "step": 1980 }, { "epoch": 10.39, "learning_rate": 4.5904823729918646e-05, "loss": 1.7093, "step": 1985 }, { "epoch": 10.42, "learning_rate": 5.1299549559416716e-05, "loss": 1.5078, "step": 1990 }, { "epoch": 10.45, "learning_rate": 5.681122146014703e-05, "loss": 1.7173, "step": 1995 }, { "epoch": 10.47, "learning_rate": 6.240258211462383e-05, "loss": 2.0244, "step": 2000 }, { "epoch": 10.5, "learning_rate": 6.803583553229642e-05, "loss": 1.5563, "step": 2005 }, { "epoch": 10.52, "learning_rate": 7.367290253957726e-05, "loss": 1.587, "step": 2010 }, { "epoch": 10.55, "learning_rate": 7.927567818411092e-05, "loss": 1.6739, "step": 2015 }, { "epoch": 10.58, "learning_rate": 8.480628931329514e-05, "loss": 1.7724, "step": 2020 }, { "epoch": 10.6, "learning_rate": 9.022735058590513e-05, "loss": 1.7843, "step": 2025 }, { "epoch": 10.63, "learning_rate": 9.550221718625046e-05, "loss": 1.7604, "step": 2030 }, { "epoch": 10.65, "learning_rate": 0.00010059523253259673, "loss": 1.8025, "step": 2035 }, { "epoch": 10.68, "learning_rate": 0.00010547196930540525, "loss": 1.6344, "step": 2040 }, { "epoch": 10.71, "learning_rate": 0.00011009946216611647, "loss": 1.8952, "step": 2045 }, { "epoch": 10.73, "learning_rate": 0.00011444643059335966, "loss": 1.5879, "step": 2050 }, { "epoch": 10.76, "learning_rate": 0.00011848349033028699, "loss": 1.7946, "step": 2055 }, { "epoch": 10.79, "learning_rate": 0.00012218335201370068, "loss": 1.6615, "step": 2060 }, { "epoch": 10.81, "learning_rate": 0.0001255210056423019, "loss": 1.6975, "step": 2065 }, { "epoch": 10.84, "learning_rate": 0.00012847388963711158, "loss": 1.465, "step": 2070 }, { "epoch": 10.86, "learning_rate": 0.00013102204335125564, "loss": 2.0658, "step": 2075 }, { "epoch": 10.89, "learning_rate": 0.00013314824199820135, "loss": 1.9283, "step": 2080 }, { "epoch": 10.92, "learning_rate": 0.00013483811308636113, "loss": 1.8069, "step": 2085 }, { "epoch": 10.94, "learning_rate": 0.00013608023357300364, "loss": 1.7805, "step": 2090 }, { "epoch": 10.97, "learning_rate": 0.0001368662070807394, "loss": 1.6908, "step": 2095 }, { "epoch": 10.99, "learning_rate": 0.0001371907206546175, "loss": 1.8003, "step": 2100 }, { "epoch": 11.0, "eval_loss": 3.0452535152435303, "eval_runtime": 3.4493, "eval_samples_per_second": 76.827, "eval_steps_per_second": 9.857, "step": 2101 } ], "max_steps": 11651, "num_train_epochs": 61, "total_flos": 2193024024576000.0, "trial_name": null, "trial_params": null }