{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9986799924570997, |
|
"eval_steps": 500, |
|
"global_step": 331, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003017160098057703, |
|
"grad_norm": 20.078946749628592, |
|
"learning_rate": 0.0, |
|
"loss": 1.4468, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.006034320196115406, |
|
"grad_norm": 4.878738050586557, |
|
"learning_rate": 5e-06, |
|
"loss": 1.1585, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00905148029417311, |
|
"grad_norm": 5.517865422306177, |
|
"learning_rate": 7.924812503605782e-06, |
|
"loss": 1.1812, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.012068640392230812, |
|
"grad_norm": 2.1032343536449445, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1021, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.015085800490288516, |
|
"grad_norm": 2.592635550230835, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1066, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01810296058834622, |
|
"grad_norm": 8.062413826195554, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0541, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.021120120686403922, |
|
"grad_norm": 470.52936338944204, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1385, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.024137280784461625, |
|
"grad_norm": 5.56198780429065, |
|
"learning_rate": 1e-05, |
|
"loss": 1.141, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02715444088251933, |
|
"grad_norm": 3.1646497668353124, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0944, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.030171600980577033, |
|
"grad_norm": 2.0038474699480413, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0951, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.033188761078634735, |
|
"grad_norm": 1.4888187403163304, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0766, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03620592117669244, |
|
"grad_norm": 1.7152815980179157, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0706, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03922308127475014, |
|
"grad_norm": 1.4300762542819367, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0547, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.042240241372807845, |
|
"grad_norm": 1.4526317270279436, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0137, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04525740147086555, |
|
"grad_norm": 1.2974541466019103, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0501, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04827456156892325, |
|
"grad_norm": 1.2017850759065756, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0339, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.051291721666980955, |
|
"grad_norm": 1.1650312606754425, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0284, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05430888176503866, |
|
"grad_norm": 1.218417579737997, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0101, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05732604186309636, |
|
"grad_norm": 1.1592394623790696, |
|
"learning_rate": 1e-05, |
|
"loss": 1.006, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.060343201961154065, |
|
"grad_norm": 1.2412789400669013, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0081, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06336036205921176, |
|
"grad_norm": 1.1656784199257328, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0274, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06637752215726947, |
|
"grad_norm": 1.156112593546092, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0349, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06939468225532718, |
|
"grad_norm": 1.1792204176695948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9633, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07241184235338488, |
|
"grad_norm": 1.1670774133822364, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9912, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07542900245144257, |
|
"grad_norm": 1.1275491972747138, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9776, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07844616254950028, |
|
"grad_norm": 1.1802067276415016, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0141, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.08146332264755798, |
|
"grad_norm": 1.228045833889261, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9936, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08448048274561569, |
|
"grad_norm": 1.2484185459639683, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9932, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0874976428436734, |
|
"grad_norm": 1.2339564712822966, |
|
"learning_rate": 1e-05, |
|
"loss": 0.95, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0905148029417311, |
|
"grad_norm": 1.211784252377512, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9711, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09353196303978879, |
|
"grad_norm": 1.1575974315931057, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9771, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0965491231378465, |
|
"grad_norm": 1.2117409563580097, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9495, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0995662832359042, |
|
"grad_norm": 1.109114323976095, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9782, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10258344333396191, |
|
"grad_norm": 1.0945987254678533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9621, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10560060343201962, |
|
"grad_norm": 1.1441153052598225, |
|
"learning_rate": 1e-05, |
|
"loss": 0.976, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10861776353007732, |
|
"grad_norm": 1.0701754107374204, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9364, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.11163492362813501, |
|
"grad_norm": 1.0982624299638388, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9873, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.11465208372619272, |
|
"grad_norm": 1.1658342852048704, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9727, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11766924382425042, |
|
"grad_norm": 1.140425640145812, |
|
"learning_rate": 1e-05, |
|
"loss": 0.962, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.12068640392230813, |
|
"grad_norm": 1.2113716827018828, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9794, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12370356402036584, |
|
"grad_norm": 1.0811873110464318, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9614, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12672072411842353, |
|
"grad_norm": 1.1319113211223166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9588, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12973788421648125, |
|
"grad_norm": 1.0450336690045927, |
|
"learning_rate": 1e-05, |
|
"loss": 0.951, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.13275504431453894, |
|
"grad_norm": 1.0888061493358059, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9631, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.13577220441259663, |
|
"grad_norm": 1.092452045324181, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9648, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13878936451065435, |
|
"grad_norm": 0.9926260475544896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9274, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.14180652460871204, |
|
"grad_norm": 1.1102540887315238, |
|
"learning_rate": 1e-05, |
|
"loss": 0.919, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.14482368470676976, |
|
"grad_norm": 1.0120307941022935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9526, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14784084480482745, |
|
"grad_norm": 1.0962499453405279, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9078, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.15085800490288515, |
|
"grad_norm": 1.1837778452202325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9376, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15387516500094287, |
|
"grad_norm": 1.064164454110782, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9128, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.15689232509900056, |
|
"grad_norm": 1.1508638128558335, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9533, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.15990948519705828, |
|
"grad_norm": 1.1609288313516257, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9551, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.16292664529511597, |
|
"grad_norm": 1.0596156545796473, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9083, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.1659438053931737, |
|
"grad_norm": 1.0399099372944773, |
|
"learning_rate": 1e-05, |
|
"loss": 0.95, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16896096549123138, |
|
"grad_norm": 1.0914115337494106, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9159, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.17197812558928907, |
|
"grad_norm": 1.1208151691589456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9404, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1749952856873468, |
|
"grad_norm": 1.1380068007034254, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9248, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.17801244578540448, |
|
"grad_norm": 1.1725768888546018, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9013, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.1810296058834622, |
|
"grad_norm": 1.0230578555799743, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9184, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1840467659815199, |
|
"grad_norm": 1.1965830709717151, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9636, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.18706392607957759, |
|
"grad_norm": 1.0854955199511183, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9149, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1900810861776353, |
|
"grad_norm": 1.0776069945953626, |
|
"learning_rate": 1e-05, |
|
"loss": 0.895, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.193098246275693, |
|
"grad_norm": 1.0925276020001646, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9017, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.19611540637375072, |
|
"grad_norm": 1.051729020151003, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9168, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1991325664718084, |
|
"grad_norm": 1.1276428621962828, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9359, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.2021497265698661, |
|
"grad_norm": 1.039214095313065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9458, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.20516688666792382, |
|
"grad_norm": 1.108230624792472, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9012, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2081840467659815, |
|
"grad_norm": 1.0659907986207233, |
|
"learning_rate": 1e-05, |
|
"loss": 0.932, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.21120120686403923, |
|
"grad_norm": 1.1484942212933387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9212, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21421836696209692, |
|
"grad_norm": 1.0080008398377343, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9335, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.21723552706015464, |
|
"grad_norm": 1.099906576331005, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9478, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.22025268715821233, |
|
"grad_norm": 1.002361667650721, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9215, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.22326984725627003, |
|
"grad_norm": 1.0106913848906898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9103, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.22628700735432775, |
|
"grad_norm": 1.031802831727699, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9591, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.22930416745238544, |
|
"grad_norm": 1.091719276309555, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9431, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.23232132755044316, |
|
"grad_norm": 1.0393979811839362, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8945, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.23533848764850085, |
|
"grad_norm": 1.1503832361551256, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9163, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.23835564774655854, |
|
"grad_norm": 1.1141284366563844, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9093, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.24137280784461626, |
|
"grad_norm": 1.0708909622724119, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9359, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24438996794267395, |
|
"grad_norm": 1.0342956680966762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9066, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.24740712804073167, |
|
"grad_norm": 1.041231965271414, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9042, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.25042428813878936, |
|
"grad_norm": 1.0891891643003724, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9157, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.25344144823684706, |
|
"grad_norm": 0.9463606441668, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8652, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.25645860833490475, |
|
"grad_norm": 0.9943090136893116, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9138, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2594757684329625, |
|
"grad_norm": 1.0622250754003486, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9029, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2624929285310202, |
|
"grad_norm": 1.0190810445181397, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8701, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2655100886290779, |
|
"grad_norm": 1.0471126343927182, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9173, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.26852724872713557, |
|
"grad_norm": 1.094441431551059, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9539, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.27154440882519326, |
|
"grad_norm": 1.02572224910356, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9036, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.274561568923251, |
|
"grad_norm": 1.0647463602381582, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8895, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2775787290213087, |
|
"grad_norm": 1.0867418650830927, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8903, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2805958891193664, |
|
"grad_norm": 1.0448439572661483, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9013, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2836130492174241, |
|
"grad_norm": 1.0782737190600102, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9088, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2866302093154818, |
|
"grad_norm": 1.0853475076773287, |
|
"learning_rate": 1e-05, |
|
"loss": 0.876, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2896473694135395, |
|
"grad_norm": 1.0978686199970553, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9286, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.2926645295115972, |
|
"grad_norm": 1.0673815458666747, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8973, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.2956816896096549, |
|
"grad_norm": 1.184691172374345, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8944, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2986988497077126, |
|
"grad_norm": 0.9861386386631925, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9048, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.3017160098057703, |
|
"grad_norm": 1.1329936556607971, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9401, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.30473316990382804, |
|
"grad_norm": 0.9543730308331345, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9149, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.30775033000188573, |
|
"grad_norm": 1.1462193660108113, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9071, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.3107674900999434, |
|
"grad_norm": 1.0904581574091061, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8971, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.3137846501980011, |
|
"grad_norm": 1.0495767709008346, |
|
"learning_rate": 1e-05, |
|
"loss": 0.919, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.31680181029605886, |
|
"grad_norm": 0.9640542155531093, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8713, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.31981897039411655, |
|
"grad_norm": 0.9610132463708185, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8933, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.32283613049217424, |
|
"grad_norm": 1.0843824473667214, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9093, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.32585329059023194, |
|
"grad_norm": 0.9945425158672618, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9071, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.32887045068828963, |
|
"grad_norm": 1.0765546524524479, |
|
"learning_rate": 1e-05, |
|
"loss": 0.865, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.3318876107863474, |
|
"grad_norm": 1.01041125101344, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8907, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.33490477088440507, |
|
"grad_norm": 1.0874945222389107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.894, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.33792193098246276, |
|
"grad_norm": 1.0493603540512813, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9138, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.34093909108052045, |
|
"grad_norm": 1.0124123297722831, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9149, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.34395625117857814, |
|
"grad_norm": 1.0531088139319633, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8856, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.3469734112766359, |
|
"grad_norm": 0.9763912034026013, |
|
"learning_rate": 1e-05, |
|
"loss": 0.905, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.3499905713746936, |
|
"grad_norm": 1.0488345908663115, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8907, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3530077314727513, |
|
"grad_norm": 1.0142709732139472, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8983, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.35602489157080897, |
|
"grad_norm": 0.9983943961658387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8881, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.35904205166886666, |
|
"grad_norm": 1.0281018778884328, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8989, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3620592117669244, |
|
"grad_norm": 1.0050409064254446, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9113, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3650763718649821, |
|
"grad_norm": 1.0317911761320575, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8926, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3680935319630398, |
|
"grad_norm": 1.0867071456016715, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8698, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3711106920610975, |
|
"grad_norm": 1.0792050089286005, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8385, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.37412785215915517, |
|
"grad_norm": 1.136911850598409, |
|
"learning_rate": 1e-05, |
|
"loss": 0.884, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3771450122572129, |
|
"grad_norm": 0.9468447682708495, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9048, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3801621723552706, |
|
"grad_norm": 0.99640914665336, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9092, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.3831793324533283, |
|
"grad_norm": 1.0035906051414294, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8612, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.386196492551386, |
|
"grad_norm": 1.0443720188110175, |
|
"learning_rate": 1e-05, |
|
"loss": 0.898, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3892136526494437, |
|
"grad_norm": 1.0019284346638992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9194, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.39223081274750143, |
|
"grad_norm": 0.9947835482545767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8823, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3952479728455591, |
|
"grad_norm": 1.117124665050851, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8698, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3982651329436168, |
|
"grad_norm": 1.0902265836998275, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9015, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.4012822930416745, |
|
"grad_norm": 0.9890942371356929, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8702, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.4042994531397322, |
|
"grad_norm": 1.079316692788872, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9274, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.40731661323778995, |
|
"grad_norm": 1.0507683546070807, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8548, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.41033377333584764, |
|
"grad_norm": 1.1173910988640408, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8663, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.41335093343390533, |
|
"grad_norm": 1.073210708775559, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8612, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.416368093531963, |
|
"grad_norm": 1.0396491185269086, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8994, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.4193852536300207, |
|
"grad_norm": 1.1500475629157627, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8525, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.42240241372807846, |
|
"grad_norm": 1.2168830597788158, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9374, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42541957382613615, |
|
"grad_norm": 0.9553833702288597, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9103, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.42843673392419385, |
|
"grad_norm": 0.9850371340573859, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8806, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.43145389402225154, |
|
"grad_norm": 1.1473904156348889, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8773, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.4344710541203093, |
|
"grad_norm": 1.160071820812732, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8672, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.437488214218367, |
|
"grad_norm": 1.0914396615807336, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8916, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.44050537431642467, |
|
"grad_norm": 0.9961823995771022, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8801, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.44352253441448236, |
|
"grad_norm": 1.1627229625741373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8486, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.44653969451254005, |
|
"grad_norm": 1.088993191172265, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8541, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.4495568546105978, |
|
"grad_norm": 0.9925318555430557, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8823, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.4525740147086555, |
|
"grad_norm": 1.080125550684176, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8916, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4555911748067132, |
|
"grad_norm": 1.0335542992312043, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8523, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4586083349047709, |
|
"grad_norm": 1.1905484694931652, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8861, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.46162549500282857, |
|
"grad_norm": 1.0739628080299815, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8335, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4646426551008863, |
|
"grad_norm": 1.0040085132370382, |
|
"learning_rate": 1e-05, |
|
"loss": 0.867, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.467659815198944, |
|
"grad_norm": 1.056602846699296, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8579, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4706769752970017, |
|
"grad_norm": 0.9891380470884372, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8454, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4736941353950594, |
|
"grad_norm": 0.9790395676599171, |
|
"learning_rate": 1e-05, |
|
"loss": 0.896, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4767112954931171, |
|
"grad_norm": 1.027120791419062, |
|
"learning_rate": 1e-05, |
|
"loss": 0.924, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.47972845559117483, |
|
"grad_norm": 1.0959500600764212, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8666, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4827456156892325, |
|
"grad_norm": 1.0401653704511453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8685, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4857627757872902, |
|
"grad_norm": 1.0428336555982345, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8739, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4887799358853479, |
|
"grad_norm": 1.0035589157775742, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9231, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4917970959834056, |
|
"grad_norm": 1.1332138472380069, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8874, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.49481425608146334, |
|
"grad_norm": 0.9889223927200703, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9152, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.49783141617952104, |
|
"grad_norm": 0.9903630601730351, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8619, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.5008485762775787, |
|
"grad_norm": 0.9870755137328874, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8652, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.5038657363756365, |
|
"grad_norm": 1.0295502212015513, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8701, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.5068828964736941, |
|
"grad_norm": 0.9980789726799827, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8948, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.5099000565717519, |
|
"grad_norm": 0.9895924927460445, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8652, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.5129172166698095, |
|
"grad_norm": 0.9897691179372796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8872, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5159343767678672, |
|
"grad_norm": 1.0976883085536995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8663, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.518951536865925, |
|
"grad_norm": 0.9855831526257474, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8379, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.5219686969639826, |
|
"grad_norm": 1.007385512988202, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8806, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.5249858570620404, |
|
"grad_norm": 0.9811416651561021, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8812, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.528003017160098, |
|
"grad_norm": 1.008567949000168, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9088, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5310201772581558, |
|
"grad_norm": 1.0735249094278072, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8625, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.5340373373562135, |
|
"grad_norm": 1.0099080610350626, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8774, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.5370544974542711, |
|
"grad_norm": 0.9869119389250737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8602, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.5400716575523289, |
|
"grad_norm": 1.0140325571494992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8951, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5430888176503865, |
|
"grad_norm": 0.9746000686910684, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8546, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5461059777484443, |
|
"grad_norm": 0.9912039011517177, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8699, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.549123137846502, |
|
"grad_norm": 0.9866319014883533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8708, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.5521402979445597, |
|
"grad_norm": 0.9316538866319553, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8759, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5551574580426174, |
|
"grad_norm": 0.9917975239418116, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8743, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.558174618140675, |
|
"grad_norm": 0.998938334137341, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8353, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5611917782387328, |
|
"grad_norm": 1.0536268087144185, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8892, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5642089383367905, |
|
"grad_norm": 1.0702585591377483, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8459, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5672260984348482, |
|
"grad_norm": 1.0196433385874035, |
|
"learning_rate": 1e-05, |
|
"loss": 0.92, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5702432585329059, |
|
"grad_norm": 1.065268517886477, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8497, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5732604186309636, |
|
"grad_norm": 0.9934866856892721, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8795, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5762775787290213, |
|
"grad_norm": 0.982727769285356, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9056, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.579294738827079, |
|
"grad_norm": 1.0628075311898817, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8588, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5823118989251367, |
|
"grad_norm": 0.9227204348296366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8569, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5853290590231944, |
|
"grad_norm": 0.9899338734692382, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8857, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5883462191212521, |
|
"grad_norm": 1.0191415487699054, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8959, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5913633792193098, |
|
"grad_norm": 1.0972049175262022, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8473, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5943805393173676, |
|
"grad_norm": 0.9801443916922342, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8882, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5973976994154252, |
|
"grad_norm": 1.05420177137371, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8617, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.600414859513483, |
|
"grad_norm": 0.9763149595538684, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8317, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.6034320196115406, |
|
"grad_norm": 1.0142128497052438, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8801, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6064491797095983, |
|
"grad_norm": 1.1223250391910486, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8651, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.6094663398076561, |
|
"grad_norm": 0.9445148128595584, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9247, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.6124834999057137, |
|
"grad_norm": 1.0205891212652056, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8568, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.6155006600037715, |
|
"grad_norm": 1.0132942213261507, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8474, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.6185178201018292, |
|
"grad_norm": 1.0654107515701503, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8436, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.6215349801998868, |
|
"grad_norm": 0.9832515183706407, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8357, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.6245521402979446, |
|
"grad_norm": 1.0121196517905966, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8852, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.6275693003960022, |
|
"grad_norm": 1.2152715233993958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8215, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.63058646049406, |
|
"grad_norm": 1.0114694608145163, |
|
"learning_rate": 1e-05, |
|
"loss": 0.865, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.6336036205921177, |
|
"grad_norm": 1.0466026516397904, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8566, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.6366207806901754, |
|
"grad_norm": 0.9459705410503775, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8327, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.6396379407882331, |
|
"grad_norm": 0.9749006528453033, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8642, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.6426551008862907, |
|
"grad_norm": 0.9590985337067398, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8447, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.6456722609843485, |
|
"grad_norm": 1.097321284543017, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8272, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.6486894210824062, |
|
"grad_norm": 1.0054521678760522, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8472, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.6517065811804639, |
|
"grad_norm": 1.0099516647809557, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8654, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6547237412785216, |
|
"grad_norm": 0.9567498655316358, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8566, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.6577409013765793, |
|
"grad_norm": 0.9670178838314303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8356, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.660758061474637, |
|
"grad_norm": 0.9767210115729468, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9223, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6637752215726948, |
|
"grad_norm": 1.0035943684822244, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8778, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6667923816707524, |
|
"grad_norm": 0.9836721726138078, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8601, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6698095417688101, |
|
"grad_norm": 1.0555276758382468, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8687, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.6728267018668678, |
|
"grad_norm": 1.0236557982823795, |
|
"learning_rate": 1e-05, |
|
"loss": 0.869, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6758438619649255, |
|
"grad_norm": 1.0490641185205225, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8513, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6788610220629833, |
|
"grad_norm": 1.0112063959996538, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8671, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6818781821610409, |
|
"grad_norm": 0.9797715955787868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8721, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6848953422590986, |
|
"grad_norm": 1.023163793890387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8238, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6879125023571563, |
|
"grad_norm": 0.9901311938407963, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8455, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.690929662455214, |
|
"grad_norm": 1.0333937580724437, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8799, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6939468225532718, |
|
"grad_norm": 0.9781251267252234, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8558, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6969639826513294, |
|
"grad_norm": 0.9991779679723747, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8534, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6999811427493872, |
|
"grad_norm": 1.0389629537546219, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8466, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.7029983028474448, |
|
"grad_norm": 1.0204794940839597, |
|
"learning_rate": 1e-05, |
|
"loss": 0.832, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.7060154629455025, |
|
"grad_norm": 1.0124333073703313, |
|
"learning_rate": 1e-05, |
|
"loss": 0.826, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.7090326230435603, |
|
"grad_norm": 0.9469683819775043, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8746, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.7120497831416179, |
|
"grad_norm": 0.9681088394641286, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8777, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.7150669432396757, |
|
"grad_norm": 0.9534971580611808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.828, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.7180841033377333, |
|
"grad_norm": 0.9231164924550516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8851, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.7211012634357911, |
|
"grad_norm": 0.9564412650063757, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8407, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.7241184235338488, |
|
"grad_norm": 0.9387537660650889, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8195, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7271355836319064, |
|
"grad_norm": 0.984184826582449, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8417, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.7301527437299642, |
|
"grad_norm": 0.9680344075351693, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8679, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.7331699038280218, |
|
"grad_norm": 0.9508741001956648, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8182, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.7361870639260796, |
|
"grad_norm": 0.9991310599384764, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8917, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.7392042240241373, |
|
"grad_norm": 1.0472945255103463, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9124, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.742221384122195, |
|
"grad_norm": 0.99539173598333, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8638, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.7452385442202527, |
|
"grad_norm": 1.0251801530213278, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8413, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.7482557043183103, |
|
"grad_norm": 1.0408585876445724, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8381, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.7512728644163681, |
|
"grad_norm": 0.9731694388232142, |
|
"learning_rate": 1e-05, |
|
"loss": 0.855, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.7542900245144258, |
|
"grad_norm": 1.0030115847404013, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8596, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7573071846124835, |
|
"grad_norm": 0.9841974420990679, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8742, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7603243447105412, |
|
"grad_norm": 1.055927374179432, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8286, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.7633415048085989, |
|
"grad_norm": 1.0037342058458762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8624, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7663586649066566, |
|
"grad_norm": 0.9779177922525308, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8224, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7693758250047144, |
|
"grad_norm": 1.0485152455162672, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8353, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.772392985102772, |
|
"grad_norm": 0.9427366336442022, |
|
"learning_rate": 1e-05, |
|
"loss": 0.859, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7754101452008297, |
|
"grad_norm": 0.9826256006927524, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8781, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7784273052988874, |
|
"grad_norm": 1.097163176429486, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8603, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7814444653969451, |
|
"grad_norm": 1.0755684776060006, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8803, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7844616254950029, |
|
"grad_norm": 1.060543596503769, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8749, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7874787855930605, |
|
"grad_norm": 1.0054727401343206, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8656, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7904959456911183, |
|
"grad_norm": 1.0209085800021502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8821, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7935131057891759, |
|
"grad_norm": 0.9243375424290232, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8622, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7965302658872336, |
|
"grad_norm": 1.0429931807106287, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8196, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7995474259852914, |
|
"grad_norm": 1.0354133838924724, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8339, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.802564586083349, |
|
"grad_norm": 0.9226609889690002, |
|
"learning_rate": 1e-05, |
|
"loss": 0.854, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.8055817461814068, |
|
"grad_norm": 0.8608785777678977, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8371, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.8085989062794644, |
|
"grad_norm": 1.0583845408082262, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8158, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.8116160663775221, |
|
"grad_norm": 0.9488390352752252, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8569, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.8146332264755799, |
|
"grad_norm": 1.040418044582926, |
|
"learning_rate": 1e-05, |
|
"loss": 0.847, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8176503865736375, |
|
"grad_norm": 1.1259258576677653, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8178, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.8206675466716953, |
|
"grad_norm": 0.9418385845633765, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8293, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.8236847067697529, |
|
"grad_norm": 0.9498163839438614, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8472, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.8267018668678107, |
|
"grad_norm": 0.9864544845499665, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8751, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.8297190269658684, |
|
"grad_norm": 1.0406919672946373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.798, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.832736187063926, |
|
"grad_norm": 1.0806255336515647, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8282, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.8357533471619838, |
|
"grad_norm": 0.9860936672416695, |
|
"learning_rate": 1e-05, |
|
"loss": 0.858, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.8387705072600414, |
|
"grad_norm": 0.9718313802197633, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8466, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.8417876673580992, |
|
"grad_norm": 1.0544122713831847, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8744, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.8448048274561569, |
|
"grad_norm": 1.081992381916025, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8743, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8478219875542146, |
|
"grad_norm": 0.9532379328140087, |
|
"learning_rate": 1e-05, |
|
"loss": 0.857, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.8508391476522723, |
|
"grad_norm": 0.9411887622889539, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8408, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.8538563077503301, |
|
"grad_norm": 0.9619536563065685, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8501, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.8568734678483877, |
|
"grad_norm": 0.9802987123695353, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8235, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.8598906279464454, |
|
"grad_norm": 1.0032810101495542, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8516, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.8629077880445031, |
|
"grad_norm": 0.9916945079271281, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8324, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.8659249481425608, |
|
"grad_norm": 0.9901223884800562, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8408, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.8689421082406186, |
|
"grad_norm": 0.9835034534966476, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8495, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8719592683386762, |
|
"grad_norm": 0.9750801588302662, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8563, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.874976428436734, |
|
"grad_norm": 0.9641976460403898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8421, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8779935885347916, |
|
"grad_norm": 0.9501946334304521, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8654, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8810107486328493, |
|
"grad_norm": 0.9100014151244762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8631, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8840279087309071, |
|
"grad_norm": 0.9944179407196427, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8464, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8870450688289647, |
|
"grad_norm": 0.9853713974171726, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8605, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8900622289270225, |
|
"grad_norm": 0.9254523406321978, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8478, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8930793890250801, |
|
"grad_norm": 0.8959456626105031, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8457, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8960965491231379, |
|
"grad_norm": 1.0037255347844305, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8576, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8991137092211956, |
|
"grad_norm": 0.9449138138513178, |
|
"learning_rate": 1e-05, |
|
"loss": 0.822, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.9021308693192532, |
|
"grad_norm": 0.9597546807553757, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7988, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.905148029417311, |
|
"grad_norm": 0.9624792569398237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8389, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9081651895153686, |
|
"grad_norm": 0.9435277465551941, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8722, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.9111823496134264, |
|
"grad_norm": 0.9681970370411234, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8183, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.9141995097114841, |
|
"grad_norm": 0.9291647991127606, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8076, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.9172166698095418, |
|
"grad_norm": 1.0177809366807526, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8645, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.9202338299075995, |
|
"grad_norm": 0.9855696615573117, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8546, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.9232509900056571, |
|
"grad_norm": 0.9745085927690291, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8442, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.9262681501037149, |
|
"grad_norm": 0.9356489869997006, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8571, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.9292853102017726, |
|
"grad_norm": 1.0346965656565765, |
|
"learning_rate": 1e-05, |
|
"loss": 0.823, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.9323024702998303, |
|
"grad_norm": 1.123418692750864, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8717, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.935319630397888, |
|
"grad_norm": 1.0319249558634143, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8433, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9383367904959456, |
|
"grad_norm": 0.9462383670551003, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8607, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.9413539505940034, |
|
"grad_norm": 0.9441101670098899, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8186, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.9443711106920611, |
|
"grad_norm": 1.0504882586379691, |
|
"learning_rate": 1e-05, |
|
"loss": 0.845, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.9473882707901188, |
|
"grad_norm": 0.9048369513823565, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8174, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.9504054308881765, |
|
"grad_norm": 0.9019137471005251, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8556, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.9534225909862342, |
|
"grad_norm": 0.8875508030205819, |
|
"learning_rate": 1e-05, |
|
"loss": 0.861, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.9564397510842919, |
|
"grad_norm": 0.9993353590451418, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8669, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.9594569111823497, |
|
"grad_norm": 0.9488161087038608, |
|
"learning_rate": 1e-05, |
|
"loss": 0.878, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.9624740712804073, |
|
"grad_norm": 0.9365808217541111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8222, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.965491231378465, |
|
"grad_norm": 0.9799964559542612, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8468, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9685083914765227, |
|
"grad_norm": 1.0661880405150197, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8381, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.9715255515745804, |
|
"grad_norm": 1.0265204942074564, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8467, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.9745427116726382, |
|
"grad_norm": 1.067821792666605, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8351, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.9775598717706958, |
|
"grad_norm": 1.1466057402506813, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8551, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9805770318687536, |
|
"grad_norm": 0.9899578843967808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8248, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9835941919668112, |
|
"grad_norm": 0.8886227742234571, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8527, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.9866113520648689, |
|
"grad_norm": 0.9780662079081274, |
|
"learning_rate": 1e-05, |
|
"loss": 0.862, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.9896285121629267, |
|
"grad_norm": 1.0240427016912055, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8646, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9926456722609843, |
|
"grad_norm": 0.9797020588373134, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8764, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9956628323590421, |
|
"grad_norm": 0.9980560765723044, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8767, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9986799924570997, |
|
"grad_norm": 0.9334505105444044, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8189, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9986799924570997, |
|
"step": 331, |
|
"total_flos": 437602977054720.0, |
|
"train_loss": 0.8955676523220143, |
|
"train_runtime": 47565.9142, |
|
"train_samples_per_second": 0.892, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 331, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 437602977054720.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |