upload log loss and training config
Browse files- loss_log.txt +647 -0
- training_config_phase3.yaml +93 -0
loss_log.txt
ADDED
@@ -0,0 +1,647 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Step 1 | loss:1.221569538116455 lr:1.875e-06 tokens_per_second_per_gpu:776.4896475342558
|
2 |
+
Step 2 | loss:1.1539162397384644 lr:3.75e-06 tokens_per_second_per_gpu:2626.745768632657
|
3 |
+
Step 3 | loss:1.1532124280929565 lr:5.625e-06 tokens_per_second_per_gpu:3060.6355265050215
|
4 |
+
Step 4 | loss:0.9598733186721802 lr:7.5e-06 tokens_per_second_per_gpu:3063.150374667415
|
5 |
+
Step 5 | loss:1.020167589187622 lr:9.375000000000001e-06 tokens_per_second_per_gpu:2611.9293671879514
|
6 |
+
Step 6 | loss:1.1163296699523926 lr:1.125e-05 tokens_per_second_per_gpu:2225.818982541921
|
7 |
+
Step 7 | loss:1.077873706817627 lr:1.3125e-05 tokens_per_second_per_gpu:2603.7906186749615
|
8 |
+
Step 8 | loss:1.0882045030593872 lr:1.5e-05 tokens_per_second_per_gpu:2991.735662820424
|
9 |
+
Step 9 | loss:1.0246179103851318 lr:1.4999909358220728e-05 tokens_per_second_per_gpu:2410.161469741817
|
10 |
+
Step 10 | loss:0.8495532870292664 lr:1.4999637435073829e-05 tokens_per_second_per_gpu:3242.2852700467556
|
11 |
+
Step 11 | loss:0.9978005886077881 lr:1.4999184237131991e-05 tokens_per_second_per_gpu:3383.1990451125507
|
12 |
+
Step 12 | loss:0.9687494039535522 lr:1.499854977534953e-05 tokens_per_second_per_gpu:1902.8084244378674
|
13 |
+
Step 13 | loss:1.0104714632034302 lr:1.499773406506211e-05 tokens_per_second_per_gpu:2933.4949978737454
|
14 |
+
Step 14 | loss:0.7931515574455261 lr:1.4996737125986377e-05 tokens_per_second_per_gpu:4085.888567800566
|
15 |
+
Step 15 | loss:0.9825239777565002 lr:1.499555898221949e-05 tokens_per_second_per_gpu:2138.2084216687877
|
16 |
+
Step 16 | loss:0.8750104904174805 lr:1.4994199662238527e-05 tokens_per_second_per_gpu:3703.7749114369176
|
17 |
+
Step 17 | loss:0.8804910182952881 lr:1.4992659198899803e-05 tokens_per_second_per_gpu:2835.028146310613
|
18 |
+
Step 18 | loss:0.9359980821609497 lr:1.4990937629438073e-05 tokens_per_second_per_gpu:2644.538015822797
|
19 |
+
Step 19 | loss:0.8891004323959351 lr:1.4989034995465637e-05 tokens_per_second_per_gpu:2837.833590827904
|
20 |
+
Step 20 | loss:1.0624065399169922 lr:1.4986951342971331e-05 tokens_per_second_per_gpu:2155.890103956241
|
21 |
+
Step 21 | loss:0.9780190587043762 lr:1.498468672231941e-05 tokens_per_second_per_gpu:2565.782185800595
|
22 |
+
Step 22 | loss:1.071376919746399 lr:1.4982241188248342e-05 tokens_per_second_per_gpu:2007.8092025468436
|
23 |
+
Step 23 | loss:0.7347007989883423 lr:1.4979614799869475e-05 tokens_per_second_per_gpu:4224.400969539578
|
24 |
+
Step 24 | loss:0.9955159425735474 lr:1.4976807620665614e-05 tokens_per_second_per_gpu:2596.19245051982
|
25 |
+
Step 25 | loss:0.8271186947822571 lr:1.4973819718489483e-05 tokens_per_second_per_gpu:3168.5746698450243
|
26 |
+
Step 26 | loss:0.8975570201873779 lr:1.4970651165562086e-05 tokens_per_second_per_gpu:2739.229439199024
|
27 |
+
Step 27 | loss:1.089482307434082 lr:1.4967302038470966e-05 tokens_per_second_per_gpu:2347.5008041359756
|
28 |
+
Step 28 | loss:1.2102779150009155 lr:1.4963772418168344e-05 tokens_per_second_per_gpu:1994.4357047693568
|
29 |
+
Step 29 | loss:0.860817551612854 lr:1.4960062389969173e-05 tokens_per_second_per_gpu:2934.9487425459815
|
30 |
+
Step 30 | loss:0.9468755722045898 lr:1.4956172043549067e-05 tokens_per_second_per_gpu:2541.4215466420164
|
31 |
+
Step 31 | loss:1.0479042530059814 lr:1.4952101472942138e-05 tokens_per_second_per_gpu:2074.585452185979
|
32 |
+
Step 32 | loss:0.904244065284729 lr:1.4947850776538721e-05 tokens_per_second_per_gpu:2358.220020036641
|
33 |
+
Step 33 | loss:0.9157129526138306 lr:1.4943420057083003e-05 tokens_per_second_per_gpu:2490.175938191269
|
34 |
+
Step 34 | loss:0.8834834694862366 lr:1.4938809421670526e-05 tokens_per_second_per_gpu:3300.2406807038383
|
35 |
+
Step 35 | loss:0.9919298887252808 lr:1.493401898174561e-05 tokens_per_second_per_gpu:1849.696643954453
|
36 |
+
Step 36 | loss:0.9854268431663513 lr:1.4929048853098655e-05 tokens_per_second_per_gpu:2373.8255744270264
|
37 |
+
Step 37 | loss:0.8895089626312256 lr:1.492389915586334e-05 tokens_per_second_per_gpu:3564.264325106374
|
38 |
+
Step 38 | loss:1.0618623495101929 lr:1.491857001451373e-05 tokens_per_second_per_gpu:2546.3652101537164
|
39 |
+
Step 39 | loss:0.8307267427444458 lr:1.4913061557861243e-05 tokens_per_second_per_gpu:3101.1759165612048
|
40 |
+
Step 40 | loss:0.9268414974212646 lr:1.4907373919051571e-05 tokens_per_second_per_gpu:2604.8350849059198
|
41 |
+
Step 41 | loss:0.9643033146858215 lr:1.4901507235561428e-05 tokens_per_second_per_gpu:2546.930067535709
|
42 |
+
Step 42 | loss:0.8905935287475586 lr:1.4895461649195253e-05 tokens_per_second_per_gpu:3042.225207721806
|
43 |
+
Step 43 | loss:0.8909382224082947 lr:1.4889237306081767e-05 tokens_per_second_per_gpu:2220.37359285759
|
44 |
+
Step 44 | loss:1.0663670301437378 lr:1.4882834356670442e-05 tokens_per_second_per_gpu:1992.5821375012035
|
45 |
+
Step 45 | loss:0.7980119585990906 lr:1.4876252955727876e-05 tokens_per_second_per_gpu:2536.9415861871867
|
46 |
+
Step 46 | loss:0.9315576553344727 lr:1.486949326233404e-05 tokens_per_second_per_gpu:2015.857266504341
|
47 |
+
Step 47 | loss:0.818637490272522 lr:1.4862555439878434e-05 tokens_per_second_per_gpu:2764.9466270905377
|
48 |
+
Step 48 | loss:0.9148813486099243 lr:1.4855439656056144e-05 tokens_per_second_per_gpu:2458.8630962085804
|
49 |
+
Step 49 | loss:0.8426403403282166 lr:1.484814608286379e-05 tokens_per_second_per_gpu:3836.9937286372974
|
50 |
+
Step 50 | loss:0.9631166458129883 lr:1.4840674896595355e-05 tokens_per_second_per_gpu:2344.5407425233243
|
51 |
+
Step 51 | loss:0.9489616751670837 lr:1.4833026277837935e-05 tokens_per_second_per_gpu:2590.65407572909
|
52 |
+
Step 52 | loss:0.7524951696395874 lr:1.4825200411467377e-05 tokens_per_second_per_gpu:3316.8777964828005
|
53 |
+
Step 53 | loss:0.8084532618522644 lr:1.4817197486643802e-05 tokens_per_second_per_gpu:3023.881910604271
|
54 |
+
Step 54 | loss:0.8935003876686096 lr:1.480901769680703e-05 tokens_per_second_per_gpu:2796.6835981724653
|
55 |
+
Step 55 | loss:0.9641337990760803 lr:1.4800661239671925e-05 tokens_per_second_per_gpu:2619.6033697751586
|
56 |
+
Step 56 | loss:0.9214082360267639 lr:1.4792128317223583e-05 tokens_per_second_per_gpu:2223.910054733981
|
57 |
+
Step 57 | loss:0.8928937315940857 lr:1.4783419135712481e-05 tokens_per_second_per_gpu:2822.2559809661334
|
58 |
+
Step 58 | loss:0.9725896716117859 lr:1.4774533905649475e-05 tokens_per_second_per_gpu:3135.307257349326
|
59 |
+
Step 59 | loss:1.1065843105316162 lr:1.4765472841800714e-05 tokens_per_second_per_gpu:2383.5806852300307
|
60 |
+
Step 60 | loss:0.8419440984725952 lr:1.4756236163182447e-05 tokens_per_second_per_gpu:3695.211899955055
|
61 |
+
Step 61 | loss:0.8362283110618591 lr:1.4746824093055744e-05 tokens_per_second_per_gpu:3351.0990224903917
|
62 |
+
Step 62 | loss:0.9842981100082397 lr:1.4737236858921075e-05 tokens_per_second_per_gpu:3464.31229640519
|
63 |
+
Step 63 | loss:0.9068053364753723 lr:1.4727474692512831e-05 tokens_per_second_per_gpu:2966.1821865894576
|
64 |
+
Step 64 | loss:0.834957480430603 lr:1.4717537829793716e-05 tokens_per_second_per_gpu:2601.8903601590896
|
65 |
+
Step 65 | loss:0.7882446050643921 lr:1.4707426510949038e-05 tokens_per_second_per_gpu:3476.6321229392124
|
66 |
+
Step 66 | loss:0.799394965171814 lr:1.4697140980380914e-05 tokens_per_second_per_gpu:3120.9911279253056
|
67 |
+
Step 67 | loss:1.0695223808288574 lr:1.4686681486702354e-05 tokens_per_second_per_gpu:2008.6733315623778
|
68 |
+
Step 68 | loss:0.8479028940200806 lr:1.4676048282731259e-05 tokens_per_second_per_gpu:3461.3166586382476
|
69 |
+
Step 69 | loss:0.7161128520965576 lr:1.4665241625484297e-05 tokens_per_second_per_gpu:3323.2151026335396
|
70 |
+
Step 70 | loss:0.9997295141220093 lr:1.4654261776170711e-05 tokens_per_second_per_gpu:3499.3840144689457
|
71 |
+
Step 71 | loss:0.9570843577384949 lr:1.4643109000185986e-05 tokens_per_second_per_gpu:2803.3989631359914
|
72 |
+
Step 72 | loss:0.9544796347618103 lr:1.4631783567105444e-05 tokens_per_second_per_gpu:2907.0792063075314
|
73 |
+
Step 73 | loss:0.989665687084198 lr:1.4620285750677729e-05 tokens_per_second_per_gpu:2315.0531647541266
|
74 |
+
Step 74 | loss:0.8606690168380737 lr:1.460861582881818e-05 tokens_per_second_per_gpu:1761.639594790866
|
75 |
+
Step 75 | loss:0.8831307291984558 lr:1.4596774083602131e-05 tokens_per_second_per_gpu:2578.250829971227
|
76 |
+
Step 76 | loss:0.8336813449859619 lr:1.4584760801258072e-05 tokens_per_second_per_gpu:3966.441079179973
|
77 |
+
Step 77 | loss:1.0065804719924927 lr:1.4572576272160752e-05 tokens_per_second_per_gpu:2010.6956576896216
|
78 |
+
Step 78 | loss:0.9828115105628967 lr:1.4560220790824135e-05 tokens_per_second_per_gpu:2676.9438755928054
|
79 |
+
Step 79 | loss:0.8407449126243591 lr:1.4547694655894313e-05 tokens_per_second_per_gpu:3043.8163508680937
|
80 |
+
Step 80 | loss:0.8303096890449524 lr:1.4534998170142256e-05 tokens_per_second_per_gpu:2782.5148196337404
|
81 |
+
Step 81 | loss:0.7780717611312866 lr:1.4522131640456515e-05 tokens_per_second_per_gpu:3300.6096232500304
|
82 |
+
Step 82 | loss:0.7947230935096741 lr:1.4509095377835793e-05 tokens_per_second_per_gpu:3028.0607479145733
|
83 |
+
Step 83 | loss:0.8650649189949036 lr:1.4495889697381437e-05 tokens_per_second_per_gpu:3024.391338030527
|
84 |
+
Step 84 | loss:0.9109087586402893 lr:1.4482514918289811e-05 tokens_per_second_per_gpu:2882.55956475511
|
85 |
+
Step 85 | loss:0.8643304705619812 lr:1.4468971363844589e-05 tokens_per_second_per_gpu:2820.9174435328187
|
86 |
+
Step 86 | loss:0.8797863125801086 lr:1.4455259361408933e-05 tokens_per_second_per_gpu:3187.6221907265112
|
87 |
+
Step 87 | loss:1.0112887620925903 lr:1.4441379242417597e-05 tokens_per_second_per_gpu:2276.054803118254
|
88 |
+
Step 88 | loss:0.9944532513618469 lr:1.4427331342368889e-05 tokens_per_second_per_gpu:2373.6844209284745
|
89 |
+
Step 89 | loss:1.0218238830566406 lr:1.4413116000816587e-05 tokens_per_second_per_gpu:2183.503625916952
|
90 |
+
Step 90 | loss:0.9786317944526672 lr:1.439873356136172e-05 tokens_per_second_per_gpu:2308.142962546143
|
91 |
+
Step 91 | loss:1.0663024187088013 lr:1.4384184371644257e-05 tokens_per_second_per_gpu:2280.0466053008804
|
92 |
+
Step 92 | loss:0.9506876468658447 lr:1.436946878333472e-05 tokens_per_second_per_gpu:2214.0901274233033
|
93 |
+
Step 93 | loss:1.040751338005066 lr:1.435458715212567e-05 tokens_per_second_per_gpu:2776.228928077468
|
94 |
+
Step 94 | loss:0.8327547311782837 lr:1.4339539837723114e-05 tokens_per_second_per_gpu:2611.7891324599004
|
95 |
+
Step 95 | loss:0.8437411785125732 lr:1.4324327203837813e-05 tokens_per_second_per_gpu:2564.3074569788087
|
96 |
+
Step 96 | loss:0.8928688764572144 lr:1.4308949618176488e-05 tokens_per_second_per_gpu:3033.7427038894925
|
97 |
+
Step 97 | loss:0.8910201787948608 lr:1.4293407452432934e-05 tokens_per_second_per_gpu:2926.1872538932953
|
98 |
+
Step 98 | loss:1.1611385345458984 lr:1.4277701082279029e-05 tokens_per_second_per_gpu:2118.714336752679
|
99 |
+
Step 99 | loss:0.7676631212234497 lr:1.4261830887355666e-05 tokens_per_second_per_gpu:3119.234051386954
|
100 |
+
Step 100 | loss:0.9407196640968323 lr:1.4245797251263566e-05 tokens_per_second_per_gpu:3431.829631389003
|
101 |
+
Step 101 | loss:1.0271466970443726 lr:1.422960056155401e-05 tokens_per_second_per_gpu:2297.2551692634543
|
102 |
+
Step 102 | loss:0.7903147339820862 lr:1.4213241209719468e-05 tokens_per_second_per_gpu:3183.486695789974
|
103 |
+
Step 103 | loss:0.9327342510223389 lr:1.4196719591184145e-05 tokens_per_second_per_gpu:2308.249818645735
|
104 |
+
Step 104 | loss:1.1140373945236206 lr:1.4180036105294413e-05 tokens_per_second_per_gpu:2201.0614619912385
|
105 |
+
Step 105 | loss:0.8893833756446838 lr:1.4163191155309168e-05 tokens_per_second_per_gpu:2472.4719880913913
|
106 |
+
Step 106 | loss:0.8200833201408386 lr:1.4146185148390068e-05 tokens_per_second_per_gpu:3466.1216698989483
|
107 |
+
Step 107 | loss:0.9029530882835388 lr:1.4129018495591707e-05 tokens_per_second_per_gpu:2665.603306051977
|
108 |
+
Step 108 | loss:0.906825065612793 lr:1.4111691611851679e-05 tokens_per_second_per_gpu:2326.7020632435033
|
109 |
+
Step 109 | loss:0.852420449256897 lr:1.409420491598053e-05 tokens_per_second_per_gpu:2829.945359127956
|
110 |
+
Step 110 | loss:0.8402610421180725 lr:1.4076558830651655e-05 tokens_per_second_per_gpu:2424.5583345317846
|
111 |
+
Step 111 | loss:0.8761624097824097 lr:1.4058753782391077e-05 tokens_per_second_per_gpu:2714.916085371875
|
112 |
+
Step 112 | loss:1.0591604709625244 lr:1.4040790201567126e-05 tokens_per_second_per_gpu:1954.819773307979
|
113 |
+
Step 113 | loss:0.871858537197113 lr:1.4022668522380051e-05 tokens_per_second_per_gpu:1990.9860968133723
|
114 |
+
Step 114 | loss:0.9173691868782043 lr:1.4004389182851519e-05 tokens_per_second_per_gpu:3093.1846380992297
|
115 |
+
Step 115 | loss:0.7526230216026306 lr:1.3985952624814021e-05 tokens_per_second_per_gpu:3434.845480530211
|
116 |
+
Step 116 | loss:0.9190866947174072 lr:1.396735929390021e-05 tokens_per_second_per_gpu:2757.0267437124444
|
117 |
+
Step 117 | loss:0.9001188278198242 lr:1.3948609639532107e-05 tokens_per_second_per_gpu:3245.090811839929
|
118 |
+
Step 118 | loss:0.853203296661377 lr:1.3929704114910252e-05 tokens_per_second_per_gpu:2276.739974081572
|
119 |
+
Step 119 | loss:0.855078399181366 lr:1.3910643177002755e-05 tokens_per_second_per_gpu:2723.094778935103
|
120 |
+
Step 120 | loss:0.9485069513320923 lr:1.3891427286534234e-05 tokens_per_second_per_gpu:2207.578804777831
|
121 |
+
Step 121 | loss:0.8687245845794678 lr:1.3872056907974688e-05 tokens_per_second_per_gpu:2167.7910866884936
|
122 |
+
Step 122 | loss:0.7418152689933777 lr:1.3852532509528272e-05 tokens_per_second_per_gpu:3405.231105064688
|
123 |
+
Step 123 | loss:0.7311168909072876 lr:1.3832854563121978e-05 tokens_per_second_per_gpu:3483.8702435947193
|
124 |
+
Step 124 | loss:0.9829015731811523 lr:1.3813023544394226e-05 tokens_per_second_per_gpu:2585.438509633648
|
125 |
+
Step 125 | loss:0.8702593445777893 lr:1.3793039932683365e-05 tokens_per_second_per_gpu:2956.5308034715326
|
126 |
+
Step 126 | loss:0.927323579788208 lr:1.3772904211016098e-05 tokens_per_second_per_gpu:2484.745269269927
|
127 |
+
Step 127 | loss:0.947740375995636 lr:1.3752616866095793e-05 tokens_per_second_per_gpu:2078.82632153693
|
128 |
+
Step 128 | loss:0.9461579918861389 lr:1.373217838829073e-05 tokens_per_second_per_gpu:2705.8367865703003
|
129 |
+
Step 129 | loss:0.8957231044769287 lr:1.3711589271622237e-05 tokens_per_second_per_gpu:2992.601213940647
|
130 |
+
Step 130 | loss:0.8435050249099731 lr:1.369085001375276e-05 tokens_per_second_per_gpu:2422.468716757249
|
131 |
+
Step 131 | loss:0.8020814657211304 lr:1.3669961115973831e-05 tokens_per_second_per_gpu:2716.525699767116
|
132 |
+
Step 132 | loss:1.0148298740386963 lr:1.3648923083193945e-05 tokens_per_second_per_gpu:3222.4037213065576
|
133 |
+
Step 133 | loss:0.8540488481521606 lr:1.3627736423926358e-05 tokens_per_second_per_gpu:3156.5818359892814
|
134 |
+
Step 134 | loss:1.0741864442825317 lr:1.360640165027681e-05 tokens_per_second_per_gpu:3006.5484287697604
|
135 |
+
Step 135 | loss:0.9130982756614685 lr:1.3584919277931113e-05 tokens_per_second_per_gpu:3060.7231378677075
|
136 |
+
Step 136 | loss:0.9075719118118286 lr:1.3563289826142734e-05 tokens_per_second_per_gpu:2994.295564077511
|
137 |
+
Step 137 | loss:0.7380925416946411 lr:1.3541513817720196e-05 tokens_per_second_per_gpu:3380.35958901387
|
138 |
+
Step 138 | loss:0.7887682914733887 lr:1.351959177901448e-05 tokens_per_second_per_gpu:3411.1643530753004
|
139 |
+
Step 139 | loss:1.0572377443313599 lr:1.3497524239906268e-05 tokens_per_second_per_gpu:1846.5574752328034
|
140 |
+
Step 140 | loss:0.9169989824295044 lr:1.3475311733793171e-05 tokens_per_second_per_gpu:2659.9050056281694
|
141 |
+
Step 141 | loss:0.7694222331047058 lr:1.3452954797576803e-05 tokens_per_second_per_gpu:2248.619441667407
|
142 |
+
Step 142 | loss:0.9348461627960205 lr:1.343045397164983e-05 tokens_per_second_per_gpu:2748.0654635407595
|
143 |
+
Step 143 | loss:1.0059735774993896 lr:1.3407809799882887e-05 tokens_per_second_per_gpu:2571.4433195970273
|
144 |
+
Step 144 | loss:0.9550279974937439 lr:1.338502282961145e-05 tokens_per_second_per_gpu:2268.0708636948866
|
145 |
+
Step 145 | loss:0.975806713104248 lr:1.3362093611622587e-05 tokens_per_second_per_gpu:2522.5177215146223
|
146 |
+
Step 146 | loss:0.7611340284347534 lr:1.333902270014167e-05 tokens_per_second_per_gpu:2906.4275538550883
|
147 |
+
Step 147 | loss:0.9788836240768433 lr:1.3315810652818951e-05 tokens_per_second_per_gpu:3477.341552657733
|
148 |
+
Step 148 | loss:0.8796938061714172 lr:1.3292458030716105e-05 tokens_per_second_per_gpu:2631.896237909145
|
149 |
+
Step 149 | loss:0.9417287707328796 lr:1.3268965398292655e-05 tokens_per_second_per_gpu:3192.0815935061937
|
150 |
+
Step 150 | loss:1.0355844497680664 lr:1.3245333323392335e-05 tokens_per_second_per_gpu:2245.145788317112
|
151 |
+
Step 151 | loss:0.8991612195968628 lr:1.3221562377229365e-05 tokens_per_second_per_gpu:2578.650347755809
|
152 |
+
Step 152 | loss:0.9595103859901428 lr:1.3197653134374637e-05 tokens_per_second_per_gpu:2434.651933495438
|
153 |
+
Step 153 | loss:0.9001013040542603 lr:1.3173606172741836e-05 tokens_per_second_per_gpu:1842.5350349006453
|
154 |
+
Step 154 | loss:0.8173960447311401 lr:1.3149422073573466e-05 tokens_per_second_per_gpu:2488.3925420974924
|
155 |
+
Step 155 | loss:0.8045638203620911 lr:1.3125101421426801e-05 tokens_per_second_per_gpu:3302.756369196822
|
156 |
+
Step 156 | loss:0.7967942357063293 lr:1.3100644804159756e-05 tokens_per_second_per_gpu:2950.507653722916
|
157 |
+
Step 157 | loss:0.9466549754142761 lr:1.307605281291668e-05 tokens_per_second_per_gpu:2554.2876959141595
|
158 |
+
Step 158 | loss:0.7874048352241516 lr:1.3051326042114062e-05 tokens_per_second_per_gpu:2503.830061713888
|
159 |
+
Step 159 | loss:0.7649244070053101 lr:1.3026465089426172e-05 tokens_per_second_per_gpu:3764.466272015238
|
160 |
+
Step 160 | loss:0.9769793152809143 lr:1.3001470555770603e-05 tokens_per_second_per_gpu:2134.8838608368396
|
161 |
+
Step 161 | loss:0.6409870386123657 lr:1.2976343045293758e-05 tokens_per_second_per_gpu:3831.6710466709137
|
162 |
+
Step 162 | loss:0.9095283150672913 lr:1.2951083165356238e-05 tokens_per_second_per_gpu:2975.461143498561
|
163 |
+
Step 163 | loss:0.987684428691864 lr:1.2925691526518168e-05 tokens_per_second_per_gpu:2419.6660438127183
|
164 |
+
Step 164 | loss:0.8870441317558289 lr:1.2900168742524431e-05 tokens_per_second_per_gpu:2720.042439707338
|
165 |
+
Step 165 | loss:1.0281869173049927 lr:1.2874515430289843e-05 tokens_per_second_per_gpu:2667.0587877368976
|
166 |
+
Step 166 | loss:0.9013745784759521 lr:1.2848732209884236e-05 tokens_per_second_per_gpu:2421.311690375509
|
167 |
+
Step 167 | loss:0.8012135028839111 lr:1.2822819704517467e-05 tokens_per_second_per_gpu:2909.767624354188
|
168 |
+
Step 168 | loss:0.8641454577445984 lr:1.2796778540524362e-05 tokens_per_second_per_gpu:3065.6247029851916
|
169 |
+
Step 169 | loss:0.8661941885948181 lr:1.277060934734957e-05 tokens_per_second_per_gpu:2696.294949936513
|
170 |
+
Step 170 | loss:0.9361519813537598 lr:1.2744312757532357e-05 tokens_per_second_per_gpu:3128.4635750227962
|
171 |
+
Step 171 | loss:0.9445534944534302 lr:1.2717889406691301e-05 tokens_per_second_per_gpu:2804.1901845669454
|
172 |
+
Step 172 | loss:0.8409781455993652 lr:1.2691339933508946e-05 tokens_per_second_per_gpu:2300.349952366563
|
173 |
+
Step 173 | loss:0.7127365469932556 lr:1.266466497971636e-05 tokens_per_second_per_gpu:3292.686682785377
|
174 |
+
Step 174 | loss:0.9441195726394653 lr:1.263786519007761e-05 tokens_per_second_per_gpu:2457.8393080536093
|
175 |
+
Step 175 | loss:0.8693594336509705 lr:1.2610941212374196e-05 tokens_per_second_per_gpu:2543.008326528785
|
176 |
+
Step 176 | loss:0.8825166821479797 lr:1.2583893697389384e-05 tokens_per_second_per_gpu:2725.6009677627276
|
177 |
+
Step 177 | loss:0.8768410682678223 lr:1.2556723298892478e-05 tokens_per_second_per_gpu:3557.1565324485255
|
178 |
+
Step 178 | loss:1.1754149198532104 lr:1.252943067362301e-05 tokens_per_second_per_gpu:2417.834316522929
|
179 |
+
Step 179 | loss:0.8500137329101562 lr:1.250201648127488e-05 tokens_per_second_per_gpu:2250.918842990208
|
180 |
+
Step 180 | loss:1.1264166831970215 lr:1.2474481384480403e-05 tokens_per_second_per_gpu:2391.435699419417
|
181 |
+
Step 181 | loss:0.7425944805145264 lr:1.2446826048794285e-05 tokens_per_second_per_gpu:3704.124439704411
|
182 |
+
Step 182 | loss:0.9915411472320557 lr:1.2419051142677552e-05 tokens_per_second_per_gpu:2500.2454386416653
|
183 |
+
Step 183 | loss:1.094246506690979 lr:1.2391157337481379e-05 tokens_per_second_per_gpu:2292.6884961819756
|
184 |
+
Step 184 | loss:0.8413046002388 lr:1.2363145307430866e-05 tokens_per_second_per_gpu:3114.0205371534985
|
185 |
+
Step 185 | loss:0.8624469041824341 lr:1.2335015729608747e-05 tokens_per_second_per_gpu:2642.208561896737
|
186 |
+
Step 186 | loss:0.8683147430419922 lr:1.2306769283939017e-05 tokens_per_second_per_gpu:3871.095670001607
|
187 |
+
Step 187 | loss:0.9512786865234375 lr:1.2278406653170503e-05 tokens_per_second_per_gpu:2628.580728112808
|
188 |
+
Step 188 | loss:0.8004306554794312 lr:1.2249928522860353e-05 tokens_per_second_per_gpu:3667.905766206227
|
189 |
+
Step 189 | loss:0.8889120221138 lr:1.2221335581357475e-05 tokens_per_second_per_gpu:2248.7450084836746
|
190 |
+
Step 190 | loss:0.8228364586830139 lr:1.2192628519785897e-05 tokens_per_second_per_gpu:2514.6297054243464
|
191 |
+
Step 191 | loss:0.944223165512085 lr:1.2163808032028051e-05 tokens_per_second_per_gpu:2934.22518036366
|
192 |
+
Step 192 | loss:0.7095787525177002 lr:1.213487481470802e-05 tokens_per_second_per_gpu:3819.992979162668
|
193 |
+
Step 193 | loss:0.8380393981933594 lr:1.2105829567174679e-05 tokens_per_second_per_gpu:2800.2528555689605
|
194 |
+
Step 194 | loss:1.0584007501602173 lr:1.2076672991484807e-05 tokens_per_second_per_gpu:2438.3413375541963
|
195 |
+
Step 195 | loss:1.125288486480713 lr:1.2047405792386106e-05 tokens_per_second_per_gpu:1937.2355566529484
|
196 |
+
Step 196 | loss:0.9059362411499023 lr:1.2018028677300183e-05 tokens_per_second_per_gpu:2153.221698146553
|
197 |
+
Step 197 | loss:0.8372734189033508 lr:1.1988542356305431e-05 tokens_per_second_per_gpu:2404.6834628720703
|
198 |
+
Step 198 | loss:0.8674279451370239 lr:1.1958947542119874e-05 tokens_per_second_per_gpu:2321.484011546196
|
199 |
+
Step 199 | loss:0.8657681941986084 lr:1.1929244950083946e-05 tokens_per_second_per_gpu:2348.2580286455236
|
200 |
+
Step 200 | loss:0.9470924139022827 lr:1.1899435298143186e-05 tokens_per_second_per_gpu:2393.2309464100285
|
201 |
+
Step 201 | loss:0.892507791519165 lr:1.1869519306830906e-05 tokens_per_second_per_gpu:3206.4545463886843
|
202 |
+
Step 202 | loss:0.9081307649612427 lr:1.1839497699250747e-05 tokens_per_second_per_gpu:3404.974687265603
|
203 |
+
Step 203 | loss:0.8427631855010986 lr:1.1809371201059226e-05 tokens_per_second_per_gpu:3047.938780516295
|
204 |
+
Step 204 | loss:0.7953866720199585 lr:1.1779140540448186e-05 tokens_per_second_per_gpu:2537.5554252587053
|
205 |
+
Step 205 | loss:0.8499541878700256 lr:1.1748806448127187e-05 tokens_per_second_per_gpu:3022.9311446708475
|
206 |
+
Step 206 | loss:0.9877169132232666 lr:1.1718369657305856e-05 tokens_per_second_per_gpu:1923.481770749864
|
207 |
+
Step 207 | loss:0.9637613892555237 lr:1.1687830903676162e-05 tokens_per_second_per_gpu:1922.9230396108537
|
208 |
+
Step 208 | loss:0.8312227129936218 lr:1.1657190925394627e-05 tokens_per_second_per_gpu:2981.907027352506
|
209 |
+
Step 209 | loss:0.7979338765144348 lr:1.1626450463064492e-05 tokens_per_second_per_gpu:2995.4292103615694
|
210 |
+
Step 210 | loss:0.8698200583457947 lr:1.1595610259717812e-05 tokens_per_second_per_gpu:2902.551497227684
|
211 |
+
Step 211 | loss:0.8625684380531311 lr:1.1564671060797493e-05 tokens_per_second_per_gpu:2660.138583168492
|
212 |
+
Step 212 | loss:0.7270165681838989 lr:1.1533633614139274e-05 tokens_per_second_per_gpu:2835.768233651723
|
213 |
+
Step 213 | loss:0.9477829337120056 lr:1.1502498669953669e-05 tokens_per_second_per_gpu:2406.7431958408943
|
214 |
+
Step 214 | loss:0.7979986071586609 lr:1.1471266980807803e-05 tokens_per_second_per_gpu:2528.5572177691274
|
215 |
+
Step 215 | loss:0.8460088968276978 lr:1.143993930160724e-05 tokens_per_second_per_gpu:2736.724211562585
|
216 |
+
Step 216 | loss:0.8999170660972595 lr:1.1408516389577742e-05 tokens_per_second_per_gpu:2756.4852763216604
|
217 |
+
Step 217 | loss:0.9149824380874634 lr:1.1376999004246942e-05 tokens_per_second_per_gpu:2998.14918902869
|
218 |
+
Step 218 | loss:1.035408616065979 lr:1.1345387907426016e-05 tokens_per_second_per_gpu:2247.566429188033
|
219 |
+
Step 219 | loss:0.9593961238861084 lr:1.1313683863191246e-05 tokens_per_second_per_gpu:2672.649906422322
|
220 |
+
Step 220 | loss:0.9525635242462158 lr:1.1281887637865556e-05 tokens_per_second_per_gpu:2702.378081627573
|
221 |
+
Step 221 | loss:0.8132772445678711 lr:1.125e-05 tokens_per_second_per_gpu:3544.073171959475
|
222 |
+
Step 222 | loss:0.8736097812652588 lr:1.1218021720355171e-05 tokens_per_second_per_gpu:3763.371300035145
|
223 |
+
Step 223 | loss:0.8046478033065796 lr:1.1185953571882582e-05 tokens_per_second_per_gpu:2600.942499066943
|
224 |
+
Step 224 | loss:0.8822525143623352 lr:1.1153796329705974e-05 tokens_per_second_per_gpu:3243.5748920774936
|
225 |
+
Step 225 | loss:1.209902048110962 lr:1.1121550771102586e-05 tokens_per_second_per_gpu:1669.9666501667505
|
226 |
+
Step 226 | loss:0.832476794719696 lr:1.108921767548437e-05 tokens_per_second_per_gpu:2864.4252757404315
|
227 |
+
Step 227 | loss:0.8101415634155273 lr:1.1056797824379137e-05 tokens_per_second_per_gpu:2598.617402658873
|
228 |
+
Step 228 | loss:0.9285197854042053 lr:1.1024292001411689e-05 tokens_per_second_per_gpu:2088.9722996943365
|
229 |
+
Step 229 | loss:0.8414878845214844 lr:1.0991700992284858e-05 tokens_per_second_per_gpu:3063.9756544789043
|
230 |
+
Step 230 | loss:0.9940211772918701 lr:1.0959025584760526e-05 tokens_per_second_per_gpu:2433.3544485786088
|
231 |
+
Step 231 | loss:0.8977429270744324 lr:1.0926266568640585e-05 tokens_per_second_per_gpu:2542.186006721706
|
232 |
+
Step 232 | loss:0.9585859179496765 lr:1.089342473574783e-05 tokens_per_second_per_gpu:2300.665172041385
|
233 |
+
Step 233 | loss:0.9505215883255005 lr:1.0860500879906838e-05 tokens_per_second_per_gpu:3180.361033842941
|
234 |
+
Step 234 | loss:0.8571861982345581 lr:1.0827495796924784e-05 tokens_per_second_per_gpu:2594.923533596223
|
235 |
+
Step 235 | loss:0.7735648155212402 lr:1.0794410284572181e-05 tokens_per_second_per_gpu:2694.024955898258
|
236 |
+
Step 236 | loss:0.8936963677406311 lr:1.0761245142563622e-05 tokens_per_second_per_gpu:2670.446505293131
|
237 |
+
Step 237 | loss:0.9809367656707764 lr:1.0728001172538428e-05 tokens_per_second_per_gpu:2571.567322130418
|
238 |
+
Step 238 | loss:0.9553939700126648 lr:1.06946791780413e-05 tokens_per_second_per_gpu:2541.26905948664
|
239 |
+
Step 239 | loss:0.7851515412330627 lr:1.0661279964502861e-05 tokens_per_second_per_gpu:3012.100826567419
|
240 |
+
Step 240 | loss:0.8377625346183777 lr:1.0627804339220218e-05 tokens_per_second_per_gpu:2908.6314595373565
|
241 |
+
Step 241 | loss:1.0584880113601685 lr:1.0594253111337442e-05 tokens_per_second_per_gpu:2127.0916213056576
|
242 |
+
Step 242 | loss:0.8319791555404663 lr:1.0560627091825987e-05 tokens_per_second_per_gpu:2945.196078622988
|
243 |
+
Step 243 | loss:0.9787124395370483 lr:1.0526927093465123e-05 tokens_per_second_per_gpu:2276.353220549065
|
244 |
+
Step 244 | loss:0.9965022206306458 lr:1.0493153930822264e-05 tokens_per_second_per_gpu:2830.526282612664
|
245 |
+
Step 245 | loss:0.8443072438240051 lr:1.0459308420233291e-05 tokens_per_second_per_gpu:2699.7709016776953
|
246 |
+
Step 246 | loss:0.8488413691520691 lr:1.042539137978282e-05 tokens_per_second_per_gpu:2581.172056006769
|
247 |
+
Step 247 | loss:0.7844489812850952 lr:1.039140362928442e-05 tokens_per_second_per_gpu:3944.3421615811703
|
248 |
+
Step 248 | loss:0.8647257685661316 lr:1.0357345990260806e-05 tokens_per_second_per_gpu:2875.752228028819
|
249 |
+
Step 249 | loss:0.8394876718521118 lr:1.0323219285923976e-05 tokens_per_second_per_gpu:2754.3186309441694
|
250 |
+
Step 250 | loss:0.8988162279129028 lr:1.0289024341155321e-05 tokens_per_second_per_gpu:2413.305312945081
|
251 |
+
Step 251 | loss:0.8642686605453491 lr:1.0254761982485678e-05 tokens_per_second_per_gpu:2703.537161585324
|
252 |
+
Step 252 | loss:0.9334269165992737 lr:1.0220433038075347e-05 tokens_per_second_per_gpu:2806.6086135918135
|
253 |
+
Step 253 | loss:0.8606577515602112 lr:1.0186038337694097e-05 tokens_per_second_per_gpu:2776.029742452688
|
254 |
+
Step 254 | loss:0.9714118838310242 lr:1.0151578712701077e-05 tokens_per_second_per_gpu:2382.253753459991
|
255 |
+
Step 255 | loss:0.865350067615509 lr:1.0117054996024753e-05 tokens_per_second_per_gpu:2716.911389419169
|
256 |
+
Step 256 | loss:0.8225361704826355 lr:1.008246802214275e-05 tokens_per_second_per_gpu:2850.587447893
|
257 |
+
Step 257 | loss:0.7821142673492432 lr:1.0047818627061696e-05 tokens_per_second_per_gpu:3493.851190905611
|
258 |
+
Step 258 | loss:0.9727859497070312 lr:1.0013107648297013e-05 tokens_per_second_per_gpu:2163.1960733975143
|
259 |
+
Step 259 | loss:0.8025305271148682 lr:9.978335924852662e-06 tokens_per_second_per_gpu:2213.270287504602
|
260 |
+
Step 260 | loss:0.7221256494522095 lr:9.943504297200883e-06 tokens_per_second_per_gpu:4165.593745585308
|
261 |
+
Step 261 | loss:1.0640528202056885 lr:9.908613607261861e-06 tokens_per_second_per_gpu:2175.662315838582
|
262 |
+
Step 262 | loss:0.8858134746551514 lr:9.873664698383386e-06 tokens_per_second_per_gpu:2108.5291412185948
|
263 |
+
Step 263 | loss:0.997913122177124 lr:9.838658415320474e-06 tokens_per_second_per_gpu:2297.180689912119
|
264 |
+
Step 264 | loss:0.8774775266647339 lr:9.803595604214924e-06 tokens_per_second_per_gpu:2114.8571076658072
|
265 |
+
Step 265 | loss:0.8805571794509888 lr:9.768477112574902e-06 tokens_per_second_per_gpu:3400.560855606231
|
266 |
+
Step 266 | loss:0.9601130485534668 lr:9.733303789254418e-06 tokens_per_second_per_gpu:2678.9658431685334
|
267 |
+
Step 267 | loss:0.9212712049484253 lr:9.698076484432837e-06 tokens_per_second_per_gpu:2042.823107268897
|
268 |
+
Step 268 | loss:0.7927682399749756 lr:9.662796049594319e-06 tokens_per_second_per_gpu:3287.43519927435
|
269 |
+
Step 269 | loss:1.1724016666412354 lr:9.627463337507226e-06 tokens_per_second_per_gpu:2966.2408737525116
|
270 |
+
Step 270 | loss:0.9678674936294556 lr:9.592079202203536e-06 tokens_per_second_per_gpu:2884.935388288082
|
271 |
+
Step 271 | loss:0.8864798545837402 lr:9.556644498958176e-06 tokens_per_second_per_gpu:2400.49316338112
|
272 |
+
Step 272 | loss:0.938960075378418 lr:9.521160084268355e-06 tokens_per_second_per_gpu:2212.7791306773547
|
273 |
+
Step 273 | loss:0.8877987265586853 lr:9.485626815832877e-06 tokens_per_second_per_gpu:3354.291244394353
|
274 |
+
Step 274 | loss:0.8895339369773865 lr:9.450045552531383e-06 tokens_per_second_per_gpu:3240.0340597607606
|
275 |
+
Step 275 | loss:0.8576871752738953 lr:9.414417154403609e-06 tokens_per_second_per_gpu:3453.885492750505
|
276 |
+
Step 276 | loss:0.9489049315452576 lr:9.378742482628598e-06 tokens_per_second_per_gpu:3090.6101442457916
|
277 |
+
Step 277 | loss:0.899044394493103 lr:9.343022399503873e-06 tokens_per_second_per_gpu:2594.79916468504
|
278 |
+
Step 278 | loss:0.8113265633583069 lr:9.307257768424607e-06 tokens_per_second_per_gpu:3177.796880529129
|
279 |
+
Step 279 | loss:0.9940325617790222 lr:9.271449453862748e-06 tokens_per_second_per_gpu:2404.427909840856
|
280 |
+
Step 280 | loss:0.8677517771720886 lr:9.235598321346119e-06 tokens_per_second_per_gpu:3642.3763622655156
|
281 |
+
Step 281 | loss:0.9939593076705933 lr:9.1997052374375e-06 tokens_per_second_per_gpu:2429.8607833688475
|
282 |
+
Step 282 | loss:0.9184926748275757 lr:9.163771069713696e-06 tokens_per_second_per_gpu:2404.497162239973
|
283 |
+
Step 283 | loss:0.8467150926589966 lr:9.12779668674454e-06 tokens_per_second_per_gpu:3138.575898512493
|
284 |
+
Step 284 | loss:0.892140805721283 lr:9.091782958071923e-06 tokens_per_second_per_gpu:2558.36640338516
|
285 |
+
Step 285 | loss:0.747302234172821 lr:9.055730754188765e-06 tokens_per_second_per_gpu:2942.3912091461602
|
286 |
+
Step 286 | loss:0.857389509677887 lr:9.019640946517972e-06 tokens_per_second_per_gpu:2200.4241563666415
|
287 |
+
Step 287 | loss:0.9071866869926453 lr:8.98351440739138e-06 tokens_per_second_per_gpu:2901.754213786485
|
288 |
+
Step 288 | loss:0.9092150926589966 lr:8.947352010028666e-06 tokens_per_second_per_gpu:2387.9788412693297
|
289 |
+
Step 289 | loss:0.9917126297950745 lr:8.911154628516236e-06 tokens_per_second_per_gpu:3061.2987875670756
|
290 |
+
Step 290 | loss:0.9317039847373962 lr:8.874923137786114e-06 tokens_per_second_per_gpu:3225.279467510707
|
291 |
+
Step 291 | loss:0.7867501974105835 lr:8.838658413594773e-06 tokens_per_second_per_gpu:3152.63949537652
|
292 |
+
Step 292 | loss:0.9737382531166077 lr:8.80236133250198e-06 tokens_per_second_per_gpu:2712.0793822195424
|
293 |
+
Step 293 | loss:0.8233653903007507 lr:8.766032771849601e-06 tokens_per_second_per_gpu:2616.28775170671
|
294 |
+
Step 294 | loss:0.8503261208534241 lr:8.729673609740411e-06 tokens_per_second_per_gpu:3122.336820825079
|
295 |
+
Step 295 | loss:0.831436038017273 lr:8.693284725016845e-06 tokens_per_second_per_gpu:2756.8460814254904
|
296 |
+
Step 296 | loss:0.8883024454116821 lr:8.656866997239767e-06 tokens_per_second_per_gpu:2849.1394403891695
|
297 |
+
Step 297 | loss:1.030733346939087 lr:8.620421306667225e-06 tokens_per_second_per_gpu:2177.6471527849394
|
298 |
+
Step 298 | loss:0.880768358707428 lr:8.58394853423314e-06 tokens_per_second_per_gpu:2537.216693346244
|
299 |
+
Step 299 | loss:0.8131841421127319 lr:8.547449561526045e-06 tokens_per_second_per_gpu:3331.622489013824
|
300 |
+
Step 300 | loss:0.904315710067749 lr:8.510925270767766e-06 tokens_per_second_per_gpu:2341.584545888117
|
301 |
+
Step 301 | loss:0.8966640830039978 lr:8.474376544792087e-06 tokens_per_second_per_gpu:2190.578843299298
|
302 |
+
Step 302 | loss:0.8454634547233582 lr:8.43780426702342e-06 tokens_per_second_per_gpu:2610.455821912868
|
303 |
+
Step 303 | loss:0.8394374847412109 lr:8.40120932145545e-06 tokens_per_second_per_gpu:3411.2844261560285
|
304 |
+
Step 304 | loss:0.8339081406593323 lr:8.36459259262978e-06 tokens_per_second_per_gpu:2536.6041667350532
|
305 |
+
Step 305 | loss:0.87993323802948 lr:8.327954965614526e-06 tokens_per_second_per_gpu:2955.8406787717813
|
306 |
+
Step 306 | loss:0.8896875977516174 lr:8.291297325982935e-06 tokens_per_second_per_gpu:2856.849362787982
|
307 |
+
Step 307 | loss:0.8964165449142456 lr:8.254620559792e-06 tokens_per_second_per_gpu:2514.582769533579
|
308 |
+
Step 308 | loss:0.8504213094711304 lr:8.217925553561006e-06 tokens_per_second_per_gpu:2586.2976029121282
|
309 |
+
Step 309 | loss:0.9949404001235962 lr:8.18121319425013e-06 tokens_per_second_per_gpu:2718.513680534231
|
310 |
+
Step 310 | loss:0.8377817273139954 lr:8.14448436923899e-06 tokens_per_second_per_gpu:2479.359962245908
|
311 |
+
Step 311 | loss:0.9631475806236267 lr:8.1077399663052e-06 tokens_per_second_per_gpu:2560.2062470472647
|
312 |
+
Step 312 | loss:0.8673937320709229 lr:8.070980873602909e-06 tokens_per_second_per_gpu:2299.3708471618284
|
313 |
+
Step 313 | loss:0.7618104815483093 lr:8.034207979641328e-06 tokens_per_second_per_gpu:2870.2023552491078
|
314 |
+
Step 314 | loss:0.9050427675247192 lr:7.99742217326328e-06 tokens_per_second_per_gpu:2367.7275558955394
|
315 |
+
Step 315 | loss:0.8086662888526917 lr:7.960624343623674e-06 tokens_per_second_per_gpu:2866.746041665019
|
316 |
+
Step 316 | loss:0.8907902836799622 lr:7.923815380168046e-06 tokens_per_second_per_gpu:2496.810028140203
|
317 |
+
Step 317 | loss:0.9406841993331909 lr:7.886996172611049e-06 tokens_per_second_per_gpu:2524.8161688710366
|
318 |
+
Step 318 | loss:0.8578323125839233 lr:7.850167610914942e-06 tokens_per_second_per_gpu:3224.289434866302
|
319 |
+
Step 319 | loss:0.886796236038208 lr:7.813330585268092e-06 tokens_per_second_per_gpu:3023.1847564125983
|
320 |
+
Step 320 | loss:0.8145670890808105 lr:7.776485986063437e-06 tokens_per_second_per_gpu:2596.918537084552
|
321 |
+
Step 321 | loss:0.9760152697563171 lr:7.73963470387699e-06 tokens_per_second_per_gpu:2434.8368875247716
|
322 |
+
Step 322 | loss:0.8733581304550171 lr:7.702777629446298e-06 tokens_per_second_per_gpu:2350.605592846224
|
323 |
+
Step 323 | loss:0.9446771144866943 lr:7.665915653648901e-06 tokens_per_second_per_gpu:2151.967467013007
|
324 |
+
Step 324 | loss:1.06463623046875 lr:7.629049667480826e-06 tokens_per_second_per_gpu:2519.802402001832
|
325 |
+
Step 325 | loss:0.8435251116752625 lr:7.592180562035022e-06 tokens_per_second_per_gpu:3336.522359641145
|
326 |
+
Step 326 | loss:0.8608092069625854 lr:7.555309228479843e-06 tokens_per_second_per_gpu:3277.118785268545
|
327 |
+
Step 327 | loss:0.7815330028533936 lr:7.518436558037498e-06 tokens_per_second_per_gpu:3369.0019186297413
|
328 |
+
Step 328 | loss:0.8153398036956787 lr:7.481563441962503e-06 tokens_per_second_per_gpu:3267.182160183334
|
329 |
+
Step 329 | loss:0.8034992814064026 lr:7.444690771520157e-06 tokens_per_second_per_gpu:3690.7640838779034
|
330 |
+
Step 330 | loss:0.8502854108810425 lr:7.407819437964981e-06 tokens_per_second_per_gpu:2631.1245605629197
|
331 |
+
Step 331 | loss:0.8993874788284302 lr:7.370950332519176e-06 tokens_per_second_per_gpu:2930.8409402558013
|
332 |
+
Step 332 | loss:0.9783048629760742 lr:7.3340843463511e-06 tokens_per_second_per_gpu:2899.8105315950947
|
333 |
+
Step 333 | loss:0.9575924277305603 lr:7.2972223705537036e-06 tokens_per_second_per_gpu:2429.0229008539395
|
334 |
+
Step 334 | loss:0.865286648273468 lr:7.26036529612301e-06 tokens_per_second_per_gpu:2839.6573926637784
|
335 |
+
Step 335 | loss:0.8176185488700867 lr:7.223514013936566e-06 tokens_per_second_per_gpu:3094.1096527236145
|
336 |
+
Step 336 | loss:0.7247341275215149 lr:7.186669414731913e-06 tokens_per_second_per_gpu:3315.033496164722
|
337 |
+
Step 337 | loss:0.8403685092926025 lr:7.14983238908506e-06 tokens_per_second_per_gpu:3039.9672711448557
|
338 |
+
Step 338 | loss:0.7528369426727295 lr:7.1130038273889515e-06 tokens_per_second_per_gpu:3136.680127173355
|
339 |
+
Step 339 | loss:0.8461508750915527 lr:7.0761846198319535e-06 tokens_per_second_per_gpu:2716.7048475070237
|
340 |
+
Step 340 | loss:1.0012428760528564 lr:7.039375656376329e-06 tokens_per_second_per_gpu:2181.341224281936
|
341 |
+
Step 341 | loss:0.8235898017883301 lr:7.0025778267367225e-06 tokens_per_second_per_gpu:3569.9367239582057
|
342 |
+
Step 342 | loss:1.0386333465576172 lr:6.965792020358672e-06 tokens_per_second_per_gpu:2647.7206930607267
|
343 |
+
Step 343 | loss:0.900693953037262 lr:6.9290191263970935e-06 tokens_per_second_per_gpu:2909.1159397211577
|
344 |
+
Step 344 | loss:0.8273099660873413 lr:6.8922600336948e-06 tokens_per_second_per_gpu:2416.7004437081237
|
345 |
+
Step 345 | loss:0.9471863508224487 lr:6.8555156307610125e-06 tokens_per_second_per_gpu:2800.786061572154
|
346 |
+
Step 346 | loss:1.011666178703308 lr:6.818786805749872e-06 tokens_per_second_per_gpu:3052.515712984993
|
347 |
+
Step 347 | loss:0.8267448544502258 lr:6.782074446438995e-06 tokens_per_second_per_gpu:2917.6837742563043
|
348 |
+
Step 348 | loss:0.8594385385513306 lr:6.745379440208001e-06 tokens_per_second_per_gpu:3103.4111269442988
|
349 |
+
Step 349 | loss:1.0058006048202515 lr:6.708702674017064e-06 tokens_per_second_per_gpu:2278.381957741059
|
350 |
+
Step 350 | loss:0.9441714286804199 lr:6.672045034385478e-06 tokens_per_second_per_gpu:2718.044038121958
|
351 |
+
Step 351 | loss:0.9618679285049438 lr:6.635407407370222e-06 tokens_per_second_per_gpu:3258.8267915094248
|
352 |
+
Step 352 | loss:0.9324498176574707 lr:6.598790678544549e-06 tokens_per_second_per_gpu:2416.68747120486
|
353 |
+
Step 353 | loss:0.8107538819313049 lr:6.562195732976582e-06 tokens_per_second_per_gpu:2136.3859791339296
|
354 |
+
Step 354 | loss:0.8707559108734131 lr:6.525623455207914e-06 tokens_per_second_per_gpu:3920.9124449450605
|
355 |
+
Step 355 | loss:1.0396661758422852 lr:6.489074729232236e-06 tokens_per_second_per_gpu:2677.917087444475
|
356 |
+
Step 356 | loss:0.9951695203781128 lr:6.452550438473955e-06 tokens_per_second_per_gpu:2513.724453514746
|
357 |
+
Step 357 | loss:0.8180606365203857 lr:6.416051465766861e-06 tokens_per_second_per_gpu:2136.395063021162
|
358 |
+
Step 358 | loss:0.8964337706565857 lr:6.379578693332777e-06 tokens_per_second_per_gpu:2502.99951437964
|
359 |
+
Step 359 | loss:0.8790155053138733 lr:6.343133002760231e-06 tokens_per_second_per_gpu:2575.228954991082
|
360 |
+
Step 360 | loss:0.8716058731079102 lr:6.30671527498316e-06 tokens_per_second_per_gpu:2902.1444594976692
|
361 |
+
Step 361 | loss:0.8309515714645386 lr:6.270326390259591e-06 tokens_per_second_per_gpu:2755.057233486617
|
362 |
+
Step 362 | loss:1.0263001918792725 lr:6.233967228150399e-06 tokens_per_second_per_gpu:2805.8836613470926
|
363 |
+
Step 363 | loss:0.951583206653595 lr:6.197638667498023e-06 tokens_per_second_per_gpu:2874.3500077114672
|
364 |
+
Step 364 | loss:0.7161557078361511 lr:6.161341586405229e-06 tokens_per_second_per_gpu:3021.893258538902
|
365 |
+
Step 365 | loss:0.9259535074234009 lr:6.125076862213888e-06 tokens_per_second_per_gpu:2695.4417612817692
|
366 |
+
Step 366 | loss:1.016567587852478 lr:6.088845371483765e-06 tokens_per_second_per_gpu:3001.6669903895245
|
367 |
+
Step 367 | loss:0.9344175457954407 lr:6.0526479899713365e-06 tokens_per_second_per_gpu:2613.816790424833
|
368 |
+
Step 368 | loss:0.9176039695739746 lr:6.0164855926086204e-06 tokens_per_second_per_gpu:2374.669316445514
|
369 |
+
Step 369 | loss:0.8734990358352661 lr:5.980359053482029e-06 tokens_per_second_per_gpu:2290.221515119589
|
370 |
+
Step 370 | loss:0.8557796478271484 lr:5.944269245811238e-06 tokens_per_second_per_gpu:2167.941273792827
|
371 |
+
Step 371 | loss:0.8427585363388062 lr:5.908217041928078e-06 tokens_per_second_per_gpu:2712.4728476039068
|
372 |
+
Step 372 | loss:0.7605788111686707 lr:5.87220331325546e-06 tokens_per_second_per_gpu:2384.5312207906018
|
373 |
+
Step 373 | loss:0.8790305256843567 lr:5.836228930286305e-06 tokens_per_second_per_gpu:2610.707228443902
|
374 |
+
Step 374 | loss:1.0703303813934326 lr:5.800294762562498e-06 tokens_per_second_per_gpu:1830.7243589924585
|
375 |
+
Step 375 | loss:0.8680859804153442 lr:5.764401678653884e-06 tokens_per_second_per_gpu:3279.2616810969766
|
376 |
+
Step 376 | loss:0.822643518447876 lr:5.728550546137254e-06 tokens_per_second_per_gpu:2555.8667929702524
|
377 |
+
Step 377 | loss:0.8699767589569092 lr:5.692742231575392e-06 tokens_per_second_per_gpu:3389.3087628096314
|
378 |
+
Step 378 | loss:0.8780330419540405 lr:5.656977600496128e-06 tokens_per_second_per_gpu:2654.4637897137886
|
379 |
+
Step 379 | loss:0.7730309367179871 lr:5.621257517371404e-06 tokens_per_second_per_gpu:3445.66256348602
|
380 |
+
Step 380 | loss:0.8410382270812988 lr:5.585582845596393e-06 tokens_per_second_per_gpu:3472.3318332317604
|
381 |
+
Step 381 | loss:0.9238551259040833 lr:5.54995444746862e-06 tokens_per_second_per_gpu:2329.4743010850307
|
382 |
+
Step 382 | loss:1.0120571851730347 lr:5.514373184167124e-06 tokens_per_second_per_gpu:2835.2976676586004
|
383 |
+
Step 383 | loss:0.8865228891372681 lr:5.478839915731643e-06 tokens_per_second_per_gpu:2756.7384007998317
|
384 |
+
Step 384 | loss:0.7867243885993958 lr:5.4433555010418245e-06 tokens_per_second_per_gpu:3345.717039940193
|
385 |
+
Step 385 | loss:0.8795440196990967 lr:5.407920797796467e-06 tokens_per_second_per_gpu:3480.7882876326375
|
386 |
+
Step 386 | loss:0.8486223220825195 lr:5.372536662492775e-06 tokens_per_second_per_gpu:2192.7935593406287
|
387 |
+
Step 387 | loss:0.7376230955123901 lr:5.337203950405684e-06 tokens_per_second_per_gpu:3305.03296637997
|
388 |
+
Step 388 | loss:0.9511170387268066 lr:5.301923515567164e-06 tokens_per_second_per_gpu:2384.3194346306673
|
389 |
+
Step 389 | loss:0.9646419882774353 lr:5.266696210745586e-06 tokens_per_second_per_gpu:2997.5034964349775
|
390 |
+
Step 390 | loss:0.8492111563682556 lr:5.231522887425101e-06 tokens_per_second_per_gpu:2428.3334671595767
|
391 |
+
Step 391 | loss:0.8491449952125549 lr:5.196404395785076e-06 tokens_per_second_per_gpu:3138.2991290180353
|
392 |
+
Step 392 | loss:0.8455160856246948 lr:5.161341584679528e-06 tokens_per_second_per_gpu:2930.049873705843
|
393 |
+
Step 393 | loss:0.7958143949508667 lr:5.126335301616613e-06 tokens_per_second_per_gpu:2856.2636955887865
|
394 |
+
Step 394 | loss:0.7903842926025391 lr:5.091386392738142e-06 tokens_per_second_per_gpu:2432.8871043673084
|
395 |
+
Step 395 | loss:0.9168512225151062 lr:5.056495702799119e-06 tokens_per_second_per_gpu:2128.874388217258
|
396 |
+
Step 396 | loss:0.8486897349357605 lr:5.02166407514734e-06 tokens_per_second_per_gpu:2714.435543410219
|
397 |
+
Step 397 | loss:0.8215017318725586 lr:4.9868923517029894e-06 tokens_per_second_per_gpu:3343.137622210726
|
398 |
+
Step 398 | loss:0.8552722930908203 lr:4.9521813729383045e-06 tokens_per_second_per_gpu:2519.8796099196766
|
399 |
+
Step 399 | loss:0.788427472114563 lr:4.9175319778572534e-06 tokens_per_second_per_gpu:2166.8862532052426
|
400 |
+
Step 400 | loss:0.9515634179115295 lr:4.882945003975251e-06 tokens_per_second_per_gpu:3065.2233860728775
|
401 |
+
Step 401 | loss:0.810390293598175 lr:4.848421287298924e-06 tokens_per_second_per_gpu:2659.3075028331896
|
402 |
+
Step 402 | loss:0.8705076575279236 lr:4.813961662305905e-06 tokens_per_second_per_gpu:2782.8155438173562
|
403 |
+
Step 403 | loss:0.9843229055404663 lr:4.779566961924652e-06 tokens_per_second_per_gpu:2582.5921207447886
|
404 |
+
Step 404 | loss:0.9581823945045471 lr:4.745238017514326e-06 tokens_per_second_per_gpu:2686.0533558342913
|
405 |
+
Step 405 | loss:0.8852664232254028 lr:4.71097565884468e-06 tokens_per_second_per_gpu:3322.62504395818
|
406 |
+
Step 406 | loss:1.0313615798950195 lr:4.676780714076024e-06 tokens_per_second_per_gpu:2349.1757169557204
|
407 |
+
Step 407 | loss:0.8631855845451355 lr:4.6426540097391955e-06 tokens_per_second_per_gpu:2534.359572679826
|
408 |
+
Step 408 | loss:0.829632580280304 lr:4.6085963707155815e-06 tokens_per_second_per_gpu:2800.40334172245
|
409 |
+
Step 409 | loss:0.9864691495895386 lr:4.574608620217182e-06 tokens_per_second_per_gpu:2304.7700393033388
|
410 |
+
Step 410 | loss:0.9433596134185791 lr:4.54069157976671e-06 tokens_per_second_per_gpu:2128.7283080192083
|
411 |
+
Step 411 | loss:0.8118562698364258 lr:4.506846069177737e-06 tokens_per_second_per_gpu:3097.3892470665614
|
412 |
+
Step 412 | loss:0.8059877157211304 lr:4.473072906534878e-06 tokens_per_second_per_gpu:3564.4255635926534
|
413 |
+
Step 413 | loss:0.9641546010971069 lr:4.4393729081740125e-06 tokens_per_second_per_gpu:2130.3184632629923
|
414 |
+
Step 414 | loss:0.8071569204330444 lr:4.405746888662562e-06 tokens_per_second_per_gpu:2281.9392502990377
|
415 |
+
Step 415 | loss:0.8968546986579895 lr:4.372195660779782e-06 tokens_per_second_per_gpu:2232.3985049022785
|
416 |
+
Step 416 | loss:0.8468106389045715 lr:4.338720035497139e-06 tokens_per_second_per_gpu:3083.9242151564563
|
417 |
+
Step 417 | loss:0.8717325925827026 lr:4.305320821958703e-06 tokens_per_second_per_gpu:3372.5584573988763
|
418 |
+
Step 418 | loss:0.8747634887695312 lr:4.271998827461571e-06 tokens_per_second_per_gpu:2445.4055033972445
|
419 |
+
Step 419 | loss:0.9820618033409119 lr:4.238754857436381e-06 tokens_per_second_per_gpu:2342.6436446309026
|
420 |
+
Step 420 | loss:0.8231772780418396 lr:4.20558971542782e-06 tokens_per_second_per_gpu:2528.9826899571935
|
421 |
+
Step 421 | loss:0.7924979329109192 lr:4.172504203075216e-06 tokens_per_second_per_gpu:2729.9206992294567
|
422 |
+
Step 422 | loss:0.9110404849052429 lr:4.139499120093161e-06 tokens_per_second_per_gpu:3315.3490158265113
|
423 |
+
Step 423 | loss:0.75404953956604 lr:4.106575264252172e-06 tokens_per_second_per_gpu:3090.977701701262
|
424 |
+
Step 424 | loss:0.938966691493988 lr:4.073733431359421e-06 tokens_per_second_per_gpu:2904.2331904283437
|
425 |
+
Step 425 | loss:0.8179991245269775 lr:4.040974415239475e-06 tokens_per_second_per_gpu:2990.000297168357
|
426 |
+
Step 426 | loss:1.0704540014266968 lr:4.0082990077151445e-06 tokens_per_second_per_gpu:1991.6900351358886
|
427 |
+
Step 427 | loss:0.8314566016197205 lr:3.975707998588312e-06 tokens_per_second_per_gpu:3039.0039030063976
|
428 |
+
Step 428 | loss:0.8255379796028137 lr:3.9432021756208656e-06 tokens_per_second_per_gpu:2817.339944564254
|
429 |
+
Step 429 | loss:0.8793909549713135 lr:3.910782324515634e-06 tokens_per_second_per_gpu:2784.943798011668
|
430 |
+
Step 430 | loss:0.9148744940757751 lr:3.878449228897414e-06 tokens_per_second_per_gpu:3249.35877414692
|
431 |
+
Step 431 | loss:0.7125563025474548 lr:3.846203670294028e-06 tokens_per_second_per_gpu:3253.7632080442154
|
432 |
+
Step 432 | loss:0.9870259165763855 lr:3.814046428117417e-06 tokens_per_second_per_gpu:2434.1776382820162
|
433 |
+
Step 433 | loss:0.9864261150360107 lr:3.781978279644829e-06 tokens_per_second_per_gpu:2380.6335664694293
|
434 |
+
Step 434 | loss:0.8019827604293823 lr:3.750000000000002e-06 tokens_per_second_per_gpu:3123.600926175111
|
435 |
+
Step 435 | loss:0.7339569926261902 lr:3.718112362134444e-06 tokens_per_second_per_gpu:3781.257392588865
|
436 |
+
Step 436 | loss:0.8225141763687134 lr:3.686316136808756e-06 tokens_per_second_per_gpu:2818.7608784896115
|
437 |
+
Step 437 | loss:0.9751465916633606 lr:3.6546120925739825e-06 tokens_per_second_per_gpu:2280.5290359147593
|
438 |
+
Step 438 | loss:1.2062530517578125 lr:3.6230009957530574e-06 tokens_per_second_per_gpu:2214.766667892836
|
439 |
+
Step 439 | loss:0.8194683194160461 lr:3.59148361042226e-06 tokens_per_second_per_gpu:2587.946850567925
|
440 |
+
Step 440 | loss:0.9677116274833679 lr:3.5600606983927607e-06 tokens_per_second_per_gpu:2214.6139566548404
|
441 |
+
Step 441 | loss:0.8907654285430908 lr:3.528733019192198e-06 tokens_per_second_per_gpu:2491.138869734744
|
442 |
+
Step 442 | loss:0.8188056349754333 lr:3.4975013300463318e-06 tokens_per_second_per_gpu:2672.3598495643787
|
443 |
+
Step 443 | loss:0.9130949378013611 lr:3.4663663858607234e-06 tokens_per_second_per_gpu:2951.3410891423564
|
444 |
+
Step 444 | loss:0.9273456335067749 lr:3.4353289392025103e-06 tokens_per_second_per_gpu:2988.781719929555
|
445 |
+
Step 445 | loss:0.7225141525268555 lr:3.404389740282191e-06 tokens_per_second_per_gpu:3436.804380837058
|
446 |
+
Step 446 | loss:0.8584692478179932 lr:3.3735495369355077e-06 tokens_per_second_per_gpu:3279.7335439817807
|
447 |
+
Step 447 | loss:0.8927696347236633 lr:3.342809074605375e-06 tokens_per_second_per_gpu:2235.93741901587
|
448 |
+
Step 448 | loss:0.9940071105957031 lr:3.3121690963238414e-06 tokens_per_second_per_gpu:2386.527757758584
|
449 |
+
Step 449 | loss:0.7678663730621338 lr:3.2816303426941454e-06 tokens_per_second_per_gpu:3482.820850954397
|
450 |
+
Step 450 | loss:0.9011093378067017 lr:3.2511935518728157e-06 tokens_per_second_per_gpu:1981.403319469987
|
451 |
+
Step 451 | loss:0.9154335260391235 lr:3.2208594595518152e-06 tokens_per_second_per_gpu:2839.676474707793
|
452 |
+
Step 452 | loss:0.9392225742340088 lr:3.1906287989407737e-06 tokens_per_second_per_gpu:2792.9917171852126
|
453 |
+
Step 453 | loss:0.941868245601654 lr:3.1605023007492552e-06 tokens_per_second_per_gpu:2564.1183459402805
|
454 |
+
Step 454 | loss:0.8091152310371399 lr:3.1304806931690975e-06 tokens_per_second_per_gpu:3746.246185380797
|
455 |
+
Step 455 | loss:0.9475477933883667 lr:3.100564701856814e-06 tokens_per_second_per_gpu:2413.935688483254
|
456 |
+
Step 456 | loss:0.9426826238632202 lr:3.070755049916057e-06 tokens_per_second_per_gpu:2303.012048381734
|
457 |
+
Step 457 | loss:1.0177905559539795 lr:3.0410524578801263e-06 tokens_per_second_per_gpu:2555.0729947502355
|
458 |
+
Step 458 | loss:0.9545515775680542 lr:3.01145764369457e-06 tokens_per_second_per_gpu:2975.888591278216
|
459 |
+
Step 459 | loss:0.8237252831459045 lr:2.9819713226998186e-06 tokens_per_second_per_gpu:2493.940474143623
|
460 |
+
Step 460 | loss:0.963207483291626 lr:2.9525942076138937e-06 tokens_per_second_per_gpu:1983.0426983275186
|
461 |
+
Step 461 | loss:0.9316860437393188 lr:2.9233270085151965e-06 tokens_per_second_per_gpu:2681.451947505957
|
462 |
+
Step 462 | loss:0.880352258682251 lr:2.8941704328253223e-06 tokens_per_second_per_gpu:2415.10368731483
|
463 |
+
Step 463 | loss:1.0479156970977783 lr:2.8651251852919812e-06 tokens_per_second_per_gpu:2803.8099792533726
|
464 |
+
Step 464 | loss:0.902858555316925 lr:2.8361919679719494e-06 tokens_per_second_per_gpu:2516.2445296751976
|
465 |
+
Step 465 | loss:1.0508098602294922 lr:2.8073714802141027e-06 tokens_per_second_per_gpu:2480.442905915016
|
466 |
+
Step 466 | loss:0.8653034567832947 lr:2.7786644186425245e-06 tokens_per_second_per_gpu:1932.6317201110714
|
467 |
+
Step 467 | loss:0.9926808476448059 lr:2.7500714771396464e-06 tokens_per_second_per_gpu:1877.5562851011355
|
468 |
+
Step 468 | loss:0.8932638168334961 lr:2.721593346829501e-06 tokens_per_second_per_gpu:3161.1173865778374
|
469 |
+
Step 469 | loss:0.8979821801185608 lr:2.693230716060984e-06 tokens_per_second_per_gpu:2036.8558420047852
|
470 |
+
Step 470 | loss:0.7667185664176941 lr:2.6649842703912558e-06 tokens_per_second_per_gpu:2900.220984639236
|
471 |
+
Step 471 | loss:0.7836153507232666 lr:2.636854692569135e-06 tokens_per_second_per_gpu:2817.8863131727257
|
472 |
+
Step 472 | loss:0.9054886102676392 lr:2.6088426625186216e-06 tokens_per_second_per_gpu:2930.6798374473015
|
473 |
+
Step 473 | loss:0.860165536403656 lr:2.58094885732245e-06 tokens_per_second_per_gpu:2410.5742824676117
|
474 |
+
Step 474 | loss:0.9614354968070984 lr:2.553173951205715e-06 tokens_per_second_per_gpu:2776.494678342581
|
475 |
+
Step 475 | loss:0.8513482809066772 lr:2.5255186155195993e-06 tokens_per_second_per_gpu:3631.276628324878
|
476 |
+
Step 476 | loss:0.8641606569290161 lr:2.497983518725119e-06 tokens_per_second_per_gpu:2638.6224491605603
|
477 |
+
Step 477 | loss:0.8530397415161133 lr:2.470569326376991e-06 tokens_per_second_per_gpu:2284.1034573050524
|
478 |
+
Step 478 | loss:0.8508715033531189 lr:2.443276701107525e-06 tokens_per_second_per_gpu:2732.368505701632
|
479 |
+
Step 479 | loss:0.8788378834724426 lr:2.4161063026106153e-06 tokens_per_second_per_gpu:2150.624908574507
|
480 |
+
Step 480 | loss:0.962777316570282 lr:2.389058787625805e-06 tokens_per_second_per_gpu:2568.8995848617747
|
481 |
+
Step 481 | loss:0.7333717942237854 lr:2.3621348099223907e-06 tokens_per_second_per_gpu:3710.6072079951878
|
482 |
+
Step 482 | loss:0.9811513423919678 lr:2.335335020283642e-06 tokens_per_second_per_gpu:2925.5796188715262
|
483 |
+
Step 483 | loss:0.8781923651695251 lr:2.308660066491055e-06 tokens_per_second_per_gpu:2126.7808277712015
|
484 |
+
Step 484 | loss:0.9604898691177368 lr:2.282110593308702e-06 tokens_per_second_per_gpu:1926.7847478492406
|
485 |
+
Step 485 | loss:1.1256814002990723 lr:2.2556872424676447e-06 tokens_per_second_per_gpu:2558.8925285932874
|
486 |
+
Step 486 | loss:0.9171305894851685 lr:2.22939065265043e-06 tokens_per_second_per_gpu:1903.1195805184752
|
487 |
+
Step 487 | loss:0.8313721418380737 lr:2.203221459475638e-06 tokens_per_second_per_gpu:2979.98771275172
|
488 |
+
Step 488 | loss:0.8055751323699951 lr:2.1771802954825338e-06 tokens_per_second_per_gpu:3015.6667404412465
|
489 |
+
Step 489 | loss:1.0302447080612183 lr:2.151267790115766e-06 tokens_per_second_per_gpu:2216.9913058020384
|
490 |
+
Step 490 | loss:1.1314735412597656 lr:2.1254845697101576e-06 tokens_per_second_per_gpu:2104.5816237570134
|
491 |
+
Step 491 | loss:0.9303261637687683 lr:2.099831257475571e-06 tokens_per_second_per_gpu:2165.4810755645126
|
492 |
+
Step 492 | loss:0.7989121675491333 lr:2.074308473481833e-06 tokens_per_second_per_gpu:2985.2464129518576
|
493 |
+
Step 493 | loss:1.00505530834198 lr:2.0489168346437635e-06 tokens_per_second_per_gpu:2471.4941659452065
|
494 |
+
Step 494 | loss:1.0171761512756348 lr:2.023656954706244e-06 tokens_per_second_per_gpu:2918.608675039668
|
495 |
+
Step 495 | loss:0.937871515750885 lr:1.998529444229398e-06 tokens_per_second_per_gpu:2691.00456421299
|
496 |
+
Step 496 | loss:0.8979736566543579 lr:1.97353491057383e-06 tokens_per_second_per_gpu:2747.281150085522
|
497 |
+
Step 497 | loss:0.8915017247200012 lr:1.948673957885937e-06 tokens_per_second_per_gpu:2392.395426595406
|
498 |
+
Step 498 | loss:0.9766404628753662 lr:1.923947187083323e-06 tokens_per_second_per_gpu:2406.2149686418447
|
499 |
+
Step 499 | loss:0.903343915939331 lr:1.8993551958402444e-06 tokens_per_second_per_gpu:3398.4092182111635
|
500 |
+
Step 500 | loss:0.8752915263175964 lr:1.8748985785732007e-06 tokens_per_second_per_gpu:2725.839322744673
|
501 |
+
Step 501 | loss:0.9260117411613464 lr:1.8505779264265335e-06 tokens_per_second_per_gpu:2492.531712956765
|
502 |
+
Step 502 | loss:0.8971195220947266 lr:1.8263938272581646e-06 tokens_per_second_per_gpu:2901.933455496639
|
503 |
+
Step 503 | loss:0.8458098769187927 lr:1.8023468656253648e-06 tokens_per_second_per_gpu:2507.8217453974867
|
504 |
+
Step 504 | loss:1.0840578079223633 lr:1.7784376227706355e-06 tokens_per_second_per_gpu:2613.3231747806803
|
505 |
+
Step 505 | loss:0.9679098725318909 lr:1.7546666766076658e-06 tokens_per_second_per_gpu:2675.238641139705
|
506 |
+
Step 506 | loss:0.879393994808197 lr:1.7310346017073452e-06 tokens_per_second_per_gpu:2837.0564466992423
|
507 |
+
Step 507 | loss:0.7026180624961853 lr:1.7075419692838958e-06 tokens_per_second_per_gpu:2933.1935810857617
|
508 |
+
Step 508 | loss:0.848726212978363 lr:1.6841893471810504e-06 tokens_per_second_per_gpu:2718.772584505565
|
509 |
+
Step 509 | loss:0.9221838712692261 lr:1.6609772998583307e-06 tokens_per_second_per_gpu:1915.4857629787555
|
510 |
+
Step 510 | loss:0.8497966527938843 lr:1.6379063883774126e-06 tokens_per_second_per_gpu:2039.9625666861666
|
511 |
+
Step 511 | loss:0.8197170495986938 lr:1.6149771703885507e-06 tokens_per_second_per_gpu:2976.637056029542
|
512 |
+
Step 512 | loss:0.8008678555488586 lr:1.5921902001171148e-06 tokens_per_second_per_gpu:3667.89278300197
|
513 |
+
Step 513 | loss:0.9182004332542419 lr:1.5695460283501717e-06 tokens_per_second_per_gpu:2733.1730924705953
|
514 |
+
Step 514 | loss:0.9430605173110962 lr:1.5470452024231982e-06 tokens_per_second_per_gpu:2897.3090684371555
|
515 |
+
Step 515 | loss:0.7945107817649841 lr:1.5246882662068304e-06 tokens_per_second_per_gpu:3425.0149827938044
|
516 |
+
Step 516 | loss:0.9675235748291016 lr:1.5024757600937314e-06 tokens_per_second_per_gpu:2672.878682305898
|
517 |
+
Step 517 | loss:0.8425970673561096 lr:1.4804082209855243e-06 tokens_per_second_per_gpu:2473.8463345979508
|
518 |
+
Step 518 | loss:0.8976823687553406 lr:1.4584861822798037e-06 tokens_per_second_per_gpu:2927.3740187446383
|
519 |
+
Step 519 | loss:0.9743514657020569 lr:1.4367101738572688e-06 tokens_per_second_per_gpu:2356.6379119384883
|
520 |
+
Step 520 | loss:0.8489241600036621 lr:1.4150807220688866e-06 tokens_per_second_per_gpu:2448.2238090330206
|
521 |
+
Step 521 | loss:0.7413209676742554 lr:1.3935983497231943e-06 tokens_per_second_per_gpu:3176.6007732610974
|
522 |
+
Step 522 | loss:0.7783915400505066 lr:1.3722635760736431e-06 tokens_per_second_per_gpu:3504.9946590495997
|
523 |
+
Step 523 | loss:0.8300887942314148 lr:1.351076916806057e-06 tokens_per_second_per_gpu:2424.9390276385934
|
524 |
+
Step 524 | loss:0.9937973618507385 lr:1.3300388840261708e-06 tokens_per_second_per_gpu:2906.0669261298367
|
525 |
+
Step 525 | loss:0.9671134948730469 lr:1.3091499862472395e-06 tokens_per_second_per_gpu:2832.7584552787275
|
526 |
+
Step 526 | loss:0.9223825931549072 lr:1.2884107283777644e-06 tokens_per_second_per_gpu:2943.4211173912004
|
527 |
+
Step 527 | loss:0.911368727684021 lr:1.2678216117092729e-06 tokens_per_second_per_gpu:3176.3560024048184
|
528 |
+
Step 528 | loss:0.9336380958557129 lr:1.2473831339042085e-06 tokens_per_second_per_gpu:2298.0152377434993
|
529 |
+
Step 529 | loss:0.8240398168563843 lr:1.227095788983903e-06 tokens_per_second_per_gpu:2798.8999239376394
|
530 |
+
Step 530 | loss:0.7940638065338135 lr:1.206960067316636e-06 tokens_per_second_per_gpu:3165.2590382202575
|
531 |
+
Step 531 | loss:0.8540267944335938 lr:1.186976455605775e-06 tokens_per_second_per_gpu:2187.2335292148864
|
532 |
+
Step 532 | loss:0.8410139083862305 lr:1.1671454368780222e-06 tokens_per_second_per_gpu:2474.942310509921
|
533 |
+
Step 533 | loss:1.1205919981002808 lr:1.1474674904717284e-06 tokens_per_second_per_gpu:2181.678986703241
|
534 |
+
Step 534 | loss:0.8875789642333984 lr:1.1279430920253122e-06 tokens_per_second_per_gpu:2593.2963355514185
|
535 |
+
Step 535 | loss:0.9577472805976868 lr:1.1085727134657672e-06 tokens_per_second_per_gpu:2744.679461314659
|
536 |
+
Step 536 | loss:0.7591468691825867 lr:1.0893568229972445e-06 tokens_per_second_per_gpu:2984.9695645483403
|
537 |
+
Step 537 | loss:0.8278703093528748 lr:1.0702958850897482e-06 tokens_per_second_per_gpu:3224.3677192462064
|
538 |
+
Step 538 | loss:0.9749331474304199 lr:1.0513903604678959e-06 tokens_per_second_per_gpu:2179.7933704651227
|
539 |
+
Step 539 | loss:0.8200403451919556 lr:1.0326407060997912e-06 tokens_per_second_per_gpu:2585.873381589468
|
540 |
+
Step 540 | loss:0.8951087594032288 lr:1.0140473751859792e-06 tokens_per_second_per_gpu:2359.7089376063186
|
541 |
+
Step 541 | loss:0.8001818060874939 lr:9.956108171484813e-07 tokens_per_second_per_gpu:3679.2858057381336
|
542 |
+
Step 542 | loss:0.9417346119880676 lr:9.773314776199499e-07 tokens_per_second_per_gpu:3021.156241879852
|
543 |
+
Step 543 | loss:0.7579294443130493 lr:9.59209798432874e-07 tokens_per_second_per_gpu:2599.789473685248
|
544 |
+
Step 544 | loss:0.9392054080963135 lr:9.412462176089243e-07 tokens_per_second_per_gpu:2654.3657026369724
|
545 |
+
Step 545 | loss:1.048731803894043 lr:9.234411693483439e-07 tokens_per_second_per_gpu:2791.9461967353373
|
546 |
+
Step 546 | loss:0.8341087102890015 lr:9.057950840194687e-07 tokens_per_second_per_gpu:2785.6629892119927
|
547 |
+
Step 547 | loss:0.8932492136955261 lr:8.883083881483225e-07 tokens_per_second_per_gpu:2820.3743680286916
|
548 |
+
Step 548 | loss:0.922091543674469 lr:8.709815044082916e-07 tokens_per_second_per_gpu:2609.799256644826
|
549 |
+
Step 549 | loss:0.8205937147140503 lr:8.53814851609934e-07 tokens_per_second_per_gpu:2846.3717400831083
|
550 |
+
Step 550 | loss:0.7681778073310852 lr:8.368088446908337e-07 tokens_per_second_per_gpu:3047.900830971702
|
551 |
+
Step 551 | loss:1.028059720993042 lr:8.199638947055871e-07 tokens_per_second_per_gpu:2346.733057129004
|
552 |
+
Step 552 | loss:0.9609188437461853 lr:8.032804088158568e-07 tokens_per_second_per_gpu:2578.2629488621687
|
553 |
+
Step 553 | loss:0.8993929028511047 lr:7.867587902805334e-07 tokens_per_second_per_gpu:2471.7660146749104
|
554 |
+
Step 554 | loss:0.9311540126800537 lr:7.703994384459929e-07 tokens_per_second_per_gpu:2232.6221368289566
|
555 |
+
Step 555 | loss:1.0247182846069336 lr:7.542027487364342e-07 tokens_per_second_per_gpu:2030.4209899077885
|
556 |
+
Step 556 | loss:0.8552548885345459 lr:7.381691126443343e-07 tokens_per_second_per_gpu:2798.180824884108
|
557 |
+
Step 557 | loss:0.8369226455688477 lr:7.222989177209718e-07 tokens_per_second_per_gpu:1813.2487757380513
|
558 |
+
Step 558 | loss:0.9284541010856628 lr:7.065925475670679e-07 tokens_per_second_per_gpu:2572.308551755442
|
559 |
+
Step 559 | loss:1.1530259847640991 lr:6.910503818235119e-07 tokens_per_second_per_gpu:1981.7748187521522
|
560 |
+
Step 560 | loss:0.8481274843215942 lr:6.75672796162187e-07 tokens_per_second_per_gpu:2449.174249102278
|
561 |
+
Step 561 | loss:1.0015965700149536 lr:6.604601622768886e-07 tokens_per_second_per_gpu:2805.9037697407794
|
562 |
+
Step 562 | loss:0.8001527786254883 lr:6.454128478743318e-07 tokens_per_second_per_gpu:3317.809001429568
|
563 |
+
Step 563 | loss:0.78518146276474 lr:6.305312166652816e-07 tokens_per_second_per_gpu:3155.550225666974
|
564 |
+
Step 564 | loss:0.875720739364624 lr:6.158156283557429e-07 tokens_per_second_per_gpu:2783.8704022847533
|
565 |
+
Step 565 | loss:0.9692037105560303 lr:6.01266438638281e-07 tokens_per_second_per_gpu:2805.3746426385587
|
566 |
+
Step 566 | loss:1.054160237312317 lr:5.868839991834129e-07 tokens_per_second_per_gpu:2411.8992377023055
|
567 |
+
Step 567 | loss:0.9159079194068909 lr:5.726686576311104e-07 tokens_per_second_per_gpu:2706.8539397659056
|
568 |
+
Step 568 | loss:0.8968900442123413 lr:5.586207575824046e-07 tokens_per_second_per_gpu:3760.640807850762
|
569 |
+
Step 569 | loss:0.8418700098991394 lr:5.447406385910661e-07 tokens_per_second_per_gpu:2949.5486193635475
|
570 |
+
Step 570 | loss:1.0156406164169312 lr:5.310286361554125e-07 tokens_per_second_per_gpu:2600.276591475816
|
571 |
+
Step 571 | loss:0.825329601764679 lr:5.174850817101903e-07 tokens_per_second_per_gpu:2637.289992319456
|
572 |
+
Step 572 | loss:0.9666601419448853 lr:5.041103026185642e-07 tokens_per_second_per_gpu:2454.7473876850663
|
573 |
+
Step 573 | loss:0.6319097280502319 lr:4.909046221642066e-07 tokens_per_second_per_gpu:4199.945046520476
|
574 |
+
Step 574 | loss:0.8879971504211426 lr:4.77868359543486e-07 tokens_per_second_per_gpu:2809.1533118309153
|
575 |
+
Step 575 | loss:0.8433800935745239 lr:4.6500182985774406e-07 tokens_per_second_per_gpu:3222.208727332213
|
576 |
+
Step 576 | loss:0.7500202655792236 lr:4.5230534410568764e-07 tokens_per_second_per_gpu:3835.5875929096846
|
577 |
+
Step 577 | loss:0.8776817321777344 lr:4.3977920917586455e-07 tokens_per_second_per_gpu:3588.3562798100056
|
578 |
+
Step 578 | loss:0.7789593935012817 lr:4.2742372783924984e-07 tokens_per_second_per_gpu:2702.664818633485
|
579 |
+
Step 579 | loss:1.0058625936508179 lr:4.1523919874192797e-07 tokens_per_second_per_gpu:3198.8889390277964
|
580 |
+
Step 580 | loss:1.1509525775909424 lr:4.0322591639786885e-07 tokens_per_second_per_gpu:1779.936655769417
|
581 |
+
Step 581 | loss:0.8506143093109131 lr:3.9138417118182e-07 tokens_per_second_per_gpu:2704.5816345540325
|
582 |
+
Step 582 | loss:1.063564419746399 lr:3.797142493222733e-07 tokens_per_second_per_gpu:1961.6927563427632
|
583 |
+
Step 583 | loss:1.075540542602539 lr:3.6821643289455586e-07 tokens_per_second_per_gpu:2343.735008970859
|
584 |
+
Step 584 | loss:0.9307299852371216 lr:3.5689099981401517e-07 tokens_per_second_per_gpu:2528.258117529406
|
585 |
+
Step 585 | loss:0.909044086933136 lr:3.457382238292892e-07 tokens_per_second_per_gpu:2716.7367247826746
|
586 |
+
Step 586 | loss:0.9028974771499634 lr:3.3475837451570404e-07 tokens_per_second_per_gpu:2008.4029901200033
|
587 |
+
Step 587 | loss:0.8828522562980652 lr:3.239517172687428e-07 tokens_per_second_per_gpu:2328.6354197459186
|
588 |
+
Step 588 | loss:0.9550468921661377 lr:3.1331851329764654e-07 tokens_per_second_per_gpu:2858.7367313426307
|
589 |
+
Step 589 | loss:0.964181661605835 lr:3.028590196190864e-07 tokens_per_second_per_gpu:2039.593965890504
|
590 |
+
Step 590 | loss:0.8647264838218689 lr:2.9257348905096156e-07 tokens_per_second_per_gpu:2408.2528550609186
|
591 |
+
Step 591 | loss:0.7582616209983826 lr:2.8246217020628544e-07 tokens_per_second_per_gpu:3584.2282370995854
|
592 |
+
Step 592 | loss:0.9030933380126953 lr:2.725253074871692e-07 tokens_per_second_per_gpu:2740.8135271861065
|
593 |
+
Step 593 | loss:0.820608913898468 lr:2.6276314107892607e-07 tokens_per_second_per_gpu:2632.8270224856783
|
594 |
+
Step 594 | loss:0.9622331261634827 lr:2.5317590694425716e-07 tokens_per_second_per_gpu:2733.3939723230974
|
595 |
+
Step 595 | loss:0.8576785326004028 lr:2.4376383681755323e-07 tokens_per_second_per_gpu:2548.113990591433
|
596 |
+
Step 596 | loss:0.8732478022575378 lr:2.345271581992886e-07 tokens_per_second_per_gpu:2347.85222649422
|
597 |
+
Step 597 | loss:0.9593178033828735 lr:2.2546609435052546e-07 tokens_per_second_per_gpu:2727.4039541246357
|
598 |
+
Step 598 | loss:0.9085021018981934 lr:2.1658086428751888e-07 tokens_per_second_per_gpu:2564.083915396395
|
599 |
+
Step 599 | loss:0.9484013915061951 lr:2.0787168277641627e-07 tokens_per_second_per_gpu:2287.069566160566
|
600 |
+
Step 600 | loss:0.7718688249588013 lr:1.993387603280755e-07 tokens_per_second_per_gpu:2726.9851612511115
|
601 |
+
Step 601 | loss:0.9098013043403625 lr:1.9098230319296834e-07 tokens_per_second_per_gpu:1928.3522236853428
|
602 |
+
Step 602 | loss:0.8552864789962769 lr:1.828025133561992e-07 tokens_per_second_per_gpu:2926.7052991542437
|
603 |
+
Step 603 | loss:0.9261455535888672 lr:1.747995885326234e-07 tokens_per_second_per_gpu:3406.735424527618
|
604 |
+
Step 604 | loss:0.9243958592414856 lr:1.6697372216206514e-07 tokens_per_second_per_gpu:2535.3725387216314
|
605 |
+
Step 605 | loss:0.9653189182281494 lr:1.5932510340464694e-07 tokens_per_second_per_gpu:2746.3850646471515
|
606 |
+
Step 606 | loss:0.834320604801178 lr:1.51853917136211e-07 tokens_per_second_per_gpu:3429.3423919016786
|
607 |
+
Step 607 | loss:0.8928940892219543 lr:1.4456034394385587e-07 tokens_per_second_per_gpu:2297.879477800558
|
608 |
+
Step 608 | loss:0.7676761746406555 lr:1.3744456012156764e-07 tokens_per_second_per_gpu:3314.490893171432
|
609 |
+
Step 609 | loss:0.8587438464164734 lr:1.305067376659616e-07 tokens_per_second_per_gpu:2284.4486288898347
|
610 |
+
Step 610 | loss:1.0006862878799438 lr:1.2374704427212392e-07 tokens_per_second_per_gpu:2558.2291684611323
|
611 |
+
Step 611 | loss:0.9732027649879456 lr:1.1716564332955815e-07 tokens_per_second_per_gpu:3952.588941664968
|
612 |
+
Step 612 | loss:1.2337478399276733 lr:1.1076269391823435e-07 tokens_per_second_per_gpu:1877.1004881061126
|
613 |
+
Step 613 | loss:0.8134404420852661 lr:1.0453835080474706e-07 tokens_per_second_per_gpu:3074.0139322158097
|
614 |
+
Step 614 | loss:1.006320834159851 lr:9.849276443857164e-08 tokens_per_second_per_gpu:2227.792011738089
|
615 |
+
Step 615 | loss:0.8254751563072205 lr:9.262608094842972e-08 tokens_per_second_per_gpu:2500.2796254751793
|
616 |
+
Step 616 | loss:0.8692588806152344 lr:8.693844213875702e-08 tokens_per_second_per_gpu:2593.9356798742424
|
617 |
+
Step 617 | loss:0.9400972723960876 lr:8.142998548627184e-08 tokens_per_second_per_gpu:2395.1619591858566
|
618 |
+
Step 618 | loss:0.9176926016807556 lr:7.610084413665869e-08 tokens_per_second_per_gpu:3514.4466872465096
|
619 |
+
Step 619 | loss:0.812978208065033 lr:7.095114690134574e-08 tokens_per_second_per_gpu:2740.6624145248784
|
620 |
+
Step 620 | loss:0.8113747835159302 lr:6.598101825439074e-08 tokens_per_second_per_gpu:2798.1631566407473
|
621 |
+
Step 621 | loss:0.9036160707473755 lr:6.1190578329475e-08 tokens_per_second_per_gpu:2909.508395066856
|
622 |
+
Step 622 | loss:0.9566043615341187 lr:5.6579942916997474e-08 tokens_per_second_per_gpu:2437.719725089656
|
623 |
+
Step 623 | loss:0.8796412348747253 lr:5.214922346127859e-08 tokens_per_second_per_gpu:2324.519840460166
|
624 |
+
Step 624 | loss:0.861414909362793 lr:4.7898527057862464e-08 tokens_per_second_per_gpu:2770.5794749156607
|
625 |
+
Step 625 | loss:0.8064419031143188 lr:4.382795645093307e-08 tokens_per_second_per_gpu:2469.646669989894
|
626 |
+
Step 626 | loss:0.9710454940795898 lr:3.9937610030827134e-08 tokens_per_second_per_gpu:2404.1522318995067
|
627 |
+
Step 627 | loss:0.881490170955658 lr:3.6227581831655144e-08 tokens_per_second_per_gpu:2572.367154951392
|
628 |
+
Step 628 | loss:0.82835853099823 lr:3.269796152903404e-08 tokens_per_second_per_gpu:3620.7784223349213
|
629 |
+
Step 629 | loss:0.8445915579795837 lr:2.934883443791392e-08 tokens_per_second_per_gpu:2854.4862142352713
|
630 |
+
Step 630 | loss:1.0001657009124756 lr:2.6180281510518044e-08 tokens_per_second_per_gpu:2978.064983348182
|
631 |
+
Step 631 | loss:0.9440408945083618 lr:2.3192379334386894e-08 tokens_per_second_per_gpu:2696.540350146697
|
632 |
+
Step 632 | loss:0.7228038907051086 lr:2.0385200130525472e-08 tokens_per_second_per_gpu:3655.488183918562
|
633 |
+
Step 633 | loss:0.9987472891807556 lr:1.7758811751658887e-08 tokens_per_second_per_gpu:2617.6189994415217
|
634 |
+
Step 634 | loss:0.8774012923240662 lr:1.531327768059032e-08 tokens_per_second_per_gpu:2923.040995277404
|
635 |
+
Step 635 | loss:0.949100911617279 lr:1.3048657028669752e-08 tokens_per_second_per_gpu:2582.158040430071
|
636 |
+
Step 636 | loss:0.940438985824585 lr:1.096500453436261e-08 tokens_per_second_per_gpu:2001.7866179040327
|
637 |
+
Step 637 | loss:0.9458042979240417 lr:9.062370561927491e-09 tokens_per_second_per_gpu:2777.8153040677407
|
638 |
+
Step 638 | loss:0.8899053931236267 lr:7.340801100197969e-09 tokens_per_second_per_gpu:2513.1679147270943
|
639 |
+
Step 639 | loss:0.8548555374145508 lr:5.800337761473484e-09 tokens_per_second_per_gpu:2289.6067396171884
|
640 |
+
Step 640 | loss:0.9246522784233093 lr:4.441017780510148e-09 tokens_per_second_per_gpu:2645.0215660487215
|
641 |
+
Step 641 | loss:1.0237187147140503 lr:3.262874013622297e-09 tokens_per_second_per_gpu:2515.905628267492
|
642 |
+
Step 642 | loss:0.8752281665802002 lr:2.265934937891456e-09 tokens_per_second_per_gpu:2979.220301682006
|
643 |
+
Step 643 | loss:0.962019681930542 lr:1.450224650471066e-09 tokens_per_second_per_gpu:2538.88920163452
|
644 |
+
Step 644 | loss:0.8064459562301636 lr:8.157628680094397e-10 tokens_per_second_per_gpu:2978.4258421814507
|
645 |
+
Step 645 | loss:0.8946848511695862 lr:3.6256492617264826e-10 tokens_per_second_per_gpu:2379.746472667621
|
646 |
+
Step 646 | loss:0.7861452102661133 lr:9.064177927231576e-11 tokens_per_second_per_gpu:2941.276869680707
|
647 |
+
Step 647 | loss:0.9995426535606384 lr:0.0 tokens_per_second_per_gpu:2276.393936517497
|
training_config_phase3.yaml
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Config for multi-device full finetuning in full_finetune_distributed.py
|
2 |
+
# using the llama3_1_s_8b model configured below (this header is adapted from the stock llama3/8B_full config)
|
3 |
+
#
|
4 |
+
# This config assumes that you've run the following command before launching
|
5 |
+
# this run:
|
6 |
+
# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
|
7 |
+
#
|
8 |
+
# To launch on 4 devices, run the following command from root:
|
9 |
+
# tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full
|
10 |
+
#
|
11 |
+
# You can add specific overrides through the command line. For example
|
12 |
+
# to override the checkpointer directory while launching training
|
13 |
+
# you can run:
|
14 |
+
# tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
|
15 |
+
#
|
16 |
+
# This config works best when the model is being fine-tuned on 2+ GPUs.
|
17 |
+
# Single device full finetuning requires more memory optimizations. It's
|
18 |
+
# best to use 8B_full_single_device.yaml for those cases.
|
19 |
+
# Tokenizer
|
20 |
+
tokenizer:
|
21 |
+
_component_: torchtune.models.llama3.llama3_s_tokenizer
|
22 |
+
path: ../model_zoo/tokenizer.model
|
23 |
+
max_seq_len: 4096
|
24 |
+
|
25 |
+
# Dataset
|
26 |
+
dataset:
|
27 |
+
_component_: torchtune.datasets.chat_dataset
|
28 |
+
source: jan-hq/mixed-instruction-speech-multiturn-noise-clean
|
29 |
+
conversation_style: openai
|
30 |
+
max_seq_len: 4096
|
31 |
+
split: train
|
32 |
+
train_on_input: True
|
33 |
+
|
34 |
+
seed: 42
|
35 |
+
shuffle: False
|
36 |
+
# Model Arguments
|
37 |
+
model:
|
38 |
+
_component_: torchtune.models.llama3_1.llama3_1_s_8b
|
39 |
+
# path: model_zoo/Llama3.1_s_8b_init
|
40 |
+
checkpointer:
|
41 |
+
_component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
|
42 |
+
checkpoint_dir: ../model_zoo/llama3.1-s-cp-7000
|
43 |
+
checkpoint_files: [
|
44 |
+
model-00001-of-00004.safetensors,
|
45 |
+
model-00002-of-00004.safetensors,
|
46 |
+
model-00003-of-00004.safetensors,
|
47 |
+
model-00004-of-00004.safetensors,
|
48 |
+
]
|
49 |
+
recipe_checkpoint: null
|
50 |
+
output_dir: ../model_zoo/llama3-s-instruct-lr-3e-5
|
51 |
+
model_type: LLAMA3
|
52 |
+
resume_from_checkpoint: False
|
53 |
+
save_every_n_steps: 200
|
54 |
+
max_checkpoints: 3
|
55 |
+
# Fine-tuning arguments
|
56 |
+
batch_size: 4
|
57 |
+
epochs: 1
|
58 |
+
max_steps_per_epoch: null
|
59 |
+
gradient_accumulation_steps: 8
|
60 |
+
compile: False
|
61 |
+
# Optimizer and Scheduler
|
62 |
+
optimizer:
|
63 |
+
_component_: torch.optim.AdamW # to use Adam-mini instead, set this to torchtune.modules.optimizer.Adam_mini
|
64 |
+
weight_decay: 0.005
|
65 |
+
lr: 1.5e-5
|
66 |
+
fused: True
|
67 |
+
lr_scheduler:
|
68 |
+
_component_: torchtune.modules.get_cosine_schedule_with_warmup
|
69 |
+
num_warmup_steps: 8
|
70 |
+
|
71 |
+
loss:
|
72 |
+
_component_: torch.nn.CrossEntropyLoss
|
73 |
+
|
74 |
+
fsdp:
|
75 |
+
cpu_offload: False
|
76 |
+
|
77 |
+
# Training env
|
78 |
+
device: cuda
|
79 |
+
dtype: bf16
|
80 |
+
|
81 |
+
# Memory management
|
82 |
+
enable_activation_checkpointing: True
|
83 |
+
memory_efficient_fsdp_wrap: True
|
84 |
+
ac_mode: 'selective'
|
85 |
+
|
86 |
+
|
87 |
+
# Logging
|
88 |
+
metric_logger:
|
89 |
+
_component_: torchtune.utils.metric_logging.DiskLogger
|
90 |
+
log_dir: ${output_dir}
|
91 |
+
output_dir: ../model_zoo/Llama3-instruct-log-lr-3e-5/
|
92 |
+
log_every_n_steps: 1
|
93 |
+
log_peak_memory_stats: False
|