svjack's picture
Upload folder using huggingface_hub
d16fbc5 verified
{"current_steps": 5, "total_steps": 375, "loss": 0.688, "learning_rate": 4.997807075247146e-05, "epoch": 0.04, "percentage": 1.33, "elapsed_time": "0:00:57", "remaining_time": "1:11:26", "throughput": "807.59", "total_tokens": 46776}
{"current_steps": 10, "total_steps": 375, "loss": 0.763, "learning_rate": 4.991232148123761e-05, "epoch": 0.08, "percentage": 2.67, "elapsed_time": "0:01:47", "remaining_time": "1:05:40", "throughput": "788.63", "total_tokens": 85136}
{"current_steps": 15, "total_steps": 375, "loss": 0.6882, "learning_rate": 4.980286753286195e-05, "epoch": 0.12, "percentage": 4.0, "elapsed_time": "0:02:41", "remaining_time": "1:04:42", "throughput": "782.46", "total_tokens": 126584}
{"current_steps": 20, "total_steps": 375, "loss": 0.6951, "learning_rate": 4.964990092676263e-05, "epoch": 0.16, "percentage": 5.33, "elapsed_time": "0:03:35", "remaining_time": "1:03:49", "throughput": "778.58", "total_tokens": 167968}
{"current_steps": 25, "total_steps": 375, "loss": 0.5008, "learning_rate": 4.9453690018345144e-05, "epoch": 0.2, "percentage": 6.67, "elapsed_time": "0:04:26", "remaining_time": "1:02:07", "throughput": "776.76", "total_tokens": 206832}
{"current_steps": 30, "total_steps": 375, "loss": 0.542, "learning_rate": 4.9214579028215776e-05, "epoch": 0.24, "percentage": 8.0, "elapsed_time": "0:05:12", "remaining_time": "0:59:50", "throughput": "780.46", "total_tokens": 243656}
{"current_steps": 35, "total_steps": 375, "loss": 0.5369, "learning_rate": 4.893298743830168e-05, "epoch": 0.28, "percentage": 9.33, "elapsed_time": "0:06:03", "remaining_time": "0:58:51", "throughput": "781.27", "total_tokens": 284016}
{"current_steps": 40, "total_steps": 375, "loss": 0.4948, "learning_rate": 4.860940925593703e-05, "epoch": 0.32, "percentage": 10.67, "elapsed_time": "0:06:59", "remaining_time": "0:58:33", "throughput": "780.39", "total_tokens": 327408}
{"current_steps": 45, "total_steps": 375, "loss": 0.5244, "learning_rate": 4.8244412147206284e-05, "epoch": 0.36, "percentage": 12.0, "elapsed_time": "0:07:46", "remaining_time": "0:57:02", "throughput": "778.70", "total_tokens": 363376}
{"current_steps": 50, "total_steps": 375, "loss": 0.421, "learning_rate": 4.783863644106502e-05, "epoch": 0.4, "percentage": 13.33, "elapsed_time": "0:08:31", "remaining_time": "0:55:22", "throughput": "780.25", "total_tokens": 398840}
{"current_steps": 55, "total_steps": 375, "loss": 0.4517, "learning_rate": 4.7392794005985326e-05, "epoch": 0.44, "percentage": 14.67, "elapsed_time": "0:09:19", "remaining_time": "0:54:13", "throughput": "779.83", "total_tokens": 436136}
{"current_steps": 60, "total_steps": 375, "loss": 0.4661, "learning_rate": 4.690766700109659e-05, "epoch": 0.48, "percentage": 16.0, "elapsed_time": "0:10:13", "remaining_time": "0:53:41", "throughput": "775.58", "total_tokens": 475856}
{"current_steps": 65, "total_steps": 375, "loss": 0.4928, "learning_rate": 4.638410650401267e-05, "epoch": 0.52, "percentage": 17.33, "elapsed_time": "0:11:03", "remaining_time": "0:52:43", "throughput": "775.62", "total_tokens": 514496}
{"current_steps": 70, "total_steps": 375, "loss": 0.5424, "learning_rate": 4.5823031017752485e-05, "epoch": 0.56, "percentage": 18.67, "elapsed_time": "0:11:54", "remaining_time": "0:51:53", "throughput": "775.79", "total_tokens": 554424}
{"current_steps": 75, "total_steps": 375, "loss": 0.5419, "learning_rate": 4.522542485937369e-05, "epoch": 0.6, "percentage": 20.0, "elapsed_time": "0:12:46", "remaining_time": "0:51:05", "throughput": "774.15", "total_tokens": 593264}
{"current_steps": 80, "total_steps": 375, "loss": 0.4558, "learning_rate": 4.4592336433146e-05, "epoch": 0.64, "percentage": 21.33, "elapsed_time": "0:13:33", "remaining_time": "0:49:59", "throughput": "774.75", "total_tokens": 630264}
{"current_steps": 85, "total_steps": 375, "loss": 0.5656, "learning_rate": 4.3924876391293915e-05, "epoch": 0.68, "percentage": 22.67, "elapsed_time": "0:14:21", "remaining_time": "0:48:57", "throughput": "776.75", "total_tokens": 668864}
{"current_steps": 90, "total_steps": 375, "loss": 0.4832, "learning_rate": 4.3224215685535294e-05, "epoch": 0.72, "percentage": 24.0, "elapsed_time": "0:15:12", "remaining_time": "0:48:09", "throughput": "780.75", "total_tokens": 712504}
{"current_steps": 95, "total_steps": 375, "loss": 0.4626, "learning_rate": 4.249158351283414e-05, "epoch": 0.76, "percentage": 25.33, "elapsed_time": "0:15:58", "remaining_time": "0:47:05", "throughput": "781.15", "total_tokens": 748872}
{"current_steps": 100, "total_steps": 375, "loss": 0.4837, "learning_rate": 4.172826515897146e-05, "epoch": 0.8, "percentage": 26.67, "elapsed_time": "0:16:50", "remaining_time": "0:46:18", "throughput": "780.33", "total_tokens": 788408}
{"current_steps": 105, "total_steps": 375, "loss": 0.5144, "learning_rate": 4.093559974371725e-05, "epoch": 0.84, "percentage": 28.0, "elapsed_time": "0:17:42", "remaining_time": "0:45:31", "throughput": "779.83", "total_tokens": 828448}
{"current_steps": 110, "total_steps": 375, "loss": 0.493, "learning_rate": 4.011497787155938e-05, "epoch": 0.88, "percentage": 29.33, "elapsed_time": "0:18:27", "remaining_time": "0:44:28", "throughput": "780.58", "total_tokens": 864680}
{"current_steps": 115, "total_steps": 375, "loss": 0.4083, "learning_rate": 3.92678391921108e-05, "epoch": 0.92, "percentage": 30.67, "elapsed_time": "0:19:14", "remaining_time": "0:43:30", "throughput": "781.80", "total_tokens": 902568}
{"current_steps": 120, "total_steps": 375, "loss": 0.5172, "learning_rate": 3.8395669874474915e-05, "epoch": 0.96, "percentage": 32.0, "elapsed_time": "0:20:08", "remaining_time": "0:42:47", "throughput": "782.01", "total_tokens": 944752}
{"current_steps": 125, "total_steps": 375, "loss": 0.5843, "learning_rate": 3.7500000000000003e-05, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:21:03", "remaining_time": "0:42:07", "throughput": "782.22", "total_tokens": 988656}
{"current_steps": 130, "total_steps": 375, "loss": 0.4567, "learning_rate": 3.6582400877996546e-05, "epoch": 1.04, "percentage": 34.67, "elapsed_time": "0:21:52", "remaining_time": "0:41:13", "throughput": "783.58", "total_tokens": 1028584}
{"current_steps": 135, "total_steps": 375, "loss": 0.418, "learning_rate": 3.564448228912682e-05, "epoch": 1.08, "percentage": 36.0, "elapsed_time": "0:22:36", "remaining_time": "0:40:12", "throughput": "784.96", "total_tokens": 1065104}
{"current_steps": 140, "total_steps": 375, "loss": 0.3785, "learning_rate": 3.4687889661302576e-05, "epoch": 1.12, "percentage": 37.33, "elapsed_time": "0:23:31", "remaining_time": "0:39:29", "throughput": "783.96", "total_tokens": 1106744}
{"current_steps": 145, "total_steps": 375, "loss": 0.4097, "learning_rate": 3.3714301183045385e-05, "epoch": 1.16, "percentage": 38.67, "elapsed_time": "0:24:16", "remaining_time": "0:38:30", "throughput": "783.11", "total_tokens": 1140672}
{"current_steps": 150, "total_steps": 375, "loss": 0.4507, "learning_rate": 3.272542485937369e-05, "epoch": 1.2, "percentage": 40.0, "elapsed_time": "0:25:03", "remaining_time": "0:37:34", "throughput": "783.25", "total_tokens": 1177280}
{"current_steps": 155, "total_steps": 375, "loss": 0.368, "learning_rate": 3.172299551538164e-05, "epoch": 1.24, "percentage": 41.33, "elapsed_time": "0:25:54", "remaining_time": "0:36:45", "throughput": "782.23", "total_tokens": 1215744}
{"current_steps": 160, "total_steps": 375, "loss": 0.4301, "learning_rate": 3.0708771752766394e-05, "epoch": 1.28, "percentage": 42.67, "elapsed_time": "0:26:46", "remaining_time": "0:35:59", "throughput": "782.26", "total_tokens": 1257040}
{"current_steps": 165, "total_steps": 375, "loss": 0.4488, "learning_rate": 2.9684532864643122e-05, "epoch": 1.32, "percentage": 44.0, "elapsed_time": "0:27:37", "remaining_time": "0:35:09", "throughput": "781.89", "total_tokens": 1295992}
{"current_steps": 170, "total_steps": 375, "loss": 0.4075, "learning_rate": 2.8652075714060295e-05, "epoch": 1.3599999999999999, "percentage": 45.33, "elapsed_time": "0:28:27", "remaining_time": "0:34:18", "throughput": "781.74", "total_tokens": 1334672}
{"current_steps": 175, "total_steps": 375, "loss": 0.4991, "learning_rate": 2.761321158169134e-05, "epoch": 1.4, "percentage": 46.67, "elapsed_time": "0:29:24", "remaining_time": "0:33:36", "throughput": "781.86", "total_tokens": 1379280}
{"current_steps": 180, "total_steps": 375, "loss": 0.4894, "learning_rate": 2.656976298823284e-05, "epoch": 1.44, "percentage": 48.0, "elapsed_time": "0:30:14", "remaining_time": "0:32:45", "throughput": "782.49", "total_tokens": 1419704}
{"current_steps": 185, "total_steps": 375, "loss": 0.4967, "learning_rate": 2.5523560497083926e-05, "epoch": 1.48, "percentage": 49.33, "elapsed_time": "0:31:05", "remaining_time": "0:31:56", "throughput": "782.06", "total_tokens": 1459256}
{"current_steps": 190, "total_steps": 375, "loss": 0.5297, "learning_rate": 2.447643950291608e-05, "epoch": 1.52, "percentage": 50.67, "elapsed_time": "0:32:00", "remaining_time": "0:31:09", "throughput": "783.03", "total_tokens": 1503576}
{"current_steps": 195, "total_steps": 375, "loss": 0.3939, "learning_rate": 2.3430237011767167e-05, "epoch": 1.56, "percentage": 52.0, "elapsed_time": "0:32:49", "remaining_time": "0:30:18", "throughput": "781.82", "total_tokens": 1539904}
{"current_steps": 200, "total_steps": 375, "loss": 0.461, "learning_rate": 2.238678841830867e-05, "epoch": 1.6, "percentage": 53.33, "elapsed_time": "0:33:43", "remaining_time": "0:29:30", "throughput": "781.19", "total_tokens": 1580560}
{"current_steps": 205, "total_steps": 375, "loss": 0.4622, "learning_rate": 2.1347924285939714e-05, "epoch": 1.6400000000000001, "percentage": 54.67, "elapsed_time": "0:34:41", "remaining_time": "0:28:46", "throughput": "779.87", "total_tokens": 1623608}
{"current_steps": 210, "total_steps": 375, "loss": 0.4043, "learning_rate": 2.031546713535688e-05, "epoch": 1.6800000000000002, "percentage": 56.0, "elapsed_time": "0:35:30", "remaining_time": "0:27:53", "throughput": "779.61", "total_tokens": 1660744}
{"current_steps": 215, "total_steps": 375, "loss": 0.428, "learning_rate": 1.9291228247233605e-05, "epoch": 1.72, "percentage": 57.33, "elapsed_time": "0:36:19", "remaining_time": "0:27:01", "throughput": "779.45", "total_tokens": 1698784}
{"current_steps": 220, "total_steps": 375, "loss": 0.3779, "learning_rate": 1.827700448461836e-05, "epoch": 1.76, "percentage": 58.67, "elapsed_time": "0:37:08", "remaining_time": "0:26:10", "throughput": "778.48", "total_tokens": 1734792}
{"current_steps": 225, "total_steps": 375, "loss": 0.4526, "learning_rate": 1.7274575140626318e-05, "epoch": 1.8, "percentage": 60.0, "elapsed_time": "0:37:59", "remaining_time": "0:25:19", "throughput": "779.26", "total_tokens": 1776144}
{"current_steps": 230, "total_steps": 375, "loss": 0.4627, "learning_rate": 1.6285698816954624e-05, "epoch": 1.8399999999999999, "percentage": 61.33, "elapsed_time": "0:38:50", "remaining_time": "0:24:29", "throughput": "779.12", "total_tokens": 1815864}
{"current_steps": 235, "total_steps": 375, "loss": 0.4873, "learning_rate": 1.5312110338697426e-05, "epoch": 1.88, "percentage": 62.67, "elapsed_time": "0:39:42", "remaining_time": "0:23:39", "throughput": "779.09", "total_tokens": 1856168}
{"current_steps": 240, "total_steps": 375, "loss": 0.3234, "learning_rate": 1.4355517710873184e-05, "epoch": 1.92, "percentage": 64.0, "elapsed_time": "0:40:34", "remaining_time": "0:22:49", "throughput": "780.05", "total_tokens": 1899120}
{"current_steps": 245, "total_steps": 375, "loss": 0.4438, "learning_rate": 1.3417599122003464e-05, "epoch": 1.96, "percentage": 65.33, "elapsed_time": "0:41:21", "remaining_time": "0:21:56", "throughput": "780.37", "total_tokens": 1936808}
{"current_steps": 250, "total_steps": 375, "loss": 0.4407, "learning_rate": 1.2500000000000006e-05, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:42:15", "remaining_time": "0:21:07", "throughput": "779.97", "total_tokens": 1977312}
{"current_steps": 255, "total_steps": 375, "loss": 0.4401, "learning_rate": 1.1604330125525079e-05, "epoch": 2.04, "percentage": 68.0, "elapsed_time": "0:43:09", "remaining_time": "0:20:18", "throughput": "779.51", "total_tokens": 2018336}
{"current_steps": 260, "total_steps": 375, "loss": 0.3771, "learning_rate": 1.0732160807889211e-05, "epoch": 2.08, "percentage": 69.33, "elapsed_time": "0:43:57", "remaining_time": "0:19:26", "throughput": "780.06", "total_tokens": 2057776}
{"current_steps": 265, "total_steps": 375, "loss": 0.4043, "learning_rate": 9.88502212844063e-06, "epoch": 2.12, "percentage": 70.67, "elapsed_time": "0:44:51", "remaining_time": "0:18:37", "throughput": "781.03", "total_tokens": 2102312}
{"current_steps": 270, "total_steps": 375, "loss": 0.4018, "learning_rate": 9.064400256282757e-06, "epoch": 2.16, "percentage": 72.0, "elapsed_time": "0:45:36", "remaining_time": "0:17:44", "throughput": "781.14", "total_tokens": 2137304}
{"current_steps": 275, "total_steps": 375, "loss": 0.4258, "learning_rate": 8.271734841028553e-06, "epoch": 2.2, "percentage": 73.33, "elapsed_time": "0:46:26", "remaining_time": "0:16:53", "throughput": "781.03", "total_tokens": 2176144}
{"current_steps": 280, "total_steps": 375, "loss": 0.3912, "learning_rate": 7.508416487165862e-06, "epoch": 2.24, "percentage": 74.67, "elapsed_time": "0:47:13", "remaining_time": "0:16:01", "throughput": "780.49", "total_tokens": 2211640}
{"current_steps": 285, "total_steps": 375, "loss": 0.3458, "learning_rate": 6.775784314464717e-06, "epoch": 2.2800000000000002, "percentage": 76.0, "elapsed_time": "0:48:00", "remaining_time": "0:15:09", "throughput": "780.17", "total_tokens": 2247272}
{"current_steps": 290, "total_steps": 375, "loss": 0.4255, "learning_rate": 6.075123608706093e-06, "epoch": 2.32, "percentage": 77.33, "elapsed_time": "0:48:49", "remaining_time": "0:14:18", "throughput": "780.22", "total_tokens": 2285776}
{"current_steps": 295, "total_steps": 375, "loss": 0.4222, "learning_rate": 5.4076635668540075e-06, "epoch": 2.36, "percentage": 78.67, "elapsed_time": "0:49:39", "remaining_time": "0:13:27", "throughput": "780.80", "total_tokens": 2326112}
{"current_steps": 300, "total_steps": 375, "loss": 0.399, "learning_rate": 4.7745751406263165e-06, "epoch": 2.4, "percentage": 80.0, "elapsed_time": "0:50:27", "remaining_time": "0:12:36", "throughput": "780.45", "total_tokens": 2362816}
{"current_steps": 305, "total_steps": 375, "loss": 0.3382, "learning_rate": 4.176968982247514e-06, "epoch": 2.44, "percentage": 81.33, "elapsed_time": "0:51:19", "remaining_time": "0:11:46", "throughput": "780.00", "total_tokens": 2402328}
{"current_steps": 310, "total_steps": 375, "loss": 0.4465, "learning_rate": 3.6158934959873353e-06, "epoch": 2.48, "percentage": 82.67, "elapsed_time": "0:52:14", "remaining_time": "0:10:57", "throughput": "780.28", "total_tokens": 2445600}
{"current_steps": 315, "total_steps": 375, "loss": 0.325, "learning_rate": 3.092332998903416e-06, "epoch": 2.52, "percentage": 84.0, "elapsed_time": "0:53:07", "remaining_time": "0:10:07", "throughput": "779.93", "total_tokens": 2485840}
{"current_steps": 320, "total_steps": 375, "loss": 0.392, "learning_rate": 2.6072059940146775e-06, "epoch": 2.56, "percentage": 85.33, "elapsed_time": "0:53:58", "remaining_time": "0:09:16", "throughput": "779.59", "total_tokens": 2524760}
{"current_steps": 325, "total_steps": 375, "loss": 0.3672, "learning_rate": 2.1613635589349756e-06, "epoch": 2.6, "percentage": 86.67, "elapsed_time": "0:54:47", "remaining_time": "0:08:25", "throughput": "779.34", "total_tokens": 2562040}
{"current_steps": 330, "total_steps": 375, "loss": 0.3554, "learning_rate": 1.7555878527937164e-06, "epoch": 2.64, "percentage": 88.0, "elapsed_time": "0:55:37", "remaining_time": "0:07:35", "throughput": "779.07", "total_tokens": 2600056}
{"current_steps": 335, "total_steps": 375, "loss": 0.3801, "learning_rate": 1.3905907440629752e-06, "epoch": 2.68, "percentage": 89.33, "elapsed_time": "0:56:21", "remaining_time": "0:06:43", "throughput": "778.76", "total_tokens": 2633392}
{"current_steps": 340, "total_steps": 375, "loss": 0.435, "learning_rate": 1.067012561698319e-06, "epoch": 2.7199999999999998, "percentage": 90.67, "elapsed_time": "0:57:14", "remaining_time": "0:05:53", "throughput": "779.56", "total_tokens": 2677016}
{"current_steps": 345, "total_steps": 375, "loss": 0.4063, "learning_rate": 7.854209717842231e-07, "epoch": 2.76, "percentage": 92.0, "elapsed_time": "0:58:09", "remaining_time": "0:05:03", "throughput": "779.43", "total_tokens": 2719536}
{"current_steps": 350, "total_steps": 375, "loss": 0.4894, "learning_rate": 5.463099816548579e-07, "epoch": 2.8, "percentage": 93.33, "elapsed_time": "0:59:09", "remaining_time": "0:04:13", "throughput": "779.76", "total_tokens": 2767424}
{"current_steps": 355, "total_steps": 375, "loss": 0.3822, "learning_rate": 3.5009907323737825e-07, "epoch": 2.84, "percentage": 94.67, "elapsed_time": "0:59:59", "remaining_time": "0:03:22", "throughput": "779.50", "total_tokens": 2805832}
{"current_steps": 360, "total_steps": 375, "loss": 0.4028, "learning_rate": 1.9713246713805588e-07, "epoch": 2.88, "percentage": 96.0, "elapsed_time": "1:00:51", "remaining_time": "0:02:32", "throughput": "779.46", "total_tokens": 2845912}
{"current_steps": 365, "total_steps": 375, "loss": 0.4293, "learning_rate": 8.767851876239074e-08, "epoch": 2.92, "percentage": 97.33, "elapsed_time": "1:01:44", "remaining_time": "0:01:41", "throughput": "779.71", "total_tokens": 2888128}
{"current_steps": 370, "total_steps": 375, "loss": 0.428, "learning_rate": 2.192924752854042e-08, "epoch": 2.96, "percentage": 98.67, "elapsed_time": "1:02:30", "remaining_time": "0:00:50", "throughput": "779.90", "total_tokens": 2925024}
{"current_steps": 375, "total_steps": 375, "loss": 0.4766, "learning_rate": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:03:23", "remaining_time": "0:00:00", "throughput": "779.83", "total_tokens": 2965968}
{"current_steps": 375, "total_steps": 375, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:03:23", "remaining_time": "0:00:00", "throughput": "779.83", "total_tokens": 2965968}