DongfuJiang
commited on
Commit
•
93620e3
1
Parent(s):
325cfc3
Training in progress, step 1200
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +201 -0
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 54446840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffd7c97a99f476106e9a4c117b69ad4aad5bf1314e6751088013c5c1ad7dd8ed
|
3 |
size 54446840
|
trainer_log.jsonl
CHANGED
@@ -399,3 +399,204 @@
|
|
399 |
{"current_steps": 796, "total_steps": 3080, "loss": 0.286, "learning_rate": 8.858445399165033e-06, "epoch": 0.516631510627941, "percentage": 25.84, "elapsed_time": "4:41:13", "remaining_time": "13:26:56", "throughput": "0.00", "total_tokens": 0}
|
400 |
{"current_steps": 798, "total_steps": 3080, "loss": 0.2698, "learning_rate": 8.851607893136065e-06, "epoch": 0.5179295797501217, "percentage": 25.91, "elapsed_time": "4:41:54", "remaining_time": "13:26:10", "throughput": "0.00", "total_tokens": 0}
|
401 |
{"current_steps": 800, "total_steps": 3080, "loss": 0.2875, "learning_rate": 8.84475262669681e-06, "epoch": 0.5192276488723024, "percentage": 25.97, "elapsed_time": "4:42:33", "remaining_time": "13:25:17", "throughput": "0.00", "total_tokens": 0}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
{"current_steps": 796, "total_steps": 3080, "loss": 0.286, "learning_rate": 8.858445399165033e-06, "epoch": 0.516631510627941, "percentage": 25.84, "elapsed_time": "4:41:13", "remaining_time": "13:26:56", "throughput": "0.00", "total_tokens": 0}
|
400 |
{"current_steps": 798, "total_steps": 3080, "loss": 0.2698, "learning_rate": 8.851607893136065e-06, "epoch": 0.5179295797501217, "percentage": 25.91, "elapsed_time": "4:41:54", "remaining_time": "13:26:10", "throughput": "0.00", "total_tokens": 0}
|
401 |
{"current_steps": 800, "total_steps": 3080, "loss": 0.2875, "learning_rate": 8.84475262669681e-06, "epoch": 0.5192276488723024, "percentage": 25.97, "elapsed_time": "4:42:33", "remaining_time": "13:25:17", "throughput": "0.00", "total_tokens": 0}
|
402 |
+
{"current_steps": 802, "total_steps": 3080, "loss": 0.3084, "learning_rate": 8.837879631458053e-06, "epoch": 0.5205257179944832, "percentage": 26.04, "elapsed_time": "4:43:16", "remaining_time": "13:24:36", "throughput": "0.00", "total_tokens": 0}
|
403 |
+
{"current_steps": 804, "total_steps": 3080, "loss": 0.2895, "learning_rate": 8.830988939112326e-06, "epoch": 0.521823787116664, "percentage": 26.1, "elapsed_time": "4:43:57", "remaining_time": "13:23:50", "throughput": "0.00", "total_tokens": 0}
|
404 |
+
{"current_steps": 806, "total_steps": 3080, "loss": 0.2592, "learning_rate": 8.824080581433769e-06, "epoch": 0.5231218562388448, "percentage": 26.17, "elapsed_time": "4:44:38", "remaining_time": "13:23:03", "throughput": "0.00", "total_tokens": 0}
|
405 |
+
{"current_steps": 808, "total_steps": 3080, "loss": 0.2771, "learning_rate": 8.817154590277976e-06, "epoch": 0.5244199253610254, "percentage": 26.23, "elapsed_time": "4:45:20", "remaining_time": "13:22:22", "throughput": "0.00", "total_tokens": 0}
|
406 |
+
{"current_steps": 810, "total_steps": 3080, "loss": 0.2726, "learning_rate": 8.810210997581859e-06, "epoch": 0.5257179944832062, "percentage": 26.3, "elapsed_time": "4:46:02", "remaining_time": "13:21:36", "throughput": "0.00", "total_tokens": 0}
|
407 |
+
{"current_steps": 812, "total_steps": 3080, "loss": 0.3111, "learning_rate": 8.803249835363486e-06, "epoch": 0.527016063605387, "percentage": 26.36, "elapsed_time": "4:46:42", "remaining_time": "13:20:48", "throughput": "0.00", "total_tokens": 0}
|
408 |
+
{"current_steps": 814, "total_steps": 3080, "loss": 0.294, "learning_rate": 8.796271135721944e-06, "epoch": 0.5283141327275678, "percentage": 26.43, "elapsed_time": "4:47:25", "remaining_time": "13:20:08", "throughput": "0.00", "total_tokens": 0}
|
409 |
+
{"current_steps": 816, "total_steps": 3080, "loss": 0.2836, "learning_rate": 8.789274930837189e-06, "epoch": 0.5296122018497486, "percentage": 26.49, "elapsed_time": "4:48:06", "remaining_time": "13:19:20", "throughput": "0.00", "total_tokens": 0}
|
410 |
+
{"current_steps": 818, "total_steps": 3080, "loss": 0.2853, "learning_rate": 8.782261252969895e-06, "epoch": 0.5309102709719292, "percentage": 26.56, "elapsed_time": "4:48:45", "remaining_time": "13:18:29", "throughput": "0.00", "total_tokens": 0}
|
411 |
+
{"current_steps": 820, "total_steps": 3080, "loss": 0.3105, "learning_rate": 8.775230134461307e-06, "epoch": 0.53220834009411, "percentage": 26.62, "elapsed_time": "4:49:28", "remaining_time": "13:17:48", "throughput": "0.00", "total_tokens": 0}
|
412 |
+
{"current_steps": 822, "total_steps": 3080, "loss": 0.2888, "learning_rate": 8.768181607733095e-06, "epoch": 0.5335064092162908, "percentage": 26.69, "elapsed_time": "4:50:07", "remaining_time": "13:16:58", "throughput": "0.00", "total_tokens": 0}
|
413 |
+
{"current_steps": 824, "total_steps": 3080, "loss": 0.2872, "learning_rate": 8.761115705287195e-06, "epoch": 0.5348044783384716, "percentage": 26.75, "elapsed_time": "4:50:53", "remaining_time": "13:16:26", "throughput": "0.00", "total_tokens": 0}
|
414 |
+
{"current_steps": 826, "total_steps": 3080, "loss": 0.2748, "learning_rate": 8.754032459705672e-06, "epoch": 0.5361025474606523, "percentage": 26.82, "elapsed_time": "4:51:34", "remaining_time": "13:15:40", "throughput": "0.00", "total_tokens": 0}
|
415 |
+
{"current_steps": 828, "total_steps": 3080, "loss": 0.275, "learning_rate": 8.746931903650558e-06, "epoch": 0.537400616582833, "percentage": 26.88, "elapsed_time": "4:52:16", "remaining_time": "13:14:56", "throughput": "0.00", "total_tokens": 0}
|
416 |
+
{"current_steps": 830, "total_steps": 3080, "loss": 0.2818, "learning_rate": 8.739814069863708e-06, "epoch": 0.5386986857050138, "percentage": 26.95, "elapsed_time": "4:52:57", "remaining_time": "13:14:08", "throughput": "0.00", "total_tokens": 0}
|
417 |
+
{"current_steps": 832, "total_steps": 3080, "loss": 0.2827, "learning_rate": 8.732678991166647e-06, "epoch": 0.5399967548271946, "percentage": 27.01, "elapsed_time": "4:53:36", "remaining_time": "13:13:18", "throughput": "0.00", "total_tokens": 0}
|
418 |
+
{"current_steps": 834, "total_steps": 3080, "loss": 0.2972, "learning_rate": 8.725526700460426e-06, "epoch": 0.5412948239493753, "percentage": 27.08, "elapsed_time": "4:54:19", "remaining_time": "13:12:36", "throughput": "0.00", "total_tokens": 0}
|
419 |
+
{"current_steps": 836, "total_steps": 3080, "loss": 0.2896, "learning_rate": 8.71835723072545e-06, "epoch": 0.5425928930715561, "percentage": 27.14, "elapsed_time": "4:54:56", "remaining_time": "13:11:41", "throughput": "0.00", "total_tokens": 0}
|
420 |
+
{"current_steps": 838, "total_steps": 3080, "loss": 0.3427, "learning_rate": 8.71117061502135e-06, "epoch": 0.5438909621937368, "percentage": 27.21, "elapsed_time": "4:55:36", "remaining_time": "13:10:53", "throughput": "0.00", "total_tokens": 0}
|
421 |
+
{"current_steps": 840, "total_steps": 3080, "loss": 0.2943, "learning_rate": 8.703966886486819e-06, "epoch": 0.5451890313159176, "percentage": 27.27, "elapsed_time": "4:56:15", "remaining_time": "13:10:00", "throughput": "0.00", "total_tokens": 0}
|
422 |
+
{"current_steps": 842, "total_steps": 3080, "loss": 0.2923, "learning_rate": 8.696746078339455e-06, "epoch": 0.5464871004380983, "percentage": 27.34, "elapsed_time": "4:56:54", "remaining_time": "13:09:10", "throughput": "0.00", "total_tokens": 0}
|
423 |
+
{"current_steps": 844, "total_steps": 3080, "loss": 0.2812, "learning_rate": 8.68950822387562e-06, "epoch": 0.5477851695602791, "percentage": 27.4, "elapsed_time": "4:57:30", "remaining_time": "13:08:11", "throughput": "0.00", "total_tokens": 0}
|
424 |
+
{"current_steps": 846, "total_steps": 3080, "loss": 0.2878, "learning_rate": 8.68225335647027e-06, "epoch": 0.5490832386824599, "percentage": 27.47, "elapsed_time": "4:58:12", "remaining_time": "13:07:27", "throughput": "0.00", "total_tokens": 0}
|
425 |
+
{"current_steps": 848, "total_steps": 3080, "loss": 0.3083, "learning_rate": 8.674981509576819e-06, "epoch": 0.5503813078046406, "percentage": 27.53, "elapsed_time": "4:58:50", "remaining_time": "13:06:34", "throughput": "0.00", "total_tokens": 0}
|
426 |
+
{"current_steps": 850, "total_steps": 3080, "loss": 0.2929, "learning_rate": 8.667692716726974e-06, "epoch": 0.5516793769268213, "percentage": 27.6, "elapsed_time": "4:59:28", "remaining_time": "13:05:41", "throughput": "0.00", "total_tokens": 0}
|
427 |
+
{"current_steps": 852, "total_steps": 3080, "loss": 0.292, "learning_rate": 8.66038701153058e-06, "epoch": 0.5529774460490021, "percentage": 27.66, "elapsed_time": "5:00:10", "remaining_time": "13:04:56", "throughput": "0.00", "total_tokens": 0}
|
428 |
+
{"current_steps": 854, "total_steps": 3080, "loss": 0.3135, "learning_rate": 8.65306442767547e-06, "epoch": 0.5542755151711829, "percentage": 27.73, "elapsed_time": "5:00:51", "remaining_time": "13:04:11", "throughput": "0.00", "total_tokens": 0}
|
429 |
+
{"current_steps": 856, "total_steps": 3080, "loss": 0.3071, "learning_rate": 8.64572499892731e-06, "epoch": 0.5555735842933637, "percentage": 27.79, "elapsed_time": "5:01:33", "remaining_time": "13:03:28", "throughput": "0.00", "total_tokens": 0}
|
430 |
+
{"current_steps": 858, "total_steps": 3080, "loss": 0.2722, "learning_rate": 8.638368759129433e-06, "epoch": 0.5568716534155443, "percentage": 27.86, "elapsed_time": "5:02:11", "remaining_time": "13:02:35", "throughput": "0.00", "total_tokens": 0}
|
431 |
+
{"current_steps": 860, "total_steps": 3080, "loss": 0.2919, "learning_rate": 8.630995742202695e-06, "epoch": 0.5581697225377251, "percentage": 27.92, "elapsed_time": "5:02:52", "remaining_time": "13:01:50", "throughput": "0.00", "total_tokens": 0}
|
432 |
+
{"current_steps": 862, "total_steps": 3080, "loss": 0.27, "learning_rate": 8.623605982145318e-06, "epoch": 0.5594677916599059, "percentage": 27.99, "elapsed_time": "5:03:35", "remaining_time": "13:01:10", "throughput": "0.00", "total_tokens": 0}
|
433 |
+
{"current_steps": 864, "total_steps": 3080, "loss": 0.2926, "learning_rate": 8.616199513032723e-06, "epoch": 0.5607658607820867, "percentage": 28.05, "elapsed_time": "5:04:15", "remaining_time": "13:00:21", "throughput": "0.00", "total_tokens": 0}
|
434 |
+
{"current_steps": 866, "total_steps": 3080, "loss": 0.2993, "learning_rate": 8.608776369017382e-06, "epoch": 0.5620639299042675, "percentage": 28.12, "elapsed_time": "5:04:56", "remaining_time": "12:59:37", "throughput": "0.00", "total_tokens": 0}
|
435 |
+
{"current_steps": 868, "total_steps": 3080, "loss": 0.2817, "learning_rate": 8.601336584328659e-06, "epoch": 0.5633619990264481, "percentage": 28.18, "elapsed_time": "5:05:37", "remaining_time": "12:58:50", "throughput": "0.00", "total_tokens": 0}
|
436 |
+
{"current_steps": 870, "total_steps": 3080, "loss": 0.3029, "learning_rate": 8.593880193272649e-06, "epoch": 0.5646600681486289, "percentage": 28.25, "elapsed_time": "5:06:19", "remaining_time": "12:58:07", "throughput": "0.00", "total_tokens": 0}
|
437 |
+
{"current_steps": 872, "total_steps": 3080, "loss": 0.3101, "learning_rate": 8.58640723023202e-06, "epoch": 0.5659581372708097, "percentage": 28.31, "elapsed_time": "5:06:59", "remaining_time": "12:57:21", "throughput": "0.00", "total_tokens": 0}
|
438 |
+
{"current_steps": 874, "total_steps": 3080, "loss": 0.2947, "learning_rate": 8.578917729665863e-06, "epoch": 0.5672562063929905, "percentage": 28.38, "elapsed_time": "5:07:39", "remaining_time": "12:56:33", "throughput": "0.00", "total_tokens": 0}
|
439 |
+
{"current_steps": 876, "total_steps": 3080, "loss": 0.2869, "learning_rate": 8.571411726109518e-06, "epoch": 0.5685542755151712, "percentage": 28.44, "elapsed_time": "5:08:18", "remaining_time": "12:55:42", "throughput": "0.00", "total_tokens": 0}
|
440 |
+
{"current_steps": 878, "total_steps": 3080, "loss": 0.3198, "learning_rate": 8.563889254174429e-06, "epoch": 0.5698523446373519, "percentage": 28.51, "elapsed_time": "5:09:01", "remaining_time": "12:55:01", "throughput": "0.00", "total_tokens": 0}
|
441 |
+
{"current_steps": 880, "total_steps": 3080, "loss": 0.3014, "learning_rate": 8.556350348547978e-06, "epoch": 0.5711504137595327, "percentage": 28.57, "elapsed_time": "5:09:40", "remaining_time": "12:54:10", "throughput": "0.00", "total_tokens": 0}
|
442 |
+
{"current_steps": 882, "total_steps": 3080, "loss": 0.302, "learning_rate": 8.548795043993316e-06, "epoch": 0.5724484828817135, "percentage": 28.64, "elapsed_time": "5:10:18", "remaining_time": "12:53:17", "throughput": "0.00", "total_tokens": 0}
|
443 |
+
{"current_steps": 884, "total_steps": 3080, "loss": 0.2879, "learning_rate": 8.54122337534923e-06, "epoch": 0.5737465520038942, "percentage": 28.7, "elapsed_time": "5:10:59", "remaining_time": "12:52:34", "throughput": "0.00", "total_tokens": 0}
|
444 |
+
{"current_steps": 886, "total_steps": 3080, "loss": 0.2969, "learning_rate": 8.533635377529949e-06, "epoch": 0.575044621126075, "percentage": 28.77, "elapsed_time": "5:11:41", "remaining_time": "12:51:51", "throughput": "0.00", "total_tokens": 0}
|
445 |
+
{"current_steps": 888, "total_steps": 3080, "loss": 0.3158, "learning_rate": 8.526031085525004e-06, "epoch": 0.5763426902482557, "percentage": 28.83, "elapsed_time": "5:12:18", "remaining_time": "12:50:54", "throughput": "0.00", "total_tokens": 0}
|
446 |
+
{"current_steps": 890, "total_steps": 3080, "loss": 0.2846, "learning_rate": 8.518410534399063e-06, "epoch": 0.5776407593704365, "percentage": 28.9, "elapsed_time": "5:12:58", "remaining_time": "12:50:08", "throughput": "0.00", "total_tokens": 0}
|
447 |
+
{"current_steps": 892, "total_steps": 3080, "loss": 0.2806, "learning_rate": 8.510773759291768e-06, "epoch": 0.5789388284926172, "percentage": 28.96, "elapsed_time": "5:13:36", "remaining_time": "12:49:14", "throughput": "0.00", "total_tokens": 0}
|
448 |
+
{"current_steps": 894, "total_steps": 3080, "loss": 0.2829, "learning_rate": 8.503120795417568e-06, "epoch": 0.580236897614798, "percentage": 29.03, "elapsed_time": "5:14:15", "remaining_time": "12:48:26", "throughput": "0.00", "total_tokens": 0}
|
449 |
+
{"current_steps": 896, "total_steps": 3080, "loss": 0.3033, "learning_rate": 8.495451678065563e-06, "epoch": 0.5815349667369788, "percentage": 29.09, "elapsed_time": "5:15:01", "remaining_time": "12:47:51", "throughput": "0.00", "total_tokens": 0}
|
450 |
+
{"current_steps": 898, "total_steps": 3080, "loss": 0.2656, "learning_rate": 8.487766442599339e-06, "epoch": 0.5828330358591595, "percentage": 29.16, "elapsed_time": "5:15:44", "remaining_time": "12:47:12", "throughput": "0.00", "total_tokens": 0}
|
451 |
+
{"current_steps": 900, "total_steps": 3080, "loss": 0.2883, "learning_rate": 8.48006512445681e-06, "epoch": 0.5841311049813402, "percentage": 29.22, "elapsed_time": "5:16:25", "remaining_time": "12:46:27", "throughput": "0.00", "total_tokens": 0}
|
452 |
+
{"current_steps": 902, "total_steps": 3080, "loss": 0.269, "learning_rate": 8.472347759150044e-06, "epoch": 0.585429174103521, "percentage": 29.29, "elapsed_time": "5:17:04", "remaining_time": "12:45:37", "throughput": "0.00", "total_tokens": 0}
|
453 |
+
{"current_steps": 904, "total_steps": 3080, "loss": 0.3032, "learning_rate": 8.464614382265107e-06, "epoch": 0.5867272432257018, "percentage": 29.35, "elapsed_time": "5:17:47", "remaining_time": "12:44:56", "throughput": "0.00", "total_tokens": 0}
|
454 |
+
{"current_steps": 906, "total_steps": 3080, "loss": 0.303, "learning_rate": 8.4568650294619e-06, "epoch": 0.5880253123478826, "percentage": 29.42, "elapsed_time": "5:18:24", "remaining_time": "12:44:01", "throughput": "0.00", "total_tokens": 0}
|
455 |
+
{"current_steps": 908, "total_steps": 3080, "loss": 0.3056, "learning_rate": 8.449099736473986e-06, "epoch": 0.5893233814700632, "percentage": 29.48, "elapsed_time": "5:19:01", "remaining_time": "12:43:07", "throughput": "0.00", "total_tokens": 0}
|
456 |
+
{"current_steps": 910, "total_steps": 3080, "loss": 0.3327, "learning_rate": 8.441318539108433e-06, "epoch": 0.590621450592244, "percentage": 29.55, "elapsed_time": "5:19:42", "remaining_time": "12:42:21", "throughput": "0.00", "total_tokens": 0}
|
457 |
+
{"current_steps": 912, "total_steps": 3080, "loss": 0.2803, "learning_rate": 8.433521473245653e-06, "epoch": 0.5919195197144248, "percentage": 29.61, "elapsed_time": "5:20:23", "remaining_time": "12:41:37", "throughput": "0.00", "total_tokens": 0}
|
458 |
+
{"current_steps": 914, "total_steps": 3080, "loss": 0.306, "learning_rate": 8.425708574839221e-06, "epoch": 0.5932175888366056, "percentage": 29.68, "elapsed_time": "5:21:02", "remaining_time": "12:40:49", "throughput": "0.00", "total_tokens": 0}
|
459 |
+
{"current_steps": 916, "total_steps": 3080, "loss": 0.2743, "learning_rate": 8.417879879915724e-06, "epoch": 0.5945156579587864, "percentage": 29.74, "elapsed_time": "5:21:38", "remaining_time": "12:39:51", "throughput": "0.00", "total_tokens": 0}
|
460 |
+
{"current_steps": 918, "total_steps": 3080, "loss": 0.3026, "learning_rate": 8.410035424574587e-06, "epoch": 0.595813727080967, "percentage": 29.81, "elapsed_time": "5:22:16", "remaining_time": "12:39:00", "throughput": "0.00", "total_tokens": 0}
|
461 |
+
{"current_steps": 920, "total_steps": 3080, "loss": 0.3008, "learning_rate": 8.40217524498791e-06, "epoch": 0.5971117962031478, "percentage": 29.87, "elapsed_time": "5:22:56", "remaining_time": "12:38:13", "throughput": "0.00", "total_tokens": 0}
|
462 |
+
{"current_steps": 922, "total_steps": 3080, "loss": 0.295, "learning_rate": 8.394299377400301e-06, "epoch": 0.5984098653253286, "percentage": 29.94, "elapsed_time": "5:23:39", "remaining_time": "12:37:32", "throughput": "0.00", "total_tokens": 0}
|
463 |
+
{"current_steps": 924, "total_steps": 3080, "loss": 0.277, "learning_rate": 8.386407858128707e-06, "epoch": 0.5997079344475094, "percentage": 30.0, "elapsed_time": "5:24:17", "remaining_time": "12:36:41", "throughput": "0.00", "total_tokens": 0}
|
464 |
+
{"current_steps": 926, "total_steps": 3080, "loss": 0.3059, "learning_rate": 8.378500723562243e-06, "epoch": 0.6010060035696901, "percentage": 30.06, "elapsed_time": "5:24:58", "remaining_time": "12:35:56", "throughput": "0.00", "total_tokens": 0}
|
465 |
+
{"current_steps": 928, "total_steps": 3080, "loss": 0.2849, "learning_rate": 8.370578010162043e-06, "epoch": 0.6023040726918708, "percentage": 30.13, "elapsed_time": "5:25:38", "remaining_time": "12:35:08", "throughput": "0.00", "total_tokens": 0}
|
466 |
+
{"current_steps": 930, "total_steps": 3080, "loss": 0.2743, "learning_rate": 8.362639754461057e-06, "epoch": 0.6036021418140516, "percentage": 30.19, "elapsed_time": "5:26:19", "remaining_time": "12:34:25", "throughput": "0.00", "total_tokens": 0}
|
467 |
+
{"current_steps": 932, "total_steps": 3080, "loss": 0.3138, "learning_rate": 8.354685993063923e-06, "epoch": 0.6049002109362324, "percentage": 30.26, "elapsed_time": "5:27:02", "remaining_time": "12:33:43", "throughput": "0.00", "total_tokens": 0}
|
468 |
+
{"current_steps": 934, "total_steps": 3080, "loss": 0.2959, "learning_rate": 8.346716762646763e-06, "epoch": 0.6061982800584131, "percentage": 30.32, "elapsed_time": "5:27:38", "remaining_time": "12:32:49", "throughput": "0.00", "total_tokens": 0}
|
469 |
+
{"current_steps": 936, "total_steps": 3080, "loss": 0.2785, "learning_rate": 8.338732099957038e-06, "epoch": 0.6074963491805939, "percentage": 30.39, "elapsed_time": "5:28:20", "remaining_time": "12:32:05", "throughput": "0.00", "total_tokens": 0}
|
470 |
+
{"current_steps": 938, "total_steps": 3080, "loss": 0.2939, "learning_rate": 8.330732041813367e-06, "epoch": 0.6087944183027746, "percentage": 30.45, "elapsed_time": "5:28:57", "remaining_time": "12:31:12", "throughput": "0.00", "total_tokens": 0}
|
471 |
+
{"current_steps": 940, "total_steps": 3080, "loss": 0.2649, "learning_rate": 8.322716625105363e-06, "epoch": 0.6100924874249554, "percentage": 30.52, "elapsed_time": "5:29:34", "remaining_time": "12:30:19", "throughput": "0.00", "total_tokens": 0}
|
472 |
+
{"current_steps": 942, "total_steps": 3080, "loss": 0.2782, "learning_rate": 8.314685886793456e-06, "epoch": 0.6113905565471361, "percentage": 30.58, "elapsed_time": "5:30:10", "remaining_time": "12:29:22", "throughput": "0.00", "total_tokens": 0}
|
473 |
+
{"current_steps": 944, "total_steps": 3080, "loss": 0.2938, "learning_rate": 8.306639863908725e-06, "epoch": 0.6126886256693169, "percentage": 30.65, "elapsed_time": "5:30:51", "remaining_time": "12:28:37", "throughput": "0.00", "total_tokens": 0}
|
474 |
+
{"current_steps": 946, "total_steps": 3080, "loss": 0.2734, "learning_rate": 8.298578593552737e-06, "epoch": 0.6139866947914977, "percentage": 30.71, "elapsed_time": "5:31:31", "remaining_time": "12:27:52", "throughput": "0.00", "total_tokens": 0}
|
475 |
+
{"current_steps": 948, "total_steps": 3080, "loss": 0.3051, "learning_rate": 8.290502112897357e-06, "epoch": 0.6152847639136784, "percentage": 30.78, "elapsed_time": "5:32:17", "remaining_time": "12:27:18", "throughput": "0.00", "total_tokens": 0}
|
476 |
+
{"current_steps": 950, "total_steps": 3080, "loss": 0.2907, "learning_rate": 8.282410459184597e-06, "epoch": 0.6165828330358591, "percentage": 30.84, "elapsed_time": "5:32:58", "remaining_time": "12:26:33", "throughput": "0.00", "total_tokens": 0}
|
477 |
+
{"current_steps": 952, "total_steps": 3080, "loss": 0.2764, "learning_rate": 8.274303669726427e-06, "epoch": 0.6178809021580399, "percentage": 30.91, "elapsed_time": "5:33:36", "remaining_time": "12:25:43", "throughput": "0.00", "total_tokens": 0}
|
478 |
+
{"current_steps": 954, "total_steps": 3080, "loss": 0.3078, "learning_rate": 8.266181781904613e-06, "epoch": 0.6191789712802207, "percentage": 30.97, "elapsed_time": "5:34:17", "remaining_time": "12:24:57", "throughput": "0.00", "total_tokens": 0}
|
479 |
+
{"current_steps": 956, "total_steps": 3080, "loss": 0.2884, "learning_rate": 8.258044833170545e-06, "epoch": 0.6204770404024015, "percentage": 31.04, "elapsed_time": "5:34:57", "remaining_time": "12:24:11", "throughput": "0.00", "total_tokens": 0}
|
480 |
+
{"current_steps": 958, "total_steps": 3080, "loss": 0.32, "learning_rate": 8.24989286104506e-06, "epoch": 0.6217751095245821, "percentage": 31.1, "elapsed_time": "5:35:36", "remaining_time": "12:23:23", "throughput": "0.00", "total_tokens": 0}
|
481 |
+
{"current_steps": 960, "total_steps": 3080, "loss": 0.2941, "learning_rate": 8.241725903118264e-06, "epoch": 0.6230731786467629, "percentage": 31.17, "elapsed_time": "5:36:15", "remaining_time": "12:22:33", "throughput": "0.00", "total_tokens": 0}
|
482 |
+
{"current_steps": 962, "total_steps": 3080, "loss": 0.2909, "learning_rate": 8.233543997049376e-06, "epoch": 0.6243712477689437, "percentage": 31.23, "elapsed_time": "5:36:59", "remaining_time": "12:21:55", "throughput": "0.00", "total_tokens": 0}
|
483 |
+
{"current_steps": 964, "total_steps": 3080, "loss": 0.2879, "learning_rate": 8.225347180566534e-06, "epoch": 0.6256693168911245, "percentage": 31.3, "elapsed_time": "5:37:42", "remaining_time": "12:21:16", "throughput": "0.00", "total_tokens": 0}
|
484 |
+
{"current_steps": 966, "total_steps": 3080, "loss": 0.2916, "learning_rate": 8.217135491466636e-06, "epoch": 0.6269673860133053, "percentage": 31.36, "elapsed_time": "5:38:20", "remaining_time": "12:20:25", "throughput": "0.00", "total_tokens": 0}
|
485 |
+
{"current_steps": 968, "total_steps": 3080, "loss": 0.2712, "learning_rate": 8.208908967615159e-06, "epoch": 0.6282654551354859, "percentage": 31.43, "elapsed_time": "5:39:01", "remaining_time": "12:19:41", "throughput": "0.00", "total_tokens": 0}
|
486 |
+
{"current_steps": 970, "total_steps": 3080, "loss": 0.289, "learning_rate": 8.200667646945983e-06, "epoch": 0.6295635242576667, "percentage": 31.49, "elapsed_time": "5:39:45", "remaining_time": "12:19:04", "throughput": "0.00", "total_tokens": 0}
|
487 |
+
{"current_steps": 972, "total_steps": 3080, "loss": 0.3077, "learning_rate": 8.192411567461222e-06, "epoch": 0.6308615933798475, "percentage": 31.56, "elapsed_time": "5:40:34", "remaining_time": "12:18:36", "throughput": "0.00", "total_tokens": 0}
|
488 |
+
{"current_steps": 974, "total_steps": 3080, "loss": 0.2742, "learning_rate": 8.184140767231044e-06, "epoch": 0.6321596625020283, "percentage": 31.62, "elapsed_time": "5:41:13", "remaining_time": "12:17:48", "throughput": "0.00", "total_tokens": 0}
|
489 |
+
{"current_steps": 976, "total_steps": 3080, "loss": 0.266, "learning_rate": 8.175855284393495e-06, "epoch": 0.633457731624209, "percentage": 31.69, "elapsed_time": "5:41:53", "remaining_time": "12:17:01", "throughput": "0.00", "total_tokens": 0}
|
490 |
+
{"current_steps": 978, "total_steps": 3080, "loss": 0.2756, "learning_rate": 8.167555157154327e-06, "epoch": 0.6347558007463897, "percentage": 31.75, "elapsed_time": "5:42:30", "remaining_time": "12:16:09", "throughput": "0.00", "total_tokens": 0}
|
491 |
+
{"current_steps": 980, "total_steps": 3080, "loss": 0.289, "learning_rate": 8.15924042378682e-06, "epoch": 0.6360538698685705, "percentage": 31.82, "elapsed_time": "5:43:08", "remaining_time": "12:15:17", "throughput": "0.00", "total_tokens": 0}
|
492 |
+
{"current_steps": 982, "total_steps": 3080, "loss": 0.302, "learning_rate": 8.150911122631606e-06, "epoch": 0.6373519389907513, "percentage": 31.88, "elapsed_time": "5:43:48", "remaining_time": "12:14:32", "throughput": "0.00", "total_tokens": 0}
|
493 |
+
{"current_steps": 984, "total_steps": 3080, "loss": 0.2831, "learning_rate": 8.142567292096488e-06, "epoch": 0.638650008112932, "percentage": 31.95, "elapsed_time": "5:44:36", "remaining_time": "12:14:02", "throughput": "0.00", "total_tokens": 0}
|
494 |
+
{"current_steps": 986, "total_steps": 3080, "loss": 0.2893, "learning_rate": 8.13420897065627e-06, "epoch": 0.6399480772351128, "percentage": 32.01, "elapsed_time": "5:45:21", "remaining_time": "12:13:26", "throughput": "0.00", "total_tokens": 0}
|
495 |
+
{"current_steps": 988, "total_steps": 3080, "loss": 0.3004, "learning_rate": 8.125836196852577e-06, "epoch": 0.6412461463572935, "percentage": 32.08, "elapsed_time": "5:46:10", "remaining_time": "12:12:58", "throughput": "0.00", "total_tokens": 0}
|
496 |
+
{"current_steps": 990, "total_steps": 3080, "loss": 0.3045, "learning_rate": 8.117449009293668e-06, "epoch": 0.6425442154794743, "percentage": 32.14, "elapsed_time": "5:46:51", "remaining_time": "12:12:14", "throughput": "0.00", "total_tokens": 0}
|
497 |
+
{"current_steps": 992, "total_steps": 3080, "loss": 0.2833, "learning_rate": 8.109047446654276e-06, "epoch": 0.643842284601655, "percentage": 32.21, "elapsed_time": "5:47:31", "remaining_time": "12:11:28", "throughput": "0.00", "total_tokens": 0}
|
498 |
+
{"current_steps": 994, "total_steps": 3080, "loss": 0.269, "learning_rate": 8.100631547675417e-06, "epoch": 0.6451403537238358, "percentage": 32.27, "elapsed_time": "5:48:12", "remaining_time": "12:10:45", "throughput": "0.00", "total_tokens": 0}
|
499 |
+
{"current_steps": 996, "total_steps": 3080, "loss": 0.3091, "learning_rate": 8.092201351164213e-06, "epoch": 0.6464384228460166, "percentage": 32.34, "elapsed_time": "5:48:57", "remaining_time": "12:10:09", "throughput": "0.00", "total_tokens": 0}
|
500 |
+
{"current_steps": 998, "total_steps": 3080, "loss": 0.2857, "learning_rate": 8.083756895993712e-06, "epoch": 0.6477364919681973, "percentage": 32.4, "elapsed_time": "5:49:38", "remaining_time": "12:09:24", "throughput": "0.00", "total_tokens": 0}
|
501 |
+
{"current_steps": 1000, "total_steps": 3080, "loss": 0.3026, "learning_rate": 8.075298221102714e-06, "epoch": 0.649034561090378, "percentage": 32.47, "elapsed_time": "5:50:17", "remaining_time": "12:08:37", "throughput": "0.00", "total_tokens": 0}
|
502 |
+
{"current_steps": 1000, "total_steps": 3080, "eval_loss": 0.29006972908973694, "epoch": 0.649034561090378, "percentage": 32.47, "elapsed_time": "6:04:06", "remaining_time": "12:37:21", "throughput": "0.00", "total_tokens": 0}
|
503 |
+
{"current_steps": 1002, "total_steps": 3080, "loss": 0.2813, "learning_rate": 8.066825365495591e-06, "epoch": 0.6503326302125588, "percentage": 32.53, "elapsed_time": "6:04:48", "remaining_time": "12:36:32", "throughput": "0.00", "total_tokens": 0}
|
504 |
+
{"current_steps": 1004, "total_steps": 3080, "loss": 0.2702, "learning_rate": 8.058338368242103e-06, "epoch": 0.6516306993347396, "percentage": 32.6, "elapsed_time": "6:05:28", "remaining_time": "12:35:42", "throughput": "0.00", "total_tokens": 0}
|
505 |
+
{"current_steps": 1006, "total_steps": 3080, "loss": 0.297, "learning_rate": 8.049837268477213e-06, "epoch": 0.6529287684569204, "percentage": 32.66, "elapsed_time": "6:06:04", "remaining_time": "12:34:43", "throughput": "0.00", "total_tokens": 0}
|
506 |
+
{"current_steps": 1008, "total_steps": 3080, "loss": 0.305, "learning_rate": 8.041322105400923e-06, "epoch": 0.654226837579101, "percentage": 32.73, "elapsed_time": "6:06:40", "remaining_time": "12:33:44", "throughput": "0.00", "total_tokens": 0}
|
507 |
+
{"current_steps": 1010, "total_steps": 3080, "loss": 0.2955, "learning_rate": 8.032792918278076e-06, "epoch": 0.6555249067012818, "percentage": 32.79, "elapsed_time": "6:07:18", "remaining_time": "12:32:48", "throughput": "0.00", "total_tokens": 0}
|
508 |
+
{"current_steps": 1012, "total_steps": 3080, "loss": 0.303, "learning_rate": 8.024249746438189e-06, "epoch": 0.6568229758234626, "percentage": 32.86, "elapsed_time": "6:07:55", "remaining_time": "12:31:50", "throughput": "0.00", "total_tokens": 0}
|
509 |
+
{"current_steps": 1014, "total_steps": 3080, "loss": 0.2898, "learning_rate": 8.015692629275256e-06, "epoch": 0.6581210449456434, "percentage": 32.92, "elapsed_time": "6:08:39", "remaining_time": "12:31:07", "throughput": "0.00", "total_tokens": 0}
|
510 |
+
{"current_steps": 1016, "total_steps": 3080, "loss": 0.2811, "learning_rate": 8.007121606247583e-06, "epoch": 0.6594191140678242, "percentage": 32.99, "elapsed_time": "6:09:18", "remaining_time": "12:30:15", "throughput": "0.00", "total_tokens": 0}
|
511 |
+
{"current_steps": 1018, "total_steps": 3080, "loss": 0.2968, "learning_rate": 7.998536716877593e-06, "epoch": 0.6607171831900048, "percentage": 33.05, "elapsed_time": "6:09:56", "remaining_time": "12:29:19", "throughput": "0.00", "total_tokens": 0}
|
512 |
+
{"current_steps": 1020, "total_steps": 3080, "loss": 0.3056, "learning_rate": 7.989938000751655e-06, "epoch": 0.6620152523121856, "percentage": 33.12, "elapsed_time": "6:10:37", "remaining_time": "12:28:31", "throughput": "0.00", "total_tokens": 0}
|
513 |
+
{"current_steps": 1022, "total_steps": 3080, "loss": 0.2927, "learning_rate": 7.981325497519892e-06, "epoch": 0.6633133214343664, "percentage": 33.18, "elapsed_time": "6:11:16", "remaining_time": "12:27:38", "throughput": "0.00", "total_tokens": 0}
|
514 |
+
{"current_steps": 1024, "total_steps": 3080, "loss": 0.2725, "learning_rate": 7.972699246895996e-06, "epoch": 0.6646113905565472, "percentage": 33.25, "elapsed_time": "6:11:54", "remaining_time": "12:26:43", "throughput": "0.00", "total_tokens": 0}
|
515 |
+
{"current_steps": 1026, "total_steps": 3080, "loss": 0.2711, "learning_rate": 7.964059288657061e-06, "epoch": 0.6659094596787279, "percentage": 33.31, "elapsed_time": "6:12:35", "remaining_time": "12:25:54", "throughput": "0.00", "total_tokens": 0}
|
516 |
+
{"current_steps": 1028, "total_steps": 3080, "loss": 0.3139, "learning_rate": 7.955405662643384e-06, "epoch": 0.6672075288009086, "percentage": 33.38, "elapsed_time": "6:13:15", "remaining_time": "12:25:02", "throughput": "0.00", "total_tokens": 0}
|
517 |
+
{"current_steps": 1030, "total_steps": 3080, "loss": 0.2636, "learning_rate": 7.946738408758283e-06, "epoch": 0.6685055979230894, "percentage": 33.44, "elapsed_time": "6:13:59", "remaining_time": "12:24:20", "throughput": "0.00", "total_tokens": 0}
|
518 |
+
{"current_steps": 1032, "total_steps": 3080, "loss": 0.3031, "learning_rate": 7.938057566967926e-06, "epoch": 0.6698036670452702, "percentage": 33.51, "elapsed_time": "6:14:40", "remaining_time": "12:23:32", "throughput": "0.00", "total_tokens": 0}
|
519 |
+
{"current_steps": 1034, "total_steps": 3080, "loss": 0.2623, "learning_rate": 7.929363177301124e-06, "epoch": 0.671101736167451, "percentage": 33.57, "elapsed_time": "6:15:18", "remaining_time": "12:22:38", "throughput": "0.00", "total_tokens": 0}
|
520 |
+
{"current_steps": 1036, "total_steps": 3080, "loss": 0.2624, "learning_rate": 7.920655279849173e-06, "epoch": 0.6723998052896317, "percentage": 33.64, "elapsed_time": "6:15:59", "remaining_time": "12:21:50", "throughput": "0.00", "total_tokens": 0}
|
521 |
+
{"current_steps": 1038, "total_steps": 3080, "loss": 0.2634, "learning_rate": 7.911933914765645e-06, "epoch": 0.6736978744118124, "percentage": 33.7, "elapsed_time": "6:16:40", "remaining_time": "12:21:00", "throughput": "0.00", "total_tokens": 0}
|
522 |
+
{"current_steps": 1040, "total_steps": 3080, "loss": 0.2734, "learning_rate": 7.90319912226622e-06, "epoch": 0.6749959435339932, "percentage": 33.77, "elapsed_time": "6:17:18", "remaining_time": "12:20:06", "throughput": "0.00", "total_tokens": 0}
|
523 |
+
{"current_steps": 1042, "total_steps": 3080, "loss": 0.3134, "learning_rate": 7.894450942628491e-06, "epoch": 0.676294012656174, "percentage": 33.83, "elapsed_time": "6:18:01", "remaining_time": "12:19:21", "throughput": "0.00", "total_tokens": 0}
|
524 |
+
{"current_steps": 1044, "total_steps": 3080, "loss": 0.3776, "learning_rate": 7.885689416191785e-06, "epoch": 0.6775920817783547, "percentage": 33.9, "elapsed_time": "6:18:44", "remaining_time": "12:18:37", "throughput": "0.00", "total_tokens": 0}
|
525 |
+
{"current_steps": 1046, "total_steps": 3080, "loss": 0.2825, "learning_rate": 7.876914583356965e-06, "epoch": 0.6788901509005355, "percentage": 33.96, "elapsed_time": "6:19:25", "remaining_time": "12:17:48", "throughput": "0.00", "total_tokens": 0}
|
526 |
+
{"current_steps": 1048, "total_steps": 3080, "loss": 0.2897, "learning_rate": 7.868126484586261e-06, "epoch": 0.6801882200227162, "percentage": 34.03, "elapsed_time": "6:20:05", "remaining_time": "12:16:57", "throughput": "0.00", "total_tokens": 0}
|
527 |
+
{"current_steps": 1050, "total_steps": 3080, "loss": 0.304, "learning_rate": 7.859325160403073e-06, "epoch": 0.681486289144897, "percentage": 34.09, "elapsed_time": "6:20:44", "remaining_time": "12:16:06", "throughput": "0.00", "total_tokens": 0}
|
528 |
+
{"current_steps": 1052, "total_steps": 3080, "loss": 0.3055, "learning_rate": 7.850510651391778e-06, "epoch": 0.6827843582670777, "percentage": 34.16, "elapsed_time": "6:21:24", "remaining_time": "12:15:16", "throughput": "0.00", "total_tokens": 0}
|
529 |
+
{"current_steps": 1054, "total_steps": 3080, "loss": 0.2995, "learning_rate": 7.841682998197561e-06, "epoch": 0.6840824273892585, "percentage": 34.22, "elapsed_time": "6:22:06", "remaining_time": "12:14:28", "throughput": "0.00", "total_tokens": 0}
|
530 |
+
{"current_steps": 1056, "total_steps": 3080, "loss": 0.2953, "learning_rate": 7.832842241526212e-06, "epoch": 0.6853804965114393, "percentage": 34.29, "elapsed_time": "6:22:46", "remaining_time": "12:13:38", "throughput": "0.00", "total_tokens": 0}
|
531 |
+
{"current_steps": 1058, "total_steps": 3080, "loss": 0.2902, "learning_rate": 7.823988422143942e-06, "epoch": 0.68667856563362, "percentage": 34.35, "elapsed_time": "6:23:24", "remaining_time": "12:12:45", "throughput": "0.00", "total_tokens": 0}
|
532 |
+
{"current_steps": 1060, "total_steps": 3080, "loss": 0.312, "learning_rate": 7.815121580877197e-06, "epoch": 0.6879766347558007, "percentage": 34.42, "elapsed_time": "6:24:01", "remaining_time": "12:11:49", "throughput": "0.00", "total_tokens": 0}
|
533 |
+
{"current_steps": 1062, "total_steps": 3080, "loss": 0.2721, "learning_rate": 7.806241758612471e-06, "epoch": 0.6892747038779815, "percentage": 34.48, "elapsed_time": "6:24:43", "remaining_time": "12:11:02", "throughput": "0.00", "total_tokens": 0}
|
534 |
+
{"current_steps": 1064, "total_steps": 3080, "loss": 0.3408, "learning_rate": 7.797348996296116e-06, "epoch": 0.6905727730001623, "percentage": 34.55, "elapsed_time": "6:25:28", "remaining_time": "12:10:21", "throughput": "0.00", "total_tokens": 0}
|
535 |
+
{"current_steps": 1066, "total_steps": 3080, "loss": 0.3036, "learning_rate": 7.788443334934148e-06, "epoch": 0.6918708421223431, "percentage": 34.61, "elapsed_time": "6:26:09", "remaining_time": "12:09:34", "throughput": "0.00", "total_tokens": 0}
|
536 |
+
{"current_steps": 1068, "total_steps": 3080, "loss": 0.2832, "learning_rate": 7.779524815592068e-06, "epoch": 0.6931689112445237, "percentage": 34.68, "elapsed_time": "6:26:48", "remaining_time": "12:08:42", "throughput": "0.00", "total_tokens": 0}
|
537 |
+
{"current_steps": 1070, "total_steps": 3080, "loss": 0.2845, "learning_rate": 7.770593479394664e-06, "epoch": 0.6944669803667045, "percentage": 34.74, "elapsed_time": "6:27:32", "remaining_time": "12:07:59", "throughput": "0.00", "total_tokens": 0}
|
538 |
+
{"current_steps": 1072, "total_steps": 3080, "loss": 0.2725, "learning_rate": 7.761649367525828e-06, "epoch": 0.6957650494888853, "percentage": 34.81, "elapsed_time": "6:28:16", "remaining_time": "12:07:18", "throughput": "0.00", "total_tokens": 0}
|
539 |
+
{"current_steps": 1074, "total_steps": 3080, "loss": 0.2757, "learning_rate": 7.752692521228357e-06, "epoch": 0.6970631186110661, "percentage": 34.87, "elapsed_time": "6:29:00", "remaining_time": "12:06:35", "throughput": "0.00", "total_tokens": 0}
|
540 |
+
{"current_steps": 1076, "total_steps": 3080, "loss": 0.2983, "learning_rate": 7.743722981803777e-06, "epoch": 0.6983611877332468, "percentage": 34.94, "elapsed_time": "6:29:37", "remaining_time": "12:05:39", "throughput": "0.00", "total_tokens": 0}
|
541 |
+
{"current_steps": 1078, "total_steps": 3080, "loss": 0.2903, "learning_rate": 7.734740790612137e-06, "epoch": 0.6996592568554275, "percentage": 35.0, "elapsed_time": "6:30:15", "remaining_time": "12:04:46", "throughput": "0.00", "total_tokens": 0}
|
542 |
+
{"current_steps": 1080, "total_steps": 3080, "loss": 0.265, "learning_rate": 7.725745989071825e-06, "epoch": 0.7009573259776083, "percentage": 35.06, "elapsed_time": "6:30:59", "remaining_time": "12:04:03", "throughput": "0.00", "total_tokens": 0}
|
543 |
+
{"current_steps": 1082, "total_steps": 3080, "loss": 0.3073, "learning_rate": 7.716738618659382e-06, "epoch": 0.7022553950997891, "percentage": 35.13, "elapsed_time": "6:31:44", "remaining_time": "12:03:23", "throughput": "0.00", "total_tokens": 0}
|
544 |
+
{"current_steps": 1084, "total_steps": 3080, "loss": 0.3093, "learning_rate": 7.707718720909308e-06, "epoch": 0.7035534642219698, "percentage": 35.19, "elapsed_time": "6:32:24", "remaining_time": "12:02:33", "throughput": "0.00", "total_tokens": 0}
|
545 |
+
{"current_steps": 1086, "total_steps": 3080, "loss": 0.2929, "learning_rate": 7.698686337413862e-06, "epoch": 0.7048515333441506, "percentage": 35.26, "elapsed_time": "6:33:06", "remaining_time": "12:01:47", "throughput": "0.00", "total_tokens": 0}
|
546 |
+
{"current_steps": 1088, "total_steps": 3080, "loss": 0.2523, "learning_rate": 7.689641509822879e-06, "epoch": 0.7061496024663313, "percentage": 35.32, "elapsed_time": "6:33:46", "remaining_time": "12:00:57", "throughput": "0.00", "total_tokens": 0}
|
547 |
+
{"current_steps": 1090, "total_steps": 3080, "loss": 0.2905, "learning_rate": 7.680584279843579e-06, "epoch": 0.7074476715885121, "percentage": 35.39, "elapsed_time": "6:34:29", "remaining_time": "12:00:13", "throughput": "0.00", "total_tokens": 0}
|
548 |
+
{"current_steps": 1092, "total_steps": 3080, "loss": 0.2932, "learning_rate": 7.671514689240366e-06, "epoch": 0.7087457407106929, "percentage": 35.45, "elapsed_time": "6:35:11", "remaining_time": "11:59:27", "throughput": "0.00", "total_tokens": 0}
|
549 |
+
{"current_steps": 1094, "total_steps": 3080, "loss": 0.2846, "learning_rate": 7.662432779834648e-06, "epoch": 0.7100438098328736, "percentage": 35.52, "elapsed_time": "6:35:50", "remaining_time": "11:58:35", "throughput": "0.00", "total_tokens": 0}
|
550 |
+
{"current_steps": 1096, "total_steps": 3080, "loss": 0.2658, "learning_rate": 7.653338593504632e-06, "epoch": 0.7113418789550544, "percentage": 35.58, "elapsed_time": "6:36:29", "remaining_time": "11:57:44", "throughput": "0.00", "total_tokens": 0}
|
551 |
+
{"current_steps": 1098, "total_steps": 3080, "loss": 0.3104, "learning_rate": 7.644232172185133e-06, "epoch": 0.7126399480772351, "percentage": 35.65, "elapsed_time": "6:37:05", "remaining_time": "11:56:48", "throughput": "0.00", "total_tokens": 0}
|
552 |
+
{"current_steps": 1100, "total_steps": 3080, "loss": 0.2621, "learning_rate": 7.635113557867395e-06, "epoch": 0.7139380171994159, "percentage": 35.71, "elapsed_time": "6:37:44", "remaining_time": "11:55:56", "throughput": "0.00", "total_tokens": 0}
|
553 |
+
{"current_steps": 1102, "total_steps": 3080, "loss": 0.3384, "learning_rate": 7.625982792598874e-06, "epoch": 0.7152360863215966, "percentage": 35.78, "elapsed_time": "6:38:28", "remaining_time": "11:55:13", "throughput": "0.00", "total_tokens": 0}
|
554 |
+
{"current_steps": 1104, "total_steps": 3080, "loss": 0.2787, "learning_rate": 7.616839918483061e-06, "epoch": 0.7165341554437774, "percentage": 35.84, "elapsed_time": "6:39:06", "remaining_time": "11:54:21", "throughput": "0.00", "total_tokens": 0}
|
555 |
+
{"current_steps": 1106, "total_steps": 3080, "loss": 0.2877, "learning_rate": 7.607684977679284e-06, "epoch": 0.7178322245659582, "percentage": 35.91, "elapsed_time": "6:39:50", "remaining_time": "11:53:38", "throughput": "0.00", "total_tokens": 0}
|
556 |
+
{"current_steps": 1108, "total_steps": 3080, "loss": 0.2774, "learning_rate": 7.598518012402509e-06, "epoch": 0.7191302936881389, "percentage": 35.97, "elapsed_time": "6:40:31", "remaining_time": "11:52:50", "throughput": "0.00", "total_tokens": 0}
|
557 |
+
{"current_steps": 1110, "total_steps": 3080, "loss": 0.2635, "learning_rate": 7.5893390649231555e-06, "epoch": 0.7204283628103196, "percentage": 36.04, "elapsed_time": "6:41:14", "remaining_time": "11:52:06", "throughput": "0.00", "total_tokens": 0}
|
558 |
+
{"current_steps": 1112, "total_steps": 3080, "loss": 0.2878, "learning_rate": 7.580148177566886e-06, "epoch": 0.7217264319325004, "percentage": 36.1, "elapsed_time": "6:41:52", "remaining_time": "11:51:13", "throughput": "0.00", "total_tokens": 0}
|
559 |
+
{"current_steps": 1114, "total_steps": 3080, "loss": 0.3085, "learning_rate": 7.5709453927144275e-06, "epoch": 0.7230245010546812, "percentage": 36.17, "elapsed_time": "6:42:33", "remaining_time": "11:50:26", "throughput": "0.00", "total_tokens": 0}
|
560 |
+
{"current_steps": 1116, "total_steps": 3080, "loss": 0.2655, "learning_rate": 7.561730752801364e-06, "epoch": 0.724322570176862, "percentage": 36.23, "elapsed_time": "6:43:12", "remaining_time": "11:49:35", "throughput": "0.00", "total_tokens": 0}
|
561 |
+
{"current_steps": 1118, "total_steps": 3080, "loss": 0.3159, "learning_rate": 7.552504300317944e-06, "epoch": 0.7256206392990426, "percentage": 36.3, "elapsed_time": "6:43:51", "remaining_time": "11:48:43", "throughput": "0.00", "total_tokens": 0}
|
562 |
+
{"current_steps": 1120, "total_steps": 3080, "loss": 0.2947, "learning_rate": 7.543266077808893e-06, "epoch": 0.7269187084212234, "percentage": 36.36, "elapsed_time": "6:44:30", "remaining_time": "11:47:53", "throughput": "0.00", "total_tokens": 0}
|
563 |
+
{"current_steps": 1122, "total_steps": 3080, "loss": 0.2815, "learning_rate": 7.5340161278732e-06, "epoch": 0.7282167775434042, "percentage": 36.43, "elapsed_time": "6:45:09", "remaining_time": "11:47:01", "throughput": "0.00", "total_tokens": 0}
|
564 |
+
{"current_steps": 1124, "total_steps": 3080, "loss": 0.2753, "learning_rate": 7.524754493163939e-06, "epoch": 0.729514846665585, "percentage": 36.49, "elapsed_time": "6:45:49", "remaining_time": "11:46:14", "throughput": "0.00", "total_tokens": 0}
|
565 |
+
{"current_steps": 1126, "total_steps": 3080, "loss": 0.2803, "learning_rate": 7.515481216388063e-06, "epoch": 0.7308129157877657, "percentage": 36.56, "elapsed_time": "6:46:34", "remaining_time": "11:45:33", "throughput": "0.00", "total_tokens": 0}
|
566 |
+
{"current_steps": 1128, "total_steps": 3080, "loss": 0.3105, "learning_rate": 7.506196340306204e-06, "epoch": 0.7321109849099464, "percentage": 36.62, "elapsed_time": "6:47:17", "remaining_time": "11:44:49", "throughput": "0.00", "total_tokens": 0}
|
567 |
+
{"current_steps": 1130, "total_steps": 3080, "loss": 0.2957, "learning_rate": 7.496899907732485e-06, "epoch": 0.7334090540321272, "percentage": 36.69, "elapsed_time": "6:47:55", "remaining_time": "11:43:55", "throughput": "0.00", "total_tokens": 0}
|
568 |
+
{"current_steps": 1132, "total_steps": 3080, "loss": 0.3114, "learning_rate": 7.487591961534319e-06, "epoch": 0.734707123154308, "percentage": 36.75, "elapsed_time": "6:48:33", "remaining_time": "11:43:04", "throughput": "0.00", "total_tokens": 0}
|
569 |
+
{"current_steps": 1134, "total_steps": 3080, "loss": 0.3076, "learning_rate": 7.478272544632204e-06, "epoch": 0.7360051922764888, "percentage": 36.82, "elapsed_time": "6:49:13", "remaining_time": "11:42:15", "throughput": "0.00", "total_tokens": 0}
|
570 |
+
{"current_steps": 1136, "total_steps": 3080, "loss": 0.2903, "learning_rate": 7.468941699999535e-06, "epoch": 0.7373032613986695, "percentage": 36.88, "elapsed_time": "6:49:53", "remaining_time": "11:41:26", "throughput": "0.00", "total_tokens": 0}
|
571 |
+
{"current_steps": 1138, "total_steps": 3080, "loss": 0.2498, "learning_rate": 7.4595994706624065e-06, "epoch": 0.7386013305208502, "percentage": 36.95, "elapsed_time": "6:50:36", "remaining_time": "11:40:42", "throughput": "0.00", "total_tokens": 0}
|
572 |
+
{"current_steps": 1140, "total_steps": 3080, "loss": 0.2836, "learning_rate": 7.450245899699401e-06, "epoch": 0.739899399643031, "percentage": 37.01, "elapsed_time": "6:51:15", "remaining_time": "11:39:52", "throughput": "0.00", "total_tokens": 0}
|
573 |
+
{"current_steps": 1142, "total_steps": 3080, "loss": 0.2799, "learning_rate": 7.440881030241407e-06, "epoch": 0.7411974687652118, "percentage": 37.08, "elapsed_time": "6:51:56", "remaining_time": "11:39:05", "throughput": "0.00", "total_tokens": 0}
|
574 |
+
{"current_steps": 1144, "total_steps": 3080, "loss": 0.2966, "learning_rate": 7.431504905471407e-06, "epoch": 0.7424955378873925, "percentage": 37.14, "elapsed_time": "6:52:39", "remaining_time": "11:38:21", "throughput": "0.00", "total_tokens": 0}
|
575 |
+
{"current_steps": 1146, "total_steps": 3080, "loss": 0.3034, "learning_rate": 7.422117568624288e-06, "epoch": 0.7437936070095733, "percentage": 37.21, "elapsed_time": "6:53:16", "remaining_time": "11:37:27", "throughput": "0.00", "total_tokens": 0}
|
576 |
+
{"current_steps": 1148, "total_steps": 3080, "loss": 0.2795, "learning_rate": 7.412719062986632e-06, "epoch": 0.745091676131754, "percentage": 37.27, "elapsed_time": "6:53:59", "remaining_time": "11:36:43", "throughput": "0.00", "total_tokens": 0}
|
577 |
+
{"current_steps": 1150, "total_steps": 3080, "loss": 0.2872, "learning_rate": 7.403309431896528e-06, "epoch": 0.7463897452539348, "percentage": 37.34, "elapsed_time": "6:54:37", "remaining_time": "11:35:51", "throughput": "0.00", "total_tokens": 0}
|
578 |
+
{"current_steps": 1152, "total_steps": 3080, "loss": 0.2747, "learning_rate": 7.393888718743362e-06, "epoch": 0.7476878143761155, "percentage": 37.4, "elapsed_time": "6:55:14", "remaining_time": "11:34:56", "throughput": "0.00", "total_tokens": 0}
|
579 |
+
{"current_steps": 1154, "total_steps": 3080, "loss": 0.2707, "learning_rate": 7.384456966967624e-06, "epoch": 0.7489858834982963, "percentage": 37.47, "elapsed_time": "6:55:58", "remaining_time": "11:34:15", "throughput": "0.00", "total_tokens": 0}
|
580 |
+
{"current_steps": 1156, "total_steps": 3080, "loss": 0.2773, "learning_rate": 7.375014220060705e-06, "epoch": 0.7502839526204771, "percentage": 37.53, "elapsed_time": "6:56:38", "remaining_time": "11:33:26", "throughput": "0.00", "total_tokens": 0}
|
581 |
+
{"current_steps": 1158, "total_steps": 3080, "loss": 0.2758, "learning_rate": 7.365560521564696e-06, "epoch": 0.7515820217426578, "percentage": 37.6, "elapsed_time": "6:57:18", "remaining_time": "11:32:38", "throughput": "0.00", "total_tokens": 0}
|
582 |
+
{"current_steps": 1160, "total_steps": 3080, "loss": 0.2951, "learning_rate": 7.3560959150721844e-06, "epoch": 0.7528800908648385, "percentage": 37.66, "elapsed_time": "6:58:00", "remaining_time": "11:31:52", "throughput": "0.00", "total_tokens": 0}
|
583 |
+
{"current_steps": 1162, "total_steps": 3080, "loss": 0.3057, "learning_rate": 7.3466204442260605e-06, "epoch": 0.7541781599870193, "percentage": 37.73, "elapsed_time": "6:58:40", "remaining_time": "11:31:03", "throughput": "0.00", "total_tokens": 0}
|
584 |
+
{"current_steps": 1164, "total_steps": 3080, "loss": 0.3171, "learning_rate": 7.337134152719312e-06, "epoch": 0.7554762291092001, "percentage": 37.79, "elapsed_time": "6:59:19", "remaining_time": "11:30:12", "throughput": "0.00", "total_tokens": 0}
|
585 |
+
{"current_steps": 1166, "total_steps": 3080, "loss": 0.2789, "learning_rate": 7.327637084294818e-06, "epoch": 0.7567742982313809, "percentage": 37.86, "elapsed_time": "6:59:59", "remaining_time": "11:29:25", "throughput": "0.00", "total_tokens": 0}
|
586 |
+
{"current_steps": 1168, "total_steps": 3080, "loss": 0.2784, "learning_rate": 7.318129282745152e-06, "epoch": 0.7580723673535615, "percentage": 37.92, "elapsed_time": "7:00:39", "remaining_time": "11:28:36", "throughput": "0.00", "total_tokens": 0}
|
587 |
+
{"current_steps": 1170, "total_steps": 3080, "loss": 0.3189, "learning_rate": 7.30861079191239e-06, "epoch": 0.7593704364757423, "percentage": 37.99, "elapsed_time": "7:01:26", "remaining_time": "11:27:59", "throughput": "0.00", "total_tokens": 0}
|
588 |
+
{"current_steps": 1172, "total_steps": 3080, "loss": 0.2832, "learning_rate": 7.299081655687885e-06, "epoch": 0.7606685055979231, "percentage": 38.05, "elapsed_time": "7:02:07", "remaining_time": "11:27:12", "throughput": "0.00", "total_tokens": 0}
|
589 |
+
{"current_steps": 1174, "total_steps": 3080, "loss": 0.2758, "learning_rate": 7.2895419180120855e-06, "epoch": 0.7619665747201039, "percentage": 38.12, "elapsed_time": "7:02:47", "remaining_time": "11:26:23", "throughput": "0.00", "total_tokens": 0}
|
590 |
+
{"current_steps": 1176, "total_steps": 3080, "loss": 0.261, "learning_rate": 7.279991622874319e-06, "epoch": 0.7632646438422847, "percentage": 38.18, "elapsed_time": "7:03:26", "remaining_time": "11:25:33", "throughput": "0.00", "total_tokens": 0}
|
591 |
+
{"current_steps": 1178, "total_steps": 3080, "loss": 0.3018, "learning_rate": 7.2704308143126035e-06, "epoch": 0.7645627129644653, "percentage": 38.25, "elapsed_time": "7:04:02", "remaining_time": "11:24:40", "throughput": "0.00", "total_tokens": 0}
|
592 |
+
{"current_steps": 1180, "total_steps": 3080, "loss": 0.2817, "learning_rate": 7.260859536413429e-06, "epoch": 0.7658607820866461, "percentage": 38.31, "elapsed_time": "7:04:41", "remaining_time": "11:23:50", "throughput": "0.00", "total_tokens": 0}
|
593 |
+
{"current_steps": 1182, "total_steps": 3080, "loss": 0.2765, "learning_rate": 7.251277833311565e-06, "epoch": 0.7671588512088269, "percentage": 38.38, "elapsed_time": "7:05:20", "remaining_time": "11:22:58", "throughput": "0.00", "total_tokens": 0}
|
594 |
+
{"current_steps": 1184, "total_steps": 3080, "loss": 0.2902, "learning_rate": 7.241685749189851e-06, "epoch": 0.7684569203310077, "percentage": 38.44, "elapsed_time": "7:06:06", "remaining_time": "11:22:20", "throughput": "0.00", "total_tokens": 0}
|
595 |
+
{"current_steps": 1186, "total_steps": 3080, "loss": 0.3157, "learning_rate": 7.2320833282789985e-06, "epoch": 0.7697549894531884, "percentage": 38.51, "elapsed_time": "7:06:47", "remaining_time": "11:21:34", "throughput": "0.00", "total_tokens": 0}
|
596 |
+
{"current_steps": 1188, "total_steps": 3080, "loss": 0.2672, "learning_rate": 7.22247061485738e-06, "epoch": 0.7710530585753691, "percentage": 38.57, "elapsed_time": "7:07:27", "remaining_time": "11:20:45", "throughput": "0.00", "total_tokens": 0}
|
597 |
+
{"current_steps": 1190, "total_steps": 3080, "loss": 0.2896, "learning_rate": 7.212847653250828e-06, "epoch": 0.7723511276975499, "percentage": 38.64, "elapsed_time": "7:08:04", "remaining_time": "11:19:52", "throughput": "0.00", "total_tokens": 0}
|
598 |
+
{"current_steps": 1192, "total_steps": 3080, "loss": 0.2868, "learning_rate": 7.203214487832437e-06, "epoch": 0.7736491968197307, "percentage": 38.7, "elapsed_time": "7:08:45", "remaining_time": "11:19:07", "throughput": "0.00", "total_tokens": 0}
|
599 |
+
{"current_steps": 1194, "total_steps": 3080, "loss": 0.2774, "learning_rate": 7.193571163022348e-06, "epoch": 0.7749472659419114, "percentage": 38.77, "elapsed_time": "7:09:23", "remaining_time": "11:18:15", "throughput": "0.00", "total_tokens": 0}
|
600 |
+
{"current_steps": 1196, "total_steps": 3080, "loss": 0.2881, "learning_rate": 7.18391772328755e-06, "epoch": 0.7762453350640922, "percentage": 38.83, "elapsed_time": "7:09:59", "remaining_time": "11:17:21", "throughput": "0.00", "total_tokens": 0}
|
601 |
+
{"current_steps": 1198, "total_steps": 3080, "loss": 0.29, "learning_rate": 7.174254213141671e-06, "epoch": 0.7775434041862729, "percentage": 38.9, "elapsed_time": "7:10:40", "remaining_time": "11:16:33", "throughput": "0.00", "total_tokens": 0}
|
602 |
+
{"current_steps": 1200, "total_steps": 3080, "loss": 0.3319, "learning_rate": 7.164580677144781e-06, "epoch": 0.7788414733084537, "percentage": 38.96, "elapsed_time": "7:11:22", "remaining_time": "11:15:49", "throughput": "0.00", "total_tokens": 0}
|