DongfuJiang commited on
Commit
a4c3020
1 Parent(s): be25e4c

Training in progress, step 2000

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +201 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:127e302458ff0cb917ce4722e01b5ff413ec0a4a20161741b57afcc1cdb01e9e
3
  size 54446840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97e4cb13fb4504b1c17b4b49cc10eeffccf7ea28fc3b4fc03ca2291317c0c63
3
  size 54446840
trainer_log.jsonl CHANGED
@@ -801,3 +801,204 @@
801
  {"current_steps": 1596, "total_steps": 3080, "loss": 0.3149, "learning_rate": 5.112727026523461e-06, "epoch": 1.0358591595002433, "percentage": 51.82, "elapsed_time": "9:37:54", "remaining_time": "8:57:21", "throughput": "0.00", "total_tokens": 0}
802
  {"current_steps": 1598, "total_steps": 3080, "loss": 0.2801, "learning_rate": 5.101992686957028e-06, "epoch": 1.0371572286224242, "percentage": 51.88, "elapsed_time": "9:38:40", "remaining_time": "8:56:40", "throughput": "0.00", "total_tokens": 0}
803
  {"current_steps": 1600, "total_steps": 3080, "loss": 0.2836, "learning_rate": 5.091257877085201e-06, "epoch": 1.0384552977446049, "percentage": 51.95, "elapsed_time": "9:39:17", "remaining_time": "8:55:50", "throughput": "0.00", "total_tokens": 0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
  {"current_steps": 1596, "total_steps": 3080, "loss": 0.3149, "learning_rate": 5.112727026523461e-06, "epoch": 1.0358591595002433, "percentage": 51.82, "elapsed_time": "9:37:54", "remaining_time": "8:57:21", "throughput": "0.00", "total_tokens": 0}
802
  {"current_steps": 1598, "total_steps": 3080, "loss": 0.2801, "learning_rate": 5.101992686957028e-06, "epoch": 1.0371572286224242, "percentage": 51.88, "elapsed_time": "9:38:40", "remaining_time": "8:56:40", "throughput": "0.00", "total_tokens": 0}
803
  {"current_steps": 1600, "total_steps": 3080, "loss": 0.2836, "learning_rate": 5.091257877085201e-06, "epoch": 1.0384552977446049, "percentage": 51.95, "elapsed_time": "9:39:17", "remaining_time": "8:55:50", "throughput": "0.00", "total_tokens": 0}
804
+ {"current_steps": 1602, "total_steps": 3080, "loss": 0.2795, "learning_rate": 5.080522646407987e-06, "epoch": 1.0397533668667855, "percentage": 52.01, "elapsed_time": "9:39:53", "remaining_time": "8:55:00", "throughput": "0.00", "total_tokens": 0}
805
+ {"current_steps": 1604, "total_steps": 3080, "loss": 0.2952, "learning_rate": 5.069787044427341e-06, "epoch": 1.0410514359889664, "percentage": 52.08, "elapsed_time": "9:40:34", "remaining_time": "8:54:14", "throughput": "0.00", "total_tokens": 0}
806
+ {"current_steps": 1606, "total_steps": 3080, "loss": 0.2812, "learning_rate": 5.059051120646924e-06, "epoch": 1.042349505111147, "percentage": 52.14, "elapsed_time": "9:41:12", "remaining_time": "8:53:26", "throughput": "0.00", "total_tokens": 0}
807
+ {"current_steps": 1608, "total_steps": 3080, "loss": 0.2785, "learning_rate": 5.0483149245718785e-06, "epoch": 1.043647574233328, "percentage": 52.21, "elapsed_time": "9:41:53", "remaining_time": "8:52:40", "throughput": "0.00", "total_tokens": 0}
808
+ {"current_steps": 1610, "total_steps": 3080, "loss": 0.274, "learning_rate": 5.03757850570861e-06, "epoch": 1.0449456433555087, "percentage": 52.27, "elapsed_time": "9:42:34", "remaining_time": "8:51:54", "throughput": "0.00", "total_tokens": 0}
809
+ {"current_steps": 1612, "total_steps": 3080, "loss": 0.2527, "learning_rate": 5.0268419135645455e-06, "epoch": 1.0462437124776893, "percentage": 52.34, "elapsed_time": "9:43:12", "remaining_time": "8:51:06", "throughput": "0.00", "total_tokens": 0}
810
+ {"current_steps": 1614, "total_steps": 3080, "loss": 0.2767, "learning_rate": 5.016105197647914e-06, "epoch": 1.0475417815998702, "percentage": 52.4, "elapsed_time": "9:43:51", "remaining_time": "8:50:18", "throughput": "0.00", "total_tokens": 0}
811
+ {"current_steps": 1616, "total_steps": 3080, "loss": 0.2833, "learning_rate": 5.005368407467515e-06, "epoch": 1.0488398507220509, "percentage": 52.47, "elapsed_time": "9:44:31", "remaining_time": "8:49:32", "throughput": "0.00", "total_tokens": 0}
812
+ {"current_steps": 1618, "total_steps": 3080, "loss": 0.2506, "learning_rate": 4.994631592532487e-06, "epoch": 1.0501379198442318, "percentage": 52.53, "elapsed_time": "9:45:08", "remaining_time": "8:48:43", "throughput": "0.00", "total_tokens": 0}
813
+ {"current_steps": 1620, "total_steps": 3080, "loss": 0.2526, "learning_rate": 4.9838948023520864e-06, "epoch": 1.0514359889664124, "percentage": 52.6, "elapsed_time": "9:45:50", "remaining_time": "8:47:58", "throughput": "0.00", "total_tokens": 0}
814
+ {"current_steps": 1622, "total_steps": 3080, "loss": 0.2794, "learning_rate": 4.9731580864354544e-06, "epoch": 1.052734058088593, "percentage": 52.66, "elapsed_time": "9:46:28", "remaining_time": "8:47:10", "throughput": "0.00", "total_tokens": 0}
815
+ {"current_steps": 1624, "total_steps": 3080, "loss": 0.2733, "learning_rate": 4.9624214942913916e-06, "epoch": 1.054032127210774, "percentage": 52.73, "elapsed_time": "9:47:10", "remaining_time": "8:46:26", "throughput": "0.00", "total_tokens": 0}
816
+ {"current_steps": 1626, "total_steps": 3080, "loss": 0.2948, "learning_rate": 4.951685075428122e-06, "epoch": 1.0553301963329547, "percentage": 52.79, "elapsed_time": "9:47:47", "remaining_time": "8:45:36", "throughput": "0.00", "total_tokens": 0}
817
+ {"current_steps": 1628, "total_steps": 3080, "loss": 0.2796, "learning_rate": 4.940948879353078e-06, "epoch": 1.0566282654551356, "percentage": 52.86, "elapsed_time": "9:48:28", "remaining_time": "8:44:51", "throughput": "0.00", "total_tokens": 0}
818
+ {"current_steps": 1630, "total_steps": 3080, "loss": 0.2853, "learning_rate": 4.9302129555726615e-06, "epoch": 1.0579263345773162, "percentage": 52.92, "elapsed_time": "9:49:08", "remaining_time": "8:44:05", "throughput": "0.00", "total_tokens": 0}
819
+ {"current_steps": 1632, "total_steps": 3080, "loss": 0.2773, "learning_rate": 4.919477353592015e-06, "epoch": 1.0592244036994969, "percentage": 52.99, "elapsed_time": "9:49:49", "remaining_time": "8:43:19", "throughput": "0.00", "total_tokens": 0}
820
+ {"current_steps": 1634, "total_steps": 3080, "loss": 0.2719, "learning_rate": 4.908742122914802e-06, "epoch": 1.0605224728216778, "percentage": 53.05, "elapsed_time": "9:50:31", "remaining_time": "8:42:34", "throughput": "0.00", "total_tokens": 0}
821
+ {"current_steps": 1636, "total_steps": 3080, "loss": 0.2871, "learning_rate": 4.898007313042975e-06, "epoch": 1.0618205419438584, "percentage": 53.12, "elapsed_time": "9:51:10", "remaining_time": "8:41:47", "throughput": "0.00", "total_tokens": 0}
822
+ {"current_steps": 1638, "total_steps": 3080, "loss": 0.2747, "learning_rate": 4.88727297347654e-06, "epoch": 1.0631186110660393, "percentage": 53.18, "elapsed_time": "9:51:49", "remaining_time": "8:41:00", "throughput": "0.00", "total_tokens": 0}
823
+ {"current_steps": 1640, "total_steps": 3080, "loss": 0.2833, "learning_rate": 4.87653915371334e-06, "epoch": 1.06441668018822, "percentage": 53.25, "elapsed_time": "9:52:28", "remaining_time": "8:40:13", "throughput": "0.00", "total_tokens": 0}
824
+ {"current_steps": 1642, "total_steps": 3080, "loss": 0.2863, "learning_rate": 4.8658059032488185e-06, "epoch": 1.0657147493104007, "percentage": 53.31, "elapsed_time": "9:53:07", "remaining_time": "8:39:26", "throughput": "0.00", "total_tokens": 0}
825
+ {"current_steps": 1644, "total_steps": 3080, "loss": 0.274, "learning_rate": 4.8550732715757965e-06, "epoch": 1.0670128184325816, "percentage": 53.38, "elapsed_time": "9:53:45", "remaining_time": "8:38:38", "throughput": "0.00", "total_tokens": 0}
826
+ {"current_steps": 1646, "total_steps": 3080, "loss": 0.276, "learning_rate": 4.8443413081842354e-06, "epoch": 1.0683108875547622, "percentage": 53.44, "elapsed_time": "9:54:26", "remaining_time": "8:37:53", "throughput": "0.00", "total_tokens": 0}
827
+ {"current_steps": 1648, "total_steps": 3080, "loss": 0.2734, "learning_rate": 4.833610062561022e-06, "epoch": 1.0696089566769431, "percentage": 53.51, "elapsed_time": "9:55:11", "remaining_time": "8:37:10", "throughput": "0.00", "total_tokens": 0}
828
+ {"current_steps": 1650, "total_steps": 3080, "loss": 0.3016, "learning_rate": 4.822879584189732e-06, "epoch": 1.0709070257991238, "percentage": 53.57, "elapsed_time": "9:55:52", "remaining_time": "8:36:25", "throughput": "0.00", "total_tokens": 0}
829
+ {"current_steps": 1652, "total_steps": 3080, "loss": 0.3063, "learning_rate": 4.8121499225503974e-06, "epoch": 1.0722050949213044, "percentage": 53.64, "elapsed_time": "9:56:31", "remaining_time": "8:35:38", "throughput": "0.00", "total_tokens": 0}
830
+ {"current_steps": 1654, "total_steps": 3080, "loss": 0.2582, "learning_rate": 4.8014211271192914e-06, "epoch": 1.0735031640434853, "percentage": 53.7, "elapsed_time": "9:57:11", "remaining_time": "8:34:52", "throughput": "0.00", "total_tokens": 0}
831
+ {"current_steps": 1656, "total_steps": 3080, "loss": 0.2583, "learning_rate": 4.79069324736869e-06, "epoch": 1.074801233165666, "percentage": 53.77, "elapsed_time": "9:57:49", "remaining_time": "8:34:04", "throughput": "0.00", "total_tokens": 0}
832
+ {"current_steps": 1658, "total_steps": 3080, "loss": 0.2762, "learning_rate": 4.7799663327666455e-06, "epoch": 1.076099302287847, "percentage": 53.83, "elapsed_time": "9:58:29", "remaining_time": "8:33:18", "throughput": "0.00", "total_tokens": 0}
833
+ {"current_steps": 1660, "total_steps": 3080, "loss": 0.2742, "learning_rate": 4.7692404327767615e-06, "epoch": 1.0773973714100276, "percentage": 53.9, "elapsed_time": "9:59:09", "remaining_time": "8:32:32", "throughput": "0.00", "total_tokens": 0}
834
+ {"current_steps": 1662, "total_steps": 3080, "loss": 0.2769, "learning_rate": 4.758515596857965e-06, "epoch": 1.0786954405322082, "percentage": 53.96, "elapsed_time": "9:59:50", "remaining_time": "8:31:46", "throughput": "0.00", "total_tokens": 0}
835
+ {"current_steps": 1664, "total_steps": 3080, "loss": 0.2665, "learning_rate": 4.747791874464269e-06, "epoch": 1.0799935096543891, "percentage": 54.03, "elapsed_time": "10:00:29", "remaining_time": "8:31:00", "throughput": "0.00", "total_tokens": 0}
836
+ {"current_steps": 1666, "total_steps": 3080, "loss": 0.312, "learning_rate": 4.737069315044562e-06, "epoch": 1.0812915787765698, "percentage": 54.09, "elapsed_time": "10:01:06", "remaining_time": "8:30:11", "throughput": "0.00", "total_tokens": 0}
837
+ {"current_steps": 1668, "total_steps": 3080, "loss": 0.2749, "learning_rate": 4.726347968042364e-06, "epoch": 1.0825896478987507, "percentage": 54.16, "elapsed_time": "10:01:48", "remaining_time": "8:29:26", "throughput": "0.00", "total_tokens": 0}
838
+ {"current_steps": 1670, "total_steps": 3080, "loss": 0.2831, "learning_rate": 4.7156278828956005e-06, "epoch": 1.0838877170209313, "percentage": 54.22, "elapsed_time": "10:02:29", "remaining_time": "8:28:41", "throughput": "0.00", "total_tokens": 0}
839
+ {"current_steps": 1672, "total_steps": 3080, "loss": 0.2789, "learning_rate": 4.704909109036387e-06, "epoch": 1.085185786143112, "percentage": 54.29, "elapsed_time": "10:03:09", "remaining_time": "8:27:54", "throughput": "0.00", "total_tokens": 0}
840
+ {"current_steps": 1674, "total_steps": 3080, "loss": 0.2708, "learning_rate": 4.694191695890788e-06, "epoch": 1.086483855265293, "percentage": 54.35, "elapsed_time": "10:03:50", "remaining_time": "8:27:10", "throughput": "0.00", "total_tokens": 0}
841
+ {"current_steps": 1676, "total_steps": 3080, "loss": 0.2965, "learning_rate": 4.683475692878592e-06, "epoch": 1.0877819243874736, "percentage": 54.42, "elapsed_time": "10:04:38", "remaining_time": "8:26:30", "throughput": "0.00", "total_tokens": 0}
842
+ {"current_steps": 1678, "total_steps": 3080, "loss": 0.2759, "learning_rate": 4.672761149413089e-06, "epoch": 1.0890799935096545, "percentage": 54.48, "elapsed_time": "10:05:12", "remaining_time": "8:25:40", "throughput": "0.00", "total_tokens": 0}
843
+ {"current_steps": 1680, "total_steps": 3080, "loss": 0.3137, "learning_rate": 4.662048114900837e-06, "epoch": 1.0903780626318351, "percentage": 54.55, "elapsed_time": "10:05:56", "remaining_time": "8:24:57", "throughput": "0.00", "total_tokens": 0}
844
+ {"current_steps": 1682, "total_steps": 3080, "loss": 0.2422, "learning_rate": 4.651336638741433e-06, "epoch": 1.0916761317540158, "percentage": 54.61, "elapsed_time": "10:06:39", "remaining_time": "8:24:13", "throughput": "0.00", "total_tokens": 0}
845
+ {"current_steps": 1684, "total_steps": 3080, "loss": 0.2581, "learning_rate": 4.640626770327293e-06, "epoch": 1.0929742008761967, "percentage": 54.68, "elapsed_time": "10:07:19", "remaining_time": "8:23:27", "throughput": "0.00", "total_tokens": 0}
846
+ {"current_steps": 1686, "total_steps": 3080, "loss": 0.281, "learning_rate": 4.6299185590434185e-06, "epoch": 1.0942722699983773, "percentage": 54.74, "elapsed_time": "10:08:01", "remaining_time": "8:22:43", "throughput": "0.00", "total_tokens": 0}
847
+ {"current_steps": 1688, "total_steps": 3080, "loss": 0.2772, "learning_rate": 4.6192120542671665e-06, "epoch": 1.0955703391205582, "percentage": 54.81, "elapsed_time": "10:08:42", "remaining_time": "8:21:58", "throughput": "0.00", "total_tokens": 0}
848
+ {"current_steps": 1690, "total_steps": 3080, "loss": 0.2942, "learning_rate": 4.608507305368027e-06, "epoch": 1.096868408242739, "percentage": 54.87, "elapsed_time": "10:09:26", "remaining_time": "8:21:15", "throughput": "0.00", "total_tokens": 0}
849
+ {"current_steps": 1692, "total_steps": 3080, "loss": 0.2694, "learning_rate": 4.597804361707395e-06, "epoch": 1.0981664773649196, "percentage": 54.94, "elapsed_time": "10:10:07", "remaining_time": "8:20:30", "throughput": "0.00", "total_tokens": 0}
850
+ {"current_steps": 1694, "total_steps": 3080, "loss": 0.3027, "learning_rate": 4.587103272638339e-06, "epoch": 1.0994645464871005, "percentage": 55.0, "elapsed_time": "10:10:49", "remaining_time": "8:19:46", "throughput": "0.00", "total_tokens": 0}
851
+ {"current_steps": 1696, "total_steps": 3080, "loss": 0.268, "learning_rate": 4.576404087505375e-06, "epoch": 1.1007626156092811, "percentage": 55.06, "elapsed_time": "10:11:30", "remaining_time": "8:19:00", "throughput": "0.00", "total_tokens": 0}
852
+ {"current_steps": 1698, "total_steps": 3080, "loss": 0.2621, "learning_rate": 4.565706855644242e-06, "epoch": 1.102060684731462, "percentage": 55.13, "elapsed_time": "10:12:11", "remaining_time": "8:18:16", "throughput": "0.00", "total_tokens": 0}
853
+ {"current_steps": 1700, "total_steps": 3080, "loss": 0.2934, "learning_rate": 4.555011626381674e-06, "epoch": 1.1033587538536427, "percentage": 55.19, "elapsed_time": "10:12:52", "remaining_time": "8:17:30", "throughput": "0.00", "total_tokens": 0}
854
+ {"current_steps": 1702, "total_steps": 3080, "loss": 0.2828, "learning_rate": 4.544318449035163e-06, "epoch": 1.1046568229758233, "percentage": 55.26, "elapsed_time": "10:13:33", "remaining_time": "8:16:45", "throughput": "0.00", "total_tokens": 0}
855
+ {"current_steps": 1704, "total_steps": 3080, "loss": 0.2614, "learning_rate": 4.533627372912744e-06, "epoch": 1.1059548920980042, "percentage": 55.32, "elapsed_time": "10:14:13", "remaining_time": "8:15:59", "throughput": "0.00", "total_tokens": 0}
856
+ {"current_steps": 1706, "total_steps": 3080, "loss": 0.2555, "learning_rate": 4.522938447312768e-06, "epoch": 1.107252961220185, "percentage": 55.39, "elapsed_time": "10:14:52", "remaining_time": "8:15:13", "throughput": "0.00", "total_tokens": 0}
857
+ {"current_steps": 1708, "total_steps": 3080, "loss": 0.2877, "learning_rate": 4.512251721523659e-06, "epoch": 1.1085510303423658, "percentage": 55.45, "elapsed_time": "10:15:30", "remaining_time": "8:14:25", "throughput": "0.00", "total_tokens": 0}
858
+ {"current_steps": 1710, "total_steps": 3080, "loss": 0.2827, "learning_rate": 4.501567244823706e-06, "epoch": 1.1098490994645465, "percentage": 55.52, "elapsed_time": "10:16:09", "remaining_time": "8:13:38", "throughput": "0.00", "total_tokens": 0}
859
+ {"current_steps": 1712, "total_steps": 3080, "loss": 0.2775, "learning_rate": 4.4908850664808245e-06, "epoch": 1.1111471685867271, "percentage": 55.58, "elapsed_time": "10:16:48", "remaining_time": "8:12:52", "throughput": "0.00", "total_tokens": 0}
860
+ {"current_steps": 1714, "total_steps": 3080, "loss": 0.2755, "learning_rate": 4.480205235752328e-06, "epoch": 1.112445237708908, "percentage": 55.65, "elapsed_time": "10:17:27", "remaining_time": "8:12:05", "throughput": "0.00", "total_tokens": 0}
861
+ {"current_steps": 1716, "total_steps": 3080, "loss": 0.2816, "learning_rate": 4.46952780188471e-06, "epoch": 1.1137433068310887, "percentage": 55.71, "elapsed_time": "10:18:04", "remaining_time": "8:11:17", "throughput": "0.00", "total_tokens": 0}
862
+ {"current_steps": 1718, "total_steps": 3080, "loss": 0.2905, "learning_rate": 4.458852814113412e-06, "epoch": 1.1150413759532696, "percentage": 55.78, "elapsed_time": "10:18:44", "remaining_time": "8:10:31", "throughput": "0.00", "total_tokens": 0}
863
+ {"current_steps": 1720, "total_steps": 3080, "loss": 0.2674, "learning_rate": 4.448180321662588e-06, "epoch": 1.1163394450754502, "percentage": 55.84, "elapsed_time": "10:19:25", "remaining_time": "8:09:46", "throughput": "0.00", "total_tokens": 0}
864
+ {"current_steps": 1722, "total_steps": 3080, "loss": 0.276, "learning_rate": 4.437510373744897e-06, "epoch": 1.117637514197631, "percentage": 55.91, "elapsed_time": "10:20:05", "remaining_time": "8:09:01", "throughput": "0.00", "total_tokens": 0}
865
+ {"current_steps": 1724, "total_steps": 3080, "loss": 0.2707, "learning_rate": 4.426843019561259e-06, "epoch": 1.1189355833198118, "percentage": 55.97, "elapsed_time": "10:20:46", "remaining_time": "8:08:15", "throughput": "0.00", "total_tokens": 0}
866
+ {"current_steps": 1726, "total_steps": 3080, "loss": 0.2641, "learning_rate": 4.41617830830063e-06, "epoch": 1.1202336524419925, "percentage": 56.04, "elapsed_time": "10:21:23", "remaining_time": "8:07:27", "throughput": "0.00", "total_tokens": 0}
867
+ {"current_steps": 1728, "total_steps": 3080, "loss": 0.2667, "learning_rate": 4.405516289139785e-06, "epoch": 1.1215317215641734, "percentage": 56.1, "elapsed_time": "10:22:06", "remaining_time": "8:06:44", "throughput": "0.00", "total_tokens": 0}
868
+ {"current_steps": 1730, "total_steps": 3080, "loss": 0.2707, "learning_rate": 4.394857011243084e-06, "epoch": 1.122829790686354, "percentage": 56.17, "elapsed_time": "10:22:45", "remaining_time": "8:05:58", "throughput": "0.00", "total_tokens": 0}
869
+ {"current_steps": 1732, "total_steps": 3080, "loss": 0.287, "learning_rate": 4.384200523762249e-06, "epoch": 1.1241278598085347, "percentage": 56.23, "elapsed_time": "10:23:23", "remaining_time": "8:05:10", "throughput": "0.00", "total_tokens": 0}
870
+ {"current_steps": 1734, "total_steps": 3080, "loss": 0.2955, "learning_rate": 4.373546875836125e-06, "epoch": 1.1254259289307156, "percentage": 56.3, "elapsed_time": "10:24:05", "remaining_time": "8:04:27", "throughput": "0.00", "total_tokens": 0}
871
+ {"current_steps": 1736, "total_steps": 3080, "loss": 0.2633, "learning_rate": 4.362896116590475e-06, "epoch": 1.1267239980528962, "percentage": 56.36, "elapsed_time": "10:24:44", "remaining_time": "8:03:39", "throughput": "0.00", "total_tokens": 0}
872
+ {"current_steps": 1738, "total_steps": 3080, "loss": 0.2882, "learning_rate": 4.352248295137739e-06, "epoch": 1.1280220671750771, "percentage": 56.43, "elapsed_time": "10:25:22", "remaining_time": "8:02:52", "throughput": "0.00", "total_tokens": 0}
873
+ {"current_steps": 1740, "total_steps": 3080, "loss": 0.3057, "learning_rate": 4.3416034605768035e-06, "epoch": 1.1293201362972578, "percentage": 56.49, "elapsed_time": "10:26:07", "remaining_time": "8:02:11", "throughput": "0.00", "total_tokens": 0}
874
+ {"current_steps": 1742, "total_steps": 3080, "loss": 0.308, "learning_rate": 4.330961661992788e-06, "epoch": 1.1306182054194385, "percentage": 56.56, "elapsed_time": "10:26:45", "remaining_time": "8:01:24", "throughput": "0.00", "total_tokens": 0}
875
+ {"current_steps": 1744, "total_steps": 3080, "loss": 0.2698, "learning_rate": 4.320322948456814e-06, "epoch": 1.1319162745416194, "percentage": 56.62, "elapsed_time": "10:27:23", "remaining_time": "8:00:37", "throughput": "0.00", "total_tokens": 0}
876
+ {"current_steps": 1746, "total_steps": 3080, "loss": 0.3272, "learning_rate": 4.309687369025772e-06, "epoch": 1.1332143436638, "percentage": 56.69, "elapsed_time": "10:28:03", "remaining_time": "7:59:51", "throughput": "0.00", "total_tokens": 0}
877
+ {"current_steps": 1748, "total_steps": 3080, "loss": 0.2695, "learning_rate": 4.299054972742104e-06, "epoch": 1.134512412785981, "percentage": 56.75, "elapsed_time": "10:28:43", "remaining_time": "7:59:05", "throughput": "0.00", "total_tokens": 0}
878
+ {"current_steps": 1750, "total_steps": 3080, "loss": 0.2815, "learning_rate": 4.2884258086335755e-06, "epoch": 1.1358104819081616, "percentage": 56.82, "elapsed_time": "10:29:22", "remaining_time": "7:58:19", "throughput": "0.00", "total_tokens": 0}
879
+ {"current_steps": 1752, "total_steps": 3080, "loss": 0.3087, "learning_rate": 4.277799925713043e-06, "epoch": 1.1371085510303423, "percentage": 56.88, "elapsed_time": "10:30:01", "remaining_time": "7:57:33", "throughput": "0.00", "total_tokens": 0}
880
+ {"current_steps": 1754, "total_steps": 3080, "loss": 0.2863, "learning_rate": 4.267177372978236e-06, "epoch": 1.1384066201525231, "percentage": 56.95, "elapsed_time": "10:30:37", "remaining_time": "7:56:44", "throughput": "0.00", "total_tokens": 0}
881
+ {"current_steps": 1756, "total_steps": 3080, "loss": 0.2772, "learning_rate": 4.25655819941153e-06, "epoch": 1.1397046892747038, "percentage": 57.01, "elapsed_time": "10:31:21", "remaining_time": "7:56:01", "throughput": "0.00", "total_tokens": 0}
882
+ {"current_steps": 1758, "total_steps": 3080, "loss": 0.2862, "learning_rate": 4.245942453979713e-06, "epoch": 1.1410027583968847, "percentage": 57.08, "elapsed_time": "10:31:58", "remaining_time": "7:55:14", "throughput": "0.00", "total_tokens": 0}
883
+ {"current_steps": 1760, "total_steps": 3080, "loss": 0.3257, "learning_rate": 4.23533018563377e-06, "epoch": 1.1423008275190654, "percentage": 57.14, "elapsed_time": "10:32:38", "remaining_time": "7:54:28", "throughput": "0.00", "total_tokens": 0}
884
+ {"current_steps": 1762, "total_steps": 3080, "loss": 0.2597, "learning_rate": 4.224721443308654e-06, "epoch": 1.143598896641246, "percentage": 57.21, "elapsed_time": "10:33:16", "remaining_time": "7:53:41", "throughput": "0.00", "total_tokens": 0}
885
+ {"current_steps": 1764, "total_steps": 3080, "loss": 0.2716, "learning_rate": 4.214116275923051e-06, "epoch": 1.144896965763427, "percentage": 57.27, "elapsed_time": "10:34:00", "remaining_time": "7:52:59", "throughput": "0.00", "total_tokens": 0}
886
+ {"current_steps": 1766, "total_steps": 3080, "loss": 0.2936, "learning_rate": 4.203514732379168e-06, "epoch": 1.1461950348856076, "percentage": 57.34, "elapsed_time": "10:34:39", "remaining_time": "7:52:12", "throughput": "0.00", "total_tokens": 0}
887
+ {"current_steps": 1768, "total_steps": 3080, "loss": 0.2811, "learning_rate": 4.192916861562503e-06, "epoch": 1.1474931040077885, "percentage": 57.4, "elapsed_time": "10:35:14", "remaining_time": "7:51:24", "throughput": "0.00", "total_tokens": 0}
888
+ {"current_steps": 1770, "total_steps": 3080, "loss": 0.2633, "learning_rate": 4.1823227123416185e-06, "epoch": 1.1487911731299691, "percentage": 57.47, "elapsed_time": "10:35:55", "remaining_time": "7:50:39", "throughput": "0.00", "total_tokens": 0}
889
+ {"current_steps": 1772, "total_steps": 3080, "loss": 0.3023, "learning_rate": 4.171732333567911e-06, "epoch": 1.1500892422521498, "percentage": 57.53, "elapsed_time": "10:36:34", "remaining_time": "7:49:52", "throughput": "0.00", "total_tokens": 0}
890
+ {"current_steps": 1774, "total_steps": 3080, "loss": 0.2649, "learning_rate": 4.161145774075393e-06, "epoch": 1.1513873113743307, "percentage": 57.6, "elapsed_time": "10:37:15", "remaining_time": "7:49:08", "throughput": "0.00", "total_tokens": 0}
891
+ {"current_steps": 1776, "total_steps": 3080, "loss": 0.2864, "learning_rate": 4.150563082680472e-06, "epoch": 1.1526853804965114, "percentage": 57.66, "elapsed_time": "10:37:57", "remaining_time": "7:48:24", "throughput": "0.00", "total_tokens": 0}
892
+ {"current_steps": 1778, "total_steps": 3080, "loss": 0.2533, "learning_rate": 4.1399843081817085e-06, "epoch": 1.1539834496186923, "percentage": 57.73, "elapsed_time": "10:38:36", "remaining_time": "7:47:38", "throughput": "0.00", "total_tokens": 0}
893
+ {"current_steps": 1780, "total_steps": 3080, "loss": 0.2735, "learning_rate": 4.129409499359609e-06, "epoch": 1.155281518740873, "percentage": 57.79, "elapsed_time": "10:39:15", "remaining_time": "7:46:52", "throughput": "0.00", "total_tokens": 0}
894
+ {"current_steps": 1782, "total_steps": 3080, "loss": 0.2877, "learning_rate": 4.118838704976392e-06, "epoch": 1.1565795878630536, "percentage": 57.86, "elapsed_time": "10:39:55", "remaining_time": "7:46:06", "throughput": "0.00", "total_tokens": 0}
895
+ {"current_steps": 1784, "total_steps": 3080, "loss": 0.266, "learning_rate": 4.108271973775763e-06, "epoch": 1.1578776569852345, "percentage": 57.92, "elapsed_time": "10:40:33", "remaining_time": "7:45:20", "throughput": "0.00", "total_tokens": 0}
896
+ {"current_steps": 1786, "total_steps": 3080, "loss": 0.2759, "learning_rate": 4.097709354482693e-06, "epoch": 1.1591757261074151, "percentage": 57.99, "elapsed_time": "10:41:17", "remaining_time": "7:44:38", "throughput": "0.00", "total_tokens": 0}
897
+ {"current_steps": 1788, "total_steps": 3080, "loss": 0.2706, "learning_rate": 4.087150895803192e-06, "epoch": 1.160473795229596, "percentage": 58.05, "elapsed_time": "10:41:57", "remaining_time": "7:43:52", "throughput": "0.00", "total_tokens": 0}
898
+ {"current_steps": 1790, "total_steps": 3080, "loss": 0.3122, "learning_rate": 4.076596646424085e-06, "epoch": 1.1617718643517767, "percentage": 58.12, "elapsed_time": "10:42:39", "remaining_time": "7:43:08", "throughput": "0.00", "total_tokens": 0}
899
+ {"current_steps": 1792, "total_steps": 3080, "loss": 0.2767, "learning_rate": 4.066046655012786e-06, "epoch": 1.1630699334739574, "percentage": 58.18, "elapsed_time": "10:43:17", "remaining_time": "7:42:22", "throughput": "0.00", "total_tokens": 0}
900
+ {"current_steps": 1794, "total_steps": 3080, "loss": 0.2992, "learning_rate": 4.055500970217078e-06, "epoch": 1.1643680025961383, "percentage": 58.25, "elapsed_time": "10:43:56", "remaining_time": "7:41:36", "throughput": "0.00", "total_tokens": 0}
901
+ {"current_steps": 1796, "total_steps": 3080, "loss": 0.3051, "learning_rate": 4.044959640664878e-06, "epoch": 1.165666071718319, "percentage": 58.31, "elapsed_time": "10:44:40", "remaining_time": "7:40:53", "throughput": "0.00", "total_tokens": 0}
902
+ {"current_steps": 1798, "total_steps": 3080, "loss": 0.2531, "learning_rate": 4.03442271496403e-06, "epoch": 1.1669641408404998, "percentage": 58.38, "elapsed_time": "10:45:22", "remaining_time": "7:40:10", "throughput": "0.00", "total_tokens": 0}
903
+ {"current_steps": 1800, "total_steps": 3080, "loss": 0.3112, "learning_rate": 4.023890241702068e-06, "epoch": 1.1682622099626805, "percentage": 58.44, "elapsed_time": "10:46:07", "remaining_time": "7:39:28", "throughput": "0.00", "total_tokens": 0}
904
+ {"current_steps": 1802, "total_steps": 3080, "loss": 0.2668, "learning_rate": 4.013362269445988e-06, "epoch": 1.1695602790848612, "percentage": 58.51, "elapsed_time": "10:46:46", "remaining_time": "7:38:42", "throughput": "0.00", "total_tokens": 0}
905
+ {"current_steps": 1804, "total_steps": 3080, "loss": 0.2614, "learning_rate": 4.002838846742039e-06, "epoch": 1.170858348207042, "percentage": 58.57, "elapsed_time": "10:47:25", "remaining_time": "7:37:56", "throughput": "0.00", "total_tokens": 0}
906
+ {"current_steps": 1806, "total_steps": 3080, "loss": 0.2542, "learning_rate": 3.992320022115492e-06, "epoch": 1.1721564173292227, "percentage": 58.64, "elapsed_time": "10:48:07", "remaining_time": "7:37:12", "throughput": "0.00", "total_tokens": 0}
907
+ {"current_steps": 1808, "total_steps": 3080, "loss": 0.2874, "learning_rate": 3.981805844070407e-06, "epoch": 1.1734544864514036, "percentage": 58.7, "elapsed_time": "10:48:46", "remaining_time": "7:36:26", "throughput": "0.00", "total_tokens": 0}
908
+ {"current_steps": 1810, "total_steps": 3080, "loss": 0.2684, "learning_rate": 3.971296361089427e-06, "epoch": 1.1747525555735843, "percentage": 58.77, "elapsed_time": "10:49:26", "remaining_time": "7:35:40", "throughput": "0.00", "total_tokens": 0}
909
+ {"current_steps": 1812, "total_steps": 3080, "loss": 0.2974, "learning_rate": 3.960791621633539e-06, "epoch": 1.176050624695765, "percentage": 58.83, "elapsed_time": "10:50:03", "remaining_time": "7:34:53", "throughput": "0.00", "total_tokens": 0}
910
+ {"current_steps": 1814, "total_steps": 3080, "loss": 0.2726, "learning_rate": 3.950291674141863e-06, "epoch": 1.1773486938179458, "percentage": 58.9, "elapsed_time": "10:50:43", "remaining_time": "7:34:08", "throughput": "0.00", "total_tokens": 0}
911
+ {"current_steps": 1816, "total_steps": 3080, "loss": 0.268, "learning_rate": 3.939796567031414e-06, "epoch": 1.1786467629401265, "percentage": 58.96, "elapsed_time": "10:51:24", "remaining_time": "7:33:24", "throughput": "0.00", "total_tokens": 0}
912
+ {"current_steps": 1818, "total_steps": 3080, "loss": 0.2827, "learning_rate": 3.929306348696896e-06, "epoch": 1.1799448320623074, "percentage": 59.03, "elapsed_time": "10:52:05", "remaining_time": "7:32:39", "throughput": "0.00", "total_tokens": 0}
913
+ {"current_steps": 1820, "total_steps": 3080, "loss": 0.2608, "learning_rate": 3.918821067510464e-06, "epoch": 1.181242901184488, "percentage": 59.09, "elapsed_time": "10:52:48", "remaining_time": "7:31:56", "throughput": "0.00", "total_tokens": 0}
914
+ {"current_steps": 1822, "total_steps": 3080, "loss": 0.2622, "learning_rate": 3.908340771821509e-06, "epoch": 1.1825409703066687, "percentage": 59.16, "elapsed_time": "10:53:27", "remaining_time": "7:31:10", "throughput": "0.00", "total_tokens": 0}
915
+ {"current_steps": 1824, "total_steps": 3080, "loss": 0.3024, "learning_rate": 3.897865509956434e-06, "epoch": 1.1838390394288496, "percentage": 59.22, "elapsed_time": "10:54:06", "remaining_time": "7:30:25", "throughput": "0.00", "total_tokens": 0}
916
+ {"current_steps": 1826, "total_steps": 3080, "loss": 0.2898, "learning_rate": 3.887395330218429e-06, "epoch": 1.1851371085510303, "percentage": 59.29, "elapsed_time": "10:54:52", "remaining_time": "7:29:43", "throughput": "0.00", "total_tokens": 0}
917
+ {"current_steps": 1828, "total_steps": 3080, "loss": 0.2924, "learning_rate": 3.876930280887247e-06, "epoch": 1.1864351776732112, "percentage": 59.35, "elapsed_time": "10:55:31", "remaining_time": "7:28:57", "throughput": "0.00", "total_tokens": 0}
918
+ {"current_steps": 1830, "total_steps": 3080, "loss": 0.2841, "learning_rate": 3.86647041021899e-06, "epoch": 1.1877332467953918, "percentage": 59.42, "elapsed_time": "10:56:13", "remaining_time": "7:28:14", "throughput": "0.00", "total_tokens": 0}
919
+ {"current_steps": 1832, "total_steps": 3080, "loss": 0.2948, "learning_rate": 3.856015766445877e-06, "epoch": 1.1890313159175725, "percentage": 59.48, "elapsed_time": "10:56:50", "remaining_time": "7:27:27", "throughput": "0.00", "total_tokens": 0}
920
+ {"current_steps": 1834, "total_steps": 3080, "loss": 0.297, "learning_rate": 3.845566397776022e-06, "epoch": 1.1903293850397534, "percentage": 59.55, "elapsed_time": "10:57:31", "remaining_time": "7:26:42", "throughput": "0.00", "total_tokens": 0}
921
+ {"current_steps": 1836, "total_steps": 3080, "loss": 0.2769, "learning_rate": 3.835122352393216e-06, "epoch": 1.191627454161934, "percentage": 59.61, "elapsed_time": "10:58:08", "remaining_time": "7:25:55", "throughput": "0.00", "total_tokens": 0}
922
+ {"current_steps": 1838, "total_steps": 3080, "loss": 0.2632, "learning_rate": 3.824683678456711e-06, "epoch": 1.192925523284115, "percentage": 59.68, "elapsed_time": "10:58:47", "remaining_time": "7:25:10", "throughput": "0.00", "total_tokens": 0}
923
+ {"current_steps": 1840, "total_steps": 3080, "loss": 0.2976, "learning_rate": 3.814250424100978e-06, "epoch": 1.1942235924062956, "percentage": 59.74, "elapsed_time": "10:59:26", "remaining_time": "7:24:24", "throughput": "0.00", "total_tokens": 0}
924
+ {"current_steps": 1842, "total_steps": 3080, "loss": 0.2684, "learning_rate": 3.8038226374355064e-06, "epoch": 1.1955216615284763, "percentage": 59.81, "elapsed_time": "11:00:03", "remaining_time": "7:23:37", "throughput": "0.00", "total_tokens": 0}
925
+ {"current_steps": 1844, "total_steps": 3080, "loss": 0.2844, "learning_rate": 3.79340036654457e-06, "epoch": 1.1968197306506572, "percentage": 59.87, "elapsed_time": "11:00:43", "remaining_time": "7:22:52", "throughput": "0.00", "total_tokens": 0}
926
+ {"current_steps": 1846, "total_steps": 3080, "loss": 0.2697, "learning_rate": 3.7829836594870077e-06, "epoch": 1.1981177997728378, "percentage": 59.94, "elapsed_time": "11:01:22", "remaining_time": "7:22:06", "throughput": "0.00", "total_tokens": 0}
927
+ {"current_steps": 1848, "total_steps": 3080, "loss": 0.2814, "learning_rate": 3.7725725642960047e-06, "epoch": 1.1994158688950187, "percentage": 60.0, "elapsed_time": "11:02:02", "remaining_time": "7:21:21", "throughput": "0.00", "total_tokens": 0}
928
+ {"current_steps": 1850, "total_steps": 3080, "loss": 0.2788, "learning_rate": 3.7621671289788685e-06, "epoch": 1.2007139380171994, "percentage": 60.06, "elapsed_time": "11:02:43", "remaining_time": "7:20:37", "throughput": "0.00", "total_tokens": 0}
929
+ {"current_steps": 1852, "total_steps": 3080, "loss": 0.2811, "learning_rate": 3.751767401516805e-06, "epoch": 1.20201200713938, "percentage": 60.13, "elapsed_time": "11:03:24", "remaining_time": "7:19:53", "throughput": "0.00", "total_tokens": 0}
930
+ {"current_steps": 1854, "total_steps": 3080, "loss": 0.2627, "learning_rate": 3.741373429864704e-06, "epoch": 1.203310076261561, "percentage": 60.19, "elapsed_time": "11:04:02", "remaining_time": "7:19:06", "throughput": "0.00", "total_tokens": 0}
931
+ {"current_steps": 1856, "total_steps": 3080, "loss": 0.29, "learning_rate": 3.7309852619509136e-06, "epoch": 1.2046081453837416, "percentage": 60.26, "elapsed_time": "11:04:44", "remaining_time": "7:18:22", "throughput": "0.00", "total_tokens": 0}
932
+ {"current_steps": 1858, "total_steps": 3080, "loss": 0.2439, "learning_rate": 3.7206029456770186e-06, "epoch": 1.2059062145059225, "percentage": 60.32, "elapsed_time": "11:05:24", "remaining_time": "7:17:38", "throughput": "0.00", "total_tokens": 0}
933
+ {"current_steps": 1860, "total_steps": 3080, "loss": 0.2674, "learning_rate": 3.71022652891762e-06, "epoch": 1.2072042836281032, "percentage": 60.39, "elapsed_time": "11:06:05", "remaining_time": "7:16:54", "throughput": "0.00", "total_tokens": 0}
934
+ {"current_steps": 1862, "total_steps": 3080, "loss": 0.2725, "learning_rate": 3.6998560595201188e-06, "epoch": 1.2085023527502838, "percentage": 60.45, "elapsed_time": "11:06:42", "remaining_time": "7:16:07", "throughput": "0.00", "total_tokens": 0}
935
+ {"current_steps": 1864, "total_steps": 3080, "loss": 0.2742, "learning_rate": 3.689491585304491e-06, "epoch": 1.2098004218724647, "percentage": 60.52, "elapsed_time": "11:07:24", "remaining_time": "7:15:23", "throughput": "0.00", "total_tokens": 0}
936
+ {"current_steps": 1866, "total_steps": 3080, "loss": 0.2821, "learning_rate": 3.6791331540630597e-06, "epoch": 1.2110984909946454, "percentage": 60.58, "elapsed_time": "11:08:06", "remaining_time": "7:14:40", "throughput": "0.00", "total_tokens": 0}
937
+ {"current_steps": 1868, "total_steps": 3080, "loss": 0.2746, "learning_rate": 3.6687808135602955e-06, "epoch": 1.2123965601168263, "percentage": 60.65, "elapsed_time": "11:08:45", "remaining_time": "7:13:54", "throughput": "0.00", "total_tokens": 0}
938
+ {"current_steps": 1870, "total_steps": 3080, "loss": 0.2586, "learning_rate": 3.658434611532578e-06, "epoch": 1.213694629239007, "percentage": 60.71, "elapsed_time": "11:09:25", "remaining_time": "7:13:09", "throughput": "0.00", "total_tokens": 0}
939
+ {"current_steps": 1872, "total_steps": 3080, "loss": 0.2636, "learning_rate": 3.6480945956879765e-06, "epoch": 1.2149926983611876, "percentage": 60.78, "elapsed_time": "11:10:04", "remaining_time": "7:12:24", "throughput": "0.00", "total_tokens": 0}
940
+ {"current_steps": 1874, "total_steps": 3080, "loss": 0.2806, "learning_rate": 3.637760813706041e-06, "epoch": 1.2162907674833685, "percentage": 60.84, "elapsed_time": "11:10:41", "remaining_time": "7:11:36", "throughput": "0.00", "total_tokens": 0}
941
+ {"current_steps": 1876, "total_steps": 3080, "loss": 0.2613, "learning_rate": 3.627433313237576e-06, "epoch": 1.2175888366055492, "percentage": 60.91, "elapsed_time": "11:11:17", "remaining_time": "7:10:49", "throughput": "0.00", "total_tokens": 0}
942
+ {"current_steps": 1878, "total_steps": 3080, "loss": 0.2906, "learning_rate": 3.617112141904416e-06, "epoch": 1.21888690572773, "percentage": 60.97, "elapsed_time": "11:11:59", "remaining_time": "7:10:06", "throughput": "0.00", "total_tokens": 0}
943
+ {"current_steps": 1880, "total_steps": 3080, "loss": 0.3163, "learning_rate": 3.606797347299216e-06, "epoch": 1.2201849748499107, "percentage": 61.04, "elapsed_time": "11:12:41", "remaining_time": "7:09:22", "throughput": "0.00", "total_tokens": 0}
944
+ {"current_steps": 1882, "total_steps": 3080, "loss": 0.2701, "learning_rate": 3.596488976985224e-06, "epoch": 1.2214830439720914, "percentage": 61.1, "elapsed_time": "11:13:24", "remaining_time": "7:08:39", "throughput": "0.00", "total_tokens": 0}
945
+ {"current_steps": 1884, "total_steps": 3080, "loss": 0.2675, "learning_rate": 3.5861870784960657e-06, "epoch": 1.2227811130942723, "percentage": 61.17, "elapsed_time": "11:14:04", "remaining_time": "7:07:54", "throughput": "0.00", "total_tokens": 0}
946
+ {"current_steps": 1886, "total_steps": 3080, "loss": 0.2685, "learning_rate": 3.575891699335523e-06, "epoch": 1.224079182216453, "percentage": 61.23, "elapsed_time": "11:14:43", "remaining_time": "7:07:09", "throughput": "0.00", "total_tokens": 0}
947
+ {"current_steps": 1888, "total_steps": 3080, "loss": 0.2553, "learning_rate": 3.565602886977318e-06, "epoch": 1.2253772513386338, "percentage": 61.3, "elapsed_time": "11:15:28", "remaining_time": "7:06:27", "throughput": "0.00", "total_tokens": 0}
948
+ {"current_steps": 1890, "total_steps": 3080, "loss": 0.276, "learning_rate": 3.555320688864889e-06, "epoch": 1.2266753204608145, "percentage": 61.36, "elapsed_time": "11:16:12", "remaining_time": "7:05:45", "throughput": "0.00", "total_tokens": 0}
949
+ {"current_steps": 1892, "total_steps": 3080, "loss": 0.2704, "learning_rate": 3.545045152411178e-06, "epoch": 1.2279733895829952, "percentage": 61.43, "elapsed_time": "11:16:51", "remaining_time": "7:04:59", "throughput": "0.00", "total_tokens": 0}
950
+ {"current_steps": 1894, "total_steps": 3080, "loss": 0.2628, "learning_rate": 3.5347763249984097e-06, "epoch": 1.229271458705176, "percentage": 61.49, "elapsed_time": "11:17:33", "remaining_time": "7:04:16", "throughput": "0.00", "total_tokens": 0}
951
+ {"current_steps": 1896, "total_steps": 3080, "loss": 0.2704, "learning_rate": 3.5245142539778655e-06, "epoch": 1.2305695278273567, "percentage": 61.56, "elapsed_time": "11:18:09", "remaining_time": "7:03:29", "throughput": "0.00", "total_tokens": 0}
952
+ {"current_steps": 1898, "total_steps": 3080, "loss": 0.2649, "learning_rate": 3.5142589866696804e-06, "epoch": 1.2318675969495376, "percentage": 61.62, "elapsed_time": "11:18:51", "remaining_time": "7:02:46", "throughput": "0.00", "total_tokens": 0}
953
+ {"current_steps": 1900, "total_steps": 3080, "loss": 0.2581, "learning_rate": 3.504010570362612e-06, "epoch": 1.2331656660717183, "percentage": 61.69, "elapsed_time": "11:19:30", "remaining_time": "7:02:00", "throughput": "0.00", "total_tokens": 0}
954
+ {"current_steps": 1902, "total_steps": 3080, "loss": 0.2676, "learning_rate": 3.4937690523138302e-06, "epoch": 1.234463735193899, "percentage": 61.75, "elapsed_time": "11:20:10", "remaining_time": "7:01:15", "throughput": "0.00", "total_tokens": 0}
955
+ {"current_steps": 1904, "total_steps": 3080, "loss": 0.3042, "learning_rate": 3.483534479748688e-06, "epoch": 1.2357618043160798, "percentage": 61.82, "elapsed_time": "11:20:47", "remaining_time": "7:00:29", "throughput": "0.00", "total_tokens": 0}
956
+ {"current_steps": 1906, "total_steps": 3080, "loss": 0.2707, "learning_rate": 3.473306899860519e-06, "epoch": 1.2370598734382605, "percentage": 61.88, "elapsed_time": "11:21:27", "remaining_time": "6:59:44", "throughput": "0.00", "total_tokens": 0}
957
+ {"current_steps": 1908, "total_steps": 3080, "loss": 0.2543, "learning_rate": 3.4630863598104137e-06, "epoch": 1.2383579425604414, "percentage": 61.95, "elapsed_time": "11:22:07", "remaining_time": "6:59:00", "throughput": "0.00", "total_tokens": 0}
958
+ {"current_steps": 1910, "total_steps": 3080, "loss": 0.2794, "learning_rate": 3.4528729067269916e-06, "epoch": 1.239656011682622, "percentage": 62.01, "elapsed_time": "11:22:47", "remaining_time": "6:58:15", "throughput": "0.00", "total_tokens": 0}
959
+ {"current_steps": 1912, "total_steps": 3080, "loss": 0.3174, "learning_rate": 3.442666587706203e-06, "epoch": 1.2409540808048027, "percentage": 62.08, "elapsed_time": "11:23:27", "remaining_time": "6:57:30", "throughput": "0.00", "total_tokens": 0}
960
+ {"current_steps": 1914, "total_steps": 3080, "loss": 0.2572, "learning_rate": 3.4324674498110956e-06, "epoch": 1.2422521499269836, "percentage": 62.14, "elapsed_time": "11:24:06", "remaining_time": "6:56:45", "throughput": "0.00", "total_tokens": 0}
961
+ {"current_steps": 1916, "total_steps": 3080, "loss": 0.2633, "learning_rate": 3.4222755400716044e-06, "epoch": 1.2435502190491643, "percentage": 62.21, "elapsed_time": "11:24:46", "remaining_time": "6:56:00", "throughput": "0.00", "total_tokens": 0}
962
+ {"current_steps": 1918, "total_steps": 3080, "loss": 0.2823, "learning_rate": 3.4120909054843375e-06, "epoch": 1.2448482881713452, "percentage": 62.27, "elapsed_time": "11:25:25", "remaining_time": "6:55:15", "throughput": "0.00", "total_tokens": 0}
963
+ {"current_steps": 1920, "total_steps": 3080, "loss": 0.2692, "learning_rate": 3.401913593012355e-06, "epoch": 1.2461463572935259, "percentage": 62.34, "elapsed_time": "11:26:11", "remaining_time": "6:54:34", "throughput": "0.00", "total_tokens": 0}
964
+ {"current_steps": 1922, "total_steps": 3080, "loss": 0.3076, "learning_rate": 3.3917436495849486e-06, "epoch": 1.2474444264157065, "percentage": 62.4, "elapsed_time": "11:26:49", "remaining_time": "6:53:48", "throughput": "0.00", "total_tokens": 0}
965
+ {"current_steps": 1924, "total_steps": 3080, "loss": 0.2988, "learning_rate": 3.381581122097437e-06, "epoch": 1.2487424955378874, "percentage": 62.47, "elapsed_time": "11:27:27", "remaining_time": "6:53:03", "throughput": "0.00", "total_tokens": 0}
966
+ {"current_steps": 1926, "total_steps": 3080, "loss": 0.2701, "learning_rate": 3.37142605741094e-06, "epoch": 1.250040564660068, "percentage": 62.53, "elapsed_time": "11:28:06", "remaining_time": "6:52:17", "throughput": "0.00", "total_tokens": 0}
967
+ {"current_steps": 1928, "total_steps": 3080, "loss": 0.261, "learning_rate": 3.361278502352161e-06, "epoch": 1.251338633782249, "percentage": 62.6, "elapsed_time": "11:28:43", "remaining_time": "6:51:31", "throughput": "0.00", "total_tokens": 0}
968
+ {"current_steps": 1930, "total_steps": 3080, "loss": 0.2853, "learning_rate": 3.3511385037131827e-06, "epoch": 1.2526367029044296, "percentage": 62.66, "elapsed_time": "11:29:26", "remaining_time": "6:50:48", "throughput": "0.00", "total_tokens": 0}
969
+ {"current_steps": 1932, "total_steps": 3080, "loss": 0.232, "learning_rate": 3.3410061082512422e-06, "epoch": 1.2539347720266103, "percentage": 62.73, "elapsed_time": "11:30:02", "remaining_time": "6:50:01", "throughput": "0.00", "total_tokens": 0}
970
+ {"current_steps": 1934, "total_steps": 3080, "loss": 0.299, "learning_rate": 3.3308813626885096e-06, "epoch": 1.2552328411487912, "percentage": 62.79, "elapsed_time": "11:30:42", "remaining_time": "6:49:16", "throughput": "0.00", "total_tokens": 0}
971
+ {"current_steps": 1936, "total_steps": 3080, "loss": 0.3768, "learning_rate": 3.3207643137118872e-06, "epoch": 1.2565309102709719, "percentage": 62.86, "elapsed_time": "11:31:25", "remaining_time": "6:48:34", "throughput": "0.00", "total_tokens": 0}
972
+ {"current_steps": 1938, "total_steps": 3080, "loss": 0.2779, "learning_rate": 3.31065500797279e-06, "epoch": 1.2578289793931527, "percentage": 62.92, "elapsed_time": "11:32:08", "remaining_time": "6:47:51", "throughput": "0.00", "total_tokens": 0}
973
+ {"current_steps": 1940, "total_steps": 3080, "loss": 0.258, "learning_rate": 3.3005534920869175e-06, "epoch": 1.2591270485153334, "percentage": 62.99, "elapsed_time": "11:32:51", "remaining_time": "6:47:08", "throughput": "0.00", "total_tokens": 0}
974
+ {"current_steps": 1942, "total_steps": 3080, "loss": 0.2601, "learning_rate": 3.290459812634057e-06, "epoch": 1.260425117637514, "percentage": 63.05, "elapsed_time": "11:33:31", "remaining_time": "6:46:24", "throughput": "0.00", "total_tokens": 0}
975
+ {"current_steps": 1944, "total_steps": 3080, "loss": 0.3066, "learning_rate": 3.2803740161578566e-06, "epoch": 1.261723186759695, "percentage": 63.12, "elapsed_time": "11:34:13", "remaining_time": "6:45:40", "throughput": "0.00", "total_tokens": 0}
976
+ {"current_steps": 1946, "total_steps": 3080, "loss": 0.2583, "learning_rate": 3.2702961491656197e-06, "epoch": 1.2630212558818756, "percentage": 63.18, "elapsed_time": "11:34:56", "remaining_time": "6:44:57", "throughput": "0.00", "total_tokens": 0}
977
+ {"current_steps": 1948, "total_steps": 3080, "loss": 0.2963, "learning_rate": 3.2602262581280785e-06, "epoch": 1.2643193250040565, "percentage": 63.25, "elapsed_time": "11:35:33", "remaining_time": "6:44:11", "throughput": "0.00", "total_tokens": 0}
978
+ {"current_steps": 1950, "total_steps": 3080, "loss": 0.2777, "learning_rate": 3.2501643894791912e-06, "epoch": 1.2656173941262372, "percentage": 63.31, "elapsed_time": "11:36:15", "remaining_time": "6:43:28", "throughput": "0.00", "total_tokens": 0}
979
+ {"current_steps": 1952, "total_steps": 3080, "loss": 0.2652, "learning_rate": 3.2401105896159245e-06, "epoch": 1.2669154632484179, "percentage": 63.38, "elapsed_time": "11:36:57", "remaining_time": "6:42:44", "throughput": "0.00", "total_tokens": 0}
980
+ {"current_steps": 1954, "total_steps": 3080, "loss": 0.2666, "learning_rate": 3.2300649048980336e-06, "epoch": 1.2682135323705988, "percentage": 63.44, "elapsed_time": "11:37:38", "remaining_time": "6:42:01", "throughput": "0.00", "total_tokens": 0}
981
+ {"current_steps": 1956, "total_steps": 3080, "loss": 0.2736, "learning_rate": 3.220027381647857e-06, "epoch": 1.2695116014927794, "percentage": 63.51, "elapsed_time": "11:38:19", "remaining_time": "6:41:17", "throughput": "0.00", "total_tokens": 0}
982
+ {"current_steps": 1958, "total_steps": 3080, "loss": 0.2519, "learning_rate": 3.2099980661501016e-06, "epoch": 1.2708096706149603, "percentage": 63.57, "elapsed_time": "11:39:02", "remaining_time": "6:40:34", "throughput": "0.00", "total_tokens": 0}
983
+ {"current_steps": 1960, "total_steps": 3080, "loss": 0.2437, "learning_rate": 3.1999770046516198e-06, "epoch": 1.272107739737141, "percentage": 63.64, "elapsed_time": "11:39:40", "remaining_time": "6:39:49", "throughput": "0.00", "total_tokens": 0}
984
+ {"current_steps": 1962, "total_steps": 3080, "loss": 0.2614, "learning_rate": 3.1899642433612104e-06, "epoch": 1.2734058088593216, "percentage": 63.7, "elapsed_time": "11:40:25", "remaining_time": "6:39:07", "throughput": "0.00", "total_tokens": 0}
985
+ {"current_steps": 1964, "total_steps": 3080, "loss": 0.2929, "learning_rate": 3.179959828449397e-06, "epoch": 1.2747038779815025, "percentage": 63.77, "elapsed_time": "11:41:08", "remaining_time": "6:38:24", "throughput": "0.00", "total_tokens": 0}
986
+ {"current_steps": 1966, "total_steps": 3080, "loss": 0.2699, "learning_rate": 3.1699638060482118e-06, "epoch": 1.2760019471036832, "percentage": 63.83, "elapsed_time": "11:41:49", "remaining_time": "6:37:40", "throughput": "0.00", "total_tokens": 0}
987
+ {"current_steps": 1968, "total_steps": 3080, "loss": 0.2861, "learning_rate": 3.159976222250992e-06, "epoch": 1.277300016225864, "percentage": 63.9, "elapsed_time": "11:42:32", "remaining_time": "6:36:57", "throughput": "0.00", "total_tokens": 0}
988
+ {"current_steps": 1970, "total_steps": 3080, "loss": 0.259, "learning_rate": 3.1499971231121672e-06, "epoch": 1.2785980853480448, "percentage": 63.96, "elapsed_time": "11:43:08", "remaining_time": "6:36:10", "throughput": "0.00", "total_tokens": 0}
989
+ {"current_steps": 1972, "total_steps": 3080, "loss": 0.2645, "learning_rate": 3.140026554647032e-06, "epoch": 1.2798961544702254, "percentage": 64.03, "elapsed_time": "11:43:44", "remaining_time": "6:35:24", "throughput": "0.00", "total_tokens": 0}
990
+ {"current_steps": 1974, "total_steps": 3080, "loss": 0.2406, "learning_rate": 3.130064562831553e-06, "epoch": 1.2811942235924063, "percentage": 64.09, "elapsed_time": "11:44:28", "remaining_time": "6:34:42", "throughput": "0.00", "total_tokens": 0}
991
+ {"current_steps": 1976, "total_steps": 3080, "loss": 0.3222, "learning_rate": 3.1201111936021467e-06, "epoch": 1.282492292714587, "percentage": 64.16, "elapsed_time": "11:45:10", "remaining_time": "6:33:59", "throughput": "0.00", "total_tokens": 0}
992
+ {"current_steps": 1978, "total_steps": 3080, "loss": 0.2812, "learning_rate": 3.110166492855468e-06, "epoch": 1.2837903618367679, "percentage": 64.22, "elapsed_time": "11:45:48", "remaining_time": "6:33:13", "throughput": "0.00", "total_tokens": 0}
993
+ {"current_steps": 1980, "total_steps": 3080, "loss": 0.2415, "learning_rate": 3.1002305064482006e-06, "epoch": 1.2850884309589485, "percentage": 64.29, "elapsed_time": "11:46:25", "remaining_time": "6:32:27", "throughput": "0.00", "total_tokens": 0}
994
+ {"current_steps": 1982, "total_steps": 3080, "loss": 0.2589, "learning_rate": 3.090303280196847e-06, "epoch": 1.2863865000811292, "percentage": 64.35, "elapsed_time": "11:47:05", "remaining_time": "6:31:42", "throughput": "0.00", "total_tokens": 0}
995
+ {"current_steps": 1984, "total_steps": 3080, "loss": 0.2566, "learning_rate": 3.08038485987751e-06, "epoch": 1.28768456920331, "percentage": 64.42, "elapsed_time": "11:47:50", "remaining_time": "6:31:01", "throughput": "0.00", "total_tokens": 0}
996
+ {"current_steps": 1986, "total_steps": 3080, "loss": 0.3038, "learning_rate": 3.070475291225692e-06, "epoch": 1.2889826383254908, "percentage": 64.48, "elapsed_time": "11:48:28", "remaining_time": "6:30:16", "throughput": "0.00", "total_tokens": 0}
997
+ {"current_steps": 1988, "total_steps": 3080, "loss": 0.2317, "learning_rate": 3.0605746199360755e-06, "epoch": 1.2902807074476716, "percentage": 64.55, "elapsed_time": "11:49:09", "remaining_time": "6:29:32", "throughput": "0.00", "total_tokens": 0}
998
+ {"current_steps": 1990, "total_steps": 3080, "loss": 0.2903, "learning_rate": 3.0506828916623198e-06, "epoch": 1.2915787765698523, "percentage": 64.61, "elapsed_time": "11:49:46", "remaining_time": "6:28:46", "throughput": "0.00", "total_tokens": 0}
999
+ {"current_steps": 1992, "total_steps": 3080, "loss": 0.2725, "learning_rate": 3.040800152016841e-06, "epoch": 1.292876845692033, "percentage": 64.68, "elapsed_time": "11:50:31", "remaining_time": "6:28:04", "throughput": "0.00", "total_tokens": 0}
1000
+ {"current_steps": 1994, "total_steps": 3080, "loss": 0.2877, "learning_rate": 3.030926446570611e-06, "epoch": 1.2941749148142139, "percentage": 64.74, "elapsed_time": "11:51:15", "remaining_time": "6:27:22", "throughput": "0.00", "total_tokens": 0}
1001
+ {"current_steps": 1996, "total_steps": 3080, "loss": 0.2592, "learning_rate": 3.021061820852945e-06, "epoch": 1.2954729839363945, "percentage": 64.81, "elapsed_time": "11:51:55", "remaining_time": "6:26:38", "throughput": "0.00", "total_tokens": 0}
1002
+ {"current_steps": 1998, "total_steps": 3080, "loss": 0.262, "learning_rate": 3.0112063203512808e-06, "epoch": 1.2967710530585754, "percentage": 64.87, "elapsed_time": "11:52:38", "remaining_time": "6:25:55", "throughput": "0.00", "total_tokens": 0}
1003
+ {"current_steps": 2000, "total_steps": 3080, "loss": 0.284, "learning_rate": 3.001359990510991e-06, "epoch": 1.298069122180756, "percentage": 64.94, "elapsed_time": "11:53:15", "remaining_time": "6:25:09", "throughput": "0.00", "total_tokens": 0}
1004
+ {"current_steps": 2000, "total_steps": 3080, "eval_loss": 0.2822348475456238, "epoch": 1.298069122180756, "percentage": 64.94, "elapsed_time": "12:07:04", "remaining_time": "6:32:37", "throughput": "0.00", "total_tokens": 0}