KTO-4B-lora / trainer_log.jsonl
Delta-Vector's picture
Upload 14 files
b400a5b verified
{"current_steps": 1, "total_steps": 114, "loss": 0.5, "learning_rate": 2.5e-06, "epoch": 0.017422867513611617, "percentage": 0.88, "elapsed_time": "0:03:01", "remaining_time": "5:42:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2, "total_steps": 114, "loss": 0.5, "learning_rate": 5e-06, "epoch": 0.03484573502722323, "percentage": 1.75, "elapsed_time": "0:05:58", "remaining_time": "5:34:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3, "total_steps": 114, "loss": 0.5049, "learning_rate": 4.955357142857144e-06, "epoch": 0.052268602540834846, "percentage": 2.63, "elapsed_time": "0:08:40", "remaining_time": "5:21:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 4, "total_steps": 114, "loss": 0.5038, "learning_rate": 4.910714285714286e-06, "epoch": 0.06969147005444647, "percentage": 3.51, "elapsed_time": "0:11:31", "remaining_time": "5:17:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 5, "total_steps": 114, "loss": 0.5049, "learning_rate": 4.866071428571429e-06, "epoch": 0.08711433756805807, "percentage": 4.39, "elapsed_time": "0:14:23", "remaining_time": "5:13:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 6, "total_steps": 114, "loss": 0.5036, "learning_rate": 4.821428571428572e-06, "epoch": 0.10453720508166969, "percentage": 5.26, "elapsed_time": "0:17:17", "remaining_time": "5:11:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 7, "total_steps": 114, "loss": 0.5047, "learning_rate": 4.776785714285715e-06, "epoch": 0.12196007259528131, "percentage": 6.14, "elapsed_time": "0:20:02", "remaining_time": "5:06:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 8, "total_steps": 114, "loss": 0.5049, "learning_rate": 4.732142857142857e-06, "epoch": 0.13938294010889293, "percentage": 7.02, "elapsed_time": "0:22:48", "remaining_time": "5:02:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 9, "total_steps": 114, "loss": 0.5047, "learning_rate": 4.6875000000000004e-06, "epoch": 0.15680580762250454, "percentage": 7.89, "elapsed_time": "0:25:41", "remaining_time": "4:59:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 10, "total_steps": 114, "loss": 0.5047, "learning_rate": 4.642857142857144e-06, "epoch": 0.17422867513611615, "percentage": 8.77, "elapsed_time": "0:28:19", "remaining_time": "4:54:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 11, "total_steps": 114, "loss": 0.5044, "learning_rate": 4.5982142857142854e-06, "epoch": 0.19165154264972778, "percentage": 9.65, "elapsed_time": "0:31:04", "remaining_time": "4:51:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 12, "total_steps": 114, "loss": 0.5045, "learning_rate": 4.553571428571429e-06, "epoch": 0.20907441016333939, "percentage": 10.53, "elapsed_time": "0:33:42", "remaining_time": "4:46:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 13, "total_steps": 114, "loss": 0.5041, "learning_rate": 4.508928571428572e-06, "epoch": 0.226497277676951, "percentage": 11.4, "elapsed_time": "0:36:38", "remaining_time": "4:44:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 14, "total_steps": 114, "loss": 0.5055, "learning_rate": 4.464285714285715e-06, "epoch": 0.24392014519056263, "percentage": 12.28, "elapsed_time": "0:39:09", "remaining_time": "4:39:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 15, "total_steps": 114, "loss": 0.5043, "learning_rate": 4.419642857142857e-06, "epoch": 0.2613430127041742, "percentage": 13.16, "elapsed_time": "0:42:04", "remaining_time": "4:37:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 16, "total_steps": 114, "loss": 0.5042, "learning_rate": 4.3750000000000005e-06, "epoch": 0.27876588021778587, "percentage": 14.04, "elapsed_time": "0:44:50", "remaining_time": "4:34:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 16, "total_steps": 114, "eval_loss": 0.5038086175918579, "epoch": 0.27876588021778587, "percentage": 14.04, "elapsed_time": "0:45:32", "remaining_time": "4:38:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 17, "total_steps": 114, "loss": 0.5046, "learning_rate": 4.330357142857143e-06, "epoch": 0.2961887477313975, "percentage": 14.91, "elapsed_time": "0:48:15", "remaining_time": "4:35:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 18, "total_steps": 114, "loss": 0.5048, "learning_rate": 4.2857142857142855e-06, "epoch": 0.3136116152450091, "percentage": 15.79, "elapsed_time": "0:51:01", "remaining_time": "4:32:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 19, "total_steps": 114, "loss": 0.504, "learning_rate": 4.241071428571429e-06, "epoch": 0.3310344827586207, "percentage": 16.67, "elapsed_time": "0:54:11", "remaining_time": "4:30:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 114, "loss": 0.504, "learning_rate": 4.196428571428572e-06, "epoch": 0.3484573502722323, "percentage": 17.54, "elapsed_time": "0:57:04", "remaining_time": "4:28:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 21, "total_steps": 114, "loss": 0.504, "learning_rate": 4.151785714285715e-06, "epoch": 0.3658802177858439, "percentage": 18.42, "elapsed_time": "1:00:14", "remaining_time": "4:26:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 22, "total_steps": 114, "loss": 0.5042, "learning_rate": 4.107142857142857e-06, "epoch": 0.38330308529945556, "percentage": 19.3, "elapsed_time": "1:03:07", "remaining_time": "4:23:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 23, "total_steps": 114, "loss": 0.5039, "learning_rate": 4.0625000000000005e-06, "epoch": 0.40072595281306717, "percentage": 20.18, "elapsed_time": "1:05:59", "remaining_time": "4:21:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 24, "total_steps": 114, "loss": 0.5035, "learning_rate": 4.017857142857143e-06, "epoch": 0.41814882032667877, "percentage": 21.05, "elapsed_time": "1:09:22", "remaining_time": "4:20:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 25, "total_steps": 114, "loss": 0.5033, "learning_rate": 3.9732142857142855e-06, "epoch": 0.4355716878402904, "percentage": 21.93, "elapsed_time": "1:12:00", "remaining_time": "4:16:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 26, "total_steps": 114, "loss": 0.5044, "learning_rate": 3.928571428571429e-06, "epoch": 0.452994555353902, "percentage": 22.81, "elapsed_time": "1:14:41", "remaining_time": "4:12:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 27, "total_steps": 114, "loss": 0.5038, "learning_rate": 3.883928571428572e-06, "epoch": 0.4704174228675136, "percentage": 23.68, "elapsed_time": "1:17:39", "remaining_time": "4:10:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 28, "total_steps": 114, "loss": 0.505, "learning_rate": 3.839285714285715e-06, "epoch": 0.48784029038112525, "percentage": 24.56, "elapsed_time": "1:20:57", "remaining_time": "4:08:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 29, "total_steps": 114, "loss": 0.5035, "learning_rate": 3.794642857142857e-06, "epoch": 0.5052631578947369, "percentage": 25.44, "elapsed_time": "1:23:49", "remaining_time": "4:05:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 114, "loss": 0.5048, "learning_rate": 3.7500000000000005e-06, "epoch": 0.5226860254083484, "percentage": 26.32, "elapsed_time": "1:26:50", "remaining_time": "4:03:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 31, "total_steps": 114, "loss": 0.504, "learning_rate": 3.7053571428571434e-06, "epoch": 0.5401088929219601, "percentage": 27.19, "elapsed_time": "1:29:53", "remaining_time": "4:00:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 32, "total_steps": 114, "loss": 0.5037, "learning_rate": 3.660714285714286e-06, "epoch": 0.5575317604355717, "percentage": 28.07, "elapsed_time": "1:33:04", "remaining_time": "3:58:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 32, "total_steps": 114, "eval_loss": 0.5032945275306702, "epoch": 0.5575317604355717, "percentage": 28.07, "elapsed_time": "1:33:45", "remaining_time": "4:00:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 33, "total_steps": 114, "loss": 0.5043, "learning_rate": 3.616071428571429e-06, "epoch": 0.5749546279491833, "percentage": 28.95, "elapsed_time": "1:36:09", "remaining_time": "3:56:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 34, "total_steps": 114, "loss": 0.5032, "learning_rate": 3.5714285714285718e-06, "epoch": 0.592377495462795, "percentage": 29.82, "elapsed_time": "1:38:36", "remaining_time": "3:52:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 35, "total_steps": 114, "loss": 0.5041, "learning_rate": 3.5267857142857147e-06, "epoch": 0.6098003629764065, "percentage": 30.7, "elapsed_time": "1:41:21", "remaining_time": "3:48:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 36, "total_steps": 114, "loss": 0.5036, "learning_rate": 3.482142857142857e-06, "epoch": 0.6272232304900182, "percentage": 31.58, "elapsed_time": "1:44:04", "remaining_time": "3:45:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 37, "total_steps": 114, "loss": 0.503, "learning_rate": 3.4375e-06, "epoch": 0.6446460980036298, "percentage": 32.46, "elapsed_time": "1:47:04", "remaining_time": "3:42:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 38, "total_steps": 114, "loss": 0.5041, "learning_rate": 3.3928571428571435e-06, "epoch": 0.6620689655172414, "percentage": 33.33, "elapsed_time": "1:50:04", "remaining_time": "3:40:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 39, "total_steps": 114, "loss": 0.5041, "learning_rate": 3.3482142857142855e-06, "epoch": 0.679491833030853, "percentage": 34.21, "elapsed_time": "1:52:52", "remaining_time": "3:37:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 114, "loss": 0.5044, "learning_rate": 3.303571428571429e-06, "epoch": 0.6969147005444646, "percentage": 35.09, "elapsed_time": "1:55:42", "remaining_time": "3:34:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 41, "total_steps": 114, "loss": 0.5054, "learning_rate": 3.258928571428572e-06, "epoch": 0.7143375680580762, "percentage": 35.96, "elapsed_time": "1:58:25", "remaining_time": "3:30:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 42, "total_steps": 114, "loss": 0.5036, "learning_rate": 3.2142857142857147e-06, "epoch": 0.7317604355716878, "percentage": 36.84, "elapsed_time": "2:01:26", "remaining_time": "3:28:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 43, "total_steps": 114, "loss": 0.503, "learning_rate": 3.1696428571428572e-06, "epoch": 0.7491833030852995, "percentage": 37.72, "elapsed_time": "2:04:13", "remaining_time": "3:25:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 44, "total_steps": 114, "loss": 0.5041, "learning_rate": 3.125e-06, "epoch": 0.7666061705989111, "percentage": 38.6, "elapsed_time": "2:06:58", "remaining_time": "3:22:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 45, "total_steps": 114, "loss": 0.5037, "learning_rate": 3.080357142857143e-06, "epoch": 0.7840290381125227, "percentage": 39.47, "elapsed_time": "2:09:31", "remaining_time": "3:18:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 46, "total_steps": 114, "loss": 0.5043, "learning_rate": 3.0357142857142856e-06, "epoch": 0.8014519056261343, "percentage": 40.35, "elapsed_time": "2:12:10", "remaining_time": "3:15:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 47, "total_steps": 114, "loss": 0.5039, "learning_rate": 2.991071428571429e-06, "epoch": 0.8188747731397459, "percentage": 41.23, "elapsed_time": "2:15:05", "remaining_time": "3:12:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 48, "total_steps": 114, "loss": 0.5035, "learning_rate": 2.946428571428572e-06, "epoch": 0.8362976406533575, "percentage": 42.11, "elapsed_time": "2:17:50", "remaining_time": "3:09:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 48, "total_steps": 114, "eval_loss": 0.5040844082832336, "epoch": 0.8362976406533575, "percentage": 42.11, "elapsed_time": "2:18:32", "remaining_time": "3:10:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 49, "total_steps": 114, "loss": 0.5037, "learning_rate": 2.9017857142857148e-06, "epoch": 0.8537205081669691, "percentage": 42.98, "elapsed_time": "2:21:40", "remaining_time": "3:07:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 114, "loss": 0.5046, "learning_rate": 2.8571428571428573e-06, "epoch": 0.8711433756805808, "percentage": 43.86, "elapsed_time": "2:24:19", "remaining_time": "3:04:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 51, "total_steps": 114, "loss": 0.5046, "learning_rate": 2.8125e-06, "epoch": 0.8885662431941924, "percentage": 44.74, "elapsed_time": "2:27:10", "remaining_time": "3:01:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 52, "total_steps": 114, "loss": 0.5032, "learning_rate": 2.767857142857143e-06, "epoch": 0.905989110707804, "percentage": 45.61, "elapsed_time": "2:29:52", "remaining_time": "2:58:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 53, "total_steps": 114, "loss": 0.5041, "learning_rate": 2.7232142857142856e-06, "epoch": 0.9234119782214156, "percentage": 46.49, "elapsed_time": "2:32:45", "remaining_time": "2:55:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 54, "total_steps": 114, "loss": 0.5036, "learning_rate": 2.6785714285714285e-06, "epoch": 0.9408348457350272, "percentage": 47.37, "elapsed_time": "2:35:32", "remaining_time": "2:52:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 55, "total_steps": 114, "loss": 0.5038, "learning_rate": 2.633928571428572e-06, "epoch": 0.9582577132486388, "percentage": 48.25, "elapsed_time": "2:38:40", "remaining_time": "2:50:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 56, "total_steps": 114, "loss": 0.5038, "learning_rate": 2.5892857142857148e-06, "epoch": 0.9756805807622505, "percentage": 49.12, "elapsed_time": "2:41:35", "remaining_time": "2:47:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 57, "total_steps": 114, "loss": 0.5034, "learning_rate": 2.5446428571428573e-06, "epoch": 0.993103448275862, "percentage": 50.0, "elapsed_time": "2:44:13", "remaining_time": "2:44:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 58, "total_steps": 114, "loss": 0.5036, "learning_rate": 2.5e-06, "epoch": 1.0105263157894737, "percentage": 50.88, "elapsed_time": "2:47:10", "remaining_time": "2:41:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 59, "total_steps": 114, "loss": 0.5037, "learning_rate": 2.455357142857143e-06, "epoch": 1.0279491833030854, "percentage": 51.75, "elapsed_time": "2:49:39", "remaining_time": "2:38:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 114, "loss": 0.5036, "learning_rate": 2.410714285714286e-06, "epoch": 1.0453720508166968, "percentage": 52.63, "elapsed_time": "2:52:43", "remaining_time": "2:35:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 61, "total_steps": 114, "loss": 0.5035, "learning_rate": 2.3660714285714285e-06, "epoch": 1.0627949183303085, "percentage": 53.51, "elapsed_time": "2:55:47", "remaining_time": "2:32:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 62, "total_steps": 114, "loss": 0.5034, "learning_rate": 2.321428571428572e-06, "epoch": 1.0802177858439201, "percentage": 54.39, "elapsed_time": "2:58:25", "remaining_time": "2:29:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 63, "total_steps": 114, "loss": 0.5034, "learning_rate": 2.2767857142857144e-06, "epoch": 1.0976406533575318, "percentage": 55.26, "elapsed_time": "3:01:15", "remaining_time": "2:26:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 64, "total_steps": 114, "loss": 0.5037, "learning_rate": 2.2321428571428573e-06, "epoch": 1.1150635208711435, "percentage": 56.14, "elapsed_time": "3:04:11", "remaining_time": "2:23:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 64, "total_steps": 114, "eval_loss": 0.5034511089324951, "epoch": 1.1150635208711435, "percentage": 56.14, "elapsed_time": "3:04:52", "remaining_time": "2:24:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 65, "total_steps": 114, "loss": 0.504, "learning_rate": 2.1875000000000002e-06, "epoch": 1.132486388384755, "percentage": 57.02, "elapsed_time": "3:07:46", "remaining_time": "2:21:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 66, "total_steps": 114, "loss": 0.504, "learning_rate": 2.1428571428571427e-06, "epoch": 1.1499092558983666, "percentage": 57.89, "elapsed_time": "3:11:05", "remaining_time": "2:18:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 67, "total_steps": 114, "loss": 0.5034, "learning_rate": 2.098214285714286e-06, "epoch": 1.1673321234119782, "percentage": 58.77, "elapsed_time": "3:14:11", "remaining_time": "2:16:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 68, "total_steps": 114, "loss": 0.503, "learning_rate": 2.0535714285714286e-06, "epoch": 1.18475499092559, "percentage": 59.65, "elapsed_time": "3:16:50", "remaining_time": "2:13:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 69, "total_steps": 114, "loss": 0.5042, "learning_rate": 2.0089285714285715e-06, "epoch": 1.2021778584392013, "percentage": 60.53, "elapsed_time": "3:19:33", "remaining_time": "2:10:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 114, "loss": 0.5033, "learning_rate": 1.9642857142857144e-06, "epoch": 1.219600725952813, "percentage": 61.4, "elapsed_time": "3:22:11", "remaining_time": "2:07:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 71, "total_steps": 114, "loss": 0.5035, "learning_rate": 1.9196428571428573e-06, "epoch": 1.2370235934664247, "percentage": 62.28, "elapsed_time": "3:25:05", "remaining_time": "2:04:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 72, "total_steps": 114, "loss": 0.5029, "learning_rate": 1.8750000000000003e-06, "epoch": 1.2544464609800363, "percentage": 63.16, "elapsed_time": "3:28:11", "remaining_time": "2:01:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 73, "total_steps": 114, "loss": 0.5034, "learning_rate": 1.830357142857143e-06, "epoch": 1.271869328493648, "percentage": 64.04, "elapsed_time": "3:30:48", "remaining_time": "1:58:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 74, "total_steps": 114, "loss": 0.5032, "learning_rate": 1.7857142857142859e-06, "epoch": 1.2892921960072594, "percentage": 64.91, "elapsed_time": "3:33:21", "remaining_time": "1:55:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 75, "total_steps": 114, "loss": 0.5036, "learning_rate": 1.7410714285714286e-06, "epoch": 1.306715063520871, "percentage": 65.79, "elapsed_time": "3:36:07", "remaining_time": "1:52:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 76, "total_steps": 114, "loss": 0.5033, "learning_rate": 1.6964285714285717e-06, "epoch": 1.3241379310344827, "percentage": 66.67, "elapsed_time": "3:39:00", "remaining_time": "1:49:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 77, "total_steps": 114, "loss": 0.5033, "learning_rate": 1.6517857142857144e-06, "epoch": 1.3415607985480944, "percentage": 67.54, "elapsed_time": "3:41:54", "remaining_time": "1:46:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 78, "total_steps": 114, "loss": 0.5034, "learning_rate": 1.6071428571428574e-06, "epoch": 1.358983666061706, "percentage": 68.42, "elapsed_time": "3:44:25", "remaining_time": "1:43:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 79, "total_steps": 114, "loss": 0.5034, "learning_rate": 1.5625e-06, "epoch": 1.3764065335753175, "percentage": 69.3, "elapsed_time": "3:47:01", "remaining_time": "1:40:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 114, "loss": 0.5036, "learning_rate": 1.5178571428571428e-06, "epoch": 1.3938294010889292, "percentage": 70.18, "elapsed_time": "3:49:38", "remaining_time": "1:37:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 114, "eval_loss": 0.5035938024520874, "epoch": 1.3938294010889292, "percentage": 70.18, "elapsed_time": "3:50:20", "remaining_time": "1:37:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 81, "total_steps": 114, "loss": 0.5037, "learning_rate": 1.473214285714286e-06, "epoch": 1.4112522686025408, "percentage": 71.05, "elapsed_time": "3:53:05", "remaining_time": "1:34:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 82, "total_steps": 114, "loss": 0.5033, "learning_rate": 1.4285714285714286e-06, "epoch": 1.4286751361161525, "percentage": 71.93, "elapsed_time": "3:56:05", "remaining_time": "1:32:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 83, "total_steps": 114, "loss": 0.5033, "learning_rate": 1.3839285714285715e-06, "epoch": 1.4460980036297642, "percentage": 72.81, "elapsed_time": "3:58:33", "remaining_time": "1:29:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 84, "total_steps": 114, "loss": 0.5028, "learning_rate": 1.3392857142857143e-06, "epoch": 1.4635208711433756, "percentage": 73.68, "elapsed_time": "4:01:26", "remaining_time": "1:26:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 85, "total_steps": 114, "loss": 0.5036, "learning_rate": 1.2946428571428574e-06, "epoch": 1.4809437386569873, "percentage": 74.56, "elapsed_time": "4:04:38", "remaining_time": "1:23:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 86, "total_steps": 114, "loss": 0.5039, "learning_rate": 1.25e-06, "epoch": 1.498366606170599, "percentage": 75.44, "elapsed_time": "4:07:19", "remaining_time": "1:20:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 87, "total_steps": 114, "loss": 0.5036, "learning_rate": 1.205357142857143e-06, "epoch": 1.5157894736842106, "percentage": 76.32, "elapsed_time": "4:10:17", "remaining_time": "1:17:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 88, "total_steps": 114, "loss": 0.5034, "learning_rate": 1.160714285714286e-06, "epoch": 1.5332123411978222, "percentage": 77.19, "elapsed_time": "4:13:06", "remaining_time": "1:14:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 89, "total_steps": 114, "loss": 0.5039, "learning_rate": 1.1160714285714287e-06, "epoch": 1.5506352087114337, "percentage": 78.07, "elapsed_time": "4:15:51", "remaining_time": "1:11:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 114, "loss": 0.5031, "learning_rate": 1.0714285714285714e-06, "epoch": 1.5680580762250453, "percentage": 78.95, "elapsed_time": "4:18:20", "remaining_time": "1:08:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 91, "total_steps": 114, "loss": 0.5045, "learning_rate": 1.0267857142857143e-06, "epoch": 1.585480943738657, "percentage": 79.82, "elapsed_time": "4:21:10", "remaining_time": "1:06:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 92, "total_steps": 114, "loss": 0.5035, "learning_rate": 9.821428571428572e-07, "epoch": 1.6029038112522684, "percentage": 80.7, "elapsed_time": "4:23:51", "remaining_time": "1:03:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 93, "total_steps": 114, "loss": 0.5036, "learning_rate": 9.375000000000001e-07, "epoch": 1.6203266787658803, "percentage": 81.58, "elapsed_time": "4:26:48", "remaining_time": "1:00:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 94, "total_steps": 114, "loss": 0.5034, "learning_rate": 8.928571428571429e-07, "epoch": 1.6377495462794918, "percentage": 82.46, "elapsed_time": "4:29:27", "remaining_time": "0:57:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 95, "total_steps": 114, "loss": 0.5024, "learning_rate": 8.482142857142859e-07, "epoch": 1.6551724137931034, "percentage": 83.33, "elapsed_time": "4:32:11", "remaining_time": "0:54:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 96, "total_steps": 114, "loss": 0.5032, "learning_rate": 8.035714285714287e-07, "epoch": 1.672595281306715, "percentage": 84.21, "elapsed_time": "4:35:04", "remaining_time": "0:51:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 96, "total_steps": 114, "eval_loss": 0.503455400466919, "epoch": 1.672595281306715, "percentage": 84.21, "elapsed_time": "4:35:45", "remaining_time": "0:51:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 97, "total_steps": 114, "loss": 0.5024, "learning_rate": 7.589285714285714e-07, "epoch": 1.6900181488203265, "percentage": 85.09, "elapsed_time": "4:38:39", "remaining_time": "0:48:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 98, "total_steps": 114, "loss": 0.5035, "learning_rate": 7.142857142857143e-07, "epoch": 1.7074410163339384, "percentage": 85.96, "elapsed_time": "4:41:44", "remaining_time": "0:45:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 99, "total_steps": 114, "loss": 0.5033, "learning_rate": 6.696428571428571e-07, "epoch": 1.7248638838475499, "percentage": 86.84, "elapsed_time": "4:44:54", "remaining_time": "0:43:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 114, "loss": 0.503, "learning_rate": 6.25e-07, "epoch": 1.7422867513611615, "percentage": 87.72, "elapsed_time": "4:47:42", "remaining_time": "0:40:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 101, "total_steps": 114, "loss": 0.5042, "learning_rate": 5.80357142857143e-07, "epoch": 1.7597096188747732, "percentage": 88.6, "elapsed_time": "4:50:36", "remaining_time": "0:37:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 102, "total_steps": 114, "loss": 0.504, "learning_rate": 5.357142857142857e-07, "epoch": 1.7771324863883846, "percentage": 89.47, "elapsed_time": "4:53:29", "remaining_time": "0:34:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 103, "total_steps": 114, "loss": 0.5027, "learning_rate": 4.910714285714286e-07, "epoch": 1.7945553539019965, "percentage": 90.35, "elapsed_time": "4:56:22", "remaining_time": "0:31:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 104, "total_steps": 114, "loss": 0.5029, "learning_rate": 4.4642857142857147e-07, "epoch": 1.811978221415608, "percentage": 91.23, "elapsed_time": "4:59:35", "remaining_time": "0:28:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 105, "total_steps": 114, "loss": 0.5029, "learning_rate": 4.0178571428571434e-07, "epoch": 1.8294010889292196, "percentage": 92.11, "elapsed_time": "5:02:28", "remaining_time": "0:25:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 106, "total_steps": 114, "loss": 0.5038, "learning_rate": 3.5714285714285716e-07, "epoch": 1.8468239564428313, "percentage": 92.98, "elapsed_time": "5:05:34", "remaining_time": "0:23:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 107, "total_steps": 114, "loss": 0.5048, "learning_rate": 3.125e-07, "epoch": 1.8642468239564427, "percentage": 93.86, "elapsed_time": "5:08:48", "remaining_time": "0:20:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 108, "total_steps": 114, "loss": 0.5035, "learning_rate": 2.6785714285714284e-07, "epoch": 1.8816696914700546, "percentage": 94.74, "elapsed_time": "5:11:06", "remaining_time": "0:17:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 109, "total_steps": 114, "loss": 0.5046, "learning_rate": 2.2321428571428574e-07, "epoch": 1.899092558983666, "percentage": 95.61, "elapsed_time": "5:13:56", "remaining_time": "0:14:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 114, "loss": 0.5031, "learning_rate": 1.7857142857142858e-07, "epoch": 1.9165154264972777, "percentage": 96.49, "elapsed_time": "5:17:12", "remaining_time": "0:11:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 111, "total_steps": 114, "loss": 0.5028, "learning_rate": 1.3392857142857142e-07, "epoch": 1.9339382940108893, "percentage": 97.37, "elapsed_time": "5:19:48", "remaining_time": "0:08:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 112, "total_steps": 114, "loss": 0.5036, "learning_rate": 8.928571428571429e-08, "epoch": 1.9513611615245008, "percentage": 98.25, "elapsed_time": "5:22:22", "remaining_time": "0:05:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 112, "total_steps": 114, "eval_loss": 0.5036724805831909, "epoch": 1.9513611615245008, "percentage": 98.25, "elapsed_time": "5:23:03", "remaining_time": "0:05:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 113, "total_steps": 114, "loss": 0.5031, "learning_rate": 4.4642857142857145e-08, "epoch": 1.9687840290381127, "percentage": 99.12, "elapsed_time": "5:26:05", "remaining_time": "0:02:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 114, "total_steps": 114, "loss": 0.5029, "learning_rate": 0.0, "epoch": 1.986206896551724, "percentage": 100.0, "elapsed_time": "5:29:05", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 114, "total_steps": 114, "epoch": 1.986206896551724, "percentage": 100.0, "elapsed_time": "5:29:11", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}