hp_ablations_mistral_epoch3 / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 0
de775ee verified
raw
history blame
8.95 kB
{"current_steps": 10, "total_steps": 1479, "loss": 0.7569, "lr": 5e-06, "epoch": 0.020253164556962026, "percentage": 0.68, "elapsed_time": "0:04:45", "remaining_time": "11:40:05"}
{"current_steps": 20, "total_steps": 1479, "loss": 0.6507, "lr": 5e-06, "epoch": 0.04050632911392405, "percentage": 1.35, "elapsed_time": "0:09:28", "remaining_time": "11:30:37"}
{"current_steps": 30, "total_steps": 1479, "loss": 0.6295, "lr": 5e-06, "epoch": 0.060759493670886074, "percentage": 2.03, "elapsed_time": "0:14:10", "remaining_time": "11:24:17"}
{"current_steps": 40, "total_steps": 1479, "loss": 0.615, "lr": 5e-06, "epoch": 0.0810126582278481, "percentage": 2.7, "elapsed_time": "0:18:52", "remaining_time": "11:18:46"}
{"current_steps": 50, "total_steps": 1479, "loss": 0.6055, "lr": 5e-06, "epoch": 0.10126582278481013, "percentage": 3.38, "elapsed_time": "0:23:34", "remaining_time": "11:13:41"}
{"current_steps": 60, "total_steps": 1479, "loss": 0.598, "lr": 5e-06, "epoch": 0.12151898734177215, "percentage": 4.06, "elapsed_time": "0:28:15", "remaining_time": "11:08:26"}
{"current_steps": 70, "total_steps": 1479, "loss": 0.5919, "lr": 5e-06, "epoch": 0.14177215189873418, "percentage": 4.73, "elapsed_time": "0:32:57", "remaining_time": "11:03:30"}
{"current_steps": 80, "total_steps": 1479, "loss": 0.5885, "lr": 5e-06, "epoch": 0.1620253164556962, "percentage": 5.41, "elapsed_time": "0:37:39", "remaining_time": "10:58:35"}
{"current_steps": 90, "total_steps": 1479, "loss": 0.5878, "lr": 5e-06, "epoch": 0.18227848101265823, "percentage": 6.09, "elapsed_time": "0:42:21", "remaining_time": "10:53:47"}
{"current_steps": 100, "total_steps": 1479, "loss": 0.5912, "lr": 5e-06, "epoch": 0.20253164556962025, "percentage": 6.76, "elapsed_time": "0:47:04", "remaining_time": "10:49:03"}
{"current_steps": 110, "total_steps": 1479, "loss": 0.5834, "lr": 5e-06, "epoch": 0.22278481012658227, "percentage": 7.44, "elapsed_time": "0:51:46", "remaining_time": "10:44:16"}
{"current_steps": 120, "total_steps": 1479, "loss": 0.5806, "lr": 5e-06, "epoch": 0.2430379746835443, "percentage": 8.11, "elapsed_time": "0:56:28", "remaining_time": "10:39:30"}
{"current_steps": 130, "total_steps": 1479, "loss": 0.5793, "lr": 5e-06, "epoch": 0.26329113924050634, "percentage": 8.79, "elapsed_time": "1:01:09", "remaining_time": "10:34:42"}
{"current_steps": 140, "total_steps": 1479, "loss": 0.5684, "lr": 5e-06, "epoch": 0.28354430379746837, "percentage": 9.47, "elapsed_time": "1:05:51", "remaining_time": "10:29:55"}
{"current_steps": 150, "total_steps": 1479, "loss": 0.5792, "lr": 5e-06, "epoch": 0.3037974683544304, "percentage": 10.14, "elapsed_time": "1:10:33", "remaining_time": "10:25:11"}
{"current_steps": 160, "total_steps": 1479, "loss": 0.5782, "lr": 5e-06, "epoch": 0.3240506329113924, "percentage": 10.82, "elapsed_time": "1:15:16", "remaining_time": "10:20:30"}
{"current_steps": 170, "total_steps": 1479, "loss": 0.5764, "lr": 5e-06, "epoch": 0.34430379746835443, "percentage": 11.49, "elapsed_time": "1:19:58", "remaining_time": "10:15:46"}
{"current_steps": 180, "total_steps": 1479, "loss": 0.5716, "lr": 5e-06, "epoch": 0.36455696202531646, "percentage": 12.17, "elapsed_time": "1:24:40", "remaining_time": "10:11:01"}
{"current_steps": 190, "total_steps": 1479, "loss": 0.5719, "lr": 5e-06, "epoch": 0.3848101265822785, "percentage": 12.85, "elapsed_time": "1:29:22", "remaining_time": "10:06:17"}
{"current_steps": 200, "total_steps": 1479, "loss": 0.5679, "lr": 5e-06, "epoch": 0.4050632911392405, "percentage": 13.52, "elapsed_time": "1:34:04", "remaining_time": "10:01:36"}
{"current_steps": 210, "total_steps": 1479, "loss": 0.5636, "lr": 5e-06, "epoch": 0.4253164556962025, "percentage": 14.2, "elapsed_time": "1:38:46", "remaining_time": "9:56:54"}
{"current_steps": 220, "total_steps": 1479, "loss": 0.5684, "lr": 5e-06, "epoch": 0.44556962025316454, "percentage": 14.87, "elapsed_time": "1:43:28", "remaining_time": "9:52:12"}
{"current_steps": 230, "total_steps": 1479, "loss": 0.567, "lr": 5e-06, "epoch": 0.46582278481012657, "percentage": 15.55, "elapsed_time": "1:48:11", "remaining_time": "9:47:30"}
{"current_steps": 240, "total_steps": 1479, "loss": 0.5633, "lr": 5e-06, "epoch": 0.4860759493670886, "percentage": 16.23, "elapsed_time": "1:52:53", "remaining_time": "9:42:48"}
{"current_steps": 250, "total_steps": 1479, "loss": 0.5672, "lr": 5e-06, "epoch": 0.5063291139240507, "percentage": 16.9, "elapsed_time": "1:57:35", "remaining_time": "9:38:06"}
{"current_steps": 260, "total_steps": 1479, "loss": 0.5541, "lr": 5e-06, "epoch": 0.5265822784810127, "percentage": 17.58, "elapsed_time": "2:02:18", "remaining_time": "9:33:24"}
{"current_steps": 270, "total_steps": 1479, "loss": 0.5634, "lr": 5e-06, "epoch": 0.5468354430379747, "percentage": 18.26, "elapsed_time": "2:07:00", "remaining_time": "9:28:42"}
{"current_steps": 280, "total_steps": 1479, "loss": 0.5585, "lr": 5e-06, "epoch": 0.5670886075949367, "percentage": 18.93, "elapsed_time": "2:11:42", "remaining_time": "9:24:00"}
{"current_steps": 290, "total_steps": 1479, "loss": 0.5589, "lr": 5e-06, "epoch": 0.5873417721518988, "percentage": 19.61, "elapsed_time": "2:16:24", "remaining_time": "9:19:16"}
{"current_steps": 300, "total_steps": 1479, "loss": 0.5584, "lr": 5e-06, "epoch": 0.6075949367088608, "percentage": 20.28, "elapsed_time": "2:21:06", "remaining_time": "9:14:33"}
{"current_steps": 310, "total_steps": 1479, "loss": 0.5641, "lr": 5e-06, "epoch": 0.6278481012658228, "percentage": 20.96, "elapsed_time": "2:25:48", "remaining_time": "9:09:51"}
{"current_steps": 320, "total_steps": 1479, "loss": 0.5602, "lr": 5e-06, "epoch": 0.6481012658227848, "percentage": 21.64, "elapsed_time": "2:30:30", "remaining_time": "9:05:09"}
{"current_steps": 330, "total_steps": 1479, "loss": 0.5598, "lr": 5e-06, "epoch": 0.6683544303797468, "percentage": 22.31, "elapsed_time": "2:35:13", "remaining_time": "9:00:26"}
{"current_steps": 340, "total_steps": 1479, "loss": 0.5592, "lr": 5e-06, "epoch": 0.6886075949367089, "percentage": 22.99, "elapsed_time": "2:39:55", "remaining_time": "8:55:44"}
{"current_steps": 350, "total_steps": 1479, "loss": 0.5606, "lr": 5e-06, "epoch": 0.7088607594936709, "percentage": 23.66, "elapsed_time": "2:44:37", "remaining_time": "8:51:01"}
{"current_steps": 360, "total_steps": 1479, "loss": 0.5627, "lr": 5e-06, "epoch": 0.7291139240506329, "percentage": 24.34, "elapsed_time": "2:49:19", "remaining_time": "8:46:19"}
{"current_steps": 370, "total_steps": 1479, "loss": 0.5553, "lr": 5e-06, "epoch": 0.7493670886075949, "percentage": 25.02, "elapsed_time": "2:54:01", "remaining_time": "8:41:37"}
{"current_steps": 380, "total_steps": 1479, "loss": 0.5523, "lr": 5e-06, "epoch": 0.769620253164557, "percentage": 25.69, "elapsed_time": "2:58:43", "remaining_time": "8:36:54"}
{"current_steps": 390, "total_steps": 1479, "loss": 0.5554, "lr": 5e-06, "epoch": 0.789873417721519, "percentage": 26.37, "elapsed_time": "3:03:26", "remaining_time": "8:32:12"}
{"current_steps": 400, "total_steps": 1479, "loss": 0.5554, "lr": 5e-06, "epoch": 0.810126582278481, "percentage": 27.05, "elapsed_time": "3:08:08", "remaining_time": "8:27:30"}
{"current_steps": 410, "total_steps": 1479, "loss": 0.5499, "lr": 5e-06, "epoch": 0.830379746835443, "percentage": 27.72, "elapsed_time": "3:12:50", "remaining_time": "8:22:48"}
{"current_steps": 420, "total_steps": 1479, "loss": 0.5583, "lr": 5e-06, "epoch": 0.850632911392405, "percentage": 28.4, "elapsed_time": "3:17:33", "remaining_time": "8:18:07"}
{"current_steps": 430, "total_steps": 1479, "loss": 0.552, "lr": 5e-06, "epoch": 0.8708860759493671, "percentage": 29.07, "elapsed_time": "3:22:15", "remaining_time": "8:13:25"}
{"current_steps": 440, "total_steps": 1479, "loss": 0.5576, "lr": 5e-06, "epoch": 0.8911392405063291, "percentage": 29.75, "elapsed_time": "3:26:57", "remaining_time": "8:08:42"}
{"current_steps": 450, "total_steps": 1479, "loss": 0.5486, "lr": 5e-06, "epoch": 0.9113924050632911, "percentage": 30.43, "elapsed_time": "3:31:40", "remaining_time": "8:04:01"}
{"current_steps": 460, "total_steps": 1479, "loss": 0.553, "lr": 5e-06, "epoch": 0.9316455696202531, "percentage": 31.1, "elapsed_time": "3:36:22", "remaining_time": "7:59:18"}
{"current_steps": 470, "total_steps": 1479, "loss": 0.5537, "lr": 5e-06, "epoch": 0.9518987341772152, "percentage": 31.78, "elapsed_time": "3:41:04", "remaining_time": "7:54:35"}
{"current_steps": 480, "total_steps": 1479, "loss": 0.5511, "lr": 5e-06, "epoch": 0.9721518987341772, "percentage": 32.45, "elapsed_time": "3:45:46", "remaining_time": "7:49:53"}
{"current_steps": 490, "total_steps": 1479, "loss": 0.5475, "lr": 5e-06, "epoch": 0.9924050632911392, "percentage": 33.13, "elapsed_time": "3:50:28", "remaining_time": "7:45:11"}
{"current_steps": 493, "total_steps": 1479, "eval_loss": 0.13758791983127594, "epoch": 0.9984810126582279, "percentage": 33.33, "elapsed_time": "3:56:21", "remaining_time": "7:52:42"}