OH_original_wo_gpt4_llm / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 2
c5bb390 verified
{"current_steps": 10, "total_steps": 996, "loss": 0.801, "learning_rate": 5e-06, "epoch": 0.03007518796992481, "percentage": 1.0, "elapsed_time": "0:02:57", "remaining_time": "4:51:57"}
{"current_steps": 20, "total_steps": 996, "loss": 0.7172, "learning_rate": 5e-06, "epoch": 0.06015037593984962, "percentage": 2.01, "elapsed_time": "0:05:49", "remaining_time": "4:44:17"}
{"current_steps": 30, "total_steps": 996, "loss": 0.6923, "learning_rate": 5e-06, "epoch": 0.09022556390977443, "percentage": 3.01, "elapsed_time": "0:08:39", "remaining_time": "4:38:55"}
{"current_steps": 40, "total_steps": 996, "loss": 0.6834, "learning_rate": 5e-06, "epoch": 0.12030075187969924, "percentage": 4.02, "elapsed_time": "0:11:31", "remaining_time": "4:35:28"}
{"current_steps": 50, "total_steps": 996, "loss": 0.6655, "learning_rate": 5e-06, "epoch": 0.15037593984962405, "percentage": 5.02, "elapsed_time": "0:14:22", "remaining_time": "4:31:49"}
{"current_steps": 60, "total_steps": 996, "loss": 0.6568, "learning_rate": 5e-06, "epoch": 0.18045112781954886, "percentage": 6.02, "elapsed_time": "0:17:11", "remaining_time": "4:28:06"}
{"current_steps": 70, "total_steps": 996, "loss": 0.6548, "learning_rate": 5e-06, "epoch": 0.21052631578947367, "percentage": 7.03, "elapsed_time": "0:20:01", "remaining_time": "4:24:50"}
{"current_steps": 80, "total_steps": 996, "loss": 0.6454, "learning_rate": 5e-06, "epoch": 0.24060150375939848, "percentage": 8.03, "elapsed_time": "0:22:51", "remaining_time": "4:21:43"}
{"current_steps": 90, "total_steps": 996, "loss": 0.6331, "learning_rate": 5e-06, "epoch": 0.2706766917293233, "percentage": 9.04, "elapsed_time": "0:25:43", "remaining_time": "4:18:57"}
{"current_steps": 100, "total_steps": 996, "loss": 0.6267, "learning_rate": 5e-06, "epoch": 0.3007518796992481, "percentage": 10.04, "elapsed_time": "0:28:35", "remaining_time": "4:16:06"}
{"current_steps": 110, "total_steps": 996, "loss": 0.6305, "learning_rate": 5e-06, "epoch": 0.3308270676691729, "percentage": 11.04, "elapsed_time": "0:31:24", "remaining_time": "4:13:01"}
{"current_steps": 120, "total_steps": 996, "loss": 0.6246, "learning_rate": 5e-06, "epoch": 0.3609022556390977, "percentage": 12.05, "elapsed_time": "0:34:14", "remaining_time": "4:09:58"}
{"current_steps": 130, "total_steps": 996, "loss": 0.6305, "learning_rate": 5e-06, "epoch": 0.39097744360902253, "percentage": 13.05, "elapsed_time": "0:37:04", "remaining_time": "4:06:57"}
{"current_steps": 140, "total_steps": 996, "loss": 0.6291, "learning_rate": 5e-06, "epoch": 0.42105263157894735, "percentage": 14.06, "elapsed_time": "0:39:54", "remaining_time": "4:03:57"}
{"current_steps": 150, "total_steps": 996, "loss": 0.6279, "learning_rate": 5e-06, "epoch": 0.45112781954887216, "percentage": 15.06, "elapsed_time": "0:42:44", "remaining_time": "4:01:01"}
{"current_steps": 160, "total_steps": 996, "loss": 0.6167, "learning_rate": 5e-06, "epoch": 0.48120300751879697, "percentage": 16.06, "elapsed_time": "0:45:34", "remaining_time": "3:58:05"}
{"current_steps": 170, "total_steps": 996, "loss": 0.6189, "learning_rate": 5e-06, "epoch": 0.5112781954887218, "percentage": 17.07, "elapsed_time": "0:48:23", "remaining_time": "3:55:05"}
{"current_steps": 180, "total_steps": 996, "loss": 0.6096, "learning_rate": 5e-06, "epoch": 0.5413533834586466, "percentage": 18.07, "elapsed_time": "0:51:11", "remaining_time": "3:52:04"}
{"current_steps": 190, "total_steps": 996, "loss": 0.6207, "learning_rate": 5e-06, "epoch": 0.5714285714285714, "percentage": 19.08, "elapsed_time": "0:54:00", "remaining_time": "3:49:06"}
{"current_steps": 200, "total_steps": 996, "loss": 0.6143, "learning_rate": 5e-06, "epoch": 0.6015037593984962, "percentage": 20.08, "elapsed_time": "0:56:50", "remaining_time": "3:46:12"}
{"current_steps": 210, "total_steps": 996, "loss": 0.619, "learning_rate": 5e-06, "epoch": 0.631578947368421, "percentage": 21.08, "elapsed_time": "0:59:40", "remaining_time": "3:43:20"}
{"current_steps": 220, "total_steps": 996, "loss": 0.6229, "learning_rate": 5e-06, "epoch": 0.6616541353383458, "percentage": 22.09, "elapsed_time": "1:02:29", "remaining_time": "3:40:26"}
{"current_steps": 230, "total_steps": 996, "loss": 0.6141, "learning_rate": 5e-06, "epoch": 0.6917293233082706, "percentage": 23.09, "elapsed_time": "1:05:19", "remaining_time": "3:37:32"}
{"current_steps": 240, "total_steps": 996, "loss": 0.6148, "learning_rate": 5e-06, "epoch": 0.7218045112781954, "percentage": 24.1, "elapsed_time": "1:08:08", "remaining_time": "3:34:39"}
{"current_steps": 250, "total_steps": 996, "loss": 0.6112, "learning_rate": 5e-06, "epoch": 0.7518796992481203, "percentage": 25.1, "elapsed_time": "1:10:56", "remaining_time": "3:31:40"}
{"current_steps": 260, "total_steps": 996, "loss": 0.6124, "learning_rate": 5e-06, "epoch": 0.7819548872180451, "percentage": 26.1, "elapsed_time": "1:13:44", "remaining_time": "3:28:44"}
{"current_steps": 270, "total_steps": 996, "loss": 0.6101, "learning_rate": 5e-06, "epoch": 0.8120300751879699, "percentage": 27.11, "elapsed_time": "1:16:33", "remaining_time": "3:25:50"}
{"current_steps": 280, "total_steps": 996, "loss": 0.6101, "learning_rate": 5e-06, "epoch": 0.8421052631578947, "percentage": 28.11, "elapsed_time": "1:19:20", "remaining_time": "3:22:54"}
{"current_steps": 290, "total_steps": 996, "loss": 0.6074, "learning_rate": 5e-06, "epoch": 0.8721804511278195, "percentage": 29.12, "elapsed_time": "1:22:10", "remaining_time": "3:20:02"}
{"current_steps": 300, "total_steps": 996, "loss": 0.6035, "learning_rate": 5e-06, "epoch": 0.9022556390977443, "percentage": 30.12, "elapsed_time": "1:24:59", "remaining_time": "3:17:10"}
{"current_steps": 310, "total_steps": 996, "loss": 0.6037, "learning_rate": 5e-06, "epoch": 0.9323308270676691, "percentage": 31.12, "elapsed_time": "1:27:48", "remaining_time": "3:14:18"}
{"current_steps": 320, "total_steps": 996, "loss": 0.6128, "learning_rate": 5e-06, "epoch": 0.9624060150375939, "percentage": 32.13, "elapsed_time": "1:30:37", "remaining_time": "3:11:26"}
{"current_steps": 330, "total_steps": 996, "loss": 0.6043, "learning_rate": 5e-06, "epoch": 0.9924812030075187, "percentage": 33.13, "elapsed_time": "1:33:27", "remaining_time": "3:08:36"}
{"current_steps": 332, "total_steps": 996, "eval_loss": 0.6096732020378113, "epoch": 0.9984962406015038, "percentage": 33.33, "elapsed_time": "1:36:05", "remaining_time": "3:12:11"}
{"current_steps": 340, "total_steps": 996, "loss": 0.58, "learning_rate": 5e-06, "epoch": 1.0225563909774436, "percentage": 34.14, "elapsed_time": "1:39:11", "remaining_time": "3:11:23"}
{"current_steps": 350, "total_steps": 996, "loss": 0.5596, "learning_rate": 5e-06, "epoch": 1.0526315789473684, "percentage": 35.14, "elapsed_time": "1:42:00", "remaining_time": "3:08:17"}
{"current_steps": 360, "total_steps": 996, "loss": 0.5591, "learning_rate": 5e-06, "epoch": 1.0827067669172932, "percentage": 36.14, "elapsed_time": "1:44:49", "remaining_time": "3:05:11"}
{"current_steps": 370, "total_steps": 996, "loss": 0.5553, "learning_rate": 5e-06, "epoch": 1.112781954887218, "percentage": 37.15, "elapsed_time": "1:47:38", "remaining_time": "3:02:07"}
{"current_steps": 380, "total_steps": 996, "loss": 0.5564, "learning_rate": 5e-06, "epoch": 1.1428571428571428, "percentage": 38.15, "elapsed_time": "1:50:27", "remaining_time": "2:59:02"}
{"current_steps": 390, "total_steps": 996, "loss": 0.5605, "learning_rate": 5e-06, "epoch": 1.1729323308270676, "percentage": 39.16, "elapsed_time": "1:53:15", "remaining_time": "2:55:59"}
{"current_steps": 400, "total_steps": 996, "loss": 0.5519, "learning_rate": 5e-06, "epoch": 1.2030075187969924, "percentage": 40.16, "elapsed_time": "1:56:04", "remaining_time": "2:52:57"}
{"current_steps": 410, "total_steps": 996, "loss": 0.5574, "learning_rate": 5e-06, "epoch": 1.2330827067669172, "percentage": 41.16, "elapsed_time": "1:58:53", "remaining_time": "2:49:55"}
{"current_steps": 420, "total_steps": 996, "loss": 0.5541, "learning_rate": 5e-06, "epoch": 1.263157894736842, "percentage": 42.17, "elapsed_time": "2:01:42", "remaining_time": "2:46:54"}
{"current_steps": 430, "total_steps": 996, "loss": 0.5582, "learning_rate": 5e-06, "epoch": 1.2932330827067668, "percentage": 43.17, "elapsed_time": "2:04:30", "remaining_time": "2:43:53"}
{"current_steps": 440, "total_steps": 996, "loss": 0.5532, "learning_rate": 5e-06, "epoch": 1.3233082706766917, "percentage": 44.18, "elapsed_time": "2:07:18", "remaining_time": "2:40:52"}
{"current_steps": 450, "total_steps": 996, "loss": 0.5571, "learning_rate": 5e-06, "epoch": 1.3533834586466165, "percentage": 45.18, "elapsed_time": "2:10:05", "remaining_time": "2:37:50"}
{"current_steps": 460, "total_steps": 996, "loss": 0.5487, "learning_rate": 5e-06, "epoch": 1.3834586466165413, "percentage": 46.18, "elapsed_time": "2:12:52", "remaining_time": "2:34:49"}
{"current_steps": 470, "total_steps": 996, "loss": 0.5586, "learning_rate": 5e-06, "epoch": 1.413533834586466, "percentage": 47.19, "elapsed_time": "2:15:40", "remaining_time": "2:31:50"}
{"current_steps": 480, "total_steps": 996, "loss": 0.5528, "learning_rate": 5e-06, "epoch": 1.443609022556391, "percentage": 48.19, "elapsed_time": "2:18:28", "remaining_time": "2:28:51"}
{"current_steps": 490, "total_steps": 996, "loss": 0.5624, "learning_rate": 5e-06, "epoch": 1.4736842105263157, "percentage": 49.2, "elapsed_time": "2:21:15", "remaining_time": "2:25:52"}
{"current_steps": 500, "total_steps": 996, "loss": 0.5565, "learning_rate": 5e-06, "epoch": 1.5037593984962405, "percentage": 50.2, "elapsed_time": "2:24:03", "remaining_time": "2:22:54"}
{"current_steps": 510, "total_steps": 996, "loss": 0.5587, "learning_rate": 5e-06, "epoch": 1.5338345864661656, "percentage": 51.2, "elapsed_time": "2:26:50", "remaining_time": "2:19:55"}
{"current_steps": 520, "total_steps": 996, "loss": 0.5493, "learning_rate": 5e-06, "epoch": 1.5639097744360901, "percentage": 52.21, "elapsed_time": "2:29:38", "remaining_time": "2:16:59"}
{"current_steps": 530, "total_steps": 996, "loss": 0.5603, "learning_rate": 5e-06, "epoch": 1.5939849624060152, "percentage": 53.21, "elapsed_time": "2:32:27", "remaining_time": "2:14:02"}
{"current_steps": 540, "total_steps": 996, "loss": 0.5564, "learning_rate": 5e-06, "epoch": 1.6240601503759398, "percentage": 54.22, "elapsed_time": "2:35:15", "remaining_time": "2:11:06"}
{"current_steps": 550, "total_steps": 996, "loss": 0.5605, "learning_rate": 5e-06, "epoch": 1.6541353383458648, "percentage": 55.22, "elapsed_time": "2:38:04", "remaining_time": "2:08:10"}
{"current_steps": 560, "total_steps": 996, "loss": 0.5601, "learning_rate": 5e-06, "epoch": 1.6842105263157894, "percentage": 56.22, "elapsed_time": "2:40:52", "remaining_time": "2:05:15"}
{"current_steps": 570, "total_steps": 996, "loss": 0.5554, "learning_rate": 5e-06, "epoch": 1.7142857142857144, "percentage": 57.23, "elapsed_time": "2:43:41", "remaining_time": "2:02:20"}
{"current_steps": 580, "total_steps": 996, "loss": 0.5645, "learning_rate": 5e-06, "epoch": 1.744360902255639, "percentage": 58.23, "elapsed_time": "2:46:28", "remaining_time": "1:59:24"}
{"current_steps": 590, "total_steps": 996, "loss": 0.5536, "learning_rate": 5e-06, "epoch": 1.774436090225564, "percentage": 59.24, "elapsed_time": "2:49:16", "remaining_time": "1:56:29"}
{"current_steps": 600, "total_steps": 996, "loss": 0.5625, "learning_rate": 5e-06, "epoch": 1.8045112781954886, "percentage": 60.24, "elapsed_time": "2:52:04", "remaining_time": "1:53:34"}
{"current_steps": 610, "total_steps": 996, "loss": 0.554, "learning_rate": 5e-06, "epoch": 1.8345864661654137, "percentage": 61.24, "elapsed_time": "2:54:53", "remaining_time": "1:50:40"}
{"current_steps": 620, "total_steps": 996, "loss": 0.5529, "learning_rate": 5e-06, "epoch": 1.8646616541353382, "percentage": 62.25, "elapsed_time": "2:57:42", "remaining_time": "1:47:46"}
{"current_steps": 630, "total_steps": 996, "loss": 0.547, "learning_rate": 5e-06, "epoch": 1.8947368421052633, "percentage": 63.25, "elapsed_time": "3:00:31", "remaining_time": "1:44:52"}
{"current_steps": 640, "total_steps": 996, "loss": 0.5508, "learning_rate": 5e-06, "epoch": 1.9248120300751879, "percentage": 64.26, "elapsed_time": "3:03:18", "remaining_time": "1:41:58"}
{"current_steps": 650, "total_steps": 996, "loss": 0.5626, "learning_rate": 5e-06, "epoch": 1.954887218045113, "percentage": 65.26, "elapsed_time": "3:06:05", "remaining_time": "1:39:03"}
{"current_steps": 660, "total_steps": 996, "loss": 0.5579, "learning_rate": 5e-06, "epoch": 1.9849624060150375, "percentage": 66.27, "elapsed_time": "3:08:52", "remaining_time": "1:36:09"}
{"current_steps": 665, "total_steps": 996, "eval_loss": 0.6012639403343201, "epoch": 2.0, "percentage": 66.77, "elapsed_time": "3:12:11", "remaining_time": "1:35:39"}
{"current_steps": 670, "total_steps": 996, "loss": 0.5349, "learning_rate": 5e-06, "epoch": 2.0150375939849625, "percentage": 67.27, "elapsed_time": "3:14:41", "remaining_time": "1:34:43"}