llama_8b_lima_8 / trainer_log.jsonl
OpenLeecher's picture
Model save
3e3886a verified
raw
history blame
15.9 kB
{"current_steps": 5, "total_steps": 350, "loss": 0.8722, "learning_rate": 1.6e-06, "epoch": 0.014285714285714285, "percentage": 1.43, "elapsed_time": "0:00:28", "remaining_time": "0:33:06"}
{"current_steps": 10, "total_steps": 350, "loss": 0.7825, "learning_rate": 3.2e-06, "epoch": 0.02857142857142857, "percentage": 2.86, "elapsed_time": "0:00:53", "remaining_time": "0:30:22"}
{"current_steps": 15, "total_steps": 350, "loss": 0.7646, "learning_rate": 4.8e-06, "epoch": 0.04285714285714286, "percentage": 4.29, "elapsed_time": "0:01:10", "remaining_time": "0:26:11"}
{"current_steps": 20, "total_steps": 350, "loss": 0.7226, "learning_rate": 6.4e-06, "epoch": 0.05714285714285714, "percentage": 5.71, "elapsed_time": "0:01:27", "remaining_time": "0:24:11"}
{"current_steps": 25, "total_steps": 350, "loss": 0.6666, "learning_rate": 8e-06, "epoch": 0.07142857142857142, "percentage": 7.14, "elapsed_time": "0:01:50", "remaining_time": "0:23:57"}
{"current_steps": 30, "total_steps": 350, "loss": 0.7263, "learning_rate": 7.771005917159763e-06, "epoch": 0.08571428571428572, "percentage": 8.57, "elapsed_time": "0:02:14", "remaining_time": "0:23:59"}
{"current_steps": 35, "total_steps": 350, "loss": 0.6211, "learning_rate": 7.545562130177514e-06, "epoch": 0.1, "percentage": 10.0, "elapsed_time": "0:02:42", "remaining_time": "0:24:24"}
{"current_steps": 35, "total_steps": 350, "eval_loss": 0.9106850624084473, "epoch": 0.1, "percentage": 10.0, "elapsed_time": "0:02:51", "remaining_time": "0:25:39"}
{"current_steps": 40, "total_steps": 350, "loss": 0.6989, "learning_rate": 7.323668639053254e-06, "epoch": 0.11428571428571428, "percentage": 11.43, "elapsed_time": "0:03:19", "remaining_time": "0:25:44"}
{"current_steps": 45, "total_steps": 350, "loss": 0.704, "learning_rate": 7.105325443786982e-06, "epoch": 0.12857142857142856, "percentage": 12.86, "elapsed_time": "0:03:48", "remaining_time": "0:25:47"}
{"current_steps": 50, "total_steps": 350, "loss": 0.6654, "learning_rate": 6.890532544378699e-06, "epoch": 0.14285714285714285, "percentage": 14.29, "elapsed_time": "0:04:07", "remaining_time": "0:24:46"}
{"current_steps": 55, "total_steps": 350, "loss": 0.7127, "learning_rate": 6.679289940828402e-06, "epoch": 0.15714285714285714, "percentage": 15.71, "elapsed_time": "0:04:22", "remaining_time": "0:23:27"}
{"current_steps": 60, "total_steps": 350, "loss": 0.6253, "learning_rate": 6.471597633136094e-06, "epoch": 0.17142857142857143, "percentage": 17.14, "elapsed_time": "0:04:45", "remaining_time": "0:22:59"}
{"current_steps": 65, "total_steps": 350, "loss": 0.6973, "learning_rate": 6.2674556213017745e-06, "epoch": 0.18571428571428572, "percentage": 18.57, "elapsed_time": "0:05:09", "remaining_time": "0:22:35"}
{"current_steps": 70, "total_steps": 350, "loss": 0.6976, "learning_rate": 6.066863905325444e-06, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:05:36", "remaining_time": "0:22:25"}
{"current_steps": 70, "total_steps": 350, "eval_loss": 0.852588951587677, "epoch": 0.2, "percentage": 20.0, "elapsed_time": "0:05:44", "remaining_time": "0:22:57"}
{"current_steps": 75, "total_steps": 350, "loss": 0.6194, "learning_rate": 5.869822485207101e-06, "epoch": 0.21428571428571427, "percentage": 21.43, "elapsed_time": "0:06:06", "remaining_time": "0:22:23"}
{"current_steps": 80, "total_steps": 350, "loss": 0.6906, "learning_rate": 5.676331360946745e-06, "epoch": 0.22857142857142856, "percentage": 22.86, "elapsed_time": "0:06:32", "remaining_time": "0:22:03"}
{"current_steps": 85, "total_steps": 350, "loss": 0.6401, "learning_rate": 5.486390532544378e-06, "epoch": 0.24285714285714285, "percentage": 24.29, "elapsed_time": "0:06:52", "remaining_time": "0:21:27"}
{"current_steps": 90, "total_steps": 350, "loss": 0.6326, "learning_rate": 5.300000000000002e-06, "epoch": 0.2571428571428571, "percentage": 25.71, "elapsed_time": "0:07:16", "remaining_time": "0:21:00"}
{"current_steps": 95, "total_steps": 350, "loss": 0.6019, "learning_rate": 5.1171597633136094e-06, "epoch": 0.2714285714285714, "percentage": 27.14, "elapsed_time": "0:07:35", "remaining_time": "0:20:22"}
{"current_steps": 100, "total_steps": 350, "loss": 0.6404, "learning_rate": 4.9378698224852065e-06, "epoch": 0.2857142857142857, "percentage": 28.57, "elapsed_time": "0:07:51", "remaining_time": "0:19:39"}
{"current_steps": 105, "total_steps": 350, "loss": 0.5762, "learning_rate": 4.762130177514793e-06, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "0:08:08", "remaining_time": "0:18:59"}
{"current_steps": 105, "total_steps": 350, "eval_loss": 0.7999083399772644, "epoch": 0.3, "percentage": 30.0, "elapsed_time": "0:08:16", "remaining_time": "0:19:18"}
{"current_steps": 110, "total_steps": 350, "loss": 0.7544, "learning_rate": 4.589940828402367e-06, "epoch": 0.3142857142857143, "percentage": 31.43, "elapsed_time": "0:08:45", "remaining_time": "0:19:07"}
{"current_steps": 115, "total_steps": 350, "loss": 0.6494, "learning_rate": 4.421301775147928e-06, "epoch": 0.32857142857142857, "percentage": 32.86, "elapsed_time": "0:09:12", "remaining_time": "0:18:48"}
{"current_steps": 120, "total_steps": 350, "loss": 0.6542, "learning_rate": 4.2562130177514784e-06, "epoch": 0.34285714285714286, "percentage": 34.29, "elapsed_time": "0:09:39", "remaining_time": "0:18:29"}
{"current_steps": 125, "total_steps": 350, "loss": 0.6631, "learning_rate": 4.094674556213017e-06, "epoch": 0.35714285714285715, "percentage": 35.71, "elapsed_time": "0:10:01", "remaining_time": "0:18:02"}
{"current_steps": 130, "total_steps": 350, "loss": 0.6086, "learning_rate": 3.936686390532545e-06, "epoch": 0.37142857142857144, "percentage": 37.14, "elapsed_time": "0:10:25", "remaining_time": "0:17:38"}
{"current_steps": 135, "total_steps": 350, "loss": 0.5935, "learning_rate": 3.7822485207100586e-06, "epoch": 0.38571428571428573, "percentage": 38.57, "elapsed_time": "0:10:44", "remaining_time": "0:17:07"}
{"current_steps": 140, "total_steps": 350, "loss": 0.5226, "learning_rate": 3.631360946745561e-06, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:11:04", "remaining_time": "0:16:36"}
{"current_steps": 140, "total_steps": 350, "eval_loss": 0.7717307209968567, "epoch": 0.4, "percentage": 40.0, "elapsed_time": "0:11:12", "remaining_time": "0:16:48"}
{"current_steps": 145, "total_steps": 350, "loss": 0.5967, "learning_rate": 3.484023668639053e-06, "epoch": 0.4142857142857143, "percentage": 41.43, "elapsed_time": "0:11:33", "remaining_time": "0:16:20"}
{"current_steps": 150, "total_steps": 350, "loss": 0.6379, "learning_rate": 3.3402366863905327e-06, "epoch": 0.42857142857142855, "percentage": 42.86, "elapsed_time": "0:11:52", "remaining_time": "0:15:50"}
{"current_steps": 155, "total_steps": 350, "loss": 0.69, "learning_rate": 3.1999999999999994e-06, "epoch": 0.44285714285714284, "percentage": 44.29, "elapsed_time": "0:12:18", "remaining_time": "0:15:28"}
{"current_steps": 160, "total_steps": 350, "loss": 0.6177, "learning_rate": 3.0633136094674547e-06, "epoch": 0.45714285714285713, "percentage": 45.71, "elapsed_time": "0:12:35", "remaining_time": "0:14:57"}
{"current_steps": 165, "total_steps": 350, "loss": 0.6451, "learning_rate": 2.930177514792899e-06, "epoch": 0.4714285714285714, "percentage": 47.14, "elapsed_time": "0:12:55", "remaining_time": "0:14:29"}
{"current_steps": 170, "total_steps": 350, "loss": 0.5706, "learning_rate": 2.8005917159763313e-06, "epoch": 0.4857142857142857, "percentage": 48.57, "elapsed_time": "0:13:23", "remaining_time": "0:14:10"}
{"current_steps": 175, "total_steps": 350, "loss": 0.5866, "learning_rate": 2.674556213017751e-06, "epoch": 0.5, "percentage": 50.0, "elapsed_time": "0:13:39", "remaining_time": "0:13:39"}
{"current_steps": 175, "total_steps": 350, "eval_loss": 0.7432886958122253, "epoch": 0.5, "percentage": 50.0, "elapsed_time": "0:13:47", "remaining_time": "0:13:47"}
{"current_steps": 180, "total_steps": 350, "loss": 0.6236, "learning_rate": 2.5520710059171586e-06, "epoch": 0.5142857142857142, "percentage": 51.43, "elapsed_time": "0:14:04", "remaining_time": "0:13:17"}
{"current_steps": 185, "total_steps": 350, "loss": 0.614, "learning_rate": 2.4331360946745558e-06, "epoch": 0.5285714285714286, "percentage": 52.86, "elapsed_time": "0:14:24", "remaining_time": "0:12:50"}
{"current_steps": 190, "total_steps": 350, "loss": 0.5696, "learning_rate": 2.317751479289941e-06, "epoch": 0.5428571428571428, "percentage": 54.29, "elapsed_time": "0:14:39", "remaining_time": "0:12:20"}
{"current_steps": 195, "total_steps": 350, "loss": 0.6017, "learning_rate": 2.2059171597633135e-06, "epoch": 0.5571428571428572, "percentage": 55.71, "elapsed_time": "0:15:00", "remaining_time": "0:11:55"}
{"current_steps": 200, "total_steps": 350, "loss": 0.5215, "learning_rate": 2.0976331360946745e-06, "epoch": 0.5714285714285714, "percentage": 57.14, "elapsed_time": "0:15:17", "remaining_time": "0:11:28"}
{"current_steps": 205, "total_steps": 350, "loss": 0.6335, "learning_rate": 1.9928994082840233e-06, "epoch": 0.5857142857142857, "percentage": 58.57, "elapsed_time": "0:15:41", "remaining_time": "0:11:05"}
{"current_steps": 210, "total_steps": 350, "loss": 0.5999, "learning_rate": 1.891715976331361e-06, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:15:56", "remaining_time": "0:10:37"}
{"current_steps": 210, "total_steps": 350, "eval_loss": 0.7398412227630615, "epoch": 0.6, "percentage": 60.0, "elapsed_time": "0:16:04", "remaining_time": "0:10:43"}
{"current_steps": 215, "total_steps": 350, "loss": 0.6837, "learning_rate": 1.7940828402366863e-06, "epoch": 0.6142857142857143, "percentage": 61.43, "elapsed_time": "0:16:25", "remaining_time": "0:10:18"}
{"current_steps": 220, "total_steps": 350, "loss": 0.6247, "learning_rate": 1.7000000000000002e-06, "epoch": 0.6285714285714286, "percentage": 62.86, "elapsed_time": "0:16:52", "remaining_time": "0:09:58"}
{"current_steps": 225, "total_steps": 350, "loss": 0.5437, "learning_rate": 1.6094674556213014e-06, "epoch": 0.6428571428571429, "percentage": 64.29, "elapsed_time": "0:17:05", "remaining_time": "0:09:29"}
{"current_steps": 230, "total_steps": 350, "loss": 0.6034, "learning_rate": 1.5224852071005916e-06, "epoch": 0.6571428571428571, "percentage": 65.71, "elapsed_time": "0:17:33", "remaining_time": "0:09:09"}
{"current_steps": 235, "total_steps": 350, "loss": 0.5687, "learning_rate": 1.4390532544378696e-06, "epoch": 0.6714285714285714, "percentage": 67.14, "elapsed_time": "0:17:58", "remaining_time": "0:08:47"}
{"current_steps": 240, "total_steps": 350, "loss": 0.6636, "learning_rate": 1.3591715976331362e-06, "epoch": 0.6857142857142857, "percentage": 68.57, "elapsed_time": "0:18:32", "remaining_time": "0:08:29"}
{"current_steps": 245, "total_steps": 350, "loss": 0.6464, "learning_rate": 1.2828402366863903e-06, "epoch": 0.7, "percentage": 70.0, "elapsed_time": "0:18:56", "remaining_time": "0:08:07"}
{"current_steps": 245, "total_steps": 350, "eval_loss": 0.7203609943389893, "epoch": 0.7, "percentage": 70.0, "elapsed_time": "0:19:05", "remaining_time": "0:08:10"}
{"current_steps": 250, "total_steps": 350, "loss": 0.6081, "learning_rate": 1.2100591715976333e-06, "epoch": 0.7142857142857143, "percentage": 71.43, "elapsed_time": "0:19:36", "remaining_time": "0:07:50"}
{"current_steps": 255, "total_steps": 350, "loss": 0.5808, "learning_rate": 1.1408284023668636e-06, "epoch": 0.7285714285714285, "percentage": 72.86, "elapsed_time": "0:20:03", "remaining_time": "0:07:28"}
{"current_steps": 260, "total_steps": 350, "loss": 0.6201, "learning_rate": 1.0751479289940828e-06, "epoch": 0.7428571428571429, "percentage": 74.29, "elapsed_time": "0:20:21", "remaining_time": "0:07:02"}
{"current_steps": 265, "total_steps": 350, "loss": 0.5034, "learning_rate": 1.0130177514792898e-06, "epoch": 0.7571428571428571, "percentage": 75.71, "elapsed_time": "0:20:41", "remaining_time": "0:06:38"}
{"current_steps": 270, "total_steps": 350, "loss": 0.5932, "learning_rate": 9.544378698224853e-07, "epoch": 0.7714285714285715, "percentage": 77.14, "elapsed_time": "0:20:59", "remaining_time": "0:06:13"}
{"current_steps": 275, "total_steps": 350, "loss": 0.5742, "learning_rate": 8.994082840236684e-07, "epoch": 0.7857142857142857, "percentage": 78.57, "elapsed_time": "0:21:22", "remaining_time": "0:05:49"}
{"current_steps": 280, "total_steps": 350, "loss": 0.552, "learning_rate": 8.479289940828401e-07, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:21:42", "remaining_time": "0:05:25"}
{"current_steps": 280, "total_steps": 350, "eval_loss": 0.7106707692146301, "epoch": 0.8, "percentage": 80.0, "elapsed_time": "0:21:50", "remaining_time": "0:05:27"}
{"current_steps": 285, "total_steps": 350, "loss": 0.6199, "learning_rate": 7.999999999999998e-07, "epoch": 0.8142857142857143, "percentage": 81.43, "elapsed_time": "0:22:10", "remaining_time": "0:05:03"}
{"current_steps": 290, "total_steps": 350, "loss": 0.5638, "learning_rate": 7.556213017751479e-07, "epoch": 0.8285714285714286, "percentage": 82.86, "elapsed_time": "0:22:29", "remaining_time": "0:04:39"}
{"current_steps": 295, "total_steps": 350, "loss": 0.5326, "learning_rate": 7.147928994082838e-07, "epoch": 0.8428571428571429, "percentage": 84.29, "elapsed_time": "0:22:49", "remaining_time": "0:04:15"}
{"current_steps": 300, "total_steps": 350, "loss": 0.6127, "learning_rate": 6.775147928994083e-07, "epoch": 0.8571428571428571, "percentage": 85.71, "elapsed_time": "0:23:09", "remaining_time": "0:03:51"}
{"current_steps": 305, "total_steps": 350, "loss": 0.5634, "learning_rate": 6.437869822485206e-07, "epoch": 0.8714285714285714, "percentage": 87.14, "elapsed_time": "0:23:32", "remaining_time": "0:03:28"}
{"current_steps": 310, "total_steps": 350, "loss": 0.5592, "learning_rate": 6.136094674556213e-07, "epoch": 0.8857142857142857, "percentage": 88.57, "elapsed_time": "0:24:04", "remaining_time": "0:03:06"}
{"current_steps": 315, "total_steps": 350, "loss": 0.5657, "learning_rate": 5.869822485207099e-07, "epoch": 0.9, "percentage": 90.0, "elapsed_time": "0:24:33", "remaining_time": "0:02:43"}
{"current_steps": 315, "total_steps": 350, "eval_loss": 0.7130246758460999, "epoch": 0.9, "percentage": 90.0, "elapsed_time": "0:24:41", "remaining_time": "0:02:44"}
{"current_steps": 320, "total_steps": 350, "loss": 0.6271, "learning_rate": 5.63905325443787e-07, "epoch": 0.9142857142857143, "percentage": 91.43, "elapsed_time": "0:25:03", "remaining_time": "0:02:20"}
{"current_steps": 325, "total_steps": 350, "loss": 0.5349, "learning_rate": 5.443786982248519e-07, "epoch": 0.9285714285714286, "percentage": 92.86, "elapsed_time": "0:25:17", "remaining_time": "0:01:56"}
{"current_steps": 330, "total_steps": 350, "loss": 0.7169, "learning_rate": 5.284023668639053e-07, "epoch": 0.9428571428571428, "percentage": 94.29, "elapsed_time": "0:25:50", "remaining_time": "0:01:33"}
{"current_steps": 335, "total_steps": 350, "loss": 0.606, "learning_rate": 5.159763313609466e-07, "epoch": 0.9571428571428572, "percentage": 95.71, "elapsed_time": "0:26:15", "remaining_time": "0:01:10"}
{"current_steps": 340, "total_steps": 350, "loss": 0.5669, "learning_rate": 5.071005917159763e-07, "epoch": 0.9714285714285714, "percentage": 97.14, "elapsed_time": "0:26:45", "remaining_time": "0:00:47"}
{"current_steps": 345, "total_steps": 350, "loss": 0.5541, "learning_rate": 5.01775147928994e-07, "epoch": 0.9857142857142858, "percentage": 98.57, "elapsed_time": "0:27:02", "remaining_time": "0:00:23"}
{"current_steps": 350, "total_steps": 350, "loss": 0.5687, "learning_rate": 5e-07, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:27:25", "remaining_time": "0:00:00"}
{"current_steps": 350, "total_steps": 350, "eval_loss": 0.7043666839599609, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:27:33", "remaining_time": "0:00:00"}
{"current_steps": 350, "total_steps": 350, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:29:05", "remaining_time": "0:00:00"}