gridoneai commited on
Commit
d2a6f37
1 Parent(s): 76f5da0

Upload folder using huggingface_hub

Browse files
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "total_flos": 5.534889401217761e+17,
4
- "train_loss": 0.022140414769101203,
5
- "train_runtime": 22088.826,
6
- "train_samples_per_second": 0.791,
7
- "train_steps_per_second": 0.049
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "total_flos": 5.535926593026785e+17,
4
+ "train_loss": 0.10646422604585472,
5
+ "train_runtime": 4310.9661,
6
+ "train_samples_per_second": 4.052,
7
+ "train_steps_per_second": 0.063
8
  }
trainer_log.jsonl CHANGED
@@ -1,113 +1,31 @@
1
- {"current_steps": 10, "total_steps": 1092, "loss": 1.062, "learning_rate": 9.998360806194573e-05, "epoch": 0.027472527472527472, "percentage": 0.92, "elapsed_time": "0:03:06", "remaining_time": "5:36:06"}
2
- {"current_steps": 20, "total_steps": 1092, "loss": 0.3685, "learning_rate": 9.992609214317185e-05, "epoch": 0.054945054945054944, "percentage": 1.83, "elapsed_time": "0:05:54", "remaining_time": "5:16:22"}
3
- {"current_steps": 30, "total_steps": 1092, "loss": 0.1275, "learning_rate": 9.982724943876865e-05, "epoch": 0.08241758241758242, "percentage": 2.75, "elapsed_time": "0:08:42", "remaining_time": "5:08:22"}
4
- {"current_steps": 40, "total_steps": 1092, "loss": 0.0686, "learning_rate": 9.968716176670104e-05, "epoch": 0.10989010989010989, "percentage": 3.66, "elapsed_time": "0:11:31", "remaining_time": "5:02:56"}
5
- {"current_steps": 50, "total_steps": 1092, "loss": 0.0578, "learning_rate": 9.95059450858383e-05, "epoch": 0.13736263736263737, "percentage": 4.58, "elapsed_time": "0:14:19", "remaining_time": "4:58:39"}
6
- {"current_steps": 60, "total_steps": 1092, "loss": 0.0574, "learning_rate": 9.92837493999681e-05, "epoch": 0.16483516483516483, "percentage": 5.49, "elapsed_time": "0:17:09", "remaining_time": "4:55:03"}
7
- {"current_steps": 70, "total_steps": 1092, "loss": 0.057, "learning_rate": 9.902075863362932e-05, "epoch": 0.19230769230769232, "percentage": 6.41, "elapsed_time": "0:19:58", "remaining_time": "4:51:31"}
8
- {"current_steps": 80, "total_steps": 1092, "loss": 0.0368, "learning_rate": 9.871719047986694e-05, "epoch": 0.21978021978021978, "percentage": 7.33, "elapsed_time": "0:22:48", "remaining_time": "4:48:28"}
9
- {"current_steps": 90, "total_steps": 1092, "loss": 0.0381, "learning_rate": 9.837329622003461e-05, "epoch": 0.24725274725274726, "percentage": 8.24, "elapsed_time": "0:25:37", "remaining_time": "4:45:17"}
10
- {"current_steps": 100, "total_steps": 1092, "loss": 0.0373, "learning_rate": 9.798936051579408e-05, "epoch": 0.27472527472527475, "percentage": 9.16, "elapsed_time": "0:28:26", "remaining_time": "4:42:03"}
11
- {"current_steps": 110, "total_steps": 1092, "loss": 0.0277, "learning_rate": 9.756570117348396e-05, "epoch": 0.3021978021978022, "percentage": 10.07, "elapsed_time": "0:31:14", "remaining_time": "4:38:55"}
12
- {"current_steps": 120, "total_steps": 1092, "loss": 0.0197, "learning_rate": 9.710266888105252e-05, "epoch": 0.32967032967032966, "percentage": 10.99, "elapsed_time": "0:34:03", "remaining_time": "4:35:51"}
13
- {"current_steps": 130, "total_steps": 1092, "loss": 0.032, "learning_rate": 9.660064691777256e-05, "epoch": 0.35714285714285715, "percentage": 11.9, "elapsed_time": "0:36:52", "remaining_time": "4:32:53"}
14
- {"current_steps": 140, "total_steps": 1092, "loss": 0.038, "learning_rate": 9.606005083697832e-05, "epoch": 0.38461538461538464, "percentage": 12.82, "elapsed_time": "0:39:41", "remaining_time": "4:29:53"}
15
- {"current_steps": 150, "total_steps": 1092, "loss": 0.0341, "learning_rate": 9.548132812208746e-05, "epoch": 0.41208791208791207, "percentage": 13.74, "elapsed_time": "0:42:30", "remaining_time": "4:26:55"}
16
- {"current_steps": 160, "total_steps": 1092, "loss": 0.0267, "learning_rate": 9.486495781619253e-05, "epoch": 0.43956043956043955, "percentage": 14.65, "elapsed_time": "0:45:19", "remaining_time": "4:23:59"}
17
- {"current_steps": 170, "total_steps": 1092, "loss": 0.0219, "learning_rate": 9.421145012552851e-05, "epoch": 0.46703296703296704, "percentage": 15.57, "elapsed_time": "0:48:07", "remaining_time": "4:21:02"}
18
- {"current_steps": 180, "total_steps": 1092, "loss": 0.0157, "learning_rate": 9.35213459971449e-05, "epoch": 0.4945054945054945, "percentage": 16.48, "elapsed_time": "0:50:56", "remaining_time": "4:18:05"}
19
- {"current_steps": 190, "total_steps": 1092, "loss": 0.019, "learning_rate": 9.279521667113185e-05, "epoch": 0.521978021978022, "percentage": 17.4, "elapsed_time": "0:53:45", "remaining_time": "4:15:11"}
20
- {"current_steps": 200, "total_steps": 1092, "loss": 0.0205, "learning_rate": 9.203366320777081e-05, "epoch": 0.5494505494505495, "percentage": 18.32, "elapsed_time": "0:56:33", "remaining_time": "4:12:17"}
21
- {"current_steps": 210, "total_steps": 1092, "loss": 0.0082, "learning_rate": 9.123731599000133e-05, "epoch": 0.5769230769230769, "percentage": 19.23, "elapsed_time": "0:59:22", "remaining_time": "4:09:22"}
22
- {"current_steps": 220, "total_steps": 1092, "loss": 0.0135, "learning_rate": 9.040683420161573e-05, "epoch": 0.6043956043956044, "percentage": 20.15, "elapsed_time": "1:02:11", "remaining_time": "4:06:29"}
23
- {"current_steps": 230, "total_steps": 1092, "loss": 0.0215, "learning_rate": 8.954290528161363e-05, "epoch": 0.6318681318681318, "percentage": 21.06, "elapsed_time": "1:04:59", "remaining_time": "4:03:35"}
24
- {"current_steps": 240, "total_steps": 1092, "loss": 0.0128, "learning_rate": 8.864624435516794e-05, "epoch": 0.6593406593406593, "percentage": 21.98, "elapsed_time": "1:07:48", "remaining_time": "4:00:44"}
25
- {"current_steps": 250, "total_steps": 1092, "loss": 0.0055, "learning_rate": 8.771759364167334e-05, "epoch": 0.6868131868131868, "percentage": 22.89, "elapsed_time": "1:10:37", "remaining_time": "3:57:51"}
26
- {"current_steps": 260, "total_steps": 1092, "loss": 0.0186, "learning_rate": 8.67577218403673e-05, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "1:13:25", "remaining_time": "3:54:56"}
27
- {"current_steps": 270, "total_steps": 1092, "loss": 0.0039, "learning_rate": 8.576742349403221e-05, "epoch": 0.7417582417582418, "percentage": 24.73, "elapsed_time": "1:16:13", "remaining_time": "3:52:04"}
28
- {"current_steps": 280, "total_steps": 1092, "loss": 0.0079, "learning_rate": 8.474751833130514e-05, "epoch": 0.7692307692307693, "percentage": 25.64, "elapsed_time": "1:19:02", "remaining_time": "3:49:12"}
29
- {"current_steps": 290, "total_steps": 1092, "loss": 0.0116, "learning_rate": 8.369885058813997e-05, "epoch": 0.7967032967032966, "percentage": 26.56, "elapsed_time": "1:21:51", "remaining_time": "3:46:22"}
30
- {"current_steps": 300, "total_steps": 1092, "loss": 0.006, "learning_rate": 8.262228830898313e-05, "epoch": 0.8241758241758241, "percentage": 27.47, "elapsed_time": "1:24:39", "remaining_time": "3:43:31"}
31
- {"current_steps": 310, "total_steps": 1092, "loss": 0.0077, "learning_rate": 8.151872262824185e-05, "epoch": 0.8516483516483516, "percentage": 28.39, "elapsed_time": "1:27:28", "remaining_time": "3:40:38"}
32
- {"current_steps": 320, "total_steps": 1092, "loss": 0.0088, "learning_rate": 8.038906703263939e-05, "epoch": 0.8791208791208791, "percentage": 29.3, "elapsed_time": "1:30:16", "remaining_time": "3:37:47"}
33
- {"current_steps": 330, "total_steps": 1092, "loss": 0.0083, "learning_rate": 7.923425660506786e-05, "epoch": 0.9065934065934066, "percentage": 30.22, "elapsed_time": "1:33:04", "remaining_time": "3:34:55"}
34
- {"current_steps": 340, "total_steps": 1092, "loss": 0.0117, "learning_rate": 7.805524725056479e-05, "epoch": 0.9340659340659341, "percentage": 31.14, "elapsed_time": "1:35:53", "remaining_time": "3:32:06"}
35
- {"current_steps": 350, "total_steps": 1092, "loss": 0.005, "learning_rate": 7.685301490505379e-05, "epoch": 0.9615384615384616, "percentage": 32.05, "elapsed_time": "1:38:43", "remaining_time": "3:29:17"}
36
- {"current_steps": 360, "total_steps": 1092, "loss": 0.0061, "learning_rate": 7.56285547275046e-05, "epoch": 0.989010989010989, "percentage": 32.97, "elapsed_time": "1:41:31", "remaining_time": "3:26:25"}
37
- {"current_steps": 364, "total_steps": 1092, "eval_loss": 0.008958667516708374, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "1:46:12", "remaining_time": "3:32:25"}
38
- {"current_steps": 370, "total_steps": 1092, "loss": 0.0046, "learning_rate": 7.438288027618096e-05, "epoch": 1.0164835164835164, "percentage": 33.88, "elapsed_time": "1:48:04", "remaining_time": "3:30:53"}
39
- {"current_steps": 380, "total_steps": 1092, "loss": 0.004, "learning_rate": 7.311702266965843e-05, "epoch": 1.043956043956044, "percentage": 34.8, "elapsed_time": "1:50:52", "remaining_time": "3:27:44"}
40
- {"current_steps": 390, "total_steps": 1092, "loss": 0.004, "learning_rate": 7.183202973330643e-05, "epoch": 1.0714285714285714, "percentage": 35.71, "elapsed_time": "1:53:40", "remaining_time": "3:24:36"}
41
- {"current_steps": 400, "total_steps": 1092, "loss": 0.005, "learning_rate": 7.052896513194099e-05, "epoch": 1.098901098901099, "percentage": 36.63, "elapsed_time": "1:56:28", "remaining_time": "3:21:30"}
42
- {"current_steps": 410, "total_steps": 1092, "loss": 0.0047, "learning_rate": 6.92089074893664e-05, "epoch": 1.1263736263736264, "percentage": 37.55, "elapsed_time": "1:59:17", "remaining_time": "3:18:25"}
43
- {"current_steps": 420, "total_steps": 1092, "loss": 0.0031, "learning_rate": 6.78729494955344e-05, "epoch": 1.1538461538461537, "percentage": 38.46, "elapsed_time": "2:02:05", "remaining_time": "3:15:20"}
44
- {"current_steps": 430, "total_steps": 1092, "loss": 0.0006, "learning_rate": 6.652219700205989e-05, "epoch": 1.1813186813186813, "percentage": 39.38, "elapsed_time": "2:04:53", "remaining_time": "3:12:16"}
45
- {"current_steps": 440, "total_steps": 1092, "loss": 0.0016, "learning_rate": 6.51577681068419e-05, "epoch": 1.2087912087912087, "percentage": 40.29, "elapsed_time": "2:07:42", "remaining_time": "3:09:13"}
46
- {"current_steps": 450, "total_steps": 1092, "loss": 0.0024, "learning_rate": 6.378079222854794e-05, "epoch": 1.2362637362637363, "percentage": 41.21, "elapsed_time": "2:10:30", "remaining_time": "3:06:11"}
47
- {"current_steps": 460, "total_steps": 1092, "loss": 0.0049, "learning_rate": 6.239240917172701e-05, "epoch": 1.2637362637362637, "percentage": 42.12, "elapsed_time": "2:13:19", "remaining_time": "3:03:10"}
48
- {"current_steps": 470, "total_steps": 1092, "loss": 0.0032, "learning_rate": 6.099376818332589e-05, "epoch": 1.2912087912087913, "percentage": 43.04, "elapsed_time": "2:16:07", "remaining_time": "3:00:08"}
49
- {"current_steps": 480, "total_steps": 1092, "loss": 0.0003, "learning_rate": 5.9586027001389346e-05, "epoch": 1.3186813186813187, "percentage": 43.96, "elapsed_time": "2:18:55", "remaining_time": "2:57:08"}
50
- {"current_steps": 490, "total_steps": 1092, "loss": 0.001, "learning_rate": 5.8170350896731675e-05, "epoch": 1.3461538461538463, "percentage": 44.87, "elapsed_time": "2:21:45", "remaining_time": "2:54:09"}
51
- {"current_steps": 500, "total_steps": 1092, "loss": 0.0005, "learning_rate": 5.674791170837306e-05, "epoch": 1.3736263736263736, "percentage": 45.79, "elapsed_time": "2:24:33", "remaining_time": "2:51:09"}
52
- {"current_steps": 510, "total_steps": 1092, "loss": 0.0041, "learning_rate": 5.5319886873538885e-05, "epoch": 1.401098901098901, "percentage": 46.7, "elapsed_time": "2:27:22", "remaining_time": "2:48:10"}
53
- {"current_steps": 520, "total_steps": 1092, "loss": 0.0065, "learning_rate": 5.388745845302521e-05, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "2:30:09", "remaining_time": "2:45:10"}
54
- {"current_steps": 530, "total_steps": 1092, "loss": 0.0024, "learning_rate": 5.2451812152736965e-05, "epoch": 1.456043956043956, "percentage": 48.53, "elapsed_time": "2:32:59", "remaining_time": "2:42:13"}
55
- {"current_steps": 540, "total_steps": 1092, "loss": 0.0033, "learning_rate": 5.101413634220888e-05, "epoch": 1.4835164835164836, "percentage": 49.45, "elapsed_time": "2:35:48", "remaining_time": "2:39:15"}
56
- {"current_steps": 550, "total_steps": 1092, "loss": 0.0009, "learning_rate": 4.957562107092163e-05, "epoch": 1.510989010989011, "percentage": 50.37, "elapsed_time": "2:38:36", "remaining_time": "2:36:18"}
57
- {"current_steps": 560, "total_steps": 1092, "loss": 0.0061, "learning_rate": 4.813745708322727e-05, "epoch": 1.5384615384615383, "percentage": 51.28, "elapsed_time": "2:41:24", "remaining_time": "2:33:20"}
58
- {"current_steps": 570, "total_steps": 1092, "loss": 0.0043, "learning_rate": 4.6700834832699644e-05, "epoch": 1.565934065934066, "percentage": 52.2, "elapsed_time": "2:44:14", "remaining_time": "2:30:24"}
59
- {"current_steps": 580, "total_steps": 1092, "loss": 0.0003, "learning_rate": 4.5266943496725344e-05, "epoch": 1.5934065934065935, "percentage": 53.11, "elapsed_time": "2:47:03", "remaining_time": "2:27:28"}
60
- {"current_steps": 590, "total_steps": 1092, "loss": 0.0062, "learning_rate": 4.3836969992151154e-05, "epoch": 1.620879120879121, "percentage": 54.03, "elapsed_time": "2:49:52", "remaining_time": "2:24:32"}
61
- {"current_steps": 600, "total_steps": 1092, "loss": 0.0025, "learning_rate": 4.241209799280253e-05, "epoch": 1.6483516483516483, "percentage": 54.95, "elapsed_time": "2:52:42", "remaining_time": "2:21:36"}
62
- {"current_steps": 610, "total_steps": 1092, "loss": 0.0013, "learning_rate": 4.0993506949686764e-05, "epoch": 1.6758241758241759, "percentage": 55.86, "elapsed_time": "2:55:30", "remaining_time": "2:18:41"}
63
- {"current_steps": 620, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.958237111469131e-05, "epoch": 1.7032967032967035, "percentage": 56.78, "elapsed_time": "2:58:19", "remaining_time": "2:15:45"}
64
- {"current_steps": 630, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.817985856858605e-05, "epoch": 1.7307692307692308, "percentage": 57.69, "elapsed_time": "3:01:08", "remaining_time": "2:12:49"}
65
- {"current_steps": 640, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.678713025413354e-05, "epoch": 1.7582417582417582, "percentage": 58.61, "elapsed_time": "3:03:56", "remaining_time": "2:09:54"}
66
- {"current_steps": 650, "total_steps": 1092, "loss": 0.0, "learning_rate": 3.540533901510782e-05, "epoch": 1.7857142857142856, "percentage": 59.52, "elapsed_time": "3:06:44", "remaining_time": "2:06:59"}
67
- {"current_steps": 660, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.403562864201742e-05, "epoch": 1.8131868131868132, "percentage": 60.44, "elapsed_time": "3:09:33", "remaining_time": "2:04:04"}
68
- {"current_steps": 670, "total_steps": 1092, "loss": 0.0, "learning_rate": 3.267913292532202e-05, "epoch": 1.8406593406593408, "percentage": 61.36, "elapsed_time": "3:12:22", "remaining_time": "2:01:09"}
69
- {"current_steps": 680, "total_steps": 1092, "loss": 0.0, "learning_rate": 3.133697471692704e-05, "epoch": 1.8681318681318682, "percentage": 62.27, "elapsed_time": "3:15:09", "remaining_time": "1:58:14"}
70
- {"current_steps": 690, "total_steps": 1092, "loss": 0.0, "learning_rate": 3.0010265000732406e-05, "epoch": 1.8956043956043955, "percentage": 63.19, "elapsed_time": "3:17:57", "remaining_time": "1:55:20"}
71
- {"current_steps": 700, "total_steps": 1092, "loss": 0.0, "learning_rate": 2.8700101973005455e-05, "epoch": 1.9230769230769231, "percentage": 64.1, "elapsed_time": "3:20:45", "remaining_time": "1:52:25"}
72
- {"current_steps": 710, "total_steps": 1092, "loss": 0.0, "learning_rate": 2.7407570133338733e-05, "epoch": 1.9505494505494505, "percentage": 65.02, "elapsed_time": "3:23:33", "remaining_time": "1:49:30"}
73
- {"current_steps": 720, "total_steps": 1092, "loss": 0.0, "learning_rate": 2.613373938694539e-05, "epoch": 1.978021978021978, "percentage": 65.93, "elapsed_time": "3:26:22", "remaining_time": "1:46:37"}
74
- {"current_steps": 728, "total_steps": 1092, "eval_loss": 0.0039693755097687244, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "3:33:00", "remaining_time": "1:46:30"}
75
- {"current_steps": 730, "total_steps": 1092, "loss": 0.0013, "learning_rate": 2.4879664159035277e-05, "epoch": 2.0054945054945055, "percentage": 66.85, "elapsed_time": "3:33:59", "remaining_time": "1:46:06"}
76
- {"current_steps": 740, "total_steps": 1092, "loss": 0.0005, "learning_rate": 2.364638252200462e-05, "epoch": 2.032967032967033, "percentage": 67.77, "elapsed_time": "3:37:34", "remaining_time": "1:43:29"}
77
- {"current_steps": 750, "total_steps": 1092, "loss": 0.0, "learning_rate": 2.2434915336162103e-05, "epoch": 2.0604395604395602, "percentage": 68.68, "elapsed_time": "3:41:26", "remaining_time": "1:40:58"}
78
- {"current_steps": 760, "total_steps": 1092, "loss": 0.0001, "learning_rate": 2.1246265404702098e-05, "epoch": 2.087912087912088, "percentage": 69.6, "elapsed_time": "3:45:35", "remaining_time": "1:38:32"}
79
- {"current_steps": 770, "total_steps": 1092, "loss": 0.0001, "learning_rate": 2.00814166436252e-05, "epoch": 2.1153846153846154, "percentage": 70.51, "elapsed_time": "3:49:44", "remaining_time": "1:36:04"}
80
- {"current_steps": 780, "total_steps": 1092, "loss": 0.0019, "learning_rate": 1.894133326729256e-05, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "3:53:51", "remaining_time": "1:33:32"}
81
- {"current_steps": 790, "total_steps": 1092, "loss": 0.0001, "learning_rate": 1.7826958990288457e-05, "epoch": 2.17032967032967, "percentage": 72.34, "elapsed_time": "3:57:59", "remaining_time": "1:30:58"}
82
- {"current_steps": 800, "total_steps": 1092, "loss": 0.0001, "learning_rate": 1.673921624625192e-05, "epoch": 2.197802197802198, "percentage": 73.26, "elapsed_time": "4:02:07", "remaining_time": "1:28:22"}
83
- {"current_steps": 810, "total_steps": 1092, "loss": 0.0, "learning_rate": 1.5679005424323605e-05, "epoch": 2.2252747252747254, "percentage": 74.18, "elapsed_time": "4:06:15", "remaining_time": "1:25:43"}
84
- {"current_steps": 820, "total_steps": 1092, "loss": 0.0, "learning_rate": 1.4647204123840457e-05, "epoch": 2.2527472527472527, "percentage": 75.09, "elapsed_time": "4:10:23", "remaining_time": "1:23:03"}
85
- {"current_steps": 830, "total_steps": 1092, "loss": 0.0, "learning_rate": 1.3644666427894614e-05, "epoch": 2.28021978021978, "percentage": 76.01, "elapsed_time": "4:14:31", "remaining_time": "1:20:20"}
86
- {"current_steps": 840, "total_steps": 1092, "loss": 0.0001, "learning_rate": 1.267222219635829e-05, "epoch": 2.3076923076923075, "percentage": 76.92, "elapsed_time": "4:18:38", "remaining_time": "1:17:35"}
87
- {"current_steps": 850, "total_steps": 1092, "loss": 0.0, "learning_rate": 1.1730676378959537e-05, "epoch": 2.3351648351648353, "percentage": 77.84, "elapsed_time": "4:22:47", "remaining_time": "1:14:49"}
88
- {"current_steps": 860, "total_steps": 1092, "loss": 0.0001, "learning_rate": 1.0820808348977662e-05, "epoch": 2.3626373626373627, "percentage": 78.75, "elapsed_time": "4:26:55", "remaining_time": "1:12:00"}
89
- {"current_steps": 870, "total_steps": 1092, "loss": 0.0001, "learning_rate": 9.943371258109707e-06, "epoch": 2.39010989010989, "percentage": 79.67, "elapsed_time": "4:31:03", "remaining_time": "1:09:10"}
90
- {"current_steps": 880, "total_steps": 1092, "loss": 0.0002, "learning_rate": 9.09909141304226e-06, "epoch": 2.4175824175824174, "percentage": 80.59, "elapsed_time": "4:35:11", "remaining_time": "1:06:17"}
91
- {"current_steps": 890, "total_steps": 1092, "loss": 0.0016, "learning_rate": 8.288667674244271e-06, "epoch": 2.4450549450549453, "percentage": 81.5, "elapsed_time": "4:39:21", "remaining_time": "1:03:24"}
92
- {"current_steps": 900, "total_steps": 1092, "loss": 0.0, "learning_rate": 7.512770877478848e-06, "epoch": 2.4725274725274726, "percentage": 82.42, "elapsed_time": "4:43:29", "remaining_time": "1:00:28"}
93
- {"current_steps": 910, "total_steps": 1092, "loss": 0.001, "learning_rate": 6.7720432785127465e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "4:47:36", "remaining_time": "0:57:31"}
94
- {"current_steps": 920, "total_steps": 1092, "loss": 0.0025, "learning_rate": 6.067098021483209e-06, "epoch": 2.5274725274725274, "percentage": 84.25, "elapsed_time": "4:51:44", "remaining_time": "0:54:32"}
95
- {"current_steps": 930, "total_steps": 1092, "loss": 0.0002, "learning_rate": 5.3985186313621595e-06, "epoch": 2.5549450549450547, "percentage": 85.16, "elapsed_time": "4:55:45", "remaining_time": "0:51:31"}
96
- {"current_steps": 940, "total_steps": 1092, "loss": 0.0016, "learning_rate": 4.766858530938106e-06, "epoch": 2.5824175824175826, "percentage": 86.08, "elapsed_time": "4:59:52", "remaining_time": "0:48:29"}
97
- {"current_steps": 950, "total_steps": 1092, "loss": 0.0005, "learning_rate": 4.172640582715187e-06, "epoch": 2.60989010989011, "percentage": 87.0, "elapsed_time": "5:04:01", "remaining_time": "0:45:26"}
98
- {"current_steps": 960, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.6163566561089713e-06, "epoch": 2.6373626373626373, "percentage": 87.91, "elapsed_time": "5:08:08", "remaining_time": "0:42:22"}
99
- {"current_steps": 970, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.0984672202969246e-06, "epoch": 2.6648351648351647, "percentage": 88.83, "elapsed_time": "5:12:17", "remaining_time": "0:39:16"}
100
- {"current_steps": 980, "total_steps": 1092, "loss": 0.001, "learning_rate": 2.6194009630608196e-06, "epoch": 2.6923076923076925, "percentage": 89.74, "elapsed_time": "5:16:24", "remaining_time": "0:36:09"}
101
- {"current_steps": 990, "total_steps": 1092, "loss": 0.0002, "learning_rate": 2.179554435936454e-06, "epoch": 2.71978021978022, "percentage": 90.66, "elapsed_time": "5:20:30", "remaining_time": "0:33:01"}
102
- {"current_steps": 1000, "total_steps": 1092, "loss": 0.0003, "learning_rate": 1.7792917259643993e-06, "epoch": 2.7472527472527473, "percentage": 91.58, "elapsed_time": "5:24:38", "remaining_time": "0:29:51"}
103
- {"current_steps": 1010, "total_steps": 1092, "loss": 0.0001, "learning_rate": 1.4189441543136228e-06, "epoch": 2.7747252747252746, "percentage": 92.49, "elapsed_time": "5:28:45", "remaining_time": "0:26:41"}
104
- {"current_steps": 1020, "total_steps": 1092, "loss": 0.0, "learning_rate": 1.0988100020272785e-06, "epoch": 2.802197802197802, "percentage": 93.41, "elapsed_time": "5:32:54", "remaining_time": "0:23:29"}
105
- {"current_steps": 1030, "total_steps": 1092, "loss": 0.0001, "learning_rate": 8.191542631178439e-07, "epoch": 2.82967032967033, "percentage": 94.32, "elapsed_time": "5:37:02", "remaining_time": "0:20:17"}
106
- {"current_steps": 1040, "total_steps": 1092, "loss": 0.0008, "learning_rate": 5.802084252158613e-07, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "5:41:08", "remaining_time": "0:17:03"}
107
- {"current_steps": 1050, "total_steps": 1092, "loss": 0.0, "learning_rate": 3.8217027795387497e-07, "epoch": 2.8846153846153846, "percentage": 96.15, "elapsed_time": "5:45:16", "remaining_time": "0:13:48"}
108
- {"current_steps": 1060, "total_steps": 1092, "loss": 0.0001, "learning_rate": 2.252037492442738e-07, "epoch": 2.912087912087912, "percentage": 97.07, "elapsed_time": "5:49:25", "remaining_time": "0:10:32"}
109
- {"current_steps": 1070, "total_steps": 1092, "loss": 0.0002, "learning_rate": 1.0943876958640631e-07, "epoch": 2.9395604395604398, "percentage": 97.99, "elapsed_time": "5:53:34", "remaining_time": "0:07:16"}
110
- {"current_steps": 1080, "total_steps": 1092, "loss": 0.0001, "learning_rate": 3.497116451542382e-08, "epoch": 2.967032967032967, "percentage": 98.9, "elapsed_time": "5:57:41", "remaining_time": "0:03:58"}
111
- {"current_steps": 1090, "total_steps": 1092, "loss": 0.0007, "learning_rate": 1.862575281807999e-09, "epoch": 2.9945054945054945, "percentage": 99.82, "elapsed_time": "6:01:51", "remaining_time": "0:00:39"}
112
- {"current_steps": 1092, "total_steps": 1092, "eval_loss": 0.004094572272151709, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "6:07:59", "remaining_time": "0:00:00"}
113
- {"current_steps": 1092, "total_steps": 1092, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "6:08:08", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 273, "loss": 1.5241, "learning_rate": 9.979337278845333e-05, "epoch": 0.10989010989010989, "percentage": 3.66, "elapsed_time": "0:02:30", "remaining_time": "1:06:00"}
2
+ {"current_steps": 20, "total_steps": 273, "loss": 0.5751, "learning_rate": 9.905673043533644e-05, "epoch": 0.21978021978021978, "percentage": 7.33, "elapsed_time": "0:04:55", "remaining_time": "1:02:19"}
3
+ {"current_steps": 30, "total_steps": 273, "loss": 0.2692, "learning_rate": 9.762171498693778e-05, "epoch": 0.32967032967032966, "percentage": 10.99, "elapsed_time": "0:07:21", "remaining_time": "0:59:33"}
4
+ {"current_steps": 40, "total_steps": 273, "loss": 0.2083, "learning_rate": 9.55562968825368e-05, "epoch": 0.43956043956043955, "percentage": 14.65, "elapsed_time": "0:09:46", "remaining_time": "0:56:56"}
5
+ {"current_steps": 50, "total_steps": 273, "loss": 0.1299, "learning_rate": 9.288781753684576e-05, "epoch": 0.5494505494505495, "percentage": 18.32, "elapsed_time": "0:12:12", "remaining_time": "0:54:25"}
6
+ {"current_steps": 60, "total_steps": 273, "loss": 0.052, "learning_rate": 8.9651601516976e-05, "epoch": 0.6593406593406593, "percentage": 21.98, "elapsed_time": "0:14:37", "remaining_time": "0:51:53"}
7
+ {"current_steps": 70, "total_steps": 273, "loss": 0.0471, "learning_rate": 8.589048892591169e-05, "epoch": 0.7692307692307693, "percentage": 25.64, "elapsed_time": "0:17:01", "remaining_time": "0:49:22"}
8
+ {"current_steps": 80, "total_steps": 273, "loss": 0.0194, "learning_rate": 8.165426829746728e-05, "epoch": 0.8791208791208791, "percentage": 29.3, "elapsed_time": "0:19:26", "remaining_time": "0:46:54"}
9
+ {"current_steps": 90, "total_steps": 273, "loss": 0.0165, "learning_rate": 7.699901750990307e-05, "epoch": 0.989010989010989, "percentage": 32.97, "elapsed_time": "0:21:51", "remaining_time": "0:44:26"}
10
+ {"current_steps": 91, "total_steps": 273, "eval_loss": 0.01004050299525261, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:22:23", "remaining_time": "0:44:47"}
11
+ {"current_steps": 100, "total_steps": 273, "loss": 0.0085, "learning_rate": 7.19863614429959e-05, "epoch": 1.098901098901099, "percentage": 36.63, "elapsed_time": "0:26:10", "remaining_time": "0:45:16"}
12
+ {"current_steps": 110, "total_steps": 273, "loss": 0.0143, "learning_rate": 6.668265620548888e-05, "epoch": 1.2087912087912087, "percentage": 40.29, "elapsed_time": "0:28:36", "remaining_time": "0:42:23"}
13
+ {"current_steps": 120, "total_steps": 273, "loss": 0.0107, "learning_rate": 6.115811073188369e-05, "epoch": 1.3186813186813187, "percentage": 43.96, "elapsed_time": "0:31:02", "remaining_time": "0:39:34"}
14
+ {"current_steps": 130, "total_steps": 273, "loss": 0.0134, "learning_rate": 5.548585737662666e-05, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:33:27", "remaining_time": "0:36:48"}
15
+ {"current_steps": 140, "total_steps": 273, "loss": 0.0058, "learning_rate": 4.9740983808896736e-05, "epoch": 1.5384615384615383, "percentage": 51.28, "elapsed_time": "0:35:51", "remaining_time": "0:34:04"}
16
+ {"current_steps": 150, "total_steps": 273, "loss": 0.0026, "learning_rate": 4.399953902349573e-05, "epoch": 1.6483516483516483, "percentage": 54.95, "elapsed_time": "0:38:17", "remaining_time": "0:31:24"}
17
+ {"current_steps": 160, "total_steps": 273, "loss": 0.0008, "learning_rate": 3.833752662598464e-05, "epoch": 1.7582417582417582, "percentage": 58.61, "elapsed_time": "0:40:42", "remaining_time": "0:28:44"}
18
+ {"current_steps": 170, "total_steps": 273, "loss": 0.0006, "learning_rate": 3.282989871866994e-05, "epoch": 1.8681318681318682, "percentage": 62.27, "elapsed_time": "0:43:06", "remaining_time": "0:26:07"}
19
+ {"current_steps": 180, "total_steps": 273, "loss": 0.0001, "learning_rate": 2.7549563706089264e-05, "epoch": 1.978021978021978, "percentage": 65.93, "elapsed_time": "0:45:30", "remaining_time": "0:23:30"}
20
+ {"current_steps": 182, "total_steps": 273, "eval_loss": 0.004785448778420687, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:46:17", "remaining_time": "0:23:08"}
21
+ {"current_steps": 190, "total_steps": 273, "loss": 0.0034, "learning_rate": 2.2566421154384364e-05, "epoch": 2.087912087912088, "percentage": 69.6, "elapsed_time": "0:49:51", "remaining_time": "0:21:46"}
22
+ {"current_steps": 200, "total_steps": 273, "loss": 0.0007, "learning_rate": 1.7946436480816985e-05, "epoch": 2.197802197802198, "percentage": 73.26, "elapsed_time": "0:52:17", "remaining_time": "0:19:05"}
23
+ {"current_steps": 210, "total_steps": 273, "loss": 0.0001, "learning_rate": 1.3750767722423518e-05, "epoch": 2.3076923076923075, "percentage": 76.92, "elapsed_time": "0:54:43", "remaining_time": "0:16:25"}
24
+ {"current_steps": 220, "total_steps": 273, "loss": 0.0002, "learning_rate": 1.0034955943395396e-05, "epoch": 2.4175824175824174, "percentage": 80.59, "elapsed_time": "0:57:08", "remaining_time": "0:13:46"}
25
+ {"current_steps": 230, "total_steps": 273, "loss": 0.0014, "learning_rate": 6.848189998341231e-06, "epoch": 2.5274725274725274, "percentage": 84.25, "elapsed_time": "0:59:33", "remaining_time": "0:11:08"}
26
+ {"current_steps": 240, "total_steps": 273, "loss": 0.0007, "learning_rate": 4.232655384284196e-06, "epoch": 2.6373626373626373, "percentage": 87.91, "elapsed_time": "1:01:57", "remaining_time": "0:08:31"}
27
+ {"current_steps": 250, "total_steps": 273, "loss": 0.0007, "learning_rate": 2.222975801106164e-06, "epoch": 2.7472527472527473, "percentage": 91.58, "elapsed_time": "1:04:22", "remaining_time": "0:05:55"}
28
+ {"current_steps": 260, "total_steps": 273, "loss": 0.0001, "learning_rate": 8.457548129017113e-07, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "1:06:47", "remaining_time": "0:03:20"}
29
+ {"current_steps": 270, "total_steps": 273, "loss": 0.0008, "learning_rate": 1.192236775981237e-07, "epoch": 2.967032967032967, "percentage": 98.9, "elapsed_time": "1:09:12", "remaining_time": "0:00:46"}
30
+ {"current_steps": 273, "total_steps": 273, "eval_loss": 0.004738230258226395, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:10:14", "remaining_time": "0:00:00"}
31
+ {"current_steps": 273, "total_steps": 273, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:11:50", "remaining_time": "0:00:00"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_eval_loss.png CHANGED
training_loss.png CHANGED