sedrickkeh commited on
Commit
3f0454d
1 Parent(s): f1da9d5

Training in progress, epoch 2

Browse files
config.json.sagemaker-uploaded ADDED
File without changes
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deae05c50f383dc68b44613efba68667f6cb0dfb329159e0933481b995a67431
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fae2eb3974985f2c0270723f64aa6d615c0fbd121e7a45f5f29c589a47f972a
3
  size 4976698672
model-00001-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72972b23d6b41558749bab9faf88de1616623b5387fff97ddca00c37534f2944
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e316c279463be2352ce7e7e37790c234864cec4796d1a8627ac98fc53c82332f
3
  size 4999802720
model-00002-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2037bcd1e3bd97ea35f78ef8d71dd0d846749171cb79dca234d89e99a2d2cb67
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c8e49c000b447d90f293a49a5f1e1cf880b7a0b05fadd166dca07988a5ce5f0
3
  size 4915916176
model-00003-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea74960e420ffbd930e3d6e08b39e9bfe3c98683e442926b8a5ee78a70c7497a
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baf3220e0cbd2383543343301440a0611de155d3e1d16f59f08253f9072d6e32
3
  size 1168138808
model-00004-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
special_tokens_map.json.sagemaker-uploaded ADDED
File without changes
tokenizer.json.sagemaker-uploaded ADDED
File without changes
tokenizer_config.json.sagemaker-uploaded ADDED
File without changes
trainer_log.jsonl CHANGED
@@ -30,3 +30,67 @@
30
  {"current_steps": 300, "total_steps": 921, "loss": 0.6185, "learning_rate": 5e-06, "epoch": 0.974817221770918, "percentage": 32.57, "elapsed_time": "1:21:04", "remaining_time": "2:47:49"}
31
  {"current_steps": 307, "total_steps": 921, "eval_loss": 0.6177791357040405, "epoch": 0.9975629569455727, "percentage": 33.33, "elapsed_time": "1:24:57", "remaining_time": "2:49:54"}
32
  {"current_steps": 310, "total_steps": 921, "loss": 0.6074, "learning_rate": 5e-06, "epoch": 1.007311129163282, "percentage": 33.66, "elapsed_time": "1:26:32", "remaining_time": "2:50:34"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  {"current_steps": 300, "total_steps": 921, "loss": 0.6185, "learning_rate": 5e-06, "epoch": 0.974817221770918, "percentage": 32.57, "elapsed_time": "1:21:04", "remaining_time": "2:47:49"}
31
  {"current_steps": 307, "total_steps": 921, "eval_loss": 0.6177791357040405, "epoch": 0.9975629569455727, "percentage": 33.33, "elapsed_time": "1:24:57", "remaining_time": "2:49:54"}
32
  {"current_steps": 310, "total_steps": 921, "loss": 0.6074, "learning_rate": 5e-06, "epoch": 1.007311129163282, "percentage": 33.66, "elapsed_time": "1:26:32", "remaining_time": "2:50:34"}
33
+ {"current_steps": 320, "total_steps": 921, "loss": 0.5701, "learning_rate": 5e-06, "epoch": 1.0398050365556457, "percentage": 34.74, "elapsed_time": "1:29:14", "remaining_time": "2:47:35"}
34
+ {"current_steps": 330, "total_steps": 921, "loss": 0.5836, "learning_rate": 5e-06, "epoch": 1.0722989439480097, "percentage": 35.83, "elapsed_time": "1:31:55", "remaining_time": "2:44:37"}
35
+ {"current_steps": 340, "total_steps": 921, "loss": 0.5676, "learning_rate": 5e-06, "epoch": 1.1047928513403737, "percentage": 36.92, "elapsed_time": "1:34:36", "remaining_time": "2:41:40"}
36
+ {"current_steps": 350, "total_steps": 921, "loss": 0.5722, "learning_rate": 5e-06, "epoch": 1.1372867587327375, "percentage": 38.0, "elapsed_time": "1:37:18", "remaining_time": "2:38:44"}
37
+ {"current_steps": 360, "total_steps": 921, "loss": 0.5692, "learning_rate": 5e-06, "epoch": 1.1697806661251016, "percentage": 39.09, "elapsed_time": "1:39:59", "remaining_time": "2:35:49"}
38
+ {"current_steps": 370, "total_steps": 921, "loss": 0.5716, "learning_rate": 5e-06, "epoch": 1.2022745735174656, "percentage": 40.17, "elapsed_time": "1:42:39", "remaining_time": "2:32:52"}
39
+ {"current_steps": 380, "total_steps": 921, "loss": 0.573, "learning_rate": 5e-06, "epoch": 1.2347684809098294, "percentage": 41.26, "elapsed_time": "1:45:20", "remaining_time": "2:29:58"}
40
+ {"current_steps": 390, "total_steps": 921, "loss": 0.5759, "learning_rate": 5e-06, "epoch": 1.2672623883021934, "percentage": 42.35, "elapsed_time": "1:48:01", "remaining_time": "2:27:04"}
41
+ {"current_steps": 400, "total_steps": 921, "loss": 0.5812, "learning_rate": 5e-06, "epoch": 1.2997562956945572, "percentage": 43.43, "elapsed_time": "1:50:41", "remaining_time": "2:24:10"}
42
+ {"current_steps": 410, "total_steps": 921, "loss": 0.5781, "learning_rate": 5e-06, "epoch": 1.3322502030869212, "percentage": 44.52, "elapsed_time": "1:53:23", "remaining_time": "2:21:19"}
43
+ {"current_steps": 420, "total_steps": 921, "loss": 0.5829, "learning_rate": 5e-06, "epoch": 1.3647441104792852, "percentage": 45.6, "elapsed_time": "1:56:04", "remaining_time": "2:18:27"}
44
+ {"current_steps": 430, "total_steps": 921, "loss": 0.5743, "learning_rate": 5e-06, "epoch": 1.397238017871649, "percentage": 46.69, "elapsed_time": "1:58:46", "remaining_time": "2:15:37"}
45
+ {"current_steps": 440, "total_steps": 921, "loss": 0.568, "learning_rate": 5e-06, "epoch": 1.429731925264013, "percentage": 47.77, "elapsed_time": "2:01:26", "remaining_time": "2:12:45"}
46
+ {"current_steps": 450, "total_steps": 921, "loss": 0.5688, "learning_rate": 5e-06, "epoch": 1.4622258326563768, "percentage": 48.86, "elapsed_time": "2:04:07", "remaining_time": "2:09:54"}
47
+ {"current_steps": 460, "total_steps": 921, "loss": 0.5765, "learning_rate": 5e-06, "epoch": 1.4947197400487409, "percentage": 49.95, "elapsed_time": "2:06:48", "remaining_time": "2:07:04"}
48
+ {"current_steps": 470, "total_steps": 921, "loss": 0.5709, "learning_rate": 5e-06, "epoch": 1.5272136474411049, "percentage": 51.03, "elapsed_time": "2:09:29", "remaining_time": "2:04:15"}
49
+ {"current_steps": 480, "total_steps": 921, "loss": 0.5744, "learning_rate": 5e-06, "epoch": 1.5597075548334687, "percentage": 52.12, "elapsed_time": "2:12:10", "remaining_time": "2:01:26"}
50
+ {"current_steps": 490, "total_steps": 921, "loss": 0.5736, "learning_rate": 5e-06, "epoch": 1.5922014622258327, "percentage": 53.2, "elapsed_time": "2:14:51", "remaining_time": "1:58:37"}
51
+ {"current_steps": 500, "total_steps": 921, "loss": 0.5644, "learning_rate": 5e-06, "epoch": 1.6246953696181965, "percentage": 54.29, "elapsed_time": "2:17:31", "remaining_time": "1:55:48"}
52
+ {"current_steps": 510, "total_steps": 921, "loss": 0.5631, "learning_rate": 5e-06, "epoch": 1.6571892770105605, "percentage": 55.37, "elapsed_time": "2:20:12", "remaining_time": "1:52:59"}
53
+ {"current_steps": 520, "total_steps": 921, "loss": 0.5736, "learning_rate": 5e-06, "epoch": 1.6896831844029245, "percentage": 56.46, "elapsed_time": "2:22:52", "remaining_time": "1:50:10"}
54
+ {"current_steps": 530, "total_steps": 921, "loss": 0.565, "learning_rate": 5e-06, "epoch": 1.7221770917952883, "percentage": 57.55, "elapsed_time": "2:25:32", "remaining_time": "1:47:22"}
55
+ {"current_steps": 540, "total_steps": 921, "loss": 0.5735, "learning_rate": 5e-06, "epoch": 1.7546709991876523, "percentage": 58.63, "elapsed_time": "2:28:13", "remaining_time": "1:44:34"}
56
+ {"current_steps": 550, "total_steps": 921, "loss": 0.5673, "learning_rate": 5e-06, "epoch": 1.7871649065800161, "percentage": 59.72, "elapsed_time": "2:30:53", "remaining_time": "1:41:46"}
57
+ {"current_steps": 560, "total_steps": 921, "loss": 0.5678, "learning_rate": 5e-06, "epoch": 1.8196588139723802, "percentage": 60.8, "elapsed_time": "2:33:34", "remaining_time": "1:39:00"}
58
+ {"current_steps": 570, "total_steps": 921, "loss": 0.5685, "learning_rate": 5e-06, "epoch": 1.8521527213647442, "percentage": 61.89, "elapsed_time": "2:36:14", "remaining_time": "1:36:12"}
59
+ {"current_steps": 580, "total_steps": 921, "loss": 0.5717, "learning_rate": 5e-06, "epoch": 1.8846466287571082, "percentage": 62.98, "elapsed_time": "2:38:55", "remaining_time": "1:33:26"}
60
+ {"current_steps": 590, "total_steps": 921, "loss": 0.5744, "learning_rate": 5e-06, "epoch": 1.917140536149472, "percentage": 64.06, "elapsed_time": "2:41:37", "remaining_time": "1:30:40"}
61
+ {"current_steps": 600, "total_steps": 921, "loss": 0.5646, "learning_rate": 5e-06, "epoch": 1.9496344435418358, "percentage": 65.15, "elapsed_time": "2:44:18", "remaining_time": "1:27:54"}
62
+ {"current_steps": 610, "total_steps": 921, "loss": 0.5652, "learning_rate": 5e-06, "epoch": 1.9821283509341998, "percentage": 66.23, "elapsed_time": "2:47:00", "remaining_time": "1:25:08"}
63
+ {"current_steps": 615, "total_steps": 921, "eval_loss": 0.6079972982406616, "epoch": 1.9983753046303818, "percentage": 66.78, "elapsed_time": "2:50:13", "remaining_time": "1:24:41"}
64
+ {"current_steps": 620, "total_steps": 921, "loss": 0.5462, "learning_rate": 5e-06, "epoch": 2.014622258326564, "percentage": 67.32, "elapsed_time": "2:52:32", "remaining_time": "1:23:45"}
65
+ {"current_steps": 630, "total_steps": 921, "loss": 0.5103, "learning_rate": 5e-06, "epoch": 2.047116165718928, "percentage": 68.4, "elapsed_time": "2:55:12", "remaining_time": "1:20:56"}
66
+ {"current_steps": 640, "total_steps": 921, "loss": 0.5161, "learning_rate": 5e-06, "epoch": 2.0796100731112914, "percentage": 69.49, "elapsed_time": "2:57:54", "remaining_time": "1:18:06"}
67
+ {"current_steps": 650, "total_steps": 921, "loss": 0.5166, "learning_rate": 5e-06, "epoch": 2.1121039805036554, "percentage": 70.58, "elapsed_time": "3:00:35", "remaining_time": "1:15:17"}
68
+ {"current_steps": 660, "total_steps": 921, "loss": 0.5057, "learning_rate": 5e-06, "epoch": 2.1445978878960195, "percentage": 71.66, "elapsed_time": "3:03:16", "remaining_time": "1:12:28"}
69
+ {"current_steps": 670, "total_steps": 921, "loss": 0.5193, "learning_rate": 5e-06, "epoch": 2.1770917952883835, "percentage": 72.75, "elapsed_time": "3:05:57", "remaining_time": "1:09:39"}
70
+ {"current_steps": 680, "total_steps": 921, "loss": 0.5261, "learning_rate": 5e-06, "epoch": 2.2095857026807475, "percentage": 73.83, "elapsed_time": "3:08:39", "remaining_time": "1:06:51"}
71
+ {"current_steps": 690, "total_steps": 921, "loss": 0.5216, "learning_rate": 5e-06, "epoch": 2.2420796100731115, "percentage": 74.92, "elapsed_time": "3:11:20", "remaining_time": "1:04:03"}
72
+ {"current_steps": 700, "total_steps": 921, "loss": 0.5239, "learning_rate": 5e-06, "epoch": 2.274573517465475, "percentage": 76.0, "elapsed_time": "3:14:01", "remaining_time": "1:01:15"}
73
+ {"current_steps": 710, "total_steps": 921, "loss": 0.518, "learning_rate": 5e-06, "epoch": 2.307067424857839, "percentage": 77.09, "elapsed_time": "3:16:43", "remaining_time": "0:58:27"}
74
+ {"current_steps": 720, "total_steps": 921, "loss": 0.5311, "learning_rate": 5e-06, "epoch": 2.339561332250203, "percentage": 78.18, "elapsed_time": "3:19:24", "remaining_time": "0:55:40"}
75
+ {"current_steps": 730, "total_steps": 921, "loss": 0.5225, "learning_rate": 5e-06, "epoch": 2.372055239642567, "percentage": 79.26, "elapsed_time": "3:22:05", "remaining_time": "0:52:52"}
76
+ {"current_steps": 740, "total_steps": 921, "loss": 0.5228, "learning_rate": 5e-06, "epoch": 2.404549147034931, "percentage": 80.35, "elapsed_time": "3:24:46", "remaining_time": "0:50:05"}
77
+ {"current_steps": 750, "total_steps": 921, "loss": 0.5269, "learning_rate": 5e-06, "epoch": 2.4370430544272947, "percentage": 81.43, "elapsed_time": "3:27:29", "remaining_time": "0:47:18"}
78
+ {"current_steps": 760, "total_steps": 921, "loss": 0.5175, "learning_rate": 5e-06, "epoch": 2.4695369618196588, "percentage": 82.52, "elapsed_time": "3:30:11", "remaining_time": "0:44:31"}
79
+ {"current_steps": 770, "total_steps": 921, "loss": 0.5219, "learning_rate": 5e-06, "epoch": 2.502030869212023, "percentage": 83.6, "elapsed_time": "3:32:53", "remaining_time": "0:41:44"}
80
+ {"current_steps": 780, "total_steps": 921, "loss": 0.5196, "learning_rate": 5e-06, "epoch": 2.534524776604387, "percentage": 84.69, "elapsed_time": "3:35:35", "remaining_time": "0:38:58"}
81
+ {"current_steps": 790, "total_steps": 921, "loss": 0.5167, "learning_rate": 5e-06, "epoch": 2.567018683996751, "percentage": 85.78, "elapsed_time": "3:38:15", "remaining_time": "0:36:11"}
82
+ {"current_steps": 800, "total_steps": 921, "loss": 0.5186, "learning_rate": 5e-06, "epoch": 2.5995125913891144, "percentage": 86.86, "elapsed_time": "3:40:56", "remaining_time": "0:33:24"}
83
+ {"current_steps": 810, "total_steps": 921, "loss": 0.5269, "learning_rate": 5e-06, "epoch": 2.6320064987814784, "percentage": 87.95, "elapsed_time": "3:43:36", "remaining_time": "0:30:38"}
84
+ {"current_steps": 820, "total_steps": 921, "loss": 0.5241, "learning_rate": 5e-06, "epoch": 2.6645004061738424, "percentage": 89.03, "elapsed_time": "3:46:18", "remaining_time": "0:27:52"}
85
+ {"current_steps": 830, "total_steps": 921, "loss": 0.52, "learning_rate": 5e-06, "epoch": 2.6969943135662064, "percentage": 90.12, "elapsed_time": "3:49:00", "remaining_time": "0:25:06"}
86
+ {"current_steps": 840, "total_steps": 921, "loss": 0.5218, "learning_rate": 5e-06, "epoch": 2.7294882209585705, "percentage": 91.21, "elapsed_time": "3:51:41", "remaining_time": "0:22:20"}
87
+ {"current_steps": 850, "total_steps": 921, "loss": 0.5295, "learning_rate": 5e-06, "epoch": 2.761982128350934, "percentage": 92.29, "elapsed_time": "3:54:23", "remaining_time": "0:19:34"}
88
+ {"current_steps": 860, "total_steps": 921, "loss": 0.5237, "learning_rate": 5e-06, "epoch": 2.794476035743298, "percentage": 93.38, "elapsed_time": "3:57:05", "remaining_time": "0:16:48"}
89
+ {"current_steps": 870, "total_steps": 921, "loss": 0.522, "learning_rate": 5e-06, "epoch": 2.826969943135662, "percentage": 94.46, "elapsed_time": "3:59:46", "remaining_time": "0:14:03"}
90
+ {"current_steps": 880, "total_steps": 921, "loss": 0.5188, "learning_rate": 5e-06, "epoch": 2.859463850528026, "percentage": 95.55, "elapsed_time": "4:02:28", "remaining_time": "0:11:17"}
91
+ {"current_steps": 890, "total_steps": 921, "loss": 0.5252, "learning_rate": 5e-06, "epoch": 2.89195775792039, "percentage": 96.63, "elapsed_time": "4:05:08", "remaining_time": "0:08:32"}
92
+ {"current_steps": 900, "total_steps": 921, "loss": 0.5181, "learning_rate": 5e-06, "epoch": 2.9244516653127537, "percentage": 97.72, "elapsed_time": "4:07:49", "remaining_time": "0:05:46"}
93
+ {"current_steps": 910, "total_steps": 921, "loss": 0.5282, "learning_rate": 5e-06, "epoch": 2.9569455727051177, "percentage": 98.81, "elapsed_time": "4:10:29", "remaining_time": "0:03:01"}
94
+ {"current_steps": 920, "total_steps": 921, "loss": 0.5197, "learning_rate": 5e-06, "epoch": 2.9894394800974817, "percentage": 99.89, "elapsed_time": "4:13:09", "remaining_time": "0:00:16"}
95
+ {"current_steps": 921, "total_steps": 921, "eval_loss": 0.6120628714561462, "epoch": 2.992688870836718, "percentage": 100.0, "elapsed_time": "4:16:16", "remaining_time": "0:00:00"}
96
+ {"current_steps": 921, "total_steps": 921, "epoch": 2.992688870836718, "percentage": 100.0, "elapsed_time": "4:17:12", "remaining_time": "0:00:00"}
training_args.bin.sagemaker-uploaded ADDED
File without changes