Update README.md
README.md CHANGED
@@ -88,6 +88,18 @@ BPE tokenization: "vocab_size"=30000
 [INFO|trainer.py:1638] 2022-12-03 21:44:00,184 >> Continuing training from epoch 107
 [INFO|trainer.py:1639] 2022-12-03 21:44:00,184 >> Continuing training from global step 84500
 
+[INFO|trainer.py:1608] 2022-12-05 07:36:13,626 >> ***** Running training *****
+[INFO|trainer.py:1609] 2022-12-05 07:36:13,626 >> Num examples = 9443
+[INFO|trainer.py:1610] 2022-12-05 07:36:13,626 >> Num Epochs = 368
+[INFO|trainer.py:1611] 2022-12-05 07:36:13,626 >> Instantaneous batch size per device = 12
+[INFO|trainer.py:1612] 2022-12-05 07:36:13,626 >> Total train batch size (w. parallel, distributed & accumulation) = 12
+[INFO|trainer.py:1613] 2022-12-05 07:36:13,626 >> Gradient Accumulation steps = 1
+[INFO|trainer.py:1614] 2022-12-05 07:36:13,626 >> Total optimization steps = 289616
+[INFO|trainer.py:1616] 2022-12-05 07:36:13,627 >> Number of trainable parameters = 124439808
+[INFO|trainer.py:1637] 2022-12-05 07:36:13,628 >> Continuing training from checkpoint, will skip to saved global_step
+[INFO|trainer.py:1638] 2022-12-05 07:36:13,628 >> Continuing training from epoch 255
+[INFO|trainer.py:1639] 2022-12-05 07:36:13,628 >> Continuing training from global step 201000
+
 {'loss': 8.0431, 'learning_rate': 4.970998635229893e-05, 'epoch': 0.64}
 {'loss': 7.4867, 'learning_rate': 4.94158548637583e-05, 'epoch': 1.27}
 {'loss': 7.322, 'learning_rate': 4.912172337521766e-05, 'epoch': 1.91}
@@ -99,27 +111,29 @@ BPE tokenization: "vocab_size"=30000
 {'loss': 3.1592, 'learning_rate': 3.1413242976140055e-07, 'epoch': 214.74}
 {'loss': 3.1625, 'learning_rate': 1.6706668549108195e-07, 'epoch': 215.37}
 {'train_runtime': 72271.9602, 'train_samples_per_second': 28.222, 'train_steps_per_second': 2.352, 'train_loss': 1.7180436183842016, 'epoch': 216.0}
+{'loss': 2.7087, 'learning_rate': 4.2642671675598036e-08, 'epoch': 367.85}
+{'train_runtime': 74859.0808, 'train_samples_per_second': 46.421, 'train_steps_per_second': 3.869, 'train_loss': 0.8725239146935282, 'epoch': 368.0}
 ***** train metrics *****
-epoch = 216.0
-train_loss = 1.718
-train_runtime = 20:04:31.96
+epoch = 368.0
+train_loss = 0.8725
+train_runtime = 20:47:39.08
 train_samples = 9443
-train_samples_per_second = 28.222
-train_steps_per_second = 2.352
-12/
-[INFO|trainer.py:2929] 2022-12-04
-[INFO|trainer.py:2931] 2022-12-04
-[INFO|trainer.py:2934] 2022-12-04
+train_samples_per_second = 46.421
+train_steps_per_second = 3.869
+12/06/2022 04:23:55 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:2929] 2022-12-06 04:23:55,953 >> ***** Running Evaluation *****
+[INFO|trainer.py:2931] 2022-12-06 04:23:55,953 >> Num examples = 283
+[INFO|trainer.py:2934] 2022-12-06 04:23:55,954 >> Batch size = 12
 100%|██████████| 24/24 [00:07<00:00, 3.20it/s]
-[INFO|modelcard.py:449] 2022-12-04
-{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.
+[INFO|modelcard.py:449] 2022-12-06 04:24:04,760 >> Dropping the following result as it does not have all the necessary fields:
+{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.19599206157122803}]}
 ***** eval metrics *****
-epoch = 216.0
-eval_accuracy =
-eval_loss = 7.
-eval_runtime = 0:00:07.
+epoch = 368.0
+eval_accuracy = 0.196
+eval_loss = 7.9524
+eval_runtime = 0:00:07.87
 eval_samples = 283
-eval_samples_per_second =
-eval_steps_per_second = 3.
-perplexity =
+eval_samples_per_second = 35.94
+eval_steps_per_second = 3.048
+perplexity = 2842.2766
 ```
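
A quick consistency check on the new log: with 9443 examples, a single device, batch size 12 and no gradient accumulation, an epoch is ceil(9443 / 12) = 787 optimizer steps, and 787 steps/epoch × 368 epochs gives exactly the logged 289616 total optimization steps. The resume points follow the same arithmetic. A minimal sketch (variable names are illustrative, not Trainer internals):

```python
import math

# Figures copied from the trainer log above.
num_examples = 9443   # Num examples
batch_size = 12       # Instantaneous batch size per device (single device, no accumulation)
num_epochs = 368      # Num Epochs

steps_per_epoch = math.ceil(num_examples / batch_size)
print(steps_per_epoch)               # 787
print(steps_per_epoch * num_epochs)  # 289616 -> "Total optimization steps"

# Resume points: integer-dividing the saved global step by steps per epoch
# recovers the epoch the run continues from.
print(84500 // steps_per_epoch)      # 107 -> "Continuing training from epoch 107"
print(201000 // steps_per_epoch)     # 255 -> "Continuing training from epoch 255"
```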
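The train metrics block is a reformatting of the final train line: 74859.0808 seconds is 20:47:39.08, and the throughput figures are samples seen (or steps taken) divided by runtime. A sketch, assuming those standard definitions:

```python
import datetime

train_runtime = 74859.0808   # seconds, from the final train log line
total_steps = 289616
num_epochs = 368
train_samples = 9443

print(datetime.timedelta(seconds=train_runtime))             # 20:47:39.080800 -> "20:47:39.08"
print(round(train_samples * num_epochs / train_runtime, 3))  # 46.421 -> train_samples_per_second
print(round(total_steps / train_runtime, 3))                 # 3.869  -> train_steps_per_second
```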
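Perplexity in the eval block is the exponential of the mean per-token cross-entropy loss, so exp(7.9524) ≈ 2842, matching the logged 2842.2766 up to the rounding of the displayed loss. For example:

```python
import math

eval_loss = 7.9524          # mean per-token cross-entropy, from the eval metrics
print(math.exp(eval_loss))  # ~2842.16; the logged 2842.2766 comes from the unrounded loss
```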