Training in progress, step 200
Browse files- .ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb +68 -106
- fine-tune-whisper-streaming.ipynb +0 -0
- pytorch_model.bin +1 -1
- runs/Dec12_00-48-47_129-213-26-143/1670806154.5337327/events.out.tfevents.1670806154.129-213-26-143.128161.1 +3 -0
- runs/Dec12_00-48-47_129-213-26-143/events.out.tfevents.1670806154.129-213-26-143.128161.0 +3 -0
- runs/Dec12_01-54-52_129-213-26-143/1670810104.398764/events.out.tfevents.1670810104.129-213-26-143.742932.1 +3 -0
- runs/Dec12_01-54-52_129-213-26-143/events.out.tfevents.1670810104.129-213-26-143.742932.0 +3 -0
- runs/Dec12_02-00-57_129-213-26-143/1670810506.3012445/events.out.tfevents.1670810506.129-213-26-143.742932.3 +3 -0
- runs/Dec12_02-00-57_129-213-26-143/events.out.tfevents.1670810506.129-213-26-143.742932.2 +3 -0
- training_args.bin +1 -1
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb
CHANGED
@@ -142,74 +142,21 @@
|
|
142 |
},
|
143 |
{
|
144 |
"cell_type": "code",
|
145 |
-
"execution_count":
|
146 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
|
147 |
"metadata": {},
|
148 |
-
"outputs": [
|
149 |
-
{
|
150 |
-
"data": {
|
151 |
-
"application/vnd.jupyter.widget-view+json": {
|
152 |
-
"model_id": "ecce3a630cdb4ebab217a88a0163b257",
|
153 |
-
"version_major": 2,
|
154 |
-
"version_minor": 0
|
155 |
-
},
|
156 |
-
"text/plain": [
|
157 |
-
"Downloading builder script: 0%| | 0.00/8.30k [00:00<?, ?B/s]"
|
158 |
-
]
|
159 |
-
},
|
160 |
-
"metadata": {},
|
161 |
-
"output_type": "display_data"
|
162 |
-
},
|
163 |
-
{
|
164 |
-
"data": {
|
165 |
-
"application/vnd.jupyter.widget-view+json": {
|
166 |
-
"model_id": "b0141b068f944775867034bc494f88d7",
|
167 |
-
"version_major": 2,
|
168 |
-
"version_minor": 0
|
169 |
-
},
|
170 |
-
"text/plain": [
|
171 |
-
"Downloading readme: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
|
172 |
-
]
|
173 |
-
},
|
174 |
-
"metadata": {},
|
175 |
-
"output_type": "display_data"
|
176 |
-
},
|
177 |
-
{
|
178 |
-
"data": {
|
179 |
-
"application/vnd.jupyter.widget-view+json": {
|
180 |
-
"model_id": "9dd1f4ded47c4160b55f1bcedce2694f",
|
181 |
-
"version_major": 2,
|
182 |
-
"version_minor": 0
|
183 |
-
},
|
184 |
-
"text/plain": [
|
185 |
-
"Downloading extra modules: 0%| | 0.00/3.44k [00:00<?, ?B/s]"
|
186 |
-
]
|
187 |
-
},
|
188 |
-
"metadata": {},
|
189 |
-
"output_type": "display_data"
|
190 |
-
},
|
191 |
-
{
|
192 |
-
"data": {
|
193 |
-
"application/vnd.jupyter.widget-view+json": {
|
194 |
-
"model_id": "a442da1e2a6b4271bae8ae0c655594b6",
|
195 |
-
"version_major": 2,
|
196 |
-
"version_minor": 0
|
197 |
-
},
|
198 |
-
"text/plain": [
|
199 |
-
"Downloading extra modules: 0%| | 0.00/60.9k [00:00<?, ?B/s]"
|
200 |
-
]
|
201 |
-
},
|
202 |
-
"metadata": {},
|
203 |
-
"output_type": "display_data"
|
204 |
-
}
|
205 |
-
],
|
206 |
"source": [
|
207 |
"from datasets import IterableDatasetDict\n",
|
208 |
"\n",
|
209 |
"raw_datasets = IterableDatasetDict()\n",
|
210 |
"\n",
|
211 |
-
"raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-
|
212 |
-
"raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-
|
|
|
|
|
|
|
|
|
213 |
]
|
214 |
},
|
215 |
{
|
@@ -242,14 +189,16 @@
|
|
242 |
},
|
243 |
{
|
244 |
"cell_type": "code",
|
245 |
-
"execution_count":
|
246 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
|
247 |
-
"metadata": {
|
|
|
|
|
248 |
"outputs": [
|
249 |
{
|
250 |
"data": {
|
251 |
"application/vnd.jupyter.widget-view+json": {
|
252 |
-
"model_id": "
|
253 |
"version_major": 2,
|
254 |
"version_minor": 0
|
255 |
},
|
@@ -263,7 +212,7 @@
|
|
263 |
{
|
264 |
"data": {
|
265 |
"application/vnd.jupyter.widget-view+json": {
|
266 |
-
"model_id": "
|
267 |
"version_major": 2,
|
268 |
"version_minor": 0
|
269 |
},
|
@@ -277,7 +226,7 @@
|
|
277 |
{
|
278 |
"data": {
|
279 |
"application/vnd.jupyter.widget-view+json": {
|
280 |
-
"model_id": "
|
281 |
"version_major": 2,
|
282 |
"version_minor": 0
|
283 |
},
|
@@ -291,7 +240,7 @@
|
|
291 |
{
|
292 |
"data": {
|
293 |
"application/vnd.jupyter.widget-view+json": {
|
294 |
-
"model_id": "
|
295 |
"version_major": 2,
|
296 |
"version_minor": 0
|
297 |
},
|
@@ -305,7 +254,7 @@
|
|
305 |
{
|
306 |
"data": {
|
307 |
"application/vnd.jupyter.widget-view+json": {
|
308 |
-
"model_id": "
|
309 |
"version_major": 2,
|
310 |
"version_minor": 0
|
311 |
},
|
@@ -319,7 +268,7 @@
|
|
319 |
{
|
320 |
"data": {
|
321 |
"application/vnd.jupyter.widget-view+json": {
|
322 |
-
"model_id": "
|
323 |
"version_major": 2,
|
324 |
"version_minor": 0
|
325 |
},
|
@@ -333,7 +282,7 @@
|
|
333 |
{
|
334 |
"data": {
|
335 |
"application/vnd.jupyter.widget-view+json": {
|
336 |
-
"model_id": "
|
337 |
"version_major": 2,
|
338 |
"version_minor": 0
|
339 |
},
|
@@ -369,7 +318,7 @@
|
|
369 |
},
|
370 |
{
|
371 |
"cell_type": "code",
|
372 |
-
"execution_count":
|
373 |
"id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
|
374 |
"metadata": {},
|
375 |
"outputs": [
|
@@ -389,7 +338,7 @@
|
|
389 |
" 'segment': Value(dtype='string', id=None)}"
|
390 |
]
|
391 |
},
|
392 |
-
"execution_count":
|
393 |
"metadata": {},
|
394 |
"output_type": "execute_result"
|
395 |
}
|
@@ -415,7 +364,7 @@
|
|
415 |
},
|
416 |
{
|
417 |
"cell_type": "code",
|
418 |
-
"execution_count":
|
419 |
"id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
|
420 |
"metadata": {},
|
421 |
"outputs": [],
|
@@ -435,7 +384,7 @@
|
|
435 |
},
|
436 |
{
|
437 |
"cell_type": "code",
|
438 |
-
"execution_count":
|
439 |
"id": "d041650e-1c48-4439-87b3-5b6f4a514107",
|
440 |
"metadata": {},
|
441 |
"outputs": [],
|
@@ -462,7 +411,7 @@
|
|
462 |
},
|
463 |
{
|
464 |
"cell_type": "code",
|
465 |
-
"execution_count":
|
466 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
467 |
"metadata": {},
|
468 |
"outputs": [],
|
@@ -498,7 +447,7 @@
|
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
-
"execution_count":
|
502 |
"id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
|
503 |
"metadata": {},
|
504 |
"outputs": [],
|
@@ -516,7 +465,7 @@
|
|
516 |
},
|
517 |
{
|
518 |
"cell_type": "code",
|
519 |
-
"execution_count":
|
520 |
"id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
|
521 |
"metadata": {},
|
522 |
"outputs": [],
|
@@ -537,7 +486,7 @@
|
|
537 |
},
|
538 |
{
|
539 |
"cell_type": "code",
|
540 |
-
"execution_count":
|
541 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
542 |
"metadata": {},
|
543 |
"outputs": [],
|
@@ -558,7 +507,7 @@
|
|
558 |
},
|
559 |
{
|
560 |
"cell_type": "code",
|
561 |
-
"execution_count":
|
562 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
563 |
"metadata": {},
|
564 |
"outputs": [],
|
@@ -628,7 +577,7 @@
|
|
628 |
},
|
629 |
{
|
630 |
"cell_type": "code",
|
631 |
-
"execution_count":
|
632 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
|
633 |
"metadata": {},
|
634 |
"outputs": [],
|
@@ -676,7 +625,7 @@
|
|
676 |
},
|
677 |
{
|
678 |
"cell_type": "code",
|
679 |
-
"execution_count":
|
680 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
|
681 |
"metadata": {},
|
682 |
"outputs": [],
|
@@ -703,14 +652,14 @@
|
|
703 |
},
|
704 |
{
|
705 |
"cell_type": "code",
|
706 |
-
"execution_count":
|
707 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
708 |
"metadata": {},
|
709 |
"outputs": [
|
710 |
{
|
711 |
"data": {
|
712 |
"application/vnd.jupyter.widget-view+json": {
|
713 |
-
"model_id": "
|
714 |
"version_major": 2,
|
715 |
"version_minor": 0
|
716 |
},
|
@@ -747,7 +696,7 @@
|
|
747 |
},
|
748 |
{
|
749 |
"cell_type": "code",
|
750 |
-
"execution_count":
|
751 |
"id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
|
752 |
"metadata": {},
|
753 |
"outputs": [],
|
@@ -780,10 +729,10 @@
|
|
780 |
},
|
781 |
{
|
782 |
"cell_type": "markdown",
|
783 |
-
"id": "
|
784 |
"metadata": {},
|
785 |
"source": [
|
786 |
-
"###
|
787 |
]
|
788 |
},
|
789 |
{
|
@@ -797,14 +746,14 @@
|
|
797 |
},
|
798 |
{
|
799 |
"cell_type": "code",
|
800 |
-
"execution_count":
|
801 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
802 |
"metadata": {},
|
803 |
"outputs": [
|
804 |
{
|
805 |
"data": {
|
806 |
"application/vnd.jupyter.widget-view+json": {
|
807 |
-
"model_id": "
|
808 |
"version_major": 2,
|
809 |
"version_minor": 0
|
810 |
},
|
@@ -818,7 +767,7 @@
|
|
818 |
{
|
819 |
"data": {
|
820 |
"application/vnd.jupyter.widget-view+json": {
|
821 |
-
"model_id": "
|
822 |
"version_major": 2,
|
823 |
"version_minor": 0
|
824 |
},
|
@@ -846,7 +795,7 @@
|
|
846 |
},
|
847 |
{
|
848 |
"cell_type": "code",
|
849 |
-
"execution_count":
|
850 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
|
851 |
"metadata": {},
|
852 |
"outputs": [],
|
@@ -874,7 +823,7 @@
|
|
874 |
},
|
875 |
{
|
876 |
"cell_type": "code",
|
877 |
-
"execution_count":
|
878 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
879 |
"metadata": {},
|
880 |
"outputs": [],
|
@@ -886,16 +835,16 @@
|
|
886 |
" per_device_train_batch_size=64,\n",
|
887 |
" gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
|
888 |
" learning_rate=1e-5,\n",
|
889 |
-
" warmup_steps=
|
890 |
-
" max_steps=
|
891 |
" gradient_checkpointing=True,\n",
|
892 |
" fp16=True,\n",
|
893 |
" evaluation_strategy=\"steps\",\n",
|
894 |
" per_device_eval_batch_size=8,\n",
|
895 |
" predict_with_generate=True,\n",
|
896 |
" generation_max_length=225,\n",
|
897 |
-
" save_steps=
|
898 |
-
" eval_steps=
|
899 |
" logging_steps=25,\n",
|
900 |
" report_to=[\"tensorboard\"],\n",
|
901 |
" load_best_model_at_end=True,\n",
|
@@ -924,7 +873,7 @@
|
|
924 |
},
|
925 |
{
|
926 |
"cell_type": "code",
|
927 |
-
"execution_count":
|
928 |
"id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
|
929 |
"metadata": {},
|
930 |
"outputs": [],
|
@@ -953,7 +902,7 @@
|
|
953 |
},
|
954 |
{
|
955 |
"cell_type": "code",
|
956 |
-
"execution_count":
|
957 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
958 |
"metadata": {},
|
959 |
"outputs": [
|
@@ -992,7 +941,7 @@
|
|
992 |
},
|
993 |
{
|
994 |
"cell_type": "code",
|
995 |
-
"execution_count":
|
996 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
997 |
"metadata": {},
|
998 |
"outputs": [
|
@@ -1040,7 +989,9 @@
|
|
1040 |
"cell_type": "code",
|
1041 |
"execution_count": null,
|
1042 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
1043 |
-
"metadata": {
|
|
|
|
|
1044 |
"outputs": [
|
1045 |
{
|
1046 |
"name": "stderr",
|
@@ -1049,14 +1000,14 @@
|
|
1049 |
"/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
1050 |
" warnings.warn(\n",
|
1051 |
"***** Running training *****\n",
|
1052 |
-
" Num examples =
|
1053 |
" Num Epochs = 9223372036854775807\n",
|
1054 |
" Instantaneous batch size per device = 64\n",
|
1055 |
" Total train batch size (w. parallel, distributed & accumulation) = 64\n",
|
1056 |
" Gradient Accumulation steps = 1\n",
|
1057 |
-
" Total optimization steps =
|
1058 |
" Number of trainable parameters = 241734912\n",
|
1059 |
-
"Reading metadata...:
|
1060 |
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
1061 |
]
|
1062 |
},
|
@@ -1066,8 +1017,8 @@
|
|
1066 |
"\n",
|
1067 |
" <div>\n",
|
1068 |
" \n",
|
1069 |
-
" <progress value='
|
1070 |
-
" [
|
1071 |
" </div>\n",
|
1072 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1073 |
" <thead>\n",
|
@@ -1087,6 +1038,17 @@
|
|
1087 |
},
|
1088 |
"metadata": {},
|
1089 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1090 |
}
|
1091 |
],
|
1092 |
"source": [
|
@@ -1181,9 +1143,9 @@
|
|
1181 |
],
|
1182 |
"metadata": {
|
1183 |
"kernelspec": {
|
1184 |
-
"display_name": "
|
1185 |
"language": "python",
|
1186 |
-
"name": "
|
1187 |
},
|
1188 |
"language_info": {
|
1189 |
"codemirror_mode": {
|
|
|
142 |
},
|
143 |
{
|
144 |
"cell_type": "code",
|
145 |
+
"execution_count": 3,
|
146 |
"id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
|
147 |
"metadata": {},
|
148 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
"source": [
|
150 |
"from datasets import IterableDatasetDict\n",
|
151 |
"\n",
|
152 |
"raw_datasets = IterableDatasetDict()\n",
|
153 |
"\n",
|
154 |
+
"raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"train\", use_auth_token=True)\n",
|
155 |
+
"raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"test\", use_auth_token=True)\n",
|
156 |
+
"\n",
|
157 |
+
"# zh-TW is low resource\n",
|
158 |
+
"# raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train+validation\", use_auth_token=True)\n",
|
159 |
+
"# raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
|
160 |
]
|
161 |
},
|
162 |
{
|
|
|
189 |
},
|
190 |
{
|
191 |
"cell_type": "code",
|
192 |
+
"execution_count": 4,
|
193 |
"id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
|
194 |
+
"metadata": {
|
195 |
+
"tags": []
|
196 |
+
},
|
197 |
"outputs": [
|
198 |
{
|
199 |
"data": {
|
200 |
"application/vnd.jupyter.widget-view+json": {
|
201 |
+
"model_id": "48ef23eaa9fb4d6ca621fd252befca48",
|
202 |
"version_major": 2,
|
203 |
"version_minor": 0
|
204 |
},
|
|
|
212 |
{
|
213 |
"data": {
|
214 |
"application/vnd.jupyter.widget-view+json": {
|
215 |
+
"model_id": "3087512d9e6642cf8ed5e9b07405b9ef",
|
216 |
"version_major": 2,
|
217 |
"version_minor": 0
|
218 |
},
|
|
|
226 |
{
|
227 |
"data": {
|
228 |
"application/vnd.jupyter.widget-view+json": {
|
229 |
+
"model_id": "37a576b26aa04522b786a919b955fd88",
|
230 |
"version_major": 2,
|
231 |
"version_minor": 0
|
232 |
},
|
|
|
240 |
{
|
241 |
"data": {
|
242 |
"application/vnd.jupyter.widget-view+json": {
|
243 |
+
"model_id": "f62d110f392d495094520328b181a119",
|
244 |
"version_major": 2,
|
245 |
"version_minor": 0
|
246 |
},
|
|
|
254 |
{
|
255 |
"data": {
|
256 |
"application/vnd.jupyter.widget-view+json": {
|
257 |
+
"model_id": "b03cdae0d93f4eacbc32508d5320c583",
|
258 |
"version_major": 2,
|
259 |
"version_minor": 0
|
260 |
},
|
|
|
268 |
{
|
269 |
"data": {
|
270 |
"application/vnd.jupyter.widget-view+json": {
|
271 |
+
"model_id": "fceca071ff53478e98a6c6ceff6647df",
|
272 |
"version_major": 2,
|
273 |
"version_minor": 0
|
274 |
},
|
|
|
282 |
{
|
283 |
"data": {
|
284 |
"application/vnd.jupyter.widget-view+json": {
|
285 |
+
"model_id": "4647b098c1034018b007c86dd0e4ea53",
|
286 |
"version_major": 2,
|
287 |
"version_minor": 0
|
288 |
},
|
|
|
318 |
},
|
319 |
{
|
320 |
"cell_type": "code",
|
321 |
+
"execution_count": 5,
|
322 |
"id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
|
323 |
"metadata": {},
|
324 |
"outputs": [
|
|
|
338 |
" 'segment': Value(dtype='string', id=None)}"
|
339 |
]
|
340 |
},
|
341 |
+
"execution_count": 5,
|
342 |
"metadata": {},
|
343 |
"output_type": "execute_result"
|
344 |
}
|
|
|
364 |
},
|
365 |
{
|
366 |
"cell_type": "code",
|
367 |
+
"execution_count": 6,
|
368 |
"id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
|
369 |
"metadata": {},
|
370 |
"outputs": [],
|
|
|
384 |
},
|
385 |
{
|
386 |
"cell_type": "code",
|
387 |
+
"execution_count": 7,
|
388 |
"id": "d041650e-1c48-4439-87b3-5b6f4a514107",
|
389 |
"metadata": {},
|
390 |
"outputs": [],
|
|
|
411 |
},
|
412 |
{
|
413 |
"cell_type": "code",
|
414 |
+
"execution_count": 8,
|
415 |
"id": "c085911c-a10a-41ef-8874-306e0503e9bb",
|
416 |
"metadata": {},
|
417 |
"outputs": [],
|
|
|
447 |
},
|
448 |
{
|
449 |
"cell_type": "code",
|
450 |
+
"execution_count": 9,
|
451 |
"id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
|
452 |
"metadata": {},
|
453 |
"outputs": [],
|
|
|
465 |
},
|
466 |
{
|
467 |
"cell_type": "code",
|
468 |
+
"execution_count": 10,
|
469 |
"id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
|
470 |
"metadata": {},
|
471 |
"outputs": [],
|
|
|
486 |
},
|
487 |
{
|
488 |
"cell_type": "code",
|
489 |
+
"execution_count": 11,
|
490 |
"id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
|
491 |
"metadata": {},
|
492 |
"outputs": [],
|
|
|
507 |
},
|
508 |
{
|
509 |
"cell_type": "code",
|
510 |
+
"execution_count": 12,
|
511 |
"id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
|
512 |
"metadata": {},
|
513 |
"outputs": [],
|
|
|
577 |
},
|
578 |
{
|
579 |
"cell_type": "code",
|
580 |
+
"execution_count": 13,
|
581 |
"id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
|
582 |
"metadata": {},
|
583 |
"outputs": [],
|
|
|
625 |
},
|
626 |
{
|
627 |
"cell_type": "code",
|
628 |
+
"execution_count": 14,
|
629 |
"id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
|
630 |
"metadata": {},
|
631 |
"outputs": [],
|
|
|
652 |
},
|
653 |
{
|
654 |
"cell_type": "code",
|
655 |
+
"execution_count": 15,
|
656 |
"id": "b22b4011-f31f-4b57-b684-c52332f92890",
|
657 |
"metadata": {},
|
658 |
"outputs": [
|
659 |
{
|
660 |
"data": {
|
661 |
"application/vnd.jupyter.widget-view+json": {
|
662 |
+
"model_id": "037e658a8e194212a068ba9eea85cf11",
|
663 |
"version_major": 2,
|
664 |
"version_minor": 0
|
665 |
},
|
|
|
696 |
},
|
697 |
{
|
698 |
"cell_type": "code",
|
699 |
+
"execution_count": 16,
|
700 |
"id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
|
701 |
"metadata": {},
|
702 |
"outputs": [],
|
|
|
729 |
},
|
730 |
{
|
731 |
"cell_type": "markdown",
|
732 |
+
"id": "e7ca118c",
|
733 |
"metadata": {},
|
734 |
"source": [
|
735 |
+
"### Load a Pre-Trained Checkpoint"
|
736 |
]
|
737 |
},
|
738 |
{
|
|
|
746 |
},
|
747 |
{
|
748 |
"cell_type": "code",
|
749 |
+
"execution_count": 17,
|
750 |
"id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
|
751 |
"metadata": {},
|
752 |
"outputs": [
|
753 |
{
|
754 |
"data": {
|
755 |
"application/vnd.jupyter.widget-view+json": {
|
756 |
+
"model_id": "d7c74c4e4fc3495393f4060c8d5e4bb2",
|
757 |
"version_major": 2,
|
758 |
"version_minor": 0
|
759 |
},
|
|
|
767 |
{
|
768 |
"data": {
|
769 |
"application/vnd.jupyter.widget-view+json": {
|
770 |
+
"model_id": "77b037a9877a417d88d6f1bdd975e034",
|
771 |
"version_major": 2,
|
772 |
"version_minor": 0
|
773 |
},
|
|
|
795 |
},
|
796 |
{
|
797 |
"cell_type": "code",
|
798 |
+
"execution_count": 18,
|
799 |
"id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
|
800 |
"metadata": {},
|
801 |
"outputs": [],
|
|
|
823 |
},
|
824 |
{
|
825 |
"cell_type": "code",
|
826 |
+
"execution_count": 19,
|
827 |
"id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
|
828 |
"metadata": {},
|
829 |
"outputs": [],
|
|
|
835 |
" per_device_train_batch_size=64,\n",
|
836 |
" gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
|
837 |
" learning_rate=1e-5,\n",
|
838 |
+
" warmup_steps=200,\n",
|
839 |
+
" max_steps=2000,\n",
|
840 |
" gradient_checkpointing=True,\n",
|
841 |
" fp16=True,\n",
|
842 |
" evaluation_strategy=\"steps\",\n",
|
843 |
" per_device_eval_batch_size=8,\n",
|
844 |
" predict_with_generate=True,\n",
|
845 |
" generation_max_length=225,\n",
|
846 |
+
" save_steps=400,\n",
|
847 |
+
" eval_steps=200,\n",
|
848 |
" logging_steps=25,\n",
|
849 |
" report_to=[\"tensorboard\"],\n",
|
850 |
" load_best_model_at_end=True,\n",
|
|
|
873 |
},
|
874 |
{
|
875 |
"cell_type": "code",
|
876 |
+
"execution_count": 20,
|
877 |
"id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
|
878 |
"metadata": {},
|
879 |
"outputs": [],
|
|
|
902 |
},
|
903 |
{
|
904 |
"cell_type": "code",
|
905 |
+
"execution_count": 21,
|
906 |
"id": "d546d7fe-0543-479a-b708-2ebabec19493",
|
907 |
"metadata": {},
|
908 |
"outputs": [
|
|
|
941 |
},
|
942 |
{
|
943 |
"cell_type": "code",
|
944 |
+
"execution_count": 22,
|
945 |
"id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
|
946 |
"metadata": {},
|
947 |
"outputs": [
|
|
|
989 |
"cell_type": "code",
|
990 |
"execution_count": null,
|
991 |
"id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
|
992 |
+
"metadata": {
|
993 |
+
"scrolled": false
|
994 |
+
},
|
995 |
"outputs": [
|
996 |
{
|
997 |
"name": "stderr",
|
|
|
1000 |
"/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
1001 |
" warnings.warn(\n",
|
1002 |
"***** Running training *****\n",
|
1003 |
+
" Num examples = 128000\n",
|
1004 |
" Num Epochs = 9223372036854775807\n",
|
1005 |
" Instantaneous batch size per device = 64\n",
|
1006 |
" Total train batch size (w. parallel, distributed & accumulation) = 64\n",
|
1007 |
" Gradient Accumulation steps = 1\n",
|
1008 |
+
" Total optimization steps = 2000\n",
|
1009 |
" Number of trainable parameters = 241734912\n",
|
1010 |
+
"Reading metadata...: 29056it [00:00, 64790.22it/s]\n",
|
1011 |
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
1012 |
]
|
1013 |
},
|
|
|
1017 |
"\n",
|
1018 |
" <div>\n",
|
1019 |
" \n",
|
1020 |
+
" <progress value='201' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
1021 |
+
" [ 201/2000 22:53 < 3:26:59, 0.14 it/s, Epoch 0.10/9223372036854775807]\n",
|
1022 |
" </div>\n",
|
1023 |
" <table border=\"1\" class=\"dataframe\">\n",
|
1024 |
" <thead>\n",
|
|
|
1038 |
},
|
1039 |
"metadata": {},
|
1040 |
"output_type": "display_data"
|
1041 |
+
},
|
1042 |
+
{
|
1043 |
+
"name": "stderr",
|
1044 |
+
"output_type": "stream",
|
1045 |
+
"text": [
|
1046 |
+
"***** Running Evaluation *****\n",
|
1047 |
+
" Num examples: Unknown\n",
|
1048 |
+
" Batch size = 8\n",
|
1049 |
+
"Reading metadata...: 10581it [00:00, 28979.41it/s]\n",
|
1050 |
+
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
|
1051 |
+
]
|
1052 |
}
|
1053 |
],
|
1054 |
"source": [
|
|
|
1143 |
],
|
1144 |
"metadata": {
|
1145 |
"kernelspec": {
|
1146 |
+
"display_name": "wspsr",
|
1147 |
"language": "python",
|
1148 |
+
"name": "wspsr"
|
1149 |
},
|
1150 |
"language_info": {
|
1151 |
"codemirror_mode": {
|
fine-tune-whisper-streaming.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 967102601
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:706aa3238840d1df7b20ac7afc35ab74373d09f0af44c2bc541e341cc9b51771
|
3 |
size 967102601
|
runs/Dec12_00-48-47_129-213-26-143/1670806154.5337327/events.out.tfevents.1670806154.129-213-26-143.128161.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:643669bcfb9cfe899fda5786ba8948b49d4519704908d55deef9c50c4a18078b
|
3 |
+
size 5864
|
runs/Dec12_00-48-47_129-213-26-143/events.out.tfevents.1670806154.129-213-26-143.128161.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f260375523d8a81a246006bb8a79ba6533543f960b74a6267c68bd3525d8b722
|
3 |
+
size 5526
|
runs/Dec12_01-54-52_129-213-26-143/1670810104.398764/events.out.tfevents.1670810104.129-213-26-143.742932.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d56cd00dda0c930668340dd283d136919ef21f16a0778d50c99995b33ea74128
|
3 |
+
size 5864
|
runs/Dec12_01-54-52_129-213-26-143/events.out.tfevents.1670810104.129-213-26-143.742932.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8008ff5de93c11cc5c83ead75026474e3dab32f10c8815e6b71fb9ddd18dfda4
|
3 |
+
size 4285
|
runs/Dec12_02-00-57_129-213-26-143/1670810506.3012445/events.out.tfevents.1670810506.129-213-26-143.742932.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cec5b063f93ebdbdb62ebff706f75eff9fc3ddb6b3ff8614e8a1ed748daee8a7
|
3 |
+
size 5864
|
runs/Dec12_02-00-57_129-213-26-143/events.out.tfevents.1670810506.129-213-26-143.742932.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75a75228382f5ea305d02e4c38f7f7105c0c081b96f141bc517ad9f89f2eb234
|
3 |
+
size 5844
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3579
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d97d92b4d57394382fb181e666c91764f7c97306ef64244dbb9caab670c3ac3
|
3 |
size 3579
|