kimbochen committed on
Commit
4b52f11
1 Parent(s): e393db1

Training in progress, step 200

Browse files
.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED
@@ -142,74 +142,21 @@
142
  },
143
  {
144
  "cell_type": "code",
145
- "execution_count": 2,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
- "outputs": [
149
- {
150
- "data": {
151
- "application/vnd.jupyter.widget-view+json": {
152
- "model_id": "ecce3a630cdb4ebab217a88a0163b257",
153
- "version_major": 2,
154
- "version_minor": 0
155
- },
156
- "text/plain": [
157
- "Downloading builder script: 0%| | 0.00/8.30k [00:00<?, ?B/s]"
158
- ]
159
- },
160
- "metadata": {},
161
- "output_type": "display_data"
162
- },
163
- {
164
- "data": {
165
- "application/vnd.jupyter.widget-view+json": {
166
- "model_id": "b0141b068f944775867034bc494f88d7",
167
- "version_major": 2,
168
- "version_minor": 0
169
- },
170
- "text/plain": [
171
- "Downloading readme: 0%| | 0.00/12.2k [00:00<?, ?B/s]"
172
- ]
173
- },
174
- "metadata": {},
175
- "output_type": "display_data"
176
- },
177
- {
178
- "data": {
179
- "application/vnd.jupyter.widget-view+json": {
180
- "model_id": "9dd1f4ded47c4160b55f1bcedce2694f",
181
- "version_major": 2,
182
- "version_minor": 0
183
- },
184
- "text/plain": [
185
- "Downloading extra modules: 0%| | 0.00/3.44k [00:00<?, ?B/s]"
186
- ]
187
- },
188
- "metadata": {},
189
- "output_type": "display_data"
190
- },
191
- {
192
- "data": {
193
- "application/vnd.jupyter.widget-view+json": {
194
- "model_id": "a442da1e2a6b4271bae8ae0c655594b6",
195
- "version_major": 2,
196
- "version_minor": 0
197
- },
198
- "text/plain": [
199
- "Downloading extra modules: 0%| | 0.00/60.9k [00:00<?, ?B/s]"
200
- ]
201
- },
202
- "metadata": {},
203
- "output_type": "display_data"
204
- }
205
- ],
206
  "source": [
207
  "from datasets import IterableDatasetDict\n",
208
  "\n",
209
  "raw_datasets = IterableDatasetDict()\n",
210
  "\n",
211
- "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train\", use_auth_token=True) # set split=\"train+validation\" for low-resource\n",
212
- "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
 
 
 
 
213
  ]
214
  },
215
  {
@@ -242,14 +189,16 @@
242
  },
243
  {
244
  "cell_type": "code",
245
- "execution_count": 3,
246
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
247
- "metadata": {},
 
 
248
  "outputs": [
249
  {
250
  "data": {
251
  "application/vnd.jupyter.widget-view+json": {
252
- "model_id": "0d0c17f582474beebea009f021515946",
253
  "version_major": 2,
254
  "version_minor": 0
255
  },
@@ -263,7 +212,7 @@
263
  {
264
  "data": {
265
  "application/vnd.jupyter.widget-view+json": {
266
- "model_id": "9f48049fe65c4045ba74c6fac892945e",
267
  "version_major": 2,
268
  "version_minor": 0
269
  },
@@ -277,7 +226,7 @@
277
  {
278
  "data": {
279
  "application/vnd.jupyter.widget-view+json": {
280
- "model_id": "25615259dd364494bc5782b4e8231b05",
281
  "version_major": 2,
282
  "version_minor": 0
283
  },
@@ -291,7 +240,7 @@
291
  {
292
  "data": {
293
  "application/vnd.jupyter.widget-view+json": {
294
- "model_id": "6867564094bf4c7d82d0046dccb173fe",
295
  "version_major": 2,
296
  "version_minor": 0
297
  },
@@ -305,7 +254,7 @@
305
  {
306
  "data": {
307
  "application/vnd.jupyter.widget-view+json": {
308
- "model_id": "2cb3be77451542868602317c4d7eff85",
309
  "version_major": 2,
310
  "version_minor": 0
311
  },
@@ -319,7 +268,7 @@
319
  {
320
  "data": {
321
  "application/vnd.jupyter.widget-view+json": {
322
- "model_id": "6dfc5dedce13459bbac6f2d695695ae0",
323
  "version_major": 2,
324
  "version_minor": 0
325
  },
@@ -333,7 +282,7 @@
333
  {
334
  "data": {
335
  "application/vnd.jupyter.widget-view+json": {
336
- "model_id": "944cb945f9dd47178ab22d418aa2934b",
337
  "version_major": 2,
338
  "version_minor": 0
339
  },
@@ -369,7 +318,7 @@
369
  },
370
  {
371
  "cell_type": "code",
372
- "execution_count": 4,
373
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
374
  "metadata": {},
375
  "outputs": [
@@ -389,7 +338,7 @@
389
  " 'segment': Value(dtype='string', id=None)}"
390
  ]
391
  },
392
- "execution_count": 4,
393
  "metadata": {},
394
  "output_type": "execute_result"
395
  }
@@ -415,7 +364,7 @@
415
  },
416
  {
417
  "cell_type": "code",
418
- "execution_count": 5,
419
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
420
  "metadata": {},
421
  "outputs": [],
@@ -435,7 +384,7 @@
435
  },
436
  {
437
  "cell_type": "code",
438
- "execution_count": 6,
439
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
440
  "metadata": {},
441
  "outputs": [],
@@ -462,7 +411,7 @@
462
  },
463
  {
464
  "cell_type": "code",
465
- "execution_count": 7,
466
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
467
  "metadata": {},
468
  "outputs": [],
@@ -498,7 +447,7 @@
498
  },
499
  {
500
  "cell_type": "code",
501
- "execution_count": 8,
502
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
503
  "metadata": {},
504
  "outputs": [],
@@ -516,7 +465,7 @@
516
  },
517
  {
518
  "cell_type": "code",
519
- "execution_count": 9,
520
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
521
  "metadata": {},
522
  "outputs": [],
@@ -537,7 +486,7 @@
537
  },
538
  {
539
  "cell_type": "code",
540
- "execution_count": 10,
541
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
542
  "metadata": {},
543
  "outputs": [],
@@ -558,7 +507,7 @@
558
  },
559
  {
560
  "cell_type": "code",
561
- "execution_count": 11,
562
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
563
  "metadata": {},
564
  "outputs": [],
@@ -628,7 +577,7 @@
628
  },
629
  {
630
  "cell_type": "code",
631
- "execution_count": 12,
632
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
633
  "metadata": {},
634
  "outputs": [],
@@ -676,7 +625,7 @@
676
  },
677
  {
678
  "cell_type": "code",
679
- "execution_count": 13,
680
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
681
  "metadata": {},
682
  "outputs": [],
@@ -703,14 +652,14 @@
703
  },
704
  {
705
  "cell_type": "code",
706
- "execution_count": 14,
707
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
708
  "metadata": {},
709
  "outputs": [
710
  {
711
  "data": {
712
  "application/vnd.jupyter.widget-view+json": {
713
- "model_id": "bafc0b31fe9a4d239eedc348d5521dfc",
714
  "version_major": 2,
715
  "version_minor": 0
716
  },
@@ -747,7 +696,7 @@
747
  },
748
  {
749
  "cell_type": "code",
750
- "execution_count": 15,
751
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
752
  "metadata": {},
753
  "outputs": [],
@@ -780,10 +729,10 @@
780
  },
781
  {
782
  "cell_type": "markdown",
783
- "id": "daf2a825-6d9f-4a23-b145-c37c0039075b",
784
  "metadata": {},
785
  "source": [
786
- "### Load a Pre-Trained Checkpoint"
787
  ]
788
  },
789
  {
@@ -797,14 +746,14 @@
797
  },
798
  {
799
  "cell_type": "code",
800
- "execution_count": 16,
801
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
802
  "metadata": {},
803
  "outputs": [
804
  {
805
  "data": {
806
  "application/vnd.jupyter.widget-view+json": {
807
- "model_id": "e1d5d79e596a416aa96bde21be6fb551",
808
  "version_major": 2,
809
  "version_minor": 0
810
  },
@@ -818,7 +767,7 @@
818
  {
819
  "data": {
820
  "application/vnd.jupyter.widget-view+json": {
821
- "model_id": "3d722a61d7a440479d0f5497a6200345",
822
  "version_major": 2,
823
  "version_minor": 0
824
  },
@@ -846,7 +795,7 @@
846
  },
847
  {
848
  "cell_type": "code",
849
- "execution_count": 17,
850
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
851
  "metadata": {},
852
  "outputs": [],
@@ -874,7 +823,7 @@
874
  },
875
  {
876
  "cell_type": "code",
877
- "execution_count": 18,
878
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
879
  "metadata": {},
880
  "outputs": [],
@@ -886,16 +835,16 @@
886
  " per_device_train_batch_size=64,\n",
887
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
888
  " learning_rate=1e-5,\n",
889
- " warmup_steps=500,\n",
890
- " max_steps=5000,\n",
891
  " gradient_checkpointing=True,\n",
892
  " fp16=True,\n",
893
  " evaluation_strategy=\"steps\",\n",
894
  " per_device_eval_batch_size=8,\n",
895
  " predict_with_generate=True,\n",
896
  " generation_max_length=225,\n",
897
- " save_steps=1000,\n",
898
- " eval_steps=1000,\n",
899
  " logging_steps=25,\n",
900
  " report_to=[\"tensorboard\"],\n",
901
  " load_best_model_at_end=True,\n",
@@ -924,7 +873,7 @@
924
  },
925
  {
926
  "cell_type": "code",
927
- "execution_count": 19,
928
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
929
  "metadata": {},
930
  "outputs": [],
@@ -953,7 +902,7 @@
953
  },
954
  {
955
  "cell_type": "code",
956
- "execution_count": 20,
957
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
958
  "metadata": {},
959
  "outputs": [
@@ -992,7 +941,7 @@
992
  },
993
  {
994
  "cell_type": "code",
995
- "execution_count": 21,
996
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
997
  "metadata": {},
998
  "outputs": [
@@ -1040,7 +989,9 @@
1040
  "cell_type": "code",
1041
  "execution_count": null,
1042
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
1043
- "metadata": {},
 
 
1044
  "outputs": [
1045
  {
1046
  "name": "stderr",
@@ -1049,14 +1000,14 @@
1049
  "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
1050
  " warnings.warn(\n",
1051
  "***** Running training *****\n",
1052
- " Num examples = 320000\n",
1053
  " Num Epochs = 9223372036854775807\n",
1054
  " Instantaneous batch size per device = 64\n",
1055
  " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
1056
  " Gradient Accumulation steps = 1\n",
1057
- " Total optimization steps = 5000\n",
1058
  " Number of trainable parameters = 241734912\n",
1059
- "Reading metadata...: 6568it [00:00, 41540.60it/s]\n",
1060
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1061
  ]
1062
  },
@@ -1066,8 +1017,8 @@
1066
  "\n",
1067
  " <div>\n",
1068
  " \n",
1069
- " <progress value='35' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1070
- " [ 35/5000 03:29 < 8:46:02, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
1071
  " </div>\n",
1072
  " <table border=\"1\" class=\"dataframe\">\n",
1073
  " <thead>\n",
@@ -1087,6 +1038,17 @@
1087
  },
1088
  "metadata": {},
1089
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
1090
  }
1091
  ],
1092
  "source": [
@@ -1181,9 +1143,9 @@
1181
  ],
1182
  "metadata": {
1183
  "kernelspec": {
1184
- "display_name": "Python 3 (ipykernel)",
1185
  "language": "python",
1186
- "name": "python3"
1187
  },
1188
  "language_info": {
1189
  "codemirror_mode": {
 
142
  },
143
  {
144
  "cell_type": "code",
145
+ "execution_count": 3,
146
  "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
147
  "metadata": {},
148
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "source": [
150
  "from datasets import IterableDatasetDict\n",
151
  "\n",
152
  "raw_datasets = IterableDatasetDict()\n",
153
  "\n",
154
+ "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"train\", use_auth_token=True)\n",
155
+ "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-CN\", split=\"test\", use_auth_token=True)\n",
156
+ "\n",
157
+ "# zh-TW is low resource\n",
158
+ "# raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train+validation\", use_auth_token=True)\n",
159
+ "# raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
160
  ]
161
  },
162
  {
 
189
  },
190
  {
191
  "cell_type": "code",
192
+ "execution_count": 4,
193
  "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
194
+ "metadata": {
195
+ "tags": []
196
+ },
197
  "outputs": [
198
  {
199
  "data": {
200
  "application/vnd.jupyter.widget-view+json": {
201
+ "model_id": "48ef23eaa9fb4d6ca621fd252befca48",
202
  "version_major": 2,
203
  "version_minor": 0
204
  },
 
212
  {
213
  "data": {
214
  "application/vnd.jupyter.widget-view+json": {
215
+ "model_id": "3087512d9e6642cf8ed5e9b07405b9ef",
216
  "version_major": 2,
217
  "version_minor": 0
218
  },
 
226
  {
227
  "data": {
228
  "application/vnd.jupyter.widget-view+json": {
229
+ "model_id": "37a576b26aa04522b786a919b955fd88",
230
  "version_major": 2,
231
  "version_minor": 0
232
  },
 
240
  {
241
  "data": {
242
  "application/vnd.jupyter.widget-view+json": {
243
+ "model_id": "f62d110f392d495094520328b181a119",
244
  "version_major": 2,
245
  "version_minor": 0
246
  },
 
254
  {
255
  "data": {
256
  "application/vnd.jupyter.widget-view+json": {
257
+ "model_id": "b03cdae0d93f4eacbc32508d5320c583",
258
  "version_major": 2,
259
  "version_minor": 0
260
  },
 
268
  {
269
  "data": {
270
  "application/vnd.jupyter.widget-view+json": {
271
+ "model_id": "fceca071ff53478e98a6c6ceff6647df",
272
  "version_major": 2,
273
  "version_minor": 0
274
  },
 
282
  {
283
  "data": {
284
  "application/vnd.jupyter.widget-view+json": {
285
+ "model_id": "4647b098c1034018b007c86dd0e4ea53",
286
  "version_major": 2,
287
  "version_minor": 0
288
  },
 
318
  },
319
  {
320
  "cell_type": "code",
321
+ "execution_count": 5,
322
  "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
323
  "metadata": {},
324
  "outputs": [
 
338
  " 'segment': Value(dtype='string', id=None)}"
339
  ]
340
  },
341
+ "execution_count": 5,
342
  "metadata": {},
343
  "output_type": "execute_result"
344
  }
 
364
  },
365
  {
366
  "cell_type": "code",
367
+ "execution_count": 6,
368
  "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
369
  "metadata": {},
370
  "outputs": [],
 
384
  },
385
  {
386
  "cell_type": "code",
387
+ "execution_count": 7,
388
  "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
389
  "metadata": {},
390
  "outputs": [],
 
411
  },
412
  {
413
  "cell_type": "code",
414
+ "execution_count": 8,
415
  "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
416
  "metadata": {},
417
  "outputs": [],
 
447
  },
448
  {
449
  "cell_type": "code",
450
+ "execution_count": 9,
451
  "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
452
  "metadata": {},
453
  "outputs": [],
 
465
  },
466
  {
467
  "cell_type": "code",
468
+ "execution_count": 10,
469
  "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
470
  "metadata": {},
471
  "outputs": [],
 
486
  },
487
  {
488
  "cell_type": "code",
489
+ "execution_count": 11,
490
  "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
491
  "metadata": {},
492
  "outputs": [],
 
507
  },
508
  {
509
  "cell_type": "code",
510
+ "execution_count": 12,
511
  "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
512
  "metadata": {},
513
  "outputs": [],
 
577
  },
578
  {
579
  "cell_type": "code",
580
+ "execution_count": 13,
581
  "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
582
  "metadata": {},
583
  "outputs": [],
 
625
  },
626
  {
627
  "cell_type": "code",
628
+ "execution_count": 14,
629
  "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
630
  "metadata": {},
631
  "outputs": [],
 
652
  },
653
  {
654
  "cell_type": "code",
655
+ "execution_count": 15,
656
  "id": "b22b4011-f31f-4b57-b684-c52332f92890",
657
  "metadata": {},
658
  "outputs": [
659
  {
660
  "data": {
661
  "application/vnd.jupyter.widget-view+json": {
662
+ "model_id": "037e658a8e194212a068ba9eea85cf11",
663
  "version_major": 2,
664
  "version_minor": 0
665
  },
 
696
  },
697
  {
698
  "cell_type": "code",
699
+ "execution_count": 16,
700
  "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
701
  "metadata": {},
702
  "outputs": [],
 
729
  },
730
  {
731
  "cell_type": "markdown",
732
+ "id": "e7ca118c",
733
  "metadata": {},
734
  "source": [
735
+ "### Load a Pre-Trained Checkpoint"
736
  ]
737
  },
738
  {
 
746
  },
747
  {
748
  "cell_type": "code",
749
+ "execution_count": 17,
750
  "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
751
  "metadata": {},
752
  "outputs": [
753
  {
754
  "data": {
755
  "application/vnd.jupyter.widget-view+json": {
756
+ "model_id": "d7c74c4e4fc3495393f4060c8d5e4bb2",
757
  "version_major": 2,
758
  "version_minor": 0
759
  },
 
767
  {
768
  "data": {
769
  "application/vnd.jupyter.widget-view+json": {
770
+ "model_id": "77b037a9877a417d88d6f1bdd975e034",
771
  "version_major": 2,
772
  "version_minor": 0
773
  },
 
795
  },
796
  {
797
  "cell_type": "code",
798
+ "execution_count": 18,
799
  "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
800
  "metadata": {},
801
  "outputs": [],
 
823
  },
824
  {
825
  "cell_type": "code",
826
+ "execution_count": 19,
827
  "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
828
  "metadata": {},
829
  "outputs": [],
 
835
  " per_device_train_batch_size=64,\n",
836
  " gradient_accumulation_steps=1, # increase by 2x for every 2x decrease in batch size\n",
837
  " learning_rate=1e-5,\n",
838
+ " warmup_steps=200,\n",
839
+ " max_steps=2000,\n",
840
  " gradient_checkpointing=True,\n",
841
  " fp16=True,\n",
842
  " evaluation_strategy=\"steps\",\n",
843
  " per_device_eval_batch_size=8,\n",
844
  " predict_with_generate=True,\n",
845
  " generation_max_length=225,\n",
846
+ " save_steps=400,\n",
847
+ " eval_steps=200,\n",
848
  " logging_steps=25,\n",
849
  " report_to=[\"tensorboard\"],\n",
850
  " load_best_model_at_end=True,\n",
 
873
  },
874
  {
875
  "cell_type": "code",
876
+ "execution_count": 20,
877
  "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
878
  "metadata": {},
879
  "outputs": [],
 
902
  },
903
  {
904
  "cell_type": "code",
905
+ "execution_count": 21,
906
  "id": "d546d7fe-0543-479a-b708-2ebabec19493",
907
  "metadata": {},
908
  "outputs": [
 
941
  },
942
  {
943
  "cell_type": "code",
944
+ "execution_count": 22,
945
  "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
946
  "metadata": {},
947
  "outputs": [
 
989
  "cell_type": "code",
990
  "execution_count": null,
991
  "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
992
+ "metadata": {
993
+ "scrolled": false
994
+ },
995
  "outputs": [
996
  {
997
  "name": "stderr",
 
1000
  "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
1001
  " warnings.warn(\n",
1002
  "***** Running training *****\n",
1003
+ " Num examples = 128000\n",
1004
  " Num Epochs = 9223372036854775807\n",
1005
  " Instantaneous batch size per device = 64\n",
1006
  " Total train batch size (w. parallel, distributed & accumulation) = 64\n",
1007
  " Gradient Accumulation steps = 1\n",
1008
+ " Total optimization steps = 2000\n",
1009
  " Number of trainable parameters = 241734912\n",
1010
+ "Reading metadata...: 29056it [00:00, 64790.22it/s]\n",
1011
  "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1012
  ]
1013
  },
 
1017
  "\n",
1018
  " <div>\n",
1019
  " \n",
1020
+ " <progress value='201' max='2000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
1021
+ " [ 201/2000 22:53 < 3:26:59, 0.14 it/s, Epoch 0.10/9223372036854775807]\n",
1022
  " </div>\n",
1023
  " <table border=\"1\" class=\"dataframe\">\n",
1024
  " <thead>\n",
 
1038
  },
1039
  "metadata": {},
1040
  "output_type": "display_data"
1041
+ },
1042
+ {
1043
+ "name": "stderr",
1044
+ "output_type": "stream",
1045
+ "text": [
1046
+ "***** Running Evaluation *****\n",
1047
+ " Num examples: Unknown\n",
1048
+ " Batch size = 8\n",
1049
+ "Reading metadata...: 10581it [00:00, 28979.41it/s]\n",
1050
+ "The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
1051
+ ]
1052
  }
1053
  ],
1054
  "source": [
 
1143
  ],
1144
  "metadata": {
1145
  "kernelspec": {
1146
+ "display_name": "wspsr",
1147
  "language": "python",
1148
+ "name": "wspsr"
1149
  },
1150
  "language_info": {
1151
  "codemirror_mode": {
fine-tune-whisper-streaming.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f194b437e01232adf390094500baf37653fed4bfba50c6514a2eb0cdcb74f5e
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:706aa3238840d1df7b20ac7afc35ab74373d09f0af44c2bc541e341cc9b51771
3
  size 967102601
runs/Dec12_00-48-47_129-213-26-143/1670806154.5337327/events.out.tfevents.1670806154.129-213-26-143.128161.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643669bcfb9cfe899fda5786ba8948b49d4519704908d55deef9c50c4a18078b
3
+ size 5864
runs/Dec12_00-48-47_129-213-26-143/events.out.tfevents.1670806154.129-213-26-143.128161.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f260375523d8a81a246006bb8a79ba6533543f960b74a6267c68bd3525d8b722
3
+ size 5526
runs/Dec12_01-54-52_129-213-26-143/1670810104.398764/events.out.tfevents.1670810104.129-213-26-143.742932.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d56cd00dda0c930668340dd283d136919ef21f16a0778d50c99995b33ea74128
3
+ size 5864
runs/Dec12_01-54-52_129-213-26-143/events.out.tfevents.1670810104.129-213-26-143.742932.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8008ff5de93c11cc5c83ead75026474e3dab32f10c8815e6b71fb9ddd18dfda4
3
+ size 4285
runs/Dec12_02-00-57_129-213-26-143/1670810506.3012445/events.out.tfevents.1670810506.129-213-26-143.742932.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec5b063f93ebdbdb62ebff706f75eff9fc3ddb6b3ff8614e8a1ed748daee8a7
3
+ size 5864
runs/Dec12_02-00-57_129-213-26-143/events.out.tfevents.1670810506.129-213-26-143.742932.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a75228382f5ea305d02e4c38f7f7105c0c081b96f141bc517ad9f89f2eb234
3
+ size 5844
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dea523f572893e3c4ca713d3731c8194b372c3af7337897cc7ca69fa8dc28498
3
  size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d97d92b4d57394382fb181e666c91764f7c97306ef64244dbb9caab670c3ac3
3
  size 3579