Training in progress, step 1000

Browse files

Files changed (5) hide show

.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb +419 -38
fine-tune-whisper-streaming.ipynb +142 -5
pytorch_model.bin +1 -1
runs/Dec10_02-58-52_129-213-89-27/1670641248.2035987/events.out.tfevents.1670641248.129-213-89-27.128858.1 +3 -0
runs/Dec10_02-58-52_129-213-89-27/events.out.tfevents.1670641248.129-213-89-27.128858.0 +3 -0

.ipynb_checkpoints/fine-tune-whisper-streaming-checkpoint.ipynb CHANGED Viewed

@@ -108,7 +108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
    "metadata": {},
    "outputs": [],
@@ -142,17 +142,74 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
    "metadata": {},
-   "outputs": [],
    "source": [
     "from datasets import IterableDatasetDict\n",
     "\n",
     "raw_datasets = IterableDatasetDict()\n",
     "\n",
-    "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"es\", split=\"train\", use_auth_token=True)  # set split=\"train+validation\" for low-resource\n",
-    "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"es\", split=\"test\", use_auth_token=True)"
    ]
   },
   {
@@ -185,14 +242,113 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
    "metadata": {},
-   "outputs": [],
    "source": [
     "from transformers import WhisperProcessor\n",
     "\n",
-    "processor = WhisperProcessor.from_pretrained(\"openai/whisper-small\", language=\"Spanish\", task=\"transcribe\")"
    ]
   },
   {
@@ -213,10 +369,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
    "metadata": {},
-   "outputs": [],
    "source": [
     "raw_datasets[\"train\"].features"
    ]
@@ -238,7 +415,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
    "metadata": {},
    "outputs": [],
@@ -258,7 +435,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
    "metadata": {},
    "outputs": [],
@@ -285,7 +462,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
    "metadata": {},
    "outputs": [],
@@ -321,7 +498,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
    "metadata": {},
    "outputs": [],
@@ -339,7 +516,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
    "metadata": {},
    "outputs": [],
@@ -360,7 +537,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
    "metadata": {},
    "outputs": [],
@@ -381,7 +558,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
    "metadata": {},
    "outputs": [],
@@ -451,7 +628,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
    "metadata": {},
    "outputs": [],
@@ -499,7 +676,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
    "metadata": {},
    "outputs": [],
@@ -526,10 +703,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "b22b4011-f31f-4b57-b684-c52332f92890",
    "metadata": {},
-   "outputs": [],
    "source": [
     "import evaluate\n",
     "\n",
@@ -555,7 +747,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
    "metadata": {},
    "outputs": [],
@@ -605,10 +797,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
    "metadata": {},
-   "outputs": [],
    "source": [
     "from transformers import WhisperForConditionalGeneration\n",
     "\n",
@@ -625,7 +846,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
    "metadata": {},
    "outputs": [],
@@ -653,7 +874,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
    "metadata": {},
    "outputs": [],
@@ -703,7 +924,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
    "metadata": {},
    "outputs": [],
@@ -732,10 +953,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "d546d7fe-0543-479a-b708-2ebabec19493",
    "metadata": {},
-   "outputs": [],
    "source": [
     "from transformers import Seq2SeqTrainer\n",
     "\n",
@@ -761,10 +992,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
    "metadata": {},
-   "outputs": [],
    "source": [
     "model.save_pretrained(training_args.output_dir)\n",
     "processor.save_pretrained(training_args.output_dir)"
@@ -797,7 +1041,54 @@
    "execution_count": null,
    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
    "metadata": {},
-   "outputs": [],
    "source": [
     "trainer.train()"
    ]
@@ -824,7 +1115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
    "metadata": {},
    "outputs": [],
@@ -832,8 +1123,8 @@
     "kwargs = {\n",
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"Common Voice 11.0\",  # a 'pretty' name for the training dataset\n",
-    "    \"language\": \"es\",\n",
-    "    \"model_name\": \"Whisper Small Es - Sanchit Gandhi\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-small\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
     "    \"tags\": \"whisper-event\",\n",
@@ -850,10 +1141,100 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
    "metadata": {},
-   "outputs": [],
    "source": [
     "trainer.push_to_hub(**kwargs)"
    ]
@@ -875,7 +1256,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.9"
   }
  },
  "nbformat": 4,

   },
   {
    "cell_type": "code",
+   "execution_count": 1,
    "id": "065a8cf7-e54f-4ac3-900e-609c80714fca",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ecce3a630cdb4ebab217a88a0163b257",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading builder script:   0%|          | 0.00/8.30k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b0141b068f944775867034bc494f88d7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading readme:   0%|          | 0.00/12.2k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9dd1f4ded47c4160b55f1bcedce2694f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading extra modules:   0%|          | 0.00/3.44k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a442da1e2a6b4271bae8ae0c655594b6",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading extra modules:   0%|          | 0.00/60.9k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "from datasets import IterableDatasetDict\n",
     "\n",
     "raw_datasets = IterableDatasetDict()\n",
     "\n",
+    "raw_datasets[\"train\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"train\", use_auth_token=True)  # set split=\"train+validation\" for low-resource\n",
+    "raw_datasets[\"test\"] = load_streaming_dataset(\"mozilla-foundation/common_voice_11_0\", \"zh-TW\", split=\"test\", use_auth_token=True)"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0d0c17f582474beebea009f021515946",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/185k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9f48049fe65c4045ba74c6fac892945e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/829 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "25615259dd364494bc5782b4e8231b05",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/1.04M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6867564094bf4c7d82d0046dccb173fe",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/494k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2cb3be77451542868602317c4d7eff85",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/52.7k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6dfc5dedce13459bbac6f2d695695ae0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/2.11k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "944cb945f9dd47178ab22d418aa2934b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/2.06k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "from transformers import WhisperProcessor\n",
     "\n",
+    "processor = WhisperProcessor.from_pretrained(\"openai/whisper-small\", language=\"chinese\", task=\"transcribe\")"
    ]
   },
   {
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "ab5a13b4-9bd4-4aa0-aef2-b3de9b762988",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'client_id': Value(dtype='string', id=None),\n",
+       " 'path': Value(dtype='string', id=None),\n",
+       " 'audio': Audio(sampling_rate=48000, mono=True, decode=True, id=None),\n",
+       " 'sentence': Value(dtype='string', id=None),\n",
+       " 'up_votes': Value(dtype='int64', id=None),\n",
+       " 'down_votes': Value(dtype='int64', id=None),\n",
+       " 'age': Value(dtype='string', id=None),\n",
+       " 'gender': Value(dtype='string', id=None),\n",
+       " 'accent': Value(dtype='string', id=None),\n",
+       " 'locale': Value(dtype='string', id=None),\n",
+       " 'segment': Value(dtype='string', id=None)}"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "raw_datasets[\"train\"].features"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 5,
    "id": "3ab6a724-3d1e-478b-a9e9-d2f85feb6c39",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "d041650e-1c48-4439-87b3-5b6f4a514107",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "c085911c-a10a-41ef-8874-306e0503e9bb",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "a37a7cdb-9013-427f-8de9-6a8d0e9dc684",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "1b145699-acfc-4b1d-93a2-a2ad3d62674c",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 10,
    "id": "01cb25ef-4bb0-4325-9461-f59198acadf6",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "id": "333f7f6e-6053-4d3b-8924-c733c79b82ac",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
    "id": "b22b4011-f31f-4b57-b684-c52332f92890",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "bafc0b31fe9a4d239eedc348d5521dfc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading builder script:   0%|          | 0.00/4.49k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "import evaluate\n",
     "\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 15,
    "id": "a11d1bfc-9e28-460f-a287-72d8f7bc1acb",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 16,
    "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e1d5d79e596a416aa96bde21be6fb551",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/1.97k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3d722a61d7a440479d0f5497a6200345",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading:   0%|          | 0.00/967M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "from transformers import WhisperForConditionalGeneration\n",
     "\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
    "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
    "id": "3ac16b62-b3c0-4c68-8f3d-9ecf471534b2",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "id": "d546d7fe-0543-479a-b708-2ebabec19493",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/whisper-small-zh-tw/./ is already a clone of https://huggingface.co/kimbochen/whisper-small-zh-tw. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+      "max_steps is given, it will override any value given in num_train_epochs\n",
+      "Using cuda_amp half precision backend\n"
+     ]
+    }
+   ],
    "source": [
     "from transformers import Seq2SeqTrainer\n",
     "\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "id": "a1ccb9ed-cbc8-4419-91c0-651e9424b672",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Configuration saved in ./config.json\n",
+      "Model weights saved in ./pytorch_model.bin\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "tokenizer config file saved in ./tokenizer_config.json\n",
+      "Special tokens file saved in ./special_tokens_map.json\n",
+      "added tokens file saved in ./added_tokens.json\n"
+     ]
+    }
+   ],
    "source": [
     "model.save_pretrained(training_args.output_dir)\n",
     "processor.save_pretrained(training_args.output_dir)"
    "execution_count": null,
    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+      "  warnings.warn(\n",
+      "***** Running training *****\n",
+      "  Num examples = 320000\n",
+      "  Num Epochs = 9223372036854775807\n",
+      "  Instantaneous batch size per device = 64\n",
+      "  Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+      "  Gradient Accumulation steps = 1\n",
+      "  Total optimization steps = 5000\n",
+      "  Number of trainable parameters = 241734912\n",
+      "Reading metadata...: 6568it [00:00, 41540.60it/s]\n",
+      "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='29' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [  29/5000 02:50 < 8:42:35, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "trainer.train()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 22,
    "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
    "metadata": {},
    "outputs": [],
     "kwargs = {\n",
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"Common Voice 11.0\",  # a 'pretty' name for the training dataset\n",
+    "    \"language\": \"zh-TW\",\n",
+    "    \"model_name\": \"Whisper Small Chinese - Kimbo Chen\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-small\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
     "    \"tags\": \"whisper-event\",\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 23,
    "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Saving model checkpoint to ./\n",
+      "Configuration saved in ./config.json\n",
+      "Model weights saved in ./pytorch_model.bin\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "tokenizer config file saved in ./tokenizer_config.json\n",
+      "Special tokens file saved in ./special_tokens_map.json\n",
+      "added tokens file saved in ./added_tokens.json\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dc59052a3b7f45b2b896c03763c79f57",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1c58442a44e84af9a6dff915e036de83",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "remote: Scanning LFS files for validity, may be slow...        \n",
+      "remote: LFS file scan complete.        \n",
+      "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
+      "   2ee4cf3..214645d  main -> main\n",
+      "\n",
+      "Dropping the following result as it does not have all the necessary fields:\n",
+      "{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'zh-TW', 'split': 'test', 'args': 'zh-TW'}}\n",
+      "remote: ----------------------------------------------------------\u001b[0;31m        \n",
+      "remote: Sorry, your push was rejected during YAML metadata verification:        \n",
+      "remote: - Error: \"language[0]\" must only contain lowercase characters        \n",
+      "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "remote: Please find the documentation at:        \n",
+      "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
+      " ! [remote rejected] main -> main (pre-receive hook declined)\n",
+      "error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
+      "\n",
+      "Error pushing update to the model card. Please read logs and retry.\n",
+      "$remote: ----------------------------------------------------------\u001b[0;31m        \n",
+      "remote: Sorry, your push was rejected during YAML metadata verification:        \n",
+      "remote: - Error: \"language[0]\" must only contain lowercase characters        \n",
+      "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "remote: Please find the documentation at:        \n",
+      "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
+      " ! [remote rejected] main -> main (pre-receive hook declined)\n",
+      "error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/214645d6cd1f0e7ab6a65a854eec2e349529961c'"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "trainer.push_to_hub(**kwargs)"
    ]
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.8.10"
   }
  },
  "nbformat": 4,

fine-tune-whisper-streaming.ipynb CHANGED Viewed

@@ -1041,7 +1041,54 @@
    "execution_count": null,
    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
    "metadata": {},
-   "outputs": [],
    "source": [
     "trainer.train()"
    ]
@@ -1068,7 +1115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
    "metadata": {},
    "outputs": [],
@@ -1076,7 +1123,7 @@
     "kwargs = {\n",
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"Common Voice 11.0\",  # a 'pretty' name for the training dataset\n",
-    "    \"language\": \"\",\n",
     "    \"model_name\": \"Whisper Small Chinese - Kimbo Chen\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-small\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
@@ -1094,10 +1141,100 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
    "metadata": {},
-   "outputs": [],
    "source": [
     "trainer.push_to_hub(**kwargs)"
    ]

    "execution_count": null,
    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ubuntu/.venv/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+      "  warnings.warn(\n",
+      "***** Running training *****\n",
+      "  Num examples = 320000\n",
+      "  Num Epochs = 9223372036854775807\n",
+      "  Instantaneous batch size per device = 64\n",
+      "  Total train batch size (w. parallel, distributed & accumulation) = 64\n",
+      "  Gradient Accumulation steps = 1\n",
+      "  Total optimization steps = 5000\n",
+      "  Number of trainable parameters = 241734912\n",
+      "Reading metadata...: 6568it [00:00, 41540.60it/s]\n",
+      "The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`,  you can safely ignore this message.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='35' max='5000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [  35/5000 03:29 < 8:46:02, 0.16 it/s, Epoch 0.01/9223372036854775807]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Step</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "trainer.train()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 22,
    "id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
    "metadata": {},
    "outputs": [],
     "kwargs = {\n",
     "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
     "    \"dataset\": \"Common Voice 11.0\",  # a 'pretty' name for the training dataset\n",
+    "    \"language\": \"zh-TW\",\n",
     "    \"model_name\": \"Whisper Small Chinese - Kimbo Chen\",  # a 'pretty' name for your model\n",
     "    \"finetuned_from\": \"openai/whisper-small\",\n",
     "    \"tasks\": \"automatic-speech-recognition\",\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 23,
    "id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Saving model checkpoint to ./\n",
+      "Configuration saved in ./config.json\n",
+      "Model weights saved in ./pytorch_model.bin\n",
+      "Feature extractor saved in ./preprocessor_config.json\n",
+      "tokenizer config file saved in ./tokenizer_config.json\n",
+      "Special tokens file saved in ./special_tokens_map.json\n",
+      "added tokens file saved in ./added_tokens.json\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dc59052a3b7f45b2b896c03763c79f57",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload file pytorch_model.bin:   0%|          | 32.0k/922M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1c58442a44e84af9a6dff915e036de83",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Upload file training_args.bin: 100%|##########| 3.50k/3.50k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "remote: Scanning LFS files for validity, may be slow...        \n",
+      "remote: LFS file scan complete.        \n",
+      "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
+      "   2ee4cf3..214645d  main -> main\n",
+      "\n",
+      "Dropping the following result as it does not have all the necessary fields:\n",
+      "{'task': {'name': 'Automatic Speech Recognition', 'type': 'automatic-speech-recognition'}, 'dataset': {'name': 'Common Voice 11.0', 'type': 'mozilla-foundation/common_voice_11_0', 'config': 'zh-TW', 'split': 'test', 'args': 'zh-TW'}}\n",
+      "remote: ----------------------------------------------------------\u001b[0;31m        \n",
+      "remote: Sorry, your push was rejected during YAML metadata verification:        \n",
+      "remote: - Error: \"language[0]\" must only contain lowercase characters        \n",
+      "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "remote: Please find the documentation at:        \n",
+      "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
+      " ! [remote rejected] main -> main (pre-receive hook declined)\n",
+      "error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
+      "\n",
+      "Error pushing update to the model card. Please read logs and retry.\n",
+      "$remote: ----------------------------------------------------------\u001b[0;31m        \n",
+      "remote: Sorry, your push was rejected during YAML metadata verification:        \n",
+      "remote: - Error: \"language[0]\" must only contain lowercase characters        \n",
+      "remote: - Error: \"language[0]\" with value \"zh-TW\" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters), or a special value like \"code\", \"multilingual\". If you want to use BCP-47 identifiers, you can specify them in language_bcp47.\u001b[0;32m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "remote: Please find the documentation at:        \n",
+      "remote: https://huggingface.co/docs/hub/model-cards#model-card-metadata\u001b[0;0m        \n",
+      "remote: ----------------------------------------------------------        \n",
+      "To https://huggingface.co/kimbochen/whisper-small-zh-tw\n",
+      " ! [remote rejected] main -> main (pre-receive hook declined)\n",
+      "error: failed to push some refs to 'https://huggingface.co/kimbochen/whisper-small-zh-tw'\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'https://huggingface.co/kimbochen/whisper-small-zh-tw/commit/214645d6cd1f0e7ab6a65a854eec2e349529961c'"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "trainer.push_to_hub(**kwargs)"
    ]

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0fc1b0188915501fc1066b2932bcedbe557ab656231371b7ea5278a28d488d6
 size 967102601

 version https://git-lfs.github.com/spec/v1
+oid sha256:68c37aa36016265b630dfcf67b6593ca65cefa6c6e939ab9dd790e2b04c9b56f
 size 967102601

runs/Dec10_02-58-52_129-213-89-27/1670641248.2035987/events.out.tfevents.1670641248.129-213-89-27.128858.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d7d176987bf05d49e50c322906f78e49182290133c788cbef513dd25194be99
+size 5863

runs/Dec10_02-58-52_129-213-89-27/events.out.tfevents.1670641248.129-213-89-27.128858.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57031bef3a05b71c381e0b0d76e9378fdb1bb7a416a15f96b5296653a4f5bb53
+size 10869