Victor Pontis committed
Commit 9a38756
1 Parent(s): e6b0c52

Clean up a bit

Files changed (2)
  1. create_handler.ipynb +21 -14
  2. handler.py +6 -10
create_handler.ipynb CHANGED
@@ -21,8 +21,9 @@
    }
   ],
   "source": [
-   "%%writefile requirements.txt\n",
-   "git+https://github.com/openai/whisper.git@8cf36f3508c9acd341a45eb2364239a3d81458b9"
+   "% % writefile requirements.txt\n",
+   "git + https: // github.com / openai / whisper.git @ 8\n",
+   "cf36f3508c9acd341a45eb2364239a3d81458b9"
   ]
  },
  {
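Note: the reformat applied in this commit inserts spaces into both the cell magic and the pip URL on the added side. IPython only recognizes a cell magic written as %%writefile at the very start of the cell (the same applies to the handler cell in the -118,8 hunk below), and pip cannot parse a VCS requirement containing spaces. For reference, the working cell as it stood on the removed side:

    %%writefile requirements.txt
    git+https://github.com/openai/whisper.git@8cf36f3508c9acd341a45eb2364239a3d81458b9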
@@ -65,7 +66,7 @@
    }
   ],
   "source": [
-   "!wget https://cdn-media.huggingface.co/speech_samples/sample1.flac"
+   "!wget https: // cdn-media.huggingface.co / speech_samples / sample1.flac"
   ]
  },
  {
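The same spacing damage applies to the shell escape here; wget needs the URL without spaces, as on the removed side:

    !wget https://cdn-media.huggingface.co/speech_samples/sample1.flac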
@@ -118,8 +119,8 @@
    }
   ],
   "source": [
-   "%%writefile handler.py\n",
-   "from typing import Dict\n",
+   "% % writefile handler.py\n",
+   "from typing import Dict\n",
    "from transformers.pipelines.audio_utils import ffmpeg_read\n",
    "import whisper\n",
    "import torch\n",
@@ -127,12 +128,10 @@
    "SAMPLE_RATE = 16000\n",
    "\n",
    "\n",
-   "\n",
    "class EndpointHandler():\n",
    "    def __init__(self, path=\"\"):\n",
    "        # load the model\n",
-   "        self.model = whisper.load_model(\"medium\")\n",
-   "\n",
+   "        self.model = whisper.load_model(\"small\")\n",
    "\n",
    "    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:\n",
    "        \"\"\"\n",
@@ -145,17 +144,25 @@
    "        # process input\n",
    "        inputs = data.pop(\"inputs\", data)\n",
    "        audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)\n",
-   "        audio_tensor= torch.from_numpy(audio_nparray)\n",
-   "        \n",
+   "\n",
    "        # run inference pipeline\n",
    "        result = self.model.transcribe(audio_nparray)\n",
    "\n",
    "        print(\"Hi this is a custom log!\")\n",
    "\n",
    "        # postprocess the prediction\n",
-   "        return { \"text\": result[\"text\"] }"
+   "        return {\"text\": result[\"text\"]}"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "outputs": [],
+  "source": [],
+  "metadata": {
+   "collapsed": false
+  }
+ },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -204,8 +211,7 @@
    "\n",
    "# file reader\n",
    "with open(\"sample1.flac\", \"rb\") as f:\n",
-   "    request = {\"inputs\": f.read()}\n",
-   "\n",
+   "    request = {\"inputs\": f.read()}\n",
    "\n",
    "# test the handler\n",
    "pred = my_handler(request)"
@@ -250,7 +256,8 @@
   "source": [
    "import json\n",
    "\n",
-   "json.dumps({'transcription': \" going along slushy country roads and speaking to damp audiences in draughty school rooms day after day for a fortnight. He'll have to put in an appearance at some place of worship on Sunday morning, and he can come to us immediately afterwards.\"})"
+   "json.dumps({\n",
+   "    'transcription': \" going along slushy country roads and speaking to damp audiences in draughty school rooms day after day for a fortnight. He'll have to put in an appearance at some place of worship on Sunday morning, and he can come to us immediately afterwards.\"})"
   ]
  },
  {
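For context, the smoke test touched by the -204,8 hunk above runs the handler locally before deployment. A minimal sketch of that flow, assuming the notebook imports and instantiates the handler in unchanged cells; the path value below is an assumption (the parameter defaults to ""):

    from handler import EndpointHandler

    # Instantiate the handler the way Inference Endpoints would on startup.
    my_handler = EndpointHandler(path=".")  # path value assumed for illustration

    # Send raw audio bytes under the "inputs" key, mirroring the request payload.
    with open("sample1.flac", "rb") as f:
        request = {"inputs": f.read()}

    pred = my_handler(request)
    print(pred["text"])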
 
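The handler cell above decodes the request bytes with ffmpeg_read before transcribing. A minimal standalone sketch of that decode step, assuming sample1.flac has already been downloaded:

    from transformers.pipelines.audio_utils import ffmpeg_read

    SAMPLE_RATE = 16000

    # ffmpeg_read shells out to ffmpeg and decodes raw container bytes
    # (flac, wav, mp3, ...) into a mono float32 waveform at the requested rate.
    with open("sample1.flac", "rb") as f:
        waveform = ffmpeg_read(f.read(), SAMPLE_RATE)

    print(waveform.dtype, waveform.shape)  # float32, (num_samples,)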
handler.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict
+from typing import Dict
 from transformers.pipelines.audio_utils import ffmpeg_read
 import whisper
 import torch
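The next hunk tightens the class statement and swaps the checkpoint from "medium" to "small". For reference, a minimal sketch of loading a named checkpoint with openai/whisper; load_model does accept a device argument, but the device-selection line is illustrative:

    import torch
    import whisper

    # Checkpoint names include "tiny", "base", "small", "medium" and "large";
    # "small" loads faster and uses less memory than "medium", at some cost in accuracy.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small", device=device)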
@@ -6,12 +6,9 @@ import torch
 SAMPLE_RATE = 16000


-
-class EndpointHandler():
+class EndpointHandler:
     def __init__(self, path=""):
-        # load the model
-        self.model = whisper.load_model("medium")
-
+        self.model = whisper.load_model("small")

     def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
         """
@@ -24,12 +21,11 @@ class EndpointHandler():
         # process input
         inputs = data.pop("inputs", data)
         audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
-        audio_tensor= torch.from_numpy(audio_nparray)
-
+
         # run inference pipeline
         result = self.model.transcribe(audio_nparray)
-
+
         print("Hi this is a custom log!")

         # postprocess the prediction
-        return { "text": result["text"] }
+        return {"text": result["text"]}
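Note that the removed audio_tensor line was dead code: model.transcribe accepts the float numpy array returned by ffmpeg_read directly, so nothing ever consumed the tensor. With that line gone, the import torch at the top of handler.py is itself now unused.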
 
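Putting the hunks together, handler.py after this commit reads roughly as follows; this is a reconstruction from the diff, and the __call__ docstring body, which the hunks do not show, is elided:

    from typing import Dict
    from transformers.pipelines.audio_utils import ffmpeg_read
    import whisper
    import torch

    SAMPLE_RATE = 16000


    class EndpointHandler:
        def __init__(self, path=""):
            self.model = whisper.load_model("small")

        def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
            """..."""  # docstring body not shown in the diff

            # process input
            inputs = data.pop("inputs", data)
            audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)

            # run inference pipeline
            result = self.model.transcribe(audio_nparray)

            print("Hi this is a custom log!")

            # postprocess the prediction
            return {"text": result["text"]}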