Victor Pontis
committed on
Commit
•
9a38756
1
Parent(s):
e6b0c52
Clean up a bit
Browse files- create_handler.ipynb +21 -14
- handler.py +6 -10
create_handler.ipynb
CHANGED
@@ -21,8 +21,9 @@
|
|
21 |
}
|
22 |
],
|
23 |
"source": [
|
24 |
-
"
|
25 |
-
"git+https
|
|
|
26 |
]
|
27 |
},
|
28 |
{
|
@@ -65,7 +66,7 @@
|
|
65 |
}
|
66 |
],
|
67 |
"source": [
|
68 |
-
"!wget https
|
69 |
]
|
70 |
},
|
71 |
{
|
@@ -118,8 +119,8 @@
|
|
118 |
}
|
119 |
],
|
120 |
"source": [
|
121 |
-
"
|
122 |
-
"from typing import
|
123 |
"from transformers.pipelines.audio_utils import ffmpeg_read\n",
|
124 |
"import whisper\n",
|
125 |
"import torch\n",
|
@@ -127,12 +128,10 @@
|
|
127 |
"SAMPLE_RATE = 16000\n",
|
128 |
"\n",
|
129 |
"\n",
|
130 |
-
"\n",
|
131 |
"class EndpointHandler():\n",
|
132 |
" def __init__(self, path=\"\"):\n",
|
133 |
" # load the model\n",
|
134 |
-
" self.model = whisper.load_model(\"
|
135 |
-
"\n",
|
136 |
"\n",
|
137 |
" def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:\n",
|
138 |
" \"\"\"\n",
|
@@ -145,17 +144,25 @@
|
|
145 |
" # process input\n",
|
146 |
" inputs = data.pop(\"inputs\", data)\n",
|
147 |
" audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)\n",
|
148 |
-
"
|
149 |
-
" \n",
|
150 |
" # run inference pipeline\n",
|
151 |
" result = self.model.transcribe(audio_nparray)\n",
|
152 |
"\n",
|
153 |
" print(\"Hi this is a custom log!\")\n",
|
154 |
"\n",
|
155 |
" # postprocess the prediction\n",
|
156 |
-
" return {
|
157 |
]
|
158 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
{
|
160 |
"cell_type": "markdown",
|
161 |
"metadata": {},
|
@@ -204,8 +211,7 @@
|
|
204 |
"\n",
|
205 |
"# file reader\n",
|
206 |
"with open(\"sample1.flac\", \"rb\") as f:\n",
|
207 |
-
"
|
208 |
-
"\n",
|
209 |
"\n",
|
210 |
"# test the handler\n",
|
211 |
"pred = my_handler(request)"
|
@@ -250,7 +256,8 @@
|
|
250 |
"source": [
|
251 |
"import json\n",
|
252 |
"\n",
|
253 |
-
"json.dumps({
|
|
|
254 |
]
|
255 |
},
|
256 |
{
|
|
|
21 |
}
|
22 |
],
|
23 |
"source": [
|
24 |
+
"%%writefile requirements.txt\n",
|
25 |
+
"git+https://github.com/openai/whisper.git@8cf36f3508c9acd341a45eb2364239a3d81458b9"
|
27 |
]
|
28 |
},
|
29 |
{
|
|
|
66 |
}
|
67 |
],
|
68 |
"source": [
|
69 |
+
"!wget https://cdn-media.huggingface.co/speech_samples/sample1.flac"
|
70 |
]
|
71 |
},
|
72 |
{
|
|
|
119 |
}
|
120 |
],
|
121 |
"source": [
|
122 |
+
"%%writefile handler.py\n",
|
123 |
+
"from typing import Dict\n",
|
124 |
"from transformers.pipelines.audio_utils import ffmpeg_read\n",
|
125 |
"import whisper\n",
|
126 |
"import torch\n",
|
|
|
128 |
"SAMPLE_RATE = 16000\n",
|
129 |
"\n",
|
130 |
"\n",
|
|
|
131 |
"class EndpointHandler():\n",
|
132 |
" def __init__(self, path=\"\"):\n",
|
133 |
" # load the model\n",
|
134 |
+
" self.model = whisper.load_model(\"small\")\n",
|
|
|
135 |
"\n",
|
136 |
" def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:\n",
|
137 |
" \"\"\"\n",
|
|
|
144 |
" # process input\n",
|
145 |
" inputs = data.pop(\"inputs\", data)\n",
|
146 |
" audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)\n",
|
147 |
+
"\n",
|
|
|
148 |
" # run inference pipeline\n",
|
149 |
" result = self.model.transcribe(audio_nparray)\n",
|
150 |
"\n",
|
151 |
" print(\"Hi this is a custom log!\")\n",
|
152 |
"\n",
|
153 |
" # postprocess the prediction\n",
|
154 |
+
" return {\"text\": result[\"text\"]}"
|
155 |
]
|
156 |
},
|
157 |
+
{
|
158 |
+
"cell_type": "code",
|
159 |
+
"execution_count": null,
|
160 |
+
"outputs": [],
|
161 |
+
"source": [],
|
162 |
+
"metadata": {
|
163 |
+
"collapsed": false
|
164 |
+
}
|
165 |
+
},
|
166 |
{
|
167 |
"cell_type": "markdown",
|
168 |
"metadata": {},
|
|
|
211 |
"\n",
|
212 |
"# file reader\n",
|
213 |
"with open(\"sample1.flac\", \"rb\") as f:\n",
|
214 |
+
" request = {\"inputs\": f.read()}\n",
|
|
|
215 |
"\n",
|
216 |
"# test the handler\n",
|
217 |
"pred = my_handler(request)"
|
|
|
256 |
"source": [
|
257 |
"import json\n",
|
258 |
"\n",
|
259 |
+
"json.dumps({\n",
|
260 |
+
" 'transcription': \" going along slushy country roads and speaking to damp audiences in draughty school rooms day after day for a fortnight. He'll have to put in an appearance at some place of worship on Sunday morning, and he can come to us immediately afterwards.\"})"
|
261 |
]
|
262 |
},
|
263 |
{
|
handler.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from typing import
|
2 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
3 |
import whisper
|
4 |
import torch
|
@@ -6,12 +6,9 @@ import torch
|
|
6 |
SAMPLE_RATE = 16000
|
7 |
|
8 |
|
9 |
-
|
10 |
-
class EndpointHandler():
|
11 |
def __init__(self, path=""):
|
12 |
-
|
13 |
-
self.model = whisper.load_model("medium")
|
14 |
-
|
15 |
|
16 |
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
|
17 |
"""
|
@@ -24,12 +21,11 @@ class EndpointHandler():
|
|
24 |
# process input
|
25 |
inputs = data.pop("inputs", data)
|
26 |
audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
|
27 |
-
|
28 |
-
|
29 |
# run inference pipeline
|
30 |
result = self.model.transcribe(audio_nparray)
|
31 |
-
|
32 |
print("Hi this is a custom log!")
|
33 |
|
34 |
# postprocess the prediction
|
35 |
-
return {
|
|
|
1 |
+
from typing import Dict
|
2 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
3 |
import whisper
|
4 |
import torch
|
|
|
6 |
SAMPLE_RATE = 16000
|
7 |
|
8 |
|
9 |
+
class EndpointHandler:
|
|
|
10 |
def __init__(self, path=""):
|
11 |
+
self.model = whisper.load_model("small")
|
|
|
|
|
12 |
|
13 |
def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
|
14 |
"""
|
|
|
21 |
# process input
|
22 |
inputs = data.pop("inputs", data)
|
23 |
audio_nparray = ffmpeg_read(inputs, SAMPLE_RATE)
|
24 |
+
|
|
|
25 |
# run inference pipeline
|
26 |
result = self.model.transcribe(audio_nparray)
|
27 |
+
|
28 |
print("Hi this is a custom log!")
|
29 |
|
30 |
# postprocess the prediction
|
31 |
+
return {"text": result["text"]}
|