HF Endpoint handler added
#1
by
giulianopenido
- opened
- handler.py +46 -0
- tokenizer.json +0 -0
handler.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Any, Dict, List
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
|
6 |
+
from transformers.pipelines.audio_utils import ffmpeg_read
|
7 |
+
|
8 |
+
SAMPLE_RATE = 16000
|
9 |
+
|
10 |
+
|
11 |
+
class EndpointHandler:
    """Custom inference handler that transcribes audio with a Whisper model.

    The handler loads a Whisper processor and model from ``path`` once at
    construction time, wraps them in an ``automatic-speech-recognition``
    pipeline, and then serves requests through ``__call__``.
    """

    def __init__(self, path=""):
        """Load the Whisper checkpoint at *path* and build the ASR pipeline.

        Args:
            path: Location passed to ``from_pretrained`` for both the
                processor and the model.
        """
        device, torch_dtype = self._select_device()

        whisper_processor = WhisperProcessor.from_pretrained(path)
        whisper_model = WhisperForConditionalGeneration.from_pretrained(
            path,
            torch_dtype=torch_dtype,
        ).to(device)

        # NOTE(review): ``model_kwargs`` is documented as extra arguments for
        # the model's ``from_pretrained``; since an already-instantiated model
        # is passed here, ``language`` may have no effect. If Portuguese must
        # be forced, ``generate_kwargs={"language": "pt"}`` is presumably the
        # intended route -- confirm against the deployed transformers version
        # before changing it.
        self.asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=whisper_model,
            tokenizer=whisper_processor.tokenizer,
            feature_extractor=whisper_processor.feature_extractor,
            chunk_length_s=30,
            torch_dtype=torch_dtype,
            device=device,
            model_kwargs={"language": "pt"},
        )

    @staticmethod
    def _select_device():
        """Return ``(device, dtype)``: CUDA/float16 if available, else CPU/float32."""
        if torch.cuda.is_available():
            return "cuda", torch.float16
        # Half precision is only chosen together with CUDA; on CPU the
        # default float32 is used instead of failing on a missing GPU.
        return "cpu", torch.float32

    @staticmethod
    def _to_mono(audio_nparray):
        """Average a two-column array into one channel; pass anything else through."""
        if audio_nparray.ndim > 1 and audio_nparray.shape[1] == 2:
            return np.mean(audio_nparray, axis=1)
        return audio_nparray

    def parse_audio(self, audio_bytes):
        """Decode raw audio bytes into a waveform array.

        Args:
            audio_bytes: Encoded audio payload handed to ``ffmpeg_read``.

        Returns:
            The array produced by ``ffmpeg_read`` at ``SAMPLE_RATE``, with a
            two-column result averaged down to a single channel.
        """
        audio_nparray = ffmpeg_read(audio_bytes, SAMPLE_RATE)
        return self._to_mono(audio_nparray)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Transcribe one request.

        Args:
            data: Request payload. ``data["inputs"]`` holds the audio bytes;
                the optional ``data["parameters"]`` mapping is forwarded to
                the pipeline call as keyword arguments.

        Returns:
            The pipeline's result, returned unchanged.

        Raises:
            KeyError: If ``data`` has no ``"inputs"`` entry.
        """
        # ``or {}`` also covers an explicit ``"parameters": None``, which
        # would otherwise fail when unpacked with ``**``.
        parameters = data.get("parameters") or {}

        audio = self.parse_audio(data["inputs"])

        return self.asr_pipeline(audio, **parameters)
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|