giulianopenido committed on
Commit
c08d737
1 Parent(s): 0c5d6ec

HFEndpoint handler created

Files changed (2)
  1. handler.py +46 -0
  2. tokenizer.json +0 -0
handler.py ADDED
@@ -0,0 +1,46 @@
+ from typing import Any, Dict, List
+
+ import numpy as np
+ import torch
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
+ from transformers.pipelines.audio_utils import ffmpeg_read
+
+ SAMPLE_RATE = 16000  # Whisper models expect audio sampled at 16 kHz
+
+
+ class EndpointHandler:
+     def __init__(self, path: str = ""):
+         torch_dtype = torch.float16  # fp16 inference; assumes a GPU instance
+         device = "cuda"
+
+         whisper_processor = WhisperProcessor.from_pretrained(path)
+         whisper_model = WhisperForConditionalGeneration.from_pretrained(
+             path,
+             torch_dtype=torch_dtype,
+         ).to(device)
+
+         self.asr_pipeline = pipeline(
+             "automatic-speech-recognition",
+             model=whisper_model,
+             tokenizer=whisper_processor.tokenizer,
+             feature_extractor=whisper_processor.feature_extractor,
+             chunk_length_s=30,  # split long-form audio into 30 s windows
+             torch_dtype=torch_dtype,
+             device=device,
+             generate_kwargs={"language": "pt"},  # force Portuguese; generation options go here, not in model_kwargs
+         )
+
+     def parse_audio(self, audio_bytes):
+         audio_nparray = ffmpeg_read(audio_bytes, SAMPLE_RATE)  # decode any ffmpeg-supported format to a float32 waveform
+
+         if len(audio_nparray.shape) > 1 and audio_nparray.shape[1] == 2:
+             return np.mean(audio_nparray, axis=1)  # downmix stereo to mono by averaging channels
+
+         return audio_nparray
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         parameters = data.get("parameters", {})  # optional pipeline kwargs, e.g. {"return_timestamps": True}
+
+         audio = self.parse_audio(data["inputs"])
+
+         return self.asr_pipeline(audio, **parameters)
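For a quick sanity check before deploying, the handler can be driven locally. A minimal sketch, assuming a CUDA machine, the repository files in the current directory, and a hypothetical sample.flac on disk:

from handler import EndpointHandler

handler = EndpointHandler(path=".")  # "." assumes the model files are checked out locally

with open("sample.flac", "rb") as f:  # hypothetical test file
    payload = {"inputs": f.read(), "parameters": {}}

print(handler(payload))  # e.g. {"text": "..."}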
tokenizer.json ADDED
The diff for this file is too large to render.
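Once deployed as a Hugging Face Inference Endpoint, the handler receives the raw request body as "inputs". A client-side sketch using the requests package, with placeholder endpoint URL and token (replace both with your deployment's values):

import requests

API_URL = "https://<endpoint-id>.endpoints.huggingface.cloud"  # placeholder URL
headers = {
    "Authorization": "Bearer <hf_token>",  # placeholder access token
    "Content-Type": "audio/flac",  # tells the endpoint how to deserialize the body
}

with open("sample.flac", "rb") as f:
    response = requests.post(API_URL, headers=headers, data=f.read())

print(response.json())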