Spaces:
Running
Running
Update apis/chat_api.py
Browse files- apis/chat_api.py +45 -1
apis/chat_api.py
CHANGED
@@ -313,7 +313,51 @@ async def whisper_transcribe(
|
|
313 |
language: str = Form(),
|
314 |
model: str = Form(),
|
315 |
):
|
316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
|
318 |
if __name__ == "__main__":
|
319 |
args = ArgParser().args
|
|
|
313 |
language: str = Form(),
|
314 |
model: str = Form(),
|
315 |
):
|
316 |
+
# Short model aliases accepted in the `model` form field, mapped to the
# identifier handed to `pipeline(model=...)`. The "default" entry is the
# fallback used when the requested alias is not listed here.
MODEL_MAP = {
    "whisper-small": "openai/whisper-small",
    "whisper-medium": "openai/whisper-medium",
    "whisper-large": "openai/whisper-large",
    "default": "openai/whisper-small",
}

# Upload content types the endpoint accepts. Only the keys are consulted
# (membership test against `audio_file.content_type`); each value mirrors
# its key.
AUDIO_MAP = {
    "audio/wav": "audio/wav",
    "audio/mpeg": "audio/mpeg",
    "audio/x-flac": "audio/x-flac",
}

# Response payload template returned to the caller. The key must be spelled
# "status": the code below writes item_response["status"], so the previous
# "statue" spelling left a stray extra key in every response.
item_response = {
    "status": 200,
    "result": "",
    "start": 0,
    "end": 0,
}
|
333 |
+
if audio_file.content_type in AUDIO_MAP.keys():
|
334 |
+
if model in MODEL_MAP.keys():
|
335 |
+
target_model = model
|
336 |
+
else:
|
337 |
+
target_model = "default"
|
338 |
+
|
339 |
+
real_name = MODEL_MAP[target_model]
|
340 |
+
device = 0 if torch.cuda.is_available() else "cpu"
|
341 |
+
pipe = pipeline(
|
342 |
+
task="automatic-speech-recognition",
|
343 |
+
model=real_name,
|
344 |
+
chunk_length_s=30,
|
345 |
+
device=device,
|
346 |
+
)
|
347 |
+
time_start = time.time()
|
348 |
+
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
|
349 |
+
text = pipe(audio_file)["text"]
|
350 |
+
time_end = time.time()
|
351 |
+
item_response["status"] = 200
|
352 |
+
item_response["result"] = text
|
353 |
+
item_response["start"] = time_start
|
354 |
+
item_response["end"] = time_end
|
355 |
+
else:
|
356 |
+
item_response["status"] = 400
|
357 |
+
item_response["result"] = 'Acceptable files: audio/wav,audio/mpeg,audio/x-flac'
|
358 |
+
|
359 |
+
|
360 |
+
return item_response
|
361 |
|
362 |
if __name__ == "__main__":
|
363 |
args = ArgParser().args
|