|
import logging |
|
from speechbrain.pretrained import EncoderClassifier |
|
from typing import Dict, List, Any |
|
import requests |
|
from pydub import AudioSegment |
|
from io import BytesIO |
|
import tempfile |
|
import os |
|
|
|
|
|
def save_chunks_to_temp_files(url, chunk_length=5000, *, timeout=30.0):
    """Split an audio source into fixed-length MP3 chunks saved as temp files.

    The audio is loaded either from a local path (``file://`` prefix) or by
    downloading ``url`` over HTTP. It is then sliced into consecutive pieces
    of ``chunk_length`` and each piece is exported to its own temporary
    ``.mp3`` file. When there is more than one chunk, the final chunk is
    replaced by the last ``chunk_length`` units of the audio, so it overlaps
    the previous chunk instead of being shorter than the others.

    Args:
        url: ``file://<path>`` for a local file, otherwise a URL to download.
        chunk_length: Slice length passed to ``AudioSegment`` slicing
            (pydub slices in milliseconds). Must be positive.
        timeout: Timeout in seconds handed to ``requests.get`` so that an
            unresponsive server cannot block the caller forever.

    Returns:
        List of paths to the temporary chunk files, in playback order. The
        caller owns these files and is responsible for deleting them.

    Raises:
        ValueError: If ``chunk_length`` is not positive, or if the HTTP
            response does not advertise an audio ``Content-Type``.
        requests.HTTPError: If the download returns an error status.
    """
    # Fail fast, before any download or decoding work is done.
    if chunk_length <= 0:
        raise ValueError("chunk_length must be a positive number")

    file_scheme = "file://"
    if url.startswith(file_scheme):
        audio_segment = AudioSegment.from_file(url[len(file_scheme):])
    else:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # A missing header is treated like a non-audio one instead of
        # surfacing as an unrelated KeyError.
        content_type = response.headers.get("Content-Type", "")
        if "audio" not in content_type:
            raise ValueError("URL does not seem to be an audio file")

        audio_segment = AudioSegment.from_file(BytesIO(response.content))

    chunks = [
        audio_segment[start : start + chunk_length]
        for start in range(0, len(audio_segment), chunk_length)
    ]

    # Keep every chunk the same length: the tail chunk is re-cut from the end
    # of the audio rather than left short.
    if len(chunks) > 1:
        chunks[-1] = audio_segment[-chunk_length:]

    temp_files = []
    try:
        for idx, chunk in enumerate(chunks):
            # Reserve a unique path, then release our handle so the exporter
            # can open the file itself on every platform.
            fd, chunk_path = tempfile.mkstemp(suffix=f"_chunk{idx}.mp3")
            os.close(fd)
            temp_files.append(chunk_path)

            # export() hands back the file object it opened; close it
            # explicitly instead of relying on garbage collection.
            chunk.export(chunk_path, format="mp3").close()
    except Exception:
        # Do not leave orphaned files behind when a later export fails.
        for leftover in temp_files:
            try:
                os.remove(leftover)
            except OSError:
                logging.warning("could not remove temp file %s", leftover)
        raise

    return temp_files
|
|
|
|
|
class EndpointHandler:
    """Callable endpoint that runs language identification on an audio URL.

    The audio referenced by the request is split into chunks by
    ``save_chunks_to_temp_files`` and each chunk is classified separately
    with the loaded SpeechBrain ``EncoderClassifier``.
    """

    def __init__(self, path=""):
        """Load the language-identification model.

        Args:
            path: Accepted but not used by this implementation; the model is
                always loaded from the fixed source identifier below.
        """
        self.model = EncoderClassifier.from_hparams(
            "speechbrain/lang-id-voxlingua107-ecapa"
        )
        print("model loaded")
        logging.info("model loaded")

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Classify the language of every chunk of the requested audio.

        Args:
            data: Request payload. The ``"inputs"`` key is popped from it and
                must hold the audio location as a string (an HTTP URL or a
                ``file://`` path).

        Returns:
            One dict per audio chunk, in order, with keys ``"prediction"``
            (a float score) and ``"language"`` (the predicted label).

        Raises:
            ValueError: If ``"inputs"`` is missing or is not a string.
        """
        url = data.pop("inputs", data)

        # Without this check a missing "inputs" key passes the whole payload
        # dict downstream, where it fails with an opaque AttributeError.
        if not isinstance(url, str):
            raise ValueError(
                'request must contain an "inputs" string with the audio URL'
            )

        print("audio_url", url)
        logging.info(f"audio_url {url}")

        response = []

        temp_filepaths = save_chunks_to_temp_files(url)
        total_chunks = len(temp_filepaths)
        try:
            for i, path in enumerate(temp_filepaths):
                logging.info(f"processing chunk {i} / {total_chunks}")

                output = self.model.classify_file(path)

                response.append(
                    {
                        # NOTE(review): output[1] is presumably a log-score,
                        # hence the exp(); confirm against the SpeechBrain
                        # classify_file return tuple.
                        "prediction": float(output[1].exp()[0]),
                        "language": output[3][0],
                    }
                )
        finally:
            # Remove every chunk file even when classification fails part-way,
            # so a bad request cannot leak temp files.
            for path in temp_filepaths:
                try:
                    os.remove(path)
                except OSError:
                    logging.warning("could not remove temp file %s", path)

        return response
|
|