YaTharThShaRma999's picture
Create rw_modules/whisper.py
ceba47d verified
raw
history blame
No virus
1.05 kB
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
class whisperHF:
    """Wrapper around a Hugging Face automatic-speech-recognition pipeline.

    The constructor loads a speech seq2seq checkpoint in float16, moves it to
    the requested device and builds a ``transformers`` pipeline from it. The
    two ``infer*`` methods run that pipeline on an audio input.
    """

    def __init__(self, model_path: str = 'distil-whisper/distil-large-v3', device: str = "cuda:0") -> None:
        """Load the model and processor and assemble the ASR pipeline.

        Args:
            model_path: Identifier or path handed to ``from_pretrained`` for
                both the model and its processor.
            device: Device the model is moved to and the pipeline runs on.
        """
        self.device = device

        model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        )
        # Inference only: switch to eval mode before moving to the device.
        self.model = model.eval().to(device)

        self.processor = AutoProcessor.from_pretrained(model_path)

        self.pipe = pipeline(
            "automatic-speech-recognition",
            model=self.model,
            tokenizer=self.processor.tokenizer,
            feature_extractor=self.processor.feature_extractor,
            max_new_tokens=128,
            torch_dtype=torch.float16,
            device=self.device,
        )

    def infer(self, file="sound.mp3"):
        """Transcribe ``file`` and return the ``"text"`` entry of the result.

        Args:
            file: Audio input passed straight to the pipeline (defaults to the
                path ``"sound.mp3"``).
        """
        result = self.pipe(file)
        return result["text"]

    def infer_timestep(self, file="sound.mp3"):
        """Transcribe ``file`` with timestamps and return the ``"chunks"`` entry.

        Args:
            file: Audio input passed straight to the pipeline (defaults to the
                path ``"sound.mp3"``).

        Returns:
            The value stored under ``"chunks"`` in the pipeline output when
            called with ``return_timestamps=True`` (presumably a list of
            per-segment dicts; confirm against the transformers docs).
        """
        # Use this instance's pipeline and the caller-supplied input; the
        # previous code referenced undefined globals ``pipe`` and ``sample``.
        result = self.pipe(file, return_timestamps=True)
        return result["chunks"]