Spaces:
Runtime error
Runtime error
File size: 2,129 Bytes
fa6f424 c438acc fa6f424 e9355ad fa6f424 c438acc fa6f424 c438acc fa6f424 e25d134 c438acc e9355ad c438acc e9355ad c438acc e9355ad c438acc e9355ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import torch
from resources import set_start, audit_elapsedtime
#Speech to text transcription model
def init_model_trans():
    """Build the Whisper speech-to-text pipeline used for transcription.

    Loads the ``openai/whisper-large-v3`` checkpoint and its processor, places
    the model on the first CUDA device in float16 when CUDA is available
    (otherwise on the CPU in float32), and wraps both in a Hugging Face
    ``automatic-speech-recognition`` pipeline.

    Returns:
        The configured pipeline object, ready to be called with an audio sample.
    """
    print("Initiating transcription model...")
    start = set_start()

    # Query CUDA once so device and dtype can never disagree.
    use_cuda = torch.cuda.is_available()
    device = "cuda:0" if use_cuda else "cpu"
    torch_dtype = torch.float16 if use_cuda else torch.float32

    model_id = "openai/whisper-large-v3"
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=True,
        use_safetensors=True,
    )
    model.to(device)

    processor = AutoProcessor.from_pretrained(model_id)

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=16,
        return_timestamps=True,
        torch_dtype=torch_dtype,
        device=device,
    )

    # Report the load time; previously the timer was started but never used.
    audit_elapsedtime(function="Init transcription model", start=start)
    print("Init model successful")
    return pipe
def transcribe(audio_sample: bytes, pipe) -> str:
    """Run the speech-to-text pipeline on one audio sample.

    Args:
        audio_sample: Audio input handed to ``pipe`` unchanged.
        pipe: Callable pipeline (as built by ``init_model_trans``) whose
            result supports ``["text"]`` lookup.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    print("Initiating transcription...")
    started_at = set_start()

    output = pipe(audio_sample)

    # Record how long the pipeline call took before logging its raw output.
    audit_elapsedtime(function="Transcription", start=started_at)
    print("transcription result", output)

    transcript = output["text"]
    return transcript
# def translate (audio_sample: bytes, pipe) -> str:
# print("Initiating Translation...")
# start = set_start()
# # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
# # sample = dataset[0]["audio"]
# #result = pipe(audio_sample)
# result = pipe(audio_sample, generate_kwargs={"task": "translate"})
# audit_elapsedtime(function="Translation", start=start)
# print("Translation result",result)
# #st.write('transcription: ', result["text"])
# return result["text"]