tranny / App /Transcription /Utils /audio_transcription.py
Mbonea's picture
better logging
f55cf0b
raw
history blame
1.66 kB
from faster_whisper import WhisperModel
from tqdm import tqdm
import os
from App.Transcription.Schemas import TranscriptionMetadata
# Module-level model size. Note that transcribe_file() declares its own
# ``model_size`` parameter (same default value), which shadows this name
# inside the function body.
model_size = "tiny"
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file and report progress through ``state``.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``
            (presumably a bound Celery task -- confirm against the caller).
        file_path: Path of the audio file to transcribe. The file is deleted
            once the segment loop has finished, whether it succeeded or not.
        model_size: Model size/name handed to ``WhisperModel``.

    Returns:
        A dict mapping ``"[<start>s -> <end>s]"`` time stamps to the text of
        each segment, or ``None`` when an error occurred while iterating the
        segments (the state is then set to ``"FAILED"``).

    Note:
        Exceptions raised while constructing the model or starting the
        transcription are not caught here; they propagate to the caller and
        the file is left in place.
    """
    result = {}
    metadata = TranscriptionMetadata()
    metadata.logs = "STARTING"
    state.update_state(state="PROGRESS", meta=metadata.dict())

    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(file_path, beam_size=5)
    total_duration = round(info.duration, 2)

    # Must be a plain string: a trailing comma inside the parentheses would
    # silently turn the log message into a one-element tuple.
    metadata.logs = "Detected language '%s' with probability %f" % (
        info.language,
        info.language_probability,
    )
    metadata.language = info.language
    state.update_state(state="PROGRESS", meta=metadata.dict())

    try:
        with tqdm(total=total_duration, unit=" seconds") as pbar:
            for segment in segments:
                time_stamp = "[%.2fs -> %.2fs]" % (segment.start, segment.end)
                result[time_stamp] = segment.text

                # total_duration is rounded to 2 decimals and can therefore be
                # 0 for very short audio; treat that case as fully processed
                # instead of failing on a division by zero.
                if total_duration:
                    percentage = (segment.end / total_duration) * 100
                else:
                    percentage = 100.0

                metadata.logs = "Transcribing.."
                metadata.percentage = f"{percentage}"
                state.update_state(state="PROGRESS", meta=metadata.dict())
                pbar.update(segment.end - segment.start)
    except Exception as e:
        # Best-effort boundary: report the failure through the task state
        # rather than raising, and signal it to the caller with None.
        metadata.logs = f"Falied error {e}"
        state.update_state(state="FAILED", meta=metadata.dict())
        return None
    finally:
        # The input file is removed on both the success and the failure path.
        os.remove(file_path)

    return result