tranny / App /Transcription /Utils /audio_transcription.py
Mbonea's picture
minor bug
b712add
raw
history blame
1.31 kB
from faster_whisper import WhisperModel
from tqdm import tqdm
import os
# Module-level default Whisper model size.
# NOTE(review): transcribe_file() declares its own `model_size` parameter
# (also defaulting to "tiny"), which shadows this global; nothing in the
# visible code reads this value -- confirm whether other modules import it.
model_size = "tiny"
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file with faster-whisper, reporting progress.

    Progress is published through ``state.update_state(state="PROGRESS",
    meta=...)`` and mirrored on a ``tqdm`` progress bar. The input file is
    deleted before this function exits, whether transcription succeeds or
    raises.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``
            (presumably a bound Celery task -- confirm against the caller).
        file_path: Path of the audio file to transcribe. The file is
            removed when this function finishes.
        model_size: Model name/size handed to ``WhisperModel``.

    Returns:
        dict: Maps a ``"[<start>s -> <end>s]"`` label (two decimals) to the
        text of the corresponding segment, in the order segments arrive.

    Raises:
        Whatever ``WhisperModel`` / ``transcribe`` / ``update_state`` raise;
        the input file is still cleaned up in that case.
    """
    state.update_state(
        state="PROGRESS",
        meta={"logs": "Starting"},
    )
    result = {}
    try:
        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments, info = model.transcribe(file_path, beam_size=5)
        total_duration = round(info.duration, 2)
        state.update_state(
            state="PROGRESS",
            meta={
                "logs": "Detected language '%s' with probability %f"
                % (info.language, info.language_probability),
            },
        )
        with tqdm(total=total_duration, unit=" seconds") as pbar:
            for segment in segments:
                segment_duration = segment.end - segment.start
                # NOTE: two segments whose bounds round to the same label
                # overwrite each other in the result mapping.
                time_stamp = "[%.2fs -> %.2fs]" % (segment.start, segment.end)
                result[time_stamp] = segment.text

                if total_duration > 0:
                    # The total is rounded, so a segment end can slightly
                    # overshoot it; never report more than 100%.
                    percentage = min(
                        segment.end / total_duration * 100, 100.0
                    )
                else:
                    # Zero-length audio: avoid ZeroDivisionError and treat
                    # the work as complete.
                    percentage = 100.0

                state.update_state(
                    state="PROGRESS",
                    meta={
                        "done": segment.end,
                        "total": total_duration,
                        "percentage": f"{percentage}",
                    },
                )
                pbar.update(segment_duration)
    finally:
        # Always delete the input file so failed runs do not leave it on
        # disk. A file that is already gone must not mask the real error.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            pass
    return result