|
from faster_whisper import WhisperModel |
|
from tqdm import tqdm |
|
import os |
|
|
|
# Module-level default Whisper model size. Within the visible code,
# transcribe_file() takes its own `model_size` parameter (same default),
# which shadows this name inside the function.
model_size = "tiny"
|
|
|
|
|
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file, reporting progress through ``state``.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``.
            Presumably a Celery task instance -- confirm against the caller.
        file_path: Path of the media file to transcribe. The file is deleted
            once transcription has completed successfully.
        model_size: Model size/name passed to ``WhisperModel``.

    Returns:
        A dict mapping ``"[<start>s -> <end>s]"`` timestamp strings to the
        text of the corresponding segment, in the order segments were
        produced. Segments sharing identical rounded timestamps overwrite
        each other.
    """
    state.update_state(
        state="PROGRESS",
        meta={"logs": "Starting"},
    )

    # The model runs on CPU with int8 weights and is loaded on every call.
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(file_path, beam_size=5)

    total_duration = round(info.duration, 2)
    state.update_state(
        state="PROGRESS",
        meta={
            "logs": (
                f"Detected language '{info.language}' "
                f"with probability {info.language_probability:f}"
            ),
        },
    )

    result = {}
    with tqdm(total=total_duration, unit=" seconds") as pbar:
        for segment in segments:
            time_stamp = f"[{segment.start:.2f}s -> {segment.end:.2f}s]"
            result[time_stamp] = segment.text

            # total_duration is rounded to two decimals and can be 0 for very
            # short audio; avoid ZeroDivisionError and report completion.
            if total_duration:
                percentage = (segment.end / total_duration) * 100
            else:
                percentage = 100.0

            state.update_state(
                state="PROGRESS",
                meta={
                    "done": segment.end,
                    "total": total_duration,
                    "percentage": f"{percentage}",
                },
            )
            # Advance the console progress bar by this segment's length.
            pbar.update(segment.end - segment.start)

    # Remove the source file only after a successful run, so a failed
    # transcription leaves the input in place.
    os.remove(file_path)

    return result
|
|