|
from faster_whisper import WhisperModel |
|
from tqdm import tqdm |
|
import os |
|
from App.Transcription.Schemas import TranscriptionMetadata |
|
|
|
# Module-level default Whisper model size. `transcribe_file` declares its own
# `model_size` parameter (same default), which shadows this name inside that
# function, so changing this value alone does not affect its calls.
model_size = "tiny"
|
|
|
|
|
def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file and report progress through ``state``.

    The file at ``file_path`` is consumed: it is deleted before this function
    returns or raises, whatever the outcome.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``; it is
            called with ``"PROGRESS"`` while working and ``"FAILED"`` if
            iterating the segments raises. (Presumably a bound Celery task --
            confirm against the caller.)
        file_path: Path of the audio file to transcribe.
        model_size: Model identifier passed to ``WhisperModel``.

    Returns:
        A dict mapping ``"[<start>s -> <end>s]"`` time stamps to the text of
        each segment, or ``None`` if an exception was raised while the
        segments were being produced.

    Raises:
        Any exception raised while loading the model or starting the
        transcription propagates to the caller (after the file is removed).
    """
    result = {}
    metadata = TranscriptionMetadata()

    try:
        metadata.logs = "Transcription STARTING"
        state.update_state(state="PROGRESS", meta=metadata.dict())

        model = WhisperModel(model_size, device="cpu", compute_type="int8")
        segments, info = model.transcribe(file_path, beam_size=5, vad_filter=True)

        total_duration = round(info.duration, 2)
        # Must be a plain string: a trailing comma inside the parentheses
        # would silently turn this into a 1-tuple.
        metadata.logs = "Detected language '%s' with probability %f" % (
            info.language,
            info.language_probability,
        )
        metadata.language = info.language
        metadata.duration = int(total_duration)
        state.update_state(state="PROGRESS", meta=metadata.dict())

        # Per the faster-whisper docs, `segments` is a lazy generator: the
        # actual decoding happens while iterating, so failures surface here.
        try:
            metadata.logs = "Transcribing.."
            with tqdm(total=total_duration, unit=" seconds") as pbar:
                for segment in segments:
                    time_stamp = "[%.2fs -> %.2fs]" % (segment.start, segment.end)
                    result[time_stamp] = segment.text

                    if total_duration > 0:
                        # `total_duration` is rounded, so the last segment
                        # may land marginally past it; never report > 100.
                        percent = min((segment.end / total_duration) * 100, 100.0)
                    else:
                        # Audio shorter than the rounding precision: avoid
                        # dividing by zero and treat any segment as complete.
                        percent = 100.0
                    metadata.percentage = f"{percent}"
                    state.update_state(state="PROGRESS", meta=metadata.dict())

                    pbar.update(segment.end - segment.start)
        except Exception as e:
            metadata.logs = f"Falied error {e}"
            state.update_state(state="FAILED", meta=metadata.dict())
            return None

        return result
    finally:
        # Single cleanup point so the file is also removed when model loading
        # or `model.transcribe` raises before the segment loop is reached.
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone; do not let cleanup mask the real error/result.
            pass
|
|