from faster_whisper import WhisperModel
from tqdm import tqdm
import os
from App.Transcription.Schemas import TranscriptionMetadata

model_size = "tiny"


def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file with faster-whisper, reporting progress.

    Progress is published through ``state.update_state(state=..., meta=...)``
    using a ``TranscriptionMetadata`` payload (``logs``, ``language`` and
    ``percentage`` are the fields written here).

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``.
            NOTE(review): presumably a bound Celery task -- confirm with caller.
        file_path: Path of the audio file to transcribe. The file is deleted
            once the segment loop finishes, whether it succeeded or failed.
        model_size: Whisper model size/name passed to ``WhisperModel``.

    Returns:
        A dict mapping ``"[<start>s -> <end>s]"`` timestamps to the segment
        text, or ``None`` if an error occurred while iterating segments (in
        which case the ``"FAILED"`` state has been published).

    Raises:
        Any exception raised while loading the model or starting the
        transcription propagates to the caller; in that case the input file
        is NOT deleted (unchanged from the previous behaviour).
    """
    result = {}
    metadata = TranscriptionMetadata()

    metadata.logs = "STARTING"
    state.update_state(state="PROGRESS", meta=metadata.dict())

    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(file_path, beam_size=5)
    total_duration = round(info.duration, 2)

    # Plain string: the previous parenthesised form carried a trailing comma,
    # which silently turned ``logs`` into a 1-tuple.
    metadata.logs = (
        f"Detected language '{info.language}' "
        f"with probability {info.language_probability:f}"
    )
    metadata.language = info.language
    state.update_state(state="PROGRESS", meta=metadata.dict())

    try:
        with tqdm(total=total_duration, unit=" seconds") as pbar:
            # Per the faster-whisper docs ``segments`` is a lazy generator, so
            # the decoding work (and most failures) happen inside this loop.
            for segment in segments:
                time_stamp = f"[{segment.start:.2f}s -> {segment.end:.2f}s]"
                result[time_stamp] = segment.text

                # Guard against a duration that rounds to 0.0, which would
                # otherwise surface as a bogus "FAILED" transcription.
                fraction = segment.end / total_duration if total_duration else 1.0
                metadata.logs = "Transcribing.."
                metadata.percentage = f"{fraction * 100}"
                state.update_state(state="PROGRESS", meta=metadata.dict())

                pbar.update(segment.end - segment.start)
    except Exception as e:
        # Best-effort boundary: report the failure through the state object
        # instead of raising, and signal it to the caller with ``None``.
        metadata.logs = f"Falied error {e}"
        state.update_state(state="FAILED", meta=metadata.dict())
        return None
    finally:
        # Single cleanup point for both the success and the failure path.
        os.remove(file_path)

    return result