import os

from faster_whisper import WhisperModel
from tqdm import tqdm

from App.Transcription.Schemas import TranscriptionMetadata

model_size = "tiny"


def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe an audio file word by word, reporting progress via ``state``.

    A ``WhisperModel`` is loaded on CPU with int8 compute, the file is
    transcribed with VAD filtering and word timestamps, and every word is
    appended to the result while progress metadata is pushed through
    ``state.update_state``.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``
            (presumably a bound Celery task -- confirm against the caller).
        file_path: Path of the audio file. The file is deleted once the
            word loop finishes, whether it succeeded or failed.
        model_size: Whisper model size/name passed to ``WhisperModel``.

    Returns:
        On success, a list of ``{"start", "end", "text"}`` dicts, one per
        word. If the word loop raises, the string ``"Falied error <exc>"``
        is returned instead (spelling kept for backward compatibility).

    Raises:
        Any exception raised while loading the model or starting the
        transcription propagates to the caller; in that case the file is
        NOT deleted.
    """
    result = []
    metadata = TranscriptionMetadata()
    metadata.logs = "Transcription STARTING"
    state.update_state(state="PROGRESS", meta=metadata.dict())

    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        file_path, beam_size=5, vad_filter=True, word_timestamps=True
    )

    total_duration = round(info.duration, 2)
    # NOTE: this used to be wrapped in parentheses with a trailing comma,
    # which stored a 1-tuple in ``logs`` instead of the message string.
    metadata.logs = (
        f"Detected language '{info.language}' "
        f"with probability {info.language_probability:f}"
    )
    metadata.language = info.language
    metadata.duration = int(total_duration)
    state.update_state(state="PROGRESS", meta=metadata.dict())

    try:
        with tqdm(total=total_duration, unit=" seconds") as pbar:
            # ``segments`` is consumed lazily here, so errors raised while
            # iterating it are handled by the ``except`` below.
            for segment in segments:
                # Tolerate a segment that carries no word-level data.
                for word in segment.words or ():
                    word_duration = word.end - word.start
                    result.append(
                        {
                            "start": word.start,
                            "end": word.end,
                            "text": word.word,
                        }
                    )
                    metadata.logs = "Transcribing.."
                    # A clip whose duration rounds to 0 would otherwise turn
                    # a successful run into a ZeroDivisionError failure.
                    if total_duration:
                        percent = (word.end / total_duration) * 100
                    else:
                        percent = 100.0
                    metadata.percentage = f"{percent}"
                    state.update_state(state="PROGRESS", meta=metadata.dict())
                    pbar.update(word_duration)
    except Exception as e:
        # Message text and the custom "FAILED" state are kept verbatim
        # because callers may match on them.
        metadata.logs = f"Falied error {e}"
        state.update_state(state="FAILED", meta=metadata.dict())
        return f"Falied error {e}"
    finally:
        # Single cleanup point for both the success and failure paths.
        os.remove(file_path)

    return result