from faster_whisper import WhisperModel
from tqdm import tqdm
import os

# Module-level default model name. Note that transcribe_file() does not read
# this global: its own `model_size` parameter (also defaulting to "tiny")
# shadows it inside the function body.
model_size = "tiny"


def transcribe_file(state, file_path, model_size="tiny"):
    """Transcribe a media file with faster-whisper, reporting progress.

    Progress is published through ``state.update_state`` with the state
    name ``"PROGRESS"`` and mirrored on a tqdm bar measured in seconds of
    audio. After a successful run the input file is removed from disk.

    Args:
        state: Object exposing ``update_state(state=..., meta=...)``.
            Presumably a bound Celery task -- confirm against the caller.
        file_path: Path of the file to transcribe. It is passed to
            ``WhisperModel.transcribe`` and later to ``os.remove``.
        model_size: Model identifier handed to ``WhisperModel``.

    Returns:
        A dict mapping ``"[<start>s -> <end>s]"`` labels (two decimals) to
        the text of the corresponding segment. Segments whose labels are
        identical overwrite each other.

    Raises:
        OSError: If the input file cannot be removed after transcription.
        Any exception raised by model loading or transcription propagates
        unchanged; in that case the input file is NOT removed.
    """
    state.update_state(
        state="PROGRESS",
        meta={"logs": "Starting"},
    )
    result = {}
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(file_path, beam_size=5)

    total_duration = round(info.duration, 2)
    state.update_state(
        state="PROGRESS",
        meta={
            "logs": "Detected language '%s' with probability %f"
            % (info.language, info.language_probability),
        },
    )

    # Furthest point of the audio (in seconds) already shown on the bar.
    position = 0.0
    with tqdm(total=total_duration, unit=" seconds") as pbar:
        for segment in segments:
            time_stamp = "[%.2fs -> %.2fs]" % (segment.start, segment.end)
            result[time_stamp] = segment.text

            if total_duration > 0:
                # total_duration is rounded, so segment.end may exceed it
                # slightly; cap the value so it never reports above 100.
                percentage = min((segment.end / total_duration) * 100, 100.0)
            else:
                # Guard against ZeroDivisionError for zero-length audio.
                percentage = 100.0

            state.update_state(
                state="PROGRESS",
                meta={
                    "done": segment.end,
                    "total": total_duration,
                    "percentage": f"{percentage}",
                },
            )

            # Advance the bar to the end of this segment rather than by the
            # segment's own length: gaps between segments would otherwise
            # be skipped and the bar would drift from the "done" value.
            if segment.end > position:
                pbar.update(segment.end - position)
                position = segment.end

    # Delete the input now that it has been fully transcribed.
    # NOTE(review): this is only reached on success; when transcription
    # raises, the file is left on disk -- confirm that is intended.
    os.remove(file_path)

    return result