# Source: Hugging Face Space "ashhadahsan/whisperX" (status: Running; file size: 2,843 bytes)

import whisperx as whisper

from deep_translator import GoogleTranslator
import os
from whisperx.utils import write_vtt, write_srt, write_ass, write_tsv, write_txt


def detect_language(filename, model):
    """Detect the most probable spoken language of an audio file.

    Args:
        filename: Path of the audio file, passed to ``whisper.load_audio``.
        model: Model object exposing ``device`` and ``detect_language``.

    Returns:
        A dict with the single key ``"detected_language"`` whose value is the
        key of the highest-probability entry reported by the model.
    """
    # Load the audio and pad/trim it to fit the 30-second window.
    waveform = whisper.pad_or_trim(whisper.load_audio(file=filename))

    # Build the log-Mel spectrogram on the same device as the model.
    spectrogram = whisper.log_mel_spectrogram(waveform).to(model.device)

    _unused, language_probs = model.detect_language(spectrogram)

    # Pick the winner once and reuse it for both the log line and the result.
    best_language = max(language_probs, key=language_probs.get)
    print(f"Detected language: {best_language}")
    return {"detected_language": best_language}


def translate_to_english(transcription, json=False):
    """Translate the text of every segment to English, in place.

    Args:
        transcription: When ``json`` is true, an iterable of segment dicts.
            Otherwise a dict whose ``"segments"`` entry is such an iterable.
            Every segment dict must carry a ``"text"`` entry.
        json: Selects which of the two shapes ``transcription`` has.

    Returns:
        The same ``transcription`` object that was passed in, with each
        segment's ``"text"`` replaced by its translation.
    """
    segments = transcription if json else transcription["segments"]
    if not segments:
        # Nothing to translate, so skip building a translator at all.
        return transcription

    # One translator serves every segment; the settings never change
    # between iterations, so there is no reason to rebuild it per segment.
    translator = GoogleTranslator(source="auto", target="en")
    for segment in segments:
        segment["text"] = translator.translate(segment["text"])
    return transcription


def write(filename, dtype, result_aligned):
    """Write the aligned segments to a subtitle/text file next to ``filename``.

    The output path is ``filename`` with its extension replaced by the one
    matching ``dtype``, joined onto the current directory.

    Args:
        filename: Name of the source media file; only its stem is used.
        dtype: Output format: ``"vtt"``, ``"srt"``, ``"ass"``, ``"tsv"`` or
            ``"plain text"`` (written as ``.txt``). Any other value is
            ignored and nothing is written.
        result_aligned: Mapping whose ``"segments"`` entry is handed to the
            matching ``whisperx.utils`` writer.

    Returns:
        None.
    """
    extension_by_dtype = {
        "vtt": "vtt",
        "srt": "srt",
        "ass": "ass",
        "tsv": "tsv",
        "plain text": "txt",
    }
    extension = extension_by_dtype.get(dtype)
    if extension is None:
        # Unknown formats have always been a silent no-op; keep that contract.
        return

    writer_by_extension = {
        "vtt": write_vtt,
        "srt": write_srt,
        "ass": write_ass,
        "tsv": write_tsv,
        "txt": write_txt,
    }
    output_path = os.path.join(
        ".", f"{os.path.splitext(filename)[0]}.{extension}"
    )
    with open(output_path, "w", encoding="utf-8") as handle:
        writer_by_extension[extension](result_aligned["segments"], file=handle)


def read(filename, transc):
    """Read back a transcript file that sits next to ``filename``.

    Args:
        filename: Name of the source media file; its extension is replaced
            by the transcript extension.
        transc: Transcript format, used as the file extension.
            ``"plain text"`` is mapped to ``"txt"``.

    Returns:
        The file's lines joined with a single space. Each line keeps its own
        trailing newline.

    Raises:
        OSError: If the transcript file cannot be opened.
    """
    extension = "txt" if transc == "plain text" else transc

    # Strip only the final extension. Splitting on the first "." would
    # truncate names such as "./clip.wav" or "my.clip.wav" and would not
    # match the path the writer derives with os.path.splitext.
    stem = os.path.splitext(filename)[0]

    with open(f"{stem}.{extension}", encoding="utf-8") as handle:
        lines = handle.readlines()
    return " ".join(lines)


from constants import language_dict


def get_key(val):
    """Return the first key in ``language_dict`` whose value equals ``val``.

    Args:
        val: Value to look up among the values of ``language_dict``.

    Returns:
        The first matching key in the dict's iteration order, or the string
        ``"Key not found"`` when no value matches.
    """
    matching_keys = (
        name for name, mapped in language_dict.items() if val == mapped
    )
    return next(matching_keys, "Key not found")