"""Helpers for language detection, translation and transcript file I/O."""

import os

import whisperx as whisper
from deep_translator import GoogleTranslator
from whisperx.utils import write_vtt, write_srt, write_ass, write_tsv, write_txt

from constants import language_dict

# Maps the ``dtype`` accepted by ``write`` to (file extension, writer function).
_WRITERS = {
    "vtt": ("vtt", write_vtt),
    "srt": ("srt", write_srt),
    "ass": ("ass", write_ass),
    "tsv": ("tsv", write_tsv),
    "plain text": ("txt", write_txt),
}


def _transcript_path(filename, extension):
    """Return the path of the transcript file that belongs to *filename*.

    The media file's own extension is replaced by *extension*. ``write`` and
    ``read`` both go through this helper so they always agree on the path,
    including for names that contain extra dots or a leading ``./``.
    """
    stem = os.path.splitext(filename)[0]
    return os.path.join(".", f"{stem}.{extension}")


def detect_language(filename, model):
    """Detect the language of the audio in *filename* using *model*.

    Args:
        filename: Path of the audio file to load.
        model: Model object exposing ``device`` and ``detect_language(mel)``.

    Returns:
        A dict ``{"detected_language": <key with the highest probability>}``.
    """
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(file=filename)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    _, probs = model.detect_language(mel)
    detected = max(probs, key=probs.get)
    print(f"Detected language: {detected}")
    return {"detected_language": detected}


def translate_to_english(transcription, json=False):
    """Translate the ``"text"`` of every segment to English, in place.

    Args:
        transcription: When *json* is true, an iterable of segment dicts;
            otherwise a dict whose ``"segments"`` entry is that iterable.
        json: Selects which of the two layouts above *transcription* uses.
            (The name shadows the ``json`` module but is part of the
            existing call signature.)

    Returns:
        The same *transcription* object, with each segment's text replaced.
    """
    segments = transcription if json else transcription["segments"]
    # One translator is enough; it does not need rebuilding per segment.
    translator = GoogleTranslator(source="auto", target="en")
    for segment in segments:
        segment["text"] = translator.translate(segment["text"])
    return transcription


def write(filename, dtype, result_aligned):
    """Write ``result_aligned["segments"]`` next to *filename* as *dtype*.

    Args:
        filename: Source media path; its extension is swapped for the
            transcript extension.
        dtype: One of ``"vtt"``, ``"srt"``, ``"ass"``, ``"tsv"`` or
            ``"plain text"``. Any other value is ignored and nothing is
            written.
        result_aligned: Dict holding the segments under ``"segments"``.
    """
    entry = _WRITERS.get(dtype)
    if entry is None:
        # Unknown formats were silently skipped before; keep that contract.
        return

    extension, writer = entry
    path = _transcript_path(filename, extension)
    with open(path, "w", encoding="utf-8") as handle:
        writer(result_aligned["segments"], file=handle)


def read(filename, transc):
    """Return the contents of the transcript written for *filename*.

    Args:
        filename: Source media path that was passed to ``write``.
        transc: Transcript format; ``"plain text"`` maps to the ``txt``
            extension, any other value is used as the extension itself.

    Returns:
        The file's lines joined with a single space. Each line keeps its
        trailing newline.
    """
    extension = "txt" if transc == "plain text" else transc
    with open(_transcript_path(filename, extension), encoding="utf-8") as f:
        return " ".join(f.readlines())


def get_key(val):
    """Return the first key in ``language_dict`` whose value equals *val*.

    Returns the string ``"Key not found"`` when no value matches.
    """
    return next(
        (key for key, value in language_dict.items() if value == val),
        "Key not found",
    )