File size: 2,843 Bytes
452467a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f34b85f
 
 
452467a
 
 
 
f34b85f
 
 
452467a
 
 
 
f34b85f
 
 
452467a
 
 
 
f34b85f
 
 
452467a
 
 
 
 
 
f34b85f
 
 
452467a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f34b85f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import whisperx as whisper

from deep_translator import GoogleTranslator
import os
from whisperx.utils import write_vtt, write_srt, write_ass, write_tsv, write_txt


def detect_language(filename, model):
    """Detect the most probable language of an audio file.

    The audio at ``filename`` is loaded, padded/trimmed to the fixed window
    (30 seconds, per the original note), converted to a log-Mel spectrogram
    and passed to ``model.detect_language``. The most probable language is
    printed and returned.

    Args:
        filename: Path of the audio file to inspect.
        model: Object exposing ``device`` and ``detect_language(mel)``; the
            second item it returns is treated as a mapping of language to
            probability.

    Returns:
        A dict of the form ``{"detected_language": <language key>}``.
    """
    samples = whisper.pad_or_trim(whisper.load_audio(file=filename))
    # The spectrogram must live on the same device as the model.
    spectrogram = whisper.log_mel_spectrogram(samples).to(model.device)
    _, language_probs = model.detect_language(spectrogram)
    best_language = max(language_probs, key=language_probs.get)
    print(f"Detected language: {best_language}")
    return {"detected_language": best_language}


def translate_to_english(transcription, json=False):
    """Translate the text of every segment to English, in place.

    Args:
        transcription: When ``json`` is true, a sequence of segment dicts;
            otherwise a dict whose ``"segments"`` entry holds that sequence.
            Each segment's ``"text"`` value is overwritten with its
            translation.
        json: Selects which of the two shapes ``transcription`` has.

    Returns:
        The same ``transcription`` object that was passed in, after mutation.
    """
    segments = transcription if json else transcription["segments"]
    if not segments:
        # Nothing to translate, so no translator needs to be built.
        return transcription

    # One translator serves every segment; source language is auto-detected.
    translator = GoogleTranslator(source="auto", target="en")
    for segment in segments:
        segment["text"] = translator.translate(segment["text"])
    return transcription


def write(filename, dtype, result_aligned):
    """Write the aligned segments to a transcript file in the chosen format.

    The output path is ``filename`` with its extension replaced by the one
    matching ``dtype``, joined onto the current directory (``"."``).

    Args:
        filename: Path of the source media file; only its root (extension
            stripped) is used to build the output path.
        dtype: Output format: ``"vtt"``, ``"srt"``, ``"ass"``, ``"tsv"`` or
            ``"plain text"`` (written as ``.txt``). Any other value is
            ignored and nothing is written.
        result_aligned: Mapping whose ``"segments"`` entry is passed to the
            writer for the chosen format.
    """
    extensions = {
        "vtt": ".vtt",
        "srt": ".srt",
        "ass": ".ass",
        "tsv": ".tsv",
        "plain text": ".txt",
    }
    extension = extensions.get(dtype)
    if extension is None:
        # Unsupported formats have always been a silent no-op; keep that.
        return

    writers = {
        ".vtt": write_vtt,
        ".srt": write_srt,
        ".ass": write_ass,
        ".tsv": write_tsv,
        ".txt": write_txt,
    }
    out_path = os.path.join(".", os.path.splitext(filename)[0] + extension)
    with open(out_path, "w", encoding="utf-8") as out_file:
        writers[extension](result_aligned["segments"], file=out_file)


def read(filename, transc):
    """Read back a transcript file and return its lines joined by spaces.

    The file opened is ``filename`` with its extension replaced by
    ``transc``. The root is taken with ``os.path.splitext`` so that dots
    elsewhere in the path (``"./clip.mp3"``, ``"my.talk.mp3"``) are kept,
    matching how ``write`` builds its output path.

    Args:
        filename: Path of the source media file.
        transc: Transcript format / extension; ``"plain text"`` is mapped to
            ``"txt"``.

    Returns:
        The file's lines (line endings retained) joined with a single space.
    """
    if transc == "plain text":
        transc = "txt"
    root = os.path.splitext(filename)[0]
    with open(f"{root}.{transc}", encoding="utf-8") as f:
        lines = f.readlines()
    return " ".join(lines)


from constants import language_dict


def get_key(val):
    """Return the first key of ``language_dict`` whose value equals ``val``.

    Falls back to the string ``"Key not found"`` when no value matches.
    """
    matching_keys = (key for key, value in language_dict.items() if val == value)
    return next(matching_keys, "Key not found")