File size: 1,596 Bytes
4f92cf0
 
 
2417027
 
4f92cf0
 
 
 
 
 
 
 
 
2417027
4f92cf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2417027
4f92cf0
 
 
6ef792a
4f92cf0
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr
from transformers import pipeline
from gradio_client import Client, file


def detect_language(file_path):
    """Classify an audio file as ``"russian"`` or ``"kazakh"``.

    The file is uploaded to the hosted language-identification Space via
    ``gradio_client`` and the returned label is collapsed into one of two
    buckets.

    Args:
        file_path: Path to the audio file to send to the Space.

    Returns:
        ``"russian"`` when the detected language is Russian, Belarusian or
        Ukrainian; ``"kazakh"`` for any other detected language.

    Raises:
        KeyError: If the Space response has no ``"label"`` entry.
    """
    # "belarusian" is the spelling expected from the VoxLingua107 label set;
    # the original "belarussian" is kept so that any label previously matched
    # still matches.
    russian_family = {"russian", "belarusian", "belarussian", "ukrainian"}

    # A new client is created on every call, as in the original code.
    client = Client("adrien-alloreview/speechbrain-lang-id-voxlingua107-ecapa")
    result = client.predict(param_0=file(file_path), api_name="/predict")

    # Labels are expected to look like "<code>: <Name>" (e.g. "ru: Russian").
    # If the separator is missing, fall back to the whole label instead of
    # failing with IndexError.
    label = result["label"]
    parts = label.split(": ")
    language_name = parts[1] if len(parts) > 1 else parts[0]

    if language_name.strip().lower() in russian_family:
        return "russian"
    return "kazakh"

# def detect_emotion(audio):
#     pipe = pipeline(
#         "audio-classification",
#         model="HowMannyMore/wav2vec2-lg-xlsr-ur-speech-emotion-recognition",
#     )
#     res = pipe(audio)
#     emotion_with_max_score = res[0]["label"]
#
#     return emotion_with_max_score
#
#
# def detect_toxic_local(text_whisper):
#     model_name_rus = "IlyaGusev/rubertconv_toxic_clf"
#     pipe = pipeline(
#         "text-classification",
#         model=model_name_rus,
#         tokenizer=model_name_rus,
#         framework="pt",
#         max_length=512,
#         truncation=True,
#         device=0,
#     )
#     res = pipe([text_whisper])[0]["label"]
#     if res == "toxic":
#         return True
#     if res == "neutral":
#         return False
#     else:
#         return None


# Gradio UI: one audio-upload input wired to detect_language, with the
# function's string result rendered as plain text.
gradio_app = gr.Interface(
    fn=detect_language,
    # Upload-only audio input; type="filepath" is presumably what makes the
    # callback receive a path string rather than raw samples — confirm
    # against the installed Gradio version.
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="text",
    # NOTE(review): the title mentions transcription, but the callback only
    # detects the language — confirm whether the title should be updated.
    title="File Upload Transcription",
    description="Upload an audio file to determine language."
)

# Launch the app only when this file is executed as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    gradio_app.launch()