import gradio as gr
import torch
from transformers import pipeline


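# The 45 language labels of the CommonLanguage dataset, indexed by class id.
# Kept for reference only; the pipeline below already returns label strings.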
language_classes = {
    0: "Arabic",
    1: "Basque",
    2: "Breton",
    3: "Catalan",
    4: "Chinese_China",
    5: "Chinese_Hongkong",
    6: "Chinese_Taiwan",
    7: "Chuvash",
    8: "Czech",
    9: "Dhivehi",
    10: "Dutch",
    11: "English",
    12: "Esperanto",
    13: "Estonian",
    14: "French",
    15: "Frisian",
    16: "Georgian",
    17: "German",
    18: "Greek",
    19: "Hakha_Chin",
    20: "Indonesian",
    21: "Interlingua",
    22: "Italian",
    23: "Japanese",
    24: "Kabyle",
    25: "Kinyarwanda",
    26: "Kyrgyz",
    27: "Latvian",
    28: "Maltese",
    29: "Mongolian",
    30: "Persian",
    31: "Polish",
    32: "Portuguese",
    33: "Romanian",
    34: "Romansh_Sursilvan",
    35: "Russian",
    36: "Sakha",
    37: "Slovenian",
    38: "Spanish",
    39: "Swedish",
    40: "Tamil",
    41: "Tatar",
    42: "Turkish",
    43: "Ukranian",
    44: "Welsh"
}




username = "jpbello"  # Hugging Face account that hosts the fine-tuned checkpoint
model_id = f"{username}/Hubert_emotion-finetuned-common_language"

# Load the audio-classification pipeline on GPU when available, otherwise on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline("audio-classification", model=model_id, device=device)

# Alternative inference path that truncates long inputs to 30 seconds (16 kHz * 30 samples)
# before running the model. Left disabled; the plain pipeline call below suffices here.
# def predict_trunc(filepath):
#     preprocessed = pipe.preprocess(filepath)
#     truncated = pipe.feature_extractor.pad(preprocessed, truncation=True, max_length=16_000 * 30)
#     model_outputs = pipe.forward(truncated)
#     outputs = pipe.postprocess(model_outputs)
#     return outputs


def classify_audio(filepath):
    """Run language identification on an audio file and return a {label: score} dict."""
    preds = pipe(filepath)
    # preds = predict_trunc(filepath)  # swap in to use the truncated path above
    return {p["label"]: p["score"] for p in preds}
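
# Example (illustrative scores only): classify_audio("EN_0212.wav") would return a dict
# such as {"English": 0.97, "Welsh": 0.01, ...}, mapping the pipeline's top predictions
# to their confidence scores, which gr.Label renders as a ranked bar list.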


title = "Language Classification Model"
description = (
    "Welcome to the Language Classification Model demo, built with Gradio around a HuBERT "
    "checkpoint fine-tuned on the CommonLanguage dataset. The model identifies the language "
    "spoken in an audio sample and returns confidence scores for its top predictions. "
    "Upload an audio file, or try the provided example clips to see the model in action!"
)
filenames = ["EN_0212.wav", "FR_0061.wav", "JP_0100.wav", "AR_0019.wav"]
filenames = [[f"./{f}"] for f in filenames]  # Gradio expects one list of inputs per example row
demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Label(label="Predictions")],
    title=title,
    description=description,
    examples=filenames,
)
demo.launch()
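
# Note: when running this script locally rather than on a hosted Space, demo.launch(share=True)
# can be used to expose a temporary public URL for the demo.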