Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -9,12 +9,20 @@ import numpy as np
 
 
 
+
+
+
 def classify_audio(audio_file):
     model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes", trust_remote_code=True)
-
-
-
-
+    sr, raw_wav = audio_file
+
+    print(audio_file, audio_file[1].dtype)
+    y = raw_wav.astype(np.float32)
+    y /= np.max(np.abs(y))
+
+
+    #raw_wav, _ librosa.load(audio_file, sr=16000)
+    norm_wav = (y - mean) / (std+0.000001)
 
     mask = torch.ones(1, len(norm_wav))
     wavs = torch.tensor(norm_wav).unsqueeze(0)
@@ -25,11 +33,9 @@ def classify_audio(audio_file):
 
 
 def main():
-    audio_input = gr.inputs.Audio(source="upload", type="filepath")
-    output_text = gr.outputs.Textbox()
 
-    iface = gr.Interface(fn=classify_audio, inputs=
-                         outputs=
+    iface = gr.Interface(fn=classify_audio, inputs=gr.Audio(sources=["upload", "microphone"], label="Audio file"),
+                         outputs=gr.Text(), title="Speech Emotion Recognition App",
                          description="Upload an audio file and hit the 'Submit'\
                          button")
 
@@ -38,4 +44,4 @@ def main():
 
 if __name__ == '__main__':
     main()
-
+
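
The commit swaps the long-removed gr.inputs.Audio / gr.outputs.Textbox helpers for the current gr.Audio / gr.Text components and normalizes the raw waveform before building the mask and wavs tensors, which is what addresses the Space's runtime error. The diff does not show where mean and std are defined, what the model call returns, or how the interface is launched, so the sketch below fills those gaps with assumptions: the normalization statistics are read from model.config, the remote-code model is called as model(wavs, mask), and main() ends with iface.launch(). Treat it as an illustration of the updated app.py, not the exact file.

# Hypothetical, self-contained version of app.py after this commit.
# Anything not visible in the diff (mean/std, the inference call, launch()) is an assumption.
import gradio as gr
import numpy as np
import torch
from transformers import AutoModelForAudioClassification


def classify_audio(audio_file):
    # Loading inside the handler mirrors the diff; loading once at module level
    # would avoid re-instantiating the model on every request.
    model = AutoModelForAudioClassification.from_pretrained(
        "3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes", trust_remote_code=True
    )

    # gr.Audio with the default type="numpy" passes a (sample_rate, waveform) tuple.
    sr, raw_wav = audio_file

    # Peak-normalize, then standardize; the diff uses `mean` and `std` without
    # defining them, so reading them from the model config is an assumption.
    y = raw_wav.astype(np.float32)
    y /= np.max(np.abs(y))
    mean = model.config.mean
    std = model.config.std
    norm_wav = (y - mean) / (std + 0.000001)

    mask = torch.ones(1, len(norm_wav))
    wavs = torch.tensor(norm_wav).unsqueeze(0)

    # Assumed inference step: the remote-code model takes the waveform batch and
    # attention mask and returns per-attribute predictions.
    with torch.no_grad():
        pred = model(wavs, mask)

    # Return the label mapping and raw predictions as text for the gr.Text() output.
    return f"{model.config.id2label}\n{pred}"


def main():
    iface = gr.Interface(
        fn=classify_audio,
        inputs=gr.Audio(sources=["upload", "microphone"], label="Audio file"),
        outputs=gr.Text(),
        title="Speech Emotion Recognition App",
        description="Upload an audio file and hit the 'Submit' button",
    )
    iface.launch()  # not shown in the diff, but required for the Space to serve


if __name__ == '__main__':
    main()

Note that the sources=["upload", "microphone"] argument belongs to the newer gr.Audio API, so the Space's requirements.txt needs a correspondingly recent Gradio pin for this change to take effect.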