Spaces:

3loi
/

WavLM-SER-Multi-Baseline-Odyssey2024

Running

3loi committed on Mar 5

Commit

76fe58b

•

1 Parent(s): ad02703

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,6 +9,8 @@ import numpy as np
 mean, std = -8.278621631819787e-05, 0.08485510250851999
 id2label = {0: 'arousal', 1: 'dominance', 2: 'valence'}
 description_text = "Multi-label (arousal, dominance, valence) Odyssey 2024 Emotion Recognition competition baseline model.<br> \
         The model is trained on MSP-Podcast. \
@@ -28,19 +30,22 @@ def classify_audio(audio_file):
     y = raw_wav.astype(np.float32, order='C') / np.iinfo(raw_wav.dtype).max
-    norm_wav = (y - mean) / (std+0.000001)
     mask = torch.ones(1, len(norm_wav))
     wavs = torch.tensor(norm_wav).unsqueeze(0)
     pred = model(wavs, mask).detach().numpy()
-    output = ''
-    if sr != 16000:
-        output += "{} sampling rate is uncompatible. The model was trained on {} sampleing rate\n".format(sr, 16000)
-#     for i, audio_pred in enumerate(pred):
-#         output[i] = {}
     for att_i, att_val in enumerate(pred[0]):
         output += "{}: \t{:0.4f}\n".format(id2label[att_i], att_val)

 mean, std = -8.278621631819787e-05, 0.08485510250851999
+model_sr=model.config.sampling_rate
 id2label = {0: 'arousal', 1: 'dominance', 2: 'valence'}
 description_text = "Multi-label (arousal, dominance, valence) Odyssey 2024 Emotion Recognition competition baseline model.<br> \
         The model is trained on MSP-Podcast. \
     y = raw_wav.astype(np.float32, order='C') / np.iinfo(raw_wav.dtype).max
+    output = ''
+    if sr != 16000:
+        y = librosa.resample(y, orig_sr=sr, target_sr=model_sr)
+        output += "{} sampling rate is uncompatible, converted to {} as the model was trained on {} sampling rate<br>".format(sr, model_sr, model_sr)
+    norm_wav = (y - mean) / (std+0.000001)
     mask = torch.ones(1, len(norm_wav))
     wavs = torch.tensor(norm_wav).unsqueeze(0)
     pred = model(wavs, mask).detach().numpy()
     for att_i, att_val in enumerate(pred[0]):
         output += "{}: \t{:0.4f}\n".format(id2label[att_i], att_val)