Commit 0d5f907 • Parent(s): dbee243
Liangcd committed: [demo] add vox_res34.onnx and cnc_res34.onnx

Files changed:
- app.py (+14, -9)
- pre_model/cnceleb_resnet34.onnx (+3, -0)
- pre_model/voxceleb_resnet34.onnx (+3, -0)
app.py CHANGED

@@ -99,10 +99,14 @@ def speaker_verification(audio_path1, audio_path2, lang='CN'):
     if audio_path1 == None or audio_path2 == None:
         output = OUTPUT_ERROR.format('Please enter two audios')
         return output
-    if lang == '
+    if lang == 'vox_res34_LM':
         model = OnnxModel('pre_model/voxceleb_resnet34_LM.onnx')
-    elif lang == '
+    elif lang == 'cnc_res34_LM':
         model = OnnxModel('pre_model/cnceleb_resnet34_LM.onnx')
+    elif lang == 'vox_res34':
+        model = OnnxModel('pre_model/voxceleb_resnet34.onnx')
+    elif lang == 'cnc_res34':
+        model = OnnxModel('pre_model/cnceleb_resnet34.onnx')
     else:
         output = OUTPUT_ERROR.format('Please select a language')
         return output

@@ -110,6 +114,7 @@ def speaker_verification(audio_path1, audio_path2, lang='CN'):
     emb2 = model.extract_embedding(audio_path2)
     cos_score = cosine_similarity(emb1.reshape(1, -1), emb2.reshape(1,
                                   -1))[0][0]
+    cos_score = (cos_score + 1) / 2.0

     if cos_score >= 0.5:
         output = OUTPUT_OK.format(cos_score * 100)
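The first two hunks extend speaker_verification() from two to four selectable ONNX models and rescale the raw cosine similarity from [-1, 1] to [0, 1] before the fixed 0.5 accept threshold (so the threshold now corresponds to a raw cosine of 0). Below is a minimal standalone sketch of that scoring path, assuming cosine_similarity is scikit-learn's; the MODEL_PATHS table and score_pair() helper are illustrative only and not part of the demo:

from sklearn.metrics.pairwise import cosine_similarity

# Radio choices and ONNX paths exactly as they appear in the diff above.
MODEL_PATHS = {
    'vox_res34_LM': 'pre_model/voxceleb_resnet34_LM.onnx',
    'cnc_res34_LM': 'pre_model/cnceleb_resnet34_LM.onnx',
    'vox_res34': 'pre_model/voxceleb_resnet34.onnx',
    'cnc_res34': 'pre_model/cnceleb_resnet34.onnx',
}

def score_pair(model, audio_path1, audio_path2):
    """Cosine similarity of two utterance embeddings, mapped from [-1, 1] to [0, 1]."""
    emb1 = model.extract_embedding(audio_path1)
    emb2 = model.extract_embedding(audio_path2)
    cos = cosine_similarity(emb1.reshape(1, -1), emb2.reshape(1, -1))[0][0]
    return (cos + 1) / 2.0  # -1 -> 0.0, 0 -> 0.5 (the accept threshold), 1 -> 1.0

# Usage, with OnnxModel taken from the demo's own code:
# model = OnnxModel(MODEL_PATHS['vox_res34'])
# same_speaker = score_pair(model, 'a.wav', 'b.wav') >= 0.5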
@@ -129,7 +134,7 @@ inputs = [
                     type="filepath",
                     optional=True,
                     label='Speaker#2'),
-    gr.Radio(['
+    gr.Radio(['vox_res34_LM', 'vox_res34', 'cnc_res34_LM', 'cnc_res34'], label='Language'),
 ]

 output = gr.outputs.HTML(label="")

@@ -143,12 +148,12 @@ article = (
     "</p>")

 examples = [
-    ['examples/BAC009S0764W0228.wav', 'examples/BAC009S0764W0328.wav', '
-    ['examples/BAC009S0913W0133.wav', 'examples/BAC009S0764W0228.wav', '
-    ['examples/00001_spk1.wav', 'examples/00003_spk2.wav', '
-    ['examples/00010_spk2.wav', 'examples/00024_spk1.wav', '
-    ['examples/00001_spk1.wav', 'examples/00024_spk1.wav', '
-    ['examples/00010_spk2.wav', 'examples/00003_spk2.wav', '
+    ['examples/BAC009S0764W0228.wav', 'examples/BAC009S0764W0328.wav', 'cnc_res34'],
+    ['examples/BAC009S0913W0133.wav', 'examples/BAC009S0764W0228.wav', 'cnc_res34'],
+    ['examples/00001_spk1.wav', 'examples/00003_spk2.wav', 'vox_res34'],
+    ['examples/00010_spk2.wav', 'examples/00024_spk1.wav', 'vox_res34_LM'],
+    ['examples/00001_spk1.wav', 'examples/00024_spk1.wav', 'vox_res34'],
+    ['examples/00010_spk2.wav', 'examples/00003_spk2.wav', 'vox_res34_LM'],
 ]

 interface = gr.Interface(
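The remaining hunks wire the same choice strings into the UI: gr.Radio supplies the third positional argument (lang) of speaker_verification, and each examples row prefills the three inputs in that order, so its last element must be one of the four radio values. A minimal sketch of that wiring, written against a current Gradio 3.x-style API as an assumption; the demo itself uses the older gr.inputs/gr.outputs classes shown in the diff:

import gradio as gr

def speaker_verification(audio_path1, audio_path2, lang='CN'):
    # The Radio selection arrives here positionally as `lang`,
    # e.g. 'vox_res34', matching the keys checked in the demo.
    return f"selected model: {lang}"

interface = gr.Interface(
    fn=speaker_verification,
    inputs=[
        gr.Audio(type="filepath", label='Speaker#1'),
        gr.Audio(type="filepath", label='Speaker#2'),
        gr.Radio(['vox_res34_LM', 'vox_res34', 'cnc_res34_LM', 'cnc_res34'],
                 label='Language'),
    ],
    outputs=gr.HTML(label=""),
    # Each row fills (Speaker#1, Speaker#2, Language) in order.
    examples=[['examples/00001_spk1.wav', 'examples/00003_spk2.wav', 'vox_res34']],
)
# interface.launch()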
pre_model/cnceleb_resnet34.onnx ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9af662558ff0b5aaa3bd31a4d10c3adc55d9fcdd376b7db6f0fbdaa8a49df31
+size 26530309
pre_model/voxceleb_resnet34.onnx ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7f0b7cb467fee82d251d980c47a93dd47387f80be58d389419e0a588338801a
+size 26530309
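Both new model files are checked in as Git LFS pointers: the repository stores only the object id (the SHA-256 of the payload) and the byte size, and git lfs pull fetches the actual ONNX weights. A small sketch for verifying fetched files against the pointers above; the expected values are copied from this commit, while the helper itself is illustrative and not part of the repo:

import hashlib
import os

# Expected (oid, size) pairs copied from the LFS pointer files added in this commit.
EXPECTED = {
    'pre_model/cnceleb_resnet34.onnx':
        ('e9af662558ff0b5aaa3bd31a4d10c3adc55d9fcdd376b7db6f0fbdaa8a49df31', 26530309),
    'pre_model/voxceleb_resnet34.onnx':
        ('a7f0b7cb467fee82d251d980c47a93dd47387f80be58d389419e0a588338801a', 26530309),
}

def check_lfs_file(path, expected_oid, expected_size):
    """Return True if the file on disk matches the LFS pointer's oid and size."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

for path, (oid, size) in EXPECTED.items():
    ok = check_lfs_file(path, oid, size)
    print(path, 'ok' if ok else 'MISMATCH (file may still be an un-pulled LFS pointer)')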