Liangcd committed on
Commit
b72dbd0
1 Parent(s): 6f82c1e

[demo] support resampling audio

Browse files
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -15,7 +15,6 @@
15
 
16
  import json
17
  import gradio as gr
18
- import numpy as np
19
  import wenetruntime as wenet
20
  import librosa
21
 
@@ -27,12 +26,9 @@ decoder_cn = wenet.Decoder(lang='chs')
27
  def recognition(audio):
28
  if audio is None:
29
  return "Input Error! Please enter one audio!"
30
- sr, y = audio
31
  # NOTE: model supports 16k sample_rate
32
- if sr != 16000:
33
- y = librosa.resample((y / max(np.max(y), 1)).astype(np.float),
34
- sr, 16000)
35
- y = (y * (1 << 15)).astype("int16")
36
  ans = decoder_cn.decode(y.tobytes(), True)
37
  if ans is None:
38
  return "ERROR! No text output! Please try again!"
@@ -46,7 +42,7 @@ def recognition(audio):
46
 
47
 
48
  # input
49
- inputs = gr.inputs.Audio(source="microphone", type="numpy", label='Input audio')
50
 
51
  output = gr.outputs.Textbox(label="Output Text")
52
 
 
15
 
16
  import json
17
  import gradio as gr
 
18
  import wenetruntime as wenet
19
  import librosa
20
 
 
26
  def recognition(audio):
27
  if audio is None:
28
  return "Input Error! Please enter one audio!"
29
+ y, _ = librosa.load(audio, sr=16000)
30
  # NOTE: model supports 16k sample_rate
31
+ y = (y * (1 << 15)).astype("int16")
 
 
 
32
  ans = decoder_cn.decode(y.tobytes(), True)
33
  if ans is None:
34
  return "ERROR! No text output! Please try again!"
 
42
 
43
 
44
  # input
45
+ inputs = gr.inputs.Audio(source="microphone", type="filepath", label='Input audio')
46
 
47
  output = gr.outputs.Textbox(label="Output Text")
48