Jiedong Yang committed on
Commit
7d23a88
•
1 Parent(s): 72033da

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -1,6 +1,4 @@
1
  import os
2
- import pafy
3
- import time
4
  import whisper
5
  import validators
6
  import gradio as gr
@@ -12,6 +10,16 @@ asr_model = whisper.load_model('base.en')
12
  summarizer = gr.Interface.load("facebook/bart-large-cnn", src='huggingface')
13
 
14
 
 
 
 
 
 
 
 
 
 
 
15
  def audio_from_url(url, dst_dir='data', name=None, format='wav'):
16
  """ Download video from url and save the audio from video
17
 
@@ -40,9 +48,9 @@ def audio_from_url(url, dst_dir='data', name=None, format='wav'):
40
  def speech_to_text(audio, beam_size=5, best_of=5, language='en'):
41
  """ ASR inference with Whisper
42
 
43
- :param audio:
44
- :param beam_size:
45
- :param best_of:
46
  :param language:
47
  :return:
48
  """
@@ -90,7 +98,9 @@ with demo:
90
  # demo description
91
  gr.Markdown("""
92
  ## Speech Summarization with Whisper
93
- This space is intended to summarize a speech, a short one or long one, to save us sometime.
 
 
94
  1. Type in a youtube URL or upload an audio file
95
  2. Generate transcription with Whisper (Currently English Only)
96
  3. Summarize the transcribed speech
@@ -117,6 +127,9 @@ with demo:
117
  text = gr.Textbox(label="Transcription", placeholder="transcription")
118
 
119
  with gr.Row():
 
 
 
120
  beam_size_slider = gr.Slider(1, 10, value=5, step=1, label="param: beam_size")
121
  best_of_slider = gr.Slider(1, 10, value=5, step=1, label="param: best_of")
122
 
@@ -139,7 +152,7 @@ with demo:
139
  image = gr.Image(label="wordcloud", show_label=False).style(height=400, width=400)
140
 
141
  text.change(wordcloud_func, inputs=text, outputs=image)
142
-
143
 
144
  if __name__ == '__main__':
145
  demo.launch()
 
1
  import os
 
 
2
  import whisper
3
  import validators
4
  import gradio as gr
 
10
  summarizer = gr.Interface.load("facebook/bart-large-cnn", src='huggingface')
11
 
12
 
13
def load_model(name: str):
    """ Switch the Whisper checkpoint used for ASR inference

    Rebinds the module-level ``asr_model`` so later transcription calls use
    the newly selected checkpoint.

    :param name: model options, tiny or base only, for quick inference
    :return: the unchanged ``name``, so the dropdown wired to this callback
        keeps showing the current selection
    """
    # Without this declaration the assignment below would create a local
    # variable and the model selected in the UI would never take effect.
    global asr_model
    asr_model = whisper.load_model(f"{name.lower()}.en")
    return name
21
+
22
+
23
  def audio_from_url(url, dst_dir='data', name=None, format='wav'):
24
  """ Download video from url and save the audio from video
25
 
 
48
  def speech_to_text(audio, beam_size=5, best_of=5, language='en'):
49
  """ ASR inference with Whisper
50
 
51
+ :param audio: filepath
52
+ :param beam_size: beam search parameter
53
+ :param best_of: number of best results
54
  :param language:
55
  :return:
56
  """
 
98
  # demo description
99
  gr.Markdown("""
100
  ## Speech Summarization with Whisper
101
+ This space is intended to summarize a speech, a short one or long one, to save us sometime
102
+ (runs faster with local GPU inference).
103
+
104
  1. Type in a youtube URL or upload an audio file
105
  2. Generate transcription with Whisper (Currently English Only)
106
  3. Summarize the transcribed speech
 
127
  text = gr.Textbox(label="Transcription", placeholder="transcription")
128
 
129
  with gr.Row():
130
+ model_options = gr.Dropdown(['Tiny', 'Base'], value='Base', label="models")
131
+ model_options.change(load_model, inputs=model_options, outputs=model_options)
132
+
133
  beam_size_slider = gr.Slider(1, 10, value=5, step=1, label="param: beam_size")
134
  best_of_slider = gr.Slider(1, 10, value=5, step=1, label="param: best_of")
135
 
 
152
  image = gr.Image(label="wordcloud", show_label=False).style(height=400, width=400)
153
 
154
  text.change(wordcloud_func, inputs=text, outputs=image)
155
+
156
 
157
  if __name__ == '__main__':
158
  demo.launch()