Spaces:
Runtime error
Runtime error
Jiedong Yang
committed on
Commit
•
7d23a88
1
Parent(s):
72033da
Upload app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
import os
|
2 |
-
import pafy
|
3 |
-
import time
|
4 |
import whisper
|
5 |
import validators
|
6 |
import gradio as gr
|
@@ -12,6 +10,16 @@ asr_model = whisper.load_model('base.en')
|
|
12 |
summarizer = gr.Interface.load("facebook/bart-large-cnn", src='huggingface')
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def audio_from_url(url, dst_dir='data', name=None, format='wav'):
|
16 |
""" Download video from url and save the audio from video
|
17 |
|
@@ -40,9 +48,9 @@ def audio_from_url(url, dst_dir='data', name=None, format='wav'):
|
|
40 |
def speech_to_text(audio, beam_size=5, best_of=5, language='en'):
|
41 |
""" ASR inference with Whisper
|
42 |
|
43 |
-
:param audio:
|
44 |
-
:param beam_size:
|
45 |
-
:param best_of:
|
46 |
:param language:
|
47 |
:return:
|
48 |
"""
|
@@ -90,7 +98,9 @@ with demo:
|
|
90 |
# demo description
|
91 |
gr.Markdown("""
|
92 |
## Speech Summarization with Whisper
|
93 |
-
This space is intended to summarize a speech, a short one or long one, to save us sometime
|
|
|
|
|
94 |
1. Type in a youtube URL or upload an audio file
|
95 |
2. Generate transcription with Whisper (Currently English Only)
|
96 |
3. Summarize the transcribed speech
|
@@ -117,6 +127,9 @@ with demo:
|
|
117 |
text = gr.Textbox(label="Transcription", placeholder="transcription")
|
118 |
|
119 |
with gr.Row():
|
|
|
|
|
|
|
120 |
beam_size_slider = gr.Slider(1, 10, value=5, step=1, label="param: beam_size")
|
121 |
best_of_slider = gr.Slider(1, 10, value=5, step=1, label="param: best_of")
|
122 |
|
@@ -139,7 +152,7 @@ with demo:
|
|
139 |
image = gr.Image(label="wordcloud", show_label=False).style(height=400, width=400)
|
140 |
|
141 |
text.change(wordcloud_func, inputs=text, outputs=image)
|
142 |
-
|
143 |
|
144 |
if __name__ == '__main__':
|
145 |
demo.launch()
|
|
|
1 |
import os
|
|
|
|
|
2 |
import whisper
|
3 |
import validators
|
4 |
import gradio as gr
|
|
|
10 |
summarizer = gr.Interface.load("facebook/bart-large-cnn", src='huggingface')
|
11 |
|
12 |
|
13 |
+
def load_model(name: str):
    """ Reload the module-level Whisper ASR model with the selected size

    :param name: model options, tiny or base only, for quick inference
    :return: the name that was passed in, unchanged
    """
    # Without this declaration the assignment below would only bind a local
    # variable, and the newly loaded model would be discarded on return,
    # leaving the module-level ``asr_model`` untouched.
    global asr_model
    asr_model = whisper.load_model(f"{name.lower()}.en")
    return name
|
21 |
+
|
22 |
+
|
23 |
def audio_from_url(url, dst_dir='data', name=None, format='wav'):
|
24 |
""" Download video from url and save the audio from video
|
25 |
|
|
|
48 |
def speech_to_text(audio, beam_size=5, best_of=5, language='en'):
|
49 |
""" ASR inference with Whisper
|
50 |
|
51 |
+
:param audio: filepath
|
52 |
+
:param beam_size: beam search parameter
|
53 |
+
:param best_of: number of best results
|
54 |
:param language:
|
55 |
:return:
|
56 |
"""
|
|
|
98 |
# demo description
|
99 |
gr.Markdown("""
|
100 |
## Speech Summarization with Whisper
|
101 |
+
This space is intended to summarize a speech, a short one or long one, to save us sometime
|
102 |
+
(runs faster with local GPU inference).
|
103 |
+
|
104 |
1. Type in a youtube URL or upload an audio file
|
105 |
2. Generate transcription with Whisper (Currently English Only)
|
106 |
3. Summarize the transcribed speech
|
|
|
127 |
text = gr.Textbox(label="Transcription", placeholder="transcription")
|
128 |
|
129 |
with gr.Row():
|
130 |
+
model_options = gr.Dropdown(['Tiny', 'Base'], value='Base', label="models")
|
131 |
+
model_options.change(load_model, inputs=model_options, outputs=model_options)
|
132 |
+
|
133 |
beam_size_slider = gr.Slider(1, 10, value=5, step=1, label="param: beam_size")
|
134 |
best_of_slider = gr.Slider(1, 10, value=5, step=1, label="param: best_of")
|
135 |
|
|
|
152 |
image = gr.Image(label="wordcloud", show_label=False).style(height=400, width=400)
|
153 |
|
154 |
text.change(wordcloud_func, inputs=text, outputs=image)
|
155 |
+
|
156 |
|
157 |
if __name__ == '__main__':
|
158 |
demo.launch()
|