Spaces:

jiedong-yang
/

Speech-Summarization-with-Whisper

Runtime error

App Files Files Community

Jiedong Yang committed on Oct 25, 2022

Commit

7d23a88

•

1 Parent(s): 72033da

Upload app.py

Browse files

Files changed (1) hide show

app.py +20 -7

app.py CHANGED Viewed

@@ -1,6 +1,4 @@
 import os
-import pafy
-import time
 import whisper
 import validators
 import gradio as gr
@@ -12,6 +10,16 @@ asr_model = whisper.load_model('base.en')
 summarizer = gr.Interface.load("facebook/bart-large-cnn", src='huggingface')
 def audio_from_url(url, dst_dir='data', name=None, format='wav'):
     """ Download video from url and save the audio from video
@@ -40,9 +48,9 @@ def audio_from_url(url, dst_dir='data', name=None, format='wav'):
 def speech_to_text(audio, beam_size=5, best_of=5, language='en'):
     """ ASR inference with Whisper
-    :param audio:
-    :param beam_size:
-    :param best_of:
     :param language:
     :return:
     """
@@ -90,7 +98,9 @@ with demo:
     # demo description
     gr.Markdown("""
     ## Speech Summarization with Whisper
-    This space is intended to summarize a speech, a short one or long one, to save us sometime.
     1. Type in a youtube URL or upload an audio file
     2. Generate transcription with Whisper (Currently English Only)
     3. Summarize the transcribed speech
@@ -117,6 +127,9 @@ with demo:
     text = gr.Textbox(label="Transcription", placeholder="transcription")
     with gr.Row():
         beam_size_slider = gr.Slider(1, 10, value=5, step=1, label="param: beam_size")
         best_of_slider = gr.Slider(1, 10, value=5, step=1, label="param: best_of")
@@ -139,7 +152,7 @@ with demo:
     image = gr.Image(label="wordcloud", show_label=False).style(height=400, width=400)
     text.change(wordcloud_func, inputs=text, outputs=image)
 if __name__ == '__main__':
     demo.launch()

 import os
 import whisper
 import validators
 import gradio as gr
 summarizer = gr.Interface.load("facebook/bart-large-cnn", src='huggingface')
+def load_model(name: str):
+    """
+    :param name: model options, tiny or base only, for quick inference
+    :return:
+    """
+    asr_model = whisper.load_model(f"{name.lower()}.en")
+    return name
 def audio_from_url(url, dst_dir='data', name=None, format='wav'):
     """ Download video from url and save the audio from video
 def speech_to_text(audio, beam_size=5, best_of=5, language='en'):
     """ ASR inference with Whisper
+    :param audio: filepath
+    :param beam_size: beam search parameter
+    :param best_of: number of best results
     :param language:
     :return:
     """
     # demo description
     gr.Markdown("""
     ## Speech Summarization with Whisper
+    This space is intended to summarize a speech, a short one or long one, to save us sometime
+    (runs faster with local GPU inference).
     1. Type in a youtube URL or upload an audio file
     2. Generate transcription with Whisper (Currently English Only)
     3. Summarize the transcribed speech
     text = gr.Textbox(label="Transcription", placeholder="transcription")
     with gr.Row():
+        model_options = gr.Dropdown(['Tiny', 'Base'], value='Base', label="models")
+        model_options.change(load_model, inputs=model_options, outputs=model_options)
         beam_size_slider = gr.Slider(1, 10, value=5, step=1, label="param: beam_size")
         best_of_slider = gr.Slider(1, 10, value=5, step=1, label="param: best_of")
     image = gr.Image(label="wordcloud", show_label=False).style(height=400, width=400)
     text.change(wordcloud_func, inputs=text, outputs=image)
 if __name__ == '__main__':
     demo.launch()