Spaces:

adriszmar
/

whisper-large-v3-turbo-vs-base-model

Running on Zero

App Files Files Community

adrian-saez-martinez committed 24 days ago

Commit

f11f43f

•

1 Parent(s): e2c56f2

app.py added

Browse files

Files changed (1) hide show

app.py +93 -0

app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import torch
+import gradio as gr
+from transformers import pipeline
+import concurrent.futures
+import time
+# Load both models
+MODEL_NAME_TURBO = "openai/whisper-large-v3-turbo"
+MODEL_NAME_STANDARD = "openai/whisper-large-v3"
+device = 0 if torch.cuda.is_available() else "cpu"
+# Set up the pipeline for both models
+pipe_turbo = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME_TURBO,
+    chunk_length_s=30,
+    device=device,
+)
+pipe_standard = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME_STANDARD,
+    chunk_length_s=30,
+    device=device,
+)
+# Function to transcribe audio using the turbo model
+def transcribe_turbo(audio):
+    start_time = time.time()
+    text_turbo = pipe_turbo(audio)["text"]
+    elapsed_time = time.time() - start_time
+    return text_turbo, elapsed_time
+# Function to transcribe audio using the standard model
+def transcribe_standard(audio):
+    start_time = time.time()
+    text_standard = pipe_standard(audio)["text"]
+    elapsed_time = time.time() - start_time
+    return text_standard, elapsed_time
+# Function to compare transcriptions and speed
+def compare_transcriptions(audio):
+    if audio is None:
+        raise gr.Error("No audio file submitted! Please record an audio before submitting your request.")
+    # Run both transcriptions in parallel
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        future_turbo = executor.submit(transcribe_turbo, audio)
+        future_standard = executor.submit(transcribe_standard, audio)
+        # Get the results
+        text_turbo, time_turbo = future_turbo.result()
+        text_standard, time_standard = future_standard.result()
+    # Return both transcriptions and processing times
+    return (text_standard, f"{time_standard:.2f} seconds"), (text_turbo, f"{time_turbo:.2f} seconds")
+css = """
+h1 {
+    text-align: center;
+    display:block;
+}
+"""
+# Gradio Interface
+with gr.Blocks(css=css) as demo:
+    # Title and description
+    gr.Markdown("# Whisper large-v3-turbo ...vs... Whisper large-v3")
+    gr.Markdown("This app compares the transcription performance and processing time between openAI 'Whisper large-v3' and 'Whisper large-v3-turbo' models")
+    with gr.Column():
+        with gr.Row():
+            with gr.Group():
+                audio_input = gr.Audio(sources=["microphone"], type="filepath")
+                transcribe_button = gr.Button("Start transcription", variant="primary")
+        with gr.Row():
+            with gr.Row():
+               with gr.Group():
+                  gr.Markdown("### 📝 **Standard model**")
+                  standard_output = gr.Textbox(label="Transcription")
+                  standard_time = gr.Textbox(label="Processing Time")
+               with gr.Group():
+                 gr.Markdown("### ⚡ **Turbo model**")
+                 turbo_output = gr.Textbox(label="Transcription")
+                 turbo_time = gr.Textbox(label="Processing Time")
+    # Set up the interaction
+    transcribe_button.click(fn=compare_transcriptions, inputs=audio_input, outputs=[standard_output, standard_time, turbo_output, turbo_time])
+# Launch the demo
+demo.launch()