adrian-saez-martinez committed on
Commit f11f43f • 1 Parent(s): e2c56f2

app.py added

Files changed (1)
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
+ import torch
+ import gradio as gr
+ from transformers import pipeline
+ import concurrent.futures
+ import time
+
+ # Load both models
+ MODEL_NAME_TURBO = "openai/whisper-large-v3-turbo"
+ MODEL_NAME_STANDARD = "openai/whisper-large-v3"
+
+ device = 0 if torch.cuda.is_available() else "cpu"
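+ # pipeline() accepts an integer CUDA device index (0 = first GPU) or the
+ # string "cpu", hence the mixed-type expression above.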
+
+ # Set up the pipeline for both models
+ pipe_turbo = pipeline(
+     task="automatic-speech-recognition",
+     model=MODEL_NAME_TURBO,
+     chunk_length_s=30,
+     device=device,
+ )
+
+ pipe_standard = pipeline(
+     task="automatic-speech-recognition",
+     model=MODEL_NAME_STANDARD,
+     chunk_length_s=30,
+     device=device,
+ )
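+ # chunk_length_s=30 enables chunked long-form transcription: recordings longer
+ # than 30 seconds are split into chunks whose transcripts are stitched back
+ # together, so inputs of any length can be handled.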
+
+ # Function to transcribe audio using the turbo model
+ def transcribe_turbo(audio):
+     start_time = time.time()
+     text_turbo = pipe_turbo(audio)["text"]
+     elapsed_time = time.time() - start_time
+     return text_turbo, elapsed_time
+
+ # Function to transcribe audio using the standard model
+ def transcribe_standard(audio):
+     start_time = time.time()
+     text_standard = pipe_standard(audio)["text"]
+     elapsed_time = time.time() - start_time
+     return text_standard, elapsed_time
+
+ # Function to compare transcriptions and speed
+ def compare_transcriptions(audio):
+     if audio is None:
+         raise gr.Error("No audio file submitted! Please record an audio clip before submitting your request.")
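+     # Raising gr.Error inside an event handler displays the message to the
+     # user in the UI instead of crashing the app.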
+
+     # Run both transcriptions in parallel
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+         future_turbo = executor.submit(transcribe_turbo, audio)
+         future_standard = executor.submit(transcribe_standard, audio)
+
+         # Get the results
+         text_turbo, time_turbo = future_turbo.result()
+         text_standard, time_standard = future_standard.result()
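+         # NOTE: plain threads work here because PyTorch releases the GIL
+         # during inference, but on a single GPU both models contend for the
+         # same device, so the reported times include that contention.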
+
+     # Return all four values flat: Gradio assigns them positionally to the
+     # four output components wired up in transcribe_button.click()
+     return text_standard, f"{time_standard:.2f} seconds", text_turbo, f"{time_turbo:.2f} seconds"
+
+ css = """
+ h1 {
+     text-align: center;
+     display: block;
+ }
+ """
+
+ # Gradio Interface
+ with gr.Blocks(css=css) as demo:
+     # Title and description
+     gr.Markdown("# Whisper large-v3-turbo ...vs... Whisper large-v3")
+     gr.Markdown("This app compares the transcription output and processing time of OpenAI's 'Whisper large-v3' and 'Whisper large-v3-turbo' models.")
+
+     with gr.Column():
+         with gr.Row():
+             with gr.Group():
+                 audio_input = gr.Audio(sources=["microphone"], type="filepath")
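+                 # type="filepath" hands the recording to the callback as a
+                 # path to a temporary audio file; the ASR pipeline decodes it
+                 # with ffmpeg, which must be installed on the host.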
+                 transcribe_button = gr.Button("Start transcription", variant="primary")
+
+         with gr.Row():
+             with gr.Group():
+                 gr.Markdown("### 📝 **Standard model**")
+                 standard_output = gr.Textbox(label="Transcription")
+                 standard_time = gr.Textbox(label="Processing Time")
+             with gr.Group():
+                 gr.Markdown("### ⚡ **Turbo model**")
+                 turbo_output = gr.Textbox(label="Transcription")
+                 turbo_time = gr.Textbox(label="Processing Time")
+
+     # Set up the interaction
+     transcribe_button.click(
+         fn=compare_transcriptions,
+         inputs=audio_input,
+         outputs=[standard_output, standard_time, turbo_output, turbo_time],
+     )
+
+ # Launch the demo
+ demo.launch()
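
For a quick sanity check outside the Gradio UI, either pipeline can be exercised directly. A minimal sketch, assuming a local sample.wav (the file name is an assumption; any ffmpeg-decodable audio works) and that the model weights can be downloaded:

```python
# sanity_check.py: hypothetical helper, not part of this commit.
# Assumes "sample.wav" exists locally and ffmpeg is on PATH.
import torch
from transformers import pipeline

device = 0 if torch.cuda.is_available() else "cpu"

asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    chunk_length_s=30,
    device=device,
)

print(asr("sample.wav")["text"])
```

Note that importing app.py itself is not a convenient test path, since demo.launch() runs at module import time and blocks.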