yellowcandle committed
Commit
6e40332
1 Parent(s): e648c2d

feat: Add audio transcription and proofreading functionality


- Implement audio transcription using Whisper models
- Add a proofreading feature using the LLaMA-3-Chinese-8B-Instruct-v3 model
- Create Gradio interface for uploading audio, selecting models, and displaying results

Files changed (1)
  1. app.py +17 -15
app.py CHANGED
@@ -2,8 +2,7 @@ import spaces
 import gradio as gr
 # Use a pipeline as a high-level helper
 import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-# from datasets import load_dataset
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM
 
 @spaces.GPU(duration=120)
 def transcribe_audio(audio, model_id):
@@ -54,18 +53,21 @@ def proofread(text):
     return proofread_text
 
 
-demo = gr.Interface(
-    [transcribe_audio, proofread],
-    [
-        gr.Audio(sources="upload", type="filepath"),
-        gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"]),
-        "text"
-    ],
-    "text",
-    allow_flagging="never",
-    title="Audio Transcription and Proofreading",
-    description="Upload an audio file, select a model for transcription, and then proofread the transcribed text.",
-)
-demo.launch()
+with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcription and Proofreading")
+    gr.Markdown("Upload an audio file, select a model for transcription, and then proofread the transcribed text.")
+
+    with gr.Row():
+        audio = gr.Audio(sources="upload", type="filepath")
+        model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"])
 
+    transcribe_button = gr.Button("Transcribe")
+    transcribed_text = gr.Textbox(label="Transcribed Text")
+
+    proofread_button = gr.Button("Proofread")
+    proofread_output = gr.Textbox(label="Proofread Text")
 
+    transcribe_button.click(transcribe_audio, inputs=[audio, model_dropdown], outputs=transcribed_text)
+    proofread_button.click(proofread, inputs=transcribed_text, outputs=proofread_output)
+
+demo.launch()
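
The body of transcribe_audio falls outside the hunks above; only its decorator and signature appear as context. For reference, a minimal sketch of what it plausibly does, following the standard Whisper usage from the transformers documentation. Everything beyond the decorator and signature is an assumption, not the commit's actual code:

import spaces
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

@spaces.GPU(duration=120)
def transcribe_audio(audio, model_id):
    # Sketch only: the real body is not shown in this diff.
    # Prefer half precision on GPU to cut memory use; fall back to float32 on CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    ).to(device)
    processor = AutoProcessor.from_pretrained(model_id)

    # High-level ASR helper; chunking lets it handle clips longer than 30 seconds.
    asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        chunk_length_s=30,
        torch_dtype=torch_dtype,
        device=device,
    )
    # `audio` is a filepath, matching type="filepath" in the gr.Audio component.
    return asr(audio)["text"]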
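
The proofread body is likewise elided; only its return statement appears as context. A sketch of one plausible implementation using the AutoModelForCausalLM import added in this commit. The repo id, prompt, and generation settings below are assumptions rather than the source's code:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

@spaces.GPU(duration=120)
def proofread(text):
    # Assumed Hugging Face repo id for the model named in the commit message.
    model_id = "hfl/llama-3-chinese-8b-instruct-v3"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )

    # Chat-style prompt asking the instruct model to return only the corrected text.
    messages = [
        {"role": "system", "content": "You proofread Chinese transcripts and return only the corrected text."},
        {"role": "user", "content": text},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    output_ids = model.generate(input_ids, max_new_tokens=512, do_sample=False)
    # Decode only the newly generated tokens, skipping the prompt.
    proofread_text = tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return proofread_text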