yellowcandle committed
Commit • 6e40332
Parent(s): e648c2d
feat: Add audio transcription and proofreading functionality

- Implement audio transcription using Whisper models
- Add a proofreading feature using the LLaMA-3-Chinese-8B-Instruct-v3 model
- Create a Gradio interface for uploading audio, selecting models, and displaying results
app.py CHANGED
@@ -2,8 +2,7 @@ import spaces
 import gradio as gr
 # Use a pipeline as a high-level helper
 import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
-# from datasets import load_dataset
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM
 
 @spaces.GPU(duration=120)
 def transcribe_audio(audio, model_id):
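The hunk above shows only the imports and the signature of transcribe_audio; the function body sits outside the changed lines. A minimal sketch of what a body built on these imports typically looks like (the device selection, dtype, and pipeline settings are assumptions, not taken from the Space):

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

def transcribe_audio(audio, model_id):
    # On the Space this runs under @spaces.GPU(duration=120), so CUDA is
    # usually available; fall back to CPU for local runs.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # Load the Whisper checkpoint chosen in the model dropdown.
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
    ).to(device)
    processor = AutoProcessor.from_pretrained(model_id)

    # Wrap model and processor in an ASR pipeline; `audio` arrives as a
    # filepath because gr.Audio below is configured with type="filepath".
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
    )
    return pipe(audio)["text"]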
@@ -54,18 +53,21 @@ def proofread(text):
     return proofread_text
 
 
-
-
-
-
-
-    "
-
-    "text",
-    allow_flagging="never",
-    title="Audio Transcription and Proofreading",
-    description="Upload an audio file, select a model for transcription, and then proofread the transcribed text.",
-)
-demo.launch()
+with gr.Blocks() as demo:
+    gr.Markdown("# Audio Transcription and Proofreading")
+    gr.Markdown("Upload an audio file, select a model for transcription, and then proofread the transcribed text.")
+
+    with gr.Row():
+        audio = gr.Audio(source="upload", type="filepath")
+        model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"])
 
+    transcribe_button = gr.Button("Transcribe")
+    transcribed_text = gr.Textbox(label="Transcribed Text")
+
+    proofread_button = gr.Button("Proofread")
+    proofread_output = gr.Textbox(label="Proofread Text")
 
+    transcribe_button.click(transcribe_audio, inputs=[audio, model_dropdown], outputs=transcribed_text)
+    proofread_button.click(proofread, inputs=transcribed_text, outputs=proofread_output)
+
+demo.launch()
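The body of proofread() is likewise outside the diff; the only hard evidence is the new AutoModelForCausalLM import and the LLaMA-3-Chinese-8B-Instruct-v3 model named in the commit message. A rough sketch under those assumptions (the checkpoint id, prompt wording, and generation settings below are illustrative):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def proofread(text):
    model_id = "hfl/llama-3-chinese-8b-instruct-v3"  # assumed checkpoint id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="auto"
    )

    # Ask the instruct model to correct the transcript; the system prompt
    # is illustrative, not recovered from the Space.
    messages = [
        {"role": "system", "content": "You are a proofreader. Correct the text without changing its meaning."},
        {"role": "user", "content": text},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    output = model.generate(input_ids, max_new_tokens=512, do_sample=False)
    # Decode only the newly generated tokens, i.e. the model's reply.
    proofread_text = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return proofread_text

Wired into the Blocks UI above, proofread_button.click(...) feeds the transcribed textbox straight into this function and writes the result to the "Proofread Text" box.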