yellowcandle committed
Commit 0bbcfe0
Parent: 634e161

Added audio transcription and proofreading functionality using Gradio and Hugging Face Transformers


- Implemented a `transcribe_audio` function to transcribe audio files using a user-selected model (a hedged sketch of both functions follows below)
- Implemented a `proofread` function to proofread the transcribed text using a specified model
- Created a Gradio interface to upload audio files, select models, and display the transcribed and proofread text
- Integrated GPU support for faster processing
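For orientation, here is a minimal sketch of what the two functions do. It is not the code in this commit: the model IDs, defaults, and `do_sample=True` are illustrative assumptions.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def transcribe_audio(audio_path: str, model_id: str) -> str:
    """Transcribe an audio file with a speech-to-text pipeline (sketch)."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    asr = pipeline("automatic-speech-recognition", model=model_id, device=device)
    return asr(audio_path)["text"]

def proofread(prompt: str, text: str, model_id: str = "gpt2") -> str:
    """Proofread text by prepending an instruction prompt to it (sketch)."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).to(device)
    input_ids = tokenizer.encode(prompt + text, return_tensors="pt").to(device)
    # do_sample=True makes temperature take effect; without it generation is greedy.
    output = model.generate(input_ids, max_new_tokens=50,
                            num_return_sequences=1, temperature=0.7, do_sample=True)
    return tokenizer.decode(output[0], skip_special_tokens=True)
```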

Files changed (1): app.py (+11 -6)
app.py CHANGED
@@ -2,7 +2,7 @@ import spaces
 import gradio as gr
 # Use a pipeline as a high-level helper
 import torch
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
 
 @spaces.GPU(duration=120)
 def transcribe_audio(audio, model_id):
@@ -51,7 +51,8 @@ def proofread(prompt, text):
     model.to(device)
 
     # Perform proofreading using the model
-    input_ids = tokenizer.encode(text, return_tensors="pt").to(device)
+    input_text = prompt + text
+    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
     output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
     proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
@@ -59,8 +60,12 @@ def proofread(prompt, text):
 
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transcription and Proofreading")
-    gr.Markdown("Upload an audio file, select a model for transcription, and then proofread the transcribed text.")
+    gr.Markdown("""
+    # Audio Transcription and Proofreading
+    1. Upload an audio file (Wait for the file to be fully loaded first)
+    2. Select a model for transcription
+    3. Proofread the transcribed text
+    """)
 
     with gr.Row():
         audio = gr.Audio(sources="upload", type="filepath")
@@ -73,7 +78,7 @@ with gr.Blocks() as demo:
     proofread_output = gr.Textbox(label="Proofread Text")
 
     transcribe_button.click(transcribe_audio, inputs=[audio, model_dropdown], outputs=transcribed_text)
-    proofread_button.click(proofread, inputs=transcribed_text, outputs=proofread_output)
-    transcribed_text.change(proofread, inputs=transcribed_text, outputs=proofread_output)
+    proofread_button.click(proofread, inputs=[transcribed_text], outputs=proofread_output)
+    transcribed_text.change(proofread, inputs=["", transcribed_text], outputs=proofread_output)
 
 demo.launch()
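One note on the new `input_text = prompt + text` step: for causal LMs, `model.generate` returns the input tokens followed by the continuation, so decoding `output[0]` in full reproduces the prompt and the original text as well. A common pattern (a sketch under assumed names, not part of this commit) is to slice off the input before decoding:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# "gpt2" stands in for whatever proofreading model the app loads.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

prompt = "Proofread the following text:\n"  # illustrative prompt
text = "this is a transcirbed sentence with a typo"
input_ids = tokenizer.encode(prompt + text, return_tensors="pt")

output = model.generate(input_ids, max_new_tokens=50, do_sample=True, temperature=0.7)

# generate() echoes the input ids for decoder-only models; drop them
# so the decoded string contains only the model's continuation.
new_tokens = output[0][input_ids.shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))
```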
 
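On the last hunk: Gradio event listeners expect the `inputs` list to contain components, so a literal `""` is not how a fixed prompt value normally reaches a two-argument callback like `proofread(prompt, text)`. A hedged sketch of one conventional way to bind a constant prompt, using `functools.partial` (component names and the placeholder body are illustrative):

```python
from functools import partial

import gradio as gr

def proofread(prompt: str, text: str) -> str:
    # Stand-in for the real model call; concatenates prompt and text as the commit does.
    return f"{prompt}{text} [proofread]"

with gr.Blocks() as demo:
    transcribed_text = gr.Textbox(label="Transcribed Text")
    proofread_output = gr.Textbox(label="Proofread Text")
    proofread_button = gr.Button("Proofread")

    # Bind the constant prompt with partial; only component values flow
    # through the event, so `inputs` stays a list of components.
    fixed = partial(proofread, "Proofread the following text:\n")
    proofread_button.click(fixed, inputs=[transcribed_text], outputs=proofread_output)
    transcribed_text.change(fixed, inputs=[transcribed_text], outputs=proofread_output)

demo.launch()
```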