"""Gradio demo: transcribe recorded or uploaded lecture audio with Wav2Vec2."""

from typing import Optional

import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "sanchit-gandhi/wav2vec2-large-tedlium"

# `device` used to be referenced without ever being defined, which made the
# script fail with a NameError on start-up. Use the first GPU when one is
# visible to torch, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True allows Python code shipped in the Hub
# repository to run locally. It is kept to preserve the original behaviour,
# but is probably unnecessary for a stock Wav2Vec2 checkpoint - confirm and
# drop it if the repo contains no custom modelling code.
wav2_ft = pipeline(
    "automatic-speech-recognition",
    model=MODEL_ID,
    device=device,
    trust_remote_code=True,
)

app = gr.Blocks()

# Kept flush-left so the Markdown renders the same regardless of how the
# component treats leading indentation.
INTRO_MARKDOWN = """
# Lecture Transcription 📝

Welcome to **Lecture Transcription**, the go-to tool for transcribing lectures accurately. Whether you’re attending a live lecture or revisiting a recorded one, this app will ensure you don’t miss a single detail.

## How It Works
- **Recording Mode:** Record the lecture as it happens. When you stop, your transcription will be generated.
- **Upload Mode:** Upload your pre-recorded lecture audio files, and receive a precise transcription. Supports various audio formats including WAV, MP3, and more.

## Optimized for Technical Oration
Under the hood, this is a Wav2Vec2 model fine-tuned on the TED-Lium dataset. It's well-versed for accurately transcribing technical speech.

**Never miss a word with Lecture Transcription!**
"""


def inference(path: Optional[str]) -> str:
    """Transcribe the audio file at *path* and return the recognised text.

    Args:
        path: File path supplied by the ``gr.Audio`` input (configured with
            ``type='filepath'``). Expected to be ``None`` when the user
            submits without recording or uploading anything.

    Returns:
        The ``'text'`` entry of the speech-recognition pipeline output.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an internal pipeline exception.
    """
    if path is None:
        raise gr.Error(
            "No audio received. Please record or upload a file before submitting."
        )

    # Wav2Vec2 is a CTC model: it does not run `generate()`, so the
    # generation-only `max_new_tokens` argument is intentionally not passed.
    # Long recordings are split into 30-second chunks, batched 8 at a time.
    out = wav2_ft(
        path,
        chunk_length_s=30,
        batch_size=8,
    )
    return out["text"]


mic_mode = gr.Interface(
    fn=inference,
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Record Your Lecture",
    ),
    outputs=gr.Textbox(label="Transcription Output"),
    title="🎙️ Live Lecture Transcription",
    description=(
        "Record through your mic. When you're done, hit stop and wait a moment. "
        "Feel free to trim the recording. Then, hit Submit!"
    ),
    examples=[],
)

upload_mode = gr.Interface(
    fn=inference,
    inputs=gr.Audio(
        sources=["upload"],
        type="filepath",
        label="Upload Your Lecture Recording",
    ),
    outputs=gr.Textbox(label="Transcription Output"),
    title="📂 Lecture Recording Transcription",
    description=(
        "Have a recorded lecture? Upload the audio file here, "
        "and it'll be transcribed in seconds!"
    ),
)

with app:
    gr.Markdown(INTRO_MARKDOWN)

    # Add a Tabbed Interface for different modes
    gr.TabbedInterface(
        [mic_mode, upload_mode],
        ["🎙️ Record & Transcribe", "📂 Upload & Transcribe"],
    )

# Launch the app
app.launch()