Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import pytube | |
from pytube import YouTube | |
from pprint import pprint | |
from moviepy.editor import VideoFileClip | |
from transformers import pipeline | |
import librosa | |
# Shared speech-recognition pipeline, created once when the module is loaded
# from the "openai/whisper-base" checkpoint on the Hugging Face Hub.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)
def download_video_mp4(youtube_url):
    """Download the highest-resolution progressive MP4 stream of a video.

    Args:
        youtube_url: URL of the YouTube video to download.

    Returns:
        A ``(video_filename, error)`` tuple. On success ``video_filename`` is
        the path returned by pytube's ``download()`` and ``error`` is ``""``.
        On failure ``video_filename`` is ``""`` and ``error`` is a
        human-readable message.
    """
    # Reject missing/blank input up front so the user gets a clear message
    # instead of whatever pytube raises while parsing an empty URL.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return "", "No YouTube URL provided."
    try:
        yt = YouTube(youtube_url.strip())
        # Progressive streams bundle audio and video in a single file, which
        # is what the later audio-extraction step needs.
        video = (
            yt.streams.filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        # first() yields None when the query matches nothing; report that
        # explicitly rather than failing on a NoneType attribute access.
        if video is None:
            return "", "No progressive MP4 stream is available for this video."
        # Download the video to the current working directory
        video_filename = video.download()
        print('Video downloaded')
        return video_filename, ""
    except Exception as e:
        # Broad on purpose: this is the UI boundary and every failure is
        # surfaced as text. Fall back to repr() so the message is never empty.
        return "", str(e) or repr(e)
def create_audio_file(video_filename):
    """Extract the audio track of a video file into an ``.mp3`` beside it.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        An ``(audio_filename, error)`` tuple. On success ``audio_filename`` is
        the path of the written ``.mp3`` and ``error`` is ``""``. On failure
        ``audio_filename`` is ``""`` and ``error`` is a human-readable message.
    """
    if not video_filename:
        return "", "No video file provided."
    try:
        # Swap only the real extension. str.replace(".mp4", ".mp3") would also
        # rewrite ".mp4" occurring elsewhere in the path, and would leave the
        # path unchanged for any other extension (audio overwriting the video).
        audio_filename = os.path.splitext(video_filename)[0] + ".mp3"
        video = VideoFileClip(video_filename)
        try:
            audio = video.audio
            # The audio attribute is None for clips without an audio track.
            if audio is None:
                return "", "The video has no audio track."
            audio.write_audiofile(audio_filename)
        finally:
            # Always release the resources held by the clip, even on failure.
            video.close()
        return audio_filename, ""
    except Exception as e:
        # Broad on purpose: every failure is surfaced to the UI as text.
        # Fall back to repr() so the message is never empty.
        return "", str(e) or repr(e)
def transcribe(audio_path):
    """Transcribe an audio file with the module-level Whisper pipeline.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A ``(text, error)`` tuple. On success ``text`` is the transcript and
        ``error`` is ``""``. On failure ``text`` is ``""`` and ``error`` is a
        human-readable message.
    """
    if not audio_path:
        return "", "No audio file provided."
    try:
        # Load the audio as a numpy array resampled to 16 kHz, the rate the
        # array is assumed to have when passed to the pipeline without an
        # explicit sampling rate.
        audio, _ = librosa.load(audio_path, sr=16000)
        # Whisper processes at most ~30 s per forward pass. Chunking makes the
        # pipeline walk the whole recording instead of truncating or rejecting
        # anything longer than one window.
        transcript = transcriber(audio, chunk_length_s=30)
        return transcript["text"], ""
    except Exception as e:
        # Broad on purpose: every failure is surfaced to the UI as text.
        # Fall back to repr() so the message is never empty.
        return "", str(e) or repr(e)
def process_youtube_url(youtube_url):
    """Download a YouTube video, extract its audio and transcribe it.

    Runs the three pipeline stages in order and stops at the first one that
    fails. The intermediate video and audio files are deleted before
    returning, whether or not the run succeeded.

    Args:
        youtube_url: URL typed into the interface.

    Returns:
        A ``(transcription, error)`` tuple matching the two output text boxes.
        Exactly one of the two is non-empty.
    """
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return "", "Please enter a YouTube URL."

    video_filename = ""
    audio_filename = ""
    try:
        video_filename, download_error = download_video_mp4(youtube_url.strip())
        if not video_filename:
            return "", download_error

        audio_filename, audio_error = create_audio_file(video_filename)
        if not audio_filename:
            return "", audio_error

        yt_text, transcribe_error = transcribe(audio_filename)
        if not yt_text:
            # An empty transcript without an error would otherwise leave both
            # output boxes blank, so say what happened.
            return "", transcribe_error or "No speech was recognized in the audio."
        return yt_text, ""
    finally:
        # Remove the intermediate files so repeated requests do not pile up
        # media in the working directory.
        for leftover in (video_filename, audio_filename):
            if leftover:
                try:
                    os.remove(leftover)
                except OSError:
                    # Best-effort cleanup: a file that is already gone or
                    # still locked must not mask the real result.
                    pass
# Gradio UI: one text box for the URL in, two text boxes (transcript, errors) out.
url_input = gr.Textbox(label="YouTube URL")
result_outputs = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Errors"),
]

iface = gr.Interface(
    fn=process_youtube_url,
    inputs=url_input,
    outputs=result_outputs,
    title="YouTube Video Transcription",
    description="Enter a YouTube video URL to transcribe the audio using the Whisper model from Hugging Face.",
)

# Start the web server for the interface.
iface.launch()