# Author: Patraskon
# Commit: Add application file
# Revision: e4f9d2f
import gradio as gr
import os
import pytube
from pytube import YouTube
from pprint import pprint
from moviepy.editor import VideoFileClip
from transformers import pipeline
import librosa
# Load the Whisper model from Hugging Face once, at import time, so every
# request reuses the same pipeline object.
# Whisper consumes at most 30 seconds of audio per forward pass; enabling the
# pipeline's chunking lets recordings longer than that be transcribed in full.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    chunk_length_s=30,
)
def download_video_mp4(youtube_url):
    """Download a YouTube video as a progressive MP4 file.

    Args:
        youtube_url: URL of the YouTube video to fetch.

    Returns:
        A ``(video_filename, error)`` tuple. On success ``video_filename`` is
        the path returned by the stream's ``download()`` call and ``error``
        is ``""``. On failure ``video_filename`` is ``""`` and ``error``
        holds a description of the problem. Exceptions are never propagated.
    """
    # Reject blank or missing input up front so the user sees a clear message
    # rather than whatever the downloader raises while parsing it.
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return "", "No YouTube URL provided."
    try:
        # Create a youtube object
        yt = YouTube(youtube_url.strip())
        # Pick the highest-resolution progressive (audio + video) MP4 stream
        video = (
            yt.streams
            .filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        # first() yields None when no stream matches the filter; report that
        # explicitly instead of failing on the attribute access below.
        if video is None:
            return "", "No progressive MP4 stream is available for this video."
        # Download the video to the current working directory
        video_filename = video.download()
        print('Video downloaded')
        return video_filename, ""
    except Exception as e:
        return "", str(e)
def create_audio_file(video_filename):
    """Extract the audio track of a video file into an MP3 file.

    The MP3 is written next to the video, using the video's path with its
    extension swapped for ``.mp3``.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        A ``(audio_filename, error)`` tuple. On success ``audio_filename`` is
        the path of the written MP3 and ``error`` is ``""``. On failure
        ``audio_filename`` is ``""`` and ``error`` holds a description of the
        problem. Exceptions are never propagated.
    """
    try:
        # Swap only the real extension. str.replace would also rewrite any
        # ".mp4" occurring elsewhere in the path, and would return the name
        # unchanged (making the output path equal to the input video) when
        # the file does not carry an ".mp4" extension.
        audio_filename = os.path.splitext(video_filename)[0] + ".mp3"
        video = VideoFileClip(video_filename)
        try:
            audio = video.audio
            # A clip without an audio track exposes audio as None.
            if audio is None:
                return "", "The video has no audio track."
            audio.write_audiofile(audio_filename)
        finally:
            # Always release the clip's readers; a failure here is still
            # reported through the outer handler.
            video.close()
        return audio_filename, ""
    except Exception as e:
        return "", str(e)
def transcribe(audio_path):
    """Transcribe an audio file with the module-level ``transcriber``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        A ``(text, error)`` tuple: the transcript's ``"text"`` entry and
        ``""`` on success, or ``""`` and the exception message on failure.
    """
    try:
        # Decode the file into a waveform resampled to 16 kHz.
        waveform, _rate = librosa.load(audio_path, sr=16000)
        # Run speech recognition on the waveform and pull out the text.
        text = transcriber(waveform)["text"]
    except Exception as exc:
        return "", str(exc)
    return text, ""
def process_youtube_url(youtube_url):
    """Run the download -> audio extraction -> transcription chain for a URL.

    Each stage takes the previous stage's result and returns a
    ``(result, error)`` pair. The chain stops at the first stage whose
    result is empty.

    Args:
        youtube_url: URL of the YouTube video to transcribe.

    Returns:
        ``(transcript, "")`` when every stage yields a non-empty result,
        otherwise ``("", error)`` with the error reported by the stage that
        produced the empty result.
    """
    stages = (download_video_mp4, create_audio_file, transcribe)
    current = youtube_url
    for stage in stages:
        current, failure = stage(current)
        # An empty result marks a failed stage; surface its error and stop.
        if not current:
            return "", failure
    return current, ""
# Gradio front end: one text input for the URL, two text outputs that receive
# the (transcription, error) pair returned by process_youtube_url.
iface = gr.Interface(
    title="YouTube Video Transcription",
    description="Enter a YouTube video URL to transcribe the audio using the Whisper model from Hugging Face.",
    fn=process_youtube_url,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Errors"),
    ],
)

# Start serving the interface.
iface.launch()