"""Gradio app that transcribes the audio of a YouTube video with Whisper.

Pipeline: download the video as .mp4 (pytube) -> extract the audio track to
.mp3 (moviepy) -> load it at 16 kHz (librosa) -> transcribe it (transformers).

Every step returns a ``(result, error_message)`` pair of strings; exactly one
of the two is non-empty unless the step produced an empty result without
raising.
"""

import os
from pprint import pprint
from typing import Tuple

import gradio as gr
import librosa
import pytube
from moviepy.editor import VideoFileClip
from pytube import YouTube
from transformers import pipeline

# Load the Whisper model from Hugging Face once, at import time, so every
# request reuses it. ``chunk_length_s=30`` makes the pipeline split long
# recordings into 30-second chunks; without it, audio longer than Whisper's
# 30-second window is truncated or rejected (depending on the installed
# transformers version) instead of being transcribed in full.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    chunk_length_s=30,
)


def _describe_error(exc: BaseException) -> str:
    """Return a non-empty, human-readable message for *exc*.

    Some exceptions stringify to "", which would make a failure look like
    "no result and no error" in the UI; fall back to the exception type name.
    """
    return str(exc) or type(exc).__name__


def _remove_quietly(path: str) -> None:
    """Delete *path* if it exists, ignoring filesystem errors (best effort)."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the real result of the request.
        pass


def download_video_mp4(youtube_url: str) -> Tuple[str, str]:
    """Download the highest-resolution progressive .mp4 stream of a video.

    Args:
        youtube_url: URL of the YouTube video.

    Returns:
        ``(video_filename, "")`` on success, where ``video_filename`` is the
        path returned by pytube's ``download()`` (current working directory),
        or ``("", error_message)`` on failure.
    """
    try:
        yt = YouTube(youtube_url)
        # Progressive streams carry audio and video in a single file.
        video = (
            yt.streams.filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        if video is None:
            return "", "No progressive mp4 stream is available for this video."
        video_filename = video.download()
        print('Video downloaded')
        return video_filename, ""
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        return "", _describe_error(e)


def create_audio_file(video_filename: str) -> Tuple[str, str]:
    """Extract the audio track of a video file into an .mp3 next to it.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        ``(audio_filename, "")`` on success, or ``("", error_message)`` when
        the video has no audio track or extraction fails.
    """
    # Swap only the real extension; a plain str.replace would also rewrite
    # ".mp4" occurring elsewhere in the path, or leave the name unchanged
    # (and overwrite the video) when the extension is not ".mp4".
    audio_filename = os.path.splitext(video_filename)[0] + ".mp3"
    try:
        # The context manager closes the clip's reader even on failure.
        with VideoFileClip(video_filename) as video:
            audio = video.audio
            if audio is None:
                return "", "The video does not contain an audio track."
            audio.write_audiofile(audio_filename)
        return audio_filename, ""
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        return "", _describe_error(e)


def transcribe(audio_path: str) -> Tuple[str, str]:
    """Transcribe an audio file with the module-level Whisper pipeline.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        ``(text, "")`` on success, or ``("", error_message)`` on failure.
    """
    try:
        # Load the audio as a numpy array resampled to 16 kHz before handing
        # it to the pipeline.
        audio, _ = librosa.load(audio_path, sr=16000)
        transcript = transcriber(audio)
        return transcript["text"], ""
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        return "", _describe_error(e)


def process_youtube_url(youtube_url: str) -> Tuple[str, str]:
    """Run the full download -> extract -> transcribe pipeline for one URL.

    The intermediate video and audio files are deleted before returning,
    whether or not the pipeline succeeded.

    Args:
        youtube_url: URL entered by the user.

    Returns:
        ``(transcription, "")`` on success, or ``("", error_message)`` as soon
        as one step fails.
    """
    youtube_url = (youtube_url or "").strip()
    if not youtube_url:
        return "", "Please enter a YouTube URL."

    video_filename = ""
    audio_filename = ""
    try:
        video_filename, download_error = download_video_mp4(youtube_url)
        if not video_filename:
            return "", download_error

        audio_filename, audio_error = create_audio_file(video_filename)
        if not audio_filename:
            return "", audio_error

        yt_text, transcribe_error = transcribe(audio_filename)
        if not yt_text:
            return "", transcribe_error

        return yt_text, ""
    finally:
        # Do not let a long-running app accumulate media files on disk.
        _remove_quietly(audio_filename)
        _remove_quietly(video_filename)


iface = gr.Interface(
    fn=process_youtube_url,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Errors")],
    title="YouTube Video Transcription",
    description="Enter a YouTube video URL to transcribe the audio using the Whisper model from Hugging Face.",
)

iface.launch()