"""Gradio front-end that returns a transcript for a YouTube video.

Published subtitles are returned when the video has them; otherwise the audio
is downloaded with yt-dlp and transcribed with Whisper.
"""

import gradio as gr
import whisper
from pytube import YouTube
from fastapi import FastAPI, Response, Request
import yt_dlp
import uvicorn
import re
import os
from typing import Optional

CUSTOM_PATH = "/gradio"

app = FastAPI()

# Whisper language names (e.g. "english") offered in the UI; "None" means
# "let Whisper auto-detect".
langs = ["None"] + sorted(whisper.tokenizer.LANGUAGES.values())
model_size = list(whisper._MODELS.keys())


@app.get("/test")
def read_main():
    """Return a static payload so the FastAPI app can be smoke-tested."""
    return {"message": "This is your main app"}


#async def get_subtitle(url: str):
    # Download the subtitle with download_subtitle()
    #subtitle_url = download_subtitle(url)
    # Stream the subtitle as a response
    #return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))


def _language_code(lang: Optional[str]) -> Optional[str]:
    """Map a Whisper language name or code to its code.

    Returns ``None`` for an empty value or the UI's "None" choice. Values that
    are neither a known code nor a known name are returned unchanged.
    """
    if not lang or lang == "None":
        return None
    if lang in whisper.tokenizer.LANGUAGES:
        return lang
    wanted = lang.lower()
    for code, name in whisper.tokenizer.LANGUAGES.items():
        if name == wanted:
            return code
    return lang


def _pick_subtitle_url(subtitles: dict, lang_code: Optional[str]) -> Optional[str]:
    """Choose a subtitle track URL from yt-dlp's ``subtitles`` mapping.

    ``subtitles`` maps a language code to a list of track dicts. Without a
    requested language the first listed language is used. With one, an exact
    key match is preferred, then a regional variant such as ``en-US``.
    """
    if lang_code is None:
        tracks = next(iter(subtitles.values()), [])
    else:
        tracks = subtitles.get(lang_code)
        if tracks is None:
            prefix = f"{lang_code}-"
            tracks = next(
                (found for key, found in subtitles.items() if key.startswith(prefix)),
                [],
            )
    if not tracks:
        return None
    # Prefer WebVTT (plain text cues) over whatever format happens to be first.
    chosen = next((t for t in tracks if t.get("ext") == "vtt"), tracks[0])
    return chosen.get("url")


def download_subtitle(url: str, lang: Optional[str] = None) -> Optional[str]:
    """Fetch the text of a published subtitle track for ``url``.

    Args:
        url: Video URL understood by yt-dlp.
        lang: Language code or Whisper language name; ``None`` accepts the
            first available language.

    Returns:
        The decoded subtitle file contents, or ``None`` when the video has no
        matching subtitle track.
    """
    lang_code = _language_code(lang)
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang_code] if lang_code else [],
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        subtitles = info_dict.get("subtitles")
        if not subtitles:
            return None
        subtitle_url = _pick_subtitle_url(subtitles, lang_code)
        if not subtitle_url:
            return None
        with ydl.urlopen(subtitle_url) as subtitle:
            return subtitle.read().decode("utf-8")


def get_subtitle(url, lang='en'):
    """Download the ``lang`` subtitle file for ``url`` and return it untagged.

    yt-dlp writes the subtitle next to the working directory as
    ``<video id>.<lang>.vtt``; the file is read back and markup tags such as
    ``<c>`` are stripped.

    Returns:
        The subtitle text, or ``None`` if the download fails, the video id is
        missing, or the subtitle file cannot be read.
    """
    if lang is None:
        lang = 'en'
    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        # The file name built below assumes WebVTT, so request it explicitly.
        'subtitlesformat': 'vtt',
        'skip_download': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
        video_id = info_dict.get("id", None)
        if video_id is None:
            return None
        subtitle_file = f"{video_id}.{lang}.vtt"
        with open(subtitle_file, 'r', encoding="utf-8") as f:
            subtitle_content = f.read()
    except (yt_dlp.utils.DownloadError, OSError) as error:
        # OSError covers the subtitle file not having been written.
        print(error)
        return None
    return re.sub(r"<[^>]+>", "", subtitle_content)


def download_audio(video_url, quality: str = '128', speed: Optional[float] = None):
    """Download the best audio stream of ``video_url`` and return its path.

    Args:
        video_url: Video URL understood by yt-dlp.
        quality: mp3 quality passed to the extract-audio post-processor; only
            used when ``speed`` is given.
        speed: Optional tempo factor. When set, the audio is re-encoded to
            mp3 with ffmpeg's ``atempo`` filter.

    Returns:
        Path of the audio file on disk, named after the video title.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '%(title)s.%(ext)s',
        'quiet': True,
    }
    if speed:
        ydl_opts["postprocessors"] = [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": quality,
        }]
        # Extra ffmpeg arguments are supplied per post-processor through
        # ``postprocessor_args`` rather than inside the post-processor dict.
        ydl_opts["postprocessor_args"] = {
            "extractaudio": ["-filter:a", f"atempo={speed}"],
        }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(video_url, download=True)
        audio_file = ydl.prepare_filename(info)
    if speed:
        # prepare_filename reports the pre-conversion extension.
        audio_file = os.path.splitext(audio_file)[0] + ".mp3"
    print('audio_file', audio_file)
    return audio_file


def get_audio(url):
    """Download the first audio-only stream via pytube as ``tmp.mp4``."""
    yt = YouTube(url)
    return yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")


def get_transcript(url, model_size, lang, format):
    """Return a transcript for ``url``.

    Published subtitles are returned verbatim when available; otherwise the
    audio is downloaded and transcribed with the Whisper model ``model_size``.

    Args:
        url: Video URL.
        model_size: Whisper model name (e.g. ``"tiny"``).
        lang: Whisper language name, or ``"None"`` to auto-detect.
        format: ``".srt"`` for numbered, timestamped cues; any other value
            yields plain text.
    """
    if lang == "None":
        lang = None
    subtitle = download_subtitle(url, lang)
    if subtitle:
        return subtitle
    model = whisper.load_model(model_size)
    result = model.transcribe(download_audio(url), fp16=False, language=lang)
    if format == ".srt":
        return format_to_srt(result["segments"])
    return result["text"]


def format_to_srt(segments):
    """Render Whisper segments as SRT cues numbered from 1."""
    return "".join(
        f"{index}\n"
        f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
        f"{segment['text']}\n\n"
        for index, segment in enumerate(segments, start=1)
    )


def format_timestamp(t):
    """Format ``t`` seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    # Work in integer milliseconds so float error cannot drop a millisecond
    # (e.g. 1.001 - 1 is slightly below 0.001).
    total_ms = int(round(t * 1000))
    hh, remainder = divmod(total_ms, 3_600_000)
    mm, remainder = divmod(remainder, 60_000)
    ss, ms = divmod(remainder, 1000)
    return f"{hh:02d}:{mm:02d}:{ss:02d},{ms:03d}"


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                url_input = gr.Textbox(placeholder='Youtube video URL', label='URL')
            with gr.Row():
                model_input = gr.Dropdown(choices=model_size, value='tiny', label="Model")
                lang_input = gr.Dropdown(choices=langs, value="None", label="Language (Optional)")
                format_input = gr.Dropdown(choices=["None", ".srt"], value="None", label="Timestamps? (Optional)")
            with gr.Row():
                gr.Markdown(
                    "Larger models are more accurate, but slower. "
                    "For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc."
                )
            transcribe_btn = gr.Button('Transcribe')
        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
    transcribe_btn.click(
        get_transcript,
        inputs=[url_input, model_input, lang_input, format_input],
        outputs=outputs,
    )

# NOTE(review): launch(debug=True) appears to block until the Gradio server is
# stopped, so uvicorn only starts afterwards, and both target port 7860.
# Mounting `demo` on `app` (see the commented lines) may be the intent -- confirm.
demo.launch(debug=True)
#io = gr.Interface(gradio_interface)
#app = gr.mount_gradio_app(app, io, path=CUSTOM_PATH)
uvicorn.run(app, host="0.0.0.0", port=7860)