"""Gradio demo that transcribes the audio track of a YouTube video with Whisper."""

import os
import subprocess
import sys

# Whisper is installed at start-up, so this has to run before `import whisper`.
# Running pip as "<this interpreter> -m pip" with an argument list (instead of
# a shell string) installs into the environment that is executing this script.
# check=False keeps a failed install non-fatal at this point; the import below
# will then raise ImportError on its own.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
)

import whisper  # noqa: E402  (must follow the install step above)
from pytube import YouTube  # noqa: E402
import gradio as gr  # noqa: E402

# Loaded once at import time and shared by every request.
infer_model = whisper.load_model("tiny")


def _title_from_path(audio_path: str) -> str:
    """Return the file name of *audio_path* without directory or extension.

    Only the final extension is removed, so a name that itself contains
    periods (e.g. "Python 3.11 released.mp4") is kept intact. Falls back to
    "Content" when the resulting stem is empty.
    """
    stem = os.path.splitext(os.path.basename(audio_path))[0]
    return stem or "Content"


def infer(link: str, add_timestamps: bool) -> str:
    """Download the audio behind *link* and return its transcription.

    Args:
        link: URL of the YouTube video to transcribe.
        add_timestamps: When true, emit one line per transcribed segment,
            prefixed with its start and end time in seconds. When false,
            return the plain transcription text.

    Returns:
        The title derived from the downloaded file name on the first line,
        followed by the transcription, or "Unable to process request." when
        the audio could not be downloaded.
    """
    audio_path = download_audio(link)
    if not audio_path:
        return "Unable to process request."

    result = infer_model.transcribe(audio_path)
    title = _title_from_path(audio_path)

    if not add_timestamps:
        print(result["text"])
        return title + "\n" + result["text"]

    lines = [title]
    lines.extend(
        f"{float(segment['start']):.2f}s - {float(segment['end']):.2f}s : {segment['text']}"
        for segment in result["segments"]
    )
    # strip("\n") drops stray leading/trailing newlines, e.g. when there are
    # no segments or a segment's text ends with a newline.
    return "\n".join(lines).strip("\n")


def download_audio(link: str) -> str:
    """Download the audio-only stream of the YouTube video at *link*.

    Args:
        link: URL of the YouTube video.

    Returns:
        The path of the downloaded file, or an empty string when the download
        failed for any reason (the reason is printed).
    """
    try:
        yt = YouTube(link)
        stream = yt.streams.get_audio_only()
        if stream is None:
            # Report this case explicitly rather than letting it surface as
            # an AttributeError on `None.download()`.
            print("Unable to download file. No audio-only stream available.")
            return ""
        audio_path = stream.download()
        print(audio_path)
        return audio_path
    except Exception as e:
        # Best-effort boundary: any download failure is reported to the
        # caller through the empty-string return value.
        print(f"Unable to download file. Exception {e}")
        return ""


demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Textbox(label="Youtube Link", placeholder="Copy link here"),
        gr.Checkbox(value=True, label="Add timestamps?"),
    ],
    outputs=[
        gr.Textbox(
            label="Transcription",
            placeholder=" Expected processing time ~ Half the length of video. Hang tight!!",
        ),
    ],
    examples=[
        ["https://www.youtube.com/watch?v=KL2T0XRzWUI", False],
        ["https://www.youtube.com/watch?v=yGB_K_xlHdI", False],
        ["https://www.youtube.com/watch?v=dv9sgFHS2Do", True],
    ],
)

demo.launch()