"""Gradio app that downloads a video for a date range and transcribes it.

The audio track is extracted from the downloaded video, split on silence,
and each resulting chunk is passed through a Whisper speech-recognition
pipeline. The video path and the concatenated transcript are shown in the UI.
"""

import datetime
import os
import tempfile
import time

import gradio as gr
from moviepy.editor import AudioFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from pydub import AudioSegment
from pydub.silence import split_on_silence
from transformers import pipeline

from video_downloader import download_video, download_video1

# Silence-splitting parameters handed to pydub's ``split_on_silence``.
MIN_SILENCE_LEN = 500
SILENCE_THRESH = -40

# Loaded once at import time so every request reuses the same model.
pipe = pipeline(
    "automatic-speech-recognition",
    model="Sleepyp00/whisper-small-Swedish",
)


def _extract_audio(video_path, audio_path):
    """Write the audio track of *video_path* to *audio_path*."""
    clip = AudioFileClip(video_path)
    try:
        clip.write_audiofile(audio_path)
    finally:
        # Release the underlying ffmpeg reader even if the export fails.
        clip.close()


def _transcribe_chunks(chunks, workdir):
    """Transcribe each audio chunk and return the texts in order.

    Every chunk is exported to a WAV file inside *workdir*, handed to the
    speech-recognition pipeline by filename, and removed again right away so
    that at most one chunk file exists on disk at any time.
    """
    texts = []
    for index, chunk in enumerate(chunks):
        chunk_path = os.path.join(workdir, f"chunk{index}.wav")
        # ``export`` returns the open output file; close it explicitly.
        chunk.export(chunk_path, format="wav").close()
        try:
            texts.append(pipe(chunk_path)["text"])
        finally:
            os.remove(chunk_path)
    return texts


def process_video(from_date, to_date):
    """Download the video for a date range and transcribe its audio.

    Args:
        from_date: Start date, forwarded unchanged to ``download_video1``.
        to_date: End date, forwarded unchanged to ``download_video1``.

    Returns:
        A ``(video_path, transcription)`` tuple. ``transcription`` holds the
        text of every non-silent chunk, each followed by a blank line; it is
        an empty string when no chunk was detected.
    """
    video_path = download_video1(from_date, to_date)

    # All intermediate files live in a private temporary directory: nothing
    # is left behind when a step fails, concurrent requests cannot overwrite
    # each other's files, and user input never becomes part of a file path.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = os.path.join(workdir, "audio.wav")
        _extract_audio(video_path, audio_path)

        recording = AudioSegment.from_wav(audio_path)
        chunks = split_on_silence(
            recording,
            min_silence_len=MIN_SILENCE_LEN,
            silence_thresh=SILENCE_THRESH,
        )
        texts = _transcribe_chunks(chunks, workdir)

    return video_path, "".join(f"{text}\n\n" for text in texts)


# NOTE(review): live=True makes Gradio re-run process_video whenever an input
# changes, so partially typed dates may reach download_video1 - confirm this
# is intended.
# NOTE(review): the title says "Romanian" while the loaded checkpoint is named
# "whisper-small-Swedish" - confirm which language is intended.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Textbox(label="From date with format YYYY-MM-DD"),
        gr.Textbox(label="To date with format YYYY-MM-DD"),
    ],
    outputs=[
        gr.Video(),
        gr.Textbox(lines=1000, max_lines=1000, interactive=True),
    ],
    live=True,
    title="Romanian Transcription Test",
)

iface.launch()