File size: 1,711 Bytes
fac06d0 de84263 4b85b27 fac06d0 23c3af4 de84263 23c3af4 de84263 4b85b27 de84263 23c3af4 de84263 fac06d0 534a7d7 de84263 e5f8741 306a78c 536f3e9 306a78c 23c3af4 534a7d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
from transformers import pipeline
import gradio as gr
import time
from video_downloader import download_video
from moviepy.editor import AudioFileClip
import datetime
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence
# Hugging Face model id of the Whisper checkpoint used for transcription.
ASR_MODEL_ID = "Artanis1551/whisper_romanian3"

# Speech-recognition pipeline, built once at import time and shared by
# every request handled by this module.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_ID,
)
def _transcribe_chunk(chunk, index):
    """Transcribe one audio chunk via a scratch WAV file and return its text.

    The chunk is exported to ``chunk{index}.wav`` in the working directory,
    read back as raw bytes, and handed to the module-level ``pipe``. The
    scratch file is removed even if exporting or transcribing fails.
    """
    chunk_path = f"chunk{index}.wav"
    try:
        # export() returns an open handle on the written file; close it
        # right away so the file can be reopened and later deleted.
        chunk.export(chunk_path, format="wav").close()
        with open(chunk_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        return pipe(audio_bytes)["text"]
    finally:
        if os.path.exists(chunk_path):
            os.remove(chunk_path)


def process_video(date, live=None):
    """Download the video for *date* and transcribe its audio track.

    Args:
        date: Date string in ``YYYY-MM-DD`` form. It is converted to
            ``YYYYMMDD`` before being passed to ``download_video``.
        live: Optional object with an ``update(...)`` method that receives
            progress: first the video path alone, then the video path plus
            the transcription accumulated so far after every chunk.
            Defaults to ``None`` (no progress reporting) so the function
            can be called with the single value a one-input Gradio
            interface supplies.
            NOTE(review): the exact type expected here is not visible in
            this file -- confirm against the intended caller.

    Returns:
        A ``(video_path, transcription)`` tuple, matching the two output
        components declared on the interface. Each chunk's text in the
        transcription is followed by ``"\n "``.

    Raises:
        ValueError: If *date* is not in ``YYYY-MM-DD`` form.
    """
    # Normalise the date to the compact yyyymmdd form.
    date = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")

    video_path = download_video(date)
    if live is not None:
        live.update(video_path)

    audio_path = f"audio_{date}.wav"
    transcription = ""
    try:
        # Extract the audio track; always release the clip's reader.
        clip = AudioFileClip(video_path)
        try:
            clip.write_audiofile(audio_path)
        finally:
            clip.close()

        # Split on silence so each piece handed to the model stays short.
        full_audio = AudioSegment.from_wav(audio_path)
        chunks = split_on_silence(
            full_audio, min_silence_len=500, silence_thresh=-40
        )

        for index, chunk in enumerate(chunks):
            transcription += _transcribe_chunk(chunk, index) + "\n "
            if live is not None:
                # Report the transcription accumulated so far.
                live.update(video_path, transcription)
    finally:
        # Remove the intermediate audio file even when a step above failed.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return video_path, transcription
# Web UI: one text input (the date) mapped to a video player and a large
# editable transcription box.
# NOTE(review): live=True is kept from the original; confirm that re-running
# the download/transcription on every input change is really intended.
iface = gr.Interface(
    fn=process_video,
    # process_video parses the value with "%Y-%m-%d", so the label must
    # advertise that exact format.
    inputs=gr.Textbox(label="Date with format YYYY-MM-DD"),
    outputs=[
        gr.Video(),
        gr.Textbox(lines=1000, max_lines=1000, interactive=True),
    ],
    live=True,
    title="Romanian Transcription Test",
)
iface.launch()
|