File size: 2,881 Bytes
fac06d0 1936f1e de84263 e4297a8 de84263 4b85b27 fac06d0 8dffbd8 fac06d0 8dffbd8 de84263 1936f1e 8dffbd8 1936f1e de84263 4b85b27 292ce47 4b85b27 de84263 1936f1e 8dffbd8 292ce47 1936f1e c031f24 292ce47 c031f24 1936f1e 292ce47 1936f1e 292ce47 1936f1e 165b80a 1936f1e be37f4c 292ce47 8dffbd8 292ce47 8dffbd8 292ce47 fac06d0 534a7d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import datetime
import os
import tempfile
import time

import gradio as gr
from moviepy.editor import AudioFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from pydub import AudioSegment
from pydub.silence import split_on_silence
from transformers import pipeline

from video_downloader import download_video, download_video1
# Speech-recognition pipeline shared by every request; built once at import
# time from the "Artanis1551/whisper_swedish" checkpoint.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="Artanis1551/whisper_swedish",
)
def process_video1(date):
    """Download the video for ``date`` and transcribe its whole audio track.

    The audio is extracted with moviepy, split on silence with pydub, and
    every chunk is fed to the module-level ``pipe`` speech-recognition
    pipeline.

    Args:
        date: Date string forwarded unchanged to ``download_video1`` (the UI
            asks for the ``YYYY-MM-DD`` format).

    Returns:
        A ``(video_path, transcription)`` tuple: the path returned by
        ``download_video1`` and the chunk transcripts, each one followed by a
        newline and a space.
    """
    video_path = download_video1(date)

    # All intermediate WAV files live in a private temporary directory:
    # concurrent requests can no longer overwrite each other's files, the
    # user-supplied date never becomes part of a scratch file name, and the
    # files are removed even when extraction or transcription raises.
    with tempfile.TemporaryDirectory() as work_dir:
        # Extract audio from the video
        audio_path = os.path.join(work_dir, "audio.wav")
        clip = AudioFileClip(video_path)
        try:
            clip.write_audiofile(audio_path)
        finally:
            # Release the ffmpeg reader held by the clip.
            clip.close()

        # Split the audio into chunks
        segments = split_on_silence(
            AudioSegment.from_wav(audio_path),
            min_silence_len=500,
            silence_thresh=-40,
        )

        # Transcribe each chunk
        texts = []
        for index, segment in enumerate(segments):
            chunk_path = os.path.join(work_dir, f"chunk{index}.wav")
            # export() returns the still-open output file; close it so the
            # handle is not leaked and the data is flushed before reading.
            segment.export(chunk_path, format="wav").close()
            with open(chunk_path, "rb") as chunk_file:
                texts.append(pipe(chunk_file.read())["text"])

    # Same output format as before: every chunk is followed by "\n ".
    transcription = "".join(text + "\n " for text in texts)
    return video_path, transcription
# Gradio UI: one text input for the date; outputs are the video returned by
# process_video1 and an editable text box holding the transcription.
iface = gr.Interface(
    fn=process_video1,
    inputs=[
        # gr.Textbox / gr.Video replace the deprecated gr.inputs / gr.outputs
        # namespaces, matching the gr.Textbox already used for the output.
        gr.Textbox(label="Date with format YYYY-MM-DD"),
    ],
    outputs=[
        gr.Video(),
        gr.Textbox(lines=1000, max_lines=1000, interactive=True),
    ],
    title="Transcribe Swedish Parliament Decisions",
    # Was misspelled "desription", so the text was never passed as the
    # interface description.
    description="This app transcribes the top Swedish Parliament decision video from the given date.",
)
def process_video(date):
    """Download the video for ``date`` and transcribe its first 30 seconds.

    The first 30 seconds are cut into ``short_<date>.mp4``; the audio of that
    clip is extracted with moviepy, split on silence with pydub, and every
    chunk is fed to the module-level ``pipe`` speech-recognition pipeline.

    Args:
        date: Date string forwarded unchanged to ``download_video``; it is
            also embedded in the name of the trimmed video file.

    Returns:
        A ``(short_video_path, transcription)`` tuple: the path of the
        30-second clip and the chunk transcripts, each one followed by a
        single space.
    """
    # Download the video
    video_path = download_video(date)

    # Extract the first 30 seconds of the video. This file is part of the
    # return value, so it must outlive the call and stays in the working
    # directory rather than in the temporary directory below.
    short_video_path = f"short_{date}.mp4"
    ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)

    # Intermediate WAV files live in a private temporary directory so that
    # concurrent requests cannot overwrite each other's files and everything
    # is removed even when extraction or transcription raises.
    with tempfile.TemporaryDirectory() as work_dir:
        # Extract audio from the short video
        audio_path = os.path.join(work_dir, "audio.wav")
        clip = AudioFileClip(short_video_path)
        try:
            clip.write_audiofile(audio_path)
        finally:
            # Release the ffmpeg reader held by the clip.
            clip.close()

        # Split the audio into chunks
        segments = split_on_silence(
            AudioSegment.from_wav(audio_path),
            min_silence_len=500,
            silence_thresh=-40,
        )

        # Transcribe each chunk
        texts = []
        for index, segment in enumerate(segments):
            chunk_path = os.path.join(work_dir, f"chunk{index}.wav")
            # export() returns the still-open output file; close it so the
            # handle is not leaked and the data is flushed before reading.
            segment.export(chunk_path, format="wav").close()
            with open(chunk_path, "rb") as chunk_file:
                texts.append(pipe(chunk_file.read())["text"])

    # Same output format as before: every chunk is followed by one space.
    transcription = "".join(text + " " for text in texts)
    return short_video_path, transcription
# iface = gr.Interface(
# fn=process_video,
# inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
# outputs=[
# gr.outputs.Video(),
# gr.Textbox(lines=1000, max_lines=1000, interactive=True),
# ],
# title="Romanian Transcription Test",
# )
# Launch the Gradio interface; this runs as soon as the module is executed.
iface.launch()
|