Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

FarhadMadadzade committed on Dec 8, 2023

Commit

e4297a8

•

1 Parent(s): be37f4c

testing only for 30 seconds of video

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import gradio as gr
 import time
 from video_downloader import download_video
 from moviepy.editor import AudioFileClip
 import datetime
 import os
 from pydub import AudioSegment
@@ -11,16 +12,17 @@ from pydub.silence import split_on_silence
 pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
-def process_video(date, update_fn):
     # Download the video
     video_path = download_video(date)
-    # Update the output with the video
-    update_fn(video=video_path)
-    # Extract audio from the video
     audio_path = f"audio_{date}.wav"
-    AudioFileClip(video_path).write_audiofile(audio_path)
     # Split the audio into chunks
     audio = AudioSegment.from_wav(audio_path)
@@ -35,13 +37,10 @@ def process_video(date, update_fn):
         transcription += pipe(audio)["text"] + " "
         os.remove(f"chunk{i}.wav")
-        # Update the output with the transcription
-        update_fn(transcription=transcription)
     # Remove the audio file
     os.remove(audio_path)
-    return video_path, transcription
 iface = gr.Interface(
@@ -51,7 +50,6 @@ iface = gr.Interface(
         gr.outputs.Video(),
         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
     ],
-    live=True,
     title="Romanian Transcription Test",
 )

 import time
 from video_downloader import download_video
 from moviepy.editor import AudioFileClip
+from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 import datetime
 import os
 from pydub import AudioSegment
 pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
+def process_video(date):
     # Download the video
     video_path = download_video(date)
+    # Extract the first 30 seconds of the video
+    short_video_path = f"short_{date}.mp4"
+    ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)
+    # Extract audio from the short video
     audio_path = f"audio_{date}.wav"
+    AudioFileClip(short_video_path).write_audiofile(audio_path)
     # Split the audio into chunks
     audio = AudioSegment.from_wav(audio_path)
         transcription += pipe(audio)["text"] + " "
         os.remove(f"chunk{i}.wav")
     # Remove the audio file
     os.remove(audio_path)
+    return short_video_path, transcription
 iface = gr.Interface(
         gr.outputs.Video(),
         gr.Textbox(lines=1000, max_lines=1000, interactive=True),
     ],
     title="Romanian Transcription Test",
 )