FarhadMadadzade committed on
Commit
e4297a8
1 Parent(s): be37f4c

Testing with only the first 30 seconds of the video

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  import time
4
  from video_downloader import download_video
5
  from moviepy.editor import AudioFileClip
 
6
  import datetime
7
  import os
8
  from pydub import AudioSegment
@@ -11,16 +12,17 @@ from pydub.silence import split_on_silence
11
  pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
12
 
13
 
14
- def process_video(date, update_fn):
15
  # Download the video
16
  video_path = download_video(date)
17
 
18
- # Update the output with the video
19
- update_fn(video=video_path)
 
20
 
21
- # Extract audio from the video
22
  audio_path = f"audio_{date}.wav"
23
- AudioFileClip(video_path).write_audiofile(audio_path)
24
 
25
  # Split the audio into chunks
26
  audio = AudioSegment.from_wav(audio_path)
@@ -35,13 +37,10 @@ def process_video(date, update_fn):
35
  transcription += pipe(audio)["text"] + " "
36
  os.remove(f"chunk{i}.wav")
37
 
38
- # Update the output with the transcription
39
- update_fn(transcription=transcription)
40
-
41
  # Remove the audio file
42
  os.remove(audio_path)
43
 
44
- return video_path, transcription
45
 
46
 
47
  iface = gr.Interface(
@@ -51,7 +50,6 @@ iface = gr.Interface(
51
  gr.outputs.Video(),
52
  gr.Textbox(lines=1000, max_lines=1000, interactive=True),
53
  ],
54
- live=True,
55
  title="Romanian Transcription Test",
56
  )
57
 
 
3
  import time
4
  from video_downloader import download_video
5
  from moviepy.editor import AudioFileClip
6
+ from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
7
  import datetime
8
  import os
9
  from pydub import AudioSegment
 
12
  pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
13
 
14
 
15
+ def process_video(date):
16
  # Download the video
17
  video_path = download_video(date)
18
 
19
+ # Extract the first 30 seconds of the video
20
+ short_video_path = f"short_{date}.mp4"
21
+ ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)
22
 
23
+ # Extract audio from the short video
24
  audio_path = f"audio_{date}.wav"
25
+ AudioFileClip(short_video_path).write_audiofile(audio_path)
26
 
27
  # Split the audio into chunks
28
  audio = AudioSegment.from_wav(audio_path)
 
37
  transcription += pipe(audio)["text"] + " "
38
  os.remove(f"chunk{i}.wav")
39
 
 
 
 
40
  # Remove the audio file
41
  os.remove(audio_path)
42
 
43
+ return short_video_path, transcription
44
 
45
 
46
  iface = gr.Interface(
 
50
  gr.outputs.Video(),
51
  gr.Textbox(lines=1000, max_lines=1000, interactive=True),
52
  ],
 
53
  title="Romanian Transcription Test",
54
  )
55