FarhadMadadzade committed on
Commit
20fa434
1 Parent(s): 306506f

added so that it only uses the first 30 seconds if the video is longer than that

Browse files
Files changed (2) hide show
  1. app.py +10 -43
  2. video_downloader.py +0 -22
app.py CHANGED
@@ -2,7 +2,7 @@ from transformers import pipeline
2
  import gradio as gr
3
  import time
4
  from video_downloader import download_video, download_video1
5
- from moviepy.editor import AudioFileClip
6
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
7
  import datetime
8
  import os
@@ -15,6 +15,15 @@ pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_swedi
15
  def process_video1(date):
16
  video_path = download_video1(date)
17
 
 
 
 
 
 
 
 
 
 
18
  # Extract audio from the video
19
  audio_path = f"audio_{date}.wav"
20
  AudioFileClip(video_path).write_audiofile(audio_path)
@@ -51,46 +60,4 @@ iface = gr.Interface(
51
  desription="This app transcribes the top Swedish Parliament decision video from the given date.",
52
  )
53
 
54
-
55
- def process_video(date):
56
- # Download the video
57
- video_path = download_video(date)
58
-
59
- # Extract the first 30 seconds of the video
60
- short_video_path = f"short_{date}.mp4"
61
- ffmpeg_extract_subclip(video_path, 0, 30, targetname=short_video_path)
62
-
63
- # Extract audio from the short video
64
- audio_path = f"audio_{date}.wav"
65
- AudioFileClip(short_video_path).write_audiofile(audio_path)
66
-
67
- # Split the audio into chunks
68
- audio = AudioSegment.from_wav(audio_path)
69
- chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
70
-
71
- # Transcribe each chunk
72
- transcription = ""
73
- for i, chunk in enumerate(chunks):
74
- chunk.export(f"chunk{i}.wav", format="wav")
75
- with open(f"chunk{i}.wav", "rb") as audio_file:
76
- audio = audio_file.read()
77
- transcription += pipe(audio)["text"] + " "
78
- os.remove(f"chunk{i}.wav")
79
-
80
- # Remove the audio file
81
- os.remove(audio_path)
82
-
83
- return short_video_path, transcription
84
-
85
-
86
- # iface = gr.Interface(
87
- # fn=process_video,
88
- # inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
89
- # outputs=[
90
- # gr.outputs.Video(),
91
- # gr.Textbox(lines=1000, max_lines=1000, interactive=True),
92
- # ],
93
- # title="Romanian Transcription Test",
94
- # )
95
-
96
  iface.launch()
 
2
  import gradio as gr
3
  import time
4
  from video_downloader import download_video, download_video1
5
+ from moviepy.editor import AudioFileClip, VideoFileClip
6
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
7
  import datetime
8
  import os
 
15
  def process_video1(date):
16
  video_path = download_video1(date)
17
 
18
+ # Get the duration of the video
19
+ video = VideoFileClip(video_path)
20
+ duration = video.duration
21
+
22
+ # If the video is longer than 30 seconds, only take the first 30 seconds
23
+ if duration > 30:
24
+ video_path = f"short_{date}.mp4"
25
+ ffmpeg_extract_subclip(video_path, 0, 30, targetname=video_path)
26
+
27
  # Extract audio from the video
28
  audio_path = f"audio_{date}.wav"
29
  AudioFileClip(video_path).write_audiofile(audio_path)
 
60
  desription="This app transcribes the top Swedish Parliament decision video from the given date.",
61
  )
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  iface.launch()
video_downloader.py CHANGED
@@ -1,30 +1,8 @@
1
  import urllib.request
2
- import os
3
- import glob
4
  import requests
5
  from bs4 import BeautifulSoup
6
 
7
 
8
- def download_video(date):
9
- # Delete any existing .mp4 files
10
- for mp4_file in glob.glob("*.mp4"):
11
- os.remove(mp4_file)
12
-
13
- year = date[:4]
14
- url = f"https://www.cdep.ro/u02/comisii/{year}/cp46_{date}.mp4"
15
- try:
16
- urllib.request.urlretrieve(url, f"video_{date}.mp4")
17
- print("Video downloaded successfully.")
18
- return f"video_{date}.mp4"
19
- except urllib.error.HTTPError as e:
20
- if e.code == 404:
21
- print("No video exists for the given date.")
22
- else:
23
- print(f"An error occurred while downloading the video: {e}")
24
- except Exception as e:
25
- print(f"An unexpected error occurred: {e}")
26
-
27
-
28
  def get_response(url):
29
  try:
30
  response = requests.get(url)
 
1
  import urllib.request
 
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def get_response(url):
7
  try:
8
  response = requests.get(url)