FarhadMadadzade committed on
Commit
80f7e89
1 Parent(s): bdc6930

final push with only Romanian parliament

Browse files
Files changed (3) hide show
  1. app.py +2 -65
  2. requirements.txt +0 -1
  3. video_downloader.py +0 -60
app.py CHANGED
@@ -1,62 +1,15 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
- import time
4
- from video_downloader import download_video, download_video1, download_youtube_video
5
- from moviepy.editor import AudioFileClip, VideoFileClip
6
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
7
- import datetime
8
  import os
9
  from pydub import AudioSegment
10
- from pydub.silence import split_on_silence
11
  import re
12
 
13
  pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian")
14
 
15
 
16
- def process_video1(date):
17
- # If the date is not in YYYY-MM-DD format, return an error message
18
- date_pattern = re.compile(r"\b\d{4}-\d{2}-\d{2}\b")
19
- if not date_pattern.match(date):
20
- video_path = download_youtube_video(
21
- "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
22
- )
23
- transcription = "Please enter a date in the format YYYY-MM-DD."
24
-
25
- return video_path, transcription
26
- try:
27
- video_path = download_video1(date)
28
-
29
- # Get the duration of the video
30
- video = VideoFileClip(video_path)
31
- duration = video.duration
32
-
33
- # If the video is longer than 30 seconds, only take the first 30 seconds
34
- if duration > 30:
35
- video_path = f"short_{date}.mp4"
36
- ffmpeg_extract_subclip(video_path, 0, 30, targetname=video_path)
37
-
38
- # Extract audio from the video
39
- audio_path = f"audio_{date}.wav"
40
- AudioFileClip(video_path).write_audiofile(audio_path)
41
-
42
- audio = AudioSegment.from_wav(audio_path)
43
- chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
44
-
45
- with open(audio_path, "rb") as audio_file:
46
- audio = audio_file.read()
47
- transcription = pipe(audio)["text"]
48
-
49
- # Remove the audio file
50
- os.remove(audio_path)
51
- except:
52
- video_path = download_youtube_video(
53
- "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
54
- )
55
- transcription = "No decision was made on this date."
56
-
57
- return video_path, transcription
58
-
59
-
60
  def process_video(date):
61
  # If the date is not in YYYY-MM-DD format, return an error message
62
  date_pattern = re.compile(r"\b\d{4}\d{2}\d{2}\b")
@@ -107,20 +60,4 @@ iface = gr.Interface(
107
  + "video will be used if it is longer than that.",
108
  )
109
 
110
-
111
- # iface = gr.Interface(
112
- # fn=process_video1,
113
- # inputs=[
114
- # gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
115
- # ],
116
- # outputs=[
117
- # gr.outputs.Video(),
118
- # gr.Textbox(lines=100, max_lines=100, interactive=True),
119
- # ],
120
- # title="Transcribe Swedish Parliament Decisions",
121
- # description="This app transcribes the top Swedish Parliament decision"
122
- # + " video from the given date. Only the first 30 seconds of the "
123
- # + "video will be used if it is longer than that.",
124
- # )
125
-
126
  iface.launch()
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ from video_downloader import download_video, download_youtube_video
4
+ from moviepy.editor import AudioFileClip
 
5
  from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 
6
  import os
7
  from pydub import AudioSegment
 
8
  import re
9
 
10
  pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian")
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def process_video(date):
14
  # If the date is not in YYYY-MM-DD format, return an error message
15
  date_pattern = re.compile(r"\b\d{4}\d{2}\d{2}\b")
 
60
  + "video will be used if it is longer than that.",
61
  )
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  iface.launch()
requirements.txt CHANGED
@@ -8,5 +8,4 @@ torch
8
  urllib3
9
  moviepy
10
  pydub
11
- beautifulsoup4
12
  pytube
 
8
  urllib3
9
  moviepy
10
  pydub
 
11
  pytube
video_downloader.py CHANGED
@@ -1,30 +1,9 @@
1
  import urllib.request
2
- import requests
3
- from bs4 import BeautifulSoup
4
  from pytube import YouTube
5
  import os
6
  import glob
7
 
8
 
9
- def get_response(url):
10
- try:
11
- response = requests.get(url)
12
- response.raise_for_status()
13
- except requests.exceptions.HTTPError as e:
14
- if e.response.status_code == 404:
15
- print("No video exists for the given date range.")
16
- return None
17
- else:
18
- print(f"An error occurred while getting the webpage: {e}")
19
- return None
20
- except Exception as e:
21
- print(f"An unexpected error occurred: {e}")
22
- return None
23
-
24
- soup = BeautifulSoup(response.text, "html.parser")
25
- return soup
26
-
27
-
28
  def download_video(date):
29
  # Delete any existing .mp4 files
30
  for mp4_file in glob.glob("*.mp4"):
@@ -45,45 +24,6 @@ def download_video(date):
45
  print(f"An unexpected error occurred: {e}")
46
 
47
 
48
- def download_video1(date):
49
- # Delete any existing .mp4 files
50
- for mp4_file in glob.glob("*.mp4"):
51
- os.remove(mp4_file)
52
-
53
- # Get the webpage
54
- url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={date}&tom={date}&doktyp=kam-vo"
55
-
56
- soup = get_response(url)
57
- # Find the download link
58
- try:
59
- dateparse = date.replace("-", "")
60
- video_page = [
61
- a["href"]
62
- for a in soup.find_all("a", href=True)
63
- if a.get("aria-label") and dateparse in a["href"]
64
- ][0]
65
- # go to video_page and get all links
66
- soup = get_response(video_page)
67
- video_link = [
68
- a["href"]
69
- for a in soup.find_all("a", href=True)
70
- if a["href"].startswith("https://mhdownload.riksdagen.se")
71
- ][0]
72
- print(video_link)
73
- except IndexError:
74
- print("No video exists for the given date range.")
75
- return None
76
-
77
- # Download the video
78
- video_path = f"video_{date}.mp4"
79
- try:
80
- urllib.request.urlretrieve(video_link, video_path)
81
- return video_path
82
- except Exception as e:
83
- print(f"An error occurred while downloading the video: {e}")
84
- return None
85
-
86
-
87
  def download_youtube_video(url):
88
  try:
89
  youtube = YouTube(url)
 
1
  import urllib.request
 
 
2
  from pytube import YouTube
3
  import os
4
  import glob
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def download_video(date):
8
  # Delete any existing .mp4 files
9
  for mp4_file in glob.glob("*.mp4"):
 
24
  print(f"An unexpected error occurred: {e}")
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def download_youtube_video(url):
28
  try:
29
  youtube = YouTube(url)