Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

App Files Community

FarhadMadadzade committed on Dec 10, 2023

Commit

80f7e89

•

1 Parent(s): bdc6930

final push with only romanian parliament

Browse files

Files changed (3) hide show

app.py +2 -65
requirements.txt +0 -1
video_downloader.py +0 -60

app.py CHANGED Viewed

@@ -1,62 +1,15 @@
 from transformers import pipeline
 import gradio as gr
-import time
-from video_downloader import download_video, download_video1, download_youtube_video
-from moviepy.editor import AudioFileClip, VideoFileClip
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
-import datetime
 import os
 from pydub import AudioSegment
-from pydub.silence import split_on_silence
 import re
 pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian")
-def process_video1(date):
-    # If the date is not in YYYY-MM-DD format, return an error message
-    date_pattern = re.compile(r"\b\d{4}-\d{2}-\d{2}\b")
-    if not date_pattern.match(date):
-        video_path = download_youtube_video(
-            "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
-        )
-        transcription = "Please enter a date in the format YYYY-MM-DD."
-        return video_path, transcription
-    try:
-        video_path = download_video1(date)
-        # Get the duration of the video
-        video = VideoFileClip(video_path)
-        duration = video.duration
-        # If the video is longer than 30 seconds, only take the first 30 seconds
-        if duration > 30:
-            video_path = f"short_{date}.mp4"
-            ffmpeg_extract_subclip(video_path, 0, 30, targetname=video_path)
-        # Extract audio from the video
-        audio_path = f"audio_{date}.wav"
-        AudioFileClip(video_path).write_audiofile(audio_path)
-        audio = AudioSegment.from_wav(audio_path)
-        chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
-        with open(audio_path, "rb") as audio_file:
-            audio = audio_file.read()
-        transcription = pipe(audio)["text"]
-        # Remove the audio file
-        os.remove(audio_path)
-    except:
-        video_path = download_youtube_video(
-            "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
-        )
-        transcription = "No decision was made on this date."
-    return video_path, transcription
 def process_video(date):
     # If the date is not in YYYY-MM-DD format, return an error message
     date_pattern = re.compile(r"\b\d{4}\d{2}\d{2}\b")
@@ -107,20 +60,4 @@ iface = gr.Interface(
     + "video will be used if it is longer than that.",
 )
-# iface = gr.Interface(
-#     fn=process_video1,
-#     inputs=[
-#         gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
-#     ],
-#     outputs=[
-#         gr.outputs.Video(),
-#         gr.Textbox(lines=100, max_lines=100, interactive=True),
-#     ],
-#     title="Transcribe Swedish Parliament Decisions",
-# description="This app transcribes the top Swedish Parliament decision"
-# + " video from the given date. Only the first 30 seconds of the "
-# + "video will be used if it is longer than that.",
-# )
 iface.launch()

 from transformers import pipeline
 import gradio as gr
+from video_downloader import download_video, download_youtube_video
+from moviepy.editor import AudioFileClip
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 import os
 from pydub import AudioSegment
 import re
 pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian")
 def process_video(date):
     # If the date is not in YYYY-MM-DD format, return an error message
     date_pattern = re.compile(r"\b\d{4}\d{2}\d{2}\b")
     + "video will be used if it is longer than that.",
 )
 iface.launch()

requirements.txt CHANGED Viewed

@@ -8,5 +8,4 @@ torch
 urllib3
 moviepy
 pydub
-beautifulsoup4
 pytube

 urllib3
 moviepy
 pydub
 pytube

video_downloader.py CHANGED Viewed

@@ -1,30 +1,9 @@
 import urllib.request
-import requests
-from bs4 import BeautifulSoup
 from pytube import YouTube
 import os
 import glob
-def get_response(url):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()
-    except requests.exceptions.HTTPError as e:
-        if e.response.status_code == 404:
-            print("No video exists for the given date range.")
-            return None
-        else:
-            print(f"An error occurred while getting the webpage: {e}")
-            return None
-    except Exception as e:
-        print(f"An unexpected error occurred: {e}")
-        return None
-    soup = BeautifulSoup(response.text, "html.parser")
-    return soup
 def download_video(date):
     # Delete any existing .mp4 files
     for mp4_file in glob.glob("*.mp4"):
@@ -45,45 +24,6 @@ def download_video(date):
         print(f"An unexpected error occurred: {e}")
-def download_video1(date):
-    # Delete any existing .mp4 files
-    for mp4_file in glob.glob("*.mp4"):
-        os.remove(mp4_file)
-    # Get the webpage
-    url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={date}&tom={date}&doktyp=kam-vo"
-    soup = get_response(url)
-    # Find the download link
-    try:
-        dateparse = date.replace("-", "")
-        video_page = [
-            a["href"]
-            for a in soup.find_all("a", href=True)
-            if a.get("aria-label") and dateparse in a["href"]
-        ][0]
-        # go to video_page and get all links
-        soup = get_response(video_page)
-        video_link = [
-            a["href"]
-            for a in soup.find_all("a", href=True)
-            if a["href"].startswith("https://mhdownload.riksdagen.se")
-        ][0]
-        print(video_link)
-    except IndexError:
-        print("No video exists for the given date range.")
-        return None
-    # Download the video
-    video_path = f"video_{date}.mp4"
-    try:
-        urllib.request.urlretrieve(video_link, video_path)
-        return video_path
-    except Exception as e:
-        print(f"An error occurred while downloading the video: {e}")
-        return None
 def download_youtube_video(url):
     try:
         youtube = YouTube(url)

 import urllib.request
 from pytube import YouTube
 import os
 import glob
 def download_video(date):
     # Delete any existing .mp4 files
     for mp4_file in glob.glob("*.mp4"):
         print(f"An unexpected error occurred: {e}")
 def download_youtube_video(url):
     try:
         youtube = YouTube(url)