Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

FarhadMadadzade committed on Dec 7, 2023

Commit

de84263

•

1 Parent(s): 534a7d7

fix

Files changed (4) hide show

README.md CHANGED Viewed

@@ -8,6 +8,7 @@ sdk_version: 3.5
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 app_file: app.py
 pinned: false
 license: apache-2.0
+python_version: 3.8
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,27 +1,41 @@
 from transformers import pipeline
 import gradio as gr
 import time
 pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
-def transcribe(rec=None, state=""):
-    if rec is not None:
-        audio = rec
-        text = pipe(audio)["text"]
-        state += text + ". "
-    else:
-        text = ""
-    return text, state
 iface = gr.Interface(
-    fn=transcribe,
-    inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
-    outputs=["text", "state"],
     title="Romanian Transcription Test",
-    live=True,
 )
 iface.launch()

 from transformers import pipeline
 import gradio as gr
 import time
+from video_downloader import download_video
+from moviepy.editor import AudioFileClip
+import datetime
+import os
 pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
+def process_video(date):
+    """Download the committee video for ``date`` and transcribe its audio track.
+
+    Args:
+        date: Date string in ``YYYY-MM-DD`` form.
+
+    Returns:
+        A ``(video_path, transcription)`` tuple: the path of the downloaded
+        video file and the text produced by the speech-recognition pipeline.
+
+    Raises:
+        ValueError: If ``date`` is not in ``YYYY-MM-DD`` form, or if no video
+            could be downloaded for that date.
+    """
+    # Parse the date to the format yyyymmdd
+    date = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")
+    # Download the video
+    video_path = download_video(date)
+    if video_path is None:
+        # download_video signals failure by returning None; stop here with a
+        # clear message instead of handing None to AudioFileClip.
+        raise ValueError(f"No video could be downloaded for date {date}.")
+    audio_path = f"audio_{date}.wav"
+    try:
+        # Extract audio from the video; the context manager closes the clip so
+        # its underlying reader is released even if writing fails.
+        with AudioFileClip(video_path) as clip:
+            clip.write_audiofile(audio_path)
+        # Transcribe the audio
+        with open(audio_path, "rb") as audio_file:
+            audio = audio_file.read()
+        transcription = pipe(audio)["text"]
+    finally:
+        # Remove the temporary audio file on success and on failure alike
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+    return video_path, transcription
 iface = gr.Interface(
+    # process_video receives the submitted date; its two return values
+    # (video path, transcription) feed the "video" and "text" outputs.
+    fn=process_video,
+    # NOTE(review): confirm "date" is an accepted input component shortcut
+    # for the Gradio version pinned by this Space.
+    inputs="date",
+    outputs=["video", "text"],
     title="Romanian Transcription Test",
 )
 iface.launch()

requirements.txt CHANGED Viewed

@@ -4,4 +4,6 @@ scikit-learn==1.1.1
 httpx==0.24.1
 gradio
 transformers
-torch

 httpx==0.24.1
 gradio
 transformers
+torch
+urllib3
+moviepy

video_downloader.py ADDED Viewed

+import urllib.request
+import os
+import glob
+def download_video(date):
+    """Download the recording for ``date`` into the current directory.
+
+    Any ``*.mp4`` files already present in the current directory are deleted
+    first.
+
+    Args:
+        date: Date as a ``yyyymmdd`` string; its first four characters are
+            used as the year component of the download URL.
+
+    Returns:
+        The local file name ``video_<date>.mp4`` on success, or ``None`` if
+        the download failed (the reason is printed).
+    """
+    # Import the exception class explicitly rather than relying on
+    # ``urllib.error`` being bound as a side effect of ``import urllib.request``.
+    from urllib.error import HTTPError
+
+    # Delete any existing .mp4 files
+    for mp4_file in glob.glob("*.mp4"):
+        os.remove(mp4_file)
+    year = date[:4]
+    url = f"https://www.cdep.ro/u02/comisii/{year}/cp46_{date}.mp4"
+    video_path = f"video_{date}.mp4"
+    try:
+        urllib.request.urlretrieve(url, video_path)
+    except HTTPError as e:
+        if e.code == 404:
+            print("No video exists for the given date.")
+        else:
+            print(f"An error occurred while downloading the video: {e}")
+    except Exception as e:
+        # Best-effort by design: report the problem and fall through to the
+        # ``None`` return instead of propagating.
+        print(f"An unexpected error occurred: {e}")
+    else:
+        print("Video downloaded successfully.")
+        return video_path
+    # Failure path: drop any partially written file so it is not mistaken for
+    # a complete download.
+    if os.path.exists(video_path):
+        os.remove(video_path)
+    return None