FarhadMadadzade committed on
Commit
de84263
1 Parent(s): 534a7d7
Files changed (4) hide show
  1. README.md +1 -0
  2. app.py +27 -13
  3. requirements.txt +3 -1
  4. video_downloader.py +23 -0
README.md CHANGED
@@ -8,6 +8,7 @@ sdk_version: 3.5
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ python_version: 3.8
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,27 +1,41 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
  import time
 
 
 
 
4
 
5
  pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
6
 
7
 
8
- def transcribe(rec=None, state=""):
9
- if rec is not None:
10
- audio = rec
11
- text = pipe(audio)["text"]
12
- state += text + ". "
13
- else:
14
- text = ""
15
- return text, state
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
  iface = gr.Interface(
19
- fn=transcribe,
20
- inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
21
- outputs=["text", "state"],
22
  title="Romanian Transcription Test",
23
- live=True,
24
  )
25
 
26
-
27
  iface.launch()
 
1
  from transformers import pipeline
2
  import gradio as gr
3
  import time
4
+ from video_downloader import download_video
5
+ from moviepy.editor import AudioFileClip
6
+ import datetime
7
+ import os
8
 
9
  pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian3")
10
 
11
 
12
def process_video(date):
    """Download the recording for *date* and transcribe its audio track.

    Args:
        date: Date string in ``YYYY-MM-DD`` form.

    Returns:
        A ``(video_path, transcription)`` tuple. When no video could be
        downloaded, ``video_path`` is ``None`` and the second element is a
        short explanatory message instead of a transcription.

    Raises:
        ValueError: If *date* does not match ``%Y-%m-%d``.
    """
    # Parse the date to the format yyyymmdd
    date = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")

    # Download the video. download_video reports failure by returning None,
    # so stop here rather than handing None to AudioFileClip.
    video_path = download_video(date)
    if video_path is None:
        return None, "No video could be downloaded for the given date."

    audio_path = f"audio_{date}.wav"
    try:
        # Extract audio from the video; always close the clip so its
        # underlying reader is released even if the export fails.
        clip = AudioFileClip(video_path)
        try:
            clip.write_audiofile(audio_path)
        finally:
            clip.close()

        # Transcribe the audio
        # NOTE(review): long recordings may need chunking (e.g.
        # chunk_length_s) for a Whisper pipeline - confirm against the
        # installed transformers version.
        with open(audio_path, "rb") as audio_file:
            audio = audio_file.read()
        transcription = pipe(audio)["text"]
    finally:
        # Remove the temporary audio file on success and on failure alike.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return video_path, transcription
32
 
33
 
34
  iface = gr.Interface(
35
+ fn=process_video,
36
+ inputs="date",
37
+ outputs=["video", "text"],
38
  title="Romanian Transcription Test",
 
39
  )
40
 
 
41
  iface.launch()
requirements.txt CHANGED
@@ -4,4 +4,6 @@ scikit-learn==1.1.1
4
  httpx==0.24.1
5
  gradio
6
  transformers
7
- torch
 
 
 
4
  httpx==0.24.1
5
  gradio
6
  transformers
7
+ torch
8
+ urllib3
9
+ moviepy
video_downloader.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ import os
3
+ import glob
4
+
5
+
6
def download_video(date):
    """Download the recording for *date* into the current directory.

    Any ``*.mp4`` file already present in the current directory is deleted
    first, so only one recording is kept on disk at a time.

    Args:
        date: Recording date as a ``yyyymmdd`` string; its first four
            characters form the year component of the download URL.

    Returns:
        The local file name ``video_{date}.mp4`` on success, or ``None`` when
        *date* is malformed or the download fails (a message is printed).
    """
    # Imported explicitly: the handler below needs urllib.error, which is
    # otherwise only reachable as a side effect of importing urllib.request.
    from urllib.error import HTTPError

    # The date ends up in both a URL and a file name, so reject anything that
    # is not exactly eight ASCII digits before touching the disk or network.
    if not (
        isinstance(date, str)
        and len(date) == 8
        and date.isascii()
        and date.isdigit()
    ):
        print(f"Invalid date {date!r}: expected a yyyymmdd string.")
        return None

    # Delete any existing .mp4 files
    for mp4_file in glob.glob("*.mp4"):
        os.remove(mp4_file)

    year = date[:4]
    url = f"https://www.cdep.ro/u02/comisii/{year}/cp46_{date}.mp4"
    target = f"video_{date}.mp4"
    try:
        urllib.request.urlretrieve(url, target)
    except HTTPError as e:
        if e.code == 404:
            print("No video exists for the given date.")
        else:
            print(f"An error occurred while downloading the video: {e}")
    except Exception as e:
        # Best-effort boundary: report the problem and signal failure with
        # None instead of propagating.
        print(f"An unexpected error occurred: {e}")
    else:
        print("Video downloaded successfully.")
        return target

    # A transfer that failed midway can leave a truncated file behind.
    if os.path.exists(target):
        os.remove(target)
    return None