FarhadMadadzade
committed on
Commit
•
8dffbd8
1
Parent(s):
fb970e3
swedish model
Browse files
- app.py +21 -20
- video_downloader.py +5 -4
app.py
CHANGED
@@ -9,14 +9,14 @@ import os
|
|
9 |
from pydub import AudioSegment
|
10 |
from pydub.silence import split_on_silence
|
11 |
|
12 |
-
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/
|
13 |
|
14 |
|
15 |
-
def process_video1(
|
16 |
-
video_path = download_video1(
|
17 |
|
18 |
# Extract audio from the video
|
19 |
-
audio_path = f"audio_{
|
20 |
AudioFileClip(video_path).write_audiofile(audio_path)
|
21 |
|
22 |
# Split the audio into chunks
|
@@ -38,6 +38,20 @@ def process_video1(from_date, to_date):
|
|
38 |
return video_path, transcription
|
39 |
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
def process_video(date):
|
42 |
# Download the video
|
43 |
video_path = download_video(date)
|
@@ -70,26 +84,13 @@ def process_video(date):
|
|
70 |
|
71 |
|
72 |
# iface = gr.Interface(
|
73 |
-
# fn=
|
74 |
-
# inputs=
|
75 |
-
# gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
|
76 |
-
# gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
|
77 |
-
# ],
|
78 |
# outputs=[
|
79 |
# gr.outputs.Video(),
|
80 |
# gr.Textbox(lines=1000, max_lines=1000, interactive=True),
|
81 |
# ],
|
82 |
-
# title="
|
83 |
# )
|
84 |
|
85 |
-
iface = gr.Interface(
|
86 |
-
fn=process_video,
|
87 |
-
inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
|
88 |
-
outputs=[
|
89 |
-
gr.outputs.Video(),
|
90 |
-
gr.Textbox(lines=1000, max_lines=1000, interactive=True),
|
91 |
-
],
|
92 |
-
title="Romanian Transcription Test",
|
93 |
-
)
|
94 |
-
|
95 |
iface.launch()
|
|
|
9 |
from pydub import AudioSegment
|
10 |
from pydub.silence import split_on_silence
|
11 |
|
12 |
+
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_swedish")
|
13 |
|
14 |
|
15 |
+
def process_video1(date):
|
16 |
+
video_path = download_video1(date)
|
17 |
|
18 |
# Extract audio from the video
|
19 |
+
audio_path = f"audio_{date}.wav"
|
20 |
AudioFileClip(video_path).write_audiofile(audio_path)
|
21 |
|
22 |
# Split the audio into chunks
|
|
|
38 |
return video_path, transcription
|
39 |
|
40 |
|
41 |
+
iface = gr.Interface(
|
42 |
+
fn=process_video1,
|
43 |
+
inputs=[
|
44 |
+
gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
|
45 |
+
],
|
46 |
+
outputs=[
|
47 |
+
gr.outputs.Video(),
|
48 |
+
gr.Textbox(lines=1000, max_lines=1000, interactive=True),
|
49 |
+
],
|
50 |
+
title="Transcribe Swedish Parliament Decisions",
|
51 |
+
desription="This app transcribes the top Swedish Parliament decision video from the given date.",
|
52 |
+
)
|
53 |
+
|
54 |
+
|
55 |
def process_video(date):
|
56 |
# Download the video
|
57 |
video_path = download_video(date)
|
|
|
84 |
|
85 |
|
86 |
# iface = gr.Interface(
|
87 |
+
# fn=process_video,
|
88 |
+
# inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
|
|
|
|
|
|
|
89 |
# outputs=[
|
90 |
# gr.outputs.Video(),
|
91 |
# gr.Textbox(lines=1000, max_lines=1000, interactive=True),
|
92 |
# ],
|
93 |
+
# title="Romanian Transcription Test",
|
94 |
# )
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
iface.launch()
|
video_downloader.py
CHANGED
@@ -44,17 +44,18 @@ def get_response(url):
|
|
44 |
return soup
|
45 |
|
46 |
|
47 |
-
def download_video1(
|
48 |
# Get the webpage
|
49 |
-
url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={
|
50 |
|
51 |
soup = get_response(url)
|
52 |
# Find the download link
|
53 |
try:
|
|
|
54 |
video_page = [
|
55 |
a["href"]
|
56 |
for a in soup.find_all("a", href=True)
|
57 |
-
if a.get("aria-label") and a["
|
58 |
][0]
|
59 |
# go to video_page and get all links
|
60 |
soup = get_response(video_page)
|
@@ -69,7 +70,7 @@ def download_video1(from_date, to_date):
|
|
69 |
return None
|
70 |
|
71 |
# Download the video
|
72 |
-
video_path = f"video_{
|
73 |
try:
|
74 |
urllib.request.urlretrieve(video_link, video_path)
|
75 |
return video_path
|
|
|
44 |
return soup
|
45 |
|
46 |
|
47 |
+
def download_video1(date):
|
48 |
# Get the webpage
|
49 |
+
url = f"https://www.riksdagen.se/sv/sok/?avd=webbtv&from={date}&tom={date}&doktyp=kam-vo"
|
50 |
|
51 |
soup = get_response(url)
|
52 |
# Find the download link
|
53 |
try:
|
54 |
+
dateparse = date.replace("-", "")
|
55 |
video_page = [
|
56 |
a["href"]
|
57 |
for a in soup.find_all("a", href=True)
|
58 |
+
if a.get("aria-label") and dateparse in a["href"]
|
59 |
][0]
|
60 |
# go to video_page and get all links
|
61 |
soup = get_response(video_page)
|
|
|
70 |
return None
|
71 |
|
72 |
# Download the video
|
73 |
+
video_path = f"video_{date}.mp4"
|
74 |
try:
|
75 |
urllib.request.urlretrieve(video_link, video_path)
|
76 |
return video_path
|