FarhadMadadzade
committed on
Commit
•
04717a8
1
Parent(s):
270a894
new model version
Browse files
- app.py +5 -22
- video_downloader.py +3 -0
app.py
CHANGED
@@ -10,7 +10,7 @@ from pydub import AudioSegment
|
|
10 |
from pydub.silence import split_on_silence
|
11 |
import re
|
12 |
|
13 |
-
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/
|
14 |
|
15 |
|
16 |
def process_video1(date):
|
@@ -38,18 +38,12 @@ def process_video1(date):
|
|
38 |
audio_path = f"audio_{date}.wav"
|
39 |
AudioFileClip(video_path).write_audiofile(audio_path)
|
40 |
|
41 |
-
# Split the audio into chunks
|
42 |
audio = AudioSegment.from_wav(audio_path)
|
43 |
chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
chunk.export(f"chunk{i}.wav", format="wav")
|
49 |
-
with open(f"chunk{i}.wav", "rb") as audio_file:
|
50 |
-
audio = audio_file.read()
|
51 |
-
transcription += pipe(audio)["text"] + "\n "
|
52 |
-
os.remove(f"chunk{i}.wav")
|
53 |
|
54 |
# Remove the audio file
|
55 |
os.remove(audio_path)
|
@@ -83,19 +77,8 @@ def process_video(date):
|
|
83 |
# Extract audio from the short video
|
84 |
audio_path = f"audio_{date}.wav"
|
85 |
AudioFileClip(short_video_path).write_audiofile(audio_path)
|
86 |
-
|
87 |
-
# Split the audio into chunks
|
88 |
audio = AudioSegment.from_wav(audio_path)
|
89 |
-
|
90 |
-
|
91 |
-
# # Transcribe each chunk
|
92 |
-
# transcription = ""
|
93 |
-
# for i, chunk in enumerate(chunks):
|
94 |
-
# chunk.export(f"chunk{i}.wav", format="wav")
|
95 |
-
# with open(f"chunk{i}.wav", "rb") as audio_file:
|
96 |
-
# audio = audio_file.read()
|
97 |
-
# transcription += pipe(audio)["text"] + " "
|
98 |
-
# os.remove(f"chunk{i}.wav")
|
99 |
with open(audio_path, "rb") as audio_file:
|
100 |
audio = audio_file.read()
|
101 |
transcription = pipe(audio)["text"]
|
|
|
10 |
from pydub.silence import split_on_silence
|
11 |
import re
|
12 |
|
13 |
+
pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian4")
|
14 |
|
15 |
|
16 |
def process_video1(date):
|
|
|
38 |
audio_path = f"audio_{date}.wav"
|
39 |
AudioFileClip(video_path).write_audiofile(audio_path)
|
40 |
|
|
|
41 |
audio = AudioSegment.from_wav(audio_path)
|
42 |
chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
|
43 |
|
44 |
+
with open(audio_path, "rb") as audio_file:
|
45 |
+
audio = audio_file.read()
|
46 |
+
transcription = pipe(audio)["text"]
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# Remove the audio file
|
49 |
os.remove(audio_path)
|
|
|
77 |
# Extract audio from the short video
|
78 |
audio_path = f"audio_{date}.wav"
|
79 |
AudioFileClip(short_video_path).write_audiofile(audio_path)
|
|
|
|
|
80 |
audio = AudioSegment.from_wav(audio_path)
|
81 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
with open(audio_path, "rb") as audio_file:
|
83 |
audio = audio_file.read()
|
84 |
transcription = pipe(audio)["text"]
|
video_downloader.py
CHANGED
@@ -93,3 +93,6 @@ def download_youtube_video(url):
|
|
93 |
return video_path
|
94 |
except Exception as e:
|
95 |
print(f"An error occurred while downloading the video: {e}")
|
|
|
|
|
|
|
|
93 |
return video_path
|
94 |
except Exception as e:
|
95 |
print(f"An error occurred while downloading the video: {e}")
|
96 |
+
|
97 |
+
|
98 |
+
download_video("20230503")
|