FarhadMadadzade committed on
Commit
04717a8
1 Parent(s): 270a894

new model version

Browse files
Files changed (2) hide show
  1. app.py +5 -22
  2. video_downloader.py +3 -0
app.py CHANGED
@@ -10,7 +10,7 @@ from pydub import AudioSegment
10
  from pydub.silence import split_on_silence
11
  import re
12
 
13
- pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian")
14
 
15
 
16
  def process_video1(date):
@@ -38,18 +38,12 @@ def process_video1(date):
38
  audio_path = f"audio_{date}.wav"
39
  AudioFileClip(video_path).write_audiofile(audio_path)
40
 
41
- # Split the audio into chunks
42
  audio = AudioSegment.from_wav(audio_path)
43
  chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
44
 
45
- # Transcribe each chunk
46
- transcription = ""
47
- for i, chunk in enumerate(chunks):
48
- chunk.export(f"chunk{i}.wav", format="wav")
49
- with open(f"chunk{i}.wav", "rb") as audio_file:
50
- audio = audio_file.read()
51
- transcription += pipe(audio)["text"] + "\n "
52
- os.remove(f"chunk{i}.wav")
53
 
54
  # Remove the audio file
55
  os.remove(audio_path)
@@ -83,19 +77,8 @@ def process_video(date):
83
  # Extract audio from the short video
84
  audio_path = f"audio_{date}.wav"
85
  AudioFileClip(short_video_path).write_audiofile(audio_path)
86
-
87
- # Split the audio into chunks
88
  audio = AudioSegment.from_wav(audio_path)
89
- # chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
90
-
91
- # # Transcribe each chunk
92
- # transcription = ""
93
- # for i, chunk in enumerate(chunks):
94
- # chunk.export(f"chunk{i}.wav", format="wav")
95
- # with open(f"chunk{i}.wav", "rb") as audio_file:
96
- # audio = audio_file.read()
97
- # transcription += pipe(audio)["text"] + " "
98
- # os.remove(f"chunk{i}.wav")
99
  with open(audio_path, "rb") as audio_file:
100
  audio = audio_file.read()
101
  transcription = pipe(audio)["text"]
 
10
  from pydub.silence import split_on_silence
11
  import re
12
 
13
+ pipe = pipeline("automatic-speech-recognition", model="Artanis1551/whisper_romanian4")
14
 
15
 
16
  def process_video1(date):
 
38
  audio_path = f"audio_{date}.wav"
39
  AudioFileClip(video_path).write_audiofile(audio_path)
40
 
 
41
  audio = AudioSegment.from_wav(audio_path)
42
  chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
43
 
44
+ with open(audio_path, "rb") as audio_file:
45
+ audio = audio_file.read()
46
+ transcription = pipe(audio)["text"]
 
 
 
 
 
47
 
48
  # Remove the audio file
49
  os.remove(audio_path)
 
77
  # Extract audio from the short video
78
  audio_path = f"audio_{date}.wav"
79
  AudioFileClip(short_video_path).write_audiofile(audio_path)
 
 
80
  audio = AudioSegment.from_wav(audio_path)
81
+
 
 
 
 
 
 
 
 
 
82
  with open(audio_path, "rb") as audio_file:
83
  audio = audio_file.read()
84
  transcription = pipe(audio)["text"]
video_downloader.py CHANGED
@@ -93,3 +93,6 @@ def download_youtube_video(url):
93
  return video_path
94
  except Exception as e:
95
  print(f"An error occurred while downloading the video: {e}")
 
 
 
 
93
  return video_path
94
  except Exception as e:
95
  print(f"An error occurred while downloading the video: {e}")
96
+
97
+
98
+ download_video("20230503")