sofzcc committed on
Commit
a7c2258
1 Parent(s): 3e5eaad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -25
app.py CHANGED
@@ -228,32 +228,17 @@ import time
228
  from pytube import YouTube
229
  import yt_dlp
230
 
231
def download_video(url):
    """Download the video at ``url`` with yt-dlp and return its title.

    The download asks for the 'bestvideo+bestaudio/best' format and uses the
    output template 'downloads/%(title)s.%(ext)s'.

    Args:
        url: Address of the video to download.

    Returns:
        The ``'title'`` entry of the info dictionary produced by
        ``extract_info``. Note this is the title, not the path of the
        written file.

    Raises:
        Exception: Any error raised while downloading is printed and then
            re-raised unchanged.
    """
    print(f"Attempting to download video from URL: {url}")
    options = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': 'downloads/%(title)s.%(ext)s',
    }
    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            metadata = downloader.extract_info(url, download=True)
            print(f"Downloaded: {metadata['title']}")
            video_title = metadata['title']
            return video_title
    except Exception as err:
        # Report the failure, then let the caller decide how to handle it.
        print(f"Error: {err}")
        raise
245
-
246
 
 
247
 
248
- def video_to_text(filename):
249
- clip = VideoFileClip(filename)
250
- audio_filename = filename[:-4] + ".mp3"
251
- clip.audio.write_audiofile(audio_filename)
252
- clip.close()
253
- time.sleep(5)
254
-
255
  model = whisper.load_model("base")
256
- result = model.transcribe(audio_filename)
257
 
258
  transcription = result["text"]
259
 
@@ -720,8 +705,8 @@ def process_video(url):
720
  # transcript = YouTubeTranscriptApi.get_transcript(video_id)
721
  # transcript_text = ' '.join([t['text'] for t in transcript])
722
 
723
- video = download_video(url)
724
- transcript_text = video_to_text(video)
725
 
726
  # Clean the transcript text
727
  cleaned_text = clean_text(transcript_text)
 
228
  from pytube import YouTube
229
  import yt_dlp
230
 
231
def download_video_mp3(URL):
    """Download the audio-only stream of a YouTube video.

    Args:
        URL: Address of the YouTube video whose audio should be fetched.

    Returns:
        The value returned by ``download(mp3=True)`` on the audio-only
        stream (presumably the path of the saved file; confirm against the
        library in use).
    """
    # The parameter is spelled ``URL``. The previous body read ``url``, which
    # raised NameError unless a global of that name happened to exist.
    # NOTE(review): ``on_progress`` is not imported in the visible part of the
    # file; confirm it is brought into scope elsewhere.
    yt = YouTube(URL, on_progress_callback=on_progress)
    audio_stream = yt.streams.get_audio_only()
    # NOTE(review): the ``mp3`` keyword looks like the pytubefix API, while the
    # visible import is ``from pytube import YouTube``; verify which package
    # actually provides ``YouTube`` here.
    file = audio_stream.download(mp3=True)
    return file
237
 
238
+ def audio_to_text(filename):
239
+
 
 
 
 
 
240
  model = whisper.load_model("base")
241
+ result = model.transcribe(filename)
242
 
243
  transcription = result["text"]
244
 
 
705
  # transcript = YouTubeTranscriptApi.get_transcript(video_id)
706
  # transcript_text = ' '.join([t['text'] for t in transcript])
707
 
708
+ audio_file = download_video_mp3(url)
709
+ transcript_text = audio_to_text(audio_file)
710
 
711
  # Clean the transcript text
712
  cleaned_text = clean_text(transcript_text)