lanbogao committed on
Commit
b1e39f9
1 Parent(s): 4962756

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -4
app.py CHANGED
@@ -5,6 +5,7 @@ from fastapi import FastAPI, Response, Request
5
  import yt_dlp
6
  import uvicorn
7
  import re
 
8
 
9
  CUSTOM_PATH = "/gradio"
10
 
@@ -23,6 +24,22 @@ def read_main():
23
  # Stream the subtitle as a response
24
  #return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  def get_subtitle(url, lang='en'):
28
  if lang is None:
@@ -34,18 +51,24 @@ def get_subtitle(url, lang='en'):
34
  'subtitleslangs': [lang],
35
  'skip_download': True,
36
  }
37
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 
38
  info_dict = ydl.extract_info(url, download=True)
39
  video_id = info_dict.get("id", None)
40
  if video_id is None:
41
  return None
42
 
 
43
  print(info_dict)
44
  subtitle_file = f"{video_id}.{lang}.vtt"
45
  with open(subtitle_file, 'r') as f:
46
  subtitle_content = f.read()
47
  subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
48
  return subtitle_content
 
 
 
 
49
  return None
50
 
51
  def download_audio(video_url, quality: str = '128', speed: float = None):
@@ -80,15 +103,13 @@ def get_transcript(url, model_size, lang, format):
80
  if lang == "None":
81
  lang = None
82
 
83
- subtitle = get_subtitle(url, lang)
84
  print(subtitle)
85
  if subtitle:
86
  return subtitle
87
 
88
  model = whisper.load_model(model_size)
89
 
90
-
91
-
92
  result = model.transcribe(download_audio(url), fp16=False, language=lang)
93
 
94
  if format == "None":
 
5
  import yt_dlp
6
  import uvicorn
7
  import re
8
+ import os
9
 
10
  CUSTOM_PATH = "/gradio"
11
 
 
24
  # Stream the subtitle as a response
25
  #return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
26
 
27
def download_subtitle(url: str, lang: Optional[str] = None) -> Optional[str]:
    """Fetch the text of an uploaded subtitle track for a video, without downloading the video.

    Args:
        url: Video page URL understood by yt-dlp.
        lang: Subtitle language code (e.g. ``"en"``). When falsy, the first
            language that has at least one track is used.

    Returns:
        The decoded subtitle file contents, or ``None`` when the video has no
        subtitles, the requested language is not available, or no track
        exposes a URL.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)

        # "subtitles" maps a language code to a list of format dicts
        # ({"ext": ..., "url": ...}); it is a dict, so it cannot be indexed
        # with 0 the way a list can.
        subtitles = (info_dict or {}).get("subtitles") or {}

        if lang:
            # Returning None for a missing language lets the caller fall back
            # to transcription instead of serving the wrong language.
            tracks = subtitles.get(lang)
        else:
            tracks = next((formats for formats in subtitles.values() if formats), None)
        if not tracks:
            return None

        # Prefer WebVTT (the text format the rest of this file works with);
        # otherwise take the first track that has a URL at all.
        subtitle_url = next(
            (t["url"] for t in tracks if t.get("ext") == "vtt" and t.get("url")),
            None,
        ) or next((t["url"] for t in tracks if t.get("url")), None)
        if not subtitle_url:
            return None

        with ydl.urlopen(subtitle_url) as subtitle:
            return subtitle.read().decode()
43
 
44
  def get_subtitle(url, lang='en'):
45
  if lang is None:
 
51
  'subtitleslangs': [lang],
52
  'skip_download': True,
53
  }
54
+ try:
55
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
56
  info_dict = ydl.extract_info(url, download=True)
57
  video_id = info_dict.get("id", None)
58
  if video_id is None:
59
  return None
60
 
61
+ print(os.list)
62
  print(info_dict)
63
  subtitle_file = f"{video_id}.{lang}.vtt"
64
  with open(subtitle_file, 'r') as f:
65
  subtitle_content = f.read()
66
  subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
67
  return subtitle_content
68
+ except error:
69
+ print(error)
70
+ return None
71
+
72
  return None
73
 
74
  def download_audio(video_url, quality: str = '128', speed: float = None):
 
103
  if lang == "None":
104
  lang = None
105
 
106
+ subtitle = download_subtitle(url, lang)
107
  print(subtitle)
108
  if subtitle:
109
  return subtitle
110
 
111
  model = whisper.load_model(model_size)
112
 
 
 
113
  result = model.transcribe(download_audio(url), fp16=False, language=lang)
114
 
115
  if format == "None":