Joshua Lochner committed on
Commit
94ad7ba
1 Parent(s): c415610

Add support for entering YouTube URL into textbox

Browse files
Files changed (2) hide show
  1. app.py +36 -5
  2. src/utils.py +5 -0
app.py CHANGED
@@ -19,6 +19,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src
19
  from predict import SegmentationArguments, ClassifierArguments, predict as pred, seconds_to_time # noqa
20
  from evaluate import EvaluationArguments
21
  from shared import device, CATGEGORY_OPTIONS
 
22
 
23
  st.set_page_config(
24
  page_title='SponsorBlock ML',
@@ -31,6 +32,34 @@ st.set_page_config(
31
  # 'About': "# This is a header. This is an *extremely* cool app!"
32
  }
33
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # https://github.com/google-research/text-to-text-transfer-transformer#released-model-checkpoints
35
  # https://github.com/google-research/text-to-text-transfer-transformer/blob/main/released_checkpoints.md#experimental-t5-pre-trained-model-checkpoints
36
 
@@ -140,7 +169,7 @@ def main():
140
  # Load prediction function
141
  predict = load_predict(model_id)
142
 
143
- video_id = st.text_input('Video ID:') # , placeholder='e.g., axtQvkSpoto'
144
 
145
  categories = st.multiselect('Categories:',
146
  CATGEGORY_OPTIONS.keys(),
@@ -152,12 +181,14 @@ def main():
152
  confidence_threshold = st.slider(
153
  'Confidence Threshold (%):', min_value=0, max_value=100)
154
 
155
- video_id_length = len(video_id)
156
- if video_id_length == 0:
 
157
  return
158
 
159
- elif video_id_length != 11:
160
- st.exception(ValueError('Invalid YouTube ID'))
 
161
  return
162
 
163
  with st.spinner('Running model...'):
 
19
  from predict import SegmentationArguments, ClassifierArguments, predict as pred, seconds_to_time # noqa
20
  from evaluate import EvaluationArguments
21
  from shared import device, CATGEGORY_OPTIONS
22
+ from utils import regex_search
23
 
24
  st.set_page_config(
25
  page_title='SponsorBlock ML',
 
32
  # 'About': "# This is a header. This is an *extremely* cool app!"
33
  }
34
  )
35
+
36
+
37
# Verbose-mode pattern that extracts an 11-character YouTube video ID from
# either a supported YouTube URL or a bare ID.
#
# The optional URL prefix is a NON-capturing group, so the video ID is both
# group 1 and the named group ``id``. Lookups that use the first group
# (e.g. ``match.group(1)``) therefore return the ID itself rather than the
# URL prefix.
#
# A bare ID must be the entire input (enforced by the lookahead branch), so
# over-long strings are rejected instead of being silently truncated to their
# first 11 characters. URLs may still carry arbitrary trailing text after the
# ID (extra query parameters, timestamps, fragments, ...).
YT_VIDEO_REGEX = r'''(?x)^
    (?:
        # http(s):// or protocol-independent URL
        (?:https?://|//)
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
            youtube\.googleapis\.com/)          # the various hostnames, with wildcard subdomains
            (?:.*?\#/)?                         # handle anchor (#/) redirect urls
            (?:                                 # the various things that can precede the ID:
                # v/ or embed/ or e/
                (?:(?:v|embed|e)/(?!videoseries))
                |(?:                            # or the v= param in all its forms
                    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                    (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?
                    (?:\?|\#!?)                 # the params delimiter ? or # or #!
                    # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
                    (?:.*?[&;])??
                    v=
                )
            ))
            |(?:
                youtu\.be                       # just youtu.be/xxxx
            )/)
        |
        # no URL prefix: the naked ID must be the whole input
        (?=[0-9A-Za-z_-]{11}$)
    )
    # here it is! the YouTube video ID
    (?P<id>[0-9A-Za-z_-]{11})'''
62
+
63
  # https://github.com/google-research/text-to-text-transfer-transformer#released-model-checkpoints
64
  # https://github.com/google-research/text-to-text-transfer-transformer/blob/main/released_checkpoints.md#experimental-t5-pre-trained-model-checkpoints
65
 
 
169
  # Load prediction function
170
  predict = load_predict(model_id)
171
 
172
+ video_input = st.text_input('Video URL/ID:') # , placeholder='e.g., axtQvkSpoto'
173
 
174
  categories = st.multiselect('Categories:',
175
  CATGEGORY_OPTIONS.keys(),
 
181
  confidence_threshold = st.slider(
182
  'Confidence Threshold (%):', min_value=0, max_value=100)
183
 
184
+
185
+
186
+ if len(video_input) == 0: # No input, do not continue
187
  return
188
 
189
+ video_id = regex_search(video_input, YT_VIDEO_REGEX)
190
+ if video_id is None:
191
+ st.exception(ValueError('Invalid YouTube URL/ID'))
192
  return
193
 
194
  with st.spinner('Running model...'):
src/utils.py CHANGED
@@ -183,3 +183,8 @@ def jaccard(x1, x2, y1, y2):
183
  intersection = max(0, min(x2, y2)-max(x1, y1))
184
  filled_union = max(x2, y2) - min(x1, y1)
185
  return intersection/filled_union if filled_union > 0 else 0
 
 
 
 
 
 
183
  intersection = max(0, min(x2, y2)-max(x1, y1))
184
  filled_union = max(x2, y2) - min(x1, y1)
185
  return intersection/filled_union if filled_union > 0 else 0
186
+
187
+
188
def regex_search(text, pattern, group=1, default=None):
    """Return one group of the first match of ``pattern`` in ``text``.

    Args:
        text: String to search.
        pattern: Regular expression (string or compiled pattern) passed to
            ``re.search``.
        group: Index or name of the group to return (defaults to the first
            group).
        default: Value returned when nothing usable was found.

    Returns:
        The text captured by ``group``, or ``default`` when the pattern does
        not match or when the requested group did not take part in the match.
    """
    match = re.search(pattern, text)
    if match is None:
        return default

    # An optional group that did not participate yields None; fall back to
    # the caller's default instead of leaking None past it.
    value = match.group(group)
    return default if value is None else value