whisper-youtube-2-hf_dataset

Runtime error

juancopi81 committed on Feb 16, 2023

Commit

a09bf24

•

1 Parent(s): b56303a

Update transforming/whispertransform.py

Files changed (1) hide show

transforming/whispertransform.py CHANGED Viewed

@@ -30,36 +30,26 @@ class WhisperTransform(Transform):
         """Creates a new video with transcriptions created by Whisper.
         """
         # Create a YouTube object
-        try:
-            yt = YouTube(video.url)
-        except Exception as e:
-            print ("Video not available \n")
-            print(f"Exception: {e}")
         print(f"Video title and url: {video.title} {video.url}")
-        try:
-            audio_file = self._get_audio_from_video(yt)
-            result = self.model.transcribe(audio_file,
-                                           without_timestamps=self.without_timestamps)
-        except Exception as e:
-            print(f"Audio exception print: {e}")
-        else:
-            transcription = result["text"]
-            data = []
-            for seg in result['segments']:
-                data.append(OrderedDict({'start': seg['start'], 'end': seg['end'],'text': seg['text']}))
-            os.remove(audio_file)
-            return YoutubeVideo(channel_name = video.channel_name,
-                                url = video.url,
-                                title = video.title,
-                                description = video.description,
-                                transcription = transcription,
-                                segments = data)
     def _get_audio_from_video(self, yt: Any) -> Path:
         # TODO: Add credits

         """Creates a new video with transcriptions created by Whisper.
         """
         # Create a YouTube object
+        yt = YouTube(video.url)
         print(f"Video title and url: {video.title} {video.url}")
+        audio_file = self._get_audio_from_video(yt)
+        result = self.model.transcribe(audio_file,
+                                       without_timestamps=self.without_timestamps)
+        transcription = result["text"]
+        data = []
+        for seg in result['segments']:
+            data.append(OrderedDict({'start': seg['start'], 'end': seg['end'],'text': seg['text']}))
+        os.remove(audio_file)
+        return YoutubeVideo(channel_name = video.channel_name,
+                            url = video.url,
+                            title = video.title,
+                            description = video.description,
+                            transcription = transcription,
+                            segments = data)
     def _get_audio_from_video(self, yt: Any) -> Path:
         # TODO: Add credits