Spaces:
Runtime error
To handle videos longer than one hour and to transcribe them in segments, we need to make several modifications to the yt_transcribe function.
Browse files
Changes made:
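Segmentation: The audio is split into segments of a specified length (the segment_length parameter, given in milliseconds; the default is 30*1000 ms, i.e. 30 seconds). This makes it possible to process long videos and is intended to keep the transcription of each individual segment accurate.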
FFmpeg Integration: I've added a placeholder for the ffmpeg_read function. This function should use FFmpeg to extract audio from the segment and convert it to the desired format and sampling rate. You'll need to implement this function based on your requirements.
Transcription: Each segment is transcribed separately, and the results are combined to produce the full transcription of the video.
Note: The _return_yt_html_embed function is referenced but not provided. Ensure you have an implementation for this function. Similarly, you'll need to implement the ffmpeg_read function to handle audio extraction and conversion using FFmpeg.
@@ -71,21 +71,42 @@ def download_yt_audio(yt_url, filename):
|
|
71 |
raise gr.Error(str(err))
|
72 |
|
73 |
|
74 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
html_embed_str = _return_yt_html_embed(yt_url)
|
76 |
|
77 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
78 |
filepath = os.path.join(tmpdirname, "video.mp4")
|
79 |
download_yt_audio(yt_url, filepath)
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
return html_embed_str, text
|
89 |
|
90 |
|
91 |
demo = gr.Blocks()
|
|
|
71 |
raise gr.Error(str(err))
|
72 |
|
73 |
|
74 |
+
def ffmpeg_read(file_path, sampling_rate):
    """Decode an audio file into a mono float32 waveform using the ffmpeg CLI.

    The file is resampled to ``sampling_rate`` and down-mixed to a single
    channel, so the result can be passed as the ``"array"`` entry of the
    ``{"array": ..., "sampling_rate": ...}`` dict that ``yt_transcribe``
    builds for each segment.

    Args:
        file_path: Path of the audio file to decode.
        sampling_rate: Target sampling rate in Hz.

    Returns:
        A one-dimensional ``numpy.ndarray`` of dtype float32 holding the
        decoded samples.

    Raises:
        ValueError: If the ``ffmpeg`` executable cannot be found, or if
            ffmpeg fails or produces no audio data for ``file_path``.
    """
    # Imported locally so this function does not depend on the module's
    # top-of-file import block.
    import subprocess

    import numpy as np

    command = [
        "ffmpeg",
        "-nostdin",  # never wait on stdin when running inside a server
        "-hide_banner",
        "-loglevel", "error",
        "-i", str(file_path),
        "-ac", "1",  # down-mix to mono
        "-ar", str(int(sampling_rate)),
        "-f", "f32le",  # raw little-endian 32-bit float samples
        "pipe:1",
    ]

    try:
        # Argument list + no shell: the path is never interpreted by a shell.
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except FileNotFoundError as err:
        raise ValueError(
            "ffmpeg was not found but is required to load audio files"
        ) from err

    if completed.returncode != 0 or not completed.stdout:
        detail = completed.stderr.decode("utf-8", errors="replace").strip()
        raise ValueError(
            f"ffmpeg could not decode {file_path!r}: "
            f"{detail or 'no audio data produced'}"
        )

    # "<f4" matches the f32le output format regardless of host byte order.
    return np.frombuffer(completed.stdout, dtype="<f4")
|
79 |
+
|
80 |
+
def yt_transcribe(yt_url, task, max_filesize=75.0, segment_length=30*1000):
    """Download a YouTube video's audio and transcribe it segment by segment.

    The audio is cut into consecutive slices of ``segment_length``
    milliseconds. Each slice is exported to a WAV file, decoded with
    ``ffmpeg_read`` and passed to the module-level ``pipe``; the per-segment
    texts are then joined with single spaces.

    Args:
        yt_url: URL of the video; forwarded to ``_return_yt_html_embed`` and
            ``download_yt_audio``.
        task: Value passed to the pipeline as ``generate_kwargs["task"]``.
        max_filesize: Kept for interface compatibility; not used by this
            function.
        segment_length: Length of each audio slice in milliseconds
            (default: 30 seconds).

    Returns:
        A ``(html_embed_str, full_transcription)`` tuple.

    Raises:
        ValueError: If ``segment_length`` is not a positive number.
    """
    # range() needs a positive integer step: 0 would raise a cryptic error and
    # a negative step would silently produce an empty transcription.
    segment_length = int(segment_length)
    if segment_length <= 0:
        raise ValueError("segment_length must be a positive number of milliseconds")

    html_embed_str = _return_yt_html_embed(yt_url)
    sampling_rate = pipe.feature_extractor.sampling_rate

    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "video.mp4")
        download_yt_audio(yt_url, filepath)

        # Load the audio using pydub
        audio = AudioSegment.from_file(filepath, format="mp4")

        # One reusable scratch file inside the managed temp directory. Writing
        # by name into a still-open NamedTemporaryFile is not portable, and
        # this file is removed together with the directory.
        segment_path = os.path.join(tmpdirname, "segment.wav")

        transcriptions = []
        # Slice lazily instead of materialising every segment up front, so a
        # long recording is not held in memory twice.
        for start in range(0, len(audio), segment_length):
            segment = audio[start:start + segment_length]
            # export() returns the open output file; close it so handles do
            # not pile up across many segments.
            segment.export(segment_path, format="wav").close()

            # Convert the segment using ffmpeg
            segment_data = ffmpeg_read(segment_path, sampling_rate)
            inputs = {"array": segment_data, "sampling_rate": sampling_rate}

            transcription = pipe(
                inputs,
                batch_size=BATCH_SIZE,
                generate_kwargs={"task": task},
                return_timestamps=True,
            )["text"]
            transcriptions.append(transcription.strip())

    # Skip empty results (e.g. silent segments) so the joined text keeps
    # single-space separation.
    full_transcription = " ".join(text for text in transcriptions if text)

    return html_embed_str, full_transcription
|
109 |
|
|
|
110 |
|
111 |
|
112 |
demo = gr.Blocks()
|