Spaces:
Sleeping
Sleeping
v5
Browse files
app.py
CHANGED
@@ -29,7 +29,7 @@ def get_audio(url):
|
|
29 |
print('aodio over ..')
|
30 |
return audio_file
|
31 |
|
32 |
-
def get_transcript(url
|
33 |
audio_file = get_audio(url)
|
34 |
audio_file = 'tmp.mp4'
|
35 |
files = {
|
@@ -43,24 +43,7 @@ def get_transcript(url, model_size, lang, format):
|
|
43 |
|
44 |
|
45 |
|
46 |
-
def format_to_srt(segments):
|
47 |
-
output = ""
|
48 |
-
for i, segment in enumerate(segments):
|
49 |
-
output += f"{i + 1}\n"
|
50 |
-
output += f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
|
51 |
-
output += f"{segment['text']}\n\n"
|
52 |
-
return output
|
53 |
|
54 |
-
def format_timestamp(t):
|
55 |
-
hh = t//3600
|
56 |
-
mm = (t - hh*3600)//60
|
57 |
-
ss = t - hh*3600 - mm*60
|
58 |
-
mi = (t - int(t))*1000
|
59 |
-
return f"{int(hh):02d}:{int(mm):02d}:{int(ss):02d},{int(mi):03d}"
|
60 |
-
|
61 |
-
|
62 |
-
langs = ["None"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
|
63 |
-
model_size = list(whisper._MODELS.keys())
|
64 |
|
65 |
with gr.Blocks() as demo:
|
66 |
|
@@ -70,12 +53,7 @@ with gr.Blocks() as demo:
|
|
70 |
|
71 |
with gr.Row():
|
72 |
url = gr.Textbox(placeholder='Youtube video URL', label='URL')
|
73 |
-
|
74 |
-
with gr.Row():
|
75 |
-
|
76 |
-
model_size = gr.Dropdown(choices=model_size, value='tiny', label="Model")
|
77 |
-
lang = gr.Dropdown(choices=langs, value="None", label="Language (Optional)")
|
78 |
-
format = gr.Dropdown(choices=["None", ".srt"], value="None", label="Timestamps? (Optional)")
|
79 |
|
80 |
with gr.Row():
|
81 |
gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
|
@@ -84,6 +62,6 @@ with gr.Blocks() as demo:
|
|
84 |
with gr.Column():
|
85 |
outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
|
86 |
|
87 |
-
transcribe_btn.click(get_transcript, inputs=[url
|
88 |
|
89 |
demo.launch(debug=True)
|
|
|
29 |
print('aodio over ..')
|
30 |
return audio_file
|
31 |
|
32 |
+
def get_transcript(url):
|
33 |
audio_file = get_audio(url)
|
34 |
audio_file = 'tmp.mp4'
|
35 |
files = {
|
|
|
43 |
|
44 |
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
with gr.Blocks() as demo:
|
49 |
|
|
|
53 |
|
54 |
with gr.Row():
|
55 |
url = gr.Textbox(placeholder='Youtube video URL', label='URL')
|
56 |
+
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
with gr.Row():
|
59 |
gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
|
|
|
62 |
with gr.Column():
|
63 |
outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
|
64 |
|
65 |
+
transcribe_btn.click(get_transcript, inputs=[url], outputs=outputs)
|
66 |
|
67 |
demo.launch(debug=True)
|