minliacom committed on
Commit
9420382
1 Parent(s): 1e63cc6

Add application file

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +561 -0
  3. requirements.txt +17 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ .DS_Store
app.py ADDED
@@ -0,0 +1,561 @@
+ import os
+ import requests
+ import json
+ import base64
+
+ # Build whisper.cpp and fetch the ggml model weights at startup
+ os.system('git clone https://github.com/ggerganov/whisper.cpp.git')
+ os.system('make -C ./whisper.cpp')
+ os.system('bash ./whisper.cpp/models/download-ggml-model.sh small')
+ os.system('bash ./whisper.cpp/models/download-ggml-model.sh base')
+ os.system('bash ./whisper.cpp/models/download-ggml-model.sh medium')
+ os.system('bash ./whisper.cpp/models/download-ggml-model.sh large')
+ os.system('bash ./whisper.cpp/models/download-ggml-model.sh base.en')
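+ # These steps run on every startup; a guard such as
+ # `if not os.path.exists('./whisper.cpp/main'):` (a hypothetical check, not
+ # part of this commit) could skip rebuilding and re-downloading on restarts.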
+
+
+ import gradio as gr
+ from pathlib import Path
+ import pysrt
+ import pandas as pd
+ import re
+ import time
+
+ from pytube import YouTube
+
+ headers = {'Authorization': os.environ['DeepL_API_KEY']}
+
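+ # DeepL's REST API expects the Authorization header value in the form
+ # "DeepL-Auth-Key <your key>", so DeepL_API_KEY is assumed to hold that
+ # full string rather than just the bare key.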
+
+ import torch
+
+ whisper_models = ["base", "small", "medium", "large", "base.en"]
+
+ custom_models = ["belarus-small"]
+
+ combined_models = whisper_models + custom_models
+
+
+ LANGUAGES = {
+     "en": "English",
+     "zh": "Chinese",
+     "de": "German",
+     "es": "Spanish",
+     "ru": "Russian",
+     "ko": "Korean",
+     "fr": "French",
+     "ja": "Japanese",
+     "pt": "Portuguese",
+     "tr": "Turkish",
+     "pl": "Polish",
+     "ca": "Catalan",
+     "nl": "Dutch",
+     "ar": "Arabic",
+     "sv": "Swedish",
+     "it": "Italian",
+     "id": "Indonesian",
+     "hi": "Hindi",
+     "fi": "Finnish",
+     "vi": "Vietnamese",
+     "he": "Hebrew",
+     "uk": "Ukrainian",
+     "el": "Greek",
+     "ms": "Malay",
+     "cs": "Czech",
+     "ro": "Romanian",
+     "da": "Danish",
+     "hu": "Hungarian",
+     "ta": "Tamil",
+     "no": "Norwegian",
+     "th": "Thai",
+     "ur": "Urdu",
+     "hr": "Croatian",
+     "bg": "Bulgarian",
+     "lt": "Lithuanian",
+     "la": "Latin",
+     "mi": "Maori",
+     "ml": "Malayalam",
+     "cy": "Welsh",
+     "sk": "Slovak",
+     "te": "Telugu",
+     "fa": "Persian",
+     "lv": "Latvian",
+     "bn": "Bengali",
+     "sr": "Serbian",
+     "az": "Azerbaijani",
+     "sl": "Slovenian",
+     "kn": "Kannada",
+     "et": "Estonian",
+     "mk": "Macedonian",
+     "br": "Breton",
+     "eu": "Basque",
+     "is": "Icelandic",
+     "hy": "Armenian",
+     "ne": "Nepali",
+     "mn": "Mongolian",
+     "bs": "Bosnian",
+     "kk": "Kazakh",
+     "sq": "Albanian",
+     "sw": "Swahili",
+     "gl": "Galician",
+     "mr": "Marathi",
+     "pa": "Punjabi",
+     "si": "Sinhala",
+     "km": "Khmer",
+     "sn": "Shona",
+     "yo": "Yoruba",
+     "so": "Somali",
+     "af": "Afrikaans",
+     "oc": "Occitan",
+     "ka": "Georgian",
+     "be": "Belarusian",
+     "tg": "Tajik",
+     "sd": "Sindhi",
+     "gu": "Gujarati",
+     "am": "Amharic",
+     "yi": "Yiddish",
+     "lo": "Lao",
+     "uz": "Uzbek",
+     "fo": "Faroese",
+     "ht": "Haitian creole",
+     "ps": "Pashto",
+     "tk": "Turkmen",
+     "nn": "Nynorsk",
+     "mt": "Maltese",
+     "sa": "Sanskrit",
+     "lb": "Luxembourgish",
+     "my": "Myanmar",
+     "bo": "Tibetan",
+     "tl": "Tagalog",
+     "mg": "Malagasy",
+     "as": "Assamese",
+     "tt": "Tatar",
+     "haw": "Hawaiian",
+     "ln": "Lingala",
+     "ha": "Hausa",
+     "ba": "Bashkir",
+     "jw": "Javanese",
+     "su": "Sundanese",
+ }
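+ # This table mirrors the language list of the openai/whisper tokenizer,
+ # i.e. the languages the multilingual Whisper models were trained on.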
+
+ # language code lookup by name, with a few language aliases
+ source_languages = {
+     **{language: code for code, language in LANGUAGES.items()},
+     "Burmese": "my",
+     "Valencian": "ca",
+     "Flemish": "nl",
+     "Haitian": "ht",
+     "Letzeburgesch": "lb",
+     "Pushto": "ps",
+     "Panjabi": "pa",
+     "Moldavian": "ro",
+     "Moldovan": "ro",
+     "Sinhalese": "si",
+     "Castilian": "es",
+     "Let the model analyze": "Let the model analyze"
+ }
+
+ DeepL_language_codes_for_translation = {
+     "Bulgarian": "BG",
+     "Czech": "CS",
+     "Danish": "DA",
+     "German": "DE",
+     "Greek": "EL",
+     "English": "EN",
+     "Spanish": "ES",
+     "Estonian": "ET",
+     "Finnish": "FI",
+     "French": "FR",
+     "Hungarian": "HU",
+     "Indonesian": "ID",
+     "Italian": "IT",
+     "Japanese": "JA",
+     "Lithuanian": "LT",
+     "Latvian": "LV",
+     "Dutch": "NL",
+     "Polish": "PL",
+     "Portuguese": "PT",
+     "Romanian": "RO",
+     "Russian": "RU",
+     "Slovak": "SK",
+     "Slovenian": "SL",
+     "Swedish": "SV",
+     "Turkish": "TR",
+     "Ukrainian": "UK",
+     "Chinese": "ZH"
+ }
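+ # DeepL's v2 API takes uppercase target-language codes; this mapping lists
+ # the target languages the API supported when this commit was written.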
+
+
+ transcribe_options = dict(beam_size=3, best_of=3, without_timestamps=False)
+ # NOTE: transcribe_options is never used below; transcription goes through
+ # the whisper.cpp command line instead of the Python whisper API.
+
+
+ source_language_list = list(source_languages.keys())
+ translation_models_list = list(DeepL_language_codes_for_translation.keys())
+
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print("DEVICE IS: ")
+ print(device)
+
+ videos_out_path = Path("./videos_out")
+ videos_out_path.mkdir(parents=True, exist_ok=True)
+
+
+ def get_youtube(video_url):
+     # Download the highest-resolution progressive mp4 stream of the video
+     yt = YouTube(video_url)
+     abs_video_path = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
+     print("Downloaded to path:")
+     print(abs_video_path)
+
+     return abs_video_path
+
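+ # pytube is pinned to 12.1.0 in requirements.txt; YouTube layout changes
+ # periodically break older pytube releases, so download failures here are
+ # usually a pytube version issue rather than a bug in this function.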
+
+ def speech_to_text(video_file_path, selected_source_lang, whisper_model):
+     """
+     # Youtube videos with translated subtitles using OpenAI Whisper and the DeepL API.
+     This space allows you to:
+     1. Download a youtube video from a given url
+     2. Watch it in the first video component
+     3. Run automatic speech recognition on the video using fast Whisper models
+     4. Translate the recognized transcriptions to 26 languages supported by DeepL
+     5. Download generated subtitles in .vtt and .srt formats
+     6. Watch the original video with generated subtitles
+
+     Speech recognition is based on models from OpenAI Whisper https://github.com/openai/whisper
+     This space uses the C++ implementation by https://github.com/ggerganov/whisper.cpp
+     """
+
+     if video_file_path is None:
+         raise ValueError("Error: no video input")
+     print(video_file_path)
+     try:
+         # Extract the audio track as 16 kHz mono wav, the format whisper.cpp expects
+         _, file_ending = os.path.splitext(f'{video_file_path}')
+         print(f'file ending is {file_ending}')
+         print("starting conversion to wav")
+         os.system(f'ffmpeg -i "{video_file_path}" -ar 16000 -ac 1 -c:a pcm_s16le "{video_file_path.replace(file_ending, ".wav")}"')
+         print("conversion to wav ready")
+
+         print("starting whisper c++")
+         srt_path = str(video_file_path.replace(file_ending, ".wav")) + ".srt"
+         os.system(f'rm -f {srt_path}')
+         if selected_source_lang == "Let the model analyze":
+             os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l "auto" -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
+         else:
+             if whisper_model in custom_models:
+                 os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./converted_models/ggml-{whisper_model}.bin -osrt')
+             else:
+                 os.system(f'./whisper.cpp/main "{video_file_path.replace(file_ending, ".wav")}" -t 4 -l {source_languages.get(selected_source_lang)} -m ./whisper.cpp/models/ggml-{whisper_model}.bin -osrt')
+         print("whisper done")
+     except Exception as e:
+         raise RuntimeError("Error converting video to audio") from e
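+     # whisper.cpp flags used above: -t sets the thread count, -l the spoken
+     # language ("auto" = let the model detect it), -m the ggml model file,
+     # and -osrt writes an .srt subtitle file next to the input audio.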
+
+     try:
+         # Collect the .srt cues produced by whisper.cpp into a dataframe of
+         # (start, end, text) rows with zero-padded HH:MM:SS.mmm timestamps
+         df = pd.DataFrame(columns=['start', 'end', 'text'])
+         srt_path = str(video_file_path.replace(file_ending, ".wav")) + ".srt"
+         subs = pysrt.open(srt_path)
+
+         objects = []
+         for sub in subs:
+             start_time = f'{sub.start.hours:02d}:{sub.start.minutes:02d}:{sub.start.seconds:02d}.{sub.start.milliseconds:03d}'
+             end_time = f'{sub.end.hours:02d}:{sub.end.minutes:02d}:{sub.end.seconds:02d}.{sub.end.milliseconds:03d}'
+             objects.append([sub.text, start_time, end_time])
+
+         for obj in objects:
+             srt_to_df = {
+                 'start': [obj[1]],
+                 'end': [obj[2]],
+                 'text': [obj[0]]
+             }
+             df = pd.concat([df, pd.DataFrame(srt_to_df)])
+
+         return df
+
+     except Exception as e:
+         raise RuntimeError("Error running inference with local model", e)
+
+
+
+ def translate_transcriptions(df, selected_translation_lang_2):
+     if selected_translation_lang_2 is None:
+         selected_translation_lang_2 = 'English'
+     df.reset_index(inplace=True)
+
+     print("start_translation")
+
+     # Send all subtitle lines to DeepL as one newline-joined block, then
+     # split the translated block back into one line per subtitle
+     text_combined = "\n".join(df['text'])
+
+     data = {
+         'text': text_combined,
+         'tag_handling': 'xml',
+         'target_lang': DeepL_language_codes_for_translation.get(selected_translation_lang_2)
+     }
+     try:
+         usage = requests.get('https://api-free.deepl.com/v2/usage', headers=headers)
+         usage = json.loads(usage.text)
+         try:
+             print('Usage is at: ' + str(usage['character_count']) + ' characters')
+         except Exception as e:
+             print(e)
+
+         if usage['character_count'] >= 490000:
+             print("USAGE CLOSE TO LIMIT")
+
+         response = requests.post('https://api-free.deepl.com/v2/translate', headers=headers, data=data)
+
+         translated_sentences = json.loads(response.text)
+         translated_sentences = translated_sentences['translations'][0]['text'].split('\n')
+         df['translation'] = translated_sentences
+     except Exception as e:
+         print("EXCEPTION WITH DEEPL API")
+         print(e)
+         # Fall back to the untranslated text so subtitle generation still works
+         df['translation'] = df['text']
+
+     print("translations done")
+
+     print("Starting subtitle file creation")
+     print(df.head())
+     df.reset_index(inplace=True)
+     with open('subtitles.vtt', 'w', encoding="utf-8") as file:
+         print("Starting WEBVTT-file creation")
+
+         for i in range(len(df)):
+             if i == 0:
+                 file.write('WEBVTT')
+                 # a blank line must separate the WEBVTT header from the first cue
+                 file.write('\n\n')
+             else:
+                 file.write(str(i + 1))
+                 file.write('\n')
+             start = df.iloc[i]['start']
+             file.write(f"{start.strip()}")
+
+             stop = df.iloc[i]['end']
+             file.write(' --> ')
+             file.write(f"{stop}")
+             file.write('\n')
+             file.writelines(df.iloc[i]['translation'])
+             if i != len(df) - 1:
+                 file.write('\n\n')
+
+     print("WEBVTT DONE")
+
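+     # WEBVTT cues follow the pattern: optional cue id line, a timing line
+     # "HH:MM:SS.mmm --> HH:MM:SS.mmm", then the cue text, with a blank line
+     # between cues.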
+     with open('subtitles.srt', 'w', encoding="utf-8") as file:
+         print("Starting SRT-file creation")
+
+         for i in range(len(df)):
+             file.write(str(i + 1))
+             file.write('\n')
+             # SubRip timestamps conventionally use a comma before the milliseconds
+             start = df.iloc[i]['start'].strip().replace('.', ',')
+             file.write(f"{start}")
+
+             stop = df.iloc[i]['end'].replace('.', ',')
+             file.write(' --> ')
+             file.write(f"{stop}")
+             file.write('\n')
+             file.writelines(df.iloc[i]['translation'])
+             if i != len(df) - 1:
+                 file.write('\n\n')
+
+     print("SRT DONE")
+     subtitle_files = ['subtitles.vtt', 'subtitles.srt']
+
+     return df, subtitle_files
+
+ # def burn_srt_to_video(srt_file, video_in):
+
+ #     print("Starting creation of video with srt")
+
+ #     try:
+ #         video_out = video_in.replace('.mp4', '_out.mp4')
+ #         print(os.system('ls -lrth'))
+ #         print(video_in)
+ #         print(video_out)
+ #         command = 'ffmpeg -i "{}" -y -vf subtitles=./subtitles.srt "{}"'.format(video_in, video_out)
+ #         os.system(command)
+
+ #         return video_out
+
+ #     except Exception as e:
+ #         print(e)
+ #         return video_out
+
+ def create_video_player(subtitle_files, video_in):
+
+     # Embed the video and the .vtt track as base64 data URIs so the result
+     # is a single self-contained HTML snippet
+     with open(video_in, "rb") as file:
+         video_base64 = base64.b64encode(file.read())
+     with open('./subtitles.vtt', "rb") as file:
+         subtitle_base64 = base64.b64encode(file.read())
+
+     video_player = f'''<video id="video" controls preload="metadata">
+         <source src="data:video/mp4;base64,{str(video_base64)[2:-1]}" type="video/mp4" />
+         <track
+             label="English"
+             kind="subtitles"
+             srclang="en"
+             src="data:text/vtt;base64,{str(subtitle_base64)[2:-1]}"
+             default />
+     </video>
+     '''
+     # video_player = gr.HTML(video_player)
+     return video_player
+
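+ # base64 inflates the payload by roughly a third, so embedding long videos
+ # this way produces very large HTML strings; it works for the short clips
+ # this space recommends.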
+
+
+ # ---- Gradio Layout -----
+ video_in = gr.Video(label="Video file", mirror_webcam=False)
+ youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
+ video_out = gr.Video(label="Video Out", mirror_webcam=False)
+
+
+ df_init = pd.DataFrame(columns=['start', 'end', 'text', 'translation'])
+
+ selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="Let the model analyze", label="Spoken language in video", interactive=True)
+ selected_translation_lang_2 = gr.Dropdown(choices=translation_models_list, type="value", value="English", label="In which language do you want the transcriptions?", interactive=True)
+ selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
+
+ transcription_df = gr.DataFrame(value=df_init, label="Transcription dataframe", row_count=(0, "dynamic"), max_rows=10, wrap=True, overflow_row_behaviour='paginate')
+ transcription_and_translation_df = gr.DataFrame(value=df_init, label="Transcription and translation dataframe", max_rows=10, wrap=True, overflow_row_behaviour='paginate')
+
+ subtitle_files = gr.File(
+     label="Download srt-file",
+     file_count="multiple",
+     type="file",
+     interactive=False,
+ )
+
+ video_player = gr.HTML('<p>video will be played here after you press the button at step 4</p>')
+
+
+ demo = gr.Blocks(css='''
+ #cut_btn, #reset_btn { align-self:stretch; }
+ #\\31 3 { max-width: 540px; }
+ .output-markdown {max-width: 65ch !important;}
+ ''')
+ demo.encrypt = False  # gradio 3.x-era flag, available in the pinned gradio==3.12
+ with demo:
+     transcription_var = gr.Variable()
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown('''
+             ### This space allows you to:
+             1. Download a youtube video from a given url
+             2. Watch it in the first video component
+             3. Run automatic speech recognition on the video using fast Whisper models
+             4. Translate the recognized transcriptions to 26 languages supported by DeepL
+             5. Download generated subtitles in .vtt and .srt formats
+             6. Watch the original video with generated subtitles
+             ''')
+
+         with gr.Column():
+             gr.Markdown('''
+             ### 1. Copy any Youtube video URL to the box below
+             (But please **consider using short videos** so others won't get queued), or click one of the examples and then press the "Step 1. Download Youtube video" button:
+             ''')
+             examples = gr.Examples(examples=
+                 ["https://www.youtube.com/watch?v=nlMuHtV82q8&ab_channel=NothingforSale24",
+                  "https://www.youtube.com/watch?v=JzPfMbG1vrE&ab_channel=ExplainerVideosByLauren",
+                  "https://www.youtube.com/watch?v=S68vvV0kod8&ab_channel=Pearl-CohnTelevision"],
+                 label="Examples", inputs=[youtube_url_in])
+             # Inspiration from https://huggingface.co/spaces/vumichien/whisper-speaker-diarization
+
+     with gr.Row():
+         with gr.Column():
+             youtube_url_in.render()
+             download_youtube_btn = gr.Button("Step 1. Download Youtube video")
+             download_youtube_btn.click(get_youtube, [youtube_url_in], [video_in])
+
+     with gr.Row():
+         with gr.Column():
+             video_in.render()
+         with gr.Column():
+             gr.Markdown('''
+             ##### Here you can start the transcription and translation process.
+             ##### Be aware that processing will take some time. With the base model it runs at roughly 3x real-time speed.
+             ##### **Please select the source language** for better transcriptions. 'Let the model analyze' sometimes makes mistakes and may lead to bad transcriptions.
+             ''')
+             selected_source_lang.render()
+             selected_whisper_model.render()
+             transcribe_btn = gr.Button("Step 2. Transcribe audio")
+             transcribe_btn.click(speech_to_text, [video_in, selected_source_lang, selected_whisper_model], transcription_df)
+
+     with gr.Row():
+         gr.Markdown('''
+         ##### Here you will get the transcription output
+         ##### ''')
+
+     with gr.Row():
+         with gr.Column():
+             transcription_df.render()
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown('''
+             ##### PLEASE READ BELOW
+             Here you can translate the transcriptions to 26 languages.
+             If the spoken language is not in the list, translation might not work. In that case the original transcriptions are used.
+             ''')
+             selected_translation_lang_2.render()
+             translate_transcriptions_button = gr.Button("Step 3. Translate transcription")
+             translate_transcriptions_button.click(translate_transcriptions, [transcription_df, selected_translation_lang_2], [transcription_and_translation_df, subtitle_files])
+             transcription_and_translation_df.render()
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown('''##### From here you can download subtitles in .srt or .vtt format''')
+             subtitle_files.render()
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown('''
+             ##### Now press the Step 4 button to create an output video with the translated transcriptions
+             ##### ''')
+             create_video_button = gr.Button("Step 4. Create and add subtitles to video")
+             create_video_button.click(create_video_player, [subtitle_files, video_in], [video_player])
+             video_player.render()
+
+
+ demo.launch()
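+ # demo.launch() starts the Gradio server; on a hosted Space this blocks and
+ # serves the app until the Space is stopped.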
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ gradio==3.12
+ ffmpeg-python
+ pandas==1.5.0
+ pytube==12.1.0
+ sacremoses
+ sentencepiece
+ tokenizers
+ torch
+ torchaudio
+ tqdm==4.64.1
+ EasyNMT==2.0.2
+ tqdm
+ nltk
+ transformers
+ pysrt
+ psutil==5.9.2
+ requests