Alex Volkov committed on
Commit
6fd7ef3
1 Parent(s): 8a19f9b

Initial commit, let's see if this works on HF

Browse files
Files changed (6) hide show
  1. README.md +1 -1
  2. app.py +117 -4
  3. download.py +146 -0
  4. fonts/arial.ttf +0 -0
  5. utils/__init__.py +0 -0
  6. utils/subs.py +84 -0
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Vidtranslator
3
- emoji: 🐨
4
  colorFrom: red
5
  colorTo: purple
6
  sdk: gradio
 
1
  ---
2
  title: Vidtranslator
3
+ emoji: 🎥
4
  colorFrom: red
5
  colorTo: purple
6
  sdk: gradio
app.py CHANGED
@@ -1,7 +1,120 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello there " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
+ from download import check_download, download, download_generator
 
4
 
5
+ import anvil.media
6
+ import os
7
+ import pathlib
8
+ from pathlib import Path
9
+ from shutil import rmtree
10
+
11
# Import the submodule explicitly: this file only imports `anvil.media`, so
# `anvil.server` was previously reachable only because download.py imports it.
import anvil.server

# Open the Anvil uplink at import time. Raises KeyError when the
# ANVIL_UPLINK_KEY environment variable is not set.
anvil.server.connect(os.environ['ANVIL_UPLINK_KEY'])
# Placeholder; rebound to the result of demo.queue() once the UI is built.
queue_placeholder = None

# UI components are created up front (unrendered) so that the handler
# functions below can use them as keys of their update dictionaries.
url_input = gr.Textbox(label="Youtube/Twitter/etc video URL (supports many services)", value='https://twitter.com/starsonxh/status/1552945347194142720', lines=1, elem_id="url_input")
download_status = gr.Textbox(label="Status:", value='', lines=1, elem_id="download_status")
# The media/text outputs start hidden; predownload() makes them visible as
# each pipeline stage delivers its result.
init_video = gr.Video(label="Downloaded video", visible=False)
init_audio = gr.Audio(label="Downloaded audio", visible=False)
output_text = gr.Textbox(label="Output text", lines=5, visible=False, max_lines=10)
sub_video = gr.Video(label="Subbed video", visible=False)
20
+
21
+
22
@anvil.server.callable
def cleanup_output_dir():
    """Delete everything inside the ``output`` directory next to this file.

    The working directory is first switched to the directory containing this
    file, so the relative ``output`` path resolves there. The ``output``
    directory itself is kept; a missing ``output`` directory is a no-op.
    """
    # make sure we're in the main directory
    os.chdir(pathlib.Path(__file__).parent.absolute())

    output_root = Path("output")
    if not output_root.is_dir():
        return

    # Only the top-level entries need handling: rmtree() removes a directory
    # together with everything below it, so there is no need to walk the tree
    # recursively while it is being deleted.
    for entry in output_root.iterdir():
        if entry.is_dir() and not entry.is_symlink():
            rmtree(entry)
        else:
            # Regular files and symlinks (rmtree refuses to operate on a
            # symlink, so links to directories are unlinked instead).
            entry.unlink()
32
+
33
@anvil.server.callable
def download_api(url):
    """Run the whole pipeline for *url* and return its last status message.

    Exposed to Anvil through ``anvil.server.callable``. The generator is
    drained completely; intermediate messages are discarded and only the
    final one is printed and returned. An empty string is returned when the
    generator yields nothing.
    """
    print(f'Request from Anvil with URL {url}')
    #TODO: figure out how to push an incoming event to the queue
    #THIS DOESN'T WORK queue_placeholder.push_event('download', url)
    #TODO: handle errors
    last_message = ''
    for update in download_generator(url):
        last_message = update['message']
    print(last_message)
    return last_message
44
+
45
def predownload(url):
    """Translate pipeline progress for *url* into Gradio component updates.

    For every progress dict produced by ``download_generator`` this yields a
    dict keyed by the module-level components: the status textbox always gets
    the current message, and the video/audio/subtitle components are updated
    (and made visible) when the corresponding key is present in the event.
    """
    for event in download_generator(url):
        info = event.get('meta')
        component_updates = {download_status: event.get('message', '')}

        if 'video' in event:
            component_updates[init_video] = gr.update(
                visible=True,
                value=event["video"],
                label=f"Init Video: {info['id']}.{info['ext']}",
            )
            component_updates[init_audio] = gr.update(
                visible=True,
                value=event["audio"],
                label=f"Extracted audio : {info['id']}.mp3",
            )

        if 'whisper_result' in event:
            transcript = event['whisper_result']
            component_updates[output_text] = gr.update(
                value=transcript.get('srt'),
                visible=True,
                label=f"Subtitles translated from {transcript.get('language')} (detected language)",
            )

        if 'sub_video' in event:
            component_updates[sub_video] = gr.update(
                visible=True,
                value=event["sub_video"],
                label=f"Subbed video: {info['id']}_translated.mp4",
            )

        yield component_updates
63
+
64
+
65
+ subtitled_video = False
66
+ css = """
67
+ #submit{
68
+ position: absolute;
69
+ flex:0 !important;
70
+ width: 120px;
71
+ right: 13px;
72
+ top: 40px;
73
+ }
74
+ #url_input{
75
+ font-size: 40px !important;
76
+ }
77
+ #download_status{
78
+ font-size: 40px !important;
79
+ }
80
+ .gradio-container {background-color: red}
81
+ #input_row{
82
+ position: relative;
83
+ }
84
+ .gradio-interface #submit{
85
+
86
+ }
87
+ """
88
# Assemble the Gradio Blocks UI from the components created earlier in the file.
# NOTE(review): this diff view has lost indentation, so the exact nesting of the
# `with` containers below cannot be confirmed from here.
+ with gr.Blocks(css=css+"") as demo:
89
+ gr.Markdown('# Vid Translator 0.1 - get english subtitles for videos in any language')
90
+ gr.Markdown('### Link to a tweet, youtube or other video and get a translated video with @openAi #whisper, built by [@altryne](https://twitter.com/altryne/)')
91
+ gr.Markdown('### This is used as the backend for [@vidtranslator](https://twitter.com/vidtranslator/)')
92
+ with gr.Row(elem_id="input_row"):
93
+ with gr.Group() as group:
94
+ url_input.render()
95
+ greet_btn = gr.Button("Download", elem_id='submit', variant='primary')
96
+ pause_for_editing = gr.Checkbox(label="Pause for editing")
97
+ with gr.Row():
98
+ with gr.Column():
99
+ download_status.render()
100
+ init_video.render()
101
+ init_audio.render()
102
+ with gr.Column():
103
+ with gr.Group() :
104
+ output_text.render()
# The next two buttons are created without any event handler attached.
105
+ gr.Button("Download srt file")
106
+ gr.Button("Bake subtitles into video")
107
+ sub_video.render()
# subtitled_video is assigned False earlier in the file, so the two buttons
# guarded by this check are not created.
108
+ if(subtitled_video):
109
+ download_video = gr.Button("Download Video", variant='primary')
110
+ download_srt = gr.Button("Download Srt", variant='primary')
111
+
# Clicking the button and submitting the URL textbox both run predownload with
# the same five output components; only the click handler sets an api_name.
112
+ greet_btn.click(fn=predownload, inputs=[url_input], outputs=[download_status, init_video, init_audio, output_text, sub_video], api_name='predownload')
113
+ url_input.submit(fn=predownload, inputs=[url_input], outputs=[download_status, init_video, init_audio, output_text, sub_video])
114
+
# Invisible button whose click handler registers cleanup_output_dir under
# api_name='cleanup_output_dir'. api_button holds the return value of .click(),
# not the Button itself.
115
+ api_button = gr.Button("API", variant='primary', visible=False).click(fn=cleanup_output_dir, inputs=[], outputs=[], api_name='cleanup_output_dir')
116
+
# Rebinds the module-level queue_placeholder to whatever demo.queue() returns.
117
+ queue_placeholder = demo.queue()
118
+
# Launch the app only when this file is executed as a script.
119
+ if __name__ == "__main__":
120
+ demo.launch(show_error=True, debug=True)
download.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import time
3
+ from pathlib import Path
4
+ import anvil.server
5
+ import anvil.media
6
+ from whisper.utils import write_srt
7
+ from youtube_dl import YoutubeDL
8
+ from youtube_dl.utils import DownloadError
9
+ import os
10
+ import tempfile
11
+ import json
12
+ import whisper
13
+ from whisper.tokenizer import LANGUAGES
14
+
15
+ import ffmpeg
16
+ from utils.subs import bake_subs
17
+
18
+ original_dir = os.getcwd()
19
+ output_dir = Path('output')
20
+
21
def download_generator(url):
    """Run the full pipeline for *url*, yielding a progress dict per step.

    Steps: probe the URL, download the video and extract audio, transcribe the
    audio, then bake the subtitles into the video. Every yielded dict has a
    ``"message"`` key; depending on the step it may also carry ``"meta"``,
    ``"video"``, ``"audio"``, ``"whisper_result"`` or ``"sub_video"``.

    Error handling:
      * A failure while probing the URL yields the error text and ends the
        generator normally.
      * A failure in any later step yields the error text (ffmpeg errors
        print their captured stdout/stderr instead) and then re-raises the
        original exception, so the pipeline never continues with missing
        artifacts.
    """
    ### Step 1 : check if video is available
    yield {"message": f"Checking {url} for videos"}
    try:
        meta = check_download(url)
        found_message = f"Found video with {meta['duration']} seconds duration from {meta['extractor']}"
        # Per-video working directory named after the video id. It is not
        # created here (presumably the downloader creates it -- verify).
        tempdir = output_dir / f"{meta['id']}"
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        return
    yield {"message": found_message, "meta": meta}

    ### Step 2 : Download video and extract audio
    yield {"message": f"Starting download with URL {url}, this may take a while"}
    try:
        meta, video, audio = download(url, tempdir)
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        raise
    yield {"message": "Downloaded video and extracted audio", "video": video, "audio": audio, "meta": meta}

    ### Step 3 : Transcribe with whisper
    yield {"message": f"[PLEASE WAIT] Starting whisper transcribe with {meta['id']}.mp3"}
    try:
        whisper_result = transcribe(audio)
        srt_path = tempdir / f"{meta['id']}.srt"
        with open(srt_path, "w", encoding="utf-8") as srt:
            write_srt(whisper_result["segments"], file=srt)
        # Read back with the same encoding the file was written in.
        whisper_result["srt"] = srt_path.read_text(encoding="utf-8")
    except Exception as e:
        os.chdir(original_dir)
        yield {"message": f"{e}"}
        # Stop here: step 4 needs srt_path, which may not exist. Previously
        # execution fell through and failed later with an unrelated error.
        raise
    #TODO: add return here so users can continue after editing subtitles
    yield {"message": "Transcribe successful", "whisper_result": whisper_result, "meta": meta}

    ### Step 4 : Bake subtitles into video with ffmpeg
    yield {"message": "[PLEASE WAIT] baking subtitles into video"}
    try:
        subbed_video_path = tempdir / f"{meta['id']}_translated.mp4"
        fontsdir = Path('fonts')
        bake_subs(video, subbed_video_path.absolute(), srt_path.absolute(), fontsdir)
    except ffmpeg.Error as e:
        # stdout/stderr may be None when ffmpeg output was not captured.
        print('stdout:', (e.stdout or b'').decode('utf8'))
        print('stderr:', (e.stderr or b'').decode('utf8'))
        raise
    except Exception as e:
        os.chdir(original_dir)
        print('error', file=sys.stderr)
        # Report the error before re-raising (the original yield sat after
        # the raise and could never run).
        yield {"message": f"{e}"}
        raise
    yield {"message": "Subtitled video ready!", "sub_video": str(subbed_video_path.absolute()), "meta": meta}
79
+
80
+
81
def progress_hook(d):
    """Print (and return) a one-line status message for a download event.

    This is a plain function, not a generator: a hook containing ``yield``
    only builds a generator object when called, so its body never executes.

    Args:
        d: progress dict with a ``'status'`` key. For ``'downloading'`` the
            percentage is computed from ``'downloaded_bytes'`` and
            ``'total_bytes'`` (or ``'total_bytes_estimate'``), falling back
            to the preformatted ``'_percent_str'`` when no total is present.
            For ``'finished'`` the ``'filename'`` key is reported.

    Returns:
        The message that was printed, or ``None`` for any other status.
    """
    status = d.get('status')
    if status == 'downloading':
        total = d.get('total_bytes') or d.get('total_bytes_estimate')
        downloaded = d.get('downloaded_bytes')
        if total and downloaded is not None:
            percent = f"{round(float(downloaded) / float(total) * 100, 1)}%"
        else:
            # No usable total size: use the preformatted percentage, if any.
            percent = str(d.get('_percent_str', '')).strip()
        message = f"downloading {percent}"
    elif status == 'finished':
        message = f"Downloaded {d['filename']}"
    else:
        return None
    print(message)
    return message
89
+
90
def download(url, tempdir):
    """Download the video at *url* into *tempdir* and extract an mp3 track.

    Args:
        url: address of the page/video to download.
        tempdir: target directory, as a ``Path`` or a string.

    Returns:
        A tuple ``(meta, video_path, audio_path)`` where ``meta`` is the info
        dict returned by ``extract_info`` and both paths are resolved
        absolute path strings built from ``meta['id']`` / ``meta['ext']``.

    Raises:
        DownloadError: propagated unchanged from youtube-dl.
    """
    tempdir = Path(tempdir)  # accept plain strings as well as Path objects
    ydl_opts = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        # keep the video file after the audio post-processor has run
        "keepvideo": True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        "skip_download": False,
        "outtmpl": f"{tempdir}/%(id)s.%(ext)s",
        "noplaylist": True,
        "verbose": False,
        "quiet": True,
        "progress_hooks": [progress_hook],
    }
    # The context manager lets YoutubeDL run its own teardown on exit.
    with YoutubeDL(ydl_opts) as ydl:
        meta = ydl.extract_info(url, download=True)

    video = tempdir / f"{meta['id']}.{meta['ext']}"
    audio = tempdir / f"{meta['id']}.mp3"
    print(str(video.resolve()))
    return meta, str(video.resolve()), str(audio.resolve())
120
+
121
def check_download(url):
    """Fetch the metadata for *url* without downloading any media.

    Returns the info dict produced by ``YoutubeDL.extract_info``; a
    ``DownloadError`` raised by youtube-dl propagates to the caller.
    """
    probe_options = {
        "format": "bestvideo[ext=mp4]+bestaudio/best",
        "skip_download": True,
        "verbose": False,
    }
    downloader = YoutubeDL(probe_options)
    return downloader.extract_info(url, download=False)
138
+
139
def transcribe(audio):
    """Translate the speech in *audio* to English text with Whisper.

    Replaces the hard-coded sample result (which ignored *audio*) with the
    real model call that had been left commented out.

    Args:
        audio: path of the audio file to transcribe.

    Returns:
        The result dict from ``model.transcribe`` with its ``"language"``
        entry replaced by the matching value from ``LANGUAGES`` (left as-is
        when there is no match).
    """
    print('Starting transcribe...')
    # NOTE: the model is loaded on every call; cache it if this becomes a
    # bottleneck.
    model = whisper.load_model('medium')
    output = model.transcribe(audio, task="translate")
    output["language"] = LANGUAGES.get(output["language"], output["language"])
    print(f'Finished transcribe from {output["language"]}', output["text"])
    return output
fonts/arial.ttf ADDED
Binary file (367 kB). View file
 
utils/__init__.py ADDED
File without changes
utils/subs.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ffmpeg
2
+ import os
3
+ from pathlib import Path, PureWindowsPath
4
+ import anvil.media
5
+ import os
6
+ from typing import Iterator, TextIO
7
+
8
+
9
+
10
def bake_subs(input_file, output_file, subs_file, fontsdir):
    """Burn the subtitles in *subs_file* into *input_file* using ffmpeg.

    Args:
        input_file: path of the source video.
        output_file: path of the subtitled video to write (overwritten if it
            already exists).
        subs_file: path of the subtitle file to render.
        fontsdir: directory containing the fonts available to the filter.

    Raises:
        ffmpeg.Error: when the ffmpeg process fails.
    """
    print(f"Baking {subs_file} into video... {input_file} -> {output_file}")

    fontstyle = 'Fontsize=18,OutlineColour=&H40000000,BorderStyle=3,FontName=Arial'
    video = ffmpeg.input(input_file)
    audio = video.audio
    (
        ffmpeg
        .concat(
            # The filter's `fontsdir` option takes a *directory*; passing the
            # path of arial.ttf itself (as before) does not register the font.
            video.filter('subtitles', str(subs_file), fontsdir=str(fontsdir), force_style=fontstyle),
            audio, v=1, a=1
        )
        .output(filename=str(output_file))
        .run(quiet=True, overwrite_output=True)
    )
27
+
28
+
29
def str2bool(string):
    """Convert the exact strings ``"True"`` / ``"False"`` to booleans.

    Raises:
        ValueError: for any other value.
    """
    str2val = {"True": True, "False": False}
    if string not in str2val:
        raise ValueError(
            f"Expected one of {set(str2val.keys())}, got {string}")
    return str2val[string]
36
+
37
+
38
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = "."):
    """Format a duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    Args:
        seconds: non-negative time offset in seconds.
        always_include_hours: emit the hours field even when it is zero.
        decimal_marker: separator between seconds and milliseconds
            (``"."`` by default; SubRip files use ``","``).

    Returns:
        The formatted timestamp, with hours zero-padded to two digits
        whenever they are included.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    milliseconds = round(seconds * 1000.0)

    hours, milliseconds = divmod(milliseconds, 3_600_000)
    minutes, milliseconds = divmod(milliseconds, 60_000)
    whole_seconds, milliseconds = divmod(milliseconds, 1_000)

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{milliseconds:03d}"


def write_srt(transcript: Iterator[dict], file: TextIO):
    """Write *transcript* segments to *file* in SubRip (.srt) format.

    Each segment must provide ``'start'``, ``'end'`` (seconds) and ``'text'``.
    Cues are numbered from 1 and followed by a blank line.
    """
    for i, segment in enumerate(transcript, start=1):
        # SubRip timestamps are HH:MM:SS,mmm -- note the comma separator.
        start = format_timestamp(segment['start'], always_include_hours=True, decimal_marker=",")
        end = format_timestamp(segment['end'], always_include_hours=True, decimal_marker=",")
        # "-->" inside the cue text would be mistaken for the timing arrow.
        text = segment['text'].strip().replace('-->', '->')
        print(
            f"{i}\n"
            f"{start} --> {end}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )
65
+
66
+
67
def filename(path):
    """Return the final component of *path* with its last extension removed."""
    base_name = os.path.basename(path)
    stem, _extension = os.path.splitext(base_name)
    return stem
69
+
70
+
71
+
72
+ # if __name__ == '__main__':
73
+ # meta = {
74
+ # "id": 1576155093245693954,
75
+ # "ext": 'mp4'
76
+ # }
77
+ # tempdirname = Path(f"encoding/temp/{meta['id']}")
78
+ # video_file_path = f"{meta['id']}.{meta['ext']}"
79
+ # srt_path = f"{meta['id']}.srt"
80
+ # out_path = f"{meta['id']}_translated.mp4"
81
+ # os.chdir(tempdirname)
82
+ # bake_subs(video_file_path, out_path, srt_path)
83
+ # anvil_media = anvil.media.from_file(out_path, 'video/mp4')
84
+ # print(anvil_media)