BatuhanYilmaz committed
Commit: 613b97e
Parent(s): 480e8fe

Files changed:
- .gitattributes +0 -31
- .streamlit/config.toml +8 -0
- app.py → 01_🎥_Input_YouTube_Link.py +14 -15
- LICENSE +21 -0
- README.md +21 -12
- pages +0 -0
- pages/02_📼_Upload_Video_File.py +230 -0
- pages/03_🔊_Upload_Audio_File.py +205 -0
.gitattributes DELETED
@@ -1,31 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,8 @@
+[theme]
+primaryColor="#F63366"
+backgroundColor="#FFFFFF"
+secondaryBackgroundColor="#F0F2F6"
+textColor="#262730"
+font="sans serif"
+[server]
+maxUploadSize=1028
app.py → 01_🎥_Input_YouTube_Link.py RENAMED
@@ -75,7 +75,7 @@ def change_model(current_size, size):
 @st.cache(allow_output_mutation=True)
 def inference(link, loaded_model, task):
     yt = YouTube(link)
-    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.
+    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
     if task == "Transcribe":
         options = dict(task="transcribe", best_of=5)
         results = loaded_model.transcribe(path, **options)
@@ -153,18 +153,18 @@ def main():
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
 
-                datavtt = f.read()
+            with open("transcript.vtt", "w+",encoding='utf8') as f:
+                f.writelines(results[1])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                datavtt = f.read()
 
+            with open("transcript.srt", "w+",encoding='utf8') as f:
+                f.writelines(results[2])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                datasrt = f.read()
+
             with col5:
                 st.download_button(label="Download Transcript (.txt)",
                                    data=datatxt,
@@ -184,7 +184,7 @@ def main():
 
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, "audio.
+                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
             with col8:
@@ -212,7 +212,6 @@ def main():
             with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                 datatxt = f.read()
 
-
             with open("transcript.vtt", "w+",encoding='utf8') as f:
                 f.writelines(results[1])
                 f.close()
@@ -243,7 +242,7 @@ def main():
 
             with col4:
                 with st.spinner("Generating Subtitled Video"):
-                    video_with_subs = generate_subtitled_video(video, "audio.
+                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                 st.video(video_with_subs)
                 st.balloons()
             with col8:
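For orientation, the renamed page's core flow is: download the audio-only stream with pytube under the filename audio.mp3 (the fix above), then transcribe it with Whisper. Below is a minimal sketch under those assumptions; transcribe_youtube is a hypothetical wrapper name, not the app's actual function.

```python
# Minimal sketch of the flow shown in the diff above (not the app's exact code).
# Assumes pytube and openai-whisper are installed; transcribe_youtube is hypothetical.
import whisper
from pytube import YouTube

def transcribe_youtube(link: str, model_size: str = "base") -> str:
    # Audio-only stream, saved under the filename the fix introduces.
    path = YouTube(link).streams.filter(only_audio=True)[0].download(filename="audio.mp3")
    model = whisper.load_model(model_size)
    # task="transcribe" keeps the original language; the app also offers task="translate".
    result = model.transcribe(path, task="transcribe", best_of=5)
    return result["text"]
```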
LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Batuhan Yılmaz
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md CHANGED
@@ -1,13 +1,22 @@
----
-title: Auto Subtitled Video Generator
-emoji: 📚
-colorFrom: yellow
-colorTo: blue
-sdk: streamlit
-sdk_version: 1.10.0
-app_file: app.py
-pinned: false
-license: mit
----
+## Auto-Subtitled-Video-Generator
 
-
+![Python](https://img.shields.io/badge/Python-FFD43B?style=for-the-badge&logo=python&logoColor=blue)
+![Streamlit](https://img.shields.io/badge/Streamlit-FF4B4B?style=for-the-badge&logo=Streamlit&logoColor=white)
+![OpenAI](https://camo.githubusercontent.com/ea872adb9aba9cf6b4e976262f6d4b83b97972d0d5a7abccfde68eb2ae55325f/68747470733a2f2f696d672e736869656c64732e696f2f7374617469632f76313f7374796c653d666f722d7468652d6261646765266d6573736167653d4f70656e414926636f6c6f723d343132393931266c6f676f3d4f70656e4149266c6f676f436f6c6f723d464646464646266c6162656c3d)
+
+#### About this project
+- This project is an automatic speech recognition application that takes a YouTube video link or a video file as input to generate a video with subtitles.
+- You can also upload an audio file to generate a transcript as .txt, .vtt, .srt files.
+- The application performs 2 tasks:
+    - Detects the language, transcribes the input video in its original language.
+    - Detects the language, translates it into English and then transcribes.
+- Downloaded the video of the input link using [pytube](https://github.com/pytube/pytube).
+- Generated a transcription of the video using the [OpenAI Whisper](https://openai.com/blog/whisper) model.
+- Saved the transcriptions as .txt, .vtt and .srt files.
+- Generated a subtitled version of the input video using [ffmpeg](https://github.com/FFmpeg).
+- Displayed the original video and the subtitled video side by side.
+- Built a multipage web app using [Streamlit](https://streamlit.io) and hosted on [HuggingFace Spaces](https://huggingface.co/spaces).
+- You can download the generated .txt, .vtt, .srt files and the subtitled video.
+- You can use the app via this [link](https://huggingface.co/spaces/BatuhanYilmaz/Auto-Subtitled-Video-Generator).
+
+![](auto-sub.gif)
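The ffmpeg step listed in the README corresponds to burning the generated .srt file onto the video, as generate_subtitled_video does in the pages added below. Here is a minimal sketch with ffmpeg-python; burn_subtitles is a hypothetical name used only for this example.

```python
# Minimal sketch of burning an .srt onto a video with ffmpeg-python,
# mirroring generate_subtitled_video() in the added pages. burn_subtitles is hypothetical.
import ffmpeg

def burn_subtitles(video_path: str, audio_path: str, srt_path: str, out_path: str = "final.mp4") -> str:
    video = ffmpeg.input(video_path)
    audio = ffmpeg.input(audio_path)
    # Apply the subtitles filter to the video stream, then mux it with the audio track.
    (
        ffmpeg
        .concat(video.filter("subtitles", srt_path), audio, v=1, a=1)
        .output(out_path)
        .run(quiet=True, overwrite_output=True)
    )
    return out_path
```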
pages DELETED
File without changes
pages/02_📼_Upload_Video_File.py ADDED
@@ -0,0 +1,230 @@
+import whisper
+import streamlit as st
+from streamlit_lottie import st_lottie
+from utils import write_vtt, write_srt
+import ffmpeg
+import requests
+from typing import Iterator
+from io import StringIO
+import numpy as np
+import pathlib
+import os
+
+st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")
+
+# Define a function that we can use to load lottie files from a link.
+@st.cache(allow_output_mutation=True)
+def load_lottieurl(url: str):
+    r = requests.get(url)
+    if r.status_code != 200:
+        return None
+    return r.json()
+
+
+APP_DIR = pathlib.Path(__file__).parent.absolute()
+
+LOCAL_DIR = APP_DIR / "local"
+LOCAL_DIR.mkdir(exist_ok=True)
+save_dir = LOCAL_DIR / "output"
+save_dir.mkdir(exist_ok=True)
+
+
+loaded_model = whisper.load_model("base")
+current_size = "None"
+
+
+col1, col2 = st.columns([1, 3])
+with col1:
+    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_HjK9Ol.json")
+    st_lottie(lottie, speed=1, height=250, width=250)
+
+with col2:
+    st.write("""
+    ## Auto Subtitled Video Generator
+    ##### Upload a video file and get a video with subtitles.
+    ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
+    ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
+    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
+
+
+@st.cache(allow_output_mutation=True)
+def change_model(current_size, size):
+    if current_size != size:
+        loaded_model = whisper.load_model(size)
+        return loaded_model
+    else:
+        raise Exception("Model size is the same as the current size.")
+
+
+@st.cache(allow_output_mutation=True)
+def inferecence(loaded_model, uploaded_file, task):
+    with open(f"{save_dir}/input.mp4", "wb") as f:
+        f.write(uploaded_file.read())
+    audio = ffmpeg.input(f"{save_dir}/input.mp4")
+    audio = ffmpeg.output(audio, f"{save_dir}/output.wav", acodec="pcm_s16le", ac=1, ar="16k")
+    ffmpeg.run(audio, overwrite_output=True)
+    if task == "Transcribe":
+        options = dict(task="transcribe", best_of=5)
+        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
+        vtt = getSubs(results["segments"], "vtt", 80)
+        srt = getSubs(results["segments"], "srt", 80)
+        lang = results["language"]
+        return results["text"], vtt, srt, lang
+    elif task == "Translate":
+        options = dict(task="translate", best_of=5)
+        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
+        vtt = getSubs(results["segments"], "vtt", 80)
+        srt = getSubs(results["segments"], "srt", 80)
+        lang = results["language"]
+        return results["text"], vtt, srt, lang
+    else:
+        raise ValueError("Task not supported")
+
+
+def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
+    segmentStream = StringIO()
+
+    if format == 'vtt':
+        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
+    elif format == 'srt':
+        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
+    else:
+        raise Exception("Unknown format " + format)
+
+    segmentStream.seek(0)
+    return segmentStream.read()
+
+
+def generate_subtitled_video(video, audio, transcript):
+    video_file = ffmpeg.input(video)
+    audio_file = ffmpeg.input(audio)
+    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
+    video_with_subs = open("final.mp4", "rb")
+    return video_with_subs
+
+
+def main():
+    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
+    loaded_model = change_model(current_size, size)
+    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
+             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
+    input_file = st.file_uploader("File", type=["mp4", "avi", "mov", "mkv"])
+    # get the name of the input_file
+    if input_file is not None:
+        filename = input_file.name[:-4]
+    else:
+        filename = None
+    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
+    if task == "Transcribe":
+        if st.button("Transcribe"):
+            results = inferecence(loaded_model, input_file, task)
+            col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
+            with col3:
+                st.video(input_file)
+
+            with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(results[0])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
+                datatxt = f.read()
+
+            with open("transcript.vtt", "w+",encoding='utf8') as f:
+                f.writelines(results[1])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                datavtt = f.read()
+
+            with open("transcript.srt", "w+",encoding='utf8') as f:
+                f.writelines(results[2])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                datasrt = f.read()
+
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                   data=datatxt,
+                                   file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                   data=datavtt,
+                                   file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                   data=datasrt,
+                                   file_name="transcript.srt")
+            with col9:
+                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
+            with col4:
+                with st.spinner("Generating Subtitled Video"):
+                    video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav", "transcript.srt")
+                st.video(video_with_subs)
+                st.snow()
+            with col8:
+                st.download_button(label="Download Video with Subtitles",
+                                   data=video_with_subs,
+                                   file_name=f"{filename}_with_subs.mp4")
+    elif task == "Translate":
+        if st.button("Translate to English"):
+            results = inferecence(loaded_model, input_file, task)
+            col3, col4 = st.columns(2)
+            col5, col6, col7, col8 = st.columns(4)
+            col9, col10 = st.columns(2)
+            with col3:
+                st.video(input_file)
+
+            with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(results[0])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
+                datatxt = f.read()
+
+            with open("transcript.vtt", "w+",encoding='utf8') as f:
+                f.writelines(results[1])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                datavtt = f.read()
+
+            with open("transcript.srt", "w+",encoding='utf8') as f:
+                f.writelines(results[2])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                datasrt = f.read()
+
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                   data=datatxt,
+                                   file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                   data=datavtt,
+                                   file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                   data=datasrt,
+                                   file_name="transcript.srt")
+            with col9:
+                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
+            with col4:
+                with st.spinner("Generating Subtitled Video"):
+                    video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav", "transcript.srt")
+                st.video(video_with_subs)
+                st.snow()
+            with col8:
+                st.download_button(label="Download Video with Subtitles",
+                                   data=video_with_subs,
+                                   file_name=f"{filename}_with_subs.mp4")
+    else:
+        st.error("Please select a task.")
+
+
+if __name__ == "__main__":
+    main()
+    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
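Both upload pages re-encode the upload to 16 kHz mono PCM WAV before passing it to Whisper. Below is a standalone sketch of that preprocessing step, assuming ffmpeg-python and a local ffmpeg binary; to_whisper_wav is a hypothetical name used only for this example.

```python
# Standalone sketch of the 16 kHz mono WAV re-encode both upload pages perform
# before transcription. Assumes ffmpeg-python plus an ffmpeg binary; to_whisper_wav is hypothetical.
import ffmpeg

def to_whisper_wav(src_path: str, dst_path: str = "output.wav") -> str:
    stream = ffmpeg.input(src_path)
    # pcm_s16le, mono, 16 kHz: the same arguments the pages above pass to ffmpeg.output().
    stream = ffmpeg.output(stream, dst_path, acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(stream, overwrite_output=True)
    return dst_path
```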
pages/03_🔊_Upload_Audio_File.py ADDED
@@ -0,0 +1,205 @@
+import whisper
+import streamlit as st
+from streamlit_lottie import st_lottie
+from utils import write_vtt, write_srt
+import ffmpeg
+import requests
+from typing import Iterator
+from io import StringIO
+import numpy as np
+import pathlib
+import os
+
+st.set_page_config(page_title="Auto Transcriber", page_icon="🔊", layout="wide")
+
+# Define a function that we can use to load lottie files from a link.
+@st.cache(allow_output_mutation=True)
+def load_lottieurl(url: str):
+    r = requests.get(url)
+    if r.status_code != 200:
+        return None
+    return r.json()
+
+
+APP_DIR = pathlib.Path(__file__).parent.absolute()
+
+LOCAL_DIR = APP_DIR / "local_audio"
+LOCAL_DIR.mkdir(exist_ok=True)
+save_dir = LOCAL_DIR / "output"
+save_dir.mkdir(exist_ok=True)
+
+
+col1, col2 = st.columns([1, 3])
+with col1:
+    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_1xbk4d2v.json")
+    st_lottie(lottie, speed=1, height=250, width=250)
+
+with col2:
+    st.write("""
+    ## Auto Transcriber
+    ##### Input an audio file and get a transcript.
+    ###### ➠ If you want to transcribe the audio in its original language, select the task as "Transcribe"
+    ###### ➠ If you want to translate the transcription to English, select the task as "Translate"
+    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)
+
+loaded_model = whisper.load_model("base")
+current_size = "None"
+
+
+@st.cache(allow_output_mutation=True)
+def change_model(current_size, size):
+    if current_size != size:
+        loaded_model = whisper.load_model(size)
+        return loaded_model
+    else:
+        raise Exception("Model size is the same as the current size.")
+
+@st.cache(allow_output_mutation=True)
+def inferecence(loaded_model, uploaded_file, task):
+    with open(f"{save_dir}/input.mp3", "wb") as f:
+        f.write(uploaded_file.read())
+    audio = ffmpeg.input(f"{save_dir}/input.mp3")
+    audio = ffmpeg.output(audio, f"{save_dir}/output.wav", acodec="pcm_s16le", ac=1, ar="16k")
+    ffmpeg.run(audio, overwrite_output=True)
+    if task == "Transcribe":
+        options = dict(task="transcribe", best_of=5)
+        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
+        vtt = getSubs(results["segments"], "vtt", 80)
+        srt = getSubs(results["segments"], "srt", 80)
+        lang = results["language"]
+        return results["text"], vtt, srt, lang
+    elif task == "Translate":
+        options = dict(task="translate", best_of=5)
+        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
+        vtt = getSubs(results["segments"], "vtt", 80)
+        srt = getSubs(results["segments"], "srt", 80)
+        lang = results["language"]
+        return results["text"], vtt, srt, lang
+    else:
+        raise ValueError("Task not supported")
+
+
+def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
+    segmentStream = StringIO()
+
+    if format == 'vtt':
+        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
+    elif format == 'srt':
+        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
+    else:
+        raise Exception("Unknown format " + format)
+
+    segmentStream.seek(0)
+    return segmentStream.read()
+
+
+def main():
+    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
+    loaded_model = change_model(current_size, size)
+    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
+             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
+    input_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])
+    if input_file is not None:
+        filename = input_file.name[:-4]
+    else:
+        filename = None
+    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
+    if task == "Transcribe":
+        if st.button("Transcribe"):
+            results = inferecence(loaded_model, input_file, task)
+            col3, col4 = st.columns(2)
+            col5, col6, col7 = st.columns(3)
+            col9, col10 = st.columns(2)
+
+            with col3:
+                st.audio(input_file)
+
+            with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(results[0])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
+                datatxt = f.read()
+
+
+            with open("transcript.vtt", "w+",encoding='utf8') as f:
+                f.writelines(results[1])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                datavtt = f.read()
+
+            with open("transcript.srt", "w+",encoding='utf8') as f:
+                f.writelines(results[2])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                datasrt = f.read()
+
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                   data=datatxt,
+                                   file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                   data=datavtt,
+                                   file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                   data=datasrt,
+                                   file_name="transcript.srt")
+            with col9:
+                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
+    elif task == "Translate":
+        if st.button("Translate to English"):
+            results = inferecence(loaded_model, input_file, task)
+            col3, col4 = st.columns(2)
+            col5, col6, col7 = st.columns(3)
+            col9, col10 = st.columns(2)
+
+            with col3:
+                st.audio(input_file)
+
+            with open("transcript.txt", "w+", encoding='utf8') as f:
+                f.writelines(results[0])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
+                datatxt = f.read()
+
+
+            with open("transcript.vtt", "w+",encoding='utf8') as f:
+                f.writelines(results[1])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
+                datavtt = f.read()
+
+            with open("transcript.srt", "w+",encoding='utf8') as f:
+                f.writelines(results[2])
+                f.close()
+            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
+                datasrt = f.read()
+
+            with col5:
+                st.download_button(label="Download Transcript (.txt)",
+                                   data=datatxt,
+                                   file_name="transcript.txt")
+            with col6:
+                st.download_button(label="Download Transcript (.vtt)",
+                                   data=datavtt,
+                                   file_name="transcript.vtt")
+            with col7:
+                st.download_button(label="Download Transcript (.srt)",
+                                   data=datasrt,
+                                   file_name="transcript.srt")
+            with col9:
+                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
+            with col10:
+                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")
+
+    else:
+        st.error("Please select a task.")
+
+
+if __name__ == "__main__":
+    main()
+    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")