martintomov committed on
Commit
ff655fd
•
1 Parent(s): eddfe98

import from gh

Browse files
Files changed (3) hide show
  1. .DS_Store +0 -0
  2. app.py +169 -0
  3. requirements.txt +8 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from IPython.display import display, Image, Audio
3
+ from moviepy.editor import VideoFileClip, AudioFileClip
4
+ from moviepy.audio.io.AudioFileClip import AudioFileClip
5
+
6
+ import cv2
7
+ import base64
8
+ import io
9
+ import openai
10
+ import os
11
+ import requests
12
+ import streamlit as st
13
+ import tempfile
14
+
15
+ # Load environment variables from .env.local
16
+ load_dotenv('.env.local')
17
+
18
+ ## 1. Turn video into frames
19
def video_to_frames(video_file):
    """Save an uploaded video to disk and extract its frames as base64 JPEGs.

    Args:
        video_file: Readable binary file-like object whose ``read()`` returns
            the video bytes (in this app, the Streamlit upload).

    Returns:
        A ``(frames, video_filename, video_duration)`` tuple:
        the list of base64-encoded JPEG strings (one per decoded frame), the
        path of the temporary ``.mp4`` copy, and the clip duration reported by
        moviepy. The temporary file is created with ``delete=False``; the
        caller is responsible for removing it.

    Raises:
        Any exception raised while probing or decoding the video is
        re-raised after the temporary copy has been removed.
    """
    # moviepy and OpenCV both work on file paths, so spill the upload to disk.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name

    try:
        # Only the duration is needed from moviepy; close the clip right away
        # instead of leaving its reader open for the rest of the process.
        clip = VideoFileClip(video_filename)
        try:
            video_duration = clip.duration
        finally:
            clip.close()

        video = cv2.VideoCapture(video_filename)
        base64Frame = []
        try:
            while video.isOpened():
                success, frame = video.read()
                if not success:
                    break
                _, buffer = cv2.imencode('.jpg', frame)
                base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
        finally:
            # Release the capture even if encoding a frame fails.
            video.release()
    except Exception:
        # The caller never learns the temp path on failure, so clean up here.
        if os.path.exists(video_filename):
            os.unlink(video_filename)
        raise

    print(len(base64Frame), "frames read.")
    return base64Frame, video_filename, video_duration
39
+
40
+ ## 2. Generate stories based on frames with gpt4v
41
def frames_to_story(base64Frames, prompt, api_key):
    """Generate text from sampled video frames with the GPT-4 Vision model.

    Args:
        base64Frames: Sequence of base64-encoded frame images; every 50th
            entry (starting with the first) is sent to the model.
        prompt: Instruction text placed ahead of the images in the message.
        api_key: OpenAI API key passed with the request.

    Returns:
        The content of the first choice's message in the completion result.
        The same text is also printed to stdout.
    """
    # Sample one frame out of every 50 to keep the request small.
    sampled_images = [
        {"image": frame, "resize": 768} for frame in base64Frames[::50]
    ]
    user_message = {
        "role": "user",
        "content": [prompt, *sampled_images],
    }

    result = openai.ChatCompletion.create(
        model="gpt-4-vision-preview",
        messages=[user_message],
        api_key=api_key,
        headers={"Openai-Version": "2020-11-07"},
        max_tokens=500,
    )

    story = result.choices[0].message.content
    print(story)
    return story
61
+
62
+ ## 3. Generate voiceover from stories
63
def text_to_audio(text, api_key, voice):
    """Convert text to speech through the OpenAI speech endpoint.

    Args:
        text: The script to be spoken.
        api_key: OpenAI API key sent as a Bearer token.
        voice: Voice identifier forwarded in the request (this app passes
            ``'alloy'`` or ``'nova'``).

    Returns:
        A ``(audio_filename, audio_bytes_io)`` tuple: the path of a temporary
        file holding the audio (created with ``delete=False``; the caller
        must remove it) and an ``io.BytesIO`` with the same bytes, positioned
        at the start.

    Raises:
        RuntimeError: If the endpoint responds with a status other than 200.
            The message includes the status code and the response body.
    """
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {api_key}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": voice,
        },
    )

    # Report the actual status and body so failures can be diagnosed.
    if response.status_code != 200:
        raise RuntimeError(
            f"Request failed with status code {response.status_code}: "
            f"{response.text}"
        )

    # The request is not streamed, so the whole body is already in memory;
    # reuse it for both outputs instead of chunk-iterating the response twice.
    audio_data = response.content

    # A freshly constructed BytesIO starts at position 0, ready for reading.
    audio_bytes_io = io.BytesIO(audio_data)

    # NOTE(review): no response_format is requested, so the API's default
    # audio format applies — confirm it matches the ".wav" suffix used here.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
        tmpfile.write(audio_data)
        audio_filename = tmpfile.name

    return audio_filename, audio_bytes_io
96
+
97
+ ## 4. Merge videos & audio
98
def merge_audio_video(video_filename, audio_filename, output_filename):
    """Replace a video's audio track with the given audio and write the result.

    Args:
        video_filename: Path of the source video.
        audio_filename: Path of the audio file to use as the soundtrack.
        output_filename: Path the merged video is written to.

    Returns:
        ``output_filename``, for convenience.
    """
    print("Merging audio and video ...")
    video_clip = VideoFileClip(video_filename)
    try:
        audio_clip = AudioFileClip(audio_filename)
        try:
            # Attach the new audio track and encode the combined clip
            # (H.264 video, AAC audio).
            final_clip = video_clip.set_audio(audio_clip)
            final_clip.write_videofile(
                output_filename, codec='libx264', audio_codec="aac"
            )
        finally:
            # Close the clips even when encoding fails.
            audio_clip.close()
    finally:
        video_clip.close()

    # Return the path to the new video file
    return output_filename
114
+
115
+ ## 5. Streamlit UI
116
def main():
    """Render the Streamlit UI and run the video-to-voiceover pipeline.

    Flow: read the OpenAI key from the environment, let the user upload a
    video, choose a voice and edit the prompt, then on "START PROCESSING"
    extract frames, generate a script, synthesize speech, merge it with the
    video and display the result. Temporary files created along the way are
    removed whether or not processing succeeds.
    """
    st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
    st.title("GPT4V AI Voiceover 🎥🔮")
    st.text("Explore how GPT4V changes the way we voiceover videos.")

    # Retrieve the OpenAI API key from environment
    openai_key = os.getenv('OPENAI_API_KEY')
    if not openai_key:
        st.error("OpenAI API key is not set in .env.local")
        return

    uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])

    option = st.selectbox(
        'Choose the voice you want',
        ('Female Voice', 'Male Voice'))
    # Map the UI label to the voice name sent to the speech endpoint.
    voices = {'Male Voice': 'alloy', 'Female Voice': 'nova'}
    classify = voices.get(option, '')

    if uploaded_file is not None:
        st.video(uploaded_file)
        p = 'Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...'
        prompt = st.text_area(
            "Prompt", value=p
        )

    if st.button("START PROCESSING", type="primary") and uploaded_file is not None:
        with st.spinner("Video is being processed..."):
            # Track every temp file as soon as its path is known so the
            # cleanup below also runs when a later step raises.
            temp_paths = []
            try:
                base64Frame, video_filename, video_duration = video_to_frames(uploaded_file)
                temp_paths.append(video_filename)

                # Rough speaking budget: four words per unit of duration.
                est_word_count = video_duration * 4
                final_prompt = prompt + f"(This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {est_word_count} words. Ignore and don't generate anything else than the script that you'll use to voice over the video.)"
                text = frames_to_story(base64Frame, final_prompt, openai_key)
                st.write(text)

                # Generate audio from text; the in-memory copy is not needed here.
                audio_filename, _ = text_to_audio(text, openai_key, classify)
                temp_paths.append(audio_filename)

                # Merge audio and video
                output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
                temp_paths.append(output_video_filename)
                final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)

                # Display the result
                st.video(final_video_filename)
            finally:
                # Clean up the temporary files, including partial output
                # left behind by a failed merge.
                for path in temp_paths:
                    if os.path.exists(path):
                        os.unlink(path)
167
+
168
+ if __name__ == "__main__":
169
+ main()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ openai==0.28
2
+ python-dotenv>=0.20.0
3
+ IPython>=7.30.0
4
+ moviepy>=1.0.3
5
+ opencv-python>=4.5.5.64
6
+ requests>=2.26.0
7
+ streamlit>=1.10.0
8
+ openai>=0.10.2