Spaces:
Runtime error
Runtime error
File size: 4,995 Bytes
ff655fd 663e3d1 ff655fd 663e3d1 ff655fd 663e3d1 ff655fd 663e3d1 ff655fd 663e3d1 ff655fd 663e3d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
from IPython.display import display, Image, Audio
from moviepy.editor import VideoFileClip, AudioFileClip
from moviepy.audio.io.AudioFileClip import AudioFileClip
import cv2
import base64
import io
import openai
import os
import requests
import streamlit as st
import tempfile
## 1. Turn video into frames
def video_to_frames(video_file):
    """Decode an uploaded video into base64-encoded JPEG frames.

    The upload is copied to a temporary ``.mp4`` file on disk, which is then
    opened by path twice: once with moviepy to read the duration and once
    with OpenCV to decode every frame.

    Args:
        video_file: File-like object whose ``read()`` returns the raw video
            bytes.

    Returns:
        A tuple ``(base64_frames, video_filename, video_duration)``:
        the list of base64 strings (one JPEG per decoded frame), the path of
        the temporary copy, and the duration reported by moviepy. The
        temporary file is created with ``delete=False``, so removing it is
        left to the caller.
    """
    # delete=False: the file has to outlive this ``with`` block because it is
    # reopened by path below.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name

    # Probe the duration only after the temp file is closed (and therefore
    # flushed), and always close the clip so its reader is not leaked.
    clip = VideoFileClip(video_filename)
    try:
        video_duration = clip.duration
    finally:
        clip.close()

    video = cv2.VideoCapture(video_filename)
    base64Frame = []
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                # No more frames could be read.
                break
            _, buffer = cv2.imencode('.jpg', frame)
            base64Frame.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture even if encoding a frame raised.
        video.release()

    print(len(base64Frame), "frames read.")
    return base64Frame, video_filename, video_duration
## 2. Generate stories based on frames with gpt4v
def frames_to_story(base64Frames, prompt, api_key):
    """Ask the vision model for a voiceover script based on sampled frames.

    Args:
        base64Frames: Sequence of base64-encoded frame images.
        prompt: Instruction text placed ahead of the images in the message.
        api_key: OpenAI API key forwarded with the request.

    Returns:
        The text content of the first choice in the completion response.
    """
    # Only every 50th frame (starting with the first) is sent to the model.
    sampled_images = [
        {"image": frame, "resize": 768} for frame in base64Frames[::50]
    ]
    user_message = {
        "role": "user",
        "content": [prompt, *sampled_images],
    }

    result = openai.ChatCompletion.create(
        model="gpt-4-vision-preview",
        messages=[user_message],
        api_key=api_key,
        headers={"Openai-Version": "2020-11-07"},
        max_tokens=500,
    )

    story = result.choices[0].message.content
    print(story)
    return story
## 3. Generate voiceover from stories
def text_to_audio(text, api_key, voice):
    """Synthesize speech for ``text`` and store it in a temporary file.

    Sends the text to the OpenAI ``/v1/audio/speech`` endpoint using the
    ``tts-1`` model and the requested voice.

    Args:
        text: The script to be spoken.
        api_key: OpenAI API key, sent as a bearer token.
        voice: Voice name passed through to the API.

    Returns:
        A tuple ``(audio_filename, audio_bytes_io)``: the path of the
        temporary audio file (created with ``delete=False``, so removing it
        is left to the caller) and an ``io.BytesIO`` holding the same bytes,
        positioned at the start.

    Raises:
        Exception: If the API responds with any status other than 200. The
            message carries the status code and the start of the response
            body.
    """
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {api_key}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": voice,
        },
    )
    if response.status_code != 200:
        # Include the actual status and (truncated) body so the failure can
        # be diagnosed from the message alone.
        raise Exception(
            f"Request failed with status code {response.status_code}: "
            f"{response.text[:500]}"
        )

    # The request is not streamed, so the whole body is already in memory;
    # read it once and reuse it for both the buffer and the file.
    audio_bytes = response.content
    audio_bytes_io = io.BytesIO(audio_bytes)

    # NOTE(review): the request does not set a response format, so the bytes
    # may not actually be WAV despite the suffix - confirm against the API
    # default before relying on the extension.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
        tmpfile.write(audio_bytes)
        audio_filename = tmpfile.name

    return audio_filename, audio_bytes_io
## 4. Merge videos & audio
def merge_audio_video(video_filename, audio_filename, output_filename):
    """Write a copy of the video with its audio track replaced.

    Args:
        video_filename: Path of the source video.
        audio_filename: Path of the audio file to use as the new soundtrack.
        output_filename: Path the merged video is written to (encoded with
            ``libx264`` video and ``aac`` audio).

    Returns:
        ``output_filename``, unchanged.
    """
    print("Merging audio and video ...")
    video_clip = VideoFileClip(video_filename)
    try:
        audio_clip = AudioFileClip(audio_filename)
        try:
            final_clip = video_clip.set_audio(audio_clip)
            final_clip.write_videofile(
                output_filename, codec='libx264', audio_codec="aac"
            )
        finally:
            # Close the readers even when encoding fails, so the underlying
            # files are not left open.
            audio_clip.close()
    finally:
        video_clip.close()
    return output_filename
## 5. Streamlit UI
def main():
    """Render the Streamlit page and run the voiceover pipeline on demand.

    The page asks for an OpenAI API key, a video upload, a voice and a
    prompt. Pressing the button runs: frames extraction -> script generation
    -> speech synthesis -> audio/video merge, then shows the merged video.
    Temporary files produced along the way are removed afterwards, whether
    or not the pipeline succeeded.
    """
    st.set_page_config(page_title="AI Voiceover", page_icon="🔮")
    st.title("GPT4V AI Voiceover 🎥🔮")

    # Mask the key on screen; it is a secret and should not be shown in
    # clear text.
    openai_key = st.text_input("Enter your OpenAI API key", type="password")
    if not openai_key:
        st.error("Please enter your OpenAI API key.")
        return

    uploaded_file = st.file_uploader("Select a video file", type=["mp4", "avi"])
    option = st.selectbox(
        'Choose the voice you want',
        ('Female Voice', 'Male Voice'))
    classify = 'alloy' if option == 'Male Voice' else 'nova'

    if uploaded_file is not None:
        st.video(uploaded_file)
        p = 'Generate a short voiceover script for the video, matching the content with the video scenes. The style should be...'
        prompt = st.text_area("Prompt", value=p)

    if st.button("START PROCESSING", type="primary") and uploaded_file is not None:
        with st.spinner("Video is being processed..."):
            # Paths are recorded as soon as they are known so the cleanup
            # below also runs when a later step raises.
            temp_paths = []
            try:
                base64Frame, video_filename, video_duration = video_to_frames(uploaded_file)
                temp_paths.append(video_filename)

                # Word budget for the script: four words per unit of duration.
                est_word_count = video_duration * 4
                final_prompt = f"{prompt}(This video is ONLY {video_duration} seconds long. So make sure the voiceover MUST be able to be explained in less than {est_word_count} words. Ignore and don't generate anything else than the script that you'll use to voice over the video.)"
                text = frames_to_story(base64Frame, final_prompt, openai_key)
                st.write(text)

                audio_filename, _ = text_to_audio(text, openai_key, classify)
                temp_paths.append(audio_filename)

                output_video_filename = os.path.splitext(video_filename)[0] + "_output.mp4"
                temp_paths.append(output_video_filename)
                final_video_filename = merge_audio_video(video_filename, audio_filename, output_video_filename)
                st.video(final_video_filename)
            finally:
                for path in temp_paths:
                    # The output file may not exist if merging failed early.
                    if os.path.exists(path):
                        os.unlink(path)
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()