# Podfusion / animate.py
import cv2
import mediapy
import numpy as np
from frame_interpolation.eval import interpolator, util
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
from moviepy.editor import CompositeVideoClip
from moviepy.editor import VideoFileClip as vfc
from PIL import Image
from pathlib import Path


# Get the key positions at which a snapshot frame should be captured
def list_of_positions(num_contours, num_frames=100):
    positions = []
    for i in range(num_frames):
        positions.append(int(num_contours / num_frames * i))
    return positions
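
# For example, list_of_positions(500) samples 100 evenly spaced contour
# indices: [0, 5, 10, ..., 495].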


def contourfinder(image1, image2, text=None, num_frames=100):
    # Create two blank canvases matching the shapes of the input images
    blank = np.zeros(np.shape(image1), dtype="uint8")
    blank2 = np.zeros(np.shape(image2), dtype="uint8")

    # Edge maps and contours for images 1 and 2
    threshold = cv2.Canny(image=image1, threshold1=100, threshold2=200)
    contours, hierarchies = cv2.findContours(
        threshold, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    threshold2 = cv2.Canny(image=image2, threshold1=100, threshold2=200)
    contours2, hierarchies2 = cv2.findContours(
        threshold2, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )

    # Initialize three empty videos. cv2.VideoWriter expects a (width, height)
    # frame size, whereas numpy shapes are (height, width), so reverse it here.
    frame_size = (threshold.shape[1], threshold.shape[0])
    vid1 = cv2.VideoWriter("vid1.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 24, frame_size)
    vid2 = cv2.VideoWriter("vid2.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 24, frame_size)
    text_vid = cv2.VideoWriter(
        "text_vid.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 10, frame_size
    )

    # Get the contour indices at which to keep snapshot frames
    positions = list_of_positions(len(contours), num_frames)
    frames = []
    # Loop over the contours, drawing each onto the blank image,
    # then writing the accumulated drawing to the video
    for i in range(len(contours)):
        cv2.drawContours(
            blank, contours=contours, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            # Copy, otherwise every snapshot aliases the same mutating array
            frames.append(blank.copy())
        # Compile to video
        vid1.write(blank)
    vid1.release()
    full_dir_vid_1 = Path("vid1.mp4").resolve().as_posix()
    clip1 = vfc(full_dir_vid_1)
    positions = list_of_positions(len(contours2), num_frames)
    for i in range(len(contours2)):
        cv2.drawContours(
            blank2, contours=contours2, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            frames.append(blank2.copy())
        vid2.write(blank2)
    vid2.release()
    full_dir_vid_2 = Path("vid2.mp4").resolve().as_posix()
    clip3 = vfc(full_dir_vid_2)

    # Next is the text video
    if text is not None:
        # Blank canvas to draw the text onto (the original referenced an
        # undefined `original`; image1's shape is assumed here instead)
        image = np.zeros(np.shape(image1), dtype="uint8")
        font = cv2.FONT_HERSHEY_COMPLEX
        org = (10, 400)  # bottom-left corner of the first letter
        fontScale = 3
        color = (186, 184, 108)  # BGR
        thickness = 4  # line thickness in px

        def text_frames(text, image, org):
            spacing = 55  # horizontal spacing between letters
            # Snapshot the empty canvas as a copy, so the video can blink
            # between the blank frame and the accumulating text frames
            blink = image.copy()
            cv2.imwrite("blink.png", blink)
            for i in range(len(text)):
                text_vid.write(blink)
                # Draw the next letter with cv2.putText()
                image = cv2.putText(
                    image, text[i], org, font, fontScale, color, thickness, cv2.LINE_AA
                )
                # Advance the origin; uppercase letters get extra spacing
                org = (org[0] + spacing, org[1])
                if text[i].isupper():
                    org = (org[0] + spacing + 1, org[1])
                # Save the frame and compile to video
                cv2.imwrite(f"text_im{i}.png", image)
                text_vid.write(image)
            text_vid.release()

        text_frames(text, image, org)

    return clip1, clip3
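
# Note: contourfinder returns only the two contour-drawing clips; the
# optional text animation is written to disk as text_vid.mp4, not returned.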


def load_model(model_name):
    model = interpolator.Interpolator(snapshot_download(repo_id=model_name), None)
    return model
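
# snapshot_download fetches the model repository from the Hugging Face Hub
# and returns its local path, which FILM's Interpolator then loads from disk.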

model_names = [
    "akhaliq/frame-interpolation-film-style",
    "NimaBoscarino/frame-interpolation_film_l1",
    "NimaBoscarino/frame_interpolation_film_vgg",
]
models = {model_name: load_model(model_name) for model_name in model_names}

ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)
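
# Note: all three models are downloaded and loaded eagerly at import time,
# and mediapy is pointed at the same ffmpeg binary the FILM utilities use.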


def resize(width, img):
    # Scale the image to a fixed width, preserving the aspect ratio
    basewidth = width
    img = Image.open(img)
    wpercent = basewidth / float(img.size[0])
    hsize = int(float(img.size[1]) * wpercent)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is its replacement
    img = img.resize((basewidth, hsize), Image.LANCZOS)
    return img


def resize_img(img1, img2):
    img_target_size = Image.open(img1)
    img_to_resize = resize_and_crop(
        img2,
        (
            img_target_size.size[0],
            img_target_size.size[1],
        ),  # set width and height to match img1
        crop_origin="middle",
    )
    img_to_resize.save("resized_img2.png")
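
# Note: resize_img works through the filesystem; its result is written to
# "resized_img2.png" rather than returned.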


def get_video_frames(images, times_to_interpolate=6, model_name_index=0):
    frame1 = images[0]
    frame2 = images[1]
    model = models[model_names[model_name_index]]
    cv2_images = [cv2.imread(frame1), cv2.imread(frame2)]
    frame1 = resize(256, frame1)
    frame2 = resize(256, frame2)
    frame1.save("test1.png")
    frame2.save("test2.png")
    resize_img("test1.png", "test2.png")
    input_frames = ["test1.png", "resized_img2.png"]
    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, times_to_interpolate, model
        )
    )
    return frames, cv2_images
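
# FILM interpolates recursively, so a single image pair with
# times_to_interpolate=6 should yield 2**6 + 1 = 65 frames.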


def create_mp4_with_audio(frames, cv2_images, duration, audio, output_path):
    temp_vid_path = "TEMP.mp4"
    mediapy.write_video(temp_vid_path, frames, fps=5)
    print(
        f"TYPES....{type(cv2_images[0])},{type(cv2_images[1])} "
        f"SHAPES {cv2_images[0].shape} Img {cv2_images[0]}"
    )
    clip1, clip3 = contourfinder(cv2_images[0], cv2_images[1])  # has a third text option

    # Combine the OpenCV and moviepy clips: move from OpenCV video 1,
    # through the interpolated TEMP.mp4, to OpenCV video 2
    clip2 = vfc(temp_vid_path).resize(8).set_start(clip1.duration - 0.5).crossfadein(2)
    clip3 = clip3.set_start((clip1.duration - 0.5) + clip2.duration).crossfadein(2)
    new_clip = CompositeVideoClip([clip1, clip2, clip3])
    new_clip.audio = audio
    # set_duration returns a new clip rather than mutating in place
    new_clip = new_clip.set_duration(duration)
    new_clip.write_videofile(output_path, audio_codec="aac")
    return output_path
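

# Minimal usage sketch, assuming two cover images and an audio track exist
# next to this script (the file names below are hypothetical examples):
if __name__ == "__main__":
    from moviepy.editor import AudioFileClip

    frames, cv2_images = get_video_frames(["cover1.png", "cover2.png"])
    audio = AudioFileClip("episode.mp3")  # hypothetical audio file
    create_mp4_with_audio(frames, cv2_images, audio.duration, audio, "out.mp4")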