# MotionInversion / inference.py
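
# Inference script for MotionInversion: builds a text-to-video diffusion
# pipeline, injects the learned motion embeddings into the UNet, and renders
# videos that reproduce the motion of the inverted reference clip.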
import os

import torch
from omegaconf import OmegaConf
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler

from train import export_to_video
from models.unet.motion_embeddings import load_motion_embeddings
from attn_ctrl import register_attention_control

# Noise-initialization strategies (only BlendInit is used below).
from noise_init.blend_init import BlendInit
from noise_init.blend_freq_init import BlendFreqInit
from noise_init.fft_init import FFTInit
from noise_init.freq_init import FreqInit


def get_pipe(embedding_dir='baseline', config=None, noisy_latent=None, video_round=None):
    """Build the video diffusion pipeline and load the learned motion embeddings."""
    # Load the base video generation model.
    pipe = DiffusionPipeline.from_pretrained(
        config.model.pretrained_model_path, torch_dtype=torch.float16
    )

    # Optionally swap in the VideoCrafter2 UNet.
    if config.model.unet == 'videoCrafter2':
        from models.unet.unet_3d_condition import UNet3DConditionModel
        # unet = UNet3DConditionModel.from_pretrained("adamdad/videocrafterv2_diffusers", subfolder='unet', torch_dtype=torch.float16)
        unet = UNet3DConditionModel.from_pretrained(
            "adamdad/videocrafterv2_diffusers", torch_dtype=torch.float16
        )
        pipe.unet = unet

    # pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe.enable_model_cpu_offload()
    # Memory optimization: decode the VAE latents slice by slice.
    pipe.enable_vae_slicing()

    # Cached inversion noise from training, used to initialize the latents.
    # if 'vanilla' not in embedding_dir:
    noisy_latent = torch.load(f'{embedding_dir}/cached_latents/cached_0.pt')['inversion_noise'][None,]

    # Load the learned motion embedding, either the final one or a specific
    # training checkpoint (video_round).
    if video_round is None:
        motion_embed = torch.load(f'{embedding_dir}/motion_embed.pt')
    else:
        motion_embed = torch.load(f'{embedding_dir}/{video_round}/motion_embed.pt')

    load_motion_embeddings(pipe.unet, motion_embed)

    # Record which UNet layers received embeddings.
    config.model['embedding_layers'] = list(motion_embed.keys())
    return pipe, config, noisy_latent
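
# Note on data layout: motion_embed.pt appears to hold a state-dict-style
# mapping from UNet layer names to embedding tensors; its keys are recorded as
# config.model['embedding_layers'] above, and the exact layout is defined in
# models/unet/motion_embeddings.py.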


def inference(embedding_dir='vanilla',
              video_round=None,
              prompt=None,
              save_dir=None,
              seed=None,
              motion_type=None,
              inference_steps=30):
    """Generate one video for `prompt` using the motion embedding in `embedding_dir`."""
    # Check that the motion type is valid.
    if motion_type not in ('camera', 'object', 'hybrid'):
        raise ValueError(f'Invalid motion type: {motion_type!r}')
    if seed is None:
        seed = 0

    # Load the training config that accompanies the motion embedding.
    noisy_latent = None
    config = OmegaConf.load(f'{embedding_dir}/config.yaml')

    # Each motion type selects a different embedding-injection strategy.
    if motion_type == 'camera':
        config['strategy']['removeMFromV'] = True
    elif motion_type in ('object', 'hybrid'):
        config['strategy']['vSpatial_frameSubtraction'] = True

    pipe, config, noisy_latent = get_pipe(
        embedding_dir=embedding_dir,
        config=config,
        noisy_latent=noisy_latent,
        video_round=video_round,
    )

    n_frames = config.val.num_frames
    shape = (config.val.height, config.val.width)

    os.makedirs(save_dir, exist_ok=True)
    # Name the output file after the prompt text.
    cur_save_dir = f'{save_dir}/{"_".join(prompt.split())}.mp4'

    # Hook the customized attention control into the UNet.
    register_attention_control(pipe.unet, config=config)

    # Blend the cached inversion noise with fresh Gaussian noise to build the
    # initial latents.
    if noisy_latent is not None:
        torch.manual_seed(seed)
        noise = torch.randn_like(noisy_latent)
        init_noise = BlendInit(noisy_latent, noise, noise_prior=0.5)
    else:
        init_noise = None
    input_init_noise = init_noise.clone() if init_noise is not None else None
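    # Note: BlendInit (noise_init/blend_init.py) is assumed to mix the two
    # noises according to noise_prior, i.e. roughly
    # noise_prior * noisy_latent + (1 - noise_prior) * noise, so 0.5 weights
    # the cached inversion noise and the fresh noise equally.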

    video_frames = pipe(
        prompt=prompt,
        num_inference_steps=inference_steps,
        guidance_scale=12,
        height=shape[0],
        width=shape[1],
        num_frames=n_frames,
        generator=torch.Generator("cuda").manual_seed(seed),
        latents=input_init_noise,
    ).frames[0]

    video_path = export_to_video(video_frames, output_video_path=cur_save_dir, fps=8)
    return video_path


if __name__ == "__main__":
    prompts = ["A skateboard slides along a city lane",
               "A tank is running in the desert.",
               "A toy train chugs around a roundabout tree"]
    embedding_dir = './results'
    video_round = 'checkpoint-250'
    save_dir = 'outputs'

    # inference() expects a single prompt string (it derives the output file
    # name from it), so generate one video per prompt.
    for prompt in prompts:
        inference(
            embedding_dir=embedding_dir,
            prompt=prompt,
            video_round=video_round,
            save_dir=save_dir,
            motion_type='hybrid',
            seed=100,
        )
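
    # The same embedding can also be replayed as pure camera or object motion
    # by switching motion_type, e.g.:
    # inference(embedding_dir=embedding_dir, prompt=prompts[0],
    #           video_round=video_round, save_dir=save_dir,
    #           motion_type='camera', seed=100)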