import gradio as gr from text_to_video import model_t2v_fun,setup_seed from omegaconf import OmegaConf import torch import imageio import os import cv2 import pandas as pd import torchvision import random from huggingface_hub import snapshot_download config_path = "./base/configs/sample.yaml" args = OmegaConf.load("./base/configs/sample.yaml") device = "cuda" if torch.cuda.is_available() else "cpu" # ------- get model --------------- # model_t2V = model_t2v_fun(args) # model_t2V.to(device) # if device == "cuda": # model_t2V.enable_xformers_memory_efficient_attention() css = """ h1 { text-align: center; } #component-0 { max-width: 730px; margin: auto; } """ sd_path = args.pretrained_path + "/stable-diffusion-v1-4" unet = get_models(args, sd_path).to(device, dtype=torch.float16) state_dict = find_model("./pretrained_models/lavie_base.pt") unet.load_state_dict(state_dict) vae = AutoencoderKL.from_pretrained(sd_path, subfolder="vae", torch_dtype=torch.float16).to(device) tokenizer_one = CLIPTokenizer.from_pretrained(sd_path, subfolder="tokenizer") text_encoder_one = CLIPTextModel.from_pretrained(sd_path, subfolder="text_encoder", torch_dtype=torch.float16).to(device) # huge unet.eval() vae.eval() text_encoder_one.eval() def infer(prompt, seed_inp, ddim_steps,cfg, infer_type): if seed_inp!=-1: setup_seed(seed_inp) else: seed_inp = random.choice(range(10000000)) setup_seed(seed_inp) if infer_type == 'ddim': scheduler = DDIMScheduler.from_pretrained(sd_path, subfolder="scheduler", beta_start=args.beta_start, beta_end=args.beta_end, beta_schedule=args.beta_schedule) elif infer_type == 'eulerdiscrete': scheduler = EulerDiscreteScheduler.from_pretrained(sd_path, subfolder="scheduler", beta_start=args.beta_start, beta_end=args.beta_end, beta_schedule=args.beta_schedule) elif infer_type == 'ddpm': scheduler = DDPMScheduler.from_pretrained(sd_path, subfolder="scheduler", beta_start=args.beta_start, beta_end=args.beta_end, beta_schedule=args.beta_schedule) model = VideoGenPipeline(vae=vae, text_encoder=text_encoder_one, tokenizer=tokenizer_one, scheduler=scheduler, unet=unet) model.to(device) if device == "cuda": model.enable_xformers_memory_efficient_attention() videos = model(prompt, video_length=16, height = 320, width= 512, num_inference_steps=ddim_steps, guidance_scale=cfg).video print(videos[0].shape) if not os.path.exists(args.output_folder): os.mkdir(args.output_folder) torchvision.io.write_video(args.output_folder + prompt[0:30].replace(' ', '_') + '-'+str(seed_inp)+'-'+str(ddim_steps)+'-'+str(cfg)+ '-.mp4', videos[0], fps=8) return args.output_folder + prompt[0:30].replace(' ', '_') + '-'+str(seed_inp)+'-'+str(ddim_steps)+'-'+str(cfg)+ '-.mp4' # def clean(): # return gr.Video.update(value=None) title = """
Apply Intern·Vchitect to generate a video