import subprocess

# Pin numpy at runtime (Hugging Face Spaces workaround).
subprocess.run('pip install numpy==1.26.4', shell=True)

import os
import random
from glob import glob
from pathlib import Path
from typing import Optional

import cv2
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image

# Core functions from https://github.com/modelscope/DiffSynth-Studio
from diffsynth import save_video, ModelManager, SVDVideoPipeline
from diffsynth import SDVideoPipeline, ControlNetConfigUnit, VideoData, save_frames
from diffsynth.extensions.RIFE import RIFESmoother

# Constants
MAX_SEED = np.iinfo(np.int32).max

CSS = """
footer {
    visibility: hidden;
}
"""

JS = """function () {
    gradioURL = window.location.href
    if (!gradioURL.endsWith('?__theme=dark')) {
        window.location.replace(gradioURL + '?__theme=dark');
    }
}"""

# Ensure model and scheduler are initialized in GPU-enabled function
if torch.cuda.is_available():
    model_manager2 = ModelManager(torch_dtype=torch.float16, device="cuda")
    model_manager2.load_textual_inversions("models/textual_inversion")
    model_manager2.load_models([
        "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
        "models/AnimateDiff/mm_sd_v15_v2.ckpt",
        "models/ControlNet/control_v11p_sd15_lineart.pth",
        "models/ControlNet/control_v11f1e_sd15_tile.pth",
        "models/RIFE/flownet.pkl",
    ])
    pipe2 = SDVideoPipeline.from_model_manager(
        model_manager2,
        [
            ControlNetConfigUnit(
                processor_id="lineart",
                model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
                scale=0.5,
            ),
            ControlNetConfigUnit(
                processor_id="tile",
                model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
                scale=0.5,
            ),
        ],
    )
    smoother = RIFESmoother.from_model_manager(model_manager2)


def update_frames(video_in):
    """Sync the frame-count, fps, and resolution controls with the uploaded video."""
    up_video = VideoData(video_file=video_in)
    frame_len = len(up_video)
    cap = cv2.VideoCapture(video_in)
    fps_in = cap.get(cv2.CAP_PROP_FPS)
    width_in = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height_in = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    cap.release()
    return (
        gr.update(maximum=frame_len),
        gr.update(value=fps_in),
        gr.update(value=width_in),
        gr.update(value=height_in),
    )


@spaces.GPU(duration=180)
def generate(
        video_in,
        image_in,
        prompt: str = "best quality",
        seed: int = -1,
        num_inference_steps: int = 10,
        num_frames: int = 30,
        height: int = 512,
        width: int = 512,
        animatediff_batch_size: int = 32,
        animatediff_stride: int = 16,
        fps_id: int = 25,
        output_folder: str = "outputs",
        progress=gr.Progress(track_tqdm=True)):
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    # Resize the uploaded video and use its frames as ControlNet guidance.
    up_video = VideoData(video_file=video_in, height=height, width=width)
    input_video = [up_video[i] for i in range(1, num_frames)]

    video = pipe2(
        prompt=prompt,
        negative_prompt="verybadimagenegative_v1.3",
        cfg_scale=3,
        clip_skip=2,
        controlnet_frames=input_video,
        num_frames=len(input_video),
        num_inference_steps=num_inference_steps,
        height=height,
        width=width,
        animatediff_batch_size=animatediff_batch_size,
        animatediff_stride=animatediff_stride,
        unet_batch_size=8,
        controlnet_batch_size=8,
        vram_limit_level=0,
    )
    video = smoother(video)
    save_video(video, video_path, fps=fps_id)

    return video_path, seed


examples = [
    ['./dancing.mp4', None, "best quality, perfect anime illustration, light, a girl is dancing, smile, solo"],
]

# Gradio Interface
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:

    gr.HTML("<h1 style='text-align: center;'>Exvideo📽️Diffutoon</h1>")
    gr.HTML("""

        <p>
            Exvideo and Diffutoon video generation.<br>
            Update: output resizing and frame-length control.<br>
            Note: ZeroGPU time is limited, so set the parameters accordingly.
        </p>
    """)

    with gr.Row():
        video_in = gr.Video(label='Upload Video', height=600, scale=2)
        image_in = gr.Image(label='Upload Image', height=600, scale=2, image_mode="RGB", type="filepath", visible=False)
        video = gr.Video(label="Generated Video", height=600, scale=2)
        with gr.Column(scale=1):
            seed = gr.Slider(
                label="Seed (-1 Random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            num_inference_steps = gr.Slider(
                label="Inference steps",
                info="Number of denoising steps",
                step=1,
                value=10,
                minimum=1,
                maximum=50,
            )
            num_frames = gr.Slider(
                label="Num frames",
                info="Number of frames to process from the input video",
                step=1,
                value=30,
                minimum=1,
                maximum=128,
            )
            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    step=8,
                    value=512,
                    minimum=256,
                    maximum=2560,
                )
                width = gr.Slider(
                    label="Width",
                    step=8,
                    value=512,
                    minimum=256,
                    maximum=2560,
                )
            with gr.Accordion("Diffutoon Options", open=False):
                animatediff_batch_size = gr.Slider(
                    label="Animatediff batch size",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=32,
                )
                animatediff_stride = gr.Slider(
                    label="Animatediff stride",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=16,
                )
            fps_id = gr.Slider(
                label="Frames per second",
                info="Frame rate of the saved output video",
                value=6,
                step=1,
                minimum=5,
                maximum=30,
            )

    prompt = gr.Textbox(label="Prompt", value="best quality, perfect anime illustration, light, a girl is dancing, smile, solo")

    with gr.Row():
        submit_btn = gr.Button(value="Generate")
        # stop_btn = gr.Button(value="Stop", variant="stop")
        clear_btn = gr.ClearButton([video_in, image_in, seed, video])

    gr.Examples(
        examples=examples,
        fn=generate,
        inputs=[video_in, image_in, prompt],
        outputs=[video, seed],
        cache_examples="lazy",
        examples_per_page=4,
    )

    # Keep the frame, fps, and resolution controls in sync with the uploaded video.
    video_in.upload(update_frames, inputs=[video_in], outputs=[num_frames, fps_id, width, height])

    submit_event = submit_btn.click(
        fn=generate,
        inputs=[video_in, image_in, prompt, seed, num_inference_steps, num_frames, height, width,
                animatediff_batch_size, animatediff_stride, fps_id],
        outputs=[video, seed],
        api_name="video",
    )
    # stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])

demo.queue().launch()