|
import gradio as gr |
|
import torch |
|
import torchvision |
|
from diffusers import I2VGenXLPipeline |
|
from diffusers.utils.loading_utils import load_image |
|
from PIL import Image |
|
|
|
# Lazily-built pipeline shared by every request; see _get_pipeline().
_PIPELINE = None


def _get_pipeline():
    """Return the I2VGen-XL pipeline, building and configuring it on first use."""
    global _PIPELINE
    if _PIPELINE is None:
        pipeline = I2VGenXLPipeline.from_pretrained(
            "ali-vilab/i2vgen-xl", torch_dtype=torch.float16, variant="fp16"
        )
        pipeline.enable_model_cpu_offload()
        pipeline.unet.enable_forward_chunking()
        _PIPELINE = pipeline
    return _PIPELINE


def _frames_to_video_tensor(frames):
    """Stack PIL frames into the uint8 (T, H, W, C) tensor write_video expects."""
    to_tensor = torchvision.transforms.functional.pil_to_tensor
    # pil_to_tensor yields (C, H, W) per frame; move channels last after stacking.
    stacked = torch.stack([to_tensor(frame.convert("RGB")) for frame in frames])
    return stacked.permute(0, 2, 3, 1).contiguous()


def generate(image: Image.Image, prompt: str) -> str:
    """Generate a short video from an input image and a text prompt.

    Args:
        image: Conditioning image; it is converted to RGB before use.
        prompt: Text prompt passed to the pipeline.

    Returns:
        The path of the written video file, always ``"video.mp4"`` in the
        current working directory (overwritten on every call).

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Without this guard a missing upload fails with an opaque
        # AttributeError on ``image.convert``.
        raise gr.Error("Please provide an input image.")

    negative_prompt = "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms"
    # Re-seed on every call, exactly as before, so each request starts from
    # the same generator state.
    generator = torch.manual_seed(8888)
    image = image.convert("RGB")

    # Reuse the already-loaded pipeline instead of re-running from_pretrained
    # for every request.
    pipeline = _get_pipeline()
    frames = pipeline(
        prompt=prompt,
        image=image,
        num_inference_steps=50,
        negative_prompt=negative_prompt,
        guidance_scale=9.0,
        generator=generator,
        decode_chunk_size=6,
    ).frames[0]

    # With the default output type the pipeline returns PIL images, which
    # write_video cannot consume directly.
    video = _frames_to_video_tensor(frames)
    torchvision.io.write_video("video.mp4", video, fps=16)
    return "video.mp4"
|
|
|
# Gradio front end: a PIL image upload and a text box feed generate(),
# whose returned file path is shown in a video component.
app = gr.Interface(generate, [gr.Image(type="pil"), "text"], gr.Video())
|
|
|
if __name__ == "__main__":
    # Launch the interface only when this file is run as a script.
    app.launch()
|
|
|
|