import spaces
import gradio as gr
import torch
from diffusers import DiffusionPipeline, UNet2DConditionModel, LCMScheduler
from huggingface_hub import hf_hub_download
from PIL import Image
import requests
from translatepy import Translator

translator = Translator()

# Constants
base = "stabilityai/stable-diffusion-xl-base-1.0"
repo = "tianweiy/DMD2"
# UI label -> [checkpoint file name inside `repo`, number of inference steps]
checkpoints = {
    "1-Step" : ["dmd2_sdxl_1step_unet_fp16.bin", 1],
    "4-Step" : ["dmd2_sdxl_4step_unet_fp16.bin", 4],
}
# Step count of the checkpoint currently held by pipe.unet (None = none loaded yet).
loaded = None

CSS = """ .gradio-container { max-width: 690px !important; } footer { visibility: hidden; } """

# Redirects the page to the dark theme unless the URL already ends with it.
JS = """function () {
  gradioURL = window.location.href;
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# Ensure model and scheduler are initialized in GPU-enabled function
# Both names always exist so that generate_image can report a missing GPU
# cleanly instead of failing with a NameError.
unet = None
pipe = None
if torch.cuda.is_available():
    unet = UNet2DConditionModel.from_config(base, subfolder="unet").to("cuda", torch.float16)
    # Hand the UNet to the pipeline so only one UNet lives on the GPU; its
    # weights are filled in from the selected checkpoint on the first request.
    pipe = DiffusionPipeline.from_pretrained(
        base, unet=unet, torch_dtype=torch.float16, variant="fp16"
    ).to("cuda")


# Function
@spaces.GPU()
def generate_image(prompt, ckpt="4-Step"):
    """Generate one image for *prompt* using the selected checkpoint.

    The prompt is translated to English with translatepy before it is passed
    to the pipeline. The checkpoint weights are (re)loaded into ``pipe.unet``
    only when the requested step count differs from the one currently loaded.

    Args:
        prompt: Text prompt, in any language the translator accepts.
        ckpt: Key of ``checkpoints`` ("1-Step" or "4-Step").

    Returns:
        The first image of the pipeline result.

    Raises:
        gr.Error: If the pipeline was not initialised (no CUDA device) or the
            prompt is blank.
        KeyError: If *ckpt* is not a key of ``checkpoints``.
    """
    global loaded

    if pipe is None:
        raise gr.Error("The model is not loaded: a CUDA GPU is required.")
    if not prompt or not str(prompt).strip():
        raise gr.Error("Please enter a prompt.")

    prompt = str(translator.translate(prompt, 'English'))
    print(prompt)

    checkpoint, num_inference_steps = checkpoints[ckpt]

    if loaded != num_inference_steps:
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
        # NOTE(review): torch.load unpickles the downloaded file; consider
        # weights_only=True if the installed torch version supports it for
        # this checkpoint.
        pipe.unet.load_state_dict(torch.load(hf_hub_download(repo, checkpoint), map_location="cuda"))
        loaded = num_inference_steps

    # Each checkpoint is sampled on its own fixed timestep list.
    if num_inference_steps == 1:
        timesteps = [399]
    else:
        timesteps = [999, 749, 499, 249]

    results = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=0,
        timesteps=timesteps,
    )
    return results.images[0]


examples = [
    "a cat eating a piece of cheese",
    "a ROBOT riding a BLUE horse on Mars, photorealistic",
    "Ironman VS Hulk, ultrarealistic",
    "a CUTE robot artist painting on an easel",
    "Astronaut in a jungle, cold color palette, oil pastel, detailed, 8k",
    "An alien holding sign board contain word 'Flash', futuristic, neonpunk",
    "Kids going to school, Anime style"
]


# Gradio Interface
# NOTE(review): widget nesting below is reconstructed (the original indentation
# was lost), and the gr.HTML strings look like their markup was stripped —
# confirm both against the deployed app.
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("\nDMD2🦖\n\n")
    gr.HTML("\nDMD2 text-to-image generation\nMulti-Languages, 4-step is higher quality & 2X slower\n")
    with gr.Group():
        with gr.Row():
            prompt = gr.Textbox(label='Enter Your Prompt', scale=8)
            ckpt = gr.Dropdown(label='Steps',choices=['1-Step', '4-Step'], value='4-Step', interactive=True)
            submit = gr.Button(scale=1, variant='primary')
    img = gr.Image(label='DMD2 Generated Image')

    # Examples pass only the prompt, so generate_image runs with its default ckpt.
    gr.Examples(
        examples=examples,
        inputs=prompt,
        outputs=img,
        fn=generate_image,
        cache_examples="lazy",
    )

    prompt.submit(fn=generate_image,
                  inputs=[prompt, ckpt],
                  outputs=img,
                  )
    submit.click(fn=generate_image,
                 inputs=[prompt, ckpt],
                 outputs=img,
                 )

demo.queue().launch()