# Spaces: Running on Zero
import os
import secrets
import sys

# Put the current working directory on the import path so the local `src`
# package imported below can be resolved.
sys.path.append("./")

import gradio as gr
import spaces
import torch
from diffusers import VQModel
from torchvision import transforms
from transformers import (
    CLIPTextModelWithProjection,
    CLIPTokenizer,
)

from src.pipeline import Pipeline
from src.scheduler import Scheduler
from src.transformer import Transformer2DModel
# --- Runtime configuration ---------------------------------------------------
# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Every checkpoint below is requested in bfloat16.
dtype = torch.bfloat16
model_path = "Collov-Labs/Monetico"

# Upper bounds used by the seed and width/height sliders of the UI.
MAX_SEED = 2**32 - 1
MAX_IMAGE_SIZE = 512

# --- Model components --------------------------------------------------------
model = Transformer2DModel.from_pretrained(
    model_path,
    subfolder="transformer",
    torch_dtype=dtype,
)
vq_model = VQModel.from_pretrained(
    model_path,
    subfolder="vqvae",
    torch_dtype=dtype,
)

# The checkpoint's own "text_encoder" subfolder could be loaded here instead;
# the LAION CLIP ViT-H/14 encoder is used because it gives more stable
# sampling for some cases.
text_encoder = CLIPTextModelWithProjection.from_pretrained(
    "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
    torch_dtype=dtype,
)
tokenizer = CLIPTokenizer.from_pretrained(
    model_path,
    subfolder="tokenizer",
    torch_dtype=dtype,
)
scheduler = Scheduler.from_pretrained(
    model_path,
    subfolder="scheduler",
    torch_dtype=dtype,
)

# --- Pipeline assembly -------------------------------------------------------
pipe = Pipeline(
    vq_model,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    transformer=model,
    scheduler=scheduler,
)
pipe.to(device)
@spaces.GPU
def generate_image(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for ``prompt`` and report the seed that was used.

    The function is wrapped in ``spaces.GPU`` so that, on a ZeroGPU Space, a
    GPU is attached while the pipeline runs.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text describing what the image should avoid.
        seed: Requested seed. ``0`` means "pick one for me".
        randomize_seed: When true, ``seed`` is ignored and a fresh one is drawn.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Guidance strength forwarded to the pipeline.
        num_inference_steps: Number of sampling steps forwarded to the pipeline.
        progress: Gradio progress tracker; with ``track_tqdm=True`` it mirrors
            tqdm progress bars in the UI. It is not used directly here.

    Returns:
        A ``(image, seed)`` tuple: the first image produced by the pipeline and
        the integer seed that was actually applied.
    """
    if randomize_seed or seed == 0:
        # Draw from the OS entropy source rather than torch's global generator:
        # torch.manual_seed() below reseeds that generator, so a torch-drawn
        # "random" seed would be a deterministic function of the previous one.
        # The range is 1..MAX_SEED so the reported seed never equals the
        # "randomize" sentinel 0 and can be re-entered to reproduce the image.
        seed = secrets.randbelow(MAX_SEED) + 1

    # Slider values can arrive as floats; the seed, the image size and the
    # step count are all integer quantities.
    seed = int(seed)
    torch.manual_seed(seed)

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=int(height),
        width=int(width),
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
    )
    image = output.images[0]
    return image, seed
# Negative prompt pre-filled in the "Negative prompt" field of the UI.
# Adjacent string literals are concatenated into a single string.
default_negative_prompt = (
    "worst quality, low quality, low res, blurry, distortion, "
    "watermark, logo, signature, text, jpeg artifacts, signature, "
    "sketch, duplicate, ugly, identifying mark"
)

# Stylesheet for the page: centers the main column and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

# Sample prompts offered as one-click examples under the prompt box.
examples = [
    "Modern Architecture render with pleasing aesthetics.",
    "An image of a Pikachu wearing a birthday hat and playing guitar.",
    "A statue of a lion stands in front of a building.",
    "A white and blue coffee mug with a picture of a man on it.",
    "A metal sculpture of a deer with antlers.",
    "A bronze statue of an owl with its wings spread.",
    "A white table with a vase of flowers and a cup of coffee on top of it.",
    "A woman stands on a dock in the fog.",
    "A lion's head is shown in a grayscale image.",
    "A sculpture of a Greek woman head with a headband and a head of hair.",
]
# --- User interface ----------------------------------------------------------
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Monetico Text-to-Image Generator")

        # Prompt box and run button share a single row.
        with gr.Row():
            prompt = gr.Text(
                placeholder="Enter your prompt",
                label="Prompt",
                show_label=False,
                container=False,
                max_lines=1,
            )
            run_button = gr.Button("Run", variant="primary", scale=0)

        result = gr.Image(show_label=False, label="Result")

        # Less frequently used knobs are collapsed by default.
        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                value=default_negative_prompt,
                placeholder="Enter a negative prompt",
                label="Negative prompt",
                max_lines=1,
            )

            # A seed of 0 makes generate_image pick a seed itself.
            seed = gr.Slider(
                value=0,
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                label="Seed",
            )
            randomize_seed = gr.Checkbox(value=True, label="Randomize seed")

            with gr.Row():
                width = gr.Slider(
                    value=512,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    label="Width",
                )
                height = gr.Slider(
                    value=512,
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    label="Height",
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    value=9.0,
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    label="Guidance scale",
                )
                num_inference_steps = gr.Slider(
                    value=48,
                    minimum=1,
                    maximum=100,
                    step=1,
                    label="Number of inference steps",
                )

        gr.Examples(inputs=[prompt], examples=examples)

    # Component order must match the positional parameters of generate_image.
    generation_inputs = [
        prompt,
        negative_prompt,
        seed,
        randomize_seed,
        width,
        height,
        guidance_scale,
        num_inference_steps,
    ]

    # Clicking "Run" or pressing Enter in the prompt box starts a generation;
    # the seed that was used is written back to the seed slider.
    gr.on(
        fn=generate_image,
        triggers=[run_button.click, prompt.submit],
        inputs=generation_inputs,
        outputs=[result, seed],
    )

demo.launch()