radames committed on
Commit
9f33108
1 Parent(s): cf83834
app.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from gradio_imageslider import ImageSlider
4
+ import torch
5
+
6
+ torch.jit.script = lambda f: f
7
+ from diffusers import (
8
+ ControlNetModel,
9
+ StableDiffusionXLControlNetImg2ImgPipeline,
10
+ DDIMScheduler,
11
+ )
12
+ from controlnet_aux import AnylineDetector
13
+ from compel import Compel, ReturnedEmbeddingsType
14
+ from PIL import Image
15
+ import os
16
+ import time
17
+ import numpy as np
18
+
19
# --- Runtime configuration -------------------------------------------------
# Deployment flags are read once from the environment at import time.
IS_SPACES_ZERO = os.getenv("SPACES_ZERO_GPU", "0") == "1"
IS_SPACE = os.getenv("SPACE_ID") is not None
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"

# Prefer the GPU when torch reports one; weights are always loaded as fp16.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16

print(f"device: {device}")
print(f"dtype: {dtype}")
print(f"low memory: {LOW_MEMORY}")


# --- Model loading ---------------------------------------------------------
# Alternatives that were tried and left disabled by the original author:
#   base model : "stabilityai/sdxl-turbo"
#   VAE        : "madebyollin/sdxl-vae-fp16-fix"
#   ControlNet : "diffusers/controlnet-canny-sdxl-1.0"
model = "stabilityai/stable-diffusion-xl-base-1.0"

# The scheduler config is taken from the base model's "scheduler" subfolder.
scheduler = DDIMScheduler.from_pretrained(model, subfolder="scheduler")

# MistoLine ControlNet, pinned to the "refs/pr/3" revision of its repository.
controlnet = ControlNetModel.from_pretrained(
    "TheMistoAI/MistoLine",
    revision="refs/pr/3",
    variant="fp16",
    torch_dtype=dtype,
)

pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    model,
    controlnet=controlnet,
    scheduler=scheduler,
    torch_dtype=dtype,
    variant="fp16",
    use_safetensors=True,
)

# Compel is built from both tokenizer/text-encoder pairs of the pipeline;
# only the second encoder is asked for a pooled output.
compel = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)

# Moved to the target device only after Compel has been constructed
# (same order as the original script).
pipe = pipe.to(device)

# Line-art detector whose output is used as the ControlNet conditioning image.
anyline = AnylineDetector.from_pretrained(
    "TheMistoAI/MistoLine",
    filename="MTEED.pth",
    subfolder="Anyline",
).to(device)
65
+
66
+
67
def pad_image(image):
    """Pad `image` to a square canvas, centring it along the shorter axis.

    A square input is returned unchanged (the very same object). Otherwise a
    new image of the same mode is created with side length equal to the
    longer edge, filled with the colour ``(0, 0, 0)``, and the input is
    pasted onto it.
    """
    width, height = image.size
    if width == height:
        return image

    side = max(width, height)
    # Only the offset along the shorter axis is non-zero; the other is 0.
    offset = ((side - width) // 2, (side - height) // 2)

    canvas = Image.new(image.mode, (side, side), (0, 0, 0))
    canvas.paste(image, offset)
    return canvas
83
+
84
+
85
@spaces.GPU
def predict(
    input_image,
    prompt,
    negative_prompt,
    seed,
    guidance_scale=8.5,
    controlnet_conditioning_scale=0.5,
    strength=1.0,
    controlnet_start=0.0,
    controlnet_end=1.0,
    guassian_sigma=2.0,
    intensity_threshold=3,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the MistoLine ControlNet img2img pipeline on an uploaded image.

    Args:
        input_image: Image to enhance; ``None`` is rejected with a ``gr.Error``.
        prompt: Positive prompt, encoded through ``compel``.
        negative_prompt: Negative prompt, encoded through ``compel``.
        seed: Seed handed to ``torch.manual_seed``.
        guidance_scale: Forwarded to the pipeline as ``guidance_scale``.
        controlnet_conditioning_scale: Weight of the ControlNet conditioning.
        strength: img2img strength forwarded to the pipeline.
        controlnet_start: Fraction of the run at which ControlNet guidance
            starts; must be strictly smaller than ``controlnet_end``.
        controlnet_end: Fraction of the run at which ControlNet guidance stops.
        guassian_sigma: Forwarded to the Anyline detector (clamped to >= 0.01).
        intensity_threshold: Forwarded to the Anyline detector.
        progress: Not used in the body; presumably its presence lets Gradio
            mirror tqdm progress in the UI -- confirm against Gradio docs.

    Returns:
        A 3-tuple ``((padded_image, generated_image), padded_image,
        anyline_image)``, matching the slider and the two preview outputs.

    Raises:
        gr.Error: If no image was supplied, or if the ControlNet start is not
            smaller than the ControlNet end.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    controlnet_start = float(controlnet_start)
    controlnet_end = float(controlnet_end)
    # Fail early with a readable message instead of erroring after the
    # expensive preprocessing below.
    if controlnet_start >= controlnet_end:
        raise gr.Error("ControlNet Start must be smaller than ControlNet End.")

    padded_image = pad_image(input_image).resize((1024, 1024)).convert("RGB")

    # Index 0 holds the positive prompt, index 1 the negative prompt.
    conditioning, pooled = compel([prompt, negative_prompt])
    generator = torch.manual_seed(seed)

    # The timer covers line detection plus diffusion, not prompt encoding.
    start_time = time.perf_counter()
    anyline_image = anyline(
        padded_image,
        detect_resolution=1280,
        # Keyword spelling kept as in the original call; presumably it mirrors
        # the detector's own parameter name -- verify before renaming.
        guassian_sigma=max(0.01, guassian_sigma),
        intensity_threshold=intensity_threshold,
    )

    images = pipe(
        image=padded_image,
        control_image=anyline_image,
        strength=strength,
        prompt_embeds=conditioning[0:1],
        pooled_prompt_embeds=pooled[0:1],
        negative_prompt_embeds=conditioning[1:2],
        negative_pooled_prompt_embeds=pooled[1:2],
        width=1024,
        height=1024,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        # The pipeline names these `control_guidance_start` / `control_guidance_end`.
        # The former `controlnet_start=` / `controlnet_end=` keywords are not
        # pipeline parameters, so the two sliders had no effect.
        control_guidance_start=controlnet_start,
        control_guidance_end=controlnet_end,
        generator=generator,
        num_inference_steps=30,
        guidance_scale=guidance_scale,
        eta=1.0,
    )
    print(f"Time taken: {time.perf_counter() - start_time}")
    return (padded_image, images.images[0]), padded_image, anyline_image
133
+
134
+
135
# NOTE(review): the "#"-prefixed lines inside the rule are not valid CSS
# comments; they look like intentionally disabled declarations. The string is
# kept byte-for-byte -- confirm intent before converting them to /* ... */.
css = """
#intro{
    # max-width: 32rem;
    # text-align: center;
    # margin: 0 auto;
}
"""

# Intro text shown at the top of the page (rendered with elem_id="intro").
INTRO_MARKDOWN = """
# MistoLine ControlNet demo

You can upload an initial image and prompt to generate an enhanced version.
SDXL Controlnet [TheMistoAI/MistoLine](https://huggingface.co/TheMistoAI/MistoLine)
[Anyline with Controlnet Aux ](https://github.com/huggingface/controlnet_aux)
For upscaling see [Enhance This Demo](https://huggingface.co/spaces/radames/Enhance-This-HiDiffusion-SDXL)
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(
        INTRO_MARKDOWN,
        elem_id="intro",
    )
    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Input Image")
            prompt = gr.Textbox(
                label="Prompt",
                info="The prompt is very important to get the desired results. Please try to describe the image as best as you can. Accepts Compel Syntax",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
            )
            seed = gr.Slider(
                minimum=0,
                maximum=2**64 - 1,
                value=1415926535897932,
                step=1,
                label="Seed",
                randomize=True,
            )
            with gr.Accordion(label="Advanced", open=False):
                guidance_scale = gr.Slider(
                    minimum=0,
                    maximum=50,
                    value=8.5,
                    step=0.001,
                    label="Guidance Scale",
                )
                controlnet_conditioning_scale = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.001,
                    value=0.5,
                    label="ControlNet Conditioning Scale",
                )
                strength = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.001,
                    value=1,
                    label="Strength",
                )
                controlnet_start = gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.001,
                    value=0.0,
                    label="ControlNet Start",
                )
                controlnet_end = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.001,
                    value=1.0,
                    label="ControlNet End",
                )
                guassian_sigma = gr.Slider(
                    minimum=0.01,
                    maximum=10.0,
                    step=0.1,
                    value=2.0,
                    label="(Anyline) Guassian Sigma",
                )
                intensity_threshold = gr.Slider(
                    minimum=0,
                    maximum=255,
                    step=1,
                    value=3,
                    label="(Anyline) Intensity Threshold",
                )

            btn = gr.Button()
        # Right column: before/after slider plus the intermediate images.
        with gr.Column(scale=2):
            with gr.Group():
                image_slider = ImageSlider(position=0.5)
            with gr.Row():
                padded_image = gr.Image(type="pil", label="Padded Image")
                anyline_image = gr.Image(type="pil", label="Anyline Image")

    # Order must match the positional parameters of `predict`.
    inputs = [
        image_input,
        prompt,
        negative_prompt,
        seed,
        guidance_scale,
        controlnet_conditioning_scale,
        strength,
        controlnet_start,
        controlnet_end,
        guassian_sigma,
        intensity_threshold,
    ]
    # Order must match the 3-tuple returned by `predict`.
    outputs = [image_slider, padded_image, anyline_image]

    # First clear the slider, then run the prediction. The clearing callback
    # is registered with no inputs, so it must accept zero arguments (it was
    # previously declared as `lambda x: None`).
    btn.click(lambda: None, inputs=None, outputs=image_slider).then(
        fn=predict, inputs=inputs, outputs=outputs
    )

    # Each example row follows the same order as `inputs`.
    gr.Examples(
        fn=predict,
        inputs=inputs,
        outputs=outputs,
        examples=[
            [
                "./examples/lara.jpeg",
                "photography of lara croft 8k high definition award winning",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                5436236241,
                8.5,
                0.8,
                1.0,
                0.0,
                0.9,
                2,
                3,
            ],
            [
                "./examples/cybetruck.jpeg",
                "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                383472451451,
                8.5,
                0.8,
                0.8,
                0.0,
                0.9,
                2,
                3,
            ],
            [
                "./examples/jesus.png",
                "a photorealistic painting of Jesus Christ, 4k high definition",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                13317204146129588000,
                8.5,
                0.8,
                0.8,
                0.0,
                0.9,
                2,
                3,
            ],
            [
                "./examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg",
                "A crowded stadium with enthusiastic fans watching a daytime sporting event, the stands filled with colorful attire and the sun casting a warm glow",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                5623124123512,
                8.5,
                0.8,
                0.8,
                0.0,
                0.9,
                2,
                3,
            ],
            [
                "./examples/img_aef651cb-2919-499d-aa49-6d4e2e21a56e_1024.jpg",
                "a large red flower on a black background 4k high definition",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
                23123412341234,
                8.5,
                0.8,
                0.8,
                0.0,
                0.9,
                2,
                3,
            ],
            [
                "./examples/huggingface.jpg",
                "photo realistic huggingface human emoji costume, round, yellow, (human skin)+++ (human texture)+++",
                "blurry, ugly, duplicate, poorly drawn, deformed, mosaic, emoji cartoon, drawing, pixelated",
                12312353423,
                15.206,
                0.364,
                0.8,
                0.0,
                0.9,
                2,
                3,
            ],
        ],
        cache_examples="lazy",
    )


demo.queue(api_open=False)
demo.launch(show_api=False)
examples/anna-sullivan-DioLM8ViiO8-unsplash.jpg ADDED
examples/cybetruck.jpeg ADDED
examples/huggingface.jpg ADDED
examples/img_aef651cb-2919-499d-aa49-6d4e2e21a56e_1024.jpg ADDED
examples/jesus.png ADDED
examples/lara.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==4.29.0
2
+ accelerate
3
+ transformers
4
+ torchvision
5
+ xformers
6
+ accelerate
7
+ invisible-watermark
8
+ huggingface-hub
9
+ hf-transfer
10
+ gradio_imageslider==0.0.20
11
+ compel
12
+ opencv-python
13
+ numpy
14
+ diffusers==0.27.0
15
+ transformers
16
+ accelerate
17
+ safetensors
18
+ hidiffusion==0.1.8
19
+ spaces
20
+ torch==2.2
21
+ controlnet-aux @ git+https://github.com/huggingface/controlnet_aux