Update README.md
README.md (changed)
The model is created by Dongxu Li, Junnan Li, Steven C.H. Hoi.
```python
from diffusers.pipelines import BlipDiffusionPipeline
from diffusers.utils import load_image
import torch

blip_diffusion_pipe = BlipDiffusionPipeline.from_pretrained(
    "ayushtues/blipdiffusion", torch_dtype=torch.float16
).to("cuda")

cond_subject = "dog"
tgt_subject = "dog"
text_prompt_input = "swimming underwater"

cond_image = load_image(
    "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/dog.jpg"
)

iter_seed = 88888
guidance_scale = 7.5
num_inference_steps = 25
negative_prompt = "over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate"

output = blip_diffusion_pipe(
    text_prompt_input,
    cond_image,
    cond_subject,
    tgt_subject,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    neg_prompt=negative_prompt,
    height=512,
    width=512,
).images
output[0].save("image.png")
```
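The snippet above defines `iter_seed` but never passes it to the pipeline. For reproducible outputs, one option is to seed a `torch.Generator` and hand it to the call; this is a sketch that assumes `BlipDiffusionPipeline` accepts the usual diffusers `generator` argument:

```python
# Sketch: reuse the variables from the example above and fix the random seed.
# Assumes the pipeline accepts a `generator` argument, as diffusers pipelines generally do.
generator = torch.Generator(device="cpu").manual_seed(iter_seed)
output = blip_diffusion_pipe(
    text_prompt_input,
    cond_image,
    cond_subject,
    tgt_subject,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    neg_prompt=negative_prompt,
    height=512,
    width=512,
    generator=generator,
).images
```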
Input Image : <img src="https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/dog.jpg" style="width:500px;"/>
```python
from diffusers.pipelines import BlipDiffusionControlNetPipeline
from diffusers.utils import load_image
from controlnet_aux import CannyDetector
import torch

blip_diffusion_pipe = BlipDiffusionControlNetPipeline.from_pretrained(
    "ayushtues/blipdiffusion-controlnet", torch_dtype=torch.float16
).to("cuda")

style_subject = "flower"  # subject that defines the style
tgt_subject = "teapot"  # subject to generate.
text_prompt = "on a marble table"

cldm_cond_image = load_image(
    "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/kettle.jpg"
).resize((512, 512))
canny = CannyDetector()
cldm_cond_image = canny(cldm_cond_image, 30, 70, output_type="pil")
style_image = load_image(
    "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/flower.jpg"
)

guidance_scale = 7.5
num_inference_steps = 50
negative_prompt = "over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate"

output = blip_diffusion_pipe(
    text_prompt,
    style_image,
    cldm_cond_image,
    style_subject,
    tgt_subject,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    neg_prompt=negative_prompt,
    height=512,
    width=512,
).images
output[0].save("image.png")
```
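In the `CannyDetector` call above, 30 and 70 are the low and high hysteresis thresholds, and `output_type="pil"` makes it return a PIL image. If you want to sanity-check the conditioning image before running the pipeline, you can save the intermediate edge map (an optional step, not part of the original example):

```python
# Optional: inspect the Canny edge map that conditions the ControlNet.
cldm_cond_image.save("canny_edges.png")
```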
Input Style Image : <img src="https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/flower.jpg" style="width:500px;"/>
```python
from diffusers.pipelines import BlipDiffusionControlNetPipeline
from diffusers.models import ControlNetModel
from diffusers.utils import load_image
from controlnet_aux import HEDdetector

blip_diffusion_pipe = BlipDiffusionControlNetPipeline.from_pretrained(
    "ayushtues/blipdiffusion-controlnet"
)
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-scribble")
blip_diffusion_pipe.controlnet = controlnet
blip_diffusion_pipe.to("cuda")

style_subject = "flower"  # subject that defines the style
tgt_subject = "bag"  # subject to generate.
text_prompt = "on a table"
cldm_cond_image = load_image(
    "https://huggingface.co/lllyasviel/sd-controlnet-scribble/resolve/main/images/bag.png"
).resize((512, 512))
hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
cldm_cond_image = hed(cldm_cond_image)
style_image = load_image(
    "https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/flower.jpg"
)

guidance_scale = 7.5
num_inference_steps = 50
negative_prompt = "over-exposure, under-exposure, saturated, duplicate, out of frame, lowres, cropped, worst quality, low quality, jpeg artifacts, morbid, mutilated, out of frame, ugly, bad anatomy, bad proportions, deformed, blurry, duplicate"

output = blip_diffusion_pipe(
    text_prompt,
    style_image,
    cldm_cond_image,
    style_subject,
    tgt_subject,
    guidance_scale=guidance_scale,
    num_inference_steps=num_inference_steps,
    neg_prompt=negative_prompt,
    height=512,
    width=512,
).images
output[0].save("image.png")
```
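Unlike the two examples above, this pipeline is loaded in full precision. If GPU memory is tight, a possible variant (not part of the original snippet) is to load both the pipeline and the ControlNet in half precision, mirroring the Canny example:

```python
import torch

# Optional sketch: half-precision loading to reduce GPU memory, mirroring the Canny example above.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-scribble", torch_dtype=torch.float16
)
blip_diffusion_pipe = BlipDiffusionControlNetPipeline.from_pretrained(
    "ayushtues/blipdiffusion-controlnet", torch_dtype=torch.float16
)
blip_diffusion_pipe.controlnet = controlnet
blip_diffusion_pipe.to("cuda")
```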
Input Style Image : <img src="https://huggingface.co/datasets/ayushtues/blipdiffusion_images/resolve/main/flower.jpg" style="width:500px;"/>
If you find this repository useful in your research, please cite:

```
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```