bugfix
Files changed:
- app.py +15 -11
- requirements.txt +2 -1
- utils/__pycache__/__init__.cpython-310.pyc +0 -0
- utils/__pycache__/florence.cpython-310.pyc +0 -0
- utils/__pycache__/sam.cpython-310.pyc +0 -0
- utils/florence.py +7 -17
app.py CHANGED

@@ -1,5 +1,5 @@
 from typing import Tuple, Optional
-
+import os
 import gradio as gr
 import numpy as np
 import random
@@ -9,6 +9,7 @@ from diffusers import FluxInpaintPipeline
 import torch
 from PIL import Image, ImageFilter
 from huggingface_hub import login
+from diffusers import AutoencoderTiny, AutoencoderKL
 from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, snapshot_download
 import copy
 import random
@@ -38,9 +39,6 @@ dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 base_model = "black-forest-labs/FLUX.1-dev"
 
-taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
-good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
-pipe = FluxInpaintPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1).to(device)
 
 FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=device)
 SAM_IMAGE_MODEL = load_sam_image_model(device=device)
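
The three loads removed here reappear inside run_flux below. On ZeroGPU Spaces no CUDA device is bound at import time, so GPU-touching work belongs inside a function decorated with `@spaces.GPU`. A minimal sketch of that pattern, with a hypothetical `generate` function standing in for this app's pipelines:

```python
# Minimal ZeroGPU sketch: construct CUDA-bound objects inside the
# @spaces.GPU-decorated function, not at module import time.
import spaces
import torch

@spaces.GPU(duration=60)  # GPU is allocated only for the duration of the call
def generate(prompt: str) -> torch.Tensor:
    # Hypothetical stand-in for loading the FLUX pipeline; in app.py the
    # real loads (taef1, good_vae, pipe) happen at this point instead.
    model = torch.nn.Linear(8, 8).to("cuda")
    x = torch.randn(1, 8, device="cuda")
    return model(x).cpu()
```
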
@@ -133,7 +131,7 @@ def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
     print("upload finish", image_file)
     return image_file
 
-
+@spaces.GPU(duration=60)
 def run_flux(
     image: Image.Image,
     mask: Image.Image,
@@ -149,8 +147,13 @@ def run_flux(
 ) -> Image.Image:
     print("Running FLUX...")
 
+    taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
+    good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
+    pipe = FluxInpaintPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1).to(device)
+
     with calculateDuration("load lora"):
         print("start to load lora", lora_path, lora_weights)
+        pipe.unload_lora_weights()
         pipe.load_lora_weights(lora_path, weight_name=lora_weights)
 
     width, height = resolution_wh
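
The added `pipe.unload_lora_weights()` guards against adapters accumulating when a pipeline is reused across requests: `load_lora_weights` on its own does not remove a previously loaded adapter. Here the pipeline is rebuilt on every call anyway, so the unload is cheap insurance; the general reuse pattern is sketched below (the repo id and weight name are hypothetical):

```python
# Sketch: swap LoRA adapters on a long-lived diffusers pipeline.
# "some-user/some-flux-lora" is a hypothetical repo id for illustration.
def apply_lora(pipe, lora_path="some-user/some-flux-lora",
               weight_name="lora.safetensors"):
    pipe.unload_lora_weights()  # drop whatever the previous request loaded
    pipe.load_lora_weights(lora_path, weight_name=weight_name)
    return pipe
```
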
@@ -159,7 +162,7 @@ def run_flux(
     generator = torch.Generator().manual_seed(seed_slicer)
 
     with calculateDuration("run pipe"):
-        genearte_image = pipe(
+        genearte_image = pipe(
             prompt=prompt,
             image=image,
             mask_image=mask,
@@ -170,12 +173,13 @@ def run_flux(
             num_inference_steps=num_inference_steps_slider,
             max_sequence_length=256,
             joint_attention_kwargs={"scale": lora_scale},
+            good_vae=good_vae
         ).images[0]
 
     return genearte_image
 
-
-def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
+@spaces.GPU(duration=10)
+def genearte_mask(image_input: Image.Image, masking_prompt_text: str) -> Image.Image:
     # generate mask by florence & sam
     print("Generating mask...")
     task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
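
Note that `good_vae` is not a parameter of the stock `FluxInpaintPipeline` call; it presumably works because requirements.txt pins the flux-inpaint fork of diffusers. The intent, as in other FLUX demos pairing taef1 with the full VAE, is to run denoising on the fast tiny autoencoder and decode the final latents with the full `AutoencoderKL` for quality. With stock diffusers the final decode can be approximated manually; a sketch, assuming `latents` have already been unpacked the way the pipeline does internally:

```python
# Sketch (stock diffusers): decode final FLUX latents with the full VAE.
# Assumes `latents` is already unpacked to (B, C, H/8, W/8); the packing
# logic lives in the pipeline's _unpack_latents helper.
import torch

@torch.no_grad()
def decode_with_good_vae(latents, good_vae, image_processor):
    latents = latents / good_vae.config.scaling_factor + good_vae.config.shift_factor
    image = good_vae.decode(latents, return_dict=False)[0]
    return image_processor.postprocess(image, output_type="pil")[0]
```
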
@@ -186,7 +190,7 @@ def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
         model=FLORENCE_MODEL,
         processor=FLORENCE_PROCESSOR,
         device=device,
-        image=image,
+        image=image_input,
         task=task_prompt,
         text=masking_prompt_text
     )
@@ -203,7 +207,7 @@ def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
 
     with calculateDuration("generate segmenet mask"):
         # using sam generate segments images
-        detections = run_sam_inference(SAM_IMAGE_MODEL, image, detections)
+        detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
     if len(detections) == 0:
         gr.Info("No objects detected.")
         return None
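
For orientation, the mask path is: Florence-2 grounds the masking prompt into boxes via `<CAPTION_TO_PHRASE_GROUNDING>`, those detections seed SAM, and the per-object masks are merged into the single mask image the function returns. The merge step might look like the sketch below, assuming `detections.mask` is an (N, H, W) boolean array as in the supervision library:

```python
# Sketch: collapse per-object SAM masks into one grayscale PIL mask.
# Assumes detections.mask is an (N, H, W) boolean ndarray (supervision-style).
import numpy as np
from PIL import Image

def merge_masks(detections) -> Image.Image:
    combined = np.any(detections.mask, axis=0)  # union over detected objects
    return Image.fromarray((combined * 255).astype("uint8"))
```
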
@@ -225,7 +229,7 @@ def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
     return images[0]
 
 
-
+
 def process(
     image_url: str,
     inpainting_prompt_text: str,
requirements.txt CHANGED

@@ -14,4 +14,5 @@ opencv-python
 pytest
 requests
 git+https://github.com/Gothos/diffusers.git@flux-inpaint
-boto3
+boto3
+sentencepiece
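
`sentencepiece` is most likely needed by the T5 text encoder that FLUX.1-dev uses alongside CLIP; without it, building the tokenizer can fail when the pipeline loads. A quick standalone check (the repo id is the public T5 the FLUX tokenizer derives from; assumes network access and a standard transformers install):

```python
# Quick check that a sentencepiece-backed T5 tokenizer can be built.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("google/t5-v1_1-xxl")
print(tok("hello world")["input_ids"])
```
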
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (125 Bytes).

utils/__pycache__/florence.cpython-310.pyc ADDED
Binary file (2.31 kB).

utils/__pycache__/sam.cpython-310.pyc ADDED
Binary file (1.57 kB).
utils/florence.py CHANGED

@@ -29,10 +29,8 @@ def load_florence_model(
     device: torch.device, checkpoint: str = FLORENCE_CHECKPOINT
 ) -> Tuple[Any, Any]:
     with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
-        model = AutoModelForCausalLM.from_pretrained(
-            checkpoint, trust_remote_code=True).to(device).eval()
-        processor = AutoProcessor.from_pretrained(
-            checkpoint, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True).to(device).eval()
+        processor = AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)
     return model, processor
 
 
@@ -49,16 +47,8 @@ def run_florence_inference(
     else:
         prompt = task
     inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
-
-    generated_ids = model.generate(
-        input_ids=inputs["input_ids"],
-        pixel_values=inputs["pixel_values"],
-        max_new_tokens=1024,
-        num_beams=3
-    )
-    generated_text = processor.batch_decode(
-        generated_ids, skip_special_tokens=False)[0]
-    response = processor.post_process_generation(
-        generated_text, task=task, image_size=image.size)
-    print(generated_text, response)
-    return generated_text, response
+    generated_ids = model.generate(input_ids=inputs["input_ids"], pixel_values=inputs["pixel_values"], max_new_tokens=1024, num_beams=3)
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    response = processor.post_process_generation(generated_text, task=task, image_size=image.size)
+    print("run_florence_inference", "finish", generated_text, response)
+    return generated_text, response
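
With the wrapped lines joined, the flow in `run_florence_inference` reads directly: encode prompt and image, `generate`, decode with special tokens kept (Florence-2's output parser needs them), then `post_process_generation` into structured output. A usage sketch mirroring how app.py calls it (the image path and prompt text are hypothetical):

```python
# Usage sketch for run_florence_inference, following the call in app.py.
from PIL import Image

image_input = Image.open("example.jpg")  # hypothetical local image
_, result = run_florence_inference(
    model=FLORENCE_MODEL,
    processor=FLORENCE_PROCESSOR,
    device=device,
    image=image_input,
    task="<CAPTION_TO_PHRASE_GROUNDING>",
    text="the red car",  # hypothetical masking prompt
)
# result["<CAPTION_TO_PHRASE_GROUNDING>"] holds the grounded boxes and labels
print(result)
```
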