Spaces:
Running
on
T4
Running
on
T4
liuyizhang
committed on
Commit
•
5836895
1
Parent(s):
c3f7ca9
update app.py
Browse files
app.py
CHANGED
@@ -58,6 +58,26 @@ from diffusers import StableDiffusionInpaintPipeline
|
|
58 |
from huggingface_hub import hf_hub_download
|
59 |
|
60 |
from utils import computer_info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
|
63 |
args = SLConfig.fromfile(model_config_path)
|
@@ -224,55 +244,50 @@ def mix_masks(imgs):
|
|
224 |
re_img = 1 - re_img
|
225 |
return Image.fromarray(np.uint8(255*re_img))
|
226 |
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
#
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
logger.info(f"initialize lama_cleaner...")
|
265 |
-
from lama_cleaner.helper import (
|
266 |
-
load_img,
|
267 |
-
numpy_to_bytes,
|
268 |
-
resize_max_size,
|
269 |
-
)
|
270 |
-
|
271 |
-
lama_cleaner_model = ModelManager(
|
272 |
-
name='lama',
|
273 |
-
device='cpu', # device,
|
274 |
)
|
275 |
|
|
|
|
|
|
|
|
|
|
|
276 |
def lama_cleaner_process(image, mask):
|
277 |
ori_image = image
|
278 |
if mask.shape[0] == image.shape[1] and mask.shape[1] == image.shape[0] and mask.shape[0] != mask.shape[1]:
|
@@ -330,32 +345,6 @@ def lama_cleaner_process(image, mask):
|
|
330 |
image = Image.open(io.BytesIO(numpy_to_bytes(res_np_img, 'png')))
|
331 |
return image
|
332 |
|
333 |
-
# relate anything
|
334 |
-
from ram_utils import iou, sort_and_deduplicate, relation_classes, MLP, show_anns, ram_show_mask
|
335 |
-
from ram_train_eval import RamModel,RamPredictor
|
336 |
-
from mmengine.config import Config as mmengine_Config
|
337 |
-
input_size = 512
|
338 |
-
hidden_size = 256
|
339 |
-
num_classes = 56
|
340 |
-
|
341 |
-
# load ram model
|
342 |
-
model_path = "./checkpoints/ram_epoch12.pth"
|
343 |
-
ram_config = dict(
|
344 |
-
model=dict(
|
345 |
-
pretrained_model_name_or_path='bert-base-uncased',
|
346 |
-
load_pretrained_weights=False,
|
347 |
-
num_transformer_layer=2,
|
348 |
-
input_feature_size=256,
|
349 |
-
output_feature_size=768,
|
350 |
-
cls_feature_size=512,
|
351 |
-
num_relation_classes=56,
|
352 |
-
pred_type='attention',
|
353 |
-
loss_type='multi_label_ce',
|
354 |
-
),
|
355 |
-
load_from=model_path,
|
356 |
-
)
|
357 |
-
ram_config = mmengine_Config(ram_config)
|
358 |
-
|
359 |
class Ram_Predictor(RamPredictor):
|
360 |
def __init__(self, config, device='cpu'):
|
361 |
self.config = config
|
@@ -368,7 +357,25 @@ class Ram_Predictor(RamPredictor):
|
|
368 |
self.model.load_state_dict(torch.load(self.config.load_from, map_location=self.device))
|
369 |
self.model.train()
|
370 |
|
371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
373 |
# visualization
|
374 |
def draw_selected_mask(mask, draw):
|
@@ -531,7 +538,6 @@ def run_anything_task(input_image, text_prompt, task_type, inpaint_prompt, box_t
|
|
531 |
except:
|
532 |
warnings.warn("Failed to load custom C++ ops. Running on CPU mode Only in groundingdino!")
|
533 |
|
534 |
-
groundingdino_device = 'cpu'
|
535 |
boxes_filt, pred_phrases = get_grounding_output(
|
536 |
groundingdino_model, image, text_prompt, box_threshold, text_threshold, device=groundingdino_device
|
537 |
)
|
@@ -676,6 +682,15 @@ if __name__ == "__main__":
|
|
676 |
|
677 |
print(f'args = {args}')
|
678 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
679 |
block = gr.Blocks().queue()
|
680 |
with block:
|
681 |
with gr.Row():
|
|
|
58 |
from huggingface_hub import hf_hub_download
|
59 |
|
60 |
from utils import computer_info
|
61 |
+
# relate anything
|
62 |
+
from ram_utils import iou, sort_and_deduplicate, relation_classes, MLP, show_anns, ram_show_mask
|
63 |
+
from ram_train_eval import RamModel,RamPredictor
|
64 |
+
from mmengine.config import Config as mmengine_Config
|
65 |
+
|
66 |
+
config_file = 'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py'
|
67 |
+
ckpt_repo_id = "ShilongLiu/GroundingDINO"
|
68 |
+
ckpt_filenmae = "groundingdino_swint_ogc.pth"
|
69 |
+
sam_checkpoint = './sam_vit_h_4b8939.pth'
|
70 |
+
output_dir = "outputs"
|
71 |
+
device = 'cpu'
|
72 |
+
|
73 |
+
os.makedirs(output_dir, exist_ok=True)
|
74 |
+
groundingdino_model = None
|
75 |
+
sam_model = None
|
76 |
+
sam_predictor = None
|
77 |
+
sam_mask_generator = None
|
78 |
+
sd_pipe = None
|
79 |
+
lama_cleaner_model= None
|
80 |
+
ram_model = None
|
81 |
|
82 |
def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
|
83 |
args = SLConfig.fromfile(model_config_path)
|
|
|
244 |
re_img = 1 - re_img
|
245 |
return Image.fromarray(np.uint8(255*re_img))
|
246 |
|
247 |
+
def set_device():
    """Select the compute device and publish it in the module-level ``device``.

    Picks ``'cuda'`` when ``torch.cuda.is_available()`` is true, otherwise
    ``'cpu'``, and prints the choice.
    """
    # Without this declaration the assignment below would only create a local
    # name, leaving the module-level `device` at its initial value.
    global device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'device={device}')
|
250 |
+
|
251 |
+
def load_groundingdino_model():
    """Load the GroundingDINO model into the module-level ``groundingdino_model``.

    The model is built by ``load_model_hf`` from the module-level
    ``config_file``, ``ckpt_repo_id`` and ``ckpt_filenmae`` settings.
    """
    # The result must be stored in the module-level slot; a plain assignment
    # here would bind a local and the loaded model would be discarded on return.
    global groundingdino_model

    # initialize groundingdino model
    logger.info("initialize groundingdino model...")
    groundingdino_model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
|
255 |
+
|
256 |
+
def load_sam_model():
    """Load SAM and publish the model, predictor and automatic mask generator.

    Fills the module-level ``sam_model``, ``sam_predictor`` and
    ``sam_mask_generator``. The model is built from ``sam_checkpoint`` and
    moved to the module-level ``device``.
    """
    # All three results are module-level state; without `global` they would be
    # locals and the module-level names would remain None.
    global sam_model, sam_predictor, sam_mask_generator

    # initialize SAM
    logger.info("initialize SAM model...")
    sam_device = device
    sam_model = build_sam(checkpoint=sam_checkpoint).to(sam_device)
    # The predictor and the mask generator share the same underlying model.
    sam_predictor = SamPredictor(sam_model)
    sam_mask_generator = SamAutomaticMaskGenerator(sam_model)
|
263 |
+
|
264 |
+
def load_sd_model():
    """Load the Stable Diffusion inpainting pipeline into module-level ``sd_pipe``.

    When the ``IS_MY_DEBUG`` environment variable is set, loading is skipped
    and ``sd_pipe`` is left as ``None``. Otherwise the
    ``runwayml/stable-diffusion-inpainting`` pipeline is loaded with
    ``torch.float16`` weights and moved to the module-level ``device``.
    """
    # Publish the pipeline in the module-level slot instead of a throwaway local.
    global sd_pipe

    # initialize stable-diffusion-inpainting
    logger.info("initialize stable-diffusion-inpainting...")
    sd_pipe = None
    if os.environ.get('IS_MY_DEBUG') is None:
        # NOTE(review): float16 weights may not be usable when `device` is
        # 'cpu' — confirm against the deployment hardware.
        sd_pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-inpainting",
            # revision="fp16",
            # "stabilityai/stable-diffusion-2-inpainting",
            torch_dtype=torch.float16,
        )
        sd_pipe = sd_pipe.to(device)
|
276 |
+
|
277 |
+
def load_lama_cleaner_model():
    """Load the LaMa cleaner model into the module-level ``lama_cleaner_model``.

    Also imports the ``lama_cleaner.helper`` utilities (``load_img``,
    ``numpy_to_bytes``, ``resize_max_size``) into module scope, since code
    outside this function (for example ``lama_cleaner_process``) calls
    ``numpy_to_bytes`` by its bare name.
    """
    # `global` applies to names bound by an import statement as well: it makes
    # the helpers visible module-wide rather than only inside this function,
    # and makes the model assignment update the module-level slot.
    global lama_cleaner_model, load_img, numpy_to_bytes, resize_max_size

    # initialize lama_cleaner
    logger.info("initialize lama_cleaner...")
    from lama_cleaner.helper import (
        load_img,
        numpy_to_bytes,
        resize_max_size,
    )

    lama_cleaner_model = ModelManager(
        name='lama',
        device='cpu',  # device,
    )
|
290 |
+
|
291 |
def lama_cleaner_process(image, mask):
|
292 |
ori_image = image
|
293 |
if mask.shape[0] == image.shape[1] and mask.shape[1] == image.shape[0] and mask.shape[0] != mask.shape[1]:
|
|
|
345 |
image = Image.open(io.BytesIO(numpy_to_bytes(res_np_img, 'png')))
|
346 |
return image
|
347 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
348 |
class Ram_Predictor(RamPredictor):
|
349 |
def __init__(self, config, device='cpu'):
|
350 |
self.config = config
|
|
|
357 |
self.model.load_state_dict(torch.load(self.config.load_from, map_location=self.device))
|
358 |
self.model.train()
|
359 |
|
360 |
+
def load_ram_model():
    """Build the Relate-Anything predictor into the module-level ``ram_model``.

    Assembles the model configuration, pointing ``load_from`` at the
    ``./checkpoints/ram_epoch12.pth`` checkpoint, wraps it in an mmengine
    ``Config`` and constructs a ``Ram_Predictor`` on the module-level
    ``device``.
    """
    # Store the predictor in the module-level slot; a plain assignment would
    # create a local and the module-level `ram_model` would stay None.
    global ram_model

    # load ram model
    model_path = "./checkpoints/ram_epoch12.pth"
    ram_config = dict(
        model=dict(
            pretrained_model_name_or_path='bert-base-uncased',
            load_pretrained_weights=False,
            num_transformer_layer=2,
            input_feature_size=256,
            output_feature_size=768,
            cls_feature_size=512,
            num_relation_classes=56,
            pred_type='attention',
            loss_type='multi_label_ce',
        ),
        load_from=model_path,
    )
    ram_config = mmengine_Config(ram_config)
    ram_model = Ram_Predictor(ram_config, device)
|
379 |
|
380 |
# visualization
|
381 |
def draw_selected_mask(mask, draw):
|
|
|
538 |
except:
|
539 |
warnings.warn("Failed to load custom C++ ops. Running on CPU mode Only in groundingdino!")
|
540 |
|
|
|
541 |
boxes_filt, pred_phrases = get_grounding_output(
|
542 |
groundingdino_model, image, text_prompt, box_threshold, text_threshold, device=groundingdino_device
|
543 |
)
|
|
|
682 |
|
683 |
print(f'args = {args}')
|
684 |
|
685 |
+
os.system("pip list")
|
686 |
+
|
687 |
+
set_device()
|
688 |
+
load_groundingdino_model()
|
689 |
+
load_sam_model()
|
690 |
+
load_sd_model()
|
691 |
+
load_lama_cleaner_model()
|
692 |
+
load_ram_model()
|
693 |
+
|
694 |
block = gr.Blocks().queue()
|
695 |
with block:
|
696 |
with gr.Row():
|