jiuface committed
Commit
a4f92f5
1 Parent(s): 7b934fb
app.py CHANGED
@@ -1,5 +1,5 @@
 from typing import Tuple, Optional
-
+import os
 import gradio as gr
 import numpy as np
 import random
@@ -9,6 +9,7 @@ from diffusers import FluxInpaintPipeline
 import torch
 from PIL import Image, ImageFilter
 from huggingface_hub import login
+from diffusers import AutoencoderTiny, AutoencoderKL
 from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, snapshot_download
 import copy
 import random
@@ -38,9 +39,6 @@ dtype = torch.bfloat16
 device = "cuda" if torch.cuda.is_available() else "cpu"
 base_model = "black-forest-labs/FLUX.1-dev"
 
-taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
-good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
-pipe = FluxInpaintPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1).to(device)
 
 FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=device)
 SAM_IMAGE_MODEL = load_sam_image_model(device=device)
@@ -133,7 +131,7 @@ def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
     print("upload finish", image_file)
     return image_file
 
-
+@spaces.GPU(duration=60)
 def run_flux(
     image: Image.Image,
     mask: Image.Image,
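Note on this hunk and the ones around it: @spaces.GPU is the Hugging Face ZeroGPU decorator, which attaches a GPU only for the duration of the decorated call. That is why the module-level taef1/good_vae/pipe construction is deleted in the @@ -38,9 hunk above and rebuilt inside run_flux in the next hunk, and why the blanket @spaces.GPU(duration=120) on process is dropped in the final hunk in favor of 60 s on run_flux and 10 s on genearte_mask. A minimal sketch of the pattern, reusing the names from this app.py (run_flux_sketch is a hypothetical stand-in):

import spaces  # Hugging Face ZeroGPU helper, already used by this repo
import torch
from diffusers import AutoencoderTiny, FluxInpaintPipeline  # pipeline import assumes the fork pinned in requirements.txt

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

@spaces.GPU(duration=60)  # the GPU is held only while this call runs
def run_flux_sketch(prompt: str) -> None:
    # Heavy models are built inside the decorated function, never at module
    # import time, because a ZeroGPU Space has no CUDA device during import.
    taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
    pipe = FluxInpaintPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev", torch_dtype=dtype, vae=taef1
    ).to(device)
    # ... run the pipeline here, as run_flux does ...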
@@ -149,8 +147,13 @@ def run_flux(
 ) -> Image.Image:
     print("Running FLUX...")
 
+    taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
+    good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
+    pipe = FluxInpaintPipeline.from_pretrained(base_model, torch_dtype=dtype, vae=taef1).to(device)
+
     with calculateDuration("load lora"):
         print("start to load lora", lora_path, lora_weights)
+        pipe.unload_lora_weights()
         pipe.load_lora_weights(lora_path, weight_name=lora_weights)
 
     width, height = resolution_wh
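The added pipe.unload_lora_weights() matters because LoRA paths vary per request: without the unload, repeated load_lora_weights calls can stack a new adapter on top of the previous one. A hedged sketch of the swap step (swap_lora is a hypothetical helper; both pipe methods are real diffusers APIs):

from diffusers import FluxInpaintPipeline

def swap_lora(pipe: FluxInpaintPipeline, lora_path: str, lora_weights: str) -> None:
    # Drop whatever adapter a previous request attached, then load the new one.
    pipe.unload_lora_weights()
    pipe.load_lora_weights(lora_path, weight_name=lora_weights)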
@@ -159,7 +162,7 @@
     generator = torch.Generator().manual_seed(seed_slicer)
 
     with calculateDuration("run pipe"):
-        genearte_image = PIPE(
+        genearte_image = pipe(
            prompt=prompt,
            image=image,
            mask_image=mask,
@@ -170,12 +173,13 @@
            num_inference_steps=num_inference_steps_slider,
            max_sequence_length=256,
            joint_attention_kwargs={"scale": lora_scale},
+           good_vae=good_vae
        ).images[0]
 
     return genearte_image
 
-
-def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
+@spaces.GPU(duration=10)
+def genearte_mask(image_input: Image.Image, masking_prompt_text: str) -> Image.Image:
     # generate mask by florence & sam
     print("Generating mask...")
     task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
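good_vae= is not a keyword of the stock diffusers FluxInpaintPipeline; it presumably comes from the Gothos flux-inpaint fork pinned in requirements.txt. The apparent intent is to denoise with the fast tiny VAE (taef1) attached and decode the final latents with the full-quality VAE. A sketch of such a final decode, assuming the latents are already unpacked to (batch, 16, height/8, width/8); the scale/shift follows diffusers' Flux pipelines:

import torch
from diffusers import AutoencoderKL

@torch.no_grad()
def decode_final_latents(latents: torch.Tensor, good_vae: AutoencoderKL) -> torch.Tensor:
    # Hypothetical helper: undo Flux latent scaling, then decode with the full VAE.
    latents = latents / good_vae.config.scaling_factor + good_vae.config.shift_factor
    return good_vae.decode(latents).sample  # pixel-space tensor, roughly in [-1, 1]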
@@ -186,7 +190,7 @@ def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
         model=FLORENCE_MODEL,
         processor=FLORENCE_PROCESSOR,
         device=device,
-        image=image,
+        image=image_input,
         task=task_prompt,
         text=masking_prompt_text
     )
@@ -203,7 +207,7 @@ def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
 
     with calculateDuration("generate segmenet mask"):
         # using sam generate segments images
-        detections = run_sam_inference(SAM_IMAGE_MODEL, image, detections)
+        detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
         if len(detections) == 0:
             gr.Info("No objects detected.")
             return None
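For orientation, the renamed call sites above implement a grounding-then-segmentation chain: Florence-2 turns masking_prompt_text into bounding boxes via the <CAPTION_TO_PHRASE_GROUNDING> task, and SAM refines those boxes into pixel masks. A condensed sketch using this repo's helpers (FLORENCE_MODEL, SAM_IMAGE_MODEL, run_florence_inference, and run_sam_inference are module globals in app.py and utils/; build_detections is a hypothetical stand-in for the conversion step not shown in this diff):

from PIL import Image

def text_to_mask_sketch(image_input: Image.Image, masking_prompt_text: str):
    # 1. Florence-2 phrase grounding: free-text prompt -> bounding boxes.
    _, response = run_florence_inference(
        model=FLORENCE_MODEL,
        processor=FLORENCE_PROCESSOR,
        device=device,
        image=image_input,
        task="<CAPTION_TO_PHRASE_GROUNDING>",
        text=masking_prompt_text,
    )
    # 2. Convert the grounding response to detections (outside this diff),
    #    then let SAM turn the boxes into segmentation masks.
    detections = build_detections(response)  # hypothetical stand-in
    return run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)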
@@ -225,7 +229,7 @@ def genearte_mask(image: Image.Image, masking_prompt_text: str) -> Image.Image:
     return images[0]
 
 
-@spaces.GPU(duration=120)
+
 def process(
     image_url: str,
     inpainting_prompt_text: str,
requirements.txt CHANGED
@@ -14,4 +14,5 @@ opencv-python
 pytest
 requests
 git+https://github.com/Gothos/diffusers.git@flux-inpaint
-boto3
+boto3
+sentencepiece
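The sentencepiece addition is most plausibly for FLUX's T5-XXL text encoder: transformers needs the sentencepiece package to build T5's slow tokenizer from spiece.model. A quick check, assuming you are logged in to Hugging Face since FLUX.1-dev is gated:

# Fails with an ImportError pointing at sentencepiece when the package is missing.
from transformers import T5Tokenizer

tok = T5Tokenizer.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="tokenizer_2")
print(type(tok).__name__)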
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (125 Bytes).
 
utils/__pycache__/florence.cpython-310.pyc ADDED
Binary file (2.31 kB).
 
utils/__pycache__/sam.cpython-310.pyc ADDED
Binary file (1.57 kB).
 
utils/florence.py CHANGED
@@ -29,10 +29,8 @@ def load_florence_model(
     device: torch.device, checkpoint: str = FLORENCE_CHECKPOINT
 ) -> Tuple[Any, Any]:
     with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
-        model = AutoModelForCausalLM.from_pretrained(
-            checkpoint, trust_remote_code=True).to(device).eval()
-        processor = AutoProcessor.from_pretrained(
-            checkpoint, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True).to(device).eval()
+        processor = AutoProcessor.from_pretrained(checkpoint, trust_remote_code=True)
     return model, processor
 
 
@@ -49,16 +47,8 @@ def run_florence_inference(
     else:
         prompt = task
     inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
-    print(inputs)
-    generated_ids = model.generate(
-        input_ids=inputs["input_ids"],
-        pixel_values=inputs["pixel_values"],
-        max_new_tokens=1024,
-        num_beams=3
-    )
-    generated_text = processor.batch_decode(
-        generated_ids, skip_special_tokens=False)[0]
-    response = processor.post_process_generation(
-        generated_text, task=task, image_size=image.size)
-    print(generated_text, response)
-    return generated_text, response
+    generated_ids = model.generate(input_ids=inputs["input_ids"], pixel_values=inputs["pixel_values"], max_new_tokens=1024, num_beams=3)
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    response = processor.post_process_generation(generated_text, task=task, image_size=image.size)
+    print("run_florence_inference", "finish", generated_text, response)
+    return generated_text, response
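Both hunks run under the surrounding patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports) context. The idiom exists because Florence-2's trust_remote_code modeling file declares flash_attn as a required import, which fails on machines without it installed. A sketch of what such a patch typically looks like; the repo's actual fixed_get_imports may differ:

import os
from typing import List, Union

from transformers.dynamic_module_utils import get_imports

def fixed_get_imports(filename: Union[str, os.PathLike]) -> List[str]:
    # Drop flash_attn from Florence-2's declared imports so transformers'
    # dynamic-module check passes even when flash_attn is not installed.
    imports = get_imports(filename)
    if str(filename).endswith("modeling_florence2.py") and "flash_attn" in imports:
        imports.remove("flash_attn")
    return imports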