# Source: Hugging Face Space "Object_Remove", file app.py (uploaded by ifmain).
# Commit 3c9d565 ("Update app.py"), verified; file size 4.39 kB; scan result: no virus.
import gradio as gr
import spaces
import time
import cv2
from diffusers import AutoPipelineForInpainting
from transformers import pipeline
from ultralytics import YOLO
from PIL import Image
import numpy as np
import torch
import base64
from io import BytesIO
import difflib
# Constants
# Torch device string: 'cuda' when torch reports an available CUDA device, otherwise 'cpu'.
# NOTE(review): nothing else in the visible file reads DEVICE; the GPU entry points
# pass the literal 'cuda' instead - confirm whether this constant is still needed.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Helper functions
def image_to_base64(image: Image.Image):
    """Return *image* encoded as JPEG and wrapped in a base64 text string.

    Args:
        image: PIL image to encode. Modes the JPEG writer cannot store
            (e.g. ``RGBA``, ``LA``, ``P``) are converted to ``RGB`` first;
            the caller's image object is never modified.

    Returns:
        The base64 representation of the JPEG bytes, decoded as UTF-8 text.
    """
    # Pillow's JPEG encoder only accepts these modes and raises OSError for
    # anything else (most commonly RGBA from PNG uploads).
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    with BytesIO() as buffered:
        image.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode("utf-8")
def get_most_similar_string(target_string, string_array):
    """Pick the entry of *string_array* that most resembles *target_string*.

    Similarity is ``difflib.SequenceMatcher(None, target, candidate).ratio()``.
    When several candidates share the best ratio the earliest one wins, and
    when every ratio is 0 the first entry is returned.

    Raises:
        IndexError: if *string_array* is empty.
    """
    def similarity(candidate):
        return difflib.SequenceMatcher(None, target_string, candidate).ratio()

    # Indexing up front keeps the IndexError contract for empty input;
    # max() itself returns the first element that reaches the top score.
    fallback = string_array[0]
    return max(string_array, key=similarity, default=fallback)
# Load YOLO model on CPU
# The segmentation checkpoint 'yolov8x-seg.pt' is loaded once at import time;
# full_pipeline() passes this module-level instance to getSegments() on every request.
yoloModel = YOLO('yolov8x-seg.pt')
# Keep the detector on the CPU.
# NOTE(review): presumably so CUDA is only touched inside the @spaces.GPU functions - confirm.
yoloModel.to('cpu')
def getClasses(model, img1):
    """Run *model* on a single image and return its detections.

    Args:
        model: Callable taking a list of images and returning a sequence of
            result objects that expose ``plot()`` (an ultralytics YOLO model
            in this file).
        img1: The image to analyse; it is passed to the model as ``[img1]``.

    Returns:
        A 3-tuple ``(result, annotated, results)`` where ``result`` is the
        last entry of ``results``, ``annotated`` is ``result.plot()`` with its
        last axis reversed (the ultralytics docs describe ``plot()`` as BGR,
        so this yields RGB), and ``results`` is the raw model output.

    Raises:
        ValueError: if the model returns no results at all.
    """
    results = model([img1])
    if not results:
        # Previously this surfaced as an unbound-variable error at the return.
        raise ValueError("model returned no results for the supplied image")

    # One image goes in, so the single result that comes out is the last one.
    last_result = results[-1]
    annotated = last_result.plot()
    return last_result, annotated[..., ::-1], results
def getMasks(out):
    """Collapse a list of per-detection masks into one mask per class name.

    Args:
        out: Iterable of dicts with the keys ``'name'`` (class label) and
            ``'img'`` (mask image).

    Returns:
        A new dict mapping each class name to its mask, in order of first
        appearance. Masks sharing a name are combined with an element-wise
        maximum and re-wrapped via ``Image.fromarray``.
    """
    merged = {}
    for entry in out:
        label, mask_img = entry['name'], entry['img']
        if label not in merged:
            # First mask seen for this class: store it untouched.
            merged[label] = mask_img
            continue
        # Another instance of the same class: keep the brighter pixel of the two.
        combined = np.maximum(np.array(merged[label]), np.array(mask_img))
        merged[label] = Image.fromarray(combined)
    return merged
def joinClasses(classes):
    """Build one dilated 8-bit mask image per detected class.

    Args:
        classes: Iterable of detections. Each item must expose ``masks``,
            ``boxes.cls`` and ``names`` (assumed to be ultralytics result
            objects, one detection per item - TODO confirm against caller).

    Returns:
        The dict produced by ``getMasks``: class name -> ``"L"`` mode PIL
        mask, with masks of the same class merged.
    """
    # Structuring element for the dilation; identical for every detection,
    # so it is built once instead of once per loop iteration.
    kernel = np.ones((10, 10), np.uint8)

    out = []
    for r in classes:
        name0 = r.names[int(r.boxes.cls.cpu().numpy()[0])]
        mask = r.masks[0].data[0].cpu().numpy()

        lowest, highest = mask.min(), mask.max()
        if highest > lowest:
            # Stretch the mask values onto the full 0..255 range.
            mask_normalized = ((mask - lowest) * (255 / (highest - lowest))).astype(np.uint8)
        else:
            # Flat mask (e.g. the object fills the whole frame): min-max scaling
            # would divide by zero and yield NaN, so map it directly to all-on
            # or all-off instead.
            fill = 255 if highest > 0 else 0
            mask_normalized = np.full(mask.shape, fill, dtype=np.uint8)

        # Grow the mask outwards so the inpainting region also covers the
        # object's border pixels.
        mask_with_border = cv2.dilate(mask_normalized, kernel, iterations=1)
        mask_img = Image.fromarray(mask_with_border, "L")
        out.append({'name': name0, 'img': mask_img})

    return getMasks(out)
def getSegments(yoloModel, img1):
    """Detect objects in *img1* and return one mask per detected class.

    Side effect: the annotated detection image returned by ``getClasses`` is
    written to ``classes.jpg`` in the current working directory on every call.

    Args:
        yoloModel: Detection model forwarded to ``getClasses``.
        img1: Image to segment.

    Returns:
        The class-name -> mask mapping built by ``joinClasses``.
    """
    detections, annotated, _ = getClasses(yoloModel, img1)
    Image.fromarray(annotated).save('classes.jpg')
    return joinClasses(detections)
@spaces.GPU
def getDescript(img1):
    """Generate a text caption for *img1*.

    An ``image-to-text`` transformers pipeline is created on ``'cuda'`` on
    every call, fed the image as a base64 JPEG string, and the
    ``'generated_text'`` of its first output is returned.
    """
    captioner = pipeline(
        "image-to-text",
        model="Abdou/vit-swin-base-224-gpt2-image-captioning",
        device='cuda',
    )
    outputs = captioner(image_to_base64(img1))
    return outputs[0]['generated_text']
def rmGPT(caption, remove_class, change):
    """Edit *caption* so it no longer describes the object being removed.

    The caption is split on single spaces and the word most similar to
    *remove_class* is located (first occurrence).

    * If *change* has three or more characters, that word is replaced by
      *change*.
    * Otherwise (treated as "no replacement given") the word is deleted
      together with its neighbours in the index window
      ``[ind - r, ind + r)`` where ``r = round(word_count / 5)``.

    Args:
        caption: Caption text to edit.
        remove_class: Name of the object class to remove.
        change: Replacement text; fewer than three characters means "delete".

    Returns:
        The edited caption, words joined by single spaces.
    """
    words = caption.split(' ')
    closest = get_most_similar_string(remove_class, words)
    ind = words.index(closest)

    if len(change) >= 3:
        words[ind] = change
        return ' '.join(words)

    radius = round(len(words) / 5)
    # A range object answers membership in O(1); no per-word list rebuild.
    dropped = range(ind - radius, ind + radius)
    # For captions of one or two words the radius rounds to 0 and the window
    # is empty; the matched word itself must still be removed, hence the
    # explicit `i != ind`.
    kept = [word for i, word in enumerate(words) if i != ind and i not in dropped]
    return ' '.join(kept)
@spaces.GPU
def ChangeOBJ(img1, response, mask1):
    """Inpaint the masked region of *img1* guided by the prompt *response*.

    The SDXL inpainting pipeline is loaded in float16 and moved to ``'cuda'``
    on every call.

    Args:
        img1: Source image.
        response: Text prompt for the inpainting model.
        mask1: Mask image selecting the region to repaint.

    Returns:
        The first generated image, resized to the size of *img1*.
    """
    inpainter = AutoPipelineForInpainting.from_pretrained(
        "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
        torch_dtype=torch.float16,
    ).to('cuda')
    original_size = img1.size
    generated = inpainter(prompt=response, image=img1, mask_image=mask1).images[0]
    return generated.resize((original_size[0], original_size[1]))
def full_pipeline(image, target, change):
    """Gradio handler: remove or replace an object in the uploaded image.

    Steps: segment the image, pick the detected class closest to *target*,
    caption the image, rewrite the caption via ``rmGPT`` and inpaint the
    selected mask with the rewritten caption as prompt.

    Args:
        image: Uploaded image as a numpy array, or ``None`` when nothing was
            uploaded.
        target: User text naming the object to delete.
        change: User text naming the replacement (may be empty).

    Returns:
        ``(result_image, caption, prompt)`` matching the three output
        components of the interface.

    Raises:
        gr.Error: if no image was supplied or no object was detected, so the
            UI shows a readable message instead of a raw traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    img1 = Image.fromarray(image.astype('uint8'), 'RGB')
    allMask = getSegments(yoloModel, img1)
    if not allMask:
        # Without this guard get_most_similar_string() fails on an empty list.
        raise gr.Error("No objects were detected in the image.")

    target_to_remove = get_most_similar_string(target, list(allMask.keys()))
    caption = getDescript(img1)
    response = rmGPT(caption, target_to_remove, change)
    mask1 = allMask[target_to_remove]
    remimg = ChangeOBJ(img1, response, mask1)
    return remimg, caption, response
# Gradio front end. The three inputs map positionally onto
# full_pipeline(image, target, change); the three outputs map onto its
# (result image, caption, prompt) return tuple.
iface = gr.Interface(
    fn=full_pipeline,
    inputs=[
        gr.Image(label="Upload Image"),  # image
        gr.Textbox(label="What to delete?"),  # target
        gr.Textbox(label="Change?"),  # change
    ],
    outputs=[
        gr.Image(label="Result Image", type="numpy"),  # result image
        gr.Textbox(label="Caption"),  # caption
        gr.Textbox(label="Message"),  # prompt sent to the inpainting model
    ],
    live=False,
)

# Start the web server as soon as the module is executed.
iface.launch(share=True)