ifmain committed on
Commit
339ab7c
1 Parent(s): 91b0dd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -121
app.py CHANGED
@@ -1,32 +1,22 @@
1
- import spaces
2
-
3
- import time
4
-
5
- from diffusers import AutoPipelineForInpainting
6
- from transformers import pipeline
7
- from ultralytics import YOLO
8
  from PIL import Image
9
- import numpy as np
10
  import torch
11
  import base64
12
  from io import BytesIO
13
- import gradio as gr
14
- from gradio import components
15
  import difflib
16
 
 
 
17
 
18
- # Constants
19
- DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
20
-
21
- # Load
22
 
 
23
  def image_to_base64(image: Image.Image):
24
  buffered = BytesIO()
25
  image.save(buffered, format="JPEG")
26
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
27
 
 
28
  def get_most_similar_string(target_string, string_array):
29
- differ = difflib.Differ()
30
  best_match = string_array[0]
31
  best_match_ratio = 0
32
  for candidate_string in string_array:
@@ -34,134 +24,64 @@ def get_most_similar_string(target_string, string_array):
34
  if similarity_ratio > best_match_ratio:
35
  best_match = candidate_string
36
  best_match_ratio = similarity_ratio
37
-
38
  return best_match
39
 
40
 
41
- # Load
42
- def loadModels():
43
- DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
44
- yoloModel=YOLO('yolov8x-seg.pt')
45
- pipe =AutoPipelineForInpainting.from_pretrained(
46
- "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
47
- torch_dtype=torch.float32
48
- ).to(DEVICE)
49
- image_captioner = pipeline("image-to-text", model="Abdou/vit-swin-base-224-gpt2-image-captioning", device=DEVICE)
50
- return yoloModel, pipe, image_captioner
51
-
52
- # Yolo
53
- @spaces.GPU
54
- def getClasses(model,img1):
55
- results = model([img1])
56
- out=[]
57
- for r in results:
58
- im_array = r.plot()
59
- out.append(r)
60
-
61
- return r,im_array[..., ::-1],results
62
-
63
- def getMasks(out):
64
- allout={}
65
- class_masks = {}
66
- for a in out:
67
- class_name = a['name']
68
- mask = a['img']
69
- if class_name in class_masks:
70
- class_masks[class_name] = Image.fromarray(
71
- np.maximum(np.array(class_masks[class_name]), np.array(mask))
72
- )
73
- else:
74
- class_masks[class_name] = mask
75
- for class_name, mask in class_masks.items():
76
- allout[class_name]=mask
77
- return allout
78
-
79
- def joinClasses(classes):
80
- i=0
81
- out=[]
82
- for r in classes:
83
- masks=r.masks
84
- name0=r.names[int(r.boxes.cls.cpu().numpy()[0])]
85
-
86
- mask1 = masks[0]
87
- mask = mask1.data[0].cpu().numpy()
88
- polygon = mask1.xy[0]
89
- # Normalize the mask values to 0-255 if needed
90
- mask_normalized = ((mask - mask.min()) * (255 / (mask.max() - mask.min()))).astype(np.uint8)
91
- mask_img = Image.fromarray(mask_normalized, "L")
92
- out.append({'name':name0,'img':mask_img})
93
- i+=1
94
-
95
- allMask=getMasks(out)
96
- return allMask
97
-
98
- def getSegments(yoloModel,img1):
99
- classes,image,results1=getClasses(yoloModel,img1)
100
- allMask=joinClasses(classes)
101
- return allMask
102
-
103
- # Gradio UI
104
 
 
105
  @spaces.GPU
106
- def captionMaker(base64_img):
107
- return image_captioner(base64_img)[0]['generated_text']
 
108
 
109
 
110
- def getDescript(image_captioner,img1):
111
- base64_img = image_to_base64(img1)
112
- caption = captionMaker(base64_img)
113
- return caption
114
-
115
- def rmGPT(caption,remove_class):
116
- arstr=caption.split(' ')
117
- popular=get_most_similar_string(remove_class,arstr)
118
- ind=arstr.index(popular)
119
- new=[]
120
- for i in range(len(arstr)):
121
- if i not in list(range(ind-2,ind+3)):
122
- new.append(arstr[i])
123
- return ' '.join(new)
124
 
125
- # SDXL
126
 
127
  @spaces.GPU
128
- def ChangeOBJ(sdxl_m,img1,response,mask1):
129
- size = img1.size
130
- image = sdxl_m(prompt=response, image=img1, mask_image=mask1).images[0]
131
- return image.resize((size[0], size[1]))
132
 
133
 
 
 
 
 
 
 
134
 
135
- yoloModel,sdxl,image_captioner=loadModels()
136
 
 
137
  def full_pipeline(image, target):
138
- img1 = Image.fromarray(image.astype('uint8'), 'RGB')
139
- allMask=getSegments(yoloModel,img1)
140
- tartget_to_remove=get_most_similar_string(target,list(allMask.keys()))
141
- caption=getDescript(image_captioner,img1)
142
-
143
- response=rmGPT(caption,tartget_to_remove)
144
- mask1=allMask[tartget_to_remove]
145
-
146
- remimg=ChangeOBJ(sdxl,img1,response,mask1)
147
-
148
- return remimg,caption,response
149
 
 
150
 
151
 
152
  iface = gr.Interface(
153
- fn=full_pipeline,
154
  inputs=[
155
- gr.Image(label="Upload Image"),
156
- gr.Textbox(label="What to delete?"),
157
- ],
158
  outputs=[
159
- gr.Image(label="Result Image", type="numpy"),
160
- gr.Textbox(label="Caption"),
161
- gr.Textbox(label="Message"),
162
  ],
163
- live=False
164
  )
165
 
166
-
167
  iface.launch()
 
1
+ import gradio as gr
 
 
 
 
 
 
2
  from PIL import Image
 
3
  import torch
4
  import base64
5
  from io import BytesIO
 
 
6
  import difflib
7
 
8
+ # Hugging Face `spaces` package, which provides the `@spaces.GPU` decorator used below
9
+ import spaces
10
 
 
 
 
 
11
 
12
+ # ==== Utility Functions ====
13
def image_to_base64(image: Image.Image):
    """Encode a PIL image as a base64 string of its JPEG bytes.

    Args:
        image: The image to serialise.

    Returns:
        The JPEG-encoded image, base64-encoded and decoded to a UTF-8 ``str``.
    """
    # JPEG has no alpha channel or palette, so saving e.g. an RGBA, LA or P
    # image raises OSError. Convert anything that is not plain L/RGB/CMYK
    # data to RGB first; images already in those modes are saved untouched.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
17
 
18
+
19
  def get_most_similar_string(target_string, string_array):
 
20
  best_match = string_array[0]
21
  best_match_ratio = 0
22
  for candidate_string in string_array:
 
24
  if similarity_ratio > best_match_ratio:
25
  best_match = candidate_string
26
  best_match_ratio = similarity_ratio
 
27
  return best_match
28
 
29
 
30
+ # ==== GPU-Aware Model Loading and Operations ====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ # Lazy model loaders: each imports its library inside the function and runs under @spaces.GPU
33
@spaces.GPU
def load_yolo_model():
    """Create and return a YOLO model from the ``yolov8x-seg.pt`` weights."""
    # Imported here rather than at module level so ultralytics is only
    # pulled in when this loader actually runs.
    from ultralytics import YOLO

    weights_name = 'yolov8x-seg.pt'
    yolo = YOLO(weights_name)
    return yolo
37
 
38
 
39
@spaces.GPU
def load_diffusion_model():
    """Load the SDXL inpainting pipeline in float16 and move it to CUDA.

    Returns:
        The pipeline object returned by ``.to("cuda")``.
    """
    # Imported here rather than at module level so diffusers is only
    # pulled in when this loader actually runs.
    from diffusers import AutoPipelineForInpainting

    checkpoint = "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
    inpaint_pipeline = AutoPipelineForInpainting.from_pretrained(
        checkpoint,
        torch_dtype=torch.float16,
    )
    return inpaint_pipeline.to("cuda")
 
 
 
 
 
 
 
 
 
44
 
 
45
 
46
@spaces.GPU
def load_image_captioner():
    """Build and return the ``image-to-text`` transformers pipeline.

    The pipeline is created for the
    ``Abdou/vit-swin-base-224-gpt2-image-captioning`` model with ``device=0``.
    """
    # Imported here rather than at module level so transformers is only
    # pulled in when this loader actually runs.
    from transformers import pipeline

    captioner_model = "Abdou/vit-swin-base-224-gpt2-image-captioning"
    captioner = pipeline("image-to-text", model=captioner_model, device=0)
    return captioner
 
50
 
51
 
52
+ # Image Object Removal and Modification
53
@spaces.GPU
def process_image(model_yolo, model_diffuser, model_captioner, image, target):
    """Remove ``target`` from ``image`` using the supplied models.

    This is still a placeholder: the segmentation, captioning and inpainting
    steps have not been ported into this function yet.

    Args:
        model_yolo: Model returned by ``load_yolo_model``.
        model_diffuser: Pipeline returned by ``load_diffusion_model``.
        model_captioner: Pipeline returned by ``load_image_captioner``.
        image: The uploaded image.
        target: Text naming what to delete.

    Returns:
        Intended to return ``(result_image, caption, response)``, which is
        what ``full_pipeline`` unpacks.

    Raises:
        NotImplementedError: Always, until the processing logic is written.
    """
    # A bare ``pass`` would return None, and the caller's three-way unpacking
    # would then fail with an unrelated-looking TypeError. Fail loudly and
    # explicitly here instead so the missing implementation is obvious.
    raise NotImplementedError(
        "process_image is a placeholder: segmentation, captioning and "
        "inpainting have not been implemented yet."
    )
58
 
 
59
 
60
+ # ==== Gradio Interface ====
61
def full_pipeline(image, target):
    """Gradio callback: load the three models and run ``process_image``.

    Args:
        image: The uploaded image passed in by the interface.
        target: The text entered in the "What to delete?" box.

    Returns:
        A ``(result_image, caption, response)`` tuple, one entry per output
        component of the interface.
    """
    # The loaders are invoked on every call, in this fixed order.
    yolo = load_yolo_model()
    diffuser = load_diffusion_model()
    captioner = load_image_captioner()

    # process_image is expected to hand back exactly three values.
    outputs = process_image(yolo, diffuser, captioner, image, target)
    result_image, caption, response = outputs
    return result_image, caption, response
71
 
72
 
73
# Build the UI with the top-level component classes (gr.Image, gr.Textbox).
# The legacy gr.inputs.* / gr.outputs.* namespaces are deprecated in Gradio 3
# and no longer exist in Gradio 4, where referencing them raises
# AttributeError at import time.
iface = gr.Interface(
    fn=full_pipeline,
    inputs=[
        # type="pil" makes Gradio pass the upload to full_pipeline as a PIL image.
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="What to delete?"),
    ],
    outputs=[
        gr.Image(label="Result Image", type="pil"),
        gr.Textbox(label="Caption"),
        gr.Textbox(label="Message"),
    ],
    live=False,
)

iface.launch()