ifmain committed on
Commit
c731ccc
1 Parent(s): e8eb6dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -72
app.py CHANGED
@@ -1,45 +1,30 @@
1
- # UI and Application Framework
2
  import gradio as gr
 
3
  import spaces # Thanks a lot!
4
 
5
-
6
- # Standard Libraries
7
  import time
8
- from io import BytesIO
9
- import base64
10
-
11
- # Data Handling and Image Processing
12
- import numpy as np
13
- from PIL import Image
14
-
15
- # Machine Learning and AI Models
16
- import torch
17
- from transformers import pipeline
18
  from diffusers import AutoPipelineForInpainting
19
- from diffusers.utils import load_image
20
  from ultralytics import YOLO
 
 
 
 
 
21
 
22
- # Text and Data Manipulation
23
  import difflib
24
 
25
-
26
  # Constants
27
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
28
- print(DEVICE)
29
-
30
- yoloModel = YOLO('yolov8x-seg.pt')
31
- sdxl = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", torch_dtype=torch.float16, variant="fp16").to("cuda")
32
- sdxl.to("cuda")
33
-
34
- image_captioner = pipeline("image-to-text", model="Abdou/vit-swin-base-224-gpt2-image-captioning", device=DEVICE)
35
 
 
36
 
37
  def image_to_base64(image: Image.Image):
38
  buffered = BytesIO()
39
  image.save(buffered, format="JPEG")
40
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
41
 
42
-
43
  def get_most_similar_string(target_string, string_array):
44
  differ = difflib.Differ()
45
  best_match = string_array[0]
@@ -49,23 +34,35 @@ def get_most_similar_string(target_string, string_array):
49
  if similarity_ratio > best_match_ratio:
50
  best_match = candidate_string
51
  best_match_ratio = similarity_ratio
52
-
53
  return best_match
54
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Yolo
57
- def getClasses(img1):
58
- results = yoloModel([np.array(img1)], device='cpu') # Изменение для передачи изображения как массива NumPy
59
- out = []
 
60
  for r in results:
 
61
  im_array = r.plot()
62
  out.append(r)
63
 
64
- return r, im_array[..., ::-1], results
65
-
66
 
67
  def getMasks(out):
68
- allout = {}
69
  class_masks = {}
70
  for a in out:
71
  class_name = a['name']
@@ -77,10 +74,9 @@ def getMasks(out):
77
  else:
78
  class_masks[class_name] = mask
79
  for class_name, mask in class_masks.items():
80
- allout[class_name] = mask
81
  return allout
82
 
83
-
84
  def joinClasses(classes):
85
  i = 0
86
  out = []
@@ -90,74 +86,84 @@ def joinClasses(classes):
90
 
91
  mask1 = masks[0]
92
  mask = mask1.data[0].cpu().numpy()
93
- polygon = mask1.xy[0]
94
  # Normalize the mask values to 0-255 if needed
95
  mask_normalized = ((mask - mask.min()) * (255 / (mask.max() - mask.min()))).astype(np.uint8)
96
- mask_img = Image.fromarray(mask_normalized, "L")
 
 
 
 
 
97
  out.append({'name': name0, 'img': mask_img})
98
  i += 1
99
 
100
  allMask = getMasks(out)
101
  return allMask
102
 
103
-
104
- def getSegments(img1):
105
- classes, image, results1 = getClasses(img1)
106
- allMask = joinClasses(classes)
 
107
  return allMask
108
 
109
-
110
  # Gradio UI
111
  @spaces.GPU
112
- def captionMaker(base64_img):
113
- return image_captioner(base64_img)[0]['generated_text']
114
-
115
-
116
- def getDescript(image_captioner, img1):
117
  base64_img = image_to_base64(img1)
118
- caption = captionMaker(base64_img)
119
  return caption
120
 
121
-
122
- def rmGPT(caption, remove_class):
123
- arstr = caption.split(' ')
124
- popular = get_most_similar_string(remove_class, arstr)
125
- ind = arstr.index(popular)
126
- new = []
127
- for i in range(len(arstr)):
128
- if i not in list(range(ind - 2, ind + 3)):
129
- new.append(arstr[i])
130
- return ' '.join(new)
131
-
132
-
 
 
 
 
 
133
  @spaces.GPU
134
- def ChangeOBJ(img1, response, mask1):
135
  size = img1.size
136
- image = sdxl(prompt=response, image=img1, mask_image=mask1,strength=1.0).images[0]
137
  return image.resize((size[0], size[1]))
138
 
139
 
140
- def full_pipeline(image, target):
 
 
 
141
  img1 = Image.fromarray(image.astype('uint8'), 'RGB')
142
- #img1 = img1.resize((512, 512))
143
- allMask = getSegments(img1)
144
- tartget_to_remove = get_most_similar_string(target, list(allMask.keys()))
145
- caption = getDescript(image_captioner, img1)
 
 
146
 
147
- response = rmGPT(caption, tartget_to_remove)
148
- mask1 = allMask[tartget_to_remove]
149
 
150
- remimg = ChangeOBJ(img1, response, mask1)
151
 
152
- return remimg, caption, response
153
 
154
 
155
  iface = gr.Interface(
156
- fn=full_pipeline,
157
  inputs=[
158
  gr.Image(label="Upload Image"),
159
  gr.Textbox(label="What to delete?"),
160
- ],
 
161
  outputs=[
162
  gr.Image(label="Result Image", type="numpy"),
163
  gr.Textbox(label="Caption"),
@@ -166,4 +172,6 @@ iface = gr.Interface(
166
  live=False
167
  )
168
 
169
- iface.launch()
 
 
 
 
1
  import gradio as gr
2
+ from gradio import components
3
  import spaces # Thanks a lot!
4
 
 
 
5
  import time
6
+ import cv2
 
 
 
 
 
 
 
 
 
7
  from diffusers import AutoPipelineForInpainting
8
+ from transformers import pipeline
9
  from ultralytics import YOLO
10
+ from PIL import Image
11
+ import numpy as np
12
+ import torch
13
+ import base64
14
+ from io import BytesIO
15
 
 
16
  import difflib
17
 
 
18
  # Constants
19
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
 
 
 
 
20
 
21
+ # Load
22
 
23
  def image_to_base64(image: Image.Image):
24
  buffered = BytesIO()
25
  image.save(buffered, format="JPEG")
26
  return base64.b64encode(buffered.getvalue()).decode("utf-8")
27
 
 
28
  def get_most_similar_string(target_string, string_array):
29
  differ = difflib.Differ()
30
  best_match = string_array[0]
 
34
  if similarity_ratio > best_match_ratio:
35
  best_match = candidate_string
36
  best_match_ratio = similarity_ratio
37
+
38
  return best_match
39
 
40
def loadModels():
    """Instantiate the three models used by the app.

    Returns:
        A ``(yolo, inpainting_pipeline, captioner)`` tuple: the YOLO model
        built from ``'yolov8x-seg.pt'``, the SDXL inpainting pipeline loaded
        in float16 and moved to ``"cuda"``, and an ``"image-to-text"``
        transformers pipeline created on ``DEVICE``.
    """
    segmenter = YOLO('yolov8x-seg.pt')

    # NOTE(review): the inpainting pipeline is sent to "cuda" unconditionally,
    # whereas the captioner below follows DEVICE -- confirm this is intended.
    inpainter = AutoPipelineForInpainting.from_pretrained(
        "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
        torch_dtype=torch.float16,
        variant="fp16",
    )
    inpainter = inpainter.to("cuda")

    captioner = pipeline(
        "image-to-text",
        model="Abdou/vit-swin-base-224-gpt2-image-captioning",
        device=DEVICE,
    )
    return segmenter, inpainter, captioner
51
 
52
  # Yolo
53
+
54
def getClasses(model, img1):
    """Run *model* on a single image and return its last prediction.

    Args:
        model: callable invoked as ``model([img1])``; expected to return a
            non-empty sequence of result objects exposing ``plot()``.
        img1: the image handed to the model.

    Returns:
        ``(last_result, annotated, results)`` where ``annotated`` is the
        array from ``last_result.plot()`` with its last axis reversed
        (``plot()`` yields BGR, so this gives RGB) and ``results`` is the
        full sequence returned by the model.

    Raises:
        ValueError: if the model returns no results.
    """
    results = model([img1])
    if not results:
        # Fail with a clear message instead of an UnboundLocalError on the
        # names that would otherwise only be bound inside a loop.
        raise ValueError("model returned no results for the input image")

    # Only the final result is reported, so only that one is plotted.
    last_result = results[-1]
    annotated_bgr = last_result.plot()
    return last_result, annotated_bgr[..., ::-1], results
 
63
 
64
  def getMasks(out):
65
+ allout={}
66
  class_masks = {}
67
  for a in out:
68
  class_name = a['name']
 
74
  else:
75
  class_masks[class_name] = mask
76
  for class_name, mask in class_masks.items():
77
+ allout[class_name]=mask
78
  return allout
79
 
 
80
  def joinClasses(classes):
81
  i = 0
82
  out = []
 
86
 
87
  mask1 = masks[0]
88
  mask = mask1.data[0].cpu().numpy()
89
+
90
  # Normalize the mask values to 0-255 if needed
91
  mask_normalized = ((mask - mask.min()) * (255 / (mask.max() - mask.min()))).astype(np.uint8)
92
+
93
+ # Add white border
94
+ kernel = np.ones((10, 10), np.uint8)
95
+ mask_with_border = cv2.dilate(mask_normalized, kernel, iterations=1)
96
+
97
+ mask_img = Image.fromarray(mask_with_border, "L")
98
  out.append({'name': name0, 'img': mask_img})
99
  i += 1
100
 
101
  allMask = getMasks(out)
102
  return allMask
103
 
104
def getSegments(yoloModel, img1):
    """Run segmentation on *img1* and return the per-class masks.

    Side effect: the annotated prediction image returned by ``getClasses``
    is written to ``classes.jpg`` in the current working directory.
    """
    detection, annotated, _ = getClasses(yoloModel, img1)
    Image.fromarray(annotated).save('classes.jpg')  # RGB PIL image
    return joinClasses(detection)
110
 
 
111
  # Gradio UI
112
  @spaces.GPU
113
def getDescript(image_captioner, img1):
    """Return the caption text generated for *img1*.

    The image is handed to ``image_captioner`` as a base64 JPEG string and
    the ``'generated_text'`` field of the first result is returned.
    """
    predictions = image_captioner(image_to_base64(img1))
    first_prediction = predictions[0]
    return first_prediction['generated_text']
117
 
118
def rmGPT(caption, remove_class, change):
    """Rewrite *caption* so it no longer describes *remove_class*.

    The caption is split on single spaces and the word closest to
    *remove_class* (per ``get_most_similar_string``) is located.

    * If *change* has fewer than 3 characters, the matched word is dropped
      together with ``round(len(words) / 5)`` neighbours on each side.
    * Otherwise the matched word is replaced by *change*.

    Args:
        caption: caption text to edit.
        remove_class: name of the object to look for in the caption.
        change: replacement text; shorter than 3 characters means "remove".

    Returns:
        The edited caption, re-joined with single spaces.
    """
    words = caption.split(' ')
    matched = get_most_similar_string(remove_class, words)
    ind = words.index(matched)

    if len(change) >= 3:
        words[ind] = change
        return ' '.join(words)

    rng = round(len(words) / 5)
    print(f'Center {ind} | range {ind-rng}:{ind+rng+1}')
    # Symmetric window [ind - rng, ind + rng]. The slice bound is exclusive,
    # hence the +1; this matches the logged range and guarantees the matched
    # word itself is always removed, even when rng == 0.
    start = max(ind - rng, 0)
    stop = ind + rng + 1
    return ' '.join(words[:start] + words[stop:])
133
+
134
+ # SDXL
135
  @spaces.GPU
136
def ChangeOBJ(sdxl_m, img1, response, mask1):
    """Inpaint the masked region of *img1* and return it at the input size.

    ``sdxl_m`` is called with *response* as the prompt, *img1* as the image
    and *mask1* as the mask image; the first generated image is resized back
    to the dimensions in ``img1.size``.
    """
    width, height = img1.size[0], img1.size[1]
    generated = sdxl_m(prompt=response, image=img1, mask_image=mask1)
    return generated.images[0].resize((width, height))
140
 
141
 
142
+
143
+ yoloModel,sdxl,image_captioner=loadModels()
144
+
145
def full_pipeline(image, target, change):
    """Gradio handler: segment, caption, rewrite the prompt and inpaint.

    Args:
        image: array exposing ``astype``; converted to an RGB PIL image.
        target: user text naming the object to act on; matched against the
            detected class names with ``get_most_similar_string``.
        change: user text forwarded to ``rmGPT``.

    Returns:
        ``(result_image, caption, prompt)`` -- the inpainted image, the
        generated caption, and the prompt derived from it.
    """
    pil_image = Image.fromarray(image.astype('uint8'), 'RGB')
    masks_by_class = getSegments(yoloModel, pil_image)
    matched_class = get_most_similar_string(target, list(masks_by_class.keys()))
    caption = getDescript(image_captioner, pil_image)

    prompt = rmGPT(caption, matched_class, change)
    target_mask = masks_by_class[matched_class]

    result = ChangeOBJ(sdxl, pil_image, prompt, target_mask)
    return result, caption, prompt
157
 
 
158
 
159
 
160
  iface = gr.Interface(
161
+ fn=full_pipeline,
162
  inputs=[
163
  gr.Image(label="Upload Image"),
164
  gr.Textbox(label="What to delete?"),
165
+ gr.Textbox(label="Change?"),
166
+ ],
167
  outputs=[
168
  gr.Image(label="Result Image", type="numpy"),
169
  gr.Textbox(label="Caption"),
 
172
  live=False
173
  )
174
 
175
+
176
+ #iface.launch(share=True)
177
+ iface.launch(server_name='192.168.31.75')