Spaces:

curt-park
/

segment-anything-with-clip

Runtime error

App Files Community

curt-park committed on Apr 9, 2023

Commit

064ed26

•

1 Parent(s): 8d4a5a4

Remove backgrounds from cropped images

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -57,7 +57,7 @@ def get_scores(crops: List[PIL.Image.Image], query: str) -> torch.Tensor:
     txt_features = model.encode_text(token)
     img_features /= img_features.norm(dim=-1, keepdim=True)
     txt_features /= txt_features.norm(dim=-1, keepdim=True)
-    similarity = (100.0 * img_features @ txt_features.T).softmax(dim=0)
     return similarity
@@ -82,9 +82,10 @@ def filter_masks(
         filtered_masks.append(mask)
         x, y, w, h = mask["bbox"]
-        crop = image[y: y + h, x: x + w]
         crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
-        crop = PIL.Image.fromarray(np.uint8(crop * 255)).convert("RGB")
         crop.resize((CLIP_WIDTH, CLIP_HEIGHT))
         cropped_masks.append(crop)
@@ -141,7 +142,7 @@ def segment(
     )
     image = draw_masks(image, masks)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-    image = PIL.Image.fromarray(np.uint8(image)).convert("RGB")
     return image
@@ -161,14 +162,14 @@ demo = gr.Interface(
         [
             0.9,
             0.8,
-            0.15,
             os.path.join(os.path.dirname(__file__), "examples/dog.jpg"),
-            "A dog only",
         ],
         [
             0.9,
             0.8,
-            0.1,
             os.path.join(os.path.dirname(__file__), "examples/city.jpg"),
             "A bridge on the water",
         ],
@@ -177,7 +178,7 @@ demo = gr.Interface(
             0.8,
             0.05,
             os.path.join(os.path.dirname(__file__), "examples/food.jpg"),
-            "",
         ],
         [
             0.9,

     txt_features = model.encode_text(token)
     img_features /= img_features.norm(dim=-1, keepdim=True)
     txt_features /= txt_features.norm(dim=-1, keepdim=True)
+    similarity = (100 * img_features @ txt_features.T).softmax(0)
     return similarity
         filtered_masks.append(mask)
         x, y, w, h = mask["bbox"]
+        masked = image * np.expand_dims(mask["segmentation"], -1)
+        crop = masked[y: y + h, x: x + w]
         crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
+        crop = PIL.Image.fromarray(crop * 255)
         crop.resize((CLIP_WIDTH, CLIP_HEIGHT))
         cropped_masks.append(crop)
     )
     image = draw_masks(image, masks)
     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    image = PIL.Image.fromarray(image)
     return image
         [
             0.9,
             0.8,
+            0.30,
             os.path.join(os.path.dirname(__file__), "examples/dog.jpg"),
+            "A dog",
         ],
         [
             0.9,
             0.8,
+            0.05,
             os.path.join(os.path.dirname(__file__), "examples/city.jpg"),
             "A bridge on the water",
         ],
             0.8,
             0.05,
             os.path.join(os.path.dirname(__file__), "examples/food.jpg"),
+            "spoon",
         ],
         [
             0.9,