SauravMaheshkar committed
Commit 410698b
1 Parent(s): 95190fc

feat: display masks as a single image

Files changed (3):
  1. app.py (+15 -29)
  2. requirements.txt (+1 -1)
  3. src/plot_utils.py (+48 -30)
app.py CHANGED
@@ -1,5 +1,3 @@
-import pathlib
-import zipfile
 from typing import Any, Dict, List
 
 import cv2
@@ -7,24 +5,20 @@ import gradio as gr
 import numpy as np
 import torch
 from gradio_image_annotation import image_annotator
-from sam2.build_sam import build_sam2
+from sam2 import load_model
 from sam2.sam2_image_predictor import SAM2ImagePredictor
 
-from src.plot_utils import render_masks
-
-choice_mapping: Dict[str, List[str]] = {
-    "tiny": ["sam2_hiera_t.yaml", "assets/checkpoints/sam2_hiera_tiny.pt"],
-    "small": ["sam2_hiera_s.yaml", "assets/checkpoints/sam2_hiera_small.pt"],
-    "base_plus": ["sam2_hiera_b+.yaml", "assets/checkpoints/sam2_hiera_base_plus.pt"],
-    "large": ["sam2_hiera_l.yaml", "assets/checkpoints/sam2_hiera_large.pt"],
-}
+from src.plot_utils import export_mask
 
 
 def predict(model_choice, annotations: Dict[str, Any]):
-    config_file, ckpt_path = choice_mapping[str(model_choice)]
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    sam2_model = build_sam2(config_file, ckpt_path, device=device)
-    predictor = SAM2ImagePredictor(sam2_model)
+    sam2_model = load_model(
+        variant=model_choice,
+        ckpt_path=f"assets/checkpoints/sam2_hiera_{model_choice}.pt",
+        device=device,
+    )
+    predictor = SAM2ImagePredictor(sam2_model)  # type:ignore
     predictor.set_image(annotations["image"])
     coordinates = []
     for i in range(len(annotations["boxes"])):
@@ -42,19 +36,12 @@ def predict(model_choice, annotations: Dict[str, Any]):
         box=np.array(coordinates),
         multimask_output=False,
     )
-    for count, mask in enumerate(masks):
-        mask = mask.transpose(1, 2, 0)  # type:ignore
-        mask_image = (mask * 255).astype(np.uint8)  # Convert to uint8 format
-        cv2.imwrite(f"assets/mask_{count}.png", mask_image)
-    mask_dir = pathlib.Path("assets/")
-    with zipfile.ZipFile("assets/masks.zip", "w") as archive:
-        for mask_file in mask_dir.glob("mask_*.png"):
-            archive.write(mask_file, arcname=mask_file.relative_to(mask_dir))
 
-    return [
-        render_masks(annotations["image"], masks),
-        gr.DownloadButton("Download Mask(s)", value="assets/masks.zip", visible=True),
-    ]
+    if masks.shape[0] == 1:
+        # handle single mask cases
+        masks = np.expand_dims(masks, axis=0)
+
+    return export_mask(masks)
 
 
 with gr.Blocks(delete_cache=(30, 30)) as demo:
@@ -83,9 +70,8 @@ with gr.Blocks(delete_cache=(30, 30)) as demo:
         label="Draw a bounding box",
    )
     btn = gr.Button("Get Segmentation Mask(s)")
-    download_btn = gr.DownloadButton(
-        "Download Mask(s)", value="assets/masks.zip", visible=False
+    btn.click(
+        fn=predict, inputs=[model, annotator], outputs=[gr.Image(label="Mask(s)")]
     )
-    btn.click(fn=predict, inputs=[model, annotator], outputs=[gr.Plot(), download_btn])
 
 demo.launch()
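
With this change, predict no longer writes per-mask PNGs to assets/ and zips them for a gr.DownloadButton; it returns a single PIL image from export_mask, so the UI output shrinks from a gr.Plot plus download button to one gr.Image. A minimal sketch of the shape guard that precedes export_mask (synthetic arrays; the predictor output shapes are an assumption inferred from the guard, not stated in the diff):

    import numpy as np

    # Assumed samv2 predictor output shapes (inferred from the guard above):
    single = np.zeros((1, 256, 256))      # one box: (1, H, W)
    batched = np.zeros((3, 1, 256, 256))  # several boxes: (num_boxes, 1, H, W)

    if single.shape[0] == 1:
        # Expand to (1, 1, H, W) so export_mask always receives (N, 1, H, W).
        single = np.expand_dims(single, axis=0)

    assert single.shape == (1, 1, 256, 256)
    assert batched.shape == (3, 1, 256, 256)
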
requirements.txt CHANGED
@@ -1,5 +1,5 @@
+git+https://github.com/SauravMaheshkar/samv2.git
 gradio
 gradio_image_annotation
 opencv-python
-samv2
 spaces
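
app.py now imports load_model from the sam2 package, which the PyPI samv2 release presumably did not ship at the time (an assumption; the diff only records the swap), so the requirement points at the fork directly. Running pip install git+https://github.com/SauravMaheshkar/samv2.git installs the same code outside the Space.
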
src/plot_utils.py CHANGED
@@ -1,50 +1,68 @@
 from typing import Optional
 
-import matplotlib.pyplot as plt
 import numpy as np
-from matplotlib.pyplot import Figure
+from PIL import Image
 
 
-def render_masks(
-    image,
-    masks,
+def export_mask(
+    masks: np.ndarray,
     random_color: Optional[bool] = True,
     smoothen_contours: Optional[bool] = True,
-) -> "Figure":
-    h, w = image.shape[:2]
-    fig, ax = plt.subplots(figsize=(w / 100, h / 100), dpi=100)
-    ax.axis("off")
-    ax.imshow(image)
-
-    for mask in masks:
-        if random_color:
-            color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
-        else:
-            color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6])
+) -> Image:
+    num_masks, _, h, w = masks.shape
+    num_masks = len(masks)
 
+    # Ensure masks are 2D by squeezing channel dimension
+    masks = masks.squeeze(axis=1)
+
+    # Create a single uint8 image with unique values for each mask
+    combined_mask = np.zeros((h, w), dtype=np.uint8)
+
+    for i in range(num_masks):
+        mask = masks[i]
         mask = mask.astype(np.uint8)
-        mask = mask.reshape(h, w)
-        mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+        combined_mask[mask > 0] = i + 1
+
+    # Create color map for visualization
+    if random_color:
+        colors = np.random.rand(num_masks, 3)  # Random colors for each mask
+    else:
+        colors = np.array(
+            [[30 / 255, 144 / 255, 255 / 255]] * num_masks
+        )  # Use fixed color
+
+    # Create an RGB image where each mask has its own color
+    color_image = np.zeros((h, w, 3), dtype=np.uint8)
 
-        if smoothen_contours:
-            import cv2
+    for i in range(1, num_masks + 1):
+        mask_color = colors[i - 1] * 255
+        color_image[combined_mask == i] = mask_color
 
+    # Convert the NumPy array to a PIL Image
+    pil_image = Image.fromarray(color_image)
+
+    # Optional: Add contours to the mask image
+    if smoothen_contours:
+        import cv2
+
+        contours_image = np.zeros((h, w, 4), dtype=np.float32)
+
+        for i in range(1, num_masks + 1):
+            mask = (combined_mask == i).astype(np.uint8)
             contours, _ = cv2.findContours(
-                mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
+                mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
             )
             contours = [
                 cv2.approxPolyDP(contour, epsilon=0.01, closed=True)
                 for contour in contours
             ]
-            mask_image = cv2.drawContours(
-                mask_image, contours, -1, (1, 1, 1, 0.5), thickness=2
+            contours_image = cv2.drawContours(
+                contours_image, contours, -1, (0, 0, 0, 0.5), thickness=2
             )
 
-        ax.imshow(mask_image, alpha=0.6)
-
-    # Make image occupy the whole figure
-    ax.set_xlim(0, w)
-    ax.set_ylim(h, 0)
-    plt.subplots_adjust(left=0, right=1, top=1, bottom=0)
+        # Convert contours to PIL image and blend with the color image
+        contours_image = (contours_image[:, :, :3] * 255).astype(np.uint8)
+        contours_pil_image = Image.fromarray(contours_image)
+        pil_image = Image.blend(pil_image, contours_pil_image, alpha=0.6)
 
-    return fig
+    return pil_image
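
Where render_masks overlaid each mask on the input image and returned a Matplotlib figure, export_mask flattens all masks into one label image (0 is background, mask i gets value i + 1, with later masks overwriting earlier ones where they overlap), assigns each label a color, and optionally blends in smoothed contours. Two quirks are reproduced as committed: num_masks = len(masks) restates the value already unpacked from masks.shape, and the -> Image annotation names the PIL module rather than Image.Image. A small usage sketch, assuming (N, 1, H, W) binary input masks:

    import numpy as np

    from src.plot_utils import export_mask

    # Two synthetic binary masks in the (N, 1, H, W) layout export_mask expects.
    masks = np.zeros((2, 1, 128, 128), dtype=np.uint8)
    masks[0, 0, 16:64, 16:64] = 1   # a square
    masks[1, 0, 80:112, 8:120] = 1  # a bar

    # Fixed color, contours disabled to avoid the optional cv2 dependency.
    img = export_mask(masks, random_color=False, smoothen_contours=False)
    img.save("combined_mask.png")

With random_color=True (the default) each mask gets its own random color, which is what keeps the masks distinguishable in the single gr.Image output.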