Use weights in cache dir
- ViT-B-32.pt +0 -3
- app.py +13 -5
- sam_vit_h_4b8939.pth +0 -3
ViT-B-32.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af
-size 353976522
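Both deleted files in this commit (this one and sam_vit_h_4b8939.pth below) are Git LFS pointers rather than the weights themselves: three text lines naming the LFS spec, the SHA-256 oid of the real blob, and its byte size (353976522 bytes here, roughly 340 MB). A small hypothetical helper, sketched only for illustration, that recognizes such a pointer by its first line:

```python
def is_lfs_pointer(path: str) -> bool:
    """Heuristic check (hypothetical helper): Git LFS pointers are tiny
    ASCII files whose first line names the LFS spec, followed by
    `oid sha256:...` and `size ...` lines."""
    try:
        with open(path, encoding="ascii") as f:
            first_line = f.readline().strip()
    except (OSError, UnicodeDecodeError):
        return False  # binary or unreadable: not a pointer file
    return first_line == "version https://git-lfs.github.com/spec/v1"
```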
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import urllib
 from functools import lru_cache
 from random import randint
 from typing import Any, Callable, Dict, List, Tuple
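One subtlety in this hunk: it adds a bare `import urllib`, while the code below calls `urllib.request.urlretrieve`. In CPython, importing the `urllib` package does not itself import the `request` submodule, so the call only succeeds if some other dependency has already imported `urllib.request`. The explicit form is the reliable one — a minimal sketch (the local target path is illustrative):

```python
# Bare `import urllib` does not bind the `request` submodule; be explicit.
import urllib.request

urllib.request.urlretrieve(
    "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
    "sam_vit_h_4b8939.pth",  # illustrative download target
)
```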
@@ -11,7 +12,10 @@ import PIL
 import torch
 from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
 
-
+
+CHECKPOINT_PATH = os.path.join(os.path.expanduser("~"), ".cache", "SAM")
+CHECKPOINT_NAME = "sam_vit_h_4b8939.pth"
+CHECKPOINT_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
 MODEL_TYPE = "default"
 MAX_WIDTH = MAX_HEIGHT = 800
 THRESHOLD = 0.05
@@ -20,6 +24,11 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 @lru_cache
 def load_mask_generator() -> SamAutomaticMaskGenerator:
+    if not os.path.exists(CHECKPOINT_PATH):
+        os.makedirs(CHECKPOINT_PATH)
+    checkpoint = os.path.join(CHECKPOINT_PATH, CHECKPOINT_NAME)
+    if not os.path.exists(checkpoint):
+        urllib.request.urlretrieve(CHECKPOINT_URL, checkpoint)
     sam = sam_model_registry[MODEL_TYPE](checkpoint=CHECKPOINT_PATH).to(device)
     mask_generator = SamAutomaticMaskGenerator(sam)
     return mask_generator
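The added lines resolve the weights to ~/.cache/SAM/sam_vit_h_4b8939.pth, yet, as rendered on both sides of the diff, the unchanged context line still passes CHECKPOINT_PATH — now a directory — as `checkpoint=`, and segment_anything's build functions expect a checkpoint file they can torch.load. A standalone sketch of the apparent intent; the structure follows the diff, but using the resolved `checkpoint` file and `os.makedirs(..., exist_ok=True)` are my assumptions, not the Space's code as committed:

```python
import os
import urllib.request
from functools import lru_cache

import torch
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry

CHECKPOINT_PATH = os.path.join(os.path.expanduser("~"), ".cache", "SAM")
CHECKPOINT_NAME = "sam_vit_h_4b8939.pth"
CHECKPOINT_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


@lru_cache
def load_mask_generator() -> SamAutomaticMaskGenerator:
    # exist_ok=True creates the cache dir idempotently (assumption: intent).
    os.makedirs(CHECKPOINT_PATH, exist_ok=True)
    checkpoint = os.path.join(CHECKPOINT_PATH, CHECKPOINT_NAME)
    # Download the ~2.4 GiB ViT-H weights once; later calls hit the cache.
    if not os.path.exists(checkpoint):
        urllib.request.urlretrieve(CHECKPOINT_URL, checkpoint)
    # Assumption: pass the resolved file, not the directory, as the checkpoint.
    sam = sam_model_registry["default"](checkpoint=checkpoint).to(device)
    return SamAutomaticMaskGenerator(sam)
```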
@@ -27,10 +36,9 @@ def load_mask_generator() -> SamAutomaticMaskGenerator:
 
 @lru_cache
 def load_clip(
-    name: str = "ViT-B
+    name: str = "ViT-B/32",
 ) -> Tuple[torch.nn.Module, Callable[[PIL.Image.Image], torch.Tensor]]:
-
-    model, preprocess = clip.load(model_path, device=device)
+    model, preprocess = clip.load(name, device=device)
     return model.to(device), preprocess
 
 
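Loading CLIP by its published name rather than a local file is what makes the deleted ViT-B-32.pt pointer unnecessary: `clip.load()` accepts a model name and downloads and caches the weights itself (under ~/.cache/clip by default). A minimal usage sketch:

```python
import clip
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load() fetches "ViT-B/32" into its own cache on first use,
# so the repository no longer needs to ship the .pt file.
model, preprocess = clip.load("ViT-B/32", device=device)
```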
@@ -63,7 +71,7 @@ def get_scores(crops: List[PIL.Image.Image], query: str) -> torch.Tensor:
 def crop_image(image: np.ndarray, mask: Dict[str, Any]) -> PIL.Image.Image:
     x, y, w, h = mask["bbox"]
     masked = image * np.expand_dims(mask["segmentation"], -1)
-    crop = masked[y
+    crop = masked[y: y + h, x: x + w]
     if h > w:
         top, bottom, left, right = 0, 0, (h - w) // 2, (h - w) // 2
     else:
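The fixed slice reflects NumPy's (row, column) indexing: an image array is H x W x C, so an XYWH bbox like the one segment_anything returns crops as `[y : y + h, x : x + w]`. A quick check of the shape arithmetic:

```python
import numpy as np

image = np.zeros((600, 800, 3), dtype=np.uint8)  # H x W x C
x, y, w, h = 100, 50, 200, 120                   # bbox in (x, y, w, h)
crop = image[y : y + h, x : x + w]               # rows first (y), then columns (x)
assert crop.shape == (120, 200, 3)               # h x w x C, as intended
```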
sam_vit_h_4b8939.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e
-size 2564550879