Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,372 Bytes
576e22a 2fbf361 576e22a 488d99e 2fbf361 08430c8 2fbf361 488d99e 2fbf361 576e22a 488d99e 576e22a 5197257 576e22a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
from typing import Any
import numpy as np
import supervision as sv
import torch
from PIL import Image
from sam2.build_sam import build_sam2, build_sam2_video_predictor
from sam2.sam2_image_predictor import SAM2ImagePredictor
# Alternative ("small") checkpoint/config pair, kept commented out so it can be
# swapped in for the pair below:
# SAM_CHECKPOINT = "checkpoints/sam2_hiera_small.pt"
# SAM_CONFIG = "sam2_hiera_s.yaml"

# Checkpoint path and config name used as the default arguments of
# load_sam_image_model and load_sam_video_model.
SAM_CHECKPOINT = "checkpoints/sam2_hiera_large.pt"
SAM_CONFIG = "sam2_hiera_l.yaml"
def load_sam_image_model(
    device: torch.device,
    config: str = SAM_CONFIG,
    checkpoint: str = SAM_CHECKPOINT,
) -> SAM2ImagePredictor:
    """Build a SAM2 model and wrap it in an image predictor.

    Args:
        device: Device forwarded to ``build_sam2``.
        config: Config name forwarded to ``build_sam2``.
        checkpoint: Checkpoint path forwarded to ``build_sam2``.

    Returns:
        A ``SAM2ImagePredictor`` constructed around the freshly built model.
    """
    return SAM2ImagePredictor(
        sam_model=build_sam2(config, checkpoint, device=device)
    )
def load_sam_video_model(
    device: torch.device,
    config: str = SAM_CONFIG,
    checkpoint: str = SAM_CHECKPOINT,
) -> Any:
    """Build and return the SAM2 video predictor.

    Args:
        device: Device forwarded to ``build_sam2_video_predictor``.
        config: Config name forwarded to ``build_sam2_video_predictor``.
        checkpoint: Checkpoint path forwarded to ``build_sam2_video_predictor``.

    Returns:
        Whatever ``build_sam2_video_predictor`` returns for these arguments.
    """
    video_predictor = build_sam2_video_predictor(
        config, checkpoint, device=device
    )
    return video_predictor
def run_sam_inference(
    model: Any,
    image: Image.Image,
    detections: sv.Detections,
) -> sv.Detections:
    """Attach segmentation masks to ``detections`` using their boxes as prompts.

    Args:
        model: Predictor exposing ``set_image`` and ``predict`` (for example
            the object returned by ``load_sam_image_model``).
        image: PIL image; it is converted to RGB before being given to the
            model.
        detections: Detections whose ``xyxy`` boxes are used as box prompts.

    Returns:
        The same ``detections`` object, mutated in place so that ``mask`` is a
        boolean array whose i-th entry corresponds to the i-th box in
        ``detections.xyxy``.
    """
    image_array = np.array(image.convert("RGB"))
    model.set_image(image_array)

    boxes = detections.xyxy
    if len(boxes) == 0:
        # No box prompts: skip prediction and attach an empty mask stack that
        # matches the image size.
        height, width = image_array.shape[:2]
        detections.mask = np.zeros((0, height, width), dtype=bool)
        return detections

    # Prompt in detection order. Sorting the boxes here (as was done before)
    # reorders the returned masks but not the detections they are written
    # back to, so masks ended up attached to the wrong boxes.
    masks, _, _ = model.predict(box=boxes, multimask_output=False)

    # NOTE(review): with several boxes the predictor appears to return
    # (N, 1, H, W) -- confirm against the SAM2 API. Only that singleton axis
    # is dropped; a bare np.squeeze would also collapse N == 1 or a
    # one-pixel-wide/high image.
    if masks.ndim == 4 and masks.shape[1] == 1:
        masks = np.squeeze(masks, axis=1)

    detections.mask = masks.astype(bool)
    return detections
|