# File size: 3,145 Bytes

import logging
import os
from functools import partial
from typing import List, Union

import numpy as np
import numpy.typing as npt
import torch

import plasma.huggingface as hf

from .module.yolov5.yolo_utils.datasets import letterbox
from .module.yolov5.yolo_utils.general import non_max_suppression, scale_coords
from .preprocess import create_batch, process_image
from .utils import (
    DETECTOR_WEIGHT_ID,
    check_image_shape,
    load_yolo_model,
)


class StampDetector:
    """Detect stamps in images with a YOLO model and return pixel bounding boxes.

    Instances are callable: pass a list (or array) of images and receive one
    list of ``[xmin, ymin, xmax, ymax]`` integer boxes per input image, in the
    same order as the input.
    """

    def __init__(
        self, model_path: Union[str, None] = None, device: str = "cpu", conf_thres: float = 0.5, iou_thres: float = 0.3
    ) -> None:
        """Create an object for stamp detection.

        Args:
            model_path: Value handed to ``plasma.huggingface.download_file`` to
                obtain the checkpoint that is then loaded.
            device: Stored on ``self.device``. Model loading and image
                preprocessing are pinned to ``"cpu"`` regardless of this value.
            conf_thres: Confidence threshold passed to non-max suppression.
            iou_thres: IoU threshold passed to non-max suppression.
        """
        # NOTE(review): ``model_path=None`` is forwarded unchanged to
        # ``hf.download_file`` and the imported ``DETECTOR_WEIGHT_ID`` is never
        # used -- confirm whether a default weight fallback is intended here.
        checkpoint = hf.download_file(model_path)
        logging.getLogger(__name__).debug(
            "Stamp detector checkpoint resolved: %s -> %s", model_path, checkpoint
        )

        self.device = device
        # NOTE(review): inference is pinned to CPU even when another ``device``
        # is requested; confirm the helpers support other devices before
        # forwarding ``device`` to them.
        self.model, self.stride = load_yolo_model(checkpoint, device="cpu")

        self.img_size = 640
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres

        self.process_func_ = partial(process_image, device="cpu")

    def __call__(self, image_list: Union[List[npt.NDArray], npt.NDArray]) -> List[List[List[int]]]:
        """Return the bounding boxes detected in each image of ``image_list``.

        Args:
            image_list (Union[List[npt.NDArray], npt.NDArray]): input images

        Returns:
            One entry per input image, in input order. Each entry is a list of
            ``[xmin, ymin, xmax, ymax]`` integer boxes (empty when nothing is
            detected). An empty input yields an empty list.

        Raises:
            TypeError: If ``image_list`` is neither a list nor a numpy array.
        """
        if not isinstance(image_list, (np.ndarray, list)):
            raise TypeError("Invalid Type: Input must be of type list or np.ndarray")

        if len(image_list) == 0:
            return []

        # Only the first image is validated, as in the original contract.
        check_image_shape(image_list[0])
        return self.__detect(image_list)  # type: ignore

    def __detect(self, image_list):  # type: ignore
        """Run batched inference; use ``__call__`` instead of calling this directly.

        Images are grouped into batches by ``create_batch`` (keyed on the set
        of distinct image shapes), run through the model, filtered by non-max
        suppression with the configured thresholds, and finally re-ordered by
        the indices ``create_batch`` returned.
        """
        # ``len`` rather than truthiness: ``image_list`` may be a numpy array.
        if len(image_list) == 0:
            return []

        batches, indices = create_batch(image_list, {x.shape for x in image_list})
        predictions = []

        for origin_images in batches:
            images = [letterbox(x, self.img_size, stride=32)[0] for x in origin_images]  # type: ignore
            images = [self.process_func_(x) for x in images]
            tensor = torch.stack(images).half()

            with torch.no_grad():
                pred = self.model(tensor)[0]
            # Use the thresholds configured on the instance instead of fixed values.
            pred = non_max_suppression(  # type: ignore
                pred, self.conf_thres, self.iou_thres, classes=0, agnostic=1
            )

            for idx, det in enumerate(pred):
                if len(det):
                    # Map boxes from the letterboxed tensor back onto the
                    # matching original image (not always the first of the batch).
                    det[:, :4] = scale_coords(  # type: ignore
                        images[idx].shape[1:], det[:, :4], origin_images[idx].shape
                    )
                    boxes = det[:, :4].round()
                    predictions.append(boxes.cpu().numpy().astype("int").tolist())
                else:
                    predictions.append([])

        # Restore the caller's ordering using the indices from ``create_batch``.
        ordered = sorted(zip(predictions, indices), key=lambda pair: pair[1])
        return [boxes for boxes, _ in ordered]