Upload 17 files
- stamp_processing/__init__.py +11 -0
- stamp_processing/callback.py +41 -0
- stamp_processing/detector.py +92 -0
- stamp_processing/module/unet.py +209 -0
- stamp_processing/module/yolov5/__init__.py +4 -0
- stamp_processing/module/yolov5/hubconf.py +47 -0
- stamp_processing/module/yolov5/models/common.py +293 -0
- stamp_processing/module/yolov5/models/experimental.py +136 -0
- stamp_processing/module/yolov5/models/yolo.py +264 -0
- stamp_processing/module/yolov5/yolo/utils/__init__.py +4 -0
- stamp_processing/module/yolov5/yolo/utils/autoanchor.py +10 -0
- stamp_processing/module/yolov5/yolo/utils/datasets.py +43 -0
- stamp_processing/module/yolov5/yolo/utils/general.py +181 -0
- stamp_processing/module/yolov5/yolo/utils/utils.py +82 -0
- stamp_processing/preprocess.py +72 -0
- stamp_processing/remover.py +94 -0
- stamp_processing/utils.py +60 -0
stamp_processing/__init__.py
ADDED
@@ -0,0 +1,11 @@
"""
isort: skip_file
"""
from pkg_resources import DistributionNotFound, get_distribution

__version__ = None
try:
    __version__ = get_distribution("table_reconstruction").version
except DistributionNotFound:
    __version__ = "0.0.0"  # package is not installed
    pass
stamp_processing/callback.py
ADDED
@@ -0,0 +1,41 @@
import torch


def iou(table_box, stamp_box):
    # table_box = [x1, y1, x2, y2]
    # stamp_box = [x1, y1, x2, y2]
    x1 = max(table_box[0], stamp_box[0])
    y1 = max(table_box[1], stamp_box[1])
    x2 = min(table_box[2], stamp_box[2])
    y2 = min(table_box[3], stamp_box[3])

    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    union = (table_box[2] - table_box[0]) * (table_box[3] - table_box[1]) + (stamp_box[2] - stamp_box[0]) * (stamp_box[3] - stamp_box[1]) - intersection
    return intersection / union


def remove_potiential_table_fp(stamp_detector, image, table_preds, iou_threshold=0.6):
    stamps = stamp_detector([image])[0]
    remove_idc = []
    for stamp in stamps:
        for i, table in enumerate(table_preds):
            if iou(table, stamp) >= iou_threshold:
                remove_idc.append(i)
    return remove_idc


def torch_delete_by_idc(tensor, indices):
    mask = torch.ones(tensor.shape[0], dtype=torch.bool)
    if len(indices) == 0:
        return tensor
    else:
        mask[indices] = False
        return tensor[mask, :]


def remove_box_by_idc(boxes, indices):

    item = getattr(boxes, 'data')
    setattr(boxes, 'data', torch_delete_by_idc(item, indices))

    return boxes
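A quick sanity check of the helpers above (not part of the commit), assuming the module imports as `stamp_processing.callback` per the file listing:

import torch

from stamp_processing.callback import iou, torch_delete_by_idc  # path as listed in this commit

# iou() on two axis-aligned [x1, y1, x2, y2] boxes
print(iou([0, 0, 10, 10], [5, 5, 15, 15]))  # 25 / (100 + 100 - 25) = 0.1428...

# torch_delete_by_idc() drops the listed row indices from an N x 4 tensor
boxes = torch.arange(12.0).reshape(3, 4)
print(torch_delete_by_idc(boxes, [1]))  # keeps rows 0 and 2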
stamp_processing/detector.py
ADDED
@@ -0,0 +1,92 @@
import os
from functools import partial
from typing import List, Union

import numpy as np
import numpy.typing as npt
import torch

import plasma.huggingface as hf

from .module.yolov5.yolo_utils.datasets import letterbox
from .module.yolov5.yolo_utils.general import non_max_suppression, scale_coords
from .preprocess import create_batch, process_image
from .utils import (
    DETECTOR_WEIGHT_ID,
    check_image_shape,
    load_yolo_model,
)


class StampDetector:
    def __init__(
        self, model_path: Union[str, None] = None, device: str = "cpu", conf_thres: float = 0.5, iou_thres: float = 0.3
    ) -> None:
        """Create an object for stamp detection"""
        # assert device == "cpu", "Currently only support cpu inference"

        checkpoint = hf.download_file(model_path)
        print(model_path)
        print(checkpoint)

        self.device = device
        self.model, self.stride = load_yolo_model(checkpoint, device=device)

        self.img_size = 640
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres

        self.process_func_ = partial(process_image, device=self.device)

    def __call__(self, image_list: Union[List[npt.NDArray], npt.NDArray]) -> List[npt.NDArray]:
        """Returns a list of bounding boxes [xmin, ymin, xmax, ymax] for each image in image_list.
        Each element in the list is a numpy array of shape N x 4

        Args:
            image_list (Union[List[npt.NDArray], npt.NDArray]): input images

        Returns:
            [List[np.ndarray]]: output bounding boxes
        """

        if not isinstance(image_list, (np.ndarray, list)):
            raise TypeError("Invalid Type: Input must be of type list or np.ndarray")

        if len(image_list) > 0:
            check_image_shape(image_list[0])
        else:
            return []
        return self.__detect(image_list)  # type: ignore

    def __detect(self, image_list):  # type: ignore
        """
        Use __call__ method
        """
        batches, indices = create_batch(image_list, set(list(x.shape for x in image_list)))
        predictions = []

        for origin_images in batches:
            images = [letterbox(x, 640, stride=32)[0] for x in origin_images]  # type: ignore
            images = list(map(self.process_func_, images))
            tensor = torch.stack(images).half()

            with torch.no_grad():
                pred = self.model(tensor)[0]
            all_boxes = []
            pred = non_max_suppression(pred, 0.3, 0.30, classes=0, agnostic=1)  # type: ignore

            for idx, det in enumerate(pred):
                if len(det):
                    det[:, :4] = scale_coords(images[idx].shape[1:], det[:, :4], origin_images[0].shape)  # type: ignore
                    det = det[:, :4].round()
                    all_boxes.append(det.cpu().numpy().astype("int").tolist())
                else:
                    all_boxes.append([])

            predictions.extend(all_boxes)

        z = zip(predictions, indices)
        sorted_result = sorted(z, key=lambda x: x[1])
        predictions, _ = zip(*sorted_result)

        return list(predictions)
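A hypothetical usage sketch for `StampDetector` (not part of the commit). The weight identifier and image path are placeholders, and the call assumes `plasma.huggingface` can resolve and download the checkpoint:

import cv2

from stamp_processing.detector import StampDetector  # path as listed in this commit

detector = StampDetector(model_path="<weight-id-or-path>", device="cpu")  # placeholder weight id/path
image = cv2.imread("document.jpg")[:, :, ::-1]  # placeholder file; BGR -> RGB, HWC uint8
boxes_per_image = detector([image])  # one N x 4 list of [xmin, ymin, xmax, ymax] per input image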
stamp_processing/module/unet.py
ADDED
@@ -0,0 +1,209 @@
from typing import List

import numpy as np
import torch
from fastai.vision.all import *


# from backend.StampRemoval.util import *
__all__ = ["CustomUnetBlock", "CustomDynamicUnet", "UnetInference"]


class CustomUnetBlock(Module):
    """A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."""

    @delegates(ConvLayer.__init__)
    def __init__(
        self,
        up_in_c,
        x_in_c,
        hook,
        final_div=True,
        blur=False,
        act_cls=defaults.activation,
        self_attention=False,
        init=nn.init.kaiming_normal_,
        norm_type=None,
        **kwargs,
    ):
        self.hook = hook
        self.shuf = PixelShuffle_ICNR(up_in_c, up_in_c // 2, blur=blur, act_cls=act_cls, norm_type=norm_type)
        self.bn = BatchNorm(x_in_c)
        ni = up_in_c // 2 + x_in_c
        # nf = ni if final_div else ni//2
        nf = ni // 2 if final_div else ni // 4
        self.conv1 = ConvLayer(ni, nf, act_cls=act_cls, norm_type=norm_type, **kwargs)
        self.conv2 = ConvLayer(
            nf,
            nf,
            act_cls=act_cls,
            norm_type=norm_type,
            xtra=SelfAttention(nf) if self_attention else None,
            **kwargs,
        )
        self.relu = act_cls()
        apply_init(nn.Sequential(self.conv1, self.conv2), init)

    def forward(self, up_in):
        s = self.hook.stored
        up_out = self.shuf(up_in)
        ssh = s.shape[-2:]
        if ssh != up_out.shape[-2:]:
            up_out = F.interpolate(up_out, s.shape[-2:], mode="nearest")
        cat_x = self.relu(torch.cat([up_out, self.bn(s)], dim=1))
        return self.conv2(self.conv1(cat_x))


class CustomDynamicUnet(SequentialEx):
    """Create a U-Net from a given architecture."""

    def __init__(
        self,
        encoder,
        n_out,
        img_size,
        blur=False,
        blur_final=True,
        self_attention=False,
        y_range=None,
        last_cross=True,
        bottle=False,
        act_cls=defaults.activation,
        init=nn.init.kaiming_normal_,
        norm_type=None,
        **kwargs,
    ):
        imsize = img_size
        sizes = model_sizes(encoder, size=imsize)

        sz_chg_idxs = list(reversed(self._get_sz_change_idxs(sizes)))
        self.sfs = hook_outputs([encoder[i] for i in sz_chg_idxs], detach=False)
        x = dummy_eval(encoder, imsize).detach()

        ni = sizes[-1][1]

        middle_conv = nn.Sequential(
            ConvLayer(ni, ni, act_cls=act_cls, norm_type=norm_type, **kwargs),
            ConvLayer(ni, ni, act_cls=act_cls, norm_type=norm_type, **kwargs),
        ).eval()
        x = middle_conv(x)
        layers = [encoder, BatchNorm(ni), nn.ReLU(), middle_conv]

        for i, idx in enumerate(sz_chg_idxs):
            not_final = i != len(sz_chg_idxs) - 1
            up_in_c, x_in_c = int(x.shape[1]), int(sizes[idx][1])
            do_blur = blur and (not_final or blur_final)
            sa = self_attention and (i == len(sz_chg_idxs) - 3)
            unet_block = CustomUnetBlock(
                up_in_c,
                x_in_c,
                self.sfs[i],
                final_div=not_final,
                blur=do_blur,
                self_attention=sa,
                act_cls=act_cls,
                init=init,
                norm_type=norm_type,
                **kwargs,
            ).eval()
            layers.append(unet_block)
            x = unet_block(x)

        ni = x.shape[1]
        if imsize != sizes[0][-2:]:
            layers.append(PixelShuffle_ICNR(ni, act_cls=act_cls, norm_type=norm_type))
        layers.append(ResizeToOrig())
        if last_cross:
            layers.append(MergeLayer(dense=True))
            ni += in_channels(encoder)
            layers.append(
                ResBlock(
                    1,
                    ni,
                    ni // 2 if bottle else ni,
                    act_cls=act_cls,
                    norm_type=norm_type,
                    **kwargs,
                )
            )
        layers += [ConvLayer(ni, n_out, ks=1, act_cls=None, norm_type=norm_type, **kwargs)]
        apply_init(nn.Sequential(layers[3], layers[-2]), init)
        # apply_init(nn.Sequential(layers[2]), init)
        if y_range is not None:
            layers.append(SigmoidRange(*y_range))
        super().__init__(*layers)

    def _get_sz_change_idxs(self, sizes):
        "Get the indexes of the layers where the size of the activation changes."
        feature_szs = [size[-1] for size in sizes]
        sz_chg_idxs = list(np.where(np.array(feature_szs[:-1]) != np.array(feature_szs[1:]))[0])
        return sz_chg_idxs

    def __del__(self):
        if hasattr(self, "sfs"):
            self.sfs.remove()


class PerceptualLoss:
    pass


class UnetInference:
    def __init__(self, model_path):
        """Inference interface for unet model"""
        self.learn = load_learner(model_path)
        self.learn.model.eval()

    def __call__(self, image_array: List[np.ndarray], bs: int = 1) -> List[np.ndarray]:
        """Perform forward pass and decode the prediction of Unet model

        Args:
            image_array (list): list of numpy array
            bs (int, optional): [batch size]. Defaults to 1.

        Returns:
            [list]: list of numpy array
        """
        if len(image_array) < 1:
            return []

        batches = self.__build_batches(image_array, bs=bs)
        outs = []
        with torch.no_grad():
            for b in batches:
                outs.append(self.learn.model(b))
                del b
        pil_images = self.__decode_prediction(outs)
        return pil_images

    def __decode_prediction(self, preds):
        out = []
        i2f = IntToFloatTensor()
        for pred in preds:
            img_np = i2f.decodes(pred.squeeze()).numpy()
            img_np = img_np.transpose(1, 2, 0)
            img_np = img_np.astype(np.uint8)
            out.append(img_np)
            # out.append(Image.fromarray(img_np))
            del img_np
        return out

    def __build_batches(self, image_array: list, bs=1):
        "Builds batches to skip `DataLoader` overhead"
        type_tfms = [PILImage.create]
        item_tfms = [ToTensor()]
        type_pipe = Pipeline(type_tfms)
        item_pipe = Pipeline(item_tfms)
        i2f = IntToFloatTensor()
        batches = []
        batch = []
        k = 0
        for i, im in enumerate(image_array):
            batch.append(item_pipe(type_pipe(im)))
            k += 1
            if i == len(image_array) - 1 or k == bs:
                # batches.append(torch.cat([norm(i2f(b.cuda())) for b in batch]))
                batches.append(torch.stack([i2f(b.cpu()) for b in batch], axis=0))
                batch = []
                k = 0
        return batches
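A hypothetical usage sketch for `UnetInference` (not part of the commit). "unet.pkl" and "document.jpg" are placeholders, and an exported fastai learner is assumed to exist at that path:

import cv2

from stamp_processing.module.unet import UnetInference  # path as listed in this commit

remover = UnetInference("unet.pkl")  # placeholder path to a fastai load_learner() export
image = cv2.imread("document.jpg")[:, :, ::-1]  # placeholder file; RGB numpy array
cleaned = remover([image], bs=1)  # list of uint8 numpy arrays, same order as the input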
stamp_processing/module/yolov5/__init__.py
ADDED
@@ -0,0 +1,4 @@
from pathlib import Path


YOLO_DIR = Path(__file__).parent.absolute()
stamp_processing/module/yolov5/hubconf.py
ADDED
@@ -0,0 +1,47 @@
import torch


def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """Creates a specified YOLOv5 model
    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
        verbose (bool): print all information to screen
        device (str, torch.device, None): device to use for model parameters
    Returns:
        YOLOv5 pytorch model
    """
    from pathlib import Path

    from models.experimental import attempt_load
    from models.yolo import Model
    from yolo_utils.torch_utils import select_device

    file = Path(__file__).absolute()

    save_dir = Path("") if str(name).endswith(".pt") else file.parent
    path = (save_dir / name).with_suffix(".pt")  # checkpoint path
    try:
        device = select_device(("0" if torch.cuda.is_available() else "cpu") if device is None else device)

        if pretrained and channels == 3 and classes == 80:
            model = attempt_load(path, map_location=device)  # download/load FP32 model
        else:
            cfg = list((Path(__file__).parent / "models").rglob(f"{name}.yaml"))[0]  # model.yaml path
            model = Model(cfg, channels, classes)  # create model
        if autoshape:
            model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        return model.to(device)

    except Exception as e:
        help_url = "https://github.com/ultralytics/yolov5/issues/36"
        s = "Cache may be out of date, try `force_reload=True`. See %s for help." % help_url
        raise Exception(s) from e


def custom(path="path/to/model.pt", autoshape=True, verbose=True, device=None):
    # YOLOv5 custom or local model
    return _create(path, autoshape=autoshape, verbose=verbose, device=device)
stamp_processing/module/yolov5/models/common.py
ADDED
@@ -0,0 +1,293 @@
# YOLOv5 common modules

import math
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
from torch.cuda import amp

from yolo_utils import letterbox, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, time_synchronized


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(
            torch.cat(
                [
                    x[..., ::2, ::2],
                    x[..., 1::2, ::2],
                    x[..., ::2, 1::2],
                    x[..., 1::2, 1::2],
                ],
                1,
            )
        )
        # return self.conv(self.contract(x))


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        (
            N,
            C,
            H,
            W,
        ) = x.size()  # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)


class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        print("autoShape already enabled, skipping... ")  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename: imgs = 'data/samples/zidane.jpg'
        #   URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV: = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL: = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy: = np.zeros((640,1280,3))  # HWC
        #   torch: = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != "cpu"):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f"image{i}"  # filename
            # if isinstance(im, str):  # filename or uri
            #     im, f = (
            #         np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith("http") else im)),
            #         im,
            #     )
            # if isinstance(im, Image.Image):  # PIL Image
            #     im, f = np.asarray(im), getattr(im, "filename", f) or f
            files.append(Path(f).with_suffix(".jpg").name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = size / max(s)  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.0  # uint8 to fp16/32
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != "cpu"):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)


class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1.0, 1.0], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
        for d in x:
            for k in ["imgs", "pred", "xyxy", "xyxyn", "xywh", "xywhn"]:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n


class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
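The space-to-depth slice in `Focus.forward` can be checked in isolation with plain PyTorch; this sketch is not part of the commit and only reproduces the four-way slicing to show the channel/resolution trade:

import torch

x = torch.randn(1, 3, 640, 640)  # (b, c, h, w)
patches = torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)
print(patches.shape)  # torch.Size([1, 12, 320, 320]) -- 4x channels at half resolution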
stamp_processing/module/yolov5/models/experimental.py
ADDED
@@ -0,0 +1,136 @@
# YOLOv5 experimental modules

import numpy as np
import torch
import torch.nn as nn

from .common import Conv, DWConv


class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            for i in self.iter:
                y = y + x[i + 1]
        return y


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super(GhostBottleneck, self).__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(
            GhostConv(c1, c_, 1, 1),  # pw
            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
            GhostConv(c_, c2, 1, 1, act=False),
        )  # pw-linear
        self.shortcut = (
            nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
        )

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1e-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output


def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        # attempt_download(w)
        ckpt = torch.load(w, map_location=map_location)  # load
        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().fuse().eval())  # FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print("Ensemble created with %s\n" % weights)
        for k in ["names", "stride"]:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble
stamp_processing/module/yolov5/models/yolo.py
ADDED
@@ -0,0 +1,264 @@
# YOLOv5 YOLO-specific modules

import logging
import sys
from copy import deepcopy


logger = logging.getLogger(__name__)

from .common import *
from .experimental import *
import os
from yolo_utils import check_anchor_order, make_divisible, copy_attr, fuse_conv_and_bn, initialize_weights, scale_img


class Detect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer("anchors", a)  # shape(nl,na,2)
        self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()


class Model(nn.Module):
    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):  # model, input channels, number of classes
        super(Model, self).__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub

            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.SafeLoader)  # model dict

        # Define model
        ch = self.yaml["ch"] = self.yaml.get("ch", ch)  # input channels
        if nc and nc != self.yaml["nc"]:
            logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
            self.yaml["nc"] = nc  # override yaml value
        if anchors:
            logger.info(f"Overriding model.yaml anchors with anchors={anchors}")
            self.yaml["anchors"] = round(anchors)  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist
        self.names = [str(i) for i in range(self.yaml["nc"])]  # default names
        # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 256  # 2x min stride
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once
            # print('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()
        logger.info("")

    def forward(self, x, augment=False, profile=False):
        if augment:
            img_size = x.shape[-2:]  # height, width
            s = [1, 0.83, 0.67]  # scales
            f = [None, 3, None]  # flips (2-ud, 3-lr)
            y = []  # outputs
            for si, fi in zip(s, f):
                xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
                yi = self.forward_once(xi)[0]  # forward
                # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
                yi[..., :4] /= si  # de-scale
                if fi == 2:
                    yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
                elif fi == 3:
                    yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
                y.append(yi)
            return torch.cat(y, 1), None  # augmented inference, train
        else:
            return self.forward_once(x, profile)  # single-scale inference, train

    def forward_once(self, x, profile=False):
        y, dt = [], []  # outputs

        for m in self.model.modules():
            if isinstance(m, nn.Upsample):
                m.recompute_scale_factor = None

        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            x = m(x)  # run

            y.append(x if m.i in self.save else None)  # save output

        if profile:
            print("%.1fms total" % sum(dt))
        return x

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            print(("%6g Conv2d.bias:" + "%10.3g" * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        # print('Fusing layers... ')
        for m in self.model.modules():
            if type(m) is Conv and hasattr(m, "bn"):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, "bn")  # remove batchnorm
                m.forward = m.fuseforward  # update forward
        # self.info()
        return self

    def nms(self, mode=True):  # add or remove NMS module
        present = type(self.model[-1]) is NMS  # last layer is NMS
        if mode and not present:
            print("Adding NMS... ")
            m = NMS()  # module
            m.f = -1  # from
            m.i = self.model[-1].i + 1  # index
            self.model.add_module(name="%s" % m.i, module=m)  # add
            self.eval()
        elif not mode and present:
            print("Removing NMS... ")
            self.model = self.model[:-1]  # remove
        return self

    def autoshape(self):  # add autoShape module
        print("Adding autoShape... ")
        m = autoShape(self)  # wrap model
        copy_attr(m, self, include=("yaml", "nc", "hyp", "names", "stride"), exclude=())  # copy attributes
        return m

    # def info(self, verbose=False, img_size=640):  # print model information
    #     model_info(self, verbose, img_size)


def parse_model(d, ch):  # model_dict, input_channels(3)
    logger.info("\n%3s%18s%3s%10s %-40s%-30s" % ("", "from", "n", "params", "module", "arguments"))
    anchors, nc, gd, gw = (
        d["anchors"],
        d["nc"],
        d["depth_multiple"],
        d["width_multiple"],
    )
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception as e:
                logger.error(e)

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [
            Conv,
            GhostConv,
            Bottleneck,
            GhostBottleneck,
            SPP,
            DWConv,
            MixConv2d,
            Focus,
            CrossConv,
            BottleneckCSP,
            C3,
        ]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = (
            i,
            f,
            t,
            np,
        )  # attach index, 'from' index, type, number params
        logger.info("%3s%18s%3s%10.0f %-40s%-30s" % (i, f, n, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
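The box decoding inside `Detect.forward` can be reproduced on a single anchor with plain PyTorch; this sketch is not part of the commit and just evaluates xy = (2*sigmoid(t) - 0.5 + grid) * stride and wh = (2*sigmoid(t))^2 * anchor for zero raw outputs:

import torch

raw = torch.zeros(4)  # raw network outputs (tx, ty, tw, th) for one anchor
sig = raw.sigmoid()  # all 0.5
grid_cell = torch.tensor([3.0, 4.0])  # (x, y) grid cell index
stride, anchor = 8.0, torch.tensor([10.0, 13.0])
xy = (sig[0:2] * 2.0 - 0.5 + grid_cell) * stride
wh = (sig[2:4] * 2) ** 2 * anchor
print(xy.tolist(), wh.tolist())  # [28.0, 36.0] [10.0, 13.0]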
stamp_processing/module/yolov5/yolo/utils/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .datasets import letterbox
from .general import make_divisible, non_max_suppression, scale_coords, xyxy2xywh
from .torch_utils import time_synchronized, copy_attr, fuse_conv_and_bn, initialize_weights, scale_img
from .autoanchor import check_anchor_order
stamp_processing/module/yolov5/yolo/utils/autoanchor.py
ADDED
@@ -0,0 +1,10 @@
# Auto-anchor utils
def check_anchor_order(m):
    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
    a = m.anchor_grid.prod(-1).view(-1)  # anchor area
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da.sign() != ds.sign():  # same order
        print("Reversing anchor order")
        m.anchors[:] = m.anchors.flip(0)
        m.anchor_grid[:] = m.anchor_grid.flip(0)
stamp_processing/module/yolov5/yolo/utils/datasets.py
ADDED
@@ -0,0 +1,43 @@
import cv2
import numpy as np


def letterbox(
    img,
    new_shape=(640, 640),
    color=(114, 114, 114),
    auto=True,
    scaleFill=False,
    scaleup=True,
    stride=32,
):
    # Resize and pad image while meeting stride-multiple constraints
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
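A quick check of `letterbox()` (not part of the commit) on a 480x640 RGB array; because both sides are already multiples of the stride and the long side already equals the target, no resizing or padding is applied:

import numpy as np

from stamp_processing.module.yolov5.yolo.utils.datasets import letterbox  # path as listed in this commit

img = np.zeros((480, 640, 3), dtype=np.uint8)
out, ratio, (dw, dh) = letterbox(img, new_shape=640, auto=True, stride=32)
print(out.shape, ratio, (dw, dh))  # (480, 640, 3) (1.0, 1.0) (0.0, 0.0)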
stamp_processing/module/yolov5/yolo/utils/general.py
ADDED
@@ -0,0 +1,181 @@
import math
import time

import numpy as np
import torch
import torchvision


def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
        list of detections, on (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    _, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            label = labels[xi]
            v = torch.zeros((len(label), nc + 5), device=x.device)
            v[:, :4] = label[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(label)), label[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f"WARNING: NMS time limit {time_limit}s exceeded")
            break  # time limit exceeded

    return output


def clip_coords(boxes, img_shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (
            (img1_shape[1] - img0_shape[1] * gain) / 2,
            (img1_shape[0] - img0_shape[0] * gain) / 2,
        )  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def make_divisible(x, divisor):
    # Returns x evenly divisible by divisor
    return math.ceil(x / divisor) * divisor
def xyxy2xywh(x):
|
175 |
+
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
|
176 |
+
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
177 |
+
y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center
|
178 |
+
y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center
|
179 |
+
y[:, 2] = x[:, 2] - x[:, 0] # width
|
180 |
+
y[:, 3] = x[:, 3] - x[:, 1] # height
|
181 |
+
return y
|
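
A minimal sketch of how these helpers chain together on a raw YOLOv5 output. The tensor values below are made up for illustration, and non_max_suppression / xyxy2xywh are assumed to be in scope (e.g. imported from this general.py module):

import torch

# Fake raw prediction of shape (batch, boxes, 5 + num_classes): cx, cy, w, h, obj, cls0
pred = torch.zeros((1, 3, 6))
pred[0, 0] = torch.tensor([320.0, 320.0, 100.0, 80.0, 0.9, 0.8])  # confident box
pred[0, 1] = torch.tensor([322.0, 318.0, 102.0, 78.0, 0.8, 0.7])  # overlapping duplicate
pred[0, 2] = torch.tensor([100.0, 100.0, 40.0, 40.0, 0.1, 0.5])   # below conf_thres

detections = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
det = detections[0]               # (n, 6) tensor per image: x1, y1, x2, y2, conf, cls
print(det)                        # the duplicate and low-confidence boxes are suppressed
print(xyxy2xywh(det[:, :4]))      # convert back to center/width/height if needed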
stamp_processing/module/yolov5/yolo/utils/utils.py
ADDED
@@ -0,0 +1,82 @@
import math
import time

import torch
import torch.nn as nn
import torch.nn.functional as F


def select_device(device=""):
    cpu = device.lower() == "cpu"
    cuda = not cpu and torch.cuda.is_available()
    return torch.device("cuda:0" if cuda else "cpu")


def time_synchronized():
    # pytorch-accurate time
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    fusedconv = (
        nn.Conv2d(
            conv.in_channels,
            conv.out_channels,
            kernel_size=conv.kernel_size,
            stride=conv.stride,
            padding=conv.padding,
            groups=conv.groups,
            bias=True,
        )
        .requires_grad_(False)
        .to(conv.weight.device)
    )

    # prepare filters
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # prepare spatial bias
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv


def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    # scales img(bs,3,y,x) by ratio constrained to gs-multiple
    if ratio == 1.0:
        return img
    else:
        h, w = img.shape[2:]
        s = (int(h * ratio), int(w * ratio))  # new size
        img = F.interpolate(img, size=s, mode="bilinear", align_corners=False)  # resize
        if not same_shape:  # pad/crop img
            h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
        return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean


def initialize_weights(model):
    for m in model.modules():
        t = type(m)
        if t is nn.Conv2d:
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif t is nn.BatchNorm2d:
            m.eps = 1e-3
            m.momentum = 0.03
        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
            m.inplace = True


def copy_attr(a, b, include=(), exclude=()):
    # Copy attributes from b to a, options to only include [...] and to exclude [...]
    for k, v in b.__dict__.items():
        if (len(include) and k not in include) or k.startswith("_") or k in exclude:
            continue
        else:
            setattr(a, k, v)
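
An illustrative sanity check for fuse_conv_and_bn (random weights, eval-mode BatchNorm, made-up shapes): the fused layer should reproduce the unfused Conv→BN output.

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()            # use running statistics, as at inference time

x = torch.randn(1, 3, 32, 32)
with torch.no_grad():
    reference = bn(conv(x))              # separate Conv2d followed by BatchNorm2d
    fused = fuse_conv_and_bn(conv, bn)   # single Conv2d with BN folded into weight and bias
    fused_out = fused(x)

print(torch.allclose(reference, fused_out, atol=1e-5))  # expected: True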
stamp_processing/preprocess.py
ADDED
@@ -0,0 +1,72 @@
from typing import List, Set, Tuple

import cv2
import numpy as np
import numpy.typing as npt
import torch


def create_batch(
    images: List[npt.NDArray], shapes: Set[Tuple[int, ...]], batch_size: int = 16
) -> Tuple[List[List[npt.NDArray]], List[int]]:
    """Group images of the same shape into batches.

    - Input:
        +) images: list of input images
        +) shapes: set of all shapes of the input images
        +) batch_size: maximum number of images in one batch
    - Output:
        +) images_batch: batches of same-shape images for inference
        +) indices: original positions of the images, used to restore input order
    """
    split_batch = []
    images_batch = []
    for shape in shapes:
        mini_batch = []
        images_mini_batch = []  # type: ignore
        for idx, img in enumerate(images):
            if img.shape == shape:
                mini_batch.append(idx)
                if len(images_mini_batch) < batch_size:
                    images_mini_batch.append(img)
                else:
                    images_batch.append(images_mini_batch)
                    images_mini_batch = []
                    images_mini_batch.append(img)
        images_batch.append(images_mini_batch)
        split_batch.append(mini_batch)
        del images_mini_batch

    indices = [item for sublist in split_batch for item in sublist]
    return images_batch, indices


def process_image(img: npt.NDArray, device: str = "cpu") -> torch.Tensor:
    """Preprocess function for yolov5

    Args:
        img (npt.NDArray): Input image
        device (str, optional): torch device. Defaults to "cpu".

    Returns:
        torch.Tensor: preprocessed image tensor of shape (3, 640, 640)
    """
    height, width = img.shape[:2]
    top = (640 - height) // 2
    bottom = 640 - height - top
    left = (640 - width) // 2
    right = 640 - width - left
    img = cv2.copyMakeBorder(
        img,
        top,
        bottom,
        left,
        right,
        cv2.BORDER_CONSTANT,
        value=(255, 255, 255),
    )
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp16/32
    img /= 255.0
    return img
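
A small usage sketch for create_batch and process_image, using synthetic white pages (illustrative only; assumes the package is importable as stamp_processing):

import numpy as np
from stamp_processing.preprocess import create_batch, process_image

pages = [
    np.full((480, 640, 3), 255, dtype=np.uint8),   # two pages share one shape
    np.full((480, 640, 3), 255, dtype=np.uint8),
    np.full((600, 400, 3), 255, dtype=np.uint8),   # one page has a different shape
]
shapes = set(p.shape for p in pages)
batches, indices = create_batch(pages, shapes, batch_size=2)
print([len(b) for b in batches])   # each batch holds images of a single shape
print(indices)                     # original positions, used later to restore input order

tensor = process_image(pages[0])   # pad to 640x640, BGR->RGB, HWC->CHW, scale to [0, 1]
print(tensor.shape)                # torch.Size([3, 640, 640])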
stamp_processing/remover.py
ADDED
@@ -0,0 +1,94 @@
import os
from typing import List, Union

import numpy as np
import numpy.typing as npt

from .detector import StampDetector
from .module.unet import *
from .preprocess import create_batch
from .utils import REMOVER_WEIGHT_ID, check_image_shape, download_weight, logger


class StampRemover:
    def __init__(
        self, detection_weight: Union[str, None] = None, removal_weight: Union[str, None] = None, device: str = "cpu"
    ):
        """Create an object to remove stamps from document images"""

        # assert device == "cpu", "Currently only support cpu inference"

        if removal_weight is None:
            if not os.path.exists("tmp/"):
                os.makedirs("tmp/", exist_ok=True)
            removal_weight = os.path.join("tmp", "stamp_remover.pkl")

            logger.info("Downloading stamp remover weight from google drive")
            download_weight(REMOVER_WEIGHT_ID, output=removal_weight)
            logger.info(f"Finished downloading. Weight is saved at {removal_weight}")

        try:
            self.remover = UnetInference(removal_weight)  # type: ignore
        except Exception as e:
            logger.error(e)
            logger.error("Something went wrong while loading the remover weight")
            logger.error(
                "Please make sure you provide the correct path to the weight "
                "or manually download the weight at "
                f"https://drive.google.com/file/d/{REMOVER_WEIGHT_ID}/view?usp=sharing"
            )
            raise FileNotFoundError()

        self.detector = StampDetector(detection_weight, device=device)
        self.padding = 3

    def __call__(self, image_list: Union[List[npt.NDArray], npt.NDArray], batch_size: int = 16) -> List[npt.NDArray]:
        """Detect and remove stamps from document images

        Args:
            image_list (Union[List[npt.NDArray], npt.NDArray]): list of input images
            batch_size (int, optional): Defaults to 16.

        Returns:
            List[np.ndarray]: Input images with stamps removed
        """
        if not isinstance(image_list, (np.ndarray, list)):
            raise TypeError("Invalid Type: Input must be of type list or np.ndarray")

        if len(image_list) > 0:
            check_image_shape(image_list[0])
        else:
            return []
        return self.__batch_removing(image_list, batch_size)  # type: ignore

    def __batch_removing(self, image_list, batch_size=16):  # type: ignore
        new_pages = []

        shapes = set(x.shape for x in image_list)
        images_batch, indices = create_batch(image_list, shapes, batch_size)
        # num_batch = len(image_list) // batch_size
        detection_predictions = []
        for batch in images_batch:
            if len(batch):
                detection_predictions.extend(self.detector(batch))
        z = zip(detection_predictions, indices)
        sorted_result = sorted(z, key=lambda x: x[1])
        detection_predictions, _ = zip(*sorted_result)
        for idx, page_boxes in enumerate(detection_predictions):
            page_img = image_list[idx]
            h, w, c = page_img.shape
            for box in page_boxes:
                x_min, y_min, x_max, y_max = box[:4]
                stamp_area = page_img[
                    max(y_min - self.padding, 0) : min(y_max + self.padding, h),
                    max(x_min - self.padding, 0) : min(x_max + self.padding, w),
                ]
                stamp_area = self.remover([stamp_area])  # type: ignore

                page_img[
                    max(y_min - self.padding, 0) : min(y_max + self.padding, h),
                    max(x_min - self.padding, 0) : min(x_max + self.padding, w),
                    :,
                ] = stamp_area[0]
            new_pages.append(page_img)
        return new_pages
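
End-to-end usage might look like the sketch below. The image paths are hypothetical; with no weight paths given, both the detector and remover weights are fetched from Google Drive on first use.

import cv2
from stamp_processing.remover import StampRemover

page = cv2.imread("scanned_contract.png")        # hypothetical BGR document image (H, W, 3)

remover = StampRemover(device="cpu")             # downloads detector and remover weights
cleaned_pages = remover([page], batch_size=16)   # list in, list out, same order

cv2.imwrite("scanned_contract_clean.png", cleaned_pages[0])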
stamp_processing/utils.py
ADDED
@@ -0,0 +1,60 @@
import logging
from typing import Tuple, Union

import gdown
import numpy as np
import numpy.typing as npt
import torch
from torch import device as torch_device
from torch.nn import Module

from .module.yolov5 import YOLO_DIR


logging.basicConfig(format="%(levelname)s - %(message)s")
logger = logging.getLogger()
logger.setLevel(logging.INFO)

DETECTOR_WEIGHT_ID = "1YHH7pLoZEdyxw2AoLz9G4lrq6uuxweYB"
REMOVER_WEIGHT_ID = "1Hd79M8DhCwjFuT198R-QB7ozQbHRGcGM"


def select_device(device: str = "") -> torch_device:
    """Return a torch.device instance"""
    cpu = device.lower() == "cpu"
    cuda = not cpu and torch.cuda.is_available()
    return torch_device("cuda:0" if cuda else "cpu")


def load_yolo_model(weight_path: str, device: str) -> Tuple[Module, int]:
    """Load yolov5 model from specified path using torch hub"""
    model = torch.hub.load(str(YOLO_DIR), "custom", path=weight_path, source="local", force_reload=True)
    print(weight_path)
    # model = torch.load(weight_path, map_location=device)["model"]
    # model.to(device)
    return model, model.stride


def download_weight(file_id: str, output: Union[str, None] = None, quiet: bool = False) -> None:
    """Download model weight from Google Drive given the file ID"""
    url = f"https://drive.google.com/uc?id={file_id}"
    try:
        gdown.cached_download(url=url, path=output, quiet=quiet)
    except Exception as e:
        logger.error(e)
        logger.error("Something went wrong when downloading the weight")
        logger.error(
            "Check your internet connection or manually download the weight "
            f"at https://drive.google.com/file/d/{file_id}/view?usp=sharing"
        )


def check_image_shape(image: npt.NDArray) -> None:
    """Check if input image is valid"""
    if not isinstance(image, np.ndarray):
        raise TypeError("Invalid Type: List value must be of type np.ndarray")
    else:
        if len(image.shape) != 3:
            raise ValueError("Image must be 3-dimensional (H, W, C)")
        if image.shape[-1] != 3:
            raise ValueError("Image must have 3 channels")
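
A short sketch of the helpers in this module (the output filename is hypothetical; downloading requires network access):

import numpy as np
from stamp_processing.utils import (
    DETECTOR_WEIGHT_ID,
    check_image_shape,
    download_weight,
    select_device,
)

print(select_device(""))                 # cuda:0 when a GPU is visible, otherwise cpu

image = np.zeros((480, 640, 3), dtype=np.uint8)
check_image_shape(image)                 # passes silently for (H, W, 3) arrays

download_weight(DETECTOR_WEIGHT_ID, output="stamp_detector.pt")  # fetched via gdown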