nguyenp99 committed on
Commit
45099b6
1 Parent(s): 66655aa

Upload 17 files

stamp_processing/__init__.py ADDED
@@ -0,0 +1,11 @@
"""
isort: skip_file
"""
from pkg_resources import DistributionNotFound, get_distribution

__version__ = None
try:
    __version__ = get_distribution("table_reconstruction").version
except DistributionNotFound:
    __version__ = "0.0.0"  # package is not installed
stamp_processing/callback.py ADDED
@@ -0,0 +1,41 @@
import torch


def iou(table_box, stamp_box):
    # table_box = [x1, y1, x2, y2]
    # stamp_box = [x1, y1, x2, y2]
    x1 = max(table_box[0], stamp_box[0])
    y1 = max(table_box[1], stamp_box[1])
    x2 = min(table_box[2], stamp_box[2])
    y2 = min(table_box[3], stamp_box[3])

    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    union = (
        (table_box[2] - table_box[0]) * (table_box[3] - table_box[1])
        + (stamp_box[2] - stamp_box[0]) * (stamp_box[3] - stamp_box[1])
        - intersection
    )
    return intersection / union


def remove_potiential_table_fp(stamp_detector, image, table_preds, iou_threshold=0.6):
    stamps = stamp_detector([image])[0]
    remove_idc = []
    for stamp in stamps:
        for i, table in enumerate(table_preds):
            if iou(table, stamp) >= iou_threshold:
                remove_idc.append(i)
    return remove_idc


def torch_delete_by_idc(tensor, indices):
    mask = torch.ones(tensor.shape[0], dtype=torch.bool)
    if len(indices) == 0:
        return tensor
    else:
        mask[indices] = False
        return tensor[mask, :]


def remove_box_by_idc(boxes, indices):
    item = getattr(boxes, "data")
    setattr(boxes, "data", torch_delete_by_idc(item, indices))
    return boxes
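
A quick worked example of the iou() helper above (illustrative only, not part of the upload): two boxes of area 100 overlapping in a 5 x 5 patch give IoU well below the 0.6 default threshold used by remove_potiential_table_fp.

# Illustrative check of iou() from stamp_processing/callback.py (not part of the commit)
from stamp_processing.callback import iou

table_box = [0, 0, 10, 10]   # area 100
stamp_box = [5, 5, 15, 15]   # area 100, overlaps table_box in a 5 x 5 patch
print(iou(table_box, stamp_box))  # 25 / (100 + 100 - 25) ~= 0.143, below the 0.6 default threshold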
stamp_processing/detector.py ADDED
@@ -0,0 +1,92 @@
import os
from functools import partial
from typing import List, Union

import numpy as np
import numpy.typing as npt
import torch

import plasma.huggingface as hf

from .module.yolov5.yolo_utils.datasets import letterbox
from .module.yolov5.yolo_utils.general import non_max_suppression, scale_coords
from .preprocess import create_batch, process_image
from .utils import (
    DETECTOR_WEIGHT_ID,
    check_image_shape,
    load_yolo_model,
)


class StampDetector:
    def __init__(
        self, model_path: Union[str, None] = None, device: str = "cpu", conf_thres: float = 0.5, iou_thres: float = 0.3
    ) -> None:
        """Create an object for stamp detection"""
        # assert device == "cpu", "Currently only support cpu inference"

        checkpoint = hf.download_file(model_path)
        print(model_path)
        print(checkpoint)

        self.device = device
        self.model, self.stride = load_yolo_model(checkpoint, device=device)

        self.img_size = 640
        self.conf_thres = conf_thres
        self.iou_thres = iou_thres

        self.process_func_ = partial(process_image, device=self.device)

    def __call__(self, image_list: Union[List[npt.NDArray], npt.NDArray]) -> List[npt.NDArray]:
        """Returns a list of bounding boxes [xmin, ymin, xmax, ymax] for each image in image_list.
        Each element in the list is a numpy array of shape N x 4.

        Args:
            image_list (Union[List[npt.NDArray], npt.NDArray]): input images

        Returns:
            [List[np.ndarray]]: output bounding boxes
        """
        if not isinstance(image_list, (np.ndarray, list)):
            raise TypeError("Invalid Type: Input must be of type list or np.ndarray")

        if len(image_list) > 0:
            check_image_shape(image_list[0])
        else:
            return []
        return self.__detect(image_list)  # type: ignore

    def __detect(self, image_list):  # type: ignore
        """
        Use __call__ method
        """
        batches, indices = create_batch(image_list, set(x.shape for x in image_list))
        predictions = []

        for origin_images in batches:
            images = [letterbox(x, 640, stride=32)[0] for x in origin_images]  # type: ignore
            images = list(map(self.process_func_, images))
            tensor = torch.stack(images).half()

            with torch.no_grad():
                pred = self.model(tensor)[0]
            all_boxes = []
            pred = non_max_suppression(pred, 0.3, 0.30, classes=0, agnostic=1)  # type: ignore

            for idx, det in enumerate(pred):
                if len(det):
                    det[:, :4] = scale_coords(images[idx].shape[1:], det[:, :4], origin_images[0].shape)  # type: ignore
                    det = det[:, :4].round()
                    all_boxes.append(det.cpu().numpy().astype("int").tolist())
                else:
                    all_boxes.append([])

            predictions.extend(all_boxes)

        z = zip(predictions, indices)
        sorted_result = sorted(z, key=lambda x: x[1])
        predictions, _ = zip(*sorted_result)

        return list(predictions)
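
A minimal usage sketch of StampDetector (not part of the commit). The identifier "some-org/stamp-detector.pt" is a hypothetical value assumed to be resolvable by plasma.huggingface.download_file; input images are plain H x W x 3 arrays as read by OpenCV.

# Hypothetical usage of stamp_processing.detector.StampDetector
import cv2
from stamp_processing.detector import StampDetector

detector = StampDetector(model_path="some-org/stamp-detector.pt", device="cpu")  # placeholder weight id
image = cv2.imread("document_page.jpg")   # H x W x 3 array
boxes_per_image = detector([image])       # one list of [xmin, ymin, xmax, ymax] boxes per input image
print(boxes_per_image[0])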
stamp_processing/module/unet.py ADDED
@@ -0,0 +1,209 @@
from typing import List

import numpy as np
import torch
from fastai.vision.all import *


# from backend.StampRemoval.util import *
__all__ = ["CustomUnetBlock", "CustomDynamicUnet", "UnetInference"]


class CustomUnetBlock(Module):
    """A quasi-UNet block, using `PixelShuffle_ICNR upsampling`."""

    @delegates(ConvLayer.__init__)
    def __init__(
        self,
        up_in_c,
        x_in_c,
        hook,
        final_div=True,
        blur=False,
        act_cls=defaults.activation,
        self_attention=False,
        init=nn.init.kaiming_normal_,
        norm_type=None,
        **kwargs,
    ):
        self.hook = hook
        self.shuf = PixelShuffle_ICNR(up_in_c, up_in_c // 2, blur=blur, act_cls=act_cls, norm_type=norm_type)
        self.bn = BatchNorm(x_in_c)
        ni = up_in_c // 2 + x_in_c
        # nf = ni if final_div else ni//2
        nf = ni // 2 if final_div else ni // 4
        self.conv1 = ConvLayer(ni, nf, act_cls=act_cls, norm_type=norm_type, **kwargs)
        self.conv2 = ConvLayer(
            nf,
            nf,
            act_cls=act_cls,
            norm_type=norm_type,
            xtra=SelfAttention(nf) if self_attention else None,
            **kwargs,
        )
        self.relu = act_cls()
        apply_init(nn.Sequential(self.conv1, self.conv2), init)

    def forward(self, up_in):
        s = self.hook.stored
        up_out = self.shuf(up_in)
        ssh = s.shape[-2:]
        if ssh != up_out.shape[-2:]:
            up_out = F.interpolate(up_out, s.shape[-2:], mode="nearest")
        cat_x = self.relu(torch.cat([up_out, self.bn(s)], dim=1))
        return self.conv2(self.conv1(cat_x))


class CustomDynamicUnet(SequentialEx):
    """Create a U-Net from a given architecture."""

    def __init__(
        self,
        encoder,
        n_out,
        img_size,
        blur=False,
        blur_final=True,
        self_attention=False,
        y_range=None,
        last_cross=True,
        bottle=False,
        act_cls=defaults.activation,
        init=nn.init.kaiming_normal_,
        norm_type=None,
        **kwargs,
    ):
        imsize = img_size
        sizes = model_sizes(encoder, size=imsize)

        sz_chg_idxs = list(reversed(self._get_sz_change_idxs(sizes)))
        self.sfs = hook_outputs([encoder[i] for i in sz_chg_idxs], detach=False)
        x = dummy_eval(encoder, imsize).detach()

        ni = sizes[-1][1]

        middle_conv = nn.Sequential(
            ConvLayer(ni, ni, act_cls=act_cls, norm_type=norm_type, **kwargs),
            ConvLayer(ni, ni, act_cls=act_cls, norm_type=norm_type, **kwargs),
        ).eval()
        x = middle_conv(x)
        layers = [encoder, BatchNorm(ni), nn.ReLU(), middle_conv]

        for i, idx in enumerate(sz_chg_idxs):
            not_final = i != len(sz_chg_idxs) - 1
            up_in_c, x_in_c = int(x.shape[1]), int(sizes[idx][1])
            do_blur = blur and (not_final or blur_final)
            sa = self_attention and (i == len(sz_chg_idxs) - 3)
            unet_block = CustomUnetBlock(
                up_in_c,
                x_in_c,
                self.sfs[i],
                final_div=not_final,
                blur=do_blur,
                self_attention=sa,
                act_cls=act_cls,
                init=init,
                norm_type=norm_type,
                **kwargs,
            ).eval()
            layers.append(unet_block)
            x = unet_block(x)

        ni = x.shape[1]
        if imsize != sizes[0][-2:]:
            layers.append(PixelShuffle_ICNR(ni, act_cls=act_cls, norm_type=norm_type))
        layers.append(ResizeToOrig())
        if last_cross:
            layers.append(MergeLayer(dense=True))
            ni += in_channels(encoder)
            layers.append(
                ResBlock(
                    1,
                    ni,
                    ni // 2 if bottle else ni,
                    act_cls=act_cls,
                    norm_type=norm_type,
                    **kwargs,
                )
            )
        layers += [ConvLayer(ni, n_out, ks=1, act_cls=None, norm_type=norm_type, **kwargs)]
        apply_init(nn.Sequential(layers[3], layers[-2]), init)
        # apply_init(nn.Sequential(layers[2]), init)
        if y_range is not None:
            layers.append(SigmoidRange(*y_range))
        super().__init__(*layers)

    def _get_sz_change_idxs(self, sizes):
        "Get the indexes of the layers where the size of the activation changes."
        feature_szs = [size[-1] for size in sizes]
        sz_chg_idxs = list(np.where(np.array(feature_szs[:-1]) != np.array(feature_szs[1:]))[0])
        return sz_chg_idxs

    def __del__(self):
        if hasattr(self, "sfs"):
            self.sfs.remove()


class PerceptualLoss:
    pass


class UnetInference:
    def __init__(self, model_path):
        """Inference interface for unet model"""
        self.learn = load_learner(model_path)
        self.learn.model.eval()

    def __call__(self, image_array: List[np.ndarray], bs: int = 1) -> List[np.ndarray]:
        """Perform forward pass and decode the prediction of the Unet model

        Args:
            image_array (list): list of numpy arrays
            bs (int, optional): batch size. Defaults to 1.

        Returns:
            [list]: list of numpy arrays
        """
        if len(image_array) < 1:
            return []

        batches = self.__build_batches(image_array, bs=bs)
        outs = []
        with torch.no_grad():
            for b in batches:
                outs.append(self.learn.model(b))
                del b
        pil_images = self.__decode_prediction(outs)
        return pil_images

    def __decode_prediction(self, preds):
        out = []
        i2f = IntToFloatTensor()
        for pred in preds:
            img_np = i2f.decodes(pred.squeeze()).numpy()
            img_np = img_np.transpose(1, 2, 0)
            img_np = img_np.astype(np.uint8)
            out.append(img_np)
            # out.append(Image.fromarray(img_np))
            del img_np
        return out

    def __build_batches(self, image_array: list, bs=1):
        "Builds batches to skip `DataLoader` overhead"
        type_tfms = [PILImage.create]
        item_tfms = [ToTensor()]
        type_pipe = Pipeline(type_tfms)
        item_pipe = Pipeline(item_tfms)
        i2f = IntToFloatTensor()
        batches = []
        batch = []
        k = 0
        for i, im in enumerate(image_array):
            batch.append(item_pipe(type_pipe(im)))
            k += 1
            if i == len(image_array) - 1 or k == bs:
                # batches.append(torch.cat([norm(i2f(b.cuda())) for b in batch]))
                batches.append(torch.stack([i2f(b.cpu()) for b in batch], axis=0))
                batch = []
                k = 0
        return batches
stamp_processing/module/yolov5/__init__.py ADDED
@@ -0,0 +1,4 @@
from pathlib import Path


YOLO_DIR = Path(__file__).parent.absolute()
stamp_processing/module/yolov5/hubconf.py ADDED
@@ -0,0 +1,47 @@
import torch


def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbose=True, device=None):
    """Creates a specified YOLOv5 model

    Arguments:
        name (str): name of model, i.e. 'yolov5s'
        pretrained (bool): load pretrained weights into the model
        channels (int): number of input channels
        classes (int): number of model classes
        autoshape (bool): apply YOLOv5 .autoshape() wrapper to model
        verbose (bool): print all information to screen
        device (str, torch.device, None): device to use for model parameters

    Returns:
        YOLOv5 pytorch model
    """
    from pathlib import Path

    from models.experimental import attempt_load
    from models.yolo import Model
    from yolo_utils.torch_utils import select_device

    file = Path(__file__).absolute()

    save_dir = Path("") if str(name).endswith(".pt") else file.parent
    path = (save_dir / name).with_suffix(".pt")  # checkpoint path
    try:
        device = select_device(("0" if torch.cuda.is_available() else "cpu") if device is None else device)

        if pretrained and channels == 3 and classes == 80:
            model = attempt_load(path, map_location=device)  # download/load FP32 model
        else:
            cfg = list((Path(__file__).parent / "models").rglob(f"{name}.yaml"))[0]  # model.yaml path
            model = Model(cfg, channels, classes)  # create model
        if autoshape:
            model = model.autoshape()  # for file/URI/PIL/cv2/np inputs and NMS
        return model.to(device)

    except Exception as e:
        help_url = "https://github.com/ultralytics/yolov5/issues/36"
        s = "Cache may be out of date, try `force_reload=True`. See %s for help." % help_url
        raise Exception(s) from e


def custom(path="path/to/model.pt", autoshape=True, verbose=True, device=None):
    # YOLOv5 custom or local model
    return _create(path, autoshape=autoshape, verbose=verbose, device=device)
stamp_processing/module/yolov5/models/common.py ADDED
@@ -0,0 +1,293 @@
# YOLOv5 common modules

import math
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
from torch.cuda import amp

from yolo_utils import letterbox, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, time_synchronized


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # act=FReLU(c2)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
        # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(
            torch.cat(
                [
                    x[..., ::2, ::2],
                    x[..., 1::2, ::2],
                    x[..., ::2, 1::2],
                    x[..., 1::2, 1::2],
                ],
                1,
            )
        )
        # return self.conv(self.contract(x))


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert (H / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(N, C, H // s, s, W // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(N, C * s * s, H // s, W // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        N, C, H, W = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(N, s, s, C // s ** 2, H, W)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(N, C // s ** 2, H * s, W * s)  # x(1,16,160,160)


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)


class autoShape(nn.Module):
    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self, model):
        super(autoShape, self).__init__()
        self.model = model.eval()

    def autoshape(self):
        print("autoShape already enabled, skipping... ")  # model already converted to model.autoshape()
        return self

    @torch.no_grad()
    def forward(self, imgs, size=640, augment=False, profile=False):
        # Inference from various sources. For height=640, width=1280, RGB images example inputs are:
        #   filename:  imgs = 'data/samples/zidane.jpg'
        #   URI:            = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg'
        #   OpenCV:         = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:            = Image.open('image.jpg')  # HWC x(640,1280,3)
        #   numpy:          = np.zeros((640,1280,3))  # HWC
        #   torch:          = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:       = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        t = [time_synchronized()]
        p = next(self.model.parameters())  # for device and type
        if isinstance(imgs, torch.Tensor):  # torch
            with amp.autocast(enabled=p.device.type != "cpu"):
                return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference

        # Pre-process
        n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs])  # number of images, list of images
        shape0, shape1, files = [], [], []  # image and inference shapes, filenames
        for i, im in enumerate(imgs):
            f = f"image{i}"  # filename
            # if isinstance(im, str):  # filename or uri
            #     im, f = (
            #         np.asarray(Image.open(requests.get(im, stream=True).raw if im.startswith("http") else im)),
            #         im,
            #     )
            # if isinstance(im, Image.Image):  # PIL Image
            #     im, f = np.asarray(im), getattr(im, "filename", f) or f
            files.append(Path(f).with_suffix(".jpg").name)
            if im.shape[0] < 5:  # image in CHW
                im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
            im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3)  # enforce 3ch input
            s = im.shape[:2]  # HWC
            shape0.append(s)  # image shape
            g = size / max(s)  # gain
            shape1.append([y * g for y in s])
            imgs[i] = im  # update
        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
        x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
        x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.0  # uint8 to fp16/32
        t.append(time_synchronized())

        with amp.autocast(enabled=p.device.type != "cpu"):
            # Inference
            y = self.model(x, augment, profile)[0]  # forward
            t.append(time_synchronized())

            # Post-process
            y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
            for i in range(n):
                scale_coords(shape1, y[i][:, :4], shape0[i])

            t.append(time_synchronized())
            return Detections(imgs, y, files, t, self.names, x.shape)


class Detections:
    # detections class for YOLOv5 inference results
    def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
        super(Detections, self).__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1.0, 1.0], device=d) for im in imgs]  # normalizations
        self.imgs = imgs  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple((times[i + 1] - times[i]) * 1000 / self.n for i in range(3))  # timestamps (ms)
        self.s = shape  # inference BCHW shape

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        x = [Detections([self.imgs[i]], [self.pred[i]], self.names, self.s) for i in range(self.n)]
        for d in x:
            for k in ["imgs", "pred", "xyxy", "xyxyn", "xywh", "xywhn"]:
                setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def __len__(self):
        return self.n


class Classify(nn.Module):
    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Classify, self).__init__()
        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g)  # to x(b,c2,1,1)
        self.flat = nn.Flatten()

    def forward(self, x):
        z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1)  # cat if list
        return self.flat(self.conv(z))  # flatten to x(b,c2)
@@ -0,0 +1,136 @@
# YOLOv5 experimental modules

import numpy as np
import torch
import torch.nn as nn

from .common import Conv, DWConv


class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            for i in self.iter:
                y = y + x[i + 1]
        return y


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super(GhostBottleneck, self).__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(
            GhostConv(c1, c_, 1, 1),  # pw
            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
            GhostConv(c_, c2, 1, 1, act=False),
        )  # pw-linear
        self.shortcut = (
            nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
        )

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1e-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.stack(y).mean(0)  # mean ensemble
        y = torch.cat(y, 1)  # nms ensemble
        return y, None  # inference, train output


def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        # attempt_download(w)
        ckpt = torch.load(w, map_location=map_location)  # load
        model.append(ckpt["ema" if ckpt.get("ema") else "model"].float().fuse().eval())  # FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print("Ensemble created with %s\n" % weights)
        for k in ["names", "stride"]:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # YOLOv5 YOLO-specific modules
2
+
3
+ import logging
4
+ import sys
5
+ from copy import deepcopy
6
+
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ from .common import *
11
+ from .experimental import *
12
+ import os
13
+ from yolo_utils import check_anchor_order, make_divisible, copy_attr, fuse_conv_and_bn, initialize_weights, scale_img
14
+
15
+
16
+ class Detect(nn.Module):
17
+ stride = None # strides computed during build
18
+ export = False # onnx export
19
+
20
+ def __init__(self, nc=80, anchors=(), ch=()): # detection layer
21
+ super(Detect, self).__init__()
22
+ self.nc = nc # number of classes
23
+ self.no = nc + 5 # number of outputs per anchor
24
+ self.nl = len(anchors) # number of detection layers
25
+ self.na = len(anchors[0]) // 2 # number of anchors
26
+ self.grid = [torch.zeros(1)] * self.nl # init grid
27
+ a = torch.tensor(anchors).float().view(self.nl, -1, 2)
28
+ self.register_buffer("anchors", a) # shape(nl,na,2)
29
+ self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
30
+ self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
31
+
32
+ def forward(self, x):
33
+ # x = x.copy() # for profiling
34
+ z = [] # inference output
35
+ self.training |= self.export
36
+ for i in range(self.nl):
37
+ x[i] = self.m[i](x[i]) # conv
38
+ bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
39
+ x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
40
+
41
+ if not self.training: # inference
42
+ if self.grid[i].shape[2:4] != x[i].shape[2:4]:
43
+ self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
44
+
45
+ y = x[i].sigmoid()
46
+ y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i] # xy
47
+ y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
48
+ z.append(y.view(bs, -1, self.no))
49
+
50
+ return x if self.training else (torch.cat(z, 1), x)
51
+
52
+ @staticmethod
53
+ def _make_grid(nx=20, ny=20):
54
+ yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
55
+ return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
56
+
57
+
58
+ class Model(nn.Module):
59
+ def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None): # model, input channels, number of classes
60
+ super(Model, self).__init__()
61
+ if isinstance(cfg, dict):
62
+ self.yaml = cfg # model dict
63
+ else: # is *.yaml
64
+ import yaml # for torch hub
65
+
66
+ self.yaml_file = Path(cfg).name
67
+ with open(cfg) as f:
68
+ self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict
69
+
70
+ # Define model
71
+ ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
72
+ if nc and nc != self.yaml["nc"]:
73
+ logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
74
+ self.yaml["nc"] = nc # override yaml value
75
+ if anchors:
76
+ logger.info(f"Overriding model.yaml anchors with anchors={anchors}")
77
+ self.yaml["anchors"] = round(anchors) # override yaml value
78
+ self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
79
+ self.names = [str(i) for i in range(self.yaml["nc"])] # default names
80
+ # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
81
+
82
+ # Build strides, anchors
83
+ m = self.model[-1] # Detect()
84
+ if isinstance(m, Detect):
85
+ s = 256 # 2x min stride
86
+ m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
87
+ m.anchors /= m.stride.view(-1, 1, 1)
88
+ check_anchor_order(m)
89
+ self.stride = m.stride
90
+ self._initialize_biases() # only run once
91
+ # print('Strides: %s' % m.stride.tolist())
92
+
93
+ # Init weights, biases
94
+ initialize_weights(self)
95
+ self.info()
96
+ logger.info("")
97
+
98
+ def forward(self, x, augment=False, profile=False):
99
+ if augment:
100
+ img_size = x.shape[-2:] # height, width
101
+ s = [1, 0.83, 0.67] # scales
102
+ f = [None, 3, None] # flips (2-ud, 3-lr)
103
+ y = [] # outputs
104
+ for si, fi in zip(s, f):
105
+ xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
106
+ yi = self.forward_once(xi)[0] # forward
107
+ # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
108
+ yi[..., :4] /= si # de-scale
109
+ if fi == 2:
110
+ yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
111
+ elif fi == 3:
112
+ yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr
113
+ y.append(yi)
114
+ return torch.cat(y, 1), None # augmented inference, train
115
+ else:
116
+ return self.forward_once(x, profile) # single-scale inference, train
117
+
118
+ def forward_once(self, x, profile=False):
119
+ y, dt = [], [] # outputs
120
+
121
+ for m in self.model.modules():
122
+ if isinstance(m, nn.Upsample):
123
+ m.recompute_scale_factor = None
124
+
125
+ for m in self.model:
126
+ if m.f != -1: # if not from previous layer
127
+ x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
128
+
129
+ x = m(x) # run
130
+
131
+ y.append(x if m.i in self.save else None) # save output
132
+
133
+ if profile:
134
+ print("%.1fms total" % sum(dt))
135
+ return x
136
+
137
+ def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
138
+ # https://arxiv.org/abs/1708.02002 section 3.3
139
+ # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
140
+ m = self.model[-1] # Detect() module
141
+ for mi, s in zip(m.m, m.stride): # from
142
+ b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
143
+ b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
144
+ b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
145
+ mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
146
+
147
+ def _print_biases(self):
148
+ m = self.model[-1] # Detect() module
149
+ for mi in m.m: # from
150
+ b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
151
+ print(("%6g Conv2d.bias:" + "%10.3g" * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
152
+
153
+ # def _print_weights(self):
154
+ # for m in self.model.modules():
155
+ # if type(m) is Bottleneck:
156
+ # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
157
+
158
+ def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
159
+ # print('Fusing layers... ')
160
+ for m in self.model.modules():
161
+ if type(m) is Conv and hasattr(m, "bn"):
162
+ m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
163
+ delattr(m, "bn") # remove batchnorm
164
+ m.forward = m.fuseforward # update forward
165
+ # self.info()
166
+ return self
167
+
168
+ def nms(self, mode=True): # add or remove NMS module
169
+ present = type(self.model[-1]) is NMS # last layer is NMS
170
+ if mode and not present:
171
+ print("Adding NMS... ")
172
+ m = NMS() # module
173
+ m.f = -1 # from
174
+ m.i = self.model[-1].i + 1 # index
175
+ self.model.add_module(name="%s" % m.i, module=m) # add
176
+ self.eval()
177
+ elif not mode and present:
178
+ print("Removing NMS... ")
179
+ self.model = self.model[:-1] # remove
180
+ return self
181
+
182
+ def autoshape(self): # add autoShape module
183
+ print("Adding autoShape... ")
184
+ m = autoShape(self) # wrap model
185
+ copy_attr(m, self, include=("yaml", "nc", "hyp", "names", "stride"), exclude=()) # copy attributes
186
+ return m
187
+
188
+ # def info(self, verbose=False, img_size=640): # print model information
189
+ # model_info(self, verbose, img_size)
190
+
191
+
192
+ def parse_model(d, ch): # model_dict, input_channels(3)
193
+ logger.info("\n%3s%18s%3s%10s %-40s%-30s" % ("", "from", "n", "params", "module", "arguments"))
194
+ anchors, nc, gd, gw = (
195
+ d["anchors"],
196
+ d["nc"],
197
+ d["depth_multiple"],
198
+ d["width_multiple"],
199
+ )
200
+ na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
201
+ no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
202
+
203
+ layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
204
+ for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args
205
+ m = eval(m) if isinstance(m, str) else m # eval strings
206
+ for j, a in enumerate(args):
207
+ try:
208
+ args[j] = eval(a) if isinstance(a, str) else a # eval strings
209
+ except Exception as e:
210
+ logger.error(e)
211
+
212
+ n = max(round(n * gd), 1) if n > 1 else n # depth gain
213
+ if m in [
214
+ Conv,
215
+ GhostConv,
216
+ Bottleneck,
217
+ GhostBottleneck,
218
+ SPP,
219
+ DWConv,
220
+ MixConv2d,
221
+ Focus,
222
+ CrossConv,
223
+ BottleneckCSP,
224
+ C3,
225
+ ]:
226
+ c1, c2 = ch[f], args[0]
227
+ if c2 != no: # if not output
228
+ c2 = make_divisible(c2 * gw, 8)
229
+
230
+ args = [c1, c2, *args[1:]]
231
+ if m in [BottleneckCSP, C3]:
232
+ args.insert(2, n) # number of repeats
233
+ n = 1
234
+ elif m is nn.BatchNorm2d:
235
+ args = [ch[f]]
236
+ elif m is Concat:
237
+ c2 = sum([ch[x] for x in f])
238
+ elif m is Detect:
239
+ args.append([ch[x] for x in f])
240
+ if isinstance(args[1], int): # number of anchors
241
+ args[1] = [list(range(args[1] * 2))] * len(f)
242
+ elif m is Contract:
243
+ c2 = ch[f] * args[0] ** 2
244
+ elif m is Expand:
245
+ c2 = ch[f] // args[0] ** 2
246
+ else:
247
+ c2 = ch[f]
248
+
249
+ m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
250
+ t = str(m)[8:-2].replace("__main__.", "") # module type
251
+ np = sum([x.numel() for x in m_.parameters()]) # number params
252
+ m_.i, m_.f, m_.type, m_.np = (
253
+ i,
254
+ f,
255
+ t,
256
+ np,
257
+ ) # attach index, 'from' index, type, number params
258
+ logger.info("%3s%18s%3s%10.0f %-40s%-30s" % (i, f, n, np, t, args)) # print
259
+ save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
260
+ layers.append(m_)
261
+ if i == 0:
262
+ ch = []
263
+ ch.append(c2)
264
+ return nn.Sequential(*layers), sorted(save)
stamp_processing/module/yolov5/yolo/utils/__init__.py ADDED
@@ -0,0 +1,4 @@
from .datasets import letterbox
from .general import make_divisible, non_max_suppression, scale_coords, xyxy2xywh
from .torch_utils import time_synchronized, copy_attr, fuse_conv_and_bn, initialize_weights, scale_img
from .autoanchor import check_anchor_order
stamp_processing/module/yolov5/yolo/utils/autoanchor.py ADDED
@@ -0,0 +1,10 @@
# Auto-anchor utils
def check_anchor_order(m):
    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
    a = m.anchor_grid.prod(-1).view(-1)  # anchor area
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da.sign() != ds.sign():  # same order
        print("Reversing anchor order")
        m.anchors[:] = m.anchors.flip(0)
        m.anchor_grid[:] = m.anchor_grid.flip(0)
stamp_processing/module/yolov5/yolo/utils/datasets.py ADDED
@@ -0,0 +1,43 @@
import cv2
import numpy as np


def letterbox(
    img,
    new_shape=(640, 640),
    color=(114, 114, 114),
    auto=True,
    scaleFill=False,
    scaleup=True,
    stride=32,
):
    # Resize and pad image while meeting stride-multiple constraints
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
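
An illustrative call of the letterbox() helper above (not part of the commit): the input keeps its aspect ratio, is scaled by the limiting ratio, and is then padded so both sides are multiples of the stride. The assumed import is the letterbox function from this datasets module.

# assumes: letterbox imported from the datasets module above
import numpy as np

page = np.full((500, 600, 3), 255, dtype=np.uint8)   # H x W x 3 dummy page
resized, ratio, (dw, dh) = letterbox(page, new_shape=640, stride=32)
print(resized.shape)  # (544, 640, 3): width scaled to 640, height 533, then padded up to a multiple of 32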
stamp_processing/module/yolov5/yolo/utils/general.py ADDED
@@ -0,0 +1,181 @@
import math
import time

import numpy as np
import torch
import torchvision


def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def non_max_suppression(
    prediction,
    conf_thres=0.25,
    iou_thres=0.45,
    classes=None,
    agnostic=False,
    multi_label=False,
    labels=(),
):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
        list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    _, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            label = labels[xi]
            v = torch.zeros((len(label), nc + 5), device=x.device)
            v[:, :4] = label[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(label)), label[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3e3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f"WARNING: NMS time limit {time_limit}s exceeded")
            break  # time limit exceeded

    return output


def clip_coords(boxes, img_shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (
            (img1_shape[1] - img0_shape[1] * gain) / 2,
            (img1_shape[0] - img0_shape[0] * gain) / 2,
        )  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def make_divisible(x, divisor):
    # Returns x evenly divisible by divisor
    return math.ceil(x / divisor) * divisor


def xyxy2xywh(x):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y
stamp_processing/module/yolov5/yolo/utils/utils.py ADDED
@@ -0,0 +1,82 @@
import math
import time

import torch
import torch.nn as nn
import torch.nn.functional as F


def select_device(device=""):
    cpu = device.lower() == "cpu"
    cuda = not cpu and torch.cuda.is_available()
    return torch.device("cuda:0" if cuda else "cpu")


def time_synchronized():
    # pytorch-accurate time
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    fusedconv = (
        nn.Conv2d(
            conv.in_channels,
            conv.out_channels,
            kernel_size=conv.kernel_size,
            stride=conv.stride,
            padding=conv.padding,
            groups=conv.groups,
            bias=True,
        )
        .requires_grad_(False)
        .to(conv.weight.device)
    )

    # prepare filters
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))

    # prepare spatial bias
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv


def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
    # scales img(bs,3,y,x) by ratio constrained to gs-multiple
    if ratio == 1.0:
        return img
    else:
        h, w = img.shape[2:]
        s = (int(h * ratio), int(w * ratio))  # new size
        img = F.interpolate(img, size=s, mode="bilinear", align_corners=False)  # resize
        if not same_shape:  # pad/crop img
            h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
        return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean


def initialize_weights(model):
    for m in model.modules():
        t = type(m)
        if t is nn.Conv2d:
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif t is nn.BatchNorm2d:
            m.eps = 1e-3
            m.momentum = 0.03
        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
            m.inplace = True


def copy_attr(a, b, include=(), exclude=()):
    # Copy attributes from b to a, options to only include [...] and to exclude [...]
    for k, v in b.__dict__.items():
        if (len(include) and k not in include) or k.startswith("_") or k in exclude:
            continue
        else:
            setattr(a, k, v)
stamp_processing/preprocess.py ADDED
@@ -0,0 +1,72 @@
from typing import List, Set, Tuple

import cv2
import numpy as np
import numpy.typing as npt
import torch


def create_batch(
    images: npt.NDArray, shapes: Set[Tuple[int, int]], batch_size: int = 16
) -> Tuple[List[List[npt.NDArray]], List[int]]:
    """
    - Input:
        +) images: list of images
        +) shapes: set of all shapes of the input images
        +) batch_size: number of images in one batch
    - Output:
        +) images_batch: batches of images for inference
        +) indices: original order of the input images
    """
    split_batch = []
    images_batch = []
    for shape in shapes:
        mini_batch = []
        images_mini_batch = []  # type: ignore
        for idx, img in enumerate(images):
            if img.shape == shape:
                mini_batch.append(idx)
                if len(images_mini_batch) < batch_size:
                    images_mini_batch.append(img)
                else:
                    images_batch.append(images_mini_batch)
                    images_mini_batch = []
                    images_mini_batch.append(img)
        images_batch.append(images_mini_batch)
        split_batch.append(mini_batch)
    del images_mini_batch

    indices = [item for sublist in split_batch for item in sublist]
    return images_batch, indices


def process_image(img: npt.NDArray, device: str = "cpu") -> torch.Tensor:
    """Preprocess function for yolov5

    Args:
        img (npt.NDArray): Input image
        device (str, optional): torch device. Defaults to "cpu".

    Returns:
        torch.Tensor: preprocessed image
    """
    height, width = img.shape[:2]
    top = (640 - height) // 2
    bottom = 640 - height - top
    left = (640 - width) // 2
    right = 640 - width - left
    img = cv2.copyMakeBorder(
        img,
        top,
        bottom,
        left,
        right,
        cv2.BORDER_CONSTANT,
        value=(255, 255, 255),
    )
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x640x640
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float()  # uint8 to fp16/32
    img /= 255.0
    return img
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Union
3
+
4
+ import numpy as np
5
+ import numpy.typing as npt
6
+
7
+ from .detector import StampDetector
8
+ from .module.unet import *
9
+ from .preprocess import create_batch
10
+ from .utils import REMOVER_WEIGHT_ID, check_image_shape, download_weight, logger
11
+
12
+
13
+ class StampRemover:
14
+ def __init__(
15
+ self, detection_weight: Union[str, None] = None, removal_weight: Union[str, None] = None, device: str = "cpu"
16
+ ):
17
+ """Create an object to remove stamps from document images"""
18
+
19
+ # assert device == "cpu", "Currently only support cpu inference"
20
+
21
+ if removal_weight is None:
22
+ if not os.path.exists("tmp/"):
23
+ os.makedirs("tmp/", exist_ok=True)
24
+ removal_weight = os.path.join("tmp", "stamp_remover.pkl")
25
+
26
+ logger.info("Downloading stamp remover weight from google drive")
27
+ download_weight(REMOVER_WEIGHT_ID, output=removal_weight)
28
+ logger.info(f"Finished downloading. Weight is saved at {removal_weight}")
29
+
30
+ try:
31
+ self.remover = UnetInference(removal_weight) # type: ignore
32
+ except Exception as e:
33
+ logger.error(e)
34
+ logger.error("There is something wrong when loading remover weight")
35
+ logger.error(
36
+ "Please make sure you provide the correct path to the weight"
37
+ "or mannually download the weight at"
38
+ f"https://drive.google.com/file/d/{REMOVER_WEIGHT_ID}/view?usp=sharing"
39
+ )
40
+ raise FileNotFoundError()
41
+
42
+ self.detector = StampDetector(detection_weight, device=device)
43
+ self.padding = 3
44
+
45
+ def __call__(self, image_list: Union[List[npt.NDArray], npt.NDArray], batch_size: int = 16) -> List[npt.NDArray]:
46
+ """Detect and remove stamps from document images
47
+
48
+ Args:
49
+ image_list (Union[List[npt.NDArray], npt.NDArray]): list of input images
50
+ batch_size (int, optional): Defaults to 16.
51
+
52
+ Returns:
53
+ List[np.ndarray]: Input images with stamps removed
54
+ """
55
+ if not isinstance(image_list, (np.ndarray, list)):
56
+ raise TypeError("Invalid Type: Input must be of type list or np.ndarray")
57
+
58
+ if len(image_list) > 0:
59
+ check_image_shape(image_list[0])
60
+ else:
61
+ return []
62
+ return self.__batch_removing(image_list, batch_size) # type:ignore
63
+
64
+ def __batch_removing(self, image_list, batch_size=16): # type: ignore
65
+ new_pages = []
66
+
67
+ shapes = set(list(x.shape for x in image_list))
68
+ images_batch, indices = create_batch(image_list, shapes, batch_size)
69
+ # num_batch = len(image_list) // batch_size
70
+ detection_predictions = []
71
+ for batch in images_batch:
72
+ if len(batch):
73
+ detection_predictions.extend(self.detector(batch))
74
+ z = zip(detection_predictions, indices)
75
+ sorted_result = sorted(z, key=lambda x: x[1])
76
+ detection_predictions, _ = zip(*sorted_result)
77
+ for idx, page_boxes in enumerate(detection_predictions):
78
+ page_img = image_list[idx]
79
+ h, w, c = page_img.shape
80
+ for box in page_boxes:
81
+ x_min, y_min, x_max, y_max = box[:4]
82
+ stamp_area = page_img[
83
+ max(y_min - self.padding, 0) : min(y_max + self.padding, h),
84
+ max(x_min - self.padding, 0) : min(x_max + self.padding, w),
85
+ ]
86
+ stamp_area = self.remover([stamp_area]) # type:ignore
87
+
88
+ page_img[
89
+ max(y_min - self.padding, 0) : min(y_max + self.padding, h),
90
+ max(x_min - self.padding, 0) : min(x_max + self.padding, w),
91
+ :,
92
+ ] = stamp_area[0]
93
+ new_pages.append(page_img)
94
+ return new_pages
stamp_processing/utils.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Tuple, Union
3
+
4
+ import gdown
5
+ import numpy as np
6
+ import numpy.typing as npt
7
+ import torch
8
+ from torch import device as torch_device
9
+ from torch.nn import Module
10
+
11
+ from .module.yolov5 import YOLO_DIR
12
+
13
+
14
+ logging.basicConfig(format="%(levelname)s - %(message)s'")
15
+ logger = logging.getLogger()
16
+ logger.setLevel(logging.INFO)
17
+
18
+ DETECTOR_WEIGHT_ID = "1YHH7pLoZEdyxw2AoLz9G4lrq6uuxweYB"
19
+ REMOVER_WEIGHT_ID = "1Hd79M8DhCwjFuT198R-QB7ozQbHRGcGM"
20
+
21
+
22
+ def select_device(device: str = "") -> torch_device:
23
+ """Return a torch.device instance"""
24
+ cpu = device.lower() == "cpu"
25
+ cuda = not cpu and torch.cuda.is_available()
26
+ return torch_device("cuda:0" if cuda else "cpu")
27
+
28
+
29
+ def load_yolo_model(weight_path: str, device: str) -> Tuple[Module, int]:
30
+ """Load yolov5 model from specified path using torch hub"""
31
+ model = torch.hub.load(str(YOLO_DIR), "custom", path=weight_path, source="local", force_reload=True)
32
+ print(weight_path)
33
+ # model = torch.load(weight_path, map_location=device)["model"]
34
+ # model.to(device)
35
+ return model, model.stride
36
+
37
+
38
+ def download_weight(file_id: str, output: Union[str, None] = None, quiet: bool = False) -> None:
39
+ """Download model weight from Google Drive given the file ID"""
40
+ url = f"https://drive.google.com/uc?id={file_id}"
41
+ try:
42
+ gdown.cached_download(url=url, path=output, quiet=quiet)
43
+ except Exception as e:
44
+ logger.error(e)
45
+ logger.error("Something went wrong when downloading the weight")
46
+ logger.error(
47
+ "Check your internet connection or manually download the weight "
48
+ f"at https://drive.google.com/file/d/{file_id}/view?usp=sharing"
49
+ )
50
+
51
+
52
+ def check_image_shape(image: npt.NDArray) -> None:
53
+ """Check if input image is valid"""
54
+ if not isinstance(image, np.ndarray):
55
+ raise TypeError("Invalid Type: List value must be of type np.ndarray")
56
+ else:
57
+ if len(image.shape) != 3:
58
+ raise ValueError("Invalid image shape")
59
+ if image.shape[-1] != 3:
60
+ raise ValueError("Image must be 3 dimensional")