yyk19 committed
Commit 11935ed
1 Parent(s): 37f1640

support cpu mode.

This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. annotator/ckpts/ckpts.txt +0 -1
  2. annotator/hed/__init__.py +0 -132
  3. annotator/midas/__init__.py +0 -38
  4. annotator/midas/api.py +0 -169
  5. annotator/midas/midas/__init__.py +0 -0
  6. annotator/midas/midas/base_model.py +0 -16
  7. annotator/midas/midas/blocks.py +0 -342
  8. annotator/midas/midas/dpt_depth.py +0 -109
  9. annotator/midas/midas/midas_net.py +0 -76
  10. annotator/midas/midas/midas_net_custom.py +0 -128
  11. annotator/midas/midas/transforms.py +0 -234
  12. annotator/midas/midas/vit.py +0 -491
  13. annotator/midas/utils.py +0 -189
  14. annotator/mlsd/__init__.py +0 -39
  15. annotator/mlsd/models/mbv2_mlsd_large.py +0 -292
  16. annotator/mlsd/models/mbv2_mlsd_tiny.py +0 -275
  17. annotator/mlsd/utils.py +0 -580
  18. annotator/openpose/__init__.py +0 -44
  19. annotator/openpose/body.py +0 -219
  20. annotator/openpose/hand.py +0 -86
  21. annotator/openpose/model.py +0 -219
  22. annotator/openpose/util.py +0 -164
  23. annotator/uniformer/__init__.py +0 -23
  24. annotator/uniformer/configs/_base_/datasets/ade20k.py +0 -54
  25. annotator/uniformer/configs/_base_/datasets/chase_db1.py +0 -59
  26. annotator/uniformer/configs/_base_/datasets/cityscapes.py +0 -54
  27. annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py +0 -35
  28. annotator/uniformer/configs/_base_/datasets/drive.py +0 -59
  29. annotator/uniformer/configs/_base_/datasets/hrf.py +0 -59
  30. annotator/uniformer/configs/_base_/datasets/pascal_context.py +0 -60
  31. annotator/uniformer/configs/_base_/datasets/pascal_context_59.py +0 -60
  32. annotator/uniformer/configs/_base_/datasets/pascal_voc12.py +0 -57
  33. annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py +0 -9
  34. annotator/uniformer/configs/_base_/datasets/stare.py +0 -59
  35. annotator/uniformer/configs/_base_/default_runtime.py +0 -14
  36. annotator/uniformer/configs/_base_/models/ann_r50-d8.py +0 -46
  37. annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py +0 -44
  38. annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py +0 -44
  39. annotator/uniformer/configs/_base_/models/cgnet.py +0 -35
  40. annotator/uniformer/configs/_base_/models/danet_r50-d8.py +0 -44
  41. annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py +0 -44
  42. annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py +0 -50
  43. annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py +0 -46
  44. annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py +0 -44
  45. annotator/uniformer/configs/_base_/models/dnl_r50-d8.py +0 -46
  46. annotator/uniformer/configs/_base_/models/emanet_r50-d8.py +0 -47
  47. annotator/uniformer/configs/_base_/models/encnet_r50-d8.py +0 -48
  48. annotator/uniformer/configs/_base_/models/fast_scnn.py +0 -57
  49. annotator/uniformer/configs/_base_/models/fcn_hr18.py +0 -52
  50. annotator/uniformer/configs/_base_/models/fcn_r50-d8.py +0 -45
annotator/ckpts/ckpts.txt DELETED
@@ -1 +0,0 @@
- Weights here.
annotator/hed/__init__.py DELETED
@@ -1,132 +0,0 @@
- import numpy as np
- import cv2
- import os
- import torch
- from einops import rearrange
- from annotator.util import annotator_ckpts_path
-
-
- class Network(torch.nn.Module):
-     def __init__(self, model_path):
-         super().__init__()
-
-         self.netVggOne = torch.nn.Sequential(
-             torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggTwo = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggThr = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggFou = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggFiv = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netScoreOne = torch.nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreTwo = torch.nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreThr = torch.nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreFou = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreFiv = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)
-
-         self.netCombine = torch.nn.Sequential(
-             torch.nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1, padding=0),
-             torch.nn.Sigmoid()
-         )
-
-         self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(model_path).items()})
-
-     def forward(self, tenInput):
-         tenInput = tenInput * 255.0
-         tenInput = tenInput - torch.tensor(data=[104.00698793, 116.66876762, 122.67891434], dtype=tenInput.dtype, device=tenInput.device).view(1, 3, 1, 1)
-
-         tenVggOne = self.netVggOne(tenInput)
-         tenVggTwo = self.netVggTwo(tenVggOne)
-         tenVggThr = self.netVggThr(tenVggTwo)
-         tenVggFou = self.netVggFou(tenVggThr)
-         tenVggFiv = self.netVggFiv(tenVggFou)
-
-         tenScoreOne = self.netScoreOne(tenVggOne)
-         tenScoreTwo = self.netScoreTwo(tenVggTwo)
-         tenScoreThr = self.netScoreThr(tenVggThr)
-         tenScoreFou = self.netScoreFou(tenVggFou)
-         tenScoreFiv = self.netScoreFiv(tenVggFiv)
-
-         tenScoreOne = torch.nn.functional.interpolate(input=tenScoreOne, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreTwo = torch.nn.functional.interpolate(input=tenScoreTwo, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreThr = torch.nn.functional.interpolate(input=tenScoreThr, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreFou = torch.nn.functional.interpolate(input=tenScoreFou, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreFiv = torch.nn.functional.interpolate(input=tenScoreFiv, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-
-         return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1))
-
-
- class HEDdetector:
-     def __init__(self):
-         remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/network-bsds500.pth"
-         modelpath = os.path.join(annotator_ckpts_path, "network-bsds500.pth")
-         if not os.path.exists(modelpath):
-             from basicsr.utils.download_util import load_file_from_url
-             load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
-         self.netNetwork = Network(modelpath).cuda().eval()
-
-     def __call__(self, input_image):
-         assert input_image.ndim == 3
-         input_image = input_image[:, :, ::-1].copy()
-         with torch.no_grad():
-             image_hed = torch.from_numpy(input_image).float().cuda()
-             image_hed = image_hed / 255.0
-             image_hed = rearrange(image_hed, 'h w c -> 1 c h w')
-             edge = self.netNetwork(image_hed)[0]
-             edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
-             return edge[0]
-
-
- def nms(x, t, s):
-     x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
-
-     f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
-     f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
-     f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
-     f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
-
-     y = np.zeros_like(x)
-
-     for f in [f1, f2, f3, f4]:
-         np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
-
-     z = np.zeros_like(y, dtype=np.uint8)
-     z[y > t] = 255
-     return z
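
Both HEDdetector.__init__ and HEDdetector.__call__ above hard-code .cuda(), which is exactly the kind of call the "support cpu mode" change has to remove. The replacement files are not visible in this truncated view, so the following is only a minimal sketch of a device-agnostic variant, assuming a device argument is threaded through; the name HEDdetectorCPU and its device parameter are illustrative, not the actual code of this commit. Depending on how the checkpoint was saved, Network.__init__ may also need a map_location argument in its torch.load call.

import os

import numpy as np
import torch
from einops import rearrange

from annotator.util import annotator_ckpts_path


class HEDdetectorCPU:
    """Illustrative device-agnostic sketch; Network is the class from the deleted file above."""

    def __init__(self, device=None):
        # Assumption: honour an explicit device, otherwise fall back to CPU when CUDA is absent.
        self.device = torch.device(device or ("cuda" if torch.cuda.is_available() else "cpu"))
        modelpath = os.path.join(annotator_ckpts_path, "network-bsds500.pth")
        # The checkpoint-download logic of the original is omitted here for brevity.
        self.netNetwork = Network(modelpath).to(self.device).eval()

    def __call__(self, input_image):
        assert input_image.ndim == 3
        input_image = input_image[:, :, ::-1].copy()  # reverse channel order, as in the original
        with torch.no_grad():
            image_hed = torch.from_numpy(input_image).float().to(self.device) / 255.0
            image_hed = rearrange(image_hed, 'h w c -> 1 c h w')
            edge = self.netNetwork(image_hed)[0]
            edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
        return edge[0]

The same pattern, a device argument plus .to(device) in place of .cuda(), applies to the MidasDetector and the other annotator wrappers deleted below.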
annotator/midas/__init__.py DELETED
@@ -1,38 +0,0 @@
- import cv2
- import numpy as np
- import torch
-
- from einops import rearrange
- from .api import MiDaSInference
-
-
- class MidasDetector:
-     def __init__(self):
-         self.model = MiDaSInference(model_type="dpt_hybrid").cuda()
-
-     def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
-         assert input_image.ndim == 3
-         image_depth = input_image
-         with torch.no_grad():
-             image_depth = torch.from_numpy(image_depth).float().cuda()
-             image_depth = image_depth / 127.5 - 1.0
-             image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
-             depth = self.model(image_depth)[0]
-
-             depth_pt = depth.clone()
-             depth_pt -= torch.min(depth_pt)
-             depth_pt /= torch.max(depth_pt)
-             depth_pt = depth_pt.cpu().numpy()
-             depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)
-
-             depth_np = depth.cpu().numpy()
-             x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3)
-             y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3)
-             z = np.ones_like(x) * a
-             x[depth_pt < bg_th] = 0
-             y[depth_pt < bg_th] = 0
-             normal = np.stack([x, y, z], axis=2)
-             normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5
-             normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
-
-             return depth_image, normal_image
annotator/midas/api.py DELETED
@@ -1,169 +0,0 @@
1
- # based on https://github.com/isl-org/MiDaS
2
-
3
- import cv2
4
- import os
5
- import torch
6
- import torch.nn as nn
7
- from torchvision.transforms import Compose
8
-
9
- from .midas.dpt_depth import DPTDepthModel
10
- from .midas.midas_net import MidasNet
11
- from .midas.midas_net_custom import MidasNet_small
12
- from .midas.transforms import Resize, NormalizeImage, PrepareForNet
13
- from annotator.util import annotator_ckpts_path
14
-
15
-
16
- ISL_PATHS = {
17
- "dpt_large": os.path.join(annotator_ckpts_path, "dpt_large-midas-2f21e586.pt"),
18
- "dpt_hybrid": os.path.join(annotator_ckpts_path, "dpt_hybrid-midas-501f0c75.pt"),
19
- "midas_v21": "",
20
- "midas_v21_small": "",
21
- }
22
-
23
- remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
24
-
25
-
26
- def disabled_train(self, mode=True):
27
- """Overwrite model.train with this function to make sure train/eval mode
28
- does not change anymore."""
29
- return self
30
-
31
-
32
- def load_midas_transform(model_type):
33
- # https://github.com/isl-org/MiDaS/blob/master/run.py
34
- # load transform only
35
- if model_type == "dpt_large": # DPT-Large
36
- net_w, net_h = 384, 384
37
- resize_mode = "minimal"
38
- normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
39
-
40
- elif model_type == "dpt_hybrid": # DPT-Hybrid
41
- net_w, net_h = 384, 384
42
- resize_mode = "minimal"
43
- normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
44
-
45
- elif model_type == "midas_v21":
46
- net_w, net_h = 384, 384
47
- resize_mode = "upper_bound"
48
- normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
49
-
50
- elif model_type == "midas_v21_small":
51
- net_w, net_h = 256, 256
52
- resize_mode = "upper_bound"
53
- normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
54
-
55
- else:
56
- assert False, f"model_type '{model_type}' not implemented, use: --model_type large"
57
-
58
- transform = Compose(
59
- [
60
- Resize(
61
- net_w,
62
- net_h,
63
- resize_target=None,
64
- keep_aspect_ratio=True,
65
- ensure_multiple_of=32,
66
- resize_method=resize_mode,
67
- image_interpolation_method=cv2.INTER_CUBIC,
68
- ),
69
- normalization,
70
- PrepareForNet(),
71
- ]
72
- )
73
-
74
- return transform
75
-
76
-
77
- def load_model(model_type):
78
- # https://github.com/isl-org/MiDaS/blob/master/run.py
79
- # load network
80
- model_path = ISL_PATHS[model_type]
81
- if model_type == "dpt_large": # DPT-Large
82
- model = DPTDepthModel(
83
- path=model_path,
84
- backbone="vitl16_384",
85
- non_negative=True,
86
- )
87
- net_w, net_h = 384, 384
88
- resize_mode = "minimal"
89
- normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
90
-
91
- elif model_type == "dpt_hybrid": # DPT-Hybrid
92
- if not os.path.exists(model_path):
93
- from basicsr.utils.download_util import load_file_from_url
94
- load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
95
-
96
- model = DPTDepthModel(
97
- path=model_path,
98
- backbone="vitb_rn50_384",
99
- non_negative=True,
100
- )
101
- net_w, net_h = 384, 384
102
- resize_mode = "minimal"
103
- normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
104
-
105
- elif model_type == "midas_v21":
106
- model = MidasNet(model_path, non_negative=True)
107
- net_w, net_h = 384, 384
108
- resize_mode = "upper_bound"
109
- normalization = NormalizeImage(
110
- mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
111
- )
112
-
113
- elif model_type == "midas_v21_small":
114
- model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True,
115
- non_negative=True, blocks={'expand': True})
116
- net_w, net_h = 256, 256
117
- resize_mode = "upper_bound"
118
- normalization = NormalizeImage(
119
- mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
120
- )
121
-
122
- else:
123
- print(f"model_type '{model_type}' not implemented, use: --model_type large")
124
- assert False
125
-
126
- transform = Compose(
127
- [
128
- Resize(
129
- net_w,
130
- net_h,
131
- resize_target=None,
132
- keep_aspect_ratio=True,
133
- ensure_multiple_of=32,
134
- resize_method=resize_mode,
135
- image_interpolation_method=cv2.INTER_CUBIC,
136
- ),
137
- normalization,
138
- PrepareForNet(),
139
- ]
140
- )
141
-
142
- return model.eval(), transform
143
-
144
-
145
- class MiDaSInference(nn.Module):
146
- MODEL_TYPES_TORCH_HUB = [
147
- "DPT_Large",
148
- "DPT_Hybrid",
149
- "MiDaS_small"
150
- ]
151
- MODEL_TYPES_ISL = [
152
- "dpt_large",
153
- "dpt_hybrid",
154
- "midas_v21",
155
- "midas_v21_small",
156
- ]
157
-
158
- def __init__(self, model_type):
159
- super().__init__()
160
- assert (model_type in self.MODEL_TYPES_ISL)
161
- model, _ = load_model(model_type)
162
- self.model = model
163
- self.model.train = disabled_train
164
-
165
- def forward(self, x):
166
- with torch.no_grad():
167
- prediction = self.model(x)
168
- return prediction
169
-
annotator/midas/midas/__init__.py DELETED
File without changes
annotator/midas/midas/base_model.py DELETED
@@ -1,16 +0,0 @@
- import torch
-
-
- class BaseModel(torch.nn.Module):
-     def load(self, path):
-         """Load model from file.
-
-         Args:
-             path (str): file path
-         """
-         parameters = torch.load(path, map_location=torch.device('cpu'))
-
-         if "optimizer" in parameters:
-             parameters = parameters["model"]
-
-         self.load_state_dict(parameters)
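
One detail worth noting for the CPU path: BaseModel.load above already passes map_location=torch.device('cpu') to torch.load, so the MiDaS checkpoints themselves load fine without a GPU. By contrast, the HED Network earlier in this diff calls torch.load without a map_location, which can fail on CPU-only machines if the checkpoint was saved from GPU tensors. A minimal sketch of a CPU-safe version of that load, mirroring BaseModel.load (the path below is illustrative):

import torch

model_path = "annotator/ckpts/network-bsds500.pth"  # illustrative path
# Map every tensor to CPU during loading so this works on machines without CUDA,
# then apply the same key renaming the deleted HED Network uses.
state_dict = {k.replace('module', 'net'): v
              for k, v in torch.load(model_path, map_location=torch.device('cpu')).items()}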
annotator/midas/midas/blocks.py DELETED
@@ -1,342 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
-
4
- from .vit import (
5
- _make_pretrained_vitb_rn50_384,
6
- _make_pretrained_vitl16_384,
7
- _make_pretrained_vitb16_384,
8
- forward_vit,
9
- )
10
-
11
- def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, use_vit_only=False, use_readout="ignore",):
12
- if backbone == "vitl16_384":
13
- pretrained = _make_pretrained_vitl16_384(
14
- use_pretrained, hooks=hooks, use_readout=use_readout
15
- )
16
- scratch = _make_scratch(
17
- [256, 512, 1024, 1024], features, groups=groups, expand=expand
18
- ) # ViT-L/16 - 85.0% Top1 (backbone)
19
- elif backbone == "vitb_rn50_384":
20
- pretrained = _make_pretrained_vitb_rn50_384(
21
- use_pretrained,
22
- hooks=hooks,
23
- use_vit_only=use_vit_only,
24
- use_readout=use_readout,
25
- )
26
- scratch = _make_scratch(
27
- [256, 512, 768, 768], features, groups=groups, expand=expand
28
- ) # ViT-H/16 - 85.0% Top1 (backbone)
29
- elif backbone == "vitb16_384":
30
- pretrained = _make_pretrained_vitb16_384(
31
- use_pretrained, hooks=hooks, use_readout=use_readout
32
- )
33
- scratch = _make_scratch(
34
- [96, 192, 384, 768], features, groups=groups, expand=expand
35
- ) # ViT-B/16 - 84.6% Top1 (backbone)
36
- elif backbone == "resnext101_wsl":
37
- pretrained = _make_pretrained_resnext101_wsl(use_pretrained)
38
- scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # efficientnet_lite3
39
- elif backbone == "efficientnet_lite3":
40
- pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable)
41
- scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3
42
- else:
43
- print(f"Backbone '{backbone}' not implemented")
44
- assert False
45
-
46
- return pretrained, scratch
47
-
48
-
49
- def _make_scratch(in_shape, out_shape, groups=1, expand=False):
50
- scratch = nn.Module()
51
-
52
- out_shape1 = out_shape
53
- out_shape2 = out_shape
54
- out_shape3 = out_shape
55
- out_shape4 = out_shape
56
- if expand==True:
57
- out_shape1 = out_shape
58
- out_shape2 = out_shape*2
59
- out_shape3 = out_shape*4
60
- out_shape4 = out_shape*8
61
-
62
- scratch.layer1_rn = nn.Conv2d(
63
- in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
64
- )
65
- scratch.layer2_rn = nn.Conv2d(
66
- in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
67
- )
68
- scratch.layer3_rn = nn.Conv2d(
69
- in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
70
- )
71
- scratch.layer4_rn = nn.Conv2d(
72
- in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
73
- )
74
-
75
- return scratch
76
-
77
-
78
- def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False):
79
- efficientnet = torch.hub.load(
80
- "rwightman/gen-efficientnet-pytorch",
81
- "tf_efficientnet_lite3",
82
- pretrained=use_pretrained,
83
- exportable=exportable
84
- )
85
- return _make_efficientnet_backbone(efficientnet)
86
-
87
-
88
- def _make_efficientnet_backbone(effnet):
89
- pretrained = nn.Module()
90
-
91
- pretrained.layer1 = nn.Sequential(
92
- effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2]
93
- )
94
- pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3])
95
- pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5])
96
- pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9])
97
-
98
- return pretrained
99
-
100
-
101
- def _make_resnet_backbone(resnet):
102
- pretrained = nn.Module()
103
- pretrained.layer1 = nn.Sequential(
104
- resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1
105
- )
106
-
107
- pretrained.layer2 = resnet.layer2
108
- pretrained.layer3 = resnet.layer3
109
- pretrained.layer4 = resnet.layer4
110
-
111
- return pretrained
112
-
113
-
114
- def _make_pretrained_resnext101_wsl(use_pretrained):
115
- resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")
116
- return _make_resnet_backbone(resnet)
117
-
118
-
119
-
120
- class Interpolate(nn.Module):
121
- """Interpolation module.
122
- """
123
-
124
- def __init__(self, scale_factor, mode, align_corners=False):
125
- """Init.
126
-
127
- Args:
128
- scale_factor (float): scaling
129
- mode (str): interpolation mode
130
- """
131
- super(Interpolate, self).__init__()
132
-
133
- self.interp = nn.functional.interpolate
134
- self.scale_factor = scale_factor
135
- self.mode = mode
136
- self.align_corners = align_corners
137
-
138
- def forward(self, x):
139
- """Forward pass.
140
-
141
- Args:
142
- x (tensor): input
143
-
144
- Returns:
145
- tensor: interpolated data
146
- """
147
-
148
- x = self.interp(
149
- x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners
150
- )
151
-
152
- return x
153
-
154
-
155
- class ResidualConvUnit(nn.Module):
156
- """Residual convolution module.
157
- """
158
-
159
- def __init__(self, features):
160
- """Init.
161
-
162
- Args:
163
- features (int): number of features
164
- """
165
- super().__init__()
166
-
167
- self.conv1 = nn.Conv2d(
168
- features, features, kernel_size=3, stride=1, padding=1, bias=True
169
- )
170
-
171
- self.conv2 = nn.Conv2d(
172
- features, features, kernel_size=3, stride=1, padding=1, bias=True
173
- )
174
-
175
- self.relu = nn.ReLU(inplace=True)
176
-
177
- def forward(self, x):
178
- """Forward pass.
179
-
180
- Args:
181
- x (tensor): input
182
-
183
- Returns:
184
- tensor: output
185
- """
186
- out = self.relu(x)
187
- out = self.conv1(out)
188
- out = self.relu(out)
189
- out = self.conv2(out)
190
-
191
- return out + x
192
-
193
-
194
- class FeatureFusionBlock(nn.Module):
195
- """Feature fusion block.
196
- """
197
-
198
- def __init__(self, features):
199
- """Init.
200
-
201
- Args:
202
- features (int): number of features
203
- """
204
- super(FeatureFusionBlock, self).__init__()
205
-
206
- self.resConfUnit1 = ResidualConvUnit(features)
207
- self.resConfUnit2 = ResidualConvUnit(features)
208
-
209
- def forward(self, *xs):
210
- """Forward pass.
211
-
212
- Returns:
213
- tensor: output
214
- """
215
- output = xs[0]
216
-
217
- if len(xs) == 2:
218
- output += self.resConfUnit1(xs[1])
219
-
220
- output = self.resConfUnit2(output)
221
-
222
- output = nn.functional.interpolate(
223
- output, scale_factor=2, mode="bilinear", align_corners=True
224
- )
225
-
226
- return output
227
-
228
-
229
-
230
-
231
- class ResidualConvUnit_custom(nn.Module):
232
- """Residual convolution module.
233
- """
234
-
235
- def __init__(self, features, activation, bn):
236
- """Init.
237
-
238
- Args:
239
- features (int): number of features
240
- """
241
- super().__init__()
242
-
243
- self.bn = bn
244
-
245
- self.groups=1
246
-
247
- self.conv1 = nn.Conv2d(
248
- features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
249
- )
250
-
251
- self.conv2 = nn.Conv2d(
252
- features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
253
- )
254
-
255
- if self.bn==True:
256
- self.bn1 = nn.BatchNorm2d(features)
257
- self.bn2 = nn.BatchNorm2d(features)
258
-
259
- self.activation = activation
260
-
261
- self.skip_add = nn.quantized.FloatFunctional()
262
-
263
- def forward(self, x):
264
- """Forward pass.
265
-
266
- Args:
267
- x (tensor): input
268
-
269
- Returns:
270
- tensor: output
271
- """
272
-
273
- out = self.activation(x)
274
- out = self.conv1(out)
275
- if self.bn==True:
276
- out = self.bn1(out)
277
-
278
- out = self.activation(out)
279
- out = self.conv2(out)
280
- if self.bn==True:
281
- out = self.bn2(out)
282
-
283
- if self.groups > 1:
284
- out = self.conv_merge(out)
285
-
286
- return self.skip_add.add(out, x)
287
-
288
- # return out + x
289
-
290
-
291
- class FeatureFusionBlock_custom(nn.Module):
292
- """Feature fusion block.
293
- """
294
-
295
- def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True):
296
- """Init.
297
-
298
- Args:
299
- features (int): number of features
300
- """
301
- super(FeatureFusionBlock_custom, self).__init__()
302
-
303
- self.deconv = deconv
304
- self.align_corners = align_corners
305
-
306
- self.groups=1
307
-
308
- self.expand = expand
309
- out_features = features
310
- if self.expand==True:
311
- out_features = features//2
312
-
313
- self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
314
-
315
- self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn)
316
- self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn)
317
-
318
- self.skip_add = nn.quantized.FloatFunctional()
319
-
320
- def forward(self, *xs):
321
- """Forward pass.
322
-
323
- Returns:
324
- tensor: output
325
- """
326
- output = xs[0]
327
-
328
- if len(xs) == 2:
329
- res = self.resConfUnit1(xs[1])
330
- output = self.skip_add.add(output, res)
331
- # output += res
332
-
333
- output = self.resConfUnit2(output)
334
-
335
- output = nn.functional.interpolate(
336
- output, scale_factor=2, mode="bilinear", align_corners=self.align_corners
337
- )
338
-
339
- output = self.out_conv(output)
340
-
341
- return output
342
-
annotator/midas/midas/dpt_depth.py DELETED
@@ -1,109 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
-
5
- from .base_model import BaseModel
6
- from .blocks import (
7
- FeatureFusionBlock,
8
- FeatureFusionBlock_custom,
9
- Interpolate,
10
- _make_encoder,
11
- forward_vit,
12
- )
13
-
14
-
15
- def _make_fusion_block(features, use_bn):
16
- return FeatureFusionBlock_custom(
17
- features,
18
- nn.ReLU(False),
19
- deconv=False,
20
- bn=use_bn,
21
- expand=False,
22
- align_corners=True,
23
- )
24
-
25
-
26
- class DPT(BaseModel):
27
- def __init__(
28
- self,
29
- head,
30
- features=256,
31
- backbone="vitb_rn50_384",
32
- readout="project",
33
- channels_last=False,
34
- use_bn=False,
35
- ):
36
-
37
- super(DPT, self).__init__()
38
-
39
- self.channels_last = channels_last
40
-
41
- hooks = {
42
- "vitb_rn50_384": [0, 1, 8, 11],
43
- "vitb16_384": [2, 5, 8, 11],
44
- "vitl16_384": [5, 11, 17, 23],
45
- }
46
-
47
- # Instantiate backbone and reassemble blocks
48
- self.pretrained, self.scratch = _make_encoder(
49
- backbone,
50
- features,
51
- False, # Set to true of you want to train from scratch, uses ImageNet weights
52
- groups=1,
53
- expand=False,
54
- exportable=False,
55
- hooks=hooks[backbone],
56
- use_readout=readout,
57
- )
58
-
59
- self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
60
- self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
61
- self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
62
- self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
63
-
64
- self.scratch.output_conv = head
65
-
66
-
67
- def forward(self, x):
68
- if self.channels_last == True:
69
- x.contiguous(memory_format=torch.channels_last)
70
-
71
- layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x)
72
-
73
- layer_1_rn = self.scratch.layer1_rn(layer_1)
74
- layer_2_rn = self.scratch.layer2_rn(layer_2)
75
- layer_3_rn = self.scratch.layer3_rn(layer_3)
76
- layer_4_rn = self.scratch.layer4_rn(layer_4)
77
-
78
- path_4 = self.scratch.refinenet4(layer_4_rn)
79
- path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
80
- path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
81
- path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
82
-
83
- out = self.scratch.output_conv(path_1)
84
-
85
- return out
86
-
87
-
88
- class DPTDepthModel(DPT):
89
- def __init__(self, path=None, non_negative=True, **kwargs):
90
- features = kwargs["features"] if "features" in kwargs else 256
91
-
92
- head = nn.Sequential(
93
- nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
94
- Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
95
- nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
96
- nn.ReLU(True),
97
- nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
98
- nn.ReLU(True) if non_negative else nn.Identity(),
99
- nn.Identity(),
100
- )
101
-
102
- super().__init__(head, **kwargs)
103
-
104
- if path is not None:
105
- self.load(path)
106
-
107
- def forward(self, x):
108
- return super().forward(x).squeeze(dim=1)
109
-
annotator/midas/midas/midas_net.py DELETED
@@ -1,76 +0,0 @@
1
- """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
2
- This file contains code that is adapted from
3
- https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4
- """
5
- import torch
6
- import torch.nn as nn
7
-
8
- from .base_model import BaseModel
9
- from .blocks import FeatureFusionBlock, Interpolate, _make_encoder
10
-
11
-
12
- class MidasNet(BaseModel):
13
- """Network for monocular depth estimation.
14
- """
15
-
16
- def __init__(self, path=None, features=256, non_negative=True):
17
- """Init.
18
-
19
- Args:
20
- path (str, optional): Path to saved model. Defaults to None.
21
- features (int, optional): Number of features. Defaults to 256.
22
- backbone (str, optional): Backbone network for encoder. Defaults to resnet50
23
- """
24
- print("Loading weights: ", path)
25
-
26
- super(MidasNet, self).__init__()
27
-
28
- use_pretrained = False if path is None else True
29
-
30
- self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)
31
-
32
- self.scratch.refinenet4 = FeatureFusionBlock(features)
33
- self.scratch.refinenet3 = FeatureFusionBlock(features)
34
- self.scratch.refinenet2 = FeatureFusionBlock(features)
35
- self.scratch.refinenet1 = FeatureFusionBlock(features)
36
-
37
- self.scratch.output_conv = nn.Sequential(
38
- nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
39
- Interpolate(scale_factor=2, mode="bilinear"),
40
- nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
41
- nn.ReLU(True),
42
- nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
43
- nn.ReLU(True) if non_negative else nn.Identity(),
44
- )
45
-
46
- if path:
47
- self.load(path)
48
-
49
- def forward(self, x):
50
- """Forward pass.
51
-
52
- Args:
53
- x (tensor): input data (image)
54
-
55
- Returns:
56
- tensor: depth
57
- """
58
-
59
- layer_1 = self.pretrained.layer1(x)
60
- layer_2 = self.pretrained.layer2(layer_1)
61
- layer_3 = self.pretrained.layer3(layer_2)
62
- layer_4 = self.pretrained.layer4(layer_3)
63
-
64
- layer_1_rn = self.scratch.layer1_rn(layer_1)
65
- layer_2_rn = self.scratch.layer2_rn(layer_2)
66
- layer_3_rn = self.scratch.layer3_rn(layer_3)
67
- layer_4_rn = self.scratch.layer4_rn(layer_4)
68
-
69
- path_4 = self.scratch.refinenet4(layer_4_rn)
70
- path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
71
- path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
72
- path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
73
-
74
- out = self.scratch.output_conv(path_1)
75
-
76
- return torch.squeeze(out, dim=1)
annotator/midas/midas/midas_net_custom.py DELETED
@@ -1,128 +0,0 @@
1
- """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
2
- This file contains code that is adapted from
3
- https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
4
- """
5
- import torch
6
- import torch.nn as nn
7
-
8
- from .base_model import BaseModel
9
- from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder
10
-
11
-
12
- class MidasNet_small(BaseModel):
13
- """Network for monocular depth estimation.
14
- """
15
-
16
- def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True,
17
- blocks={'expand': True}):
18
- """Init.
19
-
20
- Args:
21
- path (str, optional): Path to saved model. Defaults to None.
22
- features (int, optional): Number of features. Defaults to 256.
23
- backbone (str, optional): Backbone network for encoder. Defaults to resnet50
24
- """
25
- print("Loading weights: ", path)
26
-
27
- super(MidasNet_small, self).__init__()
28
-
29
- use_pretrained = False if path else True
30
-
31
- self.channels_last = channels_last
32
- self.blocks = blocks
33
- self.backbone = backbone
34
-
35
- self.groups = 1
36
-
37
- features1=features
38
- features2=features
39
- features3=features
40
- features4=features
41
- self.expand = False
42
- if "expand" in self.blocks and self.blocks['expand'] == True:
43
- self.expand = True
44
- features1=features
45
- features2=features*2
46
- features3=features*4
47
- features4=features*8
48
-
49
- self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable)
50
-
51
- self.scratch.activation = nn.ReLU(False)
52
-
53
- self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
54
- self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
55
- self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
56
- self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners)
57
-
58
-
59
- self.scratch.output_conv = nn.Sequential(
60
- nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups),
61
- Interpolate(scale_factor=2, mode="bilinear"),
62
- nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1),
63
- self.scratch.activation,
64
- nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
65
- nn.ReLU(True) if non_negative else nn.Identity(),
66
- nn.Identity(),
67
- )
68
-
69
- if path:
70
- self.load(path)
71
-
72
-
73
- def forward(self, x):
74
- """Forward pass.
75
-
76
- Args:
77
- x (tensor): input data (image)
78
-
79
- Returns:
80
- tensor: depth
81
- """
82
- if self.channels_last==True:
83
- print("self.channels_last = ", self.channels_last)
84
- x.contiguous(memory_format=torch.channels_last)
85
-
86
-
87
- layer_1 = self.pretrained.layer1(x)
88
- layer_2 = self.pretrained.layer2(layer_1)
89
- layer_3 = self.pretrained.layer3(layer_2)
90
- layer_4 = self.pretrained.layer4(layer_3)
91
-
92
- layer_1_rn = self.scratch.layer1_rn(layer_1)
93
- layer_2_rn = self.scratch.layer2_rn(layer_2)
94
- layer_3_rn = self.scratch.layer3_rn(layer_3)
95
- layer_4_rn = self.scratch.layer4_rn(layer_4)
96
-
97
-
98
- path_4 = self.scratch.refinenet4(layer_4_rn)
99
- path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
100
- path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
101
- path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
102
-
103
- out = self.scratch.output_conv(path_1)
104
-
105
- return torch.squeeze(out, dim=1)
106
-
107
-
108
-
109
- def fuse_model(m):
110
- prev_previous_type = nn.Identity()
111
- prev_previous_name = ''
112
- previous_type = nn.Identity()
113
- previous_name = ''
114
- for name, module in m.named_modules():
115
- if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU:
116
- # print("FUSED ", prev_previous_name, previous_name, name)
117
- torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True)
118
- elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d:
119
- # print("FUSED ", prev_previous_name, previous_name)
120
- torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True)
121
- # elif previous_type == nn.Conv2d and type(module) == nn.ReLU:
122
- # print("FUSED ", previous_name, name)
123
- # torch.quantization.fuse_modules(m, [previous_name, name], inplace=True)
124
-
125
- prev_previous_type = previous_type
126
- prev_previous_name = previous_name
127
- previous_type = type(module)
128
- previous_name = name
annotator/midas/midas/transforms.py DELETED
@@ -1,234 +0,0 @@
1
- import numpy as np
2
- import cv2
3
- import math
4
-
5
-
6
- def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
7
- """Rezise the sample to ensure the given size. Keeps aspect ratio.
8
-
9
- Args:
10
- sample (dict): sample
11
- size (tuple): image size
12
-
13
- Returns:
14
- tuple: new size
15
- """
16
- shape = list(sample["disparity"].shape)
17
-
18
- if shape[0] >= size[0] and shape[1] >= size[1]:
19
- return sample
20
-
21
- scale = [0, 0]
22
- scale[0] = size[0] / shape[0]
23
- scale[1] = size[1] / shape[1]
24
-
25
- scale = max(scale)
26
-
27
- shape[0] = math.ceil(scale * shape[0])
28
- shape[1] = math.ceil(scale * shape[1])
29
-
30
- # resize
31
- sample["image"] = cv2.resize(
32
- sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
33
- )
34
-
35
- sample["disparity"] = cv2.resize(
36
- sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
37
- )
38
- sample["mask"] = cv2.resize(
39
- sample["mask"].astype(np.float32),
40
- tuple(shape[::-1]),
41
- interpolation=cv2.INTER_NEAREST,
42
- )
43
- sample["mask"] = sample["mask"].astype(bool)
44
-
45
- return tuple(shape)
46
-
47
-
48
- class Resize(object):
49
- """Resize sample to given size (width, height).
50
- """
51
-
52
- def __init__(
53
- self,
54
- width,
55
- height,
56
- resize_target=True,
57
- keep_aspect_ratio=False,
58
- ensure_multiple_of=1,
59
- resize_method="lower_bound",
60
- image_interpolation_method=cv2.INTER_AREA,
61
- ):
62
- """Init.
63
-
64
- Args:
65
- width (int): desired output width
66
- height (int): desired output height
67
- resize_target (bool, optional):
68
- True: Resize the full sample (image, mask, target).
69
- False: Resize image only.
70
- Defaults to True.
71
- keep_aspect_ratio (bool, optional):
72
- True: Keep the aspect ratio of the input sample.
73
- Output sample might not have the given width and height, and
74
- resize behaviour depends on the parameter 'resize_method'.
75
- Defaults to False.
76
- ensure_multiple_of (int, optional):
77
- Output width and height is constrained to be multiple of this parameter.
78
- Defaults to 1.
79
- resize_method (str, optional):
80
- "lower_bound": Output will be at least as large as the given size.
81
- "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
82
- "minimal": Scale as least as possible. (Output size might be smaller than given size.)
83
- Defaults to "lower_bound".
84
- """
85
- self.__width = width
86
- self.__height = height
87
-
88
- self.__resize_target = resize_target
89
- self.__keep_aspect_ratio = keep_aspect_ratio
90
- self.__multiple_of = ensure_multiple_of
91
- self.__resize_method = resize_method
92
- self.__image_interpolation_method = image_interpolation_method
93
-
94
- def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
95
- y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
96
-
97
- if max_val is not None and y > max_val:
98
- y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
99
-
100
- if y < min_val:
101
- y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
102
-
103
- return y
104
-
105
- def get_size(self, width, height):
106
- # determine new height and width
107
- scale_height = self.__height / height
108
- scale_width = self.__width / width
109
-
110
- if self.__keep_aspect_ratio:
111
- if self.__resize_method == "lower_bound":
112
- # scale such that output size is lower bound
113
- if scale_width > scale_height:
114
- # fit width
115
- scale_height = scale_width
116
- else:
117
- # fit height
118
- scale_width = scale_height
119
- elif self.__resize_method == "upper_bound":
120
- # scale such that output size is upper bound
121
- if scale_width < scale_height:
122
- # fit width
123
- scale_height = scale_width
124
- else:
125
- # fit height
126
- scale_width = scale_height
127
- elif self.__resize_method == "minimal":
128
- # scale as least as possbile
129
- if abs(1 - scale_width) < abs(1 - scale_height):
130
- # fit width
131
- scale_height = scale_width
132
- else:
133
- # fit height
134
- scale_width = scale_height
135
- else:
136
- raise ValueError(
137
- f"resize_method {self.__resize_method} not implemented"
138
- )
139
-
140
- if self.__resize_method == "lower_bound":
141
- new_height = self.constrain_to_multiple_of(
142
- scale_height * height, min_val=self.__height
143
- )
144
- new_width = self.constrain_to_multiple_of(
145
- scale_width * width, min_val=self.__width
146
- )
147
- elif self.__resize_method == "upper_bound":
148
- new_height = self.constrain_to_multiple_of(
149
- scale_height * height, max_val=self.__height
150
- )
151
- new_width = self.constrain_to_multiple_of(
152
- scale_width * width, max_val=self.__width
153
- )
154
- elif self.__resize_method == "minimal":
155
- new_height = self.constrain_to_multiple_of(scale_height * height)
156
- new_width = self.constrain_to_multiple_of(scale_width * width)
157
- else:
158
- raise ValueError(f"resize_method {self.__resize_method} not implemented")
159
-
160
- return (new_width, new_height)
161
-
162
- def __call__(self, sample):
163
- width, height = self.get_size(
164
- sample["image"].shape[1], sample["image"].shape[0]
165
- )
166
-
167
- # resize sample
168
- sample["image"] = cv2.resize(
169
- sample["image"],
170
- (width, height),
171
- interpolation=self.__image_interpolation_method,
172
- )
173
-
174
- if self.__resize_target:
175
- if "disparity" in sample:
176
- sample["disparity"] = cv2.resize(
177
- sample["disparity"],
178
- (width, height),
179
- interpolation=cv2.INTER_NEAREST,
180
- )
181
-
182
- if "depth" in sample:
183
- sample["depth"] = cv2.resize(
184
- sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
185
- )
186
-
187
- sample["mask"] = cv2.resize(
188
- sample["mask"].astype(np.float32),
189
- (width, height),
190
- interpolation=cv2.INTER_NEAREST,
191
- )
192
- sample["mask"] = sample["mask"].astype(bool)
193
-
194
- return sample
195
-
196
-
197
- class NormalizeImage(object):
198
- """Normlize image by given mean and std.
199
- """
200
-
201
- def __init__(self, mean, std):
202
- self.__mean = mean
203
- self.__std = std
204
-
205
- def __call__(self, sample):
206
- sample["image"] = (sample["image"] - self.__mean) / self.__std
207
-
208
- return sample
209
-
210
-
211
- class PrepareForNet(object):
212
- """Prepare sample for usage as network input.
213
- """
214
-
215
- def __init__(self):
216
- pass
217
-
218
- def __call__(self, sample):
219
- image = np.transpose(sample["image"], (2, 0, 1))
220
- sample["image"] = np.ascontiguousarray(image).astype(np.float32)
221
-
222
- if "mask" in sample:
223
- sample["mask"] = sample["mask"].astype(np.float32)
224
- sample["mask"] = np.ascontiguousarray(sample["mask"])
225
-
226
- if "disparity" in sample:
227
- disparity = sample["disparity"].astype(np.float32)
228
- sample["disparity"] = np.ascontiguousarray(disparity)
229
-
230
- if "depth" in sample:
231
- depth = sample["depth"].astype(np.float32)
232
- sample["depth"] = np.ascontiguousarray(depth)
233
-
234
- return sample
annotator/midas/midas/vit.py DELETED
@@ -1,491 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import timm
4
- import types
5
- import math
6
- import torch.nn.functional as F
7
-
8
-
9
- class Slice(nn.Module):
10
- def __init__(self, start_index=1):
11
- super(Slice, self).__init__()
12
- self.start_index = start_index
13
-
14
- def forward(self, x):
15
- return x[:, self.start_index :]
16
-
17
-
18
- class AddReadout(nn.Module):
19
- def __init__(self, start_index=1):
20
- super(AddReadout, self).__init__()
21
- self.start_index = start_index
22
-
23
- def forward(self, x):
24
- if self.start_index == 2:
25
- readout = (x[:, 0] + x[:, 1]) / 2
26
- else:
27
- readout = x[:, 0]
28
- return x[:, self.start_index :] + readout.unsqueeze(1)
29
-
30
-
31
- class ProjectReadout(nn.Module):
32
- def __init__(self, in_features, start_index=1):
33
- super(ProjectReadout, self).__init__()
34
- self.start_index = start_index
35
-
36
- self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU())
37
-
38
- def forward(self, x):
39
- readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :])
40
- features = torch.cat((x[:, self.start_index :], readout), -1)
41
-
42
- return self.project(features)
43
-
44
-
45
- class Transpose(nn.Module):
46
- def __init__(self, dim0, dim1):
47
- super(Transpose, self).__init__()
48
- self.dim0 = dim0
49
- self.dim1 = dim1
50
-
51
- def forward(self, x):
52
- x = x.transpose(self.dim0, self.dim1)
53
- return x
54
-
55
-
56
- def forward_vit(pretrained, x):
57
- b, c, h, w = x.shape
58
-
59
- glob = pretrained.model.forward_flex(x)
60
-
61
- layer_1 = pretrained.activations["1"]
62
- layer_2 = pretrained.activations["2"]
63
- layer_3 = pretrained.activations["3"]
64
- layer_4 = pretrained.activations["4"]
65
-
66
- layer_1 = pretrained.act_postprocess1[0:2](layer_1)
67
- layer_2 = pretrained.act_postprocess2[0:2](layer_2)
68
- layer_3 = pretrained.act_postprocess3[0:2](layer_3)
69
- layer_4 = pretrained.act_postprocess4[0:2](layer_4)
70
-
71
- unflatten = nn.Sequential(
72
- nn.Unflatten(
73
- 2,
74
- torch.Size(
75
- [
76
- h // pretrained.model.patch_size[1],
77
- w // pretrained.model.patch_size[0],
78
- ]
79
- ),
80
- )
81
- )
82
-
83
- if layer_1.ndim == 3:
84
- layer_1 = unflatten(layer_1)
85
- if layer_2.ndim == 3:
86
- layer_2 = unflatten(layer_2)
87
- if layer_3.ndim == 3:
88
- layer_3 = unflatten(layer_3)
89
- if layer_4.ndim == 3:
90
- layer_4 = unflatten(layer_4)
91
-
92
- layer_1 = pretrained.act_postprocess1[3 : len(pretrained.act_postprocess1)](layer_1)
93
- layer_2 = pretrained.act_postprocess2[3 : len(pretrained.act_postprocess2)](layer_2)
94
- layer_3 = pretrained.act_postprocess3[3 : len(pretrained.act_postprocess3)](layer_3)
95
- layer_4 = pretrained.act_postprocess4[3 : len(pretrained.act_postprocess4)](layer_4)
96
-
97
- return layer_1, layer_2, layer_3, layer_4
98
-
99
-
100
- def _resize_pos_embed(self, posemb, gs_h, gs_w):
101
- posemb_tok, posemb_grid = (
102
- posemb[:, : self.start_index],
103
- posemb[0, self.start_index :],
104
- )
105
-
106
- gs_old = int(math.sqrt(len(posemb_grid)))
107
-
108
- posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
109
- posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear")
110
- posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1)
111
-
112
- posemb = torch.cat([posemb_tok, posemb_grid], dim=1)
113
-
114
- return posemb
115
-
116
-
117
- def forward_flex(self, x):
118
- b, c, h, w = x.shape
119
-
120
- pos_embed = self._resize_pos_embed(
121
- self.pos_embed, h // self.patch_size[1], w // self.patch_size[0]
122
- )
123
-
124
- B = x.shape[0]
125
-
126
- if hasattr(self.patch_embed, "backbone"):
127
- x = self.patch_embed.backbone(x)
128
- if isinstance(x, (list, tuple)):
129
- x = x[-1] # last feature if backbone outputs list/tuple of features
130
-
131
- x = self.patch_embed.proj(x).flatten(2).transpose(1, 2)
132
-
133
- if getattr(self, "dist_token", None) is not None:
134
- cls_tokens = self.cls_token.expand(
135
- B, -1, -1
136
- ) # stole cls_tokens impl from Phil Wang, thanks
137
- dist_token = self.dist_token.expand(B, -1, -1)
138
- x = torch.cat((cls_tokens, dist_token, x), dim=1)
139
- else:
140
- cls_tokens = self.cls_token.expand(
141
- B, -1, -1
142
- ) # stole cls_tokens impl from Phil Wang, thanks
143
- x = torch.cat((cls_tokens, x), dim=1)
144
-
145
- x = x + pos_embed
146
- x = self.pos_drop(x)
147
-
148
- for blk in self.blocks:
149
- x = blk(x)
150
-
151
- x = self.norm(x)
152
-
153
- return x
154
-
155
-
156
- activations = {}
157
-
158
-
159
- def get_activation(name):
160
- def hook(model, input, output):
161
- activations[name] = output
162
-
163
- return hook
164
-
165
-
166
- def get_readout_oper(vit_features, features, use_readout, start_index=1):
167
- if use_readout == "ignore":
168
- readout_oper = [Slice(start_index)] * len(features)
169
- elif use_readout == "add":
170
- readout_oper = [AddReadout(start_index)] * len(features)
171
- elif use_readout == "project":
172
- readout_oper = [
173
- ProjectReadout(vit_features, start_index) for out_feat in features
174
- ]
175
- else:
176
- assert (
177
- False
178
- ), "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'"
179
-
180
- return readout_oper
181
-
182
-
183
- def _make_vit_b16_backbone(
184
- model,
185
- features=[96, 192, 384, 768],
186
- size=[384, 384],
187
- hooks=[2, 5, 8, 11],
188
- vit_features=768,
189
- use_readout="ignore",
190
- start_index=1,
191
- ):
192
- pretrained = nn.Module()
193
-
194
- pretrained.model = model
195
- pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
196
- pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
197
- pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
198
- pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4"))
199
-
200
- pretrained.activations = activations
201
-
202
- readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)
203
-
204
- # 32, 48, 136, 384
205
- pretrained.act_postprocess1 = nn.Sequential(
206
- readout_oper[0],
207
- Transpose(1, 2),
208
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
209
- nn.Conv2d(
210
- in_channels=vit_features,
211
- out_channels=features[0],
212
- kernel_size=1,
213
- stride=1,
214
- padding=0,
215
- ),
216
- nn.ConvTranspose2d(
217
- in_channels=features[0],
218
- out_channels=features[0],
219
- kernel_size=4,
220
- stride=4,
221
- padding=0,
222
- bias=True,
223
- dilation=1,
224
- groups=1,
225
- ),
226
- )
227
-
228
- pretrained.act_postprocess2 = nn.Sequential(
229
- readout_oper[1],
230
- Transpose(1, 2),
231
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
232
- nn.Conv2d(
233
- in_channels=vit_features,
234
- out_channels=features[1],
235
- kernel_size=1,
236
- stride=1,
237
- padding=0,
238
- ),
239
- nn.ConvTranspose2d(
240
- in_channels=features[1],
241
- out_channels=features[1],
242
- kernel_size=2,
243
- stride=2,
244
- padding=0,
245
- bias=True,
246
- dilation=1,
247
- groups=1,
248
- ),
249
- )
250
-
251
- pretrained.act_postprocess3 = nn.Sequential(
252
- readout_oper[2],
253
- Transpose(1, 2),
254
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
255
- nn.Conv2d(
256
- in_channels=vit_features,
257
- out_channels=features[2],
258
- kernel_size=1,
259
- stride=1,
260
- padding=0,
261
- ),
262
- )
263
-
264
- pretrained.act_postprocess4 = nn.Sequential(
265
- readout_oper[3],
266
- Transpose(1, 2),
267
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
268
- nn.Conv2d(
269
- in_channels=vit_features,
270
- out_channels=features[3],
271
- kernel_size=1,
272
- stride=1,
273
- padding=0,
274
- ),
275
- nn.Conv2d(
276
- in_channels=features[3],
277
- out_channels=features[3],
278
- kernel_size=3,
279
- stride=2,
280
- padding=1,
281
- ),
282
- )
283
-
284
- pretrained.model.start_index = start_index
285
- pretrained.model.patch_size = [16, 16]
286
-
287
- # We inject this function into the VisionTransformer instances so that
288
- # we can use it with interpolated position embeddings without modifying the library source.
289
- pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)
290
- pretrained.model._resize_pos_embed = types.MethodType(
291
- _resize_pos_embed, pretrained.model
292
- )
293
-
294
- return pretrained
295
-
296
-
297
- def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None):
298
- model = timm.create_model("vit_large_patch16_384", pretrained=pretrained)
299
-
300
- hooks = [5, 11, 17, 23] if hooks == None else hooks
301
- return _make_vit_b16_backbone(
302
- model,
303
- features=[256, 512, 1024, 1024],
304
- hooks=hooks,
305
- vit_features=1024,
306
- use_readout=use_readout,
307
- )
308
-
309
-
310
- def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None):
311
- model = timm.create_model("vit_base_patch16_384", pretrained=pretrained)
312
-
313
- hooks = [2, 5, 8, 11] if hooks == None else hooks
314
- return _make_vit_b16_backbone(
315
- model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
316
- )
317
-
318
-
319
- def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks=None):
320
- model = timm.create_model("vit_deit_base_patch16_384", pretrained=pretrained)
321
-
322
- hooks = [2, 5, 8, 11] if hooks == None else hooks
323
- return _make_vit_b16_backbone(
324
- model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
325
- )
326
-
327
-
328
- def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore", hooks=None):
329
- model = timm.create_model(
330
- "vit_deit_base_distilled_patch16_384", pretrained=pretrained
331
- )
332
-
333
- hooks = [2, 5, 8, 11] if hooks == None else hooks
334
- return _make_vit_b16_backbone(
335
- model,
336
- features=[96, 192, 384, 768],
337
- hooks=hooks,
338
- use_readout=use_readout,
339
- start_index=2,
340
- )
341
-
342
-
343
- def _make_vit_b_rn50_backbone(
344
- model,
345
- features=[256, 512, 768, 768],
346
- size=[384, 384],
347
- hooks=[0, 1, 8, 11],
348
- vit_features=768,
349
- use_vit_only=False,
350
- use_readout="ignore",
351
- start_index=1,
352
- ):
353
- pretrained = nn.Module()
354
-
355
- pretrained.model = model
356
-
357
- if use_vit_only == True:
358
- pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1"))
359
- pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2"))
360
- else:
361
- pretrained.model.patch_embed.backbone.stages[0].register_forward_hook(
362
- get_activation("1")
363
- )
364
- pretrained.model.patch_embed.backbone.stages[1].register_forward_hook(
365
- get_activation("2")
366
- )
367
-
368
- pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3"))
369
- pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4"))
370
-
371
- pretrained.activations = activations
372
-
373
- readout_oper = get_readout_oper(vit_features, features, use_readout, start_index)
374
-
375
- if use_vit_only == True:
376
- pretrained.act_postprocess1 = nn.Sequential(
377
- readout_oper[0],
378
- Transpose(1, 2),
379
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
380
- nn.Conv2d(
381
- in_channels=vit_features,
382
- out_channels=features[0],
383
- kernel_size=1,
384
- stride=1,
385
- padding=0,
386
- ),
387
- nn.ConvTranspose2d(
388
- in_channels=features[0],
389
- out_channels=features[0],
390
- kernel_size=4,
391
- stride=4,
392
- padding=0,
393
- bias=True,
394
- dilation=1,
395
- groups=1,
396
- ),
397
- )
398
-
399
- pretrained.act_postprocess2 = nn.Sequential(
400
- readout_oper[1],
401
- Transpose(1, 2),
402
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
403
- nn.Conv2d(
404
- in_channels=vit_features,
405
- out_channels=features[1],
406
- kernel_size=1,
407
- stride=1,
408
- padding=0,
409
- ),
410
- nn.ConvTranspose2d(
411
- in_channels=features[1],
412
- out_channels=features[1],
413
- kernel_size=2,
414
- stride=2,
415
- padding=0,
416
- bias=True,
417
- dilation=1,
418
- groups=1,
419
- ),
420
- )
421
- else:
422
- pretrained.act_postprocess1 = nn.Sequential(
423
- nn.Identity(), nn.Identity(), nn.Identity()
424
- )
425
- pretrained.act_postprocess2 = nn.Sequential(
426
- nn.Identity(), nn.Identity(), nn.Identity()
427
- )
428
-
429
- pretrained.act_postprocess3 = nn.Sequential(
430
- readout_oper[2],
431
- Transpose(1, 2),
432
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
433
- nn.Conv2d(
434
- in_channels=vit_features,
435
- out_channels=features[2],
436
- kernel_size=1,
437
- stride=1,
438
- padding=0,
439
- ),
440
- )
441
-
442
- pretrained.act_postprocess4 = nn.Sequential(
443
- readout_oper[3],
444
- Transpose(1, 2),
445
- nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])),
446
- nn.Conv2d(
447
- in_channels=vit_features,
448
- out_channels=features[3],
449
- kernel_size=1,
450
- stride=1,
451
- padding=0,
452
- ),
453
- nn.Conv2d(
454
- in_channels=features[3],
455
- out_channels=features[3],
456
- kernel_size=3,
457
- stride=2,
458
- padding=1,
459
- ),
460
- )
461
-
462
- pretrained.model.start_index = start_index
463
- pretrained.model.patch_size = [16, 16]
464
-
465
- # We inject this function into the VisionTransformer instances so that
466
- # we can use it with interpolated position embeddings without modifying the library source.
467
- pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model)
468
-
469
- # We inject this function into the VisionTransformer instances so that
470
- # we can use it with interpolated position embeddings without modifying the library source.
471
- pretrained.model._resize_pos_embed = types.MethodType(
472
- _resize_pos_embed, pretrained.model
473
- )
474
-
475
- return pretrained
476
-
477
-
478
- def _make_pretrained_vitb_rn50_384(
479
- pretrained, use_readout="ignore", hooks=None, use_vit_only=False
480
- ):
481
- model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained)
482
-
483
- hooks = [0, 1, 8, 11] if hooks == None else hooks
484
- return _make_vit_b_rn50_backbone(
485
- model,
486
- features=[256, 512, 768, 768],
487
- size=[384, 384],
488
- hooks=hooks,
489
- use_vit_only=use_vit_only,
490
- use_readout=use_readout,
491
- )
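Note on the act_postprocess blocks deleted above: they turn a ViT token sequence back into a spatial feature map (readout, transpose, unflatten to a grid, then 1x1 projection and optional upsampling). The following is a minimal standalone sketch, not part of the repository; the 384x384 input, 16x16 patch size and the 768/256 channel widths are assumptions taken from the defaults visible in the hunk.

    import torch
    import torch.nn as nn

    # Assumed sizes: 384x384 input, 16x16 patches -> 24x24 = 576 tokens, vit_features = 768.
    tokens = torch.randn(1, 576, 768)                 # (batch, tokens, channels), readout already applied
    x = tokens.transpose(1, 2)                        # (batch, channels, tokens)
    x = nn.Unflatten(2, torch.Size([24, 24]))(x)      # back to a (batch, channels, 24, 24) grid
    proj = nn.Conv2d(768, 256, kernel_size=1)         # project to the pyramid width (features[0])
    up = nn.ConvTranspose2d(256, 256, kernel_size=4, stride=4)  # 4x upsampling for the finest level
    print(up(proj(x)).shape)                          # torch.Size([1, 256, 96, 96])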
 
annotator/midas/utils.py DELETED
@@ -1,189 +0,0 @@
1
- """Utils for monoDepth."""
2
- import sys
3
- import re
4
- import numpy as np
5
- import cv2
6
- import torch
7
-
8
-
9
- def read_pfm(path):
10
- """Read pfm file.
11
-
12
- Args:
13
- path (str): path to file
14
-
15
- Returns:
16
- tuple: (data, scale)
17
- """
18
- with open(path, "rb") as file:
19
-
20
- color = None
21
- width = None
22
- height = None
23
- scale = None
24
- endian = None
25
-
26
- header = file.readline().rstrip()
27
- if header.decode("ascii") == "PF":
28
- color = True
29
- elif header.decode("ascii") == "Pf":
30
- color = False
31
- else:
32
- raise Exception("Not a PFM file: " + path)
33
-
34
- dim_match = re.match(r"^(\d+)\s(\d+)\s$", file.readline().decode("ascii"))
35
- if dim_match:
36
- width, height = list(map(int, dim_match.groups()))
37
- else:
38
- raise Exception("Malformed PFM header.")
39
-
40
- scale = float(file.readline().decode("ascii").rstrip())
41
- if scale < 0:
42
- # little-endian
43
- endian = "<"
44
- scale = -scale
45
- else:
46
- # big-endian
47
- endian = ">"
48
-
49
- data = np.fromfile(file, endian + "f")
50
- shape = (height, width, 3) if color else (height, width)
51
-
52
- data = np.reshape(data, shape)
53
- data = np.flipud(data)
54
-
55
- return data, scale
56
-
57
-
58
- def write_pfm(path, image, scale=1):
59
- """Write pfm file.
60
-
61
- Args:
62
- path (str): path to file
63
- image (array): data
64
- scale (int, optional): Scale. Defaults to 1.
65
- """
66
-
67
- with open(path, "wb") as file:
68
- color = None
69
-
70
- if image.dtype.name != "float32":
71
- raise Exception("Image dtype must be float32.")
72
-
73
- image = np.flipud(image)
74
-
75
- if len(image.shape) == 3 and image.shape[2] == 3: # color image
76
- color = True
77
- elif (
78
- len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1
79
- ): # greyscale
80
- color = False
81
- else:
82
- raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")
83
-
84
- file.write("PF\n" if color else "Pf\n".encode())
85
- file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
86
-
87
- endian = image.dtype.byteorder
88
-
89
- if endian == "<" or endian == "=" and sys.byteorder == "little":
90
- scale = -scale
91
-
92
- file.write("%f\n".encode() % scale)
93
-
94
- image.tofile(file)
95
-
96
-
97
- def read_image(path):
98
- """Read image and output RGB image (0-1).
99
-
100
- Args:
101
- path (str): path to file
102
-
103
- Returns:
104
- array: RGB image (0-1)
105
- """
106
- img = cv2.imread(path)
107
-
108
- if img.ndim == 2:
109
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
110
-
111
- img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
112
-
113
- return img
114
-
115
-
116
- def resize_image(img):
117
- """Resize image and make it fit for network.
118
-
119
- Args:
120
- img (array): image
121
-
122
- Returns:
123
- tensor: data ready for network
124
- """
125
- height_orig = img.shape[0]
126
- width_orig = img.shape[1]
127
-
128
- if width_orig > height_orig:
129
- scale = width_orig / 384
130
- else:
131
- scale = height_orig / 384
132
-
133
- height = (np.ceil(height_orig / scale / 32) * 32).astype(int)
134
- width = (np.ceil(width_orig / scale / 32) * 32).astype(int)
135
-
136
- img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
137
-
138
- img_resized = (
139
- torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float()
140
- )
141
- img_resized = img_resized.unsqueeze(0)
142
-
143
- return img_resized
144
-
145
-
146
- def resize_depth(depth, width, height):
147
- """Resize depth map and bring to CPU (numpy).
148
-
149
- Args:
150
- depth (tensor): depth
151
- width (int): image width
152
- height (int): image height
153
-
154
- Returns:
155
- array: processed depth
156
- """
157
- depth = torch.squeeze(depth[0, :, :, :]).to("cpu")
158
-
159
- depth_resized = cv2.resize(
160
- depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC
161
- )
162
-
163
- return depth_resized
164
-
165
- def write_depth(path, depth, bits=1):
166
- """Write depth map to pfm and png file.
167
-
168
- Args:
169
- path (str): filepath without extension
170
- depth (array): depth
171
- """
172
- write_pfm(path + ".pfm", depth.astype(np.float32))
173
-
174
- depth_min = depth.min()
175
- depth_max = depth.max()
176
-
177
- max_val = (2**(8*bits))-1
178
-
179
- if depth_max - depth_min > np.finfo("float").eps:
180
- out = max_val * (depth - depth_min) / (depth_max - depth_min)
181
- else:
182
- out = np.zeros(depth.shape, dtype=depth.type)
183
-
184
- if bits == 1:
185
- cv2.imwrite(path + ".png", out.astype("uint8"))
186
- elif bits == 2:
187
- cv2.imwrite(path + ".png", out.astype("uint16"))
188
-
189
- return
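As a side note on the write_depth routine removed above, the sketch below reproduces its min-max normalisation mapped onto a 16-bit range (bits=2); the random input is only for illustration.

    import numpy as np

    depth = np.random.rand(4, 4).astype(np.float32) * 10.0   # illustrative depth map
    bits = 2
    max_val = (2 ** (8 * bits)) - 1                           # 65535 for a uint16 PNG
    span = depth.max() - depth.min()
    if span > np.finfo("float").eps:
        out = max_val * (depth - depth.min()) / span
    else:
        out = np.zeros(depth.shape, dtype=np.float32)
    print(out.astype("uint16").max())                         # 65535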
 
annotator/mlsd/__init__.py DELETED
@@ -1,39 +0,0 @@
1
- import cv2
2
- import numpy as np
3
- import torch
4
- import os
5
-
6
- from einops import rearrange
7
- from .models.mbv2_mlsd_tiny import MobileV2_MLSD_Tiny
8
- from .models.mbv2_mlsd_large import MobileV2_MLSD_Large
9
- from .utils import pred_lines
10
-
11
- from annotator.util import annotator_ckpts_path
12
-
13
-
14
- remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/mlsd_large_512_fp32.pth"
15
-
16
-
17
- class MLSDdetector:
18
- def __init__(self):
19
- model_path = os.path.join(annotator_ckpts_path, "mlsd_large_512_fp32.pth")
20
- if not os.path.exists(model_path):
21
- from basicsr.utils.download_util import load_file_from_url
22
- load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
23
- model = MobileV2_MLSD_Large()
24
- model.load_state_dict(torch.load(model_path), strict=True)
25
- self.model = model.cuda().eval()
26
-
27
- def __call__(self, input_image, thr_v, thr_d):
28
- assert input_image.ndim == 3
29
- img = input_image
30
- img_output = np.zeros_like(img)
31
- try:
32
- with torch.no_grad():
33
- lines = pred_lines(img, self.model, [img.shape[0], img.shape[1]], thr_v, thr_d)
34
- for line in lines:
35
- x_start, y_start, x_end, y_end = [int(val) for val in line]
36
- cv2.line(img_output, (x_start, y_start), (x_end, y_end), [255, 255, 255], 1)
37
- except Exception as e:
38
- pass
39
- return img_output[:, :, 0]
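The constructor deleted above pins the MLSD model to CUDA via model.cuda(), which is exactly the kind of call this commit's "support cpu mode" change has to avoid. The helper below is only an illustrative, device-agnostic pattern, not necessarily the replacement the author made:

    import torch

    def to_best_device(model: torch.nn.Module) -> torch.nn.Module:
        """Move a model to CUDA when available, otherwise keep it on CPU, in eval mode."""
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return model.eval().to(device)

    # State dicts saved on a GPU also need map_location so they load on CPU-only hosts, e.g.:
    # model.load_state_dict(torch.load(model_path, map_location="cpu"), strict=True)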
 
annotator/mlsd/models/mbv2_mlsd_large.py DELETED
@@ -1,292 +0,0 @@
1
- import os
2
- import sys
3
- import torch
4
- import torch.nn as nn
5
- import torch.utils.model_zoo as model_zoo
6
- from torch.nn import functional as F
7
-
8
-
9
- class BlockTypeA(nn.Module):
10
- def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True):
11
- super(BlockTypeA, self).__init__()
12
- self.conv1 = nn.Sequential(
13
- nn.Conv2d(in_c2, out_c2, kernel_size=1),
14
- nn.BatchNorm2d(out_c2),
15
- nn.ReLU(inplace=True)
16
- )
17
- self.conv2 = nn.Sequential(
18
- nn.Conv2d(in_c1, out_c1, kernel_size=1),
19
- nn.BatchNorm2d(out_c1),
20
- nn.ReLU(inplace=True)
21
- )
22
- self.upscale = upscale
23
-
24
- def forward(self, a, b):
25
- b = self.conv1(b)
26
- a = self.conv2(a)
27
- if self.upscale:
28
- b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True)
29
- return torch.cat((a, b), dim=1)
30
-
31
-
32
- class BlockTypeB(nn.Module):
33
- def __init__(self, in_c, out_c):
34
- super(BlockTypeB, self).__init__()
35
- self.conv1 = nn.Sequential(
36
- nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
37
- nn.BatchNorm2d(in_c),
38
- nn.ReLU()
39
- )
40
- self.conv2 = nn.Sequential(
41
- nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
42
- nn.BatchNorm2d(out_c),
43
- nn.ReLU()
44
- )
45
-
46
- def forward(self, x):
47
- x = self.conv1(x) + x
48
- x = self.conv2(x)
49
- return x
50
-
51
- class BlockTypeC(nn.Module):
52
- def __init__(self, in_c, out_c):
53
- super(BlockTypeC, self).__init__()
54
- self.conv1 = nn.Sequential(
55
- nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5),
56
- nn.BatchNorm2d(in_c),
57
- nn.ReLU()
58
- )
59
- self.conv2 = nn.Sequential(
60
- nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
61
- nn.BatchNorm2d(in_c),
62
- nn.ReLU()
63
- )
64
- self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1)
65
-
66
- def forward(self, x):
67
- x = self.conv1(x)
68
- x = self.conv2(x)
69
- x = self.conv3(x)
70
- return x
71
-
72
- def _make_divisible(v, divisor, min_value=None):
73
- """
74
- This function is taken from the original tf repo.
75
- It ensures that all layers have a channel number that is divisible by 8
76
- It can be seen here:
77
- https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
78
- :param v:
79
- :param divisor:
80
- :param min_value:
81
- :return:
82
- """
83
- if min_value is None:
84
- min_value = divisor
85
- new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
86
- # Make sure that round down does not go down by more than 10%.
87
- if new_v < 0.9 * v:
88
- new_v += divisor
89
- return new_v
90
-
91
-
92
- class ConvBNReLU(nn.Sequential):
93
- def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
94
- self.channel_pad = out_planes - in_planes
95
- self.stride = stride
96
- #padding = (kernel_size - 1) // 2
97
-
98
- # TFLite uses slightly different padding than PyTorch
99
- if stride == 2:
100
- padding = 0
101
- else:
102
- padding = (kernel_size - 1) // 2
103
-
104
- super(ConvBNReLU, self).__init__(
105
- nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
106
- nn.BatchNorm2d(out_planes),
107
- nn.ReLU6(inplace=True)
108
- )
109
- self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
110
-
111
-
112
- def forward(self, x):
113
- # TFLite uses different padding
114
- if self.stride == 2:
115
- x = F.pad(x, (0, 1, 0, 1), "constant", 0)
116
- #print(x.shape)
117
-
118
- for module in self:
119
- if not isinstance(module, nn.MaxPool2d):
120
- x = module(x)
121
- return x
122
-
123
-
124
- class InvertedResidual(nn.Module):
125
- def __init__(self, inp, oup, stride, expand_ratio):
126
- super(InvertedResidual, self).__init__()
127
- self.stride = stride
128
- assert stride in [1, 2]
129
-
130
- hidden_dim = int(round(inp * expand_ratio))
131
- self.use_res_connect = self.stride == 1 and inp == oup
132
-
133
- layers = []
134
- if expand_ratio != 1:
135
- # pw
136
- layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
137
- layers.extend([
138
- # dw
139
- ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
140
- # pw-linear
141
- nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
142
- nn.BatchNorm2d(oup),
143
- ])
144
- self.conv = nn.Sequential(*layers)
145
-
146
- def forward(self, x):
147
- if self.use_res_connect:
148
- return x + self.conv(x)
149
- else:
150
- return self.conv(x)
151
-
152
-
153
- class MobileNetV2(nn.Module):
154
- def __init__(self, pretrained=True):
155
- """
156
- MobileNet V2 main class
157
- Args:
158
- num_classes (int): Number of classes
159
- width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
160
- inverted_residual_setting: Network structure
161
- round_nearest (int): Round the number of channels in each layer to be a multiple of this number
162
- Set to 1 to turn off rounding
163
- block: Module specifying inverted residual building block for mobilenet
164
- """
165
- super(MobileNetV2, self).__init__()
166
-
167
- block = InvertedResidual
168
- input_channel = 32
169
- last_channel = 1280
170
- width_mult = 1.0
171
- round_nearest = 8
172
-
173
- inverted_residual_setting = [
174
- # t, c, n, s
175
- [1, 16, 1, 1],
176
- [6, 24, 2, 2],
177
- [6, 32, 3, 2],
178
- [6, 64, 4, 2],
179
- [6, 96, 3, 1],
180
- #[6, 160, 3, 2],
181
- #[6, 320, 1, 1],
182
- ]
183
-
184
- # only check the first element, assuming user knows t,c,n,s are required
185
- if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
186
- raise ValueError("inverted_residual_setting should be non-empty "
187
- "or a 4-element list, got {}".format(inverted_residual_setting))
188
-
189
- # building first layer
190
- input_channel = _make_divisible(input_channel * width_mult, round_nearest)
191
- self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
192
- features = [ConvBNReLU(4, input_channel, stride=2)]
193
- # building inverted residual blocks
194
- for t, c, n, s in inverted_residual_setting:
195
- output_channel = _make_divisible(c * width_mult, round_nearest)
196
- for i in range(n):
197
- stride = s if i == 0 else 1
198
- features.append(block(input_channel, output_channel, stride, expand_ratio=t))
199
- input_channel = output_channel
200
-
201
- self.features = nn.Sequential(*features)
202
- self.fpn_selected = [1, 3, 6, 10, 13]
203
- # weight initialization
204
- for m in self.modules():
205
- if isinstance(m, nn.Conv2d):
206
- nn.init.kaiming_normal_(m.weight, mode='fan_out')
207
- if m.bias is not None:
208
- nn.init.zeros_(m.bias)
209
- elif isinstance(m, nn.BatchNorm2d):
210
- nn.init.ones_(m.weight)
211
- nn.init.zeros_(m.bias)
212
- elif isinstance(m, nn.Linear):
213
- nn.init.normal_(m.weight, 0, 0.01)
214
- nn.init.zeros_(m.bias)
215
- if pretrained:
216
- self._load_pretrained_model()
217
-
218
- def _forward_impl(self, x):
219
- # This exists since TorchScript doesn't support inheritance, so the superclass method
220
- # (this one) needs to have a name other than `forward` that can be accessed in a subclass
221
- fpn_features = []
222
- for i, f in enumerate(self.features):
223
- if i > self.fpn_selected[-1]:
224
- break
225
- x = f(x)
226
- if i in self.fpn_selected:
227
- fpn_features.append(x)
228
-
229
- c1, c2, c3, c4, c5 = fpn_features
230
- return c1, c2, c3, c4, c5
231
-
232
-
233
- def forward(self, x):
234
- return self._forward_impl(x)
235
-
236
- def _load_pretrained_model(self):
237
- pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth')
238
- model_dict = {}
239
- state_dict = self.state_dict()
240
- for k, v in pretrain_dict.items():
241
- if k in state_dict:
242
- model_dict[k] = v
243
- state_dict.update(model_dict)
244
- self.load_state_dict(state_dict)
245
-
246
-
247
- class MobileV2_MLSD_Large(nn.Module):
248
- def __init__(self):
249
- super(MobileV2_MLSD_Large, self).__init__()
250
-
251
- self.backbone = MobileNetV2(pretrained=False)
252
- ## A, B
253
- self.block15 = BlockTypeA(in_c1= 64, in_c2= 96,
254
- out_c1= 64, out_c2=64,
255
- upscale=False)
256
- self.block16 = BlockTypeB(128, 64)
257
-
258
- ## A, B
259
- self.block17 = BlockTypeA(in_c1 = 32, in_c2 = 64,
260
- out_c1= 64, out_c2= 64)
261
- self.block18 = BlockTypeB(128, 64)
262
-
263
- ## A, B
264
- self.block19 = BlockTypeA(in_c1=24, in_c2=64,
265
- out_c1=64, out_c2=64)
266
- self.block20 = BlockTypeB(128, 64)
267
-
268
- ## A, B, C
269
- self.block21 = BlockTypeA(in_c1=16, in_c2=64,
270
- out_c1=64, out_c2=64)
271
- self.block22 = BlockTypeB(128, 64)
272
-
273
- self.block23 = BlockTypeC(64, 16)
274
-
275
- def forward(self, x):
276
- c1, c2, c3, c4, c5 = self.backbone(x)
277
-
278
- x = self.block15(c4, c5)
279
- x = self.block16(x)
280
-
281
- x = self.block17(c3, x)
282
- x = self.block18(x)
283
-
284
- x = self.block19(c2, x)
285
- x = self.block20(x)
286
-
287
- x = self.block21(c1, x)
288
- x = self.block22(x)
289
- x = self.block23(x)
290
- x = x[:, 7:, :, :]
291
-
292
- return x
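A quick worked example of the channel rounding performed by _make_divisible in the file deleted above; the inputs are arbitrary and only meant to show the rounding and the 10% floor.

    def make_divisible(v, divisor=8, min_value=None):
        # Round v to the nearest multiple of divisor, never losing more than 10%.
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    print(make_divisible(32))       # 32  (already a multiple of 8)
    print(make_divisible(59))       # 56  (rounded down, still within 10%)
    print(make_divisible(12, 16))   # 16  (min_value clamps very small widths up)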
 
annotator/mlsd/models/mbv2_mlsd_tiny.py DELETED
@@ -1,275 +0,0 @@
1
- import os
2
- import sys
3
- import torch
4
- import torch.nn as nn
5
- import torch.utils.model_zoo as model_zoo
6
- from torch.nn import functional as F
7
-
8
-
9
- class BlockTypeA(nn.Module):
10
- def __init__(self, in_c1, in_c2, out_c1, out_c2, upscale = True):
11
- super(BlockTypeA, self).__init__()
12
- self.conv1 = nn.Sequential(
13
- nn.Conv2d(in_c2, out_c2, kernel_size=1),
14
- nn.BatchNorm2d(out_c2),
15
- nn.ReLU(inplace=True)
16
- )
17
- self.conv2 = nn.Sequential(
18
- nn.Conv2d(in_c1, out_c1, kernel_size=1),
19
- nn.BatchNorm2d(out_c1),
20
- nn.ReLU(inplace=True)
21
- )
22
- self.upscale = upscale
23
-
24
- def forward(self, a, b):
25
- b = self.conv1(b)
26
- a = self.conv2(a)
27
- b = F.interpolate(b, scale_factor=2.0, mode='bilinear', align_corners=True)
28
- return torch.cat((a, b), dim=1)
29
-
30
-
31
- class BlockTypeB(nn.Module):
32
- def __init__(self, in_c, out_c):
33
- super(BlockTypeB, self).__init__()
34
- self.conv1 = nn.Sequential(
35
- nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
36
- nn.BatchNorm2d(in_c),
37
- nn.ReLU()
38
- )
39
- self.conv2 = nn.Sequential(
40
- nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
41
- nn.BatchNorm2d(out_c),
42
- nn.ReLU()
43
- )
44
-
45
- def forward(self, x):
46
- x = self.conv1(x) + x
47
- x = self.conv2(x)
48
- return x
49
-
50
- class BlockTypeC(nn.Module):
51
- def __init__(self, in_c, out_c):
52
- super(BlockTypeC, self).__init__()
53
- self.conv1 = nn.Sequential(
54
- nn.Conv2d(in_c, in_c, kernel_size=3, padding=5, dilation=5),
55
- nn.BatchNorm2d(in_c),
56
- nn.ReLU()
57
- )
58
- self.conv2 = nn.Sequential(
59
- nn.Conv2d(in_c, in_c, kernel_size=3, padding=1),
60
- nn.BatchNorm2d(in_c),
61
- nn.ReLU()
62
- )
63
- self.conv3 = nn.Conv2d(in_c, out_c, kernel_size=1)
64
-
65
- def forward(self, x):
66
- x = self.conv1(x)
67
- x = self.conv2(x)
68
- x = self.conv3(x)
69
- return x
70
-
71
- def _make_divisible(v, divisor, min_value=None):
72
- """
73
- This function is taken from the original tf repo.
74
- It ensures that all layers have a channel number that is divisible by 8
75
- It can be seen here:
76
- https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
77
- :param v:
78
- :param divisor:
79
- :param min_value:
80
- :return:
81
- """
82
- if min_value is None:
83
- min_value = divisor
84
- new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
85
- # Make sure that round down does not go down by more than 10%.
86
- if new_v < 0.9 * v:
87
- new_v += divisor
88
- return new_v
89
-
90
-
91
- class ConvBNReLU(nn.Sequential):
92
- def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
93
- self.channel_pad = out_planes - in_planes
94
- self.stride = stride
95
- #padding = (kernel_size - 1) // 2
96
-
97
- # TFLite uses slightly different padding than PyTorch
98
- if stride == 2:
99
- padding = 0
100
- else:
101
- padding = (kernel_size - 1) // 2
102
-
103
- super(ConvBNReLU, self).__init__(
104
- nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
105
- nn.BatchNorm2d(out_planes),
106
- nn.ReLU6(inplace=True)
107
- )
108
- self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
109
-
110
-
111
- def forward(self, x):
112
- # TFLite uses different padding
113
- if self.stride == 2:
114
- x = F.pad(x, (0, 1, 0, 1), "constant", 0)
115
- #print(x.shape)
116
-
117
- for module in self:
118
- if not isinstance(module, nn.MaxPool2d):
119
- x = module(x)
120
- return x
121
-
122
-
123
- class InvertedResidual(nn.Module):
124
- def __init__(self, inp, oup, stride, expand_ratio):
125
- super(InvertedResidual, self).__init__()
126
- self.stride = stride
127
- assert stride in [1, 2]
128
-
129
- hidden_dim = int(round(inp * expand_ratio))
130
- self.use_res_connect = self.stride == 1 and inp == oup
131
-
132
- layers = []
133
- if expand_ratio != 1:
134
- # pw
135
- layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
136
- layers.extend([
137
- # dw
138
- ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
139
- # pw-linear
140
- nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
141
- nn.BatchNorm2d(oup),
142
- ])
143
- self.conv = nn.Sequential(*layers)
144
-
145
- def forward(self, x):
146
- if self.use_res_connect:
147
- return x + self.conv(x)
148
- else:
149
- return self.conv(x)
150
-
151
-
152
- class MobileNetV2(nn.Module):
153
- def __init__(self, pretrained=True):
154
- """
155
- MobileNet V2 main class
156
- Args:
157
- num_classes (int): Number of classes
158
- width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
159
- inverted_residual_setting: Network structure
160
- round_nearest (int): Round the number of channels in each layer to be a multiple of this number
161
- Set to 1 to turn off rounding
162
- block: Module specifying inverted residual building block for mobilenet
163
- """
164
- super(MobileNetV2, self).__init__()
165
-
166
- block = InvertedResidual
167
- input_channel = 32
168
- last_channel = 1280
169
- width_mult = 1.0
170
- round_nearest = 8
171
-
172
- inverted_residual_setting = [
173
- # t, c, n, s
174
- [1, 16, 1, 1],
175
- [6, 24, 2, 2],
176
- [6, 32, 3, 2],
177
- [6, 64, 4, 2],
178
- #[6, 96, 3, 1],
179
- #[6, 160, 3, 2],
180
- #[6, 320, 1, 1],
181
- ]
182
-
183
- # only check the first element, assuming user knows t,c,n,s are required
184
- if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
185
- raise ValueError("inverted_residual_setting should be non-empty "
186
- "or a 4-element list, got {}".format(inverted_residual_setting))
187
-
188
- # building first layer
189
- input_channel = _make_divisible(input_channel * width_mult, round_nearest)
190
- self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
191
- features = [ConvBNReLU(4, input_channel, stride=2)]
192
- # building inverted residual blocks
193
- for t, c, n, s in inverted_residual_setting:
194
- output_channel = _make_divisible(c * width_mult, round_nearest)
195
- for i in range(n):
196
- stride = s if i == 0 else 1
197
- features.append(block(input_channel, output_channel, stride, expand_ratio=t))
198
- input_channel = output_channel
199
- self.features = nn.Sequential(*features)
200
-
201
- self.fpn_selected = [3, 6, 10]
202
- # weight initialization
203
- for m in self.modules():
204
- if isinstance(m, nn.Conv2d):
205
- nn.init.kaiming_normal_(m.weight, mode='fan_out')
206
- if m.bias is not None:
207
- nn.init.zeros_(m.bias)
208
- elif isinstance(m, nn.BatchNorm2d):
209
- nn.init.ones_(m.weight)
210
- nn.init.zeros_(m.bias)
211
- elif isinstance(m, nn.Linear):
212
- nn.init.normal_(m.weight, 0, 0.01)
213
- nn.init.zeros_(m.bias)
214
-
215
- #if pretrained:
216
- # self._load_pretrained_model()
217
-
218
- def _forward_impl(self, x):
219
- # This exists since TorchScript doesn't support inheritance, so the superclass method
220
- # (this one) needs to have a name other than `forward` that can be accessed in a subclass
221
- fpn_features = []
222
- for i, f in enumerate(self.features):
223
- if i > self.fpn_selected[-1]:
224
- break
225
- x = f(x)
226
- if i in self.fpn_selected:
227
- fpn_features.append(x)
228
-
229
- c2, c3, c4 = fpn_features
230
- return c2, c3, c4
231
-
232
-
233
- def forward(self, x):
234
- return self._forward_impl(x)
235
-
236
- def _load_pretrained_model(self):
237
- pretrain_dict = model_zoo.load_url('https://download.pytorch.org/models/mobilenet_v2-b0353104.pth')
238
- model_dict = {}
239
- state_dict = self.state_dict()
240
- for k, v in pretrain_dict.items():
241
- if k in state_dict:
242
- model_dict[k] = v
243
- state_dict.update(model_dict)
244
- self.load_state_dict(state_dict)
245
-
246
-
247
- class MobileV2_MLSD_Tiny(nn.Module):
248
- def __init__(self):
249
- super(MobileV2_MLSD_Tiny, self).__init__()
250
-
251
- self.backbone = MobileNetV2(pretrained=True)
252
-
253
- self.block12 = BlockTypeA(in_c1= 32, in_c2= 64,
254
- out_c1= 64, out_c2=64)
255
- self.block13 = BlockTypeB(128, 64)
256
-
257
- self.block14 = BlockTypeA(in_c1 = 24, in_c2 = 64,
258
- out_c1= 32, out_c2= 32)
259
- self.block15 = BlockTypeB(64, 64)
260
-
261
- self.block16 = BlockTypeC(64, 16)
262
-
263
- def forward(self, x):
264
- c2, c3, c4 = self.backbone(x)
265
-
266
- x = self.block12(c3, c4)
267
- x = self.block13(x)
268
- x = self.block14(c2, x)
269
- x = self.block15(x)
270
- x = self.block16(x)
271
- x = x[:, 7:, :, :]
272
- #print(x.shape)
273
- x = F.interpolate(x, scale_factor=2.0, mode='bilinear', align_corners=True)
274
-
275
- return x
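Both MLSD backbones above pad stride-2 convolutions TFLite-style (one extra pixel on the right and bottom only) instead of using symmetric PyTorch padding. A minimal sketch of the effect, with made-up tensor sizes:

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 7, 7)
    x = F.pad(x, (0, 1, 0, 1), "constant", 0)   # pad (left, right, top, bottom) -> 8x8
    conv = torch.nn.Conv2d(3, 8, kernel_size=3, stride=2, padding=0, bias=False)
    print(conv(x).shape)                        # torch.Size([1, 8, 3, 3])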
 
annotator/mlsd/utils.py DELETED
@@ -1,580 +0,0 @@
1
- '''
2
- modified by lihaoweicv
3
- pytorch version
4
- '''
5
-
6
- '''
7
- M-LSD
8
- Copyright 2021-present NAVER Corp.
9
- Apache License v2.0
10
- '''
11
-
12
- import os
13
- import numpy as np
14
- import cv2
15
- import torch
16
- from torch.nn import functional as F
17
-
18
-
19
- def deccode_output_score_and_ptss(tpMap, topk_n = 200, ksize = 5):
20
- '''
21
- tpMap:
22
- center: tpMap[1, 0, :, :]
23
- displacement: tpMap[1, 1:5, :, :]
24
- '''
25
- b, c, h, w = tpMap.shape
26
- assert b==1, 'only support bsize==1'
27
- displacement = tpMap[:, 1:5, :, :][0]
28
- center = tpMap[:, 0, :, :]
29
- heat = torch.sigmoid(center)
30
- hmax = F.max_pool2d( heat, (ksize, ksize), stride=1, padding=(ksize-1)//2)
31
- keep = (hmax == heat).float()
32
- heat = heat * keep
33
- heat = heat.reshape(-1, )
34
-
35
- scores, indices = torch.topk(heat, topk_n, dim=-1, largest=True)
36
- yy = torch.floor_divide(indices, w).unsqueeze(-1)
37
- xx = torch.fmod(indices, w).unsqueeze(-1)
38
- ptss = torch.cat((yy, xx),dim=-1)
39
-
40
- ptss = ptss.detach().cpu().numpy()
41
- scores = scores.detach().cpu().numpy()
42
- displacement = displacement.detach().cpu().numpy()
43
- displacement = displacement.transpose((1,2,0))
44
- return ptss, scores, displacement
45
-
46
-
47
- def pred_lines(image, model,
48
- input_shape=[512, 512],
49
- score_thr=0.10,
50
- dist_thr=20.0):
51
- h, w, _ = image.shape
52
- h_ratio, w_ratio = [h / input_shape[0], w / input_shape[1]]
53
-
54
- resized_image = np.concatenate([cv2.resize(image, (input_shape[1], input_shape[0]), interpolation=cv2.INTER_AREA),
55
- np.ones([input_shape[0], input_shape[1], 1])], axis=-1)
56
-
57
- resized_image = resized_image.transpose((2,0,1))
58
- batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
59
- batch_image = (batch_image / 127.5) - 1.0
60
-
61
- batch_image = torch.from_numpy(batch_image).float().cuda()
62
- outputs = model(batch_image)
63
- pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
64
- start = vmap[:, :, :2]
65
- end = vmap[:, :, 2:]
66
- dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1))
67
-
68
- segments_list = []
69
- for center, score in zip(pts, pts_score):
70
- y, x = center
71
- distance = dist_map[y, x]
72
- if score > score_thr and distance > dist_thr:
73
- disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :]
74
- x_start = x + disp_x_start
75
- y_start = y + disp_y_start
76
- x_end = x + disp_x_end
77
- y_end = y + disp_y_end
78
- segments_list.append([x_start, y_start, x_end, y_end])
79
-
80
- lines = 2 * np.array(segments_list) # 256 > 512
81
- lines[:, 0] = lines[:, 0] * w_ratio
82
- lines[:, 1] = lines[:, 1] * h_ratio
83
- lines[:, 2] = lines[:, 2] * w_ratio
84
- lines[:, 3] = lines[:, 3] * h_ratio
85
-
86
- return lines
87
-
88
-
89
- def pred_squares(image,
90
- model,
91
- input_shape=[512, 512],
92
- params={'score': 0.06,
93
- 'outside_ratio': 0.28,
94
- 'inside_ratio': 0.45,
95
- 'w_overlap': 0.0,
96
- 'w_degree': 1.95,
97
- 'w_length': 0.0,
98
- 'w_area': 1.86,
99
- 'w_center': 0.14}):
100
- '''
101
- shape = [height, width]
102
- '''
103
- h, w, _ = image.shape
104
- original_shape = [h, w]
105
-
106
- resized_image = np.concatenate([cv2.resize(image, (input_shape[0], input_shape[1]), interpolation=cv2.INTER_AREA),
107
- np.ones([input_shape[0], input_shape[1], 1])], axis=-1)
108
- resized_image = resized_image.transpose((2, 0, 1))
109
- batch_image = np.expand_dims(resized_image, axis=0).astype('float32')
110
- batch_image = (batch_image / 127.5) - 1.0
111
-
112
- batch_image = torch.from_numpy(batch_image).float().cuda()
113
- outputs = model(batch_image)
114
-
115
- pts, pts_score, vmap = deccode_output_score_and_ptss(outputs, 200, 3)
116
- start = vmap[:, :, :2] # (x, y)
117
- end = vmap[:, :, 2:] # (x, y)
118
- dist_map = np.sqrt(np.sum((start - end) ** 2, axis=-1))
119
-
120
- junc_list = []
121
- segments_list = []
122
- for junc, score in zip(pts, pts_score):
123
- y, x = junc
124
- distance = dist_map[y, x]
125
- if score > params['score'] and distance > 20.0:
126
- junc_list.append([x, y])
127
- disp_x_start, disp_y_start, disp_x_end, disp_y_end = vmap[y, x, :]
128
- d_arrow = 1.0
129
- x_start = x + d_arrow * disp_x_start
130
- y_start = y + d_arrow * disp_y_start
131
- x_end = x + d_arrow * disp_x_end
132
- y_end = y + d_arrow * disp_y_end
133
- segments_list.append([x_start, y_start, x_end, y_end])
134
-
135
- segments = np.array(segments_list)
136
-
137
- ####### post processing for squares
138
- # 1. get unique lines
139
- point = np.array([[0, 0]])
140
- point = point[0]
141
- start = segments[:, :2]
142
- end = segments[:, 2:]
143
- diff = start - end
144
- a = diff[:, 1]
145
- b = -diff[:, 0]
146
- c = a * start[:, 0] + b * start[:, 1]
147
-
148
- d = np.abs(a * point[0] + b * point[1] - c) / np.sqrt(a ** 2 + b ** 2 + 1e-10)
149
- theta = np.arctan2(diff[:, 0], diff[:, 1]) * 180 / np.pi
150
- theta[theta < 0.0] += 180
151
- hough = np.concatenate([d[:, None], theta[:, None]], axis=-1)
152
-
153
- d_quant = 1
154
- theta_quant = 2
155
- hough[:, 0] //= d_quant
156
- hough[:, 1] //= theta_quant
157
- _, indices, counts = np.unique(hough, axis=0, return_index=True, return_counts=True)
158
-
159
- acc_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='float32')
160
- idx_map = np.zeros([512 // d_quant + 1, 360 // theta_quant + 1], dtype='int32') - 1
161
- yx_indices = hough[indices, :].astype('int32')
162
- acc_map[yx_indices[:, 0], yx_indices[:, 1]] = counts
163
- idx_map[yx_indices[:, 0], yx_indices[:, 1]] = indices
164
-
165
- acc_map_np = acc_map
166
- # acc_map = acc_map[None, :, :, None]
167
- #
168
- # ### fast suppression using tensorflow op
169
- # acc_map = tf.constant(acc_map, dtype=tf.float32)
170
- # max_acc_map = tf.keras.layers.MaxPool2D(pool_size=(5, 5), strides=1, padding='same')(acc_map)
171
- # acc_map = acc_map * tf.cast(tf.math.equal(acc_map, max_acc_map), tf.float32)
172
- # flatten_acc_map = tf.reshape(acc_map, [1, -1])
173
- # topk_values, topk_indices = tf.math.top_k(flatten_acc_map, k=len(pts))
174
- # _, h, w, _ = acc_map.shape
175
- # y = tf.expand_dims(topk_indices // w, axis=-1)
176
- # x = tf.expand_dims(topk_indices % w, axis=-1)
177
- # yx = tf.concat([y, x], axis=-1)
178
-
179
- ### fast suppression using pytorch op
180
- acc_map = torch.from_numpy(acc_map_np).unsqueeze(0).unsqueeze(0)
181
- _,_, h, w = acc_map.shape
182
- max_acc_map = F.max_pool2d(acc_map,kernel_size=5, stride=1, padding=2)
183
- acc_map = acc_map * ( (acc_map == max_acc_map).float() )
184
- flatten_acc_map = acc_map.reshape([-1, ])
185
-
186
- scores, indices = torch.topk(flatten_acc_map, len(pts), dim=-1, largest=True)
187
- yy = torch.div(indices, w, rounding_mode='floor').unsqueeze(-1)
188
- xx = torch.fmod(indices, w).unsqueeze(-1)
189
- yx = torch.cat((yy, xx), dim=-1)
190
-
191
- yx = yx.detach().cpu().numpy()
192
-
193
- topk_values = scores.detach().cpu().numpy()
194
- indices = idx_map[yx[:, 0], yx[:, 1]]
195
- basis = 5 // 2
196
-
197
- merged_segments = []
198
- for yx_pt, max_indice, value in zip(yx, indices, topk_values):
199
- y, x = yx_pt
200
- if max_indice == -1 or value == 0:
201
- continue
202
- segment_list = []
203
- for y_offset in range(-basis, basis + 1):
204
- for x_offset in range(-basis, basis + 1):
205
- indice = idx_map[y + y_offset, x + x_offset]
206
- cnt = int(acc_map_np[y + y_offset, x + x_offset])
207
- if indice != -1:
208
- segment_list.append(segments[indice])
209
- if cnt > 1:
210
- check_cnt = 1
211
- current_hough = hough[indice]
212
- for new_indice, new_hough in enumerate(hough):
213
- if (current_hough == new_hough).all() and indice != new_indice:
214
- segment_list.append(segments[new_indice])
215
- check_cnt += 1
216
- if check_cnt == cnt:
217
- break
218
- group_segments = np.array(segment_list).reshape([-1, 2])
219
- sorted_group_segments = np.sort(group_segments, axis=0)
220
- x_min, y_min = sorted_group_segments[0, :]
221
- x_max, y_max = sorted_group_segments[-1, :]
222
-
223
- deg = theta[max_indice]
224
- if deg >= 90:
225
- merged_segments.append([x_min, y_max, x_max, y_min])
226
- else:
227
- merged_segments.append([x_min, y_min, x_max, y_max])
228
-
229
- # 2. get intersections
230
- new_segments = np.array(merged_segments) # (x1, y1, x2, y2)
231
- start = new_segments[:, :2] # (x1, y1)
232
- end = new_segments[:, 2:] # (x2, y2)
233
- new_centers = (start + end) / 2.0
234
- diff = start - end
235
- dist_segments = np.sqrt(np.sum(diff ** 2, axis=-1))
236
-
237
- # ax + by = c
238
- a = diff[:, 1]
239
- b = -diff[:, 0]
240
- c = a * start[:, 0] + b * start[:, 1]
241
- pre_det = a[:, None] * b[None, :]
242
- det = pre_det - np.transpose(pre_det)
243
-
244
- pre_inter_y = a[:, None] * c[None, :]
245
- inter_y = (pre_inter_y - np.transpose(pre_inter_y)) / (det + 1e-10)
246
- pre_inter_x = c[:, None] * b[None, :]
247
- inter_x = (pre_inter_x - np.transpose(pre_inter_x)) / (det + 1e-10)
248
- inter_pts = np.concatenate([inter_x[:, :, None], inter_y[:, :, None]], axis=-1).astype('int32')
249
-
250
- # 3. get corner information
251
- # 3.1 get distance
252
- '''
253
- dist_segments:
254
- | dist(0), dist(1), dist(2), ...|
255
- dist_inter_to_segment1:
256
- | dist(inter,0), dist(inter,0), dist(inter,0), ... |
257
- | dist(inter,1), dist(inter,1), dist(inter,1), ... |
258
- ...
259
- dist_inter_to_semgnet2:
260
- | dist(inter,0), dist(inter,1), dist(inter,2), ... |
261
- | dist(inter,0), dist(inter,1), dist(inter,2), ... |
262
- ...
263
- '''
264
-
265
- dist_inter_to_segment1_start = np.sqrt(
266
- np.sum(((inter_pts - start[:, None, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
267
- dist_inter_to_segment1_end = np.sqrt(
268
- np.sum(((inter_pts - end[:, None, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
269
- dist_inter_to_segment2_start = np.sqrt(
270
- np.sum(((inter_pts - start[None, :, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
271
- dist_inter_to_segment2_end = np.sqrt(
272
- np.sum(((inter_pts - end[None, :, :]) ** 2), axis=-1, keepdims=True)) # [n_batch, n_batch, 1]
273
-
274
- # sort ascending
275
- dist_inter_to_segment1 = np.sort(
276
- np.concatenate([dist_inter_to_segment1_start, dist_inter_to_segment1_end], axis=-1),
277
- axis=-1) # [n_batch, n_batch, 2]
278
- dist_inter_to_segment2 = np.sort(
279
- np.concatenate([dist_inter_to_segment2_start, dist_inter_to_segment2_end], axis=-1),
280
- axis=-1) # [n_batch, n_batch, 2]
281
-
282
- # 3.2 get degree
283
- inter_to_start = new_centers[:, None, :] - inter_pts
284
- deg_inter_to_start = np.arctan2(inter_to_start[:, :, 1], inter_to_start[:, :, 0]) * 180 / np.pi
285
- deg_inter_to_start[deg_inter_to_start < 0.0] += 360
286
- inter_to_end = new_centers[None, :, :] - inter_pts
287
- deg_inter_to_end = np.arctan2(inter_to_end[:, :, 1], inter_to_end[:, :, 0]) * 180 / np.pi
288
- deg_inter_to_end[deg_inter_to_end < 0.0] += 360
289
-
290
- '''
291
- B -- G
292
- | |
293
- C -- R
294
- B : blue / G: green / C: cyan / R: red
295
-
296
- 0 -- 1
297
- | |
298
- 3 -- 2
299
- '''
300
- # rename variables
301
- deg1_map, deg2_map = deg_inter_to_start, deg_inter_to_end
302
- # sort deg ascending
303
- deg_sort = np.sort(np.concatenate([deg1_map[:, :, None], deg2_map[:, :, None]], axis=-1), axis=-1)
304
-
305
- deg_diff_map = np.abs(deg1_map - deg2_map)
306
- # we only consider the smallest degree of intersect
307
- deg_diff_map[deg_diff_map > 180] = 360 - deg_diff_map[deg_diff_map > 180]
308
-
309
- # define available degree range
310
- deg_range = [60, 120]
311
-
312
- corner_dict = {corner_info: [] for corner_info in range(4)}
313
- inter_points = []
314
- for i in range(inter_pts.shape[0]):
315
- for j in range(i + 1, inter_pts.shape[1]):
316
- # i, j > line index, always i < j
317
- x, y = inter_pts[i, j, :]
318
- deg1, deg2 = deg_sort[i, j, :]
319
- deg_diff = deg_diff_map[i, j]
320
-
321
- check_degree = deg_diff > deg_range[0] and deg_diff < deg_range[1]
322
-
323
- outside_ratio = params['outside_ratio'] # over ratio >>> drop it!
324
- inside_ratio = params['inside_ratio'] # over ratio >>> drop it!
325
- check_distance = ((dist_inter_to_segment1[i, j, 1] >= dist_segments[i] and \
326
- dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * outside_ratio) or \
327
- (dist_inter_to_segment1[i, j, 1] <= dist_segments[i] and \
328
- dist_inter_to_segment1[i, j, 0] <= dist_segments[i] * inside_ratio)) and \
329
- ((dist_inter_to_segment2[i, j, 1] >= dist_segments[j] and \
330
- dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * outside_ratio) or \
331
- (dist_inter_to_segment2[i, j, 1] <= dist_segments[j] and \
332
- dist_inter_to_segment2[i, j, 0] <= dist_segments[j] * inside_ratio))
333
-
334
- if check_degree and check_distance:
335
- corner_info = None
336
-
337
- if (deg1 >= 0 and deg1 <= 45 and deg2 >= 45 and deg2 <= 120) or \
338
- (deg2 >= 315 and deg1 >= 45 and deg1 <= 120):
339
- corner_info, color_info = 0, 'blue'
340
- elif (deg1 >= 45 and deg1 <= 125 and deg2 >= 125 and deg2 <= 225):
341
- corner_info, color_info = 1, 'green'
342
- elif (deg1 >= 125 and deg1 <= 225 and deg2 >= 225 and deg2 <= 315):
343
- corner_info, color_info = 2, 'black'
344
- elif (deg1 >= 0 and deg1 <= 45 and deg2 >= 225 and deg2 <= 315) or \
345
- (deg2 >= 315 and deg1 >= 225 and deg1 <= 315):
346
- corner_info, color_info = 3, 'cyan'
347
- else:
348
- corner_info, color_info = 4, 'red' # we don't use it
349
- continue
350
-
351
- corner_dict[corner_info].append([x, y, i, j])
352
- inter_points.append([x, y])
353
-
354
- square_list = []
355
- connect_list = []
356
- segments_list = []
357
- for corner0 in corner_dict[0]:
358
- for corner1 in corner_dict[1]:
359
- connect01 = False
360
- for corner0_line in corner0[2:]:
361
- if corner0_line in corner1[2:]:
362
- connect01 = True
363
- break
364
- if connect01:
365
- for corner2 in corner_dict[2]:
366
- connect12 = False
367
- for corner1_line in corner1[2:]:
368
- if corner1_line in corner2[2:]:
369
- connect12 = True
370
- break
371
- if connect12:
372
- for corner3 in corner_dict[3]:
373
- connect23 = False
374
- for corner2_line in corner2[2:]:
375
- if corner2_line in corner3[2:]:
376
- connect23 = True
377
- break
378
- if connect23:
379
- for corner3_line in corner3[2:]:
380
- if corner3_line in corner0[2:]:
381
- # SQUARE!!!
382
- '''
383
- 0 -- 1
384
- | |
385
- 3 -- 2
386
- square_list:
387
- order: 0 > 1 > 2 > 3
388
- | x0, y0, x1, y1, x2, y2, x3, y3 |
389
- | x0, y0, x1, y1, x2, y2, x3, y3 |
390
- ...
391
- connect_list:
392
- order: 01 > 12 > 23 > 30
393
- | line_idx01, line_idx12, line_idx23, line_idx30 |
394
- | line_idx01, line_idx12, line_idx23, line_idx30 |
395
- ...
396
- segments_list:
397
- order: 0 > 1 > 2 > 3
398
- | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j |
399
- | line_idx0_i, line_idx0_j, line_idx1_i, line_idx1_j, line_idx2_i, line_idx2_j, line_idx3_i, line_idx3_j |
400
- ...
401
- '''
402
- square_list.append(corner0[:2] + corner1[:2] + corner2[:2] + corner3[:2])
403
- connect_list.append([corner0_line, corner1_line, corner2_line, corner3_line])
404
- segments_list.append(corner0[2:] + corner1[2:] + corner2[2:] + corner3[2:])
405
-
406
- def check_outside_inside(segments_info, connect_idx):
407
- # return 'outside or inside', min distance, cover_param, peri_param
408
- if connect_idx == segments_info[0]:
409
- check_dist_mat = dist_inter_to_segment1
410
- else:
411
- check_dist_mat = dist_inter_to_segment2
412
-
413
- i, j = segments_info
414
- min_dist, max_dist = check_dist_mat[i, j, :]
415
- connect_dist = dist_segments[connect_idx]
416
- if max_dist > connect_dist:
417
- return 'outside', min_dist, 0, 1
418
- else:
419
- return 'inside', min_dist, -1, -1
420
-
421
- top_square = None
422
-
423
- try:
424
- map_size = input_shape[0] / 2
425
- squares = np.array(square_list).reshape([-1, 4, 2])
426
- score_array = []
427
- connect_array = np.array(connect_list)
428
- segments_array = np.array(segments_list).reshape([-1, 4, 2])
429
-
430
- # get degree of corners:
431
- squares_rollup = np.roll(squares, 1, axis=1)
432
- squares_rolldown = np.roll(squares, -1, axis=1)
433
- vec1 = squares_rollup - squares
434
- normalized_vec1 = vec1 / (np.linalg.norm(vec1, axis=-1, keepdims=True) + 1e-10)
435
- vec2 = squares_rolldown - squares
436
- normalized_vec2 = vec2 / (np.linalg.norm(vec2, axis=-1, keepdims=True) + 1e-10)
437
- inner_products = np.sum(normalized_vec1 * normalized_vec2, axis=-1) # [n_squares, 4]
438
- squares_degree = np.arccos(inner_products) * 180 / np.pi # [n_squares, 4]
439
-
440
- # get square score
441
- overlap_scores = []
442
- degree_scores = []
443
- length_scores = []
444
-
445
- for connects, segments, square, degree in zip(connect_array, segments_array, squares, squares_degree):
446
- '''
447
- 0 -- 1
448
- | |
449
- 3 -- 2
450
-
451
- # segments: [4, 2]
452
- # connects: [4]
453
- '''
454
-
455
- ###################################### OVERLAP SCORES
456
- cover = 0
457
- perimeter = 0
458
- # check 0 > 1 > 2 > 3
459
- square_length = []
460
-
461
- for start_idx in range(4):
462
- end_idx = (start_idx + 1) % 4
463
-
464
- connect_idx = connects[start_idx] # segment idx of segment01
465
- start_segments = segments[start_idx]
466
- end_segments = segments[end_idx]
467
-
468
- start_point = square[start_idx]
469
- end_point = square[end_idx]
470
-
471
- # check whether outside or inside
472
- start_position, start_min, start_cover_param, start_peri_param = check_outside_inside(start_segments,
473
- connect_idx)
474
- end_position, end_min, end_cover_param, end_peri_param = check_outside_inside(end_segments, connect_idx)
475
-
476
- cover += dist_segments[connect_idx] + start_cover_param * start_min + end_cover_param * end_min
477
- perimeter += dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min
478
-
479
- square_length.append(
480
- dist_segments[connect_idx] + start_peri_param * start_min + end_peri_param * end_min)
481
-
482
- overlap_scores.append(cover / perimeter)
483
- ######################################
484
- ###################################### DEGREE SCORES
485
- '''
486
- deg0 vs deg2
487
- deg1 vs deg3
488
- '''
489
- deg0, deg1, deg2, deg3 = degree
490
- deg_ratio1 = deg0 / deg2
491
- if deg_ratio1 > 1.0:
492
- deg_ratio1 = 1 / deg_ratio1
493
- deg_ratio2 = deg1 / deg3
494
- if deg_ratio2 > 1.0:
495
- deg_ratio2 = 1 / deg_ratio2
496
- degree_scores.append((deg_ratio1 + deg_ratio2) / 2)
497
- ######################################
498
- ###################################### LENGTH SCORES
499
- '''
500
- len0 vs len2
501
- len1 vs len3
502
- '''
503
- len0, len1, len2, len3 = square_length
504
- len_ratio1 = len0 / len2 if len2 > len0 else len2 / len0
505
- len_ratio2 = len1 / len3 if len3 > len1 else len3 / len1
506
- length_scores.append((len_ratio1 + len_ratio2) / 2)
507
-
508
- ######################################
509
-
510
- overlap_scores = np.array(overlap_scores)
511
- overlap_scores /= np.max(overlap_scores)
512
-
513
- degree_scores = np.array(degree_scores)
514
- # degree_scores /= np.max(degree_scores)
515
-
516
- length_scores = np.array(length_scores)
517
-
518
- ###################################### AREA SCORES
519
- area_scores = np.reshape(squares, [-1, 4, 2])
520
- area_x = area_scores[:, :, 0]
521
- area_y = area_scores[:, :, 1]
522
- correction = area_x[:, -1] * area_y[:, 0] - area_y[:, -1] * area_x[:, 0]
523
- area_scores = np.sum(area_x[:, :-1] * area_y[:, 1:], axis=-1) - np.sum(area_y[:, :-1] * area_x[:, 1:], axis=-1)
524
- area_scores = 0.5 * np.abs(area_scores + correction)
525
- area_scores /= (map_size * map_size) # np.max(area_scores)
526
- ######################################
527
-
528
- ###################################### CENTER SCORES
529
- centers = np.array([[256 // 2, 256 // 2]], dtype='float32') # [1, 2]
530
- # squares: [n, 4, 2]
531
- square_centers = np.mean(squares, axis=1) # [n, 2]
532
- center2center = np.sqrt(np.sum((centers - square_centers) ** 2))
533
- center_scores = center2center / (map_size / np.sqrt(2.0))
534
-
535
- '''
536
- score_w = [overlap, degree, area, center, length]
537
- '''
538
- score_w = [0.0, 1.0, 10.0, 0.5, 1.0]
539
- score_array = params['w_overlap'] * overlap_scores \
540
- + params['w_degree'] * degree_scores \
541
- + params['w_area'] * area_scores \
542
- - params['w_center'] * center_scores \
543
- + params['w_length'] * length_scores
544
-
545
- best_square = []
546
-
547
- sorted_idx = np.argsort(score_array)[::-1]
548
- score_array = score_array[sorted_idx]
549
- squares = squares[sorted_idx]
550
-
551
- except Exception as e:
552
- pass
553
-
554
- '''return list
555
- merged_lines, squares, scores
556
- '''
557
-
558
- try:
559
- new_segments[:, 0] = new_segments[:, 0] * 2 / input_shape[1] * original_shape[1]
560
- new_segments[:, 1] = new_segments[:, 1] * 2 / input_shape[0] * original_shape[0]
561
- new_segments[:, 2] = new_segments[:, 2] * 2 / input_shape[1] * original_shape[1]
562
- new_segments[:, 3] = new_segments[:, 3] * 2 / input_shape[0] * original_shape[0]
563
- except:
564
- new_segments = []
565
-
566
- try:
567
- squares[:, :, 0] = squares[:, :, 0] * 2 / input_shape[1] * original_shape[1]
568
- squares[:, :, 1] = squares[:, :, 1] * 2 / input_shape[0] * original_shape[0]
569
- except:
570
- squares = []
571
- score_array = []
572
-
573
- try:
574
- inter_points = np.array(inter_points)
575
- inter_points[:, 0] = inter_points[:, 0] * 2 / input_shape[1] * original_shape[1]
576
- inter_points[:, 1] = inter_points[:, 1] * 2 / input_shape[0] * original_shape[0]
577
- except:
578
- inter_points = []
579
-
580
- return new_segments, squares, score_array, inter_points
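The deccode_output_score_and_ptss function removed above keeps only local maxima of the centre heatmap by comparing each cell with a max-pooled copy of itself. The standalone sketch below reproduces that trick on a random map; the sizes and k=4 are assumptions for illustration only.

    import torch
    import torch.nn.functional as F

    heat = torch.sigmoid(torch.randn(1, 1, 8, 8))              # fake centre heatmap
    hmax = F.max_pool2d(heat, kernel_size=5, stride=1, padding=2)
    peaks = heat * (hmax == heat).float()                      # non-maxima become zero
    scores, idx = torch.topk(peaks.reshape(-1), k=4)
    yy = torch.div(idx, 8, rounding_mode="floor")              # row index
    xx = torch.fmod(idx, 8)                                    # column index
    print(list(zip(yy.tolist(), xx.tolist())))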
 
annotator/openpose/__init__.py DELETED
@@ -1,44 +0,0 @@
1
- import os
2
- os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
3
-
4
- import torch
5
- import numpy as np
6
- from . import util
7
- from .body import Body
8
- from .hand import Hand
9
- from annotator.util import annotator_ckpts_path
10
-
11
-
12
- body_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/body_pose_model.pth"
13
- hand_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/hand_pose_model.pth"
14
-
15
-
16
- class OpenposeDetector:
17
- def __init__(self):
18
- body_modelpath = os.path.join(annotator_ckpts_path, "body_pose_model.pth")
19
- hand_modelpath = os.path.join(annotator_ckpts_path, "hand_pose_model.pth")
20
-
21
- if not os.path.exists(hand_modelpath):
22
- from basicsr.utils.download_util import load_file_from_url
23
- load_file_from_url(body_model_path, model_dir=annotator_ckpts_path)
24
- load_file_from_url(hand_model_path, model_dir=annotator_ckpts_path)
25
-
26
- self.body_estimation = Body(body_modelpath)
27
- self.hand_estimation = Hand(hand_modelpath)
28
-
29
- def __call__(self, oriImg, hand=False):
30
- oriImg = oriImg[:, :, ::-1].copy()
31
- with torch.no_grad():
32
- candidate, subset = self.body_estimation(oriImg)
33
- canvas = np.zeros_like(oriImg)
34
- canvas = util.draw_bodypose(canvas, candidate, subset)
35
- if hand:
36
- hands_list = util.handDetect(candidate, subset, oriImg)
37
- all_hand_peaks = []
38
- for x, y, w, is_left in hands_list:
39
- peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :])
40
- peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
41
- peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
42
- all_hand_peaks.append(peaks)
43
- canvas = util.draw_handpose(canvas, all_hand_peaks)
44
- return canvas, dict(candidate=candidate.tolist(), subset=subset.tolist())
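One detail of the hand branch deleted above: keypoints found inside a hand crop at (x, y) are shifted back into full-image coordinates, while (0, 0) entries (missed joints) stay untouched. A small numpy illustration with made-up values:

    import numpy as np

    peaks = np.array([[10, 12], [0, 0], [3, 7]], dtype=np.float32)
    x, y = 100, 50                                   # top-left corner of the crop
    peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
    peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
    print(peaks)                                     # [[110. 62.] [0. 0.] [103. 57.]]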
 
annotator/openpose/body.py DELETED
@@ -1,219 +0,0 @@
1
- import cv2
2
- import numpy as np
3
- import math
4
- import time
5
- from scipy.ndimage.filters import gaussian_filter
6
- import matplotlib.pyplot as plt
7
- import matplotlib
8
- import torch
9
- from torchvision import transforms
10
-
11
- from . import util
12
- from .model import bodypose_model
13
-
14
- class Body(object):
15
- def __init__(self, model_path):
16
- self.model = bodypose_model()
17
- if torch.cuda.is_available():
18
- self.model = self.model.cuda()
19
- print('cuda')
20
- model_dict = util.transfer(self.model, torch.load(model_path))
21
- self.model.load_state_dict(model_dict)
22
- self.model.eval()
23
-
24
- def __call__(self, oriImg):
25
- # scale_search = [0.5, 1.0, 1.5, 2.0]
26
- scale_search = [0.5]
27
- boxsize = 368
28
- stride = 8
29
- padValue = 128
30
- thre1 = 0.1
31
- thre2 = 0.05
32
- multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
33
- heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
34
- paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
35
-
36
- for m in range(len(multiplier)):
37
- scale = multiplier[m]
38
- imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
39
- imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
40
- im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
41
- im = np.ascontiguousarray(im)
42
-
43
- data = torch.from_numpy(im).float()
44
- if torch.cuda.is_available():
45
- data = data.cuda()
46
- # data = data.permute([2, 0, 1]).unsqueeze(0).float()
47
- with torch.no_grad():
48
- Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
49
- Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
50
- Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
51
-
52
- # extract outputs, resize, and remove padding
53
- # heatmap = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[1]].data), (1, 2, 0)) # output 1 is heatmaps
54
- heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0)) # output 1 is heatmaps
55
- heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
56
- heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
57
- heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
58
-
59
- # paf = np.transpose(np.squeeze(net.blobs[output_blobs.keys()[0]].data), (1, 2, 0)) # output 0 is PAFs
60
- paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0)) # output 0 is PAFs
61
- paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
62
- paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
63
- paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
64
-
65
- heatmap_avg += heatmap_avg + heatmap / len(multiplier)
66
- paf_avg += + paf / len(multiplier)
67
-
68
- all_peaks = []
69
- peak_counter = 0
70
-
71
- for part in range(18):
72
- map_ori = heatmap_avg[:, :, part]
73
- one_heatmap = gaussian_filter(map_ori, sigma=3)
74
-
75
- map_left = np.zeros(one_heatmap.shape)
76
- map_left[1:, :] = one_heatmap[:-1, :]
77
- map_right = np.zeros(one_heatmap.shape)
78
- map_right[:-1, :] = one_heatmap[1:, :]
79
- map_up = np.zeros(one_heatmap.shape)
80
- map_up[:, 1:] = one_heatmap[:, :-1]
81
- map_down = np.zeros(one_heatmap.shape)
82
- map_down[:, :-1] = one_heatmap[:, 1:]
83
-
84
- peaks_binary = np.logical_and.reduce(
85
- (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
86
- peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0])) # note reverse
87
- peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
88
- peak_id = range(peak_counter, peak_counter + len(peaks))
89
- peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
90
-
91
- all_peaks.append(peaks_with_score_and_id)
92
- peak_counter += len(peaks)
93
-
94
- # find connection in the specified sequence, center 29 is in the position 15
95
- limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
96
- [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
97
- [1, 16], [16, 18], [3, 17], [6, 18]]
98
- # the middle joints heatmap correpondence
99
- mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
100
- [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
101
- [55, 56], [37, 38], [45, 46]]
102
-
103
- connection_all = []
104
- special_k = []
105
- mid_num = 10
106
-
107
- for k in range(len(mapIdx)):
108
- score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
109
- candA = all_peaks[limbSeq[k][0] - 1]
110
- candB = all_peaks[limbSeq[k][1] - 1]
111
- nA = len(candA)
112
- nB = len(candB)
113
- indexA, indexB = limbSeq[k]
114
- if (nA != 0 and nB != 0):
115
- connection_candidate = []
116
- for i in range(nA):
117
- for j in range(nB):
118
- vec = np.subtract(candB[j][:2], candA[i][:2])
119
- norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
120
- norm = max(0.001, norm)
121
- vec = np.divide(vec, norm)
122
-
123
- startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
124
- np.linspace(candA[i][1], candB[j][1], num=mid_num)))
125
-
126
- vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
127
- for I in range(len(startend))])
128
- vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
129
- for I in range(len(startend))])
130
-
131
- score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
132
- score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
133
- 0.5 * oriImg.shape[0] / norm - 1, 0)
134
- criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
135
- criterion2 = score_with_dist_prior > 0
136
- if criterion1 and criterion2:
137
- connection_candidate.append(
138
- [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
139
-
140
- connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
141
- connection = np.zeros((0, 5))
142
- for c in range(len(connection_candidate)):
143
- i, j, s = connection_candidate[c][0:3]
144
- if (i not in connection[:, 3] and j not in connection[:, 4]):
145
- connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
146
- if (len(connection) >= min(nA, nB)):
147
- break
148
-
149
- connection_all.append(connection)
150
- else:
151
- special_k.append(k)
152
- connection_all.append([])
153
-
154
- # last number in each row is the total parts number of that person
155
- # the second last number in each row is the score of the overall configuration
156
- subset = -1 * np.ones((0, 20))
157
- candidate = np.array([item for sublist in all_peaks for item in sublist])
158
-
159
- for k in range(len(mapIdx)):
160
- if k not in special_k:
161
- partAs = connection_all[k][:, 0]
162
- partBs = connection_all[k][:, 1]
163
- indexA, indexB = np.array(limbSeq[k]) - 1
164
-
165
- for i in range(len(connection_all[k])): # = 1:size(temp,1)
166
- found = 0
167
- subset_idx = [-1, -1]
168
- for j in range(len(subset)): # 1:size(subset,1):
169
- if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
170
- subset_idx[found] = j
171
- found += 1
172
-
173
- if found == 1:
174
- j = subset_idx[0]
175
- if subset[j][indexB] != partBs[i]:
176
- subset[j][indexB] = partBs[i]
177
- subset[j][-1] += 1
178
- subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
179
- elif found == 2: # if found 2 and disjoint, merge them
180
- j1, j2 = subset_idx
181
- membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
182
- if len(np.nonzero(membership == 2)[0]) == 0: # merge
183
- subset[j1][:-2] += (subset[j2][:-2] + 1)
184
- subset[j1][-2:] += subset[j2][-2:]
185
- subset[j1][-2] += connection_all[k][i][2]
186
- subset = np.delete(subset, j2, 0)
187
- else: # as like found == 1
188
- subset[j1][indexB] = partBs[i]
189
- subset[j1][-1] += 1
190
- subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
191
-
192
- # if find no partA in the subset, create a new subset
193
- elif not found and k < 17:
194
- row = -1 * np.ones(20)
195
- row[indexA] = partAs[i]
196
- row[indexB] = partBs[i]
197
- row[-1] = 2
198
- row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
199
- subset = np.vstack([subset, row])
200
- # delete some rows of subset which has few parts occur
201
- deleteIdx = []
202
- for i in range(len(subset)):
203
- if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
204
- deleteIdx.append(i)
205
- subset = np.delete(subset, deleteIdx, axis=0)
206
-
207
- # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
208
- # candidate: x, y, score, id
209
- return candidate, subset
210
-
211
- if __name__ == "__main__":
212
- body_estimation = Body('../model/body_pose_model.pth')
213
-
214
- test_image = '../images/ski.jpg'
215
- oriImg = cv2.imread(test_image) # B,G,R order
216
- candidate, subset = body_estimation(oriImg)
217
- canvas = util.draw_bodypose(oriImg, candidate, subset)
218
- plt.imshow(canvas[:, :, [2, 1, 0]])
219
- plt.show()
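The Body constructor above (like Hand below) moves the network to CUDA only when a GPU is present, but torch.load is still called without a map_location, which can fail on CPU-only machines if the checkpoint was serialized with CUDA tensors. The following is an illustrative, device-agnostic loading sketch only, not the replacement code introduced by this commit; it assumes the bodypose_model and util.transfer helpers from the deleted modules, with import paths following the pre-deletion package layout.

import torch

from annotator.openpose.model import bodypose_model  # pre-deletion layout
from annotator.openpose import util                  # pre-deletion layout

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = bodypose_model().to(device)
# map_location keeps the load working on CPU-only machines even if the
# checkpoint holds CUDA tensors.
state = torch.load("body_pose_model.pth", map_location=device)
model.load_state_dict(util.transfer(model, state))
model.eval()

Input tensors would then be moved with data.to(device) rather than the conditional data.cuda() calls shown in the deleted __call__ above.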
 
annotator/openpose/hand.py DELETED
@@ -1,86 +0,0 @@
1
- import cv2
2
- import json
3
- import numpy as np
4
- import math
5
- import time
6
- from scipy.ndimage.filters import gaussian_filter
7
- import matplotlib.pyplot as plt
8
- import matplotlib
9
- import torch
10
- from skimage.measure import label
11
-
12
- from .model import handpose_model
13
- from . import util
14
-
15
- class Hand(object):
16
- def __init__(self, model_path):
17
- self.model = handpose_model()
18
- if torch.cuda.is_available():
19
- self.model = self.model.cuda()
20
- print('cuda')
21
- model_dict = util.transfer(self.model, torch.load(model_path))
22
- self.model.load_state_dict(model_dict)
23
- self.model.eval()
24
-
25
- def __call__(self, oriImg):
26
- scale_search = [0.5, 1.0, 1.5, 2.0]
27
- # scale_search = [0.5]
28
- boxsize = 368
29
- stride = 8
30
- padValue = 128
31
- thre = 0.05
32
- multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
33
- heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
34
- # paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))
35
-
36
- for m in range(len(multiplier)):
37
- scale = multiplier[m]
38
- imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
39
- imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
40
- im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
41
- im = np.ascontiguousarray(im)
42
-
43
- data = torch.from_numpy(im).float()
44
- if torch.cuda.is_available():
45
- data = data.cuda()
46
- # data = data.permute([2, 0, 1]).unsqueeze(0).float()
47
- with torch.no_grad():
48
- output = self.model(data).cpu().numpy()
49
- # output = self.model(data).numpy()q
50
-
51
- # extract outputs, resize, and remove padding
52
- heatmap = np.transpose(np.squeeze(output), (1, 2, 0)) # output 1 is heatmaps
53
- heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
54
- heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
55
- heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
56
-
57
- heatmap_avg += heatmap / len(multiplier)
58
-
59
- all_peaks = []
60
- for part in range(21):
61
- map_ori = heatmap_avg[:, :, part]
62
- one_heatmap = gaussian_filter(map_ori, sigma=3)
63
- binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
64
- # all values below the threshold
65
- if np.sum(binary) == 0:
66
- all_peaks.append([0, 0])
67
- continue
68
- label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
69
- max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
70
- label_img[label_img != max_index] = 0
71
- map_ori[label_img == 0] = 0
72
-
73
- y, x = util.npmax(map_ori)
74
- all_peaks.append([x, y])
75
- return np.array(all_peaks)
76
-
77
- if __name__ == "__main__":
78
- hand_estimation = Hand('../model/hand_pose_model.pth')
79
-
80
- # test_image = '../images/hand.jpg'
81
- test_image = '../images/hand.jpg'
82
- oriImg = cv2.imread(test_image) # B,G,R order
83
- peaks = hand_estimation(oriImg)
84
- canvas = util.draw_handpose(oriImg, peaks, True)
85
- cv2.imshow('', canvas)
86
- cv2.waitKey(0)
 
annotator/openpose/model.py DELETED
@@ -1,219 +0,0 @@
1
- import torch
2
- from collections import OrderedDict
3
-
4
- import torch
5
- import torch.nn as nn
6
-
7
- def make_layers(block, no_relu_layers):
8
- layers = []
9
- for layer_name, v in block.items():
10
- if 'pool' in layer_name:
11
- layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
12
- padding=v[2])
13
- layers.append((layer_name, layer))
14
- else:
15
- conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
16
- kernel_size=v[2], stride=v[3],
17
- padding=v[4])
18
- layers.append((layer_name, conv2d))
19
- if layer_name not in no_relu_layers:
20
- layers.append(('relu_'+layer_name, nn.ReLU(inplace=True)))
21
-
22
- return nn.Sequential(OrderedDict(layers))
23
-
24
- class bodypose_model(nn.Module):
25
- def __init__(self):
26
- super(bodypose_model, self).__init__()
27
-
28
- # these layers have no relu layer
29
- no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',\
30
- 'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',\
31
- 'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',\
32
- 'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L1']
33
- blocks = {}
34
- block0 = OrderedDict([
35
- ('conv1_1', [3, 64, 3, 1, 1]),
36
- ('conv1_2', [64, 64, 3, 1, 1]),
37
- ('pool1_stage1', [2, 2, 0]),
38
- ('conv2_1', [64, 128, 3, 1, 1]),
39
- ('conv2_2', [128, 128, 3, 1, 1]),
40
- ('pool2_stage1', [2, 2, 0]),
41
- ('conv3_1', [128, 256, 3, 1, 1]),
42
- ('conv3_2', [256, 256, 3, 1, 1]),
43
- ('conv3_3', [256, 256, 3, 1, 1]),
44
- ('conv3_4', [256, 256, 3, 1, 1]),
45
- ('pool3_stage1', [2, 2, 0]),
46
- ('conv4_1', [256, 512, 3, 1, 1]),
47
- ('conv4_2', [512, 512, 3, 1, 1]),
48
- ('conv4_3_CPM', [512, 256, 3, 1, 1]),
49
- ('conv4_4_CPM', [256, 128, 3, 1, 1])
50
- ])
51
-
52
-
53
- # Stage 1
54
- block1_1 = OrderedDict([
55
- ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
56
- ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
57
- ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
58
- ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
59
- ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
60
- ])
61
-
62
- block1_2 = OrderedDict([
63
- ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
64
- ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
65
- ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
66
- ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
67
- ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
68
- ])
69
- blocks['block1_1'] = block1_1
70
- blocks['block1_2'] = block1_2
71
-
72
- self.model0 = make_layers(block0, no_relu_layers)
73
-
74
- # Stages 2 - 6
75
- for i in range(2, 7):
76
- blocks['block%d_1' % i] = OrderedDict([
77
- ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
78
- ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
79
- ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
80
- ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
81
- ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
82
- ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
83
- ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
84
- ])
85
-
86
- blocks['block%d_2' % i] = OrderedDict([
87
- ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
88
- ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
89
- ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
90
- ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
91
- ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
92
- ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
93
- ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
94
- ])
95
-
96
- for k in blocks.keys():
97
- blocks[k] = make_layers(blocks[k], no_relu_layers)
98
-
99
- self.model1_1 = blocks['block1_1']
100
- self.model2_1 = blocks['block2_1']
101
- self.model3_1 = blocks['block3_1']
102
- self.model4_1 = blocks['block4_1']
103
- self.model5_1 = blocks['block5_1']
104
- self.model6_1 = blocks['block6_1']
105
-
106
- self.model1_2 = blocks['block1_2']
107
- self.model2_2 = blocks['block2_2']
108
- self.model3_2 = blocks['block3_2']
109
- self.model4_2 = blocks['block4_2']
110
- self.model5_2 = blocks['block5_2']
111
- self.model6_2 = blocks['block6_2']
112
-
113
-
114
- def forward(self, x):
115
-
116
- out1 = self.model0(x)
117
-
118
- out1_1 = self.model1_1(out1)
119
- out1_2 = self.model1_2(out1)
120
- out2 = torch.cat([out1_1, out1_2, out1], 1)
121
-
122
- out2_1 = self.model2_1(out2)
123
- out2_2 = self.model2_2(out2)
124
- out3 = torch.cat([out2_1, out2_2, out1], 1)
125
-
126
- out3_1 = self.model3_1(out3)
127
- out3_2 = self.model3_2(out3)
128
- out4 = torch.cat([out3_1, out3_2, out1], 1)
129
-
130
- out4_1 = self.model4_1(out4)
131
- out4_2 = self.model4_2(out4)
132
- out5 = torch.cat([out4_1, out4_2, out1], 1)
133
-
134
- out5_1 = self.model5_1(out5)
135
- out5_2 = self.model5_2(out5)
136
- out6 = torch.cat([out5_1, out5_2, out1], 1)
137
-
138
- out6_1 = self.model6_1(out6)
139
- out6_2 = self.model6_2(out6)
140
-
141
- return out6_1, out6_2
142
-
143
- class handpose_model(nn.Module):
144
- def __init__(self):
145
- super(handpose_model, self).__init__()
146
-
147
- # these layers have no relu layer
148
- no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',\
149
- 'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
150
- # stage 1
151
- block1_0 = OrderedDict([
152
- ('conv1_1', [3, 64, 3, 1, 1]),
153
- ('conv1_2', [64, 64, 3, 1, 1]),
154
- ('pool1_stage1', [2, 2, 0]),
155
- ('conv2_1', [64, 128, 3, 1, 1]),
156
- ('conv2_2', [128, 128, 3, 1, 1]),
157
- ('pool2_stage1', [2, 2, 0]),
158
- ('conv3_1', [128, 256, 3, 1, 1]),
159
- ('conv3_2', [256, 256, 3, 1, 1]),
160
- ('conv3_3', [256, 256, 3, 1, 1]),
161
- ('conv3_4', [256, 256, 3, 1, 1]),
162
- ('pool3_stage1', [2, 2, 0]),
163
- ('conv4_1', [256, 512, 3, 1, 1]),
164
- ('conv4_2', [512, 512, 3, 1, 1]),
165
- ('conv4_3', [512, 512, 3, 1, 1]),
166
- ('conv4_4', [512, 512, 3, 1, 1]),
167
- ('conv5_1', [512, 512, 3, 1, 1]),
168
- ('conv5_2', [512, 512, 3, 1, 1]),
169
- ('conv5_3_CPM', [512, 128, 3, 1, 1])
170
- ])
171
-
172
- block1_1 = OrderedDict([
173
- ('conv6_1_CPM', [128, 512, 1, 1, 0]),
174
- ('conv6_2_CPM', [512, 22, 1, 1, 0])
175
- ])
176
-
177
- blocks = {}
178
- blocks['block1_0'] = block1_0
179
- blocks['block1_1'] = block1_1
180
-
181
- # stage 2-6
182
- for i in range(2, 7):
183
- blocks['block%d' % i] = OrderedDict([
184
- ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
185
- ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
186
- ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
187
- ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
188
- ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
189
- ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
190
- ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
191
- ])
192
-
193
- for k in blocks.keys():
194
- blocks[k] = make_layers(blocks[k], no_relu_layers)
195
-
196
- self.model1_0 = blocks['block1_0']
197
- self.model1_1 = blocks['block1_1']
198
- self.model2 = blocks['block2']
199
- self.model3 = blocks['block3']
200
- self.model4 = blocks['block4']
201
- self.model5 = blocks['block5']
202
- self.model6 = blocks['block6']
203
-
204
- def forward(self, x):
205
- out1_0 = self.model1_0(x)
206
- out1_1 = self.model1_1(out1_0)
207
- concat_stage2 = torch.cat([out1_1, out1_0], 1)
208
- out_stage2 = self.model2(concat_stage2)
209
- concat_stage3 = torch.cat([out_stage2, out1_0], 1)
210
- out_stage3 = self.model3(concat_stage3)
211
- concat_stage4 = torch.cat([out_stage3, out1_0], 1)
212
- out_stage4 = self.model4(concat_stage4)
213
- concat_stage5 = torch.cat([out_stage4, out1_0], 1)
214
- out_stage5 = self.model5(concat_stage5)
215
- concat_stage6 = torch.cat([out_stage5, out1_0], 1)
216
- out_stage6 = self.model6(concat_stage6)
217
- return out_stage6
218
-
219
-
 
annotator/openpose/util.py DELETED
@@ -1,164 +0,0 @@
1
- import math
2
- import numpy as np
3
- import matplotlib
4
- import cv2
5
-
6
-
7
- def padRightDownCorner(img, stride, padValue):
8
- h = img.shape[0]
9
- w = img.shape[1]
10
-
11
- pad = 4 * [None]
12
- pad[0] = 0 # up
13
- pad[1] = 0 # left
14
- pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down
15
- pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right
16
-
17
- img_padded = img
18
- pad_up = np.tile(img_padded[0:1, :, :]*0 + padValue, (pad[0], 1, 1))
19
- img_padded = np.concatenate((pad_up, img_padded), axis=0)
20
- pad_left = np.tile(img_padded[:, 0:1, :]*0 + padValue, (1, pad[1], 1))
21
- img_padded = np.concatenate((pad_left, img_padded), axis=1)
22
- pad_down = np.tile(img_padded[-2:-1, :, :]*0 + padValue, (pad[2], 1, 1))
23
- img_padded = np.concatenate((img_padded, pad_down), axis=0)
24
- pad_right = np.tile(img_padded[:, -2:-1, :]*0 + padValue, (1, pad[3], 1))
25
- img_padded = np.concatenate((img_padded, pad_right), axis=1)
26
-
27
- return img_padded, pad
28
-
29
- # transfer caffe model to pytorch which will match the layer name
30
- def transfer(model, model_weights):
31
- transfered_model_weights = {}
32
- for weights_name in model.state_dict().keys():
33
- transfered_model_weights[weights_name] = model_weights['.'.join(weights_name.split('.')[1:])]
34
- return transfered_model_weights
35
-
36
- # draw the body keypoint and lims
37
- def draw_bodypose(canvas, candidate, subset):
38
- stickwidth = 4
39
- limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
40
- [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
41
- [1, 16], [16, 18], [3, 17], [6, 18]]
42
-
43
- colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], \
44
- [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], \
45
- [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]
46
- for i in range(18):
47
- for n in range(len(subset)):
48
- index = int(subset[n][i])
49
- if index == -1:
50
- continue
51
- x, y = candidate[index][0:2]
52
- cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
53
- for i in range(17):
54
- for n in range(len(subset)):
55
- index = subset[n][np.array(limbSeq[i]) - 1]
56
- if -1 in index:
57
- continue
58
- cur_canvas = canvas.copy()
59
- Y = candidate[index.astype(int), 0]
60
- X = candidate[index.astype(int), 1]
61
- mX = np.mean(X)
62
- mY = np.mean(Y)
63
- length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
64
- angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
65
- polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
66
- cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
67
- canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
68
- # plt.imsave("preview.jpg", canvas[:, :, [2, 1, 0]])
69
- # plt.imshow(canvas[:, :, [2, 1, 0]])
70
- return canvas
71
-
72
-
73
- # image drawed by opencv is not good.
74
- def draw_handpose(canvas, all_hand_peaks, show_number=False):
75
- edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
76
- [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
77
-
78
- for peaks in all_hand_peaks:
79
- for ie, e in enumerate(edges):
80
- if np.sum(np.all(peaks[e], axis=1)==0)==0:
81
- x1, y1 = peaks[e[0]]
82
- x2, y2 = peaks[e[1]]
83
- cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])*255, thickness=2)
84
-
85
- for i, keyponit in enumerate(peaks):
86
- x, y = keyponit
87
- cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
88
- if show_number:
89
- cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
90
- return canvas
91
-
92
- # detect hand according to body pose keypoints
93
- # please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
94
- def handDetect(candidate, subset, oriImg):
95
- # right hand: wrist 4, elbow 3, shoulder 2
96
- # left hand: wrist 7, elbow 6, shoulder 5
97
- ratioWristElbow = 0.33
98
- detect_result = []
99
- image_height, image_width = oriImg.shape[0:2]
100
- for person in subset.astype(int):
101
- # if any of three not detected
102
- has_left = np.sum(person[[5, 6, 7]] == -1) == 0
103
- has_right = np.sum(person[[2, 3, 4]] == -1) == 0
104
- if not (has_left or has_right):
105
- continue
106
- hands = []
107
- #left hand
108
- if has_left:
109
- left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
110
- x1, y1 = candidate[left_shoulder_index][:2]
111
- x2, y2 = candidate[left_elbow_index][:2]
112
- x3, y3 = candidate[left_wrist_index][:2]
113
- hands.append([x1, y1, x2, y2, x3, y3, True])
114
- # right hand
115
- if has_right:
116
- right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
117
- x1, y1 = candidate[right_shoulder_index][:2]
118
- x2, y2 = candidate[right_elbow_index][:2]
119
- x3, y3 = candidate[right_wrist_index][:2]
120
- hands.append([x1, y1, x2, y2, x3, y3, False])
121
-
122
- for x1, y1, x2, y2, x3, y3, is_left in hands:
123
- # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbox) = (1 + ratio) * pos_wrist - ratio * pos_elbox
124
- # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
125
- # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
126
- # const auto distanceWristElbow = getDistance(poseKeypoints, person, wrist, elbow);
127
- # const auto distanceElbowShoulder = getDistance(poseKeypoints, person, elbow, shoulder);
128
- # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
129
- x = x3 + ratioWristElbow * (x3 - x2)
130
- y = y3 + ratioWristElbow * (y3 - y2)
131
- distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
132
- distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
133
- width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
134
- # x-y refers to the center --> offset to topLeft point
135
- # handRectangle.x -= handRectangle.width / 2.f;
136
- # handRectangle.y -= handRectangle.height / 2.f;
137
- x -= width / 2
138
- y -= width / 2 # width = height
139
- # overflow the image
140
- if x < 0: x = 0
141
- if y < 0: y = 0
142
- width1 = width
143
- width2 = width
144
- if x + width > image_width: width1 = image_width - x
145
- if y + width > image_height: width2 = image_height - y
146
- width = min(width1, width2)
147
- # the max hand box value is 20 pixels
148
- if width >= 20:
149
- detect_result.append([int(x), int(y), int(width), is_left])
150
-
151
- '''
152
- return value: [[x, y, w, True if left hand else False]].
153
- width=height since the network require squared input.
154
- x, y is the coordinate of top left
155
- '''
156
- return detect_result
157
-
158
- # get max index of 2d array
159
- def npmax(array):
160
- arrayindex = array.argmax(1)
161
- arrayvalue = array.max(1)
162
- i = arrayvalue.argmax()
163
- j = arrayindex[i]
164
- return i, j
 
annotator/uniformer/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- import os
2
-
3
- from annotator.uniformer.mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot
4
- from annotator.uniformer.mmseg.core.evaluation import get_palette
5
- from annotator.util import annotator_ckpts_path
6
-
7
-
8
- checkpoint_file = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/upernet_global_small.pth"
9
-
10
-
11
- class UniformerDetector:
12
- def __init__(self):
13
- modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
14
- if not os.path.exists(modelpath):
15
- from basicsr.utils.download_util import load_file_from_url
16
- load_file_from_url(checkpoint_file, model_dir=annotator_ckpts_path)
17
- config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer", "exp", "upernet_global_small", "config.py")
18
- self.model = init_segmentor(config_file, modelpath).cuda()
19
-
20
- def __call__(self, img):
21
- result = inference_segmentor(self.model, img)
22
- res_img = show_result_pyplot(self.model, img, result, get_palette('ade'), opacity=1)
23
- return res_img
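Unlike the OpenPose detectors, the deleted UniformerDetector above hard-codes .cuda() on the segmentor, so it cannot run at all without a GPU. Below is a hedged sketch of a device-aware initialisation, reusing the same checkpoint and config paths and mmsegmentation's init_segmentor, whose public signature accepts a device argument; it is illustrative only, not the code this commit actually adds.

import os

import torch

from annotator.uniformer.mmseg.apis import init_segmentor
from annotator.util import annotator_ckpts_path

# Same checkpoint and config locations as the deleted detector above.
modelpath = os.path.join(annotator_ckpts_path, "upernet_global_small.pth")
config_file = os.path.join(os.path.dirname(annotator_ckpts_path), "uniformer",
                           "exp", "upernet_global_small", "config.py")

# Let init_segmentor place the model on whichever device is available
# instead of unconditionally calling .cuda().
device = "cuda" if torch.cuda.is_available() else "cpu"
model = init_segmentor(config_file, modelpath, device=device)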
 
annotator/uniformer/configs/_base_/datasets/ade20k.py DELETED
@@ -1,54 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'ADE20KDataset'
3
- data_root = 'data/ade/ADEChallengeData2016'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- crop_size = (512, 512)
7
- train_pipeline = [
8
- dict(type='LoadImageFromFile'),
9
- dict(type='LoadAnnotations', reduce_zero_label=True),
10
- dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12
- dict(type='RandomFlip', prob=0.5),
13
- dict(type='PhotoMetricDistortion'),
14
- dict(type='Normalize', **img_norm_cfg),
15
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16
- dict(type='DefaultFormatBundle'),
17
- dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18
- ]
19
- test_pipeline = [
20
- dict(type='LoadImageFromFile'),
21
- dict(
22
- type='MultiScaleFlipAug',
23
- img_scale=(2048, 512),
24
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25
- flip=False,
26
- transforms=[
27
- dict(type='Resize', keep_ratio=True),
28
- dict(type='RandomFlip'),
29
- dict(type='Normalize', **img_norm_cfg),
30
- dict(type='ImageToTensor', keys=['img']),
31
- dict(type='Collect', keys=['img']),
32
- ])
33
- ]
34
- data = dict(
35
- samples_per_gpu=4,
36
- workers_per_gpu=4,
37
- train=dict(
38
- type=dataset_type,
39
- data_root=data_root,
40
- img_dir='images/training',
41
- ann_dir='annotations/training',
42
- pipeline=train_pipeline),
43
- val=dict(
44
- type=dataset_type,
45
- data_root=data_root,
46
- img_dir='images/validation',
47
- ann_dir='annotations/validation',
48
- pipeline=test_pipeline),
49
- test=dict(
50
- type=dataset_type,
51
- data_root=data_root,
52
- img_dir='images/validation',
53
- ann_dir='annotations/validation',
54
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/chase_db1.py DELETED
@@ -1,59 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'ChaseDB1Dataset'
3
- data_root = 'data/CHASE_DB1'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- img_scale = (960, 999)
7
- crop_size = (128, 128)
8
- train_pipeline = [
9
- dict(type='LoadImageFromFile'),
10
- dict(type='LoadAnnotations'),
11
- dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13
- dict(type='RandomFlip', prob=0.5),
14
- dict(type='PhotoMetricDistortion'),
15
- dict(type='Normalize', **img_norm_cfg),
16
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17
- dict(type='DefaultFormatBundle'),
18
- dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19
- ]
20
- test_pipeline = [
21
- dict(type='LoadImageFromFile'),
22
- dict(
23
- type='MultiScaleFlipAug',
24
- img_scale=img_scale,
25
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26
- flip=False,
27
- transforms=[
28
- dict(type='Resize', keep_ratio=True),
29
- dict(type='RandomFlip'),
30
- dict(type='Normalize', **img_norm_cfg),
31
- dict(type='ImageToTensor', keys=['img']),
32
- dict(type='Collect', keys=['img'])
33
- ])
34
- ]
35
-
36
- data = dict(
37
- samples_per_gpu=4,
38
- workers_per_gpu=4,
39
- train=dict(
40
- type='RepeatDataset',
41
- times=40000,
42
- dataset=dict(
43
- type=dataset_type,
44
- data_root=data_root,
45
- img_dir='images/training',
46
- ann_dir='annotations/training',
47
- pipeline=train_pipeline)),
48
- val=dict(
49
- type=dataset_type,
50
- data_root=data_root,
51
- img_dir='images/validation',
52
- ann_dir='annotations/validation',
53
- pipeline=test_pipeline),
54
- test=dict(
55
- type=dataset_type,
56
- data_root=data_root,
57
- img_dir='images/validation',
58
- ann_dir='annotations/validation',
59
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/cityscapes.py DELETED
@@ -1,54 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'CityscapesDataset'
3
- data_root = 'data/cityscapes/'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- crop_size = (512, 1024)
7
- train_pipeline = [
8
- dict(type='LoadImageFromFile'),
9
- dict(type='LoadAnnotations'),
10
- dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
11
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12
- dict(type='RandomFlip', prob=0.5),
13
- dict(type='PhotoMetricDistortion'),
14
- dict(type='Normalize', **img_norm_cfg),
15
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16
- dict(type='DefaultFormatBundle'),
17
- dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18
- ]
19
- test_pipeline = [
20
- dict(type='LoadImageFromFile'),
21
- dict(
22
- type='MultiScaleFlipAug',
23
- img_scale=(2048, 1024),
24
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25
- flip=False,
26
- transforms=[
27
- dict(type='Resize', keep_ratio=True),
28
- dict(type='RandomFlip'),
29
- dict(type='Normalize', **img_norm_cfg),
30
- dict(type='ImageToTensor', keys=['img']),
31
- dict(type='Collect', keys=['img']),
32
- ])
33
- ]
34
- data = dict(
35
- samples_per_gpu=2,
36
- workers_per_gpu=2,
37
- train=dict(
38
- type=dataset_type,
39
- data_root=data_root,
40
- img_dir='leftImg8bit/train',
41
- ann_dir='gtFine/train',
42
- pipeline=train_pipeline),
43
- val=dict(
44
- type=dataset_type,
45
- data_root=data_root,
46
- img_dir='leftImg8bit/val',
47
- ann_dir='gtFine/val',
48
- pipeline=test_pipeline),
49
- test=dict(
50
- type=dataset_type,
51
- data_root=data_root,
52
- img_dir='leftImg8bit/val',
53
- ann_dir='gtFine/val',
54
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py DELETED
@@ -1,35 +0,0 @@
1
- _base_ = './cityscapes.py'
2
- img_norm_cfg = dict(
3
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
4
- crop_size = (769, 769)
5
- train_pipeline = [
6
- dict(type='LoadImageFromFile'),
7
- dict(type='LoadAnnotations'),
8
- dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)),
9
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
10
- dict(type='RandomFlip', prob=0.5),
11
- dict(type='PhotoMetricDistortion'),
12
- dict(type='Normalize', **img_norm_cfg),
13
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
14
- dict(type='DefaultFormatBundle'),
15
- dict(type='Collect', keys=['img', 'gt_semantic_seg']),
16
- ]
17
- test_pipeline = [
18
- dict(type='LoadImageFromFile'),
19
- dict(
20
- type='MultiScaleFlipAug',
21
- img_scale=(2049, 1025),
22
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
23
- flip=False,
24
- transforms=[
25
- dict(type='Resize', keep_ratio=True),
26
- dict(type='RandomFlip'),
27
- dict(type='Normalize', **img_norm_cfg),
28
- dict(type='ImageToTensor', keys=['img']),
29
- dict(type='Collect', keys=['img']),
30
- ])
31
- ]
32
- data = dict(
33
- train=dict(pipeline=train_pipeline),
34
- val=dict(pipeline=test_pipeline),
35
- test=dict(pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/drive.py DELETED
@@ -1,59 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'DRIVEDataset'
3
- data_root = 'data/DRIVE'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- img_scale = (584, 565)
7
- crop_size = (64, 64)
8
- train_pipeline = [
9
- dict(type='LoadImageFromFile'),
10
- dict(type='LoadAnnotations'),
11
- dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13
- dict(type='RandomFlip', prob=0.5),
14
- dict(type='PhotoMetricDistortion'),
15
- dict(type='Normalize', **img_norm_cfg),
16
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17
- dict(type='DefaultFormatBundle'),
18
- dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19
- ]
20
- test_pipeline = [
21
- dict(type='LoadImageFromFile'),
22
- dict(
23
- type='MultiScaleFlipAug',
24
- img_scale=img_scale,
25
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26
- flip=False,
27
- transforms=[
28
- dict(type='Resize', keep_ratio=True),
29
- dict(type='RandomFlip'),
30
- dict(type='Normalize', **img_norm_cfg),
31
- dict(type='ImageToTensor', keys=['img']),
32
- dict(type='Collect', keys=['img'])
33
- ])
34
- ]
35
-
36
- data = dict(
37
- samples_per_gpu=4,
38
- workers_per_gpu=4,
39
- train=dict(
40
- type='RepeatDataset',
41
- times=40000,
42
- dataset=dict(
43
- type=dataset_type,
44
- data_root=data_root,
45
- img_dir='images/training',
46
- ann_dir='annotations/training',
47
- pipeline=train_pipeline)),
48
- val=dict(
49
- type=dataset_type,
50
- data_root=data_root,
51
- img_dir='images/validation',
52
- ann_dir='annotations/validation',
53
- pipeline=test_pipeline),
54
- test=dict(
55
- type=dataset_type,
56
- data_root=data_root,
57
- img_dir='images/validation',
58
- ann_dir='annotations/validation',
59
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/hrf.py DELETED
@@ -1,59 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'HRFDataset'
3
- data_root = 'data/HRF'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- img_scale = (2336, 3504)
7
- crop_size = (256, 256)
8
- train_pipeline = [
9
- dict(type='LoadImageFromFile'),
10
- dict(type='LoadAnnotations'),
11
- dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13
- dict(type='RandomFlip', prob=0.5),
14
- dict(type='PhotoMetricDistortion'),
15
- dict(type='Normalize', **img_norm_cfg),
16
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17
- dict(type='DefaultFormatBundle'),
18
- dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19
- ]
20
- test_pipeline = [
21
- dict(type='LoadImageFromFile'),
22
- dict(
23
- type='MultiScaleFlipAug',
24
- img_scale=img_scale,
25
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26
- flip=False,
27
- transforms=[
28
- dict(type='Resize', keep_ratio=True),
29
- dict(type='RandomFlip'),
30
- dict(type='Normalize', **img_norm_cfg),
31
- dict(type='ImageToTensor', keys=['img']),
32
- dict(type='Collect', keys=['img'])
33
- ])
34
- ]
35
-
36
- data = dict(
37
- samples_per_gpu=4,
38
- workers_per_gpu=4,
39
- train=dict(
40
- type='RepeatDataset',
41
- times=40000,
42
- dataset=dict(
43
- type=dataset_type,
44
- data_root=data_root,
45
- img_dir='images/training',
46
- ann_dir='annotations/training',
47
- pipeline=train_pipeline)),
48
- val=dict(
49
- type=dataset_type,
50
- data_root=data_root,
51
- img_dir='images/validation',
52
- ann_dir='annotations/validation',
53
- pipeline=test_pipeline),
54
- test=dict(
55
- type=dataset_type,
56
- data_root=data_root,
57
- img_dir='images/validation',
58
- ann_dir='annotations/validation',
59
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/pascal_context.py DELETED
@@ -1,60 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'PascalContextDataset'
3
- data_root = 'data/VOCdevkit/VOC2010/'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
-
7
- img_scale = (520, 520)
8
- crop_size = (480, 480)
9
-
10
- train_pipeline = [
11
- dict(type='LoadImageFromFile'),
12
- dict(type='LoadAnnotations'),
13
- dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
14
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15
- dict(type='RandomFlip', prob=0.5),
16
- dict(type='PhotoMetricDistortion'),
17
- dict(type='Normalize', **img_norm_cfg),
18
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19
- dict(type='DefaultFormatBundle'),
20
- dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21
- ]
22
- test_pipeline = [
23
- dict(type='LoadImageFromFile'),
24
- dict(
25
- type='MultiScaleFlipAug',
26
- img_scale=img_scale,
27
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28
- flip=False,
29
- transforms=[
30
- dict(type='Resize', keep_ratio=True),
31
- dict(type='RandomFlip'),
32
- dict(type='Normalize', **img_norm_cfg),
33
- dict(type='ImageToTensor', keys=['img']),
34
- dict(type='Collect', keys=['img']),
35
- ])
36
- ]
37
- data = dict(
38
- samples_per_gpu=4,
39
- workers_per_gpu=4,
40
- train=dict(
41
- type=dataset_type,
42
- data_root=data_root,
43
- img_dir='JPEGImages',
44
- ann_dir='SegmentationClassContext',
45
- split='ImageSets/SegmentationContext/train.txt',
46
- pipeline=train_pipeline),
47
- val=dict(
48
- type=dataset_type,
49
- data_root=data_root,
50
- img_dir='JPEGImages',
51
- ann_dir='SegmentationClassContext',
52
- split='ImageSets/SegmentationContext/val.txt',
53
- pipeline=test_pipeline),
54
- test=dict(
55
- type=dataset_type,
56
- data_root=data_root,
57
- img_dir='JPEGImages',
58
- ann_dir='SegmentationClassContext',
59
- split='ImageSets/SegmentationContext/val.txt',
60
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/pascal_context_59.py DELETED
@@ -1,60 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'PascalContextDataset59'
3
- data_root = 'data/VOCdevkit/VOC2010/'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
-
7
- img_scale = (520, 520)
8
- crop_size = (480, 480)
9
-
10
- train_pipeline = [
11
- dict(type='LoadImageFromFile'),
12
- dict(type='LoadAnnotations', reduce_zero_label=True),
13
- dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
14
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15
- dict(type='RandomFlip', prob=0.5),
16
- dict(type='PhotoMetricDistortion'),
17
- dict(type='Normalize', **img_norm_cfg),
18
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19
- dict(type='DefaultFormatBundle'),
20
- dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21
- ]
22
- test_pipeline = [
23
- dict(type='LoadImageFromFile'),
24
- dict(
25
- type='MultiScaleFlipAug',
26
- img_scale=img_scale,
27
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28
- flip=False,
29
- transforms=[
30
- dict(type='Resize', keep_ratio=True),
31
- dict(type='RandomFlip'),
32
- dict(type='Normalize', **img_norm_cfg),
33
- dict(type='ImageToTensor', keys=['img']),
34
- dict(type='Collect', keys=['img']),
35
- ])
36
- ]
37
- data = dict(
38
- samples_per_gpu=4,
39
- workers_per_gpu=4,
40
- train=dict(
41
- type=dataset_type,
42
- data_root=data_root,
43
- img_dir='JPEGImages',
44
- ann_dir='SegmentationClassContext',
45
- split='ImageSets/SegmentationContext/train.txt',
46
- pipeline=train_pipeline),
47
- val=dict(
48
- type=dataset_type,
49
- data_root=data_root,
50
- img_dir='JPEGImages',
51
- ann_dir='SegmentationClassContext',
52
- split='ImageSets/SegmentationContext/val.txt',
53
- pipeline=test_pipeline),
54
- test=dict(
55
- type=dataset_type,
56
- data_root=data_root,
57
- img_dir='JPEGImages',
58
- ann_dir='SegmentationClassContext',
59
- split='ImageSets/SegmentationContext/val.txt',
60
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/pascal_voc12.py DELETED
@@ -1,57 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'PascalVOCDataset'
3
- data_root = 'data/VOCdevkit/VOC2012'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- crop_size = (512, 512)
7
- train_pipeline = [
8
- dict(type='LoadImageFromFile'),
9
- dict(type='LoadAnnotations'),
10
- dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
11
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
12
- dict(type='RandomFlip', prob=0.5),
13
- dict(type='PhotoMetricDistortion'),
14
- dict(type='Normalize', **img_norm_cfg),
15
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
16
- dict(type='DefaultFormatBundle'),
17
- dict(type='Collect', keys=['img', 'gt_semantic_seg']),
18
- ]
19
- test_pipeline = [
20
- dict(type='LoadImageFromFile'),
21
- dict(
22
- type='MultiScaleFlipAug',
23
- img_scale=(2048, 512),
24
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
25
- flip=False,
26
- transforms=[
27
- dict(type='Resize', keep_ratio=True),
28
- dict(type='RandomFlip'),
29
- dict(type='Normalize', **img_norm_cfg),
30
- dict(type='ImageToTensor', keys=['img']),
31
- dict(type='Collect', keys=['img']),
32
- ])
33
- ]
34
- data = dict(
35
- samples_per_gpu=4,
36
- workers_per_gpu=4,
37
- train=dict(
38
- type=dataset_type,
39
- data_root=data_root,
40
- img_dir='JPEGImages',
41
- ann_dir='SegmentationClass',
42
- split='ImageSets/Segmentation/train.txt',
43
- pipeline=train_pipeline),
44
- val=dict(
45
- type=dataset_type,
46
- data_root=data_root,
47
- img_dir='JPEGImages',
48
- ann_dir='SegmentationClass',
49
- split='ImageSets/Segmentation/val.txt',
50
- pipeline=test_pipeline),
51
- test=dict(
52
- type=dataset_type,
53
- data_root=data_root,
54
- img_dir='JPEGImages',
55
- ann_dir='SegmentationClass',
56
- split='ImageSets/Segmentation/val.txt',
57
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py DELETED
@@ -1,9 +0,0 @@
1
- _base_ = './pascal_voc12.py'
2
- # dataset settings
3
- data = dict(
4
- train=dict(
5
- ann_dir=['SegmentationClass', 'SegmentationClassAug'],
6
- split=[
7
- 'ImageSets/Segmentation/train.txt',
8
- 'ImageSets/Segmentation/aug.txt'
9
- ]))
 
annotator/uniformer/configs/_base_/datasets/stare.py DELETED
@@ -1,59 +0,0 @@
1
- # dataset settings
2
- dataset_type = 'STAREDataset'
3
- data_root = 'data/STARE'
4
- img_norm_cfg = dict(
5
- mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
- img_scale = (605, 700)
7
- crop_size = (128, 128)
8
- train_pipeline = [
9
- dict(type='LoadImageFromFile'),
10
- dict(type='LoadAnnotations'),
11
- dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)),
12
- dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
13
- dict(type='RandomFlip', prob=0.5),
14
- dict(type='PhotoMetricDistortion'),
15
- dict(type='Normalize', **img_norm_cfg),
16
- dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
17
- dict(type='DefaultFormatBundle'),
18
- dict(type='Collect', keys=['img', 'gt_semantic_seg'])
19
- ]
20
- test_pipeline = [
21
- dict(type='LoadImageFromFile'),
22
- dict(
23
- type='MultiScaleFlipAug',
24
- img_scale=img_scale,
25
- # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0],
26
- flip=False,
27
- transforms=[
28
- dict(type='Resize', keep_ratio=True),
29
- dict(type='RandomFlip'),
30
- dict(type='Normalize', **img_norm_cfg),
31
- dict(type='ImageToTensor', keys=['img']),
32
- dict(type='Collect', keys=['img'])
33
- ])
34
- ]
35
-
36
- data = dict(
37
- samples_per_gpu=4,
38
- workers_per_gpu=4,
39
- train=dict(
40
- type='RepeatDataset',
41
- times=40000,
42
- dataset=dict(
43
- type=dataset_type,
44
- data_root=data_root,
45
- img_dir='images/training',
46
- ann_dir='annotations/training',
47
- pipeline=train_pipeline)),
48
- val=dict(
49
- type=dataset_type,
50
- data_root=data_root,
51
- img_dir='images/validation',
52
- ann_dir='annotations/validation',
53
- pipeline=test_pipeline),
54
- test=dict(
55
- type=dataset_type,
56
- data_root=data_root,
57
- img_dir='images/validation',
58
- ann_dir='annotations/validation',
59
- pipeline=test_pipeline))
 
annotator/uniformer/configs/_base_/default_runtime.py DELETED
@@ -1,14 +0,0 @@
1
- # yapf:disable
2
- log_config = dict(
3
- interval=50,
4
- hooks=[
5
- dict(type='TextLoggerHook', by_epoch=False),
6
- # dict(type='TensorboardLoggerHook')
7
- ])
8
- # yapf:enable
9
- dist_params = dict(backend='nccl')
10
- log_level = 'INFO'
11
- load_from = None
12
- resume_from = None
13
- workflow = [('train', 1)]
14
- cudnn_benchmark = True
 
annotator/uniformer/configs/_base_/models/ann_r50-d8.py DELETED
@@ -1,46 +0,0 @@
1
- # model settings
2
- norm_cfg = dict(type='SyncBN', requires_grad=True)
3
- model = dict(
4
- type='EncoderDecoder',
5
- pretrained='open-mmlab://resnet50_v1c',
6
- backbone=dict(
7
- type='ResNetV1c',
8
- depth=50,
9
- num_stages=4,
10
- out_indices=(0, 1, 2, 3),
11
- dilations=(1, 1, 2, 4),
12
- strides=(1, 2, 1, 1),
13
- norm_cfg=norm_cfg,
14
- norm_eval=False,
15
- style='pytorch',
16
- contract_dilation=True),
17
- decode_head=dict(
18
- type='ANNHead',
19
- in_channels=[1024, 2048],
20
- in_index=[2, 3],
21
- channels=512,
22
- project_channels=256,
23
- query_scales=(1, ),
24
- key_pool_scales=(1, 3, 6, 8),
25
- dropout_ratio=0.1,
26
- num_classes=19,
27
- norm_cfg=norm_cfg,
28
- align_corners=False,
29
- loss_decode=dict(
30
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
31
- auxiliary_head=dict(
32
- type='FCNHead',
33
- in_channels=1024,
34
- in_index=2,
35
- channels=256,
36
- num_convs=1,
37
- concat_input=False,
38
- dropout_ratio=0.1,
39
- num_classes=19,
40
- norm_cfg=norm_cfg,
41
- align_corners=False,
42
- loss_decode=dict(
43
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
44
- # model training and testing settings
45
- train_cfg=dict(),
46
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py DELETED
@@ -1,44 +0,0 @@
1
- # model settings
2
- norm_cfg = dict(type='SyncBN', requires_grad=True)
3
- model = dict(
4
- type='EncoderDecoder',
5
- pretrained='open-mmlab://resnet50_v1c',
6
- backbone=dict(
7
- type='ResNetV1c',
8
- depth=50,
9
- num_stages=4,
10
- out_indices=(0, 1, 2, 3),
11
- dilations=(1, 1, 2, 4),
12
- strides=(1, 2, 1, 1),
13
- norm_cfg=norm_cfg,
14
- norm_eval=False,
15
- style='pytorch',
16
- contract_dilation=True),
17
- decode_head=dict(
18
- type='APCHead',
19
- in_channels=2048,
20
- in_index=3,
21
- channels=512,
22
- pool_scales=(1, 2, 3, 6),
23
- dropout_ratio=0.1,
24
- num_classes=19,
25
- norm_cfg=dict(type='SyncBN', requires_grad=True),
26
- align_corners=False,
27
- loss_decode=dict(
28
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29
- auxiliary_head=dict(
30
- type='FCNHead',
31
- in_channels=1024,
32
- in_index=2,
33
- channels=256,
34
- num_convs=1,
35
- concat_input=False,
36
- dropout_ratio=0.1,
37
- num_classes=19,
38
- norm_cfg=norm_cfg,
39
- align_corners=False,
40
- loss_decode=dict(
41
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42
- # model training and testing settings
43
- train_cfg=dict(),
44
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py DELETED
@@ -1,44 +0,0 @@
1
- # model settings
2
- norm_cfg = dict(type='SyncBN', requires_grad=True)
3
- model = dict(
4
- type='EncoderDecoder',
5
- pretrained='open-mmlab://resnet50_v1c',
6
- backbone=dict(
7
- type='ResNetV1c',
8
- depth=50,
9
- num_stages=4,
10
- out_indices=(0, 1, 2, 3),
11
- dilations=(1, 1, 2, 4),
12
- strides=(1, 2, 1, 1),
13
- norm_cfg=norm_cfg,
14
- norm_eval=False,
15
- style='pytorch',
16
- contract_dilation=True),
17
- decode_head=dict(
18
- type='CCHead',
19
- in_channels=2048,
20
- in_index=3,
21
- channels=512,
22
- recurrence=2,
23
- dropout_ratio=0.1,
24
- num_classes=19,
25
- norm_cfg=norm_cfg,
26
- align_corners=False,
27
- loss_decode=dict(
28
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29
- auxiliary_head=dict(
30
- type='FCNHead',
31
- in_channels=1024,
32
- in_index=2,
33
- channels=256,
34
- num_convs=1,
35
- concat_input=False,
36
- dropout_ratio=0.1,
37
- num_classes=19,
38
- norm_cfg=norm_cfg,
39
- align_corners=False,
40
- loss_decode=dict(
41
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42
- # model training and testing settings
43
- train_cfg=dict(),
44
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/cgnet.py DELETED
@@ -1,35 +0,0 @@
1
- # model settings
2
- norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
3
- model = dict(
4
- type='EncoderDecoder',
5
- backbone=dict(
6
- type='CGNet',
7
- norm_cfg=norm_cfg,
8
- in_channels=3,
9
- num_channels=(32, 64, 128),
10
- num_blocks=(3, 21),
11
- dilations=(2, 4),
12
- reductions=(8, 16)),
13
- decode_head=dict(
14
- type='FCNHead',
15
- in_channels=256,
16
- in_index=2,
17
- channels=256,
18
- num_convs=0,
19
- concat_input=False,
20
- dropout_ratio=0,
21
- num_classes=19,
22
- norm_cfg=norm_cfg,
23
- loss_decode=dict(
24
- type='CrossEntropyLoss',
25
- use_sigmoid=False,
26
- loss_weight=1.0,
27
- class_weight=[
28
- 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
29
- 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
30
- 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
31
- 10.396974, 10.055647
32
- ])),
33
- # model training and testing settings
34
- train_cfg=dict(sampler=None),
35
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/danet_r50-d8.py DELETED
@@ -1,44 +0,0 @@
1
- # model settings
2
- norm_cfg = dict(type='SyncBN', requires_grad=True)
3
- model = dict(
4
- type='EncoderDecoder',
5
- pretrained='open-mmlab://resnet50_v1c',
6
- backbone=dict(
7
- type='ResNetV1c',
8
- depth=50,
9
- num_stages=4,
10
- out_indices=(0, 1, 2, 3),
11
- dilations=(1, 1, 2, 4),
12
- strides=(1, 2, 1, 1),
13
- norm_cfg=norm_cfg,
14
- norm_eval=False,
15
- style='pytorch',
16
- contract_dilation=True),
17
- decode_head=dict(
18
- type='DAHead',
19
- in_channels=2048,
20
- in_index=3,
21
- channels=512,
22
- pam_channels=64,
23
- dropout_ratio=0.1,
24
- num_classes=19,
25
- norm_cfg=norm_cfg,
26
- align_corners=False,
27
- loss_decode=dict(
28
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
29
- auxiliary_head=dict(
30
- type='FCNHead',
31
- in_channels=1024,
32
- in_index=2,
33
- channels=256,
34
- num_convs=1,
35
- concat_input=False,
36
- dropout_ratio=0.1,
37
- num_classes=19,
38
- norm_cfg=norm_cfg,
39
- align_corners=False,
40
- loss_decode=dict(
41
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
42
- # model training and testing settings
43
- train_cfg=dict(),
44
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py DELETED
@@ -1,44 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='ASPPHead',
- in_channels=2048,
- in_index=3,
- channels=512,
- dilations=(1, 12, 24, 36),
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
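The "-d8" in these ResNet-50 filenames means an output stride of 8: the ResNetV1c stem downsamples by 4, strides=(1, 2, 1, 1) add only one further 2x reduction, and dilations=(1, 1, 2, 4) keep the receptive field growing in the last two stages instead of shrinking the feature map. A quick, self-contained check of that arithmetic (the stem factor of 4 is the standard ResNet stem behaviour, stated here as an assumption):

# Sketch: why these ResNetV1c backbones are "d8" (output stride 8).
stem_stride = 4               # stride-2 stem conv followed by stride-2 max-pool
stage_strides = (1, 2, 1, 1)  # strides from the backbone config above
output_stride = stem_stride
for s in stage_strides:
    output_stride *= s
print(output_stride)          # 8 -> features are 1/8 of the input resolution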
 
annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py DELETED
@@ -1,50 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained=None,
- backbone=dict(
- type='UNet',
- in_channels=3,
- base_channels=64,
- num_stages=5,
- strides=(1, 1, 1, 1, 1),
- enc_num_convs=(2, 2, 2, 2, 2),
- dec_num_convs=(2, 2, 2, 2),
- downsamples=(True, True, True, True),
- enc_dilations=(1, 1, 1, 1, 1),
- dec_dilations=(1, 1, 1, 1),
- with_cp=False,
- conv_cfg=None,
- norm_cfg=norm_cfg,
- act_cfg=dict(type='ReLU'),
- upsample_cfg=dict(type='InterpConv'),
- norm_eval=False),
- decode_head=dict(
- type='ASPPHead',
- in_channels=64,
- in_index=4,
- channels=16,
- dilations=(1, 12, 24, 36),
- dropout_ratio=0.1,
- num_classes=2,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=128,
- in_index=3,
- channels=64,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=2,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='slide', crop_size=256, stride=170))
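Unlike the other configs in this group, the UNet variant tests with mode='slide': the image is tiled into 256-pixel crops taken every 170 pixels and the overlapping logits are averaged. A self-contained sketch of the window placement along one axis, mirroring the usual clamp-the-last-crop-to-the-border behaviour (the example length of 565 is arbitrary):

import math

def slide_starts(length, crop=256, stride=170):
    # Start offsets of sliding-window crops along one axis; the last crop is
    # clamped so it ends exactly at the image border.
    n = max(math.ceil((length - crop) / stride) + 1, 1)
    return [min(i * stride, max(length - crop, 0)) for i in range(n)]

print(slide_starts(565))  # [0, 170, 309]: three overlapping 256-pixel crops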
 
annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py DELETED
@@ -1,46 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='DepthwiseSeparableASPPHead',
- in_channels=2048,
- in_index=3,
- channels=512,
- dilations=(1, 12, 24, 36),
- c1_in_channels=256,
- c1_channels=48,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/dmnet_r50-d8.py DELETED
@@ -1,44 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='DMHead',
- in_channels=2048,
- in_index=3,
- channels=512,
- filter_sizes=(1, 3, 5, 7),
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=dict(type='SyncBN', requires_grad=True),
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/dnl_r50-d8.py DELETED
@@ -1,46 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='DNLHead',
- in_channels=2048,
- in_index=3,
- channels=512,
- dropout_ratio=0.1,
- reduction=2,
- use_scale=True,
- mode='embedded_gaussian',
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/emanet_r50-d8.py DELETED
@@ -1,47 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='EMAHead',
- in_channels=2048,
- in_index=3,
- channels=256,
- ema_channels=512,
- num_bases=64,
- num_stages=3,
- momentum=0.1,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/encnet_r50-d8.py DELETED
@@ -1,48 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='EncHead',
- in_channels=[512, 1024, 2048],
- in_index=(1, 2, 3),
- channels=512,
- num_codes=32,
- use_se_loss=True,
- add_lateral=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
- loss_se_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
 
annotator/uniformer/configs/_base_/models/fast_scnn.py DELETED
@@ -1,57 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
- model = dict(
- type='EncoderDecoder',
- backbone=dict(
- type='FastSCNN',
- downsample_dw_channels=(32, 48),
- global_in_channels=64,
- global_block_channels=(64, 96, 128),
- global_block_strides=(2, 2, 1),
- global_out_channels=128,
- higher_in_channels=64,
- lower_in_channels=128,
- fusion_out_channels=128,
- out_indices=(0, 1, 2),
- norm_cfg=norm_cfg,
- align_corners=False),
- decode_head=dict(
- type='DepthwiseSeparableFCNHead',
- in_channels=128,
- channels=128,
- concat_input=False,
- num_classes=19,
- in_index=-1,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
- auxiliary_head=[
- dict(
- type='FCNHead',
- in_channels=128,
- channels=32,
- num_convs=1,
- num_classes=19,
- in_index=-2,
- norm_cfg=norm_cfg,
- concat_input=False,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
- dict(
- type='FCNHead',
- in_channels=64,
- channels=32,
- num_convs=1,
- num_classes=19,
- in_index=-3,
- norm_cfg=norm_cfg,
- concat_input=False,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
- ],
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
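Every head in this Fast-SCNN config trains with use_sigmoid=True (and, unusually, the decode head's loss weight is 0.4 rather than 1.0). Roughly speaking, use_sigmoid=False corresponds to softmax cross-entropy over the 19 classes, while use_sigmoid=True treats each class as an independent binary prediction; a self-contained PyTorch sketch of the two variants (the random tensors are placeholders):

import torch
import torch.nn.functional as F

logits = torch.randn(2, 19, 8, 8)             # N x C x H x W segmentation logits
target = torch.randint(0, 19, (2, 8, 8))      # per-pixel class indices

softmax_ce = F.cross_entropy(logits, target)  # use_sigmoid=False style
one_hot = F.one_hot(target, num_classes=19).permute(0, 3, 1, 2).float()
sigmoid_ce = F.binary_cross_entropy_with_logits(logits, one_hot)  # use_sigmoid=True style
print(softmax_ce.item(), sigmoid_ce.item())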
 
annotator/uniformer/configs/_base_/models/fcn_hr18.py DELETED
@@ -1,52 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://msra/hrnetv2_w18',
- backbone=dict(
- type='HRNet',
- norm_cfg=norm_cfg,
- norm_eval=False,
- extra=dict(
- stage1=dict(
- num_modules=1,
- num_branches=1,
- block='BOTTLENECK',
- num_blocks=(4, ),
- num_channels=(64, )),
- stage2=dict(
- num_modules=1,
- num_branches=2,
- block='BASIC',
- num_blocks=(4, 4),
- num_channels=(18, 36)),
- stage3=dict(
- num_modules=4,
- num_branches=3,
- block='BASIC',
- num_blocks=(4, 4, 4),
- num_channels=(18, 36, 72)),
- stage4=dict(
- num_modules=3,
- num_branches=4,
- block='BASIC',
- num_blocks=(4, 4, 4, 4),
- num_channels=(18, 36, 72, 144)))),
- decode_head=dict(
- type='FCNHead',
- in_channels=[18, 36, 72, 144],
- in_index=(0, 1, 2, 3),
- channels=sum([18, 36, 72, 144]),
- input_transform='resize_concat',
- kernel_size=1,
- num_convs=1,
- concat_input=False,
- dropout_ratio=-1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))
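Here a single FCNHead consumes all four HRNet branches at once: in_index=(0, 1, 2, 3) with input_transform='resize_concat' upsamples every branch to the largest feature map and concatenates them along channels, which is why channels=sum([18, 36, 72, 144]). A self-contained PyTorch sketch of that fusion (the spatial sizes are illustrative):

import torch
import torch.nn.functional as F

# Four HRNet branch outputs: 18/36/72/144 channels at decreasing resolution.
feats = [torch.randn(1, c, 128 // 2**i, 128 // 2**i)
         for i, c in enumerate([18, 36, 72, 144])]
target_size = feats[0].shape[2:]
fused = torch.cat([F.interpolate(f, size=target_size, mode='bilinear', align_corners=False)
                   for f in feats], dim=1)
print(fused.shape)  # torch.Size([1, 270, 128, 128]) -> 18 + 36 + 72 + 144 = 270 channels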
 
annotator/uniformer/configs/_base_/models/fcn_r50-d8.py DELETED
@@ -1,45 +0,0 @@
- # model settings
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- model = dict(
- type='EncoderDecoder',
- pretrained='open-mmlab://resnet50_v1c',
- backbone=dict(
- type='ResNetV1c',
- depth=50,
- num_stages=4,
- out_indices=(0, 1, 2, 3),
- dilations=(1, 1, 2, 4),
- strides=(1, 2, 1, 1),
- norm_cfg=norm_cfg,
- norm_eval=False,
- style='pytorch',
- contract_dilation=True),
- decode_head=dict(
- type='FCNHead',
- in_channels=2048,
- in_index=3,
- channels=512,
- num_convs=2,
- concat_input=True,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
- auxiliary_head=dict(
- type='FCNHead',
- in_channels=1024,
- in_index=2,
- channels=256,
- num_convs=1,
- concat_input=False,
- dropout_ratio=0.1,
- num_classes=19,
- norm_cfg=norm_cfg,
- align_corners=False,
- loss_decode=dict(
- type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
- # model training and testing settings
- train_cfg=dict(),
- test_cfg=dict(mode='whole'))