kbrodt committed
Commit 5bd9834
1 Parent(s): 74d6764

Upload pose_estimation.py

Files changed (1)
  1. src/pose_estimation.py +266 -0
src/pose_estimation.py ADDED
@@ -0,0 +1,266 @@
import math

import cv2
import numpy as np

# model input size as (width, height)
IMG_SIZE = (288, 384)
# ImageNet normalisation statistics (RGB order)
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])

# keypoint names, in model output order
KPS = (
    "Head",
    "Neck",
    "Right Shoulder",
    "Right Arm",
    "Right Hand",
    "Left Shoulder",
    "Left Arm",
    "Left Hand",
    "Spine",
    "Hips",
    "Right Upper Leg",
    "Right Leg",
    "Right Foot",
    "Left Upper Leg",
    "Left Leg",
    "Left Foot",
    "Left Toe",
    "Right Toe",
)

# bones as pairs of indices into KPS
SKELETON = (
    (0, 1),
    (1, 8),
    (8, 9),
    (9, 10),
    (9, 13),
    (10, 11),
    (11, 12),
    (13, 14),
    (14, 15),
    (1, 2),
    (2, 3),
    (3, 4),
    (1, 5),
    (5, 6),
    (6, 7),
    (15, 16),
    (12, 17),
)


# maps OpenPose 25-keypoint ids to indices into KPS (-1 = unused)
OPENPOSE_TO_GESTURE = (
    0,   # 0  Head
    1,   # 1  Neck
    2,   # 2  Right Shoulder
    3,   # 3  Right Arm
    4,   # 4  Right Hand
    5,   # 5  Left Shoulder
    6,   # 6  Left Arm
    7,   # 7  Left Hand
    9,   # 8  Hips
    10,  # 9  Right Upper Leg
    11,  # 10 Right Leg
    12,  # 11 Right Foot
    13,  # 12 Left Upper Leg
    14,  # 13 Left Leg
    15,  # 14 Left Foot
    -1,  # 15
    -1,  # 16
    -1,  # 17
    -1,  # 18
    16,  # 19 Left Toe
    -1,  # 20
    -1,  # 21
    17,  # 22 Right Toe
    -1,  # 23
    -1,  # 24
)


def transform(img):
    # scale to [0, 1], standardise with the ImageNet stats, then HWC -> CHW
    img = img.astype("float32") / 255

    img = (img - MEAN) / STD

    # MEAN/STD are float64, so the arithmetic upcasts; keep the network input in float32
    return np.transpose(img, axes=(2, 0, 1)).astype(np.float32)


def get_affine_transform(
    center,
    scale,
    rot,
    output_size,
    shift=np.array([0, 0], dtype=np.float32),
    inv=0,
    pixel_std=200,
):
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale])

    scale_tmp = scale * pixel_std
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    # the warp is defined by three corresponding points: the box centre,
    # a point rotated by `rot`, and a third point perpendicular to them
    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result


def process_image(path, input_img_size, pixel_std=200):
    data_numpy = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # convert BGR (OpenCV default) to RGB to match MEAN/STD
    data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

    h, w = data_numpy.shape[:2]
    c = np.array([w / 2, h / 2], dtype=np.float32)

    # expand the crop so it matches the network's aspect ratio
    aspect_ratio = input_img_size[0] / input_img_size[1]
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    s = np.array([w / pixel_std, h / pixel_std], dtype=np.float32) * 1.25
    r = 0
    trans = get_affine_transform(c, s, r, input_img_size, pixel_std=pixel_std)
    model_input = cv2.warpAffine(data_numpy, trans, input_img_size, flags=cv2.INTER_LINEAR)

    model_input = transform(model_input)

    return model_input, data_numpy, c, s


def get_final_preds(batch_heatmaps, center, scale, post_process=False):
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing: shift each peak a quarter pixel towards the
    # neighbouring heatmap gradient for sub-pixel accuracy
    if post_process:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array(
                        [
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px],
                        ]
                    )
                    coords[n][p] += np.sign(diff) * 0.25

    preds = coords.copy()

    # transform heatmap coordinates back to the original image
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
        )

    return preds, maxvals


def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.0])
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def get_max_preds(batch_heatmaps):
    """
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    """
    assert isinstance(
        batch_heatmaps, np.ndarray
    ), "batch_heatmaps should be numpy.ndarray"
    assert batch_heatmaps.ndim == 4, "batch_images should be 4-ndim"

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    # flat argmax index -> (x, y) heatmap coordinates
    preds[:, :, 0] = preds[:, :, 0] % width
    preds[:, :, 1] = np.floor(preds[:, :, 1] / width)

    # zero out joints whose peak score is not positive
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals


def infer_single_image(model, img_path, input_img_size=(288, 384), return_kps=True):
    img_path = str(img_path)
    pose_input, img, center, scale = process_image(
        img_path, input_img_size=input_img_size
    )
    # model is an OpenCV dnn network; feed a 1 x C x H x W blob
    model.setInput(pose_input[None])
    predicted_heatmap = model.forward()

    if not return_kps:
        return predicted_heatmap.squeeze(0)

    predicted_keypoints, confidence = get_final_preds(
        predicted_heatmap, center[None], scale[None], post_process=True
    )

    predicted_keypoints = predicted_keypoints.squeeze(0)
    confidence = confidence.squeeze(0)
    predicted_heatmap = predicted_heatmap.squeeze(0)

    return img, predicted_keypoints, confidence, predicted_heatmap
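
For reference, a minimal usage sketch of infer_single_image is given below. It assumes the pose network has been exported to ONNX and is loadable with OpenCV's dnn module; the file names pose_model.onnx and person.jpg and the src.pose_estimation import path are placeholders, not part of this commit. Any cv2.dnn-readable network that outputs heatmaps of shape (1, len(KPS), H, W) is used the same way.

import cv2

from src.pose_estimation import IMG_SIZE, KPS, infer_single_image

# placeholder model path; assumes an ONNX export readable by cv2.dnn
model = cv2.dnn.readNetFromONNX("pose_model.onnx")

img, keypoints, confidence, heatmaps = infer_single_image(
    model, "person.jpg", input_img_size=IMG_SIZE
)

# keypoints: (len(KPS), 2) image coordinates, confidence: (len(KPS), 1) peak scores
for name, (x, y), (score,) in zip(KPS, keypoints, confidence):
    print(f"{name}: ({x:.1f}, {y:.1f})  score={score:.2f}")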