|
import cv2 |
|
import random |
|
import colorsys |
|
import numpy as np |
|
import tensorflow as tf |
|
from core.config import cfg |
|
|
|
def load_freeze_layer(model='yolov4', tiny=False):
    """
    Return the conv layer names associated with a model variant.

    :param model: 'yolov3' selects the v3 names; any other value is treated
        as yolov4.
    :param tiny: truthy selects the two-layer "tiny" variant, otherwise the
        three-layer full variant.
    :return: a new list of Keras-style layer names ('conv2d_<index>').

    NOTE(review): judging by the function name these are the layers used when
    freezing; the indices coincide with `output_pos` in `load_weights` —
    confirm against the training script.
    """
    uses_v3 = model == 'yolov3'
    if tiny:
        layer_indices = (9, 12) if uses_v3 else (17, 20)
    else:
        layer_indices = (58, 66, 74) if uses_v3 else (93, 101, 109)
    # Build a fresh list on every call so callers may mutate the result.
    return ['conv2d_%d' % index for index in layer_indices]
|
|
|
def load_weights(model, weights_file, model_name='yolov4', is_tiny=False):
    """
    Copy convolution / batch-norm parameters from a binary weights file into `model`.

    The file is consumed sequentially: a header of five int32 values, then,
    for each conv layer ('conv2d', 'conv2d_1', ...), either four per-filter
    float32 vectors for the matching batch-norm layer or, for the layers
    listed in `output_pos`, a per-filter float32 bias, followed by the
    float32 kernel.

    :param model: object exposing `get_layer(name)`; conv layers must provide
        `filters`, `kernel_size`, `input_shape` and `set_weights`, batch-norm
        layers must provide `set_weights`.
    :param weights_file: path of the binary weights file.
    :param model_name: 'yolov3' selects the v3 layout; any other value uses
        the yolov4 layout.
    :param is_tiny: truthy selects the tiny layout of the chosen model.
    :raises ValueError: if the header is shorter than five int32 values. A
        file truncated later surfaces as a ValueError from the reshape calls.
    """
    if is_tiny:
        if model_name == 'yolov3':
            layer_size, output_pos = 13, [9, 12]
        else:
            layer_size, output_pos = 21, [17, 20]
    else:
        if model_name == 'yolov3':
            layer_size, output_pos = 75, [58, 66, 74]
        else:
            layer_size, output_pos = 110, [93, 101, 109]

    # The context manager closes the file even when a layer lookup or a
    # reshape raises part-way through.
    with open(weights_file, 'rb') as wf:
        # Header: five int32 values that are skipped, not interpreted.
        header = np.fromfile(wf, dtype=np.int32, count=5)
        if header.size != 5:
            raise ValueError(
                'weights file %r is too short: expected a 5 x int32 header'
                % (weights_file,)
            )

        j = 0  # index of the next batch-norm layer (output layers have none)
        for i in range(layer_size):
            conv_layer_name = 'conv2d_%d' % i if i > 0 else 'conv2d'
            bn_layer_name = 'batch_normalization_%d' % j if j > 0 else 'batch_normalization'

            conv_layer = model.get_layer(conv_layer_name)
            filters = conv_layer.filters
            k_size = conv_layer.kernel_size[0]  # kernels are treated as square
            in_dim = conv_layer.input_shape[-1]

            is_output_layer = i in output_pos
            if is_output_layer:
                # Output layers carry a bias instead of batch-norm parameters.
                conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
            else:
                bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
                # Swap the first two rows of the (4, filters) table.
                # Presumably the file stores (beta, gamma, mean, variance)
                # while Keras expects (gamma, beta, mean, variance) — confirm.
                bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
                bn_layer = model.get_layer(bn_layer_name)
                j += 1

            # Kernel is stored as (filters, in_dim, k, k) and handed to the
            # layer as (k, k, in_dim, filters).
            conv_shape = (filters, in_dim, k_size, k_size)
            # np.prod: the `np.product` alias was removed in NumPy 2.0.
            conv_weights = np.fromfile(wf, dtype=np.float32, count=np.prod(conv_shape))
            conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])

            if is_output_layer:
                conv_layer.set_weights([conv_weights, conv_bias])
            else:
                conv_layer.set_weights([conv_weights])
                bn_layer.set_weights(bn_weights)
|
|
|
|
|
def read_class_names(class_file_name):
    """
    Read a class-name file into a dict keyed by line number.

    :param class_file_name: path of a text file with one class name per line.
    :return: dict mapping the zero-based line index to that line with leading
        and trailing newline characters removed (other whitespace is kept).
    """
    with open(class_file_name, 'r') as class_file:
        return {
            index: line.strip('\n')
            for index, line in enumerate(class_file)
        }
|
|
|
def load_config(FLAGS):
    """
    Collect the model hyper-parameters selected by the command-line flags.

    :param FLAGS: object with a `tiny` attribute (truthy for the tiny model)
        and a `model` attribute ('yolov4' or 'yolov3').
    :return: tuple `(STRIDES, ANCHORS, NUM_CLASS, XYSCALE)` where STRIDES is a
        numpy array built from the config, ANCHORS is the reshaped anchor
        array from `get_anchors`, NUM_CLASS is the number of entries in the
        class-name file `cfg.YOLO.CLASSES`, and XYSCALE is the config value
        for yolov4 or a list of ones otherwise.
    :raises ValueError: for the non-tiny model when `FLAGS.model` is neither
        'yolov4' nor 'yolov3'. Previously this left the anchors unassigned
        and failed with an UnboundLocalError at the return statement.
    """
    is_v4 = FLAGS.model == 'yolov4'

    if FLAGS.tiny:
        # The tiny variant uses the same anchors for every model name.
        strides = np.array(cfg.YOLO.STRIDES_TINY)
        anchors = get_anchors(cfg.YOLO.ANCHORS_TINY, FLAGS.tiny)
        xyscale = cfg.YOLO.XYSCALE_TINY if is_v4 else [1, 1]
    else:
        if is_v4:
            anchor_values = cfg.YOLO.ANCHORS
        elif FLAGS.model == 'yolov3':
            anchor_values = cfg.YOLO.ANCHORS_V3
        else:
            raise ValueError(
                "unsupported model %r: expected 'yolov4' or 'yolov3'" % (FLAGS.model,)
            )
        strides = np.array(cfg.YOLO.STRIDES)
        anchors = get_anchors(anchor_values, FLAGS.tiny)
        xyscale = cfg.YOLO.XYSCALE if is_v4 else [1, 1, 1]

    num_class = len(read_class_names(cfg.YOLO.CLASSES))

    return strides, anchors, num_class, xyscale
|
|
|
def get_anchors(anchors_path, tiny=False):
    """
    Reshape a flat sequence of anchor values into per-scale groups.

    :param anchors_path: despite the name, this is the anchor values
        themselves (anything `np.array` accepts), not a file path.
    :param tiny: truthy reshapes to (2, 3, 2); otherwise to (3, 3, 2).
    :return: numpy array of shape (groups, 3, 2).
    """
    group_count = 2 if tiny else 3
    return np.array(anchors_path).reshape(group_count, 3, 2)
|
|
|
def image_preprocess(image, target_size, gt_boxes=None):
    """
    Resize an image to fit `target_size` without distortion and pad the rest.

    The image is scaled by the largest factor that keeps it inside the
    target, centred on a canvas filled with 128, and the canvas is divided
    by 255.

    :param image: array whose shape unpacks as (height, width, channels).
    :param target_size: (height, width) of the output canvas.
    :param gt_boxes: optional 2-D array; columns 0 and 2 receive the
        horizontal scale/offset and columns 1 and 3 the vertical one. The
        array is modified IN PLACE and also returned.
    :return: the padded image when `gt_boxes` is None, otherwise the tuple
        `(padded_image, gt_boxes)`.
    """
    target_h, target_w = target_size
    source_h, source_w, _ = image.shape

    # One common factor for both axes keeps the aspect ratio.
    ratio = min(target_w / source_w, target_h / source_h)
    new_w = int(ratio * source_w)
    new_h = int(ratio * source_h)
    resized = cv2.resize(image, (new_w, new_h))

    canvas = np.full(shape=[target_h, target_w, 3], fill_value=128.0)
    pad_x = (target_w - new_w) // 2
    pad_y = (target_h - new_h) // 2
    canvas[pad_y:new_h + pad_y, pad_x:new_w + pad_x, :] = resized
    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas

    # Move the boxes into the coordinate frame of the padded canvas.
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * ratio + pad_x
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * ratio + pad_y
    return canvas, gt_boxes
|
|
|
def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
    """
    Draw detection boxes (and optionally labels) onto `image`.

    :param image: array whose shape unpacks as (height, width, channels); it
        is drawn on in place.
    :param bboxes: 4-tuple `(boxes, scores, classes, num_boxes)`. Only index 0
        of each entry is used. Each box is read as (y_min, x_min, y_max,
        x_max) and multiplied by the image height/width — presumably
        normalised to [0, 1]; confirm with the caller.
    :param classes: dict mapping class index to class name. NOTE: the default
        is evaluated once, when this module is imported.
    :param show_label: when truthy, a filled label with the class name and
        score is drawn above each box.
    :return: the same `image` object.

    Differences from the earlier implementation:
    - a class index equal to `len(classes)` is skipped instead of raising
      IndexError;
    - the caller's box array is no longer overwritten with pixel coordinates;
    - OpenCV receives plain `int` coordinates instead of numpy floats;
    - colours are shuffled with a private RNG, which yields the same order as
      seeding the global `random` module with 0 but leaves its state alone.
    """
    num_classes = len(classes)
    image_h, image_w, _ = image.shape

    # One evenly spaced hue per class, converted to 0-255 integer triples.
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    colors = [(int(c[0] * 255), int(c[1] * 255), int(c[2] * 255)) for c in colors]
    random.Random(0).shuffle(colors)

    out_boxes, out_scores, out_classes, num_boxes = bboxes

    # Loop-invariant drawing parameters.
    font_scale = 0.5
    bbox_thick = int(0.6 * (image_h + image_w) / 600)

    for i in range(num_boxes[0]):
        class_ind = int(out_classes[0][i])
        # Valid indices are 0 .. num_classes - 1.
        if class_ind < 0 or class_ind >= num_classes:
            continue

        coor = out_boxes[0][i]
        # Convert to pixel coordinates in locals; `coor` is left untouched.
        y_min = int(coor[0] * image_h)
        y_max = int(coor[2] * image_h)
        x_min = int(coor[1] * image_w)
        x_max = int(coor[3] * image_w)

        score = out_scores[0][i]
        bbox_color = colors[class_ind]
        c1, c2 = (x_min, y_min), (x_max, y_max)
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)

        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            t_size = cv2.getTextSize(bbox_mess, 0, font_scale, thickness=bbox_thick // 2)[0]
            # Filled background sized to the text, sitting above the box corner.
            c3 = (int(c1[0] + t_size[0]), int(c1[1] - t_size[1] - 3))
            cv2.rectangle(image, c1, c3, bbox_color, -1)

            cv2.putText(image, bbox_mess, (c1[0], c1[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
                        font_scale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
    return image
|
|
|
def bbox_iou(bboxes1, bboxes2):
    """
    Intersection over Union.

    The last axis of each input is read as (centre, centre, extent, extent):
    the first two entries are the box centre and the remaining ones its size.

    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return (max(a,A), max(b,B), ...)
    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    centers1, sizes1 = bboxes1[..., :2], bboxes1[..., 2:]
    centers2, sizes2 = bboxes2[..., :2], bboxes2[..., 2:]

    area1 = bboxes1[..., 2] * bboxes1[..., 3]
    area2 = bboxes2[..., 2] * bboxes2[..., 3]

    # Centre/size -> (min corner, max corner).
    half1 = sizes1 * 0.5
    half2 = sizes2 * 0.5
    corners1 = tf.concat([centers1 - half1, centers1 + half1], axis=-1)
    corners2 = tf.concat([centers2 - half2, centers2 + half2], axis=-1)

    overlap_min = tf.maximum(corners1[..., :2], corners2[..., :2])
    overlap_max = tf.minimum(corners1[..., 2:], corners2[..., 2:])

    # Negative extents mean the boxes do not overlap; clamp them to zero.
    overlap = tf.maximum(overlap_max - overlap_min, 0.0)
    intersection = overlap[..., 0] * overlap[..., 1]

    union = area1 + area2 - intersection

    # divide_no_nan yields 0 where the union is 0.
    return tf.math.divide_no_nan(intersection, union)
|
|
|
|
|
def bbox_giou(bboxes1, bboxes2):
    """
    Generalized IoU: IoU minus the fraction of the smallest enclosing box
    that is not covered by the union of the two boxes.

    The last axis of each input is read as (centre, centre, extent, extent).

    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return (max(a,A), max(b,B), ...)
    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    area1, area2 = (
        boxes[..., 2] * boxes[..., 3] for boxes in (bboxes1, bboxes2)
    )

    # Centre/size -> (min corner, max corner) for both inputs.
    corners1, corners2 = (
        tf.concat(
            [
                boxes[..., :2] - boxes[..., 2:] * 0.5,
                boxes[..., :2] + boxes[..., 2:] * 0.5,
            ],
            axis=-1,
        )
        for boxes in (bboxes1, bboxes2)
    )
    min1, max1 = corners1[..., :2], corners1[..., 2:]
    min2, max2 = corners2[..., :2], corners2[..., 2:]

    # Plain IoU.
    overlap = tf.maximum(tf.minimum(max1, max2) - tf.maximum(min1, min2), 0.0)
    intersection = overlap[..., 0] * overlap[..., 1]
    union = area1 + area2 - intersection
    iou = tf.math.divide_no_nan(intersection, union)

    # Smallest box containing both inputs.
    hull = tf.maximum(max1, max2) - tf.minimum(min1, min2)
    hull_area = hull[..., 0] * hull[..., 1]

    penalty = tf.math.divide_no_nan(hull_area - union, hull_area)
    return iou - penalty
|
|
|
|
|
def bbox_ciou(bboxes1, bboxes2):
    """
    Complete IoU: IoU minus a centre-distance term and an aspect-ratio term.

    The last axis of each input is read as (centre, centre, extent, extent).

    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return (max(a,A), max(b,B), ...)
    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    divide = tf.math.divide_no_nan

    def to_corners(boxes):
        # Centre/size -> (min corner, max corner).
        return tf.concat(
            [
                boxes[..., :2] - boxes[..., 2:] * 0.5,
                boxes[..., :2] + boxes[..., 2:] * 0.5,
            ],
            axis=-1,
        )

    area1 = bboxes1[..., 2] * bboxes1[..., 3]
    area2 = bboxes2[..., 2] * bboxes2[..., 3]

    corners1 = to_corners(bboxes1)
    corners2 = to_corners(bboxes2)

    # Plain IoU.
    overlap_min = tf.maximum(corners1[..., :2], corners2[..., :2])
    overlap_max = tf.minimum(corners1[..., 2:], corners2[..., 2:])
    overlap = tf.maximum(overlap_max - overlap_min, 0.0)
    intersection = overlap[..., 0] * overlap[..., 1]
    union = area1 + area2 - intersection
    iou = divide(intersection, union)

    # Squared diagonal of the smallest box enclosing both inputs.
    hull_min = tf.minimum(corners1[..., :2], corners2[..., :2])
    hull_max = tf.maximum(corners1[..., 2:], corners2[..., 2:])
    hull = hull_max - hull_min
    c_2 = hull[..., 0] ** 2 + hull[..., 1] ** 2

    # Squared distance between the two box centres.
    center_offset = bboxes2[..., :2] - bboxes1[..., :2]
    rho_2 = center_offset[..., 0] ** 2 + center_offset[..., 1] ** 2

    diou = iou - divide(rho_2, c_2)

    # Aspect-ratio consistency: difference of atan(extent ratio) scaled by 2/pi, squared.
    angle1 = tf.math.atan(divide(bboxes1[..., 2], bboxes1[..., 3]))
    angle2 = tf.math.atan(divide(bboxes2[..., 2], bboxes2[..., 3]))
    v = ((angle1 - angle2) * 2 / np.pi) ** 2

    alpha = divide(v, 1 - iou + v)

    return diou - alpha * v
|
|
|
def _bbox_iou_corners(box, boxes):
    """Return the IoU of one (xmin, ymin, xmax, ymax) box against an (n, 4) array of such boxes."""
    box_area = (box[2] - box[0]) * (box[3] - box[1])
    boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    left_up = np.maximum(box[:2], boxes[:, :2])
    right_down = np.minimum(box[2:4], boxes[:, 2:4])

    # Negative extents mean no overlap; clamp them to zero.
    inter_section = np.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[:, 0] * inter_section[:, 1]
    union_area = box_area + boxes_area - inter_area

    # IoU is defined as 0 where the union is empty (no division by zero).
    iou = np.zeros(union_area.shape, dtype=np.float64)
    np.divide(inter_area, union_area, out=iou, where=union_area > 0)
    return iou


def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """
    Per-class non-maximum suppression.

    :param bboxes: 2-D numpy array with rows
        (xmin, ymin, xmax, ymax, score, class). The array is not modified.
    :param iou_threshold: for method 'nms', boxes whose IoU with the selected
        box exceeds this value are discarded.
    :param sigma: for method 'soft-nms', scores are multiplied by
        exp(-iou**2 / sigma).
    :param method: 'nms' or 'soft-nms'.
    :return: list of selected rows, grouped by ascending class value and, in
        each class, in the order they were selected.
    :raises ValueError: if `method` is not one of the supported values.

    The IoU is computed directly on corner coordinates. The module-level
    `bbox_iou` interprets its inputs as (centre, size) boxes and therefore
    gives wrong overlaps for the corner-format rows handled here.

    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    """
    if method not in ('nms', 'soft-nms'):
        raise ValueError("method must be 'nms' or 'soft-nms', got %r" % (method,))

    best_bboxes = []

    # np.unique gives a deterministic, sorted class order.
    for cls in np.unique(bboxes[:, 5]):
        # Boolean-mask indexing copies, so the caller's array stays untouched.
        cls_bboxes = bboxes[bboxes[:, 5] == cls]

        while len(cls_bboxes) > 0:
            max_ind = np.argmax(cls_bboxes[:, 4])
            best_bbox = cls_bboxes[max_ind]
            best_bboxes.append(best_bbox)

            cls_bboxes = np.concatenate([cls_bboxes[:max_ind], cls_bboxes[max_ind + 1:]])
            iou = _bbox_iou_corners(best_bbox[:4], cls_bboxes[:, :4])

            if method == 'nms':
                # Hard suppression: zero the score of heavily overlapping boxes.
                weight = np.where(iou > iou_threshold, 0.0, 1.0)
            else:
                # Soft suppression: decay the score with the overlap.
                weight = np.exp(-(1.0 * iou ** 2 / sigma))

            cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
            cls_bboxes = cls_bboxes[cls_bboxes[:, 4] > 0.]

    return best_bboxes
|
|
|
def freeze_all(model, frozen=True):
    """
    Recursively set `trainable` on a model (or layer) and all nested layers.

    :param model: object with a `trainable` attribute; when it is a
        `tf.keras.Model`, each entry of `model.layers` is processed too.
    :param frozen: `trainable` is set to `not frozen`.
    """
    model.trainable = not frozen
    if not isinstance(model, tf.keras.Model):
        # Plain layers have no sub-layers to descend into.
        return
    for layer in model.layers:
        freeze_all(layer, frozen)
|
def unfreeze_all(model, frozen=False):
    """
    Recursively set `trainable` on a model (or layer) and all nested layers.

    Same traversal as `freeze_all`, but `frozen` defaults to False, so by
    default everything becomes trainable.

    :param model: object with a `trainable` attribute; when it is a
        `tf.keras.Model`, each entry of `model.layers` is processed too.
    :param frozen: `trainable` is set to `not frozen`.
    """
    model.trainable = not frozen
    if not isinstance(model, tf.keras.Model):
        # Plain layers have no sub-layers to descend into.
        return
    for layer in model.layers:
        unfreeze_all(layer, frozen)
|
|
|
|