glenn-jocher committed
Commit bb87276
Parent(s): 8bf3cff

update build_targets() (#589)
Signed-off-by: Glenn Jocher <[email protected]>

utils/utils.py CHANGED (+48 −55)
@@ -308,7 +308,7 @@ def compute_ap(recall, precision):
 
 def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):
     # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
-    box2 = box2.t()
+    box2 = box2.T
 
     # Get the coordinates of bounding boxes
     if x1y1x2y2:  # x1, y1, x2, y2 = box1
@@ -347,7 +347,7 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False):
                 v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                 with torch.no_grad():
                     alpha = v / (1 - iou + v + 1e-16)
-                return iou - (rho2 / c2 + v * alpha
+                return iou - (rho2 / c2 + v * alpha)  # CIoU
 
     return iou
 
@@ -369,8 +369,8 @@ def box_iou(box1, box2):
         # box = 4xn
         return (box[2] - box[0]) * (box[3] - box[1])
 
-    area1 = box_area(box1.t())
-    area2 = box_area(box2.t())
+    area1 = box_area(box1.T)
+    area2 = box_area(box2.T)
 
     # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
     inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
@@ -439,70 +439,62 @@ class BCEBlurWithLogitsLoss(nn.Module):
 
 def compute_loss(p, targets, model):  # predictions, targets, model
     device = targets.device
-    ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor
-    lcls, lbox, lobj = ft([0]).to(device), ft([0]).to(device), ft([0]).to(device)
+    lcls, lbox, lobj = torch.zeros(3, 1, device=device)
     tcls, tbox, indices, anchors = build_targets(p, targets, model)  # targets
     h = model.hyp  # hyperparameters
-    red = 'mean'  # Loss reduction (sum or mean)
 
     # Define criteria
-    BCEcls = nn.BCEWithLogitsLoss(pos_weight=ft([h['cls_pw']]), reduction=red).to(device)
-    BCEobj = nn.BCEWithLogitsLoss(pos_weight=ft([h['obj_pw']]), reduction=red).to(device)
+    BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['cls_pw']])).to(device)
+    BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([h['obj_pw']])).to(device)
 
-    # class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
+    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
     cp, cn = smooth_BCE(eps=0.0)
 
-    # focal loss
+    # Focal loss
     g = h['fl_gamma']  # focal loss gamma
     if g > 0:
         BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
 
-    # per output
+    # Losses
     nt = 0  # number of targets
     np = len(p)  # number of outputs
     balance = [4.0, 1.0, 0.4] if np == 3 else [4.0, 1.0, 0.4, 0.1]  # P3-5 or P3-6
     for i, pi in enumerate(p):  # layer index, layer predictions
         b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
-        tobj = torch.zeros_like(pi[..., 0])  # target obj
+        tobj = torch.zeros_like(pi[..., 0], device=device)  # target obj
 
-        nb = b.shape[0]  # number of targets
-        if nb:
-            nt += nb  # cumulative targets
+        n = b.shape[0]  # number of targets
+        if n:
+            nt += n  # cumulative targets
             ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
 
-            # GIoU
+            # Regression
             pxy = ps[:, :2].sigmoid() * 2. - 0.5
             pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
             pbox = torch.cat((pxy, pwh), 1).to(device)  # predicted box
-            giou = bbox_iou(pbox.t(), tbox[i], x1y1x2y2=False, CIoU=True)  # giou(prediction, target)
-            lbox += (1.0 - giou).sum() if red == 'sum' else (1.0 - giou).mean()  # giou loss
+            giou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # giou(prediction, target)
+            lbox += (1.0 - giou).mean()  # giou loss
 
-            # Obj
+            # Objectness
             tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)  # giou ratio
 
-            # Class
+            # Classification
             if model.nc > 1:  # cls loss (only if multiple classes)
-                t = torch.full_like(ps[:, 5:], cn).to(device)  # targets
-                t[range(nb), tcls[i]] = cp
-                lcls += BCEcls(ps[:, 5:], t)  # BCE
+                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
+                t[range(n), tcls[i]] = cp
+                lcls = lcls + BCEcls(ps[:, 5:], t)  # BCE
 
             # Append targets to text file
             # with open('targets.txt', 'a') as file:
            #     [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
 
-        lobj += BCEobj(pi[..., 4], tobj) * balance[i]  # obj loss
+        lobj = lobj + BCEobj(pi[..., 4], tobj) * balance[i]  # obj loss
 
     s = 3 / np  # output count scaling
     lbox *= h['giou'] * s
     lobj *= h['obj'] * s * (1.4 if np == 4 else 1.)
     lcls *= h['cls'] * s
     bs = tobj.shape[0]  # batch size
-    if red == 'sum':
-        g = 3.0  # loss gain
-        lobj *= g / bs
-        if nt:
-            lcls *= g / nt / model.nc
-            lbox *= g / nt
 
     loss = lbox + lobj + lcls
     return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
@@ -510,40 +502,40 @@ def compute_loss(p, targets, model):  # predictions, targets, model
 
 def build_targets(p, targets, model):
     # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
-    det = model.module.model[-1] if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) \
-        else model.model[-1]  # Detect() module
+    det = model.module.model[-1] if torch_utils.is_parallel(model) else model.model[-1]  # Detect() module
     na, nt = det.na, targets.shape[0]  # number of anchors, targets
     tcls, tbox, indices, anch = [], [], [], []
-    gain = torch.ones(6, device=targets.device)  # normalized to gridspace gain
-    off = torch.tensor([[1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device).float()  # overlap offsets
-    at = torch.arange(na).view(na, 1).repeat(1, nt)  # anchor tensor, same as .repeat_interleave(nt)
+    gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
+    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
+    targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices
+
+    g = 0.5  # bias
+    off = torch.tensor([[0, 0],
+                        [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
+                        # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
+                        ], device=targets.device).float() * g  # offsets
 
-    g = 0.5  # offset
-    style = 'rect4'
     for i in range(det.nl):
         anchors = det.anchors[i]
-        gain[2:] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
+        gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
 
         # Match targets to anchors
-        a, t, offsets = [], targets * gain, 0
+        t, offsets = targets * gain, 0
         if nt:
-            r = t[None, :, 4:6] / anchors[:, None]  # wh ratio
+            # Matches
+            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
             j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t']  # compare
-            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)
-            a, t = at[j], t.repeat(na, 1, 1)[j]  # filter
+            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
+            t = t[j]  # filter
 
-            # overlaps
+            # Offsets
             gxy = t[:, 2:4]  # grid xy
-            z = torch.zeros_like(gxy)
-            if style == 'rect2':
-                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-                a, t = torch.cat((a, a[j], a[k]), 0), torch.cat((t, t[j], t[k]), 0)
-                offsets = torch.cat((z, z[j] + off[0], z[k] + off[1]), 0) * g
-            elif style == 'rect4':
-                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-                l, m = ((gxy % 1. > (1 - g)) & (gxy < (gain[[2, 3]] - 1.))).T
-                a, t = torch.cat((a, a[j], a[k], a[l], a[m]), 0), torch.cat((t, t[j], t[k], t[l], t[m]), 0)
-                offsets = torch.cat((z, z[j] + off[0], z[k] + off[1], z[l] + off[2], z[m] + off[3]), 0) * g
+            gxi = gain[[2, 3]] - gxy  # inverse
+            j, k = ((gxy % 1. < g) & (gxy > 1.)).T
+            l, m = ((gxi % 1. < g) & (gxi > 1.)).T
+            j = torch.stack((torch.ones_like(j), j, k, l, m))
+            t = t.repeat((5, 1, 1))[j]
+            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
 
         # Define
         b, c = t[:, :2].long().T  # image, class
@@ -553,6 +545,7 @@ def build_targets(p, targets, model):
         gi, gj = gij.T  # grid xy indices
 
         # Append
+        a = t[:, 6].long()  # anchor indices
         indices.append((b, a, gj, gi))  # image, anchor, grid indices
         tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
         anch.append(anchors[a])  # anchors
@@ -599,7 +592,7 @@ def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False):
 
         # Detections matrix nx6 (xyxy, conf, cls)
         if multi_label:
-            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).t()
+            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
             x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
         else:  # best class only
             conf, j = x[:, 5:].max(1, keepdim=True)
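Review note on the recurring .t() -> .T change: for the 2-D tensors involved here the two are interchangeable (Tensor.T is the transpose attribute), so this part of the commit is a cosmetic modernization. A minimal check:

    import torch

    box2 = torch.rand(5, 4)               # n x 4 boxes
    assert torch.equal(box2.t(), box2.T)  # identical for 2-D tensors
    print(box2.T.shape)                   # torch.Size([4, 5]); rows are x1, y1, x2, y2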
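Review note on the new loss initialization in compute_loss(): unpacking torch.zeros(3, 1, device=device) iterates over dim 0, so lcls, lbox and lobj each start as a 1-element row view of one shared zero tensor. A minimal sketch (values illustrative):

    import torch

    device = 'cpu'
    lcls, lbox, lobj = torch.zeros(3, 1, device=device)  # three (1,)-shaped row views
    print(lcls.shape, lbox.shape, lobj.shape)            # torch.Size([1]) each
    lcls = lcls + 1.0                                    # out-of-place add, as in the loop body
    print(lcls, lbox)                                    # tensor([1.]) tensor([0.])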
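Review note on the matching step in build_targets(): a target/anchor pair now survives when both its width and height ratios against the anchor fall within hyp['anchor_t'], in place of the commented-out IoU test. A self-contained sketch with made-up anchors and targets (the 4.0 threshold here stands in for anchor_t):

    import torch

    na, nt = 3, 4
    anchors = torch.tensor([[1.25, 1.625], [2.0, 3.75], [4.125, 2.875]])  # wh in grid units
    targets = torch.tensor([[0, 0, 10.2, 20.7, 2.0, 3.0],                 # image,class,x,y,w,h
                            [0, 1, 40.5, 33.1, 6.0, 5.0],
                            [1, 0, 15.0, 15.0, 0.8, 0.9],
                            [1, 2, 70.4, 60.8, 3.5, 3.5]])

    ai = torch.arange(na).float().view(na, 1).repeat(1, nt)       # anchor index per copy
    t = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # (na, nt, 7)

    r = t[:, :, 4:6] / anchors[:, None]                           # wh ratio, (na, nt, 2)
    j = torch.max(r, 1. / r).max(2)[0] < 4.0                      # anchor_t-style test
    print(t[j].shape)                                             # (n_matched, 7)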
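Review note on the offset step that follows: each surviving target is replicated five times (its own cell plus four neighbours), and the stacked mask keeps the original copy plus the two cells its centre sits closest to, so a target lands in up to three cells. A worked sketch with one hand-picked target; the final line reproduces gij = (gxy - offsets).long() from the unchanged "Define" lines below the hunk:

    import torch

    g = 0.5  # bias
    off = torch.tensor([[0, 0],
                        [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
                        ]).float() * g

    gain = torch.tensor([1., 1., 80., 80., 80., 80., 1.])   # e.g. an 80x80 grid
    t = torch.tensor([[0., 0., 12.3, 45.7, 3.0, 4.0, 0.]])  # image,class,x,y,w,h,anchor

    gxy = t[:, 2:4]                                     # grid xy: (12.3, 45.7)
    gxi = gain[[2, 3]] - gxy                            # inverse: (67.7, 34.3)
    j, k = ((gxy % 1. < g) & (gxy > 1.)).T              # near left / upper cell edge
    l, m = ((gxi % 1. < g) & (gxi > 1.)).T              # near right / lower cell edge
    j = torch.stack((torch.ones_like(j), j, k, l, m))   # (5, n) keep-mask
    t = t.repeat((5, 1, 1))[j]                          # 3 copies survive here
    offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
    print((t[:, 2:4] - offsets).long())                 # cells (12,45), (11,45), (12,46)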