Pusheen committed on
Commit
7cb3306
1 Parent(s): 127d448

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +131 -0
utils.py CHANGED
@@ -1,5 +1,136 @@
1
  import torch
 
2
  import math
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  def compute_ca_loss(attn_maps_mid, attn_maps_up, bboxes, object_positions):
4
  loss = 0
5
  object_number = len(bboxes)
 
1
  import torch
2
+ from torch import nn
3
  import math
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import logging
6
+ import os
7
+ import pandas as pd
8
+ import csv
9
+ import pickle
10
+ import numpy as np
11
+
12
+ from torch.nn import BCELoss
13
+
14
+ from torch.nn import functional as F
15
+ import math
16
+ import numbers
17
+ from typing import List
18
+
19
def _upsample_square_maps(attn_maps, side, out_size=64):
    """Collect the maps in ``attn_maps`` laid out on a ``side`` x ``side`` grid, resized to ``out_size``.

    Each usable entry is squeezed on dim 0 and must then be 3-D; its middle
    axis is interpreted as a flattened square grid. Entries whose middle axis
    does not hold exactly ``side * side`` elements are ignored.

    Returns:
        A list of tensors shaped ``(n, out_size, out_size, last_dim)``.
    """
    upsampled = []
    for entry in attn_maps:
        # Empty lists are treated as "nothing recorded" placeholders. Testing
        # the type explicitly avoids comparing a tensor against a list.
        if isinstance(entry, (list, tuple)) and not entry:
            continue
        if entry.numel() == 0:
            continue

        attn_map = entry.squeeze(0)
        _, num_cells, last_dim = attn_map.shape
        if side * side != num_cells:
            continue

        grid = attn_map.reshape(-1, side, side, last_dim)
        # F.interpolate resizes the trailing two axes, so move the grid axes
        # to the end for the resize and restore the layout afterwards.
        grid = grid.permute(0, 3, 1, 2)
        grid = F.interpolate(grid, out_size, mode='bilinear')
        upsampled.append(grid.permute(0, 2, 3, 1))
    return upsampled


def get_all_attention_64(attn_maps_down, attn_maps_mid, attn_maps_up, res=16):
    """Average the selected attention maps after upsampling them to 64 x 64.

    Maps from ``attn_maps_up`` and ``attn_maps_down`` are used when their
    flattened grid is ``res`` x ``res``; maps from ``attn_maps_mid`` are used
    when it is 8 x 8. Empty-list placeholders are skipped in all three groups.

    Args:
        attn_maps_down: Iterable of attention tensors (or empty lists).
        attn_maps_mid: Iterable of attention tensors (or empty lists).
        attn_maps_up: Iterable of attention tensors (or empty lists).
        res: Grid side length selected from the up and down groups.

    Returns:
        Tensor of shape ``(64, 64, last_dim)``: the mean over every selected
        map, where ``last_dim`` is the size of the maps' final axis.

    Raises:
        RuntimeError: If no map in any group matches the requested grid size.
    """
    result = []
    result.extend(_upsample_square_maps(attn_maps_up, res))
    result.extend(_upsample_square_maps(attn_maps_mid, 8))
    result.extend(_upsample_square_maps(attn_maps_down, res))

    if not result:
        raise RuntimeError(
            "get_all_attention_64: no attention map matched the requested resolution"
        )

    stacked = torch.cat(result, dim=0)
    return stacked.sum(0) / stacked.shape[0]
65
+
66
+
67
def compute_loco_v2(attn_maps_down, attn_maps_mid, attn_maps_up, bboxes, object_positions, smooth_attn=True, topk=0.8):
    """Compute a box-guided attention loss plus a weighted padding-map term.

    The attention maps are merged by ``get_all_attention_64`` into a single
    ``(H, W, tokens)`` map. For every object, the channels listed in
    ``object_positions`` are summed, divided by their maximum, and split into
    the mass inside and outside the object's boxes. The first term is
    ``(1 - inside / (inside + outside)) ** 2`` accumulated over all objects.
    The second term is a binary cross-entropy between the sigmoid of a blend
    of channel 0 and the last channel and that blend masked by the union of
    all boxes; it is weighted by ``alpha = 0.2``.

    Args:
        attn_maps_down: Indexable collection; entries ``[-1]`` and ``[-2]``
            are concatenated with ``+`` and passed on as the "down" maps.
        attn_maps_mid: Passed unchanged to ``get_all_attention_64``.
        attn_maps_up: Indexable collection; entries ``[0]`` and ``[1]`` are
            concatenated with ``+`` and passed on as the "up" maps.
        bboxes: One sequence of boxes per object. Each box is indexed as
            ``(x_min, y_min, x_max, y_max)`` and scaled by the map size, so
            the values are presumably normalized to [0, 1] -- TODO confirm.
        object_positions: Per object, the index (list) used to select
            channels along the map's last axis.
        smooth_attn: Unused; kept for interface compatibility.
        topk: Unused; kept for interface compatibility.

    Returns:
        Scalar tensor with the combined loss. When ``bboxes`` is empty a zero
        float tensor is returned (on CUDA when available).
    """
    alpha = 0.2  # weight of the padding term in the returned total
    beta = 0.8   # share of the channel-0 map in the blended padding map

    object_number = len(bboxes)
    if object_number == 0:
        zero = torch.tensor(0).float()
        return zero.cuda() if torch.cuda.is_available() else zero

    attn_map = get_all_attention_64(
        attn_maps_down[-1] + attn_maps_down[-2],
        attn_maps_mid,
        attn_maps_up[0] + attn_maps_up[1],
        16,
    )

    H, _, _ = attn_map.shape
    W = H
    # Allocate masks next to the attention map rather than on a hard-coded
    # CUDA device, so the function also works on CPU-only hosts.
    device = attn_map.device
    total_fg_map = torch.zeros(size=(H, W), device=device)

    sum_in = 0.
    sum_out = 0.
    for obj_idx in range(object_number):
        mask = torch.zeros(size=(H, W), device=device)
        for obj_box in bboxes[obj_idx]:
            x_min, y_min = int(obj_box[0] * W), int(obj_box[1] * H)
            x_max, y_max = int(obj_box[2] * W), int(obj_box[3] * H)
            mask[y_min:y_max, x_min:x_max] = 1
            # The union of every object's boxes feeds the padding term below.
            total_fg_map[y_min:y_max, x_min:x_max] = 1

        ca_map_obj = attn_map[:, :, object_positions[obj_idx]].sum(-1).reshape(H, W)
        norm_ca_map_obj = ca_map_obj / ca_map_obj.max()

        sum_in += (norm_ca_map_obj * mask).sum()
        sum_out += (norm_ca_map_obj * (1 - mask)).sum()

    # Channel 0 and the last channel; the original names suggest start- and
    # end-of-text tokens -- TODO confirm against the tokenizer layout.
    sot_map = attn_map[:, :, 0].reshape(H, W)
    eot_map = attn_map[:, :, -1].reshape(H, W)

    norm_sot_map = (1 - sot_map) / (1 - sot_map).max()
    norm_eot_map = eot_map / eot_map.max()
    pad_map = beta * norm_sot_map + (1 - beta) * norm_eot_map
    fg_map = pad_map * total_fg_map

    # NOTE(review): both operands are cast to float16 as in the original;
    # confirm half-precision BCE is supported on every target device.
    pad_loss = F.binary_cross_entropy(
        torch.sigmoid(pad_map.to(torch.float16).reshape(-1)),
        fg_map.to(torch.float16).reshape(-1),
    )

    loss = (1 - sum_in / (sum_in + sum_out)) ** 2
    return loss + alpha * pad_loss
133
+
134
  def compute_ca_loss(attn_maps_mid, attn_maps_up, bboxes, object_positions):
135
  loss = 0
136
  object_number = len(bboxes)