Simon Slamka commited on
Commit
a5240f9
1 Parent(s): 7bcf3d8
Files changed (2) hide show
  1. face_grab.py +66 -0
  2. gradcam.py +128 -0
face_grab.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import cv2 as cv
3
+ import numpy as np
4
+ import dlib
5
+ from typing import Optional
6
+
7
+ logging.basicConfig(level=logging.INFO)
8
+
9
+
10
class FaceGrabber:
    """Detect and crop the first face found in an image.

    Detectors are tried as a fallback chain, stopping at the first hit:
    dlib's HOG frontal detector, dlib's CNN (MMOD) detector, then a list
    of OpenCV Haar cascades.
    """

    def __init__(self):
        # Haar cascade fallbacks, tried in this order if dlib finds nothing
        self.cascades = [
            "haarcascade_frontalface_default.xml",
            "haarcascade_frontalface_alt.xml",
            "haarcascade_frontalface_alt2.xml",
            "haarcascade_frontalface_alt_tree.xml"
        ]
        self.detector = dlib.get_frontal_face_detector()  # HOG-based face detector
        self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks_GTX.dat")  # 68-point landmark predictor (currently unused here)
        self.mmod = dlib.cnn_face_detection_model_v1("mmod_human_face_detector.dat")  # CNN (MMOD) face detector
        self.paddingBy = 0.1  # pad the crop by 10% of the face box

    def grab_faces(self, img: np.ndarray, bGray: bool = False) -> Optional[np.ndarray]:
        """Return a padded crop around the first detected face, or None.

        Args:
            img: image as a numpy array (BGR as loaded by OpenCV; callers
                 also pass RGB — detection works either way, the crop keeps
                 whatever channel order came in).
            bGray: if True, convert to grayscale before detecting/cropping.

        Returns:
            The cropped face region, or None when no detector found a face.
        """
        if bGray:
            img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # convert to grayscale

        detected = None  # (x, y, w, h) of the first face found

        # 1) dlib HOG detector
        faces = self.detector(img)
        if len(faces) > 0:
            face = faces[0]
            detected = (face.left(), face.top(), face.width(), face.height())
            logging.info("Face detected by dlib")

        # 2) dlib CNN (MMOD) detector
        if detected is None:
            faces = self.mmod(img)
            if len(faces) > 0:
                rect = faces[0].rect
                detected = (rect.left(), rect.top(), rect.width(), rect.height())
                logging.info("Face detected by mmod")

        # 3) OpenCV Haar cascades — last resort only.
        # Fix: the original ran this loop unconditionally, clobbering a
        # detection already made by dlib/mmod above.
        if detected is None:
            for cascade in self.cascades:
                cascadeClassifier = cv.CascadeClassifier(cv.data.haarcascades + cascade)
                faces = cascadeClassifier.detectMultiScale(img, scaleFactor=1.5, minNeighbors=5)
                if len(faces) > 0:
                    detected = tuple(faces[0])
                    logging.info(f"Face detected by {cascade}")
                    break

        if detected is None:
            return None

        x, y, w, h = detected
        padW = int(self.paddingBy * w)  # horizontal padding
        padH = int(self.paddingBy * h)  # vertical padding
        # Fix: grayscale arrays are 2-D, so the original 3-way unpack
        # (imgH, imgW, _ = img.shape) raised ValueError when bGray=True.
        imgH, imgW = img.shape[:2]
        x = max(0, x - padW)
        y = max(0, y - padH)
        w = min(imgW - x, w + 2 * padW)
        h = min(imgH - y, h + 2 * padH)
        x = max(0, x - (w - detected[2]) // 2)  # center the face horizontally
        y = max(0, y - (h - detected[3]) // 2)  # center the face vertically
        return img[y:y + h, x:x + w]  # crop face (numpy slicing clamps at image edges)
gradcam.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import ViTFeatureExtractor, ViTForImageClassification
2
+ import warnings
3
+ from torchvision import transforms
4
+ from datasets import load_dataset
5
+ from pytorch_grad_cam import run_dff_on_image, GradCAM
6
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
7
+ from pytorch_grad_cam.utils.image import show_cam_on_image
8
+ from PIL import Image
9
+ import numpy as np
10
+ import cv2 as cv
11
+ import torch
12
+ from typing import List, Callable, Optional
13
+ import logging
14
+ from face_grab import FaceGrabber
15
+
16
+ # original borrowed from https://github.com/jacobgil/pytorch-grad-cam/blob/master/tutorials/HuggingFace.ipynb
17
+ # thanks @jacobgil
18
+ # further mods beyond this commit by @simonSlamka
19
+
20
+ warnings.filterwarnings("ignore")
21
+
22
+ logging.basicConfig(level=logging.INFO)
23
+
24
class HuggingfaceToTensorModelWrapper(torch.nn.Module):
    """Adapter making a HuggingFace classifier usable where a plain-tensor
    model is expected (e.g. by the CAM methods below).

    HuggingFace models return an output object; this wrapper forwards the
    input and unwraps the ``.logits`` tensor.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        output = self.model(x)
        return output.logits
31
+
32
class GradCam():
    """Placeholder class for Grad-CAM helpers.

    NOTE(review): currently unused — the helper functions in this file are
    defined at module level and called directly; this class holds no state.
    Confirm intent before removing or moving the helpers into it.
    """

    def __init__(self):
        # No state to initialize.
        pass
35
+
36
def category_name_to_index(model, category_name):
    """Return the class index for a human-readable label.

    Inverts the model's ``config.id2label`` mapping and looks up
    ``category_name``; raises KeyError for unknown labels.
    """
    label_to_index = {label: index for index, label in model.config.id2label.items()}
    return label_to_index[category_name]
39
+
40
def run_grad_cam_on_image(model: torch.nn.Module,
                          target_layer: torch.nn.Module,
                          targets_for_gradcam: List[Callable],
                          reshape_transform: Optional[Callable],
                          input_tensor: torch.Tensor,
                          input_image: Image.Image,
                          method: Callable=GradCAM,
                          threshold: float=0.5):
    """Run a CAM method once per target and return the overlays side by side.

    Args:
        model: HuggingFace classification model; wrapped below so the CAM
            sees raw logits.
        target_layer: layer whose activations/gradients the CAM attributes to.
        targets_for_gradcam: one output target per category to explain.
        reshape_transform: maps transformer token activations to a 2D map
            (may be None for CNNs).
        input_tensor: preprocessed image tensor (C, H, W) — it is indexed
            with [None, :] and repeated along the batch dim below.
        input_image: PIL image matching input_tensor, used for the overlay.
        method: CAM implementation class (default GradCAM).
        threshold: CAM values below this are zeroed before the overlay.

    Returns:
        A numpy array with the per-target visualizations stacked horizontally,
        each downscaled to half size.
    """
    with method(model=HuggingfaceToTensorModelWrapper(model),
                target_layers=[target_layer],
                reshape_transform=reshape_transform) as cam:

        # Replicate the tensor for each of the categories we want to create Grad-CAM for:
        repeated_tensor = input_tensor[None, :].repeat(len(targets_for_gradcam), 1, 1, 1)

        batch_results = cam(input_tensor=repeated_tensor,
                            targets=targets_for_gradcam)
        results = []
        for grayscale_cam in batch_results:
            # Zero out weak activations so the overlay shows only strong evidence
            grayscale_cam[grayscale_cam < threshold] = 0
            visualization = show_cam_on_image(np.float32(input_image)/255,
                                              grayscale_cam,
                                              use_rgb=True)
            # Make it weight less in the notebook:
            visualization = cv.resize(visualization,
                                      (visualization.shape[1]//2, visualization.shape[0]//2))
            results.append(visualization)
        return np.hstack(results)
68
+
69
+
70
def print_top_categories(model, img_tensor, top_k=5):
    """Print the model's top_k most confident classes for one image tensor."""
    logits = model(img_tensor.unsqueeze(0)).logits
    probabilities = torch.nn.functional.softmax(logits, dim=1)
    # Sort class indices by descending logit and keep the top_k best
    scores = logits.cpu()[0, :].detach().numpy()
    indices = scores.argsort()[::-1][:top_k]
    for i in indices:
        print(f"Predicted class (sorted from most confident) {i}: {model.config.id2label[i]}, confidence: {probabilities[0][i].item()}")
76
+
77
def reshape_transform_vit_huggingface(x):
    """Reshape ViT token activations into a 2D CNN-style feature map.

    Drops the leading [CLS] token and rearranges the remaining patch tokens
    from (batch, tokens, channels) to (batch, channels, side, side).

    Args:
        x: ViT hidden states of shape (batch, 1 + side*side, channels).

    Returns:
        Tensor of shape (batch, channels, side, side).

    Raises:
        ValueError: if the number of patch tokens is not a perfect square.
    """
    activations = x[:, 1:, :]  # drop the [CLS] token
    # Generalization: infer the patch-grid side instead of hard-coding 14
    # (which only fits 224px inputs with 16px patches); 197-token inputs
    # still map to 14x14 exactly as before.
    num_patches = activations.shape[1]
    side = int(round(num_patches ** 0.5))
    if side * side != num_patches:
        raise ValueError(f"{num_patches} patch tokens do not form a square grid")
    activations = activations.view(activations.shape[0],
                                   side, side, activations.shape[2])
    activations = activations.transpose(2, 3).transpose(1, 2)
    return activations
83
+
84
# --- Script body: crop a face, then show DFF and Grad-CAM visualizations ---
faceGrabber = FaceGrabber()

# Load the input image; if a face is found, classify just the face crop
image = Image.open("Feature-Image-74.jpg").convert("RGB")
face = faceGrabber.grab_faces(np.array(image))
if face is not None:
    image = Image.fromarray(face)

img_tensor = transforms.ToTensor()(image)

model = ViTForImageClassification.from_pretrained("ongkn/attraction-classifier")
# Explain both classes ("pos" and "neg") of the classifier
targets_for_gradcam = [ClassifierOutputTarget(category_name_to_index(model, "pos")),
                       ClassifierOutputTarget(category_name_to_index(model, "neg"))]
target_layer_dff = model.vit.layernorm
target_layer_gradcam = model.vit.encoder.layer[-2].output
# Resize to the 224x224 input the reshape transform's 14x14 grid implies
image_resized = image.resize((224, 224))
tensor_resized = transforms.ToTensor()(image_resized)

# Deep Feature Factorization visualization
dff_image = run_dff_on_image(model=model,
                             target_layer=target_layer_dff,
                             classifier=model.classifier,
                             img_pil=image_resized,
                             img_tensor=tensor_resized,
                             reshape_transform=reshape_transform_vit_huggingface,
                             n_components=5,
                             top_k=10,
                             threshold=0,
                             output_size=None) #(500, 500))
cv.namedWindow("DFF Image", cv.WINDOW_KEEPRATIO)
# cvtColor: the DFF image comes back RGB; OpenCV windows expect BGR
cv.imshow("DFF Image", cv.cvtColor(dff_image, cv.COLOR_BGR2RGB))
cv.resizeWindow("DFF Image", 2500, 700)
# cv.waitKey(0)
# cv.destroyAllWindows()
# Grad-CAM visualization for both class targets
grad_cam_image = run_grad_cam_on_image(model=model,
                                       target_layer=target_layer_gradcam,
                                       targets_for_gradcam=targets_for_gradcam,
                                       input_tensor=tensor_resized,
                                       input_image=image_resized,
                                       reshape_transform=reshape_transform_vit_huggingface,
                                       threshold=0)
cv.namedWindow("Grad-CAM Image", cv.WINDOW_KEEPRATIO)
cv.imshow("Grad-CAM Image", grad_cam_image)
cv.resizeWindow("Grad-CAM Image", 2000, 1250)
cv.waitKey(0)  # block until a key press, then close both windows
cv.destroyAllWindows()
print_top_categories(model, tensor_resized)