onipot committed on
Commit
c12cb7e
1 Parent(s): 13f24b5

augmented inference

Browse files
Files changed (2) hide show
  1. app.py +33 -21
  2. yolov5/detect.py +38 -5
app.py CHANGED
@@ -22,14 +22,14 @@ from yolov5.detect import predict, load_yolo_model
22
  model, stride, names, pt, jit, onnx, engine = load_yolo_model("face_model.pt", imgsz=[320,320])
23
  age_model_ts = torch.jit.load("age_model.pt")
24
 
25
- roboto_font = load_font(height_px=18)
26
 
27
- def run_yolo(img0):
28
 
29
- #img_path = img
30
- #img0 = Image.open(img_path).convert("RGB")
31
 
32
- img0 = ImageOps.contain(img0, (720,720))
 
 
33
  img0 = ImageOps.exif_transpose(img0)
34
 
35
  draw = ImageDraw.Draw(img0)
@@ -37,7 +37,8 @@ def run_yolo(img0):
37
  predictions = predict(age_model_ts, model,
38
  stride, imgsz=[320, 320],
39
  conf_thres=0.5, iou_thres=0.45,
40
- source=img0
 
41
  )
42
 
43
  detections : list[Detection] = []
@@ -56,21 +57,31 @@ def run_yolo(img0):
56
 
57
  detections.append(det)
58
  draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255))
59
- draw.rectangle(((det.xmin, det.ymin - 20), (det.xmax, det.ymin)), fill=(255,255,255))
60
- draw.text((det.xmin, det.ymin - 20), det.class_name, fill=(0,0,0), font=roboto_font)
61
 
62
- # img0.save("img.jpg")
 
 
 
 
 
63
  return img0
64
 
65
 
66
- #run_yolo("D:\\Download\\IMG_20220803_153335c.jpg")
67
- #sys.exit(1)
68
- inputs = gr.inputs.Image(type='pil', label="Input Image")
69
- outputs = gr.outputs.Image(type="pil", label="Output Image")
 
 
 
 
 
 
70
 
71
- title = "AgeGuesser"
72
- description = "Guess the age of a person from a facial image!"
73
- article = """<p>A fully automated system based on YOLOv5 and EfficientNet to perform face detection and age estimation in real-time.</p>
 
74
  <p><b>Links</b></p>
75
  <ul>
76
  <li>
@@ -83,9 +94,7 @@ article = """<p>A fully automated system based on YOLOv5 and EfficientNet to per
83
  <a href='https://github.com/ai-hazard/AgeGuesser-train'>Github</a>
84
  </li>
85
  </ul>
86
-
87
  <p>Credits to my dear colleague <a href='https://www.linkedin.com/in/nicola-marvulli-904270136/'>Dott. Nicola Marvulli</a>, we've developed AgeGuesser together as part of two university exams. (Computer Vision + Deep Learning)</p>
88
-
89
  <p>Credits to my dear professors and the <a href='https://sites.google.com/site/cilabuniba/'>CILAB</a> research group</p>
90
  <ul>
91
  <li>
@@ -95,8 +104,11 @@ article = """<p>A fully automated system based on YOLOv5 and EfficientNet to per
95
  <a href='https://sites.google.com/view/gennaro-vessio/home-page'>Prof. Gennaro Vessio</a>
96
  </li>
97
  </ul>
98
- """
 
 
 
 
99
 
100
- examples = [['images/1.jpg'], ['images/2.jpg'], ['images/3.jpg'], ['images/4.jpg'], ['images/5.jpg'], ]
101
 
102
- gr.Interface(run_yolo, inputs, outputs, title=title, description=description, article=article, examples=examples, theme="huggingface").launch(enable_queue=True)
 
22
  model, stride, names, pt, jit, onnx, engine = load_yolo_model("face_model.pt", imgsz=[320,320])
23
  age_model_ts = torch.jit.load("age_model.pt")
24
 
25
+ text_box_height = 22
26
 
27
+ roboto_font = load_font(height_px=text_box_height-2)
28
 
 
 
29
 
30
+ def run_yolo(img0, with_random_augs):
31
+
32
+ img0 = ImageOps.contain(img0, (640,640))
33
  img0 = ImageOps.exif_transpose(img0)
34
 
35
  draw = ImageDraw.Draw(img0)
 
37
  predictions = predict(age_model_ts, model,
38
  stride, imgsz=[320, 320],
39
  conf_thres=0.5, iou_thres=0.45,
40
+ source=img0,
41
+ with_random_augs = with_random_augs
42
  )
43
 
44
  detections : list[Detection] = []
 
57
 
58
  detections.append(det)
59
  draw.rectangle(((det.xmin, det.ymin), (det.xmax, det.ymax)), fill=None, outline=(255,255,255))
 
 
60
 
61
+ text_length = roboto_font.getlength(bbox["class"])
62
+
63
+ rect_center = (det.xmin + det.xmax - text_length) // 2
64
+ draw.rectangle(((rect_center, det.ymin), (rect_center + text_length, det.ymin + text_box_height)), fill=(255,255,255))
65
+ draw.text((rect_center, det.ymin), det.class_name, fill=(0,0,0), font=roboto_font)
66
+
67
  return img0
68
 
69
 
70
+ """ img = Image.open("D:\\Download\\IMG_20220803_153335c2.jpg").convert("RGB")
71
+ run_yolo(img)
72
+ sys.exit(1) """
73
+
74
+
75
+ def main():
76
+ input = gr.Image(type='pil', label="Input Image")
77
+ outputs = gr.Image(type="pil", label="Output Image", interactive=False)
78
+
79
+ augment_preds = gr.Checkbox(label="Apply random augmentations")
80
 
81
+ title = "AgeGuesser"
82
+ description = "Guess the age of a person from a facial image!"
83
+ article = """
84
+ <p>A fully automated system based on YOLOv5 and EfficientNet to perform face detection and age estimation in real-time.</p>
85
  <p><b>Links</b></p>
86
  <ul>
87
  <li>
 
94
  <a href='https://github.com/ai-hazard/AgeGuesser-train'>Github</a>
95
  </li>
96
  </ul>
 
97
  <p>Credits to my dear colleague <a href='https://www.linkedin.com/in/nicola-marvulli-904270136/'>Dott. Nicola Marvulli</a>, we've developed AgeGuesser together as part of two university exams. (Computer Vision + Deep Learning)</p>
 
98
  <p>Credits to my dear professors and the <a href='https://sites.google.com/site/cilabuniba/'>CILAB</a> research group</p>
99
  <ul>
100
  <li>
 
104
  <a href='https://sites.google.com/view/gennaro-vessio/home-page'>Prof. Gennaro Vessio</a>
105
  </li>
106
  </ul>
107
+ """
108
+
109
+ examples = [['images/1.jpg'], ['images/2.jpg'], ['images/3.jpg'], ['images/4.jpg'], ['images/5.jpg'], ]
110
+
111
+ gr.Interface(run_yolo, [input, augment_preds], outputs, title=title, description=description, article=article, examples=examples, theme="huggingface").launch(enable_queue=True, ) # share=True
112
 
 
113
 
114
+ main()
yolov5/detect.py CHANGED
@@ -21,6 +21,7 @@ from yolov5.utils.general import (check_img_size,
21
  from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
22
  from yolov5.models.common import DetectMultiBackend
23
  import torchvision
 
24
 
25
  test_transforms = torchvision.transforms.Compose([
26
  torchvision.transforms.ToPILImage(),
@@ -29,6 +30,18 @@ test_transforms = torchvision.transforms.Compose([
29
  torchvision.transforms.Resize((224, 224)),
30
  ])
31
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def load_yolo_model(weights, device="cpu", imgsz=[1280, 1280]):
33
  # Load model
34
  device = select_device(device)
@@ -62,7 +75,7 @@ def predict(
62
  augment=False, # augmented inference
63
  visualize=False, # visualize features
64
  half=False, # use FP16 half-precision inference
65
-
66
  ):
67
 
68
  im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)
@@ -82,6 +95,8 @@ def predict(
82
 
83
  # Process predictions
84
  preds = []
 
 
85
  for i, det in enumerate(pred): # per image
86
 
87
  # im0 = im0.copy()
@@ -89,16 +104,34 @@ def predict(
89
  if len(det):
90
  # Rescale boxes from img_size to im0 size
91
  det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
 
 
92
 
93
- for *xyxy, conf, cls in reversed(det):
94
  face = im0[int(xyxy[1]):int(xyxy[3]),int(xyxy[0]):int(xyxy[2])]
95
  face_img = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
 
 
96
  im = test_transforms(face_img).unsqueeze_(0)
 
97
  with torch.no_grad():
98
  y = age_model(im)
99
 
100
- age = y[0]
101
-
102
- preds.append({"class": str(int(age)), "xmin": int(xyxy[0]), "ymin": int(xyxy[1]), "xmax": int(xyxy[2]),"ymax": int(xyxy[3]), "conf": float(conf)})
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  return preds
 
21
  from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
22
  from yolov5.models.common import DetectMultiBackend
23
  import torchvision
24
+ import numpy as np
25
 
26
  test_transforms = torchvision.transforms.Compose([
27
  torchvision.transforms.ToPILImage(),
 
30
  torchvision.transforms.Resize((224, 224)),
31
  ])
32
 
33
+
34
+ test_random_transforms = torchvision.transforms.Compose([
35
+ torchvision.transforms.ToPILImage(),
36
+ torchvision.transforms.transforms.ToTensor(),
37
+ torchvision.transforms.RandomRotation((-15, 15)),
38
+ torchvision.transforms.RandomGrayscale(p=0.4),
39
+ torchvision.transforms.RandomPerspective(0.4, p=0.4),
40
+ torchvision.transforms.RandomAdjustSharpness(2),
41
+ torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
42
+ torchvision.transforms.Resize((224, 224)),
43
+ ])
44
+
45
  def load_yolo_model(weights, device="cpu", imgsz=[1280, 1280]):
46
  # Load model
47
  device = select_device(device)
 
75
  augment=False, # augmented inference
76
  visualize=False, # visualize features
77
  half=False, # use FP16 half-precision inference
78
+ with_random_augs = False
79
  ):
80
 
81
  im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)
 
95
 
96
  # Process predictions
97
  preds = []
98
+
99
+
100
  for i, det in enumerate(pred): # per image
101
 
102
  # im0 = im0.copy()
 
104
  if len(det):
105
  # Rescale boxes from img_size to im0 size
106
  det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
107
+
108
+ for *xyxy, conf, _ in reversed(det):
109
 
110
+ ages = []
111
  face = im0[int(xyxy[1]):int(xyxy[3]),int(xyxy[0]):int(xyxy[2])]
112
  face_img = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
113
+
114
+ # inference with original crop
115
  im = test_transforms(face_img).unsqueeze_(0)
116
+
117
  with torch.no_grad():
118
  y = age_model(im)
119
 
120
+ age = y[0].item()
121
+ ages.append(age)
122
+
123
+ if with_random_augs:
124
+ # inference with random augmentations
125
+ for k in range(12):
126
+ im = test_random_transforms(face_img).unsqueeze_(0)
127
+
128
+ with torch.no_grad():
129
+ y = age_model(im)
130
+
131
+ age = y[0].item()
132
+
133
+ ages.append(age)
134
+
135
+ preds.append({"class": str(int( np.mean(np.array(ages), axis=0))), "xmin": int(xyxy[0]), "ymin": int(xyxy[1]), "xmax": int(xyxy[2]),"ymax": int(xyxy[3]), "conf": float(conf)})
136
 
137
  return preds