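"""Prepare the Adience dataset: parse the fold_{i}_data.txt annotations and write a
combined annotation file to <dataset_path>/annotations/adience_annotations.csv.

Example invocation (script name and weight path shown for illustration only):

    python prepare_adience.py \
        --dataset_path data/adience \
        --detector_weights <path_to_detector_weights> \
        --device cuda:0
"""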
import argparse
import os
from collections import defaultdict
from typing import Dict, List, Optional

import cv2
import pandas as pd
import tqdm
from mivolo.data.data_reader import PictureInfo, get_all_files
from mivolo.model.yolo_detector import Detector, PersonAndFaceResult
from preparation_utils import get_additional_bboxes, get_main_face, save_annotations


def read_adience_annotations(annotations_files):
    """Parse the fold_{i}_data.txt files and map each aligned-face path to its
    age/gender annotation. Genders are upper-cased; unknown ("u") or non-string
    values become None."""
    annotations_per_image = {}
    stat_per_fold = defaultdict(int)
    cols = ["user_id", "original_image", "face_id", "age", "gender"]
    for file in annotations_files:
        fold_name = os.path.basename(file).split(".")[0]
        df = pd.read_csv(file, sep="\t", usecols=cols)
        for _, row in df.iterrows():
            face_id, img_name, user_id = row["face_id"], row["original_image"], row["user_id"]
            aligned_face_path = f"faces/{user_id}/coarse_tilt_aligned_face.{face_id}.{img_name}"
            age, gender = row["age"], row["gender"]
            gender = gender.upper() if isinstance(gender, str) and gender != "u" else None
            age = age if isinstance(age, str) else None
            annotations_per_image[aligned_face_path] = {"age": age, "gender": gender, "fold": fold_name}
            stat_per_fold[fold_name] += 1
    print(f"Per fold images: {stat_per_fold}")
    return annotations_per_image
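
# A returned entry looks like this (shape follows from the code above; values illustrative):
#   "faces/<user_id>/coarse_tilt_aligned_face.<face_id>.<img_name>" ->
#       {"age": "(25, 32)", "gender": "M", "fold": "fold_0_data"}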


def read_data(images_dir, annotations_files, data_dir) -> List[PictureInfo]:
    dataset_pictures: List[PictureInfo] = []
    all_images = get_all_files(images_dir)
    annotations_per_file = read_adience_annotations(annotations_files)

    total, missed = 0, 0
    stat_per_gender: Dict[str, int] = defaultdict(int)
    missed_gender, missed_age, missed_gender_and_age = 0, 0, 0
    stat_per_ages: Dict[str, int] = defaultdict(int)
    # final age classes: "0;2", "4;6", "8;12", "15;20", "25;32", "38;43", "48;53", "60;100"
    age_map = {
        "2": "(0, 2)",
        "3": "(0, 2)",
        "13": "(8, 12)",
        "(8, 23)": "(8, 12)",
        "22": "(15, 20)",
        "23": "(25, 32)",
        "29": "(25, 32)",
        "(27, 32)": "(25, 32)",
        "32": "(25, 32)",
        "34": "(25, 32)",
        "35": "(25, 32)",
        "36": "(38, 43)",
        "(38, 42)": "(38, 43)",
        "(38, 48)": "(38, 43)",
        "42": "(38, 43)",
        "45": "(38, 43)",
        "46": "(48, 53)",
        "55": "(48, 53)",
        "56": "(48, 53)",
        "57": "(60, 100)",
        "58": "(60, 100)",
    }
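    # e.g. a stray raw label "35" is collapsed into the canonical class "(25, 32)";
    # labels already matching a canonical class pass through unchanged in the
    # lookup below (age_map[age] if age in age_map else age)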

    for image_path in all_images:
        total += 1
        relative_path = image_path.replace(f"{data_dir}/", "")
        if relative_path not in annotations_per_file:
            missed += 1
            print("Cannot find annotation for ", relative_path)
        else:
            annot = annotations_per_file[relative_path]
            age, gender = annot["age"], annot["gender"]
            if gender is None and age is not None:
                missed_gender += 1
            elif age is None and gender is not None:
                missed_age += 1
            elif gender is None and age is None:
                missed_gender_and_age += 1
                # skip images with neither age nor gender
                continue
            if gender is not None:
                stat_per_gender[gender] += 1
            if age is not None:
                age = age_map[age] if age in age_map else age
                stat_per_ages[age] += 1
            dataset_pictures.append(PictureInfo(image_path, age, gender))

    print(f"Missed annotations for images: {missed}/{total}")
    print(f"Missed genders: {missed_gender}")
    print(f"Missed ages: {missed_age}")
    print(f"Missed ages and genders: {missed_gender_and_age}")
    print(f"\nPer gender images: {stat_per_gender}")

    ages = list(stat_per_ages.keys())
    print(f"Per age categories ({len(ages)} cats):")
    ages = sorted(ages, key=lambda x: int(x.split("(")[-1].split(",")[0].strip()))
    for age in ages:
        print(f"Age: {age} Count: {stat_per_ages[age]}")
    return dataset_pictures
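
# Quick sanity check (a sketch; the paths and the PictureInfo attribute names are
# assumptions inferred from the constructor call above):
#   pictures = read_data("data/adience/faces", ["data/adience/fold_0_data.txt"], "data/adience")
#   print(pictures[0].image_path, pictures[0].age, pictures[0].gender)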


def main(faces_dir: str, annotations: List[str], data_dir: str, detector_cfg: Optional[dict] = None):
    """
    Generate an annotation file with columns:
    ["img_name", "age", "gender",
     "face_x0", "face_y0", "face_x1", "face_y1",
     "person_x0", "person_y0", "person_x1", "person_y1"]

    All person bboxes here are set to [-1, -1, -1, -1].
    If detector_cfg is set, each face bbox is refined using the detector.
    Any other detected faces are also written to the file (needed for further preprocessing).
    """
    # output directory for annotations
    out_dir = os.path.join(data_dir, "annotations")
    os.makedirs(out_dir, exist_ok=True)

    # load annotations
    images: List[PictureInfo] = read_data(faces_dir, annotations, data_dir)

    if detector_cfg:
        # detect faces with the yolo detector
        faces_not_found, images_with_other_faces = 0, 0
        other_faces: List[PictureInfo] = []
        detector_weights, device = detector_cfg["weights"], detector_cfg["device"]
        detector = Detector(detector_weights, device, verbose=False, conf_thresh=0.1, iou_thresh=0.2)
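        # the low conf_thresh (0.1) keeps weak detections so get_main_face() has
        # candidates to choose from; the values come from the call above, the
        # rationale is an assumption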
for image_info in tqdm.tqdm(images, desc="Detecting faces: "): | |
cv_im = cv2.imread(image_info.image_path) | |
im_h, im_w = cv_im.shape[:2] | |
detected_objects: PersonAndFaceResult = detector.predict(cv_im) | |
main_bbox, other_bboxes_inds = get_main_face(detected_objects) | |
if main_bbox is None: | |
# use a full image as face bbox | |
faces_not_found += 1 | |
image_info.bbox = [0, 0, im_w, im_h] | |
else: | |
image_info.bbox = main_bbox | |
if len(other_bboxes_inds): | |
images_with_other_faces += 1 | |
additional_faces = get_additional_bboxes(detected_objects, other_bboxes_inds, image_info.image_path) | |
other_faces.extend(additional_faces) | |
print(f"Faces not detected: {faces_not_found}/{len(images)}") | |
print(f"Images with other faces: {images_with_other_faces}/{len(images)}") | |
print(f"Other faces: {len(other_faces)}") | |
        images = images + other_faces
    else:
        # use the full image as the face bbox
        for image_info in tqdm.tqdm(images, desc="Collect face bboxes: "):
            cv_im = cv2.imread(image_info.image_path)
            im_h, im_w = cv_im.shape[:2]
            image_info.bbox = [0, 0, im_w, im_h]  # xyxy

    save_annotations(images, faces_dir, out_file=os.path.join(out_dir, "adience_annotations.csv"))


def get_parser():
    parser = argparse.ArgumentParser(description="Adience")
    parser.add_argument(
        "--dataset_path",
        default="data/adience",
        type=str,
        required=True,
        help="path to the dataset root with faces/ and fold_{i}_data.txt files",
    )
    parser.add_argument(
        "--detector_weights", default=None, type=str, required=False, help="path to face and person detector weights"
    )
    parser.add_argument("--device", default="cuda:0", type=str, required=False, help="device to run the detector on")
    return parser


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()

    # strip any trailing slash before deriving paths, so relative_path
    # computation in read_data matches the annotation keys
    data_dir = args.dataset_path.rstrip("/")
    faces_dir = os.path.join(data_dir, "faces")

    annotations = [os.path.join(data_dir, f"fold_{i}_data.txt") for i in range(5)]

    detector_cfg: Optional[Dict[str, str]] = None
    if args.detector_weights is not None:
        # use the device requested on the command line (the original hard-coded "cuda:0")
        detector_cfg = {"weights": args.detector_weights, "device": args.device}

    main(faces_dir, annotations, data_dir, detector_cfg)