from typing import Dict, List, Optional, Tuple import pandas as pd import torch from mivolo.data.data_reader import PictureInfo from mivolo.data.misc import assign_faces, box_iou from mivolo.model.yolo_detector import PersonAndFaceResult def save_annotations(images: List[PictureInfo], images_dir: str, out_file: str): def get_age_str(age: Optional[str]) -> str: age = "-1" if age is None else age.replace("(", "").replace(")", "").replace(" ", "").replace(",", ";") return age def get_gender_str(gender: Optional[str]) -> str: gender = "-1" if gender is None else gender return gender headers = [ "img_name", "age", "gender", "face_x0", "face_y0", "face_x1", "face_y1", "person_x0", "person_y0", "person_x1", "person_y1", ] output_data = [] for image_info in images: relative_image_path = image_info.image_path.replace(f"{images_dir}/", "") face_x0, face_y0, face_x1, face_y1 = image_info.bbox p_x0, p_y0, p_x1, p_y1 = image_info.person_bbox output_data.append( { "img_name": relative_image_path, "age": get_age_str(image_info.age), "gender": get_gender_str(image_info.gender), "face_x0": face_x0, "face_y0": face_y0, "face_x1": face_x1, "face_y1": face_y1, "person_x0": p_x0, "person_y0": p_y0, "person_x1": p_x1, "person_y1": p_y1, } ) output_df = pd.DataFrame(output_data, columns=headers) output_df.to_csv(out_file, sep=",", index=False) print(f"Saved annotations for {len(images)} images to {out_file}") def get_main_face( detected_objects: PersonAndFaceResult, coarse_bbox: Optional[List[int]] = None, coarse_thresh: float = 0.2 ) -> Tuple[Optional[List[int]], List[int]]: """ Returns: main_bbox (Optional[List[int]]): The most cenetered face bbox other_bboxes (List[int]): indexes of other faces """ face_bboxes_inds: List[int] = detected_objects.get_bboxes_inds("face") if len(face_bboxes_inds) == 0: return None, [] # sort found faces face_bboxes_inds = sorted(face_bboxes_inds, key=lambda bb_ind: detected_objects.get_distance_to_center(bb_ind)) most_centered_bbox_ind = face_bboxes_inds[0] main_bbox = detected_objects.get_bbox_by_ind(most_centered_bbox_ind).cpu().numpy().tolist() iou_matrix: List[float] = [1.0] + [0.0] * (len(face_bboxes_inds) - 1) if coarse_bbox is not None: # calc iou between coarse_bbox and found bboxes found_bboxes: List[torch.tensor] = [ detected_objects.get_bbox_by_ind(other_ind) for other_ind in face_bboxes_inds ] iou_matrix = ( box_iou(torch.stack([torch.tensor(coarse_bbox)]), torch.stack(found_bboxes).cpu()).numpy()[0].tolist() ) if iou_matrix[0] < coarse_thresh: # to avoid fp detections main_bbox = None other_bboxes = [ind for i, ind in enumerate(face_bboxes_inds[1:]) if iou_matrix[i] < coarse_thresh] else: other_bboxes = face_bboxes_inds[1:] return main_bbox, other_bboxes def get_additional_bboxes( detected_objects: PersonAndFaceResult, other_bboxes_inds: List[int], image_path: str, **kwargs ) -> List[PictureInfo]: is_face = True if "is_person" not in kwargs else False is_person = False if "is_person" not in kwargs else True additional_data: List[PictureInfo] = [] # extend other faces for other_ind in other_bboxes_inds: other_box: List[int] = detected_objects.get_bbox_by_ind(other_ind).cpu().numpy().tolist() if is_face: additional_data.append(PictureInfo(image_path, None, None, other_box)) elif is_person: additional_data.append(PictureInfo(image_path, None, None, person_bbox=other_box)) return additional_data def associate_persons(face_bboxes: List[torch.tensor], detected_objects: PersonAndFaceResult): person_bboxes_inds: List[int] = detected_objects.get_bboxes_inds("person") person_bboxes: List[torch.tensor] = [detected_objects.get_bbox_by_ind(ind) for ind in person_bboxes_inds] face_to_person_map: Dict[int, Optional[int]] = {ind: None for ind in range(len(face_bboxes))} if len(person_bboxes) == 0: return face_to_person_map, [] assigned_faces, unassigned_persons_inds = assign_faces(person_bboxes, face_bboxes) for face_ind, person_ind in enumerate(assigned_faces): person_ind = person_bboxes_inds[person_ind] if person_ind is not None else None face_to_person_map[face_ind] = person_ind unassigned_persons_inds = [person_bboxes_inds[person_ind] for person_ind in unassigned_persons_inds] return face_to_person_map, unassigned_persons_inds def assign_persons( faces_info: List[PictureInfo], faces_persons_map: Dict[int, int], detected_objects: PersonAndFaceResult ): for face_ind, person_ind in faces_persons_map.items(): if person_ind is None: continue person_bbox = detected_objects.get_bbox_by_ind(person_ind).cpu().numpy().tolist() faces_info[face_ind].person_bbox = person_bbox