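"""Annotation-preparation helpers for MiVOLO: select the main (most centered) face in an
image, associate detected person boxes with faces, and save face/person bboxes to CSV."""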

from typing import Dict, List, Optional, Tuple

import pandas as pd
import torch
from mivolo.data.data_reader import PictureInfo
from mivolo.data.misc import assign_faces, box_iou
from mivolo.model.yolo_detector import PersonAndFaceResult


def save_annotations(images: List[PictureInfo], images_dir: str, out_file: str):
    def get_age_str(age: Optional[str]) -> str:
        # normalize age labels: "(25, 32)" -> "25;32"; missing age -> "-1"
        age = "-1" if age is None else age.replace("(", "").replace(")", "").replace(" ", "").replace(",", ";")
        return age

    def get_gender_str(gender: Optional[str]) -> str:
        # missing gender -> "-1"
        gender = "-1" if gender is None else gender
        return gender

    headers = [
        "img_name",
        "age",
        "gender",
        "face_x0",
        "face_y0",
        "face_x1",
        "face_y1",
        "person_x0",
        "person_y0",
        "person_x1",
        "person_y1",
    ]

    output_data = []
    for image_info in images:
        relative_image_path = image_info.image_path.replace(f"{images_dir}/", "")
        face_x0, face_y0, face_x1, face_y1 = image_info.bbox
        p_x0, p_y0, p_x1, p_y1 = image_info.person_bbox
        output_data.append(
            {
                "img_name": relative_image_path,
                "age": get_age_str(image_info.age),
                "gender": get_gender_str(image_info.gender),
                "face_x0": face_x0,
                "face_y0": face_y0,
                "face_x1": face_x1,
                "face_y1": face_y1,
                "person_x0": p_x0,
                "person_y0": p_y0,
                "person_x1": p_x1,
                "person_y1": p_y1,
            }
        )

    output_df = pd.DataFrame(output_data, columns=headers)
    output_df.to_csv(out_file, sep=",", index=False)
    print(f"Saved annotations for {len(images)} images to {out_file}")


def get_main_face(
    detected_objects: PersonAndFaceResult, coarse_bbox: Optional[List[int]] = None, coarse_thresh: float = 0.2
) -> Tuple[Optional[List[int]], List[int]]:
    """
    Find the face bbox closest to the image center.

    Returns:
        main_bbox (Optional[List[int]]): The most centered face bbox; None if its IoU with
            coarse_bbox is below coarse_thresh (treated as a false positive).
        other_bboxes (List[int]): Indexes of the other detected faces.
    """
    face_bboxes_inds: List[int] = detected_objects.get_bboxes_inds("face")
    if len(face_bboxes_inds) == 0:
        return None, []

    # sort found faces by distance to the image center
    face_bboxes_inds = sorted(face_bboxes_inds, key=lambda bb_ind: detected_objects.get_distance_to_center(bb_ind))
    most_centered_bbox_ind = face_bboxes_inds[0]
    main_bbox = detected_objects.get_bbox_by_ind(most_centered_bbox_ind).cpu().numpy().tolist()

    # default IoUs when no coarse_bbox is given: the main face passes, all others fail
    iou_matrix: List[float] = [1.0] + [0.0] * (len(face_bboxes_inds) - 1)
    if coarse_bbox is not None:
        # calc iou between coarse_bbox and all found face bboxes
        found_bboxes: List[torch.Tensor] = [
            detected_objects.get_bbox_by_ind(other_ind) for other_ind in face_bboxes_inds
        ]
        iou_matrix = (
            box_iou(torch.stack([torch.tensor(coarse_bbox)]), torch.stack(found_bboxes).cpu()).numpy()[0].tolist()
        )

    if iou_matrix[0] < coarse_thresh:
        # the most centered face barely overlaps coarse_bbox: discard it as a false positive
        main_bbox = None
        # iou_matrix[i + 1] corresponds to face_bboxes_inds[1:][i]; keep only the faces
        # that also fail the coarse check
        other_bboxes = [ind for i, ind in enumerate(face_bboxes_inds[1:]) if iou_matrix[i + 1] < coarse_thresh]
    else:
        other_bboxes = face_bboxes_inds[1:]

    return main_bbox, other_bboxes
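
# A minimal usage sketch (the detector call and boxes below are assumptions, not part
# of this module):
#
#     detected: PersonAndFaceResult = detector.predict(image)
#     main_bbox, other_inds = get_main_face(detected, coarse_bbox=[10, 15, 120, 160])
#     # main_bbox is None when the most centered face overlaps coarse_bbox with IoU < coarse_thresh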


def get_additional_bboxes(
    detected_objects: PersonAndFaceResult, other_bboxes_inds: List[int], image_path: str, **kwargs
) -> List[PictureInfo]:
    # the presence of an "is_person" kwarg (not its value) switches from face to person mode
    is_person = "is_person" in kwargs
    is_face = not is_person

    additional_data: List[PictureInfo] = []
    # wrap each remaining bbox in an unlabeled (no age/gender) PictureInfo record
    for other_ind in other_bboxes_inds:
        other_box: List[int] = detected_objects.get_bbox_by_ind(other_ind).cpu().numpy().tolist()
        if is_face:
            additional_data.append(PictureInfo(image_path, None, None, other_box))
        elif is_person:
            additional_data.append(PictureInfo(image_path, None, None, person_bbox=other_box))
    return additional_data
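
# Usage sketch: wrap the non-main faces returned by get_main_face into unlabeled
# PictureInfo records so they can still be saved by save_annotations (detected,
# other_inds and person_inds are assumed to come from earlier detection steps):
#
#     extra_faces = get_additional_bboxes(detected, other_inds, "part1/img1.jpg")
#     extra_persons = get_additional_bboxes(detected, person_inds, "part1/img1.jpg", is_person=True)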


def associate_persons(
    face_bboxes: List[torch.Tensor], detected_objects: PersonAndFaceResult
) -> Tuple[Dict[int, Optional[int]], List[int]]:
    person_bboxes_inds: List[int] = detected_objects.get_bboxes_inds("person")
    person_bboxes: List[torch.Tensor] = [detected_objects.get_bbox_by_ind(ind) for ind in person_bboxes_inds]

    # by default no face is matched to a person
    face_to_person_map: Dict[int, Optional[int]] = {ind: None for ind in range(len(face_bboxes))}
    if len(person_bboxes) == 0:
        return face_to_person_map, []

    assigned_faces, unassigned_persons_inds = assign_faces(person_bboxes, face_bboxes)
    # translate local indexes (positions in person_bboxes) back to detection indexes
    for face_ind, person_ind in enumerate(assigned_faces):
        person_ind = person_bboxes_inds[person_ind] if person_ind is not None else None
        face_to_person_map[face_ind] = person_ind

    unassigned_persons_inds = [person_bboxes_inds[person_ind] for person_ind in unassigned_persons_inds]
    return face_to_person_map, unassigned_persons_inds
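
# Sketch (inputs assumed from earlier steps): map each face bbox to at most one person
# detection index, and collect person detections that matched no face:
#
#     face_boxes = [torch.tensor(info.bbox) for info in faces_info]
#     face_to_person_map, unassigned_person_inds = associate_persons(face_boxes, detected)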


def assign_persons(
    faces_info: List[PictureInfo], faces_persons_map: Dict[int, Optional[int]], detected_objects: PersonAndFaceResult
):
    # copy each matched person bbox into the corresponding face record
    for face_ind, person_ind in faces_persons_map.items():
        if person_ind is None:
            continue
        person_bbox = detected_objects.get_bbox_by_ind(person_ind).cpu().numpy().tolist()
        faces_info[face_ind].person_bbox = person_bbox
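
# End-to-end sketch of how these helpers chain together (the detector, labels and
# paths are assumptions for illustration):
#
#     detected = detector.predict(image)  # PersonAndFaceResult
#     main_bbox, other_inds = get_main_face(detected)
#     faces: List[PictureInfo] = []
#     if main_bbox is not None:
#         faces.append(PictureInfo(image_path, age, gender, main_bbox))
#     faces += get_additional_bboxes(detected, other_inds, image_path)
#     face_to_person_map, _ = associate_persons([torch.tensor(f.bbox) for f in faces], detected)
#     assign_persons(faces, face_to_person_map, detected)
#     save_annotations(faces, images_dir, out_file)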