#!/usr/bin/env python
import argparse
from dataclasses import dataclass

import huggingface_hub
import numpy as np
from mivolo.predictor import Predictor
from PIL import Image


@dataclass
class Cfg:
    detector_weights: str
    checkpoint: str
    device: str = "cpu"
    with_persons: bool = True
    disable_faces: bool = False
    draw: bool = True


def load_models():
    # Download the YOLOv8 person/face detector weights and build the MiVOLO predictor.
    detector_path = huggingface_hub.hf_hub_download(
        'iitolstykh/demo_yolov8_detector', 'yolov8x_person_face.pt'
    )
    age_gender_path_v1 = 'age_estimator/MiVOLO-main/models/model_imdb_cross_person_4.22_99.46.pth.tar'
    predictor_cfg_v1 = Cfg(detector_path, age_gender_path_v1)
    predictor_v1 = Predictor(predictor_cfg_v1)
    return predictor_v1


def detect(
    image: np.ndarray,
    score_threshold: float,
    iou_threshold: float,
    mode: str,
    predictor: Predictor,
) -> np.ndarray:
    # Pass detection thresholds through to the underlying YOLOv8 detector.
    predictor.detector.detector_kwargs['conf'] = score_threshold
    predictor.detector.detector_kwargs['iou'] = iou_threshold

    if mode == "Use persons and faces":
        use_persons = True
        disable_faces = False
    elif mode == "Use persons only":
        use_persons = True
        disable_faces = True
    elif mode == "Use faces only":
        use_persons = False
        disable_faces = False
    else:
        # Guard against an unrecognized mode so the flags below are always defined.
        raise ValueError(f"Unknown mode: {mode}")

    predictor.age_gender_model.meta.use_persons = use_persons
    predictor.age_gender_model.meta.disable_faces = disable_faces

    image = image[:, :, ::-1]  # RGB -> BGR for OpenCV
    detected_objects, out_im = predictor.recognize(image)
    return out_im[:, :, ::-1]  # BGR -> RGB


def load_image(image_path: str) -> np.ndarray:
    # Force a 3-channel RGB array so the channel reversal in detect() is valid.
    image = Image.open(image_path).convert("RGB")
    return np.array(image)


def main(args):
    # Load models
    predictor_v1 = load_models()

    # Set parameters from args
    score_threshold = args.score_threshold
    iou_threshold = args.iou_threshold
    mode = args.mode

    # Load and process image
    image_np = load_image(args.image_path)

    # Predict with model, then save and show the annotated result
    result = detect(image_np, score_threshold, iou_threshold, mode, predictor_v1)
    output_image = Image.fromarray(result)
    output_image.save(args.output_path)
    output_image.show()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Object Detection with YOLOv8 and Age/Gender Prediction')
    parser.add_argument('--image_path', type=str, required=True, help='Path to the input image')
    parser.add_argument('--output_path', type=str, default='output_image.jpg', help='Path to save the output image')
    parser.add_argument('--score_threshold', type=float, default=0.4, help='Score threshold for detection')
    parser.add_argument('--iou_threshold', type=float, default=0.7, help='IoU threshold for detection')
    parser.add_argument(
        '--mode',
        type=str,
        choices=["Use persons and faces", "Use persons only", "Use faces only"],
        default="Use persons and faces",
        help='Detection mode',
    )
    args = parser.parse_args()
    main(args)
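
# A minimal sketch of how this script might be invoked from the command line,
# assuming the MiVOLO checkpoint exists at the hardcoded path above and the
# script is saved as run_mivolo.py (hypothetical filename):
#
#   python run_mivolo.py --image_path examples/people.jpg \
#       --output_path annotated.jpg --score_threshold 0.4 \
#       --iou_threshold 0.7 --mode "Use persons and faces"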