#!/usr/bin/env python
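"""Detect persons/faces with YOLOv8 and estimate age/gender with MiVOLO.

Example invocation (the script and image file names below are placeholders):

    python predict_age_gender.py --image_path input.jpg --output_path result.jpg \
        --mode "Use persons and faces"
"""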
import huggingface_hub
import numpy as np
import argparse
from dataclasses import dataclass
from mivolo.predictor import Predictor
from PIL import Image

@dataclass
class Cfg:
    """Configuration consumed by the MiVOLO Predictor."""

    detector_weights: str
    checkpoint: str
    device: str = "cpu"
    with_persons: bool = True
    disable_faces: bool = False
    draw: bool = True

def load_models():
    # Download the YOLOv8 person/face detector weights from the Hugging Face Hub.
    detector_path = huggingface_hub.hf_hub_download('iitolstykh/demo_yolov8_detector',
                                                    'yolov8x_person_face.pt')

    # The MiVOLO age/gender checkpoint is expected at this local path.
    age_gender_path_v1 = 'age_estimator/MiVOLO-main/models/model_imdb_cross_person_4.22_99.46.pth.tar'
    predictor_cfg_v1 = Cfg(detector_path, age_gender_path_v1)

    predictor_v1 = Predictor(predictor_cfg_v1)

    return predictor_v1
    
def detect(image: np.ndarray, score_threshold: float, iou_threshold: float, mode: str, predictor: Predictor) -> np.ndarray:
    # Forward the detection thresholds to the underlying YOLOv8 detector.
    predictor.detector.detector_kwargs['conf'] = score_threshold
    predictor.detector.detector_kwargs['iou'] = iou_threshold

    # Map the human-readable mode onto the model's person/face flags.
    if mode == "Use persons and faces":
        use_persons = True
        disable_faces = False
    elif mode == "Use persons only":
        use_persons = True
        disable_faces = True
    elif mode == "Use faces only":
        use_persons = False
        disable_faces = False
    else:
        raise ValueError(f"Unknown mode: {mode!r}")

    predictor.age_gender_model.meta.use_persons = use_persons
    predictor.age_gender_model.meta.disable_faces = disable_faces

    image = image[:, :, ::-1]  # RGB -> BGR, since the predictor expects OpenCV channel order
    detected_objects, out_im = predictor.recognize(image)
    return out_im[:, :, ::-1]  # BGR -> RGB for PIL
    
def load_image(image_path: str) -> np.ndarray:
    # Convert to RGB so grayscale or RGBA inputs still yield a 3-channel array
    # for the downstream channel flips.
    image = Image.open(image_path).convert("RGB")
    return np.array(image)

def main(args):
    # Load models
    predictor_v1 = load_models()

    # Set parameters from args
    score_threshold = args.score_threshold
    iou_threshold = args.iou_threshold
    mode = args.mode

    # Load and process image
    image_np = load_image(args.image_path)

    # Predict with model
    result = detect(image_np, score_threshold, iou_threshold, mode, predictor_v1)

    output_image = Image.fromarray(result)
    output_image.save(args.output_path)
    output_image.show()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Object Detection with YOLOv8 and Age/Gender Prediction')
    parser.add_argument('--image_path', type=str, required=True, help='Path to the input image')
    parser.add_argument('--output_path', type=str, default='output_image.jpg', help='Path to save the output image')
    parser.add_argument('--score_threshold', type=float, default=0.4, help='Score threshold for detection')
    parser.add_argument('--iou_threshold', type=float, default=0.7, help='IoU threshold for detection')
    parser.add_argument('--mode', type=str, choices=["Use persons and faces", "Use persons only", "Use faces only"], 
                        default="Use persons and faces", help='Detection mode')

    args = parser.parse_args()
    main(args)