# YOLOv5 🚀 by Ultralytics, GPL-3.0 license

import os
import sys
from pathlib import Path
import cv2

# Absolute path of this module and of the directory that contains it.
FILE = Path(__file__).resolve()
ROOT = FILE.parent

# Append that directory to the import search path, but only once.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, start=Path.cwd()))

import torch
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator, colors, save_one_box
from yolov5.utils.general import (check_img_size,
                        increment_path, non_max_suppression, scale_coords, xyxy2xywh)
from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
from yolov5.models.common import DetectMultiBackend
import torchvision
import numpy as np

# Deterministic preprocessing for a face crop fed to the age model:
# array -> PIL image -> float tensor -> per-channel normalisation -> 224x224.
test_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        torchvision.transforms.Resize(size=(224, 224)),
    ]
)


# Randomised variant of the face-crop preprocessing, used for test-time
# augmentation: every call draws a new rotation / grayscale / perspective /
# sharpness / scale / flip combination before normalising and resizing.
test_random_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.RandomRotation(degrees=(-15, 15)),
        torchvision.transforms.RandomGrayscale(p=0.4),
        torchvision.transforms.RandomPerspective(distortion_scale=0.4, p=0.4),
        torchvision.transforms.RandomAdjustSharpness(sharpness_factor=2),
        torchvision.transforms.RandomAffine(degrees=0, translate=None, scale=(0.9, 1.0)),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        torchvision.transforms.Resize(size=(224, 224)),
    ]
)

def load_yolo_model(weights, device="cpu", imgsz=None, half=False):
    """Load a YOLOv5 detector through ``DetectMultiBackend`` and warm it up.

    Args:
        weights: Weights path(s), forwarded unchanged to ``DetectMultiBackend``.
        device: Device specification handed to ``select_device``.
        imgsz: Inference size as a two-element sequence. ``None`` (the
            default) means ``[1280, 1280]``. The value is adjusted by
            ``check_img_size`` against the model stride before warm-up.
        half: Request FP16 weights. Only honoured when the backend is one of
            pt / jit / onnx / engine and the selected device is not the CPU.
            The default ``False`` keeps the model in FP32. The caller must
            pass a matching ``half`` value to ``predict``.

    Returns:
        The tuple ``(model, stride, names, pt, jit, onnx, engine)``.
    """
    # Build the default per call instead of sharing one mutable list object
    # across every invocation.
    if imgsz is None:
        imgsz = [1280, 1280]

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=False, data=ROOT / 'data/coco128.yaml')
    stride, names = model.stride, model.names
    pt, jit, onnx, engine = model.pt, model.jit, model.onnx, model.engine
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # FP16 is supported on limited backends with CUDA only.
    half = bool(half and (pt or jit or onnx or engine) and device.type != 'cpu')
    if pt or jit:
        if half:
            model.model.half()
        else:
            model.model.float()
    model.warmup(imgsz=(1, 3, *imgsz), half=half)

    return model, stride, names, pt, jit, onnx, engine


def _estimate_age(age_model, face_rgb, with_random_augs=False, num_random_augs=12):
    """Return the mean age ``age_model`` predicts for a single RGB face crop.

    The crop is always evaluated once through ``test_transforms``. When
    ``with_random_augs`` is true it is additionally evaluated
    ``num_random_augs`` times through ``test_random_transforms`` and all
    predictions are averaged.
    """
    pipelines = [test_transforms]
    if with_random_augs:
        pipelines.extend([test_random_transforms] * num_random_augs)

    ages = []
    with torch.no_grad():
        for transform in pipelines:
            # NOTE(review): the crop tensor stays on the device the transform
            # produces it on; age_model is assumed to live there too.
            batch = transform(face_rgb).unsqueeze_(0)
            ages.append(age_model(batch)[0].item())
    return float(np.mean(ages))


def predict(
        age_model,
        model,  # loaded detection model (callable on an image batch)
        stride,
        source=None,  # PIL Image
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.5,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # accepted for compatibility; always ignored
        half=False,  # use FP16 half-precision inference
        with_random_augs=False,  # average the age over random augmentations
        num_random_augs=12,  # number of random augmentations when enabled
        ):
    """Detect boxes in ``source`` and estimate an age for every detection.

    The image is converted with ``pil_to_cv``, run through ``model`` and
    ``non_max_suppression``, and each surviving box is cropped from the
    original image, converted from BGR to RGB and passed to ``age_model``.

    Args:
        age_model: Callable returning a batch of single-value predictions
            for a ``(1, C, 224, 224)`` tensor.
        model: Detection model, called as ``model(im, augment=..., visualize=...)``.
        stride: Stride forwarded to ``pil_to_cv``.
        source: Input image. Only ``imgsz[0]`` of ``imgsz`` is used when
            converting it.
        conf_thres, iou_thres, classes, agnostic_nms, max_det: Forwarded to
            ``non_max_suppression``.
        device: Device the detector input tensor is moved to.
        augment: Forwarded to the detection model.
        visualize: Ignored; the detector is always called with
            ``visualize=False``.
        half: Cast the detector input to FP16 instead of FP32.
        with_random_augs: Also evaluate randomly augmented copies of each
            crop and average the predicted ages.
        num_random_augs: How many augmented copies to evaluate per crop.

    Returns:
        A list with one dict per detection, holding ``"class"`` (the mean
        predicted age truncated to an integer, as a string), ``"xmin"``,
        ``"ymin"``, ``"xmax"``, ``"ymax"`` (integer pixel coordinates in the
        original image) and ``"conf"`` (detection confidence as a float).
        Detections whose box has zero area are omitted.
    """
    im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)

    im = torch.from_numpy(im).to(device)
    im = im.half() if half else im.float()  # uint8 to fp16/32
    im /= 255  # 0 - 255 to 0.0 - 1.0
    if len(im.shape) == 3:
        im = im[None]  # expand for batch dim

    # Inference only: no autograd graph is needed for detection or NMS.
    with torch.no_grad():
        pred = model(im, augment=augment, visualize=False)
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

    preds = []
    for det in pred:  # per image
        if not len(det):
            continue

        # Rescale boxes from the network input size to the original image size.
        det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

        # The last column (presumably the detector's class index) is unused.
        for *xyxy, conf, _ in reversed(det):
            xmin, ymin, xmax, ymax = (int(v) for v in xyxy)

            face = im0[ymin:ymax, xmin:xmax]
            if face.size == 0:
                # A zero-area box yields an empty crop that cv2 cannot convert.
                continue
            face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)

            age = _estimate_age(
                age_model,
                face_rgb,
                with_random_augs=with_random_augs,
                num_random_augs=num_random_augs,
            )
            preds.append({
                "class": str(int(age)),
                "xmin": xmin,
                "ymin": ymin,
                "xmax": xmax,
                "ymax": ymax,
                "conf": float(conf),
            })

    return preds