import numpy as np
import cv2
from PIL import Image
import torch
from app.model import pth_model_static, cam, pth_processing
from app.face_utils import get_box
from app.config import DICT_EMO
from pytorch_grad_cam.utils.image import show_cam_on_image
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh


def preprocess_image_and_predict(inp):
    # Guard before converting: np.array(None) yields an object array,
    # not None, so the original post-conversion check could never fire.
    if inp is None:
        return None, None, None

    inp = np.array(inp)

    try:
        h, w = inp.shape[:2]
    except Exception:
        return None, None, None

    # Default to "no result" so the final return is well-defined even when
    # no face is detected (previously cur_face/heatmap/confidences could be
    # unbound and raise NameError).
    cur_face, heatmap, confidences = None, None, None

    with mp_face_mesh.FaceMesh(
        static_image_mode=True,  # single images, not a tracked video stream
        max_num_faces=1,
        refine_landmarks=False,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,  # ignored in static_image_mode
    ) as face_mesh:
        # MediaPipe expects an RGB image, which np.array(PIL.Image) provides.
        results = face_mesh.process(inp)
        if results.multi_face_landmarks:
            for fl in results.multi_face_landmarks:
                # Crop the detected face out of the frame.
                startX, startY, endX, endY = get_box(fl, w, h)
                cur_face = inp[startY:endY, startX:endX]

                # Normalize the crop and run the static emotion model.
                cur_face_n = pth_processing(Image.fromarray(cur_face))
                with torch.no_grad():
                    prediction = torch.nn.functional.softmax(
                        pth_model_static(cur_face_n), dim=1
                    ).numpy()[0]

                # Map the 7 class probabilities to emotion labels.
                confidences = {DICT_EMO[i]: float(prediction[i]) for i in range(7)}

                # Grad-CAM heatmap overlaid on the face crop, resized to the
                # model's 224x224 input and scaled to [0, 1] for blending.
                grayscale_cam = cam(input_tensor=cur_face_n)[0, :]
                cur_face_hm = np.float32(cv2.resize(cur_face, (224, 224))) / 255
                heatmap = show_cam_on_image(cur_face_hm, grayscale_cam, use_rgb=True)

    return cur_face, heatmap, confidences
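
# Minimal usage sketch (an assumption, not part of the original module):
# load an image with PIL and run the full detect -> classify -> Grad-CAM
# pipeline. The file name "test.jpg" is hypothetical.
if __name__ == "__main__":
    test_img = Image.open("test.jpg")  # hypothetical sample image
    face, heatmap, confidences = preprocess_image_and_predict(test_img)
    if confidences is not None:
        # Print the predicted emotion probabilities, highest first.
        for emo, p in sorted(confidences.items(), key=lambda kv: kv[1], reverse=True):
            print(f"{emo}: {p:.3f}")
    else:
        print("No face detected.")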