|
import cv2 |
|
from transformers import ViTImageProcessor, ViTForImageClassification, AutoModelForImageClassification, AutoImageProcessor |
|
import torch |
|
import numpy as np |
|
|
|
import os
import urllib.request

# Enable cuDNN's benchmark mode for the torch models loaded further down.
torch.backends.cudnn.benchmark = True

# Remote location of OpenCV's frontal-face Haar cascade definition.
path = 'https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml'

# The cascade is stored in the working directory under its remote file name.
_cascade_file = path.split('/')[-1]

# Fetch the cascade only when no local copy exists, so restarts neither
# repeat the download nor require network access. The download goes to a
# temporary name first so an interrupted transfer never leaves a truncated
# file behind under the final name.
if not os.path.exists(_cascade_file):
    _partial_file = _cascade_file + '.part'
    urllib.request.urlretrieve(path, _partial_file)
    os.replace(_partial_file, _cascade_file)

face_cascade = cv2.CascadeClassifier('./haarcascade_frontalface_default.xml')

# Fail at start-up with a readable message instead of letting the first
# detectMultiScale() call fail on an empty classifier.
if face_cascade.empty():
    raise RuntimeError(
        'Could not load the Haar cascade from ./' + _cascade_file
        + '; delete the file and restart to download it again.'
    )
|
|
|
class Base:
    """Default image preprocessing settings shared by the classifiers.

    Subclasses override individual attributes; everything else is inherited.
    """

    # Edge length, in pixels, of the square image produced by resizing.
    size = 224

    # Factor applied to pixel values before normalisation.
    scale = 1. / 255.

    # Mean and standard deviation used for normalisation, stored with shape
    # (1, 1, 1, 3) so they broadcast over the last axis of a 4-D batch.
    mean = np.full((1, 1, 1, 3), .5)
    std = np.full((1, 1, 1, 3), .5)

    # Resampling filter code handed to the image's resize() call.
    # NOTE(review): presumably PIL's bilinear filter -- confirm.
    resample = 2
|
|
|
class ethnicityConfig(Base):
    """Preprocessing settings for the ethnicity classifier.

    Identical to ``Base`` except for the larger input resolution.
    """

    # Edge length, in pixels, of the square image produced by resizing.
    size = 384
|
|
|
class maskConfig(Base):
    """Preprocessing settings for the face-mask classifier.

    Overrides the resampling filter and the normalisation statistics of
    ``Base``; the input size and pixel scale are inherited.
    """

    # Resampling filter code handed to the image's resize() call.
    # NOTE(review): presumably PIL's bicubic filter -- confirm.
    resample = 3

    # Normalisation statistics with shape (1, 1, 1, 3); the same value is
    # used for every channel.
    # NOTE(review): 0.485 / 0.229 look like only the first channel of the
    # usual ImageNet statistics -- confirm against the checkpoint's
    # preprocessor configuration.
    mean = np.full((1, 1, 1, 3), .485)
    std = np.full((1, 1, 1, 3), .229)
|
|
|
|
|
# Checkpoint identifiers passed to from_pretrained(), one per predicted attribute.
AGE = "nateraw/vit-age-classifier"
GENDER = 'rizvandwiki/gender-classification-2'
ETHNICITY = 'cledoux42/Ethnicity_Test_v003'
MASK = 'DamarJati/Face-Mask-Detection'
BLUR = 'WT-MM/vit-base-blur'
BEARD = 'dima806/beard_face_image_detection'

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

# The ViT checkpoints are loaded in the same order as before:
# age, gender, beard, blur, ethnicity.
age_model, gender_model, beard_model, blur_model, ethnicity_model = [
    ViTForImageClassification.from_pretrained(checkpoint).to(device)
    for checkpoint in (AGE, GENDER, BEARD, BLUR, ETHNICITY)
]

# The mask checkpoint is loaded through the generic auto class instead.
mask_model = AutoModelForImageClassification.from_pretrained(MASK).to(device)
|
|
|
|
|
from PIL import Image |
|
def normalize(data, mean, std):
    """Standardise ``data`` as ``(data - mean) / std`` and cast to float32.

    Args:
        data: Array of values to normalise.
        mean: Value (or broadcastable array) subtracted from ``data``.
        std: Value (or broadcastable array) the centred data is divided by.

    Returns:
        The normalised data as a ``numpy.float32`` array.
    """
    centered = data - mean
    standardized = centered / std
    return standardized.astype(np.float32)
|
|
|
def resize(image, size=224, resample=2):
    """Resize ``image`` to a ``size`` x ``size`` square and return it as an array.

    Args:
        image: Object exposing ``resize((width, height), resample=...)``;
            the callers in this file pass PIL images.
        size: Edge length of the square output.
        resample: Resampling filter code forwarded to ``image.resize``.

    Returns:
        ``numpy.ndarray`` built from the resized image.
    """
    target_dims = (size, size)
    resized = image.resize(target_dims, resample=resample)
    return np.array(resized)
|
|
|
def rescale(data, scale=Base.scale):
    """Multiply ``data`` by ``scale``.

    Args:
        data: Values to rescale.
        scale: Multiplicative factor; defaults to ``Base.scale``.

    Returns:
        The product ``data * scale``.
    """
    scaled = data * scale
    return scaled
|
|
|
|
|
|
|
|
|
|
|
def ParallelBatchsPredict(data, MODELS, nbatchs=16):
    """Classify ``data`` with several models, sharing each mini-batch.

    The input is split into chunks of at most ``nbatchs`` items. Each chunk
    is moved to ``device`` once and then fed to every model in ``MODELS``.

    Args:
        data: 4-D ``numpy`` array whose axes are reordered with
            ``(0, 3, 1, 2)`` before inference (presumably channels-last
            image batches -- confirm against callers).
        MODELS: Sequence of callables returning an object with a ``logits``
            tensor, each exposing ``config.id2label``.
        nbatchs: Maximum number of items per forward pass; must be >= 1.

    Returns:
        A list with one entry per model, in ``MODELS`` order; each entry is
        the list of predicted labels for every item of ``data``.

    Raises:
        ValueError: If ``nbatchs`` is smaller than 1.
    """
    # Explicit validation instead of an ``assert`` that disappears under -O.
    if nbatchs < 1:
        raise ValueError('nbatchs must be a positive integer')

    data = np.transpose(data, (0, 3, 1, 2))
    total = data.shape[0]

    predictions = [[] for _ in MODELS]

    with torch.no_grad():
        for start in range(0, total, nbatchs):
            batch = torch.from_numpy(data[start:start + nbatchs]).to(device)

            for labels, model in zip(predictions, MODELS):
                # Softmax is monotonic, so the arg-max of the raw logits
                # already identifies the predicted class.
                class_ids = model(batch).logits.argmax(1).tolist()
                id2label = model.config.id2label
                labels.extend(id2label[class_id] for class_id in class_ids)

    return predictions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _preprocess_faces(rawFaces, config):
    """Resize, rescale and normalise ``rawFaces`` using the settings in ``config``."""
    batch = np.array([
        resize(face, size=config.size, resample=config.resample)
        for face in rawFaces
    ])
    batch = rescale(batch, config.scale)
    return normalize(batch, config.mean, config.std)


def AnalysisFeatures(rawFaces):
    """Predict age, gender, beard, blur, ethnicity and mask for each face.

    Args:
        rawFaces: Sequence of face images accepted by ``resize``.

    Returns:
        A 6-tuple ``(ages, genders, beards, blurs, ethncities, masks)`` of
        lists with one entry per face. ``ages``, ``genders`` and
        ``ethncities`` hold model labels; ``beards``, ``blurs`` and
        ``masks`` hold booleans. With no faces, six independent empty lists
        are returned.
    """
    if len(rawFaces) == 0:
        # Six distinct lists: ``[[]] * 6`` would hand the caller the same
        # list object six times, so mutating one would change them all.
        return tuple([] for _ in range(6))

    # Age, gender, beard and blur share the default preprocessing, so they
    # run on the same prepared batch.
    baseProcessed = _preprocess_faces(rawFaces, Base)
    ages, genders, beards, blurs = ParallelBatchsPredict(
        baseProcessed, [age_model, gender_model, beard_model, blur_model]
    )

    EthncityProcessed = _preprocess_faces(rawFaces, ethnicityConfig)
    ethncities = ParallelBatchsPredict(EthncityProcessed, [ethnicity_model])[0]

    MaskProcessed = _preprocess_faces(rawFaces, maskConfig)
    masks = ParallelBatchsPredict(MaskProcessed, [mask_model])[0]

    # Collapse the two-class label outputs into booleans.
    beards = [beard == 'Beard' for beard in beards]
    blurs = [blur == 'blurry' for blur in blurs]
    masks = [mask == 'WithMask' for mask in masks]

    return ages, genders, beards, blurs, ethncities, masks
|
|
|
|
|
import gradio as gr |
|
|
|
def frameWrapper(facesCo, ages, genders, beards, blurs, ethncities, masks):
    """Assemble the per-frame result dictionary.

    Args:
        facesCo: Sequence of ``(x, y, w, h)`` face boxes.
        ages, genders, beards, blurs, ethncities, masks: Per-face attribute
            sequences aligned with ``facesCo``.

    Returns:
        ``{'identifiedPersonCount': len(facesCo), 'value': [...]}`` where
        ``value`` holds one record per face. Records are built by zipping
        the inputs, so the shortest sequence determines how many there are.
    """
    records = []
    per_face = zip(facesCo, ages, genders, beards, blurs, ethncities, masks)

    for box, age, gender, beard, blur, ethncity, mask in per_face:
        x, y, w, h = box
        records.append({
            'coordinate': {'x': x, 'y': y, 'h': h, 'w': w},
            'ageGroup': age,
            'gender': gender,
            'beardPresent': beard,
            'blurOccur': blur,
            'ethncity': ethncity,
            'maskPresent': mask,
        })

    return {'identifiedPersonCount': len(facesCo), 'value': records}
|
|
|
def postProcessed(rawfaces, maximunSize, minSize=30):
    """Drop boxes whose in-bounds extent is not larger than ``minSize``.

    Each ``(x, y, w, h)`` box is clipped against the upper limits
    ``maximunSize[0]`` (for ``x`` and ``x + w``) and ``maximunSize[1]``
    (for ``y`` and ``y + h``). A box is kept only when both clipped extents
    are strictly greater than ``minSize``.

    Args:
        rawfaces: Iterable of ``(x, y, w, h)`` boxes.
        maximunSize: Pair of upper limits, first for x then for y.
        minSize: Threshold the clipped width and height must both exceed.

    Returns:
        List of the surviving boxes as ``(x, y, w, h)`` tuples, carrying
        their original (unclipped) values.
    """
    kept = []

    for box in rawfaces:
        x, y, w, h = box

        clipped_width = min(w + x, maximunSize[0]) - min(x, maximunSize[0])
        clipped_height = min(h + y, maximunSize[1]) - min(y, maximunSize[1])

        if clipped_width <= minSize or clipped_height <= minSize:
            continue

        kept.append((x, y, w, h))

    return kept
|
def image_inference(image):
    """Detect faces in one image and report the attributes of each face.

    Args:
        image: Image array from the Gradio image input (configured with
            ``type="numpy"``), or ``None`` when nothing was uploaded.

    Returns:
        The dictionary built by ``frameWrapper`` for the detected faces, or
        ``{'ErrorFound': 'ImageNotFound'}`` when no usable image was given.
    """
    # Gradio passes None when the input is left empty; an array with no
    # pixels is equally unusable.
    if image is None:
        return {'ErrorFound': 'ImageNotFound'}

    image = np.asarray(image)
    if image.size == 0:
        return {'ErrorFound': 'ImageNotFound'}

    # The crops below are handed to PIL as RGB, so the array is treated as
    # RGB here as well when deriving the grayscale detection input.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    rawfaces = face_cascade.detectMultiScale(gray, 1.05, 5, minSize=(30, 30))

    # postProcessed bounds x by element 0 and y by element 1, so it needs
    # (width, height) rather than the (height, width) order of ``shape``.
    height, width = image.shape[:2]
    rawfaces = postProcessed(rawfaces, (width, height))

    # Arrays are indexed [row, column]: y selects rows, x selects columns.
    faces = [
        Image.fromarray(image[y:y + h, x:x + w].copy(), mode='RGB')
        for (x, y, w, h) in rawfaces
    ]

    ages, genders, beards, blurs, ethncities, masks = AnalysisFeatures(faces)

    return frameWrapper(rawfaces, ages, genders, beards, blurs, ethncities, masks)
|
def video_inference(video_path):
    """Detect faces in every frame of a video and report their attributes.

    Face crops from all frames are collected first and classified in one
    ``AnalysisFeatures`` call; the results are then split back per frame.

    Args:
        video_path: Path of the video file, as accepted by
            ``cv2.VideoCapture``.

    Returns:
        A list with one ``frameWrapper`` dictionary per decoded frame, in
        frame order.
    """
    global_facesCo = []  # face boxes, one list per frame
    global_faces = []    # face crops of all frames, flattened in frame order

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ok, img = cap.read()
            # read() signals the end of the stream (or a decode failure)
            # through its flag; stop there instead of catching the error a
            # missing frame would cause later.
            if not ok or img is None:
                break

            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            rawfaces = face_cascade.detectMultiScale(gray, 1.05, 6, minSize=(30, 30))

            image = np.asarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            # postProcessed bounds x by element 0 and y by element 1, so it
            # needs (width, height) rather than ``shape``'s (height, width).
            height, width = image.shape[:2]
            rawfaces = postProcessed(rawfaces, (width, height))

            global_facesCo.append(rawfaces)

            for (x, y, w, h) in rawfaces:
                # Arrays are indexed [row, column]: y is the row offset.
                face = image[y:y + h, x:x + w].copy()
                global_faces.append(Image.fromarray(face, mode='RGB'))
    finally:
        # Always hand the capture back, even if a frame fails to process.
        cap.release()

    # Six parallel per-face lists, ordered like frameWrapper's parameters.
    features = AnalysisFeatures(global_faces)

    total_extraction = []
    start = 0
    for facesCo in global_facesCo:
        stop = start + len(facesCo)
        per_frame = [feature[start:stop] for feature in features]
        total_extraction.append(frameWrapper(facesCo, *per_frame))
        start = stop

    return total_extraction
|
|
|
# Stylesheet for the JSON result panel of the image tab.
css = """
.outputJSON{
overflow: scroll;
}
"""

# Image tab: an uploaded picture goes to image_inference, result shown as JSON.
_image_input = gr.Image(type="numpy", sources = 'upload')
_image_output = gr.JSON(elem_classes = 'outputJSON')
imageHander = gr.Interface(
    fn = image_inference,
    inputs = _image_input,
    outputs = _image_output,
    css = css,
)

# Video tab: an uploaded clip goes to video_inference, result shown as JSON.
_video_input = gr.Video(sources = 'upload', max_length = 30, include_audio = False)
videoHander = gr.Interface(
    fn = video_inference,
    inputs = _video_input,
    outputs = 'json',
)

# Both handlers are exposed as tabs of a single application.
demo = gr.TabbedInterface([imageHander, videoHander])

demo.launch()