Spaces:

bit-guber
/

Face_Features_Extraction

Paused

App Files Files Community

Face_Features_Extraction / app.py

bit-guber

Update app.py

a5b7893 verified 8 months ago

raw

history blame contribute delete

10 kB

	import cv2
	from transformers import ViTImageProcessor, ViTForImageClassification, AutoModelForImageClassification, AutoImageProcessor
	import torch
	import numpy as np
	# import face_recognition
	import subprocess
	import sys
	# subprocess.check_call([sys.executable, "-m", "pip", "install", 'git+https://github.com/bit-guber/retinaface.git', "--force-reinstall"])

	# from retinaface import RetinaFace

	from deepface import DeepFace

	# Turn on cuDNN benchmark mode; per the PyTorch docs this lets cuDNN try
	# several convolution algorithms and keep the fastest for each input shape.
	torch.backends.cudnn.benchmark = True

	import urllib.request
	# Fetch OpenCV's frontal-face Haar cascade into the working directory (saved
	# under the last component of the URL) and load it as a cascade classifier.
	path = (
	    'https://raw.githubusercontent.com/opencv/opencv/master'
	    '/data/haarcascades/haarcascade_frontalface_default.xml'
	)
	urllib.request.urlretrieve(path, path.rsplit('/', 1)[-1])

	face_cascade = cv2.CascadeClassifier('./haarcascade_frontalface_default.xml')

	class Base:
	    """Default preprocessing settings used before running a classifier.

	    The fields are read by resize(), rescale() and normalize(); the other
	    config classes in this file inherit from this one and override fields.
	    """

	    # Edge length, in pixels, of the square image produced by resize().
	    size = 224
	    # Multiplier applied by rescale(); brings 8-bit pixel values into [0, 1].
	    scale = 1 / 255
	    # Per-channel mean and std, shaped (1, 1, 1, 3) so that they broadcast
	    # over the last (channel) axis of a batch of images.
	    mean = np.full( ( 1, 1, 1, 3 ), .5 )
	    std = np.full( ( 1, 1, 1, 3 ), .5 )
	    # Resampling filter id handed to PIL's Image.resize (2 is bilinear).
	    resample = 2

	class ethnicityConfig(Base):
	    """Preprocessing settings for the ethnicity model: Base with a larger input."""

	    # The ethnicity model is fed 384x384 images instead of Base's 224x224.
	    size = 384

	class maskConfig(Base):
	    """Preprocessing settings for the mask model: Base with its own filter and statistics."""

	    # Resampling filter id handed to PIL's Image.resize (3 is bicubic).
	    resample = 3
	    # One value repeated for all three channels, shaped (1, 1, 1, 3).
	    mean = np.full( ( 1, 1, 1, 3 ), .485 )
	    std = np.full( ( 1, 1, 1, 3 ), .229 )


	# Hugging Face Hub repository ids of the six classifiers used by this app.
	AGE = "nateraw/vit-age-classifier"
	GENDER = 'rizvandwiki/gender-classification-2'
	ETHNICITY = 'cledoux42/Ethnicity_Test_v003'
	MASK = 'DamarJati/Face-Mask-Detection'
	BLUR = 'WT-MM/vit-base-blur'
	BEARD = 'dima806/beard_face_image_detection'


	# Run on the GPU when torch can see one, otherwise on the CPU.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'

	# The five ViT classifiers are loaded the same way, in the same order as
	# before: age, gender, beard, blur, ethnicity. Image preprocessing is done by
	# hand (see the config classes), so no image processor is loaded here.
	age_model, gender_model, beard_model, blur_model, ethnicity_model = (
	    ViTForImageClassification.from_pretrained( repo ).to(device)
	    for repo in ( AGE, GENDER, BEARD, BLUR, ETHNICITY )
	)

	# The mask classifier is loaded through the Auto* factory instead.
	mask_model = AutoModelForImageClassification.from_pretrained( MASK ).to(device)


	from PIL import Image
	def normalize( data, mean, std ):
	    """Standardize *data* as ``(data - mean) / std`` and return it as float32.

	    *mean* and *std* are combined with *data* using ordinary numpy
	    broadcasting; callers in this file pass (1, 1, 1, 3) arrays so the
	    statistics apply along the last axis.
	    """
	    centered = data - mean
	    return ( centered / std ).astype( np.float32 )

	def resize( image, size = 224, resample = 2 ):
	    """Resize *image* to a ``size`` x ``size`` square and return it as a numpy array.

	    *image* must provide a PIL-style ``resize((width, height), resample=...)``
	    method; *resample* is forwarded to it unchanged.
	    """
	    target = ( size, size )
	    return np.array( image.resize( target, resample = resample ) )

	def rescale( data, scale = Base.scale ):
	    """Return *data* multiplied by *scale* (``Base.scale`` unless overridden)."""
	    scaled = data * scale
	    return scaled

	# resize
	# rescale
	# normalize

	def ParallelBatchsPredict( data, MODELS, nbatchs = 16 ):
	    """Classify *data* with every model in *MODELS*, ``nbatchs`` samples at a time.

	    *data* is a 4-D numpy array with channels on the last axis; it is
	    transposed to channels-first before being handed to the models.

	    Returns one list per model, in the order of *MODELS*, holding the
	    predicted label (looked up in ``model.config.id2label``) of each sample.
	    """
	    total = data.shape[0]
	    # (N, H, W, C) -> (N, C, H, W): the models take channels-first input.
	    channelsFirst = np.transpose( data, ( 0, 3, 1, 2 ) )
	    labelsPerModel = [ [] for _ in MODELS ]
	    seen = 0

	    with torch.no_grad():
	        for start in range( 0, total, nbatchs ):
	            chunk = channelsFirst[ start:start + nbatchs ]
	            seen += chunk.shape[0]
	            tensor = torch.from_numpy( chunk ).to(device)
	            # Every model sees the same tensor, so each chunk is uploaded once.
	            for labels, model in zip( labelsPerModel, MODELS ):
	                classIds = model( tensor ).logits.softmax(1).argmax(1).tolist()
	                labels.extend( model.config.id2label[ classId ] for classId in classIds )

	    # Sanity check: every sample went through exactly one chunk.
	    assert seen == total
	    return labelsPerModel
	# model arrange
	# age
	# gender
	# blur
	# beard
	# change processor
	# Ethnicity
	# change processor
	# Mask
	def _preprocessFaces( rawFaces, config ):
	    """Resize, rescale and normalize *rawFaces* with the settings held by *config*."""
	    pixels = np.array([ resize( face, size = config.size, resample = config.resample ) for face in rawFaces ])
	    return normalize( rescale( pixels ), config.mean, config.std )


	def AnalysisFeatures(rawFaces): # list[ PIL.Image ]
	    """Predict age, gender, beard, blur, ethnicity and mask for every face.

	    *rawFaces* is a list of PIL images, one per face.

	    Returns six sequences aligned with *rawFaces*:
	    ``ages, genders, beards, blurs, ethncities, masks``. Ages, genders and
	    ethnicities are the models' label strings; beards, blurs and masks are
	    booleans (True for the labels 'Beard', 'blurry' and 'WithMask').

	    For empty input six independent empty lists are returned. The previous
	    ``[[]] * 6`` handed out six references to one shared list, so appending
	    to one result silently changed all the others.
	    """
	    if len(rawFaces) == 0:
	        return [ [] for _ in range(6) ]

	    # Four models share Base preprocessing, so they run over the same batches.
	    baseProcessed = _preprocessFaces( rawFaces, Base )
	    ages, genders, beards, blurs = ParallelBatchsPredict(baseProcessed, [age_model, gender_model, beard_model, blur_model] )

	    # The ethnicity and mask models each need their own preprocessing.
	    ethncities = ParallelBatchsPredict( _preprocessFaces( rawFaces, ethnicityConfig ), [ethnicity_model] )[0]
	    masks = ParallelBatchsPredict( _preprocessFaces( rawFaces, maskConfig ), [mask_model] )[0]

	    # Collapse the binary classifiers' labels to booleans.
	    beards = [ beard == 'Beard' for beard in beards ]
	    blurs = [ blur == 'blurry' for blur in blurs ]
	    masks = [ mask == 'WithMask' for mask in masks ]

	    return ages, genders, beards, blurs, ethncities, masks


	import gradio as gr

	def frameWrapper( facesCo, ages, genders, beards, blurs, ethncities, masks ):
	    """Bundle the per-face results of one frame into a JSON-friendly dict.

	    *facesCo* holds one ``(x, y, w, h)`` box per face; the other arguments
	    are sequences aligned with it. The result has 'identifiedPersonCount'
	    (``len(facesCo)``) and 'value', a list with one record per face.
	    """
	    perFace = zip( facesCo, ages, genders, beards, blurs, ethncities, masks )
	    records = []
	    for (x, y, w, h), age, gender, beard, blur, ethncity, mask in perFace:
	        records.append({
	            'coordinate': { 'x': x, 'y': y, 'h': h, 'w': w },
	            'ageGroup': age,
	            'gender': gender,
	            'beardPresent': beard,
	            'blurOccur': blur,
	            'ethncity': ethncity,
	            'maskPresent': mask,
	        })
	    return { 'identifiedPersonCount': len(facesCo), 'value': records }

	def postProcessed( rawfaces, maximunSize, minSize = 30 ):
	    """Keep only the boxes that are still larger than *minSize* after clipping.

	    Each ``(x, y, w, h)`` box has its corners clipped to ``maximunSize[0]``
	    (x direction) and ``maximunSize[1]`` (y direction). A box is kept, with
	    its original unclipped values, when the clipped width and height both
	    exceed *minSize*.

	    NOTE(review): a commented-out caller passes ``image.shape[:2]``, which
	    numpy orders as (rows, cols) - confirm the intended axis order.
	    """
	    limitX, limitY = maximunSize[0], maximunSize[1]
	    kept = []
	    for (x, y, w, h) in rawfaces:
	        clippedWidth = min( w + x, limitX ) - min( x, limitX )
	        clippedHeight = min( h + y, limitY ) - min( y, limitY )
	        if clippedWidth <= minSize or clippedHeight <= minSize:
	            continue
	        kept.append( (x, y, w, h) )
	    return kept
	def _faceToUint8( face ):
	    """Return *face* as a uint8 array that Image.fromarray can read as RGB.

	    Float input whose maximum is at most 1.0 is treated as [0, 1]-normalised
	    and scaled by 255; values are clipped to [0, 255] before the cast.
	    """
	    face = np.asarray( face )
	    if face.dtype == np.uint8:
	        return face
	    if np.issubdtype( face.dtype, np.floating ) and face.size and face.max() <= 1.0:
	        face = face * 255.0
	    return np.clip( face, 0, 255 ).astype( np.uint8 )


	def image_inference(image):
	    """Detect the faces in *image*, classify them and draw their boxes.

	    *image* is the numpy array delivered by the Gradio image input, or None
	    when nothing was uploaded.

	    Returns ``(annotated PIL image, result dict)``. A missing or empty image
	    yields ``(image, {'ErrorFound': 'ImageNotFound'})``; an image without
	    any detectable face yields an un-annotated copy and an empty result.
	    """
	    # `image is None` must be tested first: reading .shape on None raised.
	    if image is None or image.ndim < 2 or image.size == 0:
	        return image, { 'ErrorFound': 'ImageNotFound' }

	    # DeepFace documents numpy input as BGR, while this function treats
	    # `image` as RGB (see the Image.fromarray call below), so the channel
	    # axis is reversed for detection only.
	    # NOTE(review): assumes the Gradio input delivers RGB - confirm.
	    detectorInput = np.ascontiguousarray( image[..., ::-1] ) if image.ndim == 3 else image
	    try:
	        detections = DeepFace.extract_faces( detectorInput )
	    except ValueError:
	        # DeepFace raises ValueError when it cannot find any face
	        # (enforce_detection defaults to True); report zero faces instead.
	        detections = []

	    rawfaces = [ tuple( int( found['facial_area'][key] ) for key in ( 'x', 'y', 'w', 'h' ) ) for found in detections ]
	    # DeepFace may return the crops as floats in [0, 1]; Image.fromarray with
	    # an explicit mode reads the raw buffer, so convert to uint8 first.
	    faces = [ Image.fromarray( _faceToUint8( found['face'] ), mode = 'RGB' ) for found in detections ]
	    ages, genders, beards, blurs, ethncities, masks = AnalysisFeatures( faces )

	    annotatedImage = image.copy()
	    for (x, y, w, h) in rawfaces:
	        # cv2.rectangle takes two corner points: top-left and bottom-right.
	        cv2.rectangle( annotatedImage, (x, y), (x + w, y + h), (255, 0, 0), 5 )

	    return Image.fromarray(annotatedImage, mode = 'RGB'), frameWrapper( rawfaces, ages, genders, beards, blurs, ethncities, masks )
	def video_inference(video_path):
	    """Detect and classify the faces in every frame of the video at *video_path*.

	    Returns a list with one frameWrapper() dict per decoded frame, in frame
	    order; frames without a detectable face contribute an empty entry.

	    Faces are located with DeepFace, as in image_inference(). The previous
	    body called RetinaFace, whose import is commented out in this file, and
	    iterated a misspelled variable, so it raised NameError either way.
	    """
	    global_facesCo = []   # per frame: list of (x, y, w, h) boxes
	    global_faces = []     # every face crop of the video, in frame order

	    cap = cv2.VideoCapture(video_path)
	    try:
	        while cap.isOpened():
	            ok, frame = cap.read()
	            if not ok or frame is None:
	                # End of stream, or a frame that could not be decoded.
	                break
	            # OpenCV decodes to BGR; the classifiers are fed RGB crops.
	            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	            try:
	                # DeepFace documents numpy input as BGR, so it gets the raw frame.
	                detections = DeepFace.extract_faces( frame )
	            except ValueError:
	                # Raised by DeepFace when the frame contains no detectable face.
	                detections = []

	            facesCo = []
	            for found in detections:
	                x, y, w, h = ( int( found['facial_area'][key] ) for key in ( 'x', 'y', 'w', 'h' ) )
	                # Rows are indexed by y and columns by x.
	                crop = image[ max( y, 0 ):y + h, max( x, 0 ):x + w ]
	                if crop.size == 0:
	                    # Skip degenerate boxes so crops and boxes stay aligned.
	                    continue
	                facesCo.append( (x, y, w, h) )
	                global_faces.append( Image.fromarray( np.ascontiguousarray( crop ), mode = 'RGB' ) )
	            global_facesCo.append( facesCo )
	    finally:
	        # Always give the decoder back, even when a frame fails.
	        cap.release()

	    # One pass over all crops, so the models run on full batches.
	    ages, genders, beards, blurs, ethncities, masks = AnalysisFeatures( global_faces )

	    # Hand each frame its own slice of the flat prediction lists.
	    total_extraction = []
	    start = 0
	    for facesCo in global_facesCo:
	        stop = start + len(facesCo)
	        total_extraction.append( frameWrapper( facesCo, ages[start:stop], genders[start:stop], beards[start:stop], blurs[start:stop], ethncities[start:stop], masks[start:stop] ) )
	        start = stop
	    return total_extraction

	css = """
	.outputJSON{
	overflow: scroll;
	}
	"""

	# Image tab: one uploaded picture in, annotated picture plus JSON report out.
	imageHander = gr.Interface(
	    fn = image_inference,
	    inputs = gr.Image( type = "numpy", sources = 'upload' ),
	    outputs = [ 'image', gr.JSON( elem_classes = 'outputJSON' ) ],
	    css = css,
	)

	# Video tab: an uploaded clip in (audio dropped, max_length = 30), JSON out.
	videoHander = gr.Interface(
	    fn = video_inference,
	    inputs = gr.Video( sources = 'upload', max_length = 30, include_audio = False ),
	    outputs = 'json',
	)

	# Both interfaces are served as tabs of a single app.
	demo = gr.TabbedInterface(
	    [ imageHander, videoHander ],
	    tab_names = [ 'Image-to-Features', 'Video-to-Features' ],
	    title = 'Facial Feature Extraction',
	)

	demo.launch()