#!/usr/bin/env python

#############################################################################
#
#   This is the bulk of the logic for the gradio demo. You use it for whatever
#   you want. Credit would be nice but w/e
#
#   You can also run it on an image from the cli
#
#   TODO:
#
#       1. rework the classes that just wrap Dict and List to extend them
#       2. cleanup all the to_dict madness
#       3. convert the print calls to use the logging
#       4. add a proper creative commons license
#       5. cleanup string constants
#       6. replace custom code with libraries like for OBJ
#
#############################################################################

import cv2
import json
import logging
import mediapipe as mp
import numpy as np
import os
import sys
import torch

from mediapipe.framework.formats import landmark_pb2
from mediapipe.python.solutions.drawing_utils import _normalized_to_pixel_coordinates
from PIL import Image, ImageDraw
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from typing import List, Mapping, Optional, Tuple, Union, Dict, Type

from utils import colorize
from quads import QUADS

mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# NOTE(review): used only as an annotation for image arrays; `np.ndarray`
# would be the more accurate alias than `Type[np.ndarray]` - confirm and swap.
NumpyImage = Type[np.ndarray]

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
HF_HACK = True


class Point3:
    """Three numbers kept in a list and exposed as ``x``, ``y`` and ``z``."""

    def __init__(self, values: List[float] = None):
        # build the default per instance: a default list in the signature
        # would be one object shared (and mutated) by every instance
        self.values = 3 * [0] if values is None else values

    @property
    def x(self):
        return self.values[0]

    @property
    def y(self):
        return self.values[1]

    @property
    def z(self):
        return self.values[2]

    def to_dict(self):
        return {'x': self.x, 'y': self.y, 'z': self.z}


class TextureCoordinate:
    """Two numbers kept in a list and exposed as ``u`` and ``v``."""

    def __init__(self, values: List[float] = None):
        self.values = 2 * [0] if values is None else values

    @property
    def u(self):
        return self.values[0]

    @property
    def v(self):
        return self.values[1]

    def to_dict(self):
        return {'u': self.u, 'v': self.v}


class PixelCoordinate:
    """Two integers kept in a list and exposed as ``x`` and ``y``."""

    def __init__(self, values: List[int] = None):
        self.values = 2 * [0] if values is None else values

    @property
    def x(self):
        return self.values[0]

    @property
    def y(self):
        return self.values[1]

    def to_dict(self):
        return {'x': self.x, 'y': self.y}


class DepthMap:
    """Depth values of one mesh point, keyed by the name of the depth source."""

    MEDIA_PIPE = 'mediapipe'

    def __init__(self, values: Dict[str, float] = None):
        # fresh dict per instance; callers add keys to it after construction
        self.values = {'og': 0} if values is None else values

    def to_dict(self):
        return self.values


class DepthMapping:
    """Linear remapping of a tracked [lo, hi] range onto [toLo, toHi], times a weight."""

    def __init__(self, weight: float = 1, lo: float = +np.inf, hi: float = -np.inf,
                 toLo: float = 0, toHi: float = 1):
        self.weight = weight
        self.lo = lo
        self.hi = hi
        self.toLo = toLo
        self.toHi = toHi
        self.diff = 1
        self.toDiff = 1
        self.update()

    def reset(self):
        """Forget the tracked range; the target range and weight are kept."""
        self.lo = +np.inf
        self.hi = -np.inf

    def track(self, value):
        """Widen [lo, hi] so that it includes value."""
        self.lo = min(self.lo, value)
        self.hi = max(self.hi, value)

    def update(self):
        """Recompute the cached range sizes; returns self so calls can be chained."""
        self.diff = self.hi - self.lo
        self.toDiff = self.toHi - self.toLo
        return self

    def translate(self, value):
        """Map value from the tracked range into the target range and apply the weight.

        The normalisation step is skipped when the tracked range is empty
        (diff == 0) or was never tracked (diff is not finite); dividing by
        such a range would only produce NaN.
        """
        if self.diff != 0 and np.isfinite(self.diff):
            value = (value - self.lo) / self.diff
        value = self.toLo + value * self.toDiff
        value = value * self.weight
        return value

    def to_dict(self):
        return {
            'weight': self.weight,
            'lo': self.lo,
            'hi': self.hi,
            'toLo': self.toLo,
            'toHi': self.toHi,
            'diff': self.diff,
            'toDiff': self.toDiff,
        }


class WeightMap:
    """A DepthMapping per depth source name."""

    def __init__(self, values: Dict[str, DepthMapping] = None):
        if values is None:
            self.values = {DepthMap.MEDIA_PIPE: DepthMapping()}
        else:
            self.values = values

    def set(self, key: str, depthMapping: DepthMapping):
        self.values[key] = depthMapping

    def totally(self, name: str):
        """Give the named source a weight of 1 and every other source a weight of 0.

        Raises:
            Exception: when there is no mapping called name.
        """
        if name not in self.values:
            # the message used to reference an undefined variable, which
            # turned this into a NameError
            raise Exception(f'no weight for {name} in {self.to_dict()}')
        for depthMapping in self.values.values():
            depthMapping.weight = 0
        self.values[name].weight = 1

    def saveWeights(self) -> Dict[str, float]:
        """Snapshot of the current weight per source name."""
        return {k: v.weight for k, v in self.values.items()}

    def loadWeights(self, weights: Dict[str, float]):
        """Restore weights from a snapshot.

        Raises:
            Exception: at the first key in weights that has no mapping here.
        """
        for k, weight in weights.items():
            if k in self.values:
                self.values[k].weight = weight
            else:
                raise Exception(f'no weight for {k} in {self.to_dict()}')

    def to_dict(self):
        return {k: dm.to_dict() for k, dm in self.values.items()}


class MeshPoint:
    """One mesh vertex: position, color, texture and pixel coordinates plus its depths."""

    def __init__(self,
                 position: Point3 = None,
                 color: Point3 = None,
                 textureCoordinate: TextureCoordinate = None,
                 pixelCoordinate: PixelCoordinate = None,
                 depthMap: DepthMap = None,
                 ):
        # every default is created here, per point: weighDepth() writes into
        # position.values, so a default shared between points would be corrupted
        self.position = Point3() if position is None else position
        self.color = Point3() if color is None else color
        self.textureCoordinate = TextureCoordinate() if textureCoordinate is None else textureCoordinate
        self.pixelCoordinate = PixelCoordinate() if pixelCoordinate is None else pixelCoordinate
        if depthMap is None:
            self.depthMap = DepthMap({DepthMap.MEDIA_PIPE: self.position.values[2]})
        else:
            self.depthMap = depthMap

    def to_dict(self):
        result = {
            'position': self.position.to_dict(),
            'color': self.color.to_dict(),
            'textureCoordinate': self.textureCoordinate.to_dict(),
            'pixelCoordinate': self.pixelCoordinate.to_dict(),
        }
        if self.depthMap is not None:
            result['depthMap'] = self.depthMap.to_dict()
        return result

    def weighDepth(self, weightMap: WeightMap = None):
        """Set position z to the weighted average of this point's translated depths.

        Raises:
            Exception: when weightMap has a key this point has no depth for.
            ZeroDivisionError: when the weights sum to zero.
        """
        if weightMap is None:
            weightMap = WeightMap()
        total_sum = sum(dm.weight for dm in weightMap.values.values())
        if total_sum == 0:
            raise ZeroDivisionError('the weights in weightMap sum to zero')
        tmp = 0
        for key, depthMapping in weightMap.values.items():
            if key not in self.depthMap.values:
                raise Exception(f'{key} from weightMap not in depthMap')
            tmp = tmp + depthMapping.translate(self.depthMap.values[key])
        self.position.values[2] = tmp / total_sum

    def mapLandMark(self, mediaMesh: 'MediaMesh', landmark: landmark_pb2.NormalizedLandmark) -> 'MeshPoint':
        """Fill this point from a normalized landmark and the image held by mediaMesh.

        Reads mediaMesh.width, .height, .scale and .image. Returns self.
        """
        width, height = mediaMesh.width, mediaMesh.height
        pixel = _normalized_to_pixel_coordinates(landmark.x, landmark.y, width, height)
        if pixel is None:
            # the helper returns None for landmarks outside the image (a face
            # cut off by the frame); use the nearest edge pixel instead of
            # failing on the unpack
            x = int(np.clip(np.floor(landmark.x * width), 0, width - 1))
            y = int(np.clip(np.floor(landmark.y * height), 0, height - 1))
        else:
            x, y = pixel
        normalized = [landmark.x, landmark.y, landmark.z]
        self.position = Point3([v * mediaMesh.scale[i] for i, v in enumerate(normalized)])
        # channels are kept in whatever order mediaMesh.image stores them
        self.color = Point3([value / 255 for value in mediaMesh.image[y, x]])
        # v is flipped: pixel rows grow downwards
        self.textureCoordinate = TextureCoordinate([x / width, 1 - y / height])
        self.pixelCoordinate = PixelCoordinate([x, y])
        self.depthMap = DepthMap({DepthMap.MEDIA_PIPE: self.position.z})
        return self

    def toObj(self, lines: List[str], hf_hack: bool = HF_HACK):
        """Append this point's "v" (position followed by color) and "vt" lines to lines.

        hf_hack is accepted for symmetry with the other toObj methods and is
        not used here.
        """
        vertex = list(self.position.values) + list(self.color.values)
        lines.append("v " + " ".join(map(str, vertex)))
        lines.append("vt " + " ".join(map(str, self.textureCoordinate.values)))


# IMPORTANT! MeshFace uses 1 based indices, not 0 based!!!!
class MeshFace:
    """A polygon of the mesh: 1-based indices into the point list plus a normal."""

    def __init__(self, indices: List[int] = None, normal: Point3 = None):
        self.indices = indices
        # a fresh Point3 per face instead of one default shared by all faces
        self.normal = Point3() if normal is None else normal

    def calculateNormal(self, meshPoints: List[MeshPoint]):
        """Set self.normal from the first three points of the face.

        Raises:
            Exception: when indices or meshPoints are missing, or the face
                has fewer than three indices.
        """
        if self.indices is None:
            raise Exception('indices is junk')
        if meshPoints is None:
            raise Exception('meshPoints is junk')
        if len(self.indices) < 3:
            raise Exception('need at least 3 points')
        # indices are 1 based, hence the -1
        corners = [np.array(meshPoints[index - 1].position.values) for index in self.indices[:3]]
        normal = np.cross(corners[1] - corners[0], corners[2] - corners[0])
        length = np.linalg.norm(normal)
        # a degenerate face has a zero-length cross product; leave it
        # unnormalized rather than dividing by zero and emitting NaNs
        if length > 0:
            normal = normal / length
        self.normal = Point3(normal.tolist())

    def toObj(self, lines: List[str], index: int, hf_hack: bool = HF_HACK):
        """Append this face's "vn" line and its "f" line(s) to lines.

        With hf_hack the vertex/uv/normal form of the face is written as a
        comment and a plain vertex-only face is written after it; without it
        only the vertex/uv/normal form is written.
        """
        lines.append("vn " + " ".join(str(value) for value in self.normal.values))
        face_uv = "f " + " ".join(f'{vertex}/{vertex}/{index}' for vertex in self.indices)
        if hf_hack:
            face_un = "f " + " ".join(str(vertex) for vertex in self.indices)
            lines.append(f'#{face_uv}')
            lines.append(f'{face_un}')
        else:
            lines.append(face_uv)


class DepthSource:
    """Base class for something that produces a per-pixel depth image for a MediaMesh."""

    def __init__(self, name: str = None):
        self.name = name
        self.mediaMesh = None
        self.depth: NumpyImage = None
        self.gray: NumpyImage = None

    def mapDepth(self, mediaMesh: 'MediaMesh', depthMapping: DepthMapping = None) -> 'DepthSource':
        """Does nothing in the base class; subclasses compute self.depth and call _addDepth."""
        return self

    def _addDepth(self, mediaMesh: 'MediaMesh', depthMapping: DepthMapping = None) -> 'DepthSource':
        """Copy self.depth at each point's pixel into the point and register the mapping.

        Stores the depth under self.name in every point's depthMap, sets the
        weightMap entry for self.name and renders self.gray. Returns self.
        """
        # replaced by the mesh rendering at the end of this method
        self.gray = colorize(self.depth, cmap='gray_r')
        self.mediaMesh = mediaMesh
        for meshPoint in mediaMesh.points:
            pixel = meshPoint.pixelCoordinate
            meshPoint.depthMap.values[self.name] = float(self.depth[pixel.y, pixel.x])
        mediaMesh.weightMap.set(self.name, self.createDepthMapping(depthMapping))
        self.gray = mediaMesh.drawGrayMesh(self.name, True)
        return self

    # note: if depthMapping is passed in, the hi and lo will be reset
    def createDepthMapping(self, depthMapping: DepthMapping = None) -> DepthMapping:
        """Return a mapping whose lo/hi cover the depths found at the mesh points.

        Before any depth has been mapped the returned range is left untracked.
        """
        if depthMapping is None:
            depthMapping = DepthMapping()
        depthMapping.reset()
        # both are set together by _addDepth; checking both avoids
        # dereferencing a missing mesh
        if self.depth is not None and self.mediaMesh is not None:
            for meshPoint in self.mediaMesh.points:
                pixel = meshPoint.pixelCoordinate
                depthMapping.track(float(self.depth[pixel.y, pixel.x]))
        return depthMapping.update()


class ZoeDepthSource(DepthSource):
    """Depth from the ZoeD_K model loaded through torch.hub."""

    NAME = 'zoe'

    def __init__(self):
        super().__init__(ZoeDepthSource.NAME)
        self.model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_K", pretrained=True).to(DEVICE).eval()

    def mapDepth(self, mediaMesh: 'MediaMesh', depthMapping: DepthMapping = None) -> 'DepthSource':
        # the model output is subtracted from 1 before being used as depth
        self.depth = 1. - self.model.infer_pil(mediaMesh.image)
        return self._addDepth(mediaMesh, depthMapping)


class MidasDepthSource(DepthSource):
    """Depth from the Intel/dpt-large model loaded through transformers."""

    NAME = 'midas'

    def __init__(self):
        super().__init__(MidasDepthSource.NAME)
        self.feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
        self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

    def mapDepth(self, mediaMesh: 'MediaMesh', depthMapping: DepthMapping = None) -> 'DepthSource':
        img = Image.fromarray(mediaMesh.image)
        encoding = self.feature_extractor(img, return_tensors="pt")
        with torch.no_grad():
            predicted_depth = self.model(**encoding).predicted_depth
        # resize the prediction to the image; PIL size is (width, height),
        # hence the reversal
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=img.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
        self.depth = prediction.cpu().numpy()
        return self._addDepth(mediaMesh, depthMapping)


#############################################################################
#
#   A MediaMesh has:
#
#       1. an input image
#       2. the first landmark found
#       3.
#          a MeshPoint for each point
#
#
#
#############################################################################

class MediaMesh:
    """Face mesh detected in one image, plus the depth sources used to shape it."""

    LOG = logging.getLogger(__name__)
    COMBINED = 'combined'

    def __init__(self,
                 scale: List[int] = None,
                 weightMap: WeightMap = None,
                 image: NumpyImage = None,
                 annotated: NumpyImage = None,
                 points: List[MeshPoint] = None):
        # detect() writes into scale[0]; a default list in the signature
        # would leak that write into every other instance
        self.scale = [-1, -1, -1] if scale is None else scale
        self.weightMap = WeightMap() if weightMap is None else weightMap
        self.image = image
        self.annotated = annotated
        self.points = points
        self.meshes = {}
        # a list of DepthSource, see demoSetup
        self.depthSources = []
        # set by detect
        self.width = None
        self.height = None
        self.ratio = None
        self.gray = None

    # after this call, instance variables for image, annotated and points should be set
    def detect(self, image: NumpyImage, min_detection_confidence: float = .5) -> 'MediaMesh':
        """Find the first face in image and build the mesh points from it.

        Raises:
            Exception: when image is None or no face is found.
        """
        if image is None:
            raise Exception('no image to detect a face in')
        self.image = image
        self.annotated = image.copy()
        self.points = None
        self.width = image.shape[1]
        self.height = image.shape[0]
        self.ratio = self.width / self.height
        self.scale[0] = self.ratio

        with mp_face_mesh.FaceMesh(
                static_image_mode=True,
                max_num_faces=1,
                min_detection_confidence=min_detection_confidence) as face_mesh:
            results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if not results.multi_face_landmarks:
                raise Exception('no faces found')
            # just do the first face for now (max_num_faces is 1 anyway)
            landmarks = results.multi_face_landmarks[0]
            self.points = self.mapLandMarks(landmarks)
            self.drawLandMarks(self.annotated, landmarks)

        self.gray = self.drawGrayMesh()
        self.weightMap.set(DepthMap.MEDIA_PIPE, self.createDepthMapping())
        return self

    def drawLandMarks(self, image: NumpyImage, landmarks: landmark_pb2.NormalizedLandmarkList):
        """Draw the tesselation and the contours of the landmarks onto image, in place."""
        connection_styles = (
            (mp_face_mesh.FACEMESH_TESSELATION,
             mp_drawing_styles.get_default_face_mesh_tesselation_style()),
            (mp_face_mesh.FACEMESH_CONTOURS,
             mp_drawing_styles.get_default_face_mesh_contours_style()),
        )
        for connections, style in connection_styles:
            mp_drawing.draw_landmarks(
                image=image,
                landmark_list=landmarks,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=style)

    def mapLandMarks(self, landmarks: landmark_pb2.NormalizedLandmarkList) -> List[MeshPoint]:
        """Build one centered MeshPoint per landmark."""
        points = [MeshPoint().mapLandMark(self, landmark) for landmark in landmarks.landmark]
        return self.centerPoints(points)

    def centerPoints(self, points: List[MeshPoint] = None) -> List[MeshPoint]:
        """Shift the points so their bounding box is centered on the origin.

        Works on self.points when points is None. The position lists are
        replaced, not edited in place. Returns the points.
        """
        if points is None:
            points = self.points

        mins = [+np.inf] * 3
        maxs = [-np.inf] * 3
        for point in points:
            for dimension, value in enumerate(point.position.values):
                mins[dimension] = min(mins[dimension], value)
                maxs[dimension] = max(maxs[dimension], value)

        mids = [(lo + hi) / 2 for lo, hi in zip(mins, maxs)]
        for point in points:
            point.position.values = [val - mid for val, mid in zip(point.position.values, mids)]

        MediaMesh.LOG.debug('mins: %s', mins)
        MediaMesh.LOG.debug('mids: %s', mids)
        MediaMesh.LOG.debug('maxs: %s', maxs)
        return points

    def createDepthMapping(self, depthMapping: DepthMapping = None) -> DepthMapping:
        """Return a mapping that has tracked the current z of every point."""
        if depthMapping is None:
            depthMapping = DepthMapping()
        for point in self.points:
            depthMapping.track(point.position.z)
        return depthMapping.update()

    def drawGrayMesh(self, source: str = DepthMap.MEDIA_PIPE, invert: bool = False):
        """Render the quads as flat gray polygons shaded by the depth from source.

        Depths are normalized over all points; each quad gets the average
        gray of its corners. Returns the rendering as a numpy array.
        """
        image = Image.new("RGB", (self.width, self.height), (88, 13, 33))
        draw = ImageDraw.Draw(image)

        depths = [point.depthMap.values[source] for point in self.points]
        minZ = min(depths, default=np.inf)
        maxZ = max(depths, default=-np.inf)
        difZ = maxZ - minZ
        if 0 == difZ:
            difZ = 1  # flat depth: avoid dividing by zero
        depths = [(depth - minZ) / difZ for depth in depths]

        for quad in QUADS:
            # quad indices are 1 based
            corners = [tuple(self.points[index - 1].pixelCoordinate.values) for index in quad]
            color = int(np.average([int(255 * depths[index - 1]) for index in quad]))
            if invert:
                color = 255 - color
            draw.polygon(corners, fill=(color, color, color))

        return np.asarray(image)

    # the obj is based on the current weightMap
    def toObj(self, name: str = 'sweet', hf_hack: bool = HF_HACK):
        """Build the OBJ lines and the MTL text for the mesh.

        Re-weighs the depth of every point with the current weightMap and
        re-centers the points, so point positions are modified.

        Returns:
            (obj, mtl): a list of OBJ lines and the MTL file content.
        """
        log = MediaMesh.LOG
        log.info('-----------------------------------------------------------------------------')

        obj = [f'o {name}Mesh']
        mtl = f'newmtl {name}Material\nmap_Kd {name}.png\n'

        c = '#' if hf_hack else ''
        obj.append(f'{c}mtllib {name}.mtl')
        obj.extend([
            '##################################################################',
            '# to bring into blender with uvs:',
            f'# put the following 2 lines into {name}.mtl uncommented',
            f'#newmtl {name}Material',
            f'#map_Kd {name}.png',
            '# remove lines from this file starting with "f "',
            '# uncomment the lines that start with "#f "',
            '##################################################################',
        ])

        for key, depthMapping in self.weightMap.values.items():
            depthMapping.update()
            log.info('%s.%s -> %s', name, key, depthMapping.to_dict())

        for point in self.points:
            point.weighDepth(self.weightMap)

        self.centerPoints()

        for point in self.points:
            point.toObj(obj, hf_hack)

        obj.append(f'usemtl {name}Material')

        # face normals are numbered from 1, in QUADS order
        for index, quad in enumerate(QUADS, start=1):
            face = MeshFace(quad)
            face.calculateNormal(self.points)
            face.toObj(obj, index, hf_hack)

        obj.extend([
            '##################################################################',
            '# EOF',
            '##################################################################',
        ])

        log.info('-----------------------------------------------------------------------------')
        return obj, mtl

    def to_dict(self):
        return {
            'width': self.width,
            'height': self.height,
            'ratio': self.ratio,
            'weightMap': {key: value.to_dict() for key, value in self.weightMap.values.items()},
            'points': [point.to_dict() for point in self.points],
        }

    # should be called after demoSetup and detect
    def singleSourceMesh(self, name: str, hf_hack: bool = HF_HACK):
        """Build the mesh using only the named depth source.

        The current weights are saved first and restored afterwards, also
        when building the mesh fails.
        """
        before = self.weightMap.saveWeights()  # push
        try:
            self.weightMap.totally(name)
            return self.toObj(name, hf_hack)
        finally:
            self.weightMap.loadWeights(before)  # pop

    # should be called after demoSetup and detect
    def meshmerizing(self, hf_hack: bool = HF_HACK):
        """Map every depth source, then build the combined mesh and one mesh per source.

        Returns:
            dict of mesh name to (obj, mtl), also kept in self.meshes.
        """
        for depthSource in self.depthSources:
            depthSource.mapDepth(self, self.weightMap.values[depthSource.name])

        self.meshes = {MediaMesh.COMBINED: self.toObj(MediaMesh.COMBINED, hf_hack)}
        for source in self.depthSources:
            self.meshes[source.name] = self.singleSourceMesh(source.name, hf_hack)
        self.meshes[DepthMap.MEDIA_PIPE] = self.singleSourceMesh(DepthMap.MEDIA_PIPE, hf_hack)
        return self.meshes

    def demoSetup(self) -> 'MediaMesh':
        """Create the zoe and midas depth sources and their weights. Returns self."""
        self.depthSources = [ZoeDepthSource(), MidasDepthSource()]

        for depthSource in self.depthSources:
            self.weightMap.set(depthSource.name, depthSource.createDepthMapping())

        # observationally
        zoe = self.weightMap.values[ZoeDepthSource.NAME]
        midas = self.weightMap.values[MidasDepthSource.NAME]
        zoe.toHi = 1.77
        midas.toHi = 2.55
        zoe.weight = 1.00
        midas.weight = 0.22

        return self

    def main(self):
        """Command line entry point: write meshes, images and json for the image in argv[1].

        Raises:
            Exception: on a wrong argument count or an unreadable image.
        """
        if not 2 == len(sys.argv):
            raise Exception('usage: MediaMesh.py IMAGE_FILE')

        # the diagnostics go through logging; show them when run from the cli
        logging.basicConfig(level=logging.INFO)

        # check the image before loading the models: cv2.imread hands back
        # None instead of raising when the file cannot be read
        path = sys.argv[1]
        image = cv2.imread(path)
        if image is None:
            raise Exception(f'could not read image: {path}')

        mediaMesh = MediaMesh().demoSetup()
        mediaMesh.detect(image)

        for name, (obj, mtl) in mediaMesh.meshmerizing().items():
            with open(f"{name}.obj", "w") as file:
                file.write('\n'.join(obj))
            with open(f"{name}.mtl", "w") as file:
                file.write(mtl)

        cv2.imwrite('mesh.png', mediaMesh.annotated)
        cv2.imwrite('mpg.png', mediaMesh.gray)
        for source in mediaMesh.depthSources:
            cv2.imwrite(f'{source.name}.png', source.gray)

        with open("mesh.json", "w") as file:
            json.dump(mediaMesh.to_dict(), file, indent=4)


if __name__ == "__main__":
    MediaMesh().main()

# EOF
#############################################################################