Commit a83935b • Denys Rozumnyi committed
Parent(s): 69a667d

update
Browse files
- __pycache__/geom_solver.cpython-39.pyc  +0 -0
- __pycache__/handcrafted_solution.cpython-39.pyc  +0 -0
- __pycache__/helpers.cpython-39.pyc  +0 -0
- __pycache__/my_solution.cpython-39.pyc  +0 -0
- geom_solver.py  +116 -0
- handcrafted_solution.py  +245 -0
- helpers.py  +27 -0
- my_solution.py  +11 -208
- script.py  +1 -1
- testing.ipynb  +0 -0
__pycache__/geom_solver.cpython-39.pyc ADDED (binary file, 4.96 kB)
__pycache__/handcrafted_solution.cpython-39.pyc ADDED (binary file, 7.94 kB)
__pycache__/helpers.cpython-39.pyc ADDED (binary file, 1.04 kB)
__pycache__/my_solution.cpython-39.pyc ADDED (binary file, 1.94 kB)
geom_solver.py ADDED
@@ -0,0 +1,116 @@
+import numpy as np
+from pytorch3d.ops import ball_query
+from helpers import *
+from handcrafted_solution import *
+import hoho
+import itertools
+import torch
+from pytorch3d.renderer import PerspectiveCameras
+
+
+class GeomSolver(object):
+
+    def __init__(self, entry):
+        human_entry = convert_entry_to_human_readable(entry)
+        self.human_entry = human_entry
+
+        col_cams = [hoho.Rt_to_eye_target(human_entry['ade20k'][0], to_K(*human_entry['cameras'][1].params), quaternion_to_rotation_matrix(colmap_img.qvec), colmap_img.tvec) for colmap_img in human_entry['images'].values()]
+        eye, target, up, fov = col_cams[0]
+
+        cameras, images, points3D = human_entry['cameras'], human_entry['images'], human_entry['points3d']
+        xyz = np.stack([p.xyz for p in points3D.values()])
+        color = np.stack([p.rgb for p in points3D.values()])
+
+        gestalt_camcet = np.stack([eye for eye, target, up, fov in itertools.starmap(hoho.Rt_to_eye_target, zip(*[human_entry[k] for k in 'ade20k K R t'.split()]))])
+        col_camcet = np.stack([eye for eye, target, up, fov in col_cams])
+        gestalt_to_colmap_cams = [np.argmin(((gcam - col_camcet)**2).sum(1)**0.5)+1 for gcam in gestalt_camcet]
+
+
+        # def get_vertices(self):
+        clr_th = 2.5
+        device = 'cuda:0'
+        height = cameras[1].height
+        width = cameras[1].width
+        N = len(gestalt_to_colmap_cams)
+        K = to_K(*human_entry['cameras'][1].params)[None].repeat(N, 0)
+        R = np.stack([quaternion_to_rotation_matrix(human_entry['images'][gestalt_to_colmap_cams[ind]].qvec) for ind in range(N)])
+        T = np.stack([human_entry['images'][gestalt_to_colmap_cams[ind]].tvec for ind in range(N)])
+
+        R = np.linalg.inv(R)
+        image_size = torch.Tensor([height, width]).repeat(N, 1)
+        pyt_cameras = PerspectiveCameras(device=device, R=R, T=T, in_ndc=False, focal_length=K[:, 0, :1], principal_point=K[:, :2, 2], image_size=image_size)
+
+        verts = torch.from_numpy(xyz.astype(np.float32)).to(device)
+
+        apex_color = np.array(gestalt_color_mapping['apex'])
+        eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
+
+        dist_points = np.zeros((xyz.shape[0], ))
+        visible_counts = np.zeros((xyz.shape[0], ), dtype=int)
+        proj_uv = []
+        for ki in range(N):
+            cki = gestalt_to_colmap_cams[ki]
+
+            gest = np.array(human_entry['gestalt'][ki])
+            apex_mask = cv2.inRange(gest, apex_color-clr_th, apex_color+clr_th)
+            eave_end_mask = cv2.inRange(gest, eave_end_color-clr_th, eave_end_color+clr_th)
+            vert_mask = apex_mask + eave_end_mask
+            vert_mask = (vert_mask > 0).astype(np.uint8)
+
+            dist = cv2.distanceTransform(1-vert_mask, cv2.DIST_L2, 3)
+            dist[dist > 100] = 100
+            ndist = np.zeros_like(dist)
+            ndist = cv2.normalize(dist, ndist, 0, 1.0, cv2.NORM_MINMAX)
+
+            in_this_image = np.array([cki in p.image_ids for p in points3D.values()])
+            # tempind = 2103
+            # print(in_this_image[tempind-1], cki, points3D[tempind].image_ids)
+            uv = torch.round(pyt_cameras[ki].transform_points(verts)[:, :2]).cpu().numpy().astype(int)
+            uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height) * in_this_image
+            proj_uv.append((uv, uv_inl))
+            uv = uv[uv_inl]
+
+            dist_points[uv_inl] += dist[uv[:,1], uv[:,0]]
+            visible_counts[uv_inl] += 1
+
+        selected_points = (dist_points / (visible_counts + 1e-6)) <= 10
+        selected_points[visible_counts < 1] = False
+
+
+        pnts = torch.from_numpy(xyz[selected_points].astype(np.float32))[None]
+        bdists, inds, nn = ball_query(pnts, pnts, K=3, radius=30)
+        dense_pnts = (bdists[0] > 0).sum(1) == 2
+
+
+        criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, 0.3)
+        flags = cv2.KMEANS_RANDOM_CENTERS
+        centers = None
+        kmeans_th = 150
+        for tempi in range(1,11):
+            retval, bestLabels, temp_centers = cv2.kmeans(xyz[selected_points][dense_pnts].astype(np.float32), tempi, None, criteria, 200, flags)
+            cpnts = torch.from_numpy(temp_centers.astype(np.float32))[None]
+            bdists, inds, nn = ball_query(cpnts, cpnts, K=1, radius=100)
+            if bdists.max() > 0:
+                closest_nn = (bdists[bdists>0].min()**0.5).item()
+            else:
+                closest_nn = kmeans_th
+            if closest_nn < kmeans_th:
+                break
+            centers = temp_centers
+        # image_ids = np.array([p.id for p in points3D.values()])
+        # pyt_centers = torch.from_numpy(centers).to(device)
+
+        self.vertices = centers
+
+
+    def get_vertices(self, visualize=False):
+        if visualize:
+            from hoho.viz3d import plot_estimate_and_gt
+            plot_estimate_and_gt(self.vertices, [(0,1)], self.human_entry['wf_vertices'], self.human_entry['wf_edges'])
+        if self.vertices.shape[0] == 0:
+            return my_empty_solution()
+        return self.vertices, []
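In short, the solver keeps COLMAP 3D points whose projections land near apex or eave-end pixels in the gestalt segmentations (average distance-transform value at most 10 px over the views that see them), filters isolated points with ball_query, then grows a k-means clustering (k = 1..10), stopping before two centroids come closer than kmeans_th and keeping the last accepted centers as vertices. A minimal driver sketch, not part of the commit; it assumes the webdataset entry format used in script.py below and a CUDA device, since the file hard-codes 'cuda:0':

# Hypothetical driver for GeomSolver; entry loading follows script.py below.
import hoho
from geom_solver import GeomSolver

dataset = hoho.get_dataset(decode=None, split='all', dataset_type='webdataset')
entry = next(iter(dataset))

solver = GeomSolver(entry)               # all the clustering happens in __init__
vertices, edges = solver.get_vertices()  # edges is currently always an empty list
print(vertices.shape)                    # (k, 3) cluster centers, k <= 10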
handcrafted_solution.py ADDED
@@ -0,0 +1,245 @@
+# Description: This file contains the handcrafted solution for the task of wireframe reconstruction
+
+import io
+from PIL import Image as PImage
+import numpy as np
+from collections import defaultdict
+import cv2
+from typing import Tuple, List
+from scipy.spatial.distance import cdist
+
+from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
+from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
+
+
+def empty_solution():
+    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
+    return np.zeros((2,3)), [(0, 1)]
+
+
+def convert_entry_to_human_readable(entry):
+    out = {}
+    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces', 'face_semantics', 'K', 'R', 't']
+    for k, v in entry.items():
+        if k in already_good:
+            out[k] = v
+            continue
+        if k == 'points3d':
+            out[k] = read_points3D_binary(fid=io.BytesIO(v))
+        if k == 'cameras':
+            out[k] = read_cameras_binary(fid=io.BytesIO(v))
+        if k == 'images':
+            out[k] = read_images_binary(fid=io.BytesIO(v))
+        if k in ['ade20k', 'gestalt']:
+            out[k] = [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
+        if k == 'depthcm':
+            out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
+    return out
+
+
+def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.0):
+    '''Get the vertices and edges from the gestalt segmentation mask of the house'''
+    vertices = []
+    connections = []
+    # Apex
+    apex_color = np.array(gestalt_color_mapping['apex'])
+    apex_mask = cv2.inRange(gest_seg_np, apex_color-0.5, apex_color+0.5)
+    if apex_mask.sum() > 0:
+        output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
+        (numLabels, labels, stats, centroids) = output
+        stats, centroids = stats[1:], centroids[1:]
+
+        for i in range(numLabels-1):
+            vert = {"xy": centroids[i], "type": "apex"}
+            vertices.append(vert)
+
+    eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
+    eave_end_mask = cv2.inRange(gest_seg_np, eave_end_color-0.5, eave_end_color+0.5)
+    if eave_end_mask.sum() > 0:
+        output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
+        (numLabels, labels, stats, centroids) = output
+        stats, centroids = stats[1:], centroids[1:]
+
+        for i in range(numLabels-1):
+            vert = {"xy": centroids[i], "type": "eave_end_point"}
+            vertices.append(vert)
+    # Connectivity
+    apex_pts = []
+    apex_pts_idxs = []
+    for j, v in enumerate(vertices):
+        apex_pts.append(v['xy'])
+        apex_pts_idxs.append(j)
+    apex_pts = np.array(apex_pts)
+
+    # Ridge connects two apex points
+    for edge_class in ['eave', 'ridge', 'rake', 'valley']:
+        edge_color = np.array(gestalt_color_mapping[edge_class])
+        mask = cv2.morphologyEx(cv2.inRange(gest_seg_np,
+                                edge_color-0.5,
+                                edge_color+0.5),
+                                cv2.MORPH_DILATE, np.ones((11, 11)))
+        line_img = np.copy(gest_seg_np) * 0
+        if mask.sum() > 0:
+            output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
+            (numLabels, labels, stats, centroids) = output
+            stats, centroids = stats[1:], centroids[1:]
+            edges = []
+            for i in range(1, numLabels):
+                y,x = np.where(labels == i)
+                xleft_idx = np.argmin(x)
+                x_left = x[xleft_idx]
+                y_left = y[xleft_idx]
+                xright_idx = np.argmax(x)
+                x_right = x[xright_idx]
+                y_right = y[xright_idx]
+                edges.append((x_left, y_left, x_right, y_right))
+                cv2.line(line_img, (x_left, y_left), (x_right, y_right), (255, 255, 255), 2)
+            edges = np.array(edges)
+            if (len(apex_pts) < 2) or len(edges) <1:
+                continue
+            pts_to_edges_dist = np.minimum(cdist(apex_pts, edges[:,:2]), cdist(apex_pts, edges[:,2:]))
+            connectivity_mask = pts_to_edges_dist <= edge_th
+            edge_connects = connectivity_mask.sum(axis=0)
+            for edge_idx, edgesum in enumerate(edge_connects):
+                if edgesum>=2:
+                    connected_verts = np.where(connectivity_mask[:,edge_idx])[0]
+                    for a_i, a in enumerate(connected_verts):
+                        for b in connected_verts[a_i+1:]:
+                            connections.append((a, b))
+    return vertices, connections
+
+def get_uv_depth(vertices, depth):
+    '''Get the depth of the vertices from the depth image'''
+    uv = []
+    for v in vertices:
+        uv.append(v['xy'])
+    uv = np.array(uv)
+    uv_int = uv.astype(np.int32)
+    H, W = depth.shape[:2]
+    uv_int[:, 0] = np.clip( uv_int[:, 0], 0, W-1)
+    uv_int[:, 1] = np.clip( uv_int[:, 1], 0, H-1)
+    vertex_depth = depth[(uv_int[:, 1] , uv_int[:, 0])]
+    return uv, vertex_depth
+
+
+def merge_vertices_3d(vert_edge_per_image, th=0.1):
+    '''Merge vertices that are close to each other in 3D space and are of same types'''
+    all_3d_vertices = []
+    connections_3d = []
+    all_indexes = []
+    cur_start = 0
+    types = []
+    for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
+        types += [int(v['type']=='apex') for v in vertices]
+        all_3d_vertices.append(vertices_3d)
+        connections_3d+=[(x+cur_start,y+cur_start) for (x,y) in connections]
+        cur_start+=len(vertices_3d)
+    all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
+    #print (connections_3d)
+    distmat = cdist(all_3d_vertices, all_3d_vertices)
+    types = np.array(types).reshape(-1,1)
+    same_types = cdist(types, types)
+    mask_to_merge = (distmat <= th) & (same_types==0)
+    new_vertices = []
+    new_connections = []
+    to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
+    to_merge_final = defaultdict(list)
+    for i in range(len(all_3d_vertices)):
+        for j in to_merge:
+            if i in j:
+                to_merge_final[i]+=j
+    for k, v in to_merge_final.items():
+        to_merge_final[k] = list(set(v))
+    already_there = set()
+    merged = []
+    for k, v in to_merge_final.items():
+        if k in already_there:
+            continue
+        merged.append(v)
+        for vv in v:
+            already_there.add(vv)
+    old_idx_to_new = {}
+    count=0
+    for idxs in merged:
+        new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
+        for idx in idxs:
+            old_idx_to_new[idx] = count
+        count +=1
+    #print (connections_3d)
+    new_vertices=np.array(new_vertices)
+    #print (connections_3d)
+    for conn in connections_3d:
+        new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
+        if new_con[0] == new_con[1]:
+            continue
+        if new_con not in new_connections:
+            new_connections.append(new_con)
+    #print (f'{len(new_vertices)} left after merging {len(all_3d_vertices)} with {th=}')
+    return new_vertices, new_connections
+
+def prune_not_connected(all_3d_vertices, connections_3d):
+    '''Prune vertices that are not connected to any other vertex'''
+    connected = defaultdict(list)
+    for c in connections_3d:
+        connected[c[0]].append(c)
+        connected[c[1]].append(c)
+    new_indexes = {}
+    new_verts = []
+    connected_out = []
+    for k,v in connected.items():
+        vert = all_3d_vertices[k]
+        if tuple(vert) not in new_verts:
+            new_verts.append(tuple(vert))
+            new_indexes[k]=len(new_verts) -1
+    for k,v in connected.items():
+        for vv in v:
+            connected_out.append((new_indexes[vv[0]],new_indexes[vv[1]]))
+    connected_out=list(set(connected_out))
+
+    return np.array(new_verts), connected_out
+
+
+def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
+    good_entry = convert_entry_to_human_readable(entry)
+    vert_edge_per_image = {}
+    for i, (gest, depth, K, R, t) in enumerate(zip(good_entry['gestalt'],
+                                                   good_entry['depthcm'],
+                                                   good_entry['K'],
+                                                   good_entry['R'],
+                                                   good_entry['t']
+                                                   )):
+        gest_seg = gest.resize(depth.size)
+        gest_seg_np = np.array(gest_seg).astype(np.uint8)
+        # Metric3D
+        depth_np = np.array(depth) / 2.5 # 2.5 is the scale estimation coefficient
+        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 20.)
+        if (len(vertices) < 2) or (len(connections) < 1):
+            print (f'Not enough vertices or connections in image {i}')
+            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
+            continue
+        uv, depth_vert = get_uv_depth(vertices, depth_np)
+        # Normalize the uv to the camera intrinsics
+        xy_local = np.ones((len(uv), 3))
+        xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
+        xy_local[:, 1] = (uv[:, 1] - K[1,2]) / K[1,1]
+        # Get the 3D vertices
+        vertices_3d_local = depth_vert[...,None] * (xy_local/np.linalg.norm(xy_local, axis=1)[...,None])
+        world_to_cam = np.eye(4)
+        world_to_cam[:3, :3] = R
+        world_to_cam[:3, 3] = t.reshape(-1)
+        cam_to_world = np.linalg.inv(world_to_cam)
+        vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
+        vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
+        vert_edge_per_image[i] = vertices, connections, vertices_3d
+    all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 3.0)
+    all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)
+    if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
+        print (f'Not enough vertices or connections in the 3D vertices')
+        return (good_entry['__key__'], *empty_solution())
+    if visualize:
+        from hoho.viz3d import plot_estimate_and_gt
+        plot_estimate_and_gt( all_3d_vertices_clean,
+                              connections_3d_clean,
+                              good_entry['wf_vertices'],
+                              good_entry['wf_edges'])
+    return good_entry['__key__'], all_3d_vertices_clean, connections_3d_clean
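The lifting step in predict treats each vertex's depth as Euclidean distance along the normalized camera ray (not a z-depth), then maps camera coordinates to world coordinates by inverting the [R|t] extrinsics. A standalone single-point sketch of that math; the helper name backproject and its interface are illustrative, not part of this file:

# Illustrative single-point version of the lifting done in predict() above.
import numpy as np

def backproject(u, v, d, K, R, t):
    # Normalized ray through pixel (u, v): x = (u - cx) / fx, y = (v - cy) / fy.
    ray = np.array([(u - K[0, 2]) / K[0, 0], (v - K[1, 2]) / K[1, 1], 1.0])
    x_cam = d * ray / np.linalg.norm(ray)  # depth used as distance along the ray
    # world_to_cam is x_cam = R @ x_world + t, so invert with R.T.
    return R.T @ (x_cam - t)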
helpers.py ADDED
@@ -0,0 +1,27 @@
+import numpy as np
+from PIL import Image as PImage
+import io
+
+
+def my_empty_solution():
+    '''Return a minimal valid solution, i.e. 1 vertex and 0 edges.'''
+    return np.zeros((1,3)), []
+
+
+def to_K(f, cx, cy):
+    K = np.eye(3)
+    K[0,0] = K[1,1] = f
+    K[0,2] = cx
+    K[1,2] = cy
+    return K
+
+
+def quaternion_to_rotation_matrix(qvec):
+    qw, qx, qy, qz = qvec
+    R = np.array([
+        [1 - 2*qy**2 - 2*qz**2, 2*qx*qy - 2*qz*qw, 2*qx*qz + 2*qy*qw],
+        [2*qx*qy + 2*qz*qw, 1 - 2*qx**2 - 2*qz**2, 2*qy*qz - 2*qx*qw],
+        [2*qx*qz - 2*qy*qw, 2*qy*qz + 2*qx*qw, 1 - 2*qx**2 - 2*qy**2]
+    ])
+    return R
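quaternion_to_rotation_matrix follows the standard Hamilton-convention formula with qvec ordered (qw, qx, qy, qz), matching COLMAP's image qvec. A quick sanity check one could run (it assumes unit-norm input, which COLMAP provides):

# The identity quaternion must give the identity matrix, and any unit
# quaternion must give an orthonormal matrix with determinant +1.
import numpy as np
from helpers import quaternion_to_rotation_matrix

assert np.allclose(quaternion_to_rotation_matrix([1.0, 0.0, 0.0, 0.0]), np.eye(3))

q = np.random.randn(4)
q /= np.linalg.norm(q)
R = quaternion_to_rotation_matrix(q)
assert np.allclose(R @ R.T, np.eye(3))
assert np.isclose(np.linalg.det(R), 1.0)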
my_solution.py CHANGED
@@ -11,11 +11,8 @@ from scipy.spatial.distance import cdist
 from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
 from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
 
-
-
-    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
-    return np.zeros((2,3)), [(0, 1)]
-
+from helpers import my_empty_solution
+from geom_solver import GeomSolver
 
 def convert_entry_to_human_readable(entry):
     out = {}
@@ -37,209 +34,15 @@ def convert_entry_to_human_readable(entry):
     return out
 
 
-[old lines 40-201 removed: get_vertices_and_edges_from_segmentation, get_uv_depth,
- merge_vertices_3d, and prune_not_connected, identical to the code added in
- handcrafted_solution.py above]
 def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
-[old lines 203-238 removed: the previous per-image handcrafted body of predict,
- identical to the predict added in handcrafted_solution.py above]
+    # return (entry['__key__'], *my_empty_solution())
+    solver = GeomSolver(entry)
+    vertices, edges = solver.get_vertices()
+
     if visualize:
         from hoho.viz3d import plot_estimate_and_gt
-        plot_estimate_and_gt( all_3d_vertices_clean,
-                              connections_3d_clean,
-                              good_entry['wf_vertices'],
-                              good_entry['wf_edges'])
-    return good_entry['__key__'], all_3d_vertices_clean, connections_3d_clean
+        plot_estimate_and_gt( vertices,
+                              edges,
+                              entry['wf_vertices'],
+                              entry['wf_edges'])
+    return entry['__key__'], vertices, edges
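The refactored predict keeps the return contract of the handcrafted version: a (key, vertices, edges) triple, with the commented-out first line preserving the my_empty_solution baseline as a fallback. A minimal smoke-test sketch, with entry loaded as in script.py below:

# Hedged check of the return contract; entry comes from the hoho dataset.
key, vertices, edges = predict(entry)
assert vertices.ndim == 2 and vertices.shape[1] == 3
assert all(len(e) == 2 for e in edges)  # trivially true while edges is []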
script.py CHANGED
@@ -116,7 +116,7 @@ def save_submission(submission, path):
     print(f"Submission saved to {path}")
 
 if __name__ == "__main__":
-    from
+    from my_solution import predict
     print ("------------ Loading dataset------------ ")
     params = hoho.get_params()
     dataset = hoho.get_dataset(decode=None, split='all', dataset_type='webdataset')
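With this import fixed, the __main__ block presumably iterates the dataset and collects predictions before calling save_submission. A rough sketch of that loop; the surrounding script body is not shown in this diff, so the submission record layout and params['output_path'] are assumptions:

# Hedged sketch of the prediction loop this import feeds; only save_submission
# and the dataset setup are visible in this diff.
solution = []
for entry in dataset:
    key, vertices, edges = predict(entry, visualize=False)
    solution.append({'__key__': key,
                     'wf_vertices': vertices.tolist(),
                     'wf_edges': edges})
save_submission(solution, params['output_path'])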
testing.ipynb CHANGED
The diff for this file is too large to render. See raw diff.