rozumden
/

DRAK

Model card Files Files and versions Community

Denys Rozumnyi commited on Jun 12

Commit

fc034ff

•

1 Parent(s): c336685

Public release

Browse files

Files changed (9) hide show

dataset.py +0 -88
geom_solver.py +12 -26
handcrafted_solution.py +0 -245
helpers.py +21 -0
testing.ipynb → main.ipynb +0 -0
my_solution.py +2 -28
pointnet.py +0 -213
script_cpus.py +0 -145
train_pointnet.py +0 -148

dataset.py DELETED Viewed

@@ -1,88 +0,0 @@
-class ShapeNetDataset(data.Dataset):
-    def __init__(self,
-                 root,
-                 npoints=2500,
-                 classification=False,
-                 class_choice=None,
-                 split='train',
-                 data_augmentation=True):
-        self.npoints = npoints
-        self.root = root
-        self.catfile = os.path.join(self.root, 'synsetoffset2category.txt')
-        self.cat = {}
-        self.data_augmentation = data_augmentation
-        self.classification = classification
-        self.seg_classes = {}
-        with open(self.catfile, 'r') as f:
-            for line in f:
-                ls = line.strip().split()
-                self.cat[ls[0]] = ls[1]
-        #print(self.cat)
-        if not class_choice is None:
-            self.cat = {k: v for k, v in self.cat.items() if k in class_choice}
-        self.id2cat = {v: k for k, v in self.cat.items()}
-        self.meta = {}
-        splitfile = os.path.join(self.root, 'train_test_split', 'shuffled_{}_file_list.json'.format(split))
-        #from IPython import embed; embed()
-        filelist = json.load(open(splitfile, 'r'))
-        for item in self.cat:
-            self.meta[item] = []
-        for file in filelist:
-            _, category, uuid = file.split('/')
-            if category in self.cat.values():
-                self.meta[self.id2cat[category]].append((os.path.join(self.root, category, 'points', uuid+'.pts'),
-                                        os.path.join(self.root, category, 'points_label', uuid+'.seg')))
-        self.datapath = []
-        for item in self.cat:
-            for fn in self.meta[item]:
-                self.datapath.append((item, fn[0], fn[1]))
-        self.classes = dict(zip(sorted(self.cat), range(len(self.cat))))
-        print(self.classes)
-        with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../misc/num_seg_classes.txt'), 'r') as f:
-            for line in f:
-                ls = line.strip().split()
-                self.seg_classes[ls[0]] = int(ls[1])
-        self.num_seg_classes = self.seg_classes[list(self.cat.keys())[0]]
-        print(self.seg_classes, self.num_seg_classes)
-    def __getitem__(self, index):
-        fn = self.datapath[index]
-        cls = self.classes[self.datapath[index][0]]
-        point_set = np.loadtxt(fn[1]).astype(np.float32)
-        seg = np.loadtxt(fn[2]).astype(np.int64)
-        #print(point_set.shape, seg.shape)
-        choice = np.random.choice(len(seg), self.npoints, replace=True)
-        #resample
-        point_set = point_set[choice, :]
-        point_set = point_set - np.expand_dims(np.mean(point_set, axis = 0), 0) # center
-        dist = np.max(np.sqrt(np.sum(point_set ** 2, axis = 1)),0)
-        point_set = point_set / dist #scale
-        if self.data_augmentation:
-            theta = np.random.uniform(0,np.pi*2)
-            rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)],[np.sin(theta), np.cos(theta)]])
-            point_set[:,[0,2]] = point_set[:,[0,2]].dot(rotation_matrix) # random rotation
-            point_set += np.random.normal(0, 0.02, size=point_set.shape) # random jitter
-        seg = seg[choice]
-        point_set = torch.from_numpy(point_set)
-        seg = torch.from_numpy(seg)
-        cls = torch.from_numpy(np.array([cls]).astype(np.int64))
-        if self.classification:
-            return point_set, cls
-        else:
-            return point_set, seg
-    def __len__(self):
-        return len(self.datapath)

geom_solver.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import numpy as np
 from pytorch3d.ops import ball_query
 from helpers import *
-from handcrafted_solution import convert_entry_to_human_readable
 import cv2
 import hoho
 import itertools
@@ -10,13 +9,14 @@ from pytorch3d.renderer import PerspectiveCameras
 from hoho.color_mappings import gestalt_color_mapping
 from PIL import Image
 def my_empty_solution():
-	return np.zeros((19,3)), [(0, 0)]
-def cheat_the_metric_solution(vertices=None):
 	if vertices is None:
-		nverts = 19
 		vertices_new = np.zeros((nverts,3))
 	else:
 		nverts = vertices.shape[0]
@@ -31,7 +31,7 @@ class GeomSolver(object):
 	def __init__(self):
 		self.min_vertices = 10
-		self.mean_vertices = 19
 		self.max_vertices = 30
 		self.kmeans_th = 200
 		self.point_dist_th = 50
@@ -41,7 +41,7 @@ class GeomSolver(object):
 		self.return_edges = False
 		self.mean_fixed = False
 		self.repeat_predicted = True
-		self.cheat_metric = True
 	def cluster_points(self, point_types):
 		point_colors = []
@@ -65,9 +65,6 @@ class GeomSolver(object):
 			vert_mask = (vert_mask > 0).astype(np.uint8)
 			dist = cv2.distanceTransform(1-vert_mask, cv2.DIST_L2, 3)
-			# dist[dist > 100] = 100
-			# ndist = np.zeros_like(dist)
-			# ndist = cv2.normalize(dist, ndist, 0, 1.0, cv2.NORM_MINMAX)
 			in_this_image = np.array([cki in p.image_ids for p in self.points3D.values()])
 			uv = torch.round(self.pyt_cameras[ki].transform_points(self.verts)[:, :2]).cpu().numpy().astype(int)
@@ -130,16 +127,12 @@ class GeomSolver(object):
 		human_entry = self.human_entry
 		col_cams = [hoho.Rt_to_eye_target(Image.new('RGB', (human_entry['cameras'][colmap_img.camera_id].width, human_entry['cameras'][colmap_img.camera_id].height)), to_K(*human_entry['cameras'][colmap_img.camera_id].params), quaternion_to_rotation_matrix(colmap_img.qvec), colmap_img.tvec) for colmap_img in human_entry['images'].values()]
-		# eye, target, up, fov = col_cams[0]
 		cameras, images, self.points3D = human_entry['cameras'], human_entry['images'], human_entry['points3d']
 		colmap_cameras_tf = list(human_entry['images'].keys())
 		self.xyz = np.stack([p.xyz for p in self.points3D.values()])
 		color = np.stack([p.rgb for p in self.points3D.values()])
 		self.gests = [np.array(gest0) for gest0 in human_entry['gestalt']]
-		# for ki in range(1, len(self.gests)):
-		# 	if self.gests[ki].shape != self.gests[0].shape:
-		# 		self.gests[ki] = self.gests[ki].transpose(1,0,2)
 		to_camera_ids = np.array([colmap_img.camera_id for colmap_img in human_entry['images'].values()])
@@ -183,16 +176,11 @@ class GeomSolver(object):
 		self.vertices = centers
 		nvert = centers.shape[0]
-		# desired_vertices = (self.xyz[:,-1] > z_th).sum() // 300
 		desired_vertices = int(2.2*nvert)
-		# desired_vertices = self.mean_vertices
 		if desired_vertices < self.min_vertices:
 			desired_vertices = self.mean_vertices
 		if desired_vertices > self.max_vertices:
 			desired_vertices = self.mean_vertices
-		# if self.broken_cams.any():
-		# 	vertices = centers
-		# 	print("There are broken cams.")
 		if nvert >= desired_vertices:
 			vertices = centers[:desired_vertices]
 			print("Enough vertices.")
@@ -248,8 +236,8 @@ class GeomSolver(object):
 		    uvs.append(uv)
 		edges = []
-		# thresholds_min_mean = {0 : [5, 7], 1 : [9, 25], 2: [30, 1000]}
-		thresholds_min_mean = {0 : [1, 7], 1 : [3, 25], 2: [3, 1000]}
 		for i in range(pyt_centers.shape[0]):
 			for j in range(i+1, pyt_centers.shape[0]):
 				etype = (self.is_apex[i] + self.is_apex[j])
@@ -298,12 +286,10 @@ class GeomSolver(object):
 		else:
 			edges = [(0, 0)]
-		if self.cheat_metric:
-			dumb_vertices = np.zeros((vertices.shape[0],3))
-			# dumb_vertices = self.wf_center[None].repeat(vertices.shape[0], axis=0)
-			vertices, edges = cheat_the_metric_solution(dumb_vertices)
-			# vertices_new, edges = cheat_the_metric_solution(np.zeros((vertices.shape[0] // 2,3)))
-			# vertices = np.concatenate((vertices_new, vertices[:vertices_new.shape[0]]))
 		if visualize:
 			from hoho.viz3d import plot_estimate_and_gt

 import numpy as np
 from pytorch3d.ops import ball_query
 from helpers import *
 import cv2
 import hoho
 import itertools
 from hoho.color_mappings import gestalt_color_mapping
 from PIL import Image
 def my_empty_solution():
+	return np.zeros((20,3)), [(0, 0)]
+def fully_connected_solution(vertices=None):
 	if vertices is None:
+		nverts = 20
 		vertices_new = np.zeros((nverts,3))
 	else:
 		nverts = vertices.shape[0]
 	def __init__(self):
 		self.min_vertices = 10
+		self.mean_vertices = 20
 		self.max_vertices = 30
 		self.kmeans_th = 200
 		self.point_dist_th = 50
 		self.return_edges = False
 		self.mean_fixed = False
 		self.repeat_predicted = True
+		self.return_fully_connected = True
 	def cluster_points(self, point_types):
 		point_colors = []
 			vert_mask = (vert_mask > 0).astype(np.uint8)
 			dist = cv2.distanceTransform(1-vert_mask, cv2.DIST_L2, 3)
 			in_this_image = np.array([cki in p.image_ids for p in self.points3D.values()])
 			uv = torch.round(self.pyt_cameras[ki].transform_points(self.verts)[:, :2]).cpu().numpy().astype(int)
 		human_entry = self.human_entry
 		col_cams = [hoho.Rt_to_eye_target(Image.new('RGB', (human_entry['cameras'][colmap_img.camera_id].width, human_entry['cameras'][colmap_img.camera_id].height)), to_K(*human_entry['cameras'][colmap_img.camera_id].params), quaternion_to_rotation_matrix(colmap_img.qvec), colmap_img.tvec) for colmap_img in human_entry['images'].values()]
 		cameras, images, self.points3D = human_entry['cameras'], human_entry['images'], human_entry['points3d']
 		colmap_cameras_tf = list(human_entry['images'].keys())
 		self.xyz = np.stack([p.xyz for p in self.points3D.values()])
 		color = np.stack([p.rgb for p in self.points3D.values()])
 		self.gests = [np.array(gest0) for gest0 in human_entry['gestalt']]
 		to_camera_ids = np.array([colmap_img.camera_id for colmap_img in human_entry['images'].values()])
 		self.vertices = centers
 		nvert = centers.shape[0]
 		desired_vertices = int(2.2*nvert)
 		if desired_vertices < self.min_vertices:
 			desired_vertices = self.mean_vertices
 		if desired_vertices > self.max_vertices:
 			desired_vertices = self.mean_vertices
 		if nvert >= desired_vertices:
 			vertices = centers[:desired_vertices]
 			print("Enough vertices.")
 		    uvs.append(uv)
 		edges = []
+		thresholds_min_mean = {0 : [5, 7], 1 : [9, 25], 2: [30, 1000]}
+		# thresholds_min_mean = {0 : [1, 7], 1 : [3, 25], 2: [3, 1000]}
 		for i in range(pyt_centers.shape[0]):
 			for j in range(i+1, pyt_centers.shape[0]):
 				etype = (self.is_apex[i] + self.is_apex[j])
 		else:
 			edges = [(0, 0)]
+		if self.return_fully_connected:
+			zero_vertices = np.zeros((vertices.shape[0],3))
+			# zero_vertices = self.wf_center[None].repeat(vertices.shape[0], axis=0)
+			vertices, edges = fully_connected_solution(zero_vertices)
 		if visualize:
 			from hoho.viz3d import plot_estimate_and_gt

handcrafted_solution.py DELETED Viewed

@@ -1,245 +0,0 @@
-# Description: This file contains the handcrafted solution for the task of wireframe reconstruction
-import io
-from PIL import Image as PImage
-import numpy as np
-from collections import defaultdict
-import cv2
-from typing import Tuple, List
-from scipy.spatial.distance import cdist
-from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
-from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
-def empty_solution():
-    '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
-    return np.zeros((2,3)), [(0, 1)]
-def convert_entry_to_human_readable(entry):
-    out = {}
-    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces', 'face_semantics', 'K', 'R', 't']
-    for k, v in entry.items():
-        if k in already_good:
-            out[k] = v
-            continue
-        if k == 'points3d':
-            out[k] = read_points3D_binary(fid=io.BytesIO(v))
-        if k == 'cameras':
-            out[k] = read_cameras_binary(fid=io.BytesIO(v))
-        if k == 'images':
-            out[k] = read_images_binary(fid=io.BytesIO(v))
-        if k in ['ade20k', 'gestalt']:
-            out[k] =  [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
-        if k == 'depthcm':
-            out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
-    return out
-def get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 50.0):
-    '''Get the vertices and edges from the gestalt segmentation mask of the house'''
-    vertices = []
-    connections = []
-    # Apex
-    apex_color = np.array(gestalt_color_mapping['apex'])
-    apex_mask = cv2.inRange(gest_seg_np,  apex_color-0.5, apex_color+0.5)
-    if apex_mask.sum() > 0:
-        output = cv2.connectedComponentsWithStats(apex_mask, 8, cv2.CV_32S)
-        (numLabels, labels, stats, centroids) = output
-        stats, centroids = stats[1:], centroids[1:]
-        for i in range(numLabels-1):
-            vert = {"xy": centroids[i], "type": "apex"}
-            vertices.append(vert)
-    eave_end_color = np.array(gestalt_color_mapping['eave_end_point'])
-    eave_end_mask = cv2.inRange(gest_seg_np,  eave_end_color-0.5, eave_end_color+0.5)
-    if eave_end_mask.sum() > 0:
-        output = cv2.connectedComponentsWithStats(eave_end_mask, 8, cv2.CV_32S)
-        (numLabels, labels, stats, centroids) = output
-        stats, centroids = stats[1:], centroids[1:]
-        for i in range(numLabels-1):
-            vert = {"xy": centroids[i], "type": "eave_end_point"}
-            vertices.append(vert)
-    # Connectivity
-    apex_pts = []
-    apex_pts_idxs = []
-    for j, v in enumerate(vertices):
-        apex_pts.append(v['xy'])
-        apex_pts_idxs.append(j)
-    apex_pts = np.array(apex_pts)
-    # Ridge connects two apex points
-    for edge_class in ['eave', 'ridge', 'rake', 'valley']:
-        edge_color = np.array(gestalt_color_mapping[edge_class])
-        mask = cv2.morphologyEx(cv2.inRange(gest_seg_np,
-                                            edge_color-0.5,
-                                            edge_color+0.5),
-                                cv2.MORPH_DILATE, np.ones((11, 11)))
-        line_img = np.copy(gest_seg_np) * 0
-        if mask.sum() > 0:
-            output = cv2.connectedComponentsWithStats(mask, 8, cv2.CV_32S)
-            (numLabels, labels, stats, centroids) = output
-            stats, centroids = stats[1:], centroids[1:]
-            edges = []
-            for i in range(1, numLabels):
-                y,x = np.where(labels == i)
-                xleft_idx = np.argmin(x)
-                x_left = x[xleft_idx]
-                y_left = y[xleft_idx]
-                xright_idx = np.argmax(x)
-                x_right = x[xright_idx]
-                y_right = y[xright_idx]
-                edges.append((x_left, y_left, x_right, y_right))
-                cv2.line(line_img, (x_left, y_left), (x_right, y_right), (255, 255, 255), 2)
-            edges = np.array(edges)
-            if (len(apex_pts) < 2) or len(edges) <1:
-                continue
-            pts_to_edges_dist = np.minimum(cdist(apex_pts, edges[:,:2]), cdist(apex_pts, edges[:,2:]))
-            connectivity_mask = pts_to_edges_dist <= edge_th
-            edge_connects = connectivity_mask.sum(axis=0)
-            for edge_idx, edgesum in enumerate(edge_connects):
-                if edgesum>=2:
-                    connected_verts = np.where(connectivity_mask[:,edge_idx])[0]
-                    for a_i, a in enumerate(connected_verts):
-                        for b in connected_verts[a_i+1:]:
-                            connections.append((a, b))
-    return vertices, connections
-def get_uv_depth(vertices, depth):
-    '''Get the depth of the vertices from the depth image'''
-    uv = []
-    for v in vertices:
-        uv.append(v['xy'])
-    uv = np.array(uv)
-    uv_int = uv.astype(np.int32)
-    H, W = depth.shape[:2]
-    uv_int[:, 0] = np.clip( uv_int[:, 0], 0, W-1)
-    uv_int[:, 1] = np.clip( uv_int[:, 1], 0, H-1)
-    vertex_depth = depth[(uv_int[:, 1] , uv_int[:, 0])]
-    return uv, vertex_depth
-def merge_vertices_3d(vert_edge_per_image, th=0.1):
-    '''Merge vertices that are close to each other in 3D space and are of same types'''
-    all_3d_vertices = []
-    connections_3d = []
-    all_indexes = []
-    cur_start = 0
-    types = []
-    for cimg_idx, (vertices, connections, vertices_3d) in vert_edge_per_image.items():
-        types += [int(v['type']=='apex') for v in vertices]
-        all_3d_vertices.append(vertices_3d)
-        connections_3d+=[(x+cur_start,y+cur_start) for (x,y) in connections]
-        cur_start+=len(vertices_3d)
-    all_3d_vertices = np.concatenate(all_3d_vertices, axis=0)
-    #print (connections_3d)
-    distmat = cdist(all_3d_vertices, all_3d_vertices)
-    types = np.array(types).reshape(-1,1)
-    same_types = cdist(types, types)
-    mask_to_merge = (distmat <= th) & (same_types==0)
-    new_vertices = []
-    new_connections = []
-    to_merge = sorted(list(set([tuple(a.nonzero()[0].tolist()) for a in mask_to_merge])))
-    to_merge_final = defaultdict(list)
-    for i in range(len(all_3d_vertices)):
-        for j in to_merge:
-            if i in j:
-                to_merge_final[i]+=j
-    for k, v in to_merge_final.items():
-        to_merge_final[k] = list(set(v))
-    already_there = set()
-    merged = []
-    for k, v in to_merge_final.items():
-        if k in already_there:
-            continue
-        merged.append(v)
-        for vv in v:
-            already_there.add(vv)
-    old_idx_to_new = {}
-    count=0
-    for idxs in merged:
-        new_vertices.append(all_3d_vertices[idxs].mean(axis=0))
-        for idx in idxs:
-            old_idx_to_new[idx] = count
-        count +=1
-    #print (connections_3d)
-    new_vertices=np.array(new_vertices)
-    #print (connections_3d)
-    for conn in connections_3d:
-        new_con = sorted((old_idx_to_new[conn[0]], old_idx_to_new[conn[1]]))
-        if new_con[0] == new_con[1]:
-            continue
-        if new_con not in new_connections:
-            new_connections.append(new_con)
-    #print (f'{len(new_vertices)} left after merging {len(all_3d_vertices)} with {th=}')
-    return new_vertices, new_connections
-def prune_not_connected(all_3d_vertices, connections_3d):
-    '''Prune vertices that are not connected to any other vertex'''
-    connected = defaultdict(list)
-    for c in connections_3d:
-        connected[c[0]].append(c)
-        connected[c[1]].append(c)
-    new_indexes = {}
-    new_verts = []
-    connected_out = []
-    for k,v in connected.items():
-        vert = all_3d_vertices[k]
-        if tuple(vert) not in new_verts:
-            new_verts.append(tuple(vert))
-            new_indexes[k]=len(new_verts) -1
-    for k,v in connected.items():
-        for vv in v:
-            connected_out.append((new_indexes[vv[0]],new_indexes[vv[1]]))
-    connected_out=list(set(connected_out))
-    return np.array(new_verts), connected_out
-def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
-    good_entry = convert_entry_to_human_readable(entry)
-    vert_edge_per_image = {}
-    for i, (gest, depth, K, R, t) in enumerate(zip(good_entry['gestalt'],
-                                                good_entry['depthcm'],
-                                                good_entry['K'],
-                                                good_entry['R'],
-                                                good_entry['t']
-                                                )):
-        gest_seg = gest.resize(depth.size)
-        gest_seg_np = np.array(gest_seg).astype(np.uint8)
-        # Metric3D
-        depth_np = np.array(depth) / 2.5 # 2.5 is the scale estimation coefficient
-        vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 20.)
-        if (len(vertices) < 2) or (len(connections) < 1):
-            print (f'Not enough vertices or connections in image {i}')
-            vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
-            continue
-        uv, depth_vert = get_uv_depth(vertices, depth_np)
-        # Normalize the uv to the camera intrinsics
-        xy_local = np.ones((len(uv), 3))
-        xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
-        xy_local[:, 1] = (uv[:, 1] - K[1,2]) / K[1,1]
-        # Get the 3D vertices
-        vertices_3d_local = depth_vert[...,None] * (xy_local/np.linalg.norm(xy_local, axis=1)[...,None])
-        world_to_cam = np.eye(4)
-        world_to_cam[:3, :3] = R
-        world_to_cam[:3, 3] = t.reshape(-1)
-        cam_to_world =  np.linalg.inv(world_to_cam)
-        vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
-        vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
-        vert_edge_per_image[i] = vertices, connections, vertices_3d
-    all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 3.0)
-    all_3d_vertices_clean, connections_3d_clean  = prune_not_connected(all_3d_vertices, connections_3d)
-    if (len(all_3d_vertices_clean) < 2) or len(connections_3d_clean) < 1:
-        print (f'Not enough vertices or connections in the 3D vertices')
-        return (good_entry['__key__'], *empty_solution())
-    if visualize:
-        from hoho.viz3d import plot_estimate_and_gt
-        plot_estimate_and_gt(   all_3d_vertices_clean,
-                                connections_3d_clean,
-                                good_entry['wf_vertices'],
-                                good_entry['wf_edges'])
-    return good_entry['__key__'], all_3d_vertices_clean, connections_3d_clean

helpers.py CHANGED Viewed

@@ -3,6 +3,27 @@ from PIL import Image as PImage
 import io
 from scipy.spatial.distance import cdist
 from scipy.optimize import linear_sum_assignment
 def to_K(f, cx, cy):

 import io
 from scipy.spatial.distance import cdist
 from scipy.optimize import linear_sum_assignment
+from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
+def convert_entry_to_human_readable(entry):
+    out = {}
+    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces', 'face_semantics', 'K', 'R', 't']
+    for k, v in entry.items():
+        if k in already_good:
+            out[k] = v
+            continue
+        if k == 'points3d':
+            out[k] = read_points3D_binary(fid=io.BytesIO(v))
+        if k == 'cameras':
+            out[k] = read_cameras_binary(fid=io.BytesIO(v))
+        if k == 'images':
+            out[k] = read_images_binary(fid=io.BytesIO(v))
+        if k in ['ade20k', 'gestalt']:
+            out[k] =  [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
+        if k == 'depthcm':
+            out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
+    return out
 def to_K(f, cx, cy):

testing.ipynb → main.ipynb RENAMED Viewed

The diff for this file is too large to render. See raw diff

my_solution.py CHANGED Viewed

@@ -9,43 +9,17 @@ from scipy.spatial.distance import cdist
 from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
 from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
-from geom_solver import GeomSolver, my_empty_solution, cheat_the_metric_solution
-def convert_entry_to_human_readable(entry):
-    out = {}
-    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces', 'face_semantics', 'K', 'R', 't']
-    for k, v in entry.items():
-        if k in already_good:
-            out[k] = v
-            continue
-        if k == 'points3d':
-            out[k] = read_points3D_binary(fid=io.BytesIO(v))
-        if k == 'cameras':
-            out[k] = read_cameras_binary(fid=io.BytesIO(v))
-        if k == 'images':
-            out[k] = read_images_binary(fid=io.BytesIO(v))
-        if k in ['ade20k', 'gestalt']:
-            out[k] =  [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
-        if k == 'depthcm':
-            out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
-    return out
 def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
-    # return (entry['__key__'], *my_empty_solution())
     vertices0, edges0 = my_empty_solution()
     try:
         vertices, edges = GeomSolver().solve(entry)
     except:
         print('ERROR')
-        # vertices, edges = vertices0, edges0
-        vertices, edges = cheat_the_metric_solution()
-    # if vertices.shape[0] < vertices0.shape[0]:
-    #     verts_new = vertices0
-    #     verts_new[:vertices.shape[0]] = vertices
-    #     vertices = verts_new
     if (len(edges) < 1) and (len(vertices) >= 2):
         # print("Added only edges")

 from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
 from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
+from geom_solver import GeomSolver, my_empty_solution, fully_connected_solution
 def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
     vertices0, edges0 = my_empty_solution()
     try:
         vertices, edges = GeomSolver().solve(entry)
     except:
         print('ERROR')
+        vertices, edges = fully_connected_solution()
     if (len(edges) < 1) and (len(vertices) >= 2):
         # print("Added only edges")

pointnet.py DELETED Viewed

@@ -1,213 +0,0 @@
-from __future__ import print_function
-import torch
-import torch.nn as nn
-import torch.nn.parallel
-import torch.utils.data
-from torch.autograd import Variable
-import numpy as np
-import torch.nn.functional as F
-class STN3d(nn.Module):
-    def __init__(self):
-        super(STN3d, self).__init__()
-        self.conv1 = torch.nn.Conv1d(3, 64, 1)
-        self.conv2 = torch.nn.Conv1d(64, 128, 1)
-        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
-        self.fc1 = nn.Linear(1024, 512)
-        self.fc2 = nn.Linear(512, 256)
-        self.fc3 = nn.Linear(256, 9)
-        self.relu = nn.ReLU()
-        self.bn1 = nn.BatchNorm1d(64)
-        self.bn2 = nn.BatchNorm1d(128)
-        self.bn3 = nn.BatchNorm1d(1024)
-        self.bn4 = nn.BatchNorm1d(512)
-        self.bn5 = nn.BatchNorm1d(256)
-    def forward(self, x):
-        batchsize = x.size()[0]
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
-        x = torch.max(x, 2, keepdim=True)[0]
-        x = x.view(-1, 1024)
-        x = F.relu(self.bn4(self.fc1(x)))
-        x = F.relu(self.bn5(self.fc2(x)))
-        x = self.fc3(x)
-        iden = Variable(torch.from_numpy(np.array([1,0,0,0,1,0,0,0,1]).astype(np.float32))).view(1,9).repeat(batchsize,1)
-        if x.is_cuda:
-            iden = iden.cuda()
-        x = x + iden
-        x = x.view(-1, 3, 3)
-        return x
-class STNkd(nn.Module):
-    def __init__(self, k=64):
-        super(STNkd, self).__init__()
-        self.conv1 = torch.nn.Conv1d(k, 64, 1)
-        self.conv2 = torch.nn.Conv1d(64, 128, 1)
-        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
-        self.fc1 = nn.Linear(1024, 512)
-        self.fc2 = nn.Linear(512, 256)
-        self.fc3 = nn.Linear(256, k*k)
-        self.relu = nn.ReLU()
-        self.bn1 = nn.BatchNorm1d(64)
-        self.bn2 = nn.BatchNorm1d(128)
-        self.bn3 = nn.BatchNorm1d(1024)
-        self.bn4 = nn.BatchNorm1d(512)
-        self.bn5 = nn.BatchNorm1d(256)
-        self.k = k
-    def forward(self, x):
-        batchsize = x.size()[0]
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
-        x = torch.max(x, 2, keepdim=True)[0]
-        x = x.view(-1, 1024)
-        x = F.relu(self.bn4(self.fc1(x)))
-        x = F.relu(self.bn5(self.fc2(x)))
-        x = self.fc3(x)
-        iden = Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32))).view(1,self.k*self.k).repeat(batchsize,1)
-        if x.is_cuda:
-            iden = iden.cuda()
-        x = x + iden
-        x = x.view(-1, self.k, self.k)
-        return x
-class PointNetfeat(nn.Module):
-    def __init__(self, global_feat = True, feature_transform = False):
-        super(PointNetfeat, self).__init__()
-        self.stn = STN3d()
-        self.conv1 = torch.nn.Conv1d(3, 64, 1)
-        self.conv2 = torch.nn.Conv1d(64, 128, 1)
-        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
-        self.bn1 = nn.BatchNorm1d(64)
-        self.bn2 = nn.BatchNorm1d(128)
-        self.bn3 = nn.BatchNorm1d(1024)
-        self.global_feat = global_feat
-        self.feature_transform = feature_transform
-        if self.feature_transform:
-            self.fstn = STNkd(k=64)
-    def forward(self, x):
-        n_pts = x.size()[2]
-        trans = self.stn(x)
-        x = x.transpose(2, 1)
-        x = torch.bmm(x, trans)
-        x = x.transpose(2, 1)
-        x = F.relu(self.bn1(self.conv1(x)))
-        if self.feature_transform:
-            trans_feat = self.fstn(x)
-            x = x.transpose(2,1)
-            x = torch.bmm(x, trans_feat)
-            x = x.transpose(2,1)
-        else:
-            trans_feat = None
-        pointfeat = x
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = self.bn3(self.conv3(x))
-        x = torch.max(x, 2, keepdim=True)[0]
-        x = x.view(-1, 1024)
-        if self.global_feat:
-            return x, trans, trans_feat
-        else:
-            x = x.view(-1, 1024, 1).repeat(1, 1, n_pts)
-            return torch.cat([x, pointfeat], 1), trans, trans_feat
-class PointNetCls(nn.Module):
-    def __init__(self, k=2, feature_transform=False):
-        super(PointNetCls, self).__init__()
-        self.feature_transform = feature_transform
-        self.feat = PointNetfeat(global_feat=True, feature_transform=feature_transform)
-        self.fc1 = nn.Linear(1024, 512)
-        self.fc2 = nn.Linear(512, 256)
-        self.fc3 = nn.Linear(256, k)
-        self.dropout = nn.Dropout(p=0.3)
-        self.bn1 = nn.BatchNorm1d(512)
-        self.bn2 = nn.BatchNorm1d(256)
-        self.relu = nn.ReLU()
-    def forward(self, x):
-        x, trans, trans_feat = self.feat(x)
-        x = F.relu(self.bn1(self.fc1(x)))
-        x = F.relu(self.bn2(self.dropout(self.fc2(x))))
-        x = self.fc3(x)
-        return F.log_softmax(x, dim=1), trans, trans_feat
-class PointNetDenseCls(nn.Module):
-    def __init__(self, k = 2, feature_transform=False):
-        super(PointNetDenseCls, self).__init__()
-        self.k = k
-        self.feature_transform=feature_transform
-        self.feat = PointNetfeat(global_feat=False, feature_transform=feature_transform)
-        self.conv1 = torch.nn.Conv1d(1088, 512, 1)
-        self.conv2 = torch.nn.Conv1d(512, 256, 1)
-        self.conv3 = torch.nn.Conv1d(256, 128, 1)
-        self.conv4 = torch.nn.Conv1d(128, self.k, 1)
-        self.bn1 = nn.BatchNorm1d(512)
-        self.bn2 = nn.BatchNorm1d(256)
-        self.bn3 = nn.BatchNorm1d(128)
-    def forward(self, x):
-        batchsize = x.size()[0]
-        n_pts = x.size()[2]
-        x, trans, trans_feat = self.feat(x)
-        x = F.relu(self.bn1(self.conv1(x)))
-        x = F.relu(self.bn2(self.conv2(x)))
-        x = F.relu(self.bn3(self.conv3(x)))
-        x = self.conv4(x)
-        x = x.transpose(2,1).contiguous()
-        x = F.log_softmax(x.view(-1,self.k), dim=-1)
-        x = x.view(batchsize, n_pts, self.k)
-        return x, trans, trans_feat
-def feature_transform_regularizer(trans):
-    d = trans.size()[1]
-    batchsize = trans.size()[0]
-    I = torch.eye(d)[None, :, :]
-    if trans.is_cuda:
-        I = I.cuda()
-    loss = torch.mean(torch.norm(torch.bmm(trans, trans.transpose(2,1)) - I, dim=(1,2)))
-    return loss
-if __name__ == '__main__':
-    sim_data = Variable(torch.rand(32,3,2500))
-    trans = STN3d()
-    out = trans(sim_data)
-    print('stn', out.size())
-    print('loss', feature_transform_regularizer(out))
-    sim_data_64d = Variable(torch.rand(32, 64, 2500))
-    trans = STNkd(k=64)
-    out = trans(sim_data_64d)
-    print('stn64d', out.size())
-    print('loss', feature_transform_regularizer(out))
-    pointfeat = PointNetfeat(global_feat=True)
-    out, _, _ = pointfeat(sim_data)
-    print('global feat', out.size())
-    pointfeat = PointNetfeat(global_feat=False)
-    out, _, _ = pointfeat(sim_data)
-    print('point feat', out.size())
-    cls = PointNetCls(k = 5)
-    out, _, _ = cls(sim_data)
-    print('class', out.size())
-    seg = PointNetDenseCls(k = 3)
-    out, _, _ = seg(sim_data)
-    print('seg', out.size())

script_cpus.py DELETED Viewed

@@ -1,145 +0,0 @@
-### This is example of the script that will be run in the test environment.
-### Some parts of the code are compulsory and you should NOT CHANGE THEM.
-### They are between '''---compulsory---''' comments.
-### You can change the rest of the code to define and test your solution.
-### However, you should not change the signature of the provided function.
-### The script would save "submission.parquet" file in the current directory.
-### The actual logic of the solution is implemented in the `handcrafted_solution.py` file.
-### The `handcrafted_solution.py` file is a placeholder for your solution.
-### You should implement the logic of your solution in that file.
-### You can use any additional files and subdirectories to organize your code.
-'''---compulsory---'''
-# import subprocess
-# from pathlib import Path
-# def install_package_from_local_file(package_name, folder='packages'):
-#     """
-#     Installs a package from a local .whl file or a directory containing .whl files using pip.
-#     Parameters:
-#     path_to_file_or_directory (str): The path to the .whl file or the directory containing .whl files.
-#     """
-#     try:
-#         pth = str(Path(folder) / package_name)
-#         subprocess.check_call([subprocess.sys.executable, "-m", "pip", "install",
-#                                "--no-index",  # Do not use package index
-#                                "--find-links", pth,  # Look for packages in the specified directory or at the file
-#                                package_name])  # Specify the package to install
-#         print(f"Package installed successfully from {pth}")
-#     except subprocess.CalledProcessError as e:
-#         print(f"Failed to install package from {pth}. Error: {e}")
-# install_package_from_local_file('hoho')
-import hoho; hoho.setup() # YOU MUST CALL hoho.setup() BEFORE ANYTHING ELSE
-# import subprocess
-# import importlib
-# from pathlib import Path
-# import subprocess
-# ### The function below is useful for installing additional python wheels.
-# def install_package_from_local_file(package_name, folder='packages'):
-#     """
-#     Installs a package from a local .whl file or a directory containing .whl files using pip.
-#     Parameters:
-#     path_to_file_or_directory (str): The path to the .whl file or the directory containing .whl files.
-#     """
-#     try:
-#         pth = str(Path(folder) / package_name)
-#         subprocess.check_call([subprocess.sys.executable, "-m", "pip", "install",
-#                                "--no-index",  # Do not use package index
-#                                "--find-links", pth,  # Look for packages in the specified directory or at the file
-#                                package_name])  # Specify the package to install
-#         print(f"Package installed successfully from {pth}")
-#     except subprocess.CalledProcessError as e:
-#         print(f"Failed to install package from {pth}. Error: {e}")
-# pip download webdataset -d packages/webdataset --platform manylinux1_x86_64 --python-version 38 --only-binary=:all:
-# install_package_from_local_file('webdataset')
-# install_package_from_local_file('tqdm')
-### Here you can import any library or module you want.
-### The code below is used to read and parse the input dataset.
-### Please, do not modify it.
-import webdataset as wds
-from tqdm import tqdm
-from typing import Dict
-import pandas as pd
-from transformers import AutoTokenizer
-import os
-import time
-import io
-from PIL import Image as PImage
-import numpy as np
-from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
-from hoho import proc, Sample
-def convert_entry_to_human_readable(entry):
-    out = {}
-    already_good = ['__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces', 'face_semantics', 'K', 'R', 't']
-    for k, v in entry.items():
-        if k in already_good:
-            out[k] = v
-            continue
-        if k == 'points3d':
-            out[k] = read_points3D_binary(fid=io.BytesIO(v))
-        if k == 'cameras':
-            out[k] = read_cameras_binary(fid=io.BytesIO(v))
-        if k == 'images':
-            out[k] = read_images_binary(fid=io.BytesIO(v))
-        if k in ['ade20k', 'gestalt']:
-            out[k] =  [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
-        if k == 'depthcm':
-            out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
-    return out
-'''---end of compulsory---'''
-### The part below is used to define and test your solution.
-from pathlib import Path
-def save_submission(submission, path):
-    """
-    Saves the submission to a specified path.
-    Parameters:
-    submission (List[Dict[]]): The submission to save.
-    path (str): The path to save the submission to.
-    """
-    sub = pd.DataFrame(submission, columns=["__key__", "wf_vertices", "wf_edges"])
-    sub.to_parquet(path)
-    print(f"Submission saved to {path}")
-if __name__ == "__main__":
-    from my_solution import predict
-    print ("------------ Loading dataset------------ ")
-    params = hoho.get_params()
-    dataset = hoho.get_dataset(decode=None, split='all', dataset_type='webdataset')
-    print('------------ Now you can do your solution ---------------')
-    solution = []
-    from concurrent.futures import ProcessPoolExecutor
-    with ProcessPoolExecutor(max_workers=8) as pool:
-        results = []
-        for i, sample in enumerate(tqdm(dataset)):
-            results.append(pool.submit(predict, sample, visualize=False))
-        for i, result in enumerate(tqdm(results)):
-            key, pred_vertices, pred_edges = result.result()
-            solution.append({
-                            '__key__': key,
-                            'wf_vertices': pred_vertices.tolist(),
-                            'wf_edges': pred_edges
-                        })
-            if i % 100 == 0:
-                # incrementally save the results in case we run out of time
-                print(f"Processed {i} samples")
-                # save_submission(solution, Path(params['output_path']) / "submission.parquet")
-    print('------------ Saving results ---------------')
-    save_submission(solution, Path(params['output_path']) / "submission.parquet")
-    print("------------ Done ------------ ")

train_pointnet.py DELETED Viewed

@@ -1,148 +0,0 @@
-from __future__ import print_function
-import argparse
-import os
-import random
-import torch
-import torch.nn.parallel
-import torch.optim as optim
-import torch.utils.data
-from pointnet.dataset import ShapeNetDataset, ModelNetDataset
-from pointnet import PointNetCls, feature_transform_regularizer
-import torch.nn.functional as F
-from tqdm import tqdm
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    '--batchSize', type=int, default=32, help='input batch size')
-parser.add_argument(
-    '--num_points', type=int, default=2500, help='input batch size')
-parser.add_argument(
-    '--workers', type=int, help='number of data loading workers', default=4)
-parser.add_argument(
-    '--nepoch', type=int, default=250, help='number of epochs to train for')
-parser.add_argument('--outf', type=str, default='cls', help='output folder')
-parser.add_argument('--model', type=str, default='', help='model path')
-parser.add_argument('--dataset', type=str, required=True, help="dataset path")
-parser.add_argument('--dataset_type', type=str, default='shapenet', help="dataset type shapenet|modelnet40")
-parser.add_argument('--feature_transform', action='store_true', help="use feature transform")
-opt = parser.parse_args()
-print(opt)
-blue = lambda x: '\033[94m' + x + '\033[0m'
-opt.manualSeed = random.randint(1, 10000)  # fix seed
-print("Random Seed: ", opt.manualSeed)
-random.seed(opt.manualSeed)
-torch.manual_seed(opt.manualSeed)
-if opt.dataset_type == 'shapenet':
-    dataset = ShapeNetDataset(
-        root=opt.dataset,
-        classification=True,
-        npoints=opt.num_points)
-    test_dataset = ShapeNetDataset(
-        root=opt.dataset,
-        classification=True,
-        split='test',
-        npoints=opt.num_points,
-        data_augmentation=False)
-elif opt.dataset_type == 'modelnet40':
-    dataset = ModelNetDataset(
-        root=opt.dataset,
-        npoints=opt.num_points,
-        split='trainval')
-    test_dataset = ModelNetDataset(
-        root=opt.dataset,
-        split='test',
-        npoints=opt.num_points,
-        data_augmentation=False)
-else:
-    exit('wrong dataset type')
-dataloader = torch.utils.data.DataLoader(
-    dataset,
-    batch_size=opt.batchSize,
-    shuffle=True,
-    num_workers=int(opt.workers))
-testdataloader = torch.utils.data.DataLoader(
-        test_dataset,
-        batch_size=opt.batchSize,
-        shuffle=True,
-        num_workers=int(opt.workers))
-print(len(dataset), len(test_dataset))
-num_classes = len(dataset.classes)
-print('classes', num_classes)
-try:
-    os.makedirs(opt.outf)
-except OSError:
-    pass
-classifier = PointNetCls(k=num_classes, feature_transform=opt.feature_transform)
-if opt.model != '':
-    classifier.load_state_dict(torch.load(opt.model))
-optimizer = optim.Adam(classifier.parameters(), lr=0.001, betas=(0.9, 0.999))
-scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
-classifier.cuda()
-num_batch = len(dataset) / opt.batchSize
-for epoch in range(opt.nepoch):
-    scheduler.step()
-    for i, data in enumerate(dataloader, 0):
-        points, target = data
-        target = target[:, 0]
-        points = points.transpose(2, 1)
-        points, target = points.cuda(), target.cuda()
-        optimizer.zero_grad()
-        classifier = classifier.train()
-        pred, trans, trans_feat = classifier(points)
-        loss = F.nll_loss(pred, target)
-        if opt.feature_transform:
-            loss += feature_transform_regularizer(trans_feat) * 0.001
-        loss.backward()
-        optimizer.step()
-        pred_choice = pred.data.max(1)[1]
-        correct = pred_choice.eq(target.data).cpu().sum()
-        print('[%d: %d/%d] train loss: %f accuracy: %f' % (epoch, i, num_batch, loss.item(), correct.item() / float(opt.batchSize)))
-        if i % 10 == 0:
-            j, data = next(enumerate(testdataloader, 0))
-            points, target = data
-            target = target[:, 0]
-            points = points.transpose(2, 1)
-            points, target = points.cuda(), target.cuda()
-            classifier = classifier.eval()
-            pred, _, _ = classifier(points)
-            loss = F.nll_loss(pred, target)
-            pred_choice = pred.data.max(1)[1]
-            correct = pred_choice.eq(target.data).cpu().sum()
-            print('[%d: %d/%d] %s loss: %f accuracy: %f' % (epoch, i, num_batch, blue('test'), loss.item(), correct.item()/float(opt.batchSize)))
-    torch.save(classifier.state_dict(), '%s/cls_model_%d.pth' % (opt.outf, epoch))
-total_correct = 0
-total_testset = 0
-for i,data in tqdm(enumerate(testdataloader, 0)):
-    points, target = data
-    target = target[:, 0]
-    points = points.transpose(2, 1)
-    points, target = points.cuda(), target.cuda()
-    classifier = classifier.eval()
-    pred, _, _ = classifier(points)
-    pred_choice = pred.data.max(1)[1]
-    correct = pred_choice.eq(target.data).cpu().sum()
-    total_correct += correct.item()
-    total_testset += points.size()[0]
-print("final accuracy {}".format(total_correct / float(total_testset)))