# LucidDreamer/scene/dataset_readers.py
# Hugging Face revision 24f9881 ("untroubled files first"), 17.3 kB.
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact george.drettakis@inria.fr
#
import os
import sys
import json
from typing import NamedTuple
from pathlib import Path
import imageio
import torch
import numpy as np
from PIL import Image
from plyfile import PlyData, PlyElement
from scene.gaussian_model import BasicPointCloud
from scene.cameras import MiniCam, Camera
from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \
read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text
from utils.graphics import getWorld2View2, focal2fov, fov2focal
from utils.graphics import getProjectionMatrix
from utils.trajectory import get_camerapaths
from utils.sh import SH2RGB
class CameraInfo(NamedTuple):
    """Immutable per-view camera record produced by the readers in this file.

    The readers store ``R`` as the transpose of the world-to-camera rotation
    and ``T`` as the world-to-camera translation.
    """
    # Identifier of the view (COLMAP camera id, or the frame index for JSON readers).
    uid: int
    # Rotation block, stored transposed by every reader in this module.
    R: np.ndarray
    # Translation of the world-to-camera transform.
    T: np.ndarray
    # Vertical / horizontal field of view as returned by `focal2fov`
    # (presumably radians -- confirm against utils.graphics).
    FovY: float
    FovX: float
    # PIL image for this view (string annotation: no runtime lookup needed).
    image: "Image.Image"
    image_path: str
    image_name: str
    # Image size in pixels.
    width: int
    height: int
class SceneInfo(NamedTuple):
    """Everything a scene loader hands back to its caller.

    Annotations are written as strings so that they document the values the
    loaders really produce without requiring extra imports.
    """
    # Initial point cloud; the file-based loaders fall back to None when the
    # .ply cannot be read.
    point_cloud: "BasicPointCloud | None"
    # Cameras used for optimisation and for held-out evaluation.
    train_cameras: list
    test_cameras: list
    # Preset camera paths: a list of CameraInfo, a dict of MiniCam lists keyed
    # by path name, or None when no preset was requested.
    preset_cameras: "list | dict | None"
    # Dict with the keys "translate" and "radius" (see getNerfppNorm).
    nerf_normalization: dict
    # Location of the point-cloud file ('' when the cloud came from memory).
    ply_path: str
def getNerfppNorm(cam_info):
    """Compute the translation and radius that normalise a set of cameras.

    Args:
        cam_info: iterable of objects exposing ``R`` and ``T`` attributes.

    Returns:
        dict with
          * ``"translate"``: negated mean of the camera centres (flat array),
          * ``"radius"``: 1.1 times the largest distance of any camera centre
            from that mean.

    Raises:
        ValueError: if ``cam_info`` is empty (nothing to stack).
    """
    # A camera centre is the translation column of the inverted matrix
    # returned by getWorld2View2; keep it as a (3, 1) column for stacking.
    centres = [
        np.linalg.inv(getWorld2View2(view.R, view.T))[:3, 3:4]
        for view in cam_info
    ]

    stacked = np.hstack(centres)
    mean_centre = np.mean(stacked, axis=1, keepdims=True)
    distances = np.linalg.norm(stacked - mean_centre, axis=0, keepdims=True)
    farthest = np.max(distances)

    return {"translate": -mean_centre.flatten(), "radius": farthest * 1.1}
def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder):
    """Build one CameraInfo per COLMAP image entry.

    Args:
        cam_extrinsics: mapping of image key -> record with ``camera_id``,
            ``qvec``, ``tvec`` and ``name`` attributes.
        cam_intrinsics: mapping of camera id -> record with ``id``, ``model``,
            ``width``, ``height`` and ``params`` attributes.
        images_folder: directory that holds the image files.

    Returns:
        list of CameraInfo in the iteration order of ``cam_extrinsics``.

    Raises:
        AssertionError: if a camera uses a model other than ``PINHOLE`` or
            ``SIMPLE_PINHOLE``.
    """
    cam_infos = []
    total = len(cam_extrinsics)
    for idx, key in enumerate(cam_extrinsics):
        # The leading carriage return keeps the counter on one console line.
        sys.stdout.write("\rReading camera {}/{}".format(idx + 1, total))
        sys.stdout.flush()

        extr = cam_extrinsics[key]
        intr = cam_intrinsics[extr.camera_id]

        # Validate the camera model before doing any other work. This is an
        # explicit raise rather than `assert False` so that the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if intr.model == "SIMPLE_PINHOLE":
            # A single focal length is shared by both axes.
            focal_length_x = focal_length_y = intr.params[0]
        elif intr.model == "PINHOLE":
            focal_length_x = intr.params[0]
            focal_length_y = intr.params[1]
        else:
            raise AssertionError(
                "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
            )

        height = intr.height
        width = intr.width
        uid = intr.id
        FovY = focal2fov(focal_length_y, height)
        FovX = focal2fov(focal_length_x, width)

        # The rotation is stored transposed, like in the other readers here.
        R = np.transpose(qvec2rotmat(extr.qvec))
        T = np.array(extr.tvec)

        image_path = os.path.join(images_folder, os.path.basename(extr.name))
        # Everything after the first dot of the file name is dropped.
        image_name = os.path.basename(image_path).split(".")[0]
        image = Image.open(image_path)

        cam_infos.append(CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
                                    image_path=image_path, image_name=image_name,
                                    width=width, height=height))
    sys.stdout.write('\n')
    return cam_infos
def fetchPly(path):
    """Read a .ply file and return a randomly subsampled BasicPointCloud.

    At most 100,000 vertices are kept; they are drawn without replacement
    with ``np.random.choice``, so the selection depends on NumPy's global
    random state.

    Args:
        path: location of the .ply file.

    Returns:
        BasicPointCloud whose ``points``, ``colors`` (scaled by 1/255) and
        ``normals`` are each either an array with one row per kept vertex or
        None when the corresponding property is reported missing.
    """
    vertex = PlyData.read(path)['vertex']
    total = len(vertex['x'])
    keep = np.random.choice(total, size=(min(total, 100_000),), replace=False)

    def _columns(*names):
        # Presence of the first property decides whether the group is loaded.
        if names[0] not in vertex:
            return None
        return np.vstack([vertex[name][keep] for name in names]).T

    positions = _columns('x', 'y', 'z')
    colors = _columns('red', 'green', 'blue')
    if colors is not None:
        colors = colors / 255.0
    normals = _columns('nx', 'ny', 'nz')
    return BasicPointCloud(points=positions, colors=colors, normals=normals)
def storePly(path, xyz, rgb):
    """Write points and colours to ``path`` as a PLY 'vertex' element.

    Args:
        path: destination file.
        xyz: 2-D array with one row of x, y, z per point.
        rgb: 2-D array with one row of red, green, blue per point; values are
            cast to unsigned bytes on write.

    Normals are written as zeros.
    """
    # Field layout of one vertex record: position, normal, colour.
    vertex_dtype = [
        ('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
        ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
        ('red', 'u1'), ('green', 'u1'), ('blue', 'u1'),
    ]

    zero_normals = np.zeros_like(xyz)
    rows = np.concatenate((xyz, zero_normals, rgb), axis=1)

    # Filling row by row with tuples lets NumPy cast each column to its field type.
    records = np.empty(xyz.shape[0], dtype=vertex_dtype)
    records[:] = [tuple(row) for row in rows]

    PlyData([PlyElement.describe(records, 'vertex')]).write(path)
def readColmapSceneInfo(path, images, eval, preset=None, llffhold=8,
                        preset_dir='/home/chung/workspace/gaussian-splatting/poses_supplementary'):
    """Load a COLMAP reconstruction found under ``<path>/sparse/0``.

    Args:
        path: dataset root.
        images: name of the image sub-folder; ``None`` selects ``"images"``.
        eval: when truthy, split cameras into train/test; otherwise every
            camera is used for training.
        preset: optional preset name; ``<preset>.json`` is read from
            ``preset_dir``.
        llffhold: accepted for compatibility but currently unused -- the split
            is fixed (see below).
        preset_dir: directory holding preset pose files. The default keeps the
            path that used to be hard-coded here.

    Returns:
        SceneInfo. ``point_cloud`` is None when the .ply file cannot be read.
    """
    # Prefer the binary model files and fall back to the text versions.
    try:
        cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
        cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin")
        cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file)
        cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file)
    except Exception:
        cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.txt")
        cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.txt")
        cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file)
        cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file)

    reading_dir = "images" if images is None else images
    cam_infos_unsorted = readColmapCameras(cam_extrinsics=cam_extrinsics,
                                           cam_intrinsics=cam_intrinsics,
                                           images_folder=os.path.join(path, reading_dir))
    # sorted() already returns a new list, so no explicit copy is needed.
    cam_infos = sorted(cam_infos_unsorted, key=lambda x: x.image_name)

    if eval:
        # Fixed split: positions 0 and 2 of every group of five are used for
        # training, the remaining three for testing.
        train_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % 5 in (0, 2)]
        test_cam_infos = [c for idx, c in enumerate(cam_infos) if idx % 5 not in (0, 2)]
    else:
        train_cam_infos = cam_infos
        test_cam_infos = []

    nerf_normalization = getNerfppNorm(train_cam_infos)

    ply_path = os.path.join(path, "sparse/0/points3D.ply")
    bin_path = os.path.join(path, "sparse/0/points3D.bin")
    txt_path = os.path.join(path, "sparse/0/points3D.txt")
    if not os.path.exists(ply_path):
        print("Converting point3d.bin to .ply, will happen only the first time you open the scene.")
        try:
            xyz, rgb, _ = read_points3D_binary(bin_path)
        except Exception:
            xyz, rgb, _ = read_points3D_text(txt_path)
        storePly(ply_path, xyz, rgb)

    # Loading the cloud is best-effort: report the problem and carry on without it.
    try:
        pcd = fetchPly(ply_path)
    except Exception as err:
        print(f"Could not load point cloud from {ply_path}: {err}")
        pcd = None

    if preset:
        preset_cam_infos = readCamerasFromPreset(preset_dir, f"{preset}.json")
    else:
        preset_cam_infos = None

    return SceneInfo(point_cloud=pcd,
                     train_cameras=train_cam_infos,
                     test_cameras=test_cam_infos,
                     preset_cameras=preset_cam_infos,
                     nerf_normalization=nerf_normalization,
                     ply_path=ply_path)
def readCamerasFromTransforms(path, transformsfile, white_background, extension=".png"):
    """Read cameras and images described by a NeRF-style transforms JSON file.

    Args:
        path: dataset root; frame ``file_path`` entries are relative to it.
        transformsfile: JSON file name inside ``path``. It must provide
            ``"camera_angle_x"`` and a ``"frames"`` list whose items carry
            ``"file_path"`` and ``"transform_matrix"``.
        white_background: composite transparent pixels over white when truthy,
            over black otherwise.
        extension: suffix appended to every frame's ``file_path``.

    Returns:
        list of CameraInfo, one per frame, with ``uid`` equal to the frame index.
    """
    cam_infos = []
    with open(os.path.join(path, transformsfile)) as json_file:
        contents = json.load(json_file)

    fovx = contents["camera_angle_x"]
    bg = np.array([1, 1, 1]) if white_background else np.array([0, 0, 0])

    for idx, frame in enumerate(contents["frames"]):
        # Join with the dataset root exactly once, so relative roots work too.
        image_path = os.path.join(path, frame["file_path"] + extension)
        image_name = Path(image_path).stem

        # NeRF 'transform_matrix' is a camera-to-world transform
        c2w = np.array(frame["transform_matrix"])
        # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward)
        c2w[:3, 1:3] *= -1

        # get the world-to-camera transform and set R, T
        w2c = np.linalg.inv(c2w)
        R = np.transpose(w2c[:3, :3])  # R is stored transposed due to 'glm' in CUDA code
        T = w2c[:3, 3]

        # convert() yields an independent image, so the file can be closed right away.
        with Image.open(image_path) as raw:
            norm_data = np.array(raw.convert("RGBA")) / 255.0

        # Alpha-composite the picture over the chosen background colour.
        alpha = norm_data[:, :, 3:4]
        arr = norm_data[:, :, :3] * alpha + bg * (1 - alpha)
        # uint8 (not int8) is the dtype that represents 0..255; an
        # (H, W, 3) uint8 array is read as an RGB image.
        image = Image.fromarray(np.array(arr * 255.0, dtype=np.uint8))

        # The focal length follows from the horizontal FoV and the width;
        # the vertical FoV then follows from that focal length and the height.
        width, height = image.size
        FovY = focal2fov(fov2focal(fovx, width), height)
        FovX = fovx

        cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
                                    image_path=image_path, image_name=image_name,
                                    width=width, height=height))
    return cam_infos
def readCamerasFromPreset(path, transformsfile):
    """Read a preset camera path and return image-less CameraInfo records.

    Args:
        path: directory containing the preset file.
        transformsfile: JSON file name. It must provide ``"camera_angle_x"``
            and a ``"frames"`` list of ``"transform_matrix"`` entries; each
            matrix needs three rows of four values, because a ``[0, 0, 0, 1]``
            row is appended before inversion.

    Returns:
        list of CameraInfo with a black 512x512 placeholder image, the string
        ``'None'`` as path and name, and ``uid`` equal to the frame index.
    """
    cam_infos = []
    with open(os.path.join(path, transformsfile)) as json_file:
        contents = json.load(json_file)

    # The stored horizontal field of view is widened by 20%.
    FOV = contents["camera_angle_x"] * 1.2
    side = 512  # edge length of the square placeholder image, in pixels
    bottom_row = np.array([0, 0, 0, 1]).reshape(1, 4)

    for idx, frame in enumerate(contents["frames"]):
        # NeRF 'transform_matrix' is a camera-to-world transform
        c2w = np.array(frame["transform_matrix"])
        # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward)
        c2w[:3, 1:3] *= -1

        # get the world-to-camera transform and set R, T
        w2c = np.linalg.inv(np.concatenate((c2w, bottom_row), axis=0))
        R = np.transpose(w2c[:3, :3])  # R is stored transposed due to 'glm' in CUDA code
        T = w2c[:3, 3]

        # Black RGB placeholder, created directly instead of reinterpreting a
        # float buffer as RGB pixels.
        image = Image.new("RGB", (side, side))
        width, height = image.size
        FovY = focal2fov(fov2focal(FOV, side), height)
        FovX = focal2fov(fov2focal(FOV, side), width)

        cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
                                    image_path='None', image_name='None',
                                    width=width, height=height))
    return cam_infos
def readNerfSyntheticInfo(path, white_background, eval, preset=None, extension=".png",
                          preset_dir='/home/chung/workspace/gaussian-splatting/poses_supplementary'):
    """Load a Blender / NeRF-synthetic style dataset.

    Args:
        path: dataset root containing ``transforms_train.json`` and
            ``transforms_test.json``.
        white_background: forwarded to readCamerasFromTransforms.
        eval: when falsy, the test cameras are merged into the training set.
        preset: optional preset name; ``<preset>.json`` is read from
            ``preset_dir``.
        extension: image file suffix.
        preset_dir: directory holding preset pose files. The default keeps the
            path that used to be hard-coded here.

    Returns:
        SceneInfo. ``point_cloud`` is None when the .ply file cannot be read.
    """
    print("Reading Training Transforms")
    train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", white_background, extension)
    print("Reading Test Transforms")
    test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", white_background, extension)

    if preset:
        preset_cam_infos = readCamerasFromPreset(preset_dir, f"{preset}.json")
    else:
        preset_cam_infos = None

    if not eval:
        train_cam_infos.extend(test_cam_infos)
        test_cam_infos = []

    nerf_normalization = getNerfppNorm(train_cam_infos)

    ply_path = os.path.join(path, "points3d.ply")
    if not os.path.exists(ply_path):
        # Since this data set has no colmap data, we start with random points
        num_pts = 100_000
        print(f"Generating random point cloud ({num_pts})...")

        # We create random points inside the bounds of the synthetic Blender scenes
        xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3
        shs = np.random.random((num_pts, 3)) / 255.0
        # The cloud is only written here; it is loaded back from disk below.
        storePly(ply_path, xyz, SH2RGB(shs) * 255)

    # Loading the cloud is best-effort: report the problem and carry on without it.
    try:
        pcd = fetchPly(ply_path)
    except Exception as err:
        print(f"Could not load point cloud from {ply_path}: {err}")
        pcd = None

    return SceneInfo(point_cloud=pcd,
                     train_cameras=train_cam_infos,
                     test_cameras=test_cam_infos,
                     preset_cameras=preset_cam_infos,
                     nerf_normalization=nerf_normalization,
                     ply_path=ply_path)
def loadCamerasFromData(traindata, white_background):
    """Build Camera objects from in-memory training data.

    Args:
        traindata: dict with ``"camera_angle_x"`` and a ``"frames"`` list.
            Every frame is a dict with a ``"transform_matrix"`` and an
            ``"image"`` that offers ``convert("RGBA")`` (e.g. a PIL image).
        white_background: composite transparent pixels over white when truthy,
            over black otherwise.

    Returns:
        list of Camera, one per frame, created with ``data_device='cuda'`` and
        no alpha mask.

    Raises:
        ValueError: if a frame has no image.
    """
    cameras = []
    fovx = traindata["camera_angle_x"]
    bg = np.array([1, 1, 1]) if white_background else np.array([0, 0, 0])

    for idx, frame in enumerate(traindata["frames"]):
        image = frame["image"] if "image" in frame else None
        if image is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"frame {idx} has no 'image' entry")

        # NeRF 'transform_matrix' is a camera-to-world transform
        c2w = np.array(frame["transform_matrix"])
        # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward)
        c2w[:3, 1:3] *= -1

        # get the world-to-camera transform and set R, T
        w2c = np.linalg.inv(c2w)
        R = np.transpose(w2c[:3, :3])  # R is stored transposed due to 'glm' in CUDA code
        T = w2c[:3, 3]

        # Alpha-composite the picture over the chosen background colour.
        norm_data = np.array(image.convert("RGBA")) / 255.0
        alpha = norm_data[:, :, 3:4]
        arr = norm_data[:, :, :3] * alpha + bg * (1 - alpha)

        # Rows are the image height, columns the width. The focal length
        # follows from the horizontal FoV and the width; the vertical FoV
        # then follows from that focal length and the height.
        height, width = arr.shape[:2]
        FovY = focal2fov(fov2focal(fovx, width), height)
        FovX = fovx

        # The composited image is handed over as a channel-first torch tensor.
        cameras.append(Camera(colmap_id=idx, R=R, T=T, FoVx=FovX, FoVy=FovY,
                              image=torch.Tensor(arr).permute(2, 0, 1),
                              gt_alpha_mask=None, image_name='', uid=idx, data_device='cuda'))
    return cameras
def loadCameraPreset(traindata, presetdata):
    """Turn preset camera paths into lists of MiniCam objects.

    Args:
        traindata: dict with ``"camera_angle_x"`` and at least one frame whose
            ``"image"`` exposes ``size`` as ``(width, height)``; that first
            frame fixes the size of every generated camera.
        presetdata: mapping of path name -> dict with a ``"frames"`` list of
            ``"transform_matrix"`` entries.

    Returns:
        dict mapping each path name to its list of MiniCam, in frame order.
    """
    # Camera settings shared by all preset views; the field of view is 20%
    # wider than the training one.
    fovx = traindata["camera_angle_x"] * 1.2
    W, H = traindata["frames"][0]["image"].size
    znear, zfar = 0.01, 100

    def _minicam(frame):
        # NeRF 'transform_matrix' is a camera-to-world transform; flip the
        # Y and Z axes (OpenGL/Blender -> COLMAP convention) before inverting.
        pose = np.array(frame["transform_matrix"])
        pose[:3, 1:3] *= -1
        view = np.linalg.inv(pose)
        R = np.transpose(view[:3, :3])  # R is stored transposed due to 'glm' in CUDA code
        T = view[:3, 3]

        fovy = focal2fov(fov2focal(fovx, W), H)

        world_view_transform = torch.tensor(
            getWorld2View2(R, T, np.array([0.0, 0.0, 0.0]), 1.0)
        ).transpose(0, 1).cuda()
        projection_matrix = getProjectionMatrix(
            znear=znear, zfar=zfar, fovX=fovx, fovY=fovy
        ).transpose(0, 1).cuda()
        # Batched matmul on singleton batches, then drop the batch axis again.
        full_proj_transform = (
            world_view_transform.unsqueeze(0).bmm(projection_matrix.unsqueeze(0))
        ).squeeze(0)

        return MiniCam(width=W, height=H, fovy=fovy, fovx=fovx, znear=znear, zfar=zfar,
                       world_view_transform=world_view_transform,
                       full_proj_transform=full_proj_transform)

    return {
        camkey: [_minicam(frame) for frame in presetdata[camkey]["frames"]]
        for camkey in presetdata
    }
def readDataInfo(traindata, white_background):
    """Assemble a SceneInfo from in-memory training data.

    Args:
        traindata: dict consumed by loadCamerasFromData / loadCameraPreset; it
            must also carry ``'pcd_points'`` (transposed before use) and
            ``'pcd_colors'``.
        white_background: forwarded to loadCamerasFromData.

    Returns:
        SceneInfo with no test cameras, the preset paths returned by
        ``get_camerapaths()`` as preset cameras, and an empty ``ply_path``.
    """
    print("Reading Training Transforms")
    cameras = loadCamerasFromData(traindata, white_background)
    preset_paths = loadCameraPreset(traindata, presetdata=get_camerapaths())

    normalization = getNerfppNorm(cameras)

    # The point cloud comes straight from memory, so there is no file behind it.
    cloud = BasicPointCloud(points=traindata['pcd_points'].T,
                            colors=traindata['pcd_colors'],
                            normals=None)

    return SceneInfo(point_cloud=cloud,
                     train_cameras=cameras,
                     test_cameras=[],
                     preset_cameras=preset_paths,
                     nerf_normalization=normalization,
                     ply_path='')
# Maps a dataset-type name to the reader function that builds its SceneInfo.
# Note that the two readers take different positional arguments.
sceneLoadTypeCallbacks = {
"Colmap": readColmapSceneInfo,
"Blender" : readNerfSyntheticInfo
}