Spaces:
Running
on
Zero
Running
on
Zero
import numpy as np | |
import logging | |
import torch | |
import re | |
def random_per_point_translation_in_place(pcd_data) -> None: | |
""" | |
Jittering the point cloud data by a random value between -0.02 and 0.02 | |
Args: | |
pcd_data: point cloud data in the form x, y, z | |
""" | |
translations = ( | |
np.random.rand(pcd_data.shape[0], 3) - 0.5 | |
) * 0.04 # Random values between -0.02 and 0.02 | |
pcd_data[:, -3:] += translations | |
def compute_max_extent_and_centroid(pcd_data, epsilon=1e-4) -> tuple[np.ndarray, np.ndarray]: | |
"""_summary_ | |
Args: | |
pcd_data : point cloud data in the form x, y, z | |
epsilon (float, optional): buffer for the max_extent. Defaults to 1e-4. | |
Returns: | |
max_extent: maximum extent of the point cloud data in terms of the largest dimension | |
centroid: centroid of the point cloud data | |
""" | |
min_vals = pcd_data.min(axis=0) | |
max_vals = pcd_data.max(axis=0) | |
centroid = (min_vals + max_vals) / 2 | |
max_extent = np.max(max_vals - min_vals) + epsilon | |
return max_extent, centroid | |
def unit_cube_normalization_in_place( | |
pcd_data, | |
max_extent, | |
centroid, | |
): | |
""" | |
Normalized data point in a unit cube between 0 and 1 for each x, y, z in-place | |
Args: | |
pcd_data: point cloud data in the form x, y, z | |
""" | |
# translate the centroid to the origin | |
pcd_data -= centroid | |
# scale the data to fit within [-0.5, 0.5] | |
pcd_data /= max_extent | |
# translate it back to within [0, 1] | |
pcd_data += 0.5 | |
def point_to_index(point, grid_size): | |
""" | |
Maps a point in the unit cube to a unique index based on the grid size. | |
Args: | |
point (tuple): a tuple of (x, y, z) coordinates of the point. Each coordinate should be in [0, 1]. | |
grid_size (int): the number of divisions along each axis. | |
Returns: | |
int: a unique index for the point. | |
""" | |
xi = int(point[0] * grid_size) | |
yi = int(point[1] * grid_size) | |
zi = int(point[2] * grid_size) | |
# Ensure that the point is inside the unit cube | |
if not (0 <= xi < grid_size) or not (0 <= yi < grid_size) or not (0 <= zi < grid_size): | |
logging.warning( | |
f"The point is outside the unit cube: point: {point}, grid_index: ({xi}, {yi}, {zi})" | |
) | |
# Clamp the point to be inside the unit cube | |
xi = min(max(xi, 0), grid_size - 1) | |
yi = min(max(yi, 0), grid_size - 1) | |
zi = min(max(zi, 0), grid_size - 1) | |
# Compute the unique voxel ID, row-major order | |
voxel_id = xi + yi * grid_size + zi * grid_size * grid_size | |
return voxel_id | |
def scale_bbox(bbox_str, max_extent, centroid): | |
""" | |
Scale the bounding box to be within a unit cube and output numerically tokenized bounding box. | |
Args: | |
bbox_str (str): A string representing a bounding box, in the format "<x_min,y_min,z_min,x_max,y_max,z_max>". | |
max_extent (float): The maximum extent of the bounding box. | |
centroid (np.array): The centroid of the bounding box. | |
Returns: | |
str: A string representing the scaled bounding box, in the same format as the input. | |
""" | |
# Remove < and > from the bounding box string | |
bbox_str = bbox_str.strip("<>") | |
bbox_values = bbox_str.split(",") | |
# Convert each string to a float and store in a list | |
bbox_floats = [float(value) for value in bbox_values] | |
# Convert the list to a numpy array | |
bbox_array = np.array(bbox_floats) | |
bbox_array[:3] -= centroid | |
bbox_array[3:] -= centroid | |
bbox_array /= max_extent | |
bbox_array += 0.5 | |
x_min, y_min, z_min, x_max, y_max, z_max = bbox_array | |
x_min, y_min, z_min, x_max, y_max, z_max = ( | |
x_min.item(), | |
y_min.item(), | |
z_min.item(), | |
x_max.item(), | |
y_max.item(), | |
z_max.item(), | |
) | |
x_min, y_min, z_min, x_max, y_max, z_max = ( | |
round(x_min, 3), | |
round(y_min, 3), | |
round(z_min, 3), | |
round(x_max, 3), | |
round(y_max, 3), | |
round(z_max, 3), | |
) | |
new_bbox_str = f"< {x_min}, {y_min}, {z_min}, {x_max}, {y_max}, {z_max}>" # adding space after < because tokenizer will not merge < and first digit or negative sign | |
return new_bbox_str | |
def voxelize_points( | |
xyz_to_be_voxelized: np.array, | |
scene_min_xyz: np.array, | |
scene_max_xyz: np.array, | |
num_voxels_per_axis: int, | |
): | |
"""Convert points to voxel indexes | |
Args: | |
xyz_to_be_voxelized (np.array): shape (num_points, 3) | |
scene_min_xyz (np.array): shape (3,) | |
scene_max_xyz (np.array): shape (3,) | |
num_voxels_per_axis (int): number of voxels per axis | |
Returns: | |
voxel_id (np.array): shape (num_points,) | |
""" | |
voxel_index = np.floor( | |
(xyz_to_be_voxelized - scene_min_xyz) | |
/ (scene_max_xyz - scene_min_xyz) | |
* num_voxels_per_axis | |
).astype( | |
int | |
) # range after this overations: [0, num_voxels_per_axis] | |
voxel_index = np.clip( | |
voxel_index, 0, num_voxels_per_axis - 1 | |
) # clamp range to [0, num_voxels_per_axis - 1] | |
# calculate index using row-major order | |
voxel_id = ( | |
voxel_index[:, 0] | |
+ voxel_index[:, 1] * num_voxels_per_axis | |
+ voxel_index[:, 2] * num_voxels_per_axis * num_voxels_per_axis | |
) # range after this operation: [0, num_voxels_per_axis ** 3 - 1] | |
return voxel_id | |
def process_one_bbox_minkowski_loc_token( | |
bbox_str, scene_min_xyz, scene_max_xyz, num_voxels_per_axis | |
): | |
# Remove < and > from the bounding box string | |
bbox_str = bbox_str.strip("<>") | |
bbox_values = bbox_str.split(",") | |
# Convert each string to a float and store in a list | |
bbox_floats = [float(value) for value in bbox_values] | |
# Convert the list to a numpy array | |
bbox_array = np.array(bbox_floats) # shape: (6,) | |
bbox_array = bbox_array.reshape(2, 3) # shape: (2, 3) | |
voxel_indices = voxelize_points( | |
bbox_array, scene_min_xyz, scene_max_xyz, num_voxels_per_axis | |
) # shape: (2,) | |
new_bbox_str = f"<loc_{voxel_indices[0]}><loc_{voxel_indices[1]}>" | |
return new_bbox_str | |
def scale_bbox_special_token(bbox_str, max_extent, centroid, num_grid_cells): | |
""" | |
Special token for the bbox. The bbox is scaled to the unit cube and then converted | |
to a unique index based on the grid size. | |
Args: | |
bbox_str (str): bbox string in the form "<x_min, y_min, z_min, x_max, y_max, z_max>" | |
max_extent (float): max extent of the point cloud data in terms of the largest dimension | |
centroid (np.array): centroid of the point cloud data | |
num_grid_cells (int): number of grids along each axis | |
Returns: | |
two unique special tokens for the bbox as string | |
""" | |
# Remove < and > from the bounding box string | |
bbox_str = bbox_str.strip("<>") | |
bbox_values = bbox_str.split(",") | |
# Convert each string to a float and store in a list | |
bbox_floats = [float(value) for value in bbox_values] | |
# Convert the list to a numpy array | |
bbox_floats = np.array(bbox_floats) | |
bbox_floats[:3] -= centroid | |
bbox_floats[3:] -= centroid | |
bbox_floats /= max_extent | |
bbox_floats += 0.5 | |
min_point = bbox_floats[:3] | |
max_point = bbox_floats[3:] | |
index_min = point_to_index(min_point, num_grid_cells) | |
index_max = point_to_index(max_point, num_grid_cells) | |
new_bbox_str = f"<loc_{index_min}><loc_{index_max}>" | |
return new_bbox_str | |
def rotate_point_cloud_90_degrees(pcd_data): | |
""" | |
Rotate the point cloud data by 90 degrees in the x-y plane | |
Args: | |
pcd_data: point cloud data in the form x, y, z | |
Returns: | |
pcd_data: rotated point cloud data in the form x, y, z | |
""" | |
# Randomly select among no change, clockwise, and counterclockwise | |
rotation_choices = ["no change", "clockwise", "counterclockwise"] | |
direction = np.random.choice(rotation_choices) | |
if direction == "clockwise": | |
rotation_matrix = torch.tensor([[0, 1], [-1, 0]]) | |
# Apply rotation on x-y plane | |
pcd_data[:, -3:-1] = torch.matmul(pcd_data[:, -3:-1], rotation_matrix) | |
elif direction == "counterclockwise": | |
rotation_matrix = torch.tensor([[0, -1], [1, 0]]) | |
# Apply rotation on x-y plane | |
pcd_data[:, -3:-1] = torch.matmul(pcd_data[:, -3:-1], rotation_matrix) | |
return pcd_data, direction | |
def adjust_bbox_after_rotation(bbox_str, direction): | |
"""_summary_ | |
Args: | |
bbox_str (_type_): _description_ | |
direction (_type_): _description_ | |
Returns: | |
_type_: _description_ | |
""" | |
if direction == "no change": | |
return bbox_str | |
values = list(map(float, re.findall(r"[-+]?\d*\.\d+|\d+", bbox_str))) | |
x_min, y_min, z_min, x_max, y_max, z_max = values | |
if direction == "clockwise": | |
# adding space after < because tokenizer will not merge < and first digit or negative sign | |
new_bbox_str = f"< {y_min}, {x_min}, {z_min}, {y_max}, {x_max}, {z_max}>" | |
else: # counterclockwise | |
# adding space after < because tokenizer will not merge < and first digit or negative sign | |
new_bbox_str = f"< {x_max}, {y_min}, {z_min}, {x_min}, {y_max}, {z_max}>" | |
return new_bbox_str | |