/*
 * GPU clustering helpers (DBSCAN-style neighbour clustering plus a per-cluster
 * pass over points), as far as can be told from the visible text.
 *
 * NOTE(review): this copy of the file looks damaged. Everything sits on one
 * physical line, and text appears to have been dropped wherever the original
 * had an opening angle bracket followed later by a closing one:
 *   - the first four #include directives have no header name;
 *   - loop headers stop right after the loop variable ("for(int j=0;j");
 *   - the block comment opened before dfs has no visible closing delimiter;
 *   - the signature of the DBSCAN kernel and of both host launchers is gone,
 *     as are the kernel launch configurations (only the closing brackets and
 *     the argument lists remain).
 * Restore the file from its original before building; do not try to repair
 * the missing pieces by hand from this copy.
 *
 * What the visible code establishes:
 *
 * get_dis(x1, y1, z1, x2, y2, z2)
 *   Device function. Returns the square root of the sum of squared
 *   differences of the three coordinate pairs, i.e. the Euclidean distance
 *   between (x1, y1, z1) and (x2, y2, z2).
 *   NOTE(review): the arguments are float; sqrtf would make the
 *   single-precision intent explicit.
 *
 * DBSCAN kernel fragment (signature lost)
 *   Returns early when its index bs_idx is not below b, then advances its
 *   pointers by bs_idx-dependent offsets: xyz by bs_idx * n * 3; idx, pts_cnt
 *   and pts_stack by bs_idx * n; pts_adj by bs_idx * n * n. The loops that
 *   follow are truncated. The kernel is launched with the arguments
 *   (b, n, eps, min_pts, xyz, idx, pts_cnt, pts_adj, pts_stack).
 *
 * DBSCAN host launcher fragment (signature lost)
 *   After the launch it calls cudaFree on pts_cnt, pts_stack and pts_adj, then
 *   reads cudaGetLastError(); on failure it prints "CUDA kernel failed" with
 *   the error string to stderr and calls exit(-1).
 *   NOTE(review): the cudaFree return values are not checked, and the error
 *   query comes after the frees rather than directly after the launch.
 *   NOTE(review): the n * n offset on pts_adj suggests that buffer holds
 *   b * n * n ints; confirm against the lost allocation code.
 *
 * cluster_pts_kernel_fast(b, n, m, xyz, idx, new_xyz, num)
 *   Computes bs_idx = blockIdx.x * blockDim.x + threadIdx.x and returns when
 *   bs_idx is not below b, so each remaining thread works on one bs_idx
 *   value. It advances xyz by bs_idx * n * 3, idx by bs_idx * n, new_xyz by
 *   bs_idx * m * 3 and num by bs_idx * m. The loop body is truncated.
 *   xyz and idx are const-qualified; new_xyz and num are not.
 *
 * cluster_pts host launcher fragment (signature lost)
 *   Launches with (b, n, m, xyz, idx, new_xyz, num) and uses the same
 *   cudaGetLastError / fprintf / exit(-1) check as above.
 */
#include #include #include #include "cluster_gpu.h" #include "cuda_utils.h" __device__ float get_dis(float x1, float y1, float z1, float x2, float y2, float z2) { float dis = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2) + (z1 - z2) * (z1 - z2); return sqrt(dis); } /* __device__ void dfs (int i, int c, int n, int min_pts, const int* pts_cnt, const int* pts_adj, int* idx, int label) { idx[i] = c; if(pts_cnt[i] < min_pts) return; for(int j=0;j= b) return; xyz += bs_idx * n * 3; idx += bs_idx * n; pts_cnt += bs_idx * n; pts_stack += bs_idx * n; pts_adj += bs_idx * n * n; for(int i=0;i= min_pts) { for(int j=0;j>>(b, n, eps, min_pts, xyz, idx, pts_cnt, pts_adj, pts_stack); // cudaDeviceSynchronize(); // for using printf in kernel function cudaFree(pts_cnt); cudaFree(pts_stack); cudaFree(pts_adj); err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } } __global__ void cluster_pts_kernel_fast(int b, int n, int m, const float *__restrict__ xyz, const int *__restrict__ idx, float *__restrict__ new_xyz, int *__restrict__ num) { int bs_idx = blockIdx.x * blockDim.x + threadIdx.x; if (bs_idx >= b ) return; xyz += bs_idx * n * 3; idx += bs_idx * n; new_xyz += bs_idx * m * 3; num += bs_idx * m; for(int i=0;i>>(b, n, m, xyz, idx, new_xyz, num); // cudaDeviceSynchronize(); // for using printf in kernel function err = cudaGetLastError(); if (cudaSuccess != err) { fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); exit(-1); } }