Spaces:
Runtime error
Runtime error
/****************************************************************************** | |
* Copyright (c) 2011, Duane Merrill. All rights reserved. | |
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions are met: | |
* * Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* * Redistributions in binary form must reproduce the above copyright | |
* notice, this list of conditions and the following disclaimer in the | |
* documentation and/or other materials provided with the distribution. | |
* * Neither the name of the NVIDIA CORPORATION nor the | |
* names of its contributors may be used to endorse or promote products | |
* derived from this software without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY | |
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
* | |
******************************************************************************/ | |
/****************************************************************************** | |
* Test of BlockHistogram utilities | |
******************************************************************************/ | |
// Ensure printing of CUDA runtime errors to console | |
#define CUB_STDERR | |
#include <stdio.h> | |
#include <limits> | |
#include <string> | |
#include <typeinfo> | |
#include <cub/block/block_histogram.cuh> | |
#include <cub/block/block_load.cuh> | |
#include <cub/block/block_store.cuh> | |
#include <cub/util_allocator.cuh> | |
#include "test_util.h" | |
using namespace cub; | |
//--------------------------------------------------------------------- | |
// Globals, constants and typedefs | |
//--------------------------------------------------------------------- | |
bool g_verbose = false; | |
int g_timing_iterations = 0; | |
int g_repeat = 0; | |
CachingDeviceAllocator g_allocator(true); | |
//--------------------------------------------------------------------- | |
// Test kernels | |
//--------------------------------------------------------------------- | |
/** | |
* BlockHistogram test kernel. | |
*/ | |
template < | |
int BINS, | |
int BLOCK_THREADS, | |
int ITEMS_PER_THREAD, | |
BlockHistogramAlgorithm ALGORITHM, | |
typename T, | |
typename HistoCounter> | |
__global__ void BlockHistogramKernel( | |
T *d_samples, | |
HistoCounter *d_histogram) | |
{ | |
// Parameterize BlockHistogram type for our thread block | |
typedef BlockHistogram<T, BLOCK_THREADS, ITEMS_PER_THREAD, BINS, ALGORITHM> BlockHistogram; | |
// Allocate temp storage in shared memory | |
__shared__ typename BlockHistogram::TempStorage temp_storage; | |
// Per-thread tile data | |
T data[ITEMS_PER_THREAD]; | |
LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_samples, data); | |
// Test histo (writing directly to histogram buffer in global) | |
BlockHistogram(temp_storage).Histogram(data, d_histogram); | |
} | |
/** | |
* Initialize problem (and solution) | |
*/ | |
template < | |
int BINS, | |
typename SampleT> | |
void Initialize( | |
GenMode gen_mode, | |
SampleT *h_samples, | |
int *h_histograms_linear, | |
int num_samples) | |
{ | |
// Init bins | |
for (int bin = 0; bin < BINS; ++bin) | |
{ | |
h_histograms_linear[bin] = 0; | |
} | |
if (g_verbose) printf("Samples: \n"); | |
// Initialize interleaved channel samples and histogram them correspondingly | |
for (int i = 0; i < num_samples; ++i) | |
{ | |
InitValue(gen_mode, h_samples[i], i); | |
h_samples[i] %= BINS; | |
if (g_verbose) std::cout << CoutCast(h_samples[i]) << ", "; | |
h_histograms_linear[h_samples[i]]++; | |
} | |
if (g_verbose) printf("\n\n"); | |
} | |
/** | |
* Test BlockHistogram | |
*/ | |
template < | |
typename SampleT, | |
int BINS, | |
int BLOCK_THREADS, | |
int ITEMS_PER_THREAD, | |
BlockHistogramAlgorithm ALGORITHM> | |
void Test( | |
GenMode gen_mode) | |
{ | |
int num_samples = BLOCK_THREADS * ITEMS_PER_THREAD; | |
printf("cub::BlockHistogram %s %d %s samples (%dB), %d bins, %d threads, gen-mode %s\n", | |
(ALGORITHM == BLOCK_HISTO_SORT) ? "BLOCK_HISTO_SORT" : "BLOCK_HISTO_ATOMIC", | |
num_samples, | |
typeid(SampleT).name(), | |
(int) sizeof(SampleT), | |
BINS, | |
BLOCK_THREADS, | |
(gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? "SEQUENTIAL" : "HOMOGENOUS"); | |
fflush(stdout); | |
// Allocate host arrays | |
SampleT *h_samples = new SampleT[num_samples]; | |
int *h_reference = new int[BINS]; | |
// Initialize problem | |
Initialize<BINS>(gen_mode, h_samples, h_reference, num_samples); | |
// Allocate problem device arrays | |
SampleT *d_samples = NULL; | |
int *d_histogram = NULL; | |
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * num_samples)); | |
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram, sizeof(int) * BINS)); | |
// Initialize/clear device arrays | |
CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * num_samples, cudaMemcpyHostToDevice)); | |
CubDebugExit(cudaMemset(d_histogram, 0, sizeof(int) * BINS)); | |
// Run kernel | |
BlockHistogramKernel<BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM><<<1, BLOCK_THREADS>>>( | |
d_samples, | |
d_histogram); | |
// Check for correctness (and display results, if specified) | |
int compare = CompareDeviceResults((int*) h_reference, d_histogram, BINS, g_verbose, g_verbose); | |
printf("\t%s\n\n", compare ? "FAIL" : "PASS"); | |
// Flush any stdout/stderr | |
CubDebugExit(cudaPeekAtLastError()); | |
CubDebugExit(cudaDeviceSynchronize()); | |
fflush(stdout); | |
fflush(stderr); | |
// Cleanup | |
if (h_samples) delete[] h_samples; | |
if (h_reference) delete[] h_reference; | |
if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples)); | |
if (d_histogram) CubDebugExit(g_allocator.DeviceFree(d_histogram)); | |
// Correctness asserts | |
AssertEquals(0, compare); | |
} | |
/** | |
* Test different sample distributions | |
*/ | |
template < | |
typename SampleT, | |
int BINS, | |
int BLOCK_THREADS, | |
int ITEMS_PER_THREAD, | |
BlockHistogramAlgorithm ALGORITHM> | |
void Test() | |
{ | |
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(UNIFORM); | |
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(INTEGER_SEED); | |
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(RANDOM); | |
} | |
/** | |
* Test different ALGORITHM | |
*/ | |
template < | |
typename SampleT, | |
int BINS, | |
int BLOCK_THREADS, | |
int ITEMS_PER_THREAD> | |
void Test() | |
{ | |
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_SORT>(); | |
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_ATOMIC>(); | |
} | |
/** | |
* Test different ITEMS_PER_THREAD | |
*/ | |
template < | |
typename SampleT, | |
int BINS, | |
int BLOCK_THREADS> | |
void Test() | |
{ | |
Test<SampleT, BINS, BLOCK_THREADS, 1>(); | |
Test<SampleT, BINS, BLOCK_THREADS, 5>(); | |
} | |
/** | |
* Test different BLOCK_THREADS | |
*/ | |
template < | |
typename SampleT, | |
int BINS> | |
void Test() | |
{ | |
Test<SampleT, BINS, 32>(); | |
Test<SampleT, BINS, 96>(); | |
Test<SampleT, BINS, 128>(); | |
} | |
//--------------------------------------------------------------------- | |
// Main | |
//--------------------------------------------------------------------- | |
/** | |
* Main | |
*/ | |
int main(int argc, char** argv) | |
{ | |
// Initialize command line | |
CommandLineArgs args(argc, argv); | |
g_verbose = args.CheckCmdLineFlag("v"); | |
args.GetCmdLineArgument("repeat", g_repeat); | |
// Print usage | |
if (args.CheckCmdLineFlag("help")) | |
{ | |
printf("%s " | |
"[--n=<total input samples across all channels> " | |
"[--device=<device-id>] " | |
"[--repeat=<repetitions of entire test suite>]" | |
"[--v] " | |
"\n", argv[0]); | |
exit(0); | |
} | |
// Initialize device | |
CubDebugExit(args.DeviceInit()); | |
#ifdef QUICK_TEST | |
// Compile/run quick tests | |
Test<unsigned char, 256, 128, 4, BLOCK_HISTO_SORT>(RANDOM); | |
Test<unsigned char, 256, 128, 4, BLOCK_HISTO_ATOMIC>(RANDOM); | |
#else | |
// Compile/run thorough tests | |
for (int i = 0; i <= g_repeat; ++i) | |
{ | |
Test<unsigned char, 32>(); | |
Test<unsigned char, 256>(); | |
Test<unsigned short, 1024>(); | |
} | |
#endif | |
return 0; | |
} | |