LIVE / thrust /dependencies /cub /test /test_block_histogram.cu
Xu Ma
update
1c3c0d9
raw
history blame
9.62 kB
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/******************************************************************************
* Test of BlockHistogram utilities
******************************************************************************/
// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR
#include <stdio.h>
#include <limits>
#include <string>
#include <typeinfo>
#include <cub/block/block_histogram.cuh>
#include <cub/block/block_load.cuh>
#include <cub/block/block_store.cuh>
#include <cub/util_allocator.cuh>
#include "test_util.h"
using namespace cub;
//---------------------------------------------------------------------
// Globals, constants and typedefs
//---------------------------------------------------------------------
bool g_verbose = false;
int g_timing_iterations = 0;
int g_repeat = 0;
CachingDeviceAllocator g_allocator(true);
//---------------------------------------------------------------------
// Test kernels
//---------------------------------------------------------------------
/**
* BlockHistogram test kernel.
*/
template <
int BINS,
int BLOCK_THREADS,
int ITEMS_PER_THREAD,
BlockHistogramAlgorithm ALGORITHM,
typename T,
typename HistoCounter>
__global__ void BlockHistogramKernel(
T *d_samples,
HistoCounter *d_histogram)
{
// Parameterize BlockHistogram type for our thread block
typedef BlockHistogram<T, BLOCK_THREADS, ITEMS_PER_THREAD, BINS, ALGORITHM> BlockHistogram;
// Allocate temp storage in shared memory
__shared__ typename BlockHistogram::TempStorage temp_storage;
// Per-thread tile data
T data[ITEMS_PER_THREAD];
LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_samples, data);
// Test histo (writing directly to histogram buffer in global)
BlockHistogram(temp_storage).Histogram(data, d_histogram);
}
/**
* Initialize problem (and solution)
*/
template <
int BINS,
typename SampleT>
void Initialize(
GenMode gen_mode,
SampleT *h_samples,
int *h_histograms_linear,
int num_samples)
{
// Init bins
for (int bin = 0; bin < BINS; ++bin)
{
h_histograms_linear[bin] = 0;
}
if (g_verbose) printf("Samples: \n");
// Initialize interleaved channel samples and histogram them correspondingly
for (int i = 0; i < num_samples; ++i)
{
InitValue(gen_mode, h_samples[i], i);
h_samples[i] %= BINS;
if (g_verbose) std::cout << CoutCast(h_samples[i]) << ", ";
h_histograms_linear[h_samples[i]]++;
}
if (g_verbose) printf("\n\n");
}
/**
* Test BlockHistogram
*/
template <
typename SampleT,
int BINS,
int BLOCK_THREADS,
int ITEMS_PER_THREAD,
BlockHistogramAlgorithm ALGORITHM>
void Test(
GenMode gen_mode)
{
int num_samples = BLOCK_THREADS * ITEMS_PER_THREAD;
printf("cub::BlockHistogram %s %d %s samples (%dB), %d bins, %d threads, gen-mode %s\n",
(ALGORITHM == BLOCK_HISTO_SORT) ? "BLOCK_HISTO_SORT" : "BLOCK_HISTO_ATOMIC",
num_samples,
typeid(SampleT).name(),
(int) sizeof(SampleT),
BINS,
BLOCK_THREADS,
(gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? "SEQUENTIAL" : "HOMOGENOUS");
fflush(stdout);
// Allocate host arrays
SampleT *h_samples = new SampleT[num_samples];
int *h_reference = new int[BINS];
// Initialize problem
Initialize<BINS>(gen_mode, h_samples, h_reference, num_samples);
// Allocate problem device arrays
SampleT *d_samples = NULL;
int *d_histogram = NULL;
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * num_samples));
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram, sizeof(int) * BINS));
// Initialize/clear device arrays
CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * num_samples, cudaMemcpyHostToDevice));
CubDebugExit(cudaMemset(d_histogram, 0, sizeof(int) * BINS));
// Run kernel
BlockHistogramKernel<BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM><<<1, BLOCK_THREADS>>>(
d_samples,
d_histogram);
// Check for correctness (and display results, if specified)
int compare = CompareDeviceResults((int*) h_reference, d_histogram, BINS, g_verbose, g_verbose);
printf("\t%s\n\n", compare ? "FAIL" : "PASS");
// Flush any stdout/stderr
CubDebugExit(cudaPeekAtLastError());
CubDebugExit(cudaDeviceSynchronize());
fflush(stdout);
fflush(stderr);
// Cleanup
if (h_samples) delete[] h_samples;
if (h_reference) delete[] h_reference;
if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples));
if (d_histogram) CubDebugExit(g_allocator.DeviceFree(d_histogram));
// Correctness asserts
AssertEquals(0, compare);
}
/**
* Test different sample distributions
*/
template <
typename SampleT,
int BINS,
int BLOCK_THREADS,
int ITEMS_PER_THREAD,
BlockHistogramAlgorithm ALGORITHM>
void Test()
{
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(UNIFORM);
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(INTEGER_SEED);
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, ALGORITHM>(RANDOM);
}
/**
* Test different ALGORITHM
*/
template <
typename SampleT,
int BINS,
int BLOCK_THREADS,
int ITEMS_PER_THREAD>
void Test()
{
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_SORT>();
Test<SampleT, BINS, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_HISTO_ATOMIC>();
}
/**
* Test different ITEMS_PER_THREAD
*/
template <
typename SampleT,
int BINS,
int BLOCK_THREADS>
void Test()
{
Test<SampleT, BINS, BLOCK_THREADS, 1>();
Test<SampleT, BINS, BLOCK_THREADS, 5>();
}
/**
* Test different BLOCK_THREADS
*/
template <
typename SampleT,
int BINS>
void Test()
{
Test<SampleT, BINS, 32>();
Test<SampleT, BINS, 96>();
Test<SampleT, BINS, 128>();
}
//---------------------------------------------------------------------
// Main
//---------------------------------------------------------------------
/**
* Main
*/
int main(int argc, char** argv)
{
// Initialize command line
CommandLineArgs args(argc, argv);
g_verbose = args.CheckCmdLineFlag("v");
args.GetCmdLineArgument("repeat", g_repeat);
// Print usage
if (args.CheckCmdLineFlag("help"))
{
printf("%s "
"[--n=<total input samples across all channels> "
"[--device=<device-id>] "
"[--repeat=<repetitions of entire test suite>]"
"[--v] "
"\n", argv[0]);
exit(0);
}
// Initialize device
CubDebugExit(args.DeviceInit());
#ifdef QUICK_TEST
// Compile/run quick tests
Test<unsigned char, 256, 128, 4, BLOCK_HISTO_SORT>(RANDOM);
Test<unsigned char, 256, 128, 4, BLOCK_HISTO_ATOMIC>(RANDOM);
#else
// Compile/run thorough tests
for (int i = 0; i <= g_repeat; ++i)
{
Test<unsigned char, 32>();
Test<unsigned char, 256>();
Test<unsigned short, 1024>();
}
#endif
return 0;
}