Spaces:
Runtime error
Runtime error
// We don't use THRUST_PP_STRINGIZE and THRUST_PP_CAT because they are new, and | |
// we want this benchmark to be backwards-compatible to older versions of Thrust. | |
// We don't use THRUST_NOEXCEPT because it's new, and we want this benchmark to | |
// be backwards-compatible to older versions of Thrust. | |
/////////////////////////////////////////////////////////////////////////////// | |
template <typename T> | |
struct squared_difference | |
{ | |
private: | |
T const average; | |
public: | |
__host__ __device__ | |
squared_difference(squared_difference const& rhs) : average(rhs.average) {} | |
__host__ __device__ | |
squared_difference(T average_) : average(average_) {} | |
__host__ __device__ | |
T operator()(T x) const | |
{ | |
return (x - average) * (x - average); | |
} | |
}; | |
template <typename T> | |
struct value_and_count | |
{ | |
T value; | |
uint64_t count; | |
__host__ __device__ | |
value_and_count(value_and_count const& other) | |
: value(other.value), count(other.count) {} | |
__host__ __device__ | |
value_and_count(T const& value_) | |
: value(value_), count(1) {} | |
__host__ __device__ | |
value_and_count(T const& value_, uint64_t count_) | |
: value(value_), count(count_) {} | |
__host__ __device__ | |
value_and_count& operator=(value_and_count const& other) | |
{ | |
value = other.value; | |
count = other.count; | |
return *this; | |
} | |
__host__ __device__ | |
value_and_count& operator=(T const& value_) | |
{ | |
value = value_; | |
count = 1; | |
return *this; | |
} | |
}; | |
template <typename T, typename ReduceOp> | |
struct counting_op | |
{ | |
private: | |
ReduceOp reduce; | |
public: | |
__host__ __device__ | |
counting_op() : reduce() {} | |
__host__ __device__ | |
counting_op(counting_op const& other) : reduce(other.reduce) {} | |
__host__ __device__ | |
counting_op(ReduceOp const& reduce_) : reduce(reduce_) {} | |
__host__ __device__ | |
value_and_count<T> operator()( | |
value_and_count<T> const& x | |
, T const& y | |
) const | |
{ | |
return value_and_count<T>(reduce(x.value, y), x.count + 1); | |
} | |
__host__ __device__ | |
value_and_count<T> operator()( | |
value_and_count<T> const& x | |
, value_and_count<T> const& y | |
) const | |
{ | |
return value_and_count<T>(reduce(x.value, y.value), x.count + y.count); | |
} | |
}; | |
template <typename InputIt, typename T> | |
T arithmetic_mean(InputIt first, InputIt last, T init) | |
{ | |
value_and_count<T> init_vc(init, 0); | |
counting_op<T, thrust::plus<T> > reduce_vc; | |
value_and_count<T> vc | |
= thrust::reduce(first, last, init_vc, reduce_vc); | |
return vc.value / vc.count; | |
} | |
template <typename InputIt> | |
typename thrust::iterator_traits<InputIt>::value_type | |
arithmetic_mean(InputIt first, InputIt last) | |
{ | |
typedef typename thrust::iterator_traits<InputIt>::value_type T; | |
return arithmetic_mean(first, last, T()); | |
} | |
template <typename InputIt, typename T> | |
T sample_standard_deviation(InputIt first, InputIt last, T average) | |
{ | |
value_and_count<T> init_vc(T(), 0); | |
counting_op<T, thrust::plus<T> > reduce_vc; | |
squared_difference<T> transform(average); | |
value_and_count<T> vc | |
= thrust::transform_reduce(first, last, transform, init_vc, reduce_vc); | |
return std::sqrt(vc.value / T(vc.count - 1)); | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
// Formulas for propagation of uncertainty from: | |
// | |
// https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulas | |
// | |
// Even though it's Wikipedia, I trust it as I helped write that table. | |
// | |
// XXX Replace with a proper reference. | |
// Compute the propagated uncertainty from the multiplication of two uncertain | |
// values, `A +/- A_unc` and `B +/- B_unc`. Given `f = AB` or `f = A/B`, where | |
// `A != 0` and `B != 0`, the uncertainty in `f` is approximately: | |
// | |
// f_unc = abs(f) * sqrt((A_unc / A) ^ 2 + (B_unc / B) ^ 2) | |
// | |
template <typename T> | |
__host__ __device__ | |
T uncertainty_multiplicative( | |
T const& f | |
, T const& A, T const& A_unc | |
, T const& B, T const& B_unc | |
) | |
{ | |
return std::abs(f) | |
* std::sqrt((A_unc / A) * (A_unc / A) + (B_unc / B) * (B_unc / B)); | |
} | |
// Compute the propagated uncertainty from addition of two uncertain values, | |
// `A +/- A_unc` and `B +/- B_unc`. Given `f = cA + dB` (where `c` and `d` are | |
// certain constants), the uncertainty in `f` is approximately: | |
// | |
// f_unc = sqrt(c ^ 2 * A_unc ^ 2 + d ^ 2 * B_unc ^ 2) | |
// | |
template <typename T> | |
__host__ __device__ | |
T uncertainty_additive( | |
T const& c, T const& A_unc | |
, T const& d, T const& B_unc | |
) | |
{ | |
return std::sqrt((c * c * A_unc * A_unc) + (d * d * B_unc * B_unc)); | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
// Return the significant digit of `x`. The result is the number of digits | |
// after the decimal place to round to (negative numbers indicate rounding | |
// before the decimal place) | |
template <typename T> | |
int find_significant_digit(T x) | |
{ | |
if (x == T(0)) return T(0); | |
return -int(std::floor(std::log10(std::abs(x)))); | |
} | |
// Round `x` to `ndigits` after the decimal place (Python-style). | |
template <typename T, typename N> | |
T round_to_precision(T x, N ndigits) | |
{ | |
double m = (x < 0.0) ? -1.0 : 1.0; | |
double pwr = std::pow(T(10.0), ndigits); | |
return (std::floor(x * m * pwr + 0.5) / pwr) * m; | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
void print_experiment_header() | |
{ // {{{ | |
std::cout << "Thrust Version" | |
<< "," << "Algorithm" | |
<< "," << "Element Type" | |
<< "," << "Element Size" | |
<< "," << "Elements per Trial" | |
<< "," << "Total Input Size" | |
<< "," << "STL Trials" | |
<< "," << "STL Average Walltime" | |
<< "," << "STL Walltime Uncertainty" | |
<< "," << "STL Average Throughput" | |
<< "," << "STL Throughput Uncertainty" | |
<< "," << "Thrust Trials" | |
<< "," << "Thrust Average Walltime" | |
<< "," << "Thrust Walltime Uncertainty" | |
<< "," << "Thrust Average Throughput" | |
<< "," << "Thrust Throughput Uncertainty" | |
<< "," << "TBB Trials" | |
<< "," << "TBB Average Walltime" | |
<< "," << "TBB Walltime Uncertainty" | |
<< "," << "TBB Average Throughput" | |
<< "," << "TBB Throughput Uncertainty" | |
<< std::endl; | |
std::cout << "" // Thrust Version. | |
<< "," << "" // Algorithm. | |
<< "," << "" // Element Type. | |
<< "," << "bits/element" // Element Size. | |
<< "," << "elements" // Elements per Trial. | |
<< "," << "MiBs" // Total Input Size. | |
<< "," << "trials" // STL Trials. | |
<< "," << "secs" // STL Average Walltime. | |
<< "," << "secs" // STL Walltime Uncertainty. | |
<< "," << "elements/sec" // STL Average Throughput. | |
<< "," << "elements/sec" // STL Throughput Uncertainty. | |
<< "," << "trials" // Thrust Trials. | |
<< "," << "secs" // Thrust Average Walltime. | |
<< "," << "secs" // Thrust Walltime Uncertainty. | |
<< "," << "elements/sec" // Thrust Average Throughput. | |
<< "," << "elements/sec" // Thrust Throughput Uncertainty. | |
<< "," << "trials" // TBB Trials. | |
<< "," << "secs" // TBB Average Walltime. | |
<< "," << "secs" // TBB Walltime Uncertainty. | |
<< "," << "elements/sec" // TBB Average Throughput. | |
<< "," << "elements/sec" // TBB Throughput Uncertainty. | |
<< std::endl; | |
} // }}} | |
/////////////////////////////////////////////////////////////////////////////// | |
struct experiment_results | |
{ | |
double const average_time; // Arithmetic mean of trial times in seconds. | |
double const stdev_time; // Sample standard deviation of trial times. | |
experiment_results(double average_time_, double stdev_time_) | |
: average_time(average_time_), stdev_time(stdev_time_) {} | |
}; | |
/////////////////////////////////////////////////////////////////////////////// | |
template < | |
template <typename> class Test | |
, typename ElementMetaType // Has an embedded typedef `type, | |
// and a static method `name` that | |
// returns a char const*. | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
struct experiment_driver | |
{ | |
typedef typename ElementMetaType::type element_type; | |
static char const* const test_name; | |
static char const* const element_type_name; // Element type name as a string. | |
static uint64_t const elements; // # of elements per trial. | |
static uint64_t const element_size; // Size of each element in bits. | |
static double const input_size; // `elements` * `element_size` in MiB. | |
static uint64_t const baseline_trials; // # of baseline trials per experiment. | |
static uint64_t const regular_trials; // # of regular trials per experiment. | |
static void run_experiment() | |
{ // {{{ | |
experiment_results stl = std_experiment(); | |
experiment_results thrust = thrust_experiment(); | |
experiment_results tbb = tbb_experiment(); | |
double stl_average_walltime = stl.average_time; | |
double thrust_average_walltime = thrust.average_time; | |
double tbb_average_walltime = tbb.average_time; | |
double stl_average_throughput = elements / stl.average_time; | |
double thrust_average_throughput = elements / thrust.average_time; | |
double tbb_average_throughput = elements / tbb.average_time; | |
double stl_walltime_uncertainty = stl.stdev_time; | |
double thrust_walltime_uncertainty = thrust.stdev_time; | |
double tbb_walltime_uncertainty = tbb.stdev_time; | |
double stl_throughput_uncertainty = uncertainty_multiplicative( | |
stl_average_throughput | |
, double(elements), 0.0 | |
, stl_average_walltime, stl_walltime_uncertainty | |
); | |
double thrust_throughput_uncertainty = uncertainty_multiplicative( | |
thrust_average_throughput | |
, double(elements), 0.0 | |
, thrust_average_walltime, thrust_walltime_uncertainty | |
); | |
double tbb_throughput_uncertainty = uncertainty_multiplicative( | |
tbb_average_throughput | |
, double(elements), 0.0 | |
, tbb_average_walltime, tbb_walltime_uncertainty | |
); | |
// Round the average walltime and walltime uncertainty to the | |
// significant figure of the walltime uncertainty. | |
int stl_walltime_precision = std::max( | |
find_significant_digit(stl.average_time) | |
, find_significant_digit(stl.stdev_time) | |
); | |
int thrust_walltime_precision = std::max( | |
find_significant_digit(thrust.average_time) | |
, find_significant_digit(thrust.stdev_time) | |
); | |
int tbb_walltime_precision = std::max( | |
find_significant_digit(tbb.average_time) | |
, find_significant_digit(tbb.stdev_time) | |
); | |
stl_average_walltime = round_to_precision( | |
stl_average_walltime, stl_walltime_precision | |
); | |
thrust_average_walltime = round_to_precision( | |
thrust_average_walltime, thrust_walltime_precision | |
); | |
tbb_average_walltime = round_to_precision( | |
tbb_average_walltime, tbb_walltime_precision | |
); | |
stl_walltime_uncertainty = round_to_precision( | |
stl_walltime_uncertainty, stl_walltime_precision | |
); | |
thrust_walltime_uncertainty = round_to_precision( | |
thrust_walltime_uncertainty, thrust_walltime_precision | |
); | |
tbb_walltime_uncertainty = round_to_precision( | |
tbb_walltime_uncertainty, tbb_walltime_precision | |
); | |
// Round the average throughput and throughput uncertainty to the | |
// significant figure of the throughput uncertainty. | |
int stl_throughput_precision = std::max( | |
find_significant_digit(stl_average_throughput) | |
, find_significant_digit(stl_throughput_uncertainty) | |
); | |
int thrust_throughput_precision = std::max( | |
find_significant_digit(thrust_average_throughput) | |
, find_significant_digit(thrust_throughput_uncertainty) | |
); | |
int tbb_throughput_precision = std::max( | |
find_significant_digit(tbb_average_throughput) | |
, find_significant_digit(tbb_throughput_uncertainty) | |
); | |
stl_average_throughput = round_to_precision( | |
stl_average_throughput, stl_throughput_precision | |
); | |
thrust_average_throughput = round_to_precision( | |
thrust_average_throughput, thrust_throughput_precision | |
); | |
tbb_average_throughput = round_to_precision( | |
tbb_average_throughput, tbb_throughput_precision | |
); | |
stl_throughput_uncertainty = round_to_precision( | |
stl_throughput_uncertainty, stl_throughput_precision | |
); | |
thrust_throughput_uncertainty = round_to_precision( | |
thrust_throughput_uncertainty, thrust_throughput_precision | |
); | |
tbb_throughput_uncertainty = round_to_precision( | |
tbb_throughput_uncertainty, tbb_throughput_precision | |
); | |
std::cout << THRUST_VERSION // Thrust Version. | |
<< "," << test_name // Algorithm. | |
<< "," << element_type_name // Element Type. | |
<< "," << element_size // Element Size. | |
<< "," << elements // Elements per Trial. | |
<< "," << input_size // Total Input Size. | |
<< "," << baseline_trials // STL Trials. | |
<< "," << stl_average_walltime // STL Average Walltime. | |
<< "," << stl_walltime_uncertainty // STL Walltime Uncertainty. | |
<< "," << stl_average_throughput // STL Average Throughput. | |
<< "," << stl_throughput_uncertainty // STL Throughput Uncertainty. | |
<< "," << regular_trials // Thrust Trials. | |
<< "," << thrust_average_walltime // Thrust Average Walltime. | |
<< "," << thrust_walltime_uncertainty // Thrust Walltime Uncertainty. | |
<< "," << thrust_average_throughput // Thrust Average Throughput. | |
<< "," << thrust_throughput_uncertainty // Thrust Throughput Uncertainty. | |
<< "," << regular_trials // TBB Trials. | |
<< "," << tbb_average_walltime // TBB Average Walltime. | |
<< "," << tbb_walltime_uncertainty // TBB Walltime Uncertainty. | |
<< "," << tbb_average_throughput // TBB Average Throughput. | |
<< "," << tbb_throughput_uncertainty // TBB Throughput Uncertainty. | |
<< std::endl; | |
} // }}} | |
private: | |
static experiment_results std_experiment() | |
{ | |
return experiment<typename Test<element_type>::std_trial>(); | |
} | |
static experiment_results thrust_experiment() | |
{ | |
return experiment<typename Test<element_type>::thrust_trial>(); | |
} | |
static experiment_results tbb_experiment() | |
{ | |
return experiment<typename Test<element_type>::tbb_trial>(); | |
} | |
template <typename Trial> | |
static experiment_results experiment() | |
{ // {{{ | |
Trial trial; | |
// Allocate storage and generate random input for the warmup trial. | |
trial.setup(elements); | |
// Warmup trial. | |
trial(); | |
uint64_t const trials | |
= trial.is_baseline() ? baseline_trials : regular_trials; | |
std::vector<double> times; | |
times.reserve(trials); | |
for (uint64_t t = 0; t < trials; ++t) | |
{ | |
// Generate random input for next trial. | |
trial.setup(elements); | |
steady_timer e; | |
// Benchmark. | |
e.start(); | |
trial(); | |
e.stop(); | |
times.push_back(e.seconds_elapsed()); | |
} | |
double average_time | |
= arithmetic_mean(times.begin(), times.end()); | |
double stdev_time | |
= sample_standard_deviation(times.begin(), times.end(), average_time); | |
return experiment_results(average_time, stdev_time); | |
} // }}} | |
}; | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
char const* const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::test_name | |
= Test<typename ElementMetaType::type>::test_name(); | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
char const* const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::element_type_name | |
= ElementMetaType::name(); | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
uint64_t const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::element_size | |
= CHAR_BIT * sizeof(typename ElementMetaType::type); | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
uint64_t const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::elements | |
= Elements; | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
double const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::input_size | |
= double( Elements /* [elements] */ | |
* sizeof(typename ElementMetaType::type) /* [bytes/element] */ | |
) | |
/ double(1024 * 1024 /* [bytes/MiB] */); | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
uint64_t const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::baseline_trials | |
= BaselineTrials; | |
template < | |
template <typename> class Test | |
, typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
uint64_t const | |
experiment_driver< | |
Test, ElementMetaType, Elements, BaselineTrials, RegularTrials | |
>::regular_trials | |
= RegularTrials; | |
/////////////////////////////////////////////////////////////////////////////// | |
// Never create variables, pointers or references of any of the `*_trial_base` | |
// classes. They are purely mixin base classes and do not have vtables and | |
// virtual destructors. Using them for polymorphism instead of composition will | |
// probably cause slicing. | |
struct baseline_trial {}; | |
struct regular_trial {}; | |
template <typename TrialKind = regular_trial> | |
struct trial_base; | |
template <> | |
struct trial_base<baseline_trial> | |
{ | |
static bool is_baseline() { return true; } | |
}; | |
template <> | |
struct trial_base<regular_trial> | |
{ | |
static bool is_baseline() { return false; } | |
}; | |
template <typename Container, typename TrialKind = regular_trial> | |
struct inplace_trial_base : trial_base<TrialKind> | |
{ | |
Container input; | |
void setup(uint64_t elements) | |
{ | |
input.resize(elements); | |
randomize(input); | |
} | |
}; | |
template <typename Container, typename TrialKind = regular_trial> | |
struct copy_trial_base : trial_base<TrialKind> | |
{ | |
Container input; | |
Container output; | |
void setup(uint64_t elements) | |
{ | |
input.resize(elements); | |
output.resize(elements); | |
randomize(input); | |
} | |
}; | |
template <typename Container, typename TrialKind = regular_trial> | |
struct shuffle_trial_base : trial_base<TrialKind> | |
{ | |
Container input; | |
void setup(uint64_t elements) | |
{ | |
input.resize(elements); | |
randomize(input); | |
} | |
}; | |
/////////////////////////////////////////////////////////////////////////////// | |
template <typename T> | |
struct reduce_tester | |
{ | |
static char const* test_name() { return "reduce"; } | |
struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial> | |
{ | |
void operator()() | |
{ | |
if (std::accumulate(this->input.begin(), this->input.end(), T(0)) == 0) | |
// Prevent optimizer from removing body. | |
std::cout << "xyz"; | |
} | |
}; | |
struct thrust_trial : inplace_trial_base<thrust::device_vector<T> > | |
{ | |
void operator()() | |
{ | |
thrust::reduce(this->input.begin(), this->input.end()); | |
} | |
}; | |
struct tbb_trial : inplace_trial_base<std::vector<T> > | |
{ | |
void operator()() | |
{ | |
tbb_reduce(this->input); | |
} | |
}; | |
}; | |
template <typename T> | |
struct sort_tester | |
{ | |
static char const* test_name() { return "sort"; } | |
struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial> | |
{ | |
void operator()() | |
{ | |
std::sort(this->input.begin(), this->input.end()); | |
} | |
}; | |
struct thrust_trial : inplace_trial_base<thrust::device_vector<T> > | |
{ | |
void operator()() | |
{ | |
thrust::sort(this->input.begin(), this->input.end()); | |
cudaError_t err = cudaDeviceSynchronize(); | |
if (err != cudaSuccess) | |
throw thrust::error_code(err, thrust::cuda_category()); | |
} | |
}; | |
struct tbb_trial : inplace_trial_base<std::vector<T> > | |
{ | |
void operator()() | |
{ | |
tbb_sort(this->input); | |
} | |
} | |
}; | |
template <typename T> | |
struct transform_inplace_tester | |
{ | |
static char const* test_name() { return "transform_inplace"; } | |
struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial> | |
{ | |
void operator()() | |
{ | |
std::transform( | |
this->input.begin(), this->input.end(), this->input.begin() | |
, thrust::negate<T>() | |
); | |
} | |
}; | |
struct thrust_trial : inplace_trial_base<thrust::device_vector<T> > | |
{ | |
void operator()() | |
{ | |
thrust::transform( | |
this->input.begin(), this->input.end(), this->input.begin() | |
, thrust::negate<T>() | |
); | |
cudaError_t err = cudaDeviceSynchronize(); | |
if (err != cudaSuccess) | |
throw thrust::error_code(err, thrust::cuda_category()); | |
} | |
}; | |
struct tbb_trial : inplace_trial_base<std::vector<T> > | |
{ | |
void operator()() | |
{ | |
tbb_transform(this->input); | |
} | |
}; | |
}; | |
template <typename T> | |
struct inclusive_scan_inplace_tester | |
{ | |
static char const* test_name() { return "inclusive_scan_inplace"; } | |
struct std_trial : inplace_trial_base<std::vector<T>, baseline_trial> | |
{ | |
void operator()() | |
{ | |
std::partial_sum( | |
this->input.begin(), this->input.end(), this->input.begin() | |
); | |
} | |
}; | |
struct thrust_trial : inplace_trial_base<thrust::device_vector<T> > | |
{ | |
void operator()() | |
{ | |
thrust::inclusive_scan( | |
this->input.begin(), this->input.end(), this->input.begin() | |
); | |
cudaError_t err = cudaDeviceSynchronize(); | |
if (err != cudaSuccess) | |
throw thrust::error_code(err, thrust::cuda_category()); | |
} | |
}; | |
struct tbb_trial : inplace_trial_base<std::vector<T> > | |
{ | |
void operator()() | |
{ | |
tbb_scan(this->input); | |
} | |
}; | |
}; | |
template <typename T> | |
struct copy_tester | |
{ | |
static char const* test_name() { return "copy"; } | |
struct std_trial : copy_trial_base<std::vector<T> > | |
{ | |
void operator()() | |
{ | |
std::copy(this->input.begin(), this->input.end(), this->output.begin()); | |
} | |
}; | |
struct thrust_trial : copy_trial_base<thrust::device_vector<T> > | |
{ | |
void operator()() | |
{ | |
thrust::copy(this->input.begin(), this->input.end(), this->input.begin()); | |
cudaError_t err = cudaDeviceSynchronize(); | |
if (err != cudaSuccess) | |
throw thrust::error_code(err, thrust::cuda_category()); | |
} | |
}; | |
struct tbb_trial : copy_trial_base<std::vector<T> > | |
{ | |
void operator()() | |
{ | |
tbb_copy(this->input, this->output); | |
} | |
}; | |
}; | |
template <typename T> | |
struct shuffle_tester | |
{ | |
static char const* test_name() { return "shuffle"; } | |
struct std_trial : shuffle_trial_base<std::vector<T>, baseline_trial> | |
{ | |
std::default_random_engine g; | |
void operator()() | |
{ | |
std::shuffle(this->input.begin(), this->input.end(), this->g); | |
} | |
}; | |
struct thrust_trial : shuffle_trial_base<thrust::device_vector<T> > | |
{ | |
thrust::default_random_engine g; | |
void operator()() | |
{ | |
thrust::shuffle(this->input.begin(), this->input.end(), this->g); | |
cudaError_t err = cudaDeviceSynchronize(); | |
if (err != cudaSuccess) | |
throw thrust::error_code(err, thrust::cuda_category()); | |
} | |
}; | |
}; | |
/////////////////////////////////////////////////////////////////////////////// | |
template < | |
typename ElementMetaType | |
, uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
void run_core_primitives_experiments_for_type() | |
{ | |
experiment_driver< | |
reduce_tester | |
, ElementMetaType | |
, Elements / sizeof(typename ElementMetaType::type) | |
, BaselineTrials | |
, RegularTrials | |
>::run_experiment(); | |
experiment_driver< | |
transform_inplace_tester | |
, ElementMetaType | |
, Elements / sizeof(typename ElementMetaType::type) | |
, BaselineTrials | |
, RegularTrials | |
>::run_experiment(); | |
experiment_driver< | |
inclusive_scan_inplace_tester | |
, ElementMetaType | |
, Elements / sizeof(typename ElementMetaType::type) | |
, BaselineTrials | |
, RegularTrials | |
>::run_experiment(); | |
experiment_driver< | |
sort_tester | |
, ElementMetaType | |
// , Elements / sizeof(typename ElementMetaType::type) | |
, (Elements >> 6) // Sorting is more sensitive to element count than | |
// memory footprint. | |
, BaselineTrials | |
, RegularTrials | |
>::run_experiment(); | |
experiment_driver< | |
copy_tester | |
, ElementMetaType | |
, Elements / sizeof(typename ElementMetaType::type) | |
, BaselineTrials | |
, RegularTrials | |
>::run_experiment(); | |
experiment_driver< | |
shuffle_tester | |
, ElementMetaType | |
, Elements / sizeof(typename ElementMetaType::type) | |
, BaselineTrials | |
, RegularTrials | |
>::run_experiment(); | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
DEFINE_ELEMENT_META_TYPE(char); | |
DEFINE_ELEMENT_META_TYPE(int); | |
DEFINE_ELEMENT_META_TYPE(int8_t); | |
DEFINE_ELEMENT_META_TYPE(int16_t); | |
DEFINE_ELEMENT_META_TYPE(int32_t); | |
DEFINE_ELEMENT_META_TYPE(int64_t); | |
DEFINE_ELEMENT_META_TYPE(float); | |
DEFINE_ELEMENT_META_TYPE(double); | |
/////////////////////////////////////////////////////////////////////////////// | |
template < | |
uint64_t Elements | |
, uint64_t BaselineTrials | |
, uint64_t RegularTrials | |
> | |
void run_core_primitives_experiments() | |
{ | |
run_core_primitives_experiments_for_type< | |
char_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
int_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
int8_t_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
int16_t_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
int32_t_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
int64_t_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
float_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
run_core_primitives_experiments_for_type< | |
double_meta, Elements, BaselineTrials, RegularTrials | |
>(); | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
// XXX Use `std::string_view` when possible. | |
std::vector<std::string> split(std::string const& str, std::string const& delim) | |
{ | |
std::vector<std::string> tokens; | |
std::string::size_type prev = 0, pos = 0; | |
do | |
{ | |
pos = str.find(delim, prev); | |
if (pos == std::string::npos) pos = str.length(); | |
std::string token = str.substr(prev, pos - prev); | |
if (!token.empty()) tokens.push_back(token); | |
prev = pos + delim.length(); | |
} | |
while (pos < str.length() && prev < str.length()); | |
return tokens; | |
} | |
/////////////////////////////////////////////////////////////////////////////// | |
struct command_line_option_error : std::exception | |
{ | |
virtual ~command_line_option_error() NOEXCEPT {} | |
virtual const char* what() const NOEXCEPT = 0; | |
}; | |
struct only_one_option_allowed : command_line_option_error | |
{ | |
// Construct a new `only_one_option_allowed` exception. `key` is the | |
// option name and `[first, last)` is a sequence of | |
// `std::pair<std::string const, std::string>`s (the values). | |
template <typename InputIt> | |
only_one_option_allowed(std::string const& key, InputIt first, InputIt last) | |
: message() | |
{ | |
message = "Only one `--"; | |
message += key; | |
message += "` option is allowed, but multiple were received: "; | |
for (; first != last; ++first) | |
{ | |
message += "`"; | |
message += (*first).second; | |
message += "` "; | |
} | |
// Remove the trailing space added by the last iteration of the above loop. | |
message.erase(message.size() - 1, 1); | |
message += "."; | |
} | |
virtual ~only_one_option_allowed() NOEXCEPT {} | |
virtual const char* what() const NOEXCEPT | |
{ | |
return message.c_str(); | |
} | |
private: | |
std::string message; | |
}; | |
struct required_option_missing : command_line_option_error | |
{ | |
// Construct a new `requirement_option_missing` exception. `key` is the | |
// option name. | |
required_option_missing(std::string const& key) | |
: message() | |
{ | |
message = "`--"; | |
message += key; | |
message += "` option is required."; | |
} | |
virtual ~required_option_missing() NOEXCEPT {} | |
virtual const char* what() const NOEXCEPT | |
{ | |
return message.c_str(); | |
} | |
private: | |
std::string message; | |
}; | |
struct command_line_processor | |
{ | |
typedef std::vector<std::string> positional_options_type; | |
typedef std::multimap<std::string, std::string> keyword_options_type; | |
typedef std::pair< | |
keyword_options_type::const_iterator | |
, keyword_options_type::const_iterator | |
> keyword_option_values; | |
command_line_processor(int argc, char** argv) | |
: pos_args(), kw_args() | |
{ // {{{ | |
for (int i = 1; i < argc; ++i) | |
{ | |
std::string arg(argv[i]); | |
// Look for --key or --key=value options. | |
if (arg.substr(0, 2) == "--") | |
{ | |
std::string::size_type n = arg.find('=', 2); | |
keyword_options_type::value_type key_value; | |
if (n == std::string::npos) // --key | |
kw_args.insert(keyword_options_type::value_type( | |
arg.substr(2), "" | |
)); | |
else // --key=value | |
kw_args.insert(keyword_options_type::value_type( | |
arg.substr(2, n - 2), arg.substr(n + 1) | |
)); | |
kw_args.insert(key_value); | |
} | |
else // Assume it's positional. | |
pos_args.push_back(arg); | |
} | |
} // }}} | |
// Return the value for option `key`. | |
// | |
// Throws: | |
// * `only_one_option_allowed` if there is more than one value for `key`. | |
// * `required_option_missing` if there is no value for `key`. | |
std::string operator()(std::string const& key) const | |
{ | |
keyword_option_values v = kw_args.equal_range(key); | |
keyword_options_type::difference_type d = std::distance(v.first, v.second); | |
if (1 < d) // Too many options. | |
throw only_one_option_allowed(key, v.first, v.second); | |
else if (0 == d) // No option. | |
throw required_option_missing(key); | |
return (*v.first).second; | |
} | |
// Return the value for option `key`, or `dflt` if `key` has no value. | |
// | |
// Throws: `only_one_option_allowed` if there is more than one value for `key`. | |
std::string operator()(std::string const& key, std::string const& dflt) const | |
{ | |
keyword_option_values v = kw_args.equal_range(key); | |
keyword_options_type::difference_type d = std::distance(v.first, v.second); | |
if (1 < d) // Too many options. | |
throw only_one_option_allowed(key, v.first, v.second); | |
if (0 == d) // No option. | |
return dflt; | |
else // 1 option. | |
return (*v.first).second; | |
} | |
// Returns `true` if the option `key` was specified at least once. | |
bool has(std::string const& key) const | |
{ | |
return kw_args.count(key) > 0; | |
} | |
private: | |
positional_options_type pos_args; | |
keyword_options_type kw_args; | |
}; | |
/////////////////////////////////////////////////////////////////////////////// | |
int main(int argc, char** argv) | |
{ | |
command_line_processor clp(argc, argv); | |
tbb::task_scheduler_init init; | |
test_tbb(); | |
// Set the CUDA device to use for the benchmark - `0` by default. | |
int device = std::atoi(clp("device", "0").c_str()); | |
// `std::atoi` returns 0 if the conversion fails. | |
cudaSetDevice(device); | |
if (!clp.has("no-header")) | |
print_experiment_header(); | |
/* Elements | Trials */ | |
/* | Baseline | Regular */ | |
//run_core_primitives_experiments< 1LLU << 21LLU , 4 , 16 >(); | |
//run_core_primitives_experiments< 1LLU << 22LLU , 4 , 16 >(); | |
//run_core_primitives_experiments< 1LLU << 23LLU , 4 , 16 >(); | |
//run_core_primitives_experiments< 1LLU << 24LLU , 4 , 16 >(); | |
//run_core_primitives_experiments< 1LLU << 25LLU , 4 , 16 >(); | |
run_core_primitives_experiments< 1LLU << 26LLU , 4 , 16 >(); | |
run_core_primitives_experiments< 1LLU << 27LLU , 4 , 16 >(); | |
//run_core_primitives_experiments< 1LLU << 28LLU , 4 , 16 >(); | |
//run_core_primitives_experiments< 1LLU << 29LLU , 4 , 16 >(); | |
return 0; | |
} | |
// TODO: Add different input sizes and half precision | |