LIVE / thrust /cub /iterator /tex_obj_input_iterator.cuh
Xu Ma
update
1c3c0d9
raw
history blame
11 kB
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* Random-access iterator types
*/
#pragma once
#include <iterator>
#include <iostream>
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../util_device.cuh"
#include "../util_debug.cuh"
#include "../config.cuh"
#if (THRUST_VERSION >= 100700)
// This iterator is compatible with Thrust API 1.7 and newer
#include <thrust/iterator/iterator_facade.h>
#include <thrust/iterator/iterator_traits.h>
#endif // THRUST_VERSION
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/**
* \addtogroup UtilIterator
* @{
*/
/**
* \brief A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects.
*
* \par Overview
* - TexObjInputIterator wraps a native device pointer of type <tt>ValueType*</tt>. References
* to elements are to be loaded through texture cache.
* - Can be used to load any data type from memory through texture cache.
* - Can be manipulated and exchanged within and between host and device
* functions, can only be constructed within host functions, and can only be
* dereferenced within device functions.
* - With regard to nested/dynamic parallelism, TexObjInputIterator iterators may only be
* created by the host thread, but can be used by any descendant kernel.
* - Compatible with Thrust API v1.7 or newer.
*
* \par Snippet
* The code snippet below illustrates the use of \p TexRefInputIterator to
* dereference a device array of doubles through texture cache.
* \par
* \code
* #include <cub/cub.cuh> // or equivalently <cub/iterator/tex_obj_input_iterator.cuh>
*
* // Declare, allocate, and initialize a device array
* int num_items; // e.g., 7
* double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0]
*
* // Create an iterator wrapper
* cub::TexObjInputIterator<double> itr;
* itr.BindTexture(d_in, sizeof(double) * num_items);
* ...
*
* // Within device code:
* printf("%f\n", itr[0]); // 8.0
* printf("%f\n", itr[1]); // 6.0
* printf("%f\n", itr[6]); // 9.0
*
* ...
* itr.UnbindTexture();
*
* \endcode
*
* \tparam T The value type of this iterator
* \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t)
*/
template <
typename T,
typename OffsetT = ptrdiff_t>
class TexObjInputIterator
{
public:
// Required iterator traits
typedef TexObjInputIterator self_type; ///< My own type
typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another
typedef T value_type; ///< The type of the element the iterator can point to
typedef T* pointer; ///< The type of a pointer to an element the iterator can point to
typedef T reference; ///< The type of a reference to an element the iterator can point to
#if (THRUST_VERSION >= 100700)
// Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
typedef typename thrust::detail::iterator_facade_category<
thrust::device_system_tag,
thrust::random_access_traversal_tag,
value_type,
reference
>::type iterator_category; ///< The iterator category
#else
typedef std::random_access_iterator_tag iterator_category; ///< The iterator category
#endif // THRUST_VERSION
private:
// Largest texture word we can use in device
typedef typename UnitWord<T>::TextureWord TextureWord;
// Number of texture words per T
enum {
TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
};
private:
T* ptr;
difference_type tex_offset;
cudaTextureObject_t tex_obj;
public:
/// Constructor
__host__ __device__ __forceinline__ TexObjInputIterator()
:
ptr(NULL),
tex_offset(0),
tex_obj(0)
{}
/// Use this iterator to bind \p ptr with a texture reference
template <typename QualifiedT>
cudaError_t BindTexture(
QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
size_t bytes = size_t(-1), ///< Number of bytes in the range
size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator
{
this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
this->tex_offset = tex_offset;
cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<TextureWord>();
cudaResourceDesc res_desc;
cudaTextureDesc tex_desc;
memset(&res_desc, 0, sizeof(cudaResourceDesc));
memset(&tex_desc, 0, sizeof(cudaTextureDesc));
res_desc.resType = cudaResourceTypeLinear;
res_desc.res.linear.devPtr = this->ptr;
res_desc.res.linear.desc = channel_desc;
res_desc.res.linear.sizeInBytes = bytes;
tex_desc.readMode = cudaReadModeElementType;
return CubDebug(cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL));
}
/// Unbind this iterator from its texture reference
cudaError_t UnbindTexture()
{
return CubDebug(cudaDestroyTextureObject(tex_obj));
}
/// Postfix increment
__host__ __device__ __forceinline__ self_type operator++(int)
{
self_type retval = *this;
tex_offset++;
return retval;
}
/// Prefix increment
__host__ __device__ __forceinline__ self_type operator++()
{
tex_offset++;
return *this;
}
/// Indirection
__host__ __device__ __forceinline__ reference operator*() const
{
if (CUB_IS_HOST_CODE) {
#if CUB_INCLUDE_HOST_CODE
// Simply dereference the pointer on the host
return ptr[tex_offset];
#endif
} else {
#if CUB_INCLUDE_DEVICE_CODE
// Move array of uninitialized words, then alias and assign to return value
TextureWord words[TEXTURE_MULTIPLE];
#pragma unroll
for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
{
words[i] = tex1Dfetch<TextureWord>(
tex_obj,
(tex_offset * TEXTURE_MULTIPLE) + i);
}
// Load from words
return *reinterpret_cast<T*>(words);
#else
// This is dead code which will never be executed. It is here
// only to avoid warnings about missing return statements.
return ptr[tex_offset];
#endif
}
}
/// Addition
template <typename Distance>
__host__ __device__ __forceinline__ self_type operator+(Distance n) const
{
self_type retval;
retval.ptr = ptr;
retval.tex_obj = tex_obj;
retval.tex_offset = tex_offset + n;
return retval;
}
/// Addition assignment
template <typename Distance>
__host__ __device__ __forceinline__ self_type& operator+=(Distance n)
{
tex_offset += n;
return *this;
}
/// Subtraction
template <typename Distance>
__host__ __device__ __forceinline__ self_type operator-(Distance n) const
{
self_type retval;
retval.ptr = ptr;
retval.tex_obj = tex_obj;
retval.tex_offset = tex_offset - n;
return retval;
}
/// Subtraction assignment
template <typename Distance>
__host__ __device__ __forceinline__ self_type& operator-=(Distance n)
{
tex_offset -= n;
return *this;
}
/// Distance
__host__ __device__ __forceinline__ difference_type operator-(self_type other) const
{
return tex_offset - other.tex_offset;
}
/// Array subscript
template <typename Distance>
__host__ __device__ __forceinline__ reference operator[](Distance n) const
{
self_type offset = (*this) + n;
return *offset;
}
/// Structure dereference
__host__ __device__ __forceinline__ pointer operator->()
{
return &(*(*this));
}
/// Equal to
__host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
{
return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj));
}
/// Not equal to
__host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
{
return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj));
}
/// ostream operator
friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
{
return os;
}
};
/** @} */ // end group UtilIterator
} // CUB namespace
CUB_NS_POSTFIX // Optional outer namespace(s)