/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/
/**
* \file
* Random-access iterator types
*/
#pragma once
#include <iterator>
#include <iostream>
#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../util_device.cuh"
#include "../util_debug.cuh"
#include "../config.cuh"
#if (CUDART_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer
#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer
#include <thrust/iterator/iterator_facade.h>
#include <thrust/iterator/iterator_traits.h>
#endif // THRUST_VERSION
/// Optional outer namespace(s)
CUB_NS_PREFIX
/// CUB namespace
namespace cub {
/******************************************************************************
* Static file-scope Tesla/Fermi-style texture references
*****************************************************************************/
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
// Anonymous namespace
namespace {
/// Global texture reference specialized by type
template <typename T>
struct IteratorTexRef
{
/// And by unique ID
template <int UNIQUE_ID>
struct TexId
{
// Largest texture word we can use in device code
typedef typename UnitWord<T>::DeviceWord DeviceWord;
typedef typename UnitWord<T>::TextureWord TextureWord;
// Number of texture words per T
enum {
DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord),
TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
};
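// For example, an 8-byte T read through 4-byte texture words gives
// TEXTURE_MULTIPLE == 2 (two tex1Dfetch calls per item)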
// Texture reference type
typedef texture<TextureWord> TexRef;
// Texture reference
static TexRef ref;
/// Bind texture
static cudaError_t BindTexture(void *d_in, size_t &offset)
{
if (d_in)
{
cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc<TextureWord>();
ref.channelDesc = tex_desc;
return (CubDebug(cudaBindTexture(&offset, ref, d_in)));
}
return cudaSuccess;
}
/// Unbind texture
static cudaError_t UnbindTexture()
{
return CubDebug(cudaUnbindTexture(ref));
}
/// Fetch element
template <typename Distance>
static __device__ __forceinline__ T Fetch(Distance tex_offset)
{
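// Stage the item as device words, then alias the same storage as texture
// words so that each tex1Dfetch below fills one texture word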
DeviceWord temp[DEVICE_MULTIPLE];
TextureWord *words = reinterpret_cast<TextureWord*>(temp);
#pragma unroll
for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
{
words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i);
}
return reinterpret_cast<T&>(temp);
}
};
};
// Out-of-line definitions of the static texture references (one per (T, UNIQUE_ID) instantiation, per translation unit)
template <typename T>
template <int UNIQUE_ID>
typename IteratorTexRef<T>::template TexId<UNIQUE_ID>::TexRef IteratorTexRef<T>::template TexId<UNIQUE_ID>::ref = 0;
} // Anonymous namespace
#endif // DOXYGEN_SHOULD_SKIP_THIS
/**
* \addtogroup UtilIterator
* @{
*/
/**
* \brief A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references.
*
* \par Overview
* - TexRefInputIterator wraps a native device pointer of type <tt>ValueType*</tt>. References
*   to elements are loaded through the texture cache.
* - Can be used to load any data type from memory through texture cache.
* - Can be manipulated and exchanged within and between host and device
* functions, can only be constructed within host functions, and can only be
* dereferenced within device functions.
* - The \p UNIQUE_ID template parameter is used to statically name the underlying texture
*   reference. Only one TexRefInputIterator instance can be bound at any given time for a
*   specific combination of (1) data type \p T, (2) \p UNIQUE_ID, (3) host
*   thread, and (4) compilation unit (.o file).
* - With regard to nested/dynamic parallelism, TexRefInputIterator iterators may only be
*   created by the host thread and used by a top-level kernel (i.e., one launched
*   from the host).
* - Compatible with Thrust API v1.7 or newer.
* - Compatible with CUDA toolkit v5.5 or newer.
*
* \par Snippet
* The code snippet below illustrates the use of \p TexRefInputIterator to
* dereference a device array of doubles through texture cache.
* \par
* \code
* #include <cub/cub.cuh> // or equivalently <cub/iterator/tex_ref_input_iterator.cuh>
*
* // Declare, allocate, and initialize a device array
* int num_items; // e.g., 7
* double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0]
*
* // Create an iterator wrapper
* cub::TexRefInputIterator<double, __LINE__> itr;
* itr.BindTexture(d_in, sizeof(double) * num_items);
* ...
*
* // Within device code:
* printf("%f\n", itr[0]); // 8.0
* printf("%f\n", itr[1]); // 6.0
* printf("%f\n", itr[6]); // 9.0
*
* ...
* itr.UnbindTexture();
*
* \endcode
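*
* \par
* A further sketch (the \p PrintKernel kernel below is hypothetical, not part of CUB)
* showing the iterator passed by value to a top-level kernel, per the
* nested/dynamic-parallelism restriction above:
* \par
* \code
* template <typename IteratorT>
* __global__ void PrintKernel(IteratorT itr, int num_items)
* {
*     if (threadIdx.x < num_items)
*         printf("%f\n", itr[threadIdx.x]);
* }
*
* PrintKernel<<<1, 32>>>(itr, num_items);
* \endcode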
*
* \tparam T The value type of this iterator
* \tparam UNIQUE_ID A globally-unique identifier (within the compilation unit) to name the underlying texture reference
* \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t)
*/
template <
typename T,
int UNIQUE_ID,
typename OffsetT = ptrdiff_t>
class TexRefInputIterator
{
public:
// Required iterator traits
typedef TexRefInputIterator self_type; ///< My own type
typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another
typedef T value_type; ///< The type of the element the iterator can point to
typedef T* pointer; ///< The type of a pointer to an element the iterator can point to
typedef T reference; ///< The type of a reference to an element the iterator can point to
#if (THRUST_VERSION >= 100700)
// Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
typedef typename thrust::detail::iterator_facade_category<
thrust::device_system_tag,
thrust::random_access_traversal_tag,
value_type,
reference
>::type iterator_category; ///< The iterator category
#else
typedef std::random_access_iterator_tag iterator_category; ///< The iterator category
#endif // THRUST_VERSION
private:
T* ptr;
difference_type tex_offset;
// Texture reference wrapper (old Tesla/Fermi-style textures)
typedef typename IteratorTexRef<T>::template TexId<UNIQUE_ID> TexId;
public:
/*
/// Constructor
__host__ __device__ __forceinline__ TexRefInputIterator()
:
ptr(NULL),
tex_offset(0)
{}
*/
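// Note: with the constructor above disabled, default construction leaves
// ptr and tex_offset indeterminate; BindTexture() establishes both members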
/// Use this iterator to bind \p ptr with a texture reference
template <typename QualifiedT>
cudaError_t BindTexture(
QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
size_t /*bytes*/ = size_t(-1), ///< Number of bytes in the range
size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator
{
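// Strip cv-qualifiers so the wrapper can store a mutable pointer for host-side dereference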
this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
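// cudaBindTexture may bind at an aligned address at or below (ptr + tex_offset);
// it reports the remainder in bytes, which is converted back into whole items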
size_t offset;
cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset);
this->tex_offset = (difference_type) (offset / sizeof(QualifiedT));
return retval;
}
/// Unbind this iterator from its texture reference
cudaError_t UnbindTexture()
{
return TexId::UnbindTexture();
}
/// Postfix increment
__host__ __device__ __forceinline__ self_type operator++(int)
{
self_type retval = *this;
tex_offset++;
return retval;
}
/// Prefix increment
__host__ __device__ __forceinline__ self_type operator++()
{
tex_offset++;
return *this;
}
/// Indirection
__host__ __device__ __forceinline__ reference operator*() const
{
if (CUB_IS_HOST_CODE) {
// Simply dereference the pointer on the host
return ptr[tex_offset];
} else {
#if CUB_INCLUDE_DEVICE_CODE
// Use the texture reference
return TexId::Fetch(tex_offset);
#else
// This is dead code that will never be executed. It is here
// only to avoid warnings about missing returns.
return ptr[tex_offset];
#endif
}
}
/// Addition
template <typename Distance>
__host__ __device__ __forceinline__ self_type operator+(Distance n) const
{
self_type retval;
retval.ptr = ptr;
retval.tex_offset = tex_offset + n;
return retval;
}
/// Addition assignment
template <typename Distance>
__host__ __device__ __forceinline__ self_type& operator+=(Distance n)
{
tex_offset += n;
return *this;
}
/// Subtraction
template <typename Distance>
__host__ __device__ __forceinline__ self_type operator-(Distance n) const
{
self_type retval;
retval.ptr = ptr;
retval.tex_offset = tex_offset - n;
return retval;
}
/// Subtraction assignment
template <typename Distance>
__host__ __device__ __forceinline__ self_type& operator-=(Distance n)
{
tex_offset -= n;
return *this;
}
/// Distance
__host__ __device__ __forceinline__ difference_type operator-(self_type other) const
{
return tex_offset - other.tex_offset;
}
/// Array subscript
template <typename Distance>
__host__ __device__ __forceinline__ reference operator[](Distance n) const
{
self_type offset = (*this) + n;
return *offset;
}
/// Structure dereference
__host__ __device__ __forceinline__ pointer operator->()
{
return &(*(*this));
}
/// Equal to
__host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
{
return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset));
}
/// Not equal to
__host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
{
return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset));
}
/// ostream operator
friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/)
{
return os;
}
};
/** @} */ // end group UtilIterator
} // CUB namespace
CUB_NS_POSTFIX // Optional outer namespace(s)
#endif // CUDART_VERSION