CUDA纹理存储器的特性及其使用-技术开发专区

CUDA纹理存储器的特性及其使用

作者：张舒的博客　编辑：覃里　2009-11-27 14:27　来源：IT168

　　3.2.4.3 纹理绑定

　　在kernel能用纹理参照系从纹理内存中读数据前，纹理参照系必须通过cudaBindTexture()或cudaBindTextureToArray()绑定到纹理上。cudaUnbindTexture()用于解除纹理参照系的绑定。

　　以下代码示例绑定一个纹理参照系到devPtr指向的线性内存：

使用低级API：

// Low-level (C-style) API: bind a 2D float texture reference to linear memory.
// NOTE(review): devPtr, width, height and pitch are not declared in this
// snippet; they are assumed to come from the surrounding program.
texture<float, 2, cudaReadModeElementType> texRef;

// The low-level calls take a pointer to the untyped textureReference, so look
// it up first. The pointer must be const-qualified to match the parameter type
// of cudaGetTextureReference().
const textureReference* texRefPtr;

// The symbol name must be an ordinary string literal in straight ASCII quotes;
// typographic quotes are not valid C++.
// NOTE(review): toolkits from CUDA 5.0 onward expect the symbol itself
// (&texRef) instead of its name as a string -- confirm against the toolkit in use.
cudaGetTextureReference(&texRefPtr, "texRef");

// Describe the texel format; it has to agree with the float element type
// used in the texture declaration above.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

// 0 is passed for the optional byte-offset output parameter.
cudaBindTexture2D(0, texRefPtr, devPtr, &channelDesc, width, height, pitch);

使用高级API：

// High-level (C++) API: bind a 2D float texture reference to linear memory.
// NOTE(review): devPtr, width, height and pitch are not declared in this
// snippet; they are assumed to come from the surrounding program.
texture<float, 2, cudaReadModeElementType> texRef;

// Describe the texel format; it has to agree with the float element type
// used in the texture declaration above.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();

// The C++ overload takes the typed texture reference and the channel
// descriptor by reference, so channelDesc is passed directly rather than by
// address. 0 is passed for the optional byte-offset output parameter.
cudaBindTexture2D(0, texRef, devPtr, channelDesc, width, height, pitch);

　　以下代码示例绑定纹理参照系到一个CUDA数组cuArray：

使用低级API：

// Low-level (C-style) API: bind a 2D float texture reference to a CUDA array.
// NOTE(review): cuArray is not declared in this snippet; it is assumed to be
// a CUDA array allocated by the surrounding program.
texture<float, 2, cudaReadModeElementType> texRef;

// The low-level calls take a pointer to the untyped textureReference, so look
// it up first. The pointer must be const-qualified to match the parameter type
// of cudaGetTextureReference().
const textureReference* texRefPtr;

// The symbol name must be an ordinary string literal in straight ASCII quotes;
// typographic quotes are not valid C++.
// NOTE(review): toolkits from CUDA 5.0 onward expect the symbol itself
// (&texRef) instead of its name as a string -- confirm against the toolkit in use.
cudaGetTextureReference(&texRefPtr, "texRef");

// Query the channel format from the array itself so the binding matches it.
cudaChannelFormatDesc channelDesc;
cudaGetChannelDesc(&channelDesc, cuArray);

// Pass the looked-up pointer (not the typed texRef object): the overload that
// accepts a descriptor pointer is the one taking a textureReference pointer.
cudaBindTextureToArray(texRefPtr, cuArray, &channelDesc);

使用高级API：

// High-level (C++) API: bind a 2D float texture reference to a CUDA array.
// The typed texture reference is passed directly and no channel descriptor
// argument is supplied in this form.
texture<float, 2, cudaReadModeElementType> texRef;

// NOTE(review): cuArray is not declared in this snippet; it is assumed to be
// a CUDA array allocated by the surrounding program.
cudaBindTextureToArray(texRef, cuArray);

　　当绑定一个纹理到纹理参照系时，格式必须与声明纹理参照系时的参数匹配；否则，纹理拾取的结果是未定义的。

　　3.2.4.4 纹理拾取

　　纹理拾取函数采用纹理坐标对纹理存储器进行访问。

　　对与线性内存绑定的纹理，使用tex1Dfetch()函数访问，采用的纹理坐标是整型。由cudaMallocPitch()或者cudaMalloc3D()分配的线性空间实际上仍然是经过填充、对齐的一维线性空间，因此也用tex1Dfetch()函数访问。

　　对与一维、二维和三维CUDA数组绑定的纹理，分别使用tex1D()、tex2D()和tex3D()函数访问，并且使用浮点型纹理坐标。

　　关于纹理拾取函数的更多讨论，请见本书附录D.8。

　　3.2.4.5 例子分析：Simple texture

// 2D float texture reference (file scope). The kernel below samples through
// it; the host must bind it to a CUDA array before launching.
texture<float, 2, cudaReadModeElementType> texRef;

// Simple transformation kernel.
//
// Each thread handles one output pixel (x, y) of a width x height image:
// it rotates the pixel's normalized coordinate by `theta` radians about the
// image centre (0.5, 0.5), samples texRef at the rotated coordinate and
// stores the sample at output[y * width + x].
//
// Launch layout: 2D grid of 2D blocks covering at least width x height
// threads. Threads that fall outside the image exit without writing, so the
// grid may be rounded up to whole blocks.
//
// output : device buffer with room for width * height floats
// width  : image width in pixels
// height : image height in pixels
// theta  : rotation angle in radians
__global__ void transformKernel(float* output,
                                int width, int height, float theta)
{
    // Pixel coordinates derived from the block and thread indices.
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Skip threads beyond the image edge; without this guard a rounded-up
    // grid would write past the end of `output`.
    if (width <= 0 || height <= 0 ||
        x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized fetch coordinates in [0, 1).
    float u = x / (float)width;
    float v = y / (float)height;

    // Rotate the fetch coordinates about the image centre.
    u -= 0.5f;
    v -= 0.5f;
    float cosTheta = cosf(theta);
    float sinTheta = sinf(theta);
    float tu = u * cosTheta - v * sinTheta + 0.5f;
    float tv = v * cosTheta + u * sinTheta + 0.5f;

    // Fetch from texture memory and write the result to global memory.
    output[y * width + x] = tex2D(texRef, tu, tv);
}

// Host code
//
// Uploads an image into a CUDA array, binds it to texRef, runs
// transformKernel over every pixel and releases the device resources.
// Returns 0 on success and 1 if the CUDA runtime reported an error.
//
// NOTE(review): width, height, size, h_data and angle are used below but are
// not declared anywhere in this listing; they are assumed to be supplied by
// the surrounding program -- confirm before building.
int main()
{
    // Allocate a CUDA array whose texels hold a single 32-bit float channel.
    cudaChannelFormatDesc channelDesc =
        cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray;
    cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy `size` bytes located at address h_data in host memory into the
    // array, starting at offset (0, 0).
    cudaMemcpyToArray(cuArray, 0, 0, h_data, size, cudaMemcpyHostToDevice);

    // Set texture parameters.
    texRef.addressMode[0] = cudaAddressModeWrap;  // wrap-around addressing
    texRef.addressMode[1] = cudaAddressModeWrap;
    // Linear filtering, because the data is an image. Do not use linear
    // filtering when the original texel values must be preserved.
    texRef.filterMode = cudaFilterModeLinear;
    texRef.normalized = true;                     // normalized coordinates

    // Bind the array to the texture.
    cudaBindTextureToArray(texRef, cuArray, channelDesc);

    // Allocate result of transformation in device memory.
    float* output;
    cudaMalloc((void**)&output, width * height * sizeof(float));

    // Invoke kernel: 16x16 threads per block, grid rounded up so that every
    // pixel is covered even when the image size is not a multiple of 16.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    transformKernel<<<dimGrid, dimBlock>>>(output, width, height, angle);

    // A kernel launch returns no status of its own: ask the runtime for the
    // last recorded error, then wait for the kernel so that execution errors
    // are reported here as well.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
        status = cudaDeviceSynchronize();

    // Release the binding before freeing the array that backs it.
    cudaUnbindTexture(texRef);

    // Free device memory.
    cudaFreeArray(cuArray);
    cudaFree(output);

    return status == cudaSuccess ? 0 : 1;
}

第1页：纹理存储器第2页：CUDA数组第3页：纹理绑定 & 纹理拾取 & 例子分析

关注我们