3.2.4.3 纹理绑定
在kernel能用纹理参照系从纹理内存中读数据前,纹理参照系必须通过cudaBindTexture()或cudaBindTextureToArray()绑定到纹理上。cudaUnbindTexture()用于解除纹理参照系的绑定。
以下代码示例绑定一个纹理参照系到devPtr指向的线性内存:
- 使用低级API:
// Low-level API: bind texRef to the 2D linear memory pointed to by devPtr.
// devPtr, width, height and pitch are assumed to be defined by the
// surrounding code (not shown in this snippet).
texture<float, 2, cudaReadModeElementType> texRef;
// cudaGetTextureReference() writes a pointer-to-const, so the local must be
// const-qualified as well.
const textureReference* texRefPtr;
// Look the texture reference up by its symbol name (plain ASCII quotes).
cudaGetTextureReference(&texRefPtr, "texRef");
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
// The low-level overload takes the reference and the descriptor by pointer.
cudaBindTexture2D(0, texRefPtr, devPtr, &channelDesc, width, height, pitch);
- 使用高级API
// High-level API: bind texRef to the 2D linear memory pointed to by devPtr.
// devPtr, width, height and pitch are assumed to be defined by the
// surrounding code (not shown in this snippet).
texture<float, 2, cudaReadModeElementType> texRef;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
// The high-level C++ overload takes both the texture reference and the
// channel descriptor by reference, so no address-of operator is used.
cudaBindTexture2D(0, texRef, devPtr, channelDesc, width, height, pitch);
以下代码示例绑定纹理参照系到一个CUDA数组cuArray:
- 使用低级API:
// Low-level API: bind texRef to the CUDA array cuArray.
// cuArray is assumed to be allocated by the surrounding code (not shown).
texture<float, 2, cudaReadModeElementType> texRef;
// cudaGetTextureReference() writes a pointer-to-const, so the local must be
// const-qualified as well.
const textureReference* texRefPtr;
// Look the texture reference up by its symbol name (plain ASCII quotes).
cudaGetTextureReference(&texRefPtr, "texRef");
// Query the array's own channel format instead of restating it.
cudaChannelFormatDesc channelDesc;
cudaGetChannelDesc(&channelDesc, cuArray);
// The low-level overload takes the reference and the descriptor by pointer,
// so the pointer obtained above is what gets passed.
cudaBindTextureToArray(texRefPtr, cuArray, &channelDesc);
- 使用高级API
// High-level API: bind texRef to the CUDA array cuArray.
// cuArray is assumed to be allocated by the surrounding code (not shown).
texture<float, 2, cudaReadModeElementType> texRef;
// No channel descriptor is passed to this overload.
cudaBindTextureToArray(texRef, cuArray);
当绑定一个纹理到纹理参照系时,格式必须与声明纹理参照系时的参数匹配;否则,纹理获取的结果是未定义的。
3.2.4.4 纹理拾取
纹理拾取函数采用纹理坐标对纹理存储器进行访问。
对于与线性内存绑定的纹理,使用tex1Dfetch()函数访问,采用的纹理坐标是整型。由cudaMallocPitch或者cudaMalloc3D分配的线性空间实际上仍然是经过填充、对齐的一维线性空间,因此也用tex1Dfetch()函数访问。
对于与一维、二维和三维CUDA数组绑定的纹理,分别使用tex1D()、tex2D()和tex3D()函数访问,并且使用浮点型纹理坐标。
关于纹理拾取函数的更多讨论,请见本书附录D.8。
3.2.4.5 例子分析:Simple texture
// 2D float texture reference; fetches return the stored element value.
texture<float, 2, cudaReadModeElementType> texRef;

// Simple transformation kernel.
//
// Each thread handles one output element (x, y): it converts its position to
// a normalized coordinate, rotates that coordinate by theta around the centre
// (0.5, 0.5), fetches texRef at the rotated position and stores the result.
//
//   output        - destination buffer in device memory, indexed as
//                   output[y * width + x]; must hold width * height floats
//   width, height - extent of the output, in elements
//   theta         - rotation angle handed to cosf()/sinf()
//
// Expects a 2D launch (blockIdx/threadIdx .x and .y are used). The grid may
// overshoot the output extent; surplus threads exit without writing.
__global__ void transformKernel(float* output,
                                int width, int height, float theta)
{
    // Global element position derived from block and thread indices.
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard against the partial blocks at the right and bottom edges, which
    // would otherwise write past the end of output.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Normalized fetch coordinates.
    float u = x / (float)width;
    float v = y / (float)height;

    // Rotate the fetch coordinates around the centre of the texture.
    u -= 0.5f;
    v -= 0.5f;
    float tu = u * cosf(theta) - v * sinf(theta) + 0.5f;
    float tv = v * cosf(theta) + u * sinf(theta) + 0.5f;

    // Fetch from texture memory and write the value to device memory.
    output[y * width + x] = tex2D(texRef, tu, tv);
}
// Host code
//
// Uploads host data into a CUDA array, binds that array to texRef, launches
// transformKernel over the whole output and releases the device resources.
// NOTE(review): width, height, size, h_data and angle are not declared in
// this listing; they are assumed to be provided by the surrounding program.
int main()
{
    // Allocate the CUDA array: a single 32-bit float channel.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray* cuArray;
    cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy to device memory some data located at address h_data
    // in host memory
    cudaMemcpyToArray(cuArray, 0, 0, h_data, size, cudaMemcpyHostToDevice);

    // Set texture parameters
    texRef.addressMode[0] = cudaAddressModeWrap;   // wrap-around addressing
    texRef.addressMode[1] = cudaAddressModeWrap;
    // Linear filtering, because the data here is an image. Do not use linear
    // filtering when the original values must be preserved.
    texRef.filterMode = cudaFilterModeLinear;
    texRef.normalized = true;                      // normalized coordinates

    // Bind the array to the texture
    cudaBindTextureToArray(texRef, cuArray, channelDesc);

    // Allocate result of transformation in device memory
    float* output;
    cudaMalloc((void**)&output, width * height * sizeof(float));

    // Invoke kernel: 16x16 threads per block, grid rounded up so that every
    // output element is covered.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    transformKernel<<<dimGrid, dimBlock>>>(output, width, height, angle);

    // Free device memory
    cudaFreeArray(cuArray);
    cudaFree(output);

    return 0;
}