创建CUDA代码
为了便于区分,我们首先创建一个名为CUDA的筛选器,然后在该筛选器中分别创建名为first.cu和first_kernel.cu的两个源代码文件。接着在first.cu中添加如下代码:
#include "stdio.h"
#include "cutil.h"
#include "first_kernel.cu"
/**
 * Sums `datalen` floats from the host array `source` on the GPU and writes
 * the total to `*result`.
 *
 * @param source   Host array holding at least `datalen` floats.
 * @param datalen  Number of elements to sum; must not be negative.
 * @param result   Host location that receives the sum. On any failure
 *                 (no usable device, negative length, or a CUDA runtime
 *                 error) a message is printed to stderr and `*result` is
 *                 set to -1. Note that -1 is therefore indistinguishable
 *                 from a genuine sum of -1.
 */
extern "C" void runtest(float *source,int datalen,float *result)
{
    float *d_source = NULL;
    float *d_result = NULL;
    cudaError_t err;
    int count = 0;
    int i;

    if (result == NULL) {
        /* Nowhere to report either a sum or a failure. */
        return;
    }

    /* Bail out immediately when no device is present; continuing would only
       make every later runtime call fail and clobber the -1 sentinel. */
    err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess || count == 0) {
        fprintf(stderr, "There is no device.\n");
        *result = -1;
        return;
    }

    /* Look for at least one device reporting compute capability >= 1.x.
       This loop only verifies availability; it does not change the
       current device. */
    for (i = 0; i < count; i++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            if (prop.major >= 1) {
                break;
            }
        }
    }
    if (i == count) {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        *result = -1;
        return;
    }

    if (datalen < 0) {
        /* A negative count would wrap to a huge size_t byte count below. */
        fprintf(stderr, "Invalid data length: %d\n", datalen);
        *result = -1;
        return;
    }

    const size_t bytes = (size_t)datalen * sizeof(float);

    /* Each step runs only if everything before it succeeded, so the first
       failure is the one that gets reported. */
    err = cudaMalloc((void**)&d_source, bytes);
    if (err == cudaSuccess) {
        err = cudaMalloc((void**)&d_result, sizeof(float));
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_source, source, bytes, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        kernel<<<1,256,0>>>(d_source, datalen, d_result);
        /* A launch does not return a status; launch-configuration errors
           must be fetched explicitly. */
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        /* Blocking copy: waits for the kernel to finish and surfaces any
           error raised while it was executing. */
        err = cudaMemcpy(result, d_result, sizeof(float), cudaMemcpyDeviceToHost);
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        *result = -1;
    }

    /* cudaFree accepts a null pointer, so this is safe on every path. */
    cudaFree(d_source);
    cudaFree(d_result);
}
#include "cutil.h"
#include "first_kernel.cu"
/**
 * Sums `datalen` floats from the host array `source` on the GPU and writes
 * the total to `*result`.
 *
 * @param source   Host array holding at least `datalen` floats.
 * @param datalen  Number of elements to sum; must not be negative.
 * @param result   Host location that receives the sum. On any failure
 *                 (no usable device, negative length, or a CUDA runtime
 *                 error) a message is printed to stderr and `*result` is
 *                 set to -1. Note that -1 is therefore indistinguishable
 *                 from a genuine sum of -1.
 */
extern "C" void runtest(float *source,int datalen,float *result)
{
    float *d_source = NULL;
    float *d_result = NULL;
    cudaError_t err;
    int count = 0;
    int i;

    if (result == NULL) {
        /* Nowhere to report either a sum or a failure. */
        return;
    }

    /* Bail out immediately when no device is present; continuing would only
       make every later runtime call fail and clobber the -1 sentinel. */
    err = cudaGetDeviceCount(&count);
    if (err != cudaSuccess || count == 0) {
        fprintf(stderr, "There is no device.\n");
        *result = -1;
        return;
    }

    /* Look for at least one device reporting compute capability >= 1.x.
       This loop only verifies availability; it does not change the
       current device. */
    for (i = 0; i < count; i++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
            if (prop.major >= 1) {
                break;
            }
        }
    }
    if (i == count) {
        fprintf(stderr, "There is no device supporting CUDA 1.x.\n");
        *result = -1;
        return;
    }

    if (datalen < 0) {
        /* A negative count would wrap to a huge size_t byte count below. */
        fprintf(stderr, "Invalid data length: %d\n", datalen);
        *result = -1;
        return;
    }

    const size_t bytes = (size_t)datalen * sizeof(float);

    /* Each step runs only if everything before it succeeded, so the first
       failure is the one that gets reported. */
    err = cudaMalloc((void**)&d_source, bytes);
    if (err == cudaSuccess) {
        err = cudaMalloc((void**)&d_result, sizeof(float));
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_source, source, bytes, cudaMemcpyHostToDevice);
    }
    if (err == cudaSuccess) {
        kernel<<<1,256,0>>>(d_source, datalen, d_result);
        /* A launch does not return a status; launch-configuration errors
           must be fetched explicitly. */
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        /* Blocking copy: waits for the kernel to finish and surfaces any
           error raised while it was executing. */
        err = cudaMemcpy(result, d_result, sizeof(float), cudaMemcpyDeviceToHost);
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        *result = -1;
    }

    /* cudaFree accepts a null pointer, so this is safe on every path. */
    cudaFree(d_source);
    cudaFree(d_result);
}
以及在first_kernel.cu中添加的内核函数(即并行部分):
#ifndef _FIRST_KERNEL_H_
#define _FIRST_KERNEL_H_
/**
 * Sums the first `len` floats of `source` and stores the total in `*result`.
 *
 * The accumulation is a plain sequential loop, so exactly one thread
 * (thread (0,0,0) of block (0,0,0)) performs it. Every other launched
 * thread exits immediately instead of repeating the same pass over global
 * memory and storing to the same address concurrently. The value written is
 * the same as before for any launch configuration.
 *
 * @param source  Pointer dereferenced on the device; must reference at
 *                least `len` readable floats.
 * @param len     Number of elements to add; a value <= 0 yields a sum of 0.
 * @param result  Pointer dereferenced on the device; receives the sum.
 */
__global__ void kernel(float *source,int len,float *result)
{
    /* Single-writer guard: only one thread in the whole grid does the work. */
    if (threadIdx.x != 0 || threadIdx.y != 0 || threadIdx.z != 0 ||
        blockIdx.x != 0 || blockIdx.y != 0 || blockIdx.z != 0) {
        return;
    }

    float sum = 0.0f;
    for (int i = 0; i < len; i++) {
        sum += source[i];
    }
    *result = sum;
}
#endif
#define _FIRST_KERNEL_H_
/**
 * Sums the first `len` floats of `source` and stores the total in `*result`.
 *
 * The accumulation is a plain sequential loop, so exactly one thread
 * (thread (0,0,0) of block (0,0,0)) performs it. Every other launched
 * thread exits immediately instead of repeating the same pass over global
 * memory and storing to the same address concurrently. The value written is
 * the same as before for any launch configuration.
 *
 * @param source  Pointer dereferenced on the device; must reference at
 *                least `len` readable floats.
 * @param len     Number of elements to add; a value <= 0 yields a sum of 0.
 * @param result  Pointer dereferenced on the device; receives the sum.
 */
__global__ void kernel(float *source,int len,float *result)
{
    /* Single-writer guard: only one thread in the whole grid does the work. */
    if (threadIdx.x != 0 || threadIdx.y != 0 || threadIdx.z != 0 ||
        blockIdx.x != 0 || blockIdx.y != 0 || blockIdx.z != 0) {
        return;
    }

    float sum = 0.0f;
    for (int i = 0; i < len; i++) {
        sum += source[i];
    }
    *result = sum;
}
#endif