对于Ubuntu或其近亲(Lubuntu、Kubuntu、Mint等)编写OpenCL程序也不会太难。由于本例用的是AMD APP
SDK,因此需要AMD的GPU以及相关驱动。首先,去AMD官网下载GPU驱动——AMD
Catalyst。如果你用的是APU并且还有一块独立显卡的话,通过AMD Catalyst Control
Center可以选择使用哪个GPU。像我现在用的联想Z475笔记本,搭载了AMD APU A6-3420M以及一块AMD Radeon HD
7400M,但是相比较而言,还是APU自带的6620G的GPU性能更强一些,因此我这边设置的是采用AMD Radeon HD 6620G。
在Linux下,AMD官方的GPU驱动是.run文件,只需使用sudo sh xxx.run即可安装。安装时采用默认安装即可。
然后去developer.amd.com开发者网站下载AMD APP
SDK。下载完成之后,将lib里面的动态库文件(xxx.so)取出来,并且把include里的头文件取出来。在你的OpenCL工程中把头文件的路径
以及动态加载库都设置好。在你用-l的时候,如果动态库文件后缀名为.so.1,那么得把文件名后缀.1去掉(或者建立一个不带.1后缀的符号链接)。因为-l只能链接.a、.lib、.so
等这些后缀的库文件。
下面我们将举一个最简单的例子,首先看主机端代码:
/*
============================================================================
Name : OpenCLTest.c
Author : Zenny Chen
Version :
Copyright : Your copyright notice
Description : Minimal OpenCL host program that runs the "test" kernel from test.cl
============================================================================
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <CL/cl.h>
/**
 * Build the path of a file located in the same directory as the running
 * executable.
 *
 * The executable's path is obtained by resolving the "/proc/self/exe"
 * symbolic link; everything after its last '/' is replaced by filename.
 *
 * @param pDst     Destination buffer of 512 bytes. On success it holds the
 *                 NUL-terminated result; on failure (when non-NULL) it is set
 *                 to the empty string.
 * @param filename File name to append to the executable's directory.
 * @return Length of the resulting path (excluding the terminator), or 0 if an
 *         argument is NULL, the link cannot be read, no '/' is found, or the
 *         result would not fit in the 512-byte buffer.
 */
static int GetCurrentLocationFilePath(char pDst[512], const char *filename)
{
    enum { DST_CAPACITY = 512 };

    if (pDst == NULL || filename == NULL)
        return 0;

    /* readlink() returns -1 on error and never NUL-terminates the buffer. */
    ssize_t linkLen = readlink("/proc/self/exe", pDst, DST_CAPACITY);
    if (linkLen <= 0)
    {
        pDst[0] = '\0';
        return 0;
    }

    /* Keep the directory part, including its trailing '/'. */
    size_t dirLen = (size_t)linkLen;
    while (dirLen > 0 && pDst[dirLen - 1] != '/')
        dirLen--;

    /* Reject a path without '/' or a file name that would overflow pDst. */
    size_t nameLen = strlen(filename);
    if (dirLen == 0 || nameLen >= (size_t)DST_CAPACITY - dirLen)
    {
        pDst[0] = '\0';
        return 0;
    }

    /* Copy the name together with its terminating NUL. */
    memcpy(&pDst[dirLen], filename, nameLen + 1);
    return (int)(dirLen + nameLen);
}
int main(void)
{
/*Step1: Getting platforms and choose an available one.*/
cl_uint numPlatforms; //the NO. of platforms
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS)
{
puts("Error: Getting platforms!");
return 0;
}
cl_platform_id platforms[16];
/*For clarity, choose the first available platform. */
if(numPlatforms > 0)
{
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(status != CL_SUCCESS)
{
puts("Failed to get platform IDs");
return 0;
}
}
/*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
cl_uint numDevices = 0;
cl_device_id devices[16];
clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if(numDevices == 0) //no GPU available.
{
puts("No devices available!");
return 0;
}
else
{
printf("The number of available devices is: %u\n", numDevices);
clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
}
/*Step 3: Create context.*/
cl_context context = clCreateContext(NULL,1, devices,NULL,NULL,NULL);
/*Step 4: Creating command queue associate with the context.*/
cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
/*Step 5: Create program object */
char filePath[512];
GetCurrentLocationFilePath(filePath, "test.cl");
FILE *fp = fopen(filePath, "r");
if(fp == NULL)
{
puts("OpenCL kernel source file open failed!");
return 0;
}
fseek(fp, 0, SEEK_END);
long fileLength = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *source = (char*)malloc(fileLength + 1);
fread(source, 1, fileLength, fp);
fclose(fp);
size_t sourceSize[] = {fileLength};
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&source, sourceSize, NULL);
free(source);
if(program == NULL)
{
puts("Failed to create the program!");
return 0;
}
/*Step 6: Build program. */
status = clBuildProgram(program, 1,devices,NULL,NULL,NULL);
if(status != CL_SUCCESS)
{
puts("Failed to build the program!");
return 0;
}
/*Step 7: Initial input,output for the host and create memory objects for the kernel*/
int input[128];
for(int i = 0; i < 128; i++)
input[i] = i + 1;
cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(input), input, NULL);
cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY , sizeof(input), NULL, NULL);
/*Step 8: Create kernel object */
cl_kernel kernel = clCreateKernel(program, "test", NULL);
/*Step 9: Sets Kernel arguments.*/
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
/*Step 10: Running the kernel.*/
size_t global_work_size[1] = { 128 };
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
/*Step 11: Read the cout put back to host memory.*/
int output[128];
status = clEnqueueReadBuffer(commandQueue, outputBuffer, CL_TRUE, 0, sizeof(input), output, 0, NULL, NULL);
/*Step 12: Clean the resources.*/
status = clReleaseKernel(kernel); //Release kernel.
status = clReleaseProgram(program); //Release the program object.
status = clReleaseMemObject(inputBuffer); //Release mem object.
status = clReleaseMemObject(outputBuffer);
status = clReleaseCommandQueue(commandQueue); //Release Command queue.
status = clReleaseContext(context); //Release context.
for(int i = 0; i < 128; i++)
{
if(output[i] != i + 2)
{
printf("Error occurred @%d!", i);
return 0;
}
}
puts("Pass!");
return 1;
}