I have modified source from example code to do very simple program but repeatedly running into problem. I can start from original working source but I would rather debug it to see what is wrong.
clCreatekernel
is returning -46
which means invalid kernel name. But I double checked everything and even did bcompare original vs. modified one (below).
I can not find problem with the kernel name at all. Perhaps more experience opencl dev-s can look and spot something??
source and output below:
PS. Do not worry about file name referring to CUDA, it has nothing to do with CUDA, at least now so ignore the name:
=~=~=~=~=~=~=~=~=~=~=~= MobaXterm log 2020.06.23 10:29:28 =~=~=~=~=~=~=~=~=~=~=~=
ls
Makefile ex-code-1 ex-code-1.c ex-code-1.o p25-cuda p25-cuda.c p25-cuda.o
root@sriov-guest:/git.co/dev-learn/rocm/opencl/opencl-programming-guide/cuda-conversion# cat p25-cuda.c
//
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
//
// A minimalist OpenCL program.
#include <CL/cl.h>
#include <stdio.h>
#define printDeviceInfo(X) printf("\n%s: %s", (X));
#define declareDeviceInfo(X) char str(X)[] = "(X)";
#define NWITEMS 512
// A simple simple_add kernel
const char *source =
"kernel void simple_add( global uint *c, global uint a, global uint b) \n"
"{ \n"
" *c = a + b; \n"
"} \n";
int main(int argc, char ** argv) {
int c;
int * dev_c;
int stat;
char str1[100];
size_t strLen;
int i;
// 1. Get a platform.
cl_platform_id platform;
clGetPlatformIDs( 1, &platform, NULL );
// 2. Find a gpu device.
cl_device_id device;
cl_device_info deviceInfos[]={CL_DEVICE_NAME, CL_DEVICE_VENDOR, CL_DEVICE_VERSION, CL_DRIVER_VERSION, CL_DEVICE_EXTENSIONS};
stat = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
for (int i = 0 ; i < sizeof(deviceInfos)/sizeof(cl_device_info); i ++ ) {
clGetDeviceInfo(device, deviceInfos[i], sizeof(str1), str1, &strLen);
if (stat == 0) {
printf("\n%s.", str1);
} else {
printf("\nclGetDevicesIDs FAIL.");
return 1;
}
}
// 3. Create a context and command queue on that device.
cl_context context = clCreateContext( NULL, 1, &device, NULL, NULL, NULL);
cl_command_queue queue = clCreateCommandQueue( context, device, 0, NULL );
// 4. Perform runtime source compilation, and obtain kernel entry point.
cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, NULL );
clBuildProgram( program, 1, &device, NULL, NULL, NULL );
cl_kernel kernel = clCreateKernel( program, "simple_add", NULL );
// 5. Create a data buffer.
cl_mem buffer = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_uint), NULL, NULL );
// 6. Launch the kernel. Let OpenCL pick the local work size.
size_t global_work_size = NWITEMS;
clSetKernelArg(kernel, 0, sizeof(buffer), (void*) &buffer);
clSetKernelArg(kernel, 0, sizeof(buffer), (void*)2);
clSetKernelArg(kernel, 0, sizeof(buffer), (void*)7);
clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);
clFinish( queue );
// 7. Look at the results via synchronous buffer map.
cl_uint *ptr;
ptr = (cl_uint *) clEnqueueMapBuffer( queue, buffer, CL_TRUE, CL_MAP_READ, 0, NWITEMS * sizeof(cl_uint), 0, NULL, NULL, NULL );
/*
for(i=0; i < NWITEMS; i++) {
if (i % 16 == 0)
printf("\n");
printf("%03d: %04d. ", i, ptr[i]);
}
*/
return 0;
}
root@sriov-guest:/git.co/dev-learn/rocm/opencl/opencl-programming-guide/cuda-conversion#
=~=~=~=~=~=~=~=~=~=~=~= MobaXterm log 2020.06.26 01:08:33 =~=~=~=~=~=~=~=~=~=~=~=
rm p25-cuda ; make p25-cuda ; ./p25-cuda nano -w p25-cuda.c rm p25-cuda ; make p25-cuda ; ./p25-cuda
g++ -o p25-cuda.o -c p25-cuda.c -I/opt/rocm/opencl//include
In file included from /opt/rocm/opencl//include/CL/cl.h:32:0,
from p25-cuda.c:7:
/opt/rocm/opencl//include/CL/cl_version.h:34:104: note: #pragma message: cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
^
p25-cuda.c: In function 'int main(int, char**)':
p25-cuda.c:66:77: warning: '_cl_command_queue* clCreateCommandQueue(cl_context, cl_device_id, cl_command_queue_properties, cl_int*)' is deprecated [-Wdeprecated-declarations]
cl_command_queue queue = clCreateCommandQueue( context, device, 0, &ret );
^
In file included from p25-cuda.c:7:0:
/opt/rocm/opencl//include/CL/cl.h:1813:1: note: declared here
clCreateCommandQueue(cl_context context,
^~~~~~~~~~~~~~~~~~~~
g++ -o p25-cuda p25-cuda.o -lOpenCL -L/opt/rocm/opencl//lib/x86_64
gfx900.
Advanced Micro Devices, Inc..
OpenCL 2.0 AMD-APP (3004.5).
3004.5 (PAL,HSAIL).
cl_khr_fp64 cl_amd_fp64 cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_kh.
Error: clCreateKernel returned non-zero: -46.
root@sriov-guest:/git.co/dev-learn/rocm/opencl/opencl-programming-guide/cuda-conversion# cd ..
root@sriov-guest:/git.co/dev-learn/rocm/opencl/opencl-programming-guide#