I have a simple kernel: it accepts a struct containing two integers (image width and height) and a float array (the image).
/*
 * Parameter bundle handed to the kernel: the image dimensions followed by a
 * pointer to the float pixel data. The packed attribute removes any padding
 * between the members.
 */
struct __attribute__((packed)) sample_struct {
    int image_width;
    int image_height;
    float *image;
};
typedef struct sample_struct sample_struct;
Inside the kernel I'm trying to print the values of the image.
Strangely, OpenCL ignores the part where I try to access the floating-point array, but it does not ignore the other parts.
Here's the kernel:
/*
 * Kernel-side view of the parameter block: two ints with the image
 * dimensions, then a pointer to the float pixel data. Packed, so the members
 * are laid out back to back with no padding.
 */
struct __attribute__((packed)) sample_struct {
    int image_width;
    int image_height;
    float *image;
};
typedef struct sample_struct sample_struct;
// Debug kernel: every work-item reads the image dimensions from *param,
// derives two indices from its global id, and prints them twice - once
// without and once with the pixel value fetched through param->image.
//
// NOTE(review): param->image is whatever pointer value the host wrote into
// the parameter buffer. Whether that value is an address this kernel may
// dereference is not established here - verify against the host code and the
// OpenCL rules for pointers stored inside buffers.
__kernel void test(
__global sample_struct * param
)
{
    const int gid = get_global_id(0);
    const int width = param->image_width;
    const int height = param->image_height;
    float *pixels = param->image;

    // Quotient and remainder of the global id by the image width.
    const int x = gid / width;
    const int y = gid % width;
    const int offset = x * width + y;

    // According to the author, this output does appear.
    printf("On pixel %d, %d \t image height : %d image width : %d\n", x, y, height, width);
    // According to the author, this output never appears.
    printf("On pixel %d, %d \t image height : %d image width : %d \t pixel value : %f\n", x, y, height, width, pixels[offset]);
}
I found a post from 2011 which states that we cannot pass structs that contain pointers as kernel arguments. Is this supported in OpenCL 3.0?
What's a good workaround?
Here's the full host code I used:
#include <stdio.h>
#include <stdlib.h>

#include <CL/cl.h>
#define MAX_SOURCE_SIZE (0x100000)
/*
 * Host-side mirror of the kernel's parameter block: two ints with the image
 * dimensions, then a pointer-sized slot for the float pixel data. Packed, so
 * the members are laid out back to back with no padding.
 */
struct __attribute__((packed)) sample_struct {
    int image_width;
    int image_height;
    float *image;
};
typedef struct sample_struct sample_struct;
int main(void) {
// loading the kernel
FILE *fp;
char *source_str;
size_t source_size;
fp = fopen("kernel.cl", "r");
if (!fp) {
fprintf(stderr, "Failed to load kernel.\n");
exit(1);
}
source_str = (char*)malloc(MAX_SOURCE_SIZE);
source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
fclose( fp );
// initiating the device
cl_platform_id platform_id = NULL;
cl_device_id device_id = NULL;
cl_uint ret_num_devices;
cl_uint ret_num_platforms;
cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
&device_id, &ret_num_devices);
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
cl_program program = clCreateProgramWithSource(context, 1,
(const char **)&source_str, (const size_t *)&source_size, &ret);
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "test", &ret);
// initialize image on host.
float * image = (float *) malloc(9*sizeof(float));
for(int i = 0; i < 9; i++)
image[i] = i+1;
// copy image to device memory
cl_mem device_image = clCreateBuffer(context, CL_MEM_READ_ONLY,
9* sizeof(float), NULL, &ret);
ret = clEnqueueWriteBuffer(command_queue, device_image, CL_TRUE, 0,
9* sizeof(float), image, 0, NULL, NULL);
// initialize param
sample_struct param;
param.image_height = 3;
param.image_width = 3;
param.image = (float *)device_image;
// copy param to device memory
cl_mem device_param = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(param), NULL, &ret);
clEnqueueWriteBuffer(command_queue, device_param, CL_TRUE, 0,
sizeof(param), ¶m, 0, NULL, NULL);
//configuration
ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&device_param);
size_t global_item_size = 9;
size_t local_item_size = 3;
// launch the kernel
ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
&global_item_size, &local_item_size, 0, NULL, NULL);
free(image);
ret = clFlush(command_queue);
ret = clFinish(command_queue);
ret = clReleaseKernel(kernel);
ret = clReleaseProgram(program);
ret = clReleaseCommandQueue(command_queue);
ret = clReleaseContext(context);
return 0;
}
I also checked the return values (ret) of all the functions and they were all zero, meaning OpenCL thinks they all executed successfully.
I'm using OpenCL 3.0 (Nvidia implementation), and my device is an Nvidia GTX 1650 GPU.