Passing structs with pointers as kernel args in OpenCL 3.0

I have a simple kernel: it accepts a struct containing two integers (image width and height) and a float array (image).

/*
 * Parameter block handed to the kernel: the image dimensions followed by a
 * pointer to the pixel data. The packed attribute asks the compiler not to
 * insert padding between the members.
 */
struct __attribute__((packed)) sample_struct {
    int image_width;   /* image width */
    int image_height;  /* image height */
    float *image;      /* pointer to the float pixel array */
};
typedef struct sample_struct sample_struct;

Inside the kernel I’m trying to print the values of the image.
But strangely, OpenCL ignores the part where I try to access the floating-point array (but does not ignore the other parts).
Here’s the kernel :

/*
 * Kernel-side view of the parameter block read through the `param` argument.
 * Declared packed so that no padding is inserted between the members; the
 * host program declares the same three members in the same order.
 */
struct __attribute__((packed)) sample_struct {
    int image_width;   /* image width */
    int image_height;  /* image height */
    float *image;      /* pointer value copied verbatim from the host struct */
};
typedef struct sample_struct sample_struct;

/*
 * Debug kernel. Each work-item:
 *   1. reads the image dimensions and the image pointer from `param`,
 *   2. splits its global id into a quotient (x) and remainder (y) by the
 *      image width,
 *   3. prints the position and dimensions,
 *   4. prints them again together with the value read through the image
 *      pointer at index x * width + y.
 *
 * NOTE(review): the host program in this post fills param->image with a
 * cl_mem handle cast to float*. That value is a host-side handle, not a
 * device address, so the load in step 4 reads through an invalid pointer;
 * this is presumably why the second printf never produces output. The usual
 * remedy is to pass the pixel buffer as its own __global kernel argument
 * instead of embedding a pointer in the struct.
 */
__kernel void test(__global sample_struct *param)
{
    const int gid = get_global_id(0);

    const int width = param->image_width;
    const int height = param->image_height;
    float *pixels = param->image;

    const int x = gid / width;
    const int y = gid % width;
    const int offset = x * width + y;

    // this line is printed
    printf("On pixel %d, %d \t image height : %d  image width : %d\n", x, y, height, width);

    // this line is not printed
    printf("On pixel %d, %d \t image height : %d  image width : %d \t pixel value : %f\n", x, y, height, width, pixels[offset]);
}

I found a post from 2011 that states we cannot pass structs that contain pointers as kernel arguments. Is this supported in OpenCL 3.0?
What’s a good workaround?

Here’s the full host code I used :

#include <stdio.h>
#include <stdlib.h>

#include <CL/cl.h>
 
#define MAX_SOURCE_SIZE (0x100000)


/*
 * Host-side parameter block that is copied byte-for-byte into a device
 * buffer and read by the kernel. Declared packed so that no padding is
 * inserted between the members.
 */
struct __attribute__((packed)) sample_struct {
    int image_width;   /* image width */
    int image_height;  /* image height */
    float *image;      /* pointer slot for the float pixel array */
};
typedef struct sample_struct sample_struct;



 
int main(void) {

    // loading the kernel
    FILE *fp;
    char *source_str;
    size_t source_size;
 
    fp = fopen("kernel.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = (char*)malloc(MAX_SOURCE_SIZE);
    source_size = fread( source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );
 

    // initiating the device
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;   
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, 
            &device_id, &ret_num_devices);

    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
 
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);

    cl_program program = clCreateProgramWithSource(context, 1, 
            (const char **)&source_str, (const size_t *)&source_size, &ret);

    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); 
    cl_kernel kernel = clCreateKernel(program, "test", &ret);






    // initialize image on host.
    float * image = (float *) malloc(9*sizeof(float));
    for(int i = 0; i < 9; i++)
        image[i] = i+1;


    // copy image to device memory
    cl_mem device_image = clCreateBuffer(context, CL_MEM_READ_ONLY, 
            9* sizeof(float), NULL, &ret);
    ret = clEnqueueWriteBuffer(command_queue, device_image, CL_TRUE, 0,
            9* sizeof(float), image, 0, NULL, NULL);



    // initialize param
    sample_struct param;
    param.image_height = 3;
    param.image_width = 3;
    param.image = (float *)device_image;


    // copy param to device memory
    cl_mem device_param = clCreateBuffer(context, CL_MEM_READ_ONLY, 
            sizeof(param), NULL, &ret);

    clEnqueueWriteBuffer(command_queue, device_param, CL_TRUE, 0, 
        sizeof(param), &param, 0, NULL, NULL);



    //configuration
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&device_param);

    size_t global_item_size = 9;
    size_t local_item_size = 3; 

    // launch the kernel
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
            &global_item_size, &local_item_size, 0, NULL, NULL);



    free(image);
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);

    return 0;
}

I also checked the return values (ret) of all the functions and they were all zero - meaning OpenCL thinks they all executed successfully.

I’m using OpenCL 3.0 (NVIDIA implementation), and my device is an NVIDIA GTX 1650 GPU.

The post from 2011 : https:// community. khronos. org/t/passing-struct-to-kernel/3132/7
(please remove spaces)
And the code output:

On pixel 1, 0    image height : 3  image width : 3
On pixel 1, 1    image height : 3  image width : 3
On pixel 1, 2    image height : 3  image width : 3
On pixel 0, 0    image height : 3  image width : 3
On pixel 0, 1    image height : 3  image width : 3
On pixel 0, 2    image height : 3  image width : 3
On pixel 2, 0    image height : 3  image width : 3
On pixel 2, 1    image height : 3  image width : 3
On pixel 2, 2    image height : 3  image width : 3

(for some reason, I cannot include direct links in my post above)

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.