How to fill image2d_t from float vector?

I have already asked this question on Stack Overflow, to no avail. In a little program I wrote, I work with grayscale images, which I read into standard float vectors. I think a lot of the work my kernels do could be done in hardware by the TMUs if I used image2d_t memory objects instead of regular buffers. My question is: how can I copy a float vector into an image? If I understand it correctly, I basically have to write the entries of my float vector into a vector that is four times as long, like so:

// A: 256 grayscale samples, all initialised to 0.
std::vector<float> A(256,0.0);
// A_img: four floats per sample of A (1024 = 4 * 256), all initialised to 0.
std::vector<float> A_img(1024, 0.0);

// For every sample i, slot 4*i receives A[i] and slot 4*i+3 is set to 1;
// slots 4*i+1 and 4*i+2 keep their initial value of 0.
for(int i=0; i<256; i++)
{
    A_img[4*i] = A[i];
    A_img[(4*i)+3] = 1;
}

Maybe my device being little-endian causes the order of the components to be reversed, but I am not sure about that. However, I can’t make this idea work. Here is a minimal working example:

#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#if defined(__APPLE__) 
#include <OpenCL/cl2.hpp>
#else 
#include <CL/cl2.hpp>
#endif
#include <iostream>
#include <string>
#include <vector>
#include <fstream>

// Host program for the question: builds the kernel from read_write_image.cl,
// uploads a 16x16 test image, runs the kernel once over a 16x16 range and
// prints whatever is read back from the output image.
int main(void)
{
    // Set up platform, device and context
    std::vector<cl::Platform> platforms;
    std::vector<cl::Device> devices;
    cl::Device default_device;
    cl::Platform::get(&platforms);
    
    if (platforms.size() == 0)
    {
        std::cout << "No OpenCL platform found, check installation!" << std::endl;
        exit(-1);
    }
    // Only the first platform is queried, and only its first device is used.
    platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
    
    if (devices.size() == 0)
    {
        std::cout << "No devices found in platform, check installation!" << std::endl;
        exit(-1);
    }
    default_device = devices[0];
    cl::Context context(default_device);
    
    // Read the whole kernel file into a string and build it.
    // The stream is not checked, so a missing file yields an empty source string.
    std::ifstream program_file("read_write_image.cl");
    std::string program_string(std::istreambuf_iterator<char>(program_file), (std::istreambuf_iterator<char>()));
    cl::Program::Sources source { program_string };
    cl::Program dummy_program(context, source);
    if (dummy_program.build()!=CL_SUCCESS)
    {
        std::cout << "Error building: " << dummy_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)<< std::endl;
        exit(-1);
    }
    cl::Kernel kernel(dummy_program, "read_write_image");
    cl::CommandQueue queue(context, default_device);
    
    // Set up dummy grayscale image: a descending ramp 255, 254, ..., 0,
    // echoed to stdout so it can be compared with the result.
    std::vector<float> A(256, 0.0);
    for(int i=0; i<256; i++)
    {
        A[i] = 255.0f - i;
        std::cout << A[i] << "  ";
    }
    std::cout << std::endl;
    
    // Blow up to float4 array: A[i] goes into slot 4*i, slot 4*i+3 is set to 1,
    // the two slots in between stay 0.
    std::vector<float> A_img(1024, 0.0);
    for(int i=0; i<256; i++)
    {
        A_img[4*i] = A[i];
        A_img[(4*i)+3] = 1;
    }
    // Host buffer for the read-back, sized like A_img.
    std::vector<float> B_img(1024, 0.0);
    
    // NOTE(review): the images are created with the single-channel CL_R order,
    // while A_img/B_img are laid out with four floats per pixel. This looks
    // like a mismatch between image format and host layout; confirm against
    // the OpenCL image format documentation.
    cl::ImageFormat grayscale(CL_R, CL_FLOAT);
    cl::Image2D Input_Image(context, CL_MEM_READ_ONLY, grayscale, 16, 16);
    cl::Image2D Output_Image(context, CL_MEM_WRITE_ONLY, grayscale, 16, 16);
    
    // Transfer the full 16x16 image starting at pixel (0,0).
    std::array<cl::size_type, 3> origin {0,0,0};
    std::array<cl::size_type, 3> region {16, 16, 1};

    // The status codes returned by the enqueue and setArg calls below are not inspected.
    queue.enqueueWriteImage(Input_Image, CL_TRUE, origin, region, 0, 0, &A_img[0]);

    kernel.setArg(0, Input_Image);
    kernel.setArg(1, Output_Image);
    
    // One work-item per pixel of the 16x16 image.
    queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(16,16), cl::NullRange, NULL); 
    queue.enqueueReadImage(Output_Image, CL_TRUE, origin, region, 0, 0, &B_img[0]);

    // Print all 1024 entries of the read-back buffer.
    for(int i=0; i<1024; i++)
    {
        std::cout << B_img[i] << "  ";
    }   
    std::cout << std::endl;
    
    return EXIT_SUCCESS;
}

with this kernel:

// Sampler for exact texel fetches with unnormalized integer coordinates.
// read_imagef called with integer (int2) coordinates is only defined for
// CLK_FILTER_NEAREST; with CLK_FILTER_LINEAR the returned values are undefined.
// For hardware bilinear filtering, use float2 coordinates at pixel centres
// (x + 0.5f, y + 0.5f) together with a separate CLK_FILTER_LINEAR sampler.
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

// Copies the first channel of every pixel of input_image to the same pixel of
// output_image. Expects one work-item per pixel in a 2D NDRange.
__kernel void read_write_image(read_only image2d_t input_image, write_only image2d_t output_image)
{
    const int2 pos = (int2)(get_global_id(0), get_global_id(1));

    // Writing outside the image is undefined, so skip surplus work-items
    // (e.g. when the global size has been rounded up to a work-group multiple).
    if (pos.x >= get_image_width(output_image) || pos.y >= get_image_height(output_image))
    {
        return;
    }

    const float value = read_imagef(input_image, sampler, pos).x;
    // Channels absent from the image's format are ignored on write.
    write_imagef(output_image, pos, (float4)(value, 0.0f, 0.0f, 1.0f));
}

The output I receive is only zeroes. What am I doing wrong? How can I make this work?

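The functions “read_imagef” and “write_imagef” work with float4s, but not every image is necessarily a float4 array. With the CL_R channel order used here, each pixel stores a single float, so the host buffer needs only one float per pixel. Skipping the A_img part and working with A itself, i.e. using the host code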

#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#if defined(__APPLE__) 
#include <OpenCL/cl2.hpp>
#else 
#include <CL/cl2.hpp>
#endif
#include <iostream>
#include <string>
#include <vector>
#include <fstream>

int main(void)
{
	// Set up platform, device and context
	std::vector<cl::Platform> platforms;
	std::vector<cl::Device> devices;
	cl::Device default_device;
	cl::Platform::get(&platforms);
	
	if (platforms.size() == 0)
	{
		std::cout << "No OpenCL platform found, check installation!" << std::endl;
		exit(-1);
	}
	platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
	
	if (devices.size() == 0)
	{
		std::cout << "No devices found in platform, check installation!" << std::endl;
		exit(-1);
	}
	default_device = devices[0];
	cl::Context context(default_device);
	
	std::ifstream program_file("read_write_image.cl");
	std::string program_string(std::istreambuf_iterator<char>(program_file), (std::istreambuf_iterator<char>()));
	cl::Program::Sources source { program_string };
	cl::Program dummy_program(context, source);
    if (dummy_program.build()!=CL_SUCCESS)
	{
        std::cout << "Error building: " << dummy_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)<< std::endl;
        exit(-1);
    }
	cl::Kernel kernel(dummy_program, "read_write_image");
	cl::CommandQueue queue(context, default_device);
	
	// Set up dummy grayscale image
	std::vector<float> A(256, 0.0);
	for(int i=0; i<256; i++)
	{
		A[i] = 255.0f - i;
	}
	// Set up empty target image
	std::vector<float> B(256, 0.0);
	
	cl::ImageFormat grayscale(CL_R, CL_FLOAT);
	cl::Image2D Input_Image(context, CL_MEM_READ_ONLY, grayscale, 16, 16);
	cl::Image2D Output_Image(context, CL_MEM_WRITE_ONLY, grayscale, 16, 16);
	
	std::array<cl::size_type, 3> origin {0,0,0};
	std::array<cl::size_type, 3> region {16, 16, 1};

	queue.enqueueWriteImage(Input_Image, CL_TRUE, origin, region, 0, 0, &A[0]);

	kernel.setArg(0, Input_Image);
	kernel.setArg(1, Output_Image);
	
	queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(16,16), cl::NullRange, NULL); 
	queue.enqueueReadImage(Output_Image, CL_TRUE, origin, region, 0, 0, &B[0]);

	for(int i=0; i<256; i++)
	{
		std::cout << B[i] << "  ";
	}	
	std::cout << std::endl;
	
	return EXIT_SUCCESS;
}

makes this a working example. However, it seems support may be platform dependent: on my MacBook it still doesn’t work, while on my AMD graphics card it works with these changes.

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.