Crash in clEnqueueReadBuffer

Hello Group,

I am currently developing an algorithm on a Firefly board with a Mali T760 GPU. I have written a simple standalone application to demonstrate the problem I have:

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <CL/cl.h>

// Problem dimensions. The buffers are treated as a flat array of
// WIDTH * HEIGHT floats; DEPTH is declared but not used by main().
static const int WIDTH = 400;
static const int HEIGHT = 100;
static const int DEPTH = 1;

// Total number of float elements in each buffer.
static const int ARRAY_SIZE = HEIGHT * WIDTH;

int main()
{
	// Context
	cl_int errNum;
	cl_uint numPlatforms;
	cl_platform_id firstPlatformId;

	errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error getting platform: " << errNum << std::endl;
		exit(-1);
	}

	cl_context_properties contextProperties[] =
	{
		CL_CONTEXT_PLATFORM,
		(cl_context_properties)firstPlatformId,
		0
	};
	cl_context context = clCreateContextFromType(
		contextProperties,
		CL_DEVICE_TYPE_GPU,
		NULL,
		NULL,
		&errNum);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error getting context from type: " << errNum << std::endl;
		exit(-4);
	}

	// Queue
	cl_command_queue commandQueue = NULL;
	cl_device_id * devices;
	size_t deviceBufferSize = 0;

	errNum = clGetContextInfo(
		context, 
		CL_CONTEXT_DEVICES, 
		0, 
		NULL, 
		&deviceBufferSize);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error getting context info: " << errNum << std::endl;
		exit(-2);
	}

	devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];
	errNum = clGetContextInfo(
		context,
		CL_CONTEXT_DEVICES,
		deviceBufferSize,
		devices,
		NULL);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error getting context info 2: " << errNum << std::endl;
		exit(-3);
	}

	commandQueue = clCreateCommandQueue(
		context,
		devices[0],
		0,
		NULL);

	// Kernel
	cl_program program;
	std::ifstream kernelFile("buffer.cl", std::ios::in);
	std::ostringstream oss;
	oss << kernelFile.rdbuf();

	std::string srcStdStr = oss.str();
	const char *srcStr = srcStdStr.c_str();
	program = clCreateProgramWithSource(
		context,
		1,
		(const char**)&srcStr,
		NULL,
		NULL);

	errNum = clBuildProgram(
		program,
		0,
		NULL,
		NULL,
		NULL,
		NULL);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error building program: " << errNum << std::endl;

		char buildLog[16384];
		clGetProgramBuildInfo(
			program,
			devices[0],
			CL_PROGRAM_BUILD_LOG,
			sizeof(buildLog),
			buildLog,
			NULL);
		std::cout << "log: " << buildLog << std::endl;
		exit(-6);
	}

	cl_kernel kernel = clCreateKernel(program, "hello_kernel", NULL);

	// Data
	float * data = new float[ARRAY_SIZE];
	float * result = new float[ARRAY_SIZE];
	for (int i=0; i<ARRAY_SIZE; ++i)
	{
		data[i] = (float)i;
	}

	// Buffers
	cl_mem memObjects[2];
	memObjects[0] = clCreateBuffer(
		context,
		CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
		sizeof(float) * ARRAY_SIZE,
		data,
		NULL);
	memObjects[1] = clCreateBuffer(
		context,
		CL_MEM_READ_WRITE,
		sizeof(float) * ARRAY_SIZE,
		NULL,
		NULL);

	// Process...
	errNum = clSetKernelArg(
		kernel,
		0,
		sizeof(cl_mem),
		&memObjects[0]);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error arg0: " << errNum << std::endl;
		exit(-8);
	}
	errNum = clSetKernelArg(
		kernel,
		1,
		sizeof(cl_mem),
		&memObjects[1]);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error arg1: " << errNum << std::endl;
		exit(-9);
	}

	size_t globalWorkSize[1] = {ARRAY_SIZE};
	size_t localWorkSize[1] = {1};
	errNum = clEnqueueNDRangeKernel(
		commandQueue,
		kernel,
		1,
		NULL,
		globalWorkSize,
		localWorkSize,
		0,
		NULL,
		NULL);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error enqueueing kernel: " << errNum << std::endl;
		exit(-7);
	}

	errNum = clEnqueueReadBuffer(
		commandQueue,
		memObjects[1],
		CL_TRUE,
		0,
		ARRAY_SIZE * sizeof(float),
		result,
		0,
		NULL,
		NULL);
	if (errNum != CL_SUCCESS)
	{
		std::cout << "Error enqueueing read buffer: " << errNum << std::endl;
		exit(-10);
	}

	// Cleanup
	clReleaseProgram(program);
	clReleaseMemObject(memObjects[0]);
	clReleaseMemObject(memObjects[1]);
	clReleaseCommandQueue(commandQueue);
	clReleaseContext(context);
	delete [] data;
	delete [] result;
}

The program itself is really simple. The kernel code is just a copy of the input cell to the output cell.

The problem is a crash when reading memory back from the device using clEnqueueReadBuffer(). If I replace the buffers with images, I get the same crash in clEnqueueReadImage() or clEnqueueMapImage()…

I have been working on this problem for several hours and I really can’t figure out what the problem is…

Thank you for your help,
Patrick

Actually, I put a clFinish() just after the clEnqueueNDRangeKernel() and it crashes at this place (before actually reaching clEnqueueReadBuffer())…

What could be the cause? Any idea?

Thanks a lot

Your input buffer can be in CL_MEM_READ_ONLY

but the output must be in CL_MEM_COPY_HOST_PTR

In the clEnqueueReadBuffer call you read from the output buffer.