HelloWorld free invalid pointer

After setting up the OpenCL development environment on the mips64el platform, I used the OpenCL HelloWorld program to test the environment and ran into the following problem:

*** Error in `./HH1': free(): invalid pointer: 0x0000005559667148 ***
======= Backtrace: =========
/lib64/libc.so.6(+0x9f854)[0x55567e3854]
/lib64/libc.so.6(__libc_free+0x804)[0x55567f422c]
/lib64/libstdc++.so.6(_ZdlPv+0x34)[0x555644e45c]
/lib64/libstdc++.so.6(+0x12cb58)[0x555650cb58]
/lib64/libstdc++.so.6(_ZNSs4_Rep10_M_destroyERKSaIcE+0x78)[0x555650b958]
/lib64/libMesaOpenCL.so.1(+0x3302dc)[0x5556c702dc]

The test program is from AMD APP SDK 3.0 and consists of HelloWorld.cpp and HelloWorld.cl.
I hope someone can give me some tips on how to solve this memory crash in the OpenCL program. Thank you in advance.

The information for the test platform and related packages is as follows:

[root@localhost ~]# rpm -qa | grep llvm
llvm-libs-3.7.0-4.fc21.loongson.mips64el
llvm-3.7.0-4.fc21.loongson.mips64el
llvm-doc-3.7.0-4.fc21.loongson.noarch
llvm-static-3.7.0-4.fc21.loongson.mips64el
llvm-devel-3.7.0-4.fc21.loongson.mips64el
llvm-debuginfo-3.7.0-4.fc21.loongson.mips64el

[root@localhost ~]# rpm -qa | grep clang
clang-analyzer-3.7.0-4.fc21.loongson.noarch
clang-3.7.0-4.fc21.loongson.mips64el
clang-libs-3.7.0-4.fc21.loongson.mips64el
clang-devel-3.7.0-4.fc21.loongson.mips64el

[root@localhost ~]# rpm -qa | grep mesa
mesa-libOpenCL-debuginfo-18.2.0~rc5-1.fc29.mips64el
mesa-libwayland-egl-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-vdpau-drivers-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libOSMesa-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libOpenCL-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libEGL-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-filesystem-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libGL-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-dri-drivers-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-debuginfo-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libGL-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libwayland-egl-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libglapi-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libgbm-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libEGL-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libOpenCL-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libGLU-9.0.0-7.fc21.loongson.mips64el
mesa-libgbm-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libOSMesa-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libGLES-11.1.0-5.20151218.fc21.loongson.6.mips64el
mesa-libGLES-devel-11.1.0-5.20151218.fc21.loongson.6.mips64el

[root@localhost ~]# rpm -qa | grep libclc
libclc-devel-0.0.1-11.20150918git4346c30.fc21.loongson.mips64el
libclc-0.0.1-11.20150918git4346c30.fc21.loongson.mips64el

[root@localhost ~]# rpm -qa | grep ocl-icd
ocl-icd-devel-2.2.7-2.git20150606.ebbc4c1.fc21.loongson.mips64el
ocl-icd-debuginfo-2.2.7-2.git20150606.ebbc4c1.fc21.loongson.mips64el
ocl-icd-2.2.7-2.git20150606.ebbc4c1.fc21.loongson.mips64el
Graphics card information:
[loongson@localhost ~]$ lspci -vnn |grep VGA
        Flags: bus master, VGA palette snoop, 66MHz, medium devsel, latency 64
01:00.0 VGA compatible controller [0300]: Advanced Micro Devices, Inc. 
[AMD/ATI] Caicos XT [Radeon HD 7470/8470 / R5 235 OEM] [1002:6778] (prog-if 00 [VGA controller])

The complete program code is as follows:

#file HelloWorld.cpp

#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <CL/cl.h>
 
// Number of float elements in each of the two input arrays and the result array.
const int ARRAY_SIZE = 1000;
 
// Step 1: select an OpenCL platform and create a context on it.
//
// Picks the first platform reported by clGetPlatformIDs and asks for a context
// containing that platform's GPU devices.
//
// Returns the new context, or NULL when no platform is found or the context
// cannot be created; a diagnostic is written to std::cerr in both cases.
cl_context CreateContext()
{
	cl_uint numPlatforms = 0;
	cl_platform_id firstPlatformId = NULL;

	// Request at most one platform id; numPlatforms receives the total available.
	cl_int errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
	if (errNum != CL_SUCCESS || numPlatforms == 0)
	{
		std::cerr << "Failed to find any OpenCL platforms." << std::endl;
		return NULL;
	}

	// Zero-terminated property list that binds the context to the chosen platform.
	cl_context_properties contextProperties[] =
	{
		CL_CONTEXT_PLATFORM,
		reinterpret_cast<cl_context_properties>(firstPlatformId),
		0
	};
	cl_context context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
		NULL, NULL, &errNum);
	if (errNum != CL_SUCCESS || context == NULL)
	{
		// Previously the error code was dropped and the caller got no hint why
		// later calls failed.
		std::cerr << "Failed to create an OpenCL GPU context (error " << errNum << ")." << std::endl;
		return NULL;
	}

	return context;
}
 
 
// Step 2: pick a device from the context and create a command queue for it.
//
// context - context whose device list is queried.
// device  - out-parameter; receives the first device of the context once the
//           device list has been read successfully.
//
// Returns the command queue for that device, or NULL on any failure (a
// diagnostic is written to std::cerr).
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
	// Query the size in bytes of the context's device list. Start from 0 so a
	// failed query can never be mistaken for a huge device list.
	size_t deviceBufferSize = 0;
	cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
	if (errNum != CL_SUCCESS)
	{
		std::cerr << "Failed to query the context's device list (error " << errNum << ")." << std::endl;
		return NULL;
	}

	if (deviceBufferSize < sizeof(cl_device_id))
	{
		std::cerr << "No devices available." << std::endl;
		return NULL;
	}

	// Storage for the device ids; released automatically on every return path.
	std::vector<cl_device_id> devices(deviceBufferSize / sizeof(cl_device_id));
	errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES,
		devices.size() * sizeof(cl_device_id), &devices[0], NULL);
	if (errNum != CL_SUCCESS)
	{
		std::cerr << "Failed to get device IDs (error " << errNum << ")." << std::endl;
		return NULL;
	}

	// Use the first device in the list.
	*device = devices[0];
	cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &errNum);
	if (errNum != CL_SUCCESS || commandQueue == NULL)
	{
		std::cerr << "Failed to create command queue for device 0 (error " << errNum << ")." << std::endl;
		return NULL;
	}

	return commandQueue;
}
 
 
// Step 3: create a program object from a source file and build it.
//
// context  - context the program is created in.
// device   - device whose build log is reported when the build fails.
// fileName - path of the OpenCL C source file to load.
//
// Returns the built program, or NULL when the file cannot be opened, the
// program object cannot be created, or the build fails. On a build failure
// the build log is written to std::cerr and the program object is released.
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
	std::ifstream kernelFile(fileName, std::ios::in);
	if (!kernelFile.is_open())
	{
		std::cerr << "Failed to open file for reading: " << fileName << std::endl;
		return NULL;
	}

	// Read the whole file into memory.
	std::ostringstream oss;
	oss << kernelFile.rdbuf();

	const std::string srcStdStr = oss.str();
	const char *srcStr = srcStdStr.c_str();

	cl_int errNum = CL_SUCCESS;
	cl_program program = clCreateProgramWithSource(context, 1,
		&srcStr,
		NULL, &errNum);
	if (errNum != CL_SUCCESS || program == NULL)
	{
		std::cerr << "Failed to create CL program from source (error " << errNum << ")." << std::endl;
		return NULL;
	}

	errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	if (errNum != CL_SUCCESS)
	{
		// Fetch the build log: first its size, then its contents.
		size_t logSize = 0;
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
		std::string buildLog(logSize, '\0');
		if (logSize > 0)
		{
			clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
				logSize, &buildLog[0], NULL);
		}

		std::cerr << "Failed to build program (error " << errNum << "):" << std::endl;
		std::cerr << buildLog.c_str() << std::endl;

		clReleaseProgram(program);
		return NULL;
	}

	return program;
}
 
// Create the three device buffers used by the kernel.
//
// context    - context the buffers are created in.
// memObjects - out-array: [0] and [1] become read-only buffers initialised
//              from a and b, [2] becomes a read-write buffer for the result.
// a, b       - host arrays of ARRAY_SIZE floats copied into buffers 0 and 1.
//
// Returns true when all three buffers were created, false otherwise. On
// failure any buffers that were created stay in memObjects so the caller can
// release them.
bool CreateMemObjects(cl_context context, cl_mem memObjects[3],
	float *a, float *b)
{
	const size_t bufferBytes = sizeof(float) * ARRAY_SIZE;

	memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
		bufferBytes, a, NULL);
	memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
		bufferBytes, b, NULL);
	memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
		bufferBytes, NULL, NULL);

	// Report failure so the caller's check of the return value is meaningful.
	if (memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
	{
		std::cerr << "Error creating memory objects." << std::endl;
		return false;
	}

	return true;
}
 
 
// Release the OpenCL resources held by the program.
//
// Every handle is optional: null handles are skipped. The release order is
// memory objects, command queue, kernel, program, context.
void Cleanup(cl_context context, cl_command_queue commandQueue,
	cl_program program, cl_kernel kernel, cl_mem memObjects[3])
{
	// Walk the three buffer slots and release the ones that were created.
	for (cl_mem *buffer = memObjects; buffer != memObjects + 3; ++buffer)
	{
		if (*buffer)
		{
			clReleaseMemObject(*buffer);
		}
	}

	if (commandQueue)
	{
		clReleaseCommandQueue(commandQueue);
	}

	if (kernel)
	{
		clReleaseKernel(kernel);
	}

	if (program)
	{
		clReleaseProgram(program);
	}

	if (context)
	{
		clReleaseContext(context);
	}
}
 
int main(int argc, char** argv)
{
	cl_context context = 0;
	cl_command_queue commandQueue = 0;
	cl_program program = 0;
	cl_device_id device = 0;
	cl_kernel kernel = 0;
	cl_mem memObjects[3] = { 0, 0, 0 };
	cl_int errNum;
 
	// 一、选择OpenCL平台并创建一个上下文
	context = CreateContext();
 
	// 二、 创建设备并创建命令队列
	commandQueue = CreateCommandQueue(context, &device);
 
	//创建和构建程序对象
	program = CreateProgram(context, device, "HelloWorld.cl");
 
	// 四、 创建OpenCL内核并分配内存空间
	kernel = clCreateKernel(program, "hello_kernel", NULL);
 
	//创建要处理的数据
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];
	for (int i = 0; i < ARRAY_SIZE; i++)
	{
		a[i] = (float)i;
		b[i] = (float)(ARRAY_SIZE - i);
	}
 
	//创建内存对象
	if (!CreateMemObjects(context, memObjects, a, b))
	{
		Cleanup(context, commandQueue, program, kernel, memObjects);
		return 1;
	}
 
	// 五、 设置内核数据并执行内核
	errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObjects[0]);
	errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObjects[1]);
	errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObjects[2]);
 
	size_t globalWorkSize[1] = { ARRAY_SIZE };
	size_t localWorkSize[1] = { 1 };
 
	errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL,
		globalWorkSize, localWorkSize,
		0, NULL, NULL);
 
	// 六、 读取执行结果并释放OpenCL资源
	errNum = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE,
		0, ARRAY_SIZE * sizeof(float), result,
		0, NULL, NULL);
 
	for (int i = 0; i < ARRAY_SIZE; i++)
	{
		std::cout << result[i] << " ";
	}
	std::cout << std::endl;
	std::cout << "Executed program succesfully." << std::endl;
	getchar();
	Cleanup(context, commandQueue, program, kernel, memObjects);
 
	return 0;
}

#file HelloWorld.cl
/* Element-wise addition: each work-item writes result[i] = a[i] + b[i] for the
 * index i given by its global id in dimension 0. */
__kernel void hello_kernel(__global const float *a,
	__global const float *b,
	__global float *result)
{
	const int idx = get_global_id(0);
	const float sum = a[idx] + b[idx];

	result[idx] = sum;
}