OpenCL freeze issue on Intel GPU

I tried to run a very simple OpenCL program on an Intel UHD 630 GPU, and it froze after a certain number of runs. It doesn't happen on an Intel CPU or an NVIDIA GPU.
I have also discussed this issue on the Intel community forum, and the code can be downloaded from that thread, which can be found by searching for the keywords "Opencl program stops after executing 2 to the power of 32 times".
An Intel employee was also able to reproduce the issue, and they are trying to figure out what is happening.
However, there has been no feedback for a long time.
I created this topic just to ask: is there any way to reset the platform ID or device ID other than creating a new process? I ask because I found that, after the freeze occurs, the new device ID obtained by creating a new process can again run the same number of times.

The following is the test code for the OpenCL program.

#include <cstdlib>
#include <iostream>
#include <iomanip>
#include <cstring>
#include <cassert>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <windows.h>
#include <process.h>

#include <CL/cl.h>
#define KERNEL(...) #__VA_ARGS__

#define MAX_ARG_N 128
#pragma warning( disable : 4996 )

using namespace std;
// Host-side source values: main() sets entry i to i and writes it into device buffer i.
static UINT32 param_buffer[MAX_ARG_N];
// OpenCL context created in main() for the selected GPU device.
cl_context context = NULL;
// Two command queues are created in main(); `cque` is set to command_queue[0]
// and is the only queue the visible code enqueues work on.
cl_command_queue command_queue[2] = { NULL }, cque;
// Program built from the embedded kernel source, and the "test" kernel created from it.
cl_program program = NULL;
cl_kernel kernel = NULL;
// Progress marker written by main() (1..5) and printed by monitor_func().
int state = 0;
// Run counter printed by monitor_func().
// NOTE(review): no increment of runcount is visible in this excerpt — confirm against the full source.
LONGLONG runcount = 0;
	// Body of an error-check macro: if ERR is not CL_SUCCESS, print the error
	// code together with __FILE__ and __LINE__ to cerr and terminate with exit(1).
	// NOTE(review): the `#define` line that names this macro and the closing brace
	// are not present in this excerpt — restore them from the full source.
	if(ERR != CL_SUCCESS){ \
		cerr << "OpenCL error code" << ERR \
			 << "file: " << __FILE__ \
			 << "line: " << __LINE__ \
			 << ".\nExiting..." << endl; \
		exit(1); \
// Monitor thread entry point (started from main() via _beginthread).
// Loops forever printing the global `runcount` and the global `state`, so the
// last state value shows which OpenCL call main() had reached.
// The parameter `p` is not used by the visible code.
// NOTE(review): the function/loop braces are missing in this excerpt, and no
// delay between iterations is visible — confirm against the full source.
// NOTE(review): `runcount` and `state` are plain globals shared with the main
// thread without any synchronization visible here.
void monitor_func(LPVOID p)
	while (1) {
		printf("count %lld ", runcount);
		printf("state %d\n", state);



// Test driver: lists the OpenCL platforms, picks the first platform and its
// first GPU device, builds a kernel named "test" that takes 128 __global int*
// arguments, then loops writing all 128 buffers, launching the kernel and
// waiting for completion for as long as every call returns CL_SUCCESS.
// NOTE(review): brace-only lines, the FINISH label targeted by the gotos, and
// the statements guarded by the cleanup `if`s are missing in this excerpt.
int main(int argc, const char** argv)
	// NOTE(review): `err` is assigned by the platform queries below but is never checked in the visible code.
	cl_int err = CL_SUCCESS;

	cl_int ret;
	cl_uint cmdqueuesize = 0;
	cl_platform_id platform_id = NULL;
	cl_device_id device_id = NULL;

	// One device buffer per kernel argument.
	cl_mem memObj[MAX_ARG_N];
	const char* kernelSource = NULL;

	LARGE_INTEGER StartingTime, EndingTime, ElapsedMicroseconds;
	size_t kernelLength;
	int i;

	// First call asks only for the number of platforms; second call fetches their IDs.
	cl_uint num_of_platforms = 0;
	err = clGetPlatformIDs(0, 0, &num_of_platforms);

	cl_platform_id* platforms = new cl_platform_id[num_of_platforms];
	err = clGetPlatformIDs(num_of_platforms, platforms, 0);

	// Print the name of every platform (size query first, then the string itself).
	// NOTE(review): no delete[] for platform_name or platforms is visible in this excerpt.
	for (cl_uint i = 0; i < num_of_platforms; i++)
		size_t platform_name_length = 0;
		err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 0, 0, &platform_name_length);

		char* platform_name = new char[platform_name_length];
		err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, platform_name_length, platform_name, 0);

		cout << "    [" << i << "] " << platform_name << endl;

	// Use the first platform returned, regardless of the list printed above.
	clGetPlatformIDs(1, &platform_id, NULL);
	if (platform_id == NULL)
		puts("Get OpenCL platform failed!");
		goto FINISH;

	// Use the first GPU device of that platform.
	clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
	if (device_id == NULL)
		puts("No GPU available as a compute device!");
		goto FINISH;

	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
	if (context == NULL)
		puts("Context not established!");
		goto FINISH;
	// Two queues are created, but only command_queue[0] (via cque) is used below.
	command_queue[0] = clCreateCommandQueue(context, device_id, 0, &ret);
	if (command_queue[0] == NULL)
		puts("Command queue cannot be created!");
		goto FINISH;
	command_queue[1] = clCreateCommandQueue(context, device_id, 0, &ret);
	if (command_queue[1] == NULL)
		puts("Command queue cannot be created!");
		goto FINISH;
	cque = command_queue[0];
	// Kernel source is turned into a string literal by the KERNEL(...) macro.
	// NOTE(review): the end of the kernel body and the closing of KERNEL( are missing in this excerpt.
	kernelSource = KERNEL(
		__kernel void test(
			__global int* arg00, __global int* arg01, __global int* arg02, __global int* arg03,
			__global int* arg04, __global int* arg05, __global int* arg06, __global int* arg07,
			__global int* arg08, __global int* arg09, __global int* arg0a, __global int* arg0b,
			__global int* arg0c, __global int* arg0d, __global int* arg0e, __global int* arg0f,
			__global int* arg10, __global int* arg11, __global int* arg12, __global int* arg13,
			__global int* arg14, __global int* arg15, __global int* arg16, __global int* arg17,
			__global int* arg18, __global int* arg19, __global int* arg1a, __global int* arg1b,
			__global int* arg1c, __global int* arg1d, __global int* arg1e, __global int* arg1f,
			__global int* arg20, __global int* arg21, __global int* arg22, __global int* arg23,
			__global int* arg24, __global int* arg25, __global int* arg26, __global int* arg27,
			__global int* arg28, __global int* arg29, __global int* arg2a, __global int* arg2b,
			__global int* arg2c, __global int* arg2d, __global int* arg2e, __global int* arg2f,
			__global int* arg30, __global int* arg31, __global int* arg32, __global int* arg33,
			__global int* arg34, __global int* arg35, __global int* arg36, __global int* arg37,
			__global int* arg38, __global int* arg39, __global int* arg3a, __global int* arg3b,
			__global int* arg3c, __global int* arg3d, __global int* arg3e, __global int* arg3f,
			__global int* arg40, __global int* arg41, __global int* arg42, __global int* arg43,
			__global int* arg44, __global int* arg45, __global int* arg46, __global int* arg47,
			__global int* arg48, __global int* arg49, __global int* arg4a, __global int* arg4b,
			__global int* arg4c, __global int* arg4d, __global int* arg4e, __global int* arg4f,
			__global int* arg50, __global int* arg51, __global int* arg52, __global int* arg53,
			__global int* arg54, __global int* arg55, __global int* arg56, __global int* arg57,
			__global int* arg58, __global int* arg59, __global int* arg5a, __global int* arg5b,
			__global int* arg5c, __global int* arg5d, __global int* arg5e, __global int* arg5f,
			__global int* arg60, __global int* arg61, __global int* arg62, __global int* arg63,
			__global int* arg64, __global int* arg65, __global int* arg66, __global int* arg67,
			__global int* arg68, __global int* arg69, __global int* arg6a, __global int* arg6b,
			__global int* arg6c, __global int* arg6d, __global int* arg6e, __global int* arg6f,
			__global int* arg70, __global int* arg71, __global int* arg72, __global int* arg73,
			__global int* arg74, __global int* arg75, __global int* arg76, __global int* arg77,
			__global int* arg78, __global int* arg79, __global int* arg7a, __global int* arg7b,
			__global int* arg7c, __global int* arg7d, __global int* arg7e, __global int* arg7f
		int index = get_global_id(0);

	kernelLength = { strlen(kernelSource) };

	// Compile the kernel source for the selected device.
	// NOTE(review): the result of clCreateProgramWithSource is not checked before clBuildProgram.
	program = clCreateProgramWithSource(context, 1, (const char**)&kernelSource, (const size_t*)&kernelLength, &ret);
	ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
	if (ret != CL_SUCCESS)
		size_t len;
		char buffer[8 * 2048];

		// On build failure, fetch and print the compiler's build log.
		printf("Error: Failed to build program executable!\n");
		clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s\n", buffer);
		goto FINISH;

	kernel = clCreateKernel(program, "test", &ret);
	if (kernel == NULL)
		puts("Kernel failed to create!");
		goto FINISH;

	// Create one 4-byte buffer per kernel argument and set the matching host value to its index.
	for (i = 0; i < MAX_ARG_N; i++)
		param_buffer[i] = i;
		memObj[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, 4, NULL, &ret);
		if (ret != CL_SUCCESS)
			puts("CreateBuffer error!");
			goto FINISH;

	// NOTE(review): at this point `ret` still holds the last clCreateBuffer status;
	// the clSetKernelArg calls only appear further down, so this message does not
	// match what is being checked in the visible code.
	if (ret != CL_SUCCESS)
		puts("Set arguments error!");
		goto FINISH;
	// Start the monitor thread that prints runcount/state while the loop below runs.
	HANDLE	hThread;
	hThread = (HANDLE)_beginthread(monitor_func, 0, NULL);

	ret = CL_SUCCESS;

	state = 1;

	// Bind buffer i to kernel argument i; statuses are OR-ed together into ret.
	for (i = 0; i < MAX_ARG_N; i++) {
		ret |= clSetKernelArg(kernel, i, sizeof(cl_mem), (void*)&memObj[i]);

	// Main stress loop: repeats until any OpenCL call reports a status other than CL_SUCCESS.
	// `state` is updated before each phase so the monitor thread shows which call is in progress.
	// NOTE(review): no update of `runcount` is visible in this loop.
	while (ret == CL_SUCCESS)

		state = 2;

		// Blocking write (CL_TRUE) of 4 bytes into each of the 128 buffers.
		for (i = 0; i < MAX_ARG_N; i++)
			ret |= clEnqueueWriteBuffer(cque, memObj[i], CL_TRUE, 0, 4, &param_buffer[i], 0, NULL, NULL);

		state = 3;
		// Launch the kernel over a 1-D range of 256 work-items; local size is left NULL.
		size_t WorkSize[1] = { 256 };
		ret |= clEnqueueNDRangeKernel(cque, kernel, 1, NULL, WorkSize, NULL, 0, NULL, NULL);

		state = 4;
		ret |= clFlush(cque);

		state = 5;
		// Wait for all commands queued on cque to complete.
		ret |= clFinish(cque);



	// Cleanup section.
	// NOTE(review): the statements guarded by these checks (and the FINISH label)
	// are missing in this excerpt — presumably clRelease* calls; confirm against the full source.
	if (kernel != NULL)

	if (program != NULL)

	if (command_queue[0] != NULL)
	if (command_queue[1] != NULL)

	if (context != NULL)

	printf("End: Program run End!\n");
	return 0;

The result looks like this: