The following code fails for me on macOS Sierra with a Radeon Pro 450. Where and how should I report it?
// Minimal reproduction: repeatedly fill an OpenCL device buffer with a
// per-iteration float value, read the buffer back to the host, and print the
// last element.
// NOTE(review): `context`, `queue`, `err` and `checkError` are defined outside
// this snippet.
int N = 100000;
// Host-side destination for the read-back. The array is not initialized here,
// so its contents are indeterminate until a read actually writes into it.
float *a = new float[N];
// Device buffer sized for N floats; no host pointer is supplied (argument 0).
cl_mem a_gpu = clCreateBuffer(context, CL_MEM_READ_WRITE, N * sizeof(float), 0, &err);
checkError(err);
for(int it = 0; it < 100; it++) {
// The pattern changes every iteration (123, 124, ...), so each read-back is
// expected to show a different value.
float value = 123.0f + it;
// Fill the whole buffer (offset 0, N * sizeof(float) bytes) with `value`,
// using a pattern of sizeof(value) bytes. No wait list, no event returned.
err = clEnqueueFillBuffer(queue, a_gpu, &value, sizeof(value), 0, N * sizeof(float), 0, 0, 0);
checkError(err);
// Block until the queued fill has completed before reading.
clFinish(queue);
// Blocking read (CL_TRUE) of the entire buffer, from offset 0, into `a`.
err = clEnqueueReadBuffer(queue, a_gpu, CL_TRUE, 0,
sizeof(cl_float) * N, a, 0, NULL, NULL);
checkError(err);
// NOTE(review): likely redundant, since the read above is already blocking.
clFinish(queue);
// Print the last element; it should equal `value` if the fill covered the
// whole buffer.
cout << it << " a[N - 1]=" << a[N - 1] << endl;
}
delete[] a;
// NOTE(review): `a_gpu` is not released in this snippet (no
// clReleaseMemObject call is visible) - confirm it is released elsewhere.
Expected output:
0 a[N - 1]=123
1 a[N - 1]=124
2 a[N - 1]=125
3 a[N - 1]=126
4 a[N - 1]=127
5 a[N - 1]=128
6 a[N - 1]=129
7 a[N - 1]=130
Actual output:
0 a[N - 1]=0
1 a[N - 1]=0
2 a[N - 1]=0
3 a[N - 1]=0
4 a[N - 1]=0
5 a[N - 1]=0
6 a[N - 1]=0
7 a[N - 1]=0
8 a[N - 1]=0
9 a[N - 1]=132