Hi,
I am doing some simple float operations on the input buffer data and I see large differences between GPU and CPU results.
Shader:
#version 450
layout(local_size_x = 10, local_size_y = 10, local_size_z = 10) in;
layout (binding = 0) buffer InputBuffer {
float inputData[];
};
layout (binding = 1) buffer OutputBuffer {
float outputData[];
};
void main() {
uint idx = gl_GlobalInvocationID.x+
gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x +
gl_GlobalInvocationID.z * gl_NumWorkGroups.x * gl_WorkGroupSize.x * gl_NumWorkGroups.y * gl_WorkGroupSize.y;
if (idx<1000000)
{
outputData[idx] = inputData[idx] * inputData[idx]/1.27f;
}
}
After running GPU and compute the output data I do the same operation in CPU and I see large differences:
void getOutputData()
{
void* data;
float inFloat = 1.0f;
vkMapMemory(device, outBufferMemory[0], 0, MAX_BUFFER_SIZE, 0, &data);
for (int i = 0; i < MAX_BUFFER_ITEMS; i++)
{
float outFloat = (float)inFloat * (float)inFloat/1.27f;
printf("Output: i=%d, in=%f, %f != %f \n\n", i, inFloat, ((float*)data)[i], outFloat);
if (std::abs(((float*)data)[i] - outFloat)>0.1f)
{
printf("--> Error Output: i=%d, in=%f, %f != %f \n\n", i, inFloat, ((float*)data)[i], outFloat );
break;
}
inFloat += 1.0f;
}
vkUnmapMemory(device, outBufferMemory[0]);
}
GPU result != CPU result
--> Error Output: i=1155, in=1156.000000, 1052233.000000 != 1052233.125000
Any solution to have GPU float calculations matching CPU float calculations or to have smaller errors? For me this is important because I have to do much complex calculations which they need to match the C model.
Any idea much appreciated!
Thanks,
Andrei