Hello,
I think I have a problem with structure alignment.
Header on HOST
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_CL_1_2_DEFAULT_BUILD
#include <CL/cl2.hpp>
#pragma pack (push, 16)
typedef struct s_ray
{
cl_float4 dir;
cl_float4 origin;
cl_uint pos;
cl_float intensity;
} t_ray;
#pragma pack (pop)
Header on Device :
/*
 * Ray record as laid out in device memory.
 *
 * The members occupy 40 bytes (two float4, one uint, one float); the
 * aligned (16) attribute rounds each array element up to 48 bytes.
 * Field order and sizes must stay in step with the host-side t_ray.
 */
typedef struct __attribute__ ((aligned (16))) s_ray
{
    float4 dir;
    float4 origin;
    uint   pos;
    float  intensity;
} t_ray;
GenRay Kernel :
/*
 * GenRays - stores each work-item's global index in the `pos` field of its
 * slot in the output ray buffer.
 *
 * PosUpLeft, Origin, NormCam, width : accepted but not read by this kernel yet.
 * BufferRaysOut : output ray buffer, one t_ray per work-item.
 * MaxRaysLen    : number of t_ray elements available in BufferRaysOut.
 */
__kernel void GenRays(__global float4 *PosUpLeft,
                      __global float4 *Origin,
                      __global float4 *NormCam,
                      __global t_ray *BufferRaysOut,
                      uint width,
                      uint MaxRaysLen)
{
    const uint id = get_global_id(0);

    /* Same guard as the trace kernel: a work-item whose index lies past the
     * end of the buffer must not write to it. */
    if (id >= MaxRaysLen)
        return;

    BufferRaysOut[id].pos = id;
}
Trace Kernel :
/*
 * trace - writes one 32-bit value per ray into `buffer`, derived from the
 * ray's `pos` field.
 *
 * rays    : input ray buffer.
 * buffer  : output buffer, one uint per ray.
 * raysLen : number of valid elements in `rays` / `buffer`; work-items with an
 *           index at or past this value do nothing.
 */
__kernel void trace(
    __global t_ray *rays,
    __global uint *buffer,
    uint raysLen)
{
    const uint id = get_global_id(0);

    if (id >= raysLen)
        return;

    /* `+` binds tighter than `<<`, so the unparenthesised form
     * `0xFF000000 + x << 2` shifted the whole sum and pushed the top byte
     * out of the 32-bit word. Shift the low part first, then combine it with
     * the constant top byte (presumably an opaque alpha channel - confirm
     * against the consumer of `buffer`). */
    buffer[id] = 0xFF000000u | ((rays[id].pos % 255u) << 2);
}
Allocation Code :
RayBufferMaxLen = width * height;
std::cout << "kernelGenerateRayInit..." << std::endl;
Ray1 = new cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(cl_float4));
Ray2 = new cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(cl_float4));
Ray3 = new cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(cl_float4) * 3);
Rays = new cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(t_ray) * RayBufferMaxLen);
KernelGenerateRay = new cl::Kernel(*program, "GenRays");
KernelGenerateRay->setArg(0, *Ray1);
KernelGenerateRay->setArg(1, *Ray2);
KernelGenerateRay->setArg(2, *Ray3);
KernelGenerateRay->setArg(3, *Rays);
KernelGenerateRay->setArg(4, width);
KernelGenerateRay->setArg(5, RayBufferMaxLen);
Execution Code :
for (size_t i = 0; i < bufferLen; i++)
{
rays[i].pos = 0;
}
ret = queue->enqueueWriteBuffer(*Ray1, CL_TRUE, 0, sizeof(cl_float4), positionOnScreen);
ret |= queue->enqueueWriteBuffer(*Ray2, CL_TRUE, 0, sizeof(cl_float4), originCamera);
ret |= queue->enqueueWriteBuffer(*Ray3, CL_TRUE, 0, sizeof(cl_float4) * 3, vec3Cam);
ret |= queue->enqueueNDRangeKernel(*KernelGenerateRay, cl::NullRange, cl::NDRange(bufferLen), cl::NDRange(64));
ret |= queue->finish();
ret |= queue->enqueueReadBuffer(*Rays, CL_TRUE, 0, sizeof(t_ray) * bufferLen, rays);
displayOpenCLerror(ret);
for (size_t i = 0; i < bufferLen; i++)
{
std::cout << "I == " << i << std::endl;
if (rays[i].pos != i)
std::cout << "Diff : " << i << " and " << rays[i].pos, system("pause") ,exit(1);
}
The check fails at the second step: Diff, 1 != 0.
But if I look at all the values, I read correct data for many pixels.
Why?
Thanks for your help.