problem with smoothed particle hydrodynamics kernel

I’m taking the oclParticles example program and changing it to an SPH simulation. However, when the computePressure kernel is called I get the error:

Unhandled exception at 0x049ed72b in oclParticles.exe: 0xC0000005: Access violation reading location 0x00008068

With some print statements I can see that my computePressure kernel is called but doesn’t finish. I should also mention that when I comment out all of the code inside the kernel (ie. so it doesn’t do anything) I still get the same error. This leads me to believe the problem isn’t in the kernel code but perhaps in the way I’ve called the kernel. Anyway I’ve spent a few days looking for my problem with no success, so I thought I’d post here to see if someone can see what my problem is. Thanks for looking. :slight_smile:

Here is the array allocation:

 //Allocate GPU data
    shrLog("Allocating GPU Data buffers...

    allocateArray(&m_dPos,          m_numParticles * 4 * sizeof(float));
    allocateArray(&m_dVel,          m_numParticles * 4 * sizeof(float));
    allocateArray(&m_dDen,			m_numParticles * sizeof(float) );
    allocateArray(&m_dPre,			m_numParticles * sizeof(float) );
    allocateArray(&m_dReorderedPos, m_numParticles * 4 * sizeof(float));
    allocateArray(&m_dReorderedVel, m_numParticles * 4 * sizeof(float));
    allocateArray(&m_dReorderedDen, m_numParticles * sizeof(float) );
    allocateArray(&m_dReorderedPre, m_numParticles * sizeof(float) );
    allocateArray(&m_dHash,         m_numParticles * sizeof(uint));
    allocateArray(&m_dIndex,        m_numParticles * sizeof(uint));
    allocateArray(&m_dCellStart,    m_numGridCells * sizeof(uint));
    allocateArray(&m_dCellEnd,      m_numGridCells * sizeof(uint));
    shrLog("Allocation of GPU Data buffers was successful...


Here is where the kernel is called from particleSystem_class.cpp:

std::cout << "Here we launch the kernel computePressure..." << std::endl;
std::cout << "...but we never get here." << std::endl;

Declaration in particleSystem_engine.h:

extern "C" void computePressure(
    memHandle_t d_Den,
    memHandle_t d_Pre,
    memHandle_t d_ReorderedPos,
    memHandle_t d_Index,
    memHandle_t d_CellStart,
    memHandle_t d_CellEnd,
    uint   numParticles,
    uint   numCells

Defn in oclParticles_launcher.cpp:

extern "C" void computePressure(
    memHandle_t d_Den,
	memHandle_t d_Pre,
    memHandle_t d_ReorderedPos,
    memHandle_t d_Index,
    memHandle_t d_CellStart,
    memHandle_t d_CellEnd,
    uint   numParticles,
    uint   numCells
    cl_int ciErrNum;
    size_t globalWorkSize = uSnap(numParticles, wgSize);

    ciErrNum  = clSetKernelArg(ckComputePressure, 0, sizeof(cl_mem), (void *)&d_Den);
    ciErrNum |= clSetKernelArg(ckComputePressure, 1, sizeof(cl_mem), (void *)&d_Pre);
    ciErrNum |= clSetKernelArg(ckComputePressure, 2, sizeof(cl_mem), (void *)&d_ReorderedPos);
    ciErrNum |= clSetKernelArg(ckComputePressure, 3, sizeof(cl_mem), (void *)&d_Index);
    ciErrNum |= clSetKernelArg(ckComputePressure, 4, sizeof(cl_mem), (void *)&d_CellStart);
    ciErrNum |= clSetKernelArg(ckComputePressure, 5, sizeof(cl_mem), (void *)&d_CellEnd);
    ciErrNum |= clSetKernelArg(ckComputePressure, 6, sizeof(cl_mem), (void *)&params);
    ciErrNum |= clSetKernelArg(ckComputePressure, 7, sizeof(uint),   (void *)&numParticles);
    oclCheckError(ciErrNum, CL_SUCCESS);

    ciErrNum = clEnqueueNDRangeKernel(cqDefaultCommandQue, ckComputePressure, 1, NULL, &globalWorkSize, &wgSize, 0, NULL, NULL);
    oclCheckError(ciErrNum, CL_SUCCESS);

Here is the OpenCL code for the kernel computePressure:

__kernel void computePressure(
    __global float		  *d_Den,          //output: new density
	__global float		  *d_Pre,		   //output: new pressure
    __global const float4 *d_ReorderedPos, //input: reordered positions
    __global const uint   *d_Index,        //input: reordered particle indices
    __global const uint   *d_CellStart,    //input: beginning of cell boundary
    __global const uint   *d_CellEnd,      //input: end of cell boundary
    __constant simParams_t *params,
    uint    numParticles
    uint index = get_global_id(0);
    if(index >= numParticles)

    float4 pos = d_ReorderedPos[index];

	float sum = 0.0, distanceSqrd = 0.0, dx, dy, dz;

    //Get address in grid
    int4 gridPos = getGridPos(pos, params);

    //Accumulate surrounding cells
    for(int z = -1; z <= 1; z++)
        for(int y = -1; y <= 1; y++)
            for(int x = -1; x <= 1; x++)
                //Get start particle index for this cell
                uint   hash = getGridHash(gridPos + (int4)(x, y, z, 0), params);
                uint startI = d_CellStart[hash];

                //Skip empty cell
                if(startI == 0xFFFFFFFFU)

                //Iterate over particles in this cell
                uint endI = d_CellEnd[hash];
                for(uint j = startI; j < endI; j++)
                    if(j == index)

                    float4 pos2 = d_ReorderedPos[j];

					dx = pos.x - pos2.x;
					dy = pos.y - pos2.y;
					dz = pos.z - pos2.z;
					//if a particle is within the smoothing length of the query particle include it in the summation
					distanceSqrd = ( dx * dx ) + ( dy * dy ) + ( dz * dz );
					if( distanceSqrd <= params->smoothingRadiusSqrd )
						distanceSqrd = params->smoothingRadiusSqrd - distanceSqrd;
						sum += distanceSqrd * distanceSqrd * distanceSqrd;

	//Now we update the density and pressure to the original unsorted location
	float density = ( sum * params->particleMass * params->poly6Kern );
	d_Pre[d_Index[index]] = ( density - params->restDensity ) * params->internalStiffness;
	d_Den[d_Index[index]] =  1.0 / density;

I can’t edit by above post but my error was here:

ckComputePressure = clCreateKernel(cpParticles, "computePressure", &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);
ckComputePressure = clCreateKernel(cpParticles, "computeForce", &ciErrNum);
oclCheckError(ciErrNum, CL_SUCCESS);


But thanks for looking! :slight_smile: