Calculate vertex norm with compute shader

#1

I am trying to calculate vertex norms with a compute shader but I seem to have a timing error

I accumulate the normals from all the faces adjacent to a vertex with this code


#version 430 core


struct vertex_in_struct
{
  float x;
  float y;
  float z;
  uint  nx;
  uint  ny;
  uint  nz;
};


layout( std430, binding=4 ) buffer Vertices
{
  vertex_in_struct Vertex[ ]; // array of structures
};


layout( std430, binding=5 ) readonly buffer Faces
{
  int Face[ ]; // array of structures
};


//uniform int   u_Group_Offset_X;
uniform uint  u_FaceCount;


layout( local_size_x = 128, local_size_y = 1, local_size_z = 1 ) in;


void main()
{
  uint gid = gl_GlobalInvocationID.x; // +u_Group_Offset_X; // the .y and .z are both 1 in this case


  if (gid < u_FaceCount)
  {
    int vi[3];
    vec3 v[3];


    // get vertex indices
    vi[0] = Face[gid*3];
    vi[1] = Face[gid*3+1];
    vi[2] = Face[gid*3+2];


    // get the vertex position
    for (int i = 0; i < 3; i++)
    {
      float vx = Vertex[vi[i]].x;
      float vy = Vertex[vi[i]].y;
      float vz = Vertex[vi[i]].z;


      v[i] = vec3(vx,vy,vz);
    }


    // calculate the face normal weighted by triangle size
    vec3 norm = (cross((v[2]-v[0]),(v[1]-v[0])));


    memoryBarrierBuffer();


    // accumulate normal
    uint prevVal; 
    uint newVal ;


    for (int i = 0; i < 3; i++)
    {
      // currently no atomic float add except on  some nVidia gpus
      do
      {
        prevVal = Vertex[vi[i]].nx;
        newVal = floatBitsToUint(( norm.x + uintBitsToFloat( prevVal )));
      } while( ( atomicCompSwap( Vertex[vi[i]].nx , prevVal , newVal ) ) != prevVal );
      do
      {
        prevVal = Vertex[vi[i]].ny;
        newVal = floatBitsToUint(( norm.y + uintBitsToFloat( prevVal )));
      } while( ( atomicCompSwap( Vertex[vi[i]].ny , prevVal , newVal ) ) != prevVal );
      do
      {
        prevVal = Vertex[vi[i]].nz;
        newVal = floatBitsToUint(( norm.z + uintBitsToFloat( prevVal )));
      } while( ( atomicCompSwap( Vertex[vi[i]].nz , prevVal , newVal ) ) != prevVal );  
    }


  }
}

Then I normalise with this


#version 430 core


struct vertex_in_struct
{
  float x;
  float y;
  float z;
  float nx;
  float ny;
  float nz;
};


layout( std430, binding=4 ) buffer Vertices
{
  vertex_in_struct Vertex[ ];
};


uniform int   u_Group_Offset_X;
uniform uint  u_VertexCount;


layout( local_size_x = 128, local_size_y = 1, local_size_z = 1 ) in;


void main()
{
  uint gid = gl_GlobalInvocationID.x+u_Group_Offset_X; // the .y and .z are both 1 in this case


  if (gid < u_VertexCount)
  {
    vec3 norm = normalize(vec3(Vertex[gid].nx,Vertex[gid].ny,Vertex[gid].nz));


    memoryBarrierBuffer();


    Vertex[gid].nx = norm.x;
    Vertex[gid].ny = norm.y;
    Vertex[gid].nz = norm.z;
  }
}

I used a shader to display the normals at each vertex. Most of my normals look right but some appear to be un-normalised.
If I normalise them in the display shader they all look fine.

I have tried memoryBarrier and memoryBarrierBuffer to ensure load/modifiy/store to the buffers stays in sink.