A synchronisation problem

#1

Hi!
I have a first shader that writes fragments to an SSBO and a second shader that reads it.

glMemoryBarrier doesn’t work for me: it only waits until the values written to the SSBO are visible in memory, but because the SSBO holds a linked list there can be several writes for a single fragment, and I need the second shader to execute only once all fragments have been written to the SSBO, not just the first one.

So this is what I want to do: tell OpenGL that everything in the first pass has been drawn before it moves on to the shader of the second pass. The problem is that the first draw function executes in parallel with the second draw function, and I don’t want that; I want the first draw function to have finished executing before the shader of the second pass runs. I tried glFinish and GL sync objects, but it doesn’t work.

Here is some code:

// Two-pass rendering into frameBuffer:
//   pass 1 draws every non-empty instance with the perPixelLinkedList shader,
//   pass 2 redraws the same instances with the perPixelLinkedListP2 shader.
frameBuffer.setActive();
currentStates.blendMode = sf::BlendAlpha;
currentStates.shader=&perPixelLinkedList;

// Pass 1: one draw call per instance that has at least one vertex.
for (unsigned int i = 0; i < m_instances.size(); i++) {
   if (m_instances[i].getAllVertices().getVertexCount() > 0) {
        // "haveTexture" is a float flag: 0 when the material has no texture,
        // 1 otherwise (the first-pass shader tests it with `> 0.9`).
        if (m_instances[i].getMaterial().getTexture() == nullptr) {
            perPixelLinkedList.setParameter("haveTexture", 0.f);
        } else {
            perPixelLinkedList.setParameter("haveTexture", 1.f);
        }
        currentStates.texture = m_instances[i].getMaterial().getTexture();
        frameBuffer.draw(m_instances[i].getAllVertices(), currentStates);
    }
}

// Synchronisation attempt between the two passes.
//glCheck(glTextureBarrier());
glCheck(glFinish());
// NOTE(review): the second-pass shader reads `headPointers` with imageLoad,
// but GL_SHADER_IMAGE_ACCESS_BARRIER_BIT is not among the bits requested
// here — confirm whether it must be added.
glCheck(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT));

// Clear color and depth before the second pass
// (presumably on the framebuffer activated above — TODO confirm).
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );

// Pass 2: same geometry, second shader; the other render states are reused as-is.
currentStates.shader = &perPixelLinkedListP2;
for (unsigned int i = 0; i < m_instances.size(); i++) {
   if (m_instances[i].getAllVertices().getVertexCount() > 0) {
        frameBuffer.draw(m_instances[i].getAllVertices(), currentStates);
   }
}
// Disabled alternative for pass 2: draw a single quad instead of the instances.
//glCheck(glDepthMask(GL_TRUE));
//currentStates.shader = nullptr;
//quad.setCenter(frameBuffer.getView().getPosition());
//frameBuffer.draw(quad, currentStates);
glCheck(glFinish());
frameBuffer.display();

And my shaders:

//const std::string fragmentShader = R"(

   #version 140
   #extension GL_ARB_shader_atomic_counters : require
   #extension GL_ARB_shading_language_420pack : require
   #extension GL_ARB_shader_image_load_store : require
   #extension GL_ARB_shader_storage_buffer_object : require
   // First-pass fragment shader: stores each incoming fragment as a node in
   // the `nodes` buffer and makes it the new head of the list kept for its
   // pixel in `headPointers`. It writes no color output.

   // One list node: fragment color, fragment depth, index of the next node.
   struct NodeType {
      vec4 color;
      float depth;
      uint next;
   };
   // Counter incremented once per fragment to obtain a node index.
   layout(binding = 0, offset = 0) uniform atomic_uint nextNodeCounter;
   // Per-pixel image holding the index of the current list head.
   layout(binding = 0, r32ui) uniform uimage2D headPointers;
   // Storage buffer that holds every node.
   layout(binding = 0, std430) buffer linkedLists {
       NodeType nodes[];
   };
   // Fragments whose node index is >= maxNodes are not stored.
   uniform uint maxNodes;
   // Float flag: the texel is multiplied in only when this is > 0.9.
   uniform float haveTexture;
   uniform sampler2D texture;
   void main() {
       // The counter is incremented for every fragment, even those dropped below.
       uint nodeIdx = atomicCounterIncrement(nextNodeCounter);
       // The texture is sampled unconditionally; the texel is used only when haveTexture > 0.9.
       vec4 texel = texture2D(texture, gl_TexCoord[0].xy);
       vec4 color = (haveTexture > 0.9) ? gl_Color * texel : gl_Color;
       if (nodeIdx < maxNodes) {
            // Atomically install this node as the pixel's head and fetch the previous head.
            uint prevHead = imageAtomicExchange(headPointers, ivec2(gl_FragCoord.xy), nodeIdx);
            // The node is filled in after it has already been published as the head.
            nodes[nodeIdx].color = color;
            nodes[nodeIdx].depth = gl_FragCoord.z;
            // Link to the previous head so the list runs newest-to-oldest.
            nodes[nodeIdx].next = prevHead;
       }
   }

//)";


// const std::string fragmentShader2 = R"(
   #version 140
   #extension GL_ARB_shader_atomic_counters : require
   #extension GL_ARB_shading_language_420pack : require
   #extension GL_ARB_shader_image_load_store : require
   #extension GL_ARB_shader_storage_buffer_object : require
   // Second-pass fragment shader: copies the current pixel's linked list into
   // a local array, sorts it by depth (largest depth first) and blends the
   // sorted colors into gl_FragColor.

   // Upper bound on the number of nodes copied per pixel.
   #define MAX_FRAGMENTS 75
   // One list node: fragment color, fragment depth, index of the next node.
   struct NodeType {
      vec4 color;
      float depth;
      uint next;
   };
   // Per-pixel image holding the index of the list head.
   layout(binding = 0, r32ui) uniform uimage2D headPointers;
   // Storage buffer that holds every node.
   layout(binding = 0, std430) buffer linkedLists {
       NodeType nodes[];
   };
   void main() {
      NodeType frags[MAX_FRAGMENTS];
      int count = 0;
      uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r;
      // Follow the `next` links until the 0xffffffff end marker or until
      // MAX_FRAGMENTS nodes have been copied.
      // (presumably the host clears headPointers to 0xffffffff — not shown here)
      while( n != 0xffffffffu && count < MAX_FRAGMENTS) {
           frags[count] = nodes[n];
           n = frags[count].next;
           count++;
      }
      // Bottom-up merge sort of frags[0..count): runs of length `step` are
      // merged pairwise and `step` doubles after every sweep.
      int i, j1, j2, k;
      int a, b, c;
      int step = 1;
      // Scratch copy of the left run during a merge.
      // NOTE(review): this holds MAX_FRAGMENTS/2 = 37 entries, but the copy
      // loop below writes `step` entries and `step` can reach 64 when
      // count > 64 — confirm the size is sufficient.
      NodeType leftArray[MAX_FRAGMENTS/2]; //for merge sort

      while (step <= count)
      {
          i = 0;
          while (i < count - step)
          {
              ////////////////////////////////////////////////////////////////////////
              //merge(step, i, i + step, min(i + step + step, count));
              // a: start of the left run, b: start of the right run,
              // c: end of the right run (exclusive), clamped to count.
              a = i;
              b = i + step;
              c = (i + step + step) >= count ? count : (i + step + step);

              // Save the left run; the merge below overwrites frags[a..c).
              for (k = 0; k < step; k++)
                  leftArray[k] = frags[a + k];

              // j1 counts elements taken from the right run, j2 from the left run.
              j1 = 0;
              j2 = 0;
              for (k = a; k < c; k++)
              {
                  // Take from the left run when the right run is exhausted, or
                  // when the left element has the strictly larger depth.
                  if (b + j1 >= c || (j2 < step && leftArray[j2].depth > frags[b + j1].depth))
                      frags[k] = leftArray[j2++];
                  else
                      frags[k] = frags[b + j1++];
              }
              ////////////////////////////////////////////////////////////////////////
              i += 2 * step;
          }
          step *= 2;
      }
      // Blend the sorted fragments in array order, starting from transparent black.
      vec4 color = vec4(0, 0, 0, 0);
      for( int i = 0; i < count; i++ )
      {
        if (i == 0) {
            // First fragment: interpolate from transparent black toward it by its own alpha.
            color = mix( color, frags[i].color, frags[i].color.a);
        } else {
            // Later fragments: interpolate from the fragment toward the
            // accumulated color, weighted by the accumulated alpha.
            color = mix(frags[i].color, color, color.a);
        }
      }
      gl_FragColor = color;
   }
//)";

Thanks

#2

glMemoryBarrier already handles that.

#3

Really? That doesn’t seem to be the case; maybe it’s a driver issue…

#4

It’s probably because your glMemoryBarrier call is incomplete. Your receiving shader uses imageLoad to retrieve the head pointer for the fragment. That’s not a GL_TEXTURE_FETCH_BARRIER_BIT operation; it’s a GL_SHADER_IMAGE_ACCESS_BARRIER_BIT operation.

#5

I’ve changed that, but I still have the same problem…