Shader doesn't write the fragment color in the second pass

#1

Hi! I’m trying to implement a per-pixel linked list algorithm, but I have a problem that I can’t solve: the fragment shader doesn’t write the output color to my FBO.
Here is the source code :

#include <GL/glew.h>
#include <SFML/OpenGL.hpp>
#include "../../../include/odfaeg/Graphics/perPixelLinkedListRenderComponent.hpp"
#include "glCheck.h"
#include "../../../include/odfaeg/Physics/particuleSystem.h"
namespace odfaeg {
    namespace graphic {
        // Builds the per-pixel linked list render component.
        //
        // Creates the off-screen frame buffer and its sprite, the head-pointer image (R32UI),
        // the atomic node counter, the node storage buffer (SSBO), the pixel-unpack buffer used
        // to reset the head pointers, and the two-pass subroutine shader.
        //
        // window     : render window whose current view gives the component position and size.
        // layer      : stored as the z component of the component position.
        // expression : stored and returned by getExpression().
        // settings   : context settings forwarded to frameBuffer.create().
        // Throws core::Erreur(54, ...) when the shader fails to load.
        PerPixelLinkedListRenderComponent::PerPixelLinkedListRenderComponent(RenderWindow& window, int layer, std::string expression, window::ContextSettings settings) :
            HeavyComponent(window, math::Vec3f(window.getView().getPosition().x, window.getView().getPosition().y, layer),
                          math::Vec3f(window.getView().getSize().x, window.getView().getSize().y, 0),
                          // NOTE(review): the x term uses getSize().x + getSize().x * 0.5f while the y term uses
                          // getPosition().y + getSize().y * 0.5f; the asymmetry looks unintended — confirm.
                          math::Vec3f(window.getView().getSize().x + window.getView().getSize().x * 0.5f, window.getView().getPosition().y + window.getView().getSize().y * 0.5f, layer)),
            view(window.getView()),
            expression(expression),
            quad(math::Vec3f(window.getView().getSize().x, window.getView().getSize().y, 0)) {
            // Room for 20 fragments per pixel of the view.
            GLuint maxNodes = 20 * window.getView().getSize().x * window.getView().getSize().y;
            // NodeType in the shader is { vec4 color; float depth; uint next; } in a std430 block.
            // Its members take 24 bytes, but a structure is aligned on its largest member (vec4,
            // 16 bytes) and padded to a multiple of that alignment, so the array stride is 32 bytes.
            // Sizing the buffer with 24 bytes per node leaves the last quarter of the nodes outside
            // the buffer.
            const GLsizeiptr nodeSize = 8 * sizeof(GLfloat);
            frameBuffer.create(window.getView().getSize().x, window.getView().getSize().y, settings);
            frameBufferSprite = Sprite(frameBuffer.getTexture(), math::Vec3f(0, 0, 0), math::Vec3f(window.getView().getSize().x, window.getView().getSize().y, 0), sf::IntRect(0, 0, window.getView().getSize().x, window.getView().getSize().y));
            // Head-pointer image: one unsigned int per pixel, bound to image unit 0.
            glCheck(glGenTextures(1, &headPtrTex));
            glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
            glCheck(glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, window.getView().getSize().x, window.getView().getSize().y));
            glCheck(glBindImageTexture(0, headPtrTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI));
            // Atomic counter handing out node indices, bound to atomic counter binding 0.
            glCheck(glGenBuffers(1, &atomicBuffer));
            glCheck(glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicBuffer));
            glCheck(glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), nullptr, GL_DYNAMIC_DRAW));
            // Node storage, bound to shader storage binding 0. The size is computed in GLsizeiptr
            // so that large views do not overflow 32-bit arithmetic.
            glCheck(glGenBuffers(1, &linkedListBuffer));
            glCheck(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, linkedListBuffer));
            glCheck(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(maxNodes) * nodeSize, nullptr, GL_DYNAMIC_DRAW));
            // Buffer filled with 0xffffffff (the end-of-list marker tested in pass2), used by clear()
            // to reset the head-pointer image.
            std::vector<GLuint> headPtrClearBuf(window.getView().getSize().x*window.getView().getSize().y, 0xffffffff);
            glCheck(glGenBuffers(1, &clearBuf));
            glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, clearBuf));
            glCheck(glBufferData(GL_PIXEL_UNPACK_BUFFER, headPtrClearBuf.size() * sizeof(GLuint),
            &headPtrClearBuf[0], GL_STATIC_COPY));
            // Unbind the unpack buffer: while it is bound, every texture upload reads its pixels
            // from that buffer instead of from client memory.
            glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0));
            // Register the "UPDATE" command: needToUpdate is the trigger, drawNextFrame the action.
            core::FastDelegate<bool> signal (&PerPixelLinkedListRenderComponent::needToUpdate, this);
            core::FastDelegate<void> slot (&PerPixelLinkedListRenderComponent::drawNextFrame, this);
            core::Command cmd(signal, slot);
            getListener().connect("UPDATE", cmd);
            const std::string  simpleVertexShader =
            R"(#version 140
            void main () {
                gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
                gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;
                gl_FrontColor = gl_Color;
            })";
            // Fragment shader with two subroutines: pass1 appends the fragment to the list of its
            // pixel, pass2 currently returns opaque white (its list traversal is commented out).
            const std::string fragmentShader =
            R"(#version 140
               #extension GL_ARB_shader_atomic_counters : require
               #extension GL_ARB_shading_language_420pack : require
               #extension GL_ARB_shader_image_load_store : require
               #extension GL_ARB_shader_storage_buffer_object : require
               #extension GL_ARB_shader_subroutine : require
               #define MAX_FRAGMENTS 75
               struct NodeType {
                  vec4 color;
                  float depth;
                  uint next;
               };
               layout(binding = 0, offset = 0) uniform atomic_uint nextNodeCounter;
               layout(binding = 0, r32ui) uniform uimage2D headPointers;
               layout(binding = 0, std430) buffer linkedLists {
                   NodeType nodes[];
               };
               uniform uint maxNodes;
               uniform float haveTexture;
               uniform sampler2D texture;
               subroutine vec4 RenderPassType();
               subroutine uniform RenderPassType RenderPass;
               subroutine(RenderPassType)
               vec4 pass1 () {
                   uint nodeIdx = atomicCounterIncrement(nextNodeCounter);
                   if (nodeIdx < maxNodes) {
                        uint prevHead = imageAtomicExchange(headPointers, ivec2(gl_FragCoord.xy), nodeIdx);
                        nodes[nodeIdx].color = (haveTexture > 0.9) ? gl_Color * texture2D(texture, gl_TexCoord[0].xy) : gl_Color;
                        nodes[nodeIdx].depth = gl_FragCoord.z;
                        nodes[nodeIdx].next = prevHead;
                   }
                   return vec4(0, 0, 0, 0);
               }
               subroutine(RenderPassType)
               vec4 pass2 () {
                   /*NodeType frags[MAX_FRAGMENTS];
                   int count = 0;
                   uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r;
                   while( n != uint(0xffffffff) && count < MAX_FRAGMENTS) {
                        frags[count] = nodes[n];
                        n = frags[count].next;
                        count++;
                   }
                   for (int i = 0; i < count-1; i++) {
                        for (int j = i+1; j < count; j++) {
                            if (frags[i].depth > frags[j].depth) {
                                NodeType tmp = frags[i];
                                frags[i] = frags[j];
                                frags[j] = tmp;
                            }
                        }
                   }
                   // Traverse the array, and combine the colors using the alpha
                  // channel.
                  vec4 color = vec4 (0, 0, 0, 0);
                  for (int i = 0; i < count; i++) {
                    color.rgb = frags[i].color.rgb * frags[i].color.a + color.rgb * (1 - frags[i].color.a);
                    color.a = frags[i].color.a + color.a * (1 - frags[i].color.a);
                  }*/
                  // Output the final color
                  return vec4(1, 1, 1, 1);
               }
               void main() {
                   gl_FragColor = RenderPass();
               })";
            if (!perPixelLinkedList.loadFromMemory(simpleVertexShader, fragmentShader)) {
                throw core::Erreur(54, "Failed to load per pixel linked list shader");
            }
            // Pixel-pack buffer sized to one byte per pixel of the view; allocated then unbound.
            glCheck(glGenBuffers(1, &pboId));
            glCheck(glBindBuffer(GL_PIXEL_PACK_BUFFER, pboId));
            glCheck(glBufferData(GL_PIXEL_PACK_BUFFER, view.getSize().x * view.getSize().y, 0, GL_STREAM_READ));
            glCheck(glBindBuffer(GL_PIXEL_PACK_BUFFER, 0));
            // NOTE(review): maxNodes is declared as a uint uniform in the shader; confirm that
            // Shader::setParameter uploads it with an unsigned-integer glUniform call and not as a float.
            perPixelLinkedList.setParameter("maxNodes", maxNodes);
            perPixelLinkedList.setParameter("texture", Shader::CurrentTexture);
            // Subroutine indices selected later through glUniformSubroutinesuiv in pass1()/pass2().
            GLuint programHandle = perPixelLinkedList.getHandle();
            pass1Index = glGetSubroutineIndex( programHandle, GL_FRAGMENT_SHADER, "pass1");
            pass2Index = glGetSubroutineIndex( programHandle, GL_FRAGMENT_SHADER, "pass2");
            backgroundColor = sf::Color::Transparent;
        }
        // Stores the color that clear() passes to frameBuffer.clear().
        void PerPixelLinkedListRenderComponent::setBackgroundColor(sf::Color color) {
            this->backgroundColor = color;
        }
        void PerPixelLinkedListRenderComponent::clear() {
            frameBuffer.clear(backgroundColor);
            GLuint zero = 0;
            glCheck(glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicBuffer));
            glCheck(glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &zero));

            glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, clearBuf));
            glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
            glCheck(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, view.getSize().x, view.getSize().y, GL_RED_INTEGER,
            GL_UNSIGNED_INT, NULL));
        }
        void PerPixelLinkedListRenderComponent::pass1() {
            Shader::bind(&perPixelLinkedList);
            glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass1Index));

            currentStates.blendMode = sf::BlendAlpha;
            currentStates.shader=&perPixelLinkedList;
            for (unsigned int i = 0; i < m_instances.size(); i++) {
                if (m_instances[i].getAllVertices().getVertexCount() > 0) {
                    if (m_instances[i].getMaterial().getTexture() == nullptr) {
                        perPixelLinkedList.setParameter("haveTexture", 0.f);
                    } else {
                        perPixelLinkedList.setParameter("haveTexture", 1.f);
                    }
                    currentStates.texture=m_instances[i].getMaterial().getTexture();
                    frameBuffer.draw(m_instances[i].getAllVertices(), currentStates);
                }
            }
            frameBuffer.display();
            glCheck(glFinish());
            Texture::bind(&frameBuffer.getTexture());
                //glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
                unsigned int size = view.getSize().x*view.getSize().y*4;
                GLubyte src[size];
                glCheck(glGetTexImage(GL_TEXTURE_2D,
                  0,
                  GL_RGBA,
                  GL_UNSIGNED_BYTE,
                  src));


                    if(src) {
                        std::cout<<"pass 1 "<<std::endl;
                        for (unsigned int i = 0; i < view.getSize().x * view.getSize().y; i++) {
                             if (src[i*4] > 0 || src[i*4+1] > 0 || src[i*4+2] > 0 || src[i*4+3] > 0) {
                                std::cout<<"color : (";
                                for (unsigned int j = 0; j < 4; j++) {

                                         std::cout<<(unsigned int) src[i*4+j];
                                         if (j < 3)
                                            std::cout<<" , ";
                                }
                                std::cout<<")"<<std::endl;
                             }
                        }

                    }
                    frameBuffer.display();
        }
        void PerPixelLinkedListRenderComponent::pass2() {
            glCheck(glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT ));
            Shader::bind(&perPixelLinkedList);
            glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass2Index));

            quad.setCenter(view.getPosition());
            frameBuffer.draw(quad, currentStates);

            glCheck(glFinish());



                Texture::bind(&frameBuffer.getTexture());
                //glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
                unsigned int size = view.getSize().x*view.getSize().y*4;
                GLubyte src[size];
                glCheck(glGetTexImage(GL_TEXTURE_2D,
                  0,
                  GL_RGBA,
                  GL_UNSIGNED_BYTE,
                  src));


                    if(src) {
                        std::cout<<"pass 2 : "<<std::endl;
                        for (unsigned int i = 0; i < view.getSize().x * view.getSize().y; i++) {
                             if (src[i*4] > 0 || src[i*4+1] > 0 || src[i*4+2] > 0 || src[i*4+3] > 0) {
                                std::cout<<"color : (";
                                for (unsigned int j = 0; j < 4; j++) {

                                         std::cout<<(unsigned int) src[i*4+j];
                                         if (j < 3)
                                            std::cout<<" , ";
                                }
                                std::cout<<")"<<std::endl;
                             }
                        }

                    }
                    frameBuffer.display();
        }
        void PerPixelLinkedListRenderComponent::drawNextFrame() {
            pass1();
            pass2();
        }
        // Draws the component on the given target: the frame buffer sprite is centered on the
        // position of the component view, then drawn with the given states.
        void PerPixelLinkedListRenderComponent::draw(RenderTarget& target, RenderStates states) {
            const auto& viewPosition = view.getPosition();
            frameBufferSprite.setCenter(viewPosition);
            target.draw(frameBufferSprite, states);
        }
        // Returns the layer of the component, i.e. the z component of its position converted to int.
        int  PerPixelLinkedListRenderComponent::getLayer() {
            return static_cast<int>(getPosition().z);
        }
        // No-op: the body only contains a commented-out statement, so drawable and states
        // are ignored.
        void PerPixelLinkedListRenderComponent::draw(Drawable& drawable, RenderStates states) {
            //drawables.insert(std::make_pair(drawable, states));
        }
        // No-op: the body is empty, so toRemove, toAdd and em are ignored.
        void PerPixelLinkedListRenderComponent::changeVisibleEntities(Entity* toRemove, Entity* toAdd, EntityManager* em) {
        }
        // Applies the given view to the frame buffer and stores it as the view of the component.
        void PerPixelLinkedListRenderComponent::setView(View view) {
            View& requestedView = view;
            frameBuffer.setView(requestedView);
            this->view = requestedView;
        }
        std::vector<Entity*> PerPixelLinkedListRenderComponent::getEntities() {
            return visibleEntities;
        }
        std::string PerPixelLinkedListRenderComponent::getExpression() {
            return expression;
        }
        // Returns a reference to the view stored in the component.
        View& PerPixelLinkedListRenderComponent::getView() {
            return this->view;
        }
        bool PerPixelLinkedListRenderComponent::needToUpdate() {
            return update;
        }
        // Replaces the expression stored in the component.
        void PerPixelLinkedListRenderComponent::setExpression (std::string expression) {
            std::string& newExpression = expression;
            this->expression = newExpression;
        }
        // Rebuilds the instances drawn by the component from the given entities: every face of
        // every entity is added to the batcher, whose instances are then stored. The entity list
        // is kept as the visible entities and the update flag is raised. Always returns true.
        bool PerPixelLinkedListRenderComponent::loadEntitiesOnComponent(std::vector<Entity*> vEntities) {
            batcher.clear();
            for (Entity* entity : vEntities) {
                // Every entity contributes its faces (the isLeaf() filter is disabled).
                for (unsigned int faceIndex = 0; faceIndex < entity->getNbFaces(); ++faceIndex) {
                    batcher.addFace(entity->getFace(faceIndex));
                }
            }
            m_instances = batcher.getInstances();
            visibleEntities = vEntities;
            update = true;
            return true;
        }
        void PerPixelLinkedListRenderComponent::updateParticleSystems() {
            for (unsigned int i = 0; i < visibleEntities.size(); i++) {
                if (dynamic_cast<physic::ParticleSystem*>(visibleEntities[i]) != nullptr) {
                    static_cast<physic::ParticleSystem*>(visibleEntities[i])->update();
                }
            }
            loadEntitiesOnComponent(visibleEntities);
            update = true;
        }
        // Handles a window event. Only a resize event of the component's own window is processed,
        // and only when the component is auto-resized: the size is recomputed, the event is
        // forwarded to the listener and the view is reset to the new window dimensions
        // (event.window.data1, event.window.data2).
        void PerPixelLinkedListRenderComponent::pushEvent(window::IEvent event, RenderWindow& rw) {
            if (event.type != window::IEvent::WINDOW_EVENT) {
                return;
            }
            if (event.window.type != window::IEvent::WINDOW_EVENT_RESIZED) {
                return;
            }
            if (&getWindow() != &rw || !isAutoResized()) {
                return;
            }
            std::cout<<"recompute size"<<std::endl;
            recomputeSize();
            getListener().pushEvent(event);
            getView().reset(physic::BoundingBox(getView().getViewport().getPosition().x,
                                                getView().getViewport().getPosition().y,
                                                getView().getViewport().getPosition().z,
                                                event.window.data1,
                                                event.window.data2,
                                                getView().getViewport().getDepth()));
        }
    }
}
#2

@laurent7601, please read The Forum Posting Guidelines. In particular, Posting Guideline #4 and the Posting source code sections.

Folks reading these forums are more likely to help you if you put forth some effort. You need to be able to narrow down the cause of problems and diagnose your own code.

If you’ve got something complex that doesn’t work, whittle it back until it works, or start over with something simple that works, and then incrementally add to it.

Then once you hit a problem you don’t understand, you can ask a targeted question here to get some insight.

2 Likes
#3

I tried a simpler version of the source code, and it seems that it always runs the first pass,
even if I change the subroutine index.

    // Reduced test case: creates the window and the OpenGL objects used by the two passes
    // (head-pointer image, atomic counter, node storage buffer, head-pointer clear buffer).
    RenderWindow window(sf::VideoMode(800, 600), "Test", sf::Style::Default, ContextSettings(0, 0, 8, 3, 0));
    window.getView().move(400, 300, 0);
    /*PerPixelLinkedListRenderComponent pplrc(window, 0, "E_TILE", ContextSettings(0, 0, 8, 3, 0));
    std::vector<Entity*> entities;
    Entity* tile = new Tile(nullptr, Vec3f(0, 0, 0), Vec3f(100, 50, 0),sf::IntRect(0, 0, 100, 50));
    entities.push_back(tile);
    pplrc.loadEntitiesOnComponent(entities);*/

    // Room for 20 fragments per pixel of a 100x100 area.
    GLuint maxNodes = 20 * 100 * 100;
    // NodeType in the shader is { vec4 color; float depth; uint next; } in a std430 block.
    // Its members take 24 bytes, but a structure is aligned on its largest member (vec4,
    // 16 bytes) and padded to a multiple of that alignment, so the array stride is 32 bytes.
    // Sizing the buffer with 24 bytes per node leaves the last quarter of the nodes outside
    // the buffer.
    GLint nodeSize = 8 * sizeof(GLfloat);
    GLuint headPtrTex, atomicBuffer, linkedListBuffer, clearBuf, pass1Index, pass2Index;
    // Head-pointer image: one unsigned int per pixel, bound to image unit 0.
    glCheck(glGenTextures(1, &headPtrTex));
    glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
    glCheck(glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, 100, 100));
    glCheck(glBindImageTexture(0, headPtrTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI));
    glCheck(glBindTexture(GL_TEXTURE_2D, 0));
    // Atomic counter handing out node indices, bound to atomic counter binding 0.
    glCheck(glGenBuffers(1, &atomicBuffer));
    glCheck(glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicBuffer));
    glCheck(glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), nullptr, GL_DYNAMIC_DRAW));
    // Node storage, bound to shader storage binding 0.
    glCheck(glGenBuffers(1, &linkedListBuffer));
    glCheck(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, linkedListBuffer));
    glCheck(glBufferData(GL_SHADER_STORAGE_BUFFER, maxNodes * nodeSize, nullptr, GL_DYNAMIC_DRAW));
    // Buffer filled with 0xffffffff (the end-of-list marker tested in pass2), used every
    // frame to reset the head-pointer image.
    std::vector<GLuint> headPtrClearBuf(100*100, 0xffffffff);
    glCheck(glGenBuffers(1, &clearBuf));
    glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, clearBuf));
    glCheck(glBufferData(GL_PIXEL_UNPACK_BUFFER, headPtrClearBuf.size() * sizeof(GLuint),
    &headPtrClearBuf[0], GL_STATIC_COPY));
    glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0));

    // Vertex shader: transforms the vertex with the fixed-function matrices and forwards the
    // first texture coordinate set and the vertex color.
    const std::string  simpleVertexShader =
    R"(#version 140
    void main () {
        gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
        gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;
        gl_FrontColor = gl_Color;
    })";
    // Fragment shader with two subroutines selected through the RenderPass subroutine uniform:
    //  - pass1 takes a node index from the atomic counter, makes that node the new head of the
    //    list of its pixel (imageAtomicExchange) and stores color, depth and the previous head;
    //    it returns opaque white.
    //  - pass2 copies up to MAX_FRAGMENTS nodes of the list of its pixel into a local array,
    //    sorts them by increasing depth and blends them in that order using their alpha.
    // The end of a list is marked by the value 0xffffffff.
    const std::string fragmentShader =
    R"(#version 140
       #extension GL_ARB_shader_atomic_counters : require
       #extension GL_ARB_shading_language_420pack : require
       #extension GL_ARB_shader_image_load_store : require
       #extension GL_ARB_shader_storage_buffer_object : require
       #extension GL_ARB_shader_subroutine : require
       #define MAX_FRAGMENTS 75
       struct NodeType {
          vec4 color;
          float depth;
          uint next;
       };
       layout(binding = 0, offset = 0) uniform atomic_uint nextNodeCounter;
       layout(binding = 0, r32ui) uniform uimage2D headPointers;
       layout(binding = 0, std430) buffer linkedLists {
           NodeType nodes[];
       };
       uniform uint maxNodes;
       uniform float haveTexture;
       uniform sampler2D texture;
       subroutine vec4 RenderPassType();
       subroutine uniform RenderPassType RenderPass;
       subroutine(RenderPassType)
       vec4 pass1 () {
           uint nodeIdx = atomicCounterIncrement(nextNodeCounter);
           if (nodeIdx < maxNodes) {
                uint prevHead = imageAtomicExchange(headPointers, ivec2(gl_FragCoord.xy), nodeIdx);
                nodes[nodeIdx].color = (haveTexture > 0.9) ? gl_Color * texture2D(texture, gl_TexCoord[0].xy) : gl_Color;
                nodes[nodeIdx].depth = gl_FragCoord.z;
                nodes[nodeIdx].next = prevHead;
           }

           return vec4(1, 1, 1, 1);

       }
       subroutine(RenderPassType)
       vec4 pass2 () {
           NodeType frags[MAX_FRAGMENTS];
           int count = 0;
           uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r;
           while( n != uint(0xffffffff) && count < MAX_FRAGMENTS) {
                frags[count] = nodes[n];
                n = frags[count].next;
                count++;
           }
           for (int i = 0; i < count-1; i++) {
                for (int j = i+1; j < count; j++) {
                    if (frags[i].depth > frags[j].depth) {
                        NodeType tmp = frags[i];
                        frags[i] = frags[j];
                        frags[j] = tmp;
                    }
                }
           }
           // Traverse the array, and combine the colors using the alpha
          // channel.
          vec4 color = vec4 (0, 0, 0, 0);
          for (int i = 0; i < count; i++) {
            color.rgb = frags[i].color.rgb * frags[i].color.a + color.rgb * (1 - frags[i].color.a);
            color.a = frags[i].color.a + color.a * (1 - frags[i].color.a);
          }
          // Output the final color
          return color;
       }
       void main() {
           gl_FragColor = RenderPass();
       })";
       odfaeg::graphic::Shader perPixelLinkedList;
       if (!perPixelLinkedList.loadFromMemory(simpleVertexShader, fragmentShader)) {
            throw odfaeg::core::Erreur(54, "Failed to load per pixel linked list shader");
       }
       perPixelLinkedList.setParameter("maxNodes", maxNodes);
       perPixelLinkedList.setParameter("texture", odfaeg::graphic::Shader::CurrentTexture);
       GLuint programHandle = perPixelLinkedList.getHandle();
       pass1Index = glGetSubroutineIndex( programHandle, GL_FRAGMENT_SHADER, "pass1");
       pass2Index = glGetSubroutineIndex( programHandle, GL_FRAGMENT_SHADER, "pass2");
       RectangleShape rect(Vec3f(100, 50, 0));
  // Render loop. Each frame: reset the atomic counter and the head pointers, draw the
  // rectangle with the pass1 subroutine (list construction), then draw a window-sized quad
  // with the pass2 subroutine (list resolution).
  while (window.isOpen()) {
        IEvent event;
        while (window.pollEvent(event)) {
            if (event.type == IEvent::WINDOW_EVENT && event.window.type == IEvent::WINDOW_EVENT_CLOSED) {
                window.close();
            }
        }
        window.clear();
        /*pplrc.clear();
        window.draw(pplrc);*/
        glClearColor( 0, 0.5, 1, 1 );
        glClear( GL_COLOR_BUFFER_BIT );

        // Restart node allocation at index 0.
        const GLuint zero = 0;
        glCheck(glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicBuffer));
        glCheck(glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &zero));

        // Refill the head-pointer image from the clear buffer. With a pixel-unpack buffer
        // bound, the last argument of glTexSubImage2D is an offset into that buffer.
        glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, clearBuf));
        glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
        glCheck(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 100,  100, GL_RED_INTEGER,
        GL_UNSIGNED_INT, nullptr));
        glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0));

        // Pass 1: build the per-pixel lists.
        // NOTE(review): OpenGL resets subroutine uniforms whenever the current program changes
        // (glUseProgram). If window.draw() rebinds the program through states.shader, the
        // selection made here is lost before the draw call — confirm against the RenderTarget code.
        odfaeg::graphic::Shader::bind(&perPixelLinkedList);
        glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass1Index));
        RenderStates states;
        states.shader = &perPixelLinkedList;
        window.draw(rect, states);
        glCheck(glFinish());

        // Pass 1 writes the nodes through a shader storage block and the head pointers through
        // image atomics. Both kinds of write are incoherent, so both barrier bits are needed
        // before pass 2 reads them back (imageLoad on headPointers, reads of nodes[]).
        glCheck(glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT ));

        // Pass 2: resolve the lists on a quad covering the window.
        // NOTE(review): the head-pointer image is 100x100 while this quad has the size of the
        // window; imageLoad outside the image does not return the 0xffffffff end marker, so
        // pixels outside that area may traverse a list they do not own — confirm the intended size.
        odfaeg::graphic::Shader::bind(&perPixelLinkedList);
        glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass2Index));
        RectangleShape fullScreenQuad(Vec3f(window.getSize().x, window.getSize().y, 0));
        window.draw(fullScreenQuad, states);
        glCheck(glFinish());
        window.display();
  }

Sorry, but there really is a problem with your preformatted text button. Isn’t there a tag for that, something like “source code here”?
Ha yes!

#4

In the Forum posting guidelines we have a short description of a good way to format your code using ``` tags before and after. I’ve tested on your post just above here and it worked as expected.

[code] tags are BBCode and not the best approach to take, even though it works for now. It is not native to these forums.

Hope this is helpful. Check out the Forum Posting Guidelines for more help.

GLTexStorage2D crash the program
#5

I’ve forgot if you want to view all the source code, it’s here on my git repository :
https://github.com/LaurentDuroisin/ODFAEG
I’m not sure about glx context creation in the window module, the code is similar to the SFML library, that I had to recode because of crashs.
I also wonder if the Texture and Shader class in the graphic module are correct.
SFML 2.5 create transient contexts but it doesn’t work for me… I even doesn’t know the reason of using this.

#6

I think I’ve found part of the problem: OpenGL displays the framebuffer before the shader has finished updating it. (Sometimes the head pointer texture is empty and sometimes it is not, for example when I add a std::cout, which slows down the execution.)
It seems that imageAtomicExchange operations are asynchronous.
How can I tell OpenGL to wait until all the shader invocations have finished before updating the texture?
I’ve heard about memory barriers, but I don’t understand what they are, so I don’t know how to use them.
But I also have this problem with a shader that doesn’t use the image load/store extension: the depth texture is not always updated before the light is rendered, so sometimes the light is rendered in front of the wall even if the light is behind the wall.

#7

Mmm this is REALLY STRANGE!
I added an sf::sleep at the end of my draw function: during the first five seconds it doesn’t draw anything, during the next five seconds it draws something, and after those five seconds it doesn’t draw anything again.

Blockquote
void PerPixelLinkedListRenderComponent::draw(RenderTarget& target, RenderStates states) {
Shader::bind(&perPixelLinkedList);
glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass1Index));

        states.blendMode = sf::BlendNone;
        states.shader=&perPixelLinkedList;
        for (unsigned int i = 0; i < m_instances.size(); i++) {
            if (m_instances[i].getAllVertices().getVertexCount() > 0) {
                if (m_instances[i].getMaterial().getTexture() == nullptr) {
                    perPixelLinkedList.setParameter("haveTexture", 0.f);
                } else {
                    perPixelLinkedList.setParameter("haveTexture", 1.f);
                }
                currentStates.texture=m_instances[i].getMaterial().getTexture();
                target.draw(m_instances[i].getAllVertices(), currentStates);
            }
        }
        glCheck(glFinish());

I really don’t understand OpenGL: normally glFinish should tell OpenGL to wait until all GPU commands are finished before updating anything, but that doesn’t seem to be the case. (Mmm…)
But the shader is correct, because during those five precious seconds it displays the correct colors (but without the texture).

#8

The OpenGL Spec contains all the details. However, there are some good pages in the wiki that cover things more from a “User’s Guide” perspective in a user-friendly form. For instance, if you search for the atomic function you’re using, you’ll find this wiki page:

Notice the description of the atomic operation you’re using. Be sure to read the:

section, and link over to the page it references:

I would read the entire page. That’ll give you a solid understanding of what memory barriers are and when you need them. While you read that, I would be sure to notice GL_SHADER_IMAGE_ACCESS_BARRIER_BIT.

Single GL context? Or multiple?

We probably should cover this issue in a separate thread (with more context provided).

#9

Killing time on the CPU is of course not a reasonable method of waiting for the graphics driver and/or the GPU to do anything. In the general case, this may just waste CPU time with the graphics driver and the GPU doing absolutely nothing (for your app) in the interim.

Yes, when running in a single context, glFinish should flush any unfinished work to the graphics driver and the GPU, and wait for it to finish before returning from the glFinish command in the thread that called it. This is the “sledgehammer” approach to synchronization and is very expensive.

However, that’s not to say that you might not have needed synchronization elsewhere in your frame to ensure proper operation (particularly since you’re using operations with side-effects). If so and it’s missing, that could also cause you problems.

#10

Blockquote
Single GL context? Or multiple?
We probably should cover this issue in a separate thread (with more context provided).

I use a single context but I can use multiple contexts if I draw on an FBO but the problem remains the same.

Blockquote
However, that’s not to say that you might not have needed synchronization elsewhere in your frame to ensure proper operation (particularly since you’re using operations with side-effects). If so and it’s missing, that could also cause you problems.

Ha ? I need to use glFlush to update the FBO when drawing, and, glSwapBuffers to update the window content with double buffering.

I think I’ll need to check the specifications details to see how to make the GPU waiting before shaders have finished to process data before updating the window…

#11

I understand the principle, but it doesn’t solve the problem. This is not normal, because even if I use textures (and not images) in the shader (you don’t need barriers with textures, because OpenGL manages memory coherency for you), I have the same problem.
So I’ve reported the problem to the Mesa driver developers.
So I now have two complicated solutions:

1) Downloading the Mesa source code and trying to fix the issue.
2) Coding my own driver with a technology like OpenCL, for example; it should be less difficult.

#12

I tried one last thing to tell the GPU “Hey! Wait until the normal map has been updated before computing the lighting!”, but that doesn’t work. :frowning:

// Fragment from the lighting code: selects the normal map generator shader, draws the depth
// buffer tile into the normal map between two memory barriers, then selects the light map
// generator shader.
states.shader = &normalMapGenerator;
            depthBufferTile.setCenter(view.getPosition());
            // NOTE(review): glMemoryBarrier orders accesses after incoherent shader writes
            // (image stores, atomics, storage buffers); presumably it has no effect on ordinary
            // render-to-texture output — confirm that this barrier is needed here.
            glCheck(glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT));
            depthBuffer.display();
            normalMap.draw(depthBufferTile, states);
            glCheck(glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT));
            states.shader = &lightMapGenerator;
#13

because, even if I use textures (and not images) in the shader (you don’t need barriers with textures because opengl manage memory coherency for you), I’ve the same problem.

It’s a bit difficult to understand what you’re talking about here. You cannot implement the algorithm you describe using only samplers. You have to use image load/store to build the data. And, as the page on memory coherency states, once you do that, any subsequent accesses to that memory must be synchronized properly. It doesn’t matter if the second pass tries to read it through a sampler; what causes the need for synchronization is how the data gets written.

Overall, it’s difficult to understand what your code is doing from an OpenGL perspective, because so much of it lives behind objects and abstractions, the behavior of which we can only guess at. Coupled with that, there are a number of things your code is doing that appear… dubious.

For example, in PerPixelLinkedListRenderComponent::pass1, you call frameBuffer.display(). I have no idea what that function does, and any guesses I might make as to what it might mean only confuse me more. See, if you’re implementing linked-list OIT, the first pass is just building the linked list. As such, when the first pass is finished… you don’t have anything to display yet. You just have a bunch of meaningless integers.

Not only that, your first-pass rendering shouldn’t even be writing anything to a framebuffer. Whatever frameBuffer is, it shouldn’t have any images attached to it. Which also makes the currentStates.blendMode = sf::BlendAlpha seem rather dubious. It also makes your glGetTexImage stuff make no sense, as there’s nothing to get yet.

Now, if I read between the lines, I would guess that frameBuffer.getTexture() is retrieving a texture which you believe has some meaningful data in it. But since the only meaningful image data that your first pass is supposed to generate is the integer indices, then this is broken in several ways.

First, if frameBuffer represents a genuine OpenGL FBO, you cannot have an image used as a framebuffer render target while simultaneously using it in an image load/store process. So if that’s what you’re doing (I can’t know that, since again I don’t know what your abstraction is doing), then you need to stop doing it.

Second, if that’s the case, gl_FragData is still being written by pass1’s shader. And gl_FragData is a vec4. Which cannot be used to write to an integer texture. You have to use a uvec4 or ivec4 to write to integer textures. So even if the feedback loop weren’t a problem, you’re still incorrectly writing to it.

The first pass should be rendering to an empty framebuffer, and the fragment shader used there should not be writing to any fragment shader outputs.

But because I have no idea what any of those functions mean or does (I can only guess at them), I cannot state that any of your code is wrong with any certainty. I can only guess at it.

One thing I can be sure of is this: you’re not using glMemoryBarrier entirely correctly. Your second pass reads data from an image via image load/store, and it reads data from an SSBO. Since the data it’s reading in both cases was written via incoherent processes, then you need bits in glMemoryBarrier that specify both of these. That is, you’re saying that you’re reading incoherently-written data via SSBO and image load/store.

But overall, you’re overcomplicating your implementation. When developing a new rendering effect like this, it’s best to start with much more simplified code. Implement it first in a bare-bones application, a simple testbed. Write code that talks directly to OpenGL with a minimum of abstractions; don’t use your main engine. This makes it much easier to know what’s going on and to know how to fix it. And once you port it to your engine, you know that if it worked in the testbed but not your engine, then the structure of your engine is at fault.

And FYI:

This is not valid C++. You’re using a language extension not supported by many compilers. In standard C++, the size of an array must be a compile-time constant. The correct way to do this is with something like a vector.

#14

Ok, I wrote source code without abstraction, except for the Vertex class, but this class only contains a vector of 3 floats for the vertex position, 4 unsigned bytes for the color and 2 floats for the tex coords, so you can guess what it does.

This code displays something, which is not the case with the abstraction, so I guess the problem is in my code where I create the GLX context.

But the output of this source code is not what I expected: I have two red squares instead of a red one and a green one, and the squares are not semi-transparent. Here is the code:

#include "application.h"
#include <GL/glew.h>
#include <SFML/OpenGL.hpp>
#include "odfaeg/Graphics/glCheck.h"
#include "odfaeg/Window/window.hpp"
#include "odfaeg/Window/sfmlWindowImpl.hpp"
#include "odfaeg/Window/x11Window.hpp"
#include "odfaeg/Window/context.hpp"
#include <SFML/Window/WindowStyle.hpp>
#include "odfaeg/Graphics/sprite.h"
#include "odfaeg/Graphics/renderWindow.h"
#include "odfaeg/Graphics/rectangleShape.h"

#include "odfaeg/Graphics/shader.h"
/*using namespace odfaeg::core;
using namespace odfaeg::math;
using namespace odfaeg::physic;
using namespace odfaeg::graphic;
using namespace odfaeg::window;
using namespace odfaeg::audio;
using namespace sorrok;*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>
#include <GL/gl.h>
#include <GL/glx.h>

#define GLX_CONTEXT_MAJOR_VERSION_ARB       0x2091
#define GLX_CONTEXT_MINOR_VERSION_ARB       0x2092
typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int*);

// Helper to check for extension string presence.  Adapted from:
//   http://www.opengl.org/resources/features/OGLextensions/
// Reports whether `extension` occurs in `extList` as a complete token.
//
// extList   - space-separated list of extension names (may be NULL).
// extension - a single extension name; must contain no spaces and must not be empty.
//
// Returns true only when the name is delimited on both sides by a space or by the
// beginning/end of the list, so "GLX_foo" does not match inside "GLX_foo_bar" or
// "GLX_barGLX_foo". A NULL list or name never matches.
static bool isExtensionSupported(const char *extList, const char *extension)
{
  // A NULL list or name cannot contain a match; bail out instead of dereferencing it.
  if (!extList || !extension)
    return false;

  // Extension names never contain spaces, and an empty name would match everywhere.
  if (*extension == '\0' || strchr(extension, ' '))
    return false;

  const size_t extLen = strlen(extension);

  // Walk over every occurrence of the name and accept the first one that is a whole token.
  for (const char *start = extList; ; ) {
    const char *where = strstr(start, extension);
    if (!where)
      break;

    const char *terminator = where + extLen;

    // The left boundary is tested against the beginning of the whole list, not against
    // `start`: after a rejected partial match `start` points into the middle of a token,
    // and an occurrence beginning exactly there is still glued to the previous text.
    const bool leftDelimited  = (where == extList) || (*(where - 1) == ' ');
    const bool rightDelimited = (*terminator == ' ') || (*terminator == '\0');
    if (leftDelimited && rightDelimited)
      return true;

    start = terminator;
  }

  return false;
}

static bool ctxErrorOccurred = false;
static int ctxErrorHandler( Display *dpy, XErrorEvent *ev )
{
    ctxErrorOccurred = true;
    return 0;
}
int main(int argc, char* argv[])
{
    /*EXPORT_CLASS_GUID(BoundingVolumeBoundingBox, BoundingVolume, BoundingBox)
    EXPORT_CLASS_GUID(EntityTile, Entity, Tile)
    EXPORT_CLASS_GUID(EntityTile, Entity, BigTile)
    EXPORT_CLASS_GUID(EntityWall, Entity, g2d::Wall)
    EXPORT_CLASS_GUID(EntityDecor, Entity, g2d::Decor)
    EXPORT_CLASS_GUID(EntityAnimation, Entity, Anim)
    EXPORT_CLASS_GUID(EntityHero, Entity, Hero)
    EXPORT_CLASS_GUID(EntityMesh, Entity, Mesh)
    MyAppli app(sf::VideoMode(800, 600), "Test odfaeg");
    return app.exec();*/

    unsigned int width = 800, height = 600;
    Display *display = XOpenDisplay(NULL);

  if (!display)
  {
    printf("Failed to open X display\n");
    exit(1);
  }

  // Get a matching FB config
  static int visual_attribs[] =
    {
      GLX_X_RENDERABLE    , True,
      GLX_DRAWABLE_TYPE   , GLX_WINDOW_BIT,
      GLX_RENDER_TYPE     , GLX_RGBA_BIT,
      GLX_X_VISUAL_TYPE   , GLX_TRUE_COLOR,
      GLX_RED_SIZE        , 8,
      GLX_GREEN_SIZE      , 8,
      GLX_BLUE_SIZE       , 8,
      GLX_ALPHA_SIZE      , 8,
      GLX_DEPTH_SIZE      , 24,
      GLX_STENCIL_SIZE    , 8,
      GLX_DOUBLEBUFFER    , True,
      //GLX_SAMPLE_BUFFERS  , 1,
      //GLX_SAMPLES         , 4,
      None
    };

  int glx_major, glx_minor;

  // FBConfigs were added in GLX version 1.3.
  if ( !glXQueryVersion( display, &glx_major, &glx_minor ) ||
       ( ( glx_major == 1 ) && ( glx_minor < 3 ) ) || ( glx_major < 1 ) )
  {
    printf("Invalid GLX version");
    exit(1);
  }

  printf( "Getting matching framebuffer configs\n" );
  int fbcount;
  GLXFBConfig* fbc = glXChooseFBConfig(display, DefaultScreen(display), visual_attribs, &fbcount);
  if (!fbc)
  {
    printf( "Failed to retrieve a framebuffer config\n" );
    exit(1);
  }
  printf( "Found %d matching FB configs.\n", fbcount );

  // Pick the FB config/visual with the most samples per pixel
  printf( "Getting XVisualInfos\n" );
  int best_fbc = -1, worst_fbc = -1, best_num_samp = -1, worst_num_samp = 999;

  int i;
  for (i=0; i<fbcount; ++i)
  {
    XVisualInfo *vi = glXGetVisualFromFBConfig( display, fbc[i] );
    if ( vi )
    {
      int samp_buf, samples;
      glXGetFBConfigAttrib( display, fbc[i], GLX_SAMPLE_BUFFERS, &samp_buf );
      glXGetFBConfigAttrib( display, fbc[i], GLX_SAMPLES       , &samples  );

      printf( "  Matching fbconfig %d, visual ID 0x%2x: SAMPLE_BUFFERS = %d,"
              " SAMPLES = %d\n",
              i, vi -> visualid, samp_buf, samples );

      if ( best_fbc < 0 || samp_buf && samples > best_num_samp )
        best_fbc = i, best_num_samp = samples;
      if ( worst_fbc < 0 || !samp_buf || samples < worst_num_samp )
        worst_fbc = i, worst_num_samp = samples;
    }
    XFree( vi );
  }

  GLXFBConfig bestFbc = fbc[ best_fbc ];

  // Be sure to free the FBConfig list allocated by glXChooseFBConfig()
  XFree( fbc );

  // Get a visual
  XVisualInfo *vi = glXGetVisualFromFBConfig( display, bestFbc );
  printf( "Chosen visual ID = 0x%x\n", vi->visualid );

  printf( "Creating colormap\n" );
  XSetWindowAttributes swa;
  Colormap cmap;
  swa.colormap = cmap = XCreateColormap( display,
                                         RootWindow( display, vi->screen ),
                                         vi->visual, AllocNone );
  swa.background_pixmap = None ;
  swa.border_pixel      = 0;
  swa.event_mask        = StructureNotifyMask;

  printf( "Creating window\n" );
  Window win = XCreateWindow( display, RootWindow( display, vi->screen ),
                              0, 0, width, height, 0, vi->depth, InputOutput,
                              vi->visual,
                              CWBorderPixel|CWColormap|CWEventMask, &swa );
  if ( !win )
  {
    printf( "Failed to create window.\n" );
    exit(1);
  }

  // Done with the visual info data
  XFree( vi );

  XStoreName( display, win, "GL 3.0 Window" );

  printf( "Mapping window\n" );
  XMapWindow( display, win );

  // Get the default screen's GLX extension list
  const char *glxExts = glXQueryExtensionsString( display,
                                                  DefaultScreen( display ) );

  // NOTE: It is not necessary to create or make current to a context before
  // calling glXGetProcAddressARB
  glXCreateContextAttribsARBProc glXCreateContextAttribsARB = 0;
  glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc)
           glXGetProcAddressARB( (const GLubyte *) "glXCreateContextAttribsARB" );

  GLXContext ctx = 0;

  // Install an X error handler so the application won't exit if GL 3.0
  // context allocation fails.
  //
  // Note this error handler is global.  All display connections in all threads
  // of a process use the same error handler, so be sure to guard against other
  // threads issuing X commands while this code is running.
  ctxErrorOccurred = false;
  int (*oldHandler)(Display*, XErrorEvent*) =
      XSetErrorHandler(&ctxErrorHandler);

  // Check for the GLX_ARB_create_context extension string and the function.
  // If either is not present, use GLX 1.3 context creation method.
  if ( !isExtensionSupported( glxExts, "GLX_ARB_create_context" ) ||
       !glXCreateContextAttribsARB )
  {
    printf( "glXCreateContextAttribsARB() not found"
            " ... using old-style GLX context\n" );
    ctx = glXCreateNewContext( display, bestFbc, GLX_RGBA_TYPE, 0, True );
  }

  // If it does, try to get a GL 3.0 context!
  else
  {
    int context_attribs[] =
      {
        GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
        GLX_CONTEXT_MINOR_VERSION_ARB, 0,
        //GLX_CONTEXT_FLAGS_ARB        , GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB,
        None
      };

    printf( "Creating context\n" );
    ctx = glXCreateContextAttribsARB( display, bestFbc, 0,
                                      True, context_attribs );

    // Sync to ensure any errors generated are processed.
    XSync( display, False );
    if ( !ctxErrorOccurred && ctx )
      printf( "Created GL 3.0 context\n" );
    else
    {
      // Couldn't create GL 3.0 context.  Fall back to old-style 2.x context.
      // When a context version below 3.0 is requested, implementations will
      // return the newest context version compatible with OpenGL versions less
      // than version 3.0.
      // GLX_CONTEXT_MAJOR_VERSION_ARB = 1
      context_attribs[1] = 1;
      // GLX_CONTEXT_MINOR_VERSION_ARB = 0
      context_attribs[3] = 0;

      ctxErrorOccurred = false;

      printf( "Failed to create GL 3.0 context"
              " ... using old-style GLX context\n" );
      ctx = glXCreateContextAttribsARB( display, bestFbc, 0,
                                        True, context_attribs );
    }
  }

  // Sync to ensure any errors generated are processed.
  XSync( display, False );

  // Restore the original error handler
  XSetErrorHandler( oldHandler );

  if ( ctxErrorOccurred || !ctx )
  {
    printf( "Failed to create an OpenGL context\n" );
    exit(1);
  }

  // Verifying that context is a direct context
  if ( ! glXIsDirect ( display, ctx ) )
  {
    printf( "Indirect GLX rendering context obtained\n" );
  }
  else
  {
    printf( "Direct GLX rendering context obtained\n" );
  }

  printf( "Making context current\n" );


  glXMakeCurrent( display, win, ctx );
  GLenum ok = glewInit();
  if (ok != GLEW_OK)
    std::cout<<"glew initalisation failed"<<std::endl;

    GLuint maxNodes = 20 * width * height;
    GLint nodeSize = 5 * sizeof(GLfloat) + sizeof(GLuint);
    GLuint headPtrTex, atomicBuffer, linkedListBuffer, clearBuf, pass1Index, pass2Index;
    glCheck(glGenTextures(1, &headPtrTex));
    glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
    glCheck(glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, width, height));
    glCheck(glBindImageTexture(0, headPtrTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI));
    glCheck(glBindTexture(GL_TEXTURE_2D, 0));
    glCheck(glGenBuffers(1, &atomicBuffer));
    glCheck(glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicBuffer));
    glCheck(glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), nullptr, GL_DYNAMIC_DRAW));
    glCheck(glGenBuffers(1, &linkedListBuffer));
    glCheck(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, linkedListBuffer));
    glCheck(glBufferData(GL_SHADER_STORAGE_BUFFER, maxNodes * nodeSize, NULL, GL_DYNAMIC_DRAW));
    std::vector<GLuint> headPtrClearBuf(width*height, 0xffffffff);
    glCheck(glGenBuffers(1, &clearBuf));
    glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, clearBuf));
    glCheck(glBufferData(GL_PIXEL_UNPACK_BUFFER, headPtrClearBuf.size() * sizeof(GLuint),
    &headPtrClearBuf[0], GL_STATIC_COPY));
    glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0));


                const std::string  simpleVertexShader =
                R"(#version 140
                void main () {
                    gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
                    gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;
                    gl_FrontColor = gl_Color;
                })";
                const std::string fragmentShader =
                R"(#version 140
                   #extension GL_ARB_shader_atomic_counters : require
                   #extension GL_ARB_shading_language_420pack : require
                   #extension GL_ARB_shader_image_load_store : require
                   #extension GL_ARB_shader_storage_buffer_object : require
                   #extension GL_ARB_shader_subroutine : require
                   #define MAX_FRAGMENTS 75
                   struct NodeType {
                      vec4 color;
                      float depth;
                      uint next;
                   };
                   layout(binding = 0, offset = 0) uniform atomic_uint nextNodeCounter;
                   layout(binding = 0, r32ui) uniform uimage2D headPointers;
                   layout(binding = 0, std430) buffer linkedLists {
                       NodeType nodes[];
                   };
                   uniform uint maxNodes;
                   subroutine void RenderPassType();
                   subroutine uniform RenderPassType RenderPass;
                   subroutine(RenderPassType)
                   void pass1 () {
                       uint nodeIdx = atomicCounterIncrement(nextNodeCounter);
                       if (nodeIdx < maxNodes) {
                            uint prevHead = imageAtomicExchange(headPointers, ivec2(gl_FragCoord.xy), nodeIdx);
                            nodes[nodeIdx].color = gl_Color;
                            nodes[nodeIdx].depth = gl_FragCoord.z;
                            nodes[nodeIdx].next = prevHead;
                       }
                   }
                   subroutine(RenderPassType)
                   void pass2 () {
                       NodeType frags[MAX_FRAGMENTS];
                       int count = 0;
                       uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r;
                       while( n != uint(0xffffffff) && count < MAX_FRAGMENTS) {
                            frags[count] = nodes[n];
                            n = frags[count].next;
                            count++;
                       }
                        //merge sort
                       int i, j1, j2, k;
                       int a, b, c;
                       int step = 1;
                       NodeType leftArray[MAX_FRAGMENTS/2]; //for merge sort

                       while (step <= count)
                       {
                           i = 0;
                           while (i < count - step)
                           {
                               ////////////////////////////////////////////////////////////////////////
                               //merge(step, i, i + step, min(i + step + step, count));
                               a = i;
                               b = i + step;
                               c = (i + step + step) >= count ? count : (i + step + step);

                               for (k = 0; k < step; k++)
                                   leftArray[k] = frags[a + k];

                               j1 = 0;
                               j2 = 0;
                               for (k = a; k < c; k++)
                               {
                                   if (b + j1 >= c || (j2 < step && leftArray[j2].depth > frags[b + j1].depth))
                                       frags[k] = leftArray[j2++];
                                   else
                                       frags[k] = frags[b + j1++];
                               }
                               ////////////////////////////////////////////////////////////////////////
                               i += 2 * step;
                           }
                           step *= 2;
                       }
                       vec4 color = vec4(0, 0, 0, 0);
                       for( int i = 0; i < count; i++ )
                       {
                         color = mix( color, frags[i].color, frags[i].color.a);
                       }

                       // Output the final color
                       gl_FragColor = color;
                   }
                   void main() {
                       RenderPass();
                   })";
                   GLuint programHandle;
                   glCheck(programHandle = glCreateProgram());
                   GLuint vertexShaderID = glCreateShader(GL_VERTEX_SHADER);
                   const char* source = simpleVertexShader.c_str();
                   glCheck(glShaderSource(vertexShaderID, 1, &source, nullptr));
                    glCheck(glCompileShader(vertexShaderID));
                    GLint success;
                    glCheck(glGetShaderiv(vertexShaderID, GL_COMPILE_STATUS,&success));
                    if (success == GL_FALSE) {
                        int infoLogLength;
                        glCheck(glGetShaderiv(vertexShaderID, GL_INFO_LOG_LENGTH, &infoLogLength));
                        char log[infoLogLength];
                        glCheck(glGetShaderInfoLog(vertexShaderID, infoLogLength, 0, &log[0]));
                        std::cerr<< "Failed to compile vertex shader:" << std::endl
                        << log << std::endl;
                        glCheck(glDeleteShader(vertexShaderID));
                        glCheck(glDeleteProgram(programHandle));
                        programHandle = 0;
                    }
                    glCheck(glAttachShader(programHandle, vertexShaderID));
                    glCheck(glDeleteShader(vertexShaderID));
                    GLuint fragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
                    source = fragmentShader.c_str();
                    glCheck(glShaderSource(fragmentShaderID, 1, &source, nullptr));
                    glCheck(glCompileShader(fragmentShaderID));
                    glCheck(glGetShaderiv(fragmentShaderID, GL_COMPILE_STATUS,&success));
                    if (success == GL_FALSE) {
                        int infoLogLength;
                        glCheck(glGetShaderiv(fragmentShaderID, GL_INFO_LOG_LENGTH, &infoLogLength));
                        char log[infoLogLength];
                        glCheck(glGetShaderInfoLog(fragmentShaderID, infoLogLength, 0, &log[0]));
                        std::cerr << "Failed to compile fragment shader:" << std::endl
                        << log << std::endl;
                        glCheck(glDeleteShader(fragmentShaderID));
                        glCheck(glDeleteProgram(programHandle));
                        programHandle = 0;
                    }
                    glCheck(glAttachShader(programHandle, fragmentShaderID));
                    glCheck(glDeleteShader(fragmentShaderID));
                    glCheck(glLinkProgram(programHandle));
                glCheck(glGetProgramiv(programHandle, GL_LINK_STATUS, &success));
                if (success == GL_FALSE) {
                    int infoLogLength;
                    glCheck(glGetProgramiv(programHandle, GL_INFO_LOG_LENGTH, &infoLogLength));
                    std::vector<char> programErrorMessage(std::max(infoLogLength, int(1)) );
                    glCheck(glGetProgramInfoLog(programHandle, infoLogLength, nullptr, &programErrorMessage[0]));
                    std::cerr << "Failed to link shader:" << std::endl
                         /* << log << std::endl*/;
                    glCheck(glDeleteProgram(programHandle));
                    programHandle = 0;
                }
                glUseProgram(programHandle);
                int location = glGetUniformLocation(programHandle, "maxNodes");
                glCheck(glUniform1ui(location, maxNodes));
                pass1Index = glGetSubroutineIndex( programHandle, GL_FRAGMENT_SHADER, "pass1");
                pass2Index = glGetSubroutineIndex( programHandle, GL_FRAGMENT_SHADER, "pass2");

                glCheck(glEnableClientState(GL_VERTEX_ARRAY));
                glCheck(glEnableClientState(GL_COLOR_ARRAY));
                glCheck(glEnableClientState(GL_TEXTURE_COORD_ARRAY));


       while (true) {
        sleep( 1 );

        glClearColor( 0, 0.5, 1, 1 );
        glClear( GL_COLOR_BUFFER_BIT );
        glBlendFunc(GL_ONE, GL_ZERO);


        GLuint zero = 0;
            glCheck(glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicBuffer));
            glCheck(glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &zero));
            glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, clearBuf));
            glCheck(glBindTexture(GL_TEXTURE_2D, headPtrTex));
            glCheck(glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width,  height, GL_RED_INTEGER,
            GL_UNSIGNED_INT, NULL));
            glCheck(glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0));
            glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass1Index));

            odfaeg::graphic::Vertex vertices[4];
            vertices[0] = odfaeg::graphic::Vertex(sf::Vector3f(-1, 1, 0), sf::Color(255, 0, 0, 100));
            vertices[1] = odfaeg::graphic::Vertex(sf::Vector3f(-0.8, 1, 0), sf::Color(255, 0, 0, 100));
            vertices[2] = odfaeg::graphic::Vertex(sf::Vector3f(-0.8, 0.8, 0), sf::Color(255, 0, 0, 100));
            vertices[3] = odfaeg::graphic::Vertex(sf::Vector3f(-1, 0.8, 0), sf::Color(255, 0, 0, 100));

            const char* data = reinterpret_cast<const char*>(&vertices[0]);
            glCheck(glVertexPointer(3, GL_FLOAT, sizeof(odfaeg::graphic::Vertex), data + 0 ));
            glCheck(glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(odfaeg::graphic::Vertex), data + 12));
            glCheck(glTexCoordPointer(2, GL_FLOAT, sizeof(odfaeg::graphic::Vertex), data + 16));
            glCheck(glDrawArrays(GL_QUADS, 0, 4));
            odfaeg::graphic::Vertex vertices2[4];
            vertices2[0] = odfaeg::graphic::Vertex(sf::Vector3f(-0.9, 0.9, 0), sf::Color(0, 255, 0, 150));
            vertices2[1] = odfaeg::graphic::Vertex(sf::Vector3f(-0.9, 0.7, 0), sf::Color(0, 255, 0, 150));
            vertices2[2] = odfaeg::graphic::Vertex(sf::Vector3f(-0.7, 0.7, 0), sf::Color(0, 255, 0, 150));
            vertices2[3] = odfaeg::graphic::Vertex(sf::Vector3f(-0.7, 0.9, 0), sf::Color(0, 255, 0, 150));

            const char* data2 = reinterpret_cast<const char*>(&vertices2[0]);
            glCheck(glVertexPointer(3, GL_FLOAT, sizeof(odfaeg::graphic::Vertex), data2 + 0 ));
            glCheck(glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(odfaeg::graphic::Vertex), data2 + 12));
            glCheck(glTexCoordPointer(2, GL_FLOAT, sizeof(odfaeg::graphic::Vertex), data2 + 16));
            glCheck(glDrawArrays(GL_QUADS, 0, 4));
            glCheck(glFinish());
            glCheck(glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT ));
            glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass2Index));
            odfaeg::graphic::Vertex vertices3[4];
            vertices3[0] = odfaeg::graphic::Vertex(sf::Vector3f(-1, -1, 0));
            vertices3[1] = odfaeg::graphic::Vertex(sf::Vector3f(1, -1, 0));
            vertices3[2] = odfaeg::graphic::Vertex(sf::Vector3f(1, 1, 0));
            vertices3[3] = odfaeg::graphic::Vertex(sf::Vector3f(-1, 1, 0));
            const char* data3 = reinterpret_cast<const char*>(&vertices3[0]);
            glCheck(glVertexPointer(3, GL_FLOAT, sizeof(odfaeg::graphic::Vertex), data3 + 0 ));
            glCheck(glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(odfaeg::graphic::Vertex), data3 + 12));
            glCheck(glTexCoordPointer(2, GL_FLOAT, sizeof(odfaeg::graphic::Vertex), data3 + 16));


            glCheck(glDrawArrays(GL_QUADS, 0, 4));
            glCheck(glFinish());
            glXSwapBuffers ( display, win );
            sleep( 1 );
  }
  glXMakeCurrent( display, 0, 0 );
  glXDestroyContext( display, ctx );

  XDestroyWindow( display, win );
  XFreeColormap( display, cmap );
  XCloseDisplay( display );
  glCheck(glDeleteTextures(GL_TEXTURE_2D, &headPtrTex));
  return 0;
}

So I’ll try to create a GLX context with GLUT to see what happens.

#15

Ok, I see the problem: when I do this (I call glUseProgram after calling glUniformSubroutinesuiv):

// Excerpt: second-pass draw with the shader bound both before and after the subroutine selection.
Shader::bind(&perPixelLinkedList);
        // Select pass2 for the fragment stage.
        glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass2Index));
        // Binds the same shader again after the selection; the post describes this as calling
        // glUseProgram after glUniformSubroutinesuiv.
        Shader::bind(&perPixelLinkedList);
        window.draw(rect, states);
        Shader::bind(nullptr);

I get this screen :

https://i.ibb.co/NC4nK8P/Capture-d-cran-de-2019-03-25-01-33-19.png

But when I don’t bind the shader after calling glUniformSubroutinesuiv, I get this screen and the application is very slow!!! (And the result is not correct because all the sprites are red and not semi-transparent. But I think it’s the correct way of doing this.)

https://i.ibb.co/NNdVTk0/Capture-d-cran-de-2019-03-25-01-44-31.png

// Excerpt: same second-pass draw, but without binding the shader again after the
// subroutine selection (the second bind is commented out).
glCheck(glUniformSubroutinesuiv( GL_FRAGMENT_SHADER, 1, &pass2Index));
        //Shader::bind(&perPixelLinkedList);
        window.draw(rect, states);

So I have a question: shouldn’t I call glUseProgram after the second call to glUniformSubroutinesuiv?

Is this uniform parameter different from other uniform parameters, so that we can’t call glUseProgram after calling glUniformSubroutinesuiv?

And the source code is very slow, per-pixel-linked-lists seems to be too slow for real time rendering …

#16

No.

If you want my advice, you shouldn’t be using shader subroutines at all (they’re really not a good feature. As evidenced by the fact that SPIR-V doesn’t even offer it as a possibility. Just make two different shaders). But if you insist on using it, you have to remember the most important gotcha of shader subroutines. Indeed, it’s so important that the wiki article on subroutines mentions it three times:

Subroutine “uniform” state is not stored with the program.

Every time you change the current program/program pipeline, whatever subroutine uniform values you’ve set become invalidated. So anytime you change the current program, you must then set any subroutine uniforms you want to use.

So your code has these calls precisely backwards. You use the program, then set the subroutine uniforms.

#17

Ok now the last problem is that all my Linked list nodes are pointing to the color of the first fragment…

 // GLSL vertex shader source: transforms the vertex with gl_ModelViewProjectionMatrix and
 // forwards texture coordinate set 0 (through gl_TextureMatrix[0]) and the vertex color.
 const std::string  simpleVertexShader =
                R"(#version 140
                void main () {
                    gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;
                    gl_TexCoord[0] = gl_TextureMatrix[0] * gl_MultiTexCoord0;
                    gl_FrontColor = gl_Color;
                })";
                // GLSL fragment shader source for the per-pixel linked list. main() calls the
                // subroutine uniform RenderPass, which selects one of two RenderPassType subroutines:
                //  - pass1: takes a node index from the atomic counter nextNodeCounter; if it is below
                //    maxNodes, exchanges it into headPointers at the fragment's pixel and stores
                //    gl_Color, gl_FragCoord.z and the previous head (as `next`) in nodes[nodeIdx].
                //  - pass2: follows the list from the pixel's head until 0xffffffff is reached or
                //    MAX_FRAGMENTS nodes were copied, sorts the copies with a bottom-up merge sort
                //    (larger depth first), then blends them in that order with mix() using each
                //    fragment's alpha and writes the result to gl_FragColor.
                // NOTE(review): leftArray holds MAX_FRAGMENTS/2 (= 37) nodes, but the merge copies
                // `step` nodes into it and step can reach 64 when count > 64 -- looks like an
                // out-of-bounds write for pixels with more than 64 fragments; confirm.
                // NOTE(review): under std430 an element of NodeType (vec4 + float + uint) is
                // presumably padded to 32 bytes, while the constructor shown at the top of this thread
                // computes nodeSize as 5 * sizeof(GLfloat) + sizeof(GLuint) -- confirm the buffer size.
                const std::string fragmentShader =
                R"(#version 140
                   #extension GL_ARB_shader_atomic_counters : require
                   #extension GL_ARB_shading_language_420pack : require
                   #extension GL_ARB_shader_image_load_store : require
                   #extension GL_ARB_shader_storage_buffer_object : require
                   #extension GL_ARB_shader_subroutine : require
                   #define MAX_FRAGMENTS 75
                   struct NodeType {
                      vec4 color;
                      float depth;
                      uint next;
                   };
                   layout(binding = 0, offset = 0) uniform atomic_uint nextNodeCounter;
                   layout(binding = 0, r32ui) uniform uimage2D headPointers;
                   layout(binding = 0, std430) buffer linkedLists {
                       NodeType nodes[];
                   };
                   uniform uint maxNodes;
                   subroutine void RenderPassType();
                   subroutine uniform RenderPassType RenderPass;
                   subroutine(RenderPassType)
                   void pass1 () {
                       uint nodeIdx = atomicCounterIncrement(nextNodeCounter);
                       if (nodeIdx < maxNodes) {
                            uint prevHead = imageAtomicExchange(headPointers, ivec2(gl_FragCoord.xy), nodeIdx);
                            nodes[nodeIdx].color = gl_Color;
                            nodes[nodeIdx].depth = gl_FragCoord.z;
                            nodes[nodeIdx].next = prevHead;
                       }
                   }
                   subroutine(RenderPassType)
                   void pass2 () {
                       NodeType frags[MAX_FRAGMENTS];
                       int count = 0;
                       uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r;
                       while( n != uint(0xffffffff) && count < MAX_FRAGMENTS) {
                            frags[count] = nodes[n];
                            n = frags[count].next;
                            count++;
                       }
                        //merge sort
                       int i, j1, j2, k;
                       int a, b, c;
                       int step = 1;
                       NodeType leftArray[MAX_FRAGMENTS/2]; //for merge sort
                       while (step <= count)
                       {
                           i = 0;
                           while (i < count - step)
                           {
                               ////////////////////////////////////////////////////////////////////////
                               //merge(step, i, i + step, min(i + step + step, count));
                               a = i;
                               b = i + step;
                               c = (i + step + step) >= count ? count : (i + step + step);
                               for (k = 0; k < step; k++)
                                   leftArray[k] = frags[a + k];
                               j1 = 0;
                               j2 = 0;
                               for (k = a; k < c; k++)
                               {
                                   if (b + j1 >= c || (j2 < step && leftArray[j2].depth > frags[b + j1].depth))
                                       frags[k] = leftArray[j2++];
                                   else
                                       frags[k] = frags[b + j1++];
                               }
                               ////////////////////////////////////////////////////////////////////////
                               i += 2 * step;
                           }
                           step *= 2;
                       }
                       vec4 color = vec4(0, 0, 0, 0);
                       for( int i = 0; i < count; i++ )
                       {
                         color = mix( color, frags[i].color, frags[i].color.a);
                       }                     
                       // Output the final color
                       gl_FragColor = color;
                   }
                   void main() {
                       RenderPass();
                   })";

PS: this is code I found on the Internet, but it doesn’t seem to work even though it looks correct.

#18

It doesn’t work without subroutines either: I’ve tried passing a uniform to indicate the pass number and calling pass1() or pass2() from main() accordingly, but that doesn’t work.
And it seems I can’t use my SSBO in two different shaders (otherwise I get a compilation error).
As for the color problem, even without the shader the color doesn’t change: if I have one red sprite, one green and one blue, it always draws the red color.

#19

It shouldn’t work. You’re supposed to use two separate programs.

That doesn’t make any sense. It’s likely that you’re doing something wrong in some other way.

#20
That doesn’t make any sense. It’s likely that you’re doing something wrong in some other way.

OK, but should I put the source code of the two shaders in a single file or not? Because if I load one file for the first shader and another for the second shader, that doesn’t work… (the compiler doesn’t recognize the SSBO).

And I think I’ll change the code anyway and try to use a layered image instead, because it’s really slow…