Performance when having multiple draws inside loop

Hi,

I’ve been trying to find overlapping triangles in 2D using OpenGL but I still have some performance issues. I have a high quantity of triangles I want to check in a small space, like I can have 150,000 triangles in a 1 by 1 square area, and the triangles are very small. Therefore I am using a for loop to search for overlaps one section of that square area at a time.

The idea is then, for each small section of the square area, to do a two-pass check. The first pass renders the section into a texture with the face IDs as the color. The second pass renders the section again and checks whether each fragment's face ID matches the one stored in the texture at that location; if it does not match, there is an intersection. I use an SSBO to gather the results so that I can use them on the CPU.

When I do this, the timings are not as good as I was hoping to achieve with OpenGL, and when I check a very small area (like a 0.005 by 0.005 section of the square) using very detailed textures (like 4K), I get fewer overlaps than I was getting with 2K textures.

Another strange aspect is that the first time I run the overlap check I get fewer overlaps than on the following runs (so the first run finds fewer, but the 2nd, 3rd, 4th, etc. runs all give the same results as each other).

Here are some images illustrating those strange behaviors. They show the results as a function of the kernel size (the size of the pieces the square is divided into) and the dimensions of the textures (e.g. 4K = 4000x4000). The percentage indicates how close the results are to the correct overlap results, and the time is there to show the large loss of performance I mentioned.

This first one is for the first time I run my program
OverlapResults_FirstRun
These are the results for when I run the code a second time or more.
OverlappingResults_SecondRun

If someone has some time to look over my code and let me know what I might be missing or doing wrong I would appreciate it. I’m doing the following:

        // Vertex input shared by both render passes: one vertex array object
        // backed by one array buffer holding the contents of trianglesVerts.
        unsigned int trianglesVAO;
        unsigned int trianglesVBO;
        glGenVertexArrays(1, &trianglesVAO);
        glGenBuffers(1, &trianglesVBO);

        glBindVertexArray(trianglesVAO);
        glBindBuffer(GL_ARRAY_BUFFER, trianglesVBO);
        glBufferData(GL_ARRAY_BUFFER,
                     sizeof(float) * trianglesVerts.size(),
                     trianglesVerts.data(),
                     GL_STATIC_DRAW);

        // Each vertex is three consecutive floats.
        // Attribute 0 reads the first two of them (presumably the 2D position).
        glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(float[3]), nullptr);
        glEnableVertexAttribArray(0);
        // Attribute 1 reads the third float (presumably the face ID -- confirm
        // against the vertex shader).
        glVertexAttribPointer(1, 1, GL_FLOAT, GL_FALSE, sizeof(float[3]),
                              reinterpret_cast<void*>(sizeof(float[2])));
        glEnableVertexAttribArray(1);
         

        // Off-screen render target "framebufferEdges": a framebuffer object whose
        // only attachment is a SCR_WIDTH x SCR_HEIGHT RGBA32F texture.
        unsigned int textureEdgesbuffer;
        glGenTextures(1, &textureEdgesbuffer);
        glBindTexture(GL_TEXTURE_2D, textureEdgesbuffer);
        // Allocate storage only; no initial pixel data is uploaded.
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, SCR_WIDTH, SCR_HEIGHT, 0, GL_BGRA, GL_FLOAT, nullptr);
        // Nearest filtering so stored values are never blended between texels.
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

        unsigned int framebufferEdges;
        glGenFramebuffers(1, &framebufferEdges);
        glBindFramebuffer(GL_FRAMEBUFFER, framebufferEdges);
        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, textureEdgesbuffer, 0);

        // Report (but do not abort on) an unusable framebuffer.
        if (GL_FRAMEBUFFER_COMPLETE != glCheckFramebufferStatus(GL_FRAMEBUFFER))
        {
            std::cout << "ERROR::FRAMEBUFFER:: Framebuffer is not complete!" << std::endl;
        }
        // Leave the default framebuffer bound.
        glBindFramebuffer(GL_FRAMEBUFFER, 0);



        // Off-screen render target "framebuffer": a framebuffer object whose only
        // attachment is a SCR_WIDTH x SCR_HEIGHT RGBA32F texture
        // (textureColorbuffer).
        unsigned int textureColorbuffer;
        glGenTextures(1, &textureColorbuffer);
        glBindTexture(GL_TEXTURE_2D, textureColorbuffer);
        // Allocate storage only; no initial pixel data is uploaded.
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, SCR_WIDTH, SCR_HEIGHT, 0, GL_BGRA, GL_FLOAT, nullptr);
        // Nearest filtering so stored values are never blended between texels.
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

        unsigned int framebuffer;
        glGenFramebuffers(1, &framebuffer);
        glBindFramebuffer(GL_FRAMEBUFFER, framebuffer);
        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, textureColorbuffer, 0);

        // Report (but do not abort on) an unusable framebuffer.
        if (GL_FRAMEBUFFER_COMPLETE != glCheckFramebufferStatus(GL_FRAMEBUFFER))
        {
            std::cout << "ERROR::FRAMEBUFFER:: Framebuffer is not complete!" << std::endl;
        }
        // Leave the default framebuffer bound.
        glBindFramebuffer(GL_FRAMEBUFFER, 0);

        // "framebuffer2" gets no attachments at all; only a default width and
        // height are configured, which gives the rasterizer a size to work with.
        unsigned int framebuffer2;
        glGenFramebuffers(1, &framebuffer2);
        glBindFramebuffer(GL_FRAMEBUFFER, framebuffer2);

        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH, SCR_WIDTH);
        glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT, SCR_HEIGHT);

        // Report (but do not abort on) an unusable framebuffer.
        if (GL_FRAMEBUFFER_COMPLETE != glCheckFramebufferStatus(GL_FRAMEBUFFER))
        {
            std::cout << "ERROR::FRAMEBUFFER:: Framebuffer is not complete!" << std::endl;
        }
        // Leave the default framebuffer bound.
        glBindFramebuffer(GL_FRAMEBUFFER, 0);


		
        // Shader storage buffer, created with immutable storage and initialised
        // from shader_data.
        GLuint ssbo = 0;
        glGenBuffers(1, &ssbo);

        // Storage flags: the buffer may be mapped for reading, and such a mapping
        // may be persistent and coherent.
        GLbitfield flags = GL_MAP_COHERENT_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_READ_BIT;

        glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo);
        glBufferStorage(GL_SHADER_STORAGE_BUFFER, sizeof(shader_data), &shader_data, flags);
        // Unbind from the generic binding point.
        glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);


        incrementValU = 0.01;
        incrementValV = 0.01;


        for (float i = floor(minU); i <= ceil(maxU); i += incrementValU)
        {
            for (float j = floor(minV); j < ceil(maxV); j += incrementValV)
            {

                 glBindFramebuffer(GL_FRAMEBUFFER, framebuffer);

            // make sure we clear the framebuffer's content
            glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
            glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
            
            shaderUtil.Use();
            shaderUtil.setVec2("bBoxU", i, i + incrementValU);
            shaderUtil.setVec2("bBoxV", j, j + incrementValV);

            glBindVertexArray(trianglesVAO);
            glActiveTexture(GL_TEXTURE0);
            glBindTexture(GL_TEXTURE_2D, textureEdgesbuffer);
            glDrawArrays(GL_TRIANGLES, 0, numTriangles * 3);
        


            glBindFramebuffer(GL_FRAMEBUFFER, framebuffer2);

            glClearColor(1.0f, 1.0f, 1.0f, 1.0f); // set clear color to white (not really necessary actually, since we won't be able to see behind the quad anyways)
            glClear(GL_COLOR_BUFFER_BIT);
            
            screenShader.Use();
            screenShader.setVec2("bBoxU", i, i + incrementValU);
            screenShader.setVec2("bBoxV", j, j + incrementValV);

            glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, ssbo);
            glBindVertexArray(trianglesVAO);
            glBindTexture(GL_TEXTURE_2D, textureColorbuffer);

            glDrawArrays(GL_TRIANGLES, 0, numTriangles * 3);

        }
    }

        glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, numTriangles * sizeof(int), overlapResults.data());

        glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);