FBO and early Z culling

Hi,

I’m having some trouble using early Z culling in my program. I tried this test, as suggested in this topic, to check whether early Z culling runs on my Quadro FX 3450/4000 SDI, and it works fine.

I’ve built this test to try enabling early Z culling in my program.

This is the render function:

// One-shot debug switches: each dump below runs only while its flag is true
// and clears the flag afterwards.
static bool dbgInit = false, dbgZPass = false, dbgCompute = true;

  /******** Init pass ********/
  // Draw one textured quad into the current target FBO with the init shader
  // bound. beginDraw() is called with its default arguments (true, true).
  testFBOTarg->beginDraw();
  testShaderInitPass.enableShader();

  glEnable(GL_TEXTURE_RECTANGLE_NV);
  glBindTexture(GL_TEXTURE_RECTANGLE_NV, testSrcBufferID);

  // Texture coordinates are given in texels (0..gridX, 0..gridZ); the quad
  // lies in the y = 0 plane and spans x, z in [0, 1].
  glBegin(GL_QUADS);
    glTexCoord2i(gridX, 0);
    glVertex3f(0.0f, 0.0f,  0.0f);
    
    glTexCoord2i(gridX, gridZ);
    glVertex3f(0.0f, 0.0f,  1.0f);
    
    glTexCoord2i(0, gridZ);
    glVertex3f(1.0f, 0.0f,  1.0f);
    
    glTexCoord2i(0, 0);
    glVertex3f(1.0f, 0.0f,  0.0f);
  glEnd();

  glDisable(GL_TEXTURE_RECTANGLE_NV);

  testShaderInitPass.disableShader();
  testFBOTarg->endDraw();
  /***************************/

  // Optional one-time dump of the init pass result: read the target's color
  // attachment back as RGBA floats and hand it to imdebug.
  if(dbgInit)
  {
    float *tmpD = new float[testFBOTarg->getWidth() * testFBOTarg->getHeight() * 4];
    glBindTexture(GL_TEXTURE_RECTANGLE_NV, testFBOTarg->getTexAttach(GL_COLOR_ATTACHMENT0_EXT));
    glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, GL_FLOAT, tmpD);
    imdebug("rgba b=32f w=%d h=%d %p", testFBOTarg->getWidth(), testFBOTarg->getHeight(), tmpD);
    dbgInit = false;
    delete[] tmpD;
  }

  /******** Switch FBO ********/
  // Ping-pong: exchange the source/target FBO pointers and the matching
  // source/target texture ids, so the next pass reads what was just written.
  PXVFramebufferObject *tmpFBO = testFBOSrc;
  testFBOSrc = testFBOTarg;
  testFBOTarg = tmpFBO;
  GLuint tmpID = testSrcBufferID;
  testSrcBufferID = testTargBufferID;
  testTargBufferID = tmpID;
  /****************************/

  /******** Early Z-Culling init pass ********/
  // Same quad, drawn with the Z-pass shader. beginDraw(false) passes
  // clearColor = false and leaves clearDepth at its default (true).
  testFBOTarg->beginDraw(false);
  testShaderZPass.enableShader();

  glEnable(GL_TEXTURE_RECTANGLE_NV);
  glBindTexture(GL_TEXTURE_RECTANGLE_NV, testSrcBufferID);

  glBegin(GL_QUADS);
    glTexCoord2i(gridX, 0);
    glVertex3f(0.0f, 0.0f,  0.0f);
    
    glTexCoord2i(gridX, gridZ);
    glVertex3f(0.0f, 0.0f,  1.0f);
    
    glTexCoord2i(0, gridZ);
    glVertex3f(1.0f, 0.0f,  1.0f);
    
    glTexCoord2i(0, 0);
    glVertex3f(1.0f, 0.0f,  0.0f);
  glEnd();

  glDisable(GL_TEXTURE_RECTANGLE_NV);

  testShaderZPass.disableShader();
  testFBOTarg->endDraw();
  /*******************************************/

  // Optional one-time dump of the depth buffer while the target FBO is bound.
  if(dbgZPass)
  {
    testFBOTarg->bindFBO();
    imdebugDepthf(0, 0, testFBOTarg->getWidth(), testFBOTarg->getHeight());
    dbgZPass = false;
    testFBOTarg->unbindFBO();
  }

  /******** Switch FBO ********/
  // Ping-pong again before the compute pass.
  tmpFBO = testFBOSrc;
  testFBOSrc = testFBOTarg;
  testFBOTarg = tmpFBO;
  tmpID = testSrcBufferID;
  testSrcBufferID = testTargBufferID;
  testTargBufferID = tmpID;
  /****************************/

  /******** Compute pass ********/
  // Same quad, drawn with the compute shader. beginDraw(false, false) passes
  // false for both clearColor and clearDepth.
  // NOTE(review): presumably this keeps the depth laid down by the Z pass so it
  // can reject fragments here - confirm against the beginDraw() implementation.
  testFBOTarg->beginDraw(false, false);
  testShaderComputePass.enableShader();

  glEnable(GL_TEXTURE_RECTANGLE_NV);
  glBindTexture(GL_TEXTURE_RECTANGLE_NV, testSrcBufferID);

  glBegin(GL_QUADS);
    glTexCoord2i(gridX, 0);
    glVertex3f(0.0f, 0.0f,  0.0f);
    
    glTexCoord2i(gridX, gridZ);
    glVertex3f(0.0f, 0.0f,  1.0f);
    
    glTexCoord2i(0, gridZ);
    glVertex3f(1.0f, 0.0f,  1.0f);
    
    glTexCoord2i(0, 0);
    glVertex3f(1.0f, 0.0f,  0.0f);
  glEnd();

  glDisable(GL_TEXTURE_RECTANGLE_NV);

  testShaderComputePass.disableShader();
  testFBOTarg->endDraw();
  /******************************/

  // One-time dump of the compute pass result (enabled by default above).
  if(dbgCompute)
  {
    float *tmpD = new float[testFBOTarg->getWidth() * testFBOTarg->getHeight() * 4];
    glBindTexture(GL_TEXTURE_RECTANGLE_NV, testFBOTarg->getTexAttach(GL_COLOR_ATTACHMENT0_EXT));
    glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, GL_FLOAT, tmpD);
    imdebug("rgba b=32f w=%d h=%d %p", testFBOTarg->getWidth(), testFBOTarg->getHeight(), tmpD);
    dbgCompute = false;
    delete[] tmpD;
  }

  /******** Switch FBO ********/
  // Final ping-pong so the next frame starts from this frame's output.
  tmpFBO = testFBOSrc;
  testFBOSrc = testFBOTarg;
  testFBOTarg = tmpFBO;
  tmpID = testSrcBufferID;
  testSrcBufferID = testTargBufferID;
  testTargBufferID = tmpID;
  /****************************/

Here is the prototype of the FBO beginDraw function:

// Starts drawing into the FBO; both flags default to true.
// NOTE(review): presumably the flags select whether the color / depth buffers
// are cleared first - the implementation is not shown here, confirm.
inline void beginDraw(bool clearColor = true, bool clearDepth = true);

And here are the different fragment programs used in the test:

Init pass
// Init pass: writes one constant color to every fragment.
uniform samplerRect src; // declared for parity with the other passes, not sampled here

void main()
{
  gl_FragColor = vec4(0.15, 0.15, 0.75, 1.0);
}

Z pass
// Z pass: copies the source texel through unchanged, unless the fragment is discarded.
uniform samplerRect src;

void main()
{
  // Test switch: comment this statement out to let every fragment through.
  discard;

  gl_FragColor = textureRect(src, gl_TexCoord[0].st);
}

Compute pass
// Compute pass: copies the source texel and overwrites its red channel with a
// column-parity pattern (1.0 on even texel columns, 0.0 on odd ones).
uniform samplerRect src;

void main()
{
  vec4 outputVal = textureRect(src, gl_TexCoord[0].st);

  // Deliberately repeated work: the loop exists only to make this pass expensive.
  // NOTE(review): the body does not depend on i, so a compiler may collapse the
  // loop to a single iteration - confirm it really costs what the test assumes.
  for(int i = 0; i < 100; i++)
  {
    // GLSL 1.10/1.20 has no C-style cast and reserves the '%' operator, so the
    // parity test is written with floor()/mod(). For the non-negative texel
    // coordinates used here this matches the intended (int)s % 2.
    outputVal.r = (mod(floor(gl_TexCoord[0].s), 2.0) == 0.0) ? 1.0 : 0.0;
  }

  gl_FragColor = outputVal;
}

The init pass is there only because the real program has an init pass.
Commenting/uncommenting “discard” in the Z pass controls which fragments are processed in the compute pass.
The compute pass only performs a time-consuming operation for the test.

I use Fraps to measure performance, and I see no difference between the test with discard commented out and the test with discard active (22 fps in both cases). Does anybody have an idea what the problem could be?

I don’t understand what you are trying to accomplish… Early z-culling is a feature where a fragment can be discarded before it runs through the fragment shader. To make use of it, you lay out the depth first (render the scene with no color output) and then render the scene a second time. The discard keyword will in many cases show no speed-up, as it just prevents the fragment from being written; the shader may still have to run to the end.

To make use of it you lay out the depth first (render the scene with no color output)

–> It’s my Early Z-Culling init pass

then render the scene the second time
–> It’s my Compute pass

I use discard in my frag shader only to write the depth buffer or not. Discarded frag in Z pass means that they will be compute in 2nd pass.

Originally posted by Olivier B.:

I use discard in my frag shader only to write the depth buffer or not. Discarded frag in Z pass means that they will be compute in 2nd pass.
you should disable color writes using glColorMask instead of discarding the fragment inside the shader

cheers

I cannot disable writing color because in the real program I need writing color values in this pass.
And I think writing color don’t deactivate early Z-Culling

Discard and alpha test will typically not work with early Z-cull - any depth values written while alpha test or a shader with discard is active will not contribute to early Z-cull. That said…I’ve had some luck in the past with getting early stencil reject to work with alpha test. Also, others have found that early Z-cull is less effective with FBOs or large render targets than with the main backbuffer.

Ok but I found this example and it use discard :confused:

May be, discard doesn’t work for your Quadro, as for GeForceFX (I mean, it may cause Early-Z to break).
Get the working example and make it not working - so you shall get the reasons for Early-Z to die, and it would be great.

But when I comment out the discard (i.e. no fragment is processed in the compute pass), I get no performance gain.

Okey, so may be you have ALPHA_TEST enabled, check it to be disabled.

Thanks for your help but ALPHA_TEST is well disabled.

Okey, are you sure you have the same depth renderbuffer for prepare and for render pass? As for your code, you have different FBO’s for them, so may be you have different depth attachments, and that’s bad.
Are you sure, that it’s okey with depth write mask? Your code has no setup function calls, so it is hard to understand, what is executed there or what i not executed.
You say, you have some working example. May be, you do something wrong here, comparing to that example?
Bon chance!

My two FBOs share the same depth buffer. I’ve added an occlusion query to see how many fragments are processed, and the number is correct. I’ve compared my example with the working example, and the only difference is that the working example uses Cg and GLUT, whereas mine uses GLSL and SDL. I’ve tried initializing the depth buffer without a shader and changing the color/depth masks, but nothing works.

I’ve written a short program that shows every step of how I test early Z culling.

#include <iostream>

#include <SDL.h>
#include <GL/glew.h>
#include <imdebuggl.h>

using namespace std;

// Global GL object names used by the test program.
GLuint fbo1;      // Framebuffer object (generated in genFramebuffer, bound in drawScene)
GLuint colorMap1; // Rectangle texture attached to fbo1 as GL_COLOR_ATTACHMENT0_EXT
GLuint depthMap1; // Rectangle depth texture attached to fbo1 as GL_DEPTH_ATTACHMENT_EXT
GLuint shader1;   // Shader program object (created in genShader, used in drawScene)

// Vertex shader source: forwards texture coordinate set 0 and applies the
// fixed-function transform to the vertex position.
static const char *vertProg =
  "void main(void)                       "
  "{                                     "
  "  gl_TexCoord[0] = gl_MultiTexCoord0; "
  "  gl_Position = ftransform();         "
  "}                                     ";

// Fragment shader source: writes 1.0 to the red channel on even texel columns
// and 0.0 on odd ones, repeating the assignment 100 times as artificial load.
//
// The parity test uses floor()/mod() because GLSL 1.10/1.20 has no C-style
// cast and reserves the '%' operator; "(int)s % 2" only compiles on lenient
// drivers. For non-negative texture coordinates the result is the same.
// Each line ends in '\n' so compiler logs report usable line numbers.
static const char *fragProg =
  "void main(void)\n"
  "{\n"
  "  vec4 outputVal = vec4(0.0, 0.0, 0.0, 1.0);\n"
  "  for(int i = 0; i < 100; i++)\n"
  "  {\n"
  "    outputVal.r = (mod(floor(gl_TexCoord[0].s), 2.0) == 0.0) ? 1.0 : 0.0;\n"
  "  }\n"
  "  gl_FragColor = outputVal;\n"
  "}\n";

void drawScene(int scrWidth, int scrHeight, int fboWidth, int fboHeight)
{
  static bool halfRender = true, dbgResult = true;

  // Bind FBO
  glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo1);

  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

  glViewport(0, 0, fboWidth, fboHeight);

  glMatrixMode(GL_PROJECTION);
  glPushMatrix();
  glLoadIdentity();

  glOrtho(-0.5, 0.5, -0.5, 0.5, 0.1, 1024.0);

  glMatrixMode(GL_MODELVIEW);
  glPushMatrix();
  glLoadIdentity();

  gluLookAt(0.5, 1.0, 0.5,
    0.5, 0.0, 0.5,
    0.0, 0.0, 1.0);

  glEnable(GL_DEPTH_TEST);

  // If Z buffer will be init for early Z culling
  if(halfRender)
  {
    glColorMask(false, false, false, false);
    glDepthMask(true);

    glBegin(GL_QUADS);
    glVertex3f(0.0f, 0.0f,  0.0f);
    glVertex3f(0.0f, 0.0f,  0.5f);
    glVertex3f(1.0f, 0.0f,  0.5f);
    glVertex3f(1.0f, 0.0f,  0.0f);
    glEnd();
  }

  glColorMask(true, true, true, true);
  glDepthMask(false);

  // Bind Shader
  glUseProgramObjectARB(shader1);

  glBegin(GL_QUADS);
  glTexCoord2i(fboWidth, 0);
  glVertex3f(0.0f, 0.0f,  0.0f);

  glTexCoord2i(fboWidth, fboHeight);
  glVertex3f(0.0f, 0.0f,  1.0f);

  glTexCoord2i(0, fboHeight);
  glVertex3f(1.0f, 0.0f,  1.0f);

  glTexCoord2i(0, 0);
  glVertex3f(1.0f, 0.0f,  0.0f);
  glEnd();

  // Unbind Shader
  glUseProgramObjectARB(0);

  glMatrixMode(GL_PROJECTION);
  glPopMatrix();

  glMatrixMode(GL_MODELVIEW);
  glPopMatrix();

  // Unbind FBO
  glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);

  if(dbgResult)
  {
    float *tmpD = new float[fboWidth * fboHeight * 4];
    glBindTexture(GL_TEXTURE_RECTANGLE_NV, colorMap1);
    glGetTexImage(GL_TEXTURE_RECTANGLE_NV, 0, GL_RGBA, GL_FLOAT, tmpD);
    imdebug("rgba b=32f w=%d h=%d %p", fboWidth, fboHeight, tmpD);
    dbgResult = false;
    delete[] tmpD;
  }

  SDL_GL_SwapBuffers();
}

// Sets nearest filtering and clamp-to-edge wrapping on the rectangle texture
// that is currently bound.
static void setNearestClampToEdge()
{
  glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
  glTexParameteri(GL_TEXTURE_RECTANGLE_NV, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}

/**
 * Creates the color texture, the depth texture and the framebuffer object
 * (colorMap1, depthMap1, fbo1) and attaches both textures to the FBO.
 *
 * An incomplete FBO is reported on stdout; the function carries on regardless.
 * The default framebuffer is bound again on return.
 *
 * @param width   texture width in texels
 * @param height  texture height in texels
 */
void genFramebuffer(int width, int height)
{
  // Color target: 32-bit float RGBA rectangle texture, no initial data.
  glGenTextures(1, &colorMap1);
  glBindTexture(GL_TEXTURE_RECTANGLE_NV, colorMap1);
  setNearestClampToEdge();
  glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, GL_FLOAT_RGBA32_NV,
               width, height, 0, GL_RGB, GL_FLOAT, NULL);

  // Depth target: 24-bit depth rectangle texture, no initial data.
  glGenTextures(1, &depthMap1);
  glBindTexture(GL_TEXTURE_RECTANGLE_NV, depthMap1);
  setNearestClampToEdge();
  glTexImage2D(GL_TEXTURE_RECTANGLE_NV, 0, GL_DEPTH_COMPONENT24,
               width, height, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);

  // Framebuffer object with both textures attached at mip level 0.
  glGenFramebuffersEXT(1, &fbo1);
  glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fbo1);
  glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT,
                            GL_TEXTURE_RECTANGLE_NV, colorMap1, 0);
  glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT,
                            GL_TEXTURE_RECTANGLE_NV, depthMap1, 0);

  const bool complete =
    (glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT) == GL_FRAMEBUFFER_COMPLETE_EXT);
  if(!complete)
  {
    cout << "FBO is not COMPLETE" << endl;
  }

  glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
}

void genShader()
{
  GLint compilResult;

  // Get a shader program object id
  shader1 = glCreateProgramObjectARB();

  // Get handle to vertex shader
  GLhandleARB vertHandle = glCreateShaderObjectARB(GL_VERTEX_SHADER);

  // Sends vertex shader source to OpenGL
  glShaderSource(vertHandle, 1, &vertProg, 0);

  // Vertex shader compilation
  glCompileShader(vertHandle);

  // Link vertex shader program with shader program object
  glAttachObjectARB(shader1, vertHandle);

  // Get vertex shader compilation log in case of error
  glGetObjectParameterivARB(vertHandle, GL_OBJECT_COMPILE_STATUS_ARB, &compilResult);
  if(compilResult == GL_FALSE)
  {
    char temp[4096];
    glGetInfoLogARB(vertHandle, 4096, NULL, temp);
    cout << "Vertex shader : " << temp << endl;
  }

  // Delete vertex shader handle
  glDeleteObjectARB(vertHandle);

  // Get handle to frag shader
  GLhandleARB fragHandle = glCreateShaderObjectARB(GL_FRAGMENT_SHADER);

  // Sends frag shader source to OpenGL
  glShaderSource(fragHandle, 1, &fragProg, 0);

  // Vertex frag compilation
  glCompileShader(fragHandle);

  // Link frag shader program with shader program object
  glAttachObjectARB(shader1, fragHandle);

  // Get frag shader compilation log in case of error
  glGetObjectParameterivARB(fragHandle, GL_OBJECT_COMPILE_STATUS_ARB, &compilResult);
  if (compilResult == GL_FALSE)
  {
    char temp[4096];
    glGetInfoLogARB(fragHandle, 4096, NULL, temp);
    cout << "Frag shader : " << temp << endl;
  }

  // Delete frag shader handle
  glDeleteObjectARB(fragHandle);

  // Link shader programs
  glLinkProgram(shader1);
}

void delFramebuffer()
{
  glDeleteTextures(1, &colorMap1);
  glDeleteTextures(1, &depthMap1);
  glDeleteFramebuffersEXT(1, &fbo1);
}

void delShader()
{
}

int main(int argc, char* argv[])
{
  bool finish = false;    // Indicate program end
  int scrWidth = 512;    // Screen width resolution
  int scrHeight = 512;   // Screen height resolution
  int fboWidth = 512;    // Framebuffer width resolution
  int fboHeight = 512;   // Framebuffer height resolution
  int scrBpp = 32;        // Screen bits per pixel
  SDL_Surface *screen;    // Video buffer to display

  // SDL video init
  if(SDL_Init(SDL_INIT_VIDEO) < 0)
  {
    cout << "Unable to init SDL : " << SDL_GetError() << endl;
    return 1;
  }

  // Program exit function
  atexit(SDL_Quit);

  SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 5);
  SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 5);
  SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 5);
  SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 16);
  SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);

  // Video mode init
  screen = SDL_SetVideoMode(scrWidth, scrHeight, scrBpp, SDL_OPENGL/*|SDL_FULLSCREEN*/);

  // Video mode error
  if(screen == NULL)
  {
    cout << "Unable to set " << scrWidth << "x" << scrHeight << "x" << scrBpp
      << " video mode : " << SDL_GetError() << endl;
    return 1;
  }

  GLenum err = glewInit();

  if(err != GLEW_OK)
  {
    cout << "Unable to start glew : " << glewGetErrorString(err) << endl;
    return 1;
  }

  genFramebuffer(fboWidth, fboHeight);
  genShader();

  // Program loop
  while(!finish)
  {
    SDL_Event event;

    while(SDL_PollEvent(&event))
    {
      if(event.type == SDL_QUIT)
        finish =  true;

      if(event.type == SDL_KEYDOWN)
      {
        if(event.key.keysym.sym == SDLK_ESCAPE)
          finish =  true;
      }
    }

    drawScene(scrWidth, scrHeight, fboWidth, fboHeight);
  }

  delShader();
  delFramebuffer();

  return 0;
}

In this program early Z doesn’t work either.

I was under the impression that early Z didn’t work with FBOs ?!

Y.

According to this test Early Z work with FBO.
I’m going to try using the same shader, written in Cg, in my test. I think the problem comes from GLSL.

Hmmm, rather strange. By the way, you have very simple situation to see gains from early-Z.

I noticed that you create your back buffer with 16-bit color and depth, and a framebuffer object with 32-bit color and 24-bit depth.
Some time ago (about a year) it was quite hard to get a 24-bit FBO depth buffer when the screen framebuffer had 16-bit depth. I hope that is no longer a problem.

Try this demo and see, whether your card can have benefits from Z-Cull: http://humus.ca/index.php?page=3D&ID=3