FBO drawing slower than PBuffers? (6800GT)

Hi all,

I’ve just modified my renderer to use FBOs instead of pbuffers for my multipass rendering. It basically draws into a float16 framebuffer, generates a glow texture in another framebuffer, blurs that, adds it back to the float16 buffer, then does some refraction post-processing and draws the result into the window-system framebuffer. This worked fine with pbuffers, and it works with FBOs too; however, my test scenes that ran at a consistent 15 FPS in certain views with pbuffers now run at 9 FPS! It seems everything has slowed down with FBOs, including drawing the lighting & stencil passes into the float16 FBO (all my lighting went from about 0.04 sec to 0.05 sec to draw!).

I haven’t had time to test it thoroughly yet, but it definitely seems to be related to the FBO code: I can change a #define to go back to the pbuffer version, and that still runs at the same framerates as before!

I was doing something silly and reallocating the FBOs on resizes, causing about 6 reallocations per frame; now it keeps them at the largest size (I was doing this with pbuffers too), so I expected it to hop back to the same speed as pbuffers, but it hasn’t.

So, has anyone else seen something similar? Here’s my FBO initialization code:

bool FramebufferObject::CreateFBO()
{
	CHECK_GL_ERRORS("FramebufferObject::CreateFBO: start");

//	if(m_iFBO)
//		Destroy(false);

	if(!m_iFBO)
		glGenFramebuffersEXT(1, &m_iFBO);
	if(!m_iDepthRB)
		glGenRenderbuffersEXT(1, &m_iDepthRB);
	if(!m_iStencilRB)
		glGenRenderbuffersEXT(1, &m_iStencilRB);
	if(!m_iDepthStencilRB)
		glGenRenderbuffersEXT(1, &m_iDepthStencilRB);

	CHECK_GL_ERRORS("FramebufferObject::CreateFBO: before binding FBO");

	// Enable render-to-texture
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, m_iFBO);

	// Set up color_tex and depth_rb for render-to-texture
	CreateTextureObject();

	CHECK_GL_ERRORS("FramebufferObject::CreateFBO: after creating texture");

	// attach positive X for now
	if(m_iTexTarget == GL_TEXTURE_CUBE_MAP)
		glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_CUBE_MAP_POSITIVE_X, m_iTextureID, 0);
	else
		glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, m_iTexTarget, m_iTextureID, 0);

	CHECK_GL_ERRORS("FramebufferObject::CreateFBO: after color attachment");

	// if we're using 24bit depth and 8bit stencil, we can use an interleaved buffer for both stencil & depth data
	// otherwise we can only use the one buffer for each separately (lone stencil not currently supported on any hardware!)
	if(m_iDepthBits > 0 && m_iStencilBits > 0)
	{
		glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, m_iDepthStencilRB);
		glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_STENCIL_NV, m_iWidth, m_iHeight);
		glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, m_iDepthStencilRB);
		glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, m_iDepthStencilRB);
	}
	else
	{
		// initialize separate depth renderbuffer
		if(m_iDepthBits > 0)
		{
			glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, m_iDepthRB);
			int depth = GL_DEPTH_COMPONENT32;
			if(m_iDepthBits < 32)
				depth = GL_DEPTH_COMPONENT24;
			if(m_iDepthBits < 24)
				depth = GL_DEPTH_COMPONENT16;

			glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, depth, m_iWidth, m_iHeight);
			glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, m_iDepthRB);
		}

		CHECK_GL_ERRORS("FramebufferObject::CreateFBO: after depth attachment");

		// and the separate stencil renderbuffer
		if(m_iStencilBits > 0)
		{
			glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, m_iStencilRB);

			int stencil = GL_STENCIL_INDEX8_EXT;
			if(m_iStencilBits < 8)
				stencil = GL_STENCIL_INDEX4_EXT;
			if(m_iStencilBits < 4)
				stencil = GL_STENCIL_INDEX1_EXT;

			glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, stencil, m_iWidth, m_iHeight);
			glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_STENCIL_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, m_iStencilRB);
		}

		CHECK_GL_ERRORS("FramebufferObject::CreateFBO: after stencil attachment");
	}

	// Check framebuffer completeness at the end of initialization.
	CheckFBOStatus();

	// Re-enable rendering to the window
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);

	// remember our new actual size
	m_iActualWidth = m_iWidth;
	m_iActualHeight = m_iHeight;

	CHECK_GL_ERRORS("FramebufferObject::CreateFBO: end");

	return true;
}

// Creates (if necessary) and allocates the colour texture for this FBO.
//
// The texture name is generated on first use, bound to m_iTexTarget, and given
// storage of m_iWidth x m_iHeight with no initial pixel data.
//  - Cube maps: the min filter is set to GL_LINEAR and all six faces are
//    allocated as GL_RGBA8.
//  - Other targets: GL_RGBA8 by default; when m_bFloatBuffer is set, a float
//    format is used instead (GL_RGBA32F_ARB if m_iColourBits > 64, otherwise
//    GL_RGBA16F_ARB).
// The texture is left bound to m_iTexTarget on return.
void FramebufferObject::CreateTextureObject()
{
	CHECK_GL_ERRORS("FramebufferObject::CreateTextureObject: start");

	if(m_iTextureID == 0)
		glGenTextures(1, &m_iTextureID);

	glBindTexture(m_iTexTarget, m_iTextureID);

	if(m_iTexTarget != GL_TEXTURE_CUBE_MAP)
	{
		int pixelType = GL_UNSIGNED_BYTE;
		int texFormat = GL_RGBA8;

		if(m_bFloatBuffer)
		{
			// The client pixel type should not matter (original author's
			// note) because a NULL data pointer is passed below.
			pixelType = GL_FLOAT;
			texFormat = (m_iColourBits > 64) ? GL_RGBA32F_ARB : GL_RGBA16F_ARB;
		}

		glTexImage2D(m_iTexTarget, 0, texFormat, m_iWidth, m_iHeight, 0, GL_RGBA, pixelType, NULL);
	}
	else
	{
		glTexParameterf(m_iTexTarget, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

		// Faces are allocated in this fixed order: +X, +Y, +Z, -X, -Y, -Z.
		const int cubeFaces[6] =
		{
			GL_TEXTURE_CUBE_MAP_POSITIVE_X,
			GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
			GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
			GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
			GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
			GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
		};

		for(int face = 0; face < 6; ++face)
			glTexImage2D(cubeFaces[face], 0, GL_RGBA8, m_iWidth, m_iHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
	}

	CHECK_GL_ERRORS("FramebufferObject::CreateTextureObject: end");
}

I’ve never used PBuffers, only FBOs, but I’ve had some crashes where the driver itself crashed and put up a message box saying that something had gone wrong with the PBuffers… This leads me to the conclusion that the current FBO implementations just wrap PBuffers. Hopefully this won’t be the case forever :slight_smile:

I don’t know about the performance. But Andras, I think the error message is not because of a wrapper but because of code reuse (copy & paste :slight_smile:). I doubt that the current FBO implementation is a wrapper.

I very much doubt FBO is a wrapper because your GL calls affect the state of your window’s GL RC… with PBuffers it was a separate context…

PS. I’m running windows XP, latest driver (81.98), only tested on 6800GT so far.

I can’t say anything about PBOs since I’ve never used them, but since the 76.xx drivers I think my FBO performance has sucked. Whenever I exit my app with FB sizes larger than 512x512, it takes a long time to return to Windows. This was never the case with drivers before 77 or 78 (I think). As of now, performance with glCopyTexImage has been as good as with FBOs, and that’s with 2 FBOs using two depth maps and two color render targets… On my ATI hardware, my engine exits immediately… Beats me, but it seems like Nvidia did something different, and it kind of sucks for me because it takes so long to get back to Windows… And this behavior occurs on both my now-DEAD 6800GT and my new 7800GT.

bump

Surely someone else has migrated from pbuffers to fbo?? Noticed a rendering slowdown? Anyone? Anyone? Bueller? Bueller? :slight_smile:

Well, just finished porting my app (numerical computations on the GPU with readback) from

Win32, C++, OpenGL 1.5, pbuffer
to
.NET 2.0, C#, OpenGL 2.0, FBO

The performance decreased a little:
old: CPU 40 sec, GPU 4.4 sec
new: CPU 43 sec, GPU 4.7 sec

I’m only using one color attachment (no depth, no stencil), a floating point texture (2k*2k, 32bit) and GLSL.
And this is done on a Pentium M 2.0 Ghz, 1 GB RAM, 6800go (256MB) (driver 78.10).

Given all the changes (framework, language), the CPU and GPU times both changed by almost the same amount.
Don’t know if this info is of any help for you!

I’ve noticed that early-z/stencil rejection currently doesn’t work on NVidia cards using FBOs. In situations where you’re fragment limited, you’re guaranteed to see a tremendous slowdown. I would imagine this will be fixed in NVidia’s next driver revision(s). After all, this extension is becoming pretty popular and it’s only a matter of time until some software starts shipping with support for it. :wink:

Kevin

As far as I remember early-z worked just fine with RGB/RGBA fbos, it wasn’t working with float formats though.

But then again, that was about 6 months ago, and I haven’t been programming in GL since :frowning:

I never use PBOs. I remember that the NVIDIA SDK has a demo comparing the upload and download speeds of PBO, multi-PBO, and glCopySubImage2D. I didn’t find anything exciting, so I chose FBO. Maybe you should test them carefully.

Well, that’d definitely be a cause then! Early-z not working… I’m VERY fragment limited; my 6800 thinks I’m an evil slave driver :slight_smile:

I’m using fp16 framebuffers, so that’s probably it, but it seemed to work with pbuffers, so I’m guessing this is something NVIDIA has yet to fix?