FBO problems on ATI Radeon X1300

Hi,

I have a problem with FBOs on my old ATI Radeon X1300 on Windows XP 32-bit, using the legacy Catalyst 10.2 driver, which is the latest one that supports this GPU. I’m using EXT_framebuffer_object to allocate an FBO with a color texture attachment using internal format RGBA16. I first render a simple object into the FBO, then bind the attached texture to a shader that writes this texture back into the window-system framebuffer in a second pass.

Whenever I enable blending for the first pass, the screen stays black on the Radeon X1300. I don’t have this problem on the other systems I tested; it even works on Linux with the same GPU, where I use the Gallium3D drivers. When I use RGBA8 as the internal format it seems to work, so I guess the driver has problems with RGBA16 for FBOs. I’ve hacked together a “small” test program which reproduces this error:

#include <GL/glew.h>
#include <SDL/SDL.h>
#include <SDL/SDL_video.h>
#include <SDL/SDL_syswm.h>
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <fstream>

using std::cerr;
using std::cout;
using std::endl;
using std::ifstream;


float vertices[] = {
		1.0, 1.0, -3.0,
		1.0, -1.0, -3.0,
		-1.0, -1.0, -3.0,

		-1.0, -1.0, -3.0,
		1.0, 1.0, -3.0,
		-1.0, 1.0, -3.0,
		
		1.0, 1.0,
		1.0, 0.0,
		0.0, 0.0,
		
		0.0, 0.0,
		1.0, 1.0,
		0.0, 1.0
};

unsigned int indices[] = {
		0, 1, 2, 3, 4, 5
};

GLint vertexAttrib;
GLint texCoordAttrib;

GLint mvpMatrixUniform;
GLint texUniform;

GLint blendVertexAttrib;

GLint blendLayerUniform;

GLuint tex;

GLuint fbo;
GLuint fboTex;

GLuint program, blendProgram;

GLuint modelDataBuf, modelIndexBuf;

GLuint planeDataBuf;



GLuint CompileShaderProgram(const char* vpath, const char* fpath)
{
	GLuint program;
	
	ifstream vin(vpath);
	vin.seekg(0, ifstream::end);
	int vlen = vin.tellg();
	vin.seekg(0, ifstream::beg);
	char* vdata = new char[vlen];
	vin.read(vdata, vlen);

	ifstream fin(fpath);
	fin.seekg(0, ifstream::end);
	int flen = fin.tellg();
	fin.seekg(0, ifstream::beg);
	char* fdata = new char[flen];
	fin.read(fdata, flen);

	const char* cvdata = vdata;
	const char* cfdata = fdata;

	GLuint vshader = glCreateShader(GL_VERTEX_SHADER);
	GLuint fshader = glCreateShader(GL_FRAGMENT_SHADER);

	glShaderSource(vshader, 1, &cvdata, &vlen);
	glShaderSource(fshader, 1, &cfdata, &flen);

	glCompileShader(vshader);
	
	GLint status;
	glGetShaderiv(vshader, GL_COMPILE_STATUS, &status);

	if (status == GL_FALSE) {
		GLint maxLength;
		GLint actualLength;
		glGetShaderiv(vshader, GL_INFO_LOG_LENGTH, &maxLength);
		char* log = new char[maxLength];
		glGetShaderInfoLog(vshader, maxLength, &actualLength, log);
		printf("ERROR compiling vshader: %s
", log);
		exit(1);
	}
	
	glCompileShader(fshader);
	
	glGetShaderiv(fshader, GL_COMPILE_STATUS, &status);

	if (status == GL_FALSE) {
		GLint maxLength;
		GLint actualLength;
		glGetShaderiv(fshader, GL_INFO_LOG_LENGTH, &maxLength);
		char* log = new char[maxLength];
		glGetShaderInfoLog(fshader, maxLength, &actualLength, log);
		printf("ERROR compiling fshader: %s
", log);
		exit(1);
	}

	program = glCreateProgram();

	glAttachShader(program, vshader);
	glAttachShader(program, fshader);

	glLinkProgram(program);
	
	glGetProgramiv(program, GL_LINK_STATUS, &status);

	if (status == GL_FALSE) {
		GLint maxLength;
		GLint actualLength;
		glGetProgramiv(program, GL_INFO_LOG_LENGTH, &maxLength);
		char* log = new char[maxLength];
		glGetProgramInfoLog(program, maxLength, &actualLength, log);
		printf("ERROR linking program: %s
", log);
		exit(1);
	}
	
	return program;
}



void init()
{
	glewInit();
	
	uint8_t texData[] = {
		255, 0, 0, 255,
		0, 255, 0, 255,
		0, 255, 0, 255,
		255, 0, 0, 255
	};
	

	// The texture to be rendered
	glGenTextures(1, &tex);
	glBindTexture(GL_TEXTURE_2D, tex);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texData);
	
	
	// The FBO with an RGBA16 color texture attachment
	glGenFramebuffersEXT(1, &fbo);
	glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER, fbo);

	glGenTextures(1, &fboTex);
	glBindTexture(GL_TEXTURE_2D, fboTex);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16, 512, 512, 0, GL_RGBA, GL_UNSIGNED_SHORT, NULL);
	glFramebufferTexture2DEXT(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fboTex, 0);
	
	
	glClearColor(0.0f, 0.0f, 0.0f, 0.0f);

	program = CompileShaderProgram("vertex.glsl", "fragment.glsl");
	blendProgram = CompileShaderProgram("vertex_blend.glsl", "fragment_blend.glsl");

	GLuint dataBuffer, indexBuffer;
	glGenBuffers(1, &dataBuffer);
	glGenBuffers(1, &indexBuffer);

	glBindBuffer(GL_ARRAY_BUFFER, dataBuffer);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexBuffer);

	glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
	
	modelDataBuf = dataBuffer;
	modelIndexBuf = indexBuffer;
	
	float planeData[] = {
			-1.0f, 1.0f,	// Upper left
			-1.0f, -1.0f,	// Lower left
			1.0f, 1.0f,		// Upper right
			1.0f, -1.0f		// Lower right
	};

	glGenBuffers(1, &planeDataBuf);
	glBindBuffer(GL_ARRAY_BUFFER, planeDataBuf);
	glBufferData(GL_ARRAY_BUFFER, 4*2*sizeof(float), planeData, GL_STATIC_DRAW);

	vertexAttrib = glGetAttribLocation(program, "Vertex");
	texCoordAttrib = glGetAttribLocation(program, "TexCoord");
	
	mvpMatrixUniform = glGetUniformLocation(program, "MVPMatrix");
	texUniform = glGetUniformLocation(program, "Texture");
	
	blendVertexAttrib = glGetAttribLocation(blendProgram, "Vertex");
	blendLayerUniform = glGetUniformLocation(blendProgram, "Layer");
}


void reshape(int w, int h)
{
	float aspect = (float) w / (float) h;
	glViewport(0, 0, w, h);

	float l = aspect*-0.7;
	float r = aspect*0.7;
	float b = -0.7;
	float t = 0.7;
	float n = 1.0;
	float f = 3000.0;

	float mat[] = {
			2*n/(r-l),		0,				0,					0,
			0,				2*n/(t-b),		0, 					0,
			(r+l)/(r-l),	(t+b)/(t-b),	(-(f+n))/(f-n),		-1,
			0,				0,				(-2*f*n)/(f-n),		0
	};

	glUseProgram(program);
	glUniformMatrix4fv(mvpMatrixUniform, 1, GL_FALSE, mat);
}


void display()
{
	glClear(GL_COLOR_BUFFER_BIT);
	
	// First pass: render into the FBO (enabling blending here is what triggers the black screen)
	glEnable(GL_BLEND);
	glBlendFunc(GL_ONE, GL_ZERO);

	glBindTexture(GL_TEXTURE_2D, tex);
	
	glUseProgram(program);
	glUniform1i(texUniform, 0);

	glEnableVertexAttribArray(vertexAttrib);
	glEnableVertexAttribArray(texCoordAttrib);
	
	glBindBuffer(GL_ARRAY_BUFFER, modelDataBuf);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, modelIndexBuf);
	
	glVertexAttribPointer(vertexAttrib, 3, GL_FLOAT, GL_FALSE, 0, (void*) 0);
	glVertexAttribPointer(texCoordAttrib, 2, GL_FLOAT, GL_FALSE, 0, (void*) (6*3*sizeof(float)));
	
	glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, fbo);
	
	glClear(GL_COLOR_BUFFER_BIT);

	glDrawElements(GL_TRIANGLES, sizeof(indices)/sizeof(unsigned int), GL_UNSIGNED_INT, (void*) 0);
	
	glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, 0);
	
	// Second pass: draw the FBO texture into the window-system framebuffer
	glUseProgram(blendProgram);
	
	glBindTexture(GL_TEXTURE_2D, fboTex);
	glUniform1i(blendLayerUniform, 0);
	
	glBindBuffer(GL_ARRAY_BUFFER, planeDataBuf);
	glEnableVertexAttribArray(blendVertexAttrib);
	glVertexAttribPointer(blendVertexAttrib, 2, GL_FLOAT, GL_FALSE, 0, (void*) 0);

	glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
	
	glActiveTexture(GL_TEXTURE0);

	SDL_GL_SwapBuffers();
	
	printf("Rendering done!
");
}



int main(int argc, char** argv)
{
	if (SDL_Init(SDL_INIT_VIDEO) < 0) {
		cerr << "ERROR initializing SDL!" << endl;
		return 1;
	}

	atexit(SDL_Quit);

	SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 5);
	SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 5);
	SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 5);
	SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 16);
	SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);

	SDL_SetVideoMode(512, 512, 16, SDL_HWSURFACE | SDL_OPENGL);

	init();
	reshape(512, 512);

	while (true) {
		SDL_Event evt;
		
		while (SDL_PollEvent(&evt) != 0) {
			if (evt.type == SDL_QUIT)
				exit(0);
		}
		
		display();
	}

	return 0;
}

The above code does not have any deeper meaning, but I can reproduce the error there. I just want to make sure that this is a driver bug and not something wrong with my program.

I also have several other issues on the same system: I’m using EXT_framebuffer_blit to copy the window-system depth buffer to an FBO depth attachment, which generates GL_INVALID_OPERATION. I use glGetIntegerv with GL_DEPTH_BITS to get the depth of the window-system framebuffer and choose GL_DEPTH_COMPONENT16/24/32 as the texture format accordingly. glCheckFramebufferStatus shows no problems, so I guess the FBO has a different format, but as I obtain the depth using GL_DEPTH_BITS, I don’t know what could be wrong with it. Again, I have no problems on other systems.
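Roughly, the blit code looks like this (just a sketch, not the exact code from my program; depthTex, depthFbo and the hard-coded 512x512 size are placeholders):

GLint depthBits;
glGetIntegerv(GL_DEPTH_BITS, &depthBits);   // 16, 24 or 32 on this system

GLenum depthFormat = depthBits == 16 ? GL_DEPTH_COMPONENT16
                   : depthBits == 24 ? GL_DEPTH_COMPONENT24
                   : GL_DEPTH_COMPONENT32;

// Depth texture used as the FBO depth attachment
glBindTexture(GL_TEXTURE_2D, depthTex);
glTexImage2D(GL_TEXTURE_2D, 0, depthFormat, 512, 512, 0,
             GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, NULL);

glBindFramebufferEXT(GL_DRAW_FRAMEBUFFER_EXT, depthFbo);
glFramebufferTexture2DEXT(GL_DRAW_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT,
                          GL_TEXTURE_2D, depthTex, 0);

glBindFramebufferEXT(GL_READ_FRAMEBUFFER_EXT, 0);   // read from the window
glBlitFramebufferEXT(0, 0, 512, 512, 0, 0, 512, 512,
                     GL_DEPTH_BUFFER_BIT, GL_NEAREST);   // <- GL_INVALID_OPERATION here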

There are even more problems, e.g. a draw call that renders a simple textured full-screen quad takes about half a minute to complete. I guess it’s all related to the FBO problems.

Are these known problems with the (relatively old) Catalyst driver, or could it be a problem in my program? I’ve also tried the Omega drivers where I have similar problems, but I didn’t test a lot on them.

I can’t help you with the RGBA16 problem… I’ve never personally used this format. You should be careful with it: RGBA16 is very exotic and might well be promoted to GL_RGBA8 or GL_RGBA16F internally.

I also have several other issues on the same system: I’m using EXT_framebuffer_blit to copy the window-system depth buffer to an FBO depth attachment, which generates GL_INVALID_OPERATION. I use glGetIntegerv with GL_DEPTH_BITS to get the depth of the window-system framebuffer and choose GL_DEPTH_COMPONENT16/24/32 as the texture format accordingly. glCheckFramebufferStatus shows no problems, so I guess the FBO has a different format, but as I obtain the depth using GL_DEPTH_BITS, I don’t know what could be wrong with it.

I know this problem all too well :-(. Basically, the root cause is non-identical depth buffer formats of the window and the FBO. If the formats don’t match, you’ll get GL_INVALID_OPERATION.

There are two problems with that:

  1. There is no query to ask for the actual internal format of the window (FBO 0), so you have to guess (depending on GL_DEPTH_BITS, like you did) in order to create a matching FBO.
  2. There is no clear definition of what “to match” means.

ATI is particularly picky about which depth formats match. The stencil buffer format seems to be part of the depth buffer format, so you have to check GL_STENCIL_BITS as well and use GL_DEPTH24_STENCIL8 as the depth buffer format accordingly. For example, you can’t depth-blit between a GL_DEPTH24_STENCIL8 and a GL_DEPTH_COMPONENT24 texture - crazy :-/
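In practice that means picking the FBO depth format from both queries, something like this (just a sketch, untested on your driver):

GLint depthBits = 0, stencilBits = 0;
glGetIntegerv(GL_DEPTH_BITS, &depthBits);
glGetIntegerv(GL_STENCIL_BITS, &stencilBits);

GLenum internalFormat;
if (stencilBits > 0)
    internalFormat = GL_DEPTH24_STENCIL8_EXT;   // packed depth+stencil (EXT_packed_depth_stencil)
else if (depthBits == 16)
    internalFormat = GL_DEPTH_COMPONENT16;
else if (depthBits == 24)
    internalFormat = GL_DEPTH_COMPONENT24;
else
    internalFormat = GL_DEPTH_COMPONENT32;

// For the packed format, attach the texture to both GL_DEPTH_ATTACHMENT_EXT
// and GL_STENCIL_ATTACHMENT_EXT of the FBO.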

The same problem arises if you try to blit from a multisampled window. In this case I have found no way to blit directly into a multisampled FBO without an intermediate MSAA-resolve-blit, even if the number of samples and the format match!

NVidia is more forgiving when blitting back and forth between windows and FBOs.

Have you considered using glCopyTex[Sub]Image instead? It is less restricted than glBlitFramebuffer.
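Something along these lines (a sketch; depthTex is a placeholder for a texture already allocated with a depth internal format, w/h is the window size) avoids the format-matching rules of the blit entirely:

// Copy the window-system depth buffer into a depth texture without blitting
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);   // read from the window
glBindTexture(GL_TEXTURE_2D, depthTex);
glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
                    0, 0,          // destination offset in the texture
                    0, 0, w, h);   // source rectangle in the window framebuffer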

The only thing I can add to this is that I have an engine which can switch between the different RGBA colour formats (GL_RGBA8, GL_RGBA16F, GL_RGBA16, etc.), and I know that they all work on my ATI 4850.
You have older-generation hardware, but it should still work.

I thought RGBA16 would be more portable than RGBA16F, because the float formats became core in later versions of OpenGL. At least according to glGetTexLevelParameter and gDEBugger, RGBA16 was really used. And even if another format were used, you should still see something other than a black screen.
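(That check is just a glGetTexLevelParameteriv query, roughly:)

// Ask the driver which internal format it actually allocated for the FBO texture
GLint actualFormat = 0;
glBindTexture(GL_TEXTURE_2D, fboTex);
glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, &actualFormat);
printf("Internal format: 0x%x (GL_RGBA16 = 0x%x)\n", actualFormat, GL_RGBA16);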

I still suspect the old driver to be the problem. As I said, I can only use the older legacy drivers, and the card only supports OpenGL 2.1 with EXT_framebuffer_object. From what I’ve heard, the AMD drivers are known to be problematic, so maybe the card is just too old to fully support FBOs.

I’m using glCopyTexImage2D now, and it seems to work without problems. So if it has no downsides other than being less flexible than glBlitFramebuffer, I’ll keep using it.

I have somehow solved the problem with RGBA16, although I have no idea what I changed to make it work. It also seems that the driver has problems with NPOT textures used as framebuffer attachments. When I use an NPOT texture, the color buffer data gets screwed up. Using a POT texture works for now, but I get more and more problems with FBOs…

Now using RGBA16F creates ugly lines between the triangles of my models, a little as if the models were rendered as solid and wireframe at the same time. With RGBA32F the blending stops working again.

I guess I’ll give up on this card for now. I don’t think all these problems can be caused by my code alone, as it only happens on the X1300. Unfortunately (well, I guess it has its reasons) it’s the only AMD card I have. Nonetheless I still hope to hear from someone who has experienced similar problems with the old Catalyst drivers.

I thought RGBA16 would be more portable than RGBA16F, because the float formats became core in later versions of OpenGL.

It’s about what gets used, not what was in the spec. If nobody tests 16-bit unsigned normalized integer textures, how could implementers know that there was a bug in them? The squeaky wheel gets the grease and all that.

Also, I’m fairly sure that hardware couldn’t handle GL_RGBA16 (actual GL_RGBA16, rather than down-conversion to 8-bit) until at least GL 2.1-class, if not 3.x-class, hardware.

The best thing you can do with your problem, since the hardware isn’t being supported, is to find another way to do what you need to do that works. Copy the texture rather than rendering to an FBO or something.

Unfortunately the real program I’m having problems with doesn’t just copy a textured object from one framebuffer to another. It’s an implementation of the weighted average algorithm, for which I need an FBO to render parts of my scene into. For weighted average I need a color buffer with more than 8 bits per channel. I tried RGBA16 as well as RGBA16F and RGBA32F, and RGBA16F seems to be the least problematic one; I’m just left with those lines between my triangles, which don’t look nice.
Getting weighted average to work on this card is not too important, as I have other algorithms which seem to work better. I just wanted to make sure it’s not a problem with my program which might suddenly pop up on other hardware with a driver update.

Here’s how it should look, on my Nvidia GTX460, and this is what my X1300 renders.

Thanks for your help again!

Just another tip for using older ATI Hardware:
Even though the extension list may report GL2.0+ or ARB_texture_non_power_of_two, the hardware might not fully support it and the driver will fall back to software rather quickly.

Make sure you set the texture minification filter to GL_LINEAR or GL_NEAREST (i.e. no mipmapping) and set the texture wrapping mode to GL_CLAMP_TO_EDGE.
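In code, that’s just (sketch):

// NPOT-safe sampler state for older ATI hardware: no mipmaps, no repeat wrapping
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);   // or GL_NEAREST
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);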

Better yet, try to use rectangle textures instead (ARB_texture_rectangle). I have witnessed cases where NPOT textures led to crashes and slow rendering on an R520-based card (FireGL V5200), even with the right clamping and no mipmapping. Rectangle textures just worked and performed well.

If you’re doing “just” a bit of 2D post-processing, they don’t add much extra work. Just use a different sampler type in your shaders and non-normalized texture coordinates.
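For example (a sketch assuming ARB_texture_rectangle; rectTex, width/height and the inline shader string are placeholders just to show the sampler type and texel-space coordinates):

// C++ side: allocate a rectangle texture instead of a 2D NPOT texture
glBindTexture(GL_TEXTURE_RECTANGLE_ARB, rectTex);
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA16, width, height, 0,
             GL_RGBA, GL_UNSIGNED_SHORT, NULL);

// Fragment shader side: sampler2DRect with non-normalized (texel) coordinates
const char* fsrc =
    "#extension GL_ARB_texture_rectangle : enable\n"
    "uniform sampler2DRect Layer;\n"
    "void main() {\n"
    "    gl_FragColor = texture2DRect(Layer, gl_FragCoord.xy);\n"
    "}\n";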

ATI X1000 series cards do support NPOT textures; the problem is that they don’t have floating-point blending support, even though they support 16-bit floating-point texturing and rendering.
