Request for Advice - Optimizing My Drawing Calls...


Folks here have given excellent advice before, so I thought I would give this another try.

I have the code below which works but I believe I can do a better job of rendering.

Essentially I load my geometry in via a “for loop”, find the duplicate geometry to set up instancing, load the goods into OpenGL, sort the loaded geometry by whether it has transparency or not, and then draw.

I have two areas below where I believe I can speed things up but I am not sure how.

The first is the way I load materials. Currently I am using a uniform buffer object and loading it based on the contents of the file. I want to use a standard buffer so that I can have just one call to glDrawElementsInstanced (or a better equivalent), but the offset for each material is an arbitrary amount that depends on what the object is, rather than an easy-to-define offset like the number of vertices. You will see this in the code below.

The second is my MVP and normal matrices. Currently I calculate all of the instances' MVPs CPU-side and then push the data down to the graphics card via glBufferSubData, but is there a smarter way of doing things?

If you see anything else, I am all ears.

Thank you all for any guidance you can provide.

First, how I load my geometry into OpenGL:

void HighSpeedMeshCache::LoadIntoOpenGL()

	glGenVertexArrays(1, &Pointer_VAO);

	glGenBuffers(1, &Vertex_VBO);

	glBindBuffer(GL_ARRAY_BUFFER, Vertex_VBO);

	glBufferData(GL_ARRAY_BUFFER, TotalBufferSize, NULL, GL_STATIC_DRAW);

	glBufferSubData(GL_ARRAY_BUFFER, NULL, VerticesBufferSize, Vertices);

	glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, 0);

	if (HasNormals) {

		glBufferSubData(GL_ARRAY_BUFFER, NormalOffset, NormalBufferSize, Normals);

		glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 0, 0);


	if (HasUVs) {
		glBufferSubData(GL_ARRAY_BUFFER, UVOffset, UVBufferSize, UVs);

		glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 0, 0);


	glGenBuffers(1, &MVP_VBO);

	glBufferData(GL_ARRAY_BUFFER, ModelMatrixIntances.size() * sizeof(glm::mat4), NULL, GL_DYNAMIC_DRAW);

	glVertexAttribPointer(3, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4), (void*)(sizeof(GLfloat) * 0));
	glVertexAttribDivisor(3, 1);

	glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4), (void*)(sizeof(GLfloat) * 4));
	glVertexAttribDivisor(4, 1);

	glVertexAttribPointer(5, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4), (void*)(sizeof(GLfloat) * 8));
	glVertexAttribDivisor(5, 1);

	glVertexAttribPointer(6, 4, GL_FLOAT, GL_FALSE, sizeof(glm::mat4), (void*)(sizeof(GLfloat) * 12));
	glVertexAttribDivisor(6, 1);

	glGenBuffers(1, &NormalMatrix_VBO);
	glBindBuffer(GL_ARRAY_BUFFER, NormalMatrix_VBO);

	glBufferData(GL_ARRAY_BUFFER, ModelMatrixIntances.size() * sizeof(glm::mat3), NULL, GL_DYNAMIC_DRAW);

	glVertexAttribPointer(7, 3, GL_FLOAT, GL_FALSE, sizeof(glm::mat3), (void*)(sizeof(GLfloat) * 0));
	glVertexAttribDivisor(7, 1);

	glVertexAttribPointer(8, 3, GL_FLOAT, GL_FALSE, sizeof(glm::mat3), (void*)(sizeof(GLfloat) * 3));
	glVertexAttribDivisor(8, 1);

	glVertexAttribPointer(9, 3, GL_FLOAT, GL_FALSE, sizeof(glm::mat3), (void*)(sizeof(GLfloat) * 6));
	glVertexAttribDivisor(9, 1);

	glGenBuffers(1, &Index_VBO);

	glBufferData(GL_ELEMENT_ARRAY_BUFFER, TotalPolygonCount * 3 * sizeof(unsigned int), Indices, GL_STATIC_DRAW);


And this is how I draw the geometry per mesh.

This function runs in a loop per mesh:

/// Refreshes the per-instance matrices for this mesh and issues one instanced
/// draw call per material.
///
/// @param CurrentOpenGLController  Source of the view matrix and the combined
///                                 projection*view matrix used for this draw.
void HighSpeedMeshCache::DrawMe(GlMaintenance* CurrentOpenGLController)
{
	// The camera matrices do not change between instances; fetch them once.
	const glm::mat4 ViewMatrix = CurrentOpenGLController->GetViewMatrix();
	const glm::mat4 ProjectionViewMatrix = CurrentOpenGLController->GetProjectionViewMatrix();

	glBindVertexArray(Pointer_VAO);

	// Each buffer is bound once and then filled. Without binding MVP_VBO here,
	// the MVP data would land in whichever buffer happened to be bound
	// (NormalMatrix_VBO after the first instance).
	glBindBuffer(GL_ARRAY_BUFFER, MVP_VBO);
	GLintptr MvpByteOffset = 0;
	for (const auto& ModelMatrix : ModelMatrixIntances) {
		const glm::mat4 ModelViewProjectionMatrix = ProjectionViewMatrix * ModelMatrix;
		glBufferSubData(GL_ARRAY_BUFFER, MvpByteOffset, sizeof(glm::mat4), glm::value_ptr(ModelViewProjectionMatrix));
		MvpByteOffset += static_cast<GLintptr>(sizeof(glm::mat4));
	}

	glBindBuffer(GL_ARRAY_BUFFER, NormalMatrix_VBO);
	GLintptr NormalByteOffset = 0;
	for (const auto& ModelMatrix : ModelMatrixIntances) {
		const glm::mat4 ModelViewMatrix = ViewMatrix * ModelMatrix;
		// Inverse-transpose of the upper 3x3 of the model-view matrix.
		const glm::mat3 NormalMatrix = glm::transpose(glm::inverse(glm::mat3(ModelViewMatrix)));
		glBufferSubData(GL_ARRAY_BUFFER, NormalByteOffset, sizeof(glm::mat3), glm::value_ptr(NormalMatrix));
		NormalByteOffset += static_cast<GLintptr>(sizeof(glm::mat3));
	}

	for (const auto& Material : Materials) {
		// NOTE(review): no glBindBuffer(GL_UNIFORM_BUFFER, ...) is visible in
		// this function; the material UBO is assumed to be bound by the caller
		// - confirm.
		glBufferData(GL_UNIFORM_BUFFER, sizeof(Material.ColorProperties), Material.ColorProperties, GL_DYNAMIC_DRAW);

		// Material.Offset is an index count; convert it to a byte offset into
		// the element buffer.
		glDrawElementsInstanced(GL_TRIANGLES, (Material.TriangleCount * 3),
		                        GL_UNSIGNED_INT,
		                        reinterpret_cast<const GLvoid *>(Material.Offset * sizeof(unsigned int)),
		                        NumberOfChildItems + 1);
	}
}