Hi! VBOs can improve rendering speed a lot, but I have a problem when using multiple VBOs.
Specifically, the problem appears when I draw one VBO and then another one, as here:
for (unsigned int i = 0; i < m_normals.size(); i++) {
if (m_normals[i].getAllVertices().getVertexCount() > 0) {
//std::cout<<"next frame draw normal"<<std::endl;
if (m_normals[i].getMaterial().getTexture() == nullptr) {
perPixelLinkedList2.setParameter("haveTexture", 0.f);
} else {
math::Matrix4f texMatrix = m_normals[i].getMaterial().getTexture()->getTextureMatrix();
perPixelLinkedList2.setParameter("textureMatrix", texMatrix);
perPixelLinkedList2.setParameter("haveTexture", 1.f);
}
if (m_normals[i].getVertexArrays()[0]->getEntity()->isWater()) {
perPixelLinkedList2.setParameter("water", 1.0f);
} else {
perPixelLinkedList2.setParameter("water", 0.0f);
}
if (core::Application::app != nullptr) {
float time = core::Application::getTimeClk().getElapsedTime().asSeconds();
perPixelLinkedList2.setParameter("time", time);
}
currentStates.blendMode = sf::BlendNone;
currentStates.shader = &perPixelLinkedList2;
currentStates.texture = m_normals[i].getMaterial().getTexture();
vb.clear();
vb.setPrimitiveType(m_normals[i].getAllVertices().getPrimitiveType());
for (unsigned int j = 0; j < m_normals[i].getAllVertices().getVertexCount(); j++) {
vb.append(m_normals[i].getAllVertices()[j]);
}
vb.update();
frameBuffer.drawVertexBuffer(vb, currentStates);
}
}
glCheck(glFinish());
glCheck(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT));
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
vb2.clear();
vb2.setPrimitiveType(sf::Quads);
Vertex v1 (sf::Vector3f(0, 0, quad.getSize().z));
Vertex v2 (sf::Vector3f(quad.getSize().x,0, quad.getSize().z));
Vertex v3 (sf::Vector3f(quad.getSize().x, quad.getSize().y, quad.getSize().z));
Vertex v4 (sf::Vector3f(0, quad.getSize().y, quad.getSize().z));
vb2.append(v1);
vb2.append(v2);
vb2.append(v3);
vb2.append(v4);
vb2.update();
math::Matrix4f matrix = quad.getTransform().getMatrix().transpose();
perPixelLinkedListP2.setParameter("worldMat", matrix);
currentStates.shader = &perPixelLinkedListP2;
frameBuffer.drawVertexBuffer(vb2, currentStates);
glCheck(glFinish());
frameBuffer.display();
Performance is poor.
This is because I have to set the vertex attribute pointers again each time I switch VBOs here. (Setting the pointers once for all my VBOs at the first frame doesn’t work: the scene is no longer displayed from frame 2 onward. I really have to reset the vertex attribute pointers every frame when I use multiple VBOs for rendering.) Here is the function:
// Draws the contents of a vertex buffer to this render target.
//
// vertexBuffer: buffer to draw; nothing happens when it holds no vertices.
//               Its vboVertexBuffer / vboNormalBuffer (and vboIndexBuffer when
//               m_indexes is not empty) GL buffers are used as data sources.
// states:       blend mode, texture and shader to apply for this draw.
//
// On OpenGL >= 3.3 the data is fed through generic vertex attributes 0..3
// (position, colour, texture coordinates, normal) of the VAO m_vao; on older
// versions the fixed-function client arrays are used instead.
//
// NOTE(review): the attribute pointers are cached by the *address* of the
// VertexBuffer only. If VertexBuffer::update() ever recreates its GL buffer
// objects, or two buffers alternate on this target, the pointers are stale or
// re-specified on every call. One VAO per VertexBuffer would presumably avoid
// the re-specification entirely - verify against VertexBuffer.
void RenderTarget::drawVertexBuffer(VertexBuffer& vertexBuffer, RenderStates states) {
    if (vertexBuffer.getVertexCount() == 0) {
        return;
    }

    // Map the primitive type to the OpenGL primitive type, rejecting values
    // that would index past the end of the table before touching any GL state.
    static const GLenum modes[] = {GL_POINTS, GL_LINES, GL_LINE_STRIP, GL_TRIANGLES,
                                   GL_TRIANGLE_STRIP, GL_TRIANGLE_FAN, GL_QUADS};
    const unsigned int typeIndex = static_cast<unsigned int>(vertexBuffer.getPrimitiveType());
    if (typeIndex >= sizeof(modes) / sizeof(modes[0])) {
        return;
    }
    const GLenum mode = modes[typeIndex];

    if (!activate(true)) {
        return;
    }

    if (!m_cache.glStatesSet) {
        resetGLStates();
    }
    // Apply the view
    if (m_cache.viewChanged) {
        applyCurrentView();
    }
    // Apply the blend mode
    if (states.blendMode != m_cache.lastBlendMode) {
        applyBlendMode(states.blendMode);
    }
    // Apply the texture
    sf::Uint64 textureId = states.texture ? states.texture->getNativeHandle() : 0;
    if (textureId != m_cache.lastTextureId) {
        applyTexture(states.texture);
    }
    // Apply the shader
    if (states.shader) {
        applyShader(states.shader);
    }

    // Evaluated once; decides between the VAO/generic-attribute path and the
    // legacy client-array path for the rest of the function.
    const bool useVertexAttribs = m_versionMajor > 3 || (m_versionMajor == 3 && m_versionMinor >= 3);

    if (useVertexAttribs) {
        glCheck(glBindVertexArray(m_vao));
    }

    // Re-specify the data sources only when a different buffer is drawn.
    // The pointer calls do not need the arrays to be enabled, so enabling is
    // done exactly once below instead of being toggled around the setup.
    if (m_cache.lastVboBuffer != &vertexBuffer) {
        if (useVertexAttribs) {
            glCheck(glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer.vboVertexBuffer));
            glCheck(glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*) 0));
            glCheck(glVertexAttribPointer(1, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (GLvoid*) 12));
            glCheck(glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (GLvoid*) 16));
            // Normals come from their own tightly packed buffer.
            glCheck(glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer.vboNormalBuffer));
            glCheck(glVertexAttribPointer(3, 3, GL_FLOAT, GL_FALSE, sizeof(sf::Vector3f), (GLvoid*) 0));
            glCheck(glBindBuffer(GL_ARRAY_BUFFER, 0));
        } else {
            glCheck(glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer.vboVertexBuffer));
            glCheck(glVertexPointer(3, GL_FLOAT, sizeof(Vertex), (GLvoid*) 0));
            glCheck(glColorPointer(4, GL_UNSIGNED_BYTE, sizeof(Vertex), (GLvoid*) 12));
            glCheck(glTexCoordPointer(2, GL_FLOAT, sizeof(Vertex), (GLvoid*) 16));
            // Normals come from their own tightly packed buffer.
            glCheck(glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer.vboNormalBuffer));
            glCheck(glNormalPointer(GL_FLOAT, sizeof(sf::Vector3f), (GLvoid*) 0));
            glCheck(glBindBuffer(GL_ARRAY_BUFFER, 0));
        }
        m_cache.lastVboBuffer = &vertexBuffer;
    }

    // Enable the arrays for the draw call.
    if (useVertexAttribs) {
        glCheck(glEnableVertexAttribArray(0));
        glCheck(glEnableVertexAttribArray(1));
        glCheck(glEnableVertexAttribArray(2));
        glCheck(glEnableVertexAttribArray(3));
    } else {
        glCheck(glEnableClientState(GL_COLOR_ARRAY));
        glCheck(glEnableClientState(GL_TEXTURE_COORD_ARRAY));
        glCheck(glEnableClientState(GL_VERTEX_ARRAY));
        glCheck(glEnableClientState(GL_NORMAL_ARRAY));
    }

    if (vertexBuffer.m_indexes.size() > 0) {
        // Indexed draw: indices are read from the buffer's index VBO.
        glCheck(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vertexBuffer.vboIndexBuffer));
        glCheck(glDrawElements(mode, static_cast<GLsizei>(vertexBuffer.m_indexes.size()), GL_UNSIGNED_INT, (GLvoid*) 0));
        glCheck(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0));
    } else {
        //std::cout<<"draw arrays"<<std::endl;
        // NOTE(review): the framebuffer is only bound explicitly on this
        // non-indexed path; confirm whether activate() already binds it, in
        // which case this call is redundant, or whether the indexed path
        // needs it too.
        glCheck(glBindFramebuffer(GL_FRAMEBUFFER, m_framebufferId));
        glCheck(glDrawArrays(mode, 0, static_cast<GLsizei>(vertexBuffer.getVertexCount())));
    }

    // Leave the arrays disabled (and the VAO unbound) for other draw paths.
    if (useVertexAttribs) {
        glCheck(glDisableVertexAttribArray(0));
        glCheck(glDisableVertexAttribArray(1));
        glCheck(glDisableVertexAttribArray(2));
        glCheck(glDisableVertexAttribArray(3));
        glCheck(glBindVertexArray(0));
    } else {
        glCheck(glDisableClientState(GL_COLOR_ARRAY));
        glCheck(glDisableClientState(GL_TEXTURE_COORD_ARRAY));
        glCheck(glDisableClientState(GL_VERTEX_ARRAY));
        glCheck(glDisableClientState(GL_NORMAL_ARRAY));
    }
}
When I comment out the line where I draw the second VBO, it runs much faster. Or is it my second fragment shader that is slow?
// Fragment shader of the resolve pass for the per-pixel linked lists.
// For the current pixel it walks the list starting at the head pointer stored
// in `headPointers`, copies at most MAX_FRAGMENTS nodes into a local array,
// merge-sorts them by decreasing depth and blends them from the last sorted
// element to the first.
//
// The merge scratch array must hold `step` elements, and `step` reaches 16
// when 17..20 fragments are collected, so it is sized MAX_FRAGMENTS; half
// that size is only sufficient when MAX_FRAGMENTS is a power of two.
const std::string fragmentShader2 =
R"(
#version 460
#define MAX_FRAGMENTS 20
struct NodeType {
vec4 color;
float depth;
uint next;
};
layout(binding = 0, r32ui) uniform uimage2D headPointers;
layout(binding = 0, std430) buffer linkedLists {
NodeType nodes[];
};
layout(location = 0) out vec4 fcolor;
void main() {
NodeType frags[MAX_FRAGMENTS];
int count = 0;
uint n = imageLoad(headPointers, ivec2(gl_FragCoord.xy)).r;
while( n != 0xffffffffu && count < MAX_FRAGMENTS) {
frags[count] = nodes[n];
n = frags[count].next;
count++;
}
//merge sort
int i, j1, j2, k;
int a, b, c;
int step = 1;
// Scratch copy of the left run; a run can be longer than half of
// MAX_FRAGMENTS, so the full size is reserved.
NodeType leftArray[MAX_FRAGMENTS];
while (step <= count)
{
i = 0;
while (i < count - step)
{
////////////////////////////////////////////////////////////////////////
//merge(step, i, i + step, min(i + step + step, count));
a = i;
b = i + step;
c = (i + step + step) >= count ? count : (i + step + step);
for (k = 0; k < step; k++)
leftArray[k] = frags[a + k];
j1 = 0;
j2 = 0;
for (k = a; k < c; k++)
{
if (b + j1 >= c || (j2 < step && leftArray[j2].depth > frags[b + j1].depth))
frags[k] = leftArray[j2++];
else
frags[k] = frags[b + j1++];
}
////////////////////////////////////////////////////////////////////////
i += 2 * step;
}
step *= 2;
}
vec4 color = vec4(0, 0, 0, 0);
for( int i = count - 1; i >= 0; i--)
{
color.rgb = frags[i].color.rgb * frags[i].color.a + color.rgb * (1 - frags[i].color.a);
color.a = frags[i].color.a + color.a * (1 - frags[i].color.a);
}
fcolor = color;
})";
But I don’t think so; I think that calling the glBindXXX functions and resetting the vertex attribute pointers is what slows things down.
What should I do? Use only one VBO to draw everything? I don’t think that’s a good idea, because if I want to draw particles, for example, which can be added to and removed from the scene while it is being rendered, I need to insert/remove indexes in the CPU-side index array and then pass it to the indexed VBO. Modifying the index array on the CPU is really slow. (I’ve already tested this.)
It is stupid to have to redefine them, because the attribute pointers are the same for all my VBOs.
What should I do? Thanks.
EDIT: Maybe I should put every visible object into one VBO and update it every frame, but then I’d have to pass this single VBO to all my renderers, which is really…
Or maybe I should use a static variable.
EDIT 2: Oh! Using the same VBO for the two passes also decreases performance, so it’s the shader that is slow.
for (unsigned int i = 0; i < m_normals.size(); i++) {
if (m_normals[i].getAllVertices().getVertexCount() > 0) {
//std::cout<<"next frame draw normal"<<std::endl;
if (m_normals[i].getMaterial().getTexture() == nullptr) {
perPixelLinkedList2.setParameter("haveTexture", 0.f);
} else {
math::Matrix4f texMatrix = m_normals[i].getMaterial().getTexture()->getTextureMatrix();
perPixelLinkedList2.setParameter("textureMatrix", texMatrix);
perPixelLinkedList2.setParameter("haveTexture", 1.f);
}
if (m_normals[i].getVertexArrays()[0]->getEntity()->isWater()) {
perPixelLinkedList2.setParameter("water", 1.0f);
} else {
perPixelLinkedList2.setParameter("water", 0.0f);
}
if (core::Application::app != nullptr) {
float time = core::Application::getTimeClk().getElapsedTime().asSeconds();
perPixelLinkedList2.setParameter("time", time);
}
currentStates.blendMode = sf::BlendNone;
currentStates.shader = &perPixelLinkedList2;
currentStates.texture = m_normals[i].getMaterial().getTexture();
vb.clear();
vb.setPrimitiveType(m_normals[i].getAllVertices().getPrimitiveType());
for (unsigned int j = 0; j < m_normals[i].getAllVertices().getVertexCount(); j++) {
vb.append(m_normals[i].getAllVertices()[j]);
}
vb.update();
frameBuffer.drawVertexBuffer(vb, currentStates);
}
}
glCheck(glFinish());
glCheck(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT));
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
vb.clear();
vb.setPrimitiveType(sf::Quads);
Vertex v1 (sf::Vector3f(0, 0, quad.getSize().z));
Vertex v2 (sf::Vector3f(quad.getSize().x,0, quad.getSize().z));
Vertex v3 (sf::Vector3f(quad.getSize().x, quad.getSize().y, quad.getSize().z));
Vertex v4 (sf::Vector3f(0, quad.getSize().y, quad.getSize().z));
vb.append(v1);
vb.append(v2);
vb.append(v3);
vb.append(v4);
vb.update();
math::Matrix4f matrix = quad.getTransform().getMatrix().transpose();
perPixelLinkedListP2.setParameter("worldMat", matrix);
currentStates.shader = &perPixelLinkedListP2;
frameBuffer.drawVertexBuffer(vb2, currentStates);
glCheck(glFinish());
frameBuffer.display();