One more update: I modified your doReadbackFAST
algorithm to use glGetTexImage,
as follows:
void WaylandEgl::doFastReadBackTexture() // 12.4 cpu load :)
{
// Work-around for NVidia driver readback crippling on GeForce.
if (!buffCreated)
{
qDebug() << "Heiht" << mWinHeight << "Width" << mWinWidth;
pbo_size = mWinHeight * mWinWidth * 2;
nBytesPerLine = mWinWidth ;
Readback_buf = (GLchar *) malloc( pbo_size );
glGenFramebuffers(1, &textFrameBuffer);
glBindFramebuffer(GL_FRAMEBUFFER, textFrameBuffer);
glGenTextures(1, &boundTex);
glBindTexture(GL_TEXTURE_2D, boundTex);
glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, mWinWidth, mWinHeight, 0,GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,GL_TEXTURE_2D, boundTex, 0);
if( glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE)
{
qDebug() << "Framebuffer error is " << glGetError();
}
else
{
qDebug() << "Texture Framebuffer is OK" << glGetError();
}
buffCreated = true;
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glGenBuffers( PBO_COUNT, pboIds );
// Buffer #0: glReadPixels target
GLenum target = GL_PIXEL_PACK_BUFFER;
glBindBuffer( target, pboIds[0] );
glBufferData( target, pbo_size, 0, GL_STATIC_COPY );
glGetBufferParameterui64vNV = (PFNGLGETBUFFERPARAMETERUI64VNVPROC)eglGetProcAddress("glGetBufferParameterui64vNV");
if (!glGetBufferParameterui64vNV)
{
qDebug() << "glGetBufferParameterui64vNV not fouynded!";
return;
}
glMakeBufferResidentNV = (PFNGLMAKEBUFFERRESIDENTNVPROC)eglGetProcAddress("glMakeBufferResidentNV");
if (!glMakeBufferResidentNV)
{
qDebug() << "glMakeBufferResidentNV not fouynded!";
return;
}
glUnmapBufferARB = (PFNGLUNMAPBUFFERARBPROC)eglGetProcAddress("glUnmapBufferARB");
if (!glUnmapBufferARB)
{
qDebug() << "glUnmapBufferARB not fouynded!";
return;
}
glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC)eglGetProcAddress("glGetBufferSubData");
if (!glGetBufferSubData)
{
qDebug() << "glGetBufferSubData not fouynded!";
return;
}
qDebug() << "Run the optimizatiosn16";
GLuint64EXT addr;
glGetBufferParameterui64vNV( target, GL_BUFFER_GPU_ADDRESS_NV, &addr );
glMakeBufferResidentNV( target, GL_READ_ONLY );
// Buffer #1: glCopyBuffer target
target = GL_COPY_WRITE_BUFFER;
glBindBuffer( target, pboIds[1] );
glBufferData( target, pbo_size, 0, GL_STREAM_READ );
glMapBufferRange( target, 0, 1, GL_MAP_WRITE_BIT);
glUnmapBufferARB( target );
glGetBufferParameterui64vNV( target, GL_BUFFER_GPU_ADDRESS_NV, &addr );
glMakeBufferResidentNV ( target, GL_READ_ONLY );
buffCreated = true;
int rowL;
glGetIntegerv(GL_PACK_ROW_LENGTH, &rowL);
qDebug() << "Rowl before" << rowL;
glPixelStorei( GL_PACK_ALIGNMENT, 1 );
glPixelStorei(GL_PACK_ROW_LENGTH,nBytesPerLine);
qDebug() << "Pixel st" << glGetError();
glGetIntegerv(GL_PACK_ROW_LENGTH, &rowL);
qDebug() << "Rowl after" << rowL;
}
glFinish();
Timer t1;
t1.start();
glBindFramebuffer(GL_READ_FRAMEBUFFER,mwindow->openglContext()->defaultFramebufferObject());
glBindFramebuffer(GL_DRAW_FRAMEBUFFER,textFrameBuffer);
glBlitFramebuffer(0, 0, mWinWidth, mWinHeight, 0, 0, mWinWidth, mWinHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR);
// Do a depth readback to BUF OBJ #0
glBindBuffer( GL_PIXEL_PACK_BUFFER, pboIds[0] );
glBindTexture(GL_TEXTURE_2D, boundTex);
//glReadPixels( 0, 0, mWinWidth, mWinHeight,
// GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 0 );
glGetTexImage(GL_TEXTURE_2D,0,GL_RGB,GL_UNSIGNED_SHORT_5_6_5,0);
t1.stop();
readTime = t1.getElapsedTimeInMilliSec();
t1.start();
// Copy from BUF OBJ #0 to BUF OBJ #1
glBindBuffer( GL_COPY_WRITE_BUFFER, pboIds[1] );
glCopyBufferSubData( GL_PIXEL_PACK_BUFFER, GL_COPY_WRITE_BUFFER, 0, 0,
pbo_size );
// Do the readback from BUF OBJ #1 to app CPU memory
glGetBufferSubData( GL_COPY_WRITE_BUFFER, 0, pbo_size,
Readback_buf );
//sendImage((unsigned char*)Readback_buf,pbo_size);
t1.stop();
processTime = t1.getElapsedTimeInMilliSec();
glBindBuffer( GL_PIXEL_PACK_BUFFER, 0 );
//qDebug() << "Read Time " << readTime;
//qDebug() << "Process Time " << processTime;
}
With this change, glGetTexImage
consumes 12-14% CPU. Read time is 0.216 ms and process time is 3.296 ms.
This is similar to the CPU load of the performRenderBuffer16
algorithm, which is a good number.
Regards