Hello,
Possibly this should go in the OpenGL Driver forum, since this is NVIDIA-specific.
Anyway, I’m streaming texture uploads from a separate thread, using a shared context, for the purpose of playing back video. I need to upload 3 textures at once, per frame, because the video is in planar YUV format, so I upload separate textures for the Y, U, and V data. Usually the U and V textures are 1/4 the size of the Y texture. I’m using PBOs for the texture transfer.
Everything is working fine as far as I can tell. GPUView shows that I am indeed using the separate copy engine for the texture uploads. However, I’m getting tons of performance warnings when I turn on debug output: “Pixel-path performance warning: Pixel transfer is synchronized with 3D rendering.”
This only happens when the U and V textures are a different size than the Y texture. If I make them the same size, the warning goes away. Also, this warning happens only on Windows. I don’t get this on Debian Linux with NVIDIA proprietary drivers.
I haven’t been able to find a way to rearrange my code to make the driver happy. I’ve tried using a different PBO for each texture, one sized at, say, 1280x720, and two sized at 640x360. I’ve also tried using one large PBO and logically dividing it among the 3 textures, passing different offsets for the glTexSubImage2D call. In all cases, I use a queue of textures and PBOs and cycle through them, to keep the queue full.
I have a couple of options:
- Ignore the warning
- Further rearrange my code so that the planar YUV data is uploaded into a single texture, and rework my code and shader to compensate. That will complicate my C++ and shader code, so I don’t really want to if I can avoid it.
Or maybe I’m just doing something wrong.
I’ve replicated this in a sample project if anyone wants to have a look: https://www.dropbox.com/s/q1xqoijiovkcqvb/SDLTest.zip?dl=0
Thanks!
– Kevin
Here’s the main.cpp file from my sample project, so you don’t have to download the project if you just want to browse my code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <GL/glew.h>
#include <GL/gl.h>
#define SDL_MAIN_HANDLED
#include <SDL.h>
// GPU resources for one video frame. Index 0 is the full-size 1280x720 plane
// (sampled as uTextureY by the fragment shader); indices 1 and 2 are the
// 640x360 planes (sampled as uTextureU and uTextureV).
//
// Bundles are passed by value between the render thread and the upload thread
// through the availableTextures / readyTextures queues.
struct TextureBundle
{
    // One GL_R8 texture per plane.
    GLuint textureID[3] = { 0, 0, 0 };
    // One pixel-unpack buffer per plane, used as the source of glTexSubImage2D.
    GLuint pboID[3] = { 0, 0, 0 };
    // Fence inserted by whichever thread last issued GL commands that touch
    // this bundle; null when there is nothing to wait for.
    GLsync fence = nullptr;
};
// Queue of bundles that may be refilled with new data: main() pushes a bundle
// here after drawing it, and uploadThreadProc() pops from it. Guarded by
// availableTexturesMtx; availableTexturesCv is notified after each push.
std::mutex availableTexturesMtx;
std::condition_variable availableTexturesCv;
std::queue<TextureBundle> availableTextures;
// Queue of bundles whose uploads have been issued: uploadThreadProc() pushes
// here and main() pops from it to draw. Guarded by readyTexturesMtx;
// readyTexturesCv is notified after each push.
std::mutex readyTexturesMtx;
std::condition_variable readyTexturesCv;
std::queue<TextureBundle> readyTextures;
// Window created in main(); both GL contexts below are created against it.
SDL_Window *mainWindow;
// Context created in main() and used there for rendering.
SDL_GLContext mainContext;
// Context created in main() and made current on the upload thread by
// uploadThreadProc().
SDL_GLContext uploadContext;
// Debug-output callback passed to glDebugMessageCallback by both threads.
// Translates the source and type enums to readable labels and prints one line
// per message to stdout.
//
// source    - GL_DEBUG_SOURCE_* value identifying who generated the message
// type      - GL_DEBUG_TYPE_* value classifying the message
// id        - message ID
// severity  - GL_DEBUG_SEVERITY_* value (every severity is printed the same way)
// length    - message length (unused; message is printed as a C string)
// message   - the message text
// userParam - pointer supplied at registration (unused)
void GLAPIENTRY _onOpenGLDebugMsg(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message, const void* userParam)
{
    (void)severity;
    (void)length;
    (void)userParam;

    // Plain string literals: no allocation is needed inside the callback.
    const char* strSource = "Unknown";
    switch (source)
    {
    case GL_DEBUG_SOURCE_API:             strSource = "API";             break;
    case GL_DEBUG_SOURCE_WINDOW_SYSTEM:   strSource = "Window System";   break;
    case GL_DEBUG_SOURCE_SHADER_COMPILER: strSource = "Shader Compiler"; break;
    case GL_DEBUG_SOURCE_THIRD_PARTY:     strSource = "Third Party";     break;
    case GL_DEBUG_SOURCE_APPLICATION:     strSource = "Application";     break;
    case GL_DEBUG_SOURCE_OTHER:           strSource = "Other";           break;
    }

    const char* strType = "Unknown";
    switch (type)
    {
    case GL_DEBUG_TYPE_ERROR:               strType = "Error";               break;
    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: strType = "Deprecated Behavior"; break;
    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:  strType = "Undefined Behavior";  break;
    case GL_DEBUG_TYPE_PORTABILITY:         strType = "Portability";         break;
    case GL_DEBUG_TYPE_PERFORMANCE:         strType = "Performance";         break;
    case GL_DEBUG_TYPE_MARKER:              strType = "Marker";              break;
    case GL_DEBUG_TYPE_PUSH_GROUP:          strType = "Push Group";          break;
    case GL_DEBUG_TYPE_POP_GROUP:           strType = "Pop Group";           break;
    case GL_DEBUG_TYPE_OTHER:               strType = "Other";               break;
    }

    // The original switched on severity but printed the identical line in
    // every case, so a single printf produces the same output.
    printf("Source: %s, Type: %s, ID: %u, Message: %s\n", strSource, strType, id, message);
}
void uploadThreadProc()
{
int value = 0;
int inc = 1;
SDL_GL_MakeCurrent(mainWindow, uploadContext);
glDebugMessageCallback(_onOpenGLDebugMsg, nullptr);
// ignore debug messages telling us where buffers are mapped in memory
GLuint id = 131185;
glDebugMessageControl(GL_DEBUG_SOURCE_API, GL_DEBUG_TYPE_OTHER, GL_DONT_CARE, 1, &id, GL_FALSE);
for (;;)
{
// wait for next available texture
std::unique_lock<std::mutex> availLock(availableTexturesMtx);
while (availableTextures.empty())
availableTexturesCv.wait(availLock);
auto tex = availableTextures.front();
availableTextures.pop();
availLock.unlock();
// make sure the GPU is done with the textures
if (tex.fence != 0)
{
glWaitSync(tex.fence, 0, GL_TIMEOUT_IGNORED);
glDeleteSync(tex.fence);
tex.fence = 0;
}
// upload some dummy data to the textures
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tex.pboID[0]);
void* locked = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, 1280 * 720, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memset(locked, value, 1280 * 720);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindTexture(GL_TEXTURE_2D, tex.textureID[0]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 1280, 720, GL_RED, GL_UNSIGNED_BYTE, nullptr);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tex.pboID[1]);
locked = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, 640 * 360, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memset(locked, value, 640 * 360);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindTexture(GL_TEXTURE_2D, tex.textureID[1]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 640, 360, GL_RED, GL_UNSIGNED_BYTE, nullptr);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tex.pboID[2]);
locked = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, 640 * 360, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memset(locked, value, 640 * 360);
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindTexture(GL_TEXTURE_2D, tex.textureID[2]);
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 640, 360, GL_RED, GL_UNSIGNED_BYTE, nullptr);
glBindTexture(GL_TEXTURE_2D, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
// create a fence so the GPU won't try to draw until the upload is finished
tex.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
// add it to the ready queue
std::unique_lock<std::mutex> readyLock(readyTexturesMtx);
readyTextures.push(tex);
readyLock.unlock();
readyTexturesCv.notify_one();
// animate the dummy data that gets uploaded
if (value == 0)
{
value = 1;
inc = 1;
}
else if (value == 255)
{
value = 254;
inc = -1;
}
else
value += inc;
}
}
GLuint compile_shaders(void)
{
GLuint vertex_shader;
GLuint fragment_shader;
GLuint program;
// Source code for vertex shader
static const GLchar * vertex_shader_source[] =
{
"#version 330 core
"
"
"
"layout(location = 0) in vec2 aPosition;
"
"layout(location = 1) in vec2 aTexCoord;
"
"
"
"out vec2 vTexCoord;
"
"
"
"void main(void)
"
"{
"
" gl_Position = vec4(aPosition, 0.0, 1.0);
"
" vTexCoord = aTexCoord;
"
"}
"
};
// Source code for fragment shader
static const GLchar * fragment_shader_source[] =
{
"#version 330 core
"
"
"
"uniform sampler2D uTextureY, uTextureU, uTextureV;
"
"
"
"in vec2 vTexCoord;
"
"out vec4 fColor;
"
"
"
"const vec3 offset = vec3(-0.0625, -0.5, -0.5);
"
"const vec3 rcoeff = vec3(1.164, 0.000, 1.596);
"
"const vec3 gcoeff = vec3(1.164, -0.391, -0.813);
"
"const vec3 bcoeff = vec3(1.164, 2.018, 0.000);
"
"
"
"void main(void)
"
"{
"
" float r, g, b;
"
" vec3 yuv;
"
" yuv.x = texture(uTextureY, vTexCoord.xy).r;
"
" yuv.y = texture(uTextureU, vTexCoord.xy).r;
"
" yuv.z = texture(uTextureV, vTexCoord.xy).r;
"
" yuv += offset;
"
" r = dot(yuv, rcoeff);
"
" g = dot(yuv, gcoeff);
"
" b = dot(yuv, bcoeff);
"
"
"
" fColor = vec4(r, g, b, 1.0);
"
"}
"
};
#if 0
"#version 330 core
"
"
"
"in vec2 vTexCoord;
"
"out vec4 fColor;
"
"uniform sampler2D uTextureY, uTextureU, uTextureV;
"
"
"
"void main(void)
"
"{
"
" fColor = texture(uTextureY, vTexCoord) * texture(uTextureU, vTexCoord) * texture(uTextureV, vTexCoord);
"
"}
"
#endif
// Create and compile vertex shader
vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, vertex_shader_source, NULL);
glCompileShader(vertex_shader);
// Create and compile fragment shader
fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, fragment_shader_source, NULL);
glCompileShader(fragment_shader);
// Create program, attach shaders to it, and link it
program = glCreateProgram();
glAttachShader(program, vertex_shader);
glAttachShader(program, fragment_shader);
glLinkProgram(program);
// Delete the shaders as the program has them now
glDeleteShader(vertex_shader);
glDeleteShader(fragment_shader);
return program;
}
/* Our program's entry point */
int main(int argc, char *argv[])
{
SDL_Init(SDL_INIT_VIDEO);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 3);
SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
SDL_GL_SetSwapInterval(1);
mainWindow = SDL_CreateWindow("Test", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, 1280, 720, SDL_WINDOW_OPENGL | SDL_WINDOW_SHOWN);
uploadContext = SDL_GL_CreateContext(mainWindow);
mainContext = SDL_GL_CreateContext(mainWindow);
glewExperimental = GL_TRUE;
glewInit();
glDebugMessageCallback(_onOpenGLDebugMsg, nullptr);
printf("OpenGL vendor: %s
", glGetString(GL_VENDOR));
printf("OpenGL version: %s
", glGetString(GL_VERSION));
printf("OpenGL renderer: %s
", glGetString(GL_RENDERER));
// create vertex buffer for our textured quads
GLuint quadVAO;
glGenVertexArrays(1, &quadVAO);
glBindVertexArray(quadVAO);
GLuint quadVBO;
glGenBuffers(1, &quadVBO);
glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (const void *)0);
glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 0, (const void *)(8 * sizeof(GLfloat)));
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
GLfloat quadData[] =
{
// vertices
-1.0, 1.0,
1.0, 1.0,
-1.0, -1.0,
1.0, -1.0,
// corresponding texture coordinates
0.0, 0.0,
1.0, 0.0,
0.0, 1.0,
1.0, 1.0,
};
glBufferData(GL_ARRAY_BUFFER, sizeof(quadData), quadData, GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
GLuint program = compile_shaders();
glUseProgram(program);
// create the textures and PBOs
for (int i = 0; i < 6; i++)
{
TextureBundle tex;
glGenTextures(1, &tex.textureID[0]);
glBindTexture(GL_TEXTURE_2D, tex.textureID[0]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8, 1280, 720);
glBindTexture(GL_TEXTURE_2D, 0);
glGenBuffers(1, &tex.pboID[0]);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tex.pboID[0]);
glBufferData(GL_PIXEL_UNPACK_BUFFER, 1280 * 720 + (640 * 360 * 2), nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glGenTextures(1, &tex.textureID[1]);
glBindTexture(GL_TEXTURE_2D, tex.textureID[1]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8, 640, 360);
glBindTexture(GL_TEXTURE_2D, 0);
glGenBuffers(1, &tex.pboID[1]);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tex.pboID[1]);
glBufferData(GL_PIXEL_UNPACK_BUFFER, 640 * 360, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glGenTextures(1, &tex.textureID[2]);
glBindTexture(GL_TEXTURE_2D, tex.textureID[2]);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_R8, 640, 360);
glBindTexture(GL_TEXTURE_2D, 0);
glGenBuffers(1, &tex.pboID[2]);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, tex.pboID[2]);
glBufferData(GL_PIXEL_UNPACK_BUFFER, 640 * 360, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
tex.fence = 0;
availableTextures.push(tex);
}
availableTexturesCv.notify_one();
std::thread uploadThread{ uploadThreadProc };
bool bQuit = false;
while (!bQuit)
{
SDL_Event event;
while (SDL_PollEvent(&event))
{
if (event.type == SDL_QUIT)
{
bQuit = true;
break;
}
}
// grab the next texture from the ready queue
std::unique_lock<std::mutex> readyLock(readyTexturesMtx);
while (readyTextures.empty())
readyTexturesCv.wait(readyLock);
auto tex = readyTextures.front();
readyTextures.pop();
readyLock.unlock();
// make sure it's done uploading
if (tex.fence != 0)
{
glWaitSync(tex.fence, 0, GL_TIMEOUT_IGNORED);
glDeleteSync(tex.fence);
tex.fence = 0;
}
// draw it
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, tex.textureID[2]);
glUniform1i(glGetUniformLocation(program, "uTextureV"), 2);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, tex.textureID[1]);
glUniform1i(glGetUniformLocation(program, "uTextureU"), 1);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, tex.textureID[0]);
glUniform1i(glGetUniformLocation(program, "uTextureY"), 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glActiveTexture(GL_TEXTURE2);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, 0);
// let the upload thread know when it's done drawing
tex.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
// put it in the available textures queue, so the upload thread can upload new data to it
std::unique_lock<std::mutex> availLock(availableTexturesMtx);
availableTextures.push(tex);
availLock.unlock();
availableTexturesCv.notify_one();
SDL_GL_SwapWindow(mainWindow);
}
SDL_GL_DeleteContext(mainContext);
SDL_DestroyWindow(mainWindow);
SDL_Quit();
return 0;
}