Unexpected 3D texture interpolation results

I am modifying an existing OpenGL-based image player to accept 3D display LUTs. I figured this would be relatively easy using 3D textures. However, I wrote a test program which is not behaving as expected, and I am completely baffled. I synthesize a 256x256 image that contains a monochrome gradient where each pixel contains its x coordinate in each of the RGB components, then load that into a 2D texture. I then create a 3D texture that I believe is a 2x2x2 identity LUT. I draw the pixels, applying the LUT in the fragment shader, then read the pixels back from the framebuffer. I expected to get back the same pixels that I sent up in the image texture, and this is true for the R and B channels. But, very strangely, the G channel does not match, and the result is non-linear! What am I doing wrong?

The program:

#include <iostream>
#include <iomanip>
#include <GL/glew.h>
#include <GLFW/glfw3.h>
// Window (and GL context) handle; assigned in main() by glfwCreateWindow().
GLFWwindow* window;
// Dimensions of the test image; main() also passes them as the window size.
const unsigned int imageWidth = 256;
const unsigned int imageHeight = 256;
// 8-bit RGB test image, indexed as [row][column][channel]; filled in main().
GLubyte imageData[imageHeight][imageWidth][3];

// Pass-through vertex shader: forwards the 2D position as clip-space
// coordinates and hands the texture coordinate on to the fragment stage.
// Declared as GLSL 3.30 so that it matches the OpenGL 3.3 core context that
// main() requests; the shader uses no feature newer than that.
const GLchar* vertexSource = R"glsl(
    #version 330 core
    in vec2 position;
    in vec2 texCoord;
    out vec2 fragTexCoord;
    void main()
    {
        fragTexCoord = texCoord;
        gl_Position = vec4(position, 0.0, 1.0);
    }
)glsl";

// Fragment shader: samples the source image, remaps its RGB value with
// lutInterpScale / lutInterpOffset, and uses the result as the coordinate
// for a lookup in the 3D LUT texture.
// Declared as GLSL 3.30 so that it matches the OpenGL 3.3 core context that
// main() requests; the shader uses no feature newer than that.
const GLchar* fragmentSource = R"glsl(
    #version 330 core
    in vec2 fragTexCoord;
    out vec4 outColor;
    uniform sampler2D imageTextureUnit;
    uniform sampler3D lutTextureUnit;
    uniform float lutInterpScale;
    uniform float lutInterpOffset;
    void main()
    {
        vec4 srcColor = texture(imageTextureUnit, fragTexCoord);
        vec3 lutIn = vec3(srcColor.rgb * lutInterpScale + lutInterpOffset);
        outColor = texture(lutTextureUnit, lutIn);
    }
)glsl";

int main(void)
{
    glfwInit();
    glfwWindowHint(GLFW_SAMPLES, 4);
    glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE); // To make MacOS happy; should not be needed
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
    window = glfwCreateWindow(imageWidth, imageHeight, "3D Lut Test", NULL, NULL);
    glfwMakeContextCurrent(window);

    glewExperimental = true; // Needed for core profile
    glewInit();

    GLuint vertexArrayID, vertexBufferID, elementBufferID;
    glGenVertexArrays(1, &vertexArrayID);
    glBindVertexArray(vertexArrayID);

    static GLfloat vertexBufferData[] =
    {
        //  VERTEX POS    TEXTURE COORD
        -1.0f,  1.0f,    0.0f,  1.0f,     //  top left
         1.0f,  1.0f,    1.0f,  1.0f,     //  top right
         1.0f, -1.0f,    1.0f,  0.0f,     //  bottom right
        -1.0f, -1.0f,    0.0f,  0.0f,     //  bottom left
    };
    glGenBuffers(1, &vertexBufferID);
    glBindBuffer(GL_ARRAY_BUFFER, vertexBufferID);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertexBufferData), vertexBufferData, GL_STATIC_DRAW);

    static unsigned int triangleIndices[2][3] =
    {
        {0,  1,  2}, // first image triangle
        {2,  3,  0}, // second image triangle
    };
    glGenBuffers(1, &elementBufferID);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, elementBufferID);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(triangleIndices), triangleIndices, GL_STATIC_DRAW);

    // Create an 8-bit RGB test image - each row is a monochrome 0-256 gradient.
    for (auto row = 0; row < imageHeight; ++row)
    {
        for (auto col = 0; col < imageWidth; ++col)
        {
            imageData[row][col][0] = col;  // r
            imageData[row][col][1] = col;  // g
            imageData[row][col][2] = col;  // b
        }
    }

    // Image texture
    GLuint imageTextureID;
    glGenTextures(1, &imageTextureID);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, imageTextureID);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, imageWidth);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, imageWidth, imageHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, imageData);

    // Set up 2x2x2 identity 3D LUT texture
    const int lutSize = 2;
    static GLfloat lutValues[8][3] =
    {
      {0.00f, 0.00f, 0.00f},
      {1.00f, 0.00f, 0.00f},  
      {0.00f, 1.00f, 0.00f},
      {1.00f, 1.00f, 0.00f},
      {0.00f, 0.00f, 1.00f},
      {1.00f, 0.00f, 1.00f},
      {0.00f, 1.00f, 1.00f},
      {1.00f, 1.00f, 1.00f},
    };
    GLuint lutTextureId;
    glGenTextures(1, &lutTextureId);
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_3D, lutTextureId);
    glTexParameterf(GL_TEXTURE_3D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTexParameterf(GL_TEXTURE_3D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTexParameterf(GL_TEXTURE_3D, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
    glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, 2);
    glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, 2, 2, 2, 0, GL_RGB, GL_FLOAT, lutValues);

    // Create the shader program
    GLuint vertexShader = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vertexShader, 1, &vertexSource, NULL);
    glCompileShader(vertexShader);
    GLuint fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(fragmentShader, 1, &fragmentSource, NULL);
    glCompileShader(fragmentShader);
    GLuint shaderProgram = glCreateProgram();
    glAttachShader(shaderProgram, vertexShader);
    glAttachShader(shaderProgram, fragmentShader);
    glLinkProgram(shaderProgram);
    glUseProgram(shaderProgram);

    // Set up attributes, uniforms, and fragment output.
    GLint posAttrib = glGetAttribLocation(shaderProgram, "position");
    glEnableVertexAttribArray(posAttrib);
    glVertexAttribPointer(posAttrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), 0);
    GLint texAttrib = glGetAttribLocation(shaderProgram, "texCoord");
    glEnableVertexAttribArray(texAttrib);
    glVertexAttribPointer(texAttrib, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(GLfloat), (void*)(2 * sizeof(GLfloat)));
    GLuint imageTextureUniform = glGetUniformLocation(shaderProgram, "imageTextureUnit");
    glUniform1i(imageTextureUniform, 0);  // image --> texture unit 0
    GLuint lutTextureUniform = glGetUniformLocation(shaderProgram, "lutTextureUnit");
    glUniform1i(lutTextureUniform, 1);    // lut --> texture unit 1
    GLuint lutInterpScaleUniform = glGetUniformLocation(shaderProgram, "lutInterpScale");
    glUniform1f(lutInterpScaleUniform, (lutSize - 1.0F) / lutSize);
    GLuint lutInterpOffsetUniform = glGetUniformLocation(shaderProgram, "lutInterpOffset");
    glUniform1f(lutInterpOffsetUniform, 1.0F / (2.0F * lutSize));
    glBindFragDataLocation(shaderProgram, 0, "outColor");

    // Draw
    glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0);
    glfwSwapBuffers(window);

    // Read back one row of the frame buffer image and compare with orig image
    unsigned char readBuffer[256][3];
    glReadPixels(0, 0, 256, 1, GL_RGB, GL_UNSIGNED_BYTE, readBuffer);
    std::cout << std::endl << " IN  dR dG dB";
    for (auto i = 0; i < 256; ++i)
    {
        std::cout << std::endl << std::setw(3) << i << ":";
        for (auto j = 0; j < 3; ++j)
        {
            int diff = int(readBuffer[i][j]) - int(imageData[0][i][j]);
            std::cout << " " << std::setw(2) << diff;
        }
    }
    std::cout << std::endl;

    while (glfwWindowShouldClose(window) == 0)
    {
        glfwWaitEvents();
    }

    glfwTerminate();
    return 0;
}

The output:

 IN  dR dG dB
  0:  0  0  0
  1:  0  0  0
  2:  0  0  0
  3:  0  0  0
  4:  0  0  0
  5:  0  0  0
  6:  0  0  0
  7:  0  0  0
  8:  0 -1  0
  9:  0 -1  0
 10:  0 -1  0
 11:  0 -1  0
 12:  0  1  0
 13:  0  1  0
 14:  0  1  0
 15:  0  0  0
 16:  0  0  0
 17:  0  0  0
 18:  0  0  0
 19:  0 -1  0
 20:  0 -1  0
 21:  0  1  0
 22:  0  0  0
 23:  0  0  0
 24:  0  0  0
 25:  0 -1  0
 26:  0 -1  0
 27:  0  1  0
 28:  0  0  0
 29:  0  0  0
 30:  0 -1  0
 31:  0 -1  0
 32:  0  0  0
 33:  0  1  0
 34:  0  1  0
 35:  0  0  0
 36:  0  0  0
 37:  0  1  0
 38:  0  1  0
 39:  0  0  0
 40:  0 -1  0
 41:  0 -1  0
 42:  0  0  0
 43:  0  0  0
 44:  0 -1  0
 45:  0  1  0
 46:  0  0  0
 47:  0  0  0
 48:  0  0  0
 49:  0  1  0
 50:  0  0  0
 51:  0  0  0
 52:  0 -1  0
 53:  0  0  0
 54:  0  0  0
 55:  0  0  0
 56:  0  1  0
 57:  0  1  0
 58:  0  0  0
 59:  0 -1  0
 60:  0  0  0
 61:  0  1  0
 62:  0  0  0
 63:  0  1  0
 64:  0  0  0
 65:  0 -1  0
 66:  0  0  0
 67:  0  1  0
 68:  0  0  0
 69:  0 -1  0
 70:  0  0  0
 71:  0  0  0
 72:  0  1  0
 73:  0  0  0
 74:  0 -1  0
 75:  0  0  0
 76:  0  1  0
 77:  0  0  0
 78:  0 -1  0
 79:  0  0  0
 80:  0  0  0
 81:  0 -1  0
 82:  0  0  0
 83:  0  0  0
 84:  0 -1  0
 85:  0  0  0
 86:  0  0  0
 87:  0 -1  0
 88:  0  0  0
 89:  0  0  0
 90:  0 -1  0
 91:  0  0  0
 92:  0  0  0
 93:  0 -1  0
 94:  0  1  0
 95:  0 -1  0
 96:  0  0  0
 97:  0  1  0
 98:  0  1  0
 99:  0  1  0
100:  0  0  0
101:  0  0  0
102:  0 -1  0
103:  0  0  0
104:  0  0  0
105:  0  0  0
106:  0  0  0
107:  0  0  0
108:  0 -1  0
109:  0  1  0
110:  0  0  0
111:  0  0  0
112:  0  0  0
113:  0  0  0
114:  0  0  0
115:  0 -1  0
116:  0  1  0
117:  0 -1  0
118:  0  0  0
119:  0  1  0
120:  0  0  0
121:  0  0  0
122:  0  0  0
123:  0  0  0
124:  0  0  0
125:  0  0  0
126:  0  0  0
127:  0  0  0
128:  0  0  0
129:  0  0  0
130:  0  0  0
131:  0  0  0
132:  0  0  0
133:  0  0  0
134:  0  0  0
135:  0  0  0
136:  0 -1  0
137:  0  0  0
138:  0  1  0
139:  0 -1  0
140:  0  1  0
141:  0  0  0
142:  0  0  0
143:  0  0  0
144:  0  0  0
145:  0  0  0
146:  0 -1  0
147:  0  1  0
148:  0  0  0
149:  0  0  0
150:  0  0  0
151:  0  0  0
152:  0  0  0
153:  0  1  0
154:  0  0  0
155:  0  0  0
156:  0 -1  0
157:  0 -1  0
158:  0 -1  0
159:  0  0  0
160:  0  1  0
161:  0 -1  0
162:  0  1  0
163:  0  0  0
164:  0  0  0
165:  0  1  0
166:  0  0  0
167:  0  0  0
168:  0  1  0
169:  0  0  0
170:  0  0  0
171:  0  1  0
172:  0  0  0
173:  0  0  0
174:  0  1  0
175:  0  0  0
176:  0  0  0
177:  0  1  0
178:  0  0  0
179:  0 -1  0
180:  0  0  0
181:  0  1  0
182:  0  0  0
183:  0 -1  0
184:  0  0  0
185:  0  0  0
186:  0  1  0
187:  0  0  0
188:  0 -1  0
189:  0  0  0
190:  0  1  0
191:  0  0  0
192:  0 -1  0
193:  0  0  0
194:  0 -1  0
195:  0  0  0
196:  0  1  0
197:  0  0  0
198:  0 -1  0
199:  0 -1  0
200:  0  0  0
201:  0  0  0
202:  0  0  0
203:  0  1  0
204:  0  0  0
205:  0  0  0
206:  0 -1  0
207:  0  0  0
208:  0  0  0
209:  0  0  0
210:  0 -1  0
211:  0  1  0
212:  0  0  0
213:  0  0  0
214:  0  1  0
215:  0  1  0
216:  0  0  0
217:  0 -1  0
218:  0 -1  0
219:  0  0  0
220:  0  0  0
221:  0 -1  0
222:  0 -1  0
223:  0  0  0
224:  0  1  0
225:  0  1  0
226:  0  0  0
227:  0  0  0
228:  0 -1  0
229:  0  1  0
230:  0  1  0
231:  0  0  0
232:  0  0  0
233:  0  0  0
234:  0 -1  0
235:  0  1  0
236:  0  1  0
237:  0  0  0
238:  0  0  0
239:  0  0  0
240:  0  0  0
241:  0 -1  0
242:  0 -1  0
243:  0 -1  0
244:  0  1  0
245:  0  1  0
246:  0  1  0
247:  0  1  0
248:  0  0  0
249:  0  0  0
250:  0  0  0
251:  0  0  0
252:  0  0  0
253:  0  0  0
254:  0  0  0
255:  0  0  0

Does changing the internal format to GL_RGB8 have any effect?

The only reason I can imagine for G being different to R and B is if the effective internal format was GL_RGB565.

No, I get the same result for any format that is based on GL_RGB, or GL_RGBA.

OK, I played around with this some. In terms of the input, I found that the errors in the green channel are directly affected by the image values in the blue channel. If I zero out the blue channel, there are no errors in the green, which I guess makes sense because 2D interpolation is probably solid. I changed the program to build an NxNxN identity LUT instead of hardwiring it to 2x2x2, and I found that the errors change with the size: there are fewer errors the more points you have on a side. When I got to 9x9x9 there were no errors. So… my conclusion here is that NVIDIA is using a crappy 3D interpolation algorithm, and I will avoid making 3D LUTs that are smaller than 16x16x16.

This is pretty interesting.

For what it’s worth, I tried your test program and got the same results as you. A nice, smooth gradient, but G being off by 1/256 from the R and B for some reason. It made no difference whether I converted the 3D texture from GL_RGB8 to GL_RGB32F or not (or the 2D texture as well for that matter). The underlying internal format isn’t the issue (or any interpolation behavior tied to that internal format), nor is the content of the 3D texture’s texels.

To eliminate the float-to-8-bit-normalized conversion (performed when writing the frag shader color output to the probably 8-bit-per-channel window) as a factor, you might try 1) redirecting your rendering to an FBO with an RGBA32F render target and 2) reading the color values back from that instead in full-float. Alternatively, I guess you could try scaling up the output color values by 4, possibly after subtracting the red, just to prove that the 1-off discrepancy in G isn’t at all related to the fixed-point color conversion.