Fyusenet works in OpenGL but not in GLES2 and EGL

IssacXid · January 19, 2024, 10:24am

Good Afternoon,

I’ve been working with an open-source library called fyusenet, which runs neural networks using OpenGL, on a target board with a PowerVR GPU.

The target board only has OpenGL ES and EGL; it doesn’t have OpenGL / glx / glfw / GLUT / GLU / GLVND etc. The library includes a sample resnet.

I’ve been able to run the sample resnet successfully on a host Ubuntu system with OpenGL, glx and GLVND, but on the target board the second-to-last layer, which is defined in a file called deepgemmlayer.cpp, causes all the outputs to become 0. I think it has something to do with how the shader is executed by the GLES/EGL combination.

This is the deepgemmlayer’s forward function:

/**
 * @brief Execute this layer by issuing its render passes into the first output framebuffer
 *
 * @param sequence Sequence number supplied by the caller (not referenced in this function body)
 *
 * Sets up the fixed-function state, binds the input / displacement / weight / bias textures
 * (and the residual texture when the RESIDUAL_INPUT flag is set), clears the target and then
 * renders in up to two steps: one non-instanced draw using \c shader_ followed, if more than
 * one instance is required, by an instanced draw of the remaining instances using
 * \c noBiasShader_. When more than one input tile is present, additive blending
 * (GL_ONE / GL_ONE, GL_FUNC_ADD) is enabled so the draws accumulate in the framebuffer.
 *
 * @throws FynException if the layer is not valid, or if the RESIDUAL_INPUT flag is set but
 *                      no residual texture is present
 */
void DeepGEMMLayer::forward(uint64_t sequence) {
    if (!valid_) THROW_EXCEPTION_ARGS(FynException,"Trying to invoke forward() on invalid layer");
#ifdef DEBUG
    // Report (and thereby clear) any GL error left behind by earlier code, so that it is
    // not attributed to this layer
    int err = glGetError();
    if (err != GL_NO_ERROR) FNLOGD("HINT: glerror on render entry: 0x%x (%s:%d)[%s]",err,__FILE__,__LINE__,getName().c_str());
#endif
    std::lock_guard<std::recursive_mutex> lck(processingLock_);
    if (outputChanged_) updateFBOs();
    // Check the residual precondition *before* any GL object is bound or any GL state is
    // changed: throwing later would skip the unbind calls at the end of this function and
    // leave the vertex array / framebuffer bound for whoever renders next
    if ((flags_ & LayerFlags::RESIDUAL_INPUT) && residualTextures_.empty()) {
        THROW_EXCEPTION_ARGS(FynException,"Residual flag configured, but no such texture found.");
    }
    glDisable(GL_DEPTH_TEST);
    glDisable(GL_STENCIL_TEST);
    glDisable(GL_CULL_FACE);
    if (tiler_->numInputTiles() <= 1) {
        glDisable(GL_BLEND);
    } else {
        // more than one input tile: results of the individual draws are summed up in the
        // framebuffer via additive blending
        glEnable(GL_BLEND);
        glBlendEquationSeparate(GL_FUNC_ADD,GL_FUNC_ADD);
        glBlendFuncSeparate(GL_ONE,GL_ONE,GL_ONE,GL_ONE);
    }
    glViewport(0, 0, viewport_[0], viewport_[1]);
    vertexArray_->bind();
    framebuffers_.at(0)->bind();
    framebuffers_.at(0)->setWriteMask();
    glClearColor(0.0f,0.0f,0.0f,0.0f);
    glClear(GL_COLOR_BUFFER_BIT);
    // texture unit 0: input, followed by the displacement, weight and bias units
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D,inputTextures_.at(0));
    glActiveTexture(GL_TEXTURE0+DISP_TEXTURE);
    glBindTexture(GL_TEXTURE_2D,inputCoordTexture_);
    glActiveTexture(GL_TEXTURE0+WEIGHT_TEXTURE);
    glBindTexture(GL_TEXTURE_2D,weightTexture_);
    glActiveTexture(GL_TEXTURE0+BIAS_TEXTURE);
    glBindTexture(GL_TEXTURE_2D,biasTexture_);
    if (flags_ & LayerFlags::RESIDUAL_INPUT) {
        // texture unit 1: residual input (presence was verified above)
        glActiveTexture(GL_TEXTURE1);
        glBindTexture(GL_TEXTURE_2D,residualTextures_.at(0));
    }
    if (usePoints_) {
        // point-based rendering: one point per output tile, one instance per input tile
        const int instances = tiler_->numInputTiles();
        const int points = tiler_->numOutputTiles();
        shader_->bind(shaderState_.get());
        shader_->setUniformValue("numInputTiles",tiler_->numInputTiles());
        glDrawArrays(GL_POINTS, 0, points);
        // NOTE(review): the flag passed to unbind() is true when another shader gets bound
        // right after - presumably a "compress / skip redundant unbind" hint, confirm
        shader_->unbind(instances > 1);
        if (instances > 1) {
            // remaining instances are rendered with the second shader
            noBiasShader_->bind(noBiasShaderState_.get());
            noBiasShader_->setUniformValue("numInputTiles",tiler_->numInputTiles());
            glDrawArraysInstanced(GL_POINTS, 0, points, instances-1);
            noBiasShader_->unbind();
        }
    } else {
        // triangle-based rendering: 6 indices (two triangles) per output tile, one instance
        // per (input tile, kernel element) combination
        const int instances = tiler_->numInputTiles()*kernel_;
        const int tris = tiler_->numOutputTiles();
        shader_->bind(shaderState_.get());
        shader_->setUniformValue("numInputTiles",tiler_->numInputTiles());
        glDrawElements(GL_TRIANGLES,tris*6,GL_UNSIGNED_SHORT,nullptr);
        shader_->unbind(instances > 1);
        if (instances > 1) {
            // remaining instances are rendered with the second shader
            noBiasShader_->bind(noBiasShaderState_.get());
            noBiasShader_->setUniformValue("numInputTiles",tiler_->numInputTiles());
            glDrawElementsInstanced(GL_TRIANGLES,tris*6,GL_UNSIGNED_SHORT,nullptr,instances-1);
            noBiasShader_->unbind();
        }
    }
    framebuffers_.at(0)->unbind();
    vertexArray_->unbind();
}

The vertex shader code is as follows:

// Vertex shader of the deep GEMM layer.
//
// For every vertex / instance this shader selects an input tile (instance id modulo
// numInputTiles), offsets the texture coordinate by that tile's displacement and fetches
// the weights for the tile, which are handed to the fragment shader as flat varyings.
//
// Compile-time switches used below: BINDING_SUPPORT, NO_HALF, USE_RESIDUAL,
// INSTANCE_OFFSET, NO_BIAS, POST_BATCHNORM.

// default precisions for types declared without an explicit qualifier
precision highp float;
precision highp int;
precision highp sampler2D;

// inputDisplacements: texel (tile,0) holds the .rg offset added to texCoord.xy in main()
// inputCoeffs: weight texture; float sampler when NO_HALF is set, unsigned integer sampler
//              otherwise (presumably packed 16-bit floats - TODO confirm)
#ifdef BINDING_SUPPORT
// texture units are fixed in the shader source via layout qualifiers
layout(binding=4) uniform highp sampler2D inputDisplacements;
#ifdef NO_HALF
layout(binding=5) uniform sampler2D inputCoeffs;
#else
layout(binding=5) uniform highp usampler2D inputCoeffs;
#endif
#else
// no layout qualifiers: texture units have to be assigned to the samplers by the host code
uniform highp sampler2D inputDisplacements;
#ifdef NO_HALF
uniform sampler2D inputCoeffs;
#else
uniform highp usampler2D inputCoeffs;
#endif
#endif

// attributes0: .xy = output position, .zw = base texture coordinate
in highp vec4 attributes0;
// attributes1: .x = row in inputCoeffs to read the weights from,
//              .y = index that is passed on (incremented by one) in texCoord.z / texCoord.w
in highp ivec2 attributes1;
// attributes2: coordinate forwarded as resCoord (only read when USE_RESIDUAL is set)
in highp vec2 attributes2;

// texCoord: .xy = displaced texture coordinate, .z / .w = indices set at the end of main()
out highp vec4 texCoord;
#ifdef NO_HALF
// requires 6 varyings in total (w/ residual)
flat out vec4 layer0coeffs[4];
#else
// requires 4 varyings in total (w/ residual)
flat out highp uvec4 layer0coeffs[2];
#endif

#ifdef USE_RESIDUAL
out highp vec2 resCoord;
#endif

// modulus applied to the instance id in order to obtain the input tile index
uniform int numInputTiles;

void main(void) {
  gl_Position = vec4(attributes0.x,attributes0.y,0.0,1.0);
  texCoord = vec4(attributes0.z,attributes0.w,0.0,0.0);
  // INSTANCE_OFFSET shifts the instance numbering, so that a shader variant compiled with
  // it does not start at instance 0
#ifdef INSTANCE_OFFSET
  int instance = gl_InstanceID + INSTANCE_OFFSET;
#else
  int instance = gl_InstanceID;
#endif
  int intile = instance % numInputTiles;
  // shift the texture coordinate by the displacement stored for this input tile
  texCoord.xy += texelFetch(inputDisplacements,ivec2(intile,0),0).rg;
  // convert the tile index to a column in the weight texture: 4 texels per tile for float
  // weights, 2 texels per tile otherwise
#ifdef NO_HALF
  intile *= 4;
#else
  intile *= 2;
#endif
  int ybase = attributes1.x;
  // fetch weights
  layer0coeffs[0] = texelFetch(inputCoeffs,ivec2(intile,ybase),0);
  layer0coeffs[1] = texelFetch(inputCoeffs,ivec2(intile+1,ybase),0);
#ifdef NO_HALF
  layer0coeffs[2] = texelFetch(inputCoeffs,ivec2(intile+2,ybase),0);
  layer0coeffs[3] = texelFetch(inputCoeffs,ivec2(intile+3,ybase),0);
#endif
  // texCoord.z is only non-zero for instance 0 of a shader that has bias and/or post-BN
  // enabled, all other instances / variants get 0
#if !defined(NO_BIAS) || defined(POST_BATCHNORM)
  if (instance == 0) {
    texCoord.z = float(attributes1.y+1);
  } else {
    texCoord.z = 0.0;
  }
#else
  texCoord.z = 0.0;
#endif
  // texCoord.w always carries the (one-based) index
  texCoord.w = float(attributes1.y+1);
#ifdef USE_RESIDUAL
  resCoord.xy = attributes2.xy;
#endif
}

The frag shader code is:

// Fragment shader of the deep GEMM layer.
//
// NOTE(review): texCoord, resCoord, fragmentColor0, inputLayer0, residualLayer0 and
// biasTexture are not declared in this file - presumably they come from convheader.inc,
// confirm. Likewise compute(), applyBN() and residual() are presumably defined in the
// include files below.
// NOTE(review): #include is not a core GLSL directive, so these lines are presumably
// resolved by the host application before compilation - confirm.
#include "shaders/deep/convheader.inc"

// weights fetched by the vertex shader, constant (flat) over each primitive
#ifdef NO_HALF
// requires 6 varyings in total (w/ residual)
flat in vec4 layer0coeffs[4];
#else
// requires 4 varyings in total (w/ residual)
flat in highp uvec4 layer0coeffs[2];
#endif

#include "shaders/activation.inc"
#include "shaders/deep/batchnorm.inc"
#include "shaders/deep/computeconv.inc"
#include "shaders/deep/residual.inc"

void main(void) {
  vec2 tc = texCoord.xy;
  // result of compute() on the input sample becomes the base output value
  fragmentColor0 =  compute(texture(inputLayer0,tc),0);  
  // bias / batchnorm handling: with POST_BATCHNORM the value is passed through applyBN(),
  // otherwise the texel at column int(texCoord.z) of biasTexture is added
#if !defined(NO_BIAS) || defined(POST_BATCHNORM)
#ifdef POST_BATCHNORM
  fragmentColor0 = applyBN(fragmentColor0, biasTexture, ivec4(texCoord.zw,0,1));
#else
  fragmentColor0 += texelFetch(biasTexture,ivec2(int(texCoord.z),0),0);
#endif
#endif
  // optional residual input is added on top of the result
#ifdef USE_RESIDUAL
  fragmentColor0 += residual(residualLayer0, resCoord.xy,  biasTexture, texCoord.zw);
#endif
}

The preprocessor directives and version information are added to the code later:

 #version 320 es
 // Preamble that is placed in front of the shader sources shown in this post.
 // NO_HALF, NO_BIAS, USE_RESIDUAL and INSTANCE_OFFSET are not defined in this listing, so
 // the vertex shader uses the usampler2D / uvec4[2] weight variant and gl_InstanceID
 // without offset.
 // BINDING_SUPPORT: selects the layout(binding=N) sampler declarations in the vertex shader
 #define BINDING_SUPPORT
 // GLES: not referenced by the shader code shown in this post
 #define GLES
 // DISP_UNIT / WEIGHT_UNIT carry the same values as the binding=4 / binding=5 qualifiers
 // in the vertex shader; BIAS_UNIT is not referenced by the shader code shown in this post
 #define DISP_UNIT 4
 #define WEIGHT_UNIT 5
 #define BIAS_UNIT 6
 // ACT_RELU: not referenced by the shader code shown in this post (presumably evaluated
 // by activation.inc - confirm)
 #define ACT_RELU
 // POST_BATCHNORM: selects the applyBN() path in the fragment shader and enables the
 // instance-dependent texCoord.z assignment in the vertex shader
 #define POST_BATCHNORM
 // the remaining defines are not referenced by the shader code shown in this post
 // (presumably evaluated by the included .inc files - confirm)
 #define PIXEL_PACKING 4
 #define PADDING 0
 #define KERNEL 1
 #define DILATION 1

Can anyone help me find out why the shaders might be behaving in such a way that the layer’s output becomes all 0?

I’ve tried to attach the critical sections that might be causing the deepgemmlayer to go bad. Apologies for the long message.

Thanks in advance for any help.