Data alignment in shader

Hi all!

I have a strange issue while migrating my code from OpenGL 3.3 to DSA buffers, which I think is related to data alignment in the shaders.
Let me summarize:
I have the following VS (the fragment shader is not really relevant for now); it's a simple shader that draws an animated 3D model with bone data.

#version 330 core
#define BONES_PER_VERTEX 4
#define BONES_SUPPORT

// Vertex attributes.
// NOTE: vertex shader inputs must not be declared as unsized arrays —
// an array input consumes one location per element, so `in uint BoneIDs[]`
// at location 5 would overlap `Weights` at location 6. The four bone
// indices/weights are therefore packed into a single uvec4/vec4.
layout (location = 0) in vec3 Position;
layout (location = 1) in vec3 Normal;
layout (location = 2) in vec2 aTexCoords;
layout (location = 3) in vec3 Tangent;
layout (location = 4) in vec3 BiTangent;
layout (location = 5) in uvec4 BoneIDs;	// integer attribute: feed with glVertexAttribIPointer / glVertexArrayAttribIFormat
layout (location = 6) in vec4 Weights;	// blend weights, expected to sum to 1.0

// Upper bound on skeleton size; 100 mat4 = 1600 uniform components,
// keep below GL_MAX_VERTEX_UNIFORM_COMPONENTS.
const int MAX_BONES = 100;
uniform mat4 gBones[MAX_BONES];	// final bone transforms for the current frame

uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;

out vec2 TexCoords;

void main()
{
	TexCoords = aTexCoords;

	vec4 Pos = vec4(Position, 1.0);
#ifdef BONES_SUPPORT
	// Blend the (up to) four influencing bone matrices by weight, then skin.
	mat4 BoneTransform = gBones[BoneIDs.x] * Weights.x;
	BoneTransform += gBones[BoneIDs.y] * Weights.y;
	BoneTransform += gBones[BoneIDs.z] * Weights.z;
	BoneTransform += gBones[BoneIDs.w] * Weights.w;
	Pos = BoneTransform * Pos;
#endif

	gl_Position = projection * view * model * Pos;
}

The data source comes from this data structure:

#define NUM_BONES_PER_VERTEX	4 // Number of Bones per Vertex
    
// Per-vertex skinning data: up to NUM_BONES_PER_VERTEX bone influences,
// stored as two parallel arrays so they can be uploaded as one integer
// attribute (IDs) and one float attribute (Weights).
struct VertexBoneData
{
	GLuint IDs[NUM_BONES_PER_VERTEX];      // indices into the gBones uniform array
	GLfloat Weights[NUM_BONES_PER_VERTEX]; // blend weight per bone

	// A freshly constructed entry carries no bone influences.
	VertexBoneData() { Reset(); }

	// Drop every influence: each slot points at bone 0 with zero weight.
	void Reset()
	{
		for (unsigned int slot = 0; slot < NUM_BONES_PER_VERTEX; ++slot)
		{
			IDs[slot] = 0;
			Weights[slot] = 0;
		}
	}

	void AddBoneData(unsigned int BoneID, float Weight);
};

// Interleaved per-vertex layout uploaded verbatim into the GL_ARRAY_BUFFER.
// Field order and offsets must stay in sync with the attribute setup
// (offsetof(Vertex, ...) is used when declaring the vertex format).
struct Vertex {
	glm::vec3		Position = { 0.0f, 0.0f, 0.0f };	// attribute location 0
	glm::vec3		Normal = { 0.0f, 0.0f, 0.0f };		// attribute location 1
	glm::vec2		TexCoords = { 0.0f, 0.0f };			// attribute location 2
	glm::vec3		Tangent = { 0.0f, 0.0f, 0.0f };		// attribute location 3
	glm::vec3		Bitangent = { 0.0f, 0.0f, 0.0f };	// attribute location 4
	VertexBoneData	Bone;								// locations 5 (IDs) and 6 (Weights)
};

And, the “old” OpenGL code (opengl 3.X), used to work fine, with this buffer creation:

    // --- Classic (pre-DSA, OpenGL 3.x) VAO setup; attribute state below is recorded into the bound VAO ---
    glGenVertexArrays(1, &m_VAO);
    glBindVertexArray(m_VAO);

    // Upload the interleaved Vertex array into a vertex buffer
    glGenBuffers(1, &m_vertexBuffer);
    glBindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
    glBufferData(GL_ARRAY_BUFFER, m_vertices.size() * sizeof(Vertex), &m_vertices[0], GL_STATIC_DRAW);

    // Upload indices into an element buffer (this binding is captured by the VAO)
    glGenBuffers(1, &m_indicesBuffer);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indicesBuffer);
    glBufferData(GL_ELEMENT_ARRAY_BUFFER, m_indices.size() * sizeof(unsigned int), &m_indices[0], GL_STATIC_DRAW);

    // Describe the interleaved layout: stride is sizeof(Vertex), offsets via offsetof
    // vertex Positions (location 0)
    glEnableVertexAttribArray(POSITION_LOCATION);
    glVertexAttribPointer(POSITION_LOCATION, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)0);
    // vertex normals (location 1)
    glEnableVertexAttribArray(NORMAL_LOCATION);
    glVertexAttribPointer(NORMAL_LOCATION, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, Normal));
    // vertex texture coords (location 2)
    glEnableVertexAttribArray(TEX_COORD_LOCATION);
    glVertexAttribPointer(TEX_COORD_LOCATION, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, TexCoords));
    // vertex tangent (location 3)
    glEnableVertexAttribArray(TANGENT_LOCATION);
    glVertexAttribPointer(TANGENT_LOCATION, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, Tangent));
    // vertex bitangent (location 4)
    glEnableVertexAttribArray(BITANGENT_LOCATION);
    glVertexAttribPointer(BITANGENT_LOCATION, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, Bitangent));

    // Bone IDs as unsigned int: glVertexAttribIPointer (note the I) passes the
    // values through unconverted; plain glVertexAttribPointer would convert to float
    glEnableVertexAttribArray(BONE_ID_LOCATION);
    glVertexAttribIPointer(BONE_ID_LOCATION, 4, GL_UNSIGNED_INT, sizeof(Vertex), (void*)(offsetof(Vertex, Bone) + offsetof(VertexBoneData, IDs)));
    
    // Bone weights (location 6)
    glEnableVertexAttribArray(BONE_WEIGHT_LOCATION);
    glVertexAttribPointer(BONE_WEIGHT_LOCATION, 4, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)(offsetof(Vertex, Bone) + offsetof(VertexBoneData, Weights)));

    glBindVertexArray(0);

But now I have migrated the code above to OpenGL 4.5 with DSA, and some of the data is not properly passed to the shader. As far as I have seen the “BoneID’s” and “Weights” locations are not properly received in the shader, and I think it might be due to data packing.

The new buffer creation code is encapsulated in a class that looks like:

#include "main.h"
#include "VertexArray.h"

// Map an engine ShaderDataType to the matching OpenGL component type enum.
// Logs and returns 0 for any type this renderer does not recognise.
static GLenum ShaderDataTypeToOpenGLBaseType(ShaderDataType type)
{
	switch (type)
	{
	case ShaderDataType::Float:
	case ShaderDataType::Float2:
	case ShaderDataType::Float3:
	case ShaderDataType::Float4:
		return GL_FLOAT;
	case ShaderDataType::Int:
	case ShaderDataType::Int2:
	case ShaderDataType::Int3:
	case ShaderDataType::Int4:
		return GL_INT;
	case ShaderDataType::Bool:
		return GL_BOOL;
	}

	Logger::error("Unknown ShaderDataType");
	return 0;
}

// Create the VAO up front; with DSA it can be configured without being bound.
VertexArray::VertexArray()
	:
	m_IndexBuffer(nullptr)
{
	glCreateVertexArrays(1, &m_RendererID);
}

// Release the GL VAO handle.
// NOTE(review): the attached VertexBuffer/IndexBuffer objects are NOT deleted
// here — ownership appears to stay with the caller; confirm to avoid leaks.
VertexArray::~VertexArray()
{
	if (m_RendererID!=0)
		glDeleteVertexArrays(1, &m_RendererID);
}

// Bind the VAO for drawing (only needed at draw time; setup is pure DSA).
void VertexArray::Bind() const
{
	glBindVertexArray(m_RendererID);
}

// Unbind whatever VAO is current (binds VAO 0).
void VertexArray::Unbind() const
{
	glBindVertexArray(0);
}

// Attach a vertex buffer to binding point 0 of this VAO and register every
// element of its layout as a vertex attribute. Attribute locations are handed
// out from m_VertexBufferIndex, which keeps counting across calls so a second
// buffer continues at the next free location.
void VertexArray::AddVertexBuffer(VertexBuffer* vertexBuffer)
{
	const auto& layout = vertexBuffer->GetLayout();

	// TODO: binding point and buffer offset are hardcoded to 0; generalize
	// when more than one vertex buffer per VAO is needed.
	glVertexArrayVertexBuffer(m_RendererID, 0, vertexBuffer->GetBufferID(), 0, layout.GetStride());

	for (const auto& element : layout)
	{
		switch (element.Type)
		{
		case ShaderDataType::Float:
		case ShaderDataType::Float2:
		case ShaderDataType::Float3:
		case ShaderDataType::Float4:
		{
			// Floating-point attribute: values reach the shader as floats.
			glEnableVertexArrayAttrib(m_RendererID, m_VertexBufferIndex);
			glVertexArrayAttribFormat(m_RendererID, m_VertexBufferIndex,
				element.GetComponentCount(),
				ShaderDataTypeToOpenGLBaseType(element.Type),
				element.Normalized ? GL_TRUE : GL_FALSE,
				element.Offset);
			glVertexArrayAttribBinding(m_RendererID, m_VertexBufferIndex, 0);

			m_VertexBufferIndex++;
			break;
		}
		case ShaderDataType::Int:
		case ShaderDataType::Int2:
		case ShaderDataType::Int3:
		case ShaderDataType::Int4:
		case ShaderDataType::Bool:
		{
			// Integer attributes MUST use the ...IFormat variant so values reach
			// int/uint/ivec*/uvec* shader inputs unconverted; using the plain
			// glVertexArrayAttribFormat for an integer input is undefined behaviour.
			// NOTE(review): GL_BOOL is not an accepted type for
			// glVertexArrayAttribIFormat — consider mapping Bool to GL_INT or
			// GL_UNSIGNED_BYTE in ShaderDataTypeToOpenGLBaseType.
			glEnableVertexArrayAttrib(m_RendererID, m_VertexBufferIndex);
			glVertexArrayAttribIFormat(m_RendererID, m_VertexBufferIndex,
				element.GetComponentCount(),
				ShaderDataTypeToOpenGLBaseType(element.Type),
				element.Offset);
			glVertexArrayAttribBinding(m_RendererID, m_VertexBufferIndex, 0);

			m_VertexBufferIndex++;
			break;
		}
		default:
			Logger::error("Unknown ShaderDataType");
			break;
		}
	}

	m_VertexBuffers.push_back(vertexBuffer);
	// Fix: removed the stray glBindVertexArray(0) that was here — this class is
	// pure DSA and must not clobber whatever VAO the caller currently has bound.
}

// Attach an element (index) buffer to the VAO via DSA and remember it
// so callers can query the index count at draw time.
void VertexArray::SetIndexBuffer(IndexBuffer* indexBuffer)
{
	glVertexArrayElementBuffer(m_RendererID, indexBuffer->GetBufferID());
	m_IndexBuffer = indexBuffer;
}

And the .h:

#include <memory>
#include "core/renderer/Buffer.h"

// RAII wrapper around an OpenGL Vertex Array Object, configured entirely with
// DSA (glCreateVertexArrays / glVertexArray*) so no binding is required during setup.
// NOTE(review): the VertexBuffer*/IndexBuffer* members are raw pointers that are
// never deleted by this class — confirm who owns them. Consider also deleting
// the copy constructor/assignment so the GL handle cannot be double-freed.
class VertexArray
{
public:
	VertexArray();
	virtual ~VertexArray();

	// Bind/unbind the VAO for drawing.
	virtual void Bind() const;
	virtual void Unbind() const;

	// Registers the buffer's layout elements as attributes on binding point 0.
	virtual void AddVertexBuffer(VertexBuffer* vertexBuffer);
	// Attaches the element buffer to the VAO.
	virtual void SetIndexBuffer(IndexBuffer* indexBuffer);

	virtual const std::vector<VertexBuffer*>& GetVertexBuffers() const { return m_VertexBuffers; }
	virtual const IndexBuffer* GetIndexBuffer() const { return m_IndexBuffer; }

private:
	uint32_t					m_RendererID;				// GL VAO handle (our "VAO")
	uint32_t					m_VertexBufferIndex = 0;	// next free attribute location
	std::vector<VertexBuffer*>	m_VertexBuffers;			// not owned here — see NOTE above
	IndexBuffer*				m_IndexBuffer;				// not owned here — see NOTE above
};

And the VAO setup is done as follows:

// Allocate the Vertex Array wrapper (owns the GL VAO handle)
m_VertexArray = new VertexArray();
// Create & load the vertex buffer with the interleaved Vertex data
	// The layout must mirror struct Vertex field-for-field.
	// NOTE(review): aBoneID is declared Int4 (GL_INT) while the buffer holds
	// GLuint and the shader input is uvec4 — both are 32-bit pass-through with
	// glVertexArrayAttribIFormat, but GL_UNSIGNED_INT would describe the data
	// exactly; verify the IFormat path is taken for this element.
	VertexBuffer *vertexBuffer = new VertexBuffer(&m_vertices[0], static_cast<uint32_t>(m_vertices.size() * sizeof(Vertex)));
	vertexBuffer->SetLayout({
		{ ShaderDataType::Float3,	"aPos"},
		{ ShaderDataType::Float3,	"aNormal"},
		{ ShaderDataType::Float2,	"aTexCoords"},
		{ ShaderDataType::Float3,	"aTangent"},
		{ ShaderDataType::Float3,	"aBiTangent"},
		{ ShaderDataType::Int4,		"aBoneID"},
		{ ShaderDataType::Float4,	"aBoneWeight"}
		});

	m_VertexArray->AddVertexBuffer(vertexBuffer);

	// Create & load the index buffer (count in indices, not bytes)
	IndexBuffer* indexBuffer = new IndexBuffer(&m_indices[0], static_cast<uint32_t>(m_indices.size()));
	m_VertexArray->SetIndexBuffer(indexBuffer);

	m_VertexArray->Unbind();

The Stride and offsets that I get are the following:
2020-11-11 17_02_16-Book1 - Excel

I think that maybe the stride is wrong… but I don’t know how to calculate it properly… any clue?

Another important point to highlight is that in the “old” code I use to initialize the buffers as follows:

glBindBuffer(GL_ARRAY_BUFFER, m_RendererID);
glBufferData(GL_ARRAY_BUFFER, size, data, GL_STATIC_DRAW);

But in the new code, I’m using the DSA functions, like:

glNamedBufferStorage(m_RendererID, size, data, GL_MAP_READ_BIT);

Might that have any impact?

Thanks in advance!!!

88 is likely correct, although technically the compiler gets to decide padding. FWIW, a quick test here (g++ 9.3.0, Linux, amd64) reports 88 for sizeof(Vertex).

Why GL_MAP_READ_BIT? For attribute data, you’ll be writing it from the client.

1 Like

Thanks GClements! You are right, the GL_MAP_READ_BIT makes no sense… I have corrected the buffer creation as follows, but the same issue persists.

glNamedBufferStorage(m_RendererID, size, data, 0);

This shouldn’t link. Each element of the array is a separate attribute with a separate location, so if you have more than one element the two will overlap. These should probably be changed to uvec4 and vec4 respectively. If you want to support more than four bones, and you don’t want to use a separate attribute per bone, you’re going to need to pack/unpack the data to/from uvec4/vec4 attributes manually in the shader.

If you do want to use separate attributes per bone, you’ll need to update the SetLayout() call to reflect that; i.e. it should be 4× Int and 4× Float, not one Int4 and one Float4.

That has nothing to do with DSA versus “currently-bound VAO/VBO” or glVertexAttribPointer versus glBindVertexBuffer + glVertexAttribBinding + glVertexAttribFormat. For vertex shader inputs (attributes), a vec4 isn’t the same as a float[4] array. The former is one attribute, the latter is four.

1 Like

The OP might be interested in reading this.

Interface blocks cannot be used for vertex shader inputs (or fragment shader outputs). They are limited to the interfaces between stages, uniform blocks, and shader storage blocks.

2 Likes

Thanks for the reply!!!
It makes sense to pack it into uvec4 and vec4 respectively (I don’t think I’ll ever use more than 4 bones), but that was also one of my tests, and I did not get the proper result — I just see a black screen.

And I should see the animation as is shown here (but textured, hehe): https://www.youtube.com/watch?v=6DhJ7-6DhZE&feature=emb_title

The change that I did was to adapt the shader locations and code:

#version 330 core
#define BONES_SUPPORT

// Bone indices/weights are packed into a single uvec4/vec4 each: an array
// input would consume one location per element and overlap location 6.
layout (location = 0) in vec3 Position;
layout (location = 1) in vec3 Normal;
layout (location = 2) in vec2 aTexCoords;
layout (location = 3) in vec3 Tangent;
layout (location = 4) in vec3 BiTangent;
layout (location = 5) in uvec4 BoneIDs;
layout (location = 6) in vec4 Weights;

// 100 mat4 = 1600 components; keep below GL_MAX_VERTEX_UNIFORM_COMPONENTS.
const int MAX_BONES = 100;
uniform mat4 gBones[MAX_BONES];

uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;

out vec2 TexCoords;

void main()
{
	TexCoords = aTexCoords;

	vec4 Pos = vec4(Position, 1.0);
#ifdef BONES_SUPPORT
	// Blend the four influencing bone matrices by weight, then skin the position.
	mat4 BoneTransform = gBones[BoneIDs.x] * Weights.x;
    BoneTransform += gBones[BoneIDs.y] * Weights.y;
    BoneTransform += gBones[BoneIDs.z] * Weights.z;
    BoneTransform += gBones[BoneIDs.w] * Weights.w;
	Pos = BoneTransform * Pos;
#endif

	
	gl_Position = projection * view * model * Pos;
}

If I use glVertexArrayAttribFormat instead of glVertexArrayAttribIFormat for sending the uvec4 value, I can see “something” (see image below), but I think the format should be sent with the glVertexArrayAttribIFormat, since we are sending unsigned integers.

I have also checked the “gBones” matrices, and they are sent properly…

Am I missing something?

This may exceed the value of GL_MAX_VERTEX_UNIFORM_COMPONENTS which is only required to be at least 1024 (100 mat4s is 1600 components). That’s for 4.6; previous versions may have a lower requirement. If you need more uniform storage than is available, use a SSBO (requires 4.3 or the ARB_shader_storage_buffer_object extension), or a texture.

glVertexArrayAttribIFormat is correct for an integer attribute. Using glVertexArrayAttribFormat for an integer attribute (i.e. where the variable is int, uint, ivec* or uvec*) is undefined behaviour; typically it will convert it to float then perform the equivalent of floatBitsToInt.

1 Like

I have done a glGet on GL_MAX_VERTEX_UNIFORM_COMPONENTS and I get a value of 4096; even though I have reduced MAX_BONES to 32 (I have just 32 bones in this model), I still get the same results…

Ok, i made it work and the solution was… change the PC!!! I was trying initially with a intel integrated card, but I have tried the same code with an nvidia, and runs perfectly!!!

I will update the intel drivers and try again, but In my tests, I think that the current driver that I have does not process properly the values sent to the shader with the format specified with glVertexArrayAttribIFormat.

Amazing… simply amazing… :open_mouth: :open_mouth: :open_mouth:

EDIT: I made another test, not changing the shader, and just updating the code of the vertex definition (instead of using DSA buffers, going back to the “old” code [using glVertexAttribIPointer]), and the animation runs perfectly… so… my assumption is that intel drivers (i have a shitty intel 520) have a bug on the DSA function… or maybe I’m missing something!

More info: https://doc.magnum.graphics/magnum/opengl-workarounds.html

From what I know nVidia drivers are more lax than their AMD / Intel counterparts. But also, it seems your card might not support GL 4.5, or as you stated, drivers for DSA might not be ready (beta driver ?)

source1
source2

So did you check that ARB_direct_state_access is available ? And you might also read the drivers review of your intel card.

Yes, intel 520 supports openGL 4.5 (or at least is what it says the driver properties screen).
I will check ARB_direct_state_access, just to doublecheck…

Thanks!!!

Confirmed, the getString returns:

OpenGL driver version: 4.5.0 - Build 23.20.16.4877
OpenGL driver vendor: Intel
OpenGL driver renderer: Intel(R) HD Graphics 520
GLFW library version: 3.3.2 Win32 WGL EGL OSMesa VisualC

So… if driver supports 4.5, it should support DSA

Assuming it is a driver bug, you can work around it by changing BoneIDs to a vec4 and casting to uvec4 in the vertex shader. But you’ll need to change AddVertexBuffer to either use glVertexArrayAttribFormat for integers, which would be a problem if you actually need to use integer attributes elsewhere (i.e. if you need the full 32-bit integer range), or choose between glVertexArrayAttribFormat and glVertexArrayAttribIFormat based upon the variable type (which can be queried with glGetActiveAttrib).

Thanks! I have tried sending bones as vec4 and casting the values to int with ivec4 BoneID = floatBitsToInt(BoneIDs); and works fine, but as you said, using glVertexArrayAttribFormat always is not ideal, I will need int’s for sure.

But I don’t understand this:

What do you mean? I already have a switch statement which chooses between one or another depending on the data type.

Thanks!!

I wasn’t suggesting using floatBitsToInt. I was suggesting to pass the data using glVertexArrayAttribFormat with type equal to GL_INT and normalized equal to GL_FALSE (which will convert the values to floating-point) then converting back to integers with e.g.

layout (location = 5) in vec4 BoneIDsF;
...
    uvec4 BoneIDs = uvec4(BoneIDsF);

Passing integer data with type equal to GL_FLOAT (so that no conversion is performed) then converting with floatBitsToInt (i.e. a reinterpreting cast rather than a value cast) will typically work but is undefined behaviour per the specification.

Which function is appropriate depends upon the type of the shader variable, not the data. You can pass an array of integers to a floating-point attribute by using glVertexArrayAttribFormat with type equal to GL_INT and normalized equal to GL_FALSE. If you’re having trouble passing integers as integers with the Intel driver, this may be an appropriate solution. But that requires looking at the type of the shader variable; integer data could be used for either integer or floating-point attributes.

Crystal clear, GClements! Thanks a lot!!!