I’m building a simple 2D particle “system”, and I’ve recently started moving the transformation and trajectory calculations to a compute shader. For some reason it is slower than doing it directly on the CPU. I’m new to graphics programming, so any help is welcome.
Compute shader code.
// Work-group size: 256 invocations along X (Y and Z are left at their default of 1).
layout(local_size_x = 256) in;
// Per-particle input record, stored as the element type of the input storage buffer.
// NOTE(review): under std430 each array element should occupy 80 bytes
// (64 for the mat4 + 8 for the vec2, rounded up to the mat4's 16-byte alignment);
// confirm the host-side struct uses the same size and member order.
struct Particle
{
// Particle's own transform; main() multiplies it on the right of the translated emitter transform.
mat4 Transform;
// 2D offset that main() passes through CartesianToNDC() before using it as a translation.
vec2 Trajectory;
};
// Read-only shader storage block at binding 0 (std430 layout): the particles to process.
layout(std430, binding = 0) readonly buffer InParticlesBuffer
{
// Unsized array; main() indexes it with gl_GlobalInvocationID.x.
Particle InParticles[];
};
// Write-only shader storage block at binding 1 (std430 layout): one result matrix per particle.
layout(std430, binding = 1) writeonly buffer OutTransformsBuffer
{
// Unsized array; main() writes element gl_GlobalInvocationID.x.
mat4 OutParticleTransforms[];
};
// Emitter transform; main() translates it by each particle's converted position.
uniform mat4 ParticleEmmiterTransform;
// Divisors used by CartesianToNDC() for the x and y components respectively
// (presumably the window size in pixels; confirm against the host code).
uniform uint WindowWidth;
uniform uint WindowHeight;
// main() divides the converted position by this factor before translating.
uniform float ParticleScaleFactor;
// Scales a 2D position by 2 / (WindowWidth, WindowHeight), component-wise.
// The uint uniforms are converted to float by the vec2 constructor.
vec2 CartesianToNDC(in vec2 cartesianPosition)
{
    const vec2 windowSize = vec2(WindowWidth, WindowHeight);
    return (2.0f * cartesianPosition) / windowSize;
}
// GLSL port of glm::translate: returns inputMatrix with its fourth column
// replaced by the translated origin; the first three columns are copied unchanged.
mat4 Translate(in mat4 inputMatrix, in vec3 translationVector)
{
    const vec4 translatedColumn = inputMatrix[0] * translationVector.x
                                + inputMatrix[1] * translationVector.y
                                + inputMatrix[2] * translationVector.z
                                + inputMatrix[3];

    return mat4(inputMatrix[0], inputMatrix[1], inputMatrix[2], translatedColumn);
}
// Entry point: one invocation per particle.
// Reads InParticles[i], converts its trajectory with CartesianToNDC(), divides by
// ParticleScaleFactor, translates the emitter transform by that offset, multiplies by
// the particle's own transform and stores the result in OutParticleTransforms[i].
void main()
{
    const uint particleIndex = gl_GlobalInvocationID.x;

    // Work groups always contain 256 invocations, so a dispatch can launch more
    // invocations than there are particles. Surplus invocations must not touch
    // the buffers: out-of-range SSBO access is undefined behaviour.
    if (particleIndex >= uint(InParticles.length()) ||
        particleIndex >= uint(OutParticleTransforms.length()))
    {
        return;
    }

    const Particle particle = InParticles[particleIndex];
    const vec2 ndcPosition = CartesianToNDC(particle.Trajectory) / ParticleScaleFactor;
    const mat4 screenTransform =
        Translate(ParticleEmmiterTransform, vec3(ndcPosition, 0.0f)) * particle.Transform;

    OutParticleTransforms[particleIndex] = screenTransform;
}
GLSL Particle struct definition in C++
struct alignas(16) ComputeShaderParticle
{
glm::mat4 Transform;
glm::vec2 Trajectory;
};
How I create the SSBOs. Compute shader input buffer takes “GL_DYNAMIC_COPY”, and output buffer takes “GL_STATIC_DRAW”
// Create a buffer object, allocate its storage (optionally filled from bufferData),
// and attach it to an indexed shader-storage binding point.
glGenBuffers(1, &_bufferId);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, _bufferId);
// usageType is a hint describing how the storage will be accessed.
// NOTE(review): a buffer the CPU rewrites and the GPU reads is conventionally a *_DRAW
// usage, and a buffer the GPU writes and the CPU reads back is a *_READ usage; confirm
// the values callers pass match how each buffer is actually used.
glBufferData(GL_SHADER_STORAGE_BUFFER, bufferSizeInBytes, bufferData, usageType);
// bindIndex selects the shader-storage binding point (the `binding = N` of a shader block).
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, bindIndex, _bufferId);
How I retrieve and upload the data to the Compute Shader.
(GL calls are abstracted away in the actual code, but they still follow the same principle).
When I call glMapBuffer (or glBufferSubData and similar functions), OpenGL outputs a warning stating that moving an SSBO from video memory to RAM may result in a performance penalty.
_inputBuffer.get().Bind();
ComputeShaderParticle* inputBuffer = static_cast<ComputeShaderParticle*>(glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_WRITE_ONLY));
for(std::size_t i = 0; i < _numberOfParticles; i++)
{
inputBuffer[i].Trajectory = _particles[i].Trajectory;
inputBuffer[i].Transform = _particles[i].Transform;
};
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
glDispatchCompute((_numberOfParticles / 256) + 1, 1, 1);
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
_outputBuffer.get().Bind();
glm::mat4* screenTransformsBuffer = static_cast<glm::mat4*>(glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY));