Incorrect compute shader output

I wrote a simple program using compute shader for practice, but the output is not correct. Help me find the error

void createBuffer(vk::DeviceSize size, vk::BufferUsageFlags usage, vk::MemoryPropertyFlags properties, vk::raii::Buffer& buffer, vk::raii::DeviceMemory& bufferMemory)
    {
        vk::BufferCreateInfo bufferInfo
        {
            .size = size,
            .usage = usage,
            .sharingMode = vk::SharingMode::eExclusive,
        };

        buffer = device.createBuffer(bufferInfo);

        auto memRequirements = buffer.getMemoryRequirements();

        vk::MemoryAllocateInfo allocInfo
        {
            .allocationSize = memRequirements.size,
            .memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties)
        };

        bufferMemory = device.allocateMemory(allocInfo);
        buffer.bindMemory(*bufferMemory, 0);
    }

vk::raii::CommandBuffer beginSingleTimeCommands()
    {
        vk::CommandBufferAllocateInfo allocInfo
        {
            .commandPool = *commandPool,
            .level = vk::CommandBufferLevel::ePrimary,
            .commandBufferCount = 1
        };

        vk::raii::CommandBuffer commandBuffer = std::move(device.allocateCommandBuffers(allocInfo).front());

        vk::CommandBufferBeginInfo beginInfo
        {
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
        };

        commandBuffer.begin(beginInfo);

        return commandBuffer;
    }

    void endSingleTimeCommands(vk::raii::CommandBuffer& commandBuffer)
    {
        commandBuffer.end();

        vk::SubmitInfo submitInfo
        {
            .commandBufferCount = 1,
            .pCommandBuffers = &(*commandBuffer)
        };

        graphicsQueue.submit(submitInfo);
        graphicsQueue.waitIdle();

        commandBuffer.clear();
    }

void compute()
    {
        std::vector<float> data = {
            0.0f, 1.0f, 0.343f, 0.5f, 34.0f, 23.0f, 21.0f
        };

        vk::raii::Buffer hostBuf = nullptr;
        vk::raii::DeviceMemory hostMem = nullptr;
        createBuffer(data.size() * sizeof(float), vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, hostBuf, hostMem);

        vk::raii::Buffer devBuf = nullptr;
        vk::raii::DeviceMemory devMem = nullptr;
        createBuffer(data.size() * sizeof(float), vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eStorageBuffer, vk::MemoryPropertyFlagBits::eDeviceLocal, devBuf, devMem);

        void* dataA = hostMem.mapMemory(0, data.size() * sizeof(float));
        memcpy(dataA, data.data(), data.size() * sizeof(float));
        hostMem.unmapMemory();

        vk::DescriptorSetLayoutBinding bind
        {
            .binding = 0,
            .descriptorType = vk::DescriptorType::eStorageBuffer,
            .descriptorCount = 1,
            .stageFlags = vk::ShaderStageFlagBits::eCompute
        };
        vk::DescriptorSetLayoutCreateInfo layInfo
        {
            .bindingCount = 1,
            .pBindings = &bind
        };

        vk::raii::DescriptorSetLayout lay(device, layInfo);

        vk::DescriptorPoolSize poolSize
        {
            .type = vk::DescriptorType::eStorageBuffer,
            .descriptorCount = 1
        };

        vk::DescriptorPoolCreateInfo poolInfo
        {
            .maxSets = 1,
            .poolSizeCount = 1,
            .pPoolSizes = &poolSize,        
        };

        vk::raii::DescriptorPool pool(device, poolInfo);

        vk::DescriptorSetAllocateInfo alloc
        {
            .descriptorPool = *pool,
            .descriptorSetCount = 1,
            .pSetLayouts = &(*lay),
        };

        auto sets = (*device).allocateDescriptorSets(alloc);

        vk::DescriptorBufferInfo bufInfo
        {
            .buffer = *devBuf,
            .offset = 0,
            .range = data.size() * sizeof(float)
        };

        vk::WriteDescriptorSet write
        {
            .dstSet = sets[0],
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eStorageBuffer,
            .pBufferInfo = &bufInfo
        };

        device.updateDescriptorSets(write, nullptr);

        auto code = readFile("spv/compute.spv");
        auto modu = createShaderModule(code);

        vk::PipelineShaderStageCreateInfo stageInfo
        {
            .stage = vk::ShaderStageFlagBits::eCompute,
            .module = *modu, 
            .pName = "main"
        };

        vk::PipelineLayoutCreateInfo pipelayInfo
        {
            .setLayoutCount = 1,
            .pSetLayouts = &(*lay)
        };

        vk::raii::PipelineLayout pipeLay(device, pipelayInfo);

        vk::ComputePipelineCreateInfo pipInfo
        {
            .stage = stageInfo,
            .layout = *pipeLay,
            .basePipelineIndex = -1
        };

        vk::raii::Pipeline comp(device, nullptr, pipInfo);

        auto com1 = beginSingleTimeCommands();

        vk::BufferCopy cop
        {
            .srcOffset = 0,
            .dstOffset = 0,
            .size = data.size() * sizeof(float)
        };

        com1.copyBuffer(*hostBuf, *devBuf, cop);

        vk::BufferMemoryBarrier bar
        {
            .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
            .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = *devBuf,
            .offset = 0,
            .size = data.size() * sizeof(float)
        };

        com1.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eComputeShader, {}, {}, bar, {});

        com1.bindPipeline(vk::PipelineBindPoint::eCompute, *comp);
        com1.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeLay, 0, sets[0], nullptr);
        com1.dispatch(data.size(), 1, 1);

        bar.srcAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite;
        bar.dstAccessMask = vk::AccessFlagBits::eTransferRead;
        bar.buffer = *devBuf;
        com1.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eTransfer, {}, {}, bar, {});

        com1.copyBuffer(*devBuf, *hostBuf, cop);

        endSingleTimeCommands(com1);

        void* comData = hostMem.mapMemory(0, data.size() * sizeof(float));

        std::cout << "DATA:\n";
        for (size_t i = 0; i < data.size(); ++i)
        {
            std::cout << static_cast<float*>(comData)[i] << "\n";
        }
    }

Shader code:

#version 450

layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(std140, binding = 0) buffer Buf {
    float buf[];
};

void main()
{
    uint index = gl_GlobalInvocationID.z;
    buf[index] = 1.0f;
}

Program output

DATA:
1
1
0.343
0.5
34
23
21

graphicsQueue supports compute operations

You dispatch with x but index with z.

1 Like

The output has changed, but it’s still not what was expected

DATA:
1
1
0.343
0.5
1
23
21

If you want to have an array of unpadded (!) floats in your buffer, you need to use std430, instead of std140 as layout.

PS: you probably have your default dispatcher initialized? Otherwise

auto sets = (*device).allocateDescriptorSets(alloc);

would have miserably failed. You know, that this isn’t needed with the vk::raii handles?

1 Like

Thank you.

yes. Also validation layers complain about the destruction of vk::raii::DescriptorSet

Yes, that’s a known issue. You need to explicitly use

vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet

on creation of the vk::raii::DescriptorPool.
Unfortunately, that can’t be generated automatically, but maybe I’ll add some special handling for that.

1 Like

This topic was automatically closed 183 days after the last reply. New replies are no longer allowed.