I wrote a simple program using compute shader for practice, but the output is not correct. Help me find the error
void createBuffer(vk::DeviceSize size, vk::BufferUsageFlags usage, vk::MemoryPropertyFlags properties, vk::raii::Buffer& buffer, vk::raii::DeviceMemory& bufferMemory)
{
vk::BufferCreateInfo bufferInfo
{
.size = size,
.usage = usage,
.sharingMode = vk::SharingMode::eExclusive,
};
buffer = device.createBuffer(bufferInfo);
auto memRequirements = buffer.getMemoryRequirements();
vk::MemoryAllocateInfo allocInfo
{
.allocationSize = memRequirements.size,
.memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties)
};
bufferMemory = device.allocateMemory(allocInfo);
buffer.bindMemory(*bufferMemory, 0);
}
vk::raii::CommandBuffer beginSingleTimeCommands()
{
vk::CommandBufferAllocateInfo allocInfo
{
.commandPool = *commandPool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = 1
};
vk::raii::CommandBuffer commandBuffer = std::move(device.allocateCommandBuffers(allocInfo).front());
vk::CommandBufferBeginInfo beginInfo
{
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
commandBuffer.begin(beginInfo);
return commandBuffer;
}
void endSingleTimeCommands(vk::raii::CommandBuffer& commandBuffer)
{
commandBuffer.end();
vk::SubmitInfo submitInfo
{
.commandBufferCount = 1,
.pCommandBuffers = &(*commandBuffer)
};
graphicsQueue.submit(submitInfo);
graphicsQueue.waitIdle();
commandBuffer.clear();
}
void compute()
{
std::vector<float> data = {
0.0f, 1.0f, 0.343f, 0.5f, 34.0f, 23.0f, 21.0f
};
vk::raii::Buffer hostBuf = nullptr;
vk::raii::DeviceMemory hostMem = nullptr;
createBuffer(data.size() * sizeof(float), vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, hostBuf, hostMem);
vk::raii::Buffer devBuf = nullptr;
vk::raii::DeviceMemory devMem = nullptr;
createBuffer(data.size() * sizeof(float), vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eStorageBuffer, vk::MemoryPropertyFlagBits::eDeviceLocal, devBuf, devMem);
void* dataA = hostMem.mapMemory(0, data.size() * sizeof(float));
memcpy(dataA, data.data(), data.size() * sizeof(float));
hostMem.unmapMemory();
vk::DescriptorSetLayoutBinding bind
{
.binding = 0,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute
};
vk::DescriptorSetLayoutCreateInfo layInfo
{
.bindingCount = 1,
.pBindings = &bind
};
vk::raii::DescriptorSetLayout lay(device, layInfo);
vk::DescriptorPoolSize poolSize
{
.type = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1
};
vk::DescriptorPoolCreateInfo poolInfo
{
.maxSets = 1,
.poolSizeCount = 1,
.pPoolSizes = &poolSize,
};
vk::raii::DescriptorPool pool(device, poolInfo);
vk::DescriptorSetAllocateInfo alloc
{
.descriptorPool = *pool,
.descriptorSetCount = 1,
.pSetLayouts = &(*lay),
};
auto sets = (*device).allocateDescriptorSets(alloc);
vk::DescriptorBufferInfo bufInfo
{
.buffer = *devBuf,
.offset = 0,
.range = data.size() * sizeof(float)
};
vk::WriteDescriptorSet write
{
.dstSet = sets[0],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &bufInfo
};
device.updateDescriptorSets(write, nullptr);
auto code = readFile("spv/compute.spv");
auto modu = createShaderModule(code);
vk::PipelineShaderStageCreateInfo stageInfo
{
.stage = vk::ShaderStageFlagBits::eCompute,
.module = *modu,
.pName = "main"
};
vk::PipelineLayoutCreateInfo pipelayInfo
{
.setLayoutCount = 1,
.pSetLayouts = &(*lay)
};
vk::raii::PipelineLayout pipeLay(device, pipelayInfo);
vk::ComputePipelineCreateInfo pipInfo
{
.stage = stageInfo,
.layout = *pipeLay,
.basePipelineIndex = -1
};
vk::raii::Pipeline comp(device, nullptr, pipInfo);
auto com1 = beginSingleTimeCommands();
vk::BufferCopy cop
{
.srcOffset = 0,
.dstOffset = 0,
.size = data.size() * sizeof(float)
};
com1.copyBuffer(*hostBuf, *devBuf, cop);
vk::BufferMemoryBarrier bar
{
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = *devBuf,
.offset = 0,
.size = data.size() * sizeof(float)
};
com1.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eComputeShader, {}, {}, bar, {});
com1.bindPipeline(vk::PipelineBindPoint::eCompute, *comp);
com1.bindDescriptorSets(vk::PipelineBindPoint::eCompute, *pipeLay, 0, sets[0], nullptr);
com1.dispatch(data.size(), 1, 1);
bar.srcAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite;
bar.dstAccessMask = vk::AccessFlagBits::eTransferRead;
bar.buffer = *devBuf;
com1.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eTransfer, {}, {}, bar, {});
com1.copyBuffer(*devBuf, *hostBuf, cop);
endSingleTimeCommands(com1);
void* comData = hostMem.mapMemory(0, data.size() * sizeof(float));
std::cout << "DATA:\n";
for (size_t i = 0; i < data.size(); ++i)
{
std::cout << static_cast<float*>(comData)[i] << "\n";
}
}
Shader code:
#version 450
layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
layout(std140, binding = 0) buffer Buf {
float buf[];
};
void main()
{
uint index = gl_GlobalInvocationID.z;
buf[index] = 1.0f;
}
Program output
DATA:
1
1
0.343
0.5
34
23
21
graphicsQueue supports compute operations