Hi,
the situation :
- a compute shader writing stuff in an SBO
- a vertex shader reading it and rendering
Case 1 :
- the compute shader and the vertex shader run ON THE SAME QUEUE ( the graphics queue )
- two separate command buffers: one for the “compute” work, one for the “draw” work
- No memory barriers
- SBO pre-initialized with some contents
- the draw CMD is executed before the compute CMD
- at vkQueueSubmit(), the compute CMD waits on a “graphic semaphore” (meaning “the draw is done using my SBO”) and signals a “compute semaphore” (meaning “I am done”)
- the compute semaphore is “pre-signalled” at startup ( so that it begins in the “signalled state” )
- at vkQueueSubmit(), the draw CMD waits on the “compute semaphore” (meaning “the compute is done”) and signals the “graphic semaphore” (meaning “I am done”)
( I omitted that the draw also waits on the usual swapchain “image acquired” semaphore, and so on )
In Case 1, everything seems to work without a problem.
Case 2 - everything is as in Case 1, except:
- the compute shader runs on a dedicated “compute queue” ( index ‘2’ )
- the draw runs on the “graphic queue” ( index 1 )
The “compute” command buffer is recorded as follows:
// Records the compute command buffer: a buffer barrier on SB1 marked "acquire",
// the compute dispatch, then a buffer barrier on SB1 marked "release".
// NOTE(review): recording is begun on DeviceParams.compute_cmd[i][what_group], while the
// vkCmd* calls below use `cmd`; presumably both refer to the same command buffer — confirm.
VkCommandBufferBeginInfo cmd_buf_info = {};
cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
cmd_buf_info.pNext = NULL;
cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
cmd_buf_info.pInheritanceInfo = NULL;
PRINTF("Recording COMPUTE command %d\n", i);
result = vkBeginCommandBuffer(DeviceParams.compute_cmd[i][what_group], &cmd_buf_info);
assert(result == VK_SUCCESS);
// Barrier intended to acquire SB1 for the compute queue family
// (srcQueueFamilyIndex = graphics family, dstQueueFamilyIndex = compute family).
// NOTE(review): per the Vulkan specification, a queue family ownership transfer is made of
// a release barrier recorded on a queue of the SOURCE family and a matching acquire barrier
// recorded on a queue of the DESTINATION family. Both barriers in this snippet are recorded
// into this one command buffer; their counterparts (release graphics->compute, acquire
// compute->graphics) would have to be recorded in the command buffer submitted to the
// graphics queue, which is not shown here — confirm they exist.
VkBufferMemoryBarrier acquire_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
nullptr, // pNext
0, // SRC access mask (none)
VK_ACCESS_SHADER_WRITE_BIT, // DST access mask
DeviceParams.queueFamilyIndex, // srcQueueFamilyIndex
DeviceParams.compute_queueFamilyIndex, // dstQueueFamilyIndex
Scene->SB1.buffer.buffer, // buffer
0, // offset
Scene->SB1.total_size // size
};
// NOTE(review): stage masks passed to vkCmdPipelineBarrier must be supported by the queue
// family the command buffer is submitted to; VK_PIPELINE_STAGE_VERTEX_INPUT_BIT is a
// graphics-pipeline stage — verify it is valid on the compute queue family used in Case 2.
vkCmdPipelineBarrier(
cmd,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, //SRC
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // DST
0,
0, nullptr,
1, &acquire_buffer_barrier,
0, nullptr);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, Scene->pipeline[view]);
// attention here we could have 1 or 2 descriptor sets, depending on the scene
int desc_num = (Scene->ls_layout_flags & LS_LAYOUT_CS_SET10_TEXTURE) ? 2 : 1;
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, Scene->pipeline_layout[view], 0, desc_num, &Scene->descriptor_set[cb][view][0], 0, NULL);
vkCmdDispatch(cmd, Scene->comp_dispatch_x, Scene->comp_dispatch_y, Scene->comp_dispatch_z);
// Barrier intended to release SB1 back to the graphics queue family
// (srcQueueFamilyIndex = compute family, dstQueueFamilyIndex = graphics family).
VkBufferMemoryBarrier release_buffer_barrier =
{
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // sType
nullptr, // pNext
VK_ACCESS_SHADER_WRITE_BIT, // srcAccessMask
0, // dstAccessMask (none)
DeviceParams.compute_queueFamilyIndex, // srcQueueFamilyIndex
DeviceParams.queueFamilyIndex, // dstQueueFamilyIndex
Scene->SB1.buffer.buffer, // buffer
0, // offset
Scene->SB1.total_size // size
};
// Same caveat as for the first barrier regarding VK_PIPELINE_STAGE_VERTEX_INPUT_BIT.
vkCmdPipelineBarrier(
cmd,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
0,
0, nullptr,
1, &release_buffer_barrier,
0, nullptr);
result = vkEndCommandBuffer(DeviceParams.compute_cmd[i][what_group]);
assert(result == VK_SUCCESS);
However, when this is run, the following happens:
ERROR: (COMMAND_BUFFER 0x25268ab0d58) [Validation] [ UNASSIGNED-VkBufferMemoryBarrier-buffer-00004 ] Object: 0x25268ab0d58 (Type = 6) | vkQueueSubmit(): in submitted command buffer VkBufferMemoryBarrier acquiring ownership of VkBuffer (0x183), from srcQueueFamilyIndex 2 to dstQueueFamilyIndex 0 has no matching release barrier queued for execution.
WHY?
By my reasoning, the second memory barrier I recorded, the one called “Release Buffer Barrier”, should do precisely that.
What am I getting wrong here? Why do I get that warning? And what is “the matching release barrier” for the barrier I marked as “acquire”?
I really cannot make logical sense of this.
Thanks in advance.