I need to run a function on CPU between two GPU batches. For this I use timeline semaphores. As far as I know, vkQueueSubmit does not block. However, it blocks when I submit these GPU batches:
uint64_t host_wait = timeline;
uint64_t host_signal = ++timeline;
uint64_t wait0 = timeline;
uint64_t signal0 = ++timeline;
uint64_t wait1 = timeline;
uint64_t signal1 = ++timeline;
VkPipelineStageFlags wait_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkTimelineSemaphoreSubmitInfo sp_submit0 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait0,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal0,
};
VkSubmitInfo submit0 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit0,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkTimelineSemaphoreSubmitInfo sp_submit1 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait1,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal1,
};
VkSubmitInfo submit1 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit1,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkSubmitInfo infos[2] = { submit0, submit1 };
vkQueueSubmit(queue, 2, infos, fence);
// here vkQueueSubmit blocks the thread
WaitSemaphore(timeline_semaphore, host_wait);
some_function();
SignalSemaphore(timeline_semaphore, host_signal);
It is blocking for seconds without return, I think this is something like a deadlock. In the debugger, I saw SleepEx function call from vkQueueSubmit: vk_icdGetInstanceProcAddrSG -> … -> SleepEx. Also, when I add a command buffer to submit0
, vkQueueSubmit doesn’t block. Is it a bug in the driver?
However, in this following sample (equivalent to the first), where two batches are combined into one, vkQueueSubmit does not block:
uint64_t host_wait = timeline;
uint64_t host_signal = ++timeline;
uint64_t wait1 = timeline;
uint64_t signal1 = ++timeline;
VkPipelineStageFlags wait_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkTimelineSemaphoreSubmitInfo sp_submit1 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait1,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal1,
};
VkSubmitInfo submit1 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit1,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkSubmitInfo infos[1] = { submit1 };
vkQueueSubmit(queue, 1, infos, fence);
WaitSemaphore(timeline_semaphore, host_wait);
some_function();
SignalSemaphore(timeline_semaphore, host_signal);
Why vkQueueSubmit blocks in the first code sample? What are the possible causes of this problem?
I use Vulkan 1.2 (SDK 1.2.135) on Windows 10 and Radeon RX 570 (driver 20.4.2).