It turns out to be a problem with the imageStore function in the following shader:
#version 310 es
// Compute shader that stores one constant texel per invocation into the bound storage image.
// Only local_size_x is declared, and it is 1.
layout (local_size_x=1) in;
// Write-only rgba8 storage image declared at set 0, binding 0.
layout (rgba8, binding = 0, set = 0) lowp uniform writeonly image2D imageB;
void main(){
// The target texel coordinate is taken from the work-group ID (x, y).
// NOTE(review): the stored value is vec4(10,10,10,10); for an rgba8 image, components above 1.0 are presumably clamped on store — confirm this is intended.
imageStore(imageB, ivec2(gl_WorkGroupID.x, gl_WorkGroupID.y), vec4(10,10,10,10));
// NOTE(review): x is never read in the visible shader.
int x = 0;
}";
Once I remove the imageStore call, no errors are reported, but the program runs at only 20 fps, even though there is just one command buffer per swap-chain image. Each command buffer is recorded as follows (this is a snapshot of the body of the loop that records them):
// Record the command buffer for one swap-chain image:
//   1. transition the image UNDEFINED -> GENERAL so the compute shader may write to it,
//   2. bind the descriptor set and compute pipeline, then dispatch width x height work groups,
//   3. transition the image GENERAL -> PRESENT_SRC_KHR so it can be presented.
vkBeginCommandBuffer(commandBuffer[swapChainImageIndex], &commandBufferBeginInfo);

// Barrier 1 (UNDEFINED -> GENERAL).
// srcAccessMask describes accesses that happened BEFORE the barrier: the old contents are
// discarded (oldLayout is UNDEFINED), so there is nothing to make available -> 0.
// dstAccessMask describes accesses that happen AFTER the barrier: the shader's imageStore.
VkImageMemoryBarrier imageMemoryBarrier = {
    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
    .pNext = nullptr,
    .srcAccessMask = 0,
    .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
    .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    .newLayout = VK_IMAGE_LAYOUT_GENERAL,
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .image = swapChainImages[swapChainImageIndex],
    .subresourceRange = {
        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    },
};
// No global or buffer memory barriers are recorded (both counts are 0), only the image barrier.
vkCmdPipelineBarrier(commandBuffer[swapChainImageIndex],
                     VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                     0,
                     0, nullptr,
                     0, nullptr,
                     1, &imageMemoryBarrier);

vkCmdBindDescriptorSets(commandBuffer[swapChainImageIndex], VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderPipelineLayout, 0, 1, &descriptorSets[swapChainImageIndex], 0, nullptr);
vkCmdBindPipeline(commandBuffer[swapChainImageIndex], VK_PIPELINE_BIND_POINT_COMPUTE, computeShaderPipeline);
vkCmdDispatch(commandBuffer[swapChainImageIndex], width, height, 1);

// Barrier 2 (GENERAL -> PRESENT_SRC_KHR).
// The shader write is the access that must be made available (source); the subsequent
// read of the image is the destination access.
imageMemoryBarrier = {
    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
    .pNext = nullptr,
    .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
    .newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .image = swapChainImages[swapChainImageIndex],
    .subresourceRange = {
        .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    },
};
vkCmdPipelineBarrier(commandBuffer[swapChainImageIndex],
                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                     0,
                     0, nullptr,
                     0, nullptr,
                     1, &imageMemoryBarrier);
vkEndCommandBuffer(commandBuffer[swapChainImageIndex]);
The descriptor sets are updated as follows (a snapshot of the body of the actual loop):
// Image info for the storage-image descriptor: the swap-chain image view for this
// descriptor set, to be accessed in GENERAL layout.
// NOTE(review): the write below stores the ADDRESS of this variable in pImageInfo. If this is a
// per-iteration local and vkUpdateDescriptorSets is called only after the loop, every stored
// pointer refers to a variable whose lifetime has ended (or to the last iteration's contents) —
// confirm where the update call happens and keep one info struct alive per descriptor set.
VkDescriptorImageInfo descriptorImageInfo = {
// NOTE(review): the descriptor type below is STORAGE_IMAGE; the sampler is presumably unused for that type — confirm.
.sampler = sampler,
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
.imageView = swapChainImageViews[descriptorSetIndex],
};
// Describe one write: a single STORAGE_IMAGE descriptor at binding 0, array element 0,
// of the descriptor set with the same index as the image view above.
writeDescriptorSets[descriptorSetIndex] = {
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = descriptorSets[descriptorSetIndex],
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.pBufferInfo = nullptr,
// Pointer to the image info declared above; it must still be valid when the write is consumed.
.pImageInfo = &descriptorImageInfo,
.pTexelBufferView = nullptr,
.pNext = nullptr,
};
The swap chain and its images are created as follows:
uint32_t chosenFormat;
for (chosenFormat = 0; chosenFormat < formatCount; chosenFormat++) {
if (formats[chosenFormat].format == VK_FORMAT_R8G8B8A8_UNORM) break;
}
assert(chosenFormat < formatCount);
VkSurfaceCapabilitiesKHR surfaceCap;
vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device.gpuDevice_, device.surface_, &surfaceCap);
assert(surfaceCap.supportedCompositeAlpha | VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR && surfaceCap.supportedUsageFlags & VK_IMAGE_USAGE_STORAGE_BIT);
VkSwapchainCreateInfoKHR swapchainCreateInfoKhr = {
.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
.queueFamilyIndexCount = 1,
.pQueueFamilyIndices = &device.queueFamilyIndex_,
.surface = device.surface_,
.minImageCount = swapChainImageCount,
.presentMode = VK_PRESENT_MODE_FIFO_KHR,
.imageExtent = surfaceCapabilities.currentExtent,
.imageArrayLayers = 1,
.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
.imageColorSpace = formats[chosenFormat].colorSpace,
.imageFormat = formats[chosenFormat].format,
.imageUsage = VK_IMAGE_USAGE_STORAGE_BIT,
.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
.compositeAlpha = VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
.clipped = VK_FALSE,
.oldSwapchain = 0,
.flags = 0,
.pNext = nullptr,
};
vkCreateSwapchainKHR(device.device_, &swapchainCreateInfoKhr, nullptr, &swapChain);
I create the pipeline layout as follows:
// Binding 0: a single storage image, visible to the compute stage only.
descriptorSetLayoutBindings[0] = {
    .binding = 0,
    .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
    .descriptorCount = 1,
    .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    .pImmutableSamplers = nullptr,
};

// Descriptor set layout built from that one binding.
VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {
    .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
    .pNext = nullptr,
    .flags = 0,
    .bindingCount = 1,
    .pBindings = descriptorSetLayoutBindings,
};
vkCreateDescriptorSetLayout(device.device_,
                            &descriptorSetLayoutCreateInfo,
                            nullptr,
                            &descriptorSetLayout);

// Pipeline layout: exactly one descriptor set layout. The push-constant range count is 0,
// although a pointer to pushConstantRange is still supplied.
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
    .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    .pNext = nullptr,
    .flags = 0,
    .setLayoutCount = 1,
    .pSetLayouts = &descriptorSetLayout,
    .pushConstantRangeCount = 0,
    .pPushConstantRanges = &pushConstantRange,
};
vkCreatePipelineLayout(device.device_,
                       &pipelineLayoutCreateInfo,
                       nullptr,
                       &computeShaderPipelineLayout);
I use the descriptor set layout created above as the layout for every descriptor set I allocate from the descriptor pool (one descriptor set per swapchain image).
If you have any questions, let me know. If you have any jokes, let me laugh!
With all this code in place, the result is still the same: the output flickers, and only swapChainIndex = 0 triggers the validation layer. When imageStore is removed from the shader, however, both swapChainImages transition without a problem.
I still do not understand why swapChainIndex = 1 is not affected at all. The synchronization specs seem to agree with my implementation (this is to clear up my previous beliefs), and given that imageStore is the cause, it must have something to do with how I am building and binding the descriptor sets, descriptor set layouts, and pipeline layouts.