subpassLoad() returning black

I have a render pass with two subpasses. The first subpass renders the scene to an image, the second renders a fullscreen quad to a swapchain image using the initial image as an input attachment in the fragment shader.

When I call subpassLoad, it is only giving me black. The fragment shader is working, because I can render a static colour with it. Hopefully the code below gives you an idea of how I’ve set things up. Let me know if you need more. Thanks.

This is the fragment shader used in the second subpass:

#version 440

// Input attachment read by this shader: descriptor set 0, binding 2,
// input attachment index 0.
layout(set = 0, binding = 2, input_attachment_index=0) uniform subpassInput renderImage;

layout(location = 0) out vec4 fragColour;

// Writes the value loaded from the input attachment straight to the colour output.
void main()
{
	fragColour = subpassLoad(renderImage);
}

This is how the render pass is set up:

// Attachment descriptions for the render pass:
// [0] intermediate colour image, [1] depth/stencil, [2] output image.
VkAttachmentDescription akAttachments[3];
// Initial render image
akAttachments[0].flags = 0;
akAttachments[0].format = VK_FORMAT_R8G8B8A8_UNORM;
akAttachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
akAttachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
// Contents are not requested to be kept once the render pass ends.
akAttachments[0].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
akAttachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
akAttachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
// NOTE(review): this states the image is already in COLOR_ATTACHMENT_OPTIMAL when
// the render pass begins — confirm a transition is done elsewhere. With
// LOAD_OP_CLEAR, VK_IMAGE_LAYOUT_UNDEFINED would remove that requirement.
akAttachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
akAttachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

// Depth/stencil
akAttachments[1].flags = 0;
akAttachments[1].format = VK_FORMAT_D24_UNORM_S8_UINT;
akAttachments[1].samples = VK_SAMPLE_COUNT_1_BIT;
akAttachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
akAttachments[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
akAttachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
akAttachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
// NOTE(review): same assumption as attachment 0 — the image must already be in
// this layout at render pass begin; verify.
akAttachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
akAttachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

// Output image
akAttachments[2].flags = 0;
akAttachments[2].format = VK_FORMAT_R8G8B8A8_UNORM;
akAttachments[2].samples = VK_SAMPLE_COUNT_1_BIT;
akAttachments[2].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
akAttachments[2].storeOp = VK_ATTACHMENT_STORE_OP_STORE; // TODO: DONT CARE
akAttachments[2].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
akAttachments[2].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
// NOTE(review): finalLayout is PRESENT_SRC_KHR, so on later frames the image is
// presumably not in COLOR_ATTACHMENT_OPTIMAL at render pass begin unless it is
// transitioned back elsewhere — confirm, or use VK_IMAGE_LAYOUT_UNDEFINED here.
akAttachments[2].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
akAttachments[2].finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

// Attachment 0 as used by subpass 0: written as a colour attachment.
VkAttachmentReference kInitalColourRefSub1;
kInitalColourRefSub1.attachment = 0;
kInitalColourRefSub1.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

// Attachment 0 as used by subpass 1: read as an input attachment, hence the
// shader-read-only layout.
VkAttachmentReference kInitalColourRefSub2;
kInitalColourRefSub2.attachment = 0;
kInitalColourRefSub2.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

// Attachment 1: depth/stencil target of subpass 0.
VkAttachmentReference kDepthStencilRef;
kDepthStencilRef.attachment = 1;
kDepthStencilRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

// Attachment 2: colour target of subpass 1.
VkAttachmentReference kFinalColourRef;
kFinalColourRef.attachment = 2;
kFinalColourRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

// Two graphics subpasses.
// Subpass 0: no input attachments; colour output to attachment 0 with
// depth/stencil attachment 1.
VkSubpassDescription akSubpasses[2];
akSubpasses[0].flags = 0;
akSubpasses[0].pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
akSubpasses[0].inputAttachmentCount = 0;
akSubpasses[0].pInputAttachments = nullptr;
akSubpasses[0].colorAttachmentCount = 1;
akSubpasses[0].pColorAttachments = &kInitalColourRefSub1;
akSubpasses[0].pResolveAttachments = nullptr;
akSubpasses[0].pDepthStencilAttachment = &kDepthStencilRef;
akSubpasses[0].preserveAttachmentCount = 0;
akSubpasses[0].pPreserveAttachments = nullptr;

// Subpass 1: attachment 0 as its single input attachment; colour output to
// attachment 2; no depth/stencil.
akSubpasses[1].flags = 0;
akSubpasses[1].pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
akSubpasses[1].inputAttachmentCount = 1;
akSubpasses[1].pInputAttachments = &kInitalColourRefSub2;
akSubpasses[1].colorAttachmentCount = 1;
akSubpasses[1].pColorAttachments = &kFinalColourRef;
akSubpasses[1].pResolveAttachments = nullptr;
akSubpasses[1].pDepthStencilAttachment = nullptr;
akSubpasses[1].preserveAttachmentCount = 0;
akSubpasses[1].pPreserveAttachments = nullptr;

// Dependency from subpass 0 to subpass 1: colour attachment writes must be
// available before fragment-shader input attachment reads.
VkSubpassDependency kDependency;
kDependency.srcSubpass = 0;
kDependency.dstSubpass = 1;
kDependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
kDependency.dstStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
kDependency.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
kDependency.dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
// NOTE(review): input attachment dependencies are commonly declared with
// VK_DEPENDENCY_BY_REGION_BIT — check whether 0 is intended here.
kDependency.dependencyFlags = 0;

// Render pass creation parameters: the three attachments, two subpasses and
// the single subpass dependency defined above.
VkRenderPassCreateInfo kRenderPassInfo;
kRenderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
kRenderPassInfo.pNext = nullptr;
kRenderPassInfo.flags = 0;
kRenderPassInfo.attachmentCount = 3;
kRenderPassInfo.pAttachments = akAttachments;
kRenderPassInfo.subpassCount = 2;
kRenderPassInfo.pSubpasses = akSubpasses;
kRenderPassInfo.dependencyCount = 1;
kRenderPassInfo.pDependencies = &kDependency;

I am using two pipelines, both using the same single descriptor set layout that has 3 descriptors. The first two descriptors are only used in the first pipeline, and the third is the input attachment used in the second pipeline. This is how the input attachment descriptor is set up:

...
// Third layout binding: a single input attachment at binding 2, visible to the
// fragment stage (matches the shader's "binding = 2" declaration).
kDescSetLayoutBind[2].binding = 2;
kDescSetLayoutBind[2].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
kDescSetLayoutBind[2].descriptorCount = 1;
kDescSetLayoutBind[2].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
kDescSetLayoutBind[2].pImmutableSamplers = nullptr;

// Descriptor set layout covering all three bindings (the first two are set up
// in the elided code).
VkDescriptorSetLayoutCreateInfo kDescSetLayoutInfo;
kDescSetLayoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
kDescSetLayoutInfo.pNext = nullptr;
kDescSetLayoutInfo.flags = 0;
kDescSetLayoutInfo.bindingCount = 3;
kDescSetLayoutInfo.pBindings = &kDescSetLayoutBind[0];

...
// Pool capacity for the input attachment descriptor (one descriptor).
akPoolSizes[2].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
akPoolSizes[2].descriptorCount = 1;

// Descriptor pool sized for a single set, using the three pool-size entries
// (the first two are filled in the elided code).
VkDescriptorPoolCreateInfo kDescPoolInfo;
kDescPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
kDescPoolInfo.pNext = nullptr;
kDescPoolInfo.flags = 0;
kDescPoolInfo.maxSets = 1;
kDescPoolInfo.poolSizeCount = 3;
kDescPoolInfo.pPoolSizes = akPoolSizes;

...

// Image info for the input attachment descriptor: no sampler, and the same
// layout that subpass 1's attachment reference uses.
// NOTE(review): m_hInitialRenderImageView is presumably the view bound as
// framebuffer attachment 0 — verify it is the same view.
VkDescriptorImageInfo kDescInputAttachInfo;
kDescInputAttachInfo.sampler = VK_NULL_HANDLE;
kDescInputAttachInfo.imageView = m_hInitialRenderImageView;
kDescInputAttachInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

...

// Descriptor write for binding 2 of m_hDescSet: one input attachment described
// by kDescInputAttachInfo. Buffer and texel-buffer pointers are unused.
akWriteSets[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
akWriteSets[2].pNext = nullptr;
akWriteSets[2].dstSet = m_hDescSet;
akWriteSets[2].dstBinding = 2;
akWriteSets[2].dstArrayElement = 0;
akWriteSets[2].descriptorCount = 1;
akWriteSets[2].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
akWriteSets[2].pImageInfo = &kDescInputAttachInfo;
akWriteSets[2].pBufferInfo = nullptr;
akWriteSets[2].pTexelBufferView = nullptr;

// Update sets

Currently I am not doing any draw calls in the first subpass, just clearing the image to red, so I would expect subpassLoad to return red. This is the render command:

// Clear values, indexed by attachment number.
VkClearValue kClearValues[3];
// Attachment 0 (initial render image): opaque red.
kClearValues[0].color.float32[0] = 1.0f;
kClearValues[0].color.float32[1] = 0.0f;
kClearValues[0].color.float32[2] = 0.0f;
kClearValues[0].color.float32[3] = 1.0f;
// Attachment 1 (depth/stencil): depth 1.0, stencil 0.
kClearValues[1].depthStencil.depth = 1.0f;
kClearValues[1].depthStencil.stencil = 0;
// Attachment 2 (output image): opaque green.
kClearValues[2].color.float32[0] = 0.0f;
kClearValues[2].color.float32[1] = 1.0f;
kClearValues[2].color.float32[2] = 0.0f;
kClearValues[2].color.float32[3] = 1.0f;

...

// Render pass begin parameters for the framebuffer of image uiImage.
VkRenderPassBeginInfo kRenderBeginInfo;
kRenderBeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
kRenderBeginInfo.pNext = nullptr;
kRenderBeginInfo.renderPass = m_hRenderPass;
kRenderBeginInfo.framebuffer = m_kFramebuffers[uiImage];
// Render area: offset (0, 0), extent taken from the window's width and height.
kRenderBeginInfo.renderArea = {
	0, 0,
	spkRenderWindow->GetWindow()->GetWidth(),
	spkRenderWindow->GetWindow()->GetHeight()
};
// One clear value per attachment.
kRenderBeginInfo.clearValueCount = 3;
kRenderBeginInfo.pClearValues = kClearValues;

// Begin the render pass (subpass 0); commands are recorded inline in this
// command buffer.
vkCmdBeginRenderPass(
	m_kFrameCmdBuffers[uiImage],
	&kRenderBeginInfo,
	VK_SUBPASS_CONTENTS_INLINE);

// Bind the single descriptor set at set index 0, with no dynamic offsets.
vkCmdBindDescriptorSets(
	m_kFrameCmdBuffers[uiImage],
	VK_PIPELINE_BIND_POINT_GRAPHICS,
	m_kPipelineLayouts[0],
	0,
	1,
	&m_hDescSet,
	0,
	nullptr);

// Pipeline for the first subpass.
vkCmdBindPipeline(
	m_kFrameCmdBuffers[uiImage],
	VK_PIPELINE_BIND_POINT_GRAPHICS,
	m_kPipelines[0]);

// Bind viewport + scissor

// Draw calls (commented out)

// Advance to subpass 1, again recording inline.
vkCmdNextSubpass(
	m_kFrameCmdBuffers[uiImage],
	VK_SUBPASS_CONTENTS_INLINE);

// Re-bind the same descriptor set using pipeline layout 0.
// NOTE(review): m_kPipelines[1] is bound below — confirm it was created with a
// layout compatible with m_kPipelineLayouts[0].
vkCmdBindDescriptorSets(
	m_kFrameCmdBuffers[uiImage],
	VK_PIPELINE_BIND_POINT_GRAPHICS,
	m_kPipelineLayouts[0],
	0,
	1,
	&m_hDescSet,
	0,
	nullptr);

// Pipeline for the second subpass.
vkCmdBindPipeline(
	m_kFrameCmdBuffers[uiImage],
	VK_PIPELINE_BIND_POINT_GRAPHICS,
	m_kPipelines[1]);

// Bind viewport + scissor

// (Quad hardcoded in shader)
// 6 vertices, 1 instance, first vertex 0, first instance 0.
vkCmdDraw(
	m_kFrameCmdBuffers[uiImage],
	6,
	1,
	0,
	0);

vkCmdEndRenderPass(
	m_kFrameCmdBuffers[uiImage]);

Bit TL;DR… You discard your results by VK_ATTACHMENT_STORE_OP_DONT_CARE?

As far as I can tell, it shouldn’t matter, because I think the contents are only discarded after the render pass has finished, when I no longer need them (is that correct?). But I have tried it with OP_STORE anyway, with no luck.

Thanks krOoze.

I think so, only I assume you need at least the last image, to display it later?

Well, moving on then. It’s not exactly a minimal example… Before I read on, may I assume:
a) you have the standard layers on, verified they work, AND they do not complain about anything?
b) latest drivers(e.g. 16.5.2 for AMD) and SDK (1.0.11.1)?
c) some simplified version / previous iteration of it works. E.g. drawing a triangle without multiple pipelines and subpasses and whatnot.
d) you actually submit these commands to a queue, unlike me the other day

Correct. I am storing attachment 3, which is the swapchain image, after the initial render image has been drawn to it in subpass 2. (ignore the todo on that line)

Well, moving on then. It’s not exactly a minimal example… Before I read on, may I assume:
a) you have the standard layers on, verified they work, AND they do not complain about anything?
b) latest drivers(e.g. 16.5.2 for AMD) and SDK (1.0.11.1)?
c) some simplified version / previous iteration of it works. E.g. drawing a triangle without multiple pipelines and subpasses and whatnot.
d) you actually submit these commands to a queue, unlike me the other day

Validation layers are on, and have been working, but no errors or warnings are showing up for this.
I have SDK 1.0.11.1, and my NVIDIA driver is 365.10.
I’ve successfully rendered a triangle on screen using a uniform buffer for vertex inputs.
I am submitting the commands :slight_smile:

The swapchain image is being rendered to, and displayed on screen successfully (apart from the black square). Currently I am drawing the quad at less than full screen, and I am seeing the clear colour for attachment 3 (green), and a black square in the center.

I removed the second subpass, and drew straight to the swapchain image, and I am getting the red clear colour for the first attachment, so it would seem the first render pass is working too.

It must be something to do with the transition from colour attachment to input attachment, or maybe a problem with my descriptors. If I have understood the spec correctly, the transition from colour attachment to input attachment, between subpasses, is handled automatically, and the contents won’t be discarded unless there is a subpass in between in which the attachment is not used and is not preserved. I shouldn’t have to do any memory barriers there, should I?

I’m trying to simplify the program a bit. I’m rendering the basic triangle in the first subpass, rather than trying to draw a mesh with vertex inputs. I’m getting a driver crash when I call vkCmdDraw in the first subpass at the moment, so I’ll try to fix that up. Not sure if it’s related to the other problem.

Ok, so the problem doesn’t seem to be with the subpasses.

If I render with one subpass, and one pipeline, with one descriptor for vertex data:

// Uniform block at set 0, binding 0: a transform matrix followed by three
// positions and three colours.
layout(set = 0, binding = 0) uniform InputBuffer {
        mat4 MVP;
		vec4 positions[3];
		vec4 colours[3];
} inBuffer;

everything works fine. A coloured triangle is drawn.

If I add an input attachment to the render pass and descriptor set, it will crash when I try to draw. If I comment out the uniform buffer usage in the shader, so that it’s just drawing a static triangle, it won’t crash, but the input attachment value will be black. NVIDIA error is “error code: 3 (subcode 7)” when accessing the uniform buffer (I assume).

Am I doing anything wrong with the descriptor sets?:

// In frag shader: input attachment at set 0, binding 1, input attachment index 0.
layout(input_attachment_index=0, set = 0, binding = 1) uniform subpassInput renderImage;

// C++
// CPU-side copy of the data uploaded to the uniform buffer. Field order and
// types mirror the shader's InputBuffer block (mat4, vec4[3], vec4[3]).
struct UniformData
{
    glm::mat4 kMVP;            // combined projection * view matrix
    glm::vec4 akPositions[3];  // triangle vertex positions
    glm::vec4 akColours[3];    // per-vertex colours
};

...

// Two layout bindings for the single descriptor set.
std::vector<VkDescriptorSetLayoutBinding> kDescSetLayoutBind(2);
// Binding 0: uniform buffer, visible to the vertex stage only.
kDescSetLayoutBind[0].binding = 0;
kDescSetLayoutBind[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
kDescSetLayoutBind[0].descriptorCount = 1;
kDescSetLayoutBind[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
kDescSetLayoutBind[0].pImmutableSamplers = nullptr;

// Binding 1: input attachment, visible to the fragment stage only.
kDescSetLayoutBind[1].binding = 1;
kDescSetLayoutBind[1].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
kDescSetLayoutBind[1].descriptorCount = 1;
kDescSetLayoutBind[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
kDescSetLayoutBind[1].pImmutableSamplers = nullptr;

// One layout create-info referencing both bindings.
std::vector<VkDescriptorSetLayoutCreateInfo> kDescSetLayoutInfo(1);
kDescSetLayoutInfo[0].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
kDescSetLayoutInfo[0].pNext = nullptr;
kDescSetLayoutInfo[0].flags = 0;
// size() returns size_t; it is implicitly narrowed to the uint32_t field here.
kDescSetLayoutInfo[0].bindingCount = kDescSetLayoutBind.size();
kDescSetLayoutInfo[0].pBindings = &kDescSetLayoutBind[0];

...

// Fill the uniform data: projection * view matrix plus a hard-coded triangle.
UniformData kData;
// Camera at (0, 0, 10) looking at the origin with +Y up; 4:3 aspect ratio,
// near plane 0.1, far plane 1000.
// NOTE(review): GLM versions differ on whether the first argument of
// glm::perspective is in degrees or radians — confirm 45.0f is interpreted
// as intended for the GLM version/configuration in use.
kData.kMVP =
	glm::perspective(45.0f, 4.0f / 3.0f, 0.1f, 1000.0f) *
	glm::lookAt(
		glm::vec3(0.0f, 0.0f, 10.0f),
		glm::vec3(0.0f, 0.0f, 0.0f),
		glm::vec3(0.0f, 1.0f, 0.0f)
	);
// Triangle vertices (w = 1).
kData.akPositions[0] = glm::vec4(0.0f, 1.0f, 0.0f, 1.0f);
kData.akPositions[1] = glm::vec4(-1.0f, 0.0f, 0.0f, 1.0f);
kData.akPositions[2] = glm::vec4(1.0f, 0.0f, 0.0f, 1.0f);
// Per-vertex colours: red, green, blue.
kData.akColours[0] = glm::vec4(1.0f, 0.0f, 0.0f, 1.0f);
kData.akColours[1] = glm::vec4(0.0f, 1.0f, 0.0f, 1.0f);
kData.akColours[2] = glm::vec4(0.0f, 0.0f, 1.0f, 1.0f);

// Create a uniform buffer exactly large enough for one UniformData, with
// exclusive sharing (no queue family list needed).
VkBufferCreateInfo kBufferInfo;
kBufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
kBufferInfo.pNext = nullptr;
kBufferInfo.flags = 0;
kBufferInfo.size = sizeof(UniformData);
kBufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
kBufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
kBufferInfo.queueFamilyIndexCount = 0;
kBufferInfo.pQueueFamilyIndices = nullptr;

VkResult eResult = vkCreateBuffer(
	m_hDevice,
	&kBufferInfo,
	&m_kAllocCallbacks,
	&m_hMVPBuffer);
CheckResult(eResult, "Failed to create buffer");

// Query the buffer's memory requirements, allocate memory for it, upload the
// uniform data through a host mapping, then bind the memory to the buffer.
VkMemoryRequirements kMemReq;
vkGetBufferMemoryRequirements(
	m_hDevice,
	m_hMVPBuffer,
	&kMemReq);

// NOTE(review): only HOST_VISIBLE is requested. If AllocateDeviceMemory can
// return a non-HOST_COHERENT memory type, the write below needs a
// vkFlushMappedMemoryRanges before the device reads it — confirm.
m_hMVPMemory = AllocateDeviceMemory(
	kMemReq.size,
	kMemReq.memoryTypeBits,
	VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

// Map the whole allocation starting at offset 0.
void* pvBufferData;
eResult = vkMapMemory(
	m_hDevice,
	m_hMVPMemory,
	0,
	kMemReq.size,
	0,
	&pvBufferData);
CheckResult(eResult, "Failed to map buffer memory");

// Copy only sizeof(UniformData) bytes; kMemReq.size may be larger.
memcpy(pvBufferData, &kData, sizeof(UniformData));

vkUnmapMemory(
	m_hDevice,
	m_hMVPMemory);

// Bind the memory to the buffer at offset 0 (done after the upload here).
eResult = vkBindBufferMemory(
	m_hDevice,
	m_hMVPBuffer,
	m_hMVPMemory,
	0);
CheckResult(eResult, "Failed to bind buffer memory");

// Pool sizes: one uniform buffer descriptor and one input attachment
// descriptor, matching the two layout bindings.
std::vector<VkDescriptorPoolSize> kPoolSizes(2);
kPoolSizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
kPoolSizes[0].descriptorCount = 1;

kPoolSizes[1].type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
kPoolSizes[1].descriptorCount = 1;

// Descriptor pool that can hold a single descriptor set.
VkDescriptorPoolCreateInfo kDescPoolInfo;
kDescPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
kDescPoolInfo.pNext = nullptr;
kDescPoolInfo.flags = 0;
kDescPoolInfo.maxSets = 1;
// size() returns size_t; it is implicitly narrowed to the uint32_t field here.
kDescPoolInfo.poolSizeCount = kPoolSizes.size();
kDescPoolInfo.pPoolSizes = &kPoolSizes[0];

eResult = vkCreateDescriptorPool(
	m_hDevice,
	&kDescPoolInfo,
	&m_kAllocCallbacks,
	&m_hDescPool);
CheckResult(eResult, "Failed to create descriptor pool");

// Allocate one descriptor set from the pool using the first stored layout.
// NOTE(review): m_kDescLayouts[0] is presumably the layout created from
// kDescSetLayoutInfo in the elided code — verify.
VkDescriptorSetAllocateInfo kDescAllocInfo;
kDescAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
kDescAllocInfo.pNext = nullptr;
kDescAllocInfo.descriptorPool = m_hDescPool;
kDescAllocInfo.descriptorSetCount = 1;
kDescAllocInfo.pSetLayouts = &m_kDescLayouts[0];

// Make room for the single set handle before allocating into it.
m_kDescSets.resize(1);
eResult = vkAllocateDescriptorSets(
	m_hDevice,
	&kDescAllocInfo,
	&m_kDescSets[0]);
CheckResult(eResult, "Failed to allocate descriptor sets");

// Buffer info for the uniform buffer descriptor: the whole UniformData from
// offset 0.
VkDescriptorBufferInfo kUniformBufferInfo;
kUniformBufferInfo.buffer = m_hMVPBuffer;
kUniformBufferInfo.offset = 0;
kUniformBufferInfo.range = sizeof(UniformData);

// Image info for the input attachment descriptor: no sampler, read in
// shader-read-only layout.
VkDescriptorImageInfo kInputImageInfo;
kInputImageInfo.sampler = VK_NULL_HANDLE;
kInputImageInfo.imageView = m_hInputImageView;
kInputImageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

// Two descriptor writes into the same set, submitted in one update call.
std::vector<VkWriteDescriptorSet> kWriteSets(2);
// Write 0: uniform buffer at binding 0 (only pBufferInfo is used).
kWriteSets[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
kWriteSets[0].pNext = nullptr;
kWriteSets[0].dstSet = m_kDescSets[0];
kWriteSets[0].dstBinding = 0;
kWriteSets[0].dstArrayElement = 0;
kWriteSets[0].descriptorCount = 1;
kWriteSets[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
kWriteSets[0].pImageInfo = nullptr;
kWriteSets[0].pBufferInfo = &kUniformBufferInfo;
kWriteSets[0].pTexelBufferView = nullptr;

// Write 1: input attachment at binding 1 (only pImageInfo is used).
kWriteSets[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
kWriteSets[1].pNext = nullptr;
kWriteSets[1].dstSet = m_kDescSets[0];
kWriteSets[1].dstBinding = 1;
kWriteSets[1].dstArrayElement = 0;
kWriteSets[1].descriptorCount = 1;
kWriteSets[1].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
kWriteSets[1].pImageInfo = &kInputImageInfo;
kWriteSets[1].pBufferInfo = nullptr;
kWriteSets[1].pTexelBufferView = nullptr;

// Apply both writes; there are no descriptor copies.
vkUpdateDescriptorSets(
	m_hDevice,
	kWriteSets.size(),
	&kWriteSets[0],
	0,
	nullptr);

Update: If I split them into two descriptor sets, it still crashes. If I only update and use the uniform buffer, there is no crash, but the triangle doesn’t show. If I only update and use the input attachment, the black triangle shows.