"readPixels" on vulkan

Hi everyone!

How can I implement “readPixel” functionality using vulkan?
Could you explain which steps I have to do.

Thanks a lot!

As for the steps you:

  1. Prepare everything you would need for standard rendering
  2. Create Image of mutable format you need with linear tiling. Allocate Memory that is HOST_VISIBLE and bind it to the image.
  3. Create an ImageView from the Image with the format the renderer needs. Create a Framebuffer from the ImageView.
  4. Prepare your rendering CommandBuffer as you would normally, but have it draw to the Framebuffer of your Image. Put a barrier at the end on the Image with ACCESS_HOST_READ_BIT and LAYOUT_GENERAL.
  5. enqueue the command buffer
  6. vkMapMemory the Memory of the image and do whatever you need with it

Note(correct me if I am wrong): I don’t think you can use Image of the Swapchain directly, because you have no guarantee it is allocated HOST_VISIBLE and you have only limited control over the format used. If you want both screen output as well as host access, you have to either provide two Framebuffers or draw two times.

This extremely short snippet seems to work for me (you still need to provide device, rendered triangles, renderpass etc. etc.):


	VkImage imageSource;
	VkImageCreateInfo isci{ VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, nullptr, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, VK_IMAGE_TYPE_2D, VK_FORMAT_R8G8B8A8_UINT, {width,height, 1},
	                   1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 1, &queueFamily };
	VkResult errorCode = vkCreateImage( device, &isci, nullptr, &imageSource ); RESULT_HANDLER( errorCode, "vkCreateImage" );

	VkMemoryRequirements ismr;
	vkGetImageMemoryRequirements( device, imageSource, &ismr );

	uint32_t memoryType = 0; bool found = false;
	for( uint32_t i = 0; i < 32; ++i ){
		if(  ( ismr.memoryTypeBits & (0x1 << i) )  &&  physicalDeviceMemoryProperties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT  ){
				memoryType = i; found = true; break;
		}
	}
	if( !found ) throw "Can't find compatible mappable memory for image";

	VkDeviceMemory memorySource;
	VkMemoryAllocateInfo memoryInfo{ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, ismr.size, memoryType };
	errorCode = vkAllocateMemory( device, &memoryInfo, nullptr, &memorySource ); RESULT_HANDLER( errorCode, "vkAllocateMemory" );
	errorCode = vkBindImageMemory( device, imageSource, memorySource, 0 ); RESULT_HANDLER( errorCode, "vkBindImageMemory" );

	VkImageView imageSourceView;
	VkImageViewCreateInfo isvci{  VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, nullptr, 0, imageSource, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R8G8B8A8_UNORM,
	                              { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY },
	                              { VK_IMAGE_ASPECT_COLOR_BIT, 0, VK_REMAINING_MIP_LEVELS, 0, VK_REMAINING_ARRAY_LAYERS }  };
	errorCode = vkCreateImageView( device, &isvci, nullptr, &imageSourceView ); RESULT_HANDLER( errorCode, "vkCreateImageView" );

	VkFramebuffer framebufferSource;
	VkFramebufferCreateInfo fsci{ VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, nullptr, 0, renderPass, 1, &imageSourceView, width, height, 1 };
	errorCode = vkCreateFramebuffer( device, &fsci, nullptr, &framebufferSource ); RESULT_HANDLER( errorCode, "vkCreateFramebuffer" );


	VkCommandBuffer renderCommandBuffer;
	VkCommandBufferAllocateInfo rcbai{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr, commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1 };
	errorCode = vkAllocateCommandBuffers( device, &rcbai, &renderCommandBuffer ); RESULT_HANDLER( errorCode, "vkAllocateCommandBuffers" );

	beginCommandBuffer( renderCommandBuffer );
		VkImageMemoryBarrier predrawBarrier{  VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, nullptr, 0, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
		                                      VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, imageSource, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }  };
		vkCmdPipelineBarrier( renderCommandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &predrawBarrier );

		VkRenderPassBeginInfo rpbi{ VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, nullptr, renderPass, framebufferSource, {{0,0}, {width,height}}, 1, &clearColor };
		vkCmdBeginRenderPass( renderCommandBuffer, &rpbi, VK_SUBPASS_CONTENTS_INLINE );

		vkCmdBindPipeline( renderCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline );
		vkCmdBindDescriptorSets( renderCommandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, nullptr );
		VkDeviceSize offsets[] = {0};
		vkCmdBindVertexBuffers( renderCommandBuffer, vertexBufferBinding, 1, &vertexBuffer, offsets );

		VkViewport viewport{ 0.0f, 0.0f, (float)width, (float)height, 0.0f, 1.0f };
		vkCmdSetViewport( renderCommandBuffer, 0, 1, &viewport );
		VkRect2D scissor{ {0, 0}, {width, height} };
		vkCmdSetScissor( renderCommandBuffer, 0, 1, &scissor );

		vkCmdDraw( renderCommandBuffer, triangle.size(), 1, 0, 0 );

		vkCmdEndRenderPass( renderCommandBuffer );

		VkImageMemoryBarrier premapBarrier{  VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, nullptr, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
		                                      VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, imageSource, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }  };
		vkCmdPipelineBarrier( renderCommandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, nullptr, 0, nullptr, 1, &premapBarrier );
	endCommandBuffer( renderCommandBuffer );

	// Drain the device, submit the recorded work with no semaphores or fence,
	// then block until the queue has finished so the image can be read.
	// Both waits can fail (e.g. device loss); check them like every other call
	// so the code never goes on to map an image that was not rendered.
	errorCode = vkDeviceWaitIdle( device ); RESULT_HANDLER( errorCode, "vkDeviceWaitIdle" );
	VkSubmitInfo submit{ VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, nullptr, 1, &renderCommandBuffer, 0, nullptr };
	errorCode = vkQueueSubmit( queue, 1, &submit, VK_NULL_HANDLE ); RESULT_HANDLER( errorCode, "vkQueueSubmit" );
	errorCode = vkQueueWaitIdle( queue ); RESULT_HANDLER( errorCode, "vkQueueWaitIdle" );

	void* data;
	errorCode = vkMapMemory( device, memorySource, 0, VK_WHOLE_SIZE, 0, &data ); RESULT_HANDLER( errorCode, "vkMapMemory" );
	std::ofstream ofs( "out.raw", std::ostream::binary );
	ofs.write( (char*)data, width * height * 4 );
	vkUnmapMemory( device, memorySource );

	// Release everything this snippet created, dependents first: the command
	// buffer (which references the framebuffer), then framebuffer -> view ->
	// image, and only then the memory the image was bound to.
	vkFreeCommandBuffers( device, commandPool, 1, &renderCommandBuffer );
	vkDestroyFramebuffer( device, framebufferSource, nullptr );
	vkDestroyImageView( device, imageSourceView, nullptr );
	vkDestroyImage( device, imageSource, nullptr );
	vkFreeMemory( device, memorySource, nullptr );

PS: If your swapchain image supports VK_IMAGE_USAGE_TRANSFER_SRC_BIT (as queried by vkGetPhysicalDeviceSurfaceCapabilitiesKHR and used during vkCreateSwapchainKHR), you can then alternatively copy that Image using vkCmdCopyImage and map that copy to host.

Hi, krOoze!
Thank you very much for your answer and code snippet.
I was worried that no one would answer)

Please don’t use VK_IMAGE_TILING_LINEAR or HOST_VISIBLE unless absolutely required, as there will be performance hits depending on the GPU.

For the use-case of “readpixels” it is much better to keep the framebuffer images “fast” with VK_IMAGE_TILING_OPTIMAL and not in CPU-memory (HOST_VISIBLE).

Just like when working with PBOs in OpenGL, use a separate “staging” image or buffer (which is host visible for mapping) that you copy your “fast” render framebuffer image into. That way the actual rendering is not affected by non-optimal formats/memory type, and rendering is where you will access the memory foremost. The cost to copy the data and potentially transform the format will not be as bad as the permanent hit you get when rendering.

Yeah, there is an error in the example. You definitely don’t want host-allocated memory.
Bad: propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
right: propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)

Making performance forecasts is a dangerous game. I simply posted the working code sample I have, and I outlined the possibly better alternative as soon as I posted it. Though there is an error there too: one would have to use vkCmdBlitImage instead, to convert to the linear tiling.

One should probably also refrain from using mutable format Image-ImageView without reason. Also the synchronization in the example is too restrictive.

no worries Krooze, it’s new for everyone.

main point is to use one extra “mapped” image/buffer for the readback results, and leave the rendering images dedicated.

this here won’t work correctly, as it will return “true” if one of the bits is set, not both
propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)

to require both bits you need
(propertyFlags & (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) == (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)

and that combination actually doesn’t exist on all hardware either
NVIDIA GTX 970 - Vulkan Hardware Database by Sascha Willems (click Memory tab)