Vulkan seems to be rendering only a part of my command buffer

Hi,

I’m recording a command buffer to render 10 meshes, but only 4 of the first ones render. If I record 100, only 25 of the first ones render. After checking that I indeed do record the commands properly and send the uniform data, my first thought was that I’ve messed up something in the synchronization, but everything seems fine. I give vkAcquireNextImageKHR a semaphore, which I then use as a waitSemaphore when I submit the command buffer. What could cause this partial execution of the command buffer?

void Rendering_update() {
    auto &swapchain = Rendering->swapchain;
    wait_for_in_flight_fence        (swapchain);
    acquire_next_image              (swapchain);
    Rendering_record_command_buffer (swapchain.image_current);
    Rendering_update_uniform_buffer (swapchain.image_current);
    wait_for_in_flight_image        (swapchain);
    reset                           (swapchain.in_flight_fences[swapchain.in_flight_index]);

    Device_CmdBuffer            cmd_buffers         [] = {Rendering->cmd_buffers[swapchain.image_current]};
    Device_PipelineStageFlags   wait_stages         [] = {Device_PipelineStageFlags_color_attachment_output};
    Device_Semaphore            wait_semaphores     [] = {swapchain.in_flight_image_available_semaphores[swapchain.in_flight_index]};
    Device_Semaphore            signal_semaphores   [] = {swapchain.in_flight_render_finished_semaphores[swapchain.in_flight_index]};
    submit(
        Buffer_make(cmd_buffers),
        Buffer_make(wait_stages),
        Buffer_make(wait_semaphores),
        Buffer_make(signal_semaphores),
        swapchain.in_flight_fences[swapchain.in_flight_index]
    );
    present(swapchain);
}

void Rendering_update_uniform_buffer(uint32 p_swapchain_image_index) {
    auto camera_transform = get_transform_render(World->camera);
    
    for (uint32 i = 0; i < World->props.count; ++i) {
        auto &prop          = World->props[i];
        auto prop_transform = get_transform(prop);
        auto mvp            = camera_transform*prop_transform;
        Memory_copy(Rendering->uniform_copy_buffer + i*Device->alignment_uniform_buffer_offset, &mvp, sizeof(mvp));
    }
    uint64 size = World->props.count*Device->alignment_uniform_buffer_offset;
    auto uniform_index = Device_Memory_uniform_transient_reserve(p_swapchain_image_index, size);
    copy(uniform_index, size, Rendering->uniform_copy_buffer);

    Device_Memory_uniform_transient_clear();
}

void Rendering_record_command_buffer(uint32 p_swapchain_image_index) {
    Device_BufferIndex64 vertices[] = {
        get_positions   (Rendering->mesh_head),
        get_normals     (Rendering->mesh_head),
        get_tex_coords  (Rendering->mesh_head),
    };
    Device_BufferIndex64 indices = get_indices(Rendering->mesh_head);
    auto &cmd_buffer = Rendering->cmd_buffers[p_swapchain_image_index];
    free(cmd_buffer);
    alloc(cmd_buffer, Rendering->cmd_pool_transient);
    begin(cmd_buffer, Device_CmdBufferUsageFlags_one_time); {
        Device_ClearValue clear_values[] = {
            {.color={0.0f, 0.0f, 0.0f, 1.0f}},
            {.depth={1.0f, 0}},
        };
        cmd_begin_render_pass(
            cmd_buffer,
            Rendering->swapchain,
            Rendering->render_pass,
            Rendering->framebuffers[p_swapchain_image_index],
            Buffer_make(clear_values)
        ); {
            cmd_bind_pipeline(cmd_buffer, Rendering->pipeline, Device_PipelineBindPointType_graphics);
            cmd_bind_vertex_buffers(cmd_buffer, Buffer_make(vertices));
            cmd_bind_index_buffer(cmd_buffer, indices, Device_IndexType_uint32);
            Device_DescriptorSet descriptor_sets[] = {Rendering->descriptor_sets[p_swapchain_image_index]};
            uint32 dynamic_offsets[1] = {};
            for (uint32 i = 0; i < World->props.count; ++i) {
                dynamic_offsets[0] = round_up(i*sizeof(mat4), Device->alignment_uniform_buffer_offset);
                cmd_bind_descriptor_sets(
                    cmd_buffer,
                    Rendering->pipeline,
                    Device_PipelineBindPointType_graphics,
                    Buffer_make(descriptor_sets),
                    0,
                    Buffer_make(dynamic_offsets)
                );
                cmd_draw_indexed(cmd_buffer, Rendering->mesh_head.index_count, 1, 0, 0, 0);
            }
        }
        cmd_end_render_pass(cmd_buffer);
    }
    end(cmd_buffer);
}

Device_BufferIndex64 Device_Memory_uniform_transient_reserve(uint32 p_swapchain_image_index, uint64 p_size) {
    Device_BufferIndex64 result = {
        Device_Memory->uniform_transient + p_swapchain_image_index,
        round_up(
            Device_Memory->uniform_transient_index,
            Device->alignment_uniform_buffer_offset
        )
    };
    Device_Memory->uniform_transient_index = result.index + p_size;
    if (Device_Memory->uniform_transient_index > Device_Memory_uniform_transient_size) {
        println("Device_Memory_uniform_transient_reserve error: memory buffer out of capacity");
        exit(1);
    }
    return result;
}

void Device_Memory_uniform_transient_clear() {
    Device_Memory->uniform_transient_index = 0;
}

void wait_for_in_flight_fence(Device_Swapchain &self) {
    wait(self.in_flight_fences[self.in_flight_index]);
}

void wait(Device_Fence &self) {
    vkWaitForFences(self.logical, 1, &self.handle, VK_TRUE, uint64_max);
}

void wait_for_in_flight_image(Device_Swapchain &self) {
    if (self.image_fences[self.image_current].handle != VK_NULL_HANDLE) {
        wait(self.image_fences[self.image_current]);
    }
    self.image_fences[self.image_current] = self.in_flight_fences[self.in_flight_index];
}

void acquire_next_image(Device_Swapchain &self) {
   vkAcquireNextImageKHR(
        self.logical,
        self.handle,
        uint64_max,
        self.in_flight_image_available_semaphores[self.in_flight_index].handle,
        VK_NULL_HANDLE,
        &self.image_current
    );
}

void reset(Device_Fence &self) {
    if (vkResetFences(self.logical, 1, &self.handle) != VK_SUCCESS) {
        println("reset(Device_Fence &): couldn't reset fence");
        exit(1);
    }
}

void submit(
    Buffer<Device_CmdBuffer> p_cmd_buffers,
    Buffer<Device_PipelineStageFlags> p_wait_stages,
    Buffer<Device_Semaphore> p_wait_semaphores,
    Buffer<Device_Semaphore> p_signal_semaphores,
    Device_Fence &p_fence
) {
    if (!p_cmd_buffers.count) {
        return;
    }
    auto cmd_buffers        = Memory_temp_reserve<VkCommandBuffer>      (p_cmd_buffers.count);
    auto wait_stages        = Memory_temp_reserve<VkPipelineStageFlags> (p_wait_stages.count);
    auto wait_semaphores    = Memory_temp_reserve<VkSemaphore>          (p_wait_semaphores.count);
    auto signal_semaphores  = Memory_temp_reserve<VkSemaphore>          (p_signal_semaphores.count);

    for (uint64 i = 0; i < p_cmd_buffers.count; ++i) {
        cmd_buffers[i] = p_cmd_buffers[i].handle;
    }
    for (uint64 i = 0; i < p_wait_stages.count; ++i) {
        wait_stages[i] = to_VkPipelineStageFlags(p_wait_stages[i]);
    }
    for (uint64 i = 0; i < p_wait_semaphores.count; ++i) {
        wait_semaphores[i] = p_wait_semaphores[i].handle;
    }
    for (uint64 i = 0; i < p_signal_semaphores.count; ++i) {
        signal_semaphores[i] = p_signal_semaphores[i].handle;
    }
    VkSubmitInfo submit_info = {};
    submit_info.sType                   = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.waitSemaphoreCount      = p_wait_semaphores.count;
    submit_info.pWaitSemaphores         = wait_semaphores;
    submit_info.pWaitDstStageMask       = wait_stages;
    submit_info.commandBufferCount      = p_cmd_buffers.count;
    submit_info.pCommandBuffers         = cmd_buffers;
    submit_info.signalSemaphoreCount    = p_signal_semaphores.count;
    submit_info.pSignalSemaphores       = signal_semaphores;

    if (vkQueueSubmit(p_cmd_buffers[0].pool->queue.handle, 1, &submit_info, p_fence.handle) != VK_SUCCESS) {
        println("submit(Buffer<Device_CmdBuffer>) error: vkQueueSubmit failed");
        exit(1);
    }
    Memory_temp_clear();
}

void present(Device_Swapchain &self) {
    VkSemaphore signal_semaphores[] = {self.in_flight_render_finished_semaphores[self.in_flight_index].handle};
    VkSwapchainKHR swapchains[]     = {self.handle};
    VkPresentInfoKHR present_info   = {};
    present_info.sType              = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
    present_info.waitSemaphoreCount = 1;
    present_info.pWaitSemaphores    = signal_semaphores;
    present_info.swapchainCount     = 1;
    present_info.pSwapchains        = swapchains;
    present_info.pImageIndices      = &self.image_current;
    present_info.pResults           = 0;
    vkQueuePresentKHR(Device->logical.present.handle, &present_info);
    self.in_flight_index = (self.in_flight_index + 1)%Device_in_flight_frame_count_max;
}

Cheers,
Miika Vihersaari

My bad, the problem was a small typo in Rendering_record_command_buffer:

for (uint32 i = 0; i < World->props.count; ++i) {
    // This next line had the index i inside round_up, which messed up the dynamic uniforms
    dynamic_offsets[0] = i*round_up(sizeof(mat4), Device->alignment_uniform_buffer_offset);

    cmd_bind_descriptor_sets(
        cmd_buffer,
        Rendering->pipeline,
        Device_PipelineBindPointType_graphics,
        Buffer_make(descriptor_sets),
        0,
        Buffer_make(dynamic_offsets)
    );
    cmd_draw_indexed(cmd_buffer, Rendering->mesh_head.index_count, 1, 0, 0, 0);
}

Cheers,
Miika Vihersaari