[Mesa-dev] [PATCH] radv: detect command buffers that do no work and drop them (v2)

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Mon Feb 13 08:13:54 UTC 2017


Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

On Sun, Feb 12, 2017, at 20:11, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
> 
> If a command buffer contains nothing but flushes, don't bother
> submitting it: we already flush on command buffer submission.
> 
> This will reduce some CPU overhead on dota2, which submits a fair
> few command streams that don't end up drawing anything.
> 
> v2: reorganise loop to count first then malloc,
> rename some vars (Bas)
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/vulkan/radv_cmd_buffer.c  |  3 +++
>  src/amd/vulkan/radv_device.c      | 27 ++++++++++++++++++++-------
>  src/amd/vulkan/radv_meta_buffer.c |  1 +
>  src/amd/vulkan/radv_private.h     |  2 ++
>  src/amd/vulkan/si_cmd_buffer.c    |  2 +-
>  5 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index f281f33..25b1bd6 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1277,6 +1277,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
>  	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
>  							   cmd_buffer->cs, 4096);
>  
> +       cmd_buffer->no_draws = false;
>  	if ((cmd_buffer->state.vertex_descriptors_dirty || cmd_buffer->state.vb_dirty) &&
>  	    cmd_buffer->state.pipeline->num_vertex_attribs) {
>  		unsigned vb_offset;
> @@ -1592,6 +1593,7 @@ static void  radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>  	cmd_buffer->record_fail = false;
>  
>  	cmd_buffer->ring_offsets_idx = -1;
> +       cmd_buffer->no_draws = true;
>  }
>  
>  VkResult radv_ResetCommandBuffer(
> @@ -2423,6 +2425,7 @@ void radv_CmdDrawIndexedIndirectCountAMD(
>  static void
>  radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
>  {
> +       cmd_buffer->no_draws = false;
>  	radv_emit_compute_pipeline(cmd_buffer);
>  	radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline,
>  			       VK_SHADER_STAGE_COMPUTE_BIT);
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 8a54a2a..38848f9 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1425,8 +1425,18 @@ VkResult radv_QueueSubmit(
>  		struct radeon_winsys_cs **cs_array;
>  		bool can_patch = true;
>  		uint32_t advance;
> +               int draw_cmd_buffers_count = 0;
>  
> -               if (!pSubmits[i].commandBufferCount) {
> +               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
> +                       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
> +                                        pSubmits[i].pCommandBuffers[j]);
> +                       assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
> +                       if (cmd_buffer->no_draws == true)
> +                               continue;
> +                       draw_cmd_buffers_count++;
> +               }
> +
> +               if (!draw_cmd_buffers_count) {
>  			if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
>  				ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
>  								   &queue->device->empty_cs[queue->queue_family_index],
> @@ -1445,24 +1455,27 @@ VkResult radv_QueueSubmit(
>  			continue;
>  		}
>  
> -               cs_array = malloc(sizeof(struct radeon_winsys_cs *) *
> -                                               pSubmits[i].commandBufferCount);
> +               cs_array = malloc(sizeof(struct radeon_winsys_cs *) * draw_cmd_buffers_count);
>  
> +               int draw_cmd_buffer_idx = 0;
>  		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
>  			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
>  					 pSubmits[i].pCommandBuffers[j]);
>  			assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
> +                       if (cmd_buffer->no_draws == true)
> +                               continue;
>  
> -                       cs_array[j] = cmd_buffer->cs;
> +                       cs_array[draw_cmd_buffer_idx] = cmd_buffer->cs;
> +                       draw_cmd_buffer_idx++;
>  			if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
>  				can_patch = false;
>  		}
>  
> -               for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) {
> +               for (uint32_t j = 0; j < draw_cmd_buffers_count; j += advance) {
>  			advance = MIN2(max_cs_submission,
> -                                      pSubmits[i].commandBufferCount - j);
> +                                      draw_cmd_buffers_count - j);
>  			bool b = j == 0;
> -                       bool e = j + advance == pSubmits[i].commandBufferCount;
> +                       bool e = j + advance == draw_cmd_buffers_count;
>  
>  			if (queue->device->trace_bo)
>  				*queue->device->trace_id_ptr = 0;
> diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
> index cd2973f..4857d3d 100644
> --- a/src/amd/vulkan/radv_meta_buffer.c
> +++ b/src/amd/vulkan/radv_meta_buffer.c
> @@ -523,6 +523,7 @@ void radv_CmdUpdateBuffer(
>  	assert(!(dataSize & 3));
>  	assert(!(va & 3));
>  
> +       cmd_buffer->no_draws = false;
>  	if (dataSize < 4096) {
>  		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);
>  
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 25ed5de..9a88ce0 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -750,6 +750,8 @@ struct radv_cmd_buffer {
>  	uint32_t gsvs_ring_size_needed;
>  
>  	int ring_offsets_idx; /* just used for verification */
> +
> +       bool no_draws;
>  };
>  
>  struct radv_image;
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 1c99b22..b94c1f1 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -828,7 +828,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
>  static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
>  			      uint64_t remaining_size, unsigned *flags)
>  {
> -
> +       cmd_buffer->no_draws = false;
>  	/* Flush the caches for the first copy only.
>  	 * Also wait for the previous CP DMA operations.
>  	 */
> -- 
> 2.7.4
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list