[Mesa-dev] [PATCH] r600/eg: rework atomic counter emission with flushes

Gert Wollny gw.fossdev at gmail.com
Tue Aug 7 21:25:00 UTC 2018


I have not yet read the patch, just applied it and run, at one point it
locked up again, blancking the screen, and these piglits running: 

   arb_compute_shader-local-id -auto -fbo
   arb_shader_image_load_store-bitcast -auto -fbo
   arb_shader_image_load_store-max-images -auto -fbo
   arb_shader_image_load_store-max-size --quick -auto -fbo
   arb_shader_image_load_store-minmax -auto -fbo
   arb_shader_image_load_store-qualifiers -auto -fbo
 
I couldn't kill the -bitcast process, didn't try the -local-id process,
but starting with -max-size I was able to kill -9 some tests and then
it resolved itself and piglit continued to run to the end.  

I have to retry whether I can do the same without this patch, I thinkat least once it worked for me. 

Best, 
Gert 

Am Dienstag, den 07.08.2018, 12:31 +1000 schrieb Dave Airlie:
> From: Dave Airlie <airlied at redhat.com>
> 
> With the current code, we didn't do the space checks prior
> to atomic counter setup emission, but we also didn't add
> atomic counters to the space check so we could get a flush
> later as well.
> 
> These flushes would be bad, and lead to problems with
> parallel tests. We have to ensure the atomic counter copy in,
> draw emits and counter copy out are kept in the same command
> submission unit.
> 
> This reworks the code to drop some useless masks, make the
> counting separate to the emits, and make the space checker
> handle atomic counter space.
> ---
>  src/gallium/drivers/r600/evergreen_compute.c  | 11 ++++--
>  .../drivers/r600/evergreen_hw_context.c       |  2 +-
>  src/gallium/drivers/r600/evergreen_state.c    | 38 +++++++++++----
> ----
>  src/gallium/drivers/r600/r600_hw_context.c    |  7 +++-
>  src/gallium/drivers/r600/r600_pipe.h          | 14 ++++---
>  src/gallium/drivers/r600/r600_state_common.c  | 13 +++++--
>  6 files changed, 54 insertions(+), 31 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c
> b/src/gallium/drivers/r600/evergreen_compute.c
> index 90eae1e2829..a77f58242e3 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context
> *rctx,
>  		rctx->cmd_buf_is_compute = true;
>  	}
>  
> -	r600_need_cs_space(rctx, 0, true);
>  	if (rctx->cs_shader_state.shader->ir_type ==
> PIPE_SHADER_IR_TGSI) {
>  		r600_shader_select(&rctx->b.b, rctx-
> >cs_shader_state.shader->sel, &compute_dirty);
>  		current = rctx->cs_shader_state.shader->sel-
> >current;
> @@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context
> *rctx,
>  		}
>  		rctx->cs_block_grid_sizes[3] = rctx-
> >cs_block_grid_sizes[7] = 0;
>  		rctx-
> >driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
> +
> +		evergreen_emit_atomic_buffer_setup_count(rctx,
> current, combined_atomics, &atomic_used_mask);
> +		r600_need_cs_space(rctx, 0, true,
> util_bitcount(atomic_used_mask));
> +
>  		if (need_buf_const) {
>  			eg_setup_buffer_constants(rctx,
> PIPE_SHADER_COMPUTE);
>  		}
>  		r600_update_driver_const_buffers(rctx, true);
>  
> -		if (evergreen_emit_atomic_buffer_setup(rctx,
> current, combined_atomics, &atomic_used_mask)) {
> +		evergreen_emit_atomic_buffer_setup(rctx, true,
> combined_atomics, atomic_used_mask);
> +		if (atomic_used_mask) {
>  			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0,
> 0));
>  			radeon_emit(cs,
> EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
>  		}
> -	}
> +	} else
> +		r600_need_cs_space(rctx, 0, true, 0);
>  
>  	/* Initialize all the compute-related registers.
>  	 *
> diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c
> b/src/gallium/drivers/r600/evergreen_hw_context.c
> index d3f3e227c1f..5e0e27b0f16 100644
> --- a/src/gallium/drivers/r600/evergreen_hw_context.c
> +++ b/src/gallium/drivers/r600/evergreen_hw_context.c
> @@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct
> r600_context *rctx,
>  
>  		r600_need_cs_space(rctx,
>  				   10 + (rctx->b.flags ?
> R600_MAX_FLUSH_CS_DWORDS : 0) +
> -				   R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE);
> +				   R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE, 0);
>  
>  		/* Flush the caches for the first copy only. */
>  		if (rctx->b.flags) {
> diff --git a/src/gallium/drivers/r600/evergreen_state.c
> b/src/gallium/drivers/r600/evergreen_state.c
> index 57e81e30c27..cc41e114369 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -4030,7 +4030,6 @@ static void
> evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
>  
>  		if (!buffers || !buffers[idx].buffer) {
>  			pipe_resource_reference(&abuf->buffer,
> NULL);
> -			astate->enabled_mask &= ~(1 << i);
>  			continue;
>  		}
>  		buf = &buffers[idx];
> @@ -4038,7 +4037,6 @@ static void
> evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
>  		pipe_resource_reference(&abuf->buffer, buf->buffer);
>  		abuf->buffer_offset = buf->buffer_offset;
>  		abuf->buffer_size = buf->buffer_size;
> -		astate->enabled_mask |= (1 << i);
>  	}
>  }
>  
> @@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct
> r600_context *rctx,
>  	radeon_emit(cs, reloc);
>  }
>  
> -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> -					struct r600_pipe_shader
> *cs_shader,
> -					struct r600_shader_atomic
> *combined_atomics,
> -					uint8_t *atomic_used_mask_p)
> +void evergreen_emit_atomic_buffer_setup_count(struct r600_context
> *rctx,
> +					      struct
> r600_pipe_shader *cs_shader,
> +					      struct
> r600_shader_atomic *combined_atomics,
> +					      uint8_t
> *atomic_used_mask_p)
>  {
> -	struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> -	unsigned pkt_flags = 0;
>  	uint8_t atomic_used_mask = 0;
>  	int i, j, k;
>  	bool is_compute = cs_shader ? true : false;
>  
> -	if (is_compute)
> -		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
> -
>  	for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
>  		uint8_t num_atomic_stage;
>  		struct r600_pipe_shader *pshader;
> @@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct
> r600_context *rctx,
>  			}
>  		}
>  	}
> +	*atomic_used_mask_p = atomic_used_mask;
> +}
> +
> +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> +					bool is_compute,
> +					struct r600_shader_atomic
> *combined_atomics,
> +					uint8_t atomic_used_mask)
> +{
> +	struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
> +	unsigned pkt_flags = 0;
> +	uint32_t mask;
> +
> +	if (is_compute)
> +		pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
> +
> +	mask = atomic_used_mask;
> +	if (!mask)
> +		return;
>  
> -	uint32_t mask = atomic_used_mask;
>  	while (mask) {
>  		unsigned atomic_index = u_bit_scan(&mask);
>  		struct r600_shader_atomic *atomic =
> &combined_atomics[atomic_index];
> @@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct
> r600_context *rctx,
>  		else
>  			evergreen_emit_set_append_cnt(rctx, atomic,
> resource, pkt_flags);
>  	}
> -	*atomic_used_mask_p = atomic_used_mask;
> -	return true;
>  }
>  
>  void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
> @@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct
> r600_context *rctx,
>  	struct r600_atomic_buffer_state *astate = &rctx-
> >atomic_buffer_state;
>  	uint32_t pkt_flags = 0;
>  	uint32_t event = EVENT_TYPE_PS_DONE;
> -	uint32_t mask = astate->enabled_mask;
> +	uint32_t mask;
>  	uint64_t dst_offset;
>  	unsigned reloc;
>  
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c
> b/src/gallium/drivers/r600/r600_hw_context.c
> index 1cfc180ad6c..a2f5f637b20 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -31,7 +31,7 @@
>  
>  
>  void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> -			boolean count_draw_in)
> +			boolean count_draw_in, unsigned num_atomics)
>  {
>  	/* Flush the DMA IB if it's not empty. */
>  	if (radeon_emitted(ctx->b.dma.cs, 0))
> @@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx,
> unsigned num_dw,
>  		num_dw += R600_MAX_FLUSH_CS_DWORDS +
> R600_MAX_DRAW_CS_DWORDS;
>  	}
>  
> +	/* add atomic counters, 8 pre + 8 post per counter + 16 post
> if any counters */
> +	num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
> +
>  	/* Count in r600_suspend_queries. */
>  	num_dw += ctx->b.num_cs_dw_queries_suspend;
>  
> @@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context
> *rctx,
>  
>  		r600_need_cs_space(rctx,
>  				   10 + (rctx->b.flags ?
> R600_MAX_FLUSH_CS_DWORDS : 0) +
> -				   3 + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE);
> +				   3 + R600_MAX_PFP_SYNC_ME_DWORDS,
> FALSE, 0);
>  
>  		/* Flush the caches for the first copy only. */
>  		if (rctx->b.flags) {
> diff --git a/src/gallium/drivers/r600/r600_pipe.h
> b/src/gallium/drivers/r600/r600_pipe.h
> index 6204e3c557b..239005cab7f 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -446,8 +446,6 @@ struct r600_shader_state {
>  };
>  
>  struct r600_atomic_buffer_state {
> -	uint32_t enabled_mask;
> -	uint32_t dirty_mask;
>  	struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
>  };
>  
> @@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context,
> unsigned flags,
>  			    struct pipe_fence_handle **fence);
>  void r600_begin_new_cs(struct r600_context *ctx);
>  void r600_flush_emit(struct r600_context *ctx);
> -void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in);
> +void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in, unsigned num_atomics);
>  void r600_emit_pfp_sync_me(struct r600_context *rctx);
>  void r600_cp_dma_copy_buffer(struct r600_context *rctx,
>  			     struct pipe_resource *dst, uint64_t
> dst_offset,
> @@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct
> pipe_context *ctx,
>  				 struct r600_pipe_shader_selector
> *sel);
>  
>  struct r600_shader_atomic;
> -bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> -					struct r600_pipe_shader
> *cs_shader,
> +void evergreen_emit_atomic_buffer_setup_count(struct r600_context
> *rctx,
> +					      struct
> r600_pipe_shader *cs_shader,
> +					      struct
> r600_shader_atomic *combined_atomics,
> +					      uint8_t
> *atomic_used_mask_p);
> +void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
> +					bool is_compute,
>  					struct r600_shader_atomic
> *combined_atomics,
> -					uint8_t
> *atomic_used_mask_p);
> +					uint8_t atomic_used_mask);
>  void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
>  				       bool is_compute,
>  				       struct r600_shader_atomic
> *combined_atomics,
> diff --git a/src/gallium/drivers/r600/r600_state_common.c
> b/src/gallium/drivers/r600/r600_state_common.c
> index 402d95838f0..e6c1b0be97c 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
>  		: (rctx->tes_shader)? rctx->tes_shader-
> >info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
>  		: info->mode;
>  
> -	if (rctx->b.chip_class >= EVERGREEN)
> -		evergreen_emit_atomic_buffer_setup(rctx, NULL,
> combined_atomics, &atomic_used_mask);
> +	if (rctx->b.chip_class >= EVERGREEN) {
> +		evergreen_emit_atomic_buffer_setup_count(rctx, NULL,
> combined_atomics, &atomic_used_mask);
> +	}
>  
>  	if (index_size) {
>  		index_offset += info->start * index_size;
> @@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
>  		evergreen_setup_tess_constants(rctx, info,
> &num_patches);
>  
>  	/* Emit states. */
> -	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
> +	r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE,
> util_bitcount(atomic_used_mask));
>  	r600_flush_emit(rctx);
>  
>  	mask = rctx->dirty_atoms;
> @@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context
> *ctx, const struct pipe_draw_info
>  		r600_emit_atom(rctx, rctx-
> >atoms[u_bit_scan64(&mask)]);
>  	}
>  
> +	if (rctx->b.chip_class >= EVERGREEN) {
> +		evergreen_emit_atomic_buffer_setup(rctx, false,
> combined_atomics, atomic_used_mask);
> +	}
> +		
>  	if (rctx->b.chip_class == CAYMAN) {
>  		/* Copied from radeonsi. */
>  		unsigned primgroup_size = 128; /* recommended
> without a GS */
> @@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct
> pipe_context *ctx, boolean enable
>  static void r600_need_gfx_cs_space(struct pipe_context *ctx,
> unsigned num_dw,
>                                     bool include_draw_vbo)
>  {
> -	r600_need_cs_space((struct r600_context*)ctx, num_dw,
> include_draw_vbo);
> +	r600_need_cs_space((struct r600_context*)ctx, num_dw,
> include_draw_vbo, 0);
>  }
>  
>  /* keep this at the end of this file, please */


More information about the mesa-dev mailing list