[Mesa-dev] [PATCH 1/6] radeonsi: don't emit redundant PKT3_NUM_INSTANCES packets

Dieter Nützel Dieter at nuetzel-hh.de
Thu Dec 20 04:40:53 UTC 2018


For the series:

Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>

(with my 'normal' apps) ;-)

Dieter

Am 14.12.2018 22:23, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>  src/gallium/drivers/radeonsi/si_pipe.h       | 3 +++
>  src/gallium/drivers/radeonsi/si_state_draw.c | 9 +++++++--
>  2 files changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 1d677d29e88..b3522b60752 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -40,20 +40,21 @@
> 
>  #define ATI_VENDOR_ID			0x1002
> 
>  #define SI_NOT_QUERY			0xffffffff
> 
>  /* The base vertex and primitive restart can be any number, but we must pick
>   * one which will mean "unknown" for the purpose of state tracking and
>   * the number shouldn't be a commonly-used one. */
>  #define SI_BASE_VERTEX_UNKNOWN		INT_MIN
>  #define SI_RESTART_INDEX_UNKNOWN	INT_MIN
> +#define SI_INSTANCE_COUNT_UNKNOWN	INT_MIN
>  #define SI_NUM_SMOOTH_AA_SAMPLES	8
>  #define SI_MAX_POINT_SIZE		2048
>  #define SI_GS_PER_ES			128
>  /* Alignment for optimal CP DMA performance. */
>  #define SI_CPDMA_ALIGNMENT		32
> 
>  /* Tunables for compute-based clear_buffer and copy_buffer: */
>  #define SI_COMPUTE_CLEAR_DW_PER_THREAD	4
>  #define SI_COMPUTE_COPY_DW_PER_THREAD	4
>  #define SI_COMPUTE_DST_CACHE_POLICY	L2_STREAM
> @@ -918,20 +919,21 @@ struct si_context {
>  	bool			db_stencil_disable_expclear:1;
>  	bool			occlusion_queries_disabled:1;
>  	bool			generate_mipmap_for_depth:1;
> 
>  	/* Emitted draw state. */
>  	bool			gs_tri_strip_adj_fix:1;
>  	bool			ls_vgpr_fix:1;
>  	int			last_index_size;
>  	int			last_base_vertex;
>  	int			last_start_instance;
> +	int			last_instance_count;
>  	int			last_drawid;
>  	int			last_sh_base_reg;
>  	int			last_primitive_restart_en;
>  	int			last_restart_index;
>  	int			last_prim;
>  	int			last_multi_vgt_param;
>  	int			last_rast_prim;
>  	unsigned		last_sc_line_stipple;
>  	unsigned		current_vs_state;
>  	unsigned		last_vs_state;
> @@ -1369,20 +1371,21 @@ si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)
>  		/* Add memory usage for need_gfx_cs_space */
>  		sctx->vram += r600_resource(r)->vram_usage;
>  		sctx->gtt += r600_resource(r)->gart_usage;
>  	}
>  }
> 
>  static inline void
>  si_invalidate_draw_sh_constants(struct si_context *sctx)
>  {
>  	sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
> +	sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;
>  }
> 
>  static inline unsigned
>  si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
>  {
>  	return 1 << (atom - sctx->atoms.array);
>  }
> 
>  static inline void
>  si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 612ca910cb9..b707a6585c5 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -802,24 +802,29 @@ static void si_emit_draw_packets(struct si_context *sctx,
>  			radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
>  					S_2C3_DRAW_INDEX_ENABLE(1) |
>  					S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
>  			radeon_emit(cs, indirect->draw_count);
>  			radeon_emit(cs, count_va);
>  			radeon_emit(cs, count_va >> 32);
>  			radeon_emit(cs, indirect->stride);
>  			radeon_emit(cs, di_src_sel);
>  		}
>  	} else {
> +		unsigned instance_count = info->instance_count;
>  		int base_vertex;
> 
> -		radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
> -		radeon_emit(cs, info->instance_count);
> +		if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
> +		    sctx->last_instance_count != instance_count) {
> +			radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
> +			radeon_emit(cs, instance_count);
> +			sctx->last_instance_count = instance_count;
> +		}
> 
>  		/* Base vertex and start instance. */
>  		base_vertex = index_size ? info->index_bias : info->start;
> 
>  		if (sctx->num_vs_blit_sgprs) {
>  			/* Re-emit draw constants after we leave u_blitter. */
>  			si_invalidate_draw_sh_constants(sctx);
> 
>  			/* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. */
>  			radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4,


More information about the mesa-dev mailing list