[Mesa-dev] [PATCH 1/6] radeonsi: don't emit redundant PKT3_NUM_INSTANCES packets
Dieter Nützel
Dieter at nuetzel-hh.de
Thu Dec 20 04:40:53 UTC 2018
For the series:
Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>
(with my 'normal' apps) ;-)
Dieter
Am 14.12.2018 22:23, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeonsi/si_pipe.h | 3 +++
> src/gallium/drivers/radeonsi/si_state_draw.c | 9 +++++++--
> 2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 1d677d29e88..b3522b60752 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -40,20 +40,21 @@
>
> #define ATI_VENDOR_ID 0x1002
>
> #define SI_NOT_QUERY 0xffffffff
>
> /* The base vertex and primitive restart can be any number, but we must pick
> * one which will mean "unknown" for the purpose of state tracking and
> * the number shouldn't be a commonly-used one. */
> #define SI_BASE_VERTEX_UNKNOWN INT_MIN
> #define SI_RESTART_INDEX_UNKNOWN INT_MIN
> +#define SI_INSTANCE_COUNT_UNKNOWN INT_MIN
> #define SI_NUM_SMOOTH_AA_SAMPLES 8
> #define SI_MAX_POINT_SIZE 2048
> #define SI_GS_PER_ES 128
> /* Alignment for optimal CP DMA performance. */
> #define SI_CPDMA_ALIGNMENT 32
>
> /* Tunables for compute-based clear_buffer and copy_buffer: */
> #define SI_COMPUTE_CLEAR_DW_PER_THREAD 4
> #define SI_COMPUTE_COPY_DW_PER_THREAD 4
> #define SI_COMPUTE_DST_CACHE_POLICY L2_STREAM
> @@ -918,20 +919,21 @@ struct si_context {
> bool db_stencil_disable_expclear:1;
> bool occlusion_queries_disabled:1;
> bool generate_mipmap_for_depth:1;
>
> /* Emitted draw state. */
> bool gs_tri_strip_adj_fix:1;
> bool ls_vgpr_fix:1;
> int last_index_size;
> int last_base_vertex;
> int last_start_instance;
> + int last_instance_count;
> int last_drawid;
> int last_sh_base_reg;
> int last_primitive_restart_en;
> int last_restart_index;
> int last_prim;
> int last_multi_vgt_param;
> int last_rast_prim;
> unsigned last_sc_line_stipple;
> unsigned current_vs_state;
> unsigned last_vs_state;
> @@ -1369,20 +1371,21 @@ si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)
> /* Add memory usage for need_gfx_cs_space */
> sctx->vram += r600_resource(r)->vram_usage;
> sctx->gtt += r600_resource(r)->gart_usage;
> }
> }
>
> static inline void
> si_invalidate_draw_sh_constants(struct si_context *sctx)
> {
> sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
> + sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;
> }
>
> static inline unsigned
> si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
> {
> return 1 << (atom - sctx->atoms.array);
> }
>
> static inline void
> si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
> b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 612ca910cb9..b707a6585c5 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -802,24 +802,29 @@ static void si_emit_draw_packets(struct si_context *sctx,
> radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 -
> SI_SH_REG_OFFSET) >> 2) |
> S_2C3_DRAW_INDEX_ENABLE(1) |
> S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
> radeon_emit(cs, indirect->draw_count);
> radeon_emit(cs, count_va);
> radeon_emit(cs, count_va >> 32);
> radeon_emit(cs, indirect->stride);
> radeon_emit(cs, di_src_sel);
> }
> } else {
> + unsigned instance_count = info->instance_count;
> int base_vertex;
>
> - radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
> - radeon_emit(cs, info->instance_count);
> + if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
> + sctx->last_instance_count != instance_count) {
> + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
> + radeon_emit(cs, instance_count);
> + sctx->last_instance_count = instance_count;
> + }
>
> /* Base vertex and start instance. */
> base_vertex = index_size ? info->index_bias : info->start;
>
> if (sctx->num_vs_blit_sgprs) {
> /* Re-emit draw constants after we leave u_blitter. */
> si_invalidate_draw_sh_constants(sctx);
>
> /* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. */
> radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4,
More information about the mesa-dev mailing list