[Mesa-dev] [PATCH 1/6] radeonsi: don't emit redundant PKT3_NUM_INSTANCES packets

Marek Olšák maraeo at gmail.com
Fri Dec 14 21:23:55 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.h       | 3 +++
 src/gallium/drivers/radeonsi/si_state_draw.c | 9 +++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 1d677d29e88..b3522b60752 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -40,20 +40,21 @@
 
 #define ATI_VENDOR_ID			0x1002
 
 #define SI_NOT_QUERY			0xffffffff
 
 /* The base vertex and primitive restart can be any number, but we must pick
  * one which will mean "unknown" for the purpose of state tracking and
  * the number shouldn't be a commonly-used one. */
 #define SI_BASE_VERTEX_UNKNOWN		INT_MIN
 #define SI_RESTART_INDEX_UNKNOWN	INT_MIN
+#define SI_INSTANCE_COUNT_UNKNOWN	INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES	8
 #define SI_MAX_POINT_SIZE		2048
 #define SI_GS_PER_ES			128
 /* Alignment for optimal CP DMA performance. */
 #define SI_CPDMA_ALIGNMENT		32
 
 /* Tunables for compute-based clear_buffer and copy_buffer: */
 #define SI_COMPUTE_CLEAR_DW_PER_THREAD	4
 #define SI_COMPUTE_COPY_DW_PER_THREAD	4
 #define SI_COMPUTE_DST_CACHE_POLICY	L2_STREAM
@@ -918,20 +919,21 @@ struct si_context {
 	bool			db_stencil_disable_expclear:1;
 	bool			occlusion_queries_disabled:1;
 	bool			generate_mipmap_for_depth:1;
 
 	/* Emitted draw state. */
 	bool			gs_tri_strip_adj_fix:1;
 	bool			ls_vgpr_fix:1;
 	int			last_index_size;
 	int			last_base_vertex;
 	int			last_start_instance;
+	int			last_instance_count;
 	int			last_drawid;
 	int			last_sh_base_reg;
 	int			last_primitive_restart_en;
 	int			last_restart_index;
 	int			last_prim;
 	int			last_multi_vgt_param;
 	int			last_rast_prim;
 	unsigned		last_sc_line_stipple;
 	unsigned		current_vs_state;
 	unsigned		last_vs_state;
@@ -1369,20 +1371,21 @@ si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)
 		/* Add memory usage for need_gfx_cs_space */
 		sctx->vram += r600_resource(r)->vram_usage;
 		sctx->gtt += r600_resource(r)->gart_usage;
 	}
 }
 
 static inline void
 si_invalidate_draw_sh_constants(struct si_context *sctx)
 {
 	sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
+	sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;
 }
 
 static inline unsigned
 si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
 {
 	return 1 << (atom - sctx->atoms.array);
 }
 
 static inline void
 si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 612ca910cb9..b707a6585c5 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -802,24 +802,29 @@ static void si_emit_draw_packets(struct si_context *sctx,
 			radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
 					S_2C3_DRAW_INDEX_ENABLE(1) |
 					S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
 			radeon_emit(cs, indirect->draw_count);
 			radeon_emit(cs, count_va);
 			radeon_emit(cs, count_va >> 32);
 			radeon_emit(cs, indirect->stride);
 			radeon_emit(cs, di_src_sel);
 		}
 	} else {
+		unsigned instance_count = info->instance_count;
 		int base_vertex;
 
-		radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
-		radeon_emit(cs, info->instance_count);
+		if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
+		    sctx->last_instance_count != instance_count) {
+			radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
+			radeon_emit(cs, instance_count);
+			sctx->last_instance_count = instance_count;
+		}
 
 		/* Base vertex and start instance. */
 		base_vertex = index_size ? info->index_bias : info->start;
 
 		if (sctx->num_vs_blit_sgprs) {
 			/* Re-emit draw constants after we leave u_blitter. */
 			si_invalidate_draw_sh_constants(sctx);
 
 			/* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. */
 			radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4,
-- 
2.17.1



More information about the mesa-dev mailing list