[Mesa-dev] [PATCH 1/6] radeonsi: don't emit redundant PKT3_NUM_INSTANCES packets
Marek Olšák
maraeo at gmail.com
Fri Dec 14 21:23:55 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_pipe.h | 3 +++
src/gallium/drivers/radeonsi/si_state_draw.c | 9 +++++++--
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 1d677d29e88..b3522b60752 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -40,20 +40,21 @@
#define ATI_VENDOR_ID 0x1002
#define SI_NOT_QUERY 0xffffffff
/* The base vertex and primitive restart can be any number, but we must pick
* one which will mean "unknown" for the purpose of state tracking and
* the number shouldn't be a commonly-used one. */
#define SI_BASE_VERTEX_UNKNOWN INT_MIN
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
+#define SI_INSTANCE_COUNT_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
#define SI_MAX_POINT_SIZE 2048
#define SI_GS_PER_ES 128
/* Alignment for optimal CP DMA performance. */
#define SI_CPDMA_ALIGNMENT 32
/* Tunables for compute-based clear_buffer and copy_buffer: */
#define SI_COMPUTE_CLEAR_DW_PER_THREAD 4
#define SI_COMPUTE_COPY_DW_PER_THREAD 4
#define SI_COMPUTE_DST_CACHE_POLICY L2_STREAM
@@ -918,20 +919,21 @@ struct si_context {
bool db_stencil_disable_expclear:1;
bool occlusion_queries_disabled:1;
bool generate_mipmap_for_depth:1;
/* Emitted draw state. */
bool gs_tri_strip_adj_fix:1;
bool ls_vgpr_fix:1;
int last_index_size;
int last_base_vertex;
int last_start_instance;
+ int last_instance_count;
int last_drawid;
int last_sh_base_reg;
int last_primitive_restart_en;
int last_restart_index;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
unsigned last_sc_line_stipple;
unsigned current_vs_state;
unsigned last_vs_state;
@@ -1369,20 +1371,21 @@ si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)
/* Add memory usage for need_gfx_cs_space */
sctx->vram += r600_resource(r)->vram_usage;
sctx->gtt += r600_resource(r)->gart_usage;
}
}
static inline void
si_invalidate_draw_sh_constants(struct si_context *sctx)
{
sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;
+ sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;
}
static inline unsigned
si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)
{
return 1 << (atom - sctx->atoms.array);
}
static inline void
si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 612ca910cb9..b707a6585c5 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -802,24 +802,29 @@ static void si_emit_draw_packets(struct si_context *sctx,
radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) |
S_2C3_DRAW_INDEX_ENABLE(1) |
S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count));
radeon_emit(cs, indirect->draw_count);
radeon_emit(cs, count_va);
radeon_emit(cs, count_va >> 32);
radeon_emit(cs, indirect->stride);
radeon_emit(cs, di_src_sel);
}
} else {
+ unsigned instance_count = info->instance_count;
int base_vertex;
- radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
- radeon_emit(cs, info->instance_count);
+ if (sctx->last_instance_count == SI_INSTANCE_COUNT_UNKNOWN ||
+ sctx->last_instance_count != instance_count) {
+ radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0));
+ radeon_emit(cs, instance_count);
+ sctx->last_instance_count = instance_count;
+ }
/* Base vertex and start instance. */
base_vertex = index_size ? info->index_bias : info->start;
if (sctx->num_vs_blit_sgprs) {
/* Re-emit draw constants after we leave u_blitter. */
si_invalidate_draw_sh_constants(sctx);
/* Blit VS doesn't use BASE_VERTEX, START_INSTANCE, and DRAWID. */
radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_VS_BLIT_DATA * 4,
--
2.17.1
More information about the mesa-dev
mailing list