[Mesa-dev] [PATCH 09/17] radeonsi: use a bitmask for looping over dirty PM4 states
Marek Olšák
maraeo at gmail.com
Thu Jan 26 16:04:25 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
also move it to draw_vbo, because it should be 0 in most cases
---
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_pm4.c | 16 +---------------
src/gallium/drivers/radeonsi/si_pm4.h | 1 -
src/gallium/drivers/radeonsi/si_state.h | 3 +++
src/gallium/drivers/radeonsi/si_state_draw.c | 17 +++++++++++++++--
5 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b6474e6..da6aca1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -221,20 +221,21 @@ struct si_context {
struct si_shader_ctx_state fixed_func_tcs_shader;
LLVMTargetMachineRef tm; /* only non-threaded compilation */
bool gfx_flush_in_progress;
bool compute_is_busy;
/* Atoms (direct states). */
union si_state_atoms atoms;
unsigned dirty_atoms; /* mask */
/* PM4 states (precomputed immutable states) */
+ unsigned dirty_states;
union si_state queued;
union si_state emitted;
/* Atom declarations. */
struct r600_atom prefetch_L2;
struct si_framebuffer framebuffer;
struct si_sample_locs msaa_sample_locs;
struct r600_atom db_render_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index 97b6799..2680439 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -22,22 +22,20 @@
*
* Authors:
* Christian König <christian.koenig at amd.com>
*/
#include "radeon/r600_cs.h"
#include "util/u_memory.h"
#include "si_pipe.h"
#include "sid.h"
-#define NUMBER_OF_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
-
void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
{
state->last_opcode = opcode;
state->last_pm4 = state->ndw++;
}
void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
{
state->pm4[state->ndw++] = dw;
}
@@ -150,36 +148,24 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
RADEON_USAGE_READ,
RADEON_PRIO_IB2);
radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
radeon_emit(cs, ib->gpu_address);
radeon_emit(cs, (ib->gpu_address >> 32) & 0xffff);
radeon_emit(cs, (ib->b.b.width0 >> 2) & 0xfffff);
}
}
-void si_pm4_emit_dirty(struct si_context *sctx)
-{
- for (int i = 0; i < NUMBER_OF_STATES; ++i) {
- struct si_pm4_state *state = sctx->queued.array[i];
-
- if (!state || sctx->emitted.array[i] == state)
- continue;
-
- si_pm4_emit(sctx, state);
- sctx->emitted.array[i] = state;
- }
-}
-
void si_pm4_reset_emitted(struct si_context *sctx)
{
memset(&sctx->emitted, 0, sizeof(sctx->emitted));
+ sctx->dirty_states |= u_bit_consecutive(0, SI_NUM_STATES);
}
void si_pm4_upload_indirect_buffer(struct si_context *sctx,
struct si_pm4_state *state)
{
struct pipe_screen *screen = sctx->b.b.screen;
unsigned aligned_ndw = align(state->ndw, 8);
/* only supported on CIK and later */
if (sctx->b.chip_class < CIK)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h
index 9b02a80..106abe1 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -71,14 +71,13 @@ void si_pm4_add_bo(struct si_pm4_state *state,
void si_pm4_upload_indirect_buffer(struct si_context *sctx,
struct si_pm4_state *state);
void si_pm4_clear_state(struct si_pm4_state *state);
void si_pm4_free_state_simple(struct si_pm4_state *state);
void si_pm4_free_state(struct si_context *sctx,
struct si_pm4_state *state,
unsigned idx);
void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state);
-void si_pm4_emit_dirty(struct si_context *sctx);
void si_pm4_reset_emitted(struct si_context *sctx);
#endif
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 915a8eb..bdcfb5b 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -122,20 +122,22 @@ union si_state {
struct si_pm4_state *hs;
struct si_pm4_state *es;
struct si_pm4_state *gs;
struct si_pm4_state *vgt_shader_config;
struct si_pm4_state *vs;
struct si_pm4_state *ps;
} named;
struct si_pm4_state *array[0];
};
+#define SI_NUM_STATES (sizeof(union si_state) / sizeof(struct si_pm4_state *))
+
union si_state_atoms {
struct {
/* The order matters. */
struct r600_atom *prefetch_L2;
struct r600_atom *render_cond;
struct r600_atom *streamout_begin;
struct r600_atom *streamout_enable; /* must be after streamout_begin */
struct r600_atom *framebuffer;
struct r600_atom *msaa_sample_locs;
struct r600_atom *db_render_state;
@@ -260,20 +262,21 @@ struct si_buffer_resources {
#define si_pm4_block_idx(member) \
(offsetof(union si_state, named.member) / sizeof(struct si_pm4_state *))
#define si_pm4_state_changed(sctx, member) \
((sctx)->queued.named.member != (sctx)->emitted.named.member)
#define si_pm4_bind_state(sctx, member, value) \
do { \
(sctx)->queued.named.member = (value); \
+ (sctx)->dirty_states |= 1 << si_pm4_block_idx(member); \
} while(0)
#define si_pm4_delete_state(sctx, member, value) \
do { \
if ((sctx)->queued.named.member == (value)) { \
(sctx)->queued.named.member = NULL; \
} \
si_pm4_free_state(sctx, (struct si_pm4_state *)(value), \
si_pm4_block_idx(member)); \
} while(0)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c80d4d6..cce5f30 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1109,30 +1109,43 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
* this must be called after si_need_cs_space, because we must let
* need_cs_space flush before we add buffers to the buffer list.
*/
if (!si_upload_vertex_buffer_descriptors(sctx))
return;
/* Flush caches before the first state atom, which does L2 prefetches. */
if (sctx->b.flags)
si_emit_cache_flush(sctx);
- /* Emit states. */
+ /* Emit state atoms. */
mask = sctx->dirty_atoms;
while (mask) {
struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
atom->emit(&sctx->b, atom);
}
sctx->dirty_atoms = 0;
- si_pm4_emit_dirty(sctx);
+ /* Emit states. */
+ mask = sctx->dirty_states;
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+ struct si_pm4_state *state = sctx->queued.array[i];
+
+ if (!state || sctx->emitted.array[i] == state)
+ continue;
+
+ si_pm4_emit(sctx, state);
+ sctx->emitted.array[i] = state;
+ }
+ sctx->dirty_states = 0;
+
si_emit_scratch_reloc(sctx);
si_emit_rasterizer_prim_state(sctx);
si_emit_draw_registers(sctx, info);
si_ce_pre_draw_synchronization(sctx);
si_emit_draw_packets(sctx, info, &ib);
si_ce_post_draw_synchronization(sctx);
--
2.7.4
More information about the mesa-dev
mailing list