[Mesa-dev] [PATCH 4/8] i965: Modify dirty bit handling to support 2 pipelines.
Ian Romanick
idr at freedesktop.org
Wed Aug 27 15:13:45 PDT 2014
On 08/27/2014 02:30 PM, Jordan Justen wrote:
> From: Paul Berry <stereotype441 at gmail.com>
>
> The hardware state for compute shaders is almost entirely orthogonal
> to the hardware state for 3D rendering. To avoid sending unnecessary
> state to the hardware, we'll need to have a separate set of state
> atoms for the compute pipeline and the 3D pipeline. That means we
> need to maintain two separate sets of dirty bits to determine which
> state atoms need to be run.
>
> But the dirty bits are not completely independent; for example, if
> BRW_NEW_SURFACES is flagged while doing 3D rendering, then not only do
> we need to re-run 3D state atoms that depend on BRW_NEW_SURFACES, but
> we also need to re-run compute state atoms that depend on
> BRW_NEW_SURFACES. But we'll also need to re-run those state atoms the
> next time the compute pipeline is run.
>
> To accomplish this, we record two sets of dirty bits, one for each
> pipeline. When bits are dirtied (via SET_DIRTY_BIT() or
> SET_DIRTY_ALL()) we set them to the dirty state in both pipelines.
> When brw_state_upload() is run, we clear the dirty bits just for the
> pipeline that was run.
>
> Note that since the number of pipelines is known at compile time to be
> 2, the compiler should unroll the loops in SET_DIRTY_BIT() and
> SET_DIRTY_ALL().
>
> Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 40 ++++++++++++++++++++++++----
> src/mesa/drivers/dri/i965/brw_draw.c | 8 +++---
> src/mesa/drivers/dri/i965/brw_state.h | 4 +--
> src/mesa/drivers/dri/i965/brw_state_upload.c | 14 ++++++----
> 4 files changed, 50 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index a44b77f..bab0f39 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -243,9 +243,33 @@ struct brw_state_flags {
>
>
> /**
> + * Enum representing the different pipelines.
> + */
> +typedef enum {
> + /**
> + * 3D rendering pipeline (vertex through fragment shader).
> + */
> + BRW_PIPELINE_3D,
> +
> + /**
> + * Compute shader pipeline.
> + */
> + BRW_PIPELINE_COMPUTE,
> +
> + BRW_NUM_PIPELINES
> +} brw_pipeline;
> +
> +
> +/**
> * Set one of the bits in a field of brw_state_flags.
> */
> -#define SET_DIRTY_BIT(FIELD, FLAG) brw->state.dirty.FIELD |= (FLAG)
> +#define SET_DIRTY_BIT(FIELD, FLAG) \
> + do { \
> + int which_pipeline; \
> + for (which_pipeline = 0; which_pipeline < BRW_NUM_PIPELINES; \
We use C99 in the driver, so this can be:
for (int which_pipeline = 0; which_pipeline < BRW_NUM_PIPELINES; \
I might also be tempted to change the variable name to which_pipe... so
that it all fits on one line. *shrug*
> + which_pipeline++) \
> + brw->state.pipeline_dirty[which_pipeline].FIELD |= (FLAG); \
> + } while (false)
>
>
> /**
> @@ -253,16 +277,21 @@ struct brw_state_flags {
> */
> #define SET_DIRTY_ALL(FIELD) \
> do { \
> + int which_pipeline; \
> /* ~0 == 0xffffffff, so make sure field is <= 32 bits */ \
> - STATIC_ASSERT(sizeof(brw->state.dirty.FIELD) == 4); \
> - brw->state.dirty.FIELD = ~0; \
> + STATIC_ASSERT(sizeof(brw->state.pipeline_dirty[0].FIELD) == 4); \
> + for (which_pipeline = 0; which_pipeline < BRW_NUM_PIPELINES; \
> + which_pipeline++) \
> + brw->state.pipeline_dirty[which_pipeline].FIELD = ~0; \
> } while (false)
>
>
> /**
> * Check one of the bits in a field of brw_state_flags.
> */
> -#define CHECK_DIRTY_BIT(FIELD, FLAG) ((brw->state.dirty.FIELD & (FLAG)) != 0)
> +#define CHECK_DIRTY_BIT(FIELD, FLAG) \
> + ((brw->state.pipeline_dirty[brw->state.current_pipeline].FIELD & (FLAG)) \
> + != 0)
>
>
> /** Subclass of Mesa vertex program */
> @@ -1071,7 +1100,8 @@ struct brw_context
>
> GLuint NewGLState;
> struct {
> - struct brw_state_flags dirty;
> + struct brw_state_flags pipeline_dirty[BRW_NUM_PIPELINES];
> + brw_pipeline current_pipeline;
> } state;
>
> struct brw_cache cache;
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 2b773c1..3bdbb43 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -447,9 +447,9 @@ retry:
> * *_set_prim or intel_batchbuffer_flush(), which only impacts
> * brw->state.dirty.brw.
> */
> - if (brw->state.dirty.brw) {
> + if (brw->state.pipeline_dirty[BRW_PIPELINE_3D].brw) {
> brw->no_batch_wrap = true;
> - brw_upload_state(brw);
> + brw_upload_state(brw, BRW_PIPELINE_3D);
> }
>
> brw_emit_prim(brw, &prims[i], brw->primitive);
> @@ -480,8 +480,8 @@ retry:
> /* Now that we know we haven't run out of aperture space, we can safely
> * reset the dirty bits.
> */
> - if (brw->state.dirty.brw)
> - brw_clear_dirty_bits(brw);
> + if (brw->state.pipeline_dirty[BRW_PIPELINE_3D].brw)
> + brw_clear_dirty_bits(brw, BRW_PIPELINE_3D);
> }
>
> if (brw->always_flush_batch)
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index bbaa85c..69657c1 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -160,8 +160,8 @@ brw_depthbuffer_format(struct brw_context *brw);
> /***********************************************************************
> * brw_state.c
> */
> -void brw_upload_state(struct brw_context *brw);
> -void brw_clear_dirty_bits(struct brw_context *brw);
> +void brw_upload_state(struct brw_context *brw, brw_pipeline pipeline);
> +void brw_clear_dirty_bits(struct brw_context *brw, brw_pipeline pipeline);
> void brw_init_state(struct brw_context *brw);
> void brw_destroy_state(struct brw_context *brw);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 7324274..8e45f29 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -389,7 +389,8 @@ void brw_init_state( struct brw_context *brw )
> /* Make sure that brw->state.dirty.brw has enough bits to hold all possible
> * dirty flags.
> */
> - STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->state.dirty.brw));
> + STATIC_ASSERT(BRW_NUM_STATE_BITS <=
> + 8 * sizeof(brw->state.pipeline_dirty[0].brw));
>
> ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
> ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
> @@ -565,13 +566,16 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map)
> /***********************************************************************
> * Emit all state:
> */
> -void brw_upload_state(struct brw_context *brw)
> +void brw_upload_state(struct brw_context *brw, brw_pipeline pipeline)
> {
> struct gl_context *ctx = &brw->ctx;
> - struct brw_state_flags *state = &brw->state.dirty;
> + struct brw_state_flags *state = &brw->state.pipeline_dirty[pipeline];
> int i;
> static int dirty_count = 0;
>
> + assert(0 <= pipeline && pipeline < BRW_NUM_PIPELINES);
> + brw->state.current_pipeline = pipeline;
> +
> SET_DIRTY_BIT(mesa, brw->NewGLState);
> brw->NewGLState = 0;
>
> @@ -677,8 +681,8 @@ void brw_upload_state(struct brw_context *brw)
> * brw_upload_state() call.
> */
> void
> -brw_clear_dirty_bits(struct brw_context *brw)
> +brw_clear_dirty_bits(struct brw_context *brw, brw_pipeline pipeline)
> {
> - struct brw_state_flags *state = &brw->state.dirty;
> + struct brw_state_flags *state = &brw->state.pipeline_dirty[pipeline];
> memset(state, 0, sizeof(*state));
> }
>
More information about the mesa-dev
mailing list