[Mesa-dev] [PATCH 4/4] r600g: use a bitfield to track dirty atoms

Marek Olšák maraeo at gmail.com
Mon Aug 10 03:47:46 PDT 2015


Please never use "long" in Mesa. It only has 32 bits on 32-bit
systems. uint64_t is generally used for all unsigned 64-bit variables
and "llu" or "ull" is the number suffix. Also, the 64-bit ctz is ctzll
and a proper HAVE macro should be added for it too.

The general idea is nice, thanks.

The number of atoms can be cut down by merging all scissors states
into 1 atom (just as there is 1 atom for 16 textures, there can be 1
atom for 16 scissors) and the same applies to viewport states. This
would simplify the code, because all dirty bits would fit into 64 bits
and there would even be some space left.

Patches 1-3:
Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek

On Sun, Aug 9, 2015 at 11:42 PM, Grazvydas Ignotas <notasas at gmail.com> wrote:
> r600 currently has 73 atoms and looping through their dirty flags has
> become costly because checking each flag requires a pointer
> dereference before the read. To avoid having to do that add additional
> bitfield which can be checked really quickly thanks to tzcnt instruction.
>
> id field was added to struct r600_atom but that doesn't affect memory
> usage for both 32 and 64 bit CPUs because it was stuffed into padding.
>
> The performance improvement is ~2% for benchmarks that can have FPS in
> the thousands but is hardly measurable in "real" programs.
> ---
>  src/gallium/drivers/r600/r600_hw_context.c    | 12 +++----
>  src/gallium/drivers/r600/r600_pipe.h          | 45 +++++++++++++++++++++++++++
>  src/gallium/drivers/r600/r600_state_common.c  |  8 ++---
>  src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
>  4 files changed, 56 insertions(+), 10 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index c048a71..6445151 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>                 unsigned i;
>
>                 /* The number of dwords all the dirty states would take. */
> -               for (i = 0; i < R600_NUM_ATOMS; i++) {
> -                       if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
> -                               num_dw += ctx->atoms[i]->num_dw;
> -                               if (ctx->screen->b.trace_bo) {
> -                                       num_dw += R600_TRACE_CS_DWORDS;
> -                               }
> +               i = r600_next_dirty_atom(ctx, 0);
> +               while (i < R600_NUM_ATOMS) {
> +                       num_dw += ctx->atoms[i]->num_dw;
> +                       if (ctx->screen->b.trace_bo) {
> +                               num_dw += R600_TRACE_CS_DWORDS;
>                         }
> +                       i = r600_next_dirty_atom(ctx, i + 1);
>                 }
>
>                 /* The upper-bound of how much space a draw command would take. */
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index 8b61269..5d10bb4 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -85,6 +85,9 @@
>  #define R600_BIG_ENDIAN 0
>  #endif
>
> +#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
> +#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
> +
>  struct r600_context;
>  struct r600_bytecode;
>  struct r600_shader_key;
> @@ -426,6 +429,8 @@ struct r600_context {
>
>         /* State binding slots are here. */
>         struct r600_atom                *atoms[R600_NUM_ATOMS];
> +       /* Dirty atom bitmask for fast tests */
> +       unsigned long                   dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
>         /* States for CS initialization. */
>         struct r600_command_buffer      start_cs_cmd; /* invariant state mostly */
>         /** Compute specific registers initializations.  The start_cs_cmd atom
> @@ -502,7 +507,18 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx,
>                                        struct r600_atom *atom,
>                                        bool dirty)
>  {
> +       unsigned long mask;
> +       unsigned int w;
> +
>         atom->dirty = dirty;
> +
> +       assert(atom->id != 0);
> +       w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
> +       mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
> +       if (dirty)
> +               rctx->dirty_atoms[w] |= mask;
> +       else
> +               rctx->dirty_atoms[w] &= ~mask;
>  }
>
>  static inline void r600_mark_atom_dirty(struct r600_context *rctx,
> @@ -511,6 +527,35 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
>         r600_set_atom_dirty(rctx, atom, true);
>  }
>
> +static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
> +                                               unsigned int id)
> +{
> +#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
> +       unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
> +       unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
> +       unsigned long bits, mask = (1ul << bit) - 1;
> +
> +       for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
> +               bits = rctx->dirty_atoms[w] & ~mask;
> +               if (bits == 0)
> +                       continue;
> +               return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
> +       }
> +
> +       return R600_NUM_ATOMS;
> +#else
> +       for (; id < R600_NUM_ATOMS; id++) {
> +               bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
> +                       (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
> +               assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
> +               if (dirty)
> +                       break;
> +       }
> +
> +       return id;
> +#endif
> +}
> +
>  void r600_trace_emit(struct r600_context *rctx);
>
>  static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index a4238a2..8d0942f 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -54,6 +54,7 @@ void r600_add_atom(struct r600_context *rctx,
>         assert(id < R600_NUM_ATOMS);
>         assert(rctx->atoms[id] == NULL);
>         rctx->atoms[id] = atom;
> +       atom->id = id;
>         atom->dirty = false;
>  }
>
> @@ -1476,11 +1477,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>         r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
>         r600_flush_emit(rctx);
>
> -       for (i = 0; i < R600_NUM_ATOMS; i++) {
> -               if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
> -                       continue;
> -               }
> +       i = r600_next_dirty_atom(rctx, 0);
> +       while (i < R600_NUM_ATOMS) {
>                 r600_emit_atom(rctx, rctx->atoms[i]);
> +               i = r600_next_dirty_atom(rctx, i + 1);
>         }
>
>         if (rctx->b.chip_class == CAYMAN) {
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 717e248..17da41e 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -314,6 +314,7 @@ struct r600_common_screen {
>  struct r600_atom {
>         void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
>         unsigned                num_dw;
> +       unsigned short          id;     /* used by r600 only */
>         bool                    dirty;
>  };
>
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list