[Mesa-dev] [PATCH 4/4] r600g: use a bitfield to track dirty atoms

Grazvydas Ignotas notasas at gmail.com
Mon Aug 10 17:20:00 PDT 2015


On Mon, Aug 10, 2015 at 1:47 PM, Marek Olšák <maraeo at gmail.com> wrote:
> Please never use "long" in Mesa. It only has 32 bits on 32-bit
> systems. uint64_t is generally used for all unsigned 64-bit variables
> and "llu" or "ull" is the number suffix. Also, the 64-bit ctz is ctzll
> and a proper HAVE macro should be added for it too.

Well I intentionally chose long to have a machine-word sized type, for
uint64_t gcc would have to emit multiple instructions for bit setting
and ctzll on 32bit CPUs (actually a library call for ctzll from what I
see), so the idea was to use longer array there instead.

> The general idea is nice, thanks.
>
> The number of atoms can be cut down by merging all scissors states
> into 1 atom (just as there is 1 atom for 16 textures, there can be 1
> atom for 16 scissors) and the same applies to viewport states. This
> would simplify the code, because all dirty bits would fit into 64 bits
> and there would even be some space left.

This sounds good, I'll try to work on it when I can find time.

Also, do you know why the atoms start from 4 for r600, and 0-3 seem to
be unused?
(also CC'ing Jerome, seems to be his code)

Gražvydas


>
> Patches 1-3:
> Reviewed-by: Marek Olšák <marek.olsak at amd.com>
>
> Marek
>
> On Sun, Aug 9, 2015 at 11:42 PM, Grazvydas Ignotas <notasas at gmail.com> wrote:
>> r600 currently has 73 atoms and looping through their dirty flags has
>> become costly because checking each flag requires a pointer
>> dereference before the read. To avoid having to do that add additional
>> bitfield which can be checked really quickly thanks to tzcnt instruction.
>>
>> id field was added to struct r600_atom but that doesn't affect memory
>> usage for both 32 and 64 bit CPUs because it was stuffed into padding.
>>
>> The performance improvement is ~2% for benchmarks that can have FPS in
>> the thousands but is hardly measurable in "real" programs.
>> ---
>>  src/gallium/drivers/r600/r600_hw_context.c    | 12 +++----
>>  src/gallium/drivers/r600/r600_pipe.h          | 45 +++++++++++++++++++++++++++
>>  src/gallium/drivers/r600/r600_state_common.c  |  8 ++---
>>  src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
>>  4 files changed, 56 insertions(+), 10 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
>> index c048a71..6445151 100644
>> --- a/src/gallium/drivers/r600/r600_hw_context.c
>> +++ b/src/gallium/drivers/r600/r600_hw_context.c
>> @@ -51,13 +51,13 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>>                 unsigned i;
>>
>>                 /* The number of dwords all the dirty states would take. */
>> -               for (i = 0; i < R600_NUM_ATOMS; i++) {
>> -                       if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
>> -                               num_dw += ctx->atoms[i]->num_dw;
>> -                               if (ctx->screen->b.trace_bo) {
>> -                                       num_dw += R600_TRACE_CS_DWORDS;
>> -                               }
>> +               i = r600_next_dirty_atom(ctx, 0);
>> +               while (i < R600_NUM_ATOMS) {
>> +                       num_dw += ctx->atoms[i]->num_dw;
>> +                       if (ctx->screen->b.trace_bo) {
>> +                               num_dw += R600_TRACE_CS_DWORDS;
>>                         }
>> +                       i = r600_next_dirty_atom(ctx, i + 1);
>>                 }
>>
>>                 /* The upper-bound of how much space a draw command would take. */
>> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
>> index 8b61269..5d10bb4 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.h
>> +++ b/src/gallium/drivers/r600/r600_pipe.h
>> @@ -85,6 +85,9 @@
>>  #define R600_BIG_ENDIAN 0
>>  #endif
>>
>> +#define R600_DIRTY_ATOM_WORD_BITS (sizeof(unsigned long) * 8)
>> +#define R600_DIRTY_ATOM_ARRAY_LEN DIV_ROUND_UP(R600_NUM_ATOMS, R600_DIRTY_ATOM_WORD_BITS)
>> +
>>  struct r600_context;
>>  struct r600_bytecode;
>>  struct r600_shader_key;
>> @@ -426,6 +429,8 @@ struct r600_context {
>>
>>         /* State binding slots are here. */
>>         struct r600_atom                *atoms[R600_NUM_ATOMS];
>> +       /* Dirty atom bitmask for fast tests */
>> +       unsigned long                   dirty_atoms[R600_DIRTY_ATOM_ARRAY_LEN];
>>         /* States for CS initialization. */
>>         struct r600_command_buffer      start_cs_cmd; /* invariant state mostly */
>>         /** Compute specific registers initializations.  The start_cs_cmd atom
>> @@ -502,7 +507,18 @@ static inline void r600_set_atom_dirty(struct r600_context *rctx,
>>                                        struct r600_atom *atom,
>>                                        bool dirty)
>>  {
>> +       unsigned long mask;
>> +       unsigned int w;
>> +
>>         atom->dirty = dirty;
>> +
>> +       assert(atom->id != 0);
>> +       w = atom->id / R600_DIRTY_ATOM_WORD_BITS;
>> +       mask = 1ul << (atom->id % R600_DIRTY_ATOM_WORD_BITS);
>> +       if (dirty)
>> +               rctx->dirty_atoms[w] |= mask;
>> +       else
>> +               rctx->dirty_atoms[w] &= ~mask;
>>  }
>>
>>  static inline void r600_mark_atom_dirty(struct r600_context *rctx,
>> @@ -511,6 +527,35 @@ static inline void r600_mark_atom_dirty(struct r600_context *rctx,
>>         r600_set_atom_dirty(rctx, atom, true);
>>  }
>>
>> +static inline unsigned int r600_next_dirty_atom(struct r600_context *rctx,
>> +                                               unsigned int id)
>> +{
>> +#if !defined(DEBUG) && defined(HAVE___BUILTIN_CTZ)
>> +       unsigned int w = id / R600_DIRTY_ATOM_WORD_BITS;
>> +       unsigned int bit = id % R600_DIRTY_ATOM_WORD_BITS;
>> +       unsigned long bits, mask = (1ul << bit) - 1;
>> +
>> +       for (; w < R600_DIRTY_ATOM_ARRAY_LEN; w++, mask = 0ul) {
>> +               bits = rctx->dirty_atoms[w] & ~mask;
>> +               if (bits == 0)
>> +                       continue;
>> +               return w * R600_DIRTY_ATOM_WORD_BITS + __builtin_ctzl(bits);
>> +       }
>> +
>> +       return R600_NUM_ATOMS;
>> +#else
>> +       for (; id < R600_NUM_ATOMS; id++) {
>> +               bool dirty = !!(rctx->dirty_atoms[id / R600_DIRTY_ATOM_WORD_BITS] &
>> +                       (1ul << (id % R600_DIRTY_ATOM_WORD_BITS)));
>> +               assert(dirty == (rctx->atoms[id] && rctx->atoms[id]->dirty));
>> +               if (dirty)
>> +                       break;
>> +       }
>> +
>> +       return id;
>> +#endif
>> +}
>> +
>>  void r600_trace_emit(struct r600_context *rctx);
>>
>>  static inline void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
>> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
>> index a4238a2..8d0942f 100644
>> --- a/src/gallium/drivers/r600/r600_state_common.c
>> +++ b/src/gallium/drivers/r600/r600_state_common.c
>> @@ -54,6 +54,7 @@ void r600_add_atom(struct r600_context *rctx,
>>         assert(id < R600_NUM_ATOMS);
>>         assert(rctx->atoms[id] == NULL);
>>         rctx->atoms[id] = atom;
>> +       atom->id = id;
>>         atom->dirty = false;
>>  }
>>
>> @@ -1476,11 +1477,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>>         r600_need_cs_space(rctx, ib.user_buffer ? 5 : 0, TRUE);
>>         r600_flush_emit(rctx);
>>
>> -       for (i = 0; i < R600_NUM_ATOMS; i++) {
>> -               if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) {
>> -                       continue;
>> -               }
>> +       i = r600_next_dirty_atom(rctx, 0);
>> +       while (i < R600_NUM_ATOMS) {
>>                 r600_emit_atom(rctx, rctx->atoms[i]);
>> +               i = r600_next_dirty_atom(rctx, i + 1);
>>         }
>>
>>         if (rctx->b.chip_class == CAYMAN) {
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
>> index 717e248..17da41e 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
>> @@ -314,6 +314,7 @@ struct r600_common_screen {
>>  struct r600_atom {
>>         void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
>>         unsigned                num_dw;
>> +       unsigned short          id;     /* used by r600 only */
>>         bool                    dirty;
>>  };
>>
>> --
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list