[Mesa-dev] [PATCH 3/4] intel/decoder: tools: Use engine for decoding batch instructions

Tue Oct 30 16:21:11 UTC 2018

The engine doesn't change inside a batch but it can change between batches, 
right? The options are either to pass the engine around or have it in the 
decoder context but it would need to be updated in the there before decoding a 
new batch.

On Tue, Oct 30, 2018 at 02:58:32PM +0000, Lionel Landwerlin wrote:
> Since a batch will be given to a specific engine, the engine won't change
> change while decoding.
> So I would just store the engine into gen_batch_decode_ctx and set it in the
> init function, that'll reduce the diff a bit.
> 
> Then just introduce a local find_instruction() function that calls
> gen_spec_find_instruction(ctx->spec, ctx->engine, p).
> 
> Thanks,
> 
> -
> Lionel
> 
> On 30/10/2018 14:32, Toni Lönnberg wrote:
> > The engine to which the batch was sent to is now passed along when decoding the
> > batch. This is needed so that we can distinguish between instructions as the
> > render and video pipe share some of the instruction opcodes.
> > ---
> >   src/intel/common/gen_batch_decoder.c          | 21 ++++++++++---------
> >   src/intel/common/gen_decoder.c                |  4 ++--
> >   src/intel/common/gen_decoder.h                |  3 ++-
> >   src/intel/tools/aubinator.c                   |  5 ++---
> >   src/intel/tools/aubinator_error_decode.c      | 19 ++++++++++++++++-
> >   src/mesa/drivers/dri/i965/intel_batchbuffer.c |  3 ++-
> >   6 files changed, 37 insertions(+), 18 deletions(-)
> > 
> > diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
> > index 63f04627572..8bd256d10e9 100644
> > --- a/src/intel/common/gen_batch_decoder.c
> > +++ b/src/intel/common/gen_batch_decoder.c
> > @@ -195,7 +195,7 @@ ctx_print_buffer(struct gen_batch_decode_ctx *ctx,
> >   static void
> >   handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      struct gen_field_iterator iter;
> >      gen_field_iterator_init(&iter, inst, p, 0, false);
> > @@ -309,7 +309,7 @@ static void
> >   handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> >                                          const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      struct gen_group *desc =
> >         gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
> > @@ -373,7 +373,7 @@ static void
> >   handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx,
> >                                 const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      struct gen_group *vbs = gen_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
> >      struct gen_batch_decode_bo vb = {};
> > @@ -436,7 +436,7 @@ static void
> >   handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
> >                               const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      struct gen_batch_decode_bo ib = {};
> >      uint32_t ib_size = 0;
> > @@ -486,7 +486,7 @@ handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
> >   static void
> >   decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      uint64_t ksp = 0;
> >      bool is_simd8 = false; /* vertex shaders on Gen8+ only */
> > @@ -528,7 +528,7 @@ decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   static void
> >   decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      uint64_t ksp[3] = {0, 0, 0};
> >      bool enabled[3] = {false, false, false};
> > @@ -576,7 +576,7 @@ decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   static void
> >   decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      struct gen_group *body =
> >         gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
> > @@ -658,7 +658,7 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
> >                                 const char *struct_type, const uint32_t *p,
> >                                 int count)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
> >      uint32_t state_offset = 0;
> > @@ -794,6 +794,7 @@ struct custom_decoder {
> >   void
> >   gen_print_batch(struct gen_batch_decode_ctx *ctx,
> > +                enum gen_engine engine,
> >                   const uint32_t *batch, uint32_t batch_size,
> >                   uint64_t batch_addr)
> >   {
> > @@ -802,7 +803,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
> >      struct gen_group *inst;
> >      for (p = batch; p < end; p += length) {
> > -      inst = gen_spec_find_instruction(ctx->spec, p);
> > +      inst = gen_spec_find_instruction(ctx->spec, engine, p);
> >         length = gen_group_get_length(inst, p);
> >         assert(inst == NULL || length > 0);
> >         length = MAX2(1, length);
> > @@ -871,7 +872,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
> >               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
> >                       next_batch.addr);
> >            } else {
> > -            gen_print_batch(ctx, next_batch.map, next_batch.size,
> > +            gen_print_batch(ctx, engine, next_batch.map, next_batch.size,
> >                               next_batch.addr);
> >            }
> >            if (second_level) {
> > diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
> > index f4f8af3c5ae..b7d40558937 100644
> > --- a/src/intel/common/gen_decoder.c
> > +++ b/src/intel/common/gen_decoder.c
> > @@ -717,12 +717,12 @@ void gen_spec_destroy(struct gen_spec *spec)
> >   }
> >   struct gen_group *
> > -gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p)
> > +gen_spec_find_instruction(struct gen_spec *spec, enum gen_engine engine, const uint32_t *p)
> >   {
> >      hash_table_foreach(spec->commands, entry) {
> >         struct gen_group *command = entry->data;
> >         uint32_t opcode = *p & command->opcode_mask;
> > -      if (opcode == command->opcode)
> > +      if ((command->engine & engine) && opcode == command->opcode)
> >            return command;
> >      }
> > diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
> > index b179ae8f380..4e57ecc5ac9 100644
> > --- a/src/intel/common/gen_decoder.h
> > +++ b/src/intel/common/gen_decoder.h
> > @@ -57,7 +57,7 @@ struct gen_spec *gen_spec_load_from_path(const struct gen_device_info *devinfo,
> >                                            const char *path);
> >   void gen_spec_destroy(struct gen_spec *spec);
> >   uint32_t gen_spec_get_gen(struct gen_spec *spec);
> > -struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p);
> > +struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, enum gen_engine engine, const uint32_t *p);
> >   struct gen_group *gen_spec_find_register(struct gen_spec *spec, uint32_t offset);
> >   struct gen_group *gen_spec_find_register_by_name(struct gen_spec *spec, const char *name);
> >   struct gen_enum *gen_spec_find_enum(struct gen_spec *spec, const char *name);
> > @@ -249,6 +249,7 @@ void gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx *ctx);
> >   void gen_print_batch(struct gen_batch_decode_ctx *ctx,
> > +                     enum gen_engine engine,
> >                        const uint32_t *batch, uint32_t batch_size,
> >                        uint64_t batch_addr);
> > diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
> > index 1458875a313..9f3600e42ef 100644
> > --- a/src/intel/tools/aubinator.c
> > +++ b/src/intel/tools/aubinator.c
> > @@ -158,8 +158,7 @@ handle_execlist_write(void *user_data, enum gen_engine engine, uint64_t context_
> >         batch_ctx.get_bo = aub_mem_get_ggtt_bo;
> >      }
> > -   (void)engine; /* TODO */
> > -   gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head,
> > +   gen_print_batch(&batch_ctx, engine, commands, ring_buffer_tail - ring_buffer_head,
> >                      0);
> >      aub_mem_clear_bo_maps(&mem);
> >   }
> > @@ -171,7 +170,7 @@ handle_ring_write(void *user_data, enum gen_engine engine,
> >      batch_ctx.user_data = &mem;
> >      batch_ctx.get_bo = aub_mem_get_ggtt_bo;
> > -   gen_print_batch(&batch_ctx, data, data_len, 0);
> > +   gen_print_batch(&batch_ctx, engine, data, data_len, 0);
> >      aub_mem_clear_bo_maps(&mem);
> >   }
> > diff --git a/src/intel/tools/aubinator_error_decode.c b/src/intel/tools/aubinator_error_decode.c
> > index c581414eb2c..c69dfb84f13 100644
> > --- a/src/intel/tools/aubinator_error_decode.c
> > +++ b/src/intel/tools/aubinator_error_decode.c
> > @@ -111,6 +111,17 @@ static const struct ring_register_mapping fault_registers[] = {
> >      { VECS, 0, "VECS_FAULT_REG" },
> >   };
> > +static int class_to_engine(unsigned int class)
> > +{
> > +   static const int map[] = {
> > +      [RCS] = GEN_ENGINE_RENDER,
> > +      [BCS] = GEN_ENGINE_BLITTER,
> > +      [VCS] = GEN_ENGINE_VIDEO,
> > +   };
> > +
> > +   return map[class];
> > +}
> > +
> >   static int ring_name_to_class(const char *ring_name,
> >                                 unsigned int *class)
> >   {
> > @@ -601,6 +612,11 @@ read_data_file(FILE *file)
> >      for (int s = 0; s < num_sections; s++) {
> > +      unsigned int class;
> > +      enum gen_engine engine;
> > +      ring_name_to_class(sections[s].ring_name, &class);
> > +      engine = class_to_engine(class);
> > +
> >         printf("--- %s (%s) at 0x%08x %08x\n",
> >                sections[s].buffer_name, sections[s].ring_name,
> >                (unsigned) (sections[s].gtt_offset >> 32),
> > @@ -610,7 +626,8 @@ read_data_file(FILE *file)
> >             strcmp(sections[s].buffer_name, "batch buffer") == 0 ||
> >             strcmp(sections[s].buffer_name, "ring buffer") == 0 ||
> >             strcmp(sections[s].buffer_name, "HW Context") == 0) {
> > -         gen_print_batch(&batch_ctx, sections[s].data,
> > +         gen_print_batch(&batch_ctx, engine,
> > +                         sections[s].data,
> >                            sections[s].dword_count * 4,
> >                            sections[s].gtt_offset);
> >         }
> > diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> > index 4363b146150..a0d2f8a98f5 100644
> > --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> > +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> > @@ -813,7 +813,8 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
> >      }
> >      if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
> > -      gen_print_batch(&batch->decoder, batch->batch.map,
> > +      gen_print_batch(&batch->decoder, GEN_ENGINE_RENDER,
> > +                      batch->batch.map,
> >                         4 * USED_BATCH(*batch),
> >                         batch->batch.bo->gtt_offset);
> >      }
> 
>