[Mesa-dev] [PATCH v2 03/13] intel/decoder: tools: Use engine for decoding batch instructions

Tue Nov 6 17:26:03 UTC 2018

On Tue, Nov 06, 2018 at 12:11:16PM +0000, Lionel Landwerlin wrote:
> On 31/10/2018 13:12, Toni Lönnberg wrote:
> > The engine to which the batch was sent to is now set to the decoder context when
> > decoding the batch. This is needed so that we can distinguish between
> > instructions as the render and video pipe share some of the instruction opcodes.
> > 
> > v2: The engine is now in the decoder context and the batch decoder uses a local
> > function for finding the instruction for an engine.
> > ---
> >   src/intel/common/gen_batch_decoder.c     | 25 +++++++++++++++---------
> >   src/intel/common/gen_decoder.c           |  7 +++++--
> >   src/intel/common/gen_decoder.h           |  6 +++++-
> >   src/intel/tools/aubinator.c              |  3 ++-
> >   src/intel/tools/aubinator_error_decode.c | 16 +++++++++++++++
> >   5 files changed, 44 insertions(+), 13 deletions(-)
> > 
> > diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
> > index 63f04627572..d5482a4d455 100644
> > --- a/src/intel/common/gen_batch_decoder.c
> > +++ b/src/intel/common/gen_batch_decoder.c
> > @@ -45,6 +45,7 @@ gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx,
> >      ctx->fp = fp;
> >      ctx->flags = flags;
> >      ctx->max_vbo_decoded_lines = -1; /* No limit! */
> > +   ctx->engine = I915_ENGINE_CLASS_RENDER;
> >      if (xml_path == NULL)
> >         ctx->spec = gen_spec_load(devinfo);
> > @@ -192,10 +193,16 @@ ctx_print_buffer(struct gen_batch_decode_ctx *ctx,
> >      fprintf(ctx->fp, "\n");
> >   }
> > +static struct gen_group *
> > +gen_ctx_find_instruction(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> > +{
> > +   return gen_spec_find_instruction(ctx->spec, ctx->engine, p);
> > +}
> > +
> >   static void
> >   handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      struct gen_field_iterator iter;
> >      gen_field_iterator_init(&iter, inst, p, 0, false);
> > @@ -309,7 +316,7 @@ static void
> >   handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> >                                          const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      struct gen_group *desc =
> >         gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
> > @@ -373,7 +380,7 @@ static void
> >   handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx,
> >                                 const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      struct gen_group *vbs = gen_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
> >      struct gen_batch_decode_bo vb = {};
> > @@ -436,7 +443,7 @@ static void
> >   handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
> >                               const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      struct gen_batch_decode_bo ib = {};
> >      uint32_t ib_size = 0;
> > @@ -486,7 +493,7 @@ handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
> >   static void
> >   decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      uint64_t ksp = 0;
> >      bool is_simd8 = false; /* vertex shaders on Gen8+ only */
> > @@ -528,7 +535,7 @@ decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   static void
> >   decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      uint64_t ksp[3] = {0, 0, 0};
> >      bool enabled[3] = {false, false, false};
> > @@ -576,7 +583,7 @@ decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   static void
> >   decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      struct gen_group *body =
> >         gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
> > @@ -658,7 +665,7 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
> >                                 const char *struct_type, const uint32_t *p,
> >                                 int count)
> >   {
> > -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > +   struct gen_group *inst = gen_ctx_find_instruction(ctx, p);
> >      uint32_t state_offset = 0;
> > @@ -802,7 +809,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
> >      struct gen_group *inst;
> >      for (p = batch; p < end; p += length) {
> > -      inst = gen_spec_find_instruction(ctx->spec, p);
> > +      inst = gen_ctx_find_instruction(ctx, p);
> >         length = gen_group_get_length(inst, p);
> >         assert(inst == NULL || length > 0);
> >         length = MAX2(1, length);
> > diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
> > index 43e5b6e3561..bb1e0d3980f 100644
> > --- a/src/intel/common/gen_decoder.c
> > +++ b/src/intel/common/gen_decoder.c
> > @@ -719,12 +719,15 @@ void gen_spec_destroy(struct gen_spec *spec)
> >   }
> >   struct gen_group *
> > -gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p)
> > +gen_spec_find_instruction(struct gen_spec *spec,
> > +                          enum drm_i915_gem_engine_class engine,
> > +                          const uint32_t *p)
> >   {
> >      hash_table_foreach(spec->commands, entry) {
> >         struct gen_group *command = entry->data;
> >         uint32_t opcode = *p & command->opcode_mask;
> > -      if (opcode == command->opcode)
> > +      if ((command->engine & I915_ENGINE_CLASS_TO_MASK(engine)) &&
> > +           opcode == command->opcode)
> >            return command;
> >      }
> > diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
> > index e9efa6d1451..dfb07b24a9f 100644
> > --- a/src/intel/common/gen_decoder.h
> > +++ b/src/intel/common/gen_decoder.h
> > @@ -55,7 +55,9 @@ struct gen_spec *gen_spec_load_from_path(const struct gen_device_info *devinfo,
> >                                            const char *path);
> >   void gen_spec_destroy(struct gen_spec *spec);
> >   uint32_t gen_spec_get_gen(struct gen_spec *spec);
> > -struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p);
> > +struct gen_group *gen_spec_find_instruction(struct gen_spec *spec,
> > +                                            enum drm_i915_gem_engine_class engine,
> > +                                            const uint32_t *p);
> >   struct gen_group *gen_spec_find_register(struct gen_spec *spec, uint32_t offset);
> >   struct gen_group *gen_spec_find_register_by_name(struct gen_spec *spec, const char *name);
> >   struct gen_enum *gen_spec_find_enum(struct gen_spec *spec, const char *name);
> > @@ -232,6 +234,8 @@ struct gen_batch_decode_ctx {
> >      uint64_t instruction_base;
> >      int max_vbo_decoded_lines;
> > +
> > +   enum drm_i915_gem_engine_class engine;
> >   };
> >   void gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx,
> > diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
> > index ed758a62503..f3bd006a8ab 100644
> > --- a/src/intel/tools/aubinator.c
> > +++ b/src/intel/tools/aubinator.c
> > @@ -157,7 +157,7 @@ handle_execlist_write(void *user_data, enum drm_i915_gem_engine_class engine, ui
> >         batch_ctx.get_bo = aub_mem_get_ggtt_bo;
> >      }
> > -   (void)engine; /* TODO */
> > +   batch_ctx.engine = engine;
> >      gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head,
> >                      0);
> >      aub_mem_clear_bo_maps(&mem);
> > @@ -170,6 +170,7 @@ handle_ring_write(void *user_data, enum drm_i915_gem_engine_class engine,
> >      batch_ctx.user_data = &mem;
> >      batch_ctx.get_bo = aub_mem_get_ggtt_bo;
> > +   batch_ctx.engine = engine;
> >      gen_print_batch(&batch_ctx, data, data_len, 0);
> >      aub_mem_clear_bo_maps(&mem);
> > diff --git a/src/intel/tools/aubinator_error_decode.c b/src/intel/tools/aubinator_error_decode.c
> > index c581414eb2c..e7f900e4f00 100644
> > --- a/src/intel/tools/aubinator_error_decode.c
> > +++ b/src/intel/tools/aubinator_error_decode.c
> > @@ -111,6 +111,18 @@ static const struct ring_register_mapping fault_registers[] = {
> >      { VECS, 0, "VECS_FAULT_REG" },
> >   };
> > +static enum drm_i915_gem_engine_class class_to_engine(unsigned int class)
> > +{
> > +   static const enum drm_i915_gem_engine_class map[] = {
> > +      [RCS] = I915_ENGINE_CLASS_RENDER,
> > +      [BCS] = I915_ENGINE_CLASS_COPY,
> > +      [VCS] = I915_ENGINE_CLASS_VIDEO,
> > +      [VECS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
> > +   };
> > +
> > +   return map[class];
> > +}
> > +
> 
> 
> Rather than adding a new function, I would modify the existing one.
> 
> They already deal with class/instance tuples so the change should be pretty
> mechanical.

Which function are you talking about?

> Otherwise the rest of the change looks good to me.
> 
> 
> Thanks for doing this.
> 
> 
> -
> 
> Lionel
> 
> 
> >   static int ring_name_to_class(const char *ring_name,
> >                                 unsigned int *class)
> >   {
> > @@ -601,6 +613,9 @@ read_data_file(FILE *file)
> >      for (int s = 0; s < num_sections; s++) {
> > +      unsigned int class;
> > +      ring_name_to_class(sections[s].ring_name, &class);
> > +
> >         printf("--- %s (%s) at 0x%08x %08x\n",
> >                sections[s].buffer_name, sections[s].ring_name,
> >                (unsigned) (sections[s].gtt_offset >> 32),
> > @@ -610,6 +625,7 @@ read_data_file(FILE *file)
> >             strcmp(sections[s].buffer_name, "batch buffer") == 0 ||
> >             strcmp(sections[s].buffer_name, "ring buffer") == 0 ||
> >             strcmp(sections[s].buffer_name, "HW Context") == 0) {
> > +         batch_ctx.engine = class_to_engine(class);
> >            gen_print_batch(&batch_ctx, sections[s].data,
> >                            sections[s].dword_count * 4,
> >                            sections[s].gtt_offset);
> 
>