[Mesa-dev] [PATCH 3/4] intel/decoder: tools: Use engine for decoding batch instructions

Tue Oct 30 17:11:05 UTC 2018

Yeah, true.
Maybe put a default value on init (likely render) and let the caller of 
gen_print_batch() set the field on the line above.
I could just like to leave the decode somewhat agnostic about what type 
of engine it's dealing with.

-
Lionel

On 30/10/2018 16:21, Toni Lönnberg wrote:
> The engine doesn't change inside a batch but it can change between batches,
> right? The options are either to pass the engine around or have it in the
> decoder context but it would need to be updated in the there before decoding a
> new batch.
>
> On Tue, Oct 30, 2018 at 02:58:32PM +0000, Lionel Landwerlin wrote:
>> Since a batch will be given to a specific engine, the engine won't change
>> change while decoding.
>> So I would just store the engine into gen_batch_decode_ctx and set it in the
>> init function, that'll reduce the diff a bit.
>>
>> Then just introduce a local find_instruction() function that calls
>> gen_spec_find_instruction(ctx->spec, ctx->engine, p).
>>
>> Thanks,
>>
>> -
>> Lionel
>>
>> On 30/10/2018 14:32, Toni Lönnberg wrote:
>>> The engine to which the batch was sent to is now passed along when decoding the
>>> batch. This is needed so that we can distinguish between instructions as the
>>> render and video pipe share some of the instruction opcodes.
>>> ---
>>>    src/intel/common/gen_batch_decoder.c          | 21 ++++++++++---------
>>>    src/intel/common/gen_decoder.c                |  4 ++--
>>>    src/intel/common/gen_decoder.h                |  3 ++-
>>>    src/intel/tools/aubinator.c                   |  5 ++---
>>>    src/intel/tools/aubinator_error_decode.c      | 19 ++++++++++++++++-
>>>    src/mesa/drivers/dri/i965/intel_batchbuffer.c |  3 ++-
>>>    6 files changed, 37 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
>>> index 63f04627572..8bd256d10e9 100644
>>> --- a/src/intel/common/gen_batch_decoder.c
>>> +++ b/src/intel/common/gen_batch_decoder.c
>>> @@ -195,7 +195,7 @@ ctx_print_buffer(struct gen_batch_decode_ctx *ctx,
>>>    static void
>>>    handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       struct gen_field_iterator iter;
>>>       gen_field_iterator_init(&iter, inst, p, 0, false);
>>> @@ -309,7 +309,7 @@ static void
>>>    handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
>>>                                           const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       struct gen_group *desc =
>>>          gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
>>> @@ -373,7 +373,7 @@ static void
>>>    handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx,
>>>                                  const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       struct gen_group *vbs = gen_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");
>>>       struct gen_batch_decode_bo vb = {};
>>> @@ -436,7 +436,7 @@ static void
>>>    handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
>>>                                const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       struct gen_batch_decode_bo ib = {};
>>>       uint32_t ib_size = 0;
>>> @@ -486,7 +486,7 @@ handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
>>>    static void
>>>    decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       uint64_t ksp = 0;
>>>       bool is_simd8 = false; /* vertex shaders on Gen8+ only */
>>> @@ -528,7 +528,7 @@ decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
>>>    static void
>>>    decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       uint64_t ksp[3] = {0, 0, 0};
>>>       bool enabled[3] = {false, false, false};
>>> @@ -576,7 +576,7 @@ decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
>>>    static void
>>>    decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       struct gen_group *body =
>>>          gen_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");
>>> @@ -658,7 +658,7 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
>>>                                  const char *struct_type, const uint32_t *p,
>>>                                  int count)
>>>    {
>>> -   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
>>> +   struct gen_group *inst = gen_spec_find_instruction(ctx->spec, GEN_ENGINE_RENDER, p);
>>>       uint32_t state_offset = 0;
>>> @@ -794,6 +794,7 @@ struct custom_decoder {
>>>    void
>>>    gen_print_batch(struct gen_batch_decode_ctx *ctx,
>>> +                enum gen_engine engine,
>>>                    const uint32_t *batch, uint32_t batch_size,
>>>                    uint64_t batch_addr)
>>>    {
>>> @@ -802,7 +803,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
>>>       struct gen_group *inst;
>>>       for (p = batch; p < end; p += length) {
>>> -      inst = gen_spec_find_instruction(ctx->spec, p);
>>> +      inst = gen_spec_find_instruction(ctx->spec, engine, p);
>>>          length = gen_group_get_length(inst, p);
>>>          assert(inst == NULL || length > 0);
>>>          length = MAX2(1, length);
>>> @@ -871,7 +872,7 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
>>>                fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
>>>                        next_batch.addr);
>>>             } else {
>>> -            gen_print_batch(ctx, next_batch.map, next_batch.size,
>>> +            gen_print_batch(ctx, engine, next_batch.map, next_batch.size,
>>>                                next_batch.addr);
>>>             }
>>>             if (second_level) {
>>> diff --git a/src/intel/common/gen_decoder.c b/src/intel/common/gen_decoder.c
>>> index f4f8af3c5ae..b7d40558937 100644
>>> --- a/src/intel/common/gen_decoder.c
>>> +++ b/src/intel/common/gen_decoder.c
>>> @@ -717,12 +717,12 @@ void gen_spec_destroy(struct gen_spec *spec)
>>>    }
>>>    struct gen_group *
>>> -gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p)
>>> +gen_spec_find_instruction(struct gen_spec *spec, enum gen_engine engine, const uint32_t *p)
>>>    {
>>>       hash_table_foreach(spec->commands, entry) {
>>>          struct gen_group *command = entry->data;
>>>          uint32_t opcode = *p & command->opcode_mask;
>>> -      if (opcode == command->opcode)
>>> +      if ((command->engine & engine) && opcode == command->opcode)
>>>             return command;
>>>       }
>>> diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
>>> index b179ae8f380..4e57ecc5ac9 100644
>>> --- a/src/intel/common/gen_decoder.h
>>> +++ b/src/intel/common/gen_decoder.h
>>> @@ -57,7 +57,7 @@ struct gen_spec *gen_spec_load_from_path(const struct gen_device_info *devinfo,
>>>                                             const char *path);
>>>    void gen_spec_destroy(struct gen_spec *spec);
>>>    uint32_t gen_spec_get_gen(struct gen_spec *spec);
>>> -struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, const uint32_t *p);
>>> +struct gen_group *gen_spec_find_instruction(struct gen_spec *spec, enum gen_engine engine, const uint32_t *p);
>>>    struct gen_group *gen_spec_find_register(struct gen_spec *spec, uint32_t offset);
>>>    struct gen_group *gen_spec_find_register_by_name(struct gen_spec *spec, const char *name);
>>>    struct gen_enum *gen_spec_find_enum(struct gen_spec *spec, const char *name);
>>> @@ -249,6 +249,7 @@ void gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx *ctx);
>>>    void gen_print_batch(struct gen_batch_decode_ctx *ctx,
>>> +                     enum gen_engine engine,
>>>                         const uint32_t *batch, uint32_t batch_size,
>>>                         uint64_t batch_addr);
>>> diff --git a/src/intel/tools/aubinator.c b/src/intel/tools/aubinator.c
>>> index 1458875a313..9f3600e42ef 100644
>>> --- a/src/intel/tools/aubinator.c
>>> +++ b/src/intel/tools/aubinator.c
>>> @@ -158,8 +158,7 @@ handle_execlist_write(void *user_data, enum gen_engine engine, uint64_t context_
>>>          batch_ctx.get_bo = aub_mem_get_ggtt_bo;
>>>       }
>>> -   (void)engine; /* TODO */
>>> -   gen_print_batch(&batch_ctx, commands, ring_buffer_tail - ring_buffer_head,
>>> +   gen_print_batch(&batch_ctx, engine, commands, ring_buffer_tail - ring_buffer_head,
>>>                       0);
>>>       aub_mem_clear_bo_maps(&mem);
>>>    }
>>> @@ -171,7 +170,7 @@ handle_ring_write(void *user_data, enum gen_engine engine,
>>>       batch_ctx.user_data = &mem;
>>>       batch_ctx.get_bo = aub_mem_get_ggtt_bo;
>>> -   gen_print_batch(&batch_ctx, data, data_len, 0);
>>> +   gen_print_batch(&batch_ctx, engine, data, data_len, 0);
>>>       aub_mem_clear_bo_maps(&mem);
>>>    }
>>> diff --git a/src/intel/tools/aubinator_error_decode.c b/src/intel/tools/aubinator_error_decode.c
>>> index c581414eb2c..c69dfb84f13 100644
>>> --- a/src/intel/tools/aubinator_error_decode.c
>>> +++ b/src/intel/tools/aubinator_error_decode.c
>>> @@ -111,6 +111,17 @@ static const struct ring_register_mapping fault_registers[] = {
>>>       { VECS, 0, "VECS_FAULT_REG" },
>>>    };
>>> +static int class_to_engine(unsigned int class)
>>> +{
>>> +   static const int map[] = {
>>> +      [RCS] = GEN_ENGINE_RENDER,
>>> +      [BCS] = GEN_ENGINE_BLITTER,
>>> +      [VCS] = GEN_ENGINE_VIDEO,
>>> +   };
>>> +
>>> +   return map[class];
>>> +}
>>> +
>>>    static int ring_name_to_class(const char *ring_name,
>>>                                  unsigned int *class)
>>>    {
>>> @@ -601,6 +612,11 @@ read_data_file(FILE *file)
>>>       for (int s = 0; s < num_sections; s++) {
>>> +      unsigned int class;
>>> +      enum gen_engine engine;
>>> +      ring_name_to_class(sections[s].ring_name, &class);
>>> +      engine = class_to_engine(class);
>>> +
>>>          printf("--- %s (%s) at 0x%08x %08x\n",
>>>                 sections[s].buffer_name, sections[s].ring_name,
>>>                 (unsigned) (sections[s].gtt_offset >> 32),
>>> @@ -610,7 +626,8 @@ read_data_file(FILE *file)
>>>              strcmp(sections[s].buffer_name, "batch buffer") == 0 ||
>>>              strcmp(sections[s].buffer_name, "ring buffer") == 0 ||
>>>              strcmp(sections[s].buffer_name, "HW Context") == 0) {
>>> -         gen_print_batch(&batch_ctx, sections[s].data,
>>> +         gen_print_batch(&batch_ctx, engine,
>>> +                         sections[s].data,
>>>                             sections[s].dword_count * 4,
>>>                             sections[s].gtt_offset);
>>>          }
>>> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>>> index 4363b146150..a0d2f8a98f5 100644
>>> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>>> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
>>> @@ -813,7 +813,8 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
>>>       }
>>>       if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
>>> -      gen_print_batch(&batch->decoder, batch->batch.map,
>>> +      gen_print_batch(&batch->decoder, GEN_ENGINE_RENDER,
>>> +                      batch->batch.map,
>>>                          4 * USED_BATCH(*batch),
>>>                          batch->batch.bo->gtt_offset);
>>>       }
>>