[Mesa-dev] [PATCH 2/2] intel: Make the decoder handle STATE_BASE_ADDRESS not being a buffer.
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Tue Jul 17 16:40:29 UTC 2018
On 11/07/18 19:25, Kenneth Graunke wrote:
> Normally, i965 programs STATE_BASE_ADDRESS every batch, and puts all
> state for a given base in a single buffer.
>
> I'm working on a prototype which emits STATE_BASE_ADDRESS only once at
> startup, where each base address is a fixed 4GB region of the PPGTT.
> State may live in many buffers in that 4GB region, even if there isn't
> a buffer located at the actual base address itself.
>
> To handle this, we need to save the STATE_BASE_ADDRESS values across
> multiple batches, rather than assuming we'll see the command each time.
> Then, each time we see a pointer, we need to ask the driver for the BO
> map for that data. (We can't just use the map for the base address, as
> state may be in multiple buffers, and there may not even be a buffer
> at the base address to map.)
> ---
> src/intel/common/gen_batch_decoder.c | 83 ++++++++++++++++------------
> src/intel/common/gen_decoder.h | 9 ++-
> 2 files changed, 56 insertions(+), 36 deletions(-)
>
> diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
> index fe7536da9ec..6cb66bcb257 100644
> --- a/src/intel/common/gen_batch_decoder.c
> +++ b/src/intel/common/gen_batch_decoder.c
> @@ -128,13 +128,13 @@ static void
> ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
> uint32_t ksp, const char *type)
> {
> - if (!ctx->instruction_base.map)
> + uint64_t addr = ctx->instruction_base.addr + ksp;
> + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
> + if (!bo.map)
> return;
>
> - printf("\nReferenced %s:\n", type);
> - gen_disasm_disassemble(ctx->disasm,
> - (void *)ctx->instruction_base.map, ksp,
> - ctx->fp);
> + fprintf(ctx->fp, "\nReferenced %s:\n", type);
> + gen_disasm_disassemble(ctx->disasm, bo.map, 0, ctx->fp);
> }
>
> /* Heuristic to determine whether a uint32_t is probably actually a float
> @@ -225,35 +225,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
> if (count < 0)
> count = update_count(ctx, offset, 1, 8);
>
> - if (ctx->surface_base.map == NULL) {
> + struct gen_batch_decode_bo bind_bo =
> + ctx_get_bo(ctx, ctx->surface_base.addr + offset);
> +
> + if (bind_bo.map == NULL) {
> fprintf(ctx->fp, " binding table unavailable\n");
> return;
> }
>
> - if (offset % 32 != 0 || offset >= UINT16_MAX ||
> - offset >= ctx->surface_base.size) {
> + if (offset % 32 != 0 || offset >= UINT16_MAX || offset >= bind_bo.size) {
I wonder if this
offset >= bind_bo.size
is right. That's assuming bind_bo.addr == ctx->surface_base.addr, but in
your prototype it probably won't be, right?
I would check (ctx->surface_base.addr + offset) >= (bind_bo.addr +
bind_bo.size)
> fprintf(ctx->fp, " invalid binding table pointer\n");
> return;
> }
>
> - struct gen_batch_decode_bo bo = ctx->surface_base;
> - const uint32_t *pointers = ctx->surface_base.map + offset;
> + const uint32_t *pointers = bind_bo.map;
> for (int i = 0; i < count; i++) {
> if (pointers[i] == 0)
> continue;
>
> - if (pointers[i] % 32 != 0) {
> - fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
> - continue;
> - }
> -
> uint64_t addr = ctx->surface_base.addr + pointers[i];
> + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
> uint32_t size = strct->dw_length * 4;
>
> - if (addr < bo.addr || addr + size >= bo.addr + bo.size)
> - bo = ctx->get_bo(ctx->user_data, addr);
> -
> - if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
> + if (pointers[i] % 32 != 0 ||
> + addr < bo.addr || addr + size >= bo.addr + bo.size) {
> fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
> continue;
> }
> @@ -271,18 +266,20 @@ dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
> if (count < 0)
> count = update_count(ctx, offset, strct->dw_length, 4);
>
> - if (ctx->dynamic_base.map == NULL) {
> + uint64_t state_addr = ctx->dynamic_base.addr + offset;
> + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
> + const void *state_map = bo.map;
> +
> + if (state_map == NULL) {
> fprintf(ctx->fp, " samplers unavailable\n");
> return;
> }
>
> - if (offset % 32 != 0 || offset >= ctx->dynamic_base.size) {
> + if (offset % 32 != 0 || state_addr - bo.addr >= bo.size) {
> fprintf(ctx->fp, " invalid sampler state pointer\n");
> return;
> }
>
> - uint64_t state_addr = ctx->dynamic_base.addr + offset;
> - const void *state_map = ctx->dynamic_base.map + offset;
> for (int i = 0; i < count; i++) {
> fprintf(ctx->fp, "sampler state %d\n", i);
> ctx_print_group(ctx, strct, state_addr, state_map);
> @@ -295,9 +292,6 @@ static void
> handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> const uint32_t *p)
> {
> - if (ctx->dynamic_base.map == NULL)
> - return;
> -
> struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> struct gen_group *desc =
> gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
> @@ -316,7 +310,12 @@ handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> }
>
> uint64_t desc_addr = ctx->dynamic_base.addr + descriptor_offset;
> - const uint32_t *desc_map = ctx->dynamic_base.map + descriptor_offset;
> + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, desc_addr);
> + const void *desc_map = bo.map;
> +
> + if (desc_map == NULL)
> + return;
Maybe a message here?
> +
> for (int i = 0; i < descriptor_count; i++) {
> fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
>
> @@ -640,11 +639,6 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
> const char *struct_type, const uint32_t *p,
> int count)
> {
> - if (ctx->dynamic_base.map == NULL) {
> - fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
> - return;
> - }
> -
> struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
>
> @@ -659,8 +653,15 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
> }
> }
>
> - uint32_t state_addr = ctx->dynamic_base.addr + state_offset;
> - const uint32_t *state_map = ctx->dynamic_base.map + state_offset;
> + uint64_t state_addr = ctx->dynamic_base.addr + state_offset;
> + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
> + const void *state_map = bo.map;
> +
> + if (state_map == NULL) {
> + fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
> + return;
> + }
> +
> for (int i = 0; i < count; i++) {
> fprintf(ctx->fp, "%s %d\n", struct_type, i);
> ctx_print_group(ctx, state, state_offset, state_map);
> @@ -787,6 +788,20 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
> int length;
> struct gen_group *inst;
>
> + /* If we have an address for base addresses, but no map, try and fetch
> + * them again. We might have a pre-programmed address but no buffer
> + * there (say, because no surfaces were in use the first time).
> + */
> + if (ctx->surface_base.map == NULL && ctx->surface_base.addr != 0) {
> + ctx->surface_base = ctx_get_bo(ctx, ctx->surface_base.addr);
> + }
> + if (ctx->dynamic_base.map == NULL && ctx->dynamic_base.addr != 0) {
> + ctx->dynamic_base = ctx_get_bo(ctx, ctx->dynamic_base.addr);
> + }
> + if (ctx->instruction_base.map == NULL && ctx->instruction_base.addr != 0) {
> + ctx->instruction_base = ctx_get_bo(ctx, ctx->instruction_base.addr);
> + }
Not really sure why you bother getting the BOs here.
If you're going to get them for each address field we encounter in the
rest of the decoder, that's probably not necessary.
> +
> for (p = batch; p < end; p += length) {
> inst = gen_spec_find_instruction(ctx->spec, p);
> length = gen_group_get_length(inst, p);
> diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h
> index f2207ddf889..afbdb6a9aae 100644
> --- a/src/intel/common/gen_decoder.h
> +++ b/src/intel/common/gen_decoder.h
> @@ -205,8 +205,13 @@ struct gen_batch_decode_bo {
> struct gen_disasm *disasm;
>
> struct gen_batch_decode_ctx {
> - struct gen_batch_decode_bo (*get_bo)(void *user_data,
> - uint64_t base_address);
> + /**
> + * Return information about the buffer containing the given address.
> + *
> + * If the given address is inside a buffer, the map pointer should be
> + * offset accordingly so it points at the data corresponding to address.
> + */
> + struct gen_batch_decode_bo (*get_bo)(void *user_data, uint64_t address);
> unsigned (*get_state_size)(void *user_data,
> uint32_t offset_from_dynamic_state_base_addr);
> void *user_data;
More information about the mesa-dev
mailing list