[Mesa-dev] [PATCH 2/2] intel: Make the decoder handle STATE_BASE_ADDRESS not being a buffer.
Kenneth Graunke
kenneth at whitecape.org
Tue Jul 24 21:47:27 UTC 2018
On Tuesday, July 17, 2018 9:40:29 AM PDT Lionel Landwerlin wrote:
> On 11/07/18 19:25, Kenneth Graunke wrote:
> > Normally, i965 programs STATE_BASE_ADDRESS every batch, and puts all
> > state for a given base in a single buffer.
> >
> > I'm working on a prototype which emits STATE_BASE_ADDRESS only once at
> > startup, where each base address is a fixed 4GB region of the PPGTT.
> > State may live in many buffers in that 4GB region, even if there isn't
> > a buffer located at the actual base address itself.
> >
> > To handle this, we need to save the STATE_BASE_ADDRESS values across
> > multiple batches, rather than assuming we'll see the command each time.
> > Then, each time we see a pointer, we need to ask the driver for the BO
> > map for that data. (We can't just use the map for the base address, as
> > state may be in multiple buffers, and there may not even be a buffer
> > at the base address to map.)
> > ---
> > src/intel/common/gen_batch_decoder.c | 83 ++++++++++++++++------------
> > src/intel/common/gen_decoder.h | 9 ++-
> > 2 files changed, 56 insertions(+), 36 deletions(-)
> >
> > diff --git a/src/intel/common/gen_batch_decoder.c b/src/intel/common/gen_batch_decoder.c
> > index fe7536da9ec..6cb66bcb257 100644
> > --- a/src/intel/common/gen_batch_decoder.c
> > +++ b/src/intel/common/gen_batch_decoder.c
> > @@ -128,13 +128,13 @@ static void
> > ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
> > uint32_t ksp, const char *type)
> > {
> > - if (!ctx->instruction_base.map)
> > + uint64_t addr = ctx->instruction_base.addr + ksp;
> > + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
> > + if (!bo.map)
> > return;
> >
> > - printf("\nReferenced %s:\n", type);
> > - gen_disasm_disassemble(ctx->disasm,
> > - (void *)ctx->instruction_base.map, ksp,
> > - ctx->fp);
> > + fprintf(ctx->fp, "\nReferenced %s:\n", type);
> > + gen_disasm_disassemble(ctx->disasm, bo.map, 0, ctx->fp);
> > }
> >
> > /* Heuristic to determine whether a uint32_t is probably actually a float
> > @@ -225,35 +225,30 @@ dump_binding_table(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
> > if (count < 0)
> > count = update_count(ctx, offset, 1, 8);
> >
> > - if (ctx->surface_base.map == NULL) {
> > + struct gen_batch_decode_bo bind_bo =
> > + ctx_get_bo(ctx, ctx->surface_base.addr + offset);
> > +
> > + if (bind_bo.map == NULL) {
> > fprintf(ctx->fp, " binding table unavailable\n");
> > return;
> > }
> >
> > - if (offset % 32 != 0 || offset >= UINT16_MAX ||
> > - offset >= ctx->surface_base.size) {
> > + if (offset % 32 != 0 || offset >= UINT16_MAX || offset >= bind_bo.size) {
>
> I wonder if this
>
> offset >= bind_bo.size
>
> is right. That's assuming bind_bo.addr == ctx->surface_base.addr, but in
> your prototype it probably won't be, right?
>
>
> I would check (ctx->surface_base.addr + offset) >= (bind_bo.addr +
> bind_bo.size)
Yeah, I agree, this is wrong. I think we should just drop the
offset >= bind_bo.size check. We're already doing ctx_get_bo()
at surface_base.addr + offset...and since we've checked bind_bo.map
isn't NULL...we know the start of the binding table is in the BO.
I'll also move these checks above the ctx_get_bo() call, so we validate
the alignment and maximum amount restrictions from the packet before
using it to look up anything.
> > fprintf(ctx->fp, " invalid binding table pointer\n");
> > return;
> > }
> >
> > - struct gen_batch_decode_bo bo = ctx->surface_base;
> > - const uint32_t *pointers = ctx->surface_base.map + offset;
> > + const uint32_t *pointers = bind_bo.map;
> > for (int i = 0; i < count; i++) {
> > if (pointers[i] == 0)
> > continue;
> >
> > - if (pointers[i] % 32 != 0) {
> > - fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
> > - continue;
> > - }
> > -
> > uint64_t addr = ctx->surface_base.addr + pointers[i];
> > + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, addr);
> > uint32_t size = strct->dw_length * 4;
> >
> > - if (addr < bo.addr || addr + size >= bo.addr + bo.size)
> > - bo = ctx->get_bo(ctx->user_data, addr);
> > -
> > - if (addr < bo.addr || addr + size >= bo.addr + bo.size) {
> > + if (pointers[i] % 32 != 0 ||
> > + addr < bo.addr || addr + size >= bo.addr + bo.size) {
> > fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
> > continue;
> > }
> > @@ -271,18 +266,20 @@ dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
> > if (count < 0)
> > count = update_count(ctx, offset, strct->dw_length, 4);
> >
> > - if (ctx->dynamic_base.map == NULL) {
> > + uint64_t state_addr = ctx->dynamic_base.addr + offset;
> > + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
> > + const void *state_map = bo.map;
> > +
> > + if (state_map == NULL) {
> > fprintf(ctx->fp, " samplers unavailable\n");
> > return;
> > }
> >
> > - if (offset % 32 != 0 || offset >= ctx->dynamic_base.size) {
> > + if (offset % 32 != 0 || state_addr - bo.addr >= bo.size) {
> > fprintf(ctx->fp, " invalid sampler state pointer\n");
> > return;
> > }
> >
> > - uint64_t state_addr = ctx->dynamic_base.addr + offset;
> > - const void *state_map = ctx->dynamic_base.map + offset;
> > for (int i = 0; i < count; i++) {
> > fprintf(ctx->fp, "sampler state %d\n", i);
> > ctx_print_group(ctx, strct, state_addr, state_map);
> > @@ -295,9 +292,6 @@ static void
> > handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> > const uint32_t *p)
> > {
> > - if (ctx->dynamic_base.map == NULL)
> > - return;
> > -
> > struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > struct gen_group *desc =
> > gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
> > @@ -316,7 +310,12 @@ handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
> > }
> >
> > uint64_t desc_addr = ctx->dynamic_base.addr + descriptor_offset;
> > - const uint32_t *desc_map = ctx->dynamic_base.map + descriptor_offset;
> > + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, desc_addr);
> > + const void *desc_map = bo.map;
> > +
> > + if (desc_map == NULL)
> > + return;
>
> Maybe a message here?
>
Sure, I'll add
fprintf(ctx->fp, " interface descriptors unavailable\n");
and some braces.
> > +
> > for (int i = 0; i < descriptor_count; i++) {
> > fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
> >
> > @@ -640,11 +639,6 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
> > const char *struct_type, const uint32_t *p,
> > int count)
> > {
> > - if (ctx->dynamic_base.map == NULL) {
> > - fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
> > - return;
> > - }
> > -
> > struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
> > struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
> >
> > @@ -659,8 +653,15 @@ decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
> > }
> > }
> >
> > - uint32_t state_addr = ctx->dynamic_base.addr + state_offset;
> > - const uint32_t *state_map = ctx->dynamic_base.map + state_offset;
> > + uint64_t state_addr = ctx->dynamic_base.addr + state_offset;
> > + struct gen_batch_decode_bo bo = ctx_get_bo(ctx, state_addr);
> > + const void *state_map = bo.map;
> > +
> > + if (state_map == NULL) {
> > + fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type);
> > + return;
> > + }
> > +
> > for (int i = 0; i < count; i++) {
> > fprintf(ctx->fp, "%s %d\n", struct_type, i);
> > ctx_print_group(ctx, state, state_offset, state_map);
> > @@ -787,6 +788,20 @@ gen_print_batch(struct gen_batch_decode_ctx *ctx,
> > int length;
> > struct gen_group *inst;
> >
> > + /* If we have an address for base addresses, but no map, try and fetch
> > + * them again. We might have a pre-programmed address but no buffer
> > + * there (say, because no surfaces were in use the first time).
> > + */
> > + if (ctx->surface_base.map == NULL && ctx->surface_base.addr != 0) {
> > + ctx->surface_base = ctx_get_bo(ctx, ctx->surface_base.addr);
> > + }
> > + if (ctx->dynamic_base.map == NULL && ctx->dynamic_base.addr != 0) {
> > + ctx->dynamic_base = ctx_get_bo(ctx, ctx->dynamic_base.addr);
> > + }
> > + if (ctx->instruction_base.map == NULL && ctx->instruction_base.addr != 0) {
> > + ctx->instruction_base = ctx_get_bo(ctx, ctx->instruction_base.addr);
> > + }
>
> Not really sure why you bother getting the BOs here.
> If you're going to get them for each address field we encounter in the
> rest of the decoder, that's probably not necessary.
Yep. Useless. I'd added this originally because I wasn't getting
Surface State Base Address, and this sort-of fixed that
problem...because I happen to have a BO there. But it wasn't remotely
complete or correct, so I fixed it properly...and now this isn't needed.
Dropped. Thanks for the review! I'll send a v2 shortly.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180724/fc41504d/attachment.sig>
More information about the mesa-dev
mailing list