[igt-dev] [PATCH v3 1/3] lib/rendercopy: Add AUX page table support

Chris Wilson chris at chris-wilson.co.uk
Wed Nov 6 11:11:40 UTC 2019


Quoting Imre Deak (2019-11-05 21:42:20)
> On GEN12+ the AUX CCS surfaces required by the render and media
> compression must be specified by a 3 level page table directory, which
> translates the main surface graphics address to the AUX CCS surface
> graphics address. For this purpose add support for creating a GEM buffer
> to translate the linear surface address range to the linear AUX surface
> address range.
> 
> The buffers containing the main surface must be pinned down, since the
> directory table entry indices depend on the surface address, and they
> must be 64kB aligned. The page table can be relocated OTOH, so allow
> that and emit the required relocation entries.
> 
> v2:
> - Make level variables to be 0 based (l1..l3 -> level=0..2).
> - Add missing drm_intel_bo_set_softpin_offset() stub to fix build on
>   non-Intel archs.
> - Fix missing offsets in reloc entries of already bound objects. (Chris)
> - Randomize pin offsets, to try to avoid eviction. (Chris)
> - Remove redundant MI_NOOPS around MI_LOAD_REGISTER_MEM
> - Stop using explicit reloc cache domains, as these don't make sense on
>   GEN12 anyway. (Chris)
> - Fix missing autotools support. (Chris)
> - s/igt_aux_pgtable/intel_aux_pgtable/, since the functionality is Intel
>   specific. (Chris)
> v3:
> - Make sure all objects with an AUX surface are pinned.
> 
> Cc: Mika Kahola <mika.kahola at intel.com>
> Cc: Brian Welty <brian.welty at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak at intel.com>
> ---
>  lib/Makefile.sources         |   1 +
>  lib/intel_aux_pgtable.c      | 379 +++++++++++++++++++++++++++++++++++
>  lib/intel_aux_pgtable.h      |  12 ++
>  lib/intel_reg.h              |   2 +
>  lib/meson.build              |   1 +
>  lib/rendercopy_gen9.c        | 234 ++++++++++++++++++++-
>  lib/stubs/drm/intel_bufmgr.c |   6 +
>  7 files changed, 629 insertions(+), 6 deletions(-)
>  create mode 100644 lib/intel_aux_pgtable.c
>  create mode 100644 lib/intel_aux_pgtable.h
> 
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index cf094ab8..1d0f45f7 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -100,6 +100,7 @@ lib_source_list =           \
>         surfaceformat.h         \
>         sw_sync.c               \
>         sw_sync.h               \
> +       intel_aux_pgtable.c     \
>         intel_reg_map.c         \
>         intel_iosf.c            \
>         igt_kms.c               \
> diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
> new file mode 100644
> index 00000000..79402813
> --- /dev/null
> +++ b/lib/intel_aux_pgtable.c
> @@ -0,0 +1,379 @@
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +#include "drmtest.h"
> +#include "intel_aux_pgtable.h"
> +#include "intel_bufmgr.h"
> +#include "intel_batchbuffer.h"
> +#include "ioctl_wrappers.h"
> +
> +#include "i915/gem_mman.h"
> +
> +#define BITS_PER_LONG          (sizeof(long) * 8)
> +#define BITMASK(e, s)          ((~0UL << (s)) & \
> +                                (~0UL >> (BITS_PER_LONG - 1 - (e))))
> +
> +#define ALIGN_DOWN(x, a)       ALIGN((x) - ((a) - 1), (a))
> +
> +/* The unit size to which the AUX CCS surface is aligned to. */
> +#define AUX_CCS_UNIT_SIZE      64
> +/*
> + * The block size on the AUX CCS surface which is mapped by one L1 AUX
> + * pagetable entry.
> + */
> +#define AUX_CCS_BLOCK_SIZE     (4 * AUX_CCS_UNIT_SIZE)
> +/*
> + * The block size on the main surface mapped by one AUX CCS block:
> + *   256 bytes per CCS block *
> + *   8   bits per byte /
> + *   2   bits per main surface CL *
> + *   64  bytes per main surface CL
> + */
> +#define MAIN_SURFACE_BLOCK_SIZE        (AUX_CCS_BLOCK_SIZE * 8 / 2 * 64)
> +
> +#define GFX_ADDRESS_BITS       48
> +
> +#define max(a, b)              ((a) > (b) ? (a) : (b))
> +
> +struct pgtable_level_desc {
> +       int idx_shift;
> +       int idx_bits;
> +       int entry_ptr_shift;
> +       int table_size;
> +};
> +
> +struct pgtable_level_info {
> +       const struct pgtable_level_desc *desc;
> +       int table_count;
> +       int alloc_base;
> +       int alloc_ptr;
> +};
> +
> +struct pgtable {
> +       int levels;
> +       struct pgtable_level_info *level_info;
> +       int size;
> +       int max_align;
> +       drm_intel_bo *bo;
> +};
> +
> +static int
> +pgt_table_count(int address_bits, const struct igt_buf **bufs, int buf_count)
> +{
> +       uint64_t end;
> +       int count;
> +       int i;
> +
> +       count = 0;
> +       end = 0;
> +       for (i = 0; i < buf_count; i++) {
> +               const struct igt_buf *buf = bufs[i];
> +               uint64_t start;
> +
> +               /* We require bufs to be sorted. */
> +               igt_assert(i == 0 ||
> +                          buf->bo->offset64 >= bufs[i - 1]->bo->offset64 +
> +                                               bufs[i - 1]->bo->size);
> +
> +               start = ALIGN_DOWN(buf->bo->offset64, 1UL << address_bits);
> +               /* Avoid double counting for overlapping aligned bufs. */
> +               start = max(start, end);
> +
> +               end = ALIGN(buf->bo->offset64 + buf->size, 1UL << address_bits);
> +               igt_assert(end >= start);
> +
> +               count += (end - start) >> address_bits;
> +       }
> +
> +       return count;
> +}
> +
> +static void
> +pgt_calc_size(struct pgtable *pgt, const struct igt_buf **bufs, int buf_count)
> +{
> +       int level;
> +
> +       pgt->size = 0;
> +
> +       for (level = pgt->levels - 1; level >= 0; level--) {
> +               struct pgtable_level_info *li = &pgt->level_info[level];
> +
> +               li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
> +               li->alloc_ptr = li->alloc_base;
> +
> +               li->table_count = pgt_table_count(li->desc->idx_shift +
> +                                                 li->desc->idx_bits,
> +                                                 bufs, buf_count);
> +
> +               pgt->size = li->alloc_base +
> +                           li->table_count * li->desc->table_size;
> +       }
> +}
> +
> +static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
> +{
> +       struct pgtable_level_info *li = &pgt->level_info[level];
> +       uint64_t table;
> +
> +       table = li->alloc_ptr;
> +       li->alloc_ptr += li->desc->table_size;
> +
> +       igt_assert(li->alloc_ptr <=
> +                  li->alloc_base + li->table_count * li->desc->table_size);
> +
> +       return table;
> +}
> +
> +static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
> +{
> +       const struct pgtable_level_desc *ld = pgt->level_info[level].desc;
> +       uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
> +                               ld->idx_shift);
> +
> +       return (address & mask) >> ld->idx_shift;
> +}
> +
> +static uint64_t ptr_mask(struct pgtable *pgt, int level)
> +{
> +       const struct pgtable_level_desc *ld = pgt->level_info[level].desc;
> +
> +       return BITMASK(GFX_ADDRESS_BITS - 1, ld->entry_ptr_shift);
> +}
> +
> +static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
> +{
> +       uint64_t ptr = entry & ptr_mask(pgt, level);
> +
> +       if (level)
> +               ptr -= pgt->bo->offset64;
> +       igt_assert(!(ptr & ~ptr_mask(pgt, level)));
> +
> +       return ptr;
> +}
> +
> +static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
> +                           uint64_t flags)
> +{
> +       if (level)
> +               ptr += pgt->bo->offset64;
> +       igt_assert(!(ptr & ~ptr_mask(pgt, level)));
> +
> +       return ptr | flags;
> +}
> +
> +static uint64_t
> +pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
> +             int level, uint64_t address, uint64_t flags)
> +{
> +       uint64_t *table_ptr = pgt->bo->virtual + parent_table;
> +       int entry_idx = pgt_address_index(pgt, level, address);
> +       uint64_t *entry_ptr;
> +
> +       entry_ptr = &table_ptr[entry_idx];
> +       if (!*entry_ptr) {
> +               uint64_t child_table = pgt_alloc_table(pgt, level - 1);
> +
> +               *entry_ptr = pgt_mkentry(pgt, level, child_table, flags);

The value in the batch is the absolute address, good.

> +
> +               drm_intel_bo_emit_reloc(pgt->bo,
> +                                       parent_table + entry_idx * sizeof(uint64_t),
> +                                       pgt->bo, *entry_ptr, 0, 0);

But you then pass the absolute address as the relocation delta. Not so
good.

> +       }
> +
> +       return pgt_entry_ptr(pgt, level, *entry_ptr);

You only use pgt_entry_ptr, here so just keep the batch entry separate
from the relative offset.

uint64_t pte = pgt_mkentry(pgt, level, child_table, flags);

Hmm, note this interface is limited to s32.
igt_assert(pte < S32_MAX);

*entry_ptr = bo->offset64 + pte;
drm_intel_bo_emit_reloc(pgt->bo,
	parent_table + entry_idx * sizeof(uint64_t),
	pgt->bo, offset, 0, 0);


return offset & ptr_mask(pgt, level);

What's the coherency model for the pgt->bo? There's a lot of writes here
from the CPU how are you making sure they are visible to the GPU?
-Chris


More information about the igt-dev mailing list