[igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support

Imre Deak imre.deak at intel.com
Mon Nov 4 14:07:23 UTC 2019


On Mon, Nov 04, 2019 at 11:28:11AM +0000, Chris Wilson wrote:
> Quoting Imre Deak (2019-11-01 20:13:09)
> > On GEN12+ the AUX CCS surfaces required by the render and media
> > compression must be specified by a 3 level page table directory, which
> > translates the main surface graphics address to the AUX CCS surface
> > graphics address. For this purpose add support for creating a GEM buffer
> > to translate the linear surface address range to the linear AUX surface
> > address range.
> > 
> > The buffers containing the main surface must be pinned down, since the
> > directory table entry indices depend on the surface address, and they
> > must be 64kB aligned. The page table can be relocated OTOH, so allow
> > that and emit the required relocation entries.
> > 
> > Cc: Mika Kahola <mika.kahola at intel.com>
> > Cc: Brian Welty <brian.welty at intel.com>
> > Cc: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > Signed-off-by: Imre Deak <imre.deak at intel.com>
> > ---
> >  lib/igt_aux_pgtable.c | 381 ++++++++++++++++++++++++++++++++++++++++++
> >  lib/igt_aux_pgtable.h |  21 +++
> 
> This is not igt, but an libdrm_intel wrapper.

Right, can rename it to lib/intel_aux_pgtable.

> Missing the autotools support.

Yep, I missed it; will add.

> >  lib/intel_reg.h       |   3 +
> >  lib/meson.build       |   1 +
> >  lib/rendercopy_gen9.c | 121 +++++++++++++-
> >  5 files changed, 521 insertions(+), 6 deletions(-)
> >  create mode 100644 lib/igt_aux_pgtable.c
> >  create mode 100644 lib/igt_aux_pgtable.h
> > 
> > diff --git a/lib/igt_aux_pgtable.c b/lib/igt_aux_pgtable.c
> > new file mode 100644
> > index 00000000..aaa24cfd
> > --- /dev/null
> > +++ b/lib/igt_aux_pgtable.c
> > @@ -0,0 +1,381 @@
> > +#include <stdint.h>
> > +#include <stdbool.h>
> > +
> > +#include "drmtest.h"
> > +#include "igt_aux_pgtable.h"
> > +#include "intel_bufmgr.h"
> > +#include "intel_batchbuffer.h"
> > +#include "ioctl_wrappers.h"
> > +
> > +#include "i915/gem_mman.h"
> > +
> > +#define BITS_PER_LONG          (sizeof(long) * 8)
> > +#define BITMASK(e, s)          ((~0UL << (s)) & \
> > +                                (~0UL >> (BITS_PER_LONG - 1 - (e))))
> > +
> > +#define ALIGN_DOWN(x, a)       ALIGN((x) - ((a) - 1), (a))
> > +
> > +#define CL_SIZE                        64
> > +/*
> > + * The size of a block on the CCS surface that is covered/pointed to by one
> > + * L1 AUX pagetable entry. This size corresponds to the 1<<8 alignment of the
> > + * pointers in the L1 entry.
> > + */
> > +#define CCS_BLOCK_SIZE         (4 * CL_SIZE)
> > +/*
> > + * 256 bytes per CCS block size *
> > + * 8   bits per byte /
> > + * 2   bits per surface CL *
> > + * 64  bytes per surface CL
> > + */
> > +#define SURFACE_BLOCK_SIZE     (CCS_BLOCK_SIZE * 8 / 2 * CL_SIZE)
> > +#define AUX_PGTABLE_VALID      1
> > +#define AUX_PGTABLE_LEVELS     3
> > +
> > +#define ADDRESS_BITS           48
> > +
> > +#define max(a, b)              ((a) > (b) ? (a) : (b))
> > +
> > +struct pgtable_level_desc {
> > +       int idx_shift;
> > +       int idx_bits;
> > +       int entry_ptr_shift;
> > +       int table_size;
> > +};
> > +
> > +struct pgtable_level_info {
> > +       const struct pgtable_level_desc *desc;
> > +       int table_count;
> > +       int alloc_base;
> > +       int alloc_ptr;
> > +};
> > +
> > +struct pgtable {
> > +       int levels;
> > +       struct pgtable_level_info *level_info;
> > +       int size;
> > +       int max_align;
> > +       drm_intel_bo *bo;
> > +};
> > +
> > +static const struct pgtable_level_desc aux_pgtable_desc[AUX_PGTABLE_LEVELS] = {
> > +       {
> > +               .idx_shift = 16,
> > +               .idx_bits = 8,
> > +               .entry_ptr_shift = 8,
> > +               .table_size = 8 * 1024,
> > +       },
> > +       {
> > +               .idx_shift = 24,
> > +               .idx_bits = 12,
> > +               .entry_ptr_shift = 13,
> > +               .table_size = 32 * 1024,
> > +       },
> > +       {
> > +               .idx_shift = 36,
> > +               .idx_bits = 12,
> > +               .entry_ptr_shift = 15,
> > +               .table_size = 32 * 1024,
> > +       },
> > +};
> > +
> > +static int
> > +pgt_table_count(int address_bits,
> > +               const struct igt_aux_pgtable_range *ranges, int range_count)
> > +{
> > +       uint64_t end;
> > +       int count;
> > +       int i;
> > +
> > +       count = 0;
> > +       end = 0;
> > +       for (i = 0; i < range_count; i++) {
> > +               const struct igt_aux_pgtable_range *r = &ranges[i];
> > +               uint64_t start;
> > +
> > +               /* We require ranges to be sorted. */
> > +               igt_assert(i == 0 ||
> > +                          r->surface_base >= ranges[i - 1].surface_base +
> > +                                             ranges[i - 1].surface_size);
> > +
> > +               start = ALIGN_DOWN(r->surface_base, 1UL << address_bits);
> > +               /* Avoid double counting for overlapping aligned ranges. */
> > +               start = max(start, end);
> > +
> > +               end = ALIGN(r->surface_base + r->surface_size,
> > +                           1UL << address_bits);
> > +               igt_assert(end >= start);
> > +
> > +               count += (end - start) >> address_bits;
> > +       }
> > +
> > +       return count;
> > +}
> > +
> > +static void
> > +pgt_calc_size(struct pgtable *pgt,
> > +             const struct igt_aux_pgtable_range *ranges, int range_count)
> > +{
> > +       int level;
> > +
> > +       pgt->size = 0;
> > +
> > +       for (level = pgt->levels; level > 0; level--) {
> > +               struct pgtable_level_info *li = &pgt->level_info[level - 1];
> > +
> > +               li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
> > +               li->alloc_ptr = li->alloc_base;
> > +
> > +               li->table_count = pgt_table_count(li->desc->idx_shift +
> > +                                                 li->desc->idx_bits,
> > +                                                 ranges, range_count);
> > +
> > +               pgt->size = li->alloc_base +
> > +                           li->table_count * li->desc->table_size;
> > +       }
> > +}
> > +
> > +static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
> > +{
> > +       struct pgtable_level_info *li = &pgt->level_info[level - 1];
> > +       uint64_t table;
> > +
> > +       table = li->alloc_ptr;
> > +       li->alloc_ptr += li->desc->table_size;
> > +
> > +       igt_assert(li->alloc_ptr <=
> > +                  li->alloc_base + li->table_count * li->desc->table_size);
> > +
> > +       return table;
> > +}
> > +
> > +static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
> > +{
> > +       const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
> > +       uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
> > +                               ld->idx_shift);
> > +
> > +       return (address & mask) >> ld->idx_shift;
> > +}
> > +
> > +static uint64_t ptr_mask(struct pgtable *pgt, int level)
> > +{
> > +       const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
> > +
> > +       return BITMASK(ADDRESS_BITS - 1, ld->entry_ptr_shift);
> > +}
> > +
> > +static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
> > +{
> > +       return entry & ptr_mask(pgt, level);
> > +}
> > +
> > +static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
> > +                           uint64_t flags)
> > +{
> > +       igt_assert(!(ptr & ~ptr_mask(pgt, level)));
> > +
> > +       return ptr | flags;
> > +}
> > +
> > +static uint64_t
> > +pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
> > +             int level, uint64_t address, uint64_t flags)
> > +{
> > +       uint64_t *table_ptr = pgt->bo->virtual + parent_table;
> > +       int entry_idx = pgt_address_index(pgt, level, address);
> > +       uint64_t *entry_ptr;
> > +
> > +       entry_ptr = &table_ptr[entry_idx];
> > +       if (!*entry_ptr) {
> > +               uint64_t child_table = pgt_alloc_table(pgt, level - 1);
> > +
> > +               *entry_ptr = pgt_mkentry(pgt, level, child_table, flags);
> > +
> > +               drm_intel_bo_emit_reloc(pgt->bo,
> > +                                       parent_table + entry_idx * sizeof(uint64_t),
> > +                                       pgt->bo, *entry_ptr,
> > +                                       I915_GEM_DOMAIN_INSTRUCTION, 0);
> 
> This is missing setting the correct value into the table, and so may be
> skipped by relocation pass inside execbuf.

Argh, right — I missed that pgt->bo->offset64 can be non-zero for an
already-bound object, and the kernel's relocation optimization. Will fix it, thanks.

> 
> > +       }
> > +
> > +       return pgt_entry_ptr(pgt, level, *entry_ptr);
> > +}
> > +
> > +static void
> > +pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table,
> > +                uint64_t address, uint64_t ptr, uint64_t flags)
> > +{
> > +       uint64_t *l1_table_ptr;
> > +       uint64_t *l1_entry_ptr;
> > +
> > +       l1_table_ptr = pgt->bo->virtual + l1_table;
> > +       l1_entry_ptr = &l1_table_ptr[pgt_address_index(pgt, 1, address)];
> > +       *l1_entry_ptr = pgt_mkentry(pgt, 1, ptr, flags);
> > +}
> > +
> > +static uint64_t pgt_get_l1_flags(const struct igt_aux_pgtable_range *range)
> > +{
> > +       /*
> > +        * The offset of .tile_mode isn't specifed by bspec, it's what Mesa
> > +        * uses.
> > +        */
> > +       union {
> > +               struct {
> > +                       uint64_t        valid:1;
> > +                       uint64_t        compression_mod:2;
> > +                       uint64_t        lossy_compression:1;
> > +                       uint64_t        pad:4;
> > +                       uint64_t        addr:40;
> > +                       uint64_t        pad2:4;
> > +                       uint64_t        tile_mode:2;
> > +                       uint64_t        depth:3;
> > +                       uint64_t        ycr:1;
> > +                       uint64_t        format:6;
> > +               } e;
> > +               uint64_t l;
> > +       } entry = {
> > +               .e = {
> > +                       .valid = 1,
> > +                       .tile_mode = range->tiling == I915_TILING_Y ? 1 : 0,
> > +                       .depth = 5,             /* 32bpp */
> > +                       .format = 0xA,          /* B8G8R8A8_UNORM */
> > +               }
> > +       };
> > +
> > +       /*
> > +        * TODO: Clarify if Yf is supported and if we need to differentiate
> > +        *       Ys and Yf.
> > +        *       Add support for more formats.
> > +        */
> > +       igt_assert(range->tiling == I915_TILING_Y ||
> > +                  range->tiling == I915_TILING_Yf ||
> > +                  range->tiling == I915_TILING_Ys);
> > +
> > +       igt_assert(range->bpp == 32);
> > +
> > +       return entry.l;
> > +}
> > +
> > +static uint64_t pgt_get_lx_flags(void)
> > +{
> > +       union {
> > +               struct {
> > +                       uint64_t        valid:1;
> > +                       uint64_t        addr:47;
> > +                       uint64_t        pad:16;
> > +               } e;
> > +               uint64_t l;
> > +       } entry = {
> > +               .e = {
> > +                       .valid = 1,
> > +               }
> > +       };
> > +
> > +       return entry.l;
> > +}
> > +
> > +static void
> > +pgt_populate_entries_for_range(struct pgtable *pgt,
> > +                              const struct igt_aux_pgtable_range *range,
> > +                              drm_intel_bo *bo,
> > +                              uint64_t top_table)
> > +{
> > +       uint64_t surface_addr = range->surface_base;
> > +       uint64_t surface_end = surface_addr + range->surface_size;
> > +       uint64_t aux_addr = range->aux_base;
> > +       uint64_t l1_flags = pgt_get_l1_flags(range);
> > +       uint64_t lx_flags = pgt_get_lx_flags();
> > +
> > +       pgt->bo = bo;
> > +
> > +       for (; surface_addr < surface_end;
> > +            surface_addr += SURFACE_BLOCK_SIZE, aux_addr += CCS_BLOCK_SIZE) {
> > +               uint64_t table = top_table;
> > +               int level;
> > +
> > +               for (level = pgt->levels; level > 1; level--)
> > +                       table = pgt_get_table(pgt, table, level,
> > +                                             surface_addr, lx_flags);
> > +
> > +               pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags);
> > +       }
> > +}
> > +
> > +static void pgt_populate_entries(struct pgtable *pgt,
> > +                                const struct igt_aux_pgtable_range *ranges,
> > +                                int range_count,
> > +                                drm_intel_bo *gem_bo)
> > +{
> > +       uint64_t top_table;
> > +       int i;
> > +
> > +       igt_assert(gem_bo->size >= pgt->size);
> > +       memset(gem_bo->virtual, 0, pgt->size);
> > +
> > +       top_table = pgt_alloc_table(pgt, pgt->levels);
> > +       /* Top level table must be at offset 0. */
> > +       igt_assert(top_table == 0);
> > +
> > +       for (i = 0; i < range_count; i++)
> > +               pgt_populate_entries_for_range(pgt, &ranges[i], gem_bo,
> > +                                              top_table);
> > +}
> > +
> > +static struct pgtable *
> > +pgt_create(const struct pgtable_level_desc *level_descs, int levels,
> > +          const struct igt_aux_pgtable_range *ranges, int range_count)
> > +{
> > +       struct pgtable *pgt;
> > +       int level;
> > +
> > +       pgt = calloc(1, sizeof(*pgt));
> > +       igt_assert(pgt);
> > +
> > +       pgt->levels = levels;
> > +
> > +       pgt->level_info = calloc(levels, sizeof(*pgt->level_info));
> > +       igt_assert(pgt->level_info);
> > +
> > +       for (level = 0; level < pgt->levels; level++) {
> > +               struct pgtable_level_info *li = &pgt->level_info[level];
> > +
> > +               li->desc = &level_descs[level];
> > +               if (li->desc->table_size > pgt->max_align)
> > +                       pgt->max_align = li->desc->table_size;
> > +       }
> > +
> > +       pgt_calc_size(pgt, ranges, range_count);
> > +
> > +       return pgt;
> > +}
> > +
> > +static void pgt_destroy(struct pgtable *pgt)
> > +{
> > +       free(pgt->level_info);
> > +       free(pgt);
> > +}
> > +
> > +drm_intel_bo *
> > +igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
> > +                      const struct igt_aux_pgtable_range *ranges,
> > +                      int range_count)
> > +{
> > +       struct pgtable *pgt;
> > +       drm_intel_bo *gem_bo;
> > +
> > +       pgt = pgt_create(aux_pgtable_desc, AUX_PGTABLE_LEVELS,
> > +                        ranges, range_count);
> > +
> > +       gem_bo = drm_intel_bo_alloc_for_render(bufmgr,
> > +                                              "aux pgt",
> > +                                              pgt->size, pgt->max_align);
> > +       igt_assert(gem_bo);
> > +
> > +       igt_assert(drm_intel_bo_map(gem_bo, true) == 0);
> > +       pgt_populate_entries(pgt, ranges, range_count, gem_bo);
> > +       igt_assert(drm_intel_bo_unmap(gem_bo) == 0);
> > +
> > +       pgt_destroy(pgt);
> > +
> > +       return gem_bo;
> > +}
> > diff --git a/lib/igt_aux_pgtable.h b/lib/igt_aux_pgtable.h
> > new file mode 100644
> > index 00000000..64c6b21f
> > --- /dev/null
> > +++ b/lib/igt_aux_pgtable.h
> > @@ -0,0 +1,21 @@
> > +#ifndef _IGT_AUX_PGTABLE_H_
> > +#define _IGT_AUX_PGTABLE_H_
> > +
> > +#include "intel_bufmgr.h"
> > +
> > +struct igt_aux_pgtable;
> > +
> > +struct igt_aux_pgtable_range {
> > +       uint64_t surface_base;
> > +       uint64_t surface_size;
> > +       uint64_t aux_base;
> > +       uint32_t tiling;
> > +       int bpp;
> > +};
> > +
> > +drm_intel_bo *
> > +igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
> > +                      const struct igt_aux_pgtable_range *ranges,
> > +                      int range_count);
> > +
> > +#endif
> > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > index 069440cb..e7263ce1 100644
> > --- a/lib/intel_reg.h
> > +++ b/lib/intel_reg.h
> > @@ -673,6 +673,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  #define RING_VALID          0x00000001
> >  #define RING_INVALID        0x00000000
> >  
> > +#define GEN12_GFX_AUX_TABLE_BASE_ADDR  0x4200
> > +
> >  
> >  
> >  /* BitBlt Instructions
> > @@ -2570,6 +2572,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >  
> >  #define MI_LOAD_SCAN_LINES_INCL                (0x12<<23)
> >  #define MI_LOAD_REGISTER_IMM           ((0x22 << 23) | 1)
> > +#define MI_LOAD_REGISTER_MEM           ((0x29 << 23) | (4 - 2))
> >  
> >  /* Flush */
> >  #define MI_FLUSH                       (0x04<<23)
> > diff --git a/lib/meson.build b/lib/meson.build
> > index 221ae28c..2135ddf3 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -46,6 +46,7 @@ lib_sources = [
> >         'sw_sync.c',
> >         'intel_reg_map.c',
> >         'intel_iosf.c',
> > +       'igt_aux_pgtable.c',
> >         'igt_kms.c',
> >         'igt_fb.c',
> >         'igt_core.c',
> > diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> > index 694eb3cf..31e38c2b 100644
> > --- a/lib/rendercopy_gen9.c
> > +++ b/lib/rendercopy_gen9.c
> > @@ -15,6 +15,7 @@
> >  #include <i915_drm.h>
> >  
> >  #include "drmtest.h"
> > +#include "igt_aux_pgtable.h"
> >  #include "intel_bufmgr.h"
> >  #include "intel_batchbuffer.h"
> >  #include "intel_io.h"
> > @@ -972,19 +973,113 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
> >  
> >  #define BATCH_STATE_SPLIT 2048
> >  
> > +static void
> > +gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
> > +{
> > +       if (!state)
> > +               return;
> > +
> > +       OUT_BATCH(MI_LOAD_REGISTER_MEM);
> > +       OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
> > +       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state);
> > +       OUT_BATCH(MI_NOOP);
> > +
> > +       OUT_BATCH(MI_LOAD_REGISTER_MEM);
> > +       OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
> > +       OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state + 4);
> > +       OUT_BATCH(MI_NOOP);
> 
> Extra MI_NOOP.

Ok, will remove both; they are not required by the spec. This is a remnant from
when I tried to use MI_LOAD_REGISTER_IMM here, but that too requires the noop
_before_ the instruction.

> 
> > +}
> > +
> > +static int add_aux_pgtable_range(const struct igt_buf *buf,
> > +                                struct igt_aux_pgtable_range *range,
> > +                                uint64_t *pin_offset)
> > +{
> > +       if (!buf->aux.stride)
> > +               return 0;
> > +
> > +       drm_intel_bo_set_softpin_offset(buf->bo, *pin_offset);
> > +       igt_assert(buf->bo->offset64 == *pin_offset);
> > +
> > +       range->surface_base = *pin_offset;
> > +       range->surface_size = buf->size;
> > +       range->aux_base = *pin_offset + buf->aux.offset;
> > +       range->tiling = buf->tiling;
> > +       range->bpp = buf->bpp;
> > +
> > +       /* The GEN12+ compressed main surface must be 64kB aligned.  */
> > +       *pin_offset = ALIGN(*pin_offset + buf->bo->size, 0x10000);
> > +
> > +       return 1;
> > +}
> > +
> > +static drm_intel_bo *
> > +gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
> > +                           const struct igt_buf *dst_buf,
> > +                           const struct igt_buf *src_buf)
> > +{
> > +       struct igt_aux_pgtable_range ranges[2];
> > +       int range_count;
> > +       uint64_t pin_offset;
> > +       drm_intel_bo *gem_bo;
> > +
> > +       range_count = 0;
> > +       pin_offset = 0;
> > +
> > +       range_count += add_aux_pgtable_range(dst_buf,
> > +                                            &ranges[range_count], &pin_offset);
> > +       range_count += add_aux_pgtable_range(src_buf,
> > +                                            &ranges[range_count], &pin_offset);
> > +
> > +       if (!range_count)
> > +               return NULL;
> > +
> > +       gem_bo = igt_aux_pgtable_create(bufmgr, ranges, range_count);
> > +       igt_assert(gem_bo);
> > +
> > +       return gem_bo;
> > +}
> > +
> > +static uint32_t
> > +gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
> > +                              drm_intel_bo *aux_pgtable_bo)
> > +{
> > +       uint64_t *pgtable_ptr;
> > +       uint32_t pgtable_ptr_offset;
> > +       int ret;
> > +
> > +       if (!aux_pgtable_bo)
> > +               return 0;
> > +
> > +       pgtable_ptr = intel_batchbuffer_subdata_alloc(batch,
> > +                                                     sizeof(*pgtable_ptr),
> > +                                                     sizeof(*pgtable_ptr));
> > +       pgtable_ptr_offset = intel_batchbuffer_subdata_offset(batch,
> > +                                                             pgtable_ptr);
> > +
> > +       ret = drm_intel_bo_emit_reloc(batch->bo, pgtable_ptr_offset,
> > +                                     aux_pgtable_bo, 0,
> > +                                     I915_GEM_DOMAIN_RENDER, 0);
> > +       assert(ret == 0);
> > +
> > +       return pgtable_ptr_offset;
> > +}
> > +
> >  static
> >  void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >                           drm_intel_context *context,
> >                           const struct igt_buf *src, unsigned src_x,
> >                           unsigned src_y, unsigned width, unsigned height,
> >                           const struct igt_buf *dst, unsigned dst_x,
> > -                         unsigned dst_y, const uint32_t ps_kernel[][4],
> > +                         unsigned dst_y,
> > +                         drm_intel_bo *aux_pgtable_bo,
> > +                         const uint32_t ps_kernel[][4],
> >                           uint32_t ps_kernel_size)
> >  {
> >         uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
> >         uint32_t scissor_state;
> >         uint32_t vertex_buffer;
> >         uint32_t batch_end;
> > +       uint32_t aux_pgtable_state;
> >  
> >         igt_assert(src->bpp == dst->bpp);
> >         intel_batchbuffer_flush_with_context(batch, context);
> > @@ -1007,6 +1102,10 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >         viewport.cc_state = gen6_create_cc_viewport(batch);
> >         viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
> >         scissor_state = gen6_create_scissor_rect(batch);
> > +
> > +       aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
> > +                                                          aux_pgtable_bo);
> > +
> >         /* TODO: theree is other state which isn't setup */
> >  
> >         assert(batch->ptr < &batch->buffer[4095]);
> > @@ -1018,6 +1117,8 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >         OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
> >                                 GEN9_PIPELINE_SELECTION_MASK);
> >  
> > +       gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
> > +
> >         gen8_emit_sip(batch);
> >  
> >         gen7_emit_push_constants(batch);
> > @@ -1092,8 +1193,8 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
> >  
> >  {
> >         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> > -                         width, height, dst, dst_x, dst_y, ps_kernel_gen9,
> > -                         sizeof(ps_kernel_gen9));
> > +                         width, height, dst, dst_x, dst_y, NULL,
> > +                         ps_kernel_gen9, sizeof(ps_kernel_gen9));
> >  }
> >  
> >  void gen11_render_copyfunc(struct intel_batchbuffer *batch,
> > @@ -1104,8 +1205,8 @@ void gen11_render_copyfunc(struct intel_batchbuffer *batch,
> >  
> >  {
> >         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> > -                         width, height, dst, dst_x, dst_y, ps_kernel_gen11,
> > -                         sizeof(ps_kernel_gen11));
> > +                         width, height, dst, dst_x, dst_y, NULL,
> > +                         ps_kernel_gen11, sizeof(ps_kernel_gen11));
> >  }
> >  
> >  void gen12_render_copyfunc(struct intel_batchbuffer *batch,
> > @@ -1115,7 +1216,15 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
> >                            const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
> >  
> >  {
> > +       drm_intel_bo *aux_pgtable_bo;
> > +
> > +       aux_pgtable_bo = gen12_create_aux_pgtable_bo(batch->bufmgr, dst, src);
> > +
> >         _gen9_render_copyfunc(batch, context, src, src_x, src_y,
> > -                         width, height, dst, dst_x, dst_y, gen12_render_copy,
> > +                         width, height, dst, dst_x, dst_y,
> > +                         aux_pgtable_bo,
> > +                         gen12_render_copy,
> >                           sizeof(gen12_render_copy));
> > +
> > +       drm_intel_bo_unreference(aux_pgtable_bo);
> >  }
> > -- 
> > 2.17.1
> > 


More information about the igt-dev mailing list