[igt-dev] [PATCH i-g-t CI_ONLY v2 1/6] lib/rendercopy: Add AUX page table support

Mika Kahola mika.kahola at intel.com
Fri Nov 8 14:21:45 UTC 2019


From: Imre Deak <imre.deak at intel.com>

On GEN12+ the AUX CCS surfaces required by the render and media
compression must be specified by a 3 level page table directory, which
translates the main surface graphics address to the AUX CCS surface
graphics address. For this purpose add support for creating a GEM buffer
to translate the linear surface address range to the linear AUX surface
address range.

The buffers containing the main surface must be pinned down, since the
directory table entry indices depend on the surface address, and they
must be 64kB aligned. The page table itself, on the other hand, can be
relocated, so allow that and emit the required relocation entries.

v2:
- Make level variables 0-based (l1..l3 -> level=0..2).
- Add missing drm_intel_bo_set_softpin_offset() stub to fix build on
  non-Intel archs.
- Fix missing offsets in reloc entries of already bound objects. (Chris)
- Randomize pin offsets, to try to avoid eviction. (Chris)
- Remove redundant MI_NOOPS around MI_LOAD_REGISTER_MEM
- Stop using explicit reloc cache domains, as these don't make sense on
  GEN12 anyway. (Chris)
- Fix missing autotools support. (Chris)
- s/igt_aux_pgtable/intel_aux_pgtable/, since the functionality is Intel
  specific. (Chris)
v3:
- Make sure all objects with an AUX surface are pinned.
v4:
- s/MI_LOAD_REGISTER_MEM/MI_LOAD_REGISTER_MEM_GEN8/ (Chris)
- Fix using buf->bo->size instead of buf->size when finding a free
  range for a pinned obj.
- Fix alignment of the reserved space start for a pinned obj.
- Move gen12_emit_aux_pgtable_state() to its logical spot.

Cc: Mika Kahola <mika.kahola at intel.com>
Cc: Brian Welty <brian.welty at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
Signed-off-by: Imre Deak <imre.deak at intel.com>
---
 lib/Makefile.sources         |   1 +
 lib/drmtest.h                |   9 +
 lib/intel_aux_pgtable.c      | 377 +++++++++++++++++++++++++++++++++++
 lib/intel_aux_pgtable.h      |  12 ++
 lib/intel_reg.h              |   2 +
 lib/meson.build              |   1 +
 lib/rendercopy_gen9.c        | 234 +++++++++++++++++++++-
 lib/stubs/drm/intel_bufmgr.c |   6 +
 8 files changed, 636 insertions(+), 6 deletions(-)
 create mode 100644 lib/intel_aux_pgtable.c
 create mode 100644 lib/intel_aux_pgtable.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 34e0c012..e544c27b 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -100,6 +100,7 @@ lib_source_list =	 	\
 	surfaceformat.h		\
 	sw_sync.c		\
 	sw_sync.h		\
+	intel_aux_pgtable.c	\
 	intel_reg_map.c		\
 	intel_iosf.c		\
 	igt_kms.c		\
diff --git a/lib/drmtest.h b/lib/drmtest.h
index 614f57e6..05eb0860 100644
--- a/lib/drmtest.h
+++ b/lib/drmtest.h
@@ -77,6 +77,15 @@ void __set_forced_driver(const char *name);
  */
 #define ALIGN(v, a) (((v) + (a)-1) & ~((a)-1))
 
+/**
+ * ALIGN_DOWN:
+ * @x: value to be aligned down
+ * @a: alignment unit in bytes, must be a power of two
+ *
+ * Macro to align down a value @x to a specified unit @a.
+ */
+#define ALIGN_DOWN(x, a)	ALIGN((x) - ((a) - 1), (a))
+
 int drm_open_driver(int chipset);
 int drm_open_driver_master(int chipset);
 int drm_open_driver_render(int chipset);
diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
new file mode 100644
index 00000000..e1249dee
--- /dev/null
+++ b/lib/intel_aux_pgtable.c
@@ -0,0 +1,377 @@
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "drmtest.h"
+#include "intel_aux_pgtable.h"
+#include "intel_batchbuffer.h"
+#include "intel_bufmgr.h"
+#include "ioctl_wrappers.h"
+
+#include "i915/gem_mman.h"
+
+/* Mask of bits s..e (inclusive); 64 bits wide even where long is 32 bits. */
+#define BITMASK(e, s)		((~UINT64_C(0) << (s)) & \
+				 (~UINT64_C(0) >> (63 - (e))))
+
+/* The unit size to which the AUX CCS surface is aligned. */
+#define AUX_CCS_UNIT_SIZE	64
+/*
+ * The block size on the AUX CCS surface which is mapped by one L1 AUX
+ * pagetable entry.
+ */
+#define AUX_CCS_BLOCK_SIZE	(4 * AUX_CCS_UNIT_SIZE)
+/*
+ * The block size on the main surface mapped by one AUX CCS block:
+ *   256 bytes per CCS block *
+ *   8   bits per byte /
+ *   2   bits per main surface CL *
+ *   64  bytes per main surface CL
+ */
+#define MAIN_SURFACE_BLOCK_SIZE	(AUX_CCS_BLOCK_SIZE * 8 / 2 * 64)
+
+#define GFX_ADDRESS_BITS	48
+
+#define max(a, b)		((a) > (b) ? (a) : (b))
+
+struct pgtable_level_desc {
+	int idx_shift;
+	int idx_bits;
+	int entry_ptr_shift;
+	int table_size;
+};
+
+struct pgtable_level_info {
+	const struct pgtable_level_desc *desc;
+	int table_count;
+	int alloc_base;
+	int alloc_ptr;
+};
+
+struct pgtable {
+	int levels;
+	struct pgtable_level_info *level_info;
+	int size;
+	int max_align;
+	drm_intel_bo *bo;
+};
+
+/* Count the 2^address_bits sized, aligned ranges touched by the sorted bufs. */
+static int
+pgt_table_count(int address_bits, const struct igt_buf **bufs, int buf_count)
+{
+	const uint64_t table_span = UINT64_C(1) << address_bits; /* up to 2^48 */
+	uint64_t end = 0;
+	int count = 0;
+	int i;
+
+	for (i = 0; i < buf_count; i++) {
+		const struct igt_buf *buf = bufs[i];
+		uint64_t start;
+
+		/* We require bufs to be sorted. */
+		igt_assert(i == 0 ||
+			   buf->bo->offset64 >= bufs[i - 1]->bo->offset64 +
+						bufs[i - 1]->bo->size);
+
+		start = ALIGN_DOWN(buf->bo->offset64, table_span);
+		/* Avoid double counting for overlapping aligned bufs. */
+		start = max(start, end);
+
+		end = ALIGN(buf->bo->offset64 + buf->size, table_span);
+		igt_assert(end >= start);
+
+		count += (end - start) >> address_bits;
+	}
+
+	return count;
+}
+
+static void
+pgt_calc_size(struct pgtable *pgt, const struct igt_buf **bufs, int buf_count)
+{
+	int level;
+
+	pgt->size = 0;
+
+	for (level = pgt->levels - 1; level >= 0; level--) {
+		struct pgtable_level_info *li = &pgt->level_info[level];
+
+		li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
+		li->alloc_ptr = li->alloc_base;
+
+		li->table_count = pgt_table_count(li->desc->idx_shift +
+						  li->desc->idx_bits,
+						  bufs, buf_count);
+
+		pgt->size = li->alloc_base +
+			    li->table_count * li->desc->table_size;
+	}
+}
+
+static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
+{
+	struct pgtable_level_info *li = &pgt->level_info[level];
+	uint64_t table;
+
+	table = li->alloc_ptr;
+	li->alloc_ptr += li->desc->table_size;
+
+	igt_assert(li->alloc_ptr <=
+		   li->alloc_base + li->table_count * li->desc->table_size);
+
+	return table;
+}
+
+static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
+{
+	const struct pgtable_level_desc *ld = pgt->level_info[level].desc;
+	uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
+				ld->idx_shift);
+
+	return (address & mask) >> ld->idx_shift;
+}
+
+static uint64_t ptr_mask(struct pgtable *pgt, int level)
+{
+	const struct pgtable_level_desc *ld = pgt->level_info[level].desc;
+
+	return BITMASK(GFX_ADDRESS_BITS - 1, ld->entry_ptr_shift);
+}
+
+static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
+{
+	uint64_t ptr = entry & ptr_mask(pgt, level);
+
+	if (level)
+		ptr -= pgt->bo->offset64;
+	igt_assert(!(ptr & ~ptr_mask(pgt, level)));
+
+	return ptr;
+}
+
+static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
+			    uint64_t flags)
+{
+	if (level)
+		ptr += pgt->bo->offset64;
+	igt_assert(!(ptr & ~ptr_mask(pgt, level)));
+
+	return ptr | flags;
+}
+
+/* Look up, or allocate on first use, the child table covering @address. */
+static uint64_t
+pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
+	      int level, uint64_t address, uint64_t flags)
+{
+	uint64_t *table_ptr = pgt->bo->virtual + parent_table;
+	int entry_idx = pgt_address_index(pgt, level, address);
+	uint64_t *entry_ptr = &table_ptr[entry_idx];
+
+	if (!*entry_ptr) {
+		uint64_t child_table = pgt_alloc_table(pgt, level - 1);
+
+		*entry_ptr = pgt_mkentry(pgt, level, child_table, flags);
+		/* The reloc delta is BO relative, so it must exclude offset64. */
+		drm_intel_bo_emit_reloc(pgt->bo,
+					parent_table + entry_idx * sizeof(uint64_t),
+					pgt->bo, child_table | flags, 0, 0);
+	}
+
+	return pgt_entry_ptr(pgt, level, *entry_ptr);
+}
+
+static void
+pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table,
+		 uint64_t address, uint64_t ptr, uint64_t flags)
+{
+	uint64_t *l1_table_ptr;
+	uint64_t *l1_entry_ptr;
+
+	l1_table_ptr = pgt->bo->virtual + l1_table;
+	l1_entry_ptr = &l1_table_ptr[pgt_address_index(pgt, 0, address)];
+	*l1_entry_ptr = pgt_mkentry(pgt, 0, ptr, flags);
+}
+
+static uint64_t pgt_get_l1_flags(const struct igt_buf *buf)
+{
+	/*
+	 * The offset of .tile_mode isn't specified by bspec, it's what Mesa
+	 * uses.
+	 */
+	union {
+		struct {
+			uint64_t	valid:1;
+			uint64_t	compression_mod:2;
+			uint64_t	lossy_compression:1;
+			uint64_t	pad:4;
+			uint64_t	addr:40;
+			uint64_t	pad2:4;
+			uint64_t	tile_mode:2;
+			uint64_t	depth:3;
+			uint64_t	ycr:1;
+			uint64_t	format:6;
+		} e;
+		uint64_t l;
+	} entry = {
+		.e = {
+			.valid = 1,
+			.tile_mode = buf->tiling == I915_TILING_Y ? 1 : 0,
+			.depth = 5,		/* 32bpp */
+			.format = 0xA,		/* B8G8R8A8_UNORM */
+		}
+	};
+
+	/*
+	 * TODO: Clarify if Yf is supported and if we need to differentiate
+	 *       Ys and Yf.
+	 *       Add support for more formats.
+	 */
+	igt_assert(buf->tiling == I915_TILING_Y ||
+		   buf->tiling == I915_TILING_Yf ||
+		   buf->tiling == I915_TILING_Ys);
+
+	igt_assert(buf->bpp == 32);
+
+	return entry.l;
+}
+
+static uint64_t pgt_get_lx_flags(void)
+{
+	union {
+		struct {
+			uint64_t        valid:1;
+			uint64_t        addr:47;
+			uint64_t        pad:16;
+		} e;
+		uint64_t l;
+	} entry = {
+		.e = {
+			.valid = 1,
+		}
+	};
+
+	return entry.l;
+}
+
+static void
+pgt_populate_entries_for_buf(struct pgtable *pgt,
+			       const struct igt_buf *buf,
+			       uint64_t top_table)
+{
+	uint64_t surface_addr = buf->bo->offset64;
+	uint64_t surface_end = surface_addr + buf->size;
+	uint64_t aux_addr = buf->bo->offset64 + buf->aux.offset;
+	uint64_t l1_flags = pgt_get_l1_flags(buf);
+	uint64_t lx_flags = pgt_get_lx_flags();
+
+	for (; surface_addr < surface_end;
+	     surface_addr += MAIN_SURFACE_BLOCK_SIZE,
+	     aux_addr += AUX_CCS_BLOCK_SIZE) {
+		uint64_t table = top_table;
+		int level;
+
+		for (level = pgt->levels - 1; level >= 1; level--)
+			table = pgt_get_table(pgt, table, level,
+					      surface_addr, lx_flags);
+
+		pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags);
+	}
+}
+
+static void pgt_populate_entries(struct pgtable *pgt,
+				 const struct igt_buf **bufs,
+				 int buf_count,
+				 drm_intel_bo *pgt_bo)
+{
+	uint64_t top_table;
+	int i;
+
+	pgt->bo = pgt_bo;
+
+	igt_assert(pgt_bo->size >= pgt->size);
+	memset(pgt_bo->virtual, 0, pgt->size);
+
+	top_table = pgt_alloc_table(pgt, pgt->levels - 1);
+	/* Top level table must be at offset 0. */
+	igt_assert(top_table == 0);
+
+	for (i = 0; i < buf_count; i++)
+		pgt_populate_entries_for_buf(pgt, bufs[i], top_table);
+}
+
+static struct pgtable *
+pgt_create(const struct pgtable_level_desc *level_descs, int levels,
+	   const struct igt_buf **bufs, int buf_count)
+{
+	struct pgtable *pgt;
+	int level;
+
+	pgt = calloc(1, sizeof(*pgt));
+	igt_assert(pgt);
+
+	pgt->levels = levels;
+
+	pgt->level_info = calloc(levels, sizeof(*pgt->level_info));
+	igt_assert(pgt->level_info);
+
+	for (level = 0; level < pgt->levels; level++) {
+		struct pgtable_level_info *li = &pgt->level_info[level];
+
+		li->desc = &level_descs[level];
+		if (li->desc->table_size > pgt->max_align)
+			pgt->max_align = li->desc->table_size;
+	}
+
+	pgt_calc_size(pgt, bufs, buf_count);
+
+	return pgt;
+}
+
+static void pgt_destroy(struct pgtable *pgt)
+{
+	free(pgt->level_info);
+	free(pgt);
+}
+
+drm_intel_bo *
+intel_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
+		       const struct igt_buf **bufs, int buf_count)
+{
+	static const struct pgtable_level_desc level_desc[] = {
+		{
+			.idx_shift = 16,
+			.idx_bits = 8,
+			.entry_ptr_shift = 8,
+			.table_size = 8 * 1024,
+		},
+		{
+			.idx_shift = 24,
+			.idx_bits = 12,
+			.entry_ptr_shift = 13,
+			.table_size = 32 * 1024,
+		},
+		{
+			.idx_shift = 36,
+			.idx_bits = 12,
+			.entry_ptr_shift = 15,
+			.table_size = 32 * 1024,
+		},
+	};
+	struct pgtable *pgt;
+	drm_intel_bo *pgt_bo;
+
+	pgt = pgt_create(level_desc, ARRAY_SIZE(level_desc), bufs, buf_count);
+
+	pgt_bo = drm_intel_bo_alloc_for_render(bufmgr, "aux pgt",
+					       pgt->size, pgt->max_align);
+	igt_assert(pgt_bo);
+
+	igt_assert(drm_intel_bo_map(pgt_bo, true) == 0);
+	pgt_populate_entries(pgt, bufs, buf_count, pgt_bo);
+	igt_assert(drm_intel_bo_unmap(pgt_bo) == 0);
+
+	pgt_destroy(pgt);
+
+	return pgt_bo;
+}
diff --git a/lib/intel_aux_pgtable.h b/lib/intel_aux_pgtable.h
new file mode 100644
index 00000000..c0f001b4
--- /dev/null
+++ b/lib/intel_aux_pgtable.h
@@ -0,0 +1,12 @@
+#ifndef __INTEL_AUX_PGTABLE_H__
+#define __INTEL_AUX_PGTABLE_H__
+
+#include "intel_bufmgr.h"
+
+struct igt_buf;
+
+drm_intel_bo *
+intel_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
+			 const struct igt_buf **bufs, int buf_count);
+
+#endif
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 069440cb..84f746a6 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -673,6 +673,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define RING_VALID          0x00000001
 #define RING_INVALID        0x00000000
 
+#define GEN12_GFX_AUX_TABLE_BASE_ADDR	0x4200
 
 
 /* BitBlt Instructions
@@ -2570,6 +2571,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define MI_LOAD_SCAN_LINES_INCL		(0x12<<23)
 #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
+#define MI_LOAD_REGISTER_MEM_GEN8	((0x29 << 23) | (4 - 2))
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
diff --git a/lib/meson.build b/lib/meson.build
index fbc0c8d1..edaca091 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -44,6 +44,7 @@ lib_sources = [
 	'rendercopy_gen8.c',
 	'rendercopy_gen9.c',
 	'sw_sync.c',
+	'intel_aux_pgtable.c',
 	'intel_reg_map.c',
 	'intel_iosf.c',
 	'igt_kms.c',
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 694eb3cf..e3f2af25 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -15,6 +15,7 @@
 #include <i915_drm.h>
 
 #include "drmtest.h"
+#include "intel_aux_pgtable.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
 #include "intel_io.h"
@@ -972,19 +973,226 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
 
 #define BATCH_STATE_SPLIT 2048
 
+static void
+aux_pgtable_find_max_free_range(const struct igt_buf **bufs, int buf_count,
+				uint64_t *range_start, uint64_t *range_size)
+{
+	/*
+	 * Keep the first page reserved, so we can differentiate pinned
+	 * objects based on a non-zero offset.
+	 */
+	uint64_t start = 0x1000;
+	/* For now alloc only from the first 4GB address space. */
+	const uint64_t end = 1ULL << 32;
+	uint64_t max_range_start = 0;
+	uint64_t max_range_size = 0;
+	int i;
+
+	for (i = 0; i < buf_count; i++) {
+		if (bufs[i]->bo->offset64 >= end)
+			break;
+
+		if (bufs[i]->bo->offset64 - start > max_range_size) {
+			max_range_start = start;
+			max_range_size = bufs[i]->bo->offset64 - start;
+		}
+		start = bufs[i]->bo->offset64 + bufs[i]->bo->size;
+	}
+
+	if (start < end && end - start > max_range_size) {
+		max_range_start = start;
+		max_range_size = end - start;
+	}
+
+	*range_start = max_range_start;
+	*range_size = max_range_size;
+}
+
+/* Return a random, aligned offset for @size bytes inside the largest hole. */
+static uint64_t
+aux_pgtable_find_free_range(const struct igt_buf **bufs, int buf_count,
+			    uint32_t size)
+{
+	uint64_t range_start;
+	uint64_t range_size;
+	const uint64_t align = 0x10000;	/* compressed surfaces need 64kB */
+	uint64_t pad;
+
+	aux_pgtable_find_max_free_range(bufs, buf_count,
+					&range_start, &range_size);
+
+	pad = ALIGN(range_start, align) - range_start;
+	igt_assert(range_size >= pad + size);
+	range_start += pad;
+	range_size -= pad;
+	/* +1: an exact fit is valid and must not turn into a modulo by zero. */
+	return range_start + ALIGN_DOWN(rand() % (range_size - size + 1), align);
+}
+
+static void
+aux_pgtable_reserve_range(const struct igt_buf **bufs, int buf_count,
+			  const struct igt_buf *new_buf)
+{
+	int i;
+
+	if (new_buf->aux.stride) {
+		uint64_t pin_offset = new_buf->bo->offset64;
+
+		if (!pin_offset)
+			pin_offset = aux_pgtable_find_free_range(bufs,
+								 buf_count,
+								 new_buf->bo->size);
+		drm_intel_bo_set_softpin_offset(new_buf->bo, pin_offset);
+		igt_assert(new_buf->bo->offset64 == pin_offset);
+	}
+
+	for (i = 0; i < buf_count; i++)
+		if (bufs[i]->bo->offset64 > new_buf->bo->offset64)
+			break;
+
+	memmove(&bufs[i + 1], &bufs[i], sizeof(bufs[0]) * (buf_count - i));
+
+	bufs[i] = new_buf;
+}
+
+struct aux_pgtable_info {
+	int buf_count;
+	const struct igt_buf *bufs[2];
+	uint64_t buf_pin_offsets[2];
+	drm_intel_bo *pgtable_bo;
+};
+
+static void
+gen12_aux_pgtable_init(struct aux_pgtable_info *info,
+		       drm_intel_bufmgr *bufmgr,
+		       const struct igt_buf *src_buf,
+		       const struct igt_buf *dst_buf)
+{
+	const struct igt_buf *bufs[2];
+	const struct igt_buf *reserved_bufs[2];
+	int reserved_buf_count;
+	int i;
+
+	if (!src_buf->aux.stride && !dst_buf->aux.stride)
+		return;
+
+	bufs[0] = src_buf;
+	bufs[1] = dst_buf;
+
+	/*
+	 * Ideally we'd need an IGT-wide GFX address space allocator, which
+	 * would consider all allocations and thus avoid evictions. For now use
+	 * a simpler scheme here, which only considers the buffers involved in
+	 * the blit, which should at least minimize the chance for evictions
+	 * in the case of subsequent blits:
+	 *   1. If they were already bound (bo->offset64 != 0), use this
+	 *      address.
+	 *   2. Pick a range randomly from the 4GB address space, that is not
+	 *      already occupied by a bound object, or an object we pinned.
+	 */
+	reserved_buf_count = 0;
+	/* First reserve space for any bufs that are bound already. */
+	for (i = 0; i < ARRAY_SIZE(bufs); i++)
+		if (bufs[i]->bo->offset64)
+			aux_pgtable_reserve_range(reserved_bufs,
+						  reserved_buf_count++,
+						  bufs[i]);
+
+	/* Next, reserve space for unbound bufs with an AUX surface. */
+	for (i = 0; i < ARRAY_SIZE(bufs); i++)
+		if (!bufs[i]->bo->offset64 && bufs[i]->aux.stride)
+			aux_pgtable_reserve_range(reserved_bufs,
+						  reserved_buf_count++,
+						  bufs[i]);
+
+	/* Create AUX pgtable entries only for bufs with an AUX surface */
+	info->buf_count = 0;
+	for (i = 0; i < reserved_buf_count; i++) {
+		if (!reserved_bufs[i]->aux.stride)
+			continue;
+
+		info->bufs[info->buf_count] = reserved_bufs[i];
+		info->buf_pin_offsets[info->buf_count] =
+			reserved_bufs[i]->bo->offset64;
+		info->buf_count++;
+	}
+
+	info->pgtable_bo = intel_aux_pgtable_create(bufmgr,
+						    info->bufs,
+						    info->buf_count);
+	igt_assert(info->pgtable_bo);
+}
+
+static void
+gen12_aux_pgtable_cleanup(struct aux_pgtable_info *info)
+{
+	int i;
+
+	/* Check that the pinned bufs kept their offset after the exec. */
+	for (i = 0; i < info->buf_count; i++)
+		igt_assert_eq_u64(info->bufs[i]->bo->offset64,
+				  info->buf_pin_offsets[i]);
+
+	drm_intel_bo_unreference(info->pgtable_bo);
+}
+
+static uint32_t
+gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
+			       drm_intel_bo *aux_pgtable_bo)
+{
+	uint64_t *pgtable_ptr;
+	uint32_t pgtable_ptr_offset;
+	int ret;
+
+	if (!aux_pgtable_bo)
+		return 0;
+
+	pgtable_ptr = intel_batchbuffer_subdata_alloc(batch,
+						      sizeof(*pgtable_ptr),
+						      sizeof(*pgtable_ptr));
+	pgtable_ptr_offset = intel_batchbuffer_subdata_offset(batch,
+							      pgtable_ptr);
+
+	*pgtable_ptr = aux_pgtable_bo->offset64;
+	ret = drm_intel_bo_emit_reloc(batch->bo, pgtable_ptr_offset,
+				      aux_pgtable_bo, 0,
+				      0, 0);
+	assert(ret == 0);
+
+	return pgtable_ptr_offset;
+}
+
+static void
+gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
+{
+	if (!state)
+		return;
+
+	OUT_BATCH(MI_LOAD_REGISTER_MEM_GEN8);
+	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
+	OUT_RELOC(batch->bo, 0, 0, state);
+
+	OUT_BATCH(MI_LOAD_REGISTER_MEM_GEN8);
+	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
+	OUT_RELOC(batch->bo, 0, 0, state + 4);
+}
+
 static
 void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 			  drm_intel_context *context,
 			  const struct igt_buf *src, unsigned src_x,
 			  unsigned src_y, unsigned width, unsigned height,
 			  const struct igt_buf *dst, unsigned dst_x,
-			  unsigned dst_y, const uint32_t ps_kernel[][4],
+			  unsigned dst_y,
+			  drm_intel_bo *aux_pgtable_bo,
+			  const uint32_t ps_kernel[][4],
 			  uint32_t ps_kernel_size)
 {
 	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
 	uint32_t scissor_state;
 	uint32_t vertex_buffer;
 	uint32_t batch_end;
+	uint32_t aux_pgtable_state;
 
 	igt_assert(src->bpp == dst->bpp);
 	intel_batchbuffer_flush_with_context(batch, context);
@@ -1007,6 +1215,10 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	viewport.cc_state = gen6_create_cc_viewport(batch);
 	viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
 	scissor_state = gen6_create_scissor_rect(batch);
+
+	aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
+							   aux_pgtable_bo);
+
 	/* TODO: theree is other state which isn't setup */
 
 	assert(batch->ptr < &batch->buffer[4095]);
@@ -1018,6 +1230,8 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
 				GEN9_PIPELINE_SELECTION_MASK);
 
+	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
+
 	gen8_emit_sip(batch);
 
 	gen7_emit_push_constants(batch);
@@ -1092,8 +1306,8 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
 
 {
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, ps_kernel_gen9,
-			  sizeof(ps_kernel_gen9));
+			  width, height, dst, dst_x, dst_y, NULL,
+			  ps_kernel_gen9, sizeof(ps_kernel_gen9));
 }
 
 void gen11_render_copyfunc(struct intel_batchbuffer *batch,
@@ -1104,8 +1318,8 @@ void gen11_render_copyfunc(struct intel_batchbuffer *batch,
 
 {
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, ps_kernel_gen11,
-			  sizeof(ps_kernel_gen11));
+			  width, height, dst, dst_x, dst_y, NULL,
+			  ps_kernel_gen11, sizeof(ps_kernel_gen11));
 }
 
 void gen12_render_copyfunc(struct intel_batchbuffer *batch,
@@ -1115,7 +1329,15 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
 			   const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
 
 {
+	struct aux_pgtable_info pgtable_info = { };
+
+	gen12_aux_pgtable_init(&pgtable_info, batch->bufmgr, src, dst);
+
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, gen12_render_copy,
+			  width, height, dst, dst_x, dst_y,
+			  pgtable_info.pgtable_bo,
+			  gen12_render_copy,
 			  sizeof(gen12_render_copy));
+
+	gen12_aux_pgtable_cleanup(&pgtable_info);
 }
diff --git a/lib/stubs/drm/intel_bufmgr.c b/lib/stubs/drm/intel_bufmgr.c
index f87452ac..cbab2484 100644
--- a/lib/stubs/drm/intel_bufmgr.c
+++ b/lib/stubs/drm/intel_bufmgr.c
@@ -233,6 +233,12 @@ int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
 	return -ENODEV;
 }
 
+int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
+{
+	igt_require_f(false, missing_support_str);
+	return -ENODEV;
+}
+
 int drm_intel_bo_disable_reuse(drm_intel_bo *bo)
 {
 	igt_require_f(false, missing_support_str);
-- 
2.17.1



More information about the igt-dev mailing list