[igt-dev] [PATCH i-g-t 1/3] lib/rendercopy: Add AUX page table support

Imre Deak imre.deak at intel.com
Fri Nov 1 20:13:09 UTC 2019


On GEN12+ the AUX CCS surfaces required by the render and media
compression must be specified by a 3-level page table directory, which
translates the main surface graphics address to the AUX CCS surface
graphics address. For this purpose add support for creating a GEM buffer
holding such a page table, which translates the linear surface address
range to the linear AUX surface address range.

The buffers containing the main surface must be pinned down, since the
directory table entry indices depend on the surface address, and these
buffers must be 64kB aligned. The page table itself, on the other hand,
can be relocated, so allow that and emit the required relocation entries.

Cc: Mika Kahola <mika.kahola at intel.com>
Cc: Brian Welty <brian.welty at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
Signed-off-by: Imre Deak <imre.deak at intel.com>
---
 lib/igt_aux_pgtable.c | 381 ++++++++++++++++++++++++++++++++++++++++++
 lib/igt_aux_pgtable.h |  21 +++
 lib/intel_reg.h       |   3 +
 lib/meson.build       |   1 +
 lib/rendercopy_gen9.c | 121 +++++++++++++-
 5 files changed, 521 insertions(+), 6 deletions(-)
 create mode 100644 lib/igt_aux_pgtable.c
 create mode 100644 lib/igt_aux_pgtable.h

diff --git a/lib/igt_aux_pgtable.c b/lib/igt_aux_pgtable.c
new file mode 100644
index 00000000..aaa24cfd
--- /dev/null
+++ b/lib/igt_aux_pgtable.c
@@ -0,0 +1,381 @@
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "drmtest.h"
+#include "igt_aux_pgtable.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+#include "ioctl_wrappers.h"
+
+#include "i915/gem_mman.h"
+
+#define BITS_PER_LONG_LONG	(sizeof(long long) * 8)	/* masks are 64 bit */
+#define BITMASK(e, s)		((~0ULL << (s)) & \
+				 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (e))))
+
+#define ALIGN_DOWN(x, a)	ALIGN((x) - ((a) - 1), (a))
+
+#define CL_SIZE			64
+/*
+ * The size of a block on the CCS surface that is covered/pointed to by one
+ * L1 AUX pagetable entry. This size corresponds to the 1<<8 alignment of the
+ * pointers in the L1 entry.
+ */
+#define CCS_BLOCK_SIZE		(4 * CL_SIZE)
+/*
+ * 256 bytes per CCS block size *
+ * 8   bits per byte /
+ * 2   bits per surface CL *
+ * 64  bytes per surface CL
+ */
+#define SURFACE_BLOCK_SIZE	(CCS_BLOCK_SIZE * 8 / 2 * CL_SIZE)
+#define AUX_PGTABLE_VALID	1
+#define AUX_PGTABLE_LEVELS	3
+
+#define ADDRESS_BITS		48
+
+#define max(a, b)		((a) > (b) ? (a) : (b))
+
+struct pgtable_level_desc {
+	int idx_shift;
+	int idx_bits;
+	int entry_ptr_shift;
+	int table_size;
+};
+
+struct pgtable_level_info {
+	const struct pgtable_level_desc *desc;
+	int table_count;
+	int alloc_base;
+	int alloc_ptr;
+};
+
+struct pgtable {
+	int levels;
+	struct pgtable_level_info *level_info;
+	int size;
+	int max_align;
+	drm_intel_bo *bo;
+};
+
+static const struct pgtable_level_desc aux_pgtable_desc[AUX_PGTABLE_LEVELS] = {
+	{
+		.idx_shift = 16,
+		.idx_bits = 8,
+		.entry_ptr_shift = 8,
+		.table_size = 8 * 1024,
+	},
+	{
+		.idx_shift = 24,
+		.idx_bits = 12,
+		.entry_ptr_shift = 13,
+		.table_size = 32 * 1024,
+	},
+	{
+		.idx_shift = 36,
+		.idx_bits = 12,
+		.entry_ptr_shift = 15,
+		.table_size = 32 * 1024,
+	},
+};
+
+static int
+pgt_table_count(int address_bits,
+		const struct igt_aux_pgtable_range *ranges, int range_count)
+{
+	uint64_t end;	/* aligned end of the previously counted range */
+	int count;
+	int i;
+	/* Count the (1 << address_bits) aligned blocks covering all ranges. */
+	count = 0;
+	end = 0;
+	for (i = 0; i < range_count; i++) {
+		const struct igt_aux_pgtable_range *r = &ranges[i];
+		uint64_t start;
+
+		/* We require ranges to be sorted. */
+		igt_assert(i == 0 ||
+			   r->surface_base >= ranges[i - 1].surface_base +
+					      ranges[i - 1].surface_size);
+		/* address_bits is up to 48, so shift a 64 bit value. */
+		start = ALIGN_DOWN(r->surface_base, 1ULL << address_bits);
+		/* Avoid double counting for overlapping aligned ranges. */
+		start = max(start, end);
+
+		end = ALIGN(r->surface_base + r->surface_size,
+			    1ULL << address_bits);
+		igt_assert(end >= start);
+
+		count += (end - start) >> address_bits;
+	}
+
+	return count;
+}
+
+static void
+pgt_calc_size(struct pgtable *pgt,
+	      const struct igt_aux_pgtable_range *ranges, int range_count)
+{
+	int level;
+
+	pgt->size = 0;
+
+	for (level = pgt->levels; level > 0; level--) {
+		struct pgtable_level_info *li = &pgt->level_info[level - 1];
+
+		li->alloc_base = ALIGN(pgt->size, li->desc->table_size);
+		li->alloc_ptr = li->alloc_base;
+
+		li->table_count = pgt_table_count(li->desc->idx_shift +
+						  li->desc->idx_bits,
+						  ranges, range_count);
+
+		pgt->size = li->alloc_base +
+			    li->table_count * li->desc->table_size;
+	}
+}
+
+static uint64_t pgt_alloc_table(struct pgtable *pgt, int level)
+{
+	struct pgtable_level_info *li = &pgt->level_info[level - 1];
+	uint64_t table;
+
+	table = li->alloc_ptr;
+	li->alloc_ptr += li->desc->table_size;
+
+	igt_assert(li->alloc_ptr <=
+		   li->alloc_base + li->table_count * li->desc->table_size);
+
+	return table;
+}
+
+static int pgt_address_index(struct pgtable *pgt, int level, uint64_t address)
+{
+	const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
+	uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1,
+				ld->idx_shift);
+
+	return (address & mask) >> ld->idx_shift;
+}
+
+static uint64_t ptr_mask(struct pgtable *pgt, int level)
+{
+	const struct pgtable_level_desc *ld = pgt->level_info[level - 1].desc;
+
+	return BITMASK(ADDRESS_BITS - 1, ld->entry_ptr_shift);
+}
+
+static uint64_t pgt_entry_ptr(struct pgtable *pgt, int level, uint64_t entry)
+{
+	return entry & ptr_mask(pgt, level);
+}
+
+static uint64_t pgt_mkentry(struct pgtable *pgt, int level, uint64_t ptr,
+			    uint64_t flags)
+{
+	igt_assert(!(ptr & ~ptr_mask(pgt, level)));
+
+	return ptr | flags;
+}
+
+static uint64_t
+pgt_get_table(struct pgtable *pgt, uint64_t parent_table,
+	      int level, uint64_t address, uint64_t flags)
+{
+	uint64_t *table_ptr = pgt->bo->virtual + parent_table;
+	int entry_idx = pgt_address_index(pgt, level, address);
+	uint64_t *entry_ptr;
+	/* Look up the child table for @address, allocating it if missing. */
+	entry_ptr = &table_ptr[entry_idx];
+	if (!*entry_ptr) {
+		uint64_t child_table = pgt_alloc_table(pgt, level - 1);
+
+		*entry_ptr = pgt_mkentry(pgt, level, child_table, flags);
+		/* The child table offset is bo relative, so it needs a reloc. */
+		igt_assert(drm_intel_bo_emit_reloc(pgt->bo,
+				parent_table + entry_idx * sizeof(uint64_t),
+				pgt->bo, *entry_ptr,
+				I915_GEM_DOMAIN_INSTRUCTION, 0) == 0);
+	}
+
+	return pgt_entry_ptr(pgt, level, *entry_ptr);
+}
+
+static void
+pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table,
+		 uint64_t address, uint64_t ptr, uint64_t flags)
+{
+	uint64_t *l1_table_ptr;
+	uint64_t *l1_entry_ptr;
+
+	l1_table_ptr = pgt->bo->virtual + l1_table;
+	l1_entry_ptr = &l1_table_ptr[pgt_address_index(pgt, 1, address)];
+	*l1_entry_ptr = pgt_mkentry(pgt, 1, ptr, flags);
+}
+
+static uint64_t pgt_get_l1_flags(const struct igt_aux_pgtable_range *range)
+{
+	/*
+	 * The offset of .tile_mode isn't specified by bspec, it's what Mesa
+	 * uses.
+	 */
+	union {
+		struct {
+			uint64_t	valid:1;
+			uint64_t	compression_mod:2;
+			uint64_t	lossy_compression:1;
+			uint64_t	pad:4;
+			uint64_t	addr:40;
+			uint64_t	pad2:4;
+			uint64_t	tile_mode:2;
+			uint64_t	depth:3;
+			uint64_t	ycr:1;
+			uint64_t	format:6;
+		} e;
+		uint64_t l;
+	} entry = {
+		.e = {
+			.valid = 1,
+			.tile_mode = range->tiling == I915_TILING_Y ? 1 : 0,
+			.depth = 5,		/* 32bpp */
+			.format = 0xA,		/* B8G8R8A8_UNORM */
+		}
+	};
+
+	/*
+	 * TODO: Clarify if Yf is supported and if we need to differentiate
+	 *       Ys and Yf (both currently get tile_mode 0).
+	 *       Add support for more formats.
+	 */
+	igt_assert(range->tiling == I915_TILING_Y ||
+		   range->tiling == I915_TILING_Yf ||
+		   range->tiling == I915_TILING_Ys);
+
+	igt_assert(range->bpp == 32);
+
+	return entry.l;
+}
+
+static uint64_t pgt_get_lx_flags(void)
+{
+	union {
+		struct {
+			uint64_t        valid:1;
+			uint64_t        addr:47;
+			uint64_t        pad:16;
+		} e;
+		uint64_t l;
+	} entry = {
+		.e = {
+			.valid = 1,
+		}
+	};
+
+	return entry.l;
+}
+
+static void
+pgt_populate_entries_for_range(struct pgtable *pgt,
+			       const struct igt_aux_pgtable_range *range,
+			       drm_intel_bo *bo,
+			       uint64_t top_table)
+{
+	uint64_t surface_addr = range->surface_base;
+	uint64_t surface_end = surface_addr + range->surface_size;
+	uint64_t aux_addr = range->aux_base;
+	uint64_t l1_flags = pgt_get_l1_flags(range);
+	uint64_t lx_flags = pgt_get_lx_flags();
+
+	pgt->bo = bo;
+
+	for (; surface_addr < surface_end;
+	     surface_addr += SURFACE_BLOCK_SIZE, aux_addr += CCS_BLOCK_SIZE) {
+		uint64_t table = top_table;
+		int level;
+
+		for (level = pgt->levels; level > 1; level--)
+			table = pgt_get_table(pgt, table, level,
+					      surface_addr, lx_flags);
+
+		pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags);
+	}
+}
+
+static void pgt_populate_entries(struct pgtable *pgt,
+				 const struct igt_aux_pgtable_range *ranges,
+				 int range_count,
+				 drm_intel_bo *gem_bo)
+{
+	uint64_t top_table;
+	int i;
+
+	igt_assert(gem_bo->size >= pgt->size);
+	memset(gem_bo->virtual, 0, pgt->size);
+
+	top_table = pgt_alloc_table(pgt, pgt->levels);
+	/* Top level table must be at offset 0. */
+	igt_assert(top_table == 0);
+
+	for (i = 0; i < range_count; i++)
+		pgt_populate_entries_for_range(pgt, &ranges[i], gem_bo,
+					       top_table);
+}
+
+static struct pgtable *
+pgt_create(const struct pgtable_level_desc *level_descs, int levels,
+	   const struct igt_aux_pgtable_range *ranges, int range_count)
+{
+	struct pgtable *pgt;
+	int level;
+
+	pgt = calloc(1, sizeof(*pgt));
+	igt_assert(pgt);
+
+	pgt->levels = levels;
+
+	pgt->level_info = calloc(levels, sizeof(*pgt->level_info));
+	igt_assert(pgt->level_info);
+
+	for (level = 0; level < pgt->levels; level++) {
+		struct pgtable_level_info *li = &pgt->level_info[level];
+
+		li->desc = &level_descs[level];
+		if (li->desc->table_size > pgt->max_align)
+			pgt->max_align = li->desc->table_size;
+	}
+
+	pgt_calc_size(pgt, ranges, range_count);
+
+	return pgt;
+}
+
+static void pgt_destroy(struct pgtable *pgt)
+{
+	free(pgt->level_info);
+	free(pgt);
+}
+
+drm_intel_bo *
+igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
+		       const struct igt_aux_pgtable_range *ranges,
+		       int range_count)
+{
+	struct pgtable *pgt;
+	drm_intel_bo *gem_bo;
+
+	pgt = pgt_create(aux_pgtable_desc, AUX_PGTABLE_LEVELS,
+			 ranges, range_count);
+
+	gem_bo = drm_intel_bo_alloc_for_render(bufmgr,
+					       "aux pgt",
+					       pgt->size, pgt->max_align);
+	igt_assert(gem_bo);
+
+	igt_assert(drm_intel_bo_map(gem_bo, true) == 0);
+	pgt_populate_entries(pgt, ranges, range_count, gem_bo);
+	igt_assert(drm_intel_bo_unmap(gem_bo) == 0);
+
+	pgt_destroy(pgt);
+
+	return gem_bo;
+}
diff --git a/lib/igt_aux_pgtable.h b/lib/igt_aux_pgtable.h
new file mode 100644
index 00000000..64c6b21f
--- /dev/null
+++ b/lib/igt_aux_pgtable.h
@@ -0,0 +1,21 @@
+#ifndef _IGT_AUX_PGTABLE_H_
+#define _IGT_AUX_PGTABLE_H_
+
+#include "intel_bufmgr.h"
+
+struct igt_aux_pgtable;
+
+struct igt_aux_pgtable_range {
+	uint64_t surface_base;
+	uint64_t surface_size;
+	uint64_t aux_base;
+	uint32_t tiling;
+	int bpp;
+};
+
+drm_intel_bo *
+igt_aux_pgtable_create(drm_intel_bufmgr *bufmgr,
+		       const struct igt_aux_pgtable_range *ranges,
+		       int range_count);
+
+#endif
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 069440cb..e7263ce1 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -673,6 +673,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define RING_VALID          0x00000001
 #define RING_INVALID        0x00000000
 
+#define GEN12_GFX_AUX_TABLE_BASE_ADDR	0x4200
+
 
 
 /* BitBlt Instructions
@@ -2570,6 +2572,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define MI_LOAD_SCAN_LINES_INCL		(0x12<<23)
 #define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
+#define MI_LOAD_REGISTER_MEM		((0x29 << 23) | (4 - 2))
 
 /* Flush */
 #define MI_FLUSH			(0x04<<23)
diff --git a/lib/meson.build b/lib/meson.build
index 221ae28c..2135ddf3 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -46,6 +46,7 @@ lib_sources = [
 	'sw_sync.c',
 	'intel_reg_map.c',
 	'intel_iosf.c',
+	'igt_aux_pgtable.c',
 	'igt_kms.c',
 	'igt_fb.c',
 	'igt_core.c',
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 694eb3cf..31e38c2b 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -15,6 +15,7 @@
 #include <i915_drm.h>
 
 #include "drmtest.h"
+#include "igt_aux_pgtable.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
 #include "intel_io.h"
@@ -972,19 +973,113 @@ static void gen8_emit_primitive(struct intel_batchbuffer *batch, uint32_t offset
 
 #define BATCH_STATE_SPLIT 2048
 
+static void
+gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
+{
+	if (!state)
+		return;
+
+	OUT_BATCH(MI_LOAD_REGISTER_MEM);
+	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state);
+	OUT_BATCH(MI_NOOP);
+
+	OUT_BATCH(MI_LOAD_REGISTER_MEM);
+	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, state + 4);
+	OUT_BATCH(MI_NOOP);
+}
+
+static int add_aux_pgtable_range(const struct igt_buf *buf,
+				 struct igt_aux_pgtable_range *range,
+				 uint64_t *pin_offset)
+{
+	if (!buf->aux.stride)
+		return 0;
+
+	drm_intel_bo_set_softpin_offset(buf->bo, *pin_offset);
+	igt_assert(buf->bo->offset64 == *pin_offset);
+
+	range->surface_base = *pin_offset;
+	range->surface_size = buf->size;
+	range->aux_base = *pin_offset + buf->aux.offset;
+	range->tiling = buf->tiling;
+	range->bpp = buf->bpp;
+
+	/* The GEN12+ compressed main surface must be 64kB aligned.  */
+	*pin_offset = ALIGN(*pin_offset + buf->bo->size, 0x10000);
+
+	return 1;
+}
+
+static drm_intel_bo *
+gen12_create_aux_pgtable_bo(drm_intel_bufmgr *bufmgr,
+			    const struct igt_buf *dst_buf,
+			    const struct igt_buf *src_buf)
+{
+	struct igt_aux_pgtable_range ranges[2];
+	int range_count;
+	uint64_t pin_offset;
+	drm_intel_bo *gem_bo;
+
+	range_count = 0;
+	pin_offset = 0;
+
+	range_count += add_aux_pgtable_range(dst_buf,
+					     &ranges[range_count], &pin_offset);
+	range_count += add_aux_pgtable_range(src_buf,
+					     &ranges[range_count], &pin_offset);
+
+	if (!range_count)
+		return NULL;
+
+	gem_bo = igt_aux_pgtable_create(bufmgr, ranges, range_count);
+	igt_assert(gem_bo);
+
+	return gem_bo;
+}
+
+static uint32_t
+gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
+			       drm_intel_bo *aux_pgtable_bo)
+{
+	uint64_t *pgtable_ptr;
+	uint32_t pgtable_ptr_offset;
+	int ret;
+	/* Returns the batch offset of the pointer slot, 0 if no pgtable. */
+	if (!aux_pgtable_bo)
+		return 0;
+
+	pgtable_ptr = intel_batchbuffer_subdata_alloc(batch,
+						      sizeof(*pgtable_ptr),
+						      sizeof(*pgtable_ptr));
+	pgtable_ptr_offset = intel_batchbuffer_subdata_offset(batch,
+							      pgtable_ptr);
+	/* The 64 bit slot gets the address of the page table via a reloc. */
+	ret = drm_intel_bo_emit_reloc(batch->bo, pgtable_ptr_offset,
+				      aux_pgtable_bo, 0,
+				      I915_GEM_DOMAIN_RENDER, 0);
+	igt_assert(ret == 0);
+
+	return pgtable_ptr_offset;
+}
+
 static
 void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 			  drm_intel_context *context,
 			  const struct igt_buf *src, unsigned src_x,
 			  unsigned src_y, unsigned width, unsigned height,
 			  const struct igt_buf *dst, unsigned dst_x,
-			  unsigned dst_y, const uint32_t ps_kernel[][4],
+			  unsigned dst_y,
+			  drm_intel_bo *aux_pgtable_bo,
+			  const uint32_t ps_kernel[][4],
 			  uint32_t ps_kernel_size)
 {
 	uint32_t ps_sampler_state, ps_kernel_off, ps_binding_table;
 	uint32_t scissor_state;
 	uint32_t vertex_buffer;
 	uint32_t batch_end;
+	uint32_t aux_pgtable_state;
 
 	igt_assert(src->bpp == dst->bpp);
 	intel_batchbuffer_flush_with_context(batch, context);
@@ -1007,6 +1102,10 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	viewport.cc_state = gen6_create_cc_viewport(batch);
 	viewport.sf_clip_state = gen7_create_sf_clip_viewport(batch);
 	scissor_state = gen6_create_scissor_rect(batch);
+
+	aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
+							   aux_pgtable_bo);
+
 	/* TODO: theree is other state which isn't setup */
 
 	assert(batch->ptr < &batch->buffer[4095]);
@@ -1018,6 +1117,8 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
 				GEN9_PIPELINE_SELECTION_MASK);
 
+	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
+
 	gen8_emit_sip(batch);
 
 	gen7_emit_push_constants(batch);
@@ -1092,8 +1193,8 @@ void gen9_render_copyfunc(struct intel_batchbuffer *batch,
 
 {
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, ps_kernel_gen9,
-			  sizeof(ps_kernel_gen9));
+			  width, height, dst, dst_x, dst_y, NULL,
+			  ps_kernel_gen9, sizeof(ps_kernel_gen9));
 }
 
 void gen11_render_copyfunc(struct intel_batchbuffer *batch,
@@ -1104,8 +1205,8 @@ void gen11_render_copyfunc(struct intel_batchbuffer *batch,
 
 {
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, ps_kernel_gen11,
-			  sizeof(ps_kernel_gen11));
+			  width, height, dst, dst_x, dst_y, NULL,
+			  ps_kernel_gen11, sizeof(ps_kernel_gen11));
 }
 
 void gen12_render_copyfunc(struct intel_batchbuffer *batch,
@@ -1115,7 +1216,15 @@ void gen12_render_copyfunc(struct intel_batchbuffer *batch,
 			   const struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
 
 {
+	drm_intel_bo *aux_pgtable_bo;
+
+	aux_pgtable_bo = gen12_create_aux_pgtable_bo(batch->bufmgr, dst, src);
+
 	_gen9_render_copyfunc(batch, context, src, src_x, src_y,
-			  width, height, dst, dst_x, dst_y, gen12_render_copy,
+			  width, height, dst, dst_x, dst_y,
+			  aux_pgtable_bo,
+			  gen12_render_copy,
 			  sizeof(gen12_render_copy));
+
+	drm_intel_bo_unreference(aux_pgtable_bo);
 }
-- 
2.17.1



More information about the igt-dev mailing list