[Intel-xe] [PATCH 3/5] drm/xe/pf: Introduce Local Memory Translation Table

Michal Wajdeczko michal.wajdeczko at intel.com
Tue Nov 28 15:15:05 UTC 2023


The Local Memory Translation Table (LMTT) provides additional
abstraction for Virtual Functions (VF) accessing device VRAM.

This code is based on prior work of Michal Winiarski.

In this patch we focus only on LMTT initialization. The remaining LMTT
functions will be used once we add VF provisioning to the PF.

Bspec: 44117, 52404, 59314
Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
---
 drivers/gpu/drm/xe/Makefile             |   5 +
 drivers/gpu/drm/xe/regs/xe_sriov_regs.h |  17 +
 drivers/gpu/drm/xe/xe_device_types.h    |   8 +
 drivers/gpu/drm/xe/xe_gt.c              |  10 +
 drivers/gpu/drm/xe/xe_lmtt.c            | 502 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_lmtt.h            |  27 ++
 drivers/gpu/drm/xe/xe_lmtt_2l.c         | 150 +++++++
 drivers/gpu/drm/xe/xe_lmtt_ml.c         | 161 ++++++++
 drivers/gpu/drm/xe/xe_lmtt_types.h      |  63 +++
 9 files changed, 943 insertions(+)
 create mode 100644 drivers/gpu/drm/xe/regs/xe_sriov_regs.h
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt.c
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt.h
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt_2l.c
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt_ml.c
 create mode 100644 drivers/gpu/drm/xe/xe_lmtt_types.h

diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index 551adbc22b5a..cf1237a148b1 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -125,6 +125,11 @@ xe-$(CONFIG_HWMON) += xe_hwmon.o
 # graphics virtualization (SR-IOV) support
 xe-y += xe_sriov.o
 
+xe-$(CONFIG_PCI_IOV) += \
+	xe_lmtt.o \
+	xe_lmtt_2l.o \
+	xe_lmtt_ml.o
+
 # i915 Display compat #defines and #includes
 subdir-ccflags-$(CONFIG_DRM_XE_DISPLAY) += \
 	-I$(srctree)/$(src)/display/ext \
diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
new file mode 100644
index 000000000000..58a4e0fad1e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _REGS_XE_SRIOV_REGS_H_
+#define _REGS_XE_SRIOV_REGS_H_
+
+#include "regs/xe_reg_defs.h"
+
+#define XE2_LMEM_CFG			XE_REG(0x48b0)
+
+#define LMEM_CFG			XE_REG(0xcf58)
+#define   LMEM_EN			REG_BIT(31)
+#define   LMTT_DIR_PTR			REG_GENMASK(30, 0) /* in multiples of 64KB */
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 2712905c7a91..bded80c50027 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -15,6 +15,7 @@
 #include "xe_devcoredump_types.h"
 #include "xe_heci_gsc.h"
 #include "xe_gt_types.h"
+#include "xe_lmtt_types.h"
 #include "xe_platform_types.h"
 #include "xe_pt_types.h"
 #include "xe_pmu.h"
@@ -186,6 +187,13 @@ struct xe_tile {
 		struct xe_sa_manager *kernel_bb_pool;
 	} mem;
 
+	union {
+		struct {
+			/** @sriov.pf.lmtt: Local Memory Translation Table. */
+			struct xe_lmtt lmtt;
+		} pf;
+	} sriov;
+
 	/** @migrate: Migration helper for vram blits and clearing */
 	struct xe_migrate *migrate;
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0dddb751c6a4..2c311b0e49ee 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -34,6 +34,7 @@
 #include "xe_hw_fence.h"
 #include "xe_hw_engine_class_sysfs.h"
 #include "xe_irq.h"
+#include "xe_lmtt.h"
 #include "xe_lrc.h"
 #include "xe_map.h"
 #include "xe_migrate.h"
@@ -44,6 +45,7 @@
 #include "xe_ring_ops.h"
 #include "xe_sa.h"
 #include "xe_sched_job.h"
+#include "xe_sriov.h"
 #include "xe_tuning.h"
 #include "xe_uc.h"
 #include "xe_vm.h"
@@ -344,6 +346,8 @@ static int gt_fw_domain_init(struct xe_gt *gt)
 		err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
 		if (err)
 			goto err_force_wake;
+		if (IS_SRIOV_PF(gt_to_xe(gt)))
+			xe_lmtt_init(&gt_to_tile(gt)->sriov.pf.lmtt);
 	}
 
 	err = xe_uc_init(&gt->uc);
@@ -451,6 +455,9 @@ static int all_fw_domain_init(struct xe_gt *gt)
 	if (err)
 		goto err_force_wake;
 
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
 	XE_WARN_ON(err);
 	xe_device_mem_access_put(gt_to_xe(gt));
@@ -543,6 +550,9 @@ static int do_gt_restart(struct xe_gt *gt)
 	if (err)
 		return err;
 
+	if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+		xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
+
 	xe_mocs_init(gt);
 	err = xe_uc_start(&gt->uc);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
new file mode 100644
index 000000000000..d5ada31ae633
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_sriov_regs.h"
+
+#include "xe_assert.h"
+#include "xe_bo.h"
+#include "xe_lmtt.h"
+#include "xe_map.h"
+#include "xe_mmio.h"
+#include "xe_res_cursor.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+
+/**
+ * DOC: Local Memory Translation Table
+ *
+ * The Local Memory Translation Table (LMTT) provides additional abstraction
+ * when Virtual Function (VF) is accessing device Local Memory (VRAM).
+ *
+ * The Root LMTT Page Directory contains one entry for each VF. Entries are
+ * indexed by the function number (1-based, index 0 is unused).
+ *
+ * See `Two-Level LMTT Structure`_ and `Multi-Level LMTT Structure`_.
+ */
+
+/* Assert @condition in the context of the tile that embeds @lmtt. */
+#define lmtt_assert(lmtt, condition)	xe_tile_assert(lmtt_to_tile(lmtt), condition)
+/* Emit a verbose SR-IOV debug message prefixed with "LMTT: ". */
+#define lmtt_debug(lmtt, msg...)	xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg)
+
+/* Report whether the device uses the multi-level LMTT variant (PVC only). */
+static bool xe_has_multi_level_lmtt(struct xe_device *xe)
+{
+	switch (xe->info.platform) {
+	case XE_PVC:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Map an LMTT back to the tile that embeds it as sriov.pf.lmtt. */
+static struct xe_tile *lmtt_to_tile(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = container_of(lmtt, struct xe_tile, sriov.pf.lmtt);
+
+	return tile;
+}
+
+/* Resolve the device that owns the tile embedding this LMTT. */
+static struct xe_device *lmtt_to_xe(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+
+	return tile_to_xe(tile);
+}
+
+/* Size in bytes covered by one leaf (level 0) PTE of the active variant. */
+static u64 lmtt_page_size(struct xe_lmtt *lmtt)
+{
+	unsigned int leaf_shift = lmtt->ops->lmtt_pte_shift(0);
+
+	return BIT_ULL(leaf_shift);
+}
+
+/*
+ * Allocate one LMTT page table of the given @level.
+ *
+ * The software descriptor carries a child-pointer array only for non-leaf
+ * levels (level 0 tables get zero entries). The table seen by the hardware
+ * is a pinned and mapped kernel BO sized for all PTEs of this level.
+ *
+ * Return: the new page table or an ERR_PTR() on failure.
+ */
+static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level)
+{
+	unsigned int num_entries = level ? lmtt->ops->lmtt_pte_num(level) : 0;
+	struct xe_lmtt_pt *pt;
+	struct xe_bo *bo;
+	int err;
+
+	pt = kzalloc(struct_size(pt, entries, num_entries), GFP_KERNEL);
+	if (!pt) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
+				  PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+					     lmtt->ops->lmtt_pte_num(level)),
+				  ttm_bo_type_kernel,
+				  XE_BO_CREATE_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+				  XE_BO_CREATE_PINNED_BIT);
+	if (IS_ERR(bo)) {
+		err = PTR_ERR(bo);
+		goto out_free_pt;
+	}
+
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	/*
+	 * This allocation does not ask for cleared memory and only explicitly
+	 * programmed PTEs are written later, so invalidate every entry up
+	 * front (LMTT_PTE_INVALID is 0) instead of relying on the initial BO
+	 * content.
+	 */
+	xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, bo->size);
+
+	pt->level = level;
+	pt->bo = bo;
+	return pt;
+
+out_free_pt:
+	kfree(pt);
+out:
+	return ERR_PTR(err);
+}
+
+/* Release the backing BO of a page table, then the descriptor itself. */
+static void lmtt_pt_free(struct xe_lmtt_pt *pt)
+{
+	struct xe_bo *bo = pt->bo;
+
+	xe_bo_unpin_map_no_vm(bo);
+	kfree(pt);
+}
+
+/* Allocate the root page directory and attach it to @lmtt. */
+static int lmtt_init_pd(struct xe_lmtt *lmtt)
+{
+	unsigned int root_level = lmtt->ops->lmtt_root_pd_level();
+	struct xe_lmtt_pt *root;
+
+	lmtt_assert(lmtt, !lmtt->pd);
+	lmtt_assert(lmtt, root_level);
+
+	root = lmtt_pt_alloc(lmtt, root_level);
+	if (!IS_ERR(root)) {
+		lmtt->pd = root;
+		return 0;
+	}
+
+	return PTR_ERR(root);
+}
+
+/* Detach and free the root directory; no VF entry may still be linked. */
+static void lmtt_fini_pd(struct xe_lmtt *lmtt)
+{
+	struct xe_lmtt_pt *root = lmtt->pd;
+	unsigned int count = lmtt->ops->lmtt_pte_num(root->level);
+	unsigned int i = 0;
+
+	/* any entry still present here would be leaked */
+	while (i < count) {
+		lmtt_assert(lmtt, !root->entries[i]);
+		i++;
+	}
+
+	lmtt->pd = NULL;
+	lmtt_pt_free(root);
+}
+
+/* drmm release action: tear down what xe_lmtt_init() set up, if anything. */
+static void fini_lmtt(struct drm_device *drm, void *arg)
+{
+	struct xe_lmtt *lmtt = arg;
+
+	/* ops and pd are expected to be either both set or both clear */
+	lmtt_assert(lmtt, !!lmtt->ops == !!lmtt->pd);
+
+	if (lmtt->pd) {
+		lmtt_fini_pd(lmtt);
+		lmtt->ops = NULL;
+	}
+}
+
+/**
+ * xe_lmtt_init - LMTT software initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * LMTT initialization is done in two steps.
+ *
+ * First, xe_lmtt_init() checks whether the LMTT is required on the current
+ * device, then selects and initializes the matching variant of the LMTT Root
+ * Directory. Currently supported variants are `Two-Level LMTT Structure`_ and
+ * `Multi-Level LMTT Structure`_.
+ *
+ * Second, xe_lmtt_init_hw() registers this directory with the hardware.
+ *
+ * Notes:
+ * The LMTT allocations are managed and will be implicitly released on driver unload.
+ * This function shall be called only once and only when running as a PF driver.
+ * Any LMTT initialization failure should block VFs enabling.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_init(struct xe_lmtt *lmtt)
+{
+	struct xe_device *xe = lmtt_to_xe(lmtt);
+	int ret;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(xe));
+	lmtt_assert(lmtt, !lmtt->ops);
+
+	/* nothing to set up on non-discrete devices */
+	if (!IS_DGFX(xe))
+		return 0;
+
+	lmtt->ops = xe_has_multi_level_lmtt(xe) ? &lmtt_ml_ops : &lmtt_2l_ops;
+
+	ret = lmtt_init_pd(lmtt);
+	if (unlikely(ret)) {
+		lmtt->ops = NULL;
+		return ret;
+	}
+
+	return drmm_add_action_or_reset(&xe->drm, fini_lmtt, lmtt);
+}
+
+/* Program the LMEM config register with the root directory address and enable bit. */
+static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
+{
+	struct xe_tile *tile = lmtt_to_tile(lmtt);
+	struct xe_device *xe = tile_to_xe(tile);
+	dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE);
+	u32 cfg;
+
+	lmtt_debug(lmtt, "DIR offset %pad\n", &offset);
+	lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
+	lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
+
+	/* the directory pointer field is expressed in multiples of 64KB */
+	cfg = LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K);
+
+	xe_mmio_write32(tile->primary_gt,
+			GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
+			cfg);
+}
+
+/**
+ * xe_lmtt_init_hw - Perform LMTT hardware initialization.
+ * @lmtt: the &xe_lmtt to initialize
+ *
+ * This is the second step of the LMTT initialization: it registers the
+ * LMTT Root Directory prepared in xe_lmtt_init() with the hardware.
+ *
+ * This function shall be called after every hardware reset.
+ * This function shall be called only when running as a PF driver.
+ */
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
+{
+	/* without a root directory there is nothing to register */
+	if (lmtt->pd)
+		lmtt_setup_dir_ptr(lmtt);
+}
+
+/*
+ * Write one entry of a page table through the CPU mapping of its BO.
+ *
+ * @pt: page table to update
+ * @pte: already encoded entry value (or LMTT_PTE_INVALID)
+ * @idx: entry index within @pt
+ *
+ * The entry width (32 or 64 bit) is taken from the ops for the level of @pt.
+ */
+static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
+			   u64 pte, unsigned int idx)
+{
+	unsigned int level = pt->level;
+
+	/* valid indices are 0 .. lmtt_pte_num(level) - 1 */
+	lmtt_assert(lmtt, idx < lmtt->ops->lmtt_pte_num(level));
+	lmtt_debug(lmtt, "WRITE level=%u index=%u pte=%#llx\n", level, idx, pte);
+
+	switch (lmtt->ops->lmtt_pte_size(level)) {
+	case sizeof(u32):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
+		break;
+	case sizeof(u64):
+		xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
+		break;
+	default:
+		lmtt_assert(lmtt, !!!"invalid pte size");
+	}
+}
+
+/* Recursively free @pd and every page table reachable through its entries. */
+static void lmtt_destroy_pt(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd)
+{
+	unsigned int count = 0;
+	unsigned int n;
+
+	/* leaf tables (level 0) carry no child pointers */
+	if (pd->level)
+		count = lmtt->ops->lmtt_pte_num(pd->level);
+
+	for (n = 0; n < count; n++) {
+		struct xe_lmtt_pt *child = pd->entries[n];
+
+		pd->entries[n] = NULL;
+		if (child)
+			lmtt_destroy_pt(lmtt, child);
+	}
+
+	lmtt_pt_free(pd);
+}
+
+/* Invalidate the root entry of @vfid and free all page tables below it. */
+static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	struct xe_lmtt_pt *root = lmtt->pd;
+	struct xe_lmtt_pt *vf_pt = root->entries[vfid];
+
+	root->entries[vfid] = NULL;
+	if (vf_pt) {
+		lmtt_write_pte(lmtt, root, LMTT_PTE_INVALID, vfid);
+
+		lmtt_assert(lmtt, root->level > 0);
+		lmtt_assert(lmtt, vf_pt->level == root->level - 1);
+		lmtt_destroy_pt(lmtt, vf_pt);
+	}
+}
+
+/*
+ * Populate @pd with lower-level page tables covering [start, end).
+ *
+ * One child table is allocated per @pd entry touched by the range and the
+ * function recurses until leaf (level 0) tables are reached. On failure the
+ * tables allocated so far stay linked in @pd->entries.
+ */
+static int __lmtt_alloc_range(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pd,
+			      u64 start, u64 end)
+{
+	/* address span covered by a single entry of @pd */
+	u64 entry_span = BIT_ULL(lmtt->ops->lmtt_pte_shift(pd->level));
+	u64 cur, next;
+
+	lmtt_assert(lmtt, pd->level > 0);
+
+	for (cur = start; cur < end; cur = next) {
+		struct xe_lmtt_pt *child;
+		u64 child_addr, pde;
+		unsigned int slot;
+		int ret;
+
+		child = lmtt_pt_alloc(lmtt, pd->level - 1);
+		if (IS_ERR(child))
+			return PTR_ERR(child);
+
+		child_addr = xe_bo_main_addr(child->bo, XE_PAGE_SIZE);
+
+		slot = lmtt->ops->lmtt_pte_index(cur, pd->level);
+		pde = lmtt->ops->lmtt_pte_encode(child_addr, pd->level);
+
+		lmtt_write_pte(lmtt, pd, pde, slot);
+		pd->entries[slot] = child;
+
+		/* advance to the next entry boundary, clamped to @end */
+		next = min(end, round_up(cur + 1, entry_span));
+
+		if (child->level == 0)
+			continue;
+
+		ret = __lmtt_alloc_range(lmtt, child, cur, next);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the page-table tree for @vfid covering LMEM offsets [start, end).
+ *
+ * Return: 0 on success, -ENOTEMPTY if @vfid already has page tables, or a
+ * negative error code if an allocation fails. On an allocation failure
+ * nothing remains linked under the root entry of @vfid.
+ */
+static int lmtt_alloc_range(struct xe_lmtt *lmtt, unsigned int vfid, u64 start, u64 end)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+	u64 pt_addr;
+	u64 pde;
+	int err;
+
+	lmtt_assert(lmtt, pd->level > 0);
+	/* vfid indexes pd->entries[], so it must be strictly below the count */
+	lmtt_assert(lmtt, vfid < lmtt->ops->lmtt_pte_num(pd->level));
+	lmtt_assert(lmtt, IS_ALIGNED(start, lmtt_page_size(lmtt)));
+	lmtt_assert(lmtt, IS_ALIGNED(end, lmtt_page_size(lmtt)));
+
+	if (pd->entries[vfid])
+		return -ENOTEMPTY;
+
+	pt = lmtt_pt_alloc(lmtt, pd->level - 1);
+	if (IS_ERR(pt))
+		return PTR_ERR(pt);
+
+	pt_addr = xe_bo_main_addr(pt->bo, XE_PAGE_SIZE);
+
+	pde = lmtt->ops->lmtt_pte_encode(pt_addr, pd->level);
+
+	lmtt_write_pte(lmtt, pd, pde, vfid);
+
+	pd->entries[vfid] = pt;
+
+	if (pt->level != 0) {
+		err = __lmtt_alloc_range(lmtt, pt, start, end);
+		if (err)
+			goto out_drop;
+	}
+
+	return 0;
+
+out_drop:
+	/*
+	 * The root entry already points at @pt and lower levels may be
+	 * partially populated: invalidate the PDE, clear the entry and free
+	 * the whole subtree instead of only @pt, which would leave a dangling
+	 * pointer in pd->entries[] and leak the lower-level tables.
+	 */
+	lmtt_drop_pages(lmtt, vfid);
+	return err;
+}
+
+/*
+ * Find the leaf (level 0) page table of @vfid that maps LMEM offset @addr.
+ *
+ * The page tables of @vfid must already exist, see lmtt_alloc_range().
+ */
+static struct xe_lmtt_pt *lmtt_leaf_pt(struct xe_lmtt *lmtt, unsigned int vfid, u64 addr)
+{
+	struct xe_lmtt_pt *pd = lmtt->pd;
+	struct xe_lmtt_pt *pt;
+
+	/* indices are valid only when strictly below the entry count */
+	lmtt_assert(lmtt, vfid < lmtt->ops->lmtt_pte_num(pd->level));
+	pt = pd->entries[vfid];
+	lmtt_assert(lmtt, pt);
+
+	while (pt->level) {
+		unsigned int idx = lmtt->ops->lmtt_pte_index(addr, pt->level);
+
+		lmtt_assert(lmtt, idx < lmtt->ops->lmtt_pte_num(pt->level));
+
+		/*
+		 * lmtt_pte_index() applies the per-level shift itself, so
+		 * @addr must stay unmodified while walking down.
+		 */
+		pt = pt->entries[idx];
+		lmtt_assert(lmtt, pt);
+	}
+
+	lmtt_assert(lmtt, lmtt->ops->lmtt_pte_index(addr, pt->level) <
+		    lmtt->ops->lmtt_pte_num(pt->level));
+	lmtt_assert(lmtt, pt->level != pd->level);
+	lmtt_assert(lmtt, pt->level == 0);
+	return pt;
+}
+
+/* Point the leaf PTEs of @vfid, starting at LMEM offset @start, at the pages of @bo. */
+static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 start)
+{
+	u64 page_size = lmtt_page_size(lmtt);
+	struct xe_res_cursor cur;
+	u64 vram_offset;
+
+	lmtt_assert(lmtt, IS_ALIGNED(start, page_size));
+	lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size));
+	lmtt_assert(lmtt, xe_bo_is_vram(bo));
+
+	vram_offset = vram_region_gpu_offset(bo->ttm.resource);
+	xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
+
+	/* one leaf PTE per LMTT page of the BO */
+	while (cur.remaining) {
+		u64 addr = xe_res_dma(&cur) + vram_offset; /* XXX */
+		struct xe_lmtt_pt *leaf = lmtt_leaf_pt(lmtt, vfid, start);
+		u64 pte = lmtt->ops->lmtt_pte_encode(addr, 0);
+		unsigned int idx = lmtt->ops->lmtt_pte_index(start, 0);
+
+		lmtt_write_pte(lmtt, leaf, pte, idx);
+
+		xe_res_next(&cur, page_size);
+		start += page_size;
+	}
+}
+
+/**
+ * xe_lmtt_prepare_pages - Create VF's LMTT Page Tables.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @range: top range of LMEM offset to be supported
+ *
+ * This function creates empty LMTT page tables for the given VF, covering
+ * LMEM offsets up to @range. The LMTT page tables created by this function
+ * must be released using xe_lmtt_drop_pages().
+ *
+ * Notes:
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range)
+{
+	int ret;
+
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	/* the VF range always starts at LMEM offset 0 */
+	ret = lmtt_alloc_range(lmtt, vfid, 0, range);
+	return ret;
+}
+
+/**
+ * xe_lmtt_populate_pages - Update VF's LMTT Page Table Entries.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ * @bo: the buffer object with LMEM allocation to be mapped
+ * @offset: the offset at which @bo should be mapped
+ *
+ * This function updates the VF's LMTT entries so that the given buffer
+ * object is used as the backing store.
+ *
+ * Notes:
+ * This function shall be called only after successful preparation of the
+ * VF's LMTT Page Tables. See xe_lmtt_prepare_pages().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset)
+{
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_insert_bo(lmtt, vfid, bo, offset);
+
+	/* lmtt_insert_bo() has no failure path */
+	return 0;
+}
+
+/**
+ * xe_lmtt_drop_pages - Remove VF's LMTT Pages.
+ * @lmtt: the &xe_lmtt to update
+ * @vfid: the VF identifier (1-based)
+ *
+ * This function releases every LMTT Page Table that was created for the VF
+ * by xe_lmtt_prepare_pages(). It does nothing if the VF has no page tables.
+ *
+ * This function shall be called only after successful LMTT initialization.
+ * See xe_lmtt_init().
+ */
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
+{
+	/* the root directory must exist and index 0 is not a VF */
+	lmtt_assert(lmtt, lmtt->pd);
+	lmtt_assert(lmtt, vfid);
+
+	lmtt_drop_pages(lmtt, vfid);
+}
+
+/**
+ * xe_lmtt_estimate_pt_size - Estimate size of LMTT PT allocations.
+ * @lmtt: the &xe_lmtt
+ * @size: the size of the LMEM to be mapped over LMTT (including any offset)
+ *
+ * This function shall be called only by PF.
+ *
+ * Return: size of the PT allocation(s) needed to support given LMEM size.
+ */
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size)
+{
+	const struct xe_lmtt_ops *ops;
+	unsigned int level;
+	u64 total;
+
+	lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, IS_DGFX(lmtt_to_xe(lmtt)));
+	lmtt_assert(lmtt, lmtt->ops);
+
+	ops = lmtt->ops;
+
+	/* start with a single leaf table */
+	total = PAGE_ALIGN(ops->lmtt_pte_size(0) * ops->lmtt_pte_num(0));
+
+	/*
+	 * Every level below the root multiplies the tables beneath it by the
+	 * number of entries needed for @size and adds one table of its own.
+	 */
+	for (level = 1; level < ops->lmtt_root_pd_level(); level++) {
+		total *= ops->lmtt_pte_index(size, level) + 1;
+		total += PAGE_ALIGN(ops->lmtt_pte_size(level) *
+				    ops->lmtt_pte_num(level));
+	}
+
+	return total;
+}
diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
new file mode 100644
index 000000000000..cb10ef994db6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_H_
+#define _XE_LMTT_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_ops;
+
+#ifdef CONFIG_PCI_IOV
+/* LMTT entry points implemented in xe_lmtt.c */
+int xe_lmtt_init(struct xe_lmtt *lmtt);
+void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
+int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
+int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
+void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
+u64 xe_lmtt_estimate_pt_size(struct xe_lmtt *lmtt, u64 size);
+#else
+/* No-op stubs so that callers of the init entry points build without CONFIG_PCI_IOV. */
+static inline int xe_lmtt_init(struct xe_lmtt *lmtt) { return 0; }
+static inline void xe_lmtt_init_hw(struct xe_lmtt *lmtt) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt_2l.c b/drivers/gpu/drm/xe/xe_lmtt_2l.c
new file mode 100644
index 000000000000..84bc5c4212b5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_2l.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Two-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 37 bit (128GB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 37 bit (128GB)
+ *
+ * The following figure illustrates the structure and function of the 2L LMTT::
+ *
+ *            LMTT Directory
+ *           (1 Entry per VF)
+ *            +-----------+                     LMTT (per VF)
+ *            |           |                     +-----------+
+ *            |           |                     |           |
+ *            |           |          index:     |           |
+ *            |           |          LMEM VF    +===========+
+ *            |           |          offset --> |    PTE    | ==> LMEM PF offset
+ *            |           |                     +===========+
+ *   index:   +===========+                     |           |
+ *   VFID --> |    PDE    |  -----------------> +-----------+
+ *            +===========+                    /              \.
+ *            |           |                   /                 \.
+ *            |           |                  /                    \.
+ *            |           |                 /                       \.
+ *            +-----------+ <== [LMTT Directory Ptr]                  \.
+ *           /             \              /                             \.
+ *          /               \         +-----------+-----------------+------+---+
+ *         /                 \        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                   \       +===========+=================+======+===+
+ *       /                     \      |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                       \     +-----------+-----------------+------+---+
+ *     /                         \.
+ *   +-----------+-----------------+------+---+
+ *   | 31:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *   +===========+=================+======+===+
+ *   |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *   +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u32 lmtt_2l_pde_t;
+typedef u32 lmtt_2l_pte_t;
+
+#if IS_ENABLED(CONFIG_DRM_XE_LMTT_2L_128GB)
+#define LMTT_2L_HAW			37 /* 128 GiB */
+#else
+#define LMTT_2L_HAW			35 /* 32 GiB */
+#endif
+
+#define LMTT_2L_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_2L_PDE_LMTT_PTR		GENMASK(LMTT_2L_HAW - 13, 4)
+#define LMTT_2L_PDE_VALID		BIT(0)
+
+#define LMTT_2L_PTE_MAX_NUM		BIT(LMTT_2L_HAW - ilog2(SZ_2M))
+#define LMTT_2L_PTE_LMEM_PAGE		GENMASK(LMTT_2L_HAW - 17, 5)
+#define LMTT_2L_PTE_VALID		BIT(0)
+
+/* Level of the root directory in the two-level layout (0 is the leaf). */
+static unsigned int lmtt_2l_root_pd_level(void)
+{
+	const unsigned int root_level = 1; /* implementation is 0-based */
+
+	return root_level;
+}
+
+/* Number of entries in a 2L table of the given level, 0 for unknown levels. */
+static unsigned int lmtt_2l_pte_num(unsigned int level)
+{
+	BUILD_BUG_ON(LMTT_2L_HAW == 37 && LMTT_2L_PTE_MAX_NUM != SZ_64K);
+	BUILD_BUG_ON(LMTT_2L_HAW == 35 && LMTT_2L_PTE_MAX_NUM != SZ_16K);
+
+	if (level == 1)
+		return LMTT_2L_PDE_MAX_NUM;
+	if (level == 0)
+		return LMTT_2L_PTE_MAX_NUM;
+
+	return 0;
+}
+
+/* Size in bytes of one 2L entry at the given level, 0 for unknown levels. */
+static unsigned int lmtt_2l_pte_size(unsigned int level)
+{
+	if (level == 1)
+		return sizeof(lmtt_2l_pde_t);
+	if (level == 0)
+		return sizeof(lmtt_2l_pte_t);
+
+	return 0;
+}
+
+/* Address shift of a 2L entry: leaf entries map 2MB pages, other levels report 0. */
+static unsigned int lmtt_2l_pte_shift(unsigned int level)
+{
+	if (level == 0)
+		return ilog2(SZ_2M);
+
+	return 0;
+}
+
+/* Index of the leaf entry that covers @addr; non-leaf levels always yield 0. */
+static unsigned int lmtt_2l_pte_index(u64 addr, unsigned int level)
+{
+	BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_2L_PTE_MAX_NUM);
+
+	if (level != 0)
+		return 0;
+
+	/* SZ_2M increments */
+	return (addr >> lmtt_2l_pte_shift(level)) & (LMTT_2L_PTE_MAX_NUM - 1);
+}
+
+/*
+ * Build a valid 2L entry for the given level: a 2MB LMEM page number for
+ * level 0, a 64KB LMTT pointer for level 1. Unknown levels warn and give 0.
+ */
+static u64 lmtt_2l_pte_encode(unsigned long offset, unsigned int level)
+{
+	if (level == 0) {
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_2L_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_2L_PTE_VALID;
+	}
+
+	if (level == 1) {
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_2L_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_2L_PDE_VALID;
+	}
+
+	XE_WARN_ON(true);
+	return 0;
+}
+
+/* Operations table of the Two-Level LMTT variant, selected in xe_lmtt_init(). */
+const struct xe_lmtt_ops lmtt_2l_ops = {
+	.lmtt_root_pd_level = lmtt_2l_root_pd_level,
+	.lmtt_pte_num = lmtt_2l_pte_num,
+	.lmtt_pte_size = lmtt_2l_pte_size,
+	.lmtt_pte_shift = lmtt_2l_pte_shift,
+	.lmtt_pte_index = lmtt_2l_pte_index,
+	.lmtt_pte_encode = lmtt_2l_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_ml.c b/drivers/gpu/drm/xe/xe_lmtt_ml.c
new file mode 100644
index 000000000000..b21215a2edd6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_ml.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/align.h>
+#include <linux/bitfield.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
+
+#include "xe_lmtt_types.h"
+#include "xe_macros.h"
+
+/**
+ * DOC: Multi-Level LMTT Structure
+ *
+ * LMHAW (Local Memory Host Address Width) is 48 bit (256TB)
+ *
+ * LMGAW (Local Memory Guest Address Width) is 48 bit (256TB)
+ *
+ * The following figure illustrates the structure and function of the ML LMTT::
+ *
+ *           LMTT L3 Directory
+ *           (1 Entry per VF)                                       LMTT L1 Leaf
+ *            +-----------+                                         +-----------+
+ *            |           |             LMTT L2 (per VF)            |           |
+ *            |           |              +-----------+              |           |
+ *            |           |              |           |     index:   +===========+
+ *            |           |              |           |     GDPA --> |    PTE    | => LMEM PF offset
+ *            |           |              |           |     34:21    +===========+
+ *            |           |    index:    |           |              |           |
+ *            |           |    LMEM VF   +===========+              |           |
+ *            |           |    offset -> |    PTE    |  ----------> +-----------+
+ *            |           |    GAW-1:35  +===========+              /           \.
+ *   index:   +===========+              |           |             /              \.
+ *   VFID --> |    PDE    |  --------->  +-----------+            /                 \.
+ *            +===========+             /           /            /                    \.
+ *            |           |           /            /            /                       \.
+ *            +-----------+  <== [LMTT Directory Ptr]          /                          \.
+ *           /             \      /              /            /                             \.
+ *          /                \  /               /       +-----------+-----------------+------+---+
+ *         /                  /\               /        | 31:HAW-16 |        HAW-17:5 |  4:1 | 0 |
+ *        /                 /    \            /         +===========+=================+======+===+
+ *       /                /        \         /          |  Reserved | LMEM Page (2MB) | Rsvd | V |
+ *      /                                   /           +-----------+-----------------+------+---+
+ *     /                                   /
+ *  +-----------+-----------------+------+---+
+ *  | 63:HAW-12 |        HAW-13:4 |  3:1 | 0 |
+ *  +===========+=================+======+===+
+ *  |  Reserved | LMTT Ptr (64KB) | Rsvd | V |
+ *  +-----------+-----------------+------+---+
+ *
+ */
+
+typedef u64 lmtt_ml_pde_t;
+typedef u32 lmtt_ml_pte_t;
+
+#define LMTT_ML_HAW			48 /* 256 TiB */
+
+#define LMTT_ML_PDE_MAX_NUM		64 /* SRIOV with PF and 63 VFs, index 0 (PF) is unused */
+#define LMTT_ML_PDE_LMTT_PTR		GENMASK_ULL(LMTT_ML_HAW - 13, 4)
+#define LMTT_ML_PDE_VALID		BIT(0)
+
+#define LMTT_ML_PDE_L2_SHIFT		35
+#define LMTT_ML_PDE_L2_MAX_NUM		BIT_ULL(LMTT_ML_HAW - 35)
+
+#define LMTT_ML_PTE_MAX_NUM		BIT(35 - ilog2(SZ_2M))
+#define LMTT_ML_PTE_LMEM_PAGE		GENMASK(LMTT_ML_HAW - 17, 5)
+#define LMTT_ML_PTE_VALID		BIT(0)
+
+/* Level of the root directory in the multi-level layout (0 is the leaf). */
+static unsigned int lmtt_ml_root_pd_level(void)
+{
+	const unsigned int root_level = 2; /* implementation is 0-based */
+
+	return root_level;
+}
+
+/* Number of entries in an ML table of the given level, 0 for unknown levels. */
+static unsigned int lmtt_ml_pte_num(unsigned int level)
+{
+	BUILD_BUG_ON(LMTT_ML_HAW == 48 && LMTT_ML_PDE_L2_MAX_NUM != SZ_8K);
+	BUILD_BUG_ON(LMTT_ML_PTE_MAX_NUM != SZ_16K);
+
+	if (level == 2)
+		return LMTT_ML_PDE_MAX_NUM;
+	if (level == 1)
+		return LMTT_ML_PDE_L2_MAX_NUM;
+	if (level == 0)
+		return LMTT_ML_PTE_MAX_NUM;
+
+	return 0;
+}
+
+/* Size in bytes of one ML entry at the given level, 0 for unknown levels. */
+static unsigned int lmtt_ml_pte_size(unsigned int level)
+{
+	/* both directory levels share the PDE format */
+	if (level == 2 || level == 1)
+		return sizeof(lmtt_ml_pde_t);
+	if (level == 0)
+		return sizeof(lmtt_ml_pte_t);
+
+	return 0;
+}
+
+/* Address shift of an ML entry: 32GB per level 1 entry, 2MB per leaf entry. */
+static unsigned int lmtt_ml_pte_shift(unsigned int level)
+{
+	BUILD_BUG_ON(BIT_ULL(LMTT_ML_PDE_L2_SHIFT) != SZ_32G);
+
+	if (level == 1)
+		return ilog2(SZ_32G);
+	if (level == 0)
+		return ilog2(SZ_2M);
+
+	return 0;
+}
+
+/* Index of the entry at @level that covers @addr; other levels always yield 0. */
+static unsigned int lmtt_ml_pte_index(u64 addr, unsigned int level)
+{
+	u64 shifted = addr >> lmtt_ml_pte_shift(level);
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PDE_L2_MAX_NUM);
+	BUILD_BUG_ON_NOT_POWER_OF_2(LMTT_ML_PTE_MAX_NUM);
+
+	/* SZ_32G increments */
+	if (level == 1)
+		return shifted & (LMTT_ML_PDE_L2_MAX_NUM - 1);
+
+	/* SZ_2M increments */
+	if (level == 0)
+		return shifted & (LMTT_ML_PTE_MAX_NUM - 1);
+
+	return 0;
+}
+
+/*
+ * Build a valid ML entry for the given level: a 2MB LMEM page number for
+ * level 0, a 64KB LMTT pointer for levels 1 and 2. Unknown levels warn and
+ * give 0.
+ */
+static u64 lmtt_ml_pte_encode(unsigned long offset, unsigned int level)
+{
+	if (level == 0) {
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_2M));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M));
+		return FIELD_PREP(LMTT_ML_PTE_LMEM_PAGE, offset / SZ_2M) | LMTT_ML_PTE_VALID;
+	}
+
+	if (level == 1 || level == 2) {
+		XE_WARN_ON(!IS_ALIGNED(offset, SZ_64K));
+		XE_WARN_ON(!FIELD_FIT(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K));
+		return FIELD_PREP(LMTT_ML_PDE_LMTT_PTR, offset / SZ_64K) | LMTT_ML_PDE_VALID;
+	}
+
+	XE_WARN_ON(true);
+	return 0;
+}
+
+/* Operations table of the Multi-Level LMTT variant, selected in xe_lmtt_init(). */
+const struct xe_lmtt_ops lmtt_ml_ops = {
+	.lmtt_root_pd_level = lmtt_ml_root_pd_level,
+	.lmtt_pte_num = lmtt_ml_pte_num,
+	.lmtt_pte_size = lmtt_ml_pte_size,
+	.lmtt_pte_shift = lmtt_ml_pte_shift,
+	.lmtt_pte_index = lmtt_ml_pte_index,
+	.lmtt_pte_encode = lmtt_ml_pte_encode,
+};
diff --git a/drivers/gpu/drm/xe/xe_lmtt_types.h b/drivers/gpu/drm/xe/xe_lmtt_types.h
new file mode 100644
index 000000000000..b37abad23416
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_lmtt_types.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_LMTT_TYPES_H_
+#define _XE_LMTT_TYPES_H_
+
+#include <linux/types.h>
+
+struct xe_bo;
+struct xe_lmtt;
+struct xe_lmtt_pt;
+struct xe_lmtt_ops;
+
+#define LMTT_PTE_INVALID	ULL(0)
+
+/**
+ * struct xe_lmtt - Local Memory Translation Table Manager
+ *
+ * Both members are set by xe_lmtt_init() and stay NULL when the LMTT is
+ * not used on the device.
+ */
+struct xe_lmtt {
+	/** @pd: root LMTT Directory */
+	struct xe_lmtt_pt *pd;
+
+	/** @ops: LMTT functions */
+	const struct xe_lmtt_ops *ops;
+};
+
+/**
+ * struct xe_lmtt_pt - Local Memory Translation Table Page Table
+ *
+ * Represents single level of the LMTT.
+ *
+ * The descriptor is allocated together with its @entries array, which is
+ * sized by the number of entries of the given level (empty for level 0).
+ */
+struct xe_lmtt_pt {
+	/** @level: page table level, 0 is leaf */
+	unsigned int level;
+
+	/** @bo: buffer object with actual LMTT PTE values */
+	struct xe_bo *bo;
+
+	/** @entries: next-level page tables, present only for non-leaf levels */
+	struct xe_lmtt_pt *entries[];
+};
+
+/**
+ * struct xe_lmtt_ops - Local Memory Translation Table Operations
+ *
+ * Provides abstraction of the LMTT variants.
+ * Levels are 0-based, with level 0 being the leaf page table.
+ */
+struct xe_lmtt_ops {
+	/* private: */
+	/* level of the root directory */
+	unsigned int (*lmtt_root_pd_level)(void);
+	/* number of entries in a table of the given level */
+	unsigned int (*lmtt_pte_num)(unsigned int level);
+	/* size in bytes of a single entry at the given level */
+	unsigned int (*lmtt_pte_shift)(unsigned int level);
+	/* index of the entry at the given level that covers addr */
+	unsigned int (*lmtt_pte_index)(u64 addr, unsigned int level);
+	/* encoded (valid) entry of the given level pointing at offset */
+	u64 (*lmtt_pte_encode)(unsigned long offset, unsigned int level);
+};
+
+extern const struct xe_lmtt_ops lmtt_2l_ops;
+extern const struct xe_lmtt_ops lmtt_ml_ops;
+
+#endif
-- 
2.25.1



More information about the Intel-xe mailing list