[Intel-xe] [PATCH v3 1/2] drm/xe/xe_migrate.c: Use DPA offset for page table entries.

Kershner, David david.kershner at intel.com
Wed Oct 4 13:58:33 UTC 2023



> -----Original Message-----
> From: Vishwanathapura, Niranjana <niranjana.vishwanathapura at intel.com>
> Sent: Tuesday, October 3, 2023 7:12 PM
> To: Kershner, David <david.kershner at intel.com>
> Cc: intel-xe at lists.freedesktop.org; Ruhl, Michael J <michael.j.ruhl at intel.com>;
> Fleck, John <john.fleck at intel.com>; Roper, Matthew D
> <matthew.d.roper at intel.com>; Brost, Matthew <matthew.brost at intel.com>;
> Summers, Stuart <stuart.summers at intel.com>
> Subject: Re: [PATCH v3 1/2] drm/xe/xe_migrate.c: Use DPA offset for page
> table entries.
> 
> On Tue, Oct 03, 2023 at 06:21:44PM -0400, David Kershner wrote:
> >Device Physical Address (DPA) is the starting offset of device memory.
> >
> >Update xe_migrate identity map base PTE entries to start at dpa_base
> >instead of 0.
> >
> >The VM offset value should be 0-relative instead of DPA-relative.
> >
> 
> Overall looks good to me. Some minor comments.
> 
> >Signed-off-by: David Kershner <david.kershner at intel.com>
> >---
> > drivers/gpu/drm/xe/tests/xe_migrate.c | 37 ++++++++++++++++++++++++---
> > drivers/gpu/drm/xe/xe_migrate.c       | 22 ++++++++++------
> > 2 files changed, 47 insertions(+), 12 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
> >index 6906ff9d9c31..75b06f0bd755 100644
> >--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
> >+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
> >@@ -99,7 +99,7 @@ static const struct xe_migrate_pt_update_ops sanity_ops = {
> > 		} } while (0)
> >
> > static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
> >-		      struct kunit *test)
> >+		      struct kunit *test, u32 region)
> > {
> > 	struct xe_device *xe = tile_to_xe(m->tile);
> > 	u64 retval, expected = 0;
> >@@ -111,7 +111,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
> > 	struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL,
> > 						   bo->size,
> > 						   ttm_bo_type_kernel,
> >-						   XE_BO_CREATE_SYSTEM_BIT |
> >+						   region |
> > 						   XE_BO_NEEDS_CPU_ACCESS);
> 
> In this function, we should now change the bo name from 'sysmem' to
> 'remote' or something similar.
> 
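
Makes sense. Roughly like this, presumably (untested sketch; the KUNIT_FAIL
strings that mention "sysmem" would get the same rename):

	struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL,
						   bo->size,
						   ttm_bo_type_kernel,
						   region |
						   XE_BO_NEEDS_CPU_ACCESS);
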
> > 	if (IS_ERR(sysmem)) {
> > 		KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
> >@@ -187,6 +187,27 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
> > 	xe_bo_put(sysmem);
> > }
> >
> >+static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
> >+			     struct kunit *test)
> >+{
> >+	test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);
> >+}
> >+
> >+static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
> >+			   struct kunit *test)
> >+{
> >+	u32 region;
> >+
> >+	if (bo->ttm.resource->mem_type == XE_PL_SYSTEM)
> >+		return;
> >+
> >+	if (bo->ttm.resource->mem_type == XE_PL_VRAM0)
> >+		region = XE_BO_CREATE_VRAM1_BIT;
> >+	else
> >+		region = XE_BO_CREATE_VRAM0_BIT;
> >+	test_copy(m, bo, test, region);
> >+}
> >+
> > static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
> > 			   struct kunit *test, bool force_gpu)
> > {
> >@@ -349,7 +370,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
> > 	check(retval, expected, "Command clear small last value", test);
> >
> > 	kunit_info(test, "Copying small buffer object to system\n");
> >-	test_copy(m, tiny, test);
> >+	test_copy_sysmem(m, tiny, test);
> >+	if (IS_DGFX(xe) && xe->info.tile_count > 1) {
> >+		kunit_info(test, "Copying small buffer object to other vram\n");
> >+		test_copy_vram(m, tiny, test);
> >+	}
> 
> Probably we don't need the IS_DGFX() check. The tile_count check should be
> good enough?
> 

We're concerned with multi-tile internal devices; if that is not an issue, then we can drop the check.

David Kershner
> >
> > 	/* Clear a big bo */
> > 	kunit_info(test, "Clearing big buffer object\n");
> >@@ -366,7 +391,11 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
> > 	check(retval, expected, "Command clear big last value", test);
> >
> > 	kunit_info(test, "Copying big buffer object to system\n");
> >-	test_copy(m, big, test);
> >+	test_copy_sysmem(m, big, test);
> >+	if (xe->info.tile_count > 1) {
> >+		kunit_info(test, "Copying big buffer object to opposite vram\n");
> 
> s/opposite/other/
> 
> >+		test_copy_vram(m, big, test);
> >+	}
> >
> > 	kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
> > 	test_pt_update(m, pt, test, false);
> 
> This test update should be in a separate patch.
> 
> >diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> >index 15f091a7bba3..42942aeef595 100644
> >--- a/drivers/gpu/drm/xe/xe_migrate.c
> >+++ b/drivers/gpu/drm/xe/xe_migrate.c
> >@@ -114,8 +114,12 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level)
> > 	return (slot + 1ULL) << xe_pt_shift(level + 1);
> > }
> >
> >-static u64 xe_migrate_vram_ofs(u64 addr)
> >+static u64 xe_migrate_vram_ofs(u64 addr, struct xe_device *xe)
> > {
> 
> The convention is to have 'xe' as first argument, so it is better to follow that.
> 
> >+	/* Remove the DPA to get a correct offset into identity table for the
> >+	 * migrate offset
> >+	 */
> 
> Comment format followed elsewhere is:
> /*
>   * comments line1
>   * comments line2
>   */
> 
> Better to follow that.
> 
> >+	addr -= xe->mem.vram.dpa_base;
> 
> Should never happen, but probably a good idea to add an assert that addr >
> dpa_base? Otherwise, we will end up with hard-to-debug issues.
> 
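
Will do on all three points for this function (xe as the first argument, the
multi-line comment style, and an assert on the address). Something like this,
presumably (untested sketch, assuming xe_assert() is usable at this point):

static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr)
{
	/*
	 * Remove the DPA to get a correct offset into the identity table
	 * for the migrate offset.
	 */
	xe_assert(xe, addr >= xe->mem.vram.dpa_base);
	addr -= xe->mem.vram.dpa_base;
	return addr + (256ULL << xe_pt_shift(2));
}
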
> > 	return addr + (256ULL << xe_pt_shift(2)); }
> >
> >@@ -149,7 +153,7 @@ static int xe_migrate_create_cleared_bo(struct xe_migrate *m, struct xe_vm *vm)
> >
> > 	xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
> > 	vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE);
> >-	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
> >+	m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr, xe);
> >
> > 	return 0;
> > }
> >@@ -225,12 +229,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> > 	} else {
> > 		u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
> >
> >-		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
> >+		m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr, xe);
> >
> > 		if (xe->info.supports_usm) {
> > 			batch = tile->primary_gt->usm.bb_pool->bo;
> > 			batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
> >-			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
> >+			m->usm_batch_base_ofs = xe_migrate_vram_ofs(batch_addr, xe);
> > 		}
> > 	}
> >
> >@@ -268,7 +272,9 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
> > 		 * Use 1GB pages, it shouldn't matter the physical amount of
> > 		 * vram is less, when we don't access it.
> > 		 */
> >-		for (pos = 0; pos < xe->mem.vram.actual_physical_size; pos += SZ_1G, ofs += 8)
> >+		for (pos = xe->mem.vram.dpa_base;
> >+		     pos < xe->mem.vram.actual_physical_size + xe->mem.vram.dpa_base;
> >+		     pos += SZ_1G, ofs += 8)
> > 			xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
> > 	}
> >
> >@@ -443,8 +449,8 @@ static u32 pte_update_size(struct xe_migrate *m,
> > 		cmds += cmd_size;
> > 	} else {
> > 		/* Offset into identity map. */
> >-		*L0_ofs = xe_migrate_vram_ofs(cur->start +
> >-					      vram_region_gpu_offset(res));
> >+		*L0_ofs = xe_migrate_vram_ofs(cur->start + vram_region_gpu_offset(res),
> >+					      tile_to_xe(m->tile));
> > 		cmds += cmd_size;
> > 	}
> >
> >@@ -1062,7 +1068,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
> > 	xe_tile_assert(tile, update->qwords <= 0x1ff);
> > 	if (!ppgtt_ofs) {
> > 		ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo, 0,
> >-							   XE_PAGE_SIZE));
> >+							   XE_PAGE_SIZE), tile_to_xe(tile));
> > 	}
> >
> > 	do {
> >--
> >2.35.1
> >

