[Intel-xe] [PATCH v3 1/2] drm/xe/xe_migrate.c: Use DPA offset for page table entries.
Ruhl, Michael J
michael.j.ruhl at intel.com
Wed Oct 4 11:31:44 UTC 2023
>-----Original Message-----
>From: Vishwanathapura, Niranjana <niranjana.vishwanathapura at intel.com>
>Sent: Tuesday, October 3, 2023 7:12 PM
>To: Kershner, David <david.kershner at intel.com>
>Cc: intel-xe at lists.freedesktop.org; Ruhl, Michael J <michael.j.ruhl at intel.com>;
>Fleck, John <john.fleck at intel.com>; Roper, Matthew D
><matthew.d.roper at intel.com>; Brost, Matthew <matthew.brost at intel.com>;
>Summers, Stuart <stuart.summers at intel.com>
>Subject: Re: [PATCH v3 1/2] drm/xe/xe_migrate.c: Use DPA offset for page table
>entries.
>
>On Tue, Oct 03, 2023 at 06:21:44PM -0400, David Kershner wrote:
>>Device Physical Address (DPA) is the starting offset of device memory.
>>
>>Update xe_migrate identity map base PTE entries to start at dpa_base
>>instead of 0.
>>
>>The VM offset value should be 0-relative instead of DPA-relative.
>>
>
>Overall looks good to me. Some minor comments.
>
>>Signed-off-by: David Kershner <david.kershner at intel.com>
>>---
>> drivers/gpu/drm/xe/tests/xe_migrate.c | 37 ++++++++++++++++++++++++-
>--
>> drivers/gpu/drm/xe/xe_migrate.c | 22 ++++++++++------
>> 2 files changed, 47 insertions(+), 12 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c
>b/drivers/gpu/drm/xe/tests/xe_migrate.c
>>index 6906ff9d9c31..75b06f0bd755 100644
>>--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
>>+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
>>@@ -99,7 +99,7 @@ static const struct xe_migrate_pt_update_ops
>sanity_ops = {
>> } } while (0)
>>
>> static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
>>- struct kunit *test)
>>+ struct kunit *test, u32 region)
>> {
>> struct xe_device *xe = tile_to_xe(m->tile);
>> u64 retval, expected = 0;
>>@@ -111,7 +111,7 @@ static void test_copy(struct xe_migrate *m, struct
>xe_bo *bo,
>> struct xe_bo *sysmem = xe_bo_create_locked(xe, m->tile, NULL,
>> bo->size,
>> ttm_bo_type_kernel,
>>- XE_BO_CREATE_SYSTEM_BIT
>|
>>+ region |
>>
>XE_BO_NEEDS_CPU_ACCESS);
>
>In this function, we should now change the bo name from 'sysmem' to 'remote'
>or something similar.
>
>> if (IS_ERR(sysmem)) {
>> KUNIT_FAIL(test, "Failed to allocate sysmem bo for %s: %li\n",
>>@@ -187,6 +187,27 @@ static void test_copy(struct xe_migrate *m, struct
>xe_bo *bo,
>> xe_bo_put(sysmem);
>> }
>>
>>+static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
>>+ struct kunit *test)
>>+{
>>+ test_copy(m, bo, test, XE_BO_CREATE_SYSTEM_BIT);
>>+}
>>+
>>+static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
>>+ struct kunit *test)
>>+{
>>+ u32 region;
>>+
>>+ if (bo->ttm.resource->mem_type == XE_PL_SYSTEM)
>>+ return;
>>+
>>+ if (bo->ttm.resource->mem_type == XE_PL_VRAM0)
>>+ region = XE_BO_CREATE_VRAM1_BIT;
>>+ else
>>+ region = XE_BO_CREATE_VRAM0_BIT;
>>+ test_copy(m, bo, test, region);
>>+}
>>+
>> static void test_pt_update(struct xe_migrate *m, struct xe_bo *pt,
>> struct kunit *test, bool force_gpu)
>> {
>>@@ -349,7 +370,11 @@ static void xe_migrate_sanity_test(struct xe_migrate
>*m, struct kunit *test)
>> check(retval, expected, "Command clear small last value", test);
>>
>> kunit_info(test, "Copying small buffer object to system\n");
>>- test_copy(m, tiny, test);
>>+ test_copy_sysmem(m, tiny, test);
>>+ if (IS_DGFX(xe) && xe->info.tile_count > 1) {
>>+ kunit_info(test, "Copying small buffer object to other vram\n");
>>+ test_copy_vram(m, tiny, test);
>>+ }
>
>Probably we don't need IS_DGFX() check. tile_count check should be good
>enough?
>
>>
>> /* Clear a big bo */
>> kunit_info(test, "Clearing big buffer object\n");
>>@@ -366,7 +391,11 @@ static void xe_migrate_sanity_test(struct xe_migrate
>*m, struct kunit *test)
>> check(retval, expected, "Command clear big last value", test);
>>
>> kunit_info(test, "Copying big buffer object to system\n");
>>- test_copy(m, big, test);
>>+ test_copy_sysmem(m, big, test);
>>+ if (xe->info.tile_count > 1) {
>>+ kunit_info(test, "Copying big buffer object to opposite
>vram\n");
>
>s/opposite/other/
>
>>+ test_copy_vram(m, big, test);
>>+ }
>>
>> kunit_info(test, "Testing page table update using CPU if GPU idle.\n");
>> test_pt_update(m, pt, test, false);
>
>This test update should be in a separate patch.
>
>>diff --git a/drivers/gpu/drm/xe/xe_migrate.c
>b/drivers/gpu/drm/xe/xe_migrate.c
>>index 15f091a7bba3..42942aeef595 100644
>>--- a/drivers/gpu/drm/xe/xe_migrate.c
>>+++ b/drivers/gpu/drm/xe/xe_migrate.c
>>@@ -114,8 +114,12 @@ static u64 xe_migrate_vm_addr(u64 slot, u32 level)
>> return (slot + 1ULL) << xe_pt_shift(level + 1);
>> }
>>
>>-static u64 xe_migrate_vram_ofs(u64 addr)
>>+static u64 xe_migrate_vram_ofs(u64 addr, struct xe_device *xe)
>> {
>
>The convention is to have 'xe' as first argument, so it is better to
>follow that.
>
>>+ /* Remove the DPA to get a correct offset into identity table for the
>>+ * migrate offset
>>+ */
>
>Comment format followed elsewhere is,
>/*
> * comments line1
> * comments line2
> */
>
>Better to follow that.
>
>>+ addr -= xe->mem.vram.dpa_base;
>
>Should never happen, but probably a good idea to add an assert
>that addr > dpa_base? Otherwise, we will endup with hard to debug issues.
How about:
addr &= ~xe->mem.vram.dpa_base;
?
M
>> return addr + (256ULL << xe_pt_shift(2));
>> }
>>
>>@@ -149,7 +153,7 @@ static int xe_migrate_create_cleared_bo(struct
>xe_migrate *m, struct xe_vm *vm)
>>
>> xe_map_memset(xe, &m->cleared_bo->vmap, 0, 0x00, cleared_size);
>> vram_addr = xe_bo_addr(m->cleared_bo, 0, XE_PAGE_SIZE);
>>- m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr);
>>+ m->cleared_vram_ofs = xe_migrate_vram_ofs(vram_addr, xe);
>>
>> return 0;
>> }
>>@@ -225,12 +229,12 @@ static int xe_migrate_prepare_vm(struct xe_tile
>*tile, struct xe_migrate *m,
>> } else {
>> u64 batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
>>
>>- m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr);
>>+ m->batch_base_ofs = xe_migrate_vram_ofs(batch_addr, xe);
>>
>> if (xe->info.supports_usm) {
>> batch = tile->primary_gt->usm.bb_pool->bo;
>> batch_addr = xe_bo_addr(batch, 0, XE_PAGE_SIZE);
>>- m->usm_batch_base_ofs =
>xe_migrate_vram_ofs(batch_addr);
>>+ m->usm_batch_base_ofs =
>xe_migrate_vram_ofs(batch_addr, xe);
>> }
>> }
>>
>>@@ -268,7 +272,9 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile,
>struct xe_migrate *m,
>> * Use 1GB pages, it shouldn't matter the physical amount of
>> * vram is less, when we don't access it.
>> */
>>- for (pos = 0; pos < xe->mem.vram.actual_physical_size; pos +=
>SZ_1G, ofs += 8)
>>+ for (pos = xe->mem.vram.dpa_base;
>>+ pos < xe->mem.vram.actual_physical_size + xe-
>>mem.vram.dpa_base;
>>+ pos += SZ_1G, ofs += 8)
>> xe_map_wr(xe, &bo->vmap, ofs, u64, pos | flags);
>> }
>>
>>@@ -443,8 +449,8 @@ static u32 pte_update_size(struct xe_migrate *m,
>> cmds += cmd_size;
>> } else {
>> /* Offset into identity map. */
>>- *L0_ofs = xe_migrate_vram_ofs(cur->start +
>>- vram_region_gpu_offset(res));
>>+ *L0_ofs = xe_migrate_vram_ofs(cur->start +
>vram_region_gpu_offset(res),
>>+ tile_to_xe(m->tile));
>> cmds += cmd_size;
>> }
>>
>>@@ -1062,7 +1068,7 @@ static void write_pgtable(struct xe_tile *tile, struct
>xe_bb *bb, u64 ppgtt_ofs,
>> xe_tile_assert(tile, update->qwords <= 0x1ff);
>> if (!ppgtt_ofs) {
>> ppgtt_ofs = xe_migrate_vram_ofs(xe_bo_addr(update->pt_bo,
>0,
>>- XE_PAGE_SIZE));
>>+ XE_PAGE_SIZE),
>tile_to_xe(tile));
>> }
>>
>> do {
>>--
>>2.35.1
>>
More information about the Intel-xe
mailing list