[PATCH] drm/xe: Add a xe_bo subtest for shrinking / swapping
Matthew Auld
matthew.auld at intel.com
Fri Sep 6 15:59:15 UTC 2024
On 05/09/2024 14:16, Thomas Hellström wrote:
> Add a subtest that tries to allocate twice the amount of
> buffer object memory available, write data to it and then read
> all the data back, verifying data integrity.
> To be able to do this on systems that have little or no
> swap space available, allocate some of the memory as purgeable,
> and introduce a function to purge such memory from the
> TTM swap_notify path.
>
> This test is intended to add test coverage for the current
> bo swap path and the upcoming shrinking path.
>
> The test has previously been part of the xe bo shrinker series.
>
> v2:
> - Skip test if the execution time is expected to be too long.
> - Minor code cleanups.
>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/xe/tests/xe_bo.c | 237 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_bo.c | 32 ++++-
> 2 files changed, 268 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
> index 8dac069483e8..d82b7748dd9e 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> @@ -6,6 +6,12 @@
> #include <kunit/test.h>
> #include <kunit/visibility.h>
>
> +#include <linux/iosys-map.h>
> +#include <linux/random.h>
> +#include <linux/swap.h>
> +
> +#include <uapi/linux/sysinfo.h>
> +
> #include "tests/xe_kunit_helpers.h"
> #include "tests/xe_pci_test.h"
> #include "tests/xe_test.h"
> @@ -358,9 +364,240 @@ static void xe_bo_evict_kunit(struct kunit *test)
> evict_test_run_device(xe);
> }
>
> +struct xe_bo_link {
> + struct list_head link;
> + struct xe_bo *bo;
> + u32 val;
> +};
> +
> +#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
> +
> +static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
> + struct xe_bo_link *link)
> +{
> + struct iosys_map map;
> + int ret = ttm_bo_vmap(&bo->ttm, &map);
> + size_t __maybe_unused i;
> +
> + if (ret)
> + return ret;
> +
> + for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
> + u32 val = prandom_u32_state(state);
> +
> + iosys_map_wr(&map, i, u32, val);
> + if (i == 0)
> + link->val = val;
> + }
> +
> + ttm_bo_vunmap(&bo->ttm, &map);
> + return 0;
> +}
> +
> +static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
> + unsigned int bo_nr, struct rnd_state *state,
> + struct xe_bo_link *link)
> +{
> + struct iosys_map map;
> + int ret = ttm_bo_vmap(&bo->ttm, &map);
> + size_t i;
> + bool failed = false;
> +
> + if (ret) {
> + KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
> + return true;
> + }
> +
> + for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
> + u32 val = prandom_u32_state(state);
> +
> + if (iosys_map_rd(&map, i, u32) != val) {
> + KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
> + bo_nr, (unsigned long long)i);
> + kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
> + (unsigned int)iosys_map_rd(&map, i, u32), val);
> + if (i == 0 && val != link->val)
> + kunit_info(test, "Looks like PRNG is out of sync.\n");
> + failed = true;
> + break;
> + }
> + }
> +
> + ttm_bo_vunmap(&bo->ttm, &map);
> +
> + return failed;
> +}
> +
> +/*
> + * Try to create system bos corresponding to twice the amount
> + * of available system memory to test shrinker functionality.
> + * If no swap space is available to accommodate the
> + * memory overcommit, mark bos purgeable.
> + */
> +static int shrink_test_run_device(struct xe_device *xe)
> +{
> + struct kunit *test = kunit_get_current_test();
> + LIST_HEAD(bos);
> + struct xe_bo_link *link, *next;
> + struct sysinfo si;
> + size_t ram, ram_and_swap, purgeable = 0, alloced, to_alloc, limit;
> + unsigned int interrupted = 0, successful = 0, count = 0;
> + struct rnd_state prng;
> + u64 rand_seed;
> + bool failed = false;
> +
> + rand_seed = get_random_u64();
> + prandom_seed_state(&prng, rand_seed);
We could maybe print the seed, if we don't already, just in case it's
useful to repro something?
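Something along these lines right after the seeding would probably be
enough (just a sketch):

    kunit_info(test, "Random seed: 0x%016llx\n",
               (unsigned long long)rand_seed);

That way a failing run can be tied to the exact pseudo-random
sequence that produced it.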
Anyway,
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
> +
> + /* Skip if execution time is expected to be too long. */
> +
> + limit = SZ_32G;
> + /* IGFX with flat CCS needs to copy when swapping / shrinking */
> + if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
> + limit = SZ_16G;
> +
> + si_meminfo(&si);
> + ram = (size_t)si.freeram * si.mem_unit;
> + if (ram > limit) {
> + kunit_skip(test, "Too long expected execution time.\n");
> + return 0;
> + }
> + to_alloc = ram * 2;
> +
> + ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE;
> + if (to_alloc > ram_and_swap)
> + purgeable = to_alloc - ram_and_swap;
> + purgeable += purgeable / 5;
> +
> + kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
> + (unsigned long)ram);
> + for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) {
> + struct xe_bo *bo;
> + unsigned int mem_type;
> + struct xe_ttm_tt *xe_tt;
> +
> + link = kzalloc(sizeof(*link), GFP_KERNEL);
> + if (!link) {
> + KUNIT_FAIL(test, "Unexpected link allocation failure\n");
> + failed = true;
> + break;
> + }
> +
> + INIT_LIST_HEAD(&link->link);
> +
> + /* We can create bos using WC caching here. But it is slower. */
> + bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
> + DRM_XE_GEM_CPU_CACHING_WB,
> + XE_BO_FLAG_SYSTEM);
> + if (IS_ERR(bo)) {
> + if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
> + bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
> + KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
> + kfree(link);
> + failed = true;
> + break;
> + }
> + xe_bo_lock(bo, false);
> + xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
> +
> + /*
> + * Allocate purgeable bos first, because if we do it the
> + * other way around, they may not be subject to swapping...
> + */
> + if (alloced < purgeable) {
> + xe_tt->purgeable = true;
> + bo->ttm.priority = 0;
> + } else {
> + int ret = shrink_test_fill_random(bo, &prng, link);
> +
> + if (ret) {
> + xe_bo_unlock(bo);
> + xe_bo_put(bo);
> + KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
> + ERR_PTR(ret));
> + kfree(link);
> + failed = true;
> + break;
> + }
> + }
> +
> + mem_type = bo->ttm.resource->mem_type;
> + xe_bo_unlock(bo);
> + link->bo = bo;
> + list_add_tail(&link->link, &bos);
> +
> + if (mem_type != XE_PL_TT) {
> + KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
> + bo->ttm.resource->mem_type);
> + failed = true;
> + }
> + cond_resched();
> + if (signal_pending(current))
> + break;
> + }
> +
> + /*
> + * Read back and destroy bos. Reset the pseudo-random seed to get an
> + * identical pseudo-random number sequence for readback.
> + */
> + prandom_seed_state(&prng, rand_seed);
> + list_for_each_entry_safe(link, next, &bos, link) {
> + static struct ttm_operation_ctx ctx = {.interruptible = true};
> + struct xe_bo *bo = link->bo;
> + struct xe_ttm_tt *xe_tt;
> + int ret;
> +
> + count++;
> + if (!signal_pending(current) && !failed) {
> + bool purgeable, intr = false;
> +
> + xe_bo_lock(bo, false);
> +
> + /* xe_tt->purgeable is cleared on validate. */
> + xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
> + purgeable = xe_tt->purgeable;
> + do {
> + ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
> + if (ret == -EINTR)
> + intr = true;
> + } while (ret == -EINTR && !signal_pending(current));
> +
> + if (!ret && !purgeable)
> + failed = shrink_test_verify(test, bo, count, &prng, link);
> +
> + xe_bo_unlock(bo);
> + if (ret) {
> + KUNIT_FAIL(test, "Validation failed: %pe\n",
> + ERR_PTR(ret));
> + failed = true;
> + } else if (intr) {
> + interrupted++;
> + } else {
> + successful++;
> + }
> + }
> + xe_bo_put(link->bo);
> + list_del(&link->link);
> + kfree(link);
> + }
> + kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
> + interrupted, successful);
> +
> + return 0;
> +}
> +
> +static void xe_bo_shrink_kunit(struct kunit *test)
> +{
> + struct xe_device *xe = test->priv;
> +
> + shrink_test_run_device(xe);
> +}
> +
> static struct kunit_case xe_bo_tests[] = {
> KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
> KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
> + KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
> + {.speed = KUNIT_SPEED_SLOW}),
> {}
> };
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 06911e9a3bf5..a065ba8fda83 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -283,6 +283,8 @@ struct xe_ttm_tt {
> struct device *dev;
> struct sg_table sgt;
> struct sg_table *sg;
> + /** @purgeable: Whether the content of the pages of @ttm is purgeable. */
> + bool purgeable;
> };
>
> static int xe_tt_map_sg(struct ttm_tt *tt)
> @@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
> if (xe_rpm_reclaim_safe(xe)) {
> /*
> * We might be called through swapout in the validation path of
> - * another TTM device, so unconditionally acquire rpm here.
> + * another TTM device, so acquire rpm here.
> */
> xe_pm_runtime_get(xe);
> } else {
> @@ -1082,6 +1084,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
> }
> }
>
> +static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
> +{
> + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> +
> + if (ttm_bo->ttm) {
> + struct ttm_placement place = {};
> + int ret = ttm_bo_validate(ttm_bo, &place, ctx);
> +
> + drm_WARN_ON(&xe->drm, ret);
> + }
> +}
> +
> +static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
> +{
> + struct ttm_operation_ctx ctx = {
> + .interruptible = false
> + };
> +
> + if (ttm_bo->ttm) {
> + struct xe_ttm_tt *xe_tt =
> + container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
> +
> + if (xe_tt->purgeable)
> + xe_ttm_bo_purge(ttm_bo, &ctx);
> + }
> +}
> +
> const struct ttm_device_funcs xe_ttm_funcs = {
> .ttm_tt_create = xe_ttm_tt_create,
> .ttm_tt_populate = xe_ttm_tt_populate,
> @@ -1094,6 +1123,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
> .release_notify = xe_ttm_bo_release_notify,
> .eviction_valuable = ttm_bo_eviction_valuable,
> .delete_mem_notify = xe_ttm_bo_delete_mem_notify,
> + .swap_notify = xe_ttm_bo_swap_notify,
> };
>
> static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)