[PATCH] drm/xe: Add a xe_bo subtest for shrinking / swapping

Matthew Auld matthew.auld at intel.com
Fri Sep 6 15:59:15 UTC 2024


On 05/09/2024 14:16, Thomas Hellström wrote:
> Add a subtest that tries to allocate twice the amount of
> buffer object memory available, write data to it and then read
> all the data back verifying data integrity.
> In order to be able to do this on systems that
> have no or not enough swap-space available, allocate some memory
> as purgeable, and introduce a function to purge such memory from
> the TTM swap_notify path.
> 
> This test is intended to add test coverage to the current
> bo swap path and to the upcoming shrinking path.
> 
> The test has previously been part of the xe bo shrinker series.
> 
> v2:
> - Skip test if the execution time is expected to be too long.
> - Minor code cleanups.
> 
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>   drivers/gpu/drm/xe/tests/xe_bo.c | 237 +++++++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_bo.c       |  32 ++++-
>   2 files changed, 268 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
> index 8dac069483e8..d82b7748dd9e 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> @@ -6,6 +6,12 @@
>   #include <kunit/test.h>
>   #include <kunit/visibility.h>
>   
> +#include <linux/iosys-map.h>
> +#include <linux/random.h>
> +#include <linux/swap.h>
> +
> +#include <uapi/linux/sysinfo.h>
> +
>   #include "tests/xe_kunit_helpers.h"
>   #include "tests/xe_pci_test.h"
>   #include "tests/xe_test.h"
> @@ -358,9 +364,240 @@ static void xe_bo_evict_kunit(struct kunit *test)
>   	evict_test_run_device(xe);
>   }
>   
> +struct xe_bo_link {
> +	struct list_head link;
> +	struct xe_bo *bo;
> +	u32 val;
> +};
> +
> +#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
> +
> +static int shrink_test_fill_random(struct xe_bo *bo, struct rnd_state *state,
> +				   struct xe_bo_link *link)
> +{
> +	struct iosys_map map;
> +	int ret = ttm_bo_vmap(&bo->ttm, &map);
> +	size_t __maybe_unused i;
> +
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
> +		u32 val = prandom_u32_state(state);
> +
> +		iosys_map_wr(&map, i, u32, val);
> +		if (i == 0)
> +			link->val = val;
> +	}
> +
> +	ttm_bo_vunmap(&bo->ttm, &map);
> +	return 0;
> +}
> +
> +static bool shrink_test_verify(struct kunit *test, struct xe_bo *bo,
> +			       unsigned int bo_nr, struct rnd_state *state,
> +			       struct xe_bo_link *link)
> +{
> +	struct iosys_map map;
> +	int ret = ttm_bo_vmap(&bo->ttm, &map);
> +	size_t i;
> +	bool failed = false;
> +
> +	if (ret) {
> +		KUNIT_FAIL(test, "Error mapping bo %u for content check.\n", bo_nr);
> +		return true;
> +	}
> +
> +	for (i = 0; i < bo->ttm.base.size; i += sizeof(u32)) {
> +		u32 val = prandom_u32_state(state);
> +
> +		if (iosys_map_rd(&map, i, u32) != val) {
> +			KUNIT_FAIL(test, "Content not preserved, bo %u offset 0x%016llx",
> +				   bo_nr, (unsigned long long)i);
> +			kunit_info(test, "Failed value is 0x%08x, recorded 0x%08x\n",
> +				   (unsigned int)iosys_map_rd(&map, i, u32), val);
> +			if (i == 0 && val != link->val)
> +				kunit_info(test, "Looks like PRNG is out of sync.\n");
> +			failed = true;
> +			break;
> +		}
> +	}
> +
> +	ttm_bo_vunmap(&bo->ttm, &map);
> +
> +	return failed;
> +}
> +
> +/*
> + * Try to create system bos corresponding to twice the amount
> + * of available system memory to test shrinker functionality.
> + * If no swap space is available to accommodate the
> + * memory overcommit, mark bos purgeable.
> + */
> +static int shrink_test_run_device(struct xe_device *xe)
> +{
> +	struct kunit *test = kunit_get_current_test();
> +	LIST_HEAD(bos);
> +	struct xe_bo_link *link, *next;
> +	struct sysinfo si;
> +	size_t ram, ram_and_swap, purgeable, alloced, to_alloc, limit;
> +	unsigned int interrupted = 0, successful = 0, count = 0;
> +	struct rnd_state prng;
> +	u64 rand_seed;
> +	bool failed = false;
> +
> +	rand_seed = get_random_u64();
> +	prandom_seed_state(&prng, rand_seed);

We could maybe print the seed, if we don't already, just in case it's 
needed to reproduce a failure?

Anyway,
Reviewed-by: Matthew Auld <matthew.auld at intel.com>

> +
> +	/* Skip if execution time is expected to be too long. */
> +
> +	limit = SZ_32G;
> +	/* IGFX with flat CCS needs to copy when swapping / shrinking */
> +	if (!IS_DGFX(xe) && xe_device_has_flat_ccs(xe))
> +		limit = SZ_16G;
> +
> +	si_meminfo(&si);
> +	ram = (size_t)si.freeram * si.mem_unit;
> +	if (ram > limit) {
> +		kunit_skip(test, "Too long expected execution time.\n");
> +		return 0;
> +	}
> +	to_alloc = ram * 2;
> +
> +	ram_and_swap = ram + get_nr_swap_pages() * PAGE_SIZE;
> +	if (to_alloc > ram_and_swap)
> +		purgeable = to_alloc - ram_and_swap;
> +	purgeable += purgeable / 5;
> +
> +	kunit_info(test, "Free ram is %lu bytes. Will allocate twice of that.\n",
> +		   (unsigned long)ram);
> +	for (alloced = 0; alloced < to_alloc; alloced += XE_BO_SHRINK_SIZE) {
> +		struct xe_bo *bo;
> +		unsigned int mem_type;
> +		struct xe_ttm_tt *xe_tt;
> +
> +		link = kzalloc(sizeof(*link), GFP_KERNEL);
> +		if (!link) {
> +			KUNIT_FAIL(test, "Unexpected link allocation failure\n");
> +			failed = true;
> +			break;
> +		}
> +
> +		INIT_LIST_HEAD(&link->link);
> +
> +		/* We can create bos using WC caching here. But it is slower. */
> +		bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
> +				       DRM_XE_GEM_CPU_CACHING_WB,
> +				       XE_BO_FLAG_SYSTEM);
> +		if (IS_ERR(bo)) {
> +			if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
> +			    bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
> +				KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
> +			kfree(link);
> +			failed = true;
> +			break;
> +		}
> +		xe_bo_lock(bo, false);
> +		xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
> +
> +		/*
> +		 * Allocate purgeable bos first, because if we do it the
> +		 * other way around, they may not be subject to swapping...
> +		 */
> +		if (alloced < purgeable) {
> +			xe_tt->purgeable = true;
> +			bo->ttm.priority = 0;
> +		} else {
> +			int ret = shrink_test_fill_random(bo, &prng, link);
> +
> +			if (ret) {
> +				xe_bo_unlock(bo);
> +				xe_bo_put(bo);
> +				KUNIT_FAIL(test, "Error filling bo with random data: %pe\n",
> +					   ERR_PTR(ret));
> +				kfree(link);
> +				failed = true;
> +				break;
> +			}
> +		}
> +
> +		mem_type = bo->ttm.resource->mem_type;
> +		xe_bo_unlock(bo);
> +		link->bo = bo;
> +		list_add_tail(&link->link, &bos);
> +
> +		if (mem_type != XE_PL_TT) {
> +			KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
> +				   bo->ttm.resource->mem_type);
> +			failed = true;
> +		}
> +		cond_resched();
> +		if (signal_pending(current))
> +			break;
> +	}
> +
> +	/*
> +	 * Read back and destroy bos. Reset the pseudo-random seed to get an
> +	 * identical pseudo-random number sequence for readback.
> +	 */
> +	prandom_seed_state(&prng, rand_seed);
> +	list_for_each_entry_safe(link, next, &bos, link) {
> +		static struct ttm_operation_ctx ctx = {.interruptible = true};
> +		struct xe_bo *bo = link->bo;
> +		struct xe_ttm_tt *xe_tt;
> +		int ret;
> +
> +		count++;
> +		if (!signal_pending(current) && !failed) {
> +			bool purgeable, intr = false;
> +
> +			xe_bo_lock(bo, NULL);
> +
> +			/* xe_tt->purgeable is cleared on validate. */
> +			xe_tt = container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
> +			purgeable = xe_tt->purgeable;
> +			do {
> +				ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
> +				if (ret == -EINTR)
> +					intr = true;
> +			} while (ret == -EINTR && !signal_pending(current));
> +
> +			if (!ret && !purgeable)
> +				failed = shrink_test_verify(test, bo, count, &prng, link);
> +
> +			xe_bo_unlock(bo);
> +			if (ret) {
> +				KUNIT_FAIL(test, "Validation failed: %pe\n",
> +					   ERR_PTR(ret));
> +				failed = true;
> +			} else if (intr) {
> +				interrupted++;
> +			} else {
> +				successful++;
> +			}
> +		}
> +		xe_bo_put(link->bo);
> +		list_del(&link->link);
> +		kfree(link);
> +	}
> +	kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
> +		   interrupted, successful);
> +
> +	return 0;
> +}
> +
> +static void xe_bo_shrink_kunit(struct kunit *test)
> +{
> +	struct xe_device *xe = test->priv;
> +
> +	shrink_test_run_device(xe);
> +}
> +
>   static struct kunit_case xe_bo_tests[] = {
>   	KUNIT_CASE_PARAM(xe_ccs_migrate_kunit, xe_pci_live_device_gen_param),
>   	KUNIT_CASE_PARAM(xe_bo_evict_kunit, xe_pci_live_device_gen_param),
> +	KUNIT_CASE_PARAM_ATTR(xe_bo_shrink_kunit, xe_pci_live_device_gen_param,
> +			      {.speed = KUNIT_SPEED_SLOW}),
>   	{}
>   };
>   
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 06911e9a3bf5..a065ba8fda83 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -283,6 +283,8 @@ struct xe_ttm_tt {
>   	struct device *dev;
>   	struct sg_table sgt;
>   	struct sg_table *sg;
> +	/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
> +	bool purgeable;
>   };
>   
>   static int xe_tt_map_sg(struct ttm_tt *tt)
> @@ -761,7 +763,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>   	if (xe_rpm_reclaim_safe(xe)) {
>   		/*
>   		 * We might be called through swapout in the validation path of
> -		 * another TTM device, so unconditionally acquire rpm here.
> +		 * another TTM device, so acquire rpm here.
>   		 */
>   		xe_pm_runtime_get(xe);
>   	} else {
> @@ -1082,6 +1084,33 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
>   	}
>   }
>   
> +static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operation_ctx *ctx)
> +{
> +	struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> +
> +	if (ttm_bo->ttm) {
> +		struct ttm_placement place = {};
> +		int ret = ttm_bo_validate(ttm_bo, &place, ctx);
> +
> +		drm_WARN_ON(&xe->drm, ret);
> +	}
> +}
> +
> +static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo)
> +{
> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = false
> +	};
> +
> +	if (ttm_bo->ttm) {
> +		struct xe_ttm_tt *xe_tt =
> +			container_of(ttm_bo->ttm, struct xe_ttm_tt, ttm);
> +
> +		if (xe_tt->purgeable)
> +			xe_ttm_bo_purge(ttm_bo, &ctx);
> +	}
> +}
> +
>   const struct ttm_device_funcs xe_ttm_funcs = {
>   	.ttm_tt_create = xe_ttm_tt_create,
>   	.ttm_tt_populate = xe_ttm_tt_populate,
> @@ -1094,6 +1123,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
>   	.release_notify = xe_ttm_bo_release_notify,
>   	.eviction_valuable = ttm_bo_eviction_valuable,
>   	.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
> +	.swap_notify = xe_ttm_bo_swap_notify,
>   };
>   
>   static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)


More information about the Intel-xe mailing list