[PATCH v6 11/12] drm/ttm, drm/xe: Add a shrinker for xe bos

Matthew Auld matthew.auld at intel.com
Fri Aug 9 16:05:32 UTC 2024


Hi,

On 03/07/2024 16:38, Thomas Hellström wrote:
> Rather than relying on the TTM watermark accounting add a shrinker
> for xe_bos in TT or system memory.
> 
> Leverage the newly added TTM per-page shrinking and shmem backup
> support.
> 
> Although xe doesn't fully support WONTNEED (purgeable) bos yet,
> introduce and add shrinker support for purgeable ttm_tts.
> 
> v2:
> - Cleanups, bugfixes and a KUNIT shrinker test.
> - Add writeback support, and activate it if called from kswapd.
> v3:
> - Move the try_shrink() helper to core TTM.
> - Minor cleanups.
> v4:
> - Add runtime pm for the shrinker. Shrinking may require an active
>    device for CCS metadata copying.
> v5:
> - Separately purge ghost- and zombie objects in the shrinker.
> - Fix a format specifier - type inconsistency. (Kernel test robot).
> 
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Somalapuram Amaranath <Amaranath.Somalapuram at amd.com>
> Cc: Matthew Brost <matthew.brost at intel.com>
> Cc: <dri-devel at lists.freedesktop.org>
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo_util.c     |  67 ++++++
>   drivers/gpu/drm/xe/Makefile           |   1 +
>   drivers/gpu/drm/xe/tests/xe_bo.c      | 118 +++++++++++
>   drivers/gpu/drm/xe/tests/xe_bo_test.c |   1 +
>   drivers/gpu/drm/xe/tests/xe_bo_test.h |   1 +
>   drivers/gpu/drm/xe/xe_bo.c            | 155 ++++++++++++--
>   drivers/gpu/drm/xe/xe_bo.h            |  26 +++
>   drivers/gpu/drm/xe/xe_device.c        |   8 +
>   drivers/gpu/drm/xe/xe_device_types.h  |   2 +
>   drivers/gpu/drm/xe/xe_shrinker.c      | 287 ++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_shrinker.h      |  18 ++
>   include/drm/ttm/ttm_bo.h              |   3 +
>   12 files changed, 671 insertions(+), 16 deletions(-)
>   create mode 100644 drivers/gpu/drm/xe/xe_shrinker.c
>   create mode 100644 drivers/gpu/drm/xe/xe_shrinker.h
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index c4f678f30fc2..563e96a4cf06 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -924,3 +924,70 @@ long ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
>   
>   	return progress;
>   }
> +EXPORT_SYMBOL(ttm_lru_walk_for_evict);
> +
> +/**
> + * ttm_bo_try_shrink - LRU walk helper to shrink a ttm buffer object.
> + * @walk: The struct ttm_lru_walk that describes the walk.
> + * @bo: The buffer object.
> + * @purge: Whether to attempt to purge the bo content since it's no
> + * longer needed.
> + * @writeback: If !@purge, attempt to write out to persistent storage.
> + *
> + * The function uses the ttm_tt_backup() functionality to back up or
> + * purge a struct ttm_tt. If the bo is not in system, it's first
> + * moved there.
> + *
> + * Return: The number of pages shrunken or purged, or
> + * negative error code on failure.
> + */
> +long ttm_bo_try_shrink(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo,
> +		       bool purge, bool writeback)
> +{
> +	static const struct ttm_place sys_placement_flags = {
> +		.fpfn = 0,
> +		.lpfn = 0,
> +		.mem_type = TTM_PL_SYSTEM,
> +		.flags = 0,
> +	};
> +	static struct ttm_placement sys_placement = {
> +		.num_placement = 1,
> +		.placement = &sys_placement_flags,
> +	};
> +	struct ttm_operation_ctx *ctx = walk->ctx;
> +	struct ttm_tt *tt = bo->ttm;
> +	long lret;
> +
> +	dma_resv_assert_held(bo->base.resv);
> +
> +	if (!tt || !ttm_tt_is_populated(tt))
> +		return 0;
> +
> +	if (bo->resource->mem_type != TTM_PL_SYSTEM) {
> +		int ret = ttm_bo_validate(bo, &sys_placement, ctx);
> +
> +		if (ret) {
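> +			/*
> +			 * Propagate errors that require the caller to
> +			 * back off and retry; anything else just means
> +			 * we skip this bo and continue the walk.
> +			 */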
> +			if (ret == -EINTR || ret == -EDEADLK ||
> +			    ret == -ERESTARTSYS)
> +				return ret;
> +			return 0;
> +		}
> +	}
> +
> +	lret = ttm_bo_wait_ctx(bo, ctx);
> +	if (lret < 0) {
> +		if (lret == -ERESTARTSYS)
> +			return lret;
> +		return 0;
> +	}
> +
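> +	/* A deleted bo never needs its contents again, so always purge. */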
> +	if (bo->deleted)
> +		lret = ttm_tt_backup(bo->bdev, tt, true, writeback);
> +	else
> +		lret = ttm_tt_backup(bo->bdev, tt, purge, writeback);
> +	if (lret < 0 && lret != -EINTR)
> +		return 0;
> +
> +	return lret;
> +}
> +EXPORT_SYMBOL(ttm_bo_try_shrink);
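
For anyone reading this mid-series: the intended consumer shape is
roughly the below. This is only my sketch, so treat the names as
approximate; the process_bo() op comes from the LRU-walk patch earlier
in the series, and xe_tt->purgeable is the xe-side flag added further
down in this patch.

#include <linux/swap.h>		/* current_is_kswapd() */
#include <drm/ttm/ttm_bo.h>

static long shrink_process_bo(struct ttm_lru_walk *walk,
			      struct ttm_buffer_object *bo)
{
	struct xe_ttm_tt *xe_tt = container_of(bo->ttm, struct xe_ttm_tt, ttm);

	/* Only write pages out to swap when kswapd drives the reclaim. */
	return ttm_bo_try_shrink(walk, bo, xe_tt->purgeable,
				 current_is_kswapd());
}

static const struct ttm_lru_walk_ops shrink_ops = {
	.process_bo = shrink_process_bo,
};
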
> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> index b1e03bfe4a68..1eba51bdd172 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -112,6 +112,7 @@ xe-y += xe_bb.o \
>   	xe_ring_ops.o \
>   	xe_sa.o \
>   	xe_sched_job.o \
> +	xe_shrinker.o \
>   	xe_step.o \
>   	xe_sync.o \
>   	xe_tile.o \
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
> index 9f3c02826464..49617f16dc76 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
> @@ -6,6 +6,8 @@
>   #include <kunit/test.h>
>   #include <kunit/visibility.h>
>   
> +#include <uapi/linux/sysinfo.h>
> +
>   #include "tests/xe_bo_test.h"
>   #include "tests/xe_pci_test.h"
>   #include "tests/xe_test.h"
> @@ -350,3 +352,119 @@ void xe_bo_evict_kunit(struct kunit *test)
>   	xe_call_for_each_device(evict_test_run_device);
>   }
>   EXPORT_SYMBOL_IF_KUNIT(xe_bo_evict_kunit);
> +
> +struct xe_bo_link {
> +	struct list_head link;
> +	struct xe_bo *bo;
> +};
> +
> +#define XE_BO_SHRINK_SIZE ((unsigned long)SZ_64M)
> +
> +/*
> + * Try to create system bos corresponding to twice the amount
> + * of available system memory to test shrinker functionality.
> + * If no swap space is available to accommodate the
> + * memory overcommit, mark bos purgeable.
> + */
> +static int shrink_test_run_device(struct xe_device *xe)
> +{
> +	struct kunit *test = xe_cur_kunit();
> +	LIST_HEAD(bos);
> +	struct xe_bo_link *link, *next;
> +	struct sysinfo si;
> +	size_t total, alloced;
> +	unsigned int interrupted = 0, successful = 0;
> +
> +	si_meminfo(&si);
> +	total = si.freeram * si.mem_unit;
> +
> +	kunit_info(test, "Free ram is %lu bytes. Will allocate twice that.\n",
> +		   (unsigned long) total);
> +
> +	total <<= 1;
> +	for (alloced = 0; alloced < total; alloced += XE_BO_SHRINK_SIZE) {
> +		struct xe_bo *bo;
> +		unsigned int mem_type;
> +
> +		link = kzalloc(sizeof(*link), GFP_KERNEL);
> +		if (!link) {
> +			KUNIT_FAIL(test, "Unexpected link allocation failure\n");
> +			break;
> +		}
> +
> +		INIT_LIST_HEAD(&link->link);
> +
> +		/* We could create bos using WC caching here, but it would be slower. */
> +		bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
> +				       DRM_XE_GEM_CPU_CACHING_WB,
> +				       ttm_bo_type_device,
> +				       XE_BO_FLAG_SYSTEM);
> +		if (IS_ERR(bo)) {
> +			if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
> +			    bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
> +				KUNIT_FAIL(test, "Error creating bo: %pe\n", bo);
> +			kfree(link);
> +			break;
> +		}
> +		link->bo = bo;
> +		list_add_tail(&link->link, &bos);
> +		xe_bo_lock(bo, false);
> +
> +		/*
> +		 * If we're low on swap entries, we can't shrink unless the bo
> +		 * is marked purgeable.
> +		 */
> +		if (get_nr_swap_pages() < (XE_BO_SHRINK_SIZE >> PAGE_SHIFT) * 128) {
> +			struct xe_ttm_tt *xe_tt =
> +				container_of(bo->ttm.ttm, typeof(*xe_tt), ttm);
> +			long num_pages = xe_tt->ttm.num_pages;
> +
> +			xe_tt->purgeable = true;
> +			xe_shrinker_mod_pages(xe->mem.shrinker, -num_pages,
> +					      num_pages);
> +		}
> +
> +		mem_type = bo->ttm.resource->mem_type;
> +		xe_bo_unlock(bo);
> +		if (mem_type != XE_PL_TT)
> +			KUNIT_FAIL(test, "Bo in incorrect memory type: %u\n",
> +				   bo->ttm.resource->mem_type);
> +		cond_resched();
> +		if (signal_pending(current))
> +			break;
> +	}
> +
> +	/* Read back and destroy bos */
> +	list_for_each_entry_safe_reverse(link, next, &bos, link) {
> +		static struct ttm_operation_ctx ctx = {.interruptible = true};
> +		struct xe_bo *bo = link->bo;
> +		int ret;
> +
> +		if (!signal_pending(current)) {
> +			xe_bo_lock(bo, false);
> +			ret = ttm_bo_validate(&bo->ttm, &tt_placement, &ctx);
> +			xe_bo_unlock(bo);
> +			if (ret && ret != -EINTR)
> +				KUNIT_FAIL(test, "Validation failed: %pe\n",
> +					   ERR_PTR(ret));
> +			else if (ret)
> +				interrupted++;
> +			else
> +				successful++;
> +		}
> +		xe_bo_put(link->bo);
> +		list_del(&link->link);
> +		kfree(link);
> +		cond_resched();
> +	}
> +	kunit_info(test, "Readbacks interrupted: %u successful: %u\n",
> +		   interrupted, successful);
> +
> +	return 0;
> +}
> +
> +void xe_bo_shrink_kunit(struct kunit *test)
> +{
> +	xe_call_for_each_device(shrink_test_run_device);
> +}
> +EXPORT_SYMBOL_IF_KUNIT(xe_bo_shrink_kunit);
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.c b/drivers/gpu/drm/xe/tests/xe_bo_test.c
> index a324cde77db8..317fa923e287 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo_test.c
> +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.c
> @@ -10,6 +10,7 @@
>   static struct kunit_case xe_bo_tests[] = {
>   	KUNIT_CASE(xe_ccs_migrate_kunit),
>   	KUNIT_CASE(xe_bo_evict_kunit),
> +	KUNIT_CASE_SLOW(xe_bo_shrink_kunit),
>   	{}
>   };
>   
> diff --git a/drivers/gpu/drm/xe/tests/xe_bo_test.h b/drivers/gpu/drm/xe/tests/xe_bo_test.h
> index 0113ab45066a..7f44d14a45c5 100644
> --- a/drivers/gpu/drm/xe/tests/xe_bo_test.h
> +++ b/drivers/gpu/drm/xe/tests/xe_bo_test.h
> @@ -10,5 +10,6 @@ struct kunit;
>   
>   void xe_ccs_migrate_kunit(struct kunit *test);
>   void xe_bo_evict_kunit(struct kunit *test);
> +void xe_bo_shrink_kunit(struct kunit *test);
>   
>   #endif
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 65c696966e96..6ab63d1642ae 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -10,6 +10,7 @@
>   #include <drm/drm_drv.h>
>   #include <drm/drm_gem_ttm_helper.h>
>   #include <drm/drm_managed.h>
> +#include <drm/ttm/ttm_backup.h>
>   #include <drm/ttm/ttm_device.h>
>   #include <drm/ttm/ttm_placement.h>
>   #include <drm/ttm/ttm_tt.h>
> @@ -25,6 +26,7 @@
>   #include "xe_pm.h"
>   #include "xe_preempt_fence.h"
>   #include "xe_res_cursor.h"
> +#include "xe_shrinker.h"
>   #include "xe_trace_bo.h"
>   #include "xe_ttm_stolen_mgr.h"
>   #include "xe_vm.h"
> @@ -278,11 +280,15 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
>   	}
>   }
>   
> +/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
>   struct xe_ttm_tt {
>   	struct ttm_tt ttm;
> -	struct device *dev;
> +	/** @xe: The xe device */
> +	struct xe_device *xe;
>   	struct sg_table sgt;
>   	struct sg_table *sg;
> +	/** @purgeable: Whether the bo is purgeable (WONTNEED) */
> +	bool purgeable;
>   };
>   
>   static int xe_tt_map_sg(struct ttm_tt *tt)
> @@ -291,7 +297,8 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
>   	unsigned long num_pages = tt->num_pages;
>   	int ret;
>   
> -	XE_WARN_ON(tt->page_flags & TTM_TT_FLAG_EXTERNAL);
> +	XE_WARN_ON((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
> +		   !(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE));
>   
>   	if (xe_tt->sg)
>   		return 0;
> @@ -299,13 +306,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
>   	ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
>   						num_pages, 0,
>   						(u64)num_pages << PAGE_SHIFT,
> -						xe_sg_segment_size(xe_tt->dev),
> +						xe_sg_segment_size(xe_tt->xe->drm.dev),
>   						GFP_KERNEL);
>   	if (ret)
>   		return ret;
>   
>   	xe_tt->sg = &xe_tt->sgt;
> -	ret = dma_map_sgtable(xe_tt->dev, xe_tt->sg, DMA_BIDIRECTIONAL,
> +	ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
>   			      DMA_ATTR_SKIP_CPU_SYNC);
>   	if (ret) {
>   		sg_free_table(xe_tt->sg);
> @@ -321,7 +328,7 @@ static void xe_tt_unmap_sg(struct ttm_tt *tt)
>   	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
>   
>   	if (xe_tt->sg) {
> -		dma_unmap_sgtable(xe_tt->dev, xe_tt->sg,
> +		dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
>   				  DMA_BIDIRECTIONAL, 0);
>   		sg_free_table(xe_tt->sg);
>   		xe_tt->sg = NULL;
> @@ -336,21 +343,41 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo)
>   	return xe_tt->sg;
>   }
>   
> +/*
> + * Account ttm pages against the device shrinker's shrinkable and
> + * purgeable counts.
> + */
> +static void xe_ttm_tt_account(struct ttm_tt *tt, bool add)
> +{
> +	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
> +	long num_pages = tt->num_pages;
> +
> +	if (!add)
> +		num_pages = -num_pages;
> +
> +	if (xe_tt->purgeable)
> +		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, num_pages);
> +	else
> +		xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, num_pages, 0);
> +}
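
The shrinkable/purgeable split also pins down what a future
WONTNEED/madvise path needs to do: flip the flag and move the pages
between the two counters. Hypothetical sketch, not part of this patch;
assumes the bo resv is held:

static void xe_tt_mark_purgeable(struct ttm_tt *tt)
{
	struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);

	if (xe_tt->purgeable)
		return;

	/* Drop the pages from the shrinkable count... */
	xe_ttm_tt_account(tt, false);
	xe_tt->purgeable = true;
	/* ...and re-add them on the purgeable side. */
	xe_ttm_tt_account(tt, true);
}

The shrink kunit test above does the same dance by hand via
xe_shrinker_mod_pages().
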
> +
>   static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
>   				       u32 page_flags)
>   {
>   	struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
>   	struct xe_device *xe = xe_bo_device(bo);
> -	struct xe_ttm_tt *tt;
> +	struct xe_ttm_tt *xe_tt;
> +	struct ttm_tt *tt;
>   	unsigned long extra_pages;
>   	enum ttm_caching caching;
>   	int err;
>   
> -	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
> -	if (!tt)
> +	xe_tt = kzalloc(sizeof(*xe_tt), GFP_KERNEL);
> +	if (!xe_tt)
>   		return NULL;
>   
> -	tt->dev = xe->drm.dev;
> +	tt = &xe_tt->ttm;
> +	xe_tt->xe = xe;
>   
>   	extra_pages = 0;
>   	if (xe_bo_needs_ccs_pages(bo))
> @@ -387,42 +414,128 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
>   		caching = ttm_uncached;
>   	}
>   
> -	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
> +	if (ttm_bo->type != ttm_bo_type_sg)
> +		page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
> +
> +	err = ttm_tt_init(tt, &bo->ttm, page_flags, caching, extra_pages);
>   	if (err) {
> -		kfree(tt);
> +		kfree(xe_tt);
>   		return NULL;
>   	}
>   
> -	return &tt->ttm;
> +	tt->backup = ttm_backup_shmem_create(tt->num_pages << PAGE_SHIFT);

I guess we should make this (loff_t)tt->num_pages << PAGE_SHIFT or
similar? ttm_tt::num_pages is only 32 bits wide, so as written the
shift is done in 32-bit arithmetic and overflows for objects of 4GiB
and up.
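
i.e. something like:

	tt->backup = ttm_backup_shmem_create((loff_t)tt->num_pages << PAGE_SHIFT);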

