drm/ttm/memcg/lru: enable memcg tracking for ttm and amdgpu driver (complete series v2)
Maarten Lankhorst
maarten.lankhorst at linux.intel.com
Tue Aug 5 10:58:08 UTC 2025
Hey,
Den 2025-07-14 kl. 07:18, skrev Dave Airlie:
> Hi all,
>
> This is a repost with some fixes and cleanups.
>
> Differences since last posting:
> 1. Added patch 18: add a module option to allow pooled pages to not be stored in the lru per-memcg
> (Requested by Christian König)
> 2. Converged the naming and stats between vmstat and memcg (Suggested by Shakeel Butt)
> 3. Cleaned up the charge/uncharge code and some other bits.
>
> Dave.
>
> Original cover letter:
> tl;dr: start using list_lru/numa/memcg in GPU driver core and amdgpu driver for now.
>
> This is a complete series of patches, some of which have been sent before and reviewed,
> but I want to get the complete picture for others, and try to figure out how best to land this.
>
> There are 3 pieces to this:
> 01->02: add support for global gpu stat counters (previously posted, patch 2 is newer)
> 03->07: port ttm pools to list_lru for numa awareness
> 08->14: add memcg stats + gpu apis, then port ttm pools to memcg aware list_lru and shrinker
> 15->17: enable amdgpu to use new functionality.
>
> The biggest difference in the memcg code from previously is I discovered what
> obj cgroups were designed for and I'm reusing the page/objcg integration that
> already exists, to avoid reinventing that wheel right now.
>
> There are some igt-gpu-tools tests I've written at:
> https://gitlab.freedesktop.org/airlied/igt-gpu-tools/-/tree/amdgpu-cgroups?ref_type=heads
>
> One problem is there is a lot of delayed action, which probably means the testing
> needs a bit more robustness, but the tests validate all the basic paths.
>
> Regards,
> Dave.
>
Patch below to enable on xe as well, I ran into some issues though when testing.
After shutting down gdm3/sddm, I ran into a null dereference in mem_cgroup_uncharge_gpu_page()
from ttm_pool_free_page(), presumably because of the objects that were created without a
cgroup set. I tried to fix it in mem_cgroup_uncharge_gpu_page() by conditionally calling
refill_stock(), but that ran into an underflow instead.
Anyway, patch for xe below:
----->8-----------
drm/xe: Enable memcg accounting for TT/system
Create a flag to enable memcg accounting for XE as well.
Signed-off-by: Maarten Lankhorst <dev at lankhorst.se>
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 867087c2d1534..fd93374967c9e 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -54,6 +54,7 @@ static const struct ttm_place sys_placement_flags = {
.flags = 0,
};
+/* TTM_PL_FLAG_MEMCG is not set, those placements are used for eviction */
static struct ttm_placement sys_placement = {
.num_placement = 1,
.placement = &sys_placement_flags,
@@ -188,6 +189,7 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_TT,
+ .flags = TTM_PL_FLAG_MEMCG,
};
*c += 1;
}
@@ -1696,6 +1698,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
static void xe_gem_object_free(struct drm_gem_object *obj)
{
+ struct xe_bo *bo = gem_to_xe_bo(obj);
+
/* Our BO reference counting scheme works as follows:
*
* The gem object kref is typically used throughout the driver,
@@ -1709,8 +1713,9 @@ static void xe_gem_object_free(struct drm_gem_object *obj)
* driver ttm callbacks is allowed to use the ttm_buffer_object
* refcount directly if needed.
*/
- __xe_bo_vunmap(gem_to_xe_bo(obj));
- ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+ __xe_bo_vunmap(bo);
+ obj_cgroup_put(bo->ttm.objcg);
+ ttm_bo_put(&bo->ttm);
}
static void xe_gem_object_close(struct drm_gem_object *obj,
@@ -1951,6 +1956,9 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
placement = (type == ttm_bo_type_sg ||
bo->flags & XE_BO_FLAG_DEFER_BACKING) ? &sys_placement :
&bo->placement;
+
+ if (bo->flags & XE_BO_FLAG_ACCOUNTED)
+ bo->ttm.objcg = get_obj_cgroup_from_current();
err = ttm_bo_init_reserved(&xe->ttm, &bo->ttm, type,
placement, alignment,
&ctx, NULL, resv, xe_ttm_bo_destroy);
@@ -2726,7 +2734,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, args->size & ~PAGE_MASK))
return -EINVAL;
- bo_flags = 0;
+ bo_flags = XE_BO_FLAG_ACCOUNTED;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
bo_flags |= XE_BO_FLAG_DEFER_BACKING;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 6134d82e80554..e44fc58d9a00f 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -48,6 +48,7 @@
#define XE_BO_FLAG_GGTT2 BIT(22)
#define XE_BO_FLAG_GGTT3 BIT(23)
#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24)
+#define XE_BO_FLAG_ACCOUNTED BIT(25)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 540f044bf4255..4db3227d65c04 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -1266,7 +1266,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE;
if (vm && vm->xef) /* userspace */
- bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_ACCOUNTED;
lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
ttm_bo_type_kernel,
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 5729e7d3e3356..569035630ffdf 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -885,7 +885,7 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
size, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | XE_BO_FLAG_ACCOUNTED);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 330cc0f54a3f4..efcd54ab75e92 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -120,7 +120,8 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE;
if (vm->xef) /* userspace */
- bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_ACCOUNTED;
pt->level = level;
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 10c8a1bcb86e8..fdf845bb717e0 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -700,6 +700,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
bo = xe_bo_create_locked(vr->xe, NULL, NULL, end - start,
ttm_bo_type_device,
(IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
+ XE_BO_FLAG_ACCOUNTED |
XE_BO_FLAG_CPU_ADDR_MIRROR);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
More information about the dri-devel
mailing list