[PATCH 4/5] amdgpu: add support for memcg integration
Waiman Long
llong at redhat.com
Fri May 2 14:01:33 UTC 2025
On 5/1/25 11:36 PM, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This adds the memcg object for any user allocated object,
> and adds account_op to necessary paths which might populate
> a tt object.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 ++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 +++++++++++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
> 4 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 82df06a72ee0..1a275224b4a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -787,6 +787,7 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
> struct ttm_operation_ctx ctx = {
> .interruptible = true,
> .no_wait_gpu = false,
> + .account_op = true,
> .resv = bo->tbo.base.resv
> };
> uint32_t domain;
> @@ -839,7 +840,11 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> union drm_amdgpu_cs *cs)
> {
> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> - struct ttm_operation_ctx ctx = { true, false };
> + struct ttm_operation_ctx ctx = {
> + .interruptible = true,
> + .no_wait_gpu = false,
> + .account_op = true,
> + };
> struct amdgpu_vm *vm = &fpriv->vm;
> struct amdgpu_bo_list_entry *e;
> struct drm_gem_object *obj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 69429df09477..bdad9a862ed3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -89,6 +89,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
> struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
>
> amdgpu_hmm_unregister(aobj);
> + mem_cgroup_put(aobj->tbo.memcg);
> ttm_bo_put(&aobj->tbo);
> }
>
> @@ -116,6 +117,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
> bp.domain = initial_domain;
> bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> bp.xcp_id_plus1 = xcp_id_plus1;
> + bp.memcg = get_mem_cgroup_from_mm(current->mm);
>
> r = amdgpu_bo_create_user(adev, &bp, &ubo);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 0b9987781f76..777cf05ebac8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -632,6 +632,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> struct ttm_operation_ctx ctx = {
> .interruptible = (bp->type != ttm_bo_type_kernel),
> .no_wait_gpu = bp->no_wait_gpu,
> + .account_op = true,
> /* We opt to avoid OOM on system pages allocations */
> .gfp_retry_mayfail = true,
> .allow_res_evict = bp->type != ttm_bo_type_kernel,
> @@ -657,16 +658,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
> size = ALIGN(size, PAGE_SIZE);
> }
>
> - if (!amdgpu_bo_validate_size(adev, size, bp->domain))
> + if (!amdgpu_bo_validate_size(adev, size, bp->domain)) {
> + mem_cgroup_put(bp->memcg);
You should clear bp->memcg (set it to NULL) after mem_cgroup_put() to avoid
leaving a stale reference, as the memcg can go away after that.
> return -ENOMEM;
> + }
>
> BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo));
>
> *bo_ptr = NULL;
> bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL);
> - if (bo == NULL)
> + if (bo == NULL) {
> + mem_cgroup_put(bp->memcg);
Ditto: bp->memcg should also be cleared after this mem_cgroup_put().
Cheers,
Longman
> return -ENOMEM;
> + }
> drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
> + bo->tbo.memcg = bp->memcg;
> bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
> bo->vm_bo = NULL;
> bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
> @@ -1341,7 +1347,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
> vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> {
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> - struct ttm_operation_ctx ctx = { false, false };
> + struct ttm_operation_ctx ctx = { .interruptible = false,
> + .no_wait_gpu = false,
> + .account_op = true };
> struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> int r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 375448627f7b..9a4c506cfb76 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -55,6 +55,7 @@ struct amdgpu_bo_param {
> enum ttm_bo_type type;
> bool no_wait_gpu;
> struct dma_resv *resv;
> + struct mem_cgroup *memcg;
> void (*destroy)(struct ttm_buffer_object *bo);
> /* xcp partition number plus 1, 0 means any partition */
> int8_t xcp_id_plus1;
More information about the dri-devel
mailing list