[PATCH 4/5] amdgpu: add support for memcg integration

Waiman Long llong at redhat.com
Fri May 2 14:01:33 UTC 2025


On 5/1/25 11:36 PM, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This adds the memcg object for any user allocated object,
> and adds account_op to necessary paths which might populate
> a tt object.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |  7 ++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 14 +++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 +
>   4 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 82df06a72ee0..1a275224b4a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -787,6 +787,7 @@ static int amdgpu_cs_bo_validate(void *param, struct amdgpu_bo *bo)
>   	struct ttm_operation_ctx ctx = {
>   		.interruptible = true,
>   		.no_wait_gpu = false,
> +		.account_op = true,
>   		.resv = bo->tbo.base.resv
>   	};
>   	uint32_t domain;
> @@ -839,7 +840,11 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   				union drm_amdgpu_cs *cs)
>   {
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> -	struct ttm_operation_ctx ctx = { true, false };
> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = true,
> +		.no_wait_gpu = false,
> +		.account_op = true,
> +	};
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   	struct amdgpu_bo_list_entry *e;
>   	struct drm_gem_object *obj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 69429df09477..bdad9a862ed3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -89,6 +89,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
>   	struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);
>   
>   	amdgpu_hmm_unregister(aobj);
> +	mem_cgroup_put(aobj->tbo.memcg);
>   	ttm_bo_put(&aobj->tbo);
>   }
>   
> @@ -116,6 +117,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
>   	bp.domain = initial_domain;
>   	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
>   	bp.xcp_id_plus1 = xcp_id_plus1;
> +	bp.memcg = get_mem_cgroup_from_mm(current->mm);
>   
>   	r = amdgpu_bo_create_user(adev, &bp, &ubo);
>   	if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 0b9987781f76..777cf05ebac8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -632,6 +632,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   	struct ttm_operation_ctx ctx = {
>   		.interruptible = (bp->type != ttm_bo_type_kernel),
>   		.no_wait_gpu = bp->no_wait_gpu,
> +		.account_op = true,
>   		/* We opt to avoid OOM on system pages allocations */
>   		.gfp_retry_mayfail = true,
>   		.allow_res_evict = bp->type != ttm_bo_type_kernel,
> @@ -657,16 +658,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>   		size = ALIGN(size, PAGE_SIZE);
>   	}
>   
> -	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
> +	if (!amdgpu_bo_validate_size(adev, size, bp->domain)) {
> +		mem_cgroup_put(bp->memcg);
You should clear bp->memcg (set it to NULL) after mem_cgroup_put() to avoid
leaving a stale reference, as the memcg can go away after that.
>   		return -ENOMEM;
> +	}
>   
>   	BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo));
>   
>   	*bo_ptr = NULL;
>   	bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL);
> -	if (bo == NULL)
> +	if (bo == NULL) {
> +		mem_cgroup_put(bp->memcg);

Ditto: clear bp->memcg after mem_cgroup_put() on this error path as well.

Cheers,
Longman

>   		return -ENOMEM;
> +	}
>   	drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
> +	bo->tbo.memcg = bp->memcg;
>   	bo->tbo.base.funcs = &amdgpu_gem_object_funcs;
>   	bo->vm_bo = NULL;
>   	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
> @@ -1341,7 +1347,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>   vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>   {
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> -	struct ttm_operation_ctx ctx = { false, false };
> +	struct ttm_operation_ctx ctx = { .interruptible = false,
> +					 .no_wait_gpu = false,
> +					 .account_op = true };
>   	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>   	int r;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 375448627f7b..9a4c506cfb76 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -55,6 +55,7 @@ struct amdgpu_bo_param {
>   	enum ttm_bo_type		type;
>   	bool				no_wait_gpu;
>   	struct dma_resv			*resv;
> +	struct mem_cgroup               *memcg;
>   	void				(*destroy)(struct ttm_buffer_object *bo);
>   	/* xcp partition number plus 1, 0 means any partition */
>   	int8_t				xcp_id_plus1;



More information about the dri-devel mailing list