[PATCH] drm/amdkfd: Track GPU memory utilization per process

Felix Kuehling felix.kuehling at amd.com
Tue Apr 21 23:43:58 UTC 2020


See comments inline.

Am 2020-04-19 um 9:58 p.m. schrieb Mukul Joshi:
> Track GPU memory usage on a per process basis and report it through
> sysfs.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 ++++++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 51 ++++++++++++++++++++++--
>  3 files changed, 66 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f8fa03a12add..91d4f45730ae 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -39,6 +39,7 @@
>  #include "kfd_device_queue_manager.h"
>  #include "kfd_dbgmgr.h"
>  #include "amdgpu_amdkfd.h"
> +#include "amdgpu_object.h"
>  
>  static long kfd_ioctl(struct file *, unsigned int, unsigned long);
>  static int kfd_open(struct inode *, struct file *);
> @@ -1322,6 +1323,9 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>  		goto err_free;
>  	}
>  
> +	/* Update the VRAM usage count */
> +	pdd->vram_usage += args->size;
> +
>  	mutex_unlock(&p->mutex);
>  
>  	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
> @@ -1351,6 +1355,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>  	void *mem;
>  	struct kfd_dev *dev;
>  	int ret;
> +	uint64_t size = 0;
>  
>  	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
>  	if (!dev)
> @@ -1372,6 +1377,11 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>  		goto err_unlock;
>  	}
>  
> +	if (((struct kgd_mem *)mem)->bo)
> +		size = ((struct kgd_mem *)mem)->bo->tbo.mem.size;
> +	else
> +		pr_debug("BO is NULL\n");

Like Harish said, you'll need to find out whether the BO is in VRAM by
checking bo->preferred_domain == AMDGPU_GEM_DOMAIN_VRAM. Also, it would
be cleaner to implement the AMDGPU BO-specific details of this in
amdgpu_amdkfd_gpuvm_free_memory_of_gpu. I had suggested returning the
freed VRAM size as an output parameter of that function to KFD.

I realized that there is another problem we need to account for.
kfd_ioctl_free_memory_of_gpu is also used for unreferencing imported
memory (imported graphics buffers and IPC buffers). We don't count their
VRAM usage during import, so we shouldn't count it during free either.
For that we'd probably need to add a flag in struct kgd_mem to indicate
that it's imported, so we don't count its VRAM usage during free.

Alternatively, we could to count the VRAM usage during import. That way,
some memory might be counted multiple times (in each importing process).
But I think that's not unreasonable.

Regards,
  Felix


> +
>  	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
>  						(struct kgd_mem *)mem);
>  
> @@ -1382,6 +1392,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
>  		kfd_process_device_remove_obj_handle(
>  			pdd, GET_IDR_HANDLE(args->handle));
>  
> +	pdd->vram_usage -= size;
> +
>  err_unlock:
>  	mutex_unlock(&p->mutex);
>  	return ret;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 43b888b311c7..e7918fc3cee5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -616,6 +616,8 @@ enum kfd_pdd_bound {
>  	PDD_BOUND_SUSPENDED,
>  };
>  
> +#define MAX_VRAM_FILENAME_LEN 11
> +
>  /* Data that is per-process-per device. */
>  struct kfd_process_device {
>  	/*
> @@ -658,6 +660,11 @@ struct kfd_process_device {
>  
>  	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
>  	enum kfd_pdd_bound bound;
> +
> +	/* VRAM usage */
> +	uint64_t vram_usage;
> +	struct attribute attr_vram;
> +	char vram_filename[MAX_VRAM_FILENAME_LEN];
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index fe0cd49d4ea7..c7f1e5d89bd9 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs;
>  static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>  			       char *buffer)
>  {
> -	int val = 0;
> -
>  	if (strcmp(attr->name, "pasid") == 0) {
>  		struct kfd_process *p = container_of(attr, struct kfd_process,
>  						     attr_pasid);
> -		val = p->pasid;
> +
> +		return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
> +	} else if (strncmp(attr->name, "vram_", 5) == 0) {
> +		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
> +							      attr_vram);
> +		if (pdd)
> +			return snprintf(buffer, PAGE_SIZE, "%llu\n", pdd->vram_usage);
>  	} else {
>  		pr_err("Invalid attribute");
>  		return -EINVAL;
>  	}
>  
> -	return snprintf(buffer, PAGE_SIZE, "%d\n", val);
> +	return 0;
>  }
>  
>  static void kfd_procfs_kobj_release(struct kobject *kobj)
> @@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q)
>  	return 0;
>  }
>  
> +int kfd_procfs_add_vram_usage(struct kfd_process *p)
> +{
> +	int ret = 0;
> +	struct kfd_process_device *pdd;
> +
> +	if (!p)
> +		return -EINVAL;
> +
> +	if (!p->kobj)
> +		return -EFAULT;
> +
> +	/* Create proc/<pid>/vram_<gpuid> file for each GPU */
> +	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> +		snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
> +			 pdd->dev->id);
> +		pdd->attr_vram.name = pdd->vram_filename;
> +		pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
> +		sysfs_attr_init(&pdd->attr_vram);
> +		ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
> +		if (ret)
> +			pr_warn("Creating vram usage for gpu id %d failed",
> +				(int)pdd->dev->id);
> +	}
> +
> +	return ret;
> +}
> +
> +
>  void kfd_procfs_del_queue(struct queue *q)
>  {
>  	if (!q)
> @@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
>  							process->kobj);
>  		if (!process->kobj_queues)
>  			pr_warn("Creating KFD proc/queues folder failed");
> +
> +		ret = kfd_procfs_add_vram_usage(process);
> +		if (ret)
> +			pr_warn("Creating vram usage file for pid %d failed",
> +				(int)process->lead_thread->pid);
>  	}
>  out:
>  	if (!IS_ERR(process))
> @@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work)
>  {
>  	struct kfd_process *p = container_of(work, struct kfd_process,
>  					     release_work);
> +	struct kfd_process_device *pdd;
>  
>  	/* Remove the procfs files */
>  	if (p->kobj) {
> @@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work)
>  		kobject_del(p->kobj_queues);
>  		kobject_put(p->kobj_queues);
>  		p->kobj_queues = NULL;
> +
> +		list_for_each_entry(pdd, &p->per_device_data, per_device_list)
> +			sysfs_remove_file(p->kobj, &pdd->attr_vram);
> +
>  		kobject_del(p->kobj);
>  		kobject_put(p->kobj);
>  		p->kobj = NULL;
> @@ -862,6 +904,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  	pdd->bound = PDD_UNBOUND;
>  	pdd->already_dequeued = false;
>  	pdd->runtime_inuse = false;
> +	pdd->vram_usage = 0;
>  	list_add(&pdd->per_device_list, &p->per_device_data);
>  
>  	/* Init idr used for memory handle translation */


More information about the amd-gfx mailing list