[PATCH] drm/amdkfd: Track SDMA utilization per process

Felix Kuehling felix.kuehling at amd.com
Thu May 21 20:27:46 UTC 2020


Hi Mukul,

This looks pretty good. See some suggestions inline.

Am 2020-05-14 um 4:33 p.m. schrieb Mukul Joshi:
> Track SDMA usage on a per process basis and report it through sysfs.
> The value in the sysfs file indicates the amount of time SDMA has
> been in-use by this process since the creation of the process.
> This value is in microsecond granularity.
>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> ---
>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  57 ++++++++
>  .../drm/amd/amdkfd/kfd_device_queue_manager.h |   2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  16 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 130 ++++++++++++++++--
>  4 files changed, 193 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index e9c4867abeff..49f72d0f7be7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager *dqm,
>  		dqm->active_cp_queue_count--;
>  }
>  
> +int read_sdma_queue_counter(struct queue *q, uint64_t *val)
> +{
> +	int ret;
> +	uint64_t tmp = 0;
> +
> +	if (!q || !val)
> +		return -EINVAL;
> +	/*
> +	 * SDMA activity counter is stored at queue's RPTR + 0x8 location.
> +	 */
> +	if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
> +					sizeof(uint64_t)), sizeof(uint64_t))) {
> +		pr_err("Can't access sdma queue activity counter\n");
> +		return -EFAULT;
> +	}
> +
> +	ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
> +						    sizeof(uint64_t)));
> +	if (!ret) {
> +		*val = tmp;
> +	}
> +
> +	return ret;
> +}
> +
> +static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd,
> +						 struct queue *q)
> +{
> +	int ret;
> +	uint64_t val = 0;
> +
> +	if (!pdd)
> +		return -ENODEV;
> +
> +	ret = read_sdma_queue_counter(q, &val);
> +	if (ret) {
> +		pr_err("Failed to read SDMA queue counter for queue: %d\n",
> +				q->properties.queue_id);
> +		return ret;
> +	}
> +
> +	WRITE_ONCE(pdd->sdma_past_activity_counter, pdd->sdma_past_activity_counter + val);
> +
> +	return ret;
> +}
> +
>  static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
>  {
>  	struct kfd_dev *dev = qpd->dqm->dev;
> @@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
>  	if (retval == -ETIME)
>  		qpd->reset_wavefronts = true;
>  
> +	/* Get the SDMA queue stats */
> +        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> +            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
> +                update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
> +        }
> +
>  	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
>  
>  	list_del(&q->list);
> @@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
>  		}
>  	}
>  
> +	/* Get the SDMA queue stats */
> +	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> +	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
> +		update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
> +	}
>  	/*
>  	 * Unconditionally decrement this counter, regardless of the queue's
>  	 * type
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 4afa015c69b1..894bcf877f9e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
>  	mutex_unlock(&dqm->lock_hidden);
>  }
>  
> +int read_sdma_queue_counter(struct queue *q, uint64_t *val);
> +
>  #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index f70f789c3cb3..fae139b77c0a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -633,7 +633,14 @@ enum kfd_pdd_bound {
>  	PDD_BOUND_SUSPENDED,
>  };
>  
> -#define MAX_VRAM_FILENAME_LEN 11
> +#define MAX_SYSFS_FILENAME_LEN 11
> +
> +/*
> + * SDMA counter runs at 100MHz frequency.
> + * We display SDMA activity in microsecond granularity in sysfs.
> + * As a result, the divisor is 100.
> + */
> +#define SDMA_ACTIVITY_DIVISOR  100
>  
>  /* Data that is per-process-per device. */
>  struct kfd_process_device {
> @@ -681,7 +688,12 @@ struct kfd_process_device {
>  	/* VRAM usage */
>  	uint64_t vram_usage;
>  	struct attribute attr_vram;
> -	char vram_filename[MAX_VRAM_FILENAME_LEN];
> +	char vram_filename[MAX_SYSFS_FILENAME_LEN];
> +
> +	/* SDMA activity tracking */
> +	uint64_t sdma_past_activity_counter;
> +	struct attribute attr_sdma;
> +	char sdma_filename[MAX_SYSFS_FILENAME_LEN];
>  };
>  
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index d27221ddcdeb..a20053a32949 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -25,6 +25,7 @@
>  #include <linux/sched.h>
>  #include <linux/sched/mm.h>
>  #include <linux/sched/task.h>
> +#include <linux/mmu_context.h>
>  #include <linux/slab.h>
>  #include <linux/amd-iommu.h>
>  #include <linux/notifier.h>
> @@ -76,6 +77,67 @@ struct kfd_procfs_tree {
>  
>  static struct kfd_procfs_tree procfs;
>  
> +/*
> + * Structure for SDMA activity tracking
> + */
> +struct kfd_sdma_activity_handler_workarea {
> +	struct work_struct sdma_activity_work;
> +	struct kfd_process_device *pdd;
> +	uint64_t *sdma_current_activity_counter;

Why not just make the counter itself part of the structure? You end up
just pointing this to a local variable anyway.


> +};
> +
> +static void kfd_sdma_activity_worker(struct work_struct *work)
> +{
> +	struct kfd_sdma_activity_handler_workarea *workarea;
> +	struct kfd_process_device *pdd;
> +	uint64_t val;
> +	struct mm_struct *mm;
> +	struct queue *q;
> +	struct qcm_process_device *qpd;
> +	struct device_queue_manager *dqm;
> +	int ret = 0;
> +
> +	workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
> +				sdma_activity_work);
> +	if (!workarea)
> +		return;
> +
> +	pdd = workarea->pdd;
> +	dqm = pdd->dev->dqm;
> +	qpd = &pdd->qpd;
> +
> +	if (!pdd || !dqm || !qpd)
> +		return;
> +
> +	mm = get_task_mm(pdd->process->lead_thread);
> +	if (!mm) {
> +		pr_err("Failed to get task mm\n");

This could conceivably happen during process termination and would not
be considered an error.


> +		return;
> +	}
> +
> +	use_mm(mm);
> +
> +	dqm_lock(dqm);
> +
> +	list_for_each_entry(q, &qpd->queues_list, list) {
> +		if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
> +		    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
> +			val = 0;
> +			ret = read_sdma_queue_counter(q, &val);
> +			if (ret)
> +				pr_debug("Failed to read SDMA queue active "
> +					 "counter for queue id: %d",
> +					 q->properties.queue_id);
> +			else
> +				*(workarea->sdma_current_activity_counter) += val;
> +		}
> +	}
> +
> +	dqm_unlock(dqm);
> +	unuse_mm(mm);
> +	mmput(mm);
> +}
> +
>  static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>  			       char *buffer)
>  {
> @@ -89,6 +151,26 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
>  							      attr_vram);
>  		if (pdd)
>  			return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
> +	} else if (strncmp(attr->name, "sdma_", 5) == 0) {
> +		struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
> +							      attr_sdma);
> +		if (pdd) {

This check of the pdd pointer makes no sense. I see we do the same thing
in the for the vram attribute, which should be fixed as well. pdd is a
fixed offset from the addr pointer that's calculated by the container_of
macro. The only way that pdd could be invalid is, if attr was invalid.
Even if attr was NULL, pdd would not be NULL, so this check is
completely pointless.


> +			struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
> +			uint64_t val = 0;
> +
> +			INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
> +					kfd_sdma_activity_worker);
> +
> +			sdma_activity_work_handler.pdd = pdd;
> +			sdma_activity_work_handler.sdma_current_activity_counter = &val;
> +
> +			schedule_work(&sdma_activity_work_handler.sdma_activity_work);
> +
> +			flush_work(&sdma_activity_work_handler.sdma_activity_work);
> +
> +			return snprintf(buffer, PAGE_SIZE, "%llu\n",
> +			        (READ_ONCE(pdd->sdma_past_activity_counter) + val)/SDMA_ACTIVITY_DIVISOR);

If you added the sdma_past_activity_counter in the worker under the DQM
lock, you wouldn't need READ_ONCE/WRITE_ONCE.

Regards,
  Felix


> +		}
>  	} else {
>  		pr_err("Invalid attribute");
>  		return -EINVAL;
> @@ -210,7 +292,24 @@ int kfd_procfs_add_queue(struct queue *q)
>  	return 0;
>  }
>  
> -int kfd_procfs_add_vram_usage(struct kfd_process *p)
> +static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
> +				 char *name)
> +{
> +	int ret = 0;
> +
> +	if (!p || !attr || !name)
> +		return -EINVAL;
> +
> +	attr->name = name;
> +	attr->mode = KFD_SYSFS_FILE_MODE;
> +	sysfs_attr_init(attr);
> +
> +	ret = sysfs_create_file(p->kobj, attr);
> +
> +	return ret;
> +}
> +
> +int kfd_procfs_add_sysfs_files(struct kfd_process *p)
>  {
>  	int ret = 0;
>  	struct kfd_process_device *pdd;
> @@ -221,17 +320,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
>  	if (!p->kobj)
>  		return -EFAULT;
>  
> -	/* Create proc/<pid>/vram_<gpuid> file for each GPU */
> +	/*
> +	 * Create sysfs files for each GPU:
> +	 * - proc/<pid>/vram_<gpuid>
> +	 * - proc/<pid>/sdma_<gpuid>
> +	 */
>  	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> -		snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
> +		snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
>  			 pdd->dev->id);
> -		pdd->attr_vram.name = pdd->vram_filename;
> -		pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
> -		sysfs_attr_init(&pdd->attr_vram);
> -		ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
> +		ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
>  		if (ret)
>  			pr_warn("Creating vram usage for gpu id %d failed",
>  				(int)pdd->dev->id);
> +
> +		snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
> +			 pdd->dev->id);
> +		ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
> +		if (ret)
> +			pr_warn("Creating sdma usage for gpu id %d failed",
> +				(int)pdd->dev->id);
>  	}
>  
>  	return ret;
> @@ -444,9 +551,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
>  		if (!process->kobj_queues)
>  			pr_warn("Creating KFD proc/queues folder failed");
>  
> -		ret = kfd_procfs_add_vram_usage(process);
> +		ret = kfd_procfs_add_sysfs_files(process);
>  		if (ret)
> -			pr_warn("Creating vram usage file for pid %d failed",
> +			pr_warn("Creating sysfs usage file for pid %d failed",
>  				(int)process->lead_thread->pid);
>  	}
>  out:
> @@ -597,8 +704,10 @@ static void kfd_process_wq_release(struct work_struct *work)
>  		kobject_put(p->kobj_queues);
>  		p->kobj_queues = NULL;
>  
> -		list_for_each_entry(pdd, &p->per_device_data, per_device_list)
> +		list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
>  			sysfs_remove_file(p->kobj, &pdd->attr_vram);
> +			sysfs_remove_file(p->kobj, &pdd->attr_sdma);
> +		}
>  
>  		kobject_del(p->kobj);
>  		kobject_put(p->kobj);
> @@ -906,6 +1015,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  	pdd->already_dequeued = false;
>  	pdd->runtime_inuse = false;
>  	pdd->vram_usage = 0;
> +	pdd->sdma_past_activity_counter = 0;
>  	list_add(&pdd->per_device_list, &p->per_device_data);
>  
>  	/* Init idr used for memory handle translation */


More information about the amd-gfx mailing list