[PATCH v3] drm/amdkfd: Track SDMA utilization per process
Mukul Joshi
mukul.joshi at amd.com
Tue May 26 17:26:59 UTC 2020
Track SDMA usage on a per process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has
been in-use by this process since the creation of the process.
This value is in microsecond granularity.
v2:
- Remove unnecessary checks for pdd is kfd_procfs_show().
- Make counter part of the kfd_sdma_activity_handler_workarea
structure.
v3:
- Remove READ_ONCE/WRITE_ONCE while updating acitivty
counter.
- Add updation of past acitivt counter under dqm_lock.
Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 57 ++++++++
.../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 16 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 137 ++++++++++++++++--
4 files changed, 198 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e9c4867abeff..6293017bd5bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--;
}
+int read_sdma_queue_counter(struct queue *q, uint64_t *val)
+{
+ int ret;
+ uint64_t tmp = 0;
+
+ if (!q || !val)
+ return -EINVAL;
+ /*
+ * SDMA activity counter is stored at queue's RPTR + 0x8 location.
+ */
+ if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
+ sizeof(uint64_t)), sizeof(uint64_t))) {
+ pr_err("Can't access sdma queue activity counter\n");
+ return -EFAULT;
+ }
+
+ ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
+ sizeof(uint64_t)));
+ if (!ret) {
+ *val = tmp;
+ }
+
+ return ret;
+}
+
+static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd,
+ struct queue *q)
+{
+ int ret;
+ uint64_t val = 0;
+
+ if (!pdd)
+ return -ENODEV;
+
+ ret = read_sdma_queue_counter(q, &val);
+ if (ret) {
+ pr_err("Failed to read SDMA queue counter for queue: %d\n",
+ q->properties.queue_id);
+ return ret;
+ }
+
+ pdd->sdma_past_activity_counter += val;
+
+ return ret;
+}
+
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
struct kfd_dev *dev = qpd->dqm->dev;
@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME)
qpd->reset_wavefronts = true;
+ /* Get the SDMA queue stats */
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+ }
+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
list_del(&q->list);
@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
}
}
+ /* Get the SDMA queue stats */
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
+ }
/*
* Unconditionally decrement this counter, regardless of the queue's
* type
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 4afa015c69b1..894bcf877f9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
mutex_unlock(&dqm->lock_hidden);
}
+int read_sdma_queue_counter(struct queue *q, uint64_t *val);
+
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f70f789c3cb3..fae139b77c0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -633,7 +633,14 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED,
};
-#define MAX_VRAM_FILENAME_LEN 11
+#define MAX_SYSFS_FILENAME_LEN 11
+
+/*
+ * SDMA counter runs at 100MHz frequency.
+ * We display SDMA activity in microsecond granularity in sysfs.
+ * As a result, the divisor is 100.
+ */
+#define SDMA_ACTIVITY_DIVISOR 100
/* Data that is per-process-per device. */
struct kfd_process_device {
@@ -681,7 +688,12 @@ struct kfd_process_device {
/* VRAM usage */
uint64_t vram_usage;
struct attribute attr_vram;
- char vram_filename[MAX_VRAM_FILENAME_LEN];
+ char vram_filename[MAX_SYSFS_FILENAME_LEN];
+
+ /* SDMA activity tracking */
+ uint64_t sdma_past_activity_counter;
+ struct attribute attr_sdma;
+ char sdma_filename[MAX_SYSFS_FILENAME_LEN];
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d27221ddcdeb..db010c5da144 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -25,6 +25,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
+#include <linux/mmu_context.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>
#include <linux/notifier.h>
@@ -76,6 +77,74 @@ struct kfd_procfs_tree {
static struct kfd_procfs_tree procfs;
+/*
+ * Structure for SDMA activity tracking
+ */
+struct kfd_sdma_activity_handler_workarea {
+ struct work_struct sdma_activity_work;
+ struct kfd_process_device *pdd;
+ uint64_t sdma_activity_counter;
+};
+
+static void kfd_sdma_activity_worker(struct work_struct *work)
+{
+ struct kfd_sdma_activity_handler_workarea *workarea;
+ struct kfd_process_device *pdd;
+ uint64_t val;
+ struct mm_struct *mm;
+ struct queue *q;
+ struct qcm_process_device *qpd;
+ struct device_queue_manager *dqm;
+ int ret = 0;
+
+ workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
+ sdma_activity_work);
+ if (!workarea)
+ return;
+
+ pdd = workarea->pdd;
+ dqm = pdd->dev->dqm;
+ qpd = &pdd->qpd;
+
+ if (!pdd || !dqm || !qpd)
+ return;
+
+ mm = get_task_mm(pdd->process->lead_thread);
+ if (!mm) {
+ return;
+ }
+
+ use_mm(mm);
+
+ dqm_lock(dqm);
+
+ /*
+ * Total SDMA activity is current SDMA activity + past SDMA activity
+ */
+ workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
+
+ /*
+ * Get the current activity counters for all active SDMA queues
+ */
+ list_for_each_entry(q, &qpd->queues_list, list) {
+ if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
+ (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
+ val = 0;
+ ret = read_sdma_queue_counter(q, &val);
+ if (ret)
+ pr_debug("Failed to read SDMA queue active "
+ "counter for queue id: %d",
+ q->properties.queue_id);
+ else
+ workarea->sdma_activity_counter += val;
+ }
+ }
+
+ dqm_unlock(dqm);
+ unuse_mm(mm);
+ mmput(mm);
+}
+
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
@@ -87,8 +156,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
} else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
- if (pdd)
- return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
+ } else if (strncmp(attr->name, "sdma_", 5) == 0) {
+ struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
+ attr_sdma);
+ struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
+
+ INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
+ kfd_sdma_activity_worker);
+
+ sdma_activity_work_handler.pdd = pdd;
+
+ schedule_work(&sdma_activity_work_handler.sdma_activity_work);
+
+ flush_work(&sdma_activity_work_handler.sdma_activity_work);
+
+ return snprintf(buffer, PAGE_SIZE, "%llu\n",
+ (sdma_activity_work_handler.sdma_activity_counter)/
+ SDMA_ACTIVITY_DIVISOR);
} else {
pr_err("Invalid attribute");
return -EINVAL;
@@ -210,7 +295,24 @@ int kfd_procfs_add_queue(struct queue *q)
return 0;
}
-int kfd_procfs_add_vram_usage(struct kfd_process *p)
+static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
+ char *name)
+{
+ int ret = 0;
+
+ if (!p || !attr || !name)
+ return -EINVAL;
+
+ attr->name = name;
+ attr->mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(attr);
+
+ ret = sysfs_create_file(p->kobj, attr);
+
+ return ret;
+}
+
+int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{
int ret = 0;
struct kfd_process_device *pdd;
@@ -221,17 +323,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
if (!p->kobj)
return -EFAULT;
- /* Create proc/<pid>/vram_<gpuid> file for each GPU */
+ /*
+ * Create sysfs files for each GPU:
+ * - proc/<pid>/vram_<gpuid>
+ * - proc/<pid>/sdma_<gpuid>
+ */
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
- snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u",
+ snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
pdd->dev->id);
- pdd->attr_vram.name = pdd->vram_filename;
- pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&pdd->attr_vram);
- ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
+ ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
if (ret)
pr_warn("Creating vram usage for gpu id %d failed",
(int)pdd->dev->id);
+
+ snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
+ pdd->dev->id);
+ ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
+ if (ret)
+ pr_warn("Creating sdma usage for gpu id %d failed",
+ (int)pdd->dev->id);
}
return ret;
@@ -444,9 +554,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed");
- ret = kfd_procfs_add_vram_usage(process);
+ ret = kfd_procfs_add_sysfs_files(process);
if (ret)
- pr_warn("Creating vram usage file for pid %d failed",
+ pr_warn("Creating sysfs usage file for pid %d failed",
(int)process->lead_thread->pid);
}
out:
@@ -597,8 +707,10 @@ static void kfd_process_wq_release(struct work_struct *work)
kobject_put(p->kobj_queues);
p->kobj_queues = NULL;
- list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
sysfs_remove_file(p->kobj, &pdd->attr_vram);
+ sysfs_remove_file(p->kobj, &pdd->attr_sdma);
+ }
kobject_del(p->kobj);
kobject_put(p->kobj);
@@ -906,6 +1018,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->already_dequeued = false;
pdd->runtime_inuse = false;
pdd->vram_usage = 0;
+ pdd->sdma_past_activity_counter = 0;
list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */
--
2.17.1
More information about the amd-gfx
mailing list