[RFC PATCH v3 07/11] drm, cgroup: Add TTM buffer allocation stats

Kenny Ho Kenny.Ho at amd.com
Wed Jun 26 15:05:18 UTC 2019


The drm resource being measured is the TTM (Translation Table Manager)
buffers.  TTM manages different types of memory that a GPU might access.
These memory types include dedicated Video RAM (VRAM) and host/system
memory accessible through IOMMU (GART/GTT).  TTM is currently used by
multiple drm drivers (amd, ast, bochs, cirrus, hisilicon, maga200,
nouveau, qxl, virtio, vmwgfx.)

drm.memory.stats
        A read-only nested-keyed file which exists on all cgroups.
        Each entry is keyed by the drm device's major:minor.  The
        following nested keys are defined.

          ======         =============================================
          system         Host/system memory
          tt             Host memory used by the drm device (GTT/GART)
          vram           Video RAM used by the drm device
          priv           Other drm device, vendor specific memory
          ======         =============================================

        Reading returns the following::

        226:0 system=0 tt=0 vram=0 priv=0
        226:1 system=0 tt=9035776 vram=17768448 priv=16809984
        226:2 system=0 tt=9035776 vram=17768448 priv=16809984

drm.memory.evict.stats
        A read-only flat-keyed file which exists on all cgroups.  Each
        entry is keyed by the drm device's major:minor.

        Total number of evictions.

Change-Id: Ice2c4cc845051229549bebeb6aa2d7d6153bdf6a
Signed-off-by: Kenny Ho <Kenny.Ho at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   3 +-
 drivers/gpu/drm/ttm/ttm_bo.c            |  30 +++++++
 drivers/gpu/drm/ttm/ttm_bo_util.c       |   4 +
 include/drm/drm_cgroup.h                |  19 ++++
 include/drm/ttm/ttm_bo_api.h            |   2 +
 include/drm/ttm/ttm_bo_driver.h         |   8 ++
 include/linux/cgroup_drm.h              |   4 +
 kernel/cgroup/drm.c                     | 113 ++++++++++++++++++++++++
 8 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e9ecc3953673..a8dfc78ed45f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1678,8 +1678,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	mutex_init(&adev->mman.gtt_window_lock);
 
 	/* No others user of address space so set it to 0 */
-	r = ttm_bo_device_init(&adev->mman.bdev,
+	r = ttm_bo_device_init_tmp(&adev->mman.bdev,
 			       &amdgpu_bo_driver,
+			       adev->ddev,
 			       adev->ddev->anon_inode->i_mapping,
 			       adev->need_dma32);
 	if (r) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..e9f70547f0ad 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -34,6 +34,7 @@
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/drm_cgroup.h>
 #include <linux/jiffies.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
@@ -42,6 +43,7 @@
 #include <linux/module.h>
 #include <linux/atomic.h>
 #include <linux/reservation.h>
+#include <linux/cgroup_drm.h>
 
 static void ttm_bo_global_kobj_release(struct kobject *kobj);
 
@@ -151,6 +153,10 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	struct ttm_bo_device *bdev = bo->bdev;
 	size_t acc_size = bo->acc_size;
 
+	if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for all
+		drmcgrp_unchg_mem(bo);
+	put_drmcgrp(bo->drmcgrp);
+
 	BUG_ON(kref_read(&bo->list_kref));
 	BUG_ON(kref_read(&bo->kref));
 	BUG_ON(atomic_read(&bo->cpu_writers));
@@ -353,6 +359,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 		if (bo->mem.mem_type == TTM_PL_SYSTEM) {
 			if (bdev->driver->move_notify)
 				bdev->driver->move_notify(bo, evict, mem);
+			if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for all
+				drmcgrp_mem_track_move(bo, evict, mem);
 			bo->mem = *mem;
 			mem->mm_node = NULL;
 			goto moved;
@@ -361,6 +369,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 
 	if (bdev->driver->move_notify)
 		bdev->driver->move_notify(bo, evict, mem);
+	if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for all
+		drmcgrp_mem_track_move(bo, evict, mem);
 
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
@@ -374,6 +384,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 		if (bdev->driver->move_notify) {
 			swap(*mem, bo->mem);
 			bdev->driver->move_notify(bo, false, mem);
+			if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for all
+				drmcgrp_mem_track_move(bo, evict, mem);
 			swap(*mem, bo->mem);
 		}
 
@@ -1275,6 +1287,10 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev,
 		WARN_ON(!locked);
 	}
 
+	bo->drmcgrp = get_drmcgrp(current);
+	if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for all
+		drmcgrp_chg_mem(bo);
+
 	if (likely(!ret))
 		ret = ttm_bo_validate(bo, placement, ctx);
 
@@ -1666,6 +1682,20 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 }
 EXPORT_SYMBOL(ttm_bo_device_init);
 
+/* TODO merge with official function when implementation finalized*/
+int ttm_bo_device_init_tmp(struct ttm_bo_device *bdev,
+		struct ttm_bo_driver *driver,
+		struct drm_device *ddev,
+		struct address_space *mapping,
+		bool need_dma32)
+{
+	int ret = ttm_bo_device_init(bdev, driver, mapping, need_dma32);
+
+	bdev->ddev = ddev;
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_device_init_tmp);
+
 /*
  * buffer object vm functions.
  */
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 895d77d799e4..4ed7847c21f4 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -32,6 +32,7 @@
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/drm_vma_manager.h>
+#include <drm/drm_cgroup.h>
 #include <linux/io.h>
 #include <linux/highmem.h>
 #include <linux/wait.h>
@@ -522,6 +523,9 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	ret = reservation_object_trylock(fbo->base.resv);
 	WARN_ON(!ret);
 
+	if (bo->bdev->ddev != NULL) // TODO: remove after ddev initiazlied for all
+		drmcgrp_chg_mem(bo);
+
 	*new_obj = &fbo->base;
 	return 0;
 }
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 8711b7c5f7bf..48ab5450cf17 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -5,6 +5,7 @@
 #define __DRM_CGROUP_H__
 
 #include <linux/cgroup_drm.h>
+#include <drm/ttm/ttm_bo_api.h>
 
 #ifdef CONFIG_CGROUP_DRM
 
@@ -18,6 +19,11 @@ void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
 		size_t size);
 bool drmcgrp_bo_can_allocate(struct task_struct *task, struct drm_device *dev,
 		size_t size);
+void drmcgrp_chg_mem(struct ttm_buffer_object *tbo);
+void drmcgrp_unchg_mem(struct ttm_buffer_object *tbo);
+void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
+		struct ttm_mem_reg *new_mem);
+
 #else
 static inline int drmcgrp_register_device(struct drm_device *device)
 {
@@ -50,5 +56,18 @@ static inline bool drmcgrp_bo_can_allocate(struct task_struct *task,
 {
 	return true;
 }
+
+static inline void drmcgrp_chg_mem(struct ttm_buffer_object *tbo)
+{
+}
+
+static inline void drmcgrp_unchg_mem(struct ttm_buffer_object *tbo)
+{
+}
+
+static inline void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo,
+		bool evict, struct ttm_mem_reg *new_mem)
+{
+}
 #endif /* CONFIG_CGROUP_DRM */
 #endif /* __DRM_CGROUP_H__ */
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 49d9cdfc58f2..ae1bb6daec81 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -128,6 +128,7 @@ struct ttm_tt;
  * struct ttm_buffer_object
  *
  * @bdev: Pointer to the buffer object device structure.
+ * @drmcgrp: DRM cgroup this object belongs to.
  * @type: The bo type.
  * @destroy: Destruction function. If NULL, kfree is used.
  * @num_pages: Actual number of pages.
@@ -174,6 +175,7 @@ struct ttm_buffer_object {
 	 */
 
 	struct ttm_bo_device *bdev;
+	struct drmcgrp *drmcgrp;
 	enum ttm_bo_type type;
 	void (*destroy) (struct ttm_buffer_object *);
 	unsigned long num_pages;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c008346c2401..4cbcb41e5aa9 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -30,6 +30,7 @@
 #ifndef _TTM_BO_DRIVER_H_
 #define _TTM_BO_DRIVER_H_
 
+#include <drm/drm_device.h>
 #include <drm/drm_mm.h>
 #include <drm/drm_vma_manager.h>
 #include <linux/workqueue.h>
@@ -442,6 +443,7 @@ extern struct ttm_bo_global {
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
  * @vma_manager: Address space manager
+ * @ddev: Pointer to struct drm_device that this ttm_bo_device belongs to
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -460,6 +462,7 @@ struct ttm_bo_device {
 	struct ttm_bo_global *glob;
 	struct ttm_bo_driver *driver;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+	struct drm_device *ddev;
 
 	/*
 	 * Protected by internal locks.
@@ -598,6 +601,11 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		       struct address_space *mapping,
 		       bool need_dma32);
 
+int ttm_bo_device_init_tmp(struct ttm_bo_device *bdev,
+		       struct ttm_bo_driver *driver,
+		       struct drm_device *ddev,
+		       struct address_space *mapping,
+		       bool need_dma32);
 /**
  * ttm_bo_unmap_virtual
  *
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index e4400b21ab8e..141bea06f74c 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -9,6 +9,7 @@
 #include <linux/mutex.h>
 #include <linux/cgroup.h>
 #include <drm/drm_file.h>
+#include <drm/ttm/ttm_placement.h>
 
 /* limit defined per the way drm_minor_alloc operates */
 #define MAX_DRM_DEV (64 * DRM_MINOR_RENDER)
@@ -22,6 +23,9 @@ struct drmcgrp_device_resource {
 	size_t			bo_limits_peak_allocated;
 
 	s64			bo_stats_count_allocated;
+
+	s64			mem_stats[TTM_PL_PRIV+1];
+	s64			mem_stats_evict;
 };
 
 struct drmcgrp {
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 9144f93b851f..5aee42a628c1 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -8,6 +8,8 @@
 #include <linux/mutex.h>
 #include <linux/cgroup_drm.h>
 #include <linux/kernel.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
 #include <drm/drm_device.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_cgroup.h>
@@ -34,6 +36,8 @@ enum drmcgrp_res_type {
 	DRMCGRP_TYPE_BO_TOTAL,
 	DRMCGRP_TYPE_BO_PEAK,
 	DRMCGRP_TYPE_BO_COUNT,
+	DRMCGRP_TYPE_MEM,
+	DRMCGRP_TYPE_MEM_EVICT,
 };
 
 enum drmcgrp_file_type {
@@ -42,6 +46,13 @@ enum drmcgrp_file_type {
 	DRMCGRP_FTYPE_DEFAULT,
 };
 
+static char const *ttm_placement_names[] = {
+	[TTM_PL_SYSTEM] = "system",
+	[TTM_PL_TT]     = "tt",
+	[TTM_PL_VRAM]   = "vram",
+	[TTM_PL_PRIV]   = "priv",
+};
+
 /* indexed by drm_minor for access speed */
 static struct drmcgrp_device	*known_drmcgrp_devs[MAX_DRM_DEV];
 
@@ -134,6 +145,7 @@ drmcgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 static inline void drmcgrp_print_stats(struct drmcgrp_device_resource *ddr,
 		struct seq_file *sf, enum drmcgrp_res_type type)
 {
+	int i;
 	if (ddr == NULL) {
 		seq_puts(sf, "\n");
 		return;
@@ -149,6 +161,16 @@ static inline void drmcgrp_print_stats(struct drmcgrp_device_resource *ddr,
 	case DRMCGRP_TYPE_BO_COUNT:
 		seq_printf(sf, "%lld\n", ddr->bo_stats_count_allocated);
 		break;
+	case DRMCGRP_TYPE_MEM:
+		for (i = 0; i <= TTM_PL_PRIV; i++) {
+			seq_printf(sf, "%s=%lld ", ttm_placement_names[i],
+					ddr->mem_stats[i]);
+		}
+		seq_puts(sf, "\n");
+		break;
+	case DRMCGRP_TYPE_MEM_EVICT:
+		seq_printf(sf, "%lld\n", ddr->mem_stats_evict);
+		break;
 	default:
 		seq_puts(sf, "\n");
 		break;
@@ -406,6 +428,18 @@ struct cftype files[] = {
 		.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_BO_COUNT,
 						DRMCGRP_FTYPE_STATS),
 	},
+	{
+		.name = "memory.stats",
+		.seq_show = drmcgrp_bo_show,
+		.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM,
+						DRMCGRP_FTYPE_STATS),
+	},
+	{
+		.name = "memory.evict.stats",
+		.seq_show = drmcgrp_bo_show,
+		.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM_EVICT,
+						DRMCGRP_FTYPE_STATS),
+	},
 	{ }	/* terminate */
 };
 
@@ -555,3 +589,82 @@ void drmcgrp_unchg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
 	mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
 }
 EXPORT_SYMBOL(drmcgrp_unchg_bo_alloc);
+
+void drmcgrp_chg_mem(struct ttm_buffer_object *tbo)
+{
+	struct drm_device *dev = tbo->bdev->ddev;
+	struct drmcgrp *drmcgrp = tbo->drmcgrp;
+	int devIdx = dev->primary->index;
+	s64 size = (s64)(tbo->mem.size);
+	int mem_type = tbo->mem.mem_type;
+	struct drmcgrp_device_resource *ddr;
+
+	if (drmcgrp == NULL || known_drmcgrp_devs[devIdx] == NULL)
+		return;
+
+	mem_type = mem_type > TTM_PL_PRIV ? TTM_PL_PRIV : mem_type;
+
+	mutex_lock(&known_drmcgrp_devs[devIdx]->mutex);
+	for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp)) {
+		ddr = drmcgrp->dev_resources[devIdx];
+		ddr->mem_stats[mem_type] += size;
+	}
+	mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
+}
+EXPORT_SYMBOL(drmcgrp_chg_mem);
+
+void drmcgrp_unchg_mem(struct ttm_buffer_object *tbo)
+{
+	struct drm_device *dev = tbo->bdev->ddev;
+	struct drmcgrp *drmcgrp = tbo->drmcgrp;
+	int devIdx = dev->primary->index;
+	s64 size = (s64)(tbo->mem.size);
+	int mem_type = tbo->mem.mem_type;
+	struct drmcgrp_device_resource *ddr;
+
+	if (drmcgrp == NULL || known_drmcgrp_devs[devIdx] == NULL)
+		return;
+
+	mem_type = mem_type > TTM_PL_PRIV ? TTM_PL_PRIV : mem_type;
+
+	mutex_lock(&known_drmcgrp_devs[devIdx]->mutex);
+	for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp)) {
+		ddr = drmcgrp->dev_resources[devIdx];
+		ddr->mem_stats[mem_type] -= size;
+	}
+	mutex_unlock(&known_drmcgrp_devs[devIdx]->mutex);
+}
+EXPORT_SYMBOL(drmcgrp_unchg_mem);
+
+void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
+		struct ttm_mem_reg *new_mem)
+{
+	struct drm_device *dev = old_bo->bdev->ddev;
+	struct drmcgrp *drmcgrp = old_bo->drmcgrp;
+	s64 move_in_bytes = (s64)(old_bo->mem.size);
+	int devIdx = dev->primary->index;
+	int old_mem_type = old_bo->mem.mem_type;
+	int new_mem_type = new_mem->mem_type;
+	struct drmcgrp_device_resource *ddr;
+	struct drmcgrp_device *known_dev;
+
+	known_dev = known_drmcgrp_devs[devIdx];
+
+	if (drmcgrp == NULL || known_dev == NULL)
+		return;
+
+	old_mem_type = old_mem_type > TTM_PL_PRIV ? TTM_PL_PRIV : old_mem_type;
+	new_mem_type = new_mem_type > TTM_PL_PRIV ? TTM_PL_PRIV : new_mem_type;
+
+	mutex_lock(&known_dev->mutex);
+	for ( ; drmcgrp != NULL; drmcgrp = parent_drmcgrp(drmcgrp)) {
+		ddr = drmcgrp->dev_resources[devIdx];
+		ddr->mem_stats[old_mem_type] -= move_in_bytes;
+		ddr->mem_stats[new_mem_type] += move_in_bytes;
+
+		if (evict)
+			ddr->mem_stats_evict++;
+	}
+	mutex_unlock(&known_dev->mutex);
+}
+EXPORT_SYMBOL(drmcgrp_mem_track_move);
-- 
2.21.0



More information about the amd-gfx mailing list