[RFC PATCH v3 10/11] drm, cgroup: Add soft VRAM limit

Kenny Ho Kenny.Ho at amd.com
Wed Jun 26 15:05:21 UTC 2019


The drm resource being limited is the TTM (Translation Table Manager)
buffers.  TTM manages different types of memory that a GPU might access.
These memory types include dedicated Video RAM (VRAM) and host/system
memory accessible through IOMMU (GART/GTT).  TTM is currently used by
multiple drm drivers (amd, ast, bochs, cirrus, hisilicon, mgag200,
nouveau, qxl, virtio, vmwgfx).

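TTM buffers belonging to drm cgroups that are under memory pressure (i.e.
cgroups whose VRAM usage has reached their drm.memory.high soft limit, and
all of their descendants) will be selected for eviction first.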

drm.memory.high
        A read-write nested-keyed file which exists on all cgroups.
        Each entry is keyed by the drm device's major:minor.  The
        following nested keys are defined.

          ====         ==============================================
          vram         Video RAM soft limit for a drm device in bytes
          ====         ==============================================

        Reading returns the following::

        226:0 vram=0
        226:1 vram=17768448
        226:2 vram=17768448

drm.memory.default
        A read-only nested-keyed file which exists on the root cgroup.
        Each entry is keyed by the drm device's major:minor.  The
        following nested keys are defined.

          ====         ================================
          vram         Video RAM default limit in bytes
          ====         ================================

        Reading returns the following::

        226:0 vram=0
        226:1 vram=17768448
        226:2 vram=17768448

Change-Id: I7988e28a453b53140b40a28c176239acbc81d491
Signed-off-by: Kenny Ho <Kenny.Ho at amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c |   7 ++
 include/drm/drm_cgroup.h     |  15 ++++
 include/linux/cgroup_drm.h   |   2 +
 kernel/cgroup/drm.c          | 145 +++++++++++++++++++++++++++++++++++
 4 files changed, 169 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index f06c2b9d8a4a..79c530f4a198 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -806,12 +806,19 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo = NULL;
 	bool locked = false;
+        bool check_drmcgrp;
 	unsigned i;
 	int ret;
 
+	check_drmcgrp = drmcgrp_mem_pressure_scan(bdev, mem_type);
+
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
+			if (check_drmcgrp &&
+				!drmcgrp_mem_should_evict(bo, mem_type))
+				continue;
+
 			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
 				continue;
 
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 9b1dbd6a4eca..360c1e6c809f 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -6,6 +6,7 @@
 
 #include <linux/cgroup_drm.h>
 #include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_bo_driver.h>
 
 #ifdef CONFIG_CGROUP_DRM
 
@@ -25,6 +26,8 @@ void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
 		struct ttm_mem_reg *new_mem);
 unsigned int drmcgrp_get_mem_bw_period_in_us(struct ttm_buffer_object *tbo);
 bool drmcgrp_mem_can_move(struct ttm_buffer_object *tbo);
+bool drmcgrp_mem_pressure_scan(struct ttm_bo_device *bdev, unsigned type);
+bool drmcgrp_mem_should_evict(struct ttm_buffer_object *tbo, unsigned type);
 
 #else
 static inline int drmcgrp_register_device(struct drm_device *device)
@@ -82,5 +85,17 @@ static inline bool drmcgrp_mem_can_move(struct ttm_buffer_object *tbo)
 {
 	return true;
 }
+
+static inline bool drmcgrp_mem_pressure_scan(struct ttm_bo_device *bdev,
+		unsigned type)
+{
+	return false; /* !CONFIG_CGROUP_DRM: no cgroup is ever under pressure */
+}
+
+static inline bool drmcgrp_mem_should_evict(struct ttm_buffer_object *tbo,
+		unsigned type)
+{
+	return true; /* !CONFIG_CGROUP_DRM: never veto an eviction */
+}
 #endif /* CONFIG_CGROUP_DRM */
 #endif /* __DRM_CGROUP_H__ */
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 94828da2104a..52ef02eaac70 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -35,6 +35,8 @@ struct drmcgrp_device_resource {
 
 	s64			mem_stats[TTM_PL_PRIV+1];
 	s64			mem_peaks[TTM_PL_PRIV+1];
+	s64			mem_highs[TTM_PL_PRIV+1];
+	bool			mem_pressure[TTM_PL_PRIV+1];
 	s64			mem_stats_evict;
 
 	s64			mem_bw_stats_last_update_us;
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index bbc6612200a4..1ce13db36ce9 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -29,6 +29,8 @@ struct drmcgrp_device {
 
 	s64			mem_bw_bytes_in_period_default;
 	s64			mem_bw_avg_bytes_per_us_default;
+
+	s64			mem_highs_default[TTM_PL_PRIV+1];
 };
 
 #define DRMCG_CTF_PRIV_SIZE 3
@@ -114,6 +116,8 @@ static inline int init_drmcgrp_single(struct drmcgrp *drmcgrp, int minor)
 
 	/* set defaults here */
 	if (known_drmcgrp_devs[minor] != NULL) {
+		int i;
+
 		ddr->bo_limits_total_allocated =
 		  known_drmcgrp_devs[minor]->bo_limits_total_allocated_default;
 
@@ -125,6 +129,11 @@ static inline int init_drmcgrp_single(struct drmcgrp *drmcgrp, int minor)
 
 		ddr->mem_bw_limits_avg_bytes_per_us =
 		  known_drmcgrp_devs[minor]->mem_bw_avg_bytes_per_us_default;
+
+		for (i = 0; i <= TTM_PL_PRIV; i++) {
+			ddr->mem_highs[i] =
+			known_drmcgrp_devs[minor]->mem_highs_default[i];
+		}
 	}
 
 	return 0;
@@ -274,6 +283,11 @@ static inline void drmcgrp_print_limits(struct drmcgrp_device_resource *ddr,
 	case DRMCGRP_TYPE_BO_PEAK:
 		seq_printf(sf, "%zu\n", ddr->bo_limits_peak_allocated);
 		break;
+	case DRMCGRP_TYPE_MEM:
+		seq_printf(sf, "%s=%lld\n",
+				ttm_placement_names[TTM_PL_VRAM],
+				ddr->mem_highs[TTM_PL_VRAM]);
+		break;
 	case DRMCGRP_TYPE_BANDWIDTH_PERIOD_BURST:
 		seq_printf(sf, "%lld\n",
 			known_drmcgrp_devs[minor]->mem_bw_limits_period_in_us);
@@ -308,6 +322,11 @@ static inline void drmcgrp_print_default(struct drmcgrp_device *ddev,
 		seq_printf(sf, "%zu\n",
 				ddev->bo_limits_peak_allocated_default);
 		break;
+	case DRMCGRP_TYPE_MEM:
+		seq_printf(sf, "%s=%lld\n",
+				ttm_placement_names[TTM_PL_VRAM],
+				ddev->mem_highs_default[TTM_PL_VRAM]);
+		break;
 	case DRMCGRP_TYPE_BANDWIDTH_PERIOD_BURST:
 		seq_printf(sf, "%lld\n",
 				ddev->mem_bw_limits_period_in_us_default);
@@ -552,6 +571,38 @@ ssize_t drmcgrp_bo_limit_write(struct kernfs_open_file *of, char *buf,
 				}
 			}
 			break;
+		case DRMCGRP_TYPE_MEM:
+			nested = strstrip(sattr);
+
+			while (nested != NULL) {
+				attr = strsep(&nested, " ");
+
+				if (sscanf(attr, "vram=%s",
+					 sval) == 1) {
+					p_max = parent == NULL ? S64_MAX :
+						parent->
+						dev_resources[minor]->
+						mem_highs[TTM_PL_VRAM];
+
+					rc = drmcgrp_process_limit_val(sval,
+						true,
+						ddev->
+						mem_highs_default[TTM_PL_VRAM],
+						p_max,
+						&val);
+
+					if (rc || val < 0) {
+						drmcgrp_pr_cft_err(drmcgrp,
+								cft_name,
+								minor);
+						continue;
+					}
+
+					ddr->mem_highs[TTM_PL_VRAM]=val;
+					continue;
+				}
+			}
+			break;
 		default:
 			break;
 		}
@@ -624,6 +675,20 @@ struct cftype files[] = {
 		.seq_show = drmcgrp_bo_show,
 		.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM_PEAK,
 						DRMCGRP_FTYPE_STATS),
+        },
+	{
+		.name = "memory.default",
+		.seq_show = drmcgrp_bo_show,
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM,
+						DRMCGRP_FTYPE_DEFAULT),
+	},
+	{
+		.name = "memory.high",
+		.write = drmcgrp_bo_limit_write,
+		.seq_show = drmcgrp_bo_show,
+		.private = DRMCG_CTF_PRIV(DRMCGRP_TYPE_MEM,
+						DRMCGRP_FTYPE_LIMIT),
 	},
 	{
 		.name = "burst_bw_period_in_us",
@@ -674,6 +739,7 @@ struct cgroup_subsys drm_cgrp_subsys = {
 int drmcgrp_register_device(struct drm_device *dev)
 {
 	struct drmcgrp_device *ddev;
+	int i;
 
 	ddev = kzalloc(sizeof(struct drmcgrp_device), GFP_KERNEL);
 	if (!ddev)
@@ -687,6 +753,10 @@ int drmcgrp_register_device(struct drm_device *dev)
 	ddev->mem_bw_bytes_in_period_default = S64_MAX;
 	ddev->mem_bw_avg_bytes_per_us_default = 65536;
 
+	for (i = 0; i <= TTM_PL_PRIV; i++) {
+		ddev->mem_highs_default[i] = S64_MAX;
+	}
+
 	mutex_init(&ddev->mutex);
 
 	mutex_lock(&drmcgrp_mutex);
@@ -991,3 +1061,78 @@ bool drmcgrp_mem_can_move(struct ttm_buffer_object *tbo)
 	return result;
 }
 EXPORT_SYMBOL(drmcgrp_mem_can_move);
+
+static inline void drmcgrp_mem_set_pressure(struct drmcgrp *drmcgrp,
+		int devIdx, unsigned mem_type, bool pressure_val)
+{
+	struct cgroup_subsys_state *css;
+
+	/* Pre-order walk from @drmcgrp: stamp the flag on each node visited. */
+	css_for_each_descendant_pre(css, &drmcgrp->css) {
+		struct drmcgrp_device_resource *res;
+
+		res = css_drmcgrp(css)->dev_resources[devIdx];
+		res->mem_pressure[mem_type] = pressure_val;
+	}
+}
+
+static inline bool drmcgrp_mem_check(struct drmcgrp *drmcgrp, int devIdx,
+		unsigned mem_type)
+{
+	struct drmcgrp_device_resource *res = drmcgrp->dev_resources[devIdx];
+
+	/* Flag already set: no need to re-evaluate or re-propagate it. */
+	if (res->mem_pressure[mem_type])
+		return true;
+
+	if (res->mem_stats[mem_type] < res->mem_highs[mem_type])
+		return false;
+
+	/* Usage reached the soft limit: flag this cgroup and its subtree. */
+	drmcgrp_mem_set_pressure(drmcgrp, devIdx, mem_type, true);
+	return true;
+}
+
+bool drmcgrp_mem_pressure_scan(struct ttm_bo_device *bdev, unsigned type)
+{
+	struct drm_device *dev = bdev->ddev;
+	struct cgroup_subsys_state *pos;
+	int devIdx;
+	bool result = false;
+
+	/*
+	 * Only a VRAM soft limit exists for now, so every other memory type
+	 * reports "no pressure".  TODO: replace the NULL check with BUG_ON.
+	 */
+	if (dev == NULL || type != TTM_PL_VRAM)
+		return false;
+
+	devIdx = dev->primary->index;
+
+	rcu_read_lock();
+	/* Start from a clean slate: clear the flag on the whole hierarchy. */
+	drmcgrp_mem_set_pressure(root_drmcgrp, devIdx, type, false);
+
+	/* Pre-order walk: a cgroup at/over its limit flags its subtree too. */
+	css_for_each_descendant_pre(pos, &root_drmcgrp->css)
+		result |= drmcgrp_mem_check(css_drmcgrp(pos), devIdx, type);
+	rcu_read_unlock();
+
+	return result;
+}
+EXPORT_SYMBOL(drmcgrp_mem_pressure_scan);
+
+/* Report whether @tbo's cgroup is flagged as under pressure for @type. */
+bool drmcgrp_mem_should_evict(struct ttm_buffer_object *tbo, unsigned type)
+{
+	struct drm_device *dev = tbo->bdev->ddev;
+	struct drmcgrp_device_resource *ddr;
+
+	/* TODO: replace the NULL check with BUG_ON */
+	if (dev == NULL || type > TTM_PL_PRIV)
+		return true;	/* no device or bad index: never veto */
+
+	ddr = tbo->drmcgrp->dev_resources[dev->primary->index];
+	return ddr->mem_pressure[type];
+}
+EXPORT_SYMBOL(drmcgrp_mem_should_evict);
-- 
2.21.0



More information about the amd-gfx mailing list