[Intel-gfx] [RFC 5/5] drm/i915: Update process mm stats for Gen GFX memory

Praveen Paneri praveen.paneri at intel.com
Thu Mar 22 14:14:58 UTC 2018


From: Sourab Gupta <sourab.gupta at intel.com>

The GFX memory consumed by a process is not accounted for by the kernel.
This leads to a scenario in which the low memory killer is unable to
correctly identify the best process to kill during OOM scenarios.
This case particularly manifests when an application has a large GFX
memory footprint but a significantly smaller system memory consumption.
The low memory killer in such a case will not be able to identify the
culprit process, and a lot of innocent processes will be killed without
freeing much memory.

This patch utilizes the memtracker functions to add accounting for
the processes' GFX memory consumption.
The internal kernel counters updated this way don't affect the output of
procrank, meminfo or smaps.
They do, however, give a better approximation of a process's memory
consumption, leading to better low memory killer behavior.

The memtracker functions are called at asynchronous points during
shrinker invocation.
Thus, in times of memory stress, the processes' GFX memory statistics
are kept up to date, leading to better low memory killer behavior.

Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
Signed-off-by: Akash Goel <akash.goel at intel.com>
Signed-off-by: Nidhi Gupta <nidhi1.gupta at intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  8 ++++++
 drivers/gpu/drm/i915/i915_gem.c          | 47 +++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 18 ++++++++++++
 drivers/gpu/drm/i915/i915_sysfs.c        | 22 +++++++++++----
 4 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 85a3dd9..0646175 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -316,6 +316,14 @@ struct i915_hotplug {
 struct i915_mm_struct;
 struct i915_mmu_object;
 
+struct i915_gem_file_attr_priv {
+	char tgid_str[16];
+	struct pid *tgid;
+	size_t rss_size;	/* pages last credited to mm's MM_FILEPAGES */
+	struct mm_struct *mm;	/* current->mm at creation; mmdrop() on removal */
+	struct drm_i915_error_state_buf buf;
+};
+
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
 	struct drm_file *file;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 25845652..ef6dea2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -82,6 +82,7 @@ struct pid_stat_entry {
 	struct drm_open_hash namelist;
 	struct per_file_obj_mem_info stats;
 	struct pid *tgid;
+	struct i915_gem_file_attr_priv *attr_priv;
 	int pid_num;
 };
 
@@ -6289,6 +6290,24 @@ static int i915_get_pid_cmdline(struct task_struct *task, char *buffer)
 	return 0;
 }
 
+static void i915_update_task_mm_stats(struct pid_stat_entry *pid_entry)
+{
+	long new_size, old_size;	/* both in pages */
+
+	new_size =
+		(pid_entry->stats.phys_space_shared_proportion +
+		 pid_entry->stats.phys_space_allocated_priv) / PAGE_SIZE;
+
+	old_size = pid_entry->attr_priv->rss_size;
+	/* Credit only the change since the last update, so that repeated */
+	/* calls do not double-count; skipped when no mm was recorded. */
+	if (pid_entry->attr_priv->mm)
+		add_mm_counter(pid_entry->attr_priv->mm, MM_FILEPAGES,
+				new_size - old_size);
+
+	pid_entry->attr_priv->rss_size = new_size;	/* next baseline */
+}
+
 static int i915_gem_obj_insert_pid(struct drm_i915_gem_object *obj)
 {
 	int found = 0;
@@ -6810,6 +6829,7 @@ static int i915_gem_object_pid_order(int id, void *ptr, void *data)
 				&sorted_pid_stats, head) {
 		struct task_struct *task = get_pid_task(pid_entry->tgid,
 							PIDTYPE_PID);
+		struct drm_i915_gem_client_pid *client_pid;
 		err_printf(m,
 			"%5d %6d %6d %6d %9d %8d %14zdK %14zdK %14zdK  %14zdK     %s",
 			   pid_entry->pid_num,
@@ -6829,6 +6849,16 @@ static int i915_gem_object_pid_order(int id, void *ptr, void *data)
 		else
 			err_puts(m, "\n");
 
+		list_for_each_entry(client_pid,
+				    &dev_priv->mm.gem_client_pids, head) {
+			if (client_pid->tgid == pid_entry->pid_num) {
+				pid_entry->attr_priv =
+						client_pid->obj_attr.private;
+				i915_update_task_mm_stats(pid_entry);
+				break;
+			}
+		}
+
 		total_shared_prop_space +=
 			pid_entry->stats.phys_space_shared_proportion/1024;
 		total_priv_space +=
@@ -6872,10 +6902,12 @@ static int i915_gem_object_pid_order(int id, void *ptr, void *data)
 __i915_gem_get_obj_info(struct drm_i915_error_state_buf *m,
 			struct drm_device *dev, struct pid *tgid)
 {
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_file *file;
 	int bytes_copy, ret = 0;
 	struct pid_stat_entry pid_entry;
 	struct name_entry *entry, *next;
+	struct drm_i915_gem_client_pid *client_pid;
 
 	pid_entry.stats.phys_space_shared_proportion = 0;
 	pid_entry.stats.phys_space_allocated_priv = 0;
@@ -6910,6 +6942,14 @@ static int i915_gem_object_pid_order(int id, void *ptr, void *data)
 			break;
 	}
 
+	list_for_each_entry(client_pid, &i915->mm.gem_client_pids, head) {
+		if (client_pid->tgid == pid_entry.pid_num) {
+			pid_entry.attr_priv = client_pid->obj_attr.private;
+			i915_update_task_mm_stats(&pid_entry);
+			break;
+		}
+	}
+
 	/* Reset the bytes counter to buffer beginning */
 	bytes_copy = m->bytes;
 	m->bytes = 0;
@@ -6947,15 +6987,8 @@ int i915_get_drm_clients_info(struct drm_i915_error_state_buf *m,
 	 * XXX: drm_global_mutex is undefined currently
 	 */
 	/* mutex_lock(&drm_global_mutex); */
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret) {
-		/* mutex_unlock(&drm_global_mutex); */
-		return ret;
-	}
-
 	ret = __i915_get_drm_clients_info(m, dev);
 
-	mutex_unlock(&dev->struct_mutex);
 	/* mutex_unlock(&drm_global_mutex); */
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 5757fb7..09fe3bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -304,6 +304,22 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 	return freed;
 }
 
+static void i915_gem_update_gfx_stats(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_error_state_buf error_str; /* only 3 members set below */
+	bool unlock;
+
+	if (!shrinker_lock(i915, &unlock))
+		return;	/* lock not obtained: skip this stats refresh */
+	/* NOTE(review): presumably makes the err_*() helpers drop output - confirm */
+	error_str.bytes = 0;
+	error_str.size = 0;
+	error_str.err = -ENOSPC;
+	i915_get_drm_clients_info(&error_str, dev);
+	shrinker_unlock(i915, unlock);
+}
+
 static unsigned long
 i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
@@ -341,6 +357,8 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 			    128ul /* default SHRINK_BATCH */);
 	}
 
+	i915_gem_update_gfx_stats(&i915->drm);
+
 	return count;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 7f7d74a..5829aec 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/stat.h>
 #include <linux/sysfs.h>
+#include <linux/sched/mm.h>
 #include "intel_drv.h"
 #include "i915_drv.h"
 
@@ -570,7 +571,13 @@ static ssize_t i915_gem_clients_state_read(struct file *filp,
 	if (ret)
 		return ret;
 
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto out;
+
 	ret = i915_get_drm_clients_info(&error_str, dev);
+
+	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		goto out;
 
@@ -585,12 +592,6 @@ static ssize_t i915_gem_clients_state_read(struct file *filp,
 #define GEM_OBJ_STAT_BUF_SIZE (4*1024) /* 4KB */
 #define GEM_OBJ_STAT_BUF_SIZE_MAX (1024*1024) /* 1MB */
 
-struct i915_gem_file_attr_priv {
-	char tgid_str[16];
-	struct pid *tgid;
-	struct drm_i915_error_state_buf buf;
-};
-
 static ssize_t i915_gem_read_objects(struct file *filp,
 				struct kobject *memtrack_kobj,
 				struct bin_attribute *attr,
@@ -727,6 +728,9 @@ int i915_gem_create_sysfs_file_entry(struct drm_device *dev,
 	obj_attr->size = 0;
 	obj_attr->read = i915_gem_read_objects;
 
+	attr_priv->rss_size = 0;
+	attr_priv->mm = current->mm;
+	atomic_inc(&current->mm->mm_count);
 	attr_priv->tgid = tgid;
 	obj_attr->private = attr_priv;
 
@@ -761,6 +765,12 @@ void i915_gem_remove_sysfs_file_entry(struct drm_device *dev,
 
 	attr_priv = obj_attr->private;
 
+	if (attr_priv->mm) {
+		add_mm_counter(attr_priv->mm, MM_FILEPAGES,
+				-attr_priv->rss_size);
+		mmdrop(attr_priv->mm);
+	}
+
 	sysfs_remove_bin_file(&dev_priv->memtrack_kobj, obj_attr);
 
 	i915_error_state_buf_release(&attr_priv->buf);
-- 
1.9.1



More information about the Intel-gfx mailing list