[PATCH] drm/i915: Sysfs interface to get detailed GFX buffer info per process

Nidhi Gupta nidhi1.gupta at intel.com
Wed Feb 7 05:16:28 UTC 2018


From: Sourab Gupta <sourab.gupta at intel.com>

Sometimes, in order to debug the mem usage scenarios, the summarized
view of GFX memory consumption per process is not sufficient, as it
doesn't provide buffer level details. Therefore, there's a need for an
interface, which can provide such detailed information.

This patch provides a sysfs interface to retrieve such detailed information
about GFX buffers used per process. Each process has a sysfs file
associated in the 'gfx_memtrack' directory, named by the process' tgid.
Reading the file provides a list of all GFX buffers opened by the process,
along with their attributes such as size, pinned, tiling, shared, allocstate,
gtt offset, etc.

Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
Signed-off-by: Akash Goel <akash.goel at intel.com>
Signed-off-by: Nidhi Gupta <nidhi1.gupta at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |   9 ++
 drivers/gpu/drm/i915/i915_gem.c       | 229 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.c |  14 ++
 drivers/gpu/drm/i915/i915_sysfs.c     | 242 +++++++++++++++++++++++++++++++++-
 4 files changed, 492 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e823fa5..baa514f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -338,6 +338,7 @@ struct drm_i915_file_private {
 	} rps_client;
 
 	unsigned int bsd_engine;
+	struct bin_attribute *obj_attr;
 
 /* Client can have a maximum of 3 contexts banned before
  * it is denied of creating new contexts. As one context
@@ -3574,6 +3575,8 @@ void i915_gem_obj_remove_pid(struct drm_i915_gem_object *obj);
 void i915_gem_obj_remove_all_pids(struct drm_i915_gem_object *obj);
 int i915_get_drm_clients_info(struct drm_i915_error_state_buf *m,
 				struct drm_device *dev);
+int i915_gem_get_obj_info(struct drm_i915_error_state_buf *m,
+			struct drm_device *dev, struct pid *tgid);
 
 /* i915_gpu_error.c */
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
@@ -3588,6 +3591,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
 int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
 			      struct drm_i915_private *i915,
 			      size_t count, loff_t pos);
+int i915_obj_state_buf_init(struct drm_i915_error_state_buf *eb,
+			      size_t count);
 static inline void i915_error_state_buf_release(
 	struct drm_i915_error_state_buf *eb)
 {
@@ -3662,6 +3667,10 @@ extern int i915_restore_state(struct drm_i915_private *dev_priv);
 /* i915_sysfs.c */
 void i915_setup_sysfs(struct drm_i915_private *dev_priv);
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv);
+int i915_gem_create_sysfs_file_entry(struct drm_device *dev,
+					struct drm_file *file);
+void i915_gem_remove_sysfs_file_entry(struct drm_device *dev,
+			struct drm_file *file);
 
 /* intel_lpe_audio.c */
 int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a2e970b..03e1abc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -51,6 +51,7 @@
 #include <linux/pid.h>
 #include<linux/firmware.h>
 #include "i915_gem_object.h"
+#include <linux/mm_types.h>
 #define DRM_DEBUG_MAGIC_HASH_ORDER 4
 /**< Size of key hash table. Must be power of 2. */
 
@@ -5709,6 +5710,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_request *request;
 
+	i915_gem_remove_sysfs_file_entry(dev, file);
 	put_pid(file_priv->tgid);
 	kfree(file_priv->process_name);
 
@@ -5728,7 +5730,6 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	int ret;
 
 	DRM_DEBUG("\n");
-
 	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
 	if (!file_priv)
 		return -ENOMEM;
@@ -5760,6 +5761,12 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	if (ret)
 		goto out_free_name;
 
+	ret = i915_gem_create_sysfs_file_entry(&i915->drm, file);
+	if (ret) {
+		i915_gem_context_close(file);
+		goto out_free_name;
+	}
+
 	return 0;
 
 out_free_name:
@@ -6089,6 +6096,27 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
 #define err_puts(e, s) i915_error_puts(e, s)
 
 /*
+*static const char *get_pin_flag(struct drm_i915_gem_object *obj)
+*{
+*	if (i915_gem_obj_is_pinned(obj))
+*		return "p";
+*	else
+*		return " ";
+*}
+*/
+
+/*static const char *get_tiling_flag(struct drm_i915_gem_object *obj)
+*{
+*	switch (obj->tiling_and_stride) {
+*	default:
+*	case I915_TILING_NONE: return " ";
+*	case I915_TILING_X: return "X";
+*	case I915_TILING_Y: return "Y";
+*	}
+*}
+*/
+
+/*
  * If this mmput call is the last one, it will tear down the mmaps of the
  * process and calls drm_gem_vm_close(), which leads deadlock on i915 mutex.
  * Instead, asynchronously schedule mmput function here, to avoid recursive
@@ -6326,6 +6354,97 @@ static int i915_obj_shared_count(struct drm_i915_gem_object *obj,
 }
 
 static int
+i915_describe_obj(struct get_obj_stats_buf *obj_stat_buf,
+		struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+	struct drm_i915_obj_pid_info *pid_info_entry;
+	struct drm_i915_error_state_buf *m = obj_stat_buf->m;
+	struct pid_stat_entry *pid_entry = obj_stat_buf->entry;
+	struct per_file_obj_mem_info *stats = &pid_entry->stats;
+	int obj_shared_count = 0;
+	bool discard = false;
+	u64 nr_bytes = 0;
+	/* Add obj to the per-pid totals and append its row to the output. */
+	obj_shared_count = i915_obj_shared_count(obj, pid_entry, &discard);
+	if (obj_shared_count < 0)
+		return obj_shared_count;
+
+	if (!obj->stolen)	/* stolen objects keep nr_bytes == 0 */
+		nr_bytes = i915_obj_get_shmem_pages_alloced(obj)*PAGE_SIZE;
+	/* Shared: add nr_bytes / obj_shared_count; otherwise add all of it. */
+	if (!discard && !obj->stolen &&
+			(obj->mm.madv != __I915_MADV_PURGED) &&
+			(nr_bytes != 0)) {
+		if (obj_shared_count > 1)
+			stats->phys_space_shared_proportion +=
+				(nr_bytes)/obj_shared_count;
+		else
+			stats->phys_space_allocated_priv += nr_bytes;
+	}
+
+	/*
+	 * NOTE(review): the printf for the identifier, size and flag columns
+	 * is disabled, so each row starts at the allocation-state column.
+	 * i915_error_printf(m,
+	 *	"%p: %7zdK  %10zdK     %s      %s     %s      %s   %s     ",
+	 *	&obj->base, obj->base.size / 1024, (size_t)nr_bytes / 1024,
+	 *	//get_pin_flag(obj),
+	 *	get_tiling_flag(obj),
+	 *	obj->mm.dirty ? "Y" : "N",
+	 *	(obj_shared_count > 1) ? "Y" : "N",
+	 *	(obj->userptr.mm != 0) ? "Y" : "N",
+	 *	obj->stolen ? "Y" : "N",
+	 *	(obj->pin_display || obj->userptr.mm) ? "Y" : "N");
+	 */
+
+	if (obj->mm.madv == __I915_MADV_PURGED)
+		err_puts(m, " purged    ");
+	else if (obj->mm.madv == I915_MADV_DONTNEED)
+		err_puts(m, " purgeable   ");
+	else if (obj->has_backing_pages != 0)
+		err_puts(m, " allocated   ");
+	else
+		err_puts(m, "             ");
+	/* One address-space marker and start offset per VMA of the object. */
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		if (!i915_is_ggtt(vma->vm))
+			err_puts(m, " PP    ");
+		else
+			err_puts(m, " G     ");
+		i915_error_printf(m, "  %08llx ", vma->node.start);
+	}
+	if (list_empty(&obj->vma_list))
+		err_puts(m, "                  ");
+
+	list_for_each_entry(pid_info_entry, &obj->pid_info, head)
+		i915_error_printf(m, " (%d: %d)",
+			   pid_info_entry->tgid,
+			   pid_info_entry->open_handle_count);
+
+	err_puts(m, "\n");
+
+	if (m->bytes == 0 && m->err)	/* nothing was buffered at all */
+		return m->err;
+
+	return 0;
+}
+
+/* idr_for_each() callback: validate obj's pid array, then describe obj. */
+static int
+i915_drm_gem_obj_info(int id, void *ptr, void *data)
+{
+	struct get_obj_stats_buf *stat_buf = data;
+	struct drm_i915_gem_object *gem_obj = ptr;
+
+	/* Validation runs first, so the row is built after it. */
+	i915_obj_pidarray_validate(&gem_obj->base);
+
+	return i915_describe_obj(stat_buf, gem_obj);
+}
+
+
+static int
 i915_drm_gem_object_per_file_summary(int id, void *ptr, void *data)
 {
 	struct pid_stat_entry *pid_entry = data;
@@ -6543,6 +6662,97 @@ __i915_get_drm_clients_info(struct drm_i915_error_state_buf *m,
 	return 0;
 }
 
+#define NUM_SPACES 100	/* width of the blank area reserved for the summary */
+#define INITIAL_SPACES_STR(x) #x	/* stringify x without expanding it */
+#define SPACES_STR(x) INITIAL_SPACES_STR(x)	/* expand x, then stringify */
+
+/*
+ * List every GEM object open by @tgid into @m, after a padded summary area.
+ * Returns 0 or a negative error code.
+ */
+static int
+__i915_gem_get_obj_info(struct drm_i915_error_state_buf *m,
+			struct drm_device *dev, struct pid *tgid)
+{
+	struct drm_file *file;
+	struct drm_i915_file_private *file_priv_reqd = NULL;
+	int bytes_copy, ret = 0;
+	struct pid_stat_entry pid_entry;
+	struct name_entry *entry, *next;
+
+	pid_entry.stats.phys_space_shared_proportion = 0;
+	pid_entry.stats.phys_space_allocated_priv = 0;
+	pid_entry.tgid = tgid;
+	pid_entry.pid_num = pid_nr(tgid);
+	ret = drm_ht_create(&pid_entry.namelist, DRM_DEBUG_MAGIC_HASH_ORDER);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&pid_entry.namefree);
+
+	/* Reserve a blank area up front; the summary is written into it later */
+	i915_error_printf(m, "%"SPACES_STR(NUM_SPACES)"s\n", " ");
+
+	list_for_each_entry(file, &dev->filelist, lhead) {
+		struct drm_i915_file_private *file_priv = file->driver_priv;
+		struct get_obj_stats_buf obj_stat_buf;
+
+		obj_stat_buf.entry = &pid_entry;
+		obj_stat_buf.m = m;
+
+		if (file_priv->tgid != tgid)
+			continue;
+
+		file_priv_reqd = file_priv;
+		err_puts(m,
+			"\n Obj Identifier   Obj-Size Resident-Size Pin Tiling Dirty Shared Vmap Stolen Mappable  AllocState Global/PP  GttOffset (PID: handle count)\n");
+		spin_lock(&file->table_lock);
+		ret = idr_for_each(&file->object_idr,
+				&i915_drm_gem_obj_info, &obj_stat_buf);
+		spin_unlock(&file->table_lock);
+		if (ret)
+			break;
+	}
+
+	if (file_priv_reqd) {
+		int space_remaining;
+
+		/* Reset the bytes counter to buffer beginning */
+		bytes_copy = m->bytes;
+		m->bytes = 0;
+		i915_error_printf(m, "\n  PID    GfxMem   Process\n");
+		i915_error_printf(m, "%5d %8zdK ", pid_nr(file_priv_reqd->tgid),
+			   (pid_entry.stats.phys_space_shared_proportion +
+			    pid_entry.stats.phys_space_allocated_priv)/1024);
+
+		/* Clip the name via precision; never edit process_name itself */
+		space_remaining = NUM_SPACES - m->bytes - 1;
+		if (space_remaining < 0)
+			space_remaining = 0;
+		i915_error_printf(m, "%.*s\n", space_remaining,
+				  file_priv_reqd->process_name);
+
+		/* Reinstate the previous saved value of bytes counter */
+		m->bytes = bytes_copy;
+	} else {
+		WARN(1, "drm file corresponding to tgid:%d not found\n",
+			pid_nr(tgid));
+	}
+
+	list_for_each_entry_safe(entry, next, &pid_entry.namefree, head) {
+		list_del(&entry->head);
+		drm_ht_remove_item(&pid_entry.namelist, &entry->hash_item);
+		kfree(entry);
+	}
+	drm_ht_remove(&pid_entry.namelist);
+
+	if (ret)
+		return ret;
+	if (m->bytes == 0 && m->err)
+		return m->err;
+	return 0;
+}
+
 int i915_get_drm_clients_info(struct drm_i915_error_state_buf *m,
 			struct drm_device *dev)
 {
@@ -6566,3 +6776,20 @@ int i915_get_drm_clients_info(struct drm_i915_error_state_buf *m,
 	/* mutex_unlock(&drm_global_mutex); */
 	return ret;
 }
+
+/* Take the i915 mutex, gather the object info, drop dev->struct_mutex. */
+int i915_gem_get_obj_info(struct drm_i915_error_state_buf *m,
+			struct drm_device *dev, struct pid *tgid)
+{
+	int err = i915_mutex_lock_interruptible(dev);
+
+	/* Lock not taken: hand the failure straight back. */
+	if (err)
+		return err;
+
+	err = __i915_gem_get_obj_info(m, dev, tgid);
+	mutex_unlock(&dev->struct_mutex);
+
+	return err;
+}
+
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index fa83569..5add65d 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -847,6 +847,20 @@ int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
 	return 0;
 }
 
+/* Zero @ebuf, then attach a kmalloc'ed @count-byte buffer; -ENOMEM on failure. */
+int i915_obj_state_buf_init(struct drm_i915_error_state_buf *ebuf,
+				size_t count)
+{
+	void *storage = kmalloc(count, GFP_KERNEL);
+
+	memset(ebuf, 0, sizeof(*ebuf));
+	if (storage == NULL)
+		return -ENOMEM;
+	ebuf->buf = storage;
+	ebuf->size = count;
+	return 0;
+}
+
 static void i915_error_object_free(struct drm_i915_error_object *obj)
 {
 	int page;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index ba914c0..a885722b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -33,7 +33,7 @@
 #include "i915_drv.h"
 #include "../drm_internal.h"
 #include <linux/firmware.h>
-
+#include "i915_pvinfo.h"
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 {
 	struct drm_minor *minor = dev_get_drvdata(kdev);
@@ -578,6 +578,246 @@ static ssize_t i915_gem_clients_state_read(struct file *filp,
 
 	return ret ?: ret_count;
 }
+#define GEM_OBJ_STAT_BUF_SIZE (4*1024) /* 4KB */
+#define GEM_OBJ_STAT_BUF_SIZE_MAX (1024*1024) /* 1MB */
+
+struct i915_gem_file_attr_priv {
+	char tgid_str[16];	/* sysfs file name: the tgid in decimal */
+	struct pid *tgid;	/* process whose objects the file reports */
+	struct drm_i915_error_state_buf buf;	/* cached output served by reads */
+};
+
+/*
+ * sysfs read handler for a per-process gfx_memtrack file.
+ *
+ * A read at offset 0 regenerates the listing into the buffer cached in
+ * attr->private, doubling it from GEM_OBJ_STAT_BUF_SIZE up to
+ * GEM_OBJ_STAT_BUF_SIZE_MAX until it fits; other offsets are served from
+ * that cache. Returns the bytes copied into @buf (0 at end of data) or a
+ * negative error code.
+ */
+static ssize_t i915_gem_read_objects(struct file *filp,
+				struct kobject *memtrack_kobj,
+				struct bin_attribute *attr,
+				char *buf, loff_t off, size_t count)
+{
+	struct kobject *kobj = memtrack_kobj->parent;
+	struct device *kdev = container_of(kobj, struct device, kobj);
+	struct drm_minor *minor = dev_get_drvdata(kdev);
+	struct drm_device *dev = minor->dev;
+	struct i915_gem_file_attr_priv *attr_priv;
+	struct pid *tgid;
+	ssize_t ret_count = 0;
+	long bytes_available;
+	int ret = 0, buf_size = GEM_OBJ_STAT_BUF_SIZE;
+
+	/*
+	 * FIXME: removal of the sysfs entry may race against this read. The
+	 * remove path would hold drm_global_mutex while waiting for the read
+	 * to finish, so blocking on that mutex here would deadlock. A
+	 * mutex_trylock() loop with a ~500ms timeout returning -EBUSY was
+	 * drafted for this but is not enabled.
+	 */
+
+	if (!attr || !attr->private) {
+		ret = -EINVAL;
+		DRM_ERROR("attr | attr->private pointer is NULL\n");
+		goto out;
+	}
+
+	attr_priv = attr->private;
+	tgid = attr_priv->tgid;
+
+	if (off && !attr_priv->buf.buf) {
+		ret = -EINVAL;
+		DRM_ERROR(
+			"Buf not allocated during read with non-zero offset\n");
+		goto out;
+	}
+
+	if (off == 0) {
+retry:
+		if (!attr_priv->buf.buf) {
+			ret = i915_obj_state_buf_init(&attr_priv->buf,
+				buf_size);
+			if (ret) {
+				DRM_ERROR(
+					"obj state buf init failed. buf_size=%d\n",
+					buf_size);
+				goto out;
+			}
+		} else {
+			/* Reset fill state and stale error; grow from real size */
+			attr_priv->buf.pos = 0;
+			attr_priv->buf.bytes = 0;
+			attr_priv->buf.err = 0;
+			buf_size = attr_priv->buf.size;
+		}
+
+		/* Read the gfx device stats */
+		ret = i915_gem_get_obj_info(&attr_priv->buf, dev, tgid);
+		if (ret)
+			goto out;
+
+		ret = i915_error_ok(&attr_priv->buf);
+		if (ret) {
+			ret = 0;
+			goto copy_data;
+		}
+		if (buf_size >= GEM_OBJ_STAT_BUF_SIZE_MAX) {
+			DRM_DEBUG_DRIVER("obj stat buf size limit reached\n");
+			ret = -ENOMEM;
+			goto out;
+		} else {
+			/* Try to reallocate buf of larger size */
+			i915_error_state_buf_release(&attr_priv->buf);
+			buf_size *= 2;
+
+			ret = i915_obj_state_buf_init(&attr_priv->buf,
+						buf_size);
+			if (ret) {
+				DRM_ERROR(
+					"obj stat buf init failed. buf_size=%d\n",
+					buf_size);
+				goto out;
+			}
+			goto retry;
+		}
+	}
+copy_data:
+
+	bytes_available = (long)attr_priv->buf.bytes - (long)off;
+
+	if (bytes_available > 0) {
+		ret_count = count < bytes_available ? count : bytes_available;
+		memcpy(buf, attr_priv->buf.buf + off, ret_count);
+	} else
+		ret_count = 0;
+
+out:
+	/*mutex_unlock(&drm_global_mutex);*/
+
+	return ret ?: ret_count;
+}
+
+/*
+ * Create the gfx_memtrack sysfs file for the tgid of @file, or share the
+ * attribute of an already open drm file with the same tgid.
+ * Returns 0 on success (or when memtrack_debug is off), negative on error.
+ */
+int i915_gem_create_sysfs_file_entry(struct drm_device *dev,
+					struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_gem_file_attr_priv *attr_priv;
+	struct bin_attribute *obj_attr;
+	struct drm_file *file_local;
+	int ret;
+
+	if (!i915_modparams.memtrack_debug)
+		return 0;
+
+	/* One sysfs file per tgid: share the attribute of a matching file */
+	mutex_lock(&dev->struct_mutex);
+	list_for_each_entry(file_local, &dev->filelist, lhead) {
+		struct drm_i915_file_private *file_priv_local =
+				file_local->driver_priv;
+
+		if (file_priv->tgid == file_priv_local->tgid) {
+			file_priv->obj_attr = file_priv_local->obj_attr;
+			mutex_unlock(&dev->struct_mutex);
+			return 0;
+		}
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	obj_attr = kzalloc(sizeof(*obj_attr), GFP_KERNEL);
+	if (!obj_attr) {
+		DRM_ERROR("Alloc failed. Out of memory\n");
+		return -ENOMEM;
+	}
+
+	attr_priv = kzalloc(sizeof(*attr_priv), GFP_KERNEL);
+	if (!attr_priv) {
+		DRM_ERROR("Alloc failed. Out of memory\n");
+		ret = -ENOMEM;
+		goto out_obj_attr;
+	}
+
+	/* Name the file after the tgid that is tracked and reported below */
+	snprintf(attr_priv->tgid_str, sizeof(attr_priv->tgid_str), "%d",
+		 pid_nr(file_priv->tgid));
+	sysfs_bin_attr_init(obj_attr);
+	obj_attr->attr.name = attr_priv->tgid_str;
+	obj_attr->attr.mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+	obj_attr->size = 0;
+	obj_attr->read = i915_gem_read_objects;
+
+	attr_priv->tgid = file_priv->tgid;
+	obj_attr->private = attr_priv;
+
+	ret = sysfs_create_bin_file(&dev_priv->memtrack_kobj, obj_attr);
+	if (ret) {
+		DRM_ERROR(
+			"sysfs tgid file setup failed. tgid=%d, process:%s, ret:%d\n",
+			pid_nr(file_priv->tgid), file_priv->process_name, ret);
+
+		goto out_attr_priv;
+	}
+
+	file_priv->obj_attr = obj_attr;
+	return 0;
+
+out_attr_priv:
+	kfree(attr_priv);
+out_obj_attr:
+	kfree(obj_attr);
+	return ret;
+}
+
+/*
+ * Drop the per-tgid sysfs file once the last drm file belonging to that
+ * tgid goes away; does nothing when memtrack_debug is off.
+ */
+void i915_gem_remove_sysfs_file_entry(struct drm_device *dev,
+			struct drm_file *file)
+{
+	struct drm_i915_private *i915 = dev->dev_private;
+	struct drm_i915_file_private *closing = file->driver_priv;
+	struct i915_gem_file_attr_priv *attr_priv;
+	struct bin_attribute *attr;
+	struct drm_file *other;
+
+	if (!i915_modparams.memtrack_debug)
+		return;
+
+	/*
+	 * The closing drm file has already left the filelist here, so any
+	 * entry still carrying the same tgid number means the sysfs file is
+	 * still in use and must stay.
+	 */
+	list_for_each_entry(other, &dev->filelist, lhead) {
+		struct drm_i915_file_private *other_priv = other->driver_priv;
+
+		if (pid_nr(other_priv->tgid) == pid_nr(closing->tgid))
+			return;
+	}
+
+	attr = closing->obj_attr;
+	if (WARN_ON(attr == NULL))
+		return;
+
+	attr_priv = attr->private;
+
+	sysfs_remove_bin_file(&i915->memtrack_kobj, attr);
+
+	/* attr_priv is attr->private: free its buffer, then both objects. */
+	i915_error_state_buf_release(&attr_priv->buf);
+	kfree(attr_priv);
+	kfree(attr);
+}
+
 
 static const struct bin_attribute error_state_attr = {
 	.attr.name = "error",
-- 
2.7.4



More information about the Intel-gfx-trybot mailing list