[RFC PATCH 157/162] drm/i915: Improve accuracy of eviction stats

Matthew Auld matthew.auld at intel.com
Fri Nov 27 12:07:13 UTC 2020


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Current code uses jiffie time to do the accounting and then does:

  diff = jiffies - start;
  msec = diff * 1000 / HZ;
  ...
  atomic_long_add(msec, &i915->time_swap_out_ms);

If we assume jiffie can be as non-granular as 10ms and that the current
accounting records all evictions faster than one jiffie as infinite speed,
we can end up over-estimating the reported eviction throughput.

Fix this by accumulating ktime_t and only dividing to more user friendly
granularity at presentation time (debugfs read).

At the same time consolidate the code a bit and convert from multiple
atomics to single seqlock per stat.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: CQ Tang <cq.tang at intel.com>
Cc: Sudeep Dutt <sudeep.dutt at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_region.c | 67 ++++++++++----------
 drivers/gpu/drm/i915/i915_debugfs.c        | 73 +++++++++++-----------
 drivers/gpu/drm/i915/i915_drv.h            | 25 +++++---
 drivers/gpu/drm/i915/i915_gem.c            |  5 ++
 4 files changed, 90 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index 8ec59fbaa3e6..1a390e502d5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -9,14 +9,29 @@
 #include "i915_trace.h"
 #include "i915_gem_mman.h"
 
+static void
+__update_stat(struct i915_mm_swap_stat *stat,
+	      unsigned long pages,
+	      ktime_t start)
+{
+	if (stat) {
+		start = ktime_get() - start;
+
+		write_seqlock(&stat->lock);
+		stat->time = ktime_add(stat->time, start);
+		stat->pages += pages;
+		write_sequnlock(&stat->lock);
+	}
+}
+
 static int
 i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
 			      struct sg_table *pages, unsigned int sizes)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_mm_swap_stat *stat = NULL;
 	struct drm_i915_gem_object *dst, *src;
-	unsigned long start, diff, msec;
-	bool blt_completed = false;
+	ktime_t start = ktime_get();
 	int err = -EINVAL;
 
 	GEM_BUG_ON(obj->swapto);
@@ -26,7 +41,6 @@ i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(!i915->params.enable_eviction);
 
 	assert_object_held(obj);
-	start = jiffies;
 
 	/* create a shadow object on smem region */
 	dst = i915_gem_object_create_shmem(i915, obj->base.size);
@@ -58,10 +72,14 @@ i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
 	if (i915->params.enable_eviction >= 2) {
 		err = i915_window_blt_copy(dst, src);
 		if (!err)
-			blt_completed = true;
+			stat = &i915->mm.blt_swap_stats.out;
 	}
-	if (err && i915->params.enable_eviction != 2)
+
+	if (err && i915->params.enable_eviction != 2) {
 		err = i915_gem_object_memcpy(dst, src);
+		if (!err)
+			stat = &i915->mm.memcpy_swap_stats.out;
+	}
 
 	__i915_gem_object_unpin_pages(src);
 	__i915_gem_object_unset_pages(src);
@@ -73,18 +91,7 @@ i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
 	else
 		i915_gem_object_put(dst);
 
-	if (!err) {
-		diff = jiffies - start;
-		msec = diff * 1000 / HZ;
-		if (blt_completed) {
-			atomic_long_add(sizes, &i915->num_bytes_swapped_out);
-			atomic_long_add(msec, &i915->time_swap_out_ms);
-		} else {
-			atomic_long_add(sizes,
-					&i915->num_bytes_swapped_out_memcpy);
-			atomic_long_add(msec, &i915->time_swap_out_ms_memcpy);
-		}
-	}
+	__update_stat(stat, sizes >> PAGE_SHIFT, start);
 
 	return err;
 }
@@ -94,9 +101,9 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
 			     struct sg_table *pages, unsigned int sizes)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_mm_swap_stat *stat = NULL;
 	struct drm_i915_gem_object *dst, *src;
-	unsigned long start, diff, msec;
-	bool blt_completed = false;
+	ktime_t start = ktime_get();
 	int err = -EINVAL;
 
 	GEM_BUG_ON(!obj->swapto);
@@ -106,7 +113,6 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(!i915->params.enable_eviction);
 
 	assert_object_held(obj);
-	start = jiffies;
 
 	src = obj->swapto;
 
@@ -134,10 +140,14 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
 	if (i915->params.enable_eviction >= 2) {
 		err = i915_window_blt_copy(dst, src);
 		if (!err)
-			blt_completed = true;
+			stat = &i915->mm.blt_swap_stats.in;
 	}
-	if (err && i915->params.enable_eviction != 2)
+
+	if (err && i915->params.enable_eviction != 2) {
 		err = i915_gem_object_memcpy(dst, src);
+		if (!err)
+			stat = &i915->mm.memcpy_swap_stats.in;
+	}
 
 	__i915_gem_object_unpin_pages(dst);
 	__i915_gem_object_unset_pages(dst);
@@ -149,18 +159,7 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
 		i915_gem_object_put(src);
 	}
 
-	if (!err) {
-		diff = jiffies - start;
-		msec = diff * 1000 / HZ;
-		if (blt_completed) {
-			atomic_long_add(sizes, &i915->num_bytes_swapped_in);
-			atomic_long_add(msec, &i915->time_swap_in_ms);
-		} else {
-			atomic_long_add(sizes,
-					&i915->num_bytes_swapped_in_memcpy);
-			atomic_long_add(msec, &i915->time_swap_in_ms_memcpy);
-		}
-	}
+	__update_stat(stat, sizes >> PAGE_SHIFT, start);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 983030ac39e1..f06f900b598e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -359,12 +359,46 @@ static void print_context_stats(struct seq_file *m,
 	print_file_stats(m, "[k]contexts", kstats);
 }
 
+static void
+evict_stat(struct seq_file *m,
+	   const char *name,
+	   const char *direction,
+	   struct i915_mm_swap_stat *stat)
+{
+	unsigned long pages;
+	unsigned int seq;
+	u64 time, rate;
+	ktime_t ktime;
+
+	do {
+		seq = read_seqbegin(&stat->lock);
+		pages = stat->pages;
+		ktime = stat->time;
+	} while (read_seqretry(&stat->lock, seq));
+
+	time = ktime_to_us(ktime);
+	rate = time ? div64_u64((u64)pages * PAGE_SIZE, time) : 0;
+	rate = div64_ul(rate * USEC_PER_SEC, 1024 * 1024);
+
+	seq_printf(m, "%s swap %s %lu MiB in %llums, %llu MiB/s.\n",
+		   name, direction, pages * PAGE_SIZE, ktime_to_ms(ktime),
+		   rate);
+}
+
+static void
+evict_stats(struct seq_file *m,
+	    const char *name,
+	    struct i915_mm_swap_stats *stats)
+{
+	evict_stat(m, name, "in", &stats->in);
+	evict_stat(m, name, "out", &stats->out);
+}
+
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_memory_region *mr;
 	enum intel_region_id id;
-	u64 time, bytes, rate;
 
 	seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
 		   i915->mm.shrink_count,
@@ -374,41 +408,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
 		seq_printf(m, "%s: total:%pa, available:%pa bytes\n",
 			   mr->name, &mr->total, &mr->avail);
 
-	time = atomic_long_read(&i915->time_swap_out_ms);
-	bytes = atomic_long_read(&i915->num_bytes_swapped_out);
-	if (time)
-		rate = div64_u64(bytes * 1000, time * 1024 * 1024);
-	else
-		rate = 0;
-	seq_printf(m, "BLT: swapout %llu Bytes in %llu mSec(%llu MB/Sec)\n",
-		   bytes, time, rate);
-
-	time = atomic_long_read(&i915->time_swap_in_ms);
-	bytes = atomic_long_read(&i915->num_bytes_swapped_in);
-	if (time)
-		rate = div64_u64(bytes * 1000, time * 1024 * 1024);
-	else
-		rate = 0;
-	seq_printf(m, "BLT: swapin %llu Bytes in %llu mSec(%llu MB/Sec)\n",
-		   bytes, time, rate);
-
-	time = atomic_long_read(&i915->time_swap_out_ms_memcpy);
-	bytes = atomic_long_read(&i915->num_bytes_swapped_out_memcpy);
-	if (time)
-		rate = div64_u64(bytes * 1000, time * 1024 * 1024);
-	else
-		rate = 0;
-	seq_printf(m, "Memcpy: swapout %llu Bytes in %llu mSec(%llu MB/Sec)\n",
-		   bytes, time, rate);
-
-	time = atomic_long_read(&i915->time_swap_in_ms_memcpy);
-	bytes = atomic_long_read(&i915->num_bytes_swapped_in_memcpy);
-	if (time)
-		rate = div64_u64(bytes * 1000, time * 1024 * 1024);
-	else
-		rate = 0;
-	seq_printf(m, "Memcpy: swapin %llu Bytes in %llu mSec(%llu MB/Sec)\n",
-		   bytes, time, rate);
+	evict_stats(m, "Blitter", &i915->mm.blt_swap_stats);
+	evict_stats(m, "Memcpy", &i915->mm.memcpy_swap_stats);
 	seq_putc(m, '\n');
 
 	print_context_stats(m, i915);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6f0ab363bdee..45511f2d8da0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -49,6 +49,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/stackdepot.h>
 #include <linux/xarray.h>
+#include <linux/seqlock.h>
 
 #include <drm/intel-gtt.h>
 #include <drm/drm_legacy.h> /* for struct drm_dma_handle */
@@ -548,6 +549,17 @@ struct intel_l3_parity {
 	int which_slice;
 };
 
+struct i915_mm_swap_stat {
+	seqlock_t lock;
+	unsigned long pages;
+	ktime_t time;
+};
+
+struct i915_mm_swap_stats {
+	struct i915_mm_swap_stat in;
+	struct i915_mm_swap_stat out;
+};
+
 struct i915_gem_mm {
 	/* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */
 	spinlock_t obj_lock;
@@ -601,6 +613,9 @@ struct i915_gem_mm {
 
 	/* To protect above two set of vmas */
 	wait_queue_head_t window_queue;
+
+	struct i915_mm_swap_stats blt_swap_stats;
+	struct i915_mm_swap_stats memcpy_swap_stats;
 };
 
 #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */
@@ -1220,16 +1235,6 @@ struct drm_i915_private {
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
 	 */
-
-	atomic_long_t num_bytes_swapped_out;
-	atomic_long_t num_bytes_swapped_in;
-	atomic_long_t time_swap_out_ms;
-	atomic_long_t time_swap_in_ms;
-
-	atomic_long_t num_bytes_swapped_out_memcpy;
-	atomic_long_t num_bytes_swapped_in_memcpy;
-	atomic_long_t time_swap_out_ms_memcpy;
-	atomic_long_t time_swap_in_ms_memcpy;
 };
 
 static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85cbdb8e2bb8..e94f3f689b30 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1151,6 +1151,11 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 	INIT_LIST_HEAD(&i915->mm.purge_list);
 	INIT_LIST_HEAD(&i915->mm.shrink_list);
 
+	seqlock_init(&i915->mm.blt_swap_stats.in.lock);
+	seqlock_init(&i915->mm.blt_swap_stats.out.lock);
+	seqlock_init(&i915->mm.memcpy_swap_stats.in.lock);
+	seqlock_init(&i915->mm.memcpy_swap_stats.out.lock);
+
 	i915_gem_init__objects(i915);
 }
 
-- 
2.26.2



More information about the dri-devel mailing list