[Intel-gfx] [RFC PATCH 157/162] drm/i915: Improve accuracy of eviction stats
Matthew Auld
matthew.auld at intel.com
Fri Nov 27 12:07:13 UTC 2020
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Current code uses jiffie time to do the accounting and then does:
diff = jiffies - start;
msec = diff * 1000 / HZ;
...
atomic_long_add(msec, &i915->time_swap_out_ms);
If we assume jiffie can be as non-granular as 10ms and that the current
accounting records all evictions faster than one jiffie as infinite speed,
we can end up over-estimating the reported eviction throughput.
Fix this by accumulating ktime_t and only dividing to more user friendly
granularity at presentation time (debugfs read).
At the same time consolidate the code a bit and convert from multiple
atomics to single seqlock per stat.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: CQ Tang <cq.tang at intel.com>
Cc: Sudeep Dutt <sudeep.dutt at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_region.c | 67 ++++++++++----------
drivers/gpu/drm/i915/i915_debugfs.c | 73 +++++++++++-----------
drivers/gpu/drm/i915/i915_drv.h | 25 +++++---
drivers/gpu/drm/i915/i915_gem.c | 5 ++
4 files changed, 90 insertions(+), 80 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index 8ec59fbaa3e6..1a390e502d5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -9,14 +9,29 @@
#include "i915_trace.h"
#include "i915_gem_mman.h"
+static void
+__update_stat(struct i915_mm_swap_stat *stat,
+ unsigned long pages,
+ ktime_t start)
+{
+ if (stat) {
+ start = ktime_get() - start;
+
+ write_seqlock(&stat->lock);
+ stat->time = ktime_add(stat->time, start);
+ stat->pages += pages;
+ write_sequnlock(&stat->lock);
+ }
+}
+
static int
i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages, unsigned int sizes)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_mm_swap_stat *stat = NULL;
struct drm_i915_gem_object *dst, *src;
- unsigned long start, diff, msec;
- bool blt_completed = false;
+ ktime_t start = ktime_get();
int err = -EINVAL;
GEM_BUG_ON(obj->swapto);
@@ -26,7 +41,6 @@ i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!i915->params.enable_eviction);
assert_object_held(obj);
- start = jiffies;
/* create a shadow object on smem region */
dst = i915_gem_object_create_shmem(i915, obj->base.size);
@@ -58,10 +72,14 @@ i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
if (i915->params.enable_eviction >= 2) {
err = i915_window_blt_copy(dst, src);
if (!err)
- blt_completed = true;
+ stat = &i915->mm.blt_swap_stats.out;
}
- if (err && i915->params.enable_eviction != 2)
+
+ if (err && i915->params.enable_eviction != 2) {
err = i915_gem_object_memcpy(dst, src);
+ if (!err)
+ stat = &i915->mm.memcpy_swap_stats.out;
+ }
__i915_gem_object_unpin_pages(src);
__i915_gem_object_unset_pages(src);
@@ -73,18 +91,7 @@ i915_gem_object_swapout_pages(struct drm_i915_gem_object *obj,
else
i915_gem_object_put(dst);
- if (!err) {
- diff = jiffies - start;
- msec = diff * 1000 / HZ;
- if (blt_completed) {
- atomic_long_add(sizes, &i915->num_bytes_swapped_out);
- atomic_long_add(msec, &i915->time_swap_out_ms);
- } else {
- atomic_long_add(sizes,
- &i915->num_bytes_swapped_out_memcpy);
- atomic_long_add(msec, &i915->time_swap_out_ms_memcpy);
- }
- }
+ __update_stat(stat, sizes >> PAGE_SHIFT, start);
return err;
}
@@ -94,9 +101,9 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages, unsigned int sizes)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_mm_swap_stat *stat = NULL;
struct drm_i915_gem_object *dst, *src;
- unsigned long start, diff, msec;
- bool blt_completed = false;
+ ktime_t start = ktime_get();
int err = -EINVAL;
GEM_BUG_ON(!obj->swapto);
@@ -106,7 +113,6 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!i915->params.enable_eviction);
assert_object_held(obj);
- start = jiffies;
src = obj->swapto;
@@ -134,10 +140,14 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
if (i915->params.enable_eviction >= 2) {
err = i915_window_blt_copy(dst, src);
if (!err)
- blt_completed = true;
+ stat = &i915->mm.blt_swap_stats.in;
}
- if (err && i915->params.enable_eviction != 2)
+
+ if (err && i915->params.enable_eviction != 2) {
err = i915_gem_object_memcpy(dst, src);
+ if (!err)
+ stat = &i915->mm.memcpy_swap_stats.in;
+ }
__i915_gem_object_unpin_pages(dst);
__i915_gem_object_unset_pages(dst);
@@ -149,18 +159,7 @@ i915_gem_object_swapin_pages(struct drm_i915_gem_object *obj,
i915_gem_object_put(src);
}
- if (!err) {
- diff = jiffies - start;
- msec = diff * 1000 / HZ;
- if (blt_completed) {
- atomic_long_add(sizes, &i915->num_bytes_swapped_in);
- atomic_long_add(msec, &i915->time_swap_in_ms);
- } else {
- atomic_long_add(sizes,
- &i915->num_bytes_swapped_in_memcpy);
- atomic_long_add(msec, &i915->time_swap_in_ms_memcpy);
- }
- }
+ __update_stat(stat, sizes >> PAGE_SHIFT, start);
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 983030ac39e1..f06f900b598e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -359,12 +359,46 @@ static void print_context_stats(struct seq_file *m,
print_file_stats(m, "[k]contexts", kstats);
}
+static void
+evict_stat(struct seq_file *m,
+ const char *name,
+ const char *direction,
+ struct i915_mm_swap_stat *stat)
+{
+ unsigned long pages;
+ unsigned int seq;
+ u64 time, rate;
+ ktime_t ktime;
+
+ do {
+ seq = read_seqbegin(&stat->lock);
+ pages = stat->pages;
+ ktime = stat->time;
+ } while (read_seqretry(&stat->lock, seq));
+
+ time = ktime_to_us(ktime);
+ rate = time ? div64_u64((u64)pages * PAGE_SIZE, time) : 0;
+ rate = div64_ul(rate * USEC_PER_SEC, 1024 * 1024);
+
+ seq_printf(m, "%s swap %s %lu MiB in %llums, %llu MiB/s.\n",
+ name, direction, pages * PAGE_SIZE, ktime_to_ms(ktime),
+ rate);
+}
+
+static void
+evict_stats(struct seq_file *m,
+ const char *name,
+ struct i915_mm_swap_stats *stats)
+{
+ evict_stat(m, name, "in", &stats->in);
+ evict_stat(m, name, "out", &stats->out);
+}
+
static int i915_gem_object_info(struct seq_file *m, void *data)
{
struct drm_i915_private *i915 = node_to_i915(m->private);
struct intel_memory_region *mr;
enum intel_region_id id;
- u64 time, bytes, rate;
seq_printf(m, "%u shrinkable [%u free] objects, %llu bytes\n",
i915->mm.shrink_count,
@@ -374,41 +408,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data)
seq_printf(m, "%s: total:%pa, available:%pa bytes\n",
mr->name, &mr->total, &mr->avail);
- time = atomic_long_read(&i915->time_swap_out_ms);
- bytes = atomic_long_read(&i915->num_bytes_swapped_out);
- if (time)
- rate = div64_u64(bytes * 1000, time * 1024 * 1024);
- else
- rate = 0;
- seq_printf(m, "BLT: swapout %llu Bytes in %llu mSec(%llu MB/Sec)\n",
- bytes, time, rate);
-
- time = atomic_long_read(&i915->time_swap_in_ms);
- bytes = atomic_long_read(&i915->num_bytes_swapped_in);
- if (time)
- rate = div64_u64(bytes * 1000, time * 1024 * 1024);
- else
- rate = 0;
- seq_printf(m, "BLT: swapin %llu Bytes in %llu mSec(%llu MB/Sec)\n",
- bytes, time, rate);
-
- time = atomic_long_read(&i915->time_swap_out_ms_memcpy);
- bytes = atomic_long_read(&i915->num_bytes_swapped_out_memcpy);
- if (time)
- rate = div64_u64(bytes * 1000, time * 1024 * 1024);
- else
- rate = 0;
- seq_printf(m, "Memcpy: swapout %llu Bytes in %llu mSec(%llu MB/Sec)\n",
- bytes, time, rate);
-
- time = atomic_long_read(&i915->time_swap_in_ms_memcpy);
- bytes = atomic_long_read(&i915->num_bytes_swapped_in_memcpy);
- if (time)
- rate = div64_u64(bytes * 1000, time * 1024 * 1024);
- else
- rate = 0;
- seq_printf(m, "Memcpy: swapin %llu Bytes in %llu mSec(%llu MB/Sec)\n",
- bytes, time, rate);
+ evict_stats(m, "Blitter", &i915->mm.blt_swap_stats);
+ evict_stats(m, "Memcpy", &i915->mm.memcpy_swap_stats);
seq_putc(m, '\n');
print_context_stats(m, i915);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6f0ab363bdee..45511f2d8da0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -49,6 +49,7 @@
#include <linux/shmem_fs.h>
#include <linux/stackdepot.h>
#include <linux/xarray.h>
+#include <linux/seqlock.h>
#include <drm/intel-gtt.h>
#include <drm/drm_legacy.h> /* for struct drm_dma_handle */
@@ -548,6 +549,17 @@ struct intel_l3_parity {
int which_slice;
};
+struct i915_mm_swap_stat {
+ seqlock_t lock;
+ unsigned long pages;
+ ktime_t time;
+};
+
+struct i915_mm_swap_stats {
+ struct i915_mm_swap_stat in;
+ struct i915_mm_swap_stat out;
+};
+
struct i915_gem_mm {
/* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */
spinlock_t obj_lock;
@@ -601,6 +613,9 @@ struct i915_gem_mm {
/* To protect above two set of vmas */
wait_queue_head_t window_queue;
+
+ struct i915_mm_swap_stats blt_swap_stats;
+ struct i915_mm_swap_stats memcpy_swap_stats;
};
#define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */
@@ -1220,16 +1235,6 @@ struct drm_i915_private {
* NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
* will be rejected. Instead look for a better place.
*/
-
- atomic_long_t num_bytes_swapped_out;
- atomic_long_t num_bytes_swapped_in;
- atomic_long_t time_swap_out_ms;
- atomic_long_t time_swap_in_ms;
-
- atomic_long_t num_bytes_swapped_out_memcpy;
- atomic_long_t num_bytes_swapped_in_memcpy;
- atomic_long_t time_swap_out_ms_memcpy;
- atomic_long_t time_swap_in_ms_memcpy;
};
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85cbdb8e2bb8..e94f3f689b30 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1151,6 +1151,11 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
INIT_LIST_HEAD(&i915->mm.purge_list);
INIT_LIST_HEAD(&i915->mm.shrink_list);
+ seqlock_init(&i915->mm.blt_swap_stats.in.lock);
+ seqlock_init(&i915->mm.blt_swap_stats.out.lock);
+ seqlock_init(&i915->mm.memcpy_swap_stats.in.lock);
+ seqlock_init(&i915->mm.memcpy_swap_stats.out.lock);
+
i915_gem_init__objects(i915);
}
--
2.26.2
More information about the Intel-gfx
mailing list