[PATCH i-g-t 2/4] i915/gem_mmap_offset: Measure BAR sharing between cores

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Thu Jan 26 10:20:38 UTC 2023


From: Chris Wilson <chris.p.wilson at linux.intel.com>

Zbigniew raised a concern that the mmap bandwidth degraded with the
number of cores contending on the BAR access. So let's plot how the
measured bandwidth varies as we add CPU cores, each trying to read a
different page of memory.

Signed-off-by: Chris Wilson <chris.p.wilson at linux.intel.com>
---
 tests/i915/gem_mmap_offset.c | 103 +++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/tests/i915/gem_mmap_offset.c b/tests/i915/gem_mmap_offset.c
index 962fc1b739..8399c15b21 100644
--- a/tests/i915/gem_mmap_offset.c
+++ b/tests/i915/gem_mmap_offset.c
@@ -174,6 +174,100 @@ static void perf(int i915, const struct gem_memory_region *r)
 #undef MiB
 }
 
+enum scaling_op {
+	CLEAR,	/* memset() the mapping to zero */
+	WRITE,	/* memcpy() from a local buffer into the mapping */
+	READ	/* igt_memcpy_from_wc() from the mapping into a local buffer */
+};
+
+static const char *scaling_op_str(enum scaling_op op) /* name used for subtest enumeration and log output */
+{
+	switch (op) {
+	case CLEAR: return "clear";
+	case WRITE: return "write";
+	case READ: return "read";
+	}
+
+	return "bad-scaling-op"; /* unreachable for valid enum values */
+}
+
+static void scaling(int i915, const struct gem_memory_region *r, enum scaling_op op)
+{
+#define MiB (1024 * 1024)
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	const unsigned int rep = 1024;
+	const uint64_t sz = 4096;
+	uint32_t handle;
+	char buf[sz];
+	uint64_t *result;
+
+	igt_require(ncpus >= 4);
+	memset(buf, 0, sz); /* WRITE copies buf; don't read uninitialised stack */
+
+	result = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(result != MAP_FAILED); /* parent reads the children's timings back */
+	handle = gem_create_in_memory_region_list(i915, ncpus * sz, 0, &r->ci, 1);
+	make_resident(i915, 0, handle);
+
+	for_each_mmap_offset_type(i915, t) {
+		struct igt_covariance cv = {};
+		double m, b, R;
+		void *ptr;
+
+		ptr = __mmap_offset(i915, handle, 0, sz * ncpus,
+				    PROT_READ | PROT_WRITE,
+				    t->type);
+		if (!ptr)
+			continue;
+
+		for (int n = 1; n <= ncpus; n++) { /* one page per contending cpu, up to all cores */
+			uint64_t total;
+			igt_fork(child, n) {
+				void *map = ptr + child * sz;
+				struct timespec tv;
+
+				igt_gettime(&tv);
+				switch (op) {
+				case CLEAR:
+					for (int i = 0; i < rep; i++)
+						memset(map, 0, sz);
+					break;
+				case WRITE:
+					for (int i = 0; i < rep; i++)
+						memcpy(map, buf, sz);
+					break;
+				case READ:
+					for (int i = 0; i < rep; i++)
+						igt_memcpy_from_wc(buf, map, sz);
+					break;
+				}
+				result[child] = igt_nsec_elapsed(&tv);
+			}
+			igt_waitchildren();
+
+			total = 0;
+			for (int i = 0; i < n; i++)
+				total += result[i];
+			if ((n & (n - 1)) == 0) /* log only at powers of two */
+				igt_debug("%s[%d], total elapsed time:%.3fs, bw:%.1fMiB/s\n",
+					  t->name, n, total * 1e-9,
+					  1e9 * sz * n * rep / total / MiB);
+			igt_covariance_add(&cv, n, total);
+		}
+
+		munmap(ptr, sz * ncpus);
+
+		R = igt_covariance_get(&cv, &m, &b);
+		igt_info("%s: correlation:%.2f, %s bandwidth per-cpu:%.2fMiB/s\n",
+			 t->name, R, scaling_op_str(op), 1e9 * sz * rep / m / MiB);
+		igt_assert_f(R > 0.9,
+			     "mmap %s not linearly scaling with cores, correlation:%.3f\n", scaling_op_str(op), R);
+	}
+
+	gem_close(i915, handle);
+	munmap(result, 4096);
+#undef MiB
+}
+
 static void bad_object(int i915)
 {
 	uint32_t real_handle;
@@ -922,6 +1016,15 @@ igt_main
 		}
 	}
 
+	for (enum scaling_op op = CLEAR; op <= READ; op++) { /* clear, write, read */
+		igt_subtest_with_dynamic_f("scaling-%s", scaling_op_str(op)) {
+			for_each_memory_region(r, i915) {
+				igt_dynamic_f("%s", r->name) /* one dynamic subtest per memory region */
+					scaling(i915, r, op);
+			}
+		}
+	}
+
 	igt_subtest_f("blt-coherency")
 		blt_coherency(i915);
 
-- 
2.34.1



More information about the Intel-gfx-trybot mailing list