[igt-dev] [PATCH i-g-t 2/3] tests: add slice power programming test

Wed Apr 25 11:48:27 UTC 2018

Verifies that the kernel programs slices correctly by reading the
value of the PWR_CLK_STATE register and, on platforms before
Cannonlake, by additionally checking with MI_SET_PREDICATE.

v2: Add subslice tests (Lionel)
    Use MI_SET_PREDICATE for further verification when available (Lionel)

v3: Rename to gem_ctx_rpcs (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 tests/Makefile.sources |   1 +
 tests/gem_ctx_rpcs.c   | 468 +++++++++++++++++++++++++++++++++++++++++
 tests/meson.build      |   1 +
 3 files changed, 470 insertions(+)
 create mode 100644 tests/gem_ctx_rpcs.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 791e4f83..cd4b1da1 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -60,6 +60,7 @@ TESTS_progs = \
 	gem_ctx_exec \
 	gem_ctx_isolation \
 	gem_ctx_param \
+	gem_ctx_rpcs \
 	gem_ctx_switch \
 	gem_ctx_thrash \
 	gem_double_irq_loop \
diff --git a/tests/gem_ctx_rpcs.c b/tests/gem_ctx_rpcs.c
new file mode 100644
index 00000000..653c2f72
--- /dev/null
+++ b/tests/gem_ctx_rpcs.c
@@ -0,0 +1,468 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Lionel Landwerlin <lionel.g.landwerlin at intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/wait.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("Test context powergating programming.");
+
+#define MI_STORE_REGISTER_MEM (0x24 << 23) /* emitted by read_rpcs_reg() to copy a register into a BO */
+
+#define MI_SET_PREDICATE      (0x1 << 23) /* OR'ed with one of the values below */
+#define  MI_SET_PREDICATE_NOOP_NEVER (0) /* emitted after the register stores in read_rpcs_reg() */
+#define  MI_SET_PREDICATE_1_SLICES   (5) /* base value: read_rpcs_reg() adds (expected_slices - 1) */
+#define  MI_SET_PREDICATE_2_SLICES   (6)
+#define  MI_SET_PREDICATE_3_SLICES   (7)
+
+#define MI_COPY_MEM_MEM       (0x2e << 23 | 3) /* the MI_COPY_MEM_MEM* macros are not referenced in this file */
+#define  MI_COPY_MEM_MEM_GTT_SRC     (1 << 22)
+#define  MI_COPY_MEM_MEM_GTT_DST     (1 << 21)
+
+#define GEN8_R_PWR_CLK_STATE		0x20C8 /* register sampled by read_rpcs_reg() */
+#define   GEN8_RPCS_ENABLE		(1 << 31) /* NOTE(review): shifts into the sign bit of int; 1u << 31 would avoid it */
+#define   GEN8_RPCS_S_CNT_ENABLE	(1 << 18)
+#define   GEN8_RPCS_S_CNT_SHIFT		15
+#define   GEN8_RPCS_S_CNT_MASK		(0x7 << GEN8_RPCS_S_CNT_SHIFT) /* field extracted by read_slice_count() */
+#define   GEN8_RPCS_SS_CNT_ENABLE	(1 << 11)
+#define   GEN8_RPCS_SS_CNT_SHIFT	8
+#define   GEN8_RPCS_SS_CNT_MASK		(0x7 << GEN8_RPCS_SS_CNT_SHIFT) /* field extracted by read_subslice_count() */
+#define   GEN8_RPCS_EU_MAX_SHIFT	4
+#define   GEN8_RPCS_EU_MAX_MASK		(0xf << GEN8_RPCS_EU_MAX_SHIFT)
+#define   GEN8_RPCS_EU_MIN_SHIFT	0
+#define   GEN8_RPCS_EU_MIN_MASK		(0xf << GEN8_RPCS_EU_MIN_SHIFT)
+
+#define RCS_TIMESTAMP (0x2000 + 0x358) /* stored alongside PWR_CLK_STATE; only logged via igt_debug() */
+
+#define PIPE_CONTROL     ((0x3<<29)|(0x3<<27)|(0x2<<24)) /* PIPE_CONTROL and its flags are not referenced in this file */
+#define   PIPE_CONTROL_WRITE_IMMEDIATE  (1<<14)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP  (3<<14)
+#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define   PIPE_CONTROL_WC_FLUSH (1<<12)
+#define   PIPE_CONTROL_IS_FLUSH (1<<11) /* MBZ on Ironlake */
+#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1)
+#define   PIPE_CONTROL_CS_STALL (1<<20)
+#define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+
+static int drm_fd; /* opened in the first igt_fixture, closed in the last one */
+static int devid; /* result of intel_get_drm_devid(drm_fd) */
+static uint64_t device_slice_mask = 0; /* filled from I915_PARAM_SLICE_MASK */
+static uint64_t device_subslice_mask = 0; /* filled from I915_PARAM_SUBSLICE_MASK */
+static uint32_t device_slice_count = 0; /* popcount of device_slice_mask */
+static uint32_t device_subslice_count = 0; /* popcount of device_subslice_mask */
+
+/* Clear the lowest set bit of @mask; asserts if @mask has no bit set. */
+static uint64_t mask_minus_one(uint64_t mask)
+{
+	unsigned int i;
+
+	/* UINT64_C(1) keeps the shift 64 bits wide even where long is 32 bits. */
+	for (i = 0; i < sizeof(mask) * 8; i++) {
+		if (mask & (UINT64_C(1) << i))
+			return mask & ~(UINT64_C(1) << i);
+	}
+	igt_assert(!"reached");
+	return 0;
+}
+
+/* Apply mask_minus_one() to @mask @n times and return what is left. */
+static uint64_t mask_minus(uint64_t mask, int n)
+{
+	uint64_t remaining = mask;
+
+	for (; n > 0; n--)
+		remaining = mask_minus_one(remaining);
+	return remaining;
+}
+
+/*
+ * Run a batch in @context on @engine that stores RCS_TIMESTAMP and
+ * PWR_CLK_STATE into a zeroed BO, then return the PWR_CLK_STATE dword.
+ */
+static uint32_t
+read_rpcs_reg(drm_intel_bufmgr *bufmgr,
+	      drm_intel_context *context,
+	      uint64_t engine,
+	      uint32_t expected_slices)
+{
+	struct intel_batchbuffer *batch;
+	drm_intel_bo *dst_bo;
+	uint32_t rpcs;
+	unsigned dwords = 3 + 3, relocs = 1 + 1; /* two register stores: two OUT_BATCH plus one OUT_RELOC each */
+	int ret;
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	igt_assert(batch);
+
+	intel_batchbuffer_set_context(batch, context);
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
+	igt_assert(dst_bo);
+
+	ret = drm_intel_bo_map(dst_bo, true /* write enable */); /* mapped only to clear the destination */
+	igt_assert_eq(ret, 0);
+	memset(dst_bo->virtual, 0, dst_bo->size);
+	drm_intel_bo_unmap(dst_bo);
+
+	/* Pre-Gen10 with non-zero @expected_slices: bracket the stores with MI_SET_PREDICATE. */
+	if (expected_slices != 0 && intel_gen(devid) < 10) {
+		BEGIN_BATCH(dwords + 2, relocs); /* +2 for the two MI_SET_PREDICATE dwords */
+		OUT_BATCH(MI_SET_PREDICATE | (1 - 1) |
+			  (MI_SET_PREDICATE_1_SLICES + expected_slices - 1));
+	} else {
+		BEGIN_BATCH(dwords, relocs);
+	}
+
+	OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
+	OUT_BATCH(RCS_TIMESTAMP);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); /* timestamp read back from byte 0 */
+
+	OUT_BATCH(MI_STORE_REGISTER_MEM | (4 - 2));
+	OUT_BATCH(GEN8_R_PWR_CLK_STATE);
+	OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 4); /* RPCS read back from byte 4 */
+
+	if (expected_slices != 0 && intel_gen(devid) < 10) /* same condition as the opening predicate */
+		OUT_BATCH(MI_SET_PREDICATE | (1 - 1) | MI_SET_PREDICATE_NOOP_NEVER);
+
+	ADVANCE_BATCH();
+
+	intel_batchbuffer_flush_on_ring(batch, engine);
+
+	drm_intel_bo_wait_rendering(dst_bo);
+
+	ret = drm_intel_bo_map(dst_bo, false /* write enable */);
+	igt_assert_eq(ret, 0);
+
+	uint32_t *ts = (uint32_t *) (dst_bo->virtual);
+
+	rpcs = *((uint32_t *) (dst_bo->virtual + 4)); /* NOTE(review): arithmetic on void * is a GNU extension */
+
+	igt_debug("rcs_timestamp=0x%x rpcs=0x%x\n", *ts, rpcs);
+
+	drm_intel_bo_unmap(dst_bo);
+
+	drm_intel_bo_unreference(dst_bo);
+
+	intel_batchbuffer_free(batch);
+
+	return rpcs;
+}
+
+
+/* Slice-count field of the PWR_CLK_STATE value read via @context on the render engine. */
+static uint32_t
+read_slice_count(drm_intel_bufmgr *bufmgr, drm_intel_context *context,
+		 uint32_t expected_slice_count)
+{
+	uint32_t rpcs = read_rpcs_reg(bufmgr, context, I915_EXEC_RENDER,
+				      expected_slice_count);
+	return (rpcs & GEN8_RPCS_S_CNT_MASK) >> GEN8_RPCS_S_CNT_SHIFT;
+}
+
+/* Subslice-count field of PWR_CLK_STATE for @context; passes 0 as expected slices. */
+static uint32_t
+read_subslice_count(drm_intel_bufmgr *bufmgr, drm_intel_context *context)
+{
+	uint32_t rpcs = read_rpcs_reg(bufmgr, context, I915_EXEC_RENDER, 0);
+	return (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT;
+}
+
+/* Query I915_CONTEXT_PARAM_SSEU for @context and @engine; either output pointer may be NULL. */
+static void
+context_get_sseu_masks(drm_intel_context *context, uint64_t engine,
+		       uint32_t *slice_mask, uint32_t *subslice_mask)
+{
+	struct drm_i915_gem_context_param_sseu sseu;
+	struct drm_i915_gem_context_param arg;
+	uint32_t context_id;
+	int ret = drm_intel_gem_context_get_id(context, &context_id);
+
+	igt_assert_eq(ret, 0);
+
+	/* arg.value carries the address of sseu, whose fields are read after the ioctl. */
+	memset(&sseu, 0, sizeof(sseu));
+	sseu.flags = engine;
+	memset(&arg, 0, sizeof(arg));
+	arg.value = (uintptr_t) &sseu;
+	arg.param = I915_CONTEXT_PARAM_SSEU;
+	arg.ctx_id = context_id;
+	do_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
+
+	if (slice_mask != NULL)
+		*slice_mask = sseu.packed.slice_mask;
+
+	if (subslice_mask != NULL)
+		*subslice_mask = sseu.packed.subslice_mask;
+}
+
+/* Read-modify-write I915_CONTEXT_PARAM_SSEU on @context and @engine, replacing the slice mask. */
+static void
+context_set_slice_mask(drm_intel_context *context, uint64_t engine,
+		       uint32_t slice_mask)
+{
+	struct drm_i915_gem_context_param_sseu sseu;
+	struct drm_i915_gem_context_param arg;
+	uint32_t context_id;
+	int ret = drm_intel_gem_context_get_id(context, &context_id);
+
+	igt_assert_eq(ret, 0);
+
+	memset(&sseu, 0, sizeof(sseu));
+	sseu.flags = engine;
+	memset(&arg, 0, sizeof(arg));
+	arg.value = (uintptr_t) &sseu;
+	arg.param = I915_CONTEXT_PARAM_SSEU;
+	arg.ctx_id = context_id;
+
+	/* Fetch the current values first so that only slice_mask is overwritten. */
+	do_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
+	sseu.packed.slice_mask = slice_mask;
+
+	/* The same arg, still pointing at sseu, is handed to SETPARAM. */
+	do_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
+}
+
+/* Read-modify-write I915_CONTEXT_PARAM_SSEU on @context and @engine, replacing the subslice mask. */
+static void
+context_set_subslice_mask(drm_intel_context *context, uint64_t engine,
+			  uint32_t subslice_mask)
+{
+	struct drm_i915_gem_context_param_sseu sseu;
+	struct drm_i915_gem_context_param arg;
+	uint32_t context_id;
+	int ret = drm_intel_gem_context_get_id(context, &context_id);
+
+	igt_assert_eq(ret, 0);
+
+	memset(&sseu, 0, sizeof(sseu));
+	sseu.flags = engine;
+	memset(&arg, 0, sizeof(arg));
+	arg.value = (uintptr_t) &sseu;
+	arg.param = I915_CONTEXT_PARAM_SSEU;
+	arg.ctx_id = context_id;
+
+	/* Fetch the current values first so that only subslice_mask is overwritten. */
+	do_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &arg);
+	sseu.packed.subslice_mask = subslice_mask;
+
+	/* The same arg, still pointing at sseu, is handed to SETPARAM. */
+	do_ioctl(drm_fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
+}
+
+/*
+ * Give two contexts a slice mask with @pg_slice_count slices removed and two
+ * the full device mask, then check the slice count each reads from PWR_CLK_STATE.
+ */
+static void rpcs_slice_program_gt(uint64_t engine, uint32_t pg_slice_count)
+{
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *pg_contexts[2], *df_contexts[2];
+	uint32_t pg_slice_mask = mask_minus(device_slice_mask, pg_slice_count);
+	uint32_t slice_count = __builtin_popcount(pg_slice_mask);
+	uint32_t slice_mask;
+
+	igt_debug("Running with %i slices powergated\n", pg_slice_count);
+
+	bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 10 * 4096);
+	igt_assert(bufmgr);
+
+	pg_contexts[0] = drm_intel_gem_context_create(bufmgr);
+	pg_contexts[1] = drm_intel_gem_context_create(bufmgr);
+	df_contexts[0] = drm_intel_gem_context_create(bufmgr);
+	df_contexts[1] = drm_intel_gem_context_create(bufmgr);
+	igt_assert(pg_contexts[0] && pg_contexts[1]); /* creation can fail; stop here if it did */
+	igt_assert(df_contexts[0] && df_contexts[1]);
+
+	context_set_slice_mask(pg_contexts[0], engine, pg_slice_mask);
+	context_set_slice_mask(pg_contexts[1], engine, pg_slice_mask);
+	context_set_slice_mask(df_contexts[0], engine, device_slice_mask);
+	context_set_slice_mask(df_contexts[1], engine, device_slice_mask);
+
+	for (int i = 0; i < ARRAY_SIZE(pg_contexts); i++) {
+		context_get_sseu_masks(pg_contexts[i], engine, &slice_mask, NULL);
+		igt_assert_eq(pg_slice_mask, slice_mask);
+	}
+
+	for (int i = 0; i < ARRAY_SIZE(df_contexts); i++) {
+		context_get_sseu_masks(df_contexts[i], engine, &slice_mask, NULL);
+		igt_assert_eq(device_slice_mask, slice_mask);
+	}
+
+	/*
+	 * False-positive check (predicates exist only before Gen10): predicating
+	 * on the full slice count in a powergated context is expected to read 0.
+	 */
+	if (intel_gen(devid) < 10) {
+		igt_assert_eq(0, read_slice_count(bufmgr, pg_contexts[0],
+						  device_slice_count));
+	}
+
+	igt_debug("pg_contexts:\n");
+	igt_assert_eq(slice_count, read_slice_count(bufmgr, pg_contexts[0], slice_count));
+	igt_assert_eq(slice_count, read_slice_count(bufmgr, pg_contexts[1], slice_count));
+	igt_assert_eq(slice_count, read_slice_count(bufmgr, pg_contexts[0], slice_count));
+	igt_assert_eq(slice_count, read_slice_count(bufmgr, pg_contexts[0], slice_count));
+
+	igt_debug("df_contexts:\n");
+	igt_assert_eq(device_slice_count, read_slice_count(bufmgr, df_contexts[0],
+							   device_slice_count));
+	igt_assert_eq(device_slice_count, read_slice_count(bufmgr, df_contexts[1],
+							   device_slice_count));
+	igt_assert_eq(device_slice_count, read_slice_count(bufmgr, df_contexts[0],
+							   device_slice_count));
+	igt_assert_eq(device_slice_count, read_slice_count(bufmgr, df_contexts[0],
+							   device_slice_count));
+
+	igt_debug("mixed:\n");
+	igt_assert_eq(slice_count, read_slice_count(bufmgr, pg_contexts[0], slice_count));
+
+	igt_assert_eq(device_slice_count, read_slice_count(bufmgr, df_contexts[0],
+							   device_slice_count));
+
+	for (int i = 0; i < ARRAY_SIZE(pg_contexts); i++)
+		drm_intel_gem_context_destroy(pg_contexts[i]);
+	for (int i = 0; i < ARRAY_SIZE(df_contexts); i++)
+		drm_intel_gem_context_destroy(df_contexts[i]);
+
+	drm_intel_bufmgr_destroy(bufmgr);
+}
+
+/* Verify subslice programming on a powergated and a default context, then swap their masks. */
+static void rpcs_subslice_program_gt(uint64_t engine, int pg_subslice_count)
+{
+	drm_intel_bufmgr *bufmgr;
+	drm_intel_context *context1, *context2;
+	uint32_t pg_subslice_mask =
+		mask_minus(device_subslice_mask, pg_subslice_count);
+	uint32_t subslice_count = __builtin_popcount(pg_subslice_mask);
+	uint32_t subslice_mask;
+
+	igt_debug("Running with %i subslices powergated\n", pg_subslice_count);
+
+	bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+	igt_assert(bufmgr);
+
+	context1 = drm_intel_gem_context_create(bufmgr);
+	igt_assert(context1);
+	context2 = drm_intel_gem_context_create(bufmgr);
+	igt_assert(context2);
+
+	context_set_subslice_mask(context1, engine, pg_subslice_mask);
+	context_set_subslice_mask(context2, engine, device_subslice_mask);
+
+	context_get_sseu_masks(context1, engine, NULL, &subslice_mask);
+	igt_assert_eq(pg_subslice_mask, subslice_mask);
+	context_get_sseu_masks(context2, engine, NULL, &subslice_mask);
+	igt_assert_eq(device_subslice_mask, subslice_mask);
+
+	igt_assert_eq(subslice_count, read_subslice_count(bufmgr, context1));
+	igt_assert_eq(device_subslice_count, read_subslice_count(bufmgr, context2));
+
+	/* Swap the two masks between the contexts and repeat the same checks. */
+	context_set_subslice_mask(context1, engine, device_subslice_mask);
+	context_set_subslice_mask(context2, engine, pg_subslice_mask);
+
+	context_get_sseu_masks(context1, engine, NULL, &subslice_mask);
+	igt_assert_eq(device_subslice_mask, subslice_mask);
+	context_get_sseu_masks(context2, engine, NULL, &subslice_mask);
+	igt_assert_eq(pg_subslice_mask, subslice_mask);
+
+	igt_assert_eq(device_subslice_count, read_subslice_count(bufmgr, context1));
+	igt_assert_eq(subslice_count, read_subslice_count(bufmgr, context2));
+
+	drm_intel_gem_context_destroy(context1);
+	drm_intel_gem_context_destroy(context2);
+	drm_intel_bufmgr_destroy(bufmgr);
+}
+
+/* Register one subtest per powergated slice/subslice count; each requires enough device (sub)slices. */
+igt_main
+{
+	uint64_t engines[] = { I915_EXEC_RENDER }; /* I915_EXEC_BSD and I915_EXEC_VEBOX are left disabled */
+	/* TODO: update these values for newer SKUs. */
+	int i, e, max_slices = 3, max_subslices = 3;
+	drm_i915_getparam_t gp;
+
+	igt_fixture {
+		int value = 0; /* GETPARAM writes an int, so it must not target the 64-bit globals */
+
+		/* Use drm_open_driver to verify device existence */
+		drm_fd = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(drm_fd);
+		devid = intel_get_drm_devid(drm_fd);
+
+		/* We can only program slice count from Gen8. */
+		igt_skip_on(intel_gen(devid) < 8);
+
+		gp.param = I915_PARAM_SLICE_MASK;
+		gp.value = &value;
+		do_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
+		device_slice_mask = (uint32_t) value;
+		device_slice_count = __builtin_popcountll(device_slice_mask);
+
+		gp.param = I915_PARAM_SUBSLICE_MASK;
+		gp.value = &value;
+		do_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
+		device_subslice_mask = (uint32_t) value;
+		device_subslice_count = __builtin_popcountll(device_subslice_mask);
+	}
+
+	for (i = 1; i < max_slices; i++) {
+		igt_subtest_f("rpcs-%i-pg-slice-program-rcs", i) {
+			igt_require(device_slice_count > i);
+
+			for (e = 0; e < ARRAY_SIZE(engines); e++)
+				rpcs_slice_program_gt(engines[e], i);
+		}
+	}
+
+	for (i = 1; i < max_subslices; i++) {
+		igt_subtest_f("rpcs-%i-pg-subslice-program-rcs", i) {
+			igt_require(device_subslice_count > i);
+			/* Only available on some Atom platforms and Gen10+. */
+			igt_require(IS_BROXTON(devid) || IS_GEMINILAKE(devid) ||
+				    intel_gen(devid) >= 10);
+
+			for (e = 0; e < ARRAY_SIZE(engines); e++)
+				rpcs_subslice_program_gt(engines[e], i);
+		}
+	}
+
+	igt_fixture {
+		close(drm_fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 015afa47..6f0480d2 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -37,6 +37,7 @@ test_progs = [
 	'gem_ctx_exec',
 	'gem_ctx_isolation',
 	'gem_ctx_param',
+	'gem_ctx_rpcs',
 	'gem_ctx_switch',
 	'gem_ctx_thrash',
 	'gem_double_irq_loop',
-- 
2.17.0