[Intel-gfx] [RFC 2/2] igt/gem_workarounds: igt to test workaround registers

Daniel Vetter daniel at ffwll.ch
Fri Aug 8 16:12:15 CEST 2014


On Fri, Aug 08, 2014 at 10:54:56AM +0100, arun.siluvery at linux.intel.com wrote:
> From: Arun Siluvery <arun.siluvery at linux.intel.com>
> 
> Some of the workarounds are lost followed by a gpu reset, suspend/resume;
> this patch adds a test which captures register state before and after
> the test scenario.
> 
> This test currently verifies only bdw workarounds.
> 
> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>

Some comments below.

> ---
>  lib/intel_reg.h         |   8 ++
>  tests/Makefile.sources  |   1 +
>  tests/gem_workarounds.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 220 insertions(+)
>  create mode 100644 tests/gem_workarounds.c
> 
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index 86175bb..d015c36 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -3628,4 +3628,12 @@ typedef enum {
>  #define   GEN6_WIZ_HASHING_16x4			GEN6_WIZ_HASHING(1, 0)
>  #define   GEN6_WIZ_HASHING_MASK			(GEN6_WIZ_HASHING(1, 1) << 16)
>  
> +#define GAMTARBMODE			0x04a08
> +#define _3D_CHICKEN3			0x02090
> +#define GAM_ECOCHK			0x4090
> +#define CHICKEN_PAR1_1			0x42080
> +#define GEN7_FF_THREAD_MODE		0x20a0
> +#define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
> +#define GEN8_UCGCTL6			0x9430
> +
>  #endif /* _I810_REG_H */
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 0eb9369..a17acd1 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -134,6 +134,7 @@ TESTS_progs = \
>  	gem_unfence_active_buffers \
>  	gem_unref_active_buffers \
>  	gem_wait_render_timeout \
> +	gem_workarounds \
>  	gen3_mixed_blits \
>  	gen3_render_linear_blits \
>  	gen3_render_mixed_blits \
> diff --git a/tests/gem_workarounds.c b/tests/gem_workarounds.c
> new file mode 100644
> index 0000000..35d1aa7
> --- /dev/null
> +++ b/tests/gem_workarounds.c
> @@ -0,0 +1,211 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *  Arun Siluvery <arun.siluvery at linux.intel.com>
> + *
> + */
> +
> +#define _GNU_SOURCE
> +#include <stdbool.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
> +#include <time.h>
> +#include <signal.h>
> +
> +#include "ioctl_wrappers.h"
> +#include "drmtest.h"
> +#include "igt_debugfs.h"
> +#include "igt_aux.h"
> +#include "intel_chipset.h"
> +#include "intel_io.h"
> +
> +int drm_fd;
> +static drm_intel_bufmgr *bufmgr;
> +struct intel_batchbuffer *batch;
> +uint32_t devid;
> +
> +enum operation {
> +	GPU_RESET,
> +	SUSPEND_RESUME,

The suspend test doesn't seem to be wired up ...

Also I think it would be worth having a module-reload version here too.

> +};
> +
> +struct workaround {
> +	const char *reg_name;
> +	uint32_t address;
> +};
> +
> +static struct workaround bdw_workarounds[] =
> +{
> +	{ "GEN8_ROW_CHICKEN", GEN8_ROW_CHICKEN },
> +	{ "GEN7_ROW_CHICKEN2", GEN7_ROW_CHICKEN2 },
> +	{ "HALF_SLICE_CHICKEN3", HALF_SLICE_CHICKEN3 },
> +	{ "GEN7_HALF_SLICE_CHICKEN1", GEN7_HALF_SLICE_CHICKEN1 },
> +	{ "COMMON_SLICE_CHICKEN2", COMMON_SLICE_CHICKEN2 },
> +	{ "HDC_CHICKEN0", HDC_CHICKEN0 },
> +	{ "GEN7_CACHE_MODE_1", GEN7_CACHE_MODE_1 },
> +	{ "GEN7_GT_MODE", GEN7_GT_MODE },
> +	{ "GAMTARBMODE", GAMTARBMODE },
> +	{ "_3D_CHICKEN3", _3D_CHICKEN3 },
> +	{ "GAM_ECOCHK", GAM_ECOCHK },
> +	{ "CHICKEN_PAR1_1", CHICKEN_PAR1_1 },
> +	{ "GEN7_FF_THREAD_MODE", GEN7_FF_THREAD_MODE },
> +	{ "GEN6_RC_SLEEP_PSMI_CONTROL", GEN6_RC_SLEEP_PSMI_CONTROL },
> +	{ "GEN8_UCGCTL6", GEN8_UCGCTL6 },
> +	{ "NULL", 0xFFFF },
> +};

Crazy idea I've just had to validate that all the w/a table here is
up-to-date with the one in the kernel:

- We create a special WA_REG macro in the kernel which we use to wrap all
  registers used in workarounds at the specific use-site (i.e. not in the
  header). So

  I915_WRITE(WA_REG(GEN8_ROW_CHICKEN), ....);

- That macro then adds the register to a table which we can dump through
  debugfs with a file called intel_wa_registers. This happens at runtime.
  This is important since a static list over all platforms might include
  registers which hang some platforms when we read them.

- A special subtest in this test here compares the kernel-provided list
  with the one supplied here and makes sure that all the w/a in the kernel
  list are in the test list, too. Or we just ditch the test list here
  completely, but that might not work for special cases where we only need
  to check some masks ...

Opinions on this? Would this help with maintaining this testcase and
ensuring that it is always up-to-date with the kernel w/a list? I really
want to make sure we get this right, there have been way too many cases
where w/a settings have been lost over resume, runtime pm, ctx switches ...

> +
> +static void test_hang_gpu(void)
> +{
> +	int retry_count = 30;
> +	enum stop_ring_flags flags;
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	struct drm_i915_gem_exec_object2 gem_exec;
> +	uint32_t b[2] = {MI_BATCH_BUFFER_END};
> +
> +	igt_assert(retry_count);
> +	igt_set_stop_rings(STOP_RING_DEFAULTS);
> +
> +	memset(&gem_exec, 0, sizeof(gem_exec));
> +	gem_exec.handle = gem_create(drm_fd, 4096);
> +	gem_write(drm_fd, gem_exec.handle, 0, b, sizeof(b));
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = (uintptr_t)&gem_exec;
> +	execbuf.buffer_count = 1;
> +	execbuf.batch_len = sizeof(b);
> +
> +	drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
> +
> +	while(retry_count--) {
> +		flags = igt_get_stop_rings();
> +		if (flags == 0)
> +			break;
> +		printf("gpu hang not yet cleared, retries left %d\n", retry_count);
> +		sleep(1);
> +	}
> +
> +	flags = igt_get_stop_rings();
> +	if (flags)
> +		igt_set_stop_rings(STOP_RING_NONE);
> +}
> +
> +static void test_suspend_resume(void)
> +{
> +	printf("Suspending the device ...\n");
> +	igt_system_suspend_autoresume();
> +}
> +
> +static void capture_wa_state(struct workaround *wa_regs, int num_wa,
> +			     unsigned int *reg_values)
> +{
> +	int i;
> +
> +	igt_assert(reg_values);
> +	intel_register_access_init(intel_get_pci_device(), 0);
> +
> +	for (i = 0; i < num_wa; ++i)
> +		reg_values[i] = intel_register_read(wa_regs[i].address);
> +
> +	intel_register_access_fini();
> +}
> +
> +static void check_workarounds(struct workaround *wa, enum operation op)
> +{
> +	int i;
> +	int num_wa = 0;
> +	unsigned int *before;
> +	unsigned int *after;
> +	bool fail = false;
> +
> +	while(wa[num_wa].address != 0xFFFF)
> +		num_wa++;
> +
> +	igt_assert(num_wa);
> +
> +	before = malloc(num_wa * sizeof(*before));
> +	memset(before, 0x00, num_wa * sizeof(*before));
> +	capture_wa_state(wa, num_wa, before);
> +
> +	switch (op) {
> +	case GPU_RESET:
> +		test_hang_gpu();
> +		break;
> +
> +	case SUSPEND_RESUME:
> +		test_suspend_resume();
> +		break;
> +
> +	default:
> +		fail = true;
> +		goto out;
> +	}
> +
> +	after = malloc(num_wa * sizeof(*after));
> +	memset(after, 0x00, num_wa * sizeof(*after));
> +	capture_wa_state(wa, num_wa, after);
> +
> +	for (i = 0; i < num_wa; ++i) {
> +		if (before[i] != after[i]) {
> +			fail = true;
> +			printf("%s workaround failed, before: 0x%08X, after: 0x%08X\n",
> +			       wa[i].reg_name, before[i], after[i]);
> +		}
> +	}
> +
> +	free(after);
> +
> +out:
> +	free(before);
> +
> +	igt_assert(fail == false);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	igt_subtest_init(argc, argv);
> +
> +	igt_fixture {
> +		drm_fd = drm_open_any();
> +
> +		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
> +		devid = intel_get_drm_devid(drm_fd);
> +		batch = intel_batchbuffer_alloc(bufmgr, devid);
> +	}
> +
> +	igt_subtest("check-workaround-data-after-reset") {
> +		if (IS_BROADWELL(devid))

The logic here should be switched around, or at least if you don't have a
w/a table for a given platform we should skip the test. And tbh for
anything gen8+ we should fail it so that someone knows there's still work
to do.
> +			check_workarounds(&bdw_workarounds[0], GPU_RESET);

A simple

		else
			igt_skip_on("No w/a table found!\n");

here should do the trick.

> +	}
> +
> +
> +	close(drm_fd);
> +	igt_exit();
> +}
> -- 
> 2.0.4
> 

> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx


-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch



More information about the Intel-gfx mailing list