[Intel-gfx] [RFC 2/2] igt/gem_workarounds: igt to test workaround registers

Siluvery, Arun arun.siluvery at linux.intel.com
Fri Aug 8 18:39:45 CEST 2014


On 08/08/2014 15:12, Daniel Vetter wrote:
> On Fri, Aug 08, 2014 at 10:54:56AM +0100, arun.siluvery at linux.intel.com wrote:
>> From: Arun Siluvery <arun.siluvery at linux.intel.com>
>>
>> Some of the workarounds are lost following a gpu reset or suspend/resume;
>> this patch adds a test which captures register state before and after
>> the test scenario.
>>
>> This test currently verifies only bdw workarounds.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
>
> Some comments below.
>
>> ---
>>   lib/intel_reg.h         |   8 ++
>>   tests/Makefile.sources  |   1 +
>>   tests/gem_workarounds.c | 211 ++++++++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 220 insertions(+)
>>   create mode 100644 tests/gem_workarounds.c
>>
>> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
>> index 86175bb..d015c36 100644
>> --- a/lib/intel_reg.h
>> +++ b/lib/intel_reg.h
>> @@ -3628,4 +3628,12 @@ typedef enum {
>>   #define   GEN6_WIZ_HASHING_16x4			GEN6_WIZ_HASHING(1, 0)
>>   #define   GEN6_WIZ_HASHING_MASK			(GEN6_WIZ_HASHING(1, 1) << 16)
>>
>> +#define GAMTARBMODE			0x04a08
>> +#define _3D_CHICKEN3			0x02090
>> +#define GAM_ECOCHK			0x4090
>> +#define CHICKEN_PAR1_1			0x42080
>> +#define GEN7_FF_THREAD_MODE		0x20a0
>> +#define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
>> +#define GEN8_UCGCTL6			0x9430
>> +
>>   #endif /* _I810_REG_H */
>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>> index 0eb9369..a17acd1 100644
>> --- a/tests/Makefile.sources
>> +++ b/tests/Makefile.sources
>> @@ -134,6 +134,7 @@ TESTS_progs = \
>>   	gem_unfence_active_buffers \
>>   	gem_unref_active_buffers \
>>   	gem_wait_render_timeout \
>> +	gem_workarounds \
>>   	gen3_mixed_blits \
>>   	gen3_render_linear_blits \
>>   	gen3_render_mixed_blits \
>> diff --git a/tests/gem_workarounds.c b/tests/gem_workarounds.c
>> new file mode 100644
>> index 0000000..35d1aa7
>> --- /dev/null
>> +++ b/tests/gem_workarounds.c
>> @@ -0,0 +1,211 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *  Arun Siluvery <arun.siluvery at linux.intel.com>
>> + *
>> + */
>> +
>> +#define _GNU_SOURCE
>> +#include <stdbool.h>
>> +#include <unistd.h>
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <fcntl.h>
>> +#include <inttypes.h>
>> +#include <errno.h>
>> +#include <sys/stat.h>
>> +#include <sys/ioctl.h>
>> +#include <sys/mman.h>
>> +#include <time.h>
>> +#include <signal.h>
>> +
>> +#include "ioctl_wrappers.h"
>> +#include "drmtest.h"
>> +#include "igt_debugfs.h"
>> +#include "igt_aux.h"
>> +#include "intel_chipset.h"
>> +#include "intel_io.h"
>> +
>> +int drm_fd;
>> +static drm_intel_bufmgr *bufmgr;
>> +struct intel_batchbuffer *batch;
>> +uint32_t devid;
>> +
>> +enum operation {
>> +	GPU_RESET,
>> +	SUSPEND_RESUME,
>
> The suspend test doesn't seem to be wired up ...
>
> Also I think it would be worth to have a module-reload version here too.
>
Suspend/Resume is not working; the device does not resume even after the
timer has elapsed. Do we know whether suspend/resume works correctly on nightly?

>> +};
>> +
>> +struct workaround {
>> +	const char *reg_name;
>> +	uint32_t address;
>> +};
>> +
>> +static struct workaround bdw_workarounds[] =
>> +{
>> +	{ "GEN8_ROW_CHICKEN", GEN8_ROW_CHICKEN },
>> +	{ "GEN7_ROW_CHICKEN2", GEN7_ROW_CHICKEN2 },
>> +	{ "HALF_SLICE_CHICKEN3", HALF_SLICE_CHICKEN3 },
>> +	{ "GEN7_HALF_SLICE_CHICKEN1", GEN7_HALF_SLICE_CHICKEN1 },
>> +	{ "COMMON_SLICE_CHICKEN2", COMMON_SLICE_CHICKEN2 },
>> +	{ "HDC_CHICKEN0", HDC_CHICKEN0 },
>> +	{ "GEN7_CACHE_MODE_1", GEN7_CACHE_MODE_1 },
>> +	{ "GEN7_GT_MODE", GEN7_GT_MODE },
>> +	{ "GAMTARBMODE", GAMTARBMODE },
>> +	{ "_3D_CHICKEN3", _3D_CHICKEN3 },
>> +	{ "GAM_ECOCHK", GAM_ECOCHK },
>> +	{ "CHICKEN_PAR1_1", CHICKEN_PAR1_1 },
>> +	{ "GEN7_FF_THREAD_MODE", GEN7_FF_THREAD_MODE },
>> +	{ "GEN6_RC_SLEEP_PSMI_CONTROL", GEN6_RC_SLEEP_PSMI_CONTROL },
>> +	{ "GEN8_UCGCTL6", GEN8_UCGCTL6 },
>> +	{ "NULL", 0xFFFF },
>> +};
>
> Crazy idea I've just had to validate that all the w/a table here is
> up-to-date with the one in the kernel:
>
> - We create a special WA_REG macro in the kernel which we use to wrap all
>    registers used in workarounds at the specific use-site (i.e. not in the
>    header). So
>
>    I915_WRITE(WA_REG(GEN8_ROW_CHICKEN), ....);
>
> - That macro then adds the register to a table which we can dump through
>    debugfs with a file called intel_wa_registers. This happens at runtime.
>    This is important since a static list over all platforms might include
>    registers which hang some platforms when we read them.
>
> - A special subtest in this test here compares the kernel-provided list
>    with the one supplied here and makes sure that all the w/a in the kernel
>    list are in the test list, too. Or we just ditch the test list here
>    completely, but that might not work for special cases where we only need
>    to check some masks ...
>
> Opinions on this? Would this help with maintaining this testcase and
> ensuring that it is always up-to-date with the kernel w/a list? I really
> want to make sure we get this right, there's been way too many cases where
> w/a settings have been lost over resume, runtime pm, ctx switches ...
>
I will change the implementation to use this macro.
So in this case, is the table updated before each use case (reset,
suspend/resume, module reload, etc.)? Is it not sufficient to capture the
state at the beginning? My understanding is that the w/a state should really
stay the same, so we should compare the current state (e.g. after reset) to the
one captured at the beginning rather than to the state just before the reset.

I think it is easier to maintain if we completely remove the workaround
list from igt itself; based on the hardware, the macro can populate only those
workarounds that are applicable. But you mentioned that this may not work for
special cases; could you elaborate on these cases?

regards
Arun

>> +
>> +static void test_hang_gpu(void)
>> +{
>> +	int retry_count = 30;
>> +	enum stop_ring_flags flags;
>> +	struct drm_i915_gem_execbuffer2 execbuf;
>> +	struct drm_i915_gem_exec_object2 gem_exec;
>> +	uint32_t b[2] = {MI_BATCH_BUFFER_END};
>> +
>> +	igt_assert(retry_count);
>> +	igt_set_stop_rings(STOP_RING_DEFAULTS);
>> +
>> +	memset(&gem_exec, 0, sizeof(gem_exec));
>> +	gem_exec.handle = gem_create(drm_fd, 4096);
>> +	gem_write(drm_fd, gem_exec.handle, 0, b, sizeof(b));
>> +
>> +	memset(&execbuf, 0, sizeof(execbuf));
>> +	execbuf.buffers_ptr = (uintptr_t)&gem_exec;
>> +	execbuf.buffer_count = 1;
>> +	execbuf.batch_len = sizeof(b);
>> +
>> +	drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
>> +
>> +	while(retry_count--) {
>> +		flags = igt_get_stop_rings();
>> +		if (flags == 0)
>> +			break;
>> +		printf("gpu hang not yet cleared, retries left %d\n", retry_count);
>> +		sleep(1);
>> +	}
>> +
>> +	flags = igt_get_stop_rings();
>> +	if (flags)
>> +		igt_set_stop_rings(STOP_RING_NONE);
>> +}
>> +
>> +static void test_suspend_resume(void)
>> +{
>> +	printf("Suspending the device ...\n");
>> +	igt_system_suspend_autoresume();
>> +}
>> +
>> +static void capture_wa_state(struct workaround *wa_regs, int num_wa,
>> +			     unsigned int *reg_values)
>> +{
>> +	int i;
>> +
>> +	igt_assert(reg_values);
>> +	intel_register_access_init(intel_get_pci_device(), 0);
>> +
>> +	for (i = 0; i < num_wa; ++i)
>> +		reg_values[i] = intel_register_read(wa_regs[i].address);
>> +
>> +	intel_register_access_fini();
>> +}
>> +
>> +static void check_workarounds(struct workaround *wa, enum operation op)
>> +{
>> +	int i;
>> +	int num_wa = 0;
>> +	unsigned int *before;
>> +	unsigned int *after;
>> +	bool fail = false;
>> +
>> +	while(wa[num_wa].address != 0xFFFF)
>> +		num_wa++;
>> +
>> +	igt_assert(num_wa);
>> +
>> +	before = malloc(num_wa * sizeof(*before));
>> +	memset(before, 0x00, num_wa * sizeof(*before));
>> +	capture_wa_state(wa, num_wa, before);
>> +
>> +	switch (op) {
>> +	case GPU_RESET:
>> +		test_hang_gpu();
>> +		break;
>> +
>> +	case SUSPEND_RESUME:
>> +		test_suspend_resume();
>> +		break;
>> +
>> +	default:
>> +		fail = true;
>> +		goto out;
>> +	}
>> +
>> +	after = malloc(num_wa * sizeof(*after));
>> +	memset(after, 0x00, num_wa * sizeof(*after));
>> +	capture_wa_state(wa, num_wa, after);
>> +
>> +	for (i = 0; i < num_wa; ++i) {
>> +		if (before[i] != after[i]) {
>> +			fail = true;
>> +			printf("%s workaround failed, before: 0x%08X, after: 0x%08X\n",
>> +			       wa[i].reg_name, before[i], after[i]);
>> +		}
>> +	}
>> +
>> +	free(after);
>> +
>> +out:
>> +	free(before);
>> +
>> +	igt_assert(fail == false);
>> +}
>> +
>> +int main(int argc, char **argv)
>> +{
>> +	igt_subtest_init(argc, argv);
>> +
>> +	igt_fixture {
>> +		drm_fd = drm_open_any();
>> +
>> +		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
>> +		devid = intel_get_drm_devid(drm_fd);
>> +		batch = intel_batchbuffer_alloc(bufmgr, devid);
>> +	}
>> +
>> +	igt_subtest("check-workaround-data-after-reset") {
>> +		if (IS_BROADWELL(devid))
>
> The logic here should be switched around, or at least if you don't have a
> w/a table for a given platform we should skip the test. And tbh for
> anything gen8+ we should fail it so that someone knows there's still work
> to do.
>> +			check_workarounds(&bdw_workarounds[0], GPU_RESET);
>
> A simple
>
> 		else
> 			igt_skip_on("No w/a table found!\n");
>
> here should do the trick.
>
>> +	}
>> +
>> +
>> +	close(drm_fd);
>> +	igt_exit();
>> +}
>> --
>> 2.0.4
>>
>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>




More information about the Intel-gfx mailing list