[Intel-gfx] [PATCH i-g-t] i915/gem_mmap_gtt: Reset faster and longer to catch fencing errors

Mika Kuoppala mika.kuoppala at linux.intel.com
Thu Jan 24 15:25:10 UTC 2019


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Performing a GPU reset clobbers the fence registers, affecting which
> addresses the tiled GTT mmap access. If the driver does not take
> precautions across a GPU reset, a client may read the wrong values (but
> only within their own buffer as the fence will only be degraded to
> I915_TILING_NONE, reducing the access area). However, as this requires
> performing a read using the indirect GTT at exactly the same time as the
> reset occurs, it can be quite difficult to catch, so repeat the test
> many times and across all cores simultaneously.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  tests/i915/gem_mmap_gtt.c | 99 +++++++++++++++++++++++++++------------
>  1 file changed, 68 insertions(+), 31 deletions(-)
>
> diff --git a/tests/i915/gem_mmap_gtt.c b/tests/i915/gem_mmap_gtt.c
> index f63535556..21880d31d 100644
> --- a/tests/i915/gem_mmap_gtt.c
> +++ b/tests/i915/gem_mmap_gtt.c
> @@ -38,6 +38,7 @@
>  #include "drm.h"
>  
>  #include "igt.h"
> +#include "igt_sysfs.h"
>  #include "igt_x86.h"
>  
>  #ifndef PAGE_SIZE
> @@ -375,50 +376,86 @@ test_clflush(int fd)
>  static void
>  test_hang(int fd)
>  {
> -	igt_hang_t hang;
> -	uint32_t patterns[] = {
> +	const uint32_t patterns[] = {
>  		0, 0xaaaaaaaa, 0x55555555, 0xcccccccc,
>  	};
> -	uint32_t *gtt[3];
> -	int last_pattern = 0;
> -	int next_pattern = 1;
> -	int i;
> +	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +	struct {
> +		bool done;
> +		bool error;
> +	} *control;
> +	unsigned long count;
> +	igt_hang_t hang;
> +	int dir;
>  
> -	for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
> -		uint32_t handle;
> +	hang = igt_allow_hang(fd, 0, 0);
> +	igt_sysfs_set_parameter(fd, "reset", "1"); /* global resets */

igt_assert to be sure that you made it?

igt_sysfs_set_module_parameter would be less misleadning :P

>  
> -		handle = gem_create(fd, OBJECT_SIZE);
> -		gem_set_tiling(fd, handle, i, 2048);
> +	control = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_assert(control != MAP_FAILED);
>  
> -		gtt[i] = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE);
> -		set_domain_gtt(fd, handle);
> -		gem_close(fd, handle);
> -	}
> +	igt_fork(child, ncpus) {
> +		int last_pattern = 0;
> +		int next_pattern = 1;
> +		uint32_t *gtt[2];

You throw tiling none out as it is just a distraction and
waste of cycles?

>  
> -	hang = igt_hang_ring(fd, I915_EXEC_RENDER);
> +		for (int i = 0; i < ARRAY_SIZE(gtt); i++) {
> +			uint32_t handle;
>  
> -	do {
> -		for (i = 0; i < OBJECT_SIZE / 64; i++) {
> -			int x = 16*i + (i%16);
> +			handle = gem_create(fd, OBJECT_SIZE);
> +			gem_set_tiling(fd, handle, I915_TILING_X + i, 2048);

You could have setup a priori. But this prolly is faster than
one reset cycle of tests so nothing to gain.

>  
> -			igt_assert(gtt[0][x] == patterns[last_pattern]);
> -			igt_assert(gtt[1][x] == patterns[last_pattern]);
> -			igt_assert(gtt[2][x] == patterns[last_pattern]);
> +			gtt[i] = gem_mmap__gtt(fd, handle, OBJECT_SIZE, PROT_WRITE);
> +			set_domain_gtt(fd, handle);
> +			gem_close(fd, handle);
> +		}
>  
> -			gtt[0][x] = patterns[next_pattern];
> -			gtt[1][x] = patterns[next_pattern];
> -			gtt[2][x] = patterns[next_pattern];
> +		while (!READ_ONCE(control->done)) {
> +			for (int i = 0; i < OBJECT_SIZE / 64; i++) {
> +				uint32_t expected = patterns[last_pattern];
> +				uint32_t found[2];
> +				int x = 16*i + (i%16);

nitpicking here for consts and unsigned x.

> +
> +				found[0] = READ_ONCE(gtt[0][x]);
> +				found[1] = READ_ONCE(gtt[1][x]);
> +
> +				if (found[0] != expected ||
> +				    found[1] != expected) {
> +					igt_warn("child[%d] found (%x, %x), expecting %x\n",
> +						 child,
> +						 found[0], found[1],
> +						 expected);
> +					control->error = true;
> +					exit(0);
> +				}
> +
> +				gtt[0][x] = patterns[next_pattern];
> +				gtt[1][x] = patterns[next_pattern];
> +			}
> +
> +			last_pattern = next_pattern;
> +			next_pattern = (next_pattern + 1) % ARRAY_SIZE(patterns);
>  		}
> +	}
>  
> -		last_pattern = next_pattern;
> -		next_pattern = (next_pattern + 1) % ARRAY_SIZE(patterns);
> -	} while (gem_bo_busy(fd, hang.spin->handle));

Well, no concern here anymore that something would sync on
there.

Only nitpicks so,
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>


> +	count = 0;
> +	dir = igt_debugfs_dir(fd);
> +	igt_until_timeout(5) {
> +		igt_sysfs_set(dir, "i915_wedged", "-1");
> +		if (READ_ONCE(control->error))
> +			break;
> +		count++;
> +	}
> +	close(dir);
> +	igt_info("%lu resets\n", count);
> +
> +	control->done = true;
> +	igt_waitchildren();
>  
> -	igt_post_hang_ring(fd, hang);
> +	igt_assert(!control->error);
> +	munmap(control, 4096);
>  
> -	munmap(gtt[0], OBJECT_SIZE);
> -	munmap(gtt[1], OBJECT_SIZE);
> -	munmap(gtt[2], OBJECT_SIZE);
> +	igt_disallow_hang(fd, hang);
>  }
>  
>  static int min_tile_width(uint32_t devid, int tiling)
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list