[igt-dev] [PATCH i-g-t v3 01/12] lib: Fix off-by-one-page in 48b obj.flags
Kamil Konieczny
kamil.konieczny at linux.intel.com
Mon Sep 5 17:19:18 UTC 2022
Hi,
On 2022-09-02 at 14:59:56 +0200, Zbigniew Kempczyński wrote:
> From: Chris Wilson <chris.p.wilson at linux.intel.com>
>
> The kernel checks that the last byte of the object will fit inside the
> 32b GTT window unless the object is marked as being suitable for use
> with 48b addressing. However, the spinner only checked the start of the
> object which depending on the mix of size/alignment, could allow the
> object to straddle the 32b boundary and not be marked as 48b capable.
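Just to spell out the check the kernel applies to the last byte (a sketch
with hypothetical names; the patch itself simply sets the flag for every
object when the ppGTT is big enough):

static void mark_48b_if_needed(struct drm_i915_gem_exec_object2 *obj,
			       uint64_t offset, uint64_t size)
{
	/*
	 * Testing only "offset >= 1ull << 32" misses an object that
	 * starts below but ends above the 4GiB mark; the last byte
	 * has to be tested instead.
	 */
	if (offset + size - 1 >= (1ull << 32))
		obj->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
}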
>
> Always set 48b for all the objects where ppGTT merits it.
>
> In the process, there was one location where we failed to write the
> upper 32b of the address into the instruction.
>
> Signed-off-by: Chris Wilson <chris.p.wilson at linux.intel.com>
> ---
> lib/igt_dummyload.c | 63 ++++++++++++++++++++++-----------------------
> 1 file changed, 31 insertions(+), 32 deletions(-)
>
> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
> index dc1bd51e08..17ae21f567 100644
> --- a/lib/igt_dummyload.c
> +++ b/lib/igt_dummyload.c
> @@ -100,12 +100,21 @@ emit_recursive_batch(igt_spin_t *spin,
> struct drm_i915_gem_execbuffer2 *execbuf;
> struct drm_i915_gem_exec_object2 *obj;
> unsigned int flags[GEM_MAX_ENGINES];
> + unsigned int objflags = 0;
> unsigned int nengine;
> int fence_fd = -1;
> - uint64_t addr, addr_scratch, ahnd = opts->ahnd, objflags = 0;
> + uint64_t addr, addr_scratch, ahnd = opts->ahnd;
> uint32_t *cs;
> int i;
>
> + igt_assert(!(opts->ctx && opts->ctx_id));
> +
> + r = memset(relocs, 0, sizeof(relocs));
> + execbuf = memset(&spin->execbuf, 0, sizeof(spin->execbuf));
> + execbuf->rsvd1 = opts->ctx ? opts->ctx->id : opts->ctx_id;
> + execbuf->flags = I915_EXEC_NO_RELOC;
> + obj = memset(spin->obj, 0, sizeof(spin->obj));
> +
> /*
> * Pick a random location for our spinner et al.
> *
> @@ -121,8 +130,12 @@ emit_recursive_batch(igt_spin_t *spin,
> * that wrap.
> */
>
> + addr = gem_aperture_size(fd) / 2;
imho this divide by 2 is a bit too early.
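i.e. if the point is to base the 48b decision on the full aperture before
halving, something like (sketch only):

	uint64_t aperture = gem_aperture_size(fd);

	if (aperture >> 32)
		objflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
	addr = aperture / 2;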
> + if (addr >> 32)
> + objflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +
> if (!ahnd) {
> - addr = gem_aperture_size(fd) / 2;
> + addr /= 2;
imho this is a bit too restrictive: addr is aperture/4 at this point and
random() % addr / 2 parses as (random() % addr) / 2, so only 1/8 of the
address space gets used, while the best would be to cover 1/4 ... 3/4
> if (addr >> 31)
> addr = 1u << 31;
> addr += random() % addr / 2;
Maybe here it should be:
size = 1/2 * aperture;
addr += random() % size;
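With addr already at aperture/4 there (ignoring the 1u << 31 clamp just
above), that spreads placements over 1/4 ... 3/4 of the GTT, e.g. a sketch:

	uint64_t aperture = gem_aperture_size(fd);
	uint64_t size = aperture / 2;

	/* addr == aperture / 4 here, so it ends up anywhere in [1/4, 3/4) */
	addr += random() % size;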
Regards,
Kamil
> @@ -131,8 +144,6 @@ emit_recursive_batch(igt_spin_t *spin,
> objflags |= EXEC_OBJECT_PINNED;
> }
>
> - igt_assert(!(opts->ctx && opts->ctx_id));
> -
> nengine = 0;
> if (opts->engine == ALL_ENGINES) {
> struct intel_execution_engine2 *engine;
> @@ -150,11 +161,6 @@ emit_recursive_batch(igt_spin_t *spin,
> }
> igt_require(nengine);
>
> - memset(relocs, 0, sizeof(relocs));
> - execbuf = memset(&spin->execbuf, 0, sizeof(spin->execbuf));
> - execbuf->flags = I915_EXEC_NO_RELOC;
> - obj = memset(spin->obj, 0, sizeof(spin->obj));
> -
> obj[BATCH].handle =
> handle_create(fd, BATCH_SIZE, opts->flags, &spin->batch);
> if (!spin->batch) {
> @@ -175,9 +181,7 @@ emit_recursive_batch(igt_spin_t *spin,
> BATCH_SIZE, 0,
> ALLOC_STRATEGY_LOW_TO_HIGH);
> obj[BATCH].offset = CANONICAL(addr);
> - obj[BATCH].flags |= objflags;
> - if (obj[BATCH].offset >= (1ull << 32))
> - obj[BATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + obj[BATCH].flags = objflags;
>
> addr += BATCH_SIZE;
>
> @@ -192,27 +196,23 @@ emit_recursive_batch(igt_spin_t *spin,
>
> obj[SCRATCH].handle = opts->dependency;
> obj[SCRATCH].offset = CANONICAL(addr_scratch);
> - obj[SCRATCH].flags |= objflags;
> - if (obj[SCRATCH].offset >= (1ull << 32))
> - obj[SCRATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + obj[SCRATCH].flags = objflags;
>
> if (!(opts->flags & IGT_SPIN_SOFTDEP)) {
> obj[SCRATCH].flags |= EXEC_OBJECT_WRITE;
>
> /* dummy write to dependency */
> - r = &relocs[obj[BATCH].relocation_count++];
> r->presumed_offset = obj[SCRATCH].offset;
> r->target_handle = obj[SCRATCH].handle;
> r->offset = sizeof(uint32_t) * 1020;
> r->delta = 0;
> r->read_domains = I915_GEM_DOMAIN_RENDER;
> r->write_domain = I915_GEM_DOMAIN_RENDER;
> + r++;
> }
>
> execbuf->buffer_count++;
> } else if (opts->flags & IGT_SPIN_POLL_RUN) {
> - r = &relocs[obj[BATCH].relocation_count++];
> -
> igt_assert(!opts->dependency);
>
> if (gen == 4 || gen == 5) {
> @@ -244,6 +244,7 @@ emit_recursive_batch(igt_spin_t *spin,
> ALLOC_STRATEGY_LOW_TO_HIGH);
> addr += 4096; /* guard page */
> obj[SCRATCH].offset = CANONICAL(addr);
> + obj[SCRATCH].flags = objflags;
> addr += 4096;
>
> igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);
> @@ -253,10 +254,6 @@ emit_recursive_batch(igt_spin_t *spin,
> r->offset = sizeof(uint32_t) * 1;
> r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;
>
> - obj[SCRATCH].flags |= objflags;
> - if (obj[SCRATCH].offset >= (1ull << 32))
> - obj[SCRATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> -
> *cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
>
> if (gen >= 8) {
> @@ -274,6 +271,7 @@ emit_recursive_batch(igt_spin_t *spin,
> *cs++ = 1;
>
> execbuf->buffer_count++;
> + r++;
> }
>
> spin->handle = obj[BATCH].handle;
> @@ -314,8 +312,6 @@ emit_recursive_batch(igt_spin_t *spin,
> * no matter how they modify it (from either the GPU or CPU).
> */
> if (gen >= 8) { /* arbitrary cutoff between ring/execlists submission */
> - r = &relocs[obj[BATCH].relocation_count++];
> -
> /*
> * On Sandybridge+ the comparison is a strict greater-than:
> * if the value at spin->condition is greater than BB_END,
> @@ -334,15 +330,17 @@ emit_recursive_batch(igt_spin_t *spin,
> r->offset = (cs + 2 - spin->batch) * sizeof(*cs);
> r->read_domains = I915_GEM_DOMAIN_COMMAND;
> r->delta = (spin->condition - spin->batch) * sizeof(*cs);
> + igt_assert(r->delta < 4096);
>
> *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
> *cs++ = MI_BATCH_BUFFER_END;
> *cs++ = r->presumed_offset + r->delta;
> - *cs++ = 0;
> + *cs++ = r->presumed_offset >> 32;
> +
> + r++;
> }
>
> /* recurse */
> - r = &relocs[obj[BATCH].relocation_count++];
> r->target_handle = obj[BATCH].handle;
> r->presumed_offset = obj[BATCH].offset;
> r->offset = (cs + 1 - spin->batch) * sizeof(*cs);
> @@ -363,11 +361,10 @@ emit_recursive_batch(igt_spin_t *spin,
> *cs = r->presumed_offset + r->delta;
> cs++;
> }
> - obj[BATCH].relocs_ptr = to_user_pointer(relocs);
> + r++;
>
> execbuf->buffers_ptr =
> to_user_pointer(obj + (2 - execbuf->buffer_count));
> - execbuf->rsvd1 = opts->ctx ? opts->ctx->id : opts->ctx_id;
>
> if (opts->flags & IGT_SPIN_FENCE_OUT)
> execbuf->flags |= I915_EXEC_FENCE_OUT;
> @@ -382,14 +379,16 @@ emit_recursive_batch(igt_spin_t *spin,
> execbuf->rsvd2 = opts->fence;
> }
>
> + /* For allocator we have to rid of relocation_count */
> + if (!ahnd) {
> + obj[BATCH].relocs_ptr = to_user_pointer(relocs);
> + obj[BATCH].relocation_count = r - relocs;
> + }
> +
> for (i = 0; i < nengine; i++) {
> execbuf->flags &= ~ENGINE_MASK;
> execbuf->flags |= flags[i];
>
> - /* For allocator we have to rid of relocation_count */
> - for (int j = 0; j < ARRAY_SIZE(spin->obj) && ahnd; j++)
> - spin->obj[j].relocation_count = 0;
> -
> gem_execbuf_wr(fd, execbuf);
>
> if (opts->flags & IGT_SPIN_FENCE_OUT) {
> --
> 2.34.1
>