[igt-dev] [PATCH i-g-t 2/4] lib: Remove duplications in gpu_fill used by media_spin

Wed Apr 25 21:49:02 UTC 2018

On 25/04/18 06:26, Katarzyna Dec wrote:
> Let's remove duplications introduced by moving media_spin helper
> functions to gpu_fill. These were mainly the same functions
> as for Gen8 media/gpgpu fill. gen8_render_flush from media_spin
> was replaced by gen7_render_flush. The only functions that were
> left intact are gen8_emit_media_objects_spin and
> gen8_spin_render_flush.

This is going to be a bit long since I have several comments that don't 
apply to a specific place in the code, apologies in advance :)

I can see gen8_spin_render_flush in the header but it is not implemented 
or called from anywhere. Did you miss something from your git commit or 
can we actually drop it? This isn't tied to any gen (it just does an 
execbuf) so I guess it is probably the latter.

gen8_emit_media_objects_spin is also very similar to what is in the 
inner loop of the gen7 version, maybe we can do something like:

void emit_media_object(struct intel_batchbuffer *batch,
			unsigned xoff, unsigned yoff)
{
	OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));

	/* interface descriptor offset */
	OUT_BATCH(0);

	/* without indirect data */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);

	/* inline data (xoffset, yoffset) */
	OUT_BATCH(xoff);
	OUT_BATCH(yoff);

	if (AT_LEAST_GEN(batch->devid, 8) &&
	    !IS_CHERRYVIEW(batch->devid))
		gen8_emit_media_state_flush(batch);
}

void
gen7_emit_media_objects(struct intel_batchbuffer *batch,
			unsigned x, unsigned y,
			unsigned width, unsigned height)
{
	int i, j;

	for (i = 0; i < width / 16; i++)
		for (j = 0; j < height / 16; j++)
			emit_media_object(batch,
					  x + i * 16,
					  y + j * 16);
}

And from the spin file just call emit_media_object(batch, 0, 0).
I'm happy if you do this on top as an additional patch if you don't want 
to mess with this one.

Regarding remaining "spin" functions, I also see 
gen8_emit_vfe_state_spin and gen8_spin_curbe_buffer_data. Including the 
_spin one, there are 3 variants of gen8_emit_vfe_state which we could 
probably unify by passing in some params, something like:

void
gen8_emit_vfe_state(struct intel_batchbuffer *batch,
		uint32_t threads, uint32_t urb_entries,
		uint32_t urb_entry_size, uint32_t urb_alloc_size)
{
	/* number of threads & urb entries */
	OUT_BATCH(threads << 16 | urb_entries << 8);

	OUT_BATCH(0);

	/* urb entry size & curbe size */
	OUT_BATCH(urb_entry_size << 16 | urb_alloc_size);

	/* scoreboard */
	OUT_BATCH(0);
	OUT_BATCH(0);
	OUT_BATCH(0);
}

Lastly, for gen8_spin_curbe_buffer_data the only 2 differences with 
gen7_fill_curbe_buffer_data are the type of the variable used (u8 vs 
u32) and the size of the allocation. Using u32 in both cases shouldn't 
be an issue and we can potentially pass the size in as a parameter, thus 
allowing us to drop another duplicated function.
Both of those cleanups can also can go later in the series with new 
patches (possibly before the reformatting).

One final comment: in the media_spin file we now mix calls to "spin" 
functions and "fill" functions. For functions that are generic and not 
specific to media/gpgpu fill we can maybe drop the "fill" tag to avoid 
confusion. One more patch on the pile

Suggestions/ramblings aside, the changes in this patch LGTM.

Daniele

> 
> Signed-off-by: Katarzyna Dec <katarzyna.dec at intel.com>
> Cc: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> ---
>   lib/gpu_fill.c   | 145 -------------------------------------------------------
>   lib/gpu_fill.h   |  28 +----------
>   lib/media_spin.c |  24 ++++-----
>   3 files changed, 13 insertions(+), 184 deletions(-)
> 
> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> index f4b33c05..f5fc61bb 100644
> --- a/lib/gpu_fill.c
> +++ b/lib/gpu_fill.c
> @@ -351,18 +351,6 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>   	OUT_BATCH(0xffffffff);
>   }
>   
> -void
> -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
> -{
> -	int ret;
> -
> -	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
> -	if (ret == 0)
> -		ret = drm_intel_gem_bo_context_exec(batch->bo, NULL,
> -						    batch_end, 0);
> -	igt_assert_eq(ret, 0);
> -}
> -
>   uint32_t
>   gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
>   			    uint32_t iters)
> @@ -428,57 +416,6 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
>   	return offset;
>   }
>   
> -uint32_t
> -gen8_spin_surface_state(struct intel_batchbuffer *batch,
> -			struct igt_buf *buf,
> -			uint32_t format,
> -			int is_dst)
> -{
> -	struct gen8_surface_state *ss;
> -	uint32_t write_domain, read_domain, offset;
> -	int ret;
> -
> -	if (is_dst) {
> -		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
> -	} else {
> -		write_domain = 0;
> -		read_domain = I915_GEM_DOMAIN_SAMPLER;
> -	}
> -
> -	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> -	offset = intel_batchbuffer_subdata_offset(batch, ss);
> -
> -	ss->ss0.surface_type = GEN8_SURFACE_2D;
> -	ss->ss0.surface_format = format;
> -	ss->ss0.render_cache_read_write = 1;
> -	ss->ss0.vertical_alignment = 1; /* align 4 */
> -	ss->ss0.horizontal_alignment = 1; /* align 4 */
> -
> -	if (buf->tiling == I915_TILING_X)
> -		ss->ss0.tiled_mode = 2;
> -	else if (buf->tiling == I915_TILING_Y)
> -		ss->ss0.tiled_mode = 3;
> -
> -	ss->ss8.base_addr = buf->bo->offset;
> -
> -	ret = drm_intel_bo_emit_reloc(batch->bo,
> -				intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
> -				buf->bo, 0,
> -				read_domain, write_domain);
> -	igt_assert_eq(ret, 0);
> -
> -	ss->ss2.height = igt_buf_height(buf) - 1;
> -	ss->ss2.width  = igt_buf_width(buf) - 1;
> -	ss->ss3.pitch  = buf->stride - 1;
> -
> -	ss->ss7.shader_chanel_select_r = 4;
> -	ss->ss7.shader_chanel_select_g = 5;
> -	ss->ss7.shader_chanel_select_b = 6;
> -	ss->ss7.shader_chanel_select_a = 7;
> -
> -	return offset;
> -}
> -
>   uint32_t
>   gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
>   {
> @@ -511,65 +448,6 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
>   	return offset;
>   }
>   
> -uint32_t
> -gen8_spin_binding_table(struct intel_batchbuffer *batch,
> -			struct igt_buf *dst)
> -{
> -	uint32_t *binding_table, offset;
> -
> -	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
> -	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
> -
> -	binding_table[0] = gen8_spin_surface_state(batch, dst,
> -					GEN8_SURFACEFORMAT_R8_UNORM, 1);
> -
> -	return offset;
> -}
> -
> -uint32_t
> -gen8_spin_media_kernel(struct intel_batchbuffer *batch,
> -		       const uint32_t kernel[][4],
> -		       size_t size)
> -{
> -	uint32_t offset;
> -
> -	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
> -
> -	return offset;
> -}
> -
> -uint32_t
> -gen8_spin_interface_descriptor(struct intel_batchbuffer *batch,
> -			       struct igt_buf *dst, const uint32_t spin_kernel[][4], size_t size)
> -{
> -	struct gen8_interface_descriptor_data *idd;
> -	uint32_t offset;
> -	uint32_t binding_table_offset, kernel_offset;
> -
> -	binding_table_offset = gen8_spin_binding_table(batch, dst);
> -	kernel_offset = gen8_spin_media_kernel(batch, spin_kernel,
> -					       sizeof(spin_kernel));
> -
> -	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
> -	offset = intel_batchbuffer_subdata_offset(batch, idd);
> -
> -	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
> -
> -	idd->desc2.single_program_flow = 1;
> -	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
> -
> -	idd->desc3.sampler_count = 0;      /* 0 samplers used */
> -	idd->desc3.sampler_state_pointer = 0;
> -
> -	idd->desc4.binding_table_entry_count = 0;
> -	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
> -
> -	idd->desc5.constant_urb_entry_read_offset = 0;
> -	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
> -
> -	return offset;
> -}
> -
>   void
>   gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>   {
> @@ -685,29 +563,6 @@ gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
>   	OUT_BATCH(0);
>   }
>   
> -void
> -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
> -{
> -	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
> -	OUT_BATCH(0);
> -	/* curbe total data length */
> -	OUT_BATCH(64);
> -	/* curbe data start address, is relative to the dynamics base address */
> -	OUT_BATCH(curbe_buffer);
> -}
> -
> -void
> -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
> -				    uint32_t interface_descriptor)
> -{
> -	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
> -	OUT_BATCH(0);
> -	/* interface descriptor data length */
> -	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
> -	/* interface descriptor address, is relative to the dynamics base address */
> -	OUT_BATCH(interface_descriptor);
> -}
> -
>   void
>   gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>   		     unsigned x, unsigned y,
> diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
> index c6d9ffb3..f0f07661 100644
> --- a/lib/gpu_fill.h
> +++ b/lib/gpu_fill.h
> @@ -89,7 +89,7 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>   		     unsigned width, unsigned height);
>   
>   void
> -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
> +gen8_spin_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
>   
>   uint32_t
>   gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
> @@ -101,28 +101,9 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
>   			uint32_t format,
>   			int is_dst);
>   
> -uint32_t
> -gen8_spin_surface_state(struct intel_batchbuffer *batch,
> -			struct igt_buf *buf,
> -			uint32_t format,
> -			int is_dst);
> -
>   uint32_t
>   gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
>   
> -uint32_t
> -gen8_spin_binding_table(struct intel_batchbuffer *batch,
> -			struct igt_buf *dst);
> -
> -uint32_t
> -gen8_spin_media_kernel(struct intel_batchbuffer *batch,
> -		       const uint32_t kernel[][4],
> -		       size_t size);
> -
> -uint32_t
> -gen8_spin_interface_descriptor(struct intel_batchbuffer *batch,
> -			       struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
> -
>   void
>   gen8_emit_state_base_address(struct intel_batchbuffer *batch);
>   
> @@ -138,13 +119,6 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
>   void
>   gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch);
>   
> -void
> -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
> -
> -void
> -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
> -				    uint32_t interface_descriptor);
> -
>   void
>   gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>   		     unsigned x, unsigned y,
> diff --git a/lib/media_spin.c b/lib/media_spin.c
> index 72009908..16ea8483 100644
> --- a/lib/media_spin.c
> +++ b/lib/media_spin.c
> @@ -81,7 +81,7 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>   
>   	curbe_buffer = gen8_spin_curbe_buffer_data(batch, spins);
> -	interface_descriptor = gen8_spin_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
>   	igt_assert(batch->ptr < &batch->buffer[4095]);
>   
>   	/* media pipeline */
> @@ -91,9 +91,9 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   
>   	gen8_emit_vfe_state_spin(batch);
>   
> -	gen8_emit_curbe_load(batch, curbe_buffer);
> +	gen7_emit_curbe_load(batch, curbe_buffer);
>   
> -	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
> +	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   
>   	gen8_emit_media_objects_spin(batch);
>   
> @@ -102,7 +102,7 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
> -	gen8_render_flush(batch, batch_end);
> +	gen7_render_flush(batch, batch_end);
>   	intel_batchbuffer_reset(batch);
>   }
>   
> @@ -119,7 +119,7 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
>   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>   
>   	curbe_buffer = gen8_spin_curbe_buffer_data(batch, spins);
> -	interface_descriptor = gen8_spin_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
>   	igt_assert(batch->ptr < &batch->buffer[4095]);
>   
>   	/* media pipeline */
> @@ -129,9 +129,9 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
>   
>   	gen8_emit_vfe_state_spin(batch);
>   
> -	gen8_emit_curbe_load(batch, curbe_buffer);
> +	gen7_emit_curbe_load(batch, curbe_buffer);
>   
> -	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
> +	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   
>   	gen8lp_emit_media_objects_spin(batch);
>   
> @@ -140,7 +140,7 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
>   	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
> -	gen8_render_flush(batch, batch_end);
> +	gen7_render_flush(batch, batch_end);
>   	intel_batchbuffer_reset(batch);
>   }
>   
> @@ -157,7 +157,7 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
>   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>   
>   	curbe_buffer = gen8_spin_curbe_buffer_data(batch, spins);
> -	interface_descriptor = gen8_spin_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
>   	igt_assert(batch->ptr < &batch->buffer[4095]);
>   
>   	/* media pipeline */
> @@ -172,9 +172,9 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
>   
>   	gen8_emit_vfe_state_spin(batch);
>   
> -	gen8_emit_curbe_load(batch, curbe_buffer);
> +	gen7_emit_curbe_load(batch, curbe_buffer);
>   
> -	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
> +	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   
>   	gen8_emit_media_objects_spin(batch);
>   
> @@ -190,6 +190,6 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
>   	batch_end = intel_batchbuffer_align(batch, 8);
>   	igt_assert(batch_end < BATCH_STATE_SPLIT);
>   
> -	gen8_render_flush(batch, batch_end);
> +	gen7_render_flush(batch, batch_end);
>   	intel_batchbuffer_reset(batch);
>   }
>