[igt-dev] [PATCH i-g-t 2/4] lib: Remove duplications in gpu_fill used by media_spin

Thu Apr 26 11:11:14 UTC 2018

On Wed, Apr 25, 2018 at 02:49:02PM -0700, Daniele Ceraolo Spurio wrote:
> 
> 
> On 25/04/18 06:26, Katarzyna Dec wrote:
> > Let's remove duplications introduced by moving media_spin helper
> > functions to gpu_fill. These were mainly the same functions
> > as for Gen8 media/gpgpu fill. gen8_render_flush from media_spin
> > was replaced by gen7_render_flush. The only functions that were
> > left intact are gen8_emit_media_objects_spin and
> > gen8_spin_render_flush.
> 
> This is going to be a bit long since I have several comments that don't
> apply to a specific place in the code, apologies in advance :)
> 
> I can see gen8_spin_render_flush in the header but it is not implemented or
> called from anywhere. Did you miss something from your git commit or can we
> actually drop it? This isn't tied to any gen (it just does an execbuf) so I
> guess it is probably the latter.
> 
> gen8_emit_media_objects_spin is also very similar to what is in the inner
> loop of the gen7 version, maybe we can do something like:
> 
> void emit_media_object(struct intel_batchbuffer *batch,
> 			unsigned xoff, unsigned yoff)
> {
> 	OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
> 
> 	/* interface descriptor offset */
> 	OUT_BATCH(0);
> 
> 	/* without indirect data */
> 	OUT_BATCH(0);
> 	OUT_BATCH(0);
> 
> 	/* scoreboard */
> 	OUT_BATCH(0);
> 	OUT_BATCH(0);
> 
> 	/* inline data (xoffset, yoffset) */
> 	OUT_BATCH(xoff);
> 	OUT_BATCH(yoff);
> 
> 	if (AT_LEAST_GEN(batch->devid, 8) &&
> 	    !IS_CHERRYVIEW(batch->devid))
> 		gen8_emit_media_state_flush(batch);
> }
> 
> void
> gen7_emit_media_objects(struct intel_batchbuffer *batch,
> 			unsigned x, unsigned y,
> 			unsigned width, unsigned height)
> {
> 	int i, j;
> 
> 	for (i = 0; i < width / 16; i++)
> 		for (j = 0; j < height / 16; j++)
> 			emit_media_object(batch,
> 					  x + i * 16,
> 					  y + j * 16);
> }
> 
> And from the spin file just call emit_media_object(batch, 0, 0).
> I'm happy if you do this on top as an additional patch if you don't want to
> mess with this one.
> 
> Regarding remaining "spin" functions, I also see gen8_emit_vfe_state_spin
> and gen8_spin_curbe_buffer_data. Including the _spin one, there are 3
> variants of gen8_emit_vfe_state which we could probably unify by passing in
> some params, something like:
> 
> void
> gen8_emit_vfe_state(struct intel_batchbuffer *batch,
> 		uint32_t threads, uint32_t urb_entries,
> 		uint32_t urb_entry_size, uint32_t urb_alloc_size)
> {
> 	/* number of threads & urb entries */
> 	OUT_BATCH(threads << 16 | urb_entries << 8);
> 
> 	OUT_BATCH(0);
> 
> 	/* urb entry size & curbe size */
> 	OUT_BATCH(urb_entry_size << 16 | urb_alloc_size);
> 
> 	/* scoreboard */
> 	OUT_BATCH(0);
> 	OUT_BATCH(0);
> 	OUT_BATCH(0);
> }
> 
> Lastly, for gen8_spin_curbe_buffer_data the only 2 differences with
> gen7_fill_curbe_buffer_data are the type of the variable used (u8 vs u32)
> and the size of the allocation. Using u32 in both cases shouldn't be an
> issue and we can potentially pass the size in as a parameter, thus allowing
> us to drop another duplicated function.
> Both of those cleanups can also can go later in the series with new patches
> (possibly before the reformatting).
> 
> One final comment: in the media_spin file we now mix calls to "spin"
> functions and "fill" functions. For functions that are generic and not
> specific to media/gpgpu fill we can maybe drop the "fill" tag to avoid
> confusion. One more patch on the pile
> 
> Suggestions/ramblings aside, the changes in this patch LGTM.
> 
> Daniele
>
Thanks for such a long input :) These are really nice ideas to implement.
I will introduce them in separate patch on top of coding style changes to
avoid rebasing.

Kasia
> > 
> > Signed-off-by: Katarzyna Dec <katarzyna.dec at intel.com>
> > Cc: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
> > Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> > ---
> >   lib/gpu_fill.c   | 145 -------------------------------------------------------
> >   lib/gpu_fill.h   |  28 +----------
> >   lib/media_spin.c |  24 ++++-----
> >   3 files changed, 13 insertions(+), 184 deletions(-)
> > 
> > diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> > index f4b33c05..f5fc61bb 100644
> > --- a/lib/gpu_fill.c
> > +++ b/lib/gpu_fill.c
> > @@ -351,18 +351,6 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> >   	OUT_BATCH(0xffffffff);
> >   }
> > -void
> > -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
> > -{
> > -	int ret;
> > -
> > -	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
> > -	if (ret == 0)
> > -		ret = drm_intel_gem_bo_context_exec(batch->bo, NULL,
> > -						    batch_end, 0);
> > -	igt_assert_eq(ret, 0);
> > -}
> > -
> >   uint32_t
> >   gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
> >   			    uint32_t iters)
> > @@ -428,57 +416,6 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
> >   	return offset;
> >   }
> > -uint32_t
> > -gen8_spin_surface_state(struct intel_batchbuffer *batch,
> > -			struct igt_buf *buf,
> > -			uint32_t format,
> > -			int is_dst)
> > -{
> > -	struct gen8_surface_state *ss;
> > -	uint32_t write_domain, read_domain, offset;
> > -	int ret;
> > -
> > -	if (is_dst) {
> > -		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
> > -	} else {
> > -		write_domain = 0;
> > -		read_domain = I915_GEM_DOMAIN_SAMPLER;
> > -	}
> > -
> > -	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
> > -	offset = intel_batchbuffer_subdata_offset(batch, ss);
> > -
> > -	ss->ss0.surface_type = GEN8_SURFACE_2D;
> > -	ss->ss0.surface_format = format;
> > -	ss->ss0.render_cache_read_write = 1;
> > -	ss->ss0.vertical_alignment = 1; /* align 4 */
> > -	ss->ss0.horizontal_alignment = 1; /* align 4 */
> > -
> > -	if (buf->tiling == I915_TILING_X)
> > -		ss->ss0.tiled_mode = 2;
> > -	else if (buf->tiling == I915_TILING_Y)
> > -		ss->ss0.tiled_mode = 3;
> > -
> > -	ss->ss8.base_addr = buf->bo->offset;
> > -
> > -	ret = drm_intel_bo_emit_reloc(batch->bo,
> > -				intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
> > -				buf->bo, 0,
> > -				read_domain, write_domain);
> > -	igt_assert_eq(ret, 0);
> > -
> > -	ss->ss2.height = igt_buf_height(buf) - 1;
> > -	ss->ss2.width  = igt_buf_width(buf) - 1;
> > -	ss->ss3.pitch  = buf->stride - 1;
> > -
> > -	ss->ss7.shader_chanel_select_r = 4;
> > -	ss->ss7.shader_chanel_select_g = 5;
> > -	ss->ss7.shader_chanel_select_b = 6;
> > -	ss->ss7.shader_chanel_select_a = 7;
> > -
> > -	return offset;
> > -}
> > -
> >   uint32_t
> >   gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size)
> >   {
> > @@ -511,65 +448,6 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *
> >   	return offset;
> >   }
> > -uint32_t
> > -gen8_spin_binding_table(struct intel_batchbuffer *batch,
> > -			struct igt_buf *dst)
> > -{
> > -	uint32_t *binding_table, offset;
> > -
> > -	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
> > -	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
> > -
> > -	binding_table[0] = gen8_spin_surface_state(batch, dst,
> > -					GEN8_SURFACEFORMAT_R8_UNORM, 1);
> > -
> > -	return offset;
> > -}
> > -
> > -uint32_t
> > -gen8_spin_media_kernel(struct intel_batchbuffer *batch,
> > -		       const uint32_t kernel[][4],
> > -		       size_t size)
> > -{
> > -	uint32_t offset;
> > -
> > -	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
> > -
> > -	return offset;
> > -}
> > -
> > -uint32_t
> > -gen8_spin_interface_descriptor(struct intel_batchbuffer *batch,
> > -			       struct igt_buf *dst, const uint32_t spin_kernel[][4], size_t size)
> > -{
> > -	struct gen8_interface_descriptor_data *idd;
> > -	uint32_t offset;
> > -	uint32_t binding_table_offset, kernel_offset;
> > -
> > -	binding_table_offset = gen8_spin_binding_table(batch, dst);
> > -	kernel_offset = gen8_spin_media_kernel(batch, spin_kernel,
> > -					       sizeof(spin_kernel));
> > -
> > -	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
> > -	offset = intel_batchbuffer_subdata_offset(batch, idd);
> > -
> > -	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
> > -
> > -	idd->desc2.single_program_flow = 1;
> > -	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
> > -
> > -	idd->desc3.sampler_count = 0;      /* 0 samplers used */
> > -	idd->desc3.sampler_state_pointer = 0;
> > -
> > -	idd->desc4.binding_table_entry_count = 0;
> > -	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
> > -
> > -	idd->desc5.constant_urb_entry_read_offset = 0;
> > -	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
> > -
> > -	return offset;
> > -}
> > -
> >   void
> >   gen8_emit_state_base_address(struct intel_batchbuffer *batch)
> >   {
> > @@ -685,29 +563,6 @@ gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
> >   	OUT_BATCH(0);
> >   }
> > -void
> > -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
> > -{
> > -	OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
> > -	OUT_BATCH(0);
> > -	/* curbe total data length */
> > -	OUT_BATCH(64);
> > -	/* curbe data start address, is relative to the dynamics base address */
> > -	OUT_BATCH(curbe_buffer);
> > -}
> > -
> > -void
> > -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
> > -				    uint32_t interface_descriptor)
> > -{
> > -	OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
> > -	OUT_BATCH(0);
> > -	/* interface descriptor data length */
> > -	OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
> > -	/* interface descriptor address, is relative to the dynamics base address */
> > -	OUT_BATCH(interface_descriptor);
> > -}
> > -
> >   void
> >   gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> >   		     unsigned x, unsigned y,
> > diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
> > index c6d9ffb3..f0f07661 100644
> > --- a/lib/gpu_fill.h
> > +++ b/lib/gpu_fill.h
> > @@ -89,7 +89,7 @@ gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> >   		     unsigned width, unsigned height);
> >   void
> > -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
> > +gen8_spin_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
> >   uint32_t
> >   gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
> > @@ -101,28 +101,9 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
> >   			uint32_t format,
> >   			int is_dst);
> > -uint32_t
> > -gen8_spin_surface_state(struct intel_batchbuffer *batch,
> > -			struct igt_buf *buf,
> > -			uint32_t format,
> > -			int is_dst);
> > -
> >   uint32_t
> >   gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
> > -uint32_t
> > -gen8_spin_binding_table(struct intel_batchbuffer *batch,
> > -			struct igt_buf *dst);
> > -
> > -uint32_t
> > -gen8_spin_media_kernel(struct intel_batchbuffer *batch,
> > -		       const uint32_t kernel[][4],
> > -		       size_t size);
> > -
> > -uint32_t
> > -gen8_spin_interface_descriptor(struct intel_batchbuffer *batch,
> > -			       struct igt_buf *dst,  const uint32_t kernel[][4], size_t size);
> > -
> >   void
> >   gen8_emit_state_base_address(struct intel_batchbuffer *batch);
> > @@ -138,13 +119,6 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
> >   void
> >   gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch);
> > -void
> > -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
> > -
> > -void
> > -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
> > -				    uint32_t interface_descriptor);
> > -
> >   void
> >   gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> >   		     unsigned x, unsigned y,
> > diff --git a/lib/media_spin.c b/lib/media_spin.c
> > index 72009908..16ea8483 100644
> > --- a/lib/media_spin.c
> > +++ b/lib/media_spin.c
> > @@ -81,7 +81,7 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
> >   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
> >   	curbe_buffer = gen8_spin_curbe_buffer_data(batch, spins);
> > -	interface_descriptor = gen8_spin_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> > +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> >   	igt_assert(batch->ptr < &batch->buffer[4095]);
> >   	/* media pipeline */
> > @@ -91,9 +91,9 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
> >   	gen8_emit_vfe_state_spin(batch);
> > -	gen8_emit_curbe_load(batch, curbe_buffer);
> > +	gen7_emit_curbe_load(batch, curbe_buffer);
> > -	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
> > +	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
> >   	gen8_emit_media_objects_spin(batch);
> > @@ -102,7 +102,7 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
> >   	batch_end = intel_batchbuffer_align(batch, 8);
> >   	igt_assert(batch_end < BATCH_STATE_SPLIT);
> > -	gen8_render_flush(batch, batch_end);
> > +	gen7_render_flush(batch, batch_end);
> >   	intel_batchbuffer_reset(batch);
> >   }
> > @@ -119,7 +119,7 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
> >   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
> >   	curbe_buffer = gen8_spin_curbe_buffer_data(batch, spins);
> > -	interface_descriptor = gen8_spin_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> > +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> >   	igt_assert(batch->ptr < &batch->buffer[4095]);
> >   	/* media pipeline */
> > @@ -129,9 +129,9 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
> >   	gen8_emit_vfe_state_spin(batch);
> > -	gen8_emit_curbe_load(batch, curbe_buffer);
> > +	gen7_emit_curbe_load(batch, curbe_buffer);
> > -	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
> > +	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
> >   	gen8lp_emit_media_objects_spin(batch);
> > @@ -140,7 +140,7 @@ gen8lp_media_spinfunc(struct intel_batchbuffer *batch,
> >   	batch_end = intel_batchbuffer_align(batch, 8);
> >   	igt_assert(batch_end < BATCH_STATE_SPLIT);
> > -	gen8_render_flush(batch, batch_end);
> > +	gen7_render_flush(batch, batch_end);
> >   	intel_batchbuffer_reset(batch);
> >   }
> > @@ -157,7 +157,7 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
> >   	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
> >   	curbe_buffer = gen8_spin_curbe_buffer_data(batch, spins);
> > -	interface_descriptor = gen8_spin_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> > +	interface_descriptor = gen8_fill_interface_descriptor(batch, dst, spin_kernel, sizeof(spin_kernel));
> >   	igt_assert(batch->ptr < &batch->buffer[4095]);
> >   	/* media pipeline */
> > @@ -172,9 +172,9 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
> >   	gen8_emit_vfe_state_spin(batch);
> > -	gen8_emit_curbe_load(batch, curbe_buffer);
> > +	gen7_emit_curbe_load(batch, curbe_buffer);
> > -	gen8_emit_interface_descriptor_load(batch, interface_descriptor);
> > +	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
> >   	gen8_emit_media_objects_spin(batch);
> > @@ -190,6 +190,6 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
> >   	batch_end = intel_batchbuffer_align(batch, 8);
> >   	igt_assert(batch_end < BATCH_STATE_SPLIT);
> > -	gen8_render_flush(batch, batch_end);
> > +	gen7_render_flush(batch, batch_end);
> >   	intel_batchbuffer_reset(batch);
> >   }
> >