[igt-dev] [PATCH i-g-t v2 4/5] lib/gpu_fill: Further code unification in gpu_fill

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Fri Apr 27 17:19:45 UTC 2018



On 27/04/18 03:03, Katarzyna Dec wrote:
> We can unify gen7_emit_vfe_state and gen8_emit_vfe_state
> functions for gpgpu/media_fill and media_spin by adding
> parameters. gen8_emit_media_object was renamed to gen_*
> and extended with additional offset parameters - we can
> have one gen7_emit_media_objects for all tests.
> I have renamed gen8_emit_media_object to gen_emit_*, because
> funtion belongs to all gens and it would be odd to have
> all named genX_* and only one without this prefix.
> 
> Signed-off-by: Katarzyna Dec <katarzyna.dec at intel.com>
> Cc: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
>   lib/gpgpu_fill.c      |  26 ++++++++--
>   lib/gpu_fill.c        | 129 +++++++++-----------------------------------------
>   lib/gpu_fill.h        |  20 ++++----
>   lib/media_fill_gen7.c |  10 +++-
>   lib/media_fill_gen8.c |   8 +++-
>   lib/media_fill_gen9.c |   8 +++-
>   lib/media_spin.c      |  25 ++++++++--
>   7 files changed, 97 insertions(+), 129 deletions(-)
> 
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index 52925a5c..3b89dd1b 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -106,6 +106,13 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen7_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 0;
> +	uint32_t urb_size = 0;
> +	uint32_t curbe_size = 1;
> +	uint32_t mode = 1;
> +

Those are mostly the same across the various functions. Maybe we can 
just use global defines with some comments, something like:

/* VFE STATE param */
#define THREADS 1
#define GEN7_URB_ENTRIES 0
#define GEN8_URB_ENTRIES 1
#define URB_SIZE 0   	/* size of 1 entry in 256 bits unit */
#define CURBE_SIZE 1 	/* in 256 bits unit */
#define GEN7_VFE_STATE_GPGPU_MODE 1

>   	intel_batchbuffer_flush(batch);
>   
>   	/* setup states */
> @@ -129,7 +136,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
>   
>   	gen7_emit_state_base_address(batch);
> -	gen7_emit_vfe_state_gpgpu(batch);
> +	gen7_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size,
> +			    mode);
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   	gen7_emit_gpgpu_walk(batch, x, y, width, height);
> @@ -153,6 +161,12 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen8_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 1;
> +	uint32_t urb_size = 0;
> +	uint32_t curbe_size = 1;
> +
>   	intel_batchbuffer_flush(batch);
>   
>   	/* setup states */
> @@ -176,7 +190,7 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
>   
>   	gen8_emit_state_base_address(batch);
> -	gen8_emit_vfe_state_gpgpu(batch);
> +	gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   	gen8_emit_gpgpu_walk(batch, x, y, width, height);
> @@ -200,6 +214,12 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen9_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 1;
> +	uint32_t urb_size = 0;
> +	uint32_t curbe_size = 1;
> +
>   	intel_batchbuffer_flush(batch);
>   
>   	/* setup states */
> @@ -224,7 +244,7 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
>   		  PIPELINE_SELECT_GPGPU);
>   
>   	gen9_emit_state_base_address(batch);
> -	gen8_emit_vfe_state_gpgpu(batch);
> +	gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   	gen8_emit_gpgpu_walk(batch, x, y, width, height);
> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> index 25c0c810..57b95840 100644
> --- a/lib/gpu_fill.c
> +++ b/lib/gpu_fill.c
> @@ -37,8 +37,7 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>   }
>   
>   uint32_t
> -gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
> -			uint8_t color)
> +gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color)
>   {
>   	uint8_t *curbe_buffer;
>   	uint32_t offset;
> @@ -193,7 +192,9 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
>   }
>   
>   void
> -gen7_emit_vfe_state(struct intel_batchbuffer *batch)
> +gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> +		    uint32_t urb_entries, uint32_t urb_size,
> +		    uint32_t curbe_size, uint32_t mode)
>   {
>   	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
>   
> @@ -201,39 +202,15 @@ gen7_emit_vfe_state(struct intel_batchbuffer *batch)
>   	OUT_BATCH(0);
>   
>   	/* number of threads & urb entries */
> -	OUT_BATCH(1 << 16 |
> -		2 << 8);
> +	OUT_BATCH(threads << 16 |
> +		urb_entries << 8 |
> +		mode << 2); /* GPGPU mode */

Mode is used to select between media and GPGPU, so the comment could be 
updated to something like:

/* GPGPU vs media mode */

>   
>   	OUT_BATCH(0);
>   
>   	/* urb entry size & curbe size */
> -	OUT_BATCH(2 << 16 |	/* in 256 bits unit */
> -		  2);		/* in 256 bits unit */
> -
> -	/* scoreboard */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -}
> -
> -void
> -gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
> -{
> -	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
> -
> -	/* scratch buffer */
> -	OUT_BATCH(0);
> -
> -	/* number of threads & urb entries */
> -	OUT_BATCH(1 << 16 | /* max num of threads */
> -		  0 << 8 | /* num of URB entry */
> -		  1 << 2); /* GPGPU mode */
> -
> -	OUT_BATCH(0);
> -
> -	/* urb entry size & curbe size */
> -	OUT_BATCH(0 << 16 |	/* URB entry size in 256 bits unit */
> -		  1);		/* CURBE entry size in 256 bits unit */
> +	OUT_BATCH(urb_size << 16 |	/* in 256 bits unit */
> +		  curbe_size);		/* in 256 bits unit */
>   
>   	/* scoreboard */
>   	OUT_BATCH(0);
> @@ -271,32 +248,14 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>   
>   void
>   gen7_emit_media_objects(struct intel_batchbuffer *batch,
> -			unsigned int x, unsigned int y,
> +			unsigned int xoffset, unsigned int yoffset,
>   			unsigned int width, unsigned int height)
>   {
>   	int i, j;
>   
>   	for (i = 0; i < width / 16; i++) {
>   		for (j = 0; j < height / 16; j++) {
> -			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
> -
> -			/* interface descriptor offset */
> -			OUT_BATCH(0);
> -
> -			/* without indirect data */
> -			OUT_BATCH(0);
> -			OUT_BATCH(0);
> -
> -			/* scoreboard */
> -			OUT_BATCH(0);
> -			OUT_BATCH(0);
> -
> -			/* inline data (xoffset, yoffset) */
> -			OUT_BATCH(x + i * 16);
> -			OUT_BATCH(y + j * 16);
> -			if (AT_LEAST_GEN(batch->devid, 8) &&
> -			    !IS_CHERRYVIEW(batch->devid))
> -				gen8_emit_media_state_flush(batch);
> +			gen_emit_media_object(batch, xoffset, yoffset);

This is wrong, you want to pass in x + i * 16 and y + j * 16 and not x 
and y as they are. Probably the reason why CI is failing.

>   		}
>   	}
>   }
> @@ -503,32 +462,9 @@ gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
>   }
>   
>   void
> -gen8_emit_vfe_state(struct intel_batchbuffer *batch)
> -{
> -	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
> -
> -	/* scratch buffer */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -
> -	/* number of threads & urb entries */
> -	OUT_BATCH(1 << 16 |
> -		2 << 8);
> -
> -	OUT_BATCH(0);
> -
> -	/* urb entry size & curbe size */
> -	OUT_BATCH(2 << 16 |
> -		2);
> -
> -	/* scoreboard */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -}
> -
> -void
> -gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
> +gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> +		    uint32_t urb_entries, uint32_t urb_size,
> +		    uint32_t curbe_size)
>   {
>   	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
>   
> @@ -537,36 +473,14 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
>   	OUT_BATCH(0);
>   
>   	/* number of threads & urb entries */
> -	OUT_BATCH(1 << 16 | 1 << 8);
> -
> -	OUT_BATCH(0);
> -
> -	/* urb entry size & curbe size */
> -	OUT_BATCH(0 << 16 | 1);
> -
> -	/* scoreboard */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -}
> -
> -void
> -gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
> -{
> -	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
> -
> -	/* scratch buffer */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> -
> -	/* number of threads & urb entries */
> -	OUT_BATCH(2 << 8);
> +	OUT_BATCH(threads << 16 |
> +		urb_entries << 8);
>   
>   	OUT_BATCH(0);
>   
>   	/* urb entry size & curbe size */
> -	OUT_BATCH(2 << 16 |
> -		2);
> +	OUT_BATCH(urb_size << 16 |
> +		curbe_size);
>   
>   	/* scoreboard */
>   	OUT_BATCH(0);
> @@ -635,7 +549,8 @@ gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>   }
>   
>   void
> -gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
> +gen_emit_media_object(struct intel_batchbuffer *batch,
> +		       unsigned int xoffset, unsigned int yoffset)
>   {
>   	OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>   
> @@ -651,8 +566,8 @@ gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
>   	OUT_BATCH(0);
>   
>   	/* inline data (xoffset, yoffset) */
> -	OUT_BATCH(0);
> -	OUT_BATCH(0);
> +	OUT_BATCH(xoffset);
> +	OUT_BATCH(yoffset);
>   	if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
>   		gen8_emit_media_state_flush(batch);
>   }
> diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
> index abb3b4c3..3a800198 100644
> --- a/lib/gpu_fill.h
> +++ b/lib/gpu_fill.h
> @@ -67,10 +67,9 @@ void
>   gen7_emit_state_base_address(struct intel_batchbuffer *batch);
>   
>   void
> -gen7_emit_vfe_state(struct intel_batchbuffer *batch);
> -
> -void
> -gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
> +gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> +		    uint32_t urb_entries, uint32_t urb_size,
> +		    uint32_t curbe_size, uint32_t mode);
>   
>   void
>   gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
> @@ -111,13 +110,9 @@ void
>   gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
>   
>   void
> -gen8_emit_vfe_state(struct intel_batchbuffer *batch);
> -
> -void
> -gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
> -
> -void
> -gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch);
> +gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> +		    uint32_t urb_entries, uint32_t urb_size,
> +		    uint32_t curbe_size);
>   
>   void
>   gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> @@ -125,7 +120,8 @@ gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>   		     unsigned int width, unsigned int height);
>   
>   void
> -gen8_emit_media_objects_spin(struct intel_batchbuffer *batch);
> +gen_emit_media_object(struct intel_batchbuffer *batch, unsigned int xoffset,
> +		  unsigned int yoffset);
>   
>   void
>   gen9_emit_state_base_address(struct intel_batchbuffer *batch);
> diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
> index 3dc5617e..02dff04b 100644
> --- a/lib/media_fill_gen7.c
> +++ b/lib/media_fill_gen7.c
> @@ -54,6 +54,13 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen7_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 2;
> +	uint32_t urb_size = 2;
> +	uint32_t curbe_size = 2;
> +	uint32_t mode = 0;
> +

Same as the above, we can use defines for these (and in this case there 
are no differences in values across gens, apart from gen7 having to 
select the mode).

>   	intel_batchbuffer_flush(batch);
>   
>   	/* setup states */
> @@ -69,7 +76,8 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
>   	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
>   	gen7_emit_state_base_address(batch);
>   
> -	gen7_emit_vfe_state(batch);
> +	gen7_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size,
> +			    mode);
>   
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   
> diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
> index 63fe72eb..3103b5e7 100644
> --- a/lib/media_fill_gen8.c
> +++ b/lib/media_fill_gen8.c
> @@ -57,6 +57,12 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen8_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 2;
> +	uint32_t urb_size = 2;
> +	uint32_t curbe_size = 2;
> +
>   	intel_batchbuffer_flush(batch);
>   
>   	/* setup states */
> @@ -72,7 +78,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
>   	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
>   	gen8_emit_state_base_address(batch);
>   
> -	gen8_emit_vfe_state(batch);
> +	gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>   
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   
> diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
> index 78e892f2..d783d0bd 100644
> --- a/lib/media_fill_gen9.c
> +++ b/lib/media_fill_gen9.c
> @@ -54,6 +54,12 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen9_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 2;
> +	uint32_t urb_size = 2;
> +	uint32_t curbe_size = 2;
> +
>   	intel_batchbuffer_flush(batch);
>   
>   	/* setup states */
> @@ -74,7 +80,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
>   			GEN9_FORCE_MEDIA_AWAKE_MASK);
>   	gen9_emit_state_base_address(batch);
>   
> -	gen8_emit_vfe_state(batch);
> +	gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>   
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   
> diff --git a/lib/media_spin.c b/lib/media_spin.c
> index b323550a..2a8a6bdb 100644
> --- a/lib/media_spin.c
> +++ b/lib/media_spin.c
> @@ -68,6 +68,11 @@ static const uint32_t spin_kernel[][4] = {
>   
>   #define BATCH_STATE_SPLIT 2048
>   
> +/* Offsets needed in gen_emit_media_object. In media_spin library this  * values do not matter.

Missing a newline here

> + */
> +#define xoffset 0
> +#define yoffset 0
> +
>   void
>   gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   		    struct igt_buf *dst, uint32_t spins)
> @@ -75,6 +80,12 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen8_emit_vfe_state. */
> +	uint32_t threads = 2;
> +	uint32_t urb_entries = 0;
> +	uint32_t urb_size = 2;
> +	uint32_t curbe_size = 2;
> +
>   	intel_batchbuffer_flush_with_context(batch, NULL);
>   
>   	/* setup states */
> @@ -90,13 +101,13 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
>   	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
>   	gen8_emit_state_base_address(batch);
>   
> -	gen8_emit_vfe_state_spin(batch);
> +	gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>   
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   
>   	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   
> -	gen8_emit_media_objects_spin(batch);
> +	gen_emit_media_object(batch, xoffset, yoffset);
>   
>   	OUT_BATCH(MI_BATCH_BUFFER_END);
>   
> @@ -114,6 +125,12 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
>   	uint32_t curbe_buffer, interface_descriptor;
>   	uint32_t batch_end;
>   
> +	/* Parameters for gen9_emit_vfe_state. */
> +	uint32_t threads = 1;
> +	uint32_t urb_entries = 1;
> +	uint32_t urb_size = 0;
> +	uint32_t curbe_size = 1;
> +

both the gen8 and gen9 media spin called into gen8_emit_vfe_state_spin 
so they should have the same parameters. Any reason to change them for 
gen9? If this was just a cut & paste mistake and the parameters are 
indeed going to be the same I suggest switching to defines like in other 
places.

Daniele

>   	intel_batchbuffer_flush_with_context(batch, NULL);
>   
>   	/* setup states */
> @@ -134,13 +151,13 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
>   			GEN9_FORCE_MEDIA_AWAKE_MASK);
>   	gen9_emit_state_base_address(batch);
>   
> -	gen8_emit_vfe_state_spin(batch);
> +	gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>   
>   	gen7_emit_curbe_load(batch, curbe_buffer);
>   
>   	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>   
> -	gen8_emit_media_objects_spin(batch);
> +	gen_emit_media_object(batch, xoffset, yoffset);
>   
>   	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
>   			GEN9_FORCE_MEDIA_AWAKE_DISABLE |
> 


More information about the igt-dev mailing list