[igt-dev] [PATCH i-g-t v2 4/5] lib/gpu_fill: Further code unification in gpu_fill
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Fri Apr 27 17:19:45 UTC 2018
On 27/04/18 03:03, Katarzyna Dec wrote:
> We can unify gen7_emit_vfe_state and gen8_emit_vfe_state
> functions for gpgpu/media_fill and media_spin by adding
> parameters. gen8_emit_media_object was renamed to gen_*
> and extended with additional offset parameters - we can
> have one gen7_emit_media_objects for all tests.
> I have renamed gen8_emit_media_object to gen_emit_*, because
> funtion belongs to all gens and it would be odd to have
> all named genX_* and only one without this prefix.
>
> Signed-off-by: Katarzyna Dec <katarzyna.dec at intel.com>
> Cc: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> ---
> lib/gpgpu_fill.c | 26 ++++++++--
> lib/gpu_fill.c | 129 +++++++++-----------------------------------------
> lib/gpu_fill.h | 20 ++++----
> lib/media_fill_gen7.c | 10 +++-
> lib/media_fill_gen8.c | 8 +++-
> lib/media_fill_gen9.c | 8 +++-
> lib/media_spin.c | 25 ++++++++--
> 7 files changed, 97 insertions(+), 129 deletions(-)
>
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index 52925a5c..3b89dd1b 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -106,6 +106,13 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen7_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 0;
> + uint32_t urb_size = 0;
> + uint32_t curbe_size = 1;
> + uint32_t mode = 1;
> +
Those are mostly the same across the various functions. Maybe we can
just use global defines with some comments, something like:
/* VFE STATE param */
#define THREADS 1
#define GEN7_URB_ENTRIES 0
#define GEN8_URB_ENTRIES 1
#define URB_SIZE 0 /* size of 1 entry in 256 bits unit */
#define CURBE_SIZE 1 /* in 256 bits unit */
#define GEN7_VFE_STATE_GPGPU_MODE 1
> intel_batchbuffer_flush(batch);
>
> /* setup states */
> @@ -129,7 +136,8 @@ gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
> OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
>
> gen7_emit_state_base_address(batch);
> - gen7_emit_vfe_state_gpgpu(batch);
> + gen7_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size,
> + mode);
> gen7_emit_curbe_load(batch, curbe_buffer);
> gen7_emit_interface_descriptor_load(batch, interface_descriptor);
> gen7_emit_gpgpu_walk(batch, x, y, width, height);
> @@ -153,6 +161,12 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen8_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 1;
> + uint32_t urb_size = 0;
> + uint32_t curbe_size = 1;
> +
> intel_batchbuffer_flush(batch);
>
> /* setup states */
> @@ -176,7 +190,7 @@ gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
> OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
>
> gen8_emit_state_base_address(batch);
> - gen8_emit_vfe_state_gpgpu(batch);
> + gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
> gen7_emit_curbe_load(batch, curbe_buffer);
> gen7_emit_interface_descriptor_load(batch, interface_descriptor);
> gen8_emit_gpgpu_walk(batch, x, y, width, height);
> @@ -200,6 +214,12 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen9_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 1;
> + uint32_t urb_size = 0;
> + uint32_t curbe_size = 1;
> +
> intel_batchbuffer_flush(batch);
>
> /* setup states */
> @@ -224,7 +244,7 @@ gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
> PIPELINE_SELECT_GPGPU);
>
> gen9_emit_state_base_address(batch);
> - gen8_emit_vfe_state_gpgpu(batch);
> + gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
> gen7_emit_curbe_load(batch, curbe_buffer);
> gen7_emit_interface_descriptor_load(batch, interface_descriptor);
> gen8_emit_gpgpu_walk(batch, x, y, width, height);
> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
> index 25c0c810..57b95840 100644
> --- a/lib/gpu_fill.c
> +++ b/lib/gpu_fill.c
> @@ -37,8 +37,7 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
> }
>
> uint32_t
> -gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
> - uint8_t color)
> +gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t color)
> {
> uint8_t *curbe_buffer;
> uint32_t offset;
> @@ -193,7 +192,9 @@ gen7_emit_state_base_address(struct intel_batchbuffer *batch)
> }
>
> void
> -gen7_emit_vfe_state(struct intel_batchbuffer *batch)
> +gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> + uint32_t urb_entries, uint32_t urb_size,
> + uint32_t curbe_size, uint32_t mode)
> {
> OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
>
> @@ -201,39 +202,15 @@ gen7_emit_vfe_state(struct intel_batchbuffer *batch)
> OUT_BATCH(0);
>
> /* number of threads & urb entries */
> - OUT_BATCH(1 << 16 |
> - 2 << 8);
> + OUT_BATCH(threads << 16 |
> + urb_entries << 8 |
> + mode << 2); /* GPGPU mode */
Mode is used to select between media and GPGPU, so the comment could be
updated to something like:
/* GPGPU vs media mode */
>
> OUT_BATCH(0);
>
> /* urb entry size & curbe size */
> - OUT_BATCH(2 << 16 | /* in 256 bits unit */
> - 2); /* in 256 bits unit */
> -
> - /* scoreboard */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -}
> -
> -void
> -gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
> -{
> - OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
> -
> - /* scratch buffer */
> - OUT_BATCH(0);
> -
> - /* number of threads & urb entries */
> - OUT_BATCH(1 << 16 | /* max num of threads */
> - 0 << 8 | /* num of URB entry */
> - 1 << 2); /* GPGPU mode */
> -
> - OUT_BATCH(0);
> -
> - /* urb entry size & curbe size */
> - OUT_BATCH(0 << 16 | /* URB entry size in 256 bits unit */
> - 1); /* CURBE entry size in 256 bits unit */
> + OUT_BATCH(urb_size << 16 | /* in 256 bits unit */
> + curbe_size); /* in 256 bits unit */
>
> /* scoreboard */
> OUT_BATCH(0);
> @@ -271,32 +248,14 @@ gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>
> void
> gen7_emit_media_objects(struct intel_batchbuffer *batch,
> - unsigned int x, unsigned int y,
> + unsigned int xoffset, unsigned int yoffset,
> unsigned int width, unsigned int height)
> {
> int i, j;
>
> for (i = 0; i < width / 16; i++) {
> for (j = 0; j < height / 16; j++) {
> - OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
> -
> - /* interface descriptor offset */
> - OUT_BATCH(0);
> -
> - /* without indirect data */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -
> - /* scoreboard */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -
> - /* inline data (xoffset, yoffset) */
> - OUT_BATCH(x + i * 16);
> - OUT_BATCH(y + j * 16);
> - if (AT_LEAST_GEN(batch->devid, 8) &&
> - !IS_CHERRYVIEW(batch->devid))
> - gen8_emit_media_state_flush(batch);
> + gen_emit_media_object(batch, xoffset, yoffset);
This is wrong, you want to pass in x + i * 16 and y + j * 16 and not x
and y as they are. Probably the reason why CI is failing.
> }
> }
> }
> @@ -503,32 +462,9 @@ gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
> }
>
> void
> -gen8_emit_vfe_state(struct intel_batchbuffer *batch)
> -{
> - OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
> -
> - /* scratch buffer */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -
> - /* number of threads & urb entries */
> - OUT_BATCH(1 << 16 |
> - 2 << 8);
> -
> - OUT_BATCH(0);
> -
> - /* urb entry size & curbe size */
> - OUT_BATCH(2 << 16 |
> - 2);
> -
> - /* scoreboard */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -}
> -
> -void
> -gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
> +gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> + uint32_t urb_entries, uint32_t urb_size,
> + uint32_t curbe_size)
> {
> OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
>
> @@ -537,36 +473,14 @@ gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
> OUT_BATCH(0);
>
> /* number of threads & urb entries */
> - OUT_BATCH(1 << 16 | 1 << 8);
> -
> - OUT_BATCH(0);
> -
> - /* urb entry size & curbe size */
> - OUT_BATCH(0 << 16 | 1);
> -
> - /* scoreboard */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -}
> -
> -void
> -gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
> -{
> - OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
> -
> - /* scratch buffer */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> -
> - /* number of threads & urb entries */
> - OUT_BATCH(2 << 8);
> + OUT_BATCH(threads << 16 |
> + urb_entries << 8);
>
> OUT_BATCH(0);
>
> /* urb entry size & curbe size */
> - OUT_BATCH(2 << 16 |
> - 2);
> + OUT_BATCH(urb_size << 16 |
> + curbe_size);
>
> /* scoreboard */
> OUT_BATCH(0);
> @@ -635,7 +549,8 @@ gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> }
>
> void
> -gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
> +gen_emit_media_object(struct intel_batchbuffer *batch,
> + unsigned int xoffset, unsigned int yoffset)
> {
> OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>
> @@ -651,8 +566,8 @@ gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
> OUT_BATCH(0);
>
> /* inline data (xoffset, yoffset) */
> - OUT_BATCH(0);
> - OUT_BATCH(0);
> + OUT_BATCH(xoffset);
> + OUT_BATCH(yoffset);
> if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
> gen8_emit_media_state_flush(batch);
> }
> diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
> index abb3b4c3..3a800198 100644
> --- a/lib/gpu_fill.h
> +++ b/lib/gpu_fill.h
> @@ -67,10 +67,9 @@ void
> gen7_emit_state_base_address(struct intel_batchbuffer *batch);
>
> void
> -gen7_emit_vfe_state(struct intel_batchbuffer *batch);
> -
> -void
> -gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
> +gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> + uint32_t urb_entries, uint32_t urb_size,
> + uint32_t curbe_size, uint32_t mode);
>
> void
> gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
> @@ -111,13 +110,9 @@ void
> gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
>
> void
> -gen8_emit_vfe_state(struct intel_batchbuffer *batch);
> -
> -void
> -gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
> -
> -void
> -gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch);
> +gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
> + uint32_t urb_entries, uint32_t urb_size,
> + uint32_t curbe_size);
>
> void
> gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> @@ -125,7 +120,8 @@ gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
> unsigned int width, unsigned int height);
>
> void
> -gen8_emit_media_objects_spin(struct intel_batchbuffer *batch);
> +gen_emit_media_object(struct intel_batchbuffer *batch, unsigned int xoffset,
> + unsigned int yoffset);
>
> void
> gen9_emit_state_base_address(struct intel_batchbuffer *batch);
> diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
> index 3dc5617e..02dff04b 100644
> --- a/lib/media_fill_gen7.c
> +++ b/lib/media_fill_gen7.c
> @@ -54,6 +54,13 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen7_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 2;
> + uint32_t urb_size = 2;
> + uint32_t curbe_size = 2;
> + uint32_t mode = 0;
> +
Same as the above, we can use defines for these (and in this case there
are no differences in values across gens, apart from gen7 having to
select the mode).
> intel_batchbuffer_flush(batch);
>
> /* setup states */
> @@ -69,7 +76,8 @@ gen7_media_fillfunc(struct intel_batchbuffer *batch,
> OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
> gen7_emit_state_base_address(batch);
>
> - gen7_emit_vfe_state(batch);
> + gen7_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size,
> + mode);
>
> gen7_emit_curbe_load(batch, curbe_buffer);
>
> diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
> index 63fe72eb..3103b5e7 100644
> --- a/lib/media_fill_gen8.c
> +++ b/lib/media_fill_gen8.c
> @@ -57,6 +57,12 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen8_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 2;
> + uint32_t urb_size = 2;
> + uint32_t curbe_size = 2;
> +
> intel_batchbuffer_flush(batch);
>
> /* setup states */
> @@ -72,7 +78,7 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
> OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
> gen8_emit_state_base_address(batch);
>
> - gen8_emit_vfe_state(batch);
> + gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>
> gen7_emit_curbe_load(batch, curbe_buffer);
>
> diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
> index 78e892f2..d783d0bd 100644
> --- a/lib/media_fill_gen9.c
> +++ b/lib/media_fill_gen9.c
> @@ -54,6 +54,12 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen9_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 2;
> + uint32_t urb_size = 2;
> + uint32_t curbe_size = 2;
> +
> intel_batchbuffer_flush(batch);
>
> /* setup states */
> @@ -74,7 +80,7 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
> GEN9_FORCE_MEDIA_AWAKE_MASK);
> gen9_emit_state_base_address(batch);
>
> - gen8_emit_vfe_state(batch);
> + gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>
> gen7_emit_curbe_load(batch, curbe_buffer);
>
> diff --git a/lib/media_spin.c b/lib/media_spin.c
> index b323550a..2a8a6bdb 100644
> --- a/lib/media_spin.c
> +++ b/lib/media_spin.c
> @@ -68,6 +68,11 @@ static const uint32_t spin_kernel[][4] = {
>
> #define BATCH_STATE_SPLIT 2048
>
> +/* Offsets needed in gen_emit_media_object. In media_spin library this * values do not matter.
Missing a newline here
> + */
> +#define xoffset 0
> +#define yoffset 0
> +
> void
> gen8_media_spinfunc(struct intel_batchbuffer *batch,
> struct igt_buf *dst, uint32_t spins)
> @@ -75,6 +80,12 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen8_emit_vfe_state. */
> + uint32_t threads = 2;
> + uint32_t urb_entries = 0;
> + uint32_t urb_size = 2;
> + uint32_t curbe_size = 2;
> +
> intel_batchbuffer_flush_with_context(batch, NULL);
>
> /* setup states */
> @@ -90,13 +101,13 @@ gen8_media_spinfunc(struct intel_batchbuffer *batch,
> OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
> gen8_emit_state_base_address(batch);
>
> - gen8_emit_vfe_state_spin(batch);
> + gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>
> gen7_emit_curbe_load(batch, curbe_buffer);
>
> gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>
> - gen8_emit_media_objects_spin(batch);
> + gen_emit_media_object(batch, xoffset, yoffset);
>
> OUT_BATCH(MI_BATCH_BUFFER_END);
>
> @@ -114,6 +125,12 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
> uint32_t curbe_buffer, interface_descriptor;
> uint32_t batch_end;
>
> + /* Parameters for gen9_emit_vfe_state. */
> + uint32_t threads = 1;
> + uint32_t urb_entries = 1;
> + uint32_t urb_size = 0;
> + uint32_t curbe_size = 1;
> +
both the gen8 and gen9 media spin called into gen8_emit_vfe_state_spin
so they should have the same parameters. Any reason to change them for
gen9? If this was just a cut & paste mistake and the parameters are
indeed going to be the same I suggest switching to defines like in other
places.
Daniele
> intel_batchbuffer_flush_with_context(batch, NULL);
>
> /* setup states */
> @@ -134,13 +151,13 @@ gen9_media_spinfunc(struct intel_batchbuffer *batch,
> GEN9_FORCE_MEDIA_AWAKE_MASK);
> gen9_emit_state_base_address(batch);
>
> - gen8_emit_vfe_state_spin(batch);
> + gen8_emit_vfe_state(batch, threads, urb_entries, urb_size, curbe_size);
>
> gen7_emit_curbe_load(batch, curbe_buffer);
>
> gen7_emit_interface_descriptor_load(batch, interface_descriptor);
>
> - gen8_emit_media_objects_spin(batch);
> + gen_emit_media_object(batch, xoffset, yoffset);
>
> OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
> GEN9_FORCE_MEDIA_AWAKE_DISABLE |
>
More information about the igt-dev
mailing list