[igt-dev] [PATCH v2 5/6] lib: Add vebox copy support

Thu Dec 5 10:30:04 UTC 2019

On Wed, 2019-12-04 at 15:49 +0200, Imre Deak wrote:
> To produce surfaces that are compressed using the media compression
> format we need to use one of the media engines. The simplest way for
> this is to use the vebox engine's tiling convert command, so add
> support
> for this.
> 
> v2:
> - Rebase on latest igt. (Mika)
> 
> Cc: Mika Kahola <mika.kahola at intel.com>
> Cc: Brian Welty <brian.welty at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala at linux.intel.com>
> Signed-off-by: Imre Deak <imre.deak at intel.com>

Reviewed-by: Mika Kahola <mika.kahola at intel.com>

> ---
>  lib/Makefile.sources         |   1 +
>  lib/igt_fb.c                 |  77 +++++++---
>  lib/intel_aux_pgtable.c      |  10 +-
>  lib/intel_aux_pgtable.h      |   3 +-
>  lib/intel_batchbuffer.c      |  11 ++
>  lib/intel_batchbuffer.h      |  30 +++-
>  lib/intel_reg.h              |   1 +
>  lib/ioctl_wrappers.h         |   1 +
>  lib/meson.build              |   1 +
>  lib/rendercopy_gen9.c        |   2 +-
>  lib/veboxcopy.h              |   9 ++
>  lib/veboxcopy_gen12.c        | 269
> +++++++++++++++++++++++++++++++++++
>  tests/i915/gem_render_copy.c |   1 +
>  13 files changed, 388 insertions(+), 28 deletions(-)
>  create mode 100644 lib/veboxcopy.h
>  create mode 100644 lib/veboxcopy_gen12.c
> 
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 750e630b..5dd3962e 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -131,6 +131,7 @@ lib_source_list =	 	\
>  	igt_vc4.h		\
>  	igt_amd.c		\
>  	igt_amd.h		\
> +	veboxcopy_gen12.c	\
>  	$(NULL)
>  
>  .PHONY: version.h.tmp
> diff --git a/lib/igt_fb.c b/lib/igt_fb.c
> index 6ce7cfea..3b141b93 100644
> --- a/lib/igt_fb.c
> +++ b/lib/igt_fb.c
> @@ -403,6 +403,7 @@ void igt_get_fb_tile_size(int fd, uint64_t
> modifier, int fb_bpp,
>  	case LOCAL_I915_FORMAT_MOD_Y_TILED_CCS:
>  	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
>  	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
> +	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
>  		igt_require_intel(fd);
>  		if (intel_gen(intel_get_drm_devid(fd)) == 2) {
>  			*width_ret = 128;
> @@ -467,17 +468,21 @@ void igt_get_fb_tile_size(int fd, uint64_t
> modifier, int fb_bpp,
>  	}
>  }
>  
> +static bool is_gen12_mc_ccs_modifier(uint64_t modifier)
> +{
> +	return modifier == LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
> +}
> +
>  static bool is_gen12_ccs_modifier(uint64_t modifier)
>  {
> -	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
> +	return is_gen12_mc_ccs_modifier(modifier) ||
> +		modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
>  		modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC;
>  }
>  
>  static bool is_ccs_modifier(uint64_t modifier)
>  {
> -
> -	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
> -		modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC ||
> +	return is_gen12_ccs_modifier(modifier) ||
>  		modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>  		modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
>  }
> @@ -733,6 +738,7 @@ uint64_t igt_fb_mod_to_tiling(uint64_t modifier)
>  	case LOCAL_I915_FORMAT_MOD_Y_TILED_CCS:
>  	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
>  	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC:
> +	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
>  		return I915_TILING_Y;
>  	case LOCAL_I915_FORMAT_MOD_Yf_TILED:
>  	case LOCAL_I915_FORMAT_MOD_Yf_TILED_CCS:
> @@ -1911,7 +1917,7 @@ static bool blitter_ok(const struct igt_fb *fb)
>  	return true;
>  }
>  
> -static bool use_rendercopy(const struct igt_fb *fb)
> +static bool use_enginecopy(const struct igt_fb *fb)
>  {
>  	return is_ccs_modifier(fb->modifier) ||
>  		(fb->modifier == I915_FORMAT_MOD_Yf_TILED &&
> @@ -1960,25 +1966,52 @@ static void fini_buf(struct igt_buf *buf)
>  	drm_intel_bo_unreference(buf->bo);
>  }
>  
> -static void rendercopy(struct fb_blit_upload *blit,
> -		       const struct igt_fb *dst_fb,
> -		       const struct igt_fb *src_fb)
> +/**
> + * copy_with_engine:
> + * @blit: context for the copy operation
> + * @dst_fb: destination buffer
> + * @src_fb: source buffer
> + *
> + * Copy @src_fb to @dst_fb using either the render or vebox engine.
> The engine
> + * is selected based on the compression surface format required by
> the @dst_fb
> + * FB modifier. On GEN12+ a given compression format (render or
> media) can be
> + * produced only by the selected engine:
> + * - For GEN12 media compressed: vebox engine
> + * - For uncompressed, pre-GEN12 compressed, GEN12+ render
> compressed: render engine
> + * Note that both GEN12 engine is capable of reading either
> compression formats.
> + */
> +static void copy_with_engine(struct fb_blit_upload *blit,
> +			     const struct igt_fb *dst_fb,
> +			     const struct igt_fb *src_fb)
>  {
>  	struct igt_buf src = {}, dst = {};
> -	igt_render_copyfunc_t render_copy =
> -		igt_get_render_copyfunc(intel_get_drm_devid(blit->fd));
> +	igt_render_copyfunc_t render_copy = NULL;
> +	igt_vebox_copyfunc_t vebox_copy = NULL;
> +
> +	if (is_gen12_mc_ccs_modifier(dst_fb->modifier))
> +		vebox_copy =
> igt_get_vebox_copyfunc(intel_get_drm_devid(blit->fd));
> +	else
> +		render_copy =
> igt_get_render_copyfunc(intel_get_drm_devid(blit->fd));
>  
> -	igt_require(render_copy);
> +	igt_require(vebox_copy || render_copy);
>  
>  	igt_assert_eq(dst_fb->offsets[0], 0);
>  	igt_assert_eq(src_fb->offsets[0], 0);
>  
> -	init_buf(blit, &src, src_fb, "cairo rendercopy src");
> -	init_buf(blit, &dst, dst_fb, "cairo rendercopy dst");
> +	init_buf(blit, &src, src_fb, "cairo enginecopy src");
> +	init_buf(blit, &dst, dst_fb, "cairo enginecopy dst");
>  
> -	render_copy(blit->batch, NULL,
> -		    &src, 0, 0, dst_fb->plane_width[0], dst_fb-
> >plane_height[0],
> -		    &dst, 0, 0);
> +	if (vebox_copy)
> +		vebox_copy(blit->batch, &src,
> +			   dst_fb->plane_width[0], dst_fb-
> >plane_height[0],
> +			   &dst);
> +	else
> +		render_copy(blit->batch, NULL,
> +			    &src,
> +			    0, 0,
> +			    dst_fb->plane_width[0], dst_fb-
> >plane_height[0],
> +			    &dst,
> +			    0, 0);
>  
>  	fini_buf(&dst);
>  	fini_buf(&src);
> @@ -2029,7 +2062,7 @@ static void free_linear_mapping(struct
> fb_blit_upload *blit)
>  			I915_GEM_DOMAIN_GTT, 0);
>  
>  		if (blit->batch)
> -			rendercopy(blit, fb, &linear->fb);
> +			copy_with_engine(blit, fb, &linear->fb);
>  		else
>  			blitcopy(fb, &linear->fb);
>  
> @@ -2060,7 +2093,7 @@ static void setup_linear_mapping(struct
> fb_blit_upload *blit)
>  	struct igt_fb *fb = blit->fb;
>  	struct fb_blit_linear *linear = &blit->linear;
>  
> -	if (!igt_vc4_is_tiled(fb->modifier) && use_rendercopy(fb)) {
> +	if (!igt_vc4_is_tiled(fb->modifier) && use_enginecopy(fb)) {
>  		blit->bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
>  		blit->batch = intel_batchbuffer_alloc(blit->bufmgr,
>  						      intel_get_drm_dev
> id(fd));
> @@ -2096,7 +2129,7 @@ static void setup_linear_mapping(struct
> fb_blit_upload *blit)
>  				I915_GEM_DOMAIN_GTT, 0);
>  
>  		if (blit->batch)
> -			rendercopy(blit, &linear->fb, fb);
> +			copy_with_engine(blit, &linear->fb, fb);
>  		else
>  			blitcopy(&linear->fb, fb);
>  
> @@ -3202,7 +3235,8 @@ static void create_cairo_surface__convert(int
> fd, struct igt_fb *fb)
>  							     &blit-
> >shadow_fb);
>  	igt_assert(blit->shadow_ptr);
>  
> -	if (use_rendercopy(fb) || use_blitter(fb) ||
> igt_vc4_is_tiled(fb->modifier)) {
> +	if (use_enginecopy(fb) || use_blitter(fb) ||
> +	    igt_vc4_is_tiled(fb->modifier)) {
>  		setup_linear_mapping(&blit->base);
>  	} else {
>  		blit->base.linear.fb = *fb;
> @@ -3285,7 +3319,8 @@ cairo_surface_t *igt_get_cairo_surface(int fd,
> struct igt_fb *fb)
>  	if (fb->cairo_surface == NULL) {
>  		if (use_convert(fb))
>  			create_cairo_surface__convert(fd, fb);
> -		else if (use_blitter(fb) || use_rendercopy(fb) ||
> igt_vc4_is_tiled(fb->modifier))
> +		else if (use_blitter(fb) || use_enginecopy(fb) ||
> +			 igt_vc4_is_tiled(fb->modifier))
>  			create_cairo_surface__gpu(fd, fb);
>  		else
>  			create_cairo_surface__gtt(fd, fb);
> diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
> index 2f22bb39..cbb3c320 100644
> --- a/lib/intel_aux_pgtable.c
> +++ b/lib/intel_aux_pgtable.c
> @@ -555,16 +555,20 @@ gen12_create_aux_pgtable_state(struct
> intel_batchbuffer *batch,
>  }
>  
>  void
> -gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch,
> uint32_t state)
> +gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch,
> uint32_t state,
> +			     bool render)
>  {
> +	uint32_t table_base_reg = render ?
> GEN12_GFX_AUX_TABLE_BASE_ADDR :
> +					   GEN12_VEBOX_AUX_TABLE_BASE_A
> DDR;
> +
>  	if (!state)
>  		return;
>  
>  	OUT_BATCH(MI_LOAD_REGISTER_MEM_GEN8 |
> MI_MMIO_REMAP_ENABLE_GEN12);
> -	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
> +	OUT_BATCH(table_base_reg);
>  	OUT_RELOC(batch->bo, 0, 0, state);
>  
>  	OUT_BATCH(MI_LOAD_REGISTER_MEM_GEN8 |
> MI_MMIO_REMAP_ENABLE_GEN12);
> -	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
> +	OUT_BATCH(table_base_reg + 4);
>  	OUT_RELOC(batch->bo, 0, 0, state + 4);
>  }
> diff --git a/lib/intel_aux_pgtable.h b/lib/intel_aux_pgtable.h
> index 20278db0..ac82b7d2 100644
> --- a/lib/intel_aux_pgtable.h
> +++ b/lib/intel_aux_pgtable.h
> @@ -30,6 +30,7 @@ uint32_t
>  gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
>  			       drm_intel_bo *aux_pgtable_bo);
>  void
> -gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch,
> uint32_t state);
> +gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch,
> uint32_t state,
> +			     bool render);
>  
>  #endif
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index 3828ba75..51aae4dc 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -37,6 +37,7 @@
>  #include "intel_bufmgr.h"
>  #include "intel_chipset.h"
>  #include "intel_reg.h"
> +#include "veboxcopy.h"
>  #include "rendercopy.h"
>  #include "media_fill.h"
>  #include "ioctl_wrappers.h"
> @@ -850,6 +851,16 @@ igt_render_copyfunc_t
> igt_get_render_copyfunc(int devid)
>  	return copy;
>  }
>  
> +igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid)
> +{
> +	igt_vebox_copyfunc_t copy = NULL;
> +
> +	if (IS_GEN12(devid))
> +		copy = gen12_vebox_copyfunc;
> +
> +	return copy;
> +}
> +
>  /**
>   * igt_get_media_fillfunc:
>   * @devid: pci device id
> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> index 888188f9..37e3affe 100644
> --- a/lib/intel_batchbuffer.h
> +++ b/lib/intel_batchbuffer.h
> @@ -213,6 +213,7 @@ void intel_copy_bo(struct intel_batchbuffer
> *batch,
>  enum i915_compression {
>  	I915_COMPRESSION_NONE,
>  	I915_COMPRESSION_RENDER,
> +	I915_COMPRESSION_MEDIA,
>  };
>  
>  /**
> @@ -220,18 +221,20 @@ enum i915_compression {
>   * @bo: underlying libdrm buffer object
>   * @stride: stride of the buffer
>   * @tiling: tiling mode bits
> + * @compression: memory compression mode
>   * @bpp: bits per pixel, 8, 16 or 32.
>   * @data: pointer to the memory mapping of the buffer
>   * @size: size of the buffer object
>   *
>   * This is a i-g-t buffer object wrapper structure which augments
> the baseline
> - * libdrm buffer object with suitable data needed by the render copy
> and the
> - * fill functions.
> + * libdrm buffer object with suitable data needed by the
> render/vebox copy and
> + * the fill functions.
>   */
>  struct igt_buf {
>  	drm_intel_bo *bo;
>  	uint32_t stride;
>  	uint32_t tiling;
> +	enum i915_compression compression;
>  	uint32_t bpp;
>  	uint32_t *data;
>  	uint32_t size;
> @@ -307,6 +310,29 @@ typedef void (*igt_render_copyfunc_t)(struct
> intel_batchbuffer *batch,
>  
>  igt_render_copyfunc_t igt_get_render_copyfunc(int devid);
>  
> +
> +/**
> + * igt_vebox_copyfunc_t:
> + * @batch: batchbuffer object
> + * @src: source i-g-t buffer object
> + * @width: width of the copied rectangle
> + * @height: height of the copied rectangle
> + * @dst: destination i-g-t buffer object
> + *
> + * This is the type of the per-platform vebox copy functions. The
> + * platform-specific implementation can be obtained by calling
> + * igt_get_vebox_copyfunc().
> + *
> + * A vebox copy function will emit a batchbuffer to the kernel which
> executes
> + * the specified blit copy operation using the vebox engine.
> + */
> +typedef void (*igt_vebox_copyfunc_t)(struct intel_batchbuffer
> *batch,
> +				     const struct igt_buf *src,
> +				     unsigned width, unsigned height,
> +				     const struct igt_buf *dst);
> +
> +igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid);
> +
>  /**
>   * igt_fillfunc_t:
>   * @batch: batchbuffer object
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index cabfc879..0a9ee34c 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -674,6 +674,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> SOFTWARE.
>  #define RING_INVALID        0x00000000
>  
>  #define GEN12_GFX_AUX_TABLE_BASE_ADDR	0x4200
> +#define GEN12_VEBOX_AUX_TABLE_BASE_ADDR	0x4230
>  
>  
>  /* BitBlt Instructions
> diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h
> index e80e22f1..7614e688 100644
> --- a/lib/ioctl_wrappers.h
> +++ b/lib/ioctl_wrappers.h
> @@ -202,6 +202,7 @@ struct local_drm_mode_fb_cmd2 {
>  #define LOCAL_I915_FORMAT_MOD_Y_TILED_CCS	local_fourcc_mod_code(I
> NTEL, 4)
>  #define LOCAL_I915_FORMAT_MOD_Yf_TILED_CCS	local_fourcc_mod_code(I
> NTEL, 5)
>  #define LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS
> fourcc_mod_code(INTEL, 6)
> +#define LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS
> fourcc_mod_code(INTEL, 7)
>  #define LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC
> fourcc_mod_code(INTEL, 8)
>  #define LOCAL_DRM_IOCTL_MODE_ADDFB2	DRM_IOWR(0xB8,			
> \
>  						 struct
> local_drm_mode_fb_cmd2)
> diff --git a/lib/meson.build b/lib/meson.build
> index 62d61654..57eb7d93 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -64,6 +64,7 @@ lib_sources = [
>  	'igt_edid.c',
>  	'igt_eld.c',
>  	'igt_infoframe.c',
> +	'veboxcopy_gen12.c',
>  ]
>  
>  lib_deps = [
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index 991a6393..bf9d2ee1 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -1037,7 +1037,7 @@ void _gen9_render_copyfunc(struct
> intel_batchbuffer *batch,
>  	OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
>  				GEN9_PIPELINE_SELECTION_MASK);
>  
> -	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
> +	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state, true);
>  
>  	gen8_emit_sip(batch);
>  
> diff --git a/lib/veboxcopy.h b/lib/veboxcopy.h
> new file mode 100644
> index 00000000..949d83bf
> --- /dev/null
> +++ b/lib/veboxcopy.h
> @@ -0,0 +1,9 @@
> +#ifndef __VEBOXCOPY_H__
> +#define __VEBOXCOPY_H__
> +
> +void gen12_vebox_copyfunc(struct intel_batchbuffer *batch,
> +			  const struct igt_buf *src,
> +			  unsigned width, unsigned height,
> +			  const struct igt_buf *dst);
> +
> +#endif
> diff --git a/lib/veboxcopy_gen12.c b/lib/veboxcopy_gen12.c
> new file mode 100644
> index 00000000..87800941
> --- /dev/null
> +++ b/lib/veboxcopy_gen12.c
> @@ -0,0 +1,269 @@
> +/*
> + * Copyright Â© 2019 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> the
> + * Software is furnished to do so, subject to the following
> conditions:
> + *
> + * The above copyright notice and this permission notice (including
> the next
> + * paragraph) shall be included in all copies or substantial
> portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
> OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +#include <drm.h>
> +
> +#include "igt.h"
> +#include "intel_aux_pgtable.h"
> +#include "veboxcopy.h"
> +
> +#define R8G8B8A8_UNORM	8
> +
> +struct vebox_surface_state {
> +	struct {
> +		uint32_t dw_count:12;
> +		uint32_t pad:4;
> +		uint32_t sub_opcode_b:5;
> +		uint32_t sub_opcode_a:3;
> +		uint32_t media_cmd_opcode:3;
> +		uint32_t media_cmd_pipeline:2;
> +		uint32_t cmd_type:3;
> +	} ss0;
> +	struct {
> +#define VEBOX_SURFACE_INPUT	0
> +#define VEBOX_SURFACE_OUTPUT	1
> +		uint32_t surface_id:1;
> +		uint32_t pad:31;
> +	} ss1;
> +	struct {
> +		uint32_t pad:4;
> +		uint32_t width:14;
> +		uint32_t height:14;
> +	} ss2;
> +	struct {
> +#define VEBOX_TILE_WALK_XMAJOR 0
> +#define VEBOX_TILE_WALK_YMAJOR 1
> +		uint32_t tile_walk:1;
> +		uint32_t tiled_surface:1;
> +		uint32_t chroma_half_pitch:1;
> +		uint32_t surface_pitch:17;
> +		uint32_t chroma_interleave:1;
> +		uint32_t lsb_packed_enable:1;
> +		uint32_t bayer_input_alignment:2;
> +		uint32_t bayer_pattern_format:1;
> +		uint32_t bayer_pattern_offset:2;
> +		uint32_t surface_format:5;
> +	} ss3;
> +	struct {
> +		uint32_t u_y_offset:15;
> +		uint32_t u_x_offset:13;
> +		uint32_t pad:4;
> +	} ss4;
> +	struct {
> +		uint32_t v_y_offset:15;
> +		uint32_t v_x_offset:13;
> +		uint32_t pad:4;
> +	} ss5;
> +	struct {
> +		uint32_t frame_y_offset:15;
> +		uint32_t frame_x_offset:15;
> +		uint32_t pad:2;
> +	} ss6;
> +	struct {
> +		uint32_t derived_surface_pitch:17;
> +		uint32_t pad:15;
> +	} ss7;
> +	struct {
> +		uint32_t skin_score_output_surface_pitch:17;
> +		uint32_t pad:15;
> +	} ss8;
> +} __attribute__((packed));
> +
> +struct vebox_tiling_convert {
> +	struct {
> +		uint32_t dw_count:12;
> +		uint32_t pad:4;
> +		uint32_t sub_opcode_b:5;
> +		uint32_t sub_opcode_a:3;
> +		uint32_t cmd_opcode:3;
> +		uint32_t pipeline:2;
> +		uint32_t cmd_type:3;
> +	} tc0;
> +	union {
> +		struct {
> +			uint64_t input_encrypted_data:1;
> +			uint64_t input_mocs_idx:6;
> +			uint64_t input_memory_compression_enable:1;
> +#define COMPRESSION_TYPE_MEDIA 0
> +#define COMPRESSION_TYPE_RENDER	1
> +			uint64_t input_compression_type:1;
> +#define TRMODE_NONE	0
> +#define TRMODE_TILE_YF	1
> +#define TRMODE_TILE_YS	2
> +			uint64_t input_tiled_resource_mode:2;
> +			uint64_t pad:1;
> +			uint64_t input_address:52;
> +		} tc1_2;
> +		uint64_t tc1_2_l;
> +	};
> +	union {
> +		struct {
> +			uint64_t output_encrypted_data:1;
> +			uint64_t output_mocs_idx:6;
> +			uint64_t output_memory_compression_enable:1;
> +			uint64_t output_compression_type:1;
> +			uint64_t output_tiled_resource_mode:2;
> +			uint64_t pad:1;
> +			uint64_t output_address:52;
> +		} tc3_4;
> +		uint64_t tc3_4_l;
> +	};
> +} __attribute__((packed));
> +
> +static void emit_surface_state_cmd(struct intel_batchbuffer *batch,
> +				   int surface_id,
> +				   int width, int height, int bpp,
> +				   int pitch, uint32_t tiling, int
> format)
> +{
> +	struct vebox_surface_state *ss;
> +
> +	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 4);
> +
> +	ss->ss0.cmd_type = 3;
> +	ss->ss0.media_cmd_pipeline = 2;
> +	ss->ss0.media_cmd_opcode = 4;
> +	ss->ss0.dw_count = 7;
> +
> +	ss->ss1.surface_id = surface_id;
> +
> +	ss->ss2.height = height - 1;
> +	ss->ss2.width = width - 1;
> +
> +	ss->ss3.surface_format = format;
> +	ss->ss3.surface_pitch = pitch - 1;
> +	ss->ss3.tile_walk = (tiling == I915_TILING_Y) ||
> +			    (tiling == I915_TILING_Yf);
> +	ss->ss3.tiled_surface = tiling != I915_TILING_NONE;
> +
> +	ss->ss7.derived_surface_pitch = pitch - 1;
> +}
> +
> +static void emit_tiling_convert_cmd(struct intel_batchbuffer *batch,
> +				    drm_intel_bo *input_bo,
> +				    uint32_t input_tiling,
> +				    uint32_t input_compression,
> +				    drm_intel_bo *output_bo,
> +				    uint32_t output_tiling,
> +				    uint32_t output_compression)
> +{
> +	uint32_t reloc_delta;
> +	struct vebox_tiling_convert *tc;
> +	int ret;
> +
> +	tc = intel_batchbuffer_subdata_alloc(batch, sizeof(*tc), 8);
> +
> +	tc->tc0.cmd_type = 3;
> +	tc->tc0.pipeline = 2;
> +	tc->tc0.cmd_opcode = 4;
> +	tc->tc0.sub_opcode_b = 1;
> +
> +	tc->tc0.dw_count = 3;
> +
> +	if (input_compression != I915_COMPRESSION_NONE) {
> +		tc->tc1_2.input_memory_compression_enable = 1;
> +		tc->tc1_2.input_compression_type =
> +			input_compression == I915_COMPRESSION_RENDER;
> +	}
> +	tc->tc1_2.input_tiled_resource_mode = input_tiling ==
> I915_TILING_Yf;
> +	reloc_delta = tc->tc1_2_l;
> +
> +	igt_assert(input_bo->offset64 == ALIGN(input_bo->offset64,
> 0x1000));
> +	tc->tc1_2.input_address = input_bo->offset64 >> 12;
> +	igt_assert(reloc_delta <= INT32_MAX);
> +	ret = drm_intel_bo_emit_reloc(batch->bo,
> +				      intel_batchbuffer_subdata_offset(
> batch, tc) +
> +					offsetof(typeof(*tc), tc1_2),
> +				      input_bo, reloc_delta,
> +				      0, 0);
> +	igt_assert(ret == 0);
> +
> +	if (output_compression != I915_COMPRESSION_NONE) {
> +		tc->tc3_4.output_memory_compression_enable = 1;
> +		tc->tc3_4.output_compression_type =
> +			output_compression == I915_COMPRESSION_RENDER;
> +	}
> +	tc->tc3_4.output_tiled_resource_mode = output_tiling ==
> I915_TILING_Yf;
> +	reloc_delta = tc->tc3_4_l;
> +
> +	igt_assert(output_bo->offset64 == ALIGN(output_bo->offset64,
> 0x1000));
> +	tc->tc3_4.output_address = output_bo->offset64 >> 12;
> +	igt_assert(reloc_delta <= INT32_MAX);
> +	ret = drm_intel_bo_emit_reloc(batch->bo,
> +				      intel_batchbuffer_subdata_offset(
> batch, tc) +
> +					offsetof(typeof(*tc), tc3_4),
> +				      output_bo, reloc_delta,
> +				      0, I915_GEM_DOMAIN_RENDER);
> +	igt_assert(ret == 0);
> +
> +}
> +
> +/* Borrowing the idea from the rendercopy state setup. */
> +#define BATCH_STATE_SPLIT 2048
> +
> +void gen12_vebox_copyfunc(struct intel_batchbuffer *batch,
> +			  const struct igt_buf *src,
> +			  unsigned width, unsigned height,
> +			  const struct igt_buf *dst)
> +{
> +	struct aux_pgtable_info aux_pgtable_info = { };
> +	uint32_t aux_pgtable_state;
> +
> +	igt_assert(src->bpp == dst->bpp);
> +
> +	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_VEBOX);
> +
> +	intel_batchbuffer_align(batch, 8);
> +
> +	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
> +
> +	gen12_aux_pgtable_init(&aux_pgtable_info, batch->bufmgr, src,
> dst);
> +
> +	aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
> +							   aux_pgtable_
> info.pgtable_bo);
> +
> +	assert(batch->ptr < &batch->buffer[4095]);
> +	batch->ptr = batch->buffer;
> +
> +	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state, false);
> +
> +	/* TODO: add support for more formats */
> +	igt_assert(src->bpp == 32);
> +	emit_surface_state_cmd(batch, VEBOX_SURFACE_INPUT,
> +			       width, height, src->bpp, src->stride,
> +			       src->tiling, R8G8B8A8_UNORM);
> +
> +	igt_assert(dst->bpp == 32);
> +	emit_surface_state_cmd(batch, VEBOX_SURFACE_OUTPUT,
> +			       width, height, dst->bpp, dst->stride,
> +			       dst->tiling, R8G8B8A8_UNORM);
> +
> +	emit_tiling_convert_cmd(batch,
> +				src->bo, src->tiling, src->compression,
> +				dst->bo, dst->tiling, dst-
> >compression);
> +
> +	OUT_BATCH(MI_BATCH_BUFFER_END);
> +
> +	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_VEBOX);
> +
> +	gen12_aux_pgtable_cleanup(&aux_pgtable_info);
> +	intel_batchbuffer_reset(batch);
> +}
> diff --git a/tests/i915/gem_render_copy.c
> b/tests/i915/gem_render_copy.c
> index 15ba1704..9c36f38b 100644
> --- a/tests/i915/gem_render_copy.c
> +++ b/tests/i915/gem_render_copy.c
> @@ -471,6 +471,7 @@ static void scratch_buf_init(data_t *data, struct
> igt_buf *buf,
>  		aux_width = scratch_buf_aux_width(data->devid, buf);
>  		aux_height = scratch_buf_aux_height(data->devid, buf);
>  
> +		buf->compression = compression;
>  		buf->aux.offset = buf->stride * ALIGN(height, 32);
>  		buf->aux.stride = aux_width;
>