[PATCH i-g-t 5/6] lib: Add vebox copy support

Imre Deak imre.deak at intel.com
Fri Nov 29 01:50:33 UTC 2019
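
Add a vebox copy function for GEN12 and hook it up in igt_fb as an
alternative to the render copy. On GEN12 media compressed surfaces (the
new Y-tiled GEN12 MC CCS FB modifier) can only be produced by the vebox
engine, so use it whenever the destination FB has that modifier. The
vebox engine also has its own AUX table base address registers, so let
callers of gen12_emit_aux_pgtable_state() select the render or the vebox
variant.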


Signed-off-by: Imre Deak <imre.deak at intel.com>
---
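Not part of the patch, but for reference, a minimal sketch of how a test
could drive the new igt_vebox_copyfunc_t API added below (the 64x64
Y-tiled, uncompressed buffers and the helper name are made up for the
example):

  #include "igt.h"
  #include "intel_bufmgr.h"

  static void vebox_copy_example(int fd)
  {
  	uint32_t devid = intel_get_drm_devid(fd);
  	igt_vebox_copyfunc_t vebox_copy = igt_get_vebox_copyfunc(devid);
  	drm_intel_bufmgr *bufmgr;
  	struct intel_batchbuffer *batch;
  	struct igt_buf src = {}, dst = {};
  	const int width = 64, height = 64, bpp = 32;
  	const int stride = width * bpp / 8;

  	igt_require(vebox_copy);	/* only GEN12 has an implementation */

  	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
  	batch = intel_batchbuffer_alloc(bufmgr, devid);

  	/* Uncompressed, Y-tiled source and destination surfaces. */
  	src.bo = drm_intel_bo_alloc(bufmgr, "vebox src", stride * height, 4096);
  	src.stride = stride;
  	src.tiling = I915_TILING_Y;
  	src.compression = I915_COMPRESSION_NONE;
  	src.bpp = bpp;
  	src.size = stride * height;

  	dst = src;
  	dst.bo = drm_intel_bo_alloc(bufmgr, "vebox dst", stride * height, 4096);

  	/*
  	 * For a media compressed destination one would instead set
  	 * dst.compression = I915_COMPRESSION_MEDIA and fill in dst.aux,
  	 * as gem_render_copy does for its compressed buffers.
  	 */

  	vebox_copy(batch, &src, width, height, &dst);

  	drm_intel_bo_unreference(src.bo);
  	drm_intel_bo_unreference(dst.bo);
  	intel_batchbuffer_free(batch);
  	drm_intel_bufmgr_destroy(bufmgr);
  }
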
 lib/igt_fb.c                 |  74 +++++++---
 lib/intel_aux_pgtable.c      |  10 +-
 lib/intel_aux_pgtable.h      |   3 +-
 lib/intel_batchbuffer.c      |  11 ++
 lib/intel_batchbuffer.h      |  30 +++-
 lib/intel_reg.h              |   1 +
 lib/ioctl_wrappers.h         |   1 +
 lib/meson.build              |   1 +
 lib/rendercopy_gen9.c        |   2 +-
 lib/veboxcopy.h              |   9 ++
 lib/veboxcopy_gen12.c        | 269 +++++++++++++++++++++++++++++++++++
 tests/i915/gem_render_copy.c |   1 +
 12 files changed, 386 insertions(+), 26 deletions(-)
 create mode 100644 lib/veboxcopy.h
 create mode 100644 lib/veboxcopy_gen12.c

diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index 1d1e0d22..3bb4e5a1 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -402,6 +402,7 @@ void igt_get_fb_tile_size(int fd, uint64_t modifier, int fb_bpp,
 	case LOCAL_I915_FORMAT_MOD_Y_TILED:
 	case LOCAL_I915_FORMAT_MOD_Y_TILED_CCS:
 	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		igt_require_intel(fd);
 		if (intel_gen(intel_get_drm_devid(fd)) == 2) {
 			*width_ret = 128;
@@ -466,14 +467,19 @@ void igt_get_fb_tile_size(int fd, uint64_t modifier, int fb_bpp,
 	}
 }
 
+static bool is_gen12_mc_ccs_modifier(uint64_t modifier)
+{
+	return modifier == LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS;
+}
+
 static bool is_gen12_ccs_modifier(uint64_t modifier)
 {
-	return modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS;
+	return is_gen12_mc_ccs_modifier(modifier) ||
+		modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS;
 }
 
 static bool is_ccs_modifier(uint64_t modifier)
 {
-
 	return is_gen12_ccs_modifier(modifier) ||
 		modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 		modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
@@ -719,6 +725,7 @@ uint64_t igt_fb_mod_to_tiling(uint64_t modifier)
 	case LOCAL_I915_FORMAT_MOD_Y_TILED:
 	case LOCAL_I915_FORMAT_MOD_Y_TILED_CCS:
 	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
+	case LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS:
 		return I915_TILING_Y;
 	case LOCAL_I915_FORMAT_MOD_Yf_TILED:
 	case LOCAL_I915_FORMAT_MOD_Yf_TILED_CCS:
@@ -1897,7 +1904,7 @@ static bool blitter_ok(const struct igt_fb *fb)
 	return true;
 }
 
-static bool use_rendercopy(const struct igt_fb *fb)
+static bool use_enginecopy(const struct igt_fb *fb)
 {
 	return is_ccs_modifier(fb->modifier) ||
 		(fb->modifier == I915_FORMAT_MOD_Yf_TILED &&
@@ -1943,25 +1950,52 @@ static void fini_buf(struct igt_buf *buf)
 	drm_intel_bo_unreference(buf->bo);
 }
 
-static void rendercopy(struct fb_blit_upload *blit,
-		       const struct igt_fb *dst_fb,
-		       const struct igt_fb *src_fb)
+/**
+ * copy_with_engine:
+ * @blit: context for the copy operation
+ * @dst_fb: destination buffer
+ * @src_fb: source buffer
+ *
+ * Copy @src_fb to @dst_fb using either the render or vebox engine. The engine
+ * is selected based on the compression surface format required by the @dst_fb
+ * FB modifier. On GEN12+ a given compression format (render or media) can
+ * only be produced by the corresponding engine:
+ * - For GEN12 media compressed: vebox engine
+ * - For uncompressed, pre-GEN12 compressed, GEN12+ render compressed: render engine
+ * Note that on GEN12 both engines can read either compression format.
+ */
+static void copy_with_engine(struct fb_blit_upload *blit,
+			     const struct igt_fb *dst_fb,
+			     const struct igt_fb *src_fb)
 {
 	struct igt_buf src = {}, dst = {};
-	igt_render_copyfunc_t render_copy =
-		igt_get_render_copyfunc(intel_get_drm_devid(blit->fd));
+	igt_render_copyfunc_t render_copy = NULL;
+	igt_vebox_copyfunc_t vebox_copy = NULL;
+
+	if (is_gen12_mc_ccs_modifier(dst_fb->modifier))
+		vebox_copy = igt_get_vebox_copyfunc(intel_get_drm_devid(blit->fd));
+	else
+		render_copy = igt_get_render_copyfunc(intel_get_drm_devid(blit->fd));
 
-	igt_require(render_copy);
+	igt_require(vebox_copy || render_copy);
 
 	igt_assert_eq(dst_fb->offsets[0], 0);
 	igt_assert_eq(src_fb->offsets[0], 0);
 
-	init_buf(blit, &src, src_fb, "cairo rendercopy src");
-	init_buf(blit, &dst, dst_fb, "cairo rendercopy dst");
+	init_buf(blit, &src, src_fb, "cairo enginecopy src");
+	init_buf(blit, &dst, dst_fb, "cairo enginecopy dst");
 
-	render_copy(blit->batch, NULL,
-		    &src, 0, 0, dst_fb->plane_width[0], dst_fb->plane_height[0],
-		    &dst, 0, 0);
+	if (vebox_copy)
+		vebox_copy(blit->batch, &src,
+			   dst_fb->plane_width[0], dst_fb->plane_height[0],
+			   &dst);
+	else
+		render_copy(blit->batch, NULL,
+			    &src,
+			    0, 0,
+			    dst_fb->plane_width[0], dst_fb->plane_height[0],
+			    &dst,
+			    0, 0);
 
 	fini_buf(&dst);
 	fini_buf(&src);
@@ -2012,7 +2046,7 @@ static void free_linear_mapping(struct fb_blit_upload *blit)
 			I915_GEM_DOMAIN_GTT, 0);
 
 		if (blit->batch)
-			rendercopy(blit, fb, &linear->fb);
+			copy_with_engine(blit, fb, &linear->fb);
 		else
 			blitcopy(fb, &linear->fb);
 
@@ -2043,7 +2077,7 @@ static void setup_linear_mapping(struct fb_blit_upload *blit)
 	struct igt_fb *fb = blit->fb;
 	struct fb_blit_linear *linear = &blit->linear;
 
-	if (!igt_vc4_is_tiled(fb->modifier) && use_rendercopy(fb)) {
+	if (!igt_vc4_is_tiled(fb->modifier) && use_enginecopy(fb)) {
 		blit->bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 		blit->batch = intel_batchbuffer_alloc(blit->bufmgr,
 						      intel_get_drm_devid(fd));
@@ -2079,7 +2113,7 @@ static void setup_linear_mapping(struct fb_blit_upload *blit)
 				I915_GEM_DOMAIN_GTT, 0);
 
 		if (blit->batch)
-			rendercopy(blit, &linear->fb, fb);
+			copy_with_engine(blit, &linear->fb, fb);
 		else
 			blitcopy(&linear->fb, fb);
 
@@ -3185,7 +3219,8 @@ static void create_cairo_surface__convert(int fd, struct igt_fb *fb)
 							     &blit->shadow_fb);
 	igt_assert(blit->shadow_ptr);
 
-	if (use_rendercopy(fb) || use_blitter(fb) || igt_vc4_is_tiled(fb->modifier)) {
+	if (use_enginecopy(fb) || use_blitter(fb) ||
+	    igt_vc4_is_tiled(fb->modifier)) {
 		setup_linear_mapping(&blit->base);
 	} else {
 		blit->base.linear.fb = *fb;
@@ -3268,7 +3303,8 @@ cairo_surface_t *igt_get_cairo_surface(int fd, struct igt_fb *fb)
 	if (fb->cairo_surface == NULL) {
 		if (use_convert(fb))
 			create_cairo_surface__convert(fd, fb);
-		else if (use_blitter(fb) || use_rendercopy(fb) || igt_vc4_is_tiled(fb->modifier))
+		else if (use_blitter(fb) || use_enginecopy(fb) ||
+			 igt_vc4_is_tiled(fb->modifier))
 			create_cairo_surface__gpu(fd, fb);
 		else
 			create_cairo_surface__gtt(fd, fb);
diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
index 2f22bb39..cbb3c320 100644
--- a/lib/intel_aux_pgtable.c
+++ b/lib/intel_aux_pgtable.c
@@ -555,16 +555,20 @@ gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
 }
 
 void
-gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state)
+gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state,
+			     bool render)
 {
+	uint32_t table_base_reg = render ? GEN12_GFX_AUX_TABLE_BASE_ADDR :
+					   GEN12_VEBOX_AUX_TABLE_BASE_ADDR;
+
 	if (!state)
 		return;
 
 	OUT_BATCH(MI_LOAD_REGISTER_MEM_GEN8 | MI_MMIO_REMAP_ENABLE_GEN12);
-	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR);
+	OUT_BATCH(table_base_reg);
 	OUT_RELOC(batch->bo, 0, 0, state);
 
 	OUT_BATCH(MI_LOAD_REGISTER_MEM_GEN8 | MI_MMIO_REMAP_ENABLE_GEN12);
-	OUT_BATCH(GEN12_GFX_AUX_TABLE_BASE_ADDR + 4);
+	OUT_BATCH(table_base_reg + 4);
 	OUT_RELOC(batch->bo, 0, 0, state + 4);
 }
diff --git a/lib/intel_aux_pgtable.h b/lib/intel_aux_pgtable.h
index 20278db0..ac82b7d2 100644
--- a/lib/intel_aux_pgtable.h
+++ b/lib/intel_aux_pgtable.h
@@ -30,6 +30,7 @@ uint32_t
 gen12_create_aux_pgtable_state(struct intel_batchbuffer *batch,
 			       drm_intel_bo *aux_pgtable_bo);
 void
-gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state);
+gen12_emit_aux_pgtable_state(struct intel_batchbuffer *batch, uint32_t state,
+			     bool render);
 
 #endif
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 3828ba75..51aae4dc 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -37,6 +37,7 @@
 #include "intel_bufmgr.h"
 #include "intel_chipset.h"
 #include "intel_reg.h"
+#include "veboxcopy.h"
 #include "rendercopy.h"
 #include "media_fill.h"
 #include "ioctl_wrappers.h"
@@ -850,6 +851,16 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
 	return copy;
 }
 
+igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid)
+{
+	igt_vebox_copyfunc_t copy = NULL;
+
+	if (IS_GEN12(devid))
+		copy = gen12_vebox_copyfunc;
+
+	return copy;
+}
+
 /**
  * igt_get_media_fillfunc:
  * @devid: pci device id
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 402e68bc..e05e464b 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -213,6 +213,7 @@ void intel_copy_bo(struct intel_batchbuffer *batch,
 enum i915_compression {
 	I915_COMPRESSION_NONE,
 	I915_COMPRESSION_RENDER,
+	I915_COMPRESSION_MEDIA,
 };
 
 /**
@@ -220,18 +221,20 @@ enum i915_compression {
  * @bo: underlying libdrm buffer object
  * @stride: stride of the buffer
  * @tiling: tiling mode bits
+ * @compression: memory compression mode
  * @bpp: bits per pixel, 8, 16 or 32.
  * @data: pointer to the memory mapping of the buffer
  * @size: size of the buffer object
  *
  * This is a i-g-t buffer object wrapper structure which augments the baseline
- * libdrm buffer object with suitable data needed by the render copy and the
- * fill functions.
+ * libdrm buffer object with suitable data needed by the render/vebox copy and
+ * the fill functions.
  */
 struct igt_buf {
 	drm_intel_bo *bo;
 	uint32_t stride;
 	uint32_t tiling;
+	enum i915_compression compression;
 	uint32_t bpp;
 	uint32_t *data;
 	uint32_t size;
@@ -304,6 +307,29 @@ typedef void (*igt_render_copyfunc_t)(struct intel_batchbuffer *batch,
 
 igt_render_copyfunc_t igt_get_render_copyfunc(int devid);
 
+
+/**
+ * igt_vebox_copyfunc_t:
+ * @batch: batchbuffer object
+ * @src: source i-g-t buffer object
+ * @width: width of the copied rectangle
+ * @height: height of the copied rectangle
+ * @dst: destination i-g-t buffer object
+ *
+ * This is the type of the per-platform vebox copy functions. The
+ * platform-specific implementation can be obtained by calling
+ * igt_get_vebox_copyfunc().
+ *
+ * A vebox copy function will emit a batchbuffer to the kernel which executes
+ * the specified copy operation using the vebox engine.
+ */
+typedef void (*igt_vebox_copyfunc_t)(struct intel_batchbuffer *batch,
+				     const struct igt_buf *src,
+				     unsigned width, unsigned height,
+				     const struct igt_buf *dst);
+
+igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid);
+
 /**
  * igt_fillfunc_t:
  * @batch: batchbuffer object
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index cabfc879..0a9ee34c 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -674,6 +674,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define RING_INVALID        0x00000000
 
 #define GEN12_GFX_AUX_TABLE_BASE_ADDR	0x4200
+#define GEN12_VEBOX_AUX_TABLE_BASE_ADDR	0x4230
 
 
 /* BitBlt Instructions
diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h
index f2412d78..7dd2a1e8 100644
--- a/lib/ioctl_wrappers.h
+++ b/lib/ioctl_wrappers.h
@@ -201,6 +201,7 @@ struct local_drm_mode_fb_cmd2 {
 #define LOCAL_I915_FORMAT_MOD_Y_TILED_CCS	local_fourcc_mod_code(INTEL, 4)
 #define LOCAL_I915_FORMAT_MOD_Yf_TILED_CCS	local_fourcc_mod_code(INTEL, 5)
 #define LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS fourcc_mod_code(INTEL, 6)
+#define LOCAL_I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7)
 
 #define LOCAL_DRM_IOCTL_MODE_ADDFB2	DRM_IOWR(0xB8, \
 						 struct local_drm_mode_fb_cmd2)
diff --git a/lib/meson.build b/lib/meson.build
index 3f908912..f7fce905 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -63,6 +63,7 @@ lib_sources = [
 	'igt_edid.c',
 	'igt_eld.c',
 	'igt_infoframe.c',
+	'veboxcopy_gen12.c',
 ]
 
 lib_deps = [
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 94e816b6..e7fdd7a6 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -1026,7 +1026,7 @@ void _gen9_render_copyfunc(struct intel_batchbuffer *batch,
 	OUT_BATCH(G4X_PIPELINE_SELECT | PIPELINE_SELECT_3D |
 				GEN9_PIPELINE_SELECTION_MASK);
 
-	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state);
+	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state, true);
 
 	gen8_emit_sip(batch);
 
diff --git a/lib/veboxcopy.h b/lib/veboxcopy.h
new file mode 100644
index 00000000..949d83bf
--- /dev/null
+++ b/lib/veboxcopy.h
@@ -0,0 +1,9 @@
+#ifndef __VEBOXCOPY_H__
+#define __VEBOXCOPY_H__
+
+void gen12_vebox_copyfunc(struct intel_batchbuffer *batch,
+			  const struct igt_buf *src,
+			  unsigned width, unsigned height,
+			  const struct igt_buf *dst);
+
+#endif
diff --git a/lib/veboxcopy_gen12.c b/lib/veboxcopy_gen12.c
new file mode 100644
index 00000000..87800941
--- /dev/null
+++ b/lib/veboxcopy_gen12.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <drm.h>
+
+#include "igt.h"
+#include "intel_aux_pgtable.h"
+#include "veboxcopy.h"
+
+#define R8G8B8A8_UNORM	8
+
+struct vebox_surface_state {
+	struct {
+		uint32_t dw_count:12;
+		uint32_t pad:4;
+		uint32_t sub_opcode_b:5;
+		uint32_t sub_opcode_a:3;
+		uint32_t media_cmd_opcode:3;
+		uint32_t media_cmd_pipeline:2;
+		uint32_t cmd_type:3;
+	} ss0;
+	struct {
+#define VEBOX_SURFACE_INPUT	0
+#define VEBOX_SURFACE_OUTPUT	1
+		uint32_t surface_id:1;
+		uint32_t pad:31;
+	} ss1;
+	struct {
+		uint32_t pad:4;
+		uint32_t width:14;
+		uint32_t height:14;
+	} ss2;
+	struct {
+#define VEBOX_TILE_WALK_XMAJOR 0
+#define VEBOX_TILE_WALK_YMAJOR 1
+		uint32_t tile_walk:1;
+		uint32_t tiled_surface:1;
+		uint32_t chroma_half_pitch:1;
+		uint32_t surface_pitch:17;
+		uint32_t chroma_interleave:1;
+		uint32_t lsb_packed_enable:1;
+		uint32_t bayer_input_alignment:2;
+		uint32_t bayer_pattern_format:1;
+		uint32_t bayer_pattern_offset:2;
+		uint32_t surface_format:5;
+	} ss3;
+	struct {
+		uint32_t u_y_offset:15;
+		uint32_t u_x_offset:13;
+		uint32_t pad:4;
+	} ss4;
+	struct {
+		uint32_t v_y_offset:15;
+		uint32_t v_x_offset:13;
+		uint32_t pad:4;
+	} ss5;
+	struct {
+		uint32_t frame_y_offset:15;
+		uint32_t frame_x_offset:15;
+		uint32_t pad:2;
+	} ss6;
+	struct {
+		uint32_t derived_surface_pitch:17;
+		uint32_t pad:15;
+	} ss7;
+	struct {
+		uint32_t skin_score_output_surface_pitch:17;
+		uint32_t pad:15;
+	} ss8;
+} __attribute__((packed));
+
+struct vebox_tiling_convert {
+	struct {
+		uint32_t dw_count:12;
+		uint32_t pad:4;
+		uint32_t sub_opcode_b:5;
+		uint32_t sub_opcode_a:3;
+		uint32_t cmd_opcode:3;
+		uint32_t pipeline:2;
+		uint32_t cmd_type:3;
+	} tc0;
+	union {
+		struct {
+			uint64_t input_encrypted_data:1;
+			uint64_t input_mocs_idx:6;
+			uint64_t input_memory_compression_enable:1;
+#define COMPRESSION_TYPE_MEDIA 0
+#define COMPRESSION_TYPE_RENDER	1
+			uint64_t input_compression_type:1;
+#define TRMODE_NONE	0
+#define TRMODE_TILE_YF	1
+#define TRMODE_TILE_YS	2
+			uint64_t input_tiled_resource_mode:2;
+			uint64_t pad:1;
+			uint64_t input_address:52;
+		} tc1_2;
+		uint64_t tc1_2_l;
+	};
+	union {
+		struct {
+			uint64_t output_encrypted_data:1;
+			uint64_t output_mocs_idx:6;
+			uint64_t output_memory_compression_enable:1;
+			uint64_t output_compression_type:1;
+			uint64_t output_tiled_resource_mode:2;
+			uint64_t pad:1;
+			uint64_t output_address:52;
+		} tc3_4;
+		uint64_t tc3_4_l;
+	};
+} __attribute__((packed));
+
+static void emit_surface_state_cmd(struct intel_batchbuffer *batch,
+				   int surface_id,
+				   int width, int height, int bpp,
+				   int pitch, uint32_t tiling, int format)
+{
+	struct vebox_surface_state *ss;
+
+	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 4);
+
+	ss->ss0.cmd_type = 3;
+	ss->ss0.media_cmd_pipeline = 2;
+	ss->ss0.media_cmd_opcode = 4;
+	ss->ss0.dw_count = 7;
+
+	ss->ss1.surface_id = surface_id;
+
+	ss->ss2.height = height - 1;
+	ss->ss2.width = width - 1;
+
+	ss->ss3.surface_format = format;
+	ss->ss3.surface_pitch = pitch - 1;
+	ss->ss3.tile_walk = (tiling == I915_TILING_Y) ||
+			    (tiling == I915_TILING_Yf);
+	ss->ss3.tiled_surface = tiling != I915_TILING_NONE;
+
+	ss->ss7.derived_surface_pitch = pitch - 1;
+}
+
+static void emit_tiling_convert_cmd(struct intel_batchbuffer *batch,
+				    drm_intel_bo *input_bo,
+				    uint32_t input_tiling,
+				    uint32_t input_compression,
+				    drm_intel_bo *output_bo,
+				    uint32_t output_tiling,
+				    uint32_t output_compression)
+{
+	uint32_t reloc_delta;
+	struct vebox_tiling_convert *tc;
+	int ret;
+
+	tc = intel_batchbuffer_subdata_alloc(batch, sizeof(*tc), 8);
+
+	tc->tc0.cmd_type = 3;
+	tc->tc0.pipeline = 2;
+	tc->tc0.cmd_opcode = 4;
+	tc->tc0.sub_opcode_b = 1;
+
+	tc->tc0.dw_count = 3;
+
+	if (input_compression != I915_COMPRESSION_NONE) {
+		tc->tc1_2.input_memory_compression_enable = 1;
+		tc->tc1_2.input_compression_type =
+			input_compression == I915_COMPRESSION_RENDER;
+	}
+	tc->tc1_2.input_tiled_resource_mode = input_tiling == I915_TILING_Yf;
+	reloc_delta = tc->tc1_2_l;
+
+	igt_assert(input_bo->offset64 == ALIGN(input_bo->offset64, 0x1000));
+	tc->tc1_2.input_address = input_bo->offset64 >> 12;
+	igt_assert(reloc_delta <= INT32_MAX);
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				      intel_batchbuffer_subdata_offset(batch, tc) +
+					offsetof(typeof(*tc), tc1_2),
+				      input_bo, reloc_delta,
+				      0, 0);
+	igt_assert(ret == 0);
+
+	if (output_compression != I915_COMPRESSION_NONE) {
+		tc->tc3_4.output_memory_compression_enable = 1;
+		tc->tc3_4.output_compression_type =
+			output_compression == I915_COMPRESSION_RENDER;
+	}
+	tc->tc3_4.output_tiled_resource_mode = output_tiling == I915_TILING_Yf;
+	reloc_delta = tc->tc3_4_l;
+
+	igt_assert(output_bo->offset64 == ALIGN(output_bo->offset64, 0x1000));
+	tc->tc3_4.output_address = output_bo->offset64 >> 12;
+	igt_assert(reloc_delta <= INT32_MAX);
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				      intel_batchbuffer_subdata_offset(batch, tc) +
+					offsetof(typeof(*tc), tc3_4),
+				      output_bo, reloc_delta,
+				      0, I915_GEM_DOMAIN_RENDER);
+	igt_assert(ret == 0);
+
+}
+
+/* Borrowing the idea from the rendercopy state setup. */
+#define BATCH_STATE_SPLIT 2048
+
+void gen12_vebox_copyfunc(struct intel_batchbuffer *batch,
+			  const struct igt_buf *src,
+			  unsigned width, unsigned height,
+			  const struct igt_buf *dst)
+{
+	struct aux_pgtable_info aux_pgtable_info = { };
+	uint32_t aux_pgtable_state;
+
+	igt_assert(src->bpp == dst->bpp);
+
+	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_VEBOX);
+
+	intel_batchbuffer_align(batch, 8);
+
+	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
+
+	gen12_aux_pgtable_init(&aux_pgtable_info, batch->bufmgr, src, dst);
+
+	aux_pgtable_state = gen12_create_aux_pgtable_state(batch,
+							   aux_pgtable_info.pgtable_bo);
+
+	assert(batch->ptr < &batch->buffer[4095]);
+	batch->ptr = batch->buffer;
+
+	gen12_emit_aux_pgtable_state(batch, aux_pgtable_state, false);
+
+	/* TODO: add support for more formats */
+	igt_assert(src->bpp == 32);
+	emit_surface_state_cmd(batch, VEBOX_SURFACE_INPUT,
+			       width, height, src->bpp, src->stride,
+			       src->tiling, R8G8B8A8_UNORM);
+
+	igt_assert(dst->bpp == 32);
+	emit_surface_state_cmd(batch, VEBOX_SURFACE_OUTPUT,
+			       width, height, dst->bpp, dst->stride,
+			       dst->tiling, R8G8B8A8_UNORM);
+
+	emit_tiling_convert_cmd(batch,
+				src->bo, src->tiling, src->compression,
+				dst->bo, dst->tiling, dst->compression);
+
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+
+	intel_batchbuffer_flush_on_ring(batch, I915_EXEC_VEBOX);
+
+	gen12_aux_pgtable_cleanup(&aux_pgtable_info);
+	intel_batchbuffer_reset(batch);
+}
diff --git a/tests/i915/gem_render_copy.c b/tests/i915/gem_render_copy.c
index 15ba1704..9c36f38b 100644
--- a/tests/i915/gem_render_copy.c
+++ b/tests/i915/gem_render_copy.c
@@ -471,6 +471,7 @@ static void scratch_buf_init(data_t *data, struct igt_buf *buf,
 		aux_width = scratch_buf_aux_width(data->devid, buf);
 		aux_height = scratch_buf_aux_height(data->devid, buf);
 
+		buf->compression = compression;
 		buf->aux.offset = buf->stride * ALIGN(height, 32);
 		buf->aux.stride = aux_width;
 
-- 
2.17.1


