[igt-dev] [PATCH i-g-t 2/2] tests/gem_(gpgpu|media)_fill: remove the _v2 suffix

Fri May 29 10:41:27 UTC 2020

Remove the libdrm-based functions and replace them with the new
intel_bb-based ones, dropping the _v2 suffix that was introduced only
for the transition period.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
 lib/gpgpu_fill.c            |  304 +++--------
 lib/gpgpu_fill.h            |   83 +--
 lib/gpu_cmds.c              | 1015 +++++++----------------------------
 lib/gpu_cmds.h              |  165 +-----
 lib/intel_batchbuffer.c     |   56 +-
 lib/intel_batchbuffer.h     |   20 +-
 lib/media_fill.c            |  267 ++-------
 lib/media_fill.h            |   45 +-
 tests/i915/gem_gpgpu_fill.c |   83 +--
 tests/i915/gem_media_fill.c |   83 +--
 10 files changed, 385 insertions(+), 1736 deletions(-)

diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 49988a36..aa2ffa8d 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -25,7 +25,6 @@
  *  Dominik Zeromski <dominik.zeromski at intel.com>
  */
 
-#include <intel_bufmgr.h>
 #include <i915_drm.h>
 
 #include "intel_reg.h"
@@ -131,60 +130,11 @@ static const uint32_t gen12_gpgpu_kernel[][4] = {
 #define GEN7_VFE_STATE_GPGPU_MODE 1
 
 void
-gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
-		    unsigned int x, unsigned int y,
-		    unsigned int width, unsigned int height,
+gen7_gpgpu_fillfunc(int i915,
+		    struct intel_buf *buf,
+		    unsigned x, unsigned y,
+		    unsigned width, unsigned height,
 		    uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	/*
-	 * const buffer needs to fill for every thread, but as we have just 1
-	 * thread per every group, so need only one curbe data.
-	 * For each thread, just use thread group ID for buffer offset.
-	 */
-	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-
-	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-				gen7_gpgpu_kernel, sizeof(gen7_gpgpu_kernel));
-
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	batch->ptr = batch->buffer;
-
-	/* GPGPU pipeline */
-	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
-
-	gen7_emit_state_base_address(batch);
-	gen7_emit_vfe_state(batch, THREADS, GEN7_GPGPU_URB_ENTRIES,
-			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE,
-			    GEN7_VFE_STATE_GPGPU_MODE);
-	gen7_emit_curbe_load(batch, curbe_buffer);
-	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
-	gen7_emit_gpgpu_walk(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = intel_batchbuffer_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen7_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
-
-void
-gen7_gpgpu_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned x, unsigned y,
-		       unsigned width, unsigned height,
-		       uint8_t color)
 {
 	struct intel_bb *ibb;
 	uint32_t curbe_buffer, interface_descriptor;
@@ -195,7 +145,7 @@ gen7_gpgpu_fillfunc_v2(int i915,
 	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
 
 	/* Fill curbe buffer data */
-	curbe_buffer = gen7_fill_curbe_buffer_data_v2(ibb, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(ibb, color);
 
 	/*
 	 * const buffer needs to fill for every thread, but as we have just 1
@@ -203,22 +153,22 @@ gen7_gpgpu_fillfunc_v2(int i915,
 	 * For each thread, just use thread group ID for buffer offset.
 	 */
 	interface_descriptor =
-			gen7_fill_interface_descriptor_v2(ibb, buf,
-							  gen7_gpgpu_kernel,
-							  sizeof(gen7_gpgpu_kernel));
+			gen7_fill_interface_descriptor(ibb, buf,
+						       gen7_gpgpu_kernel,
+						       sizeof(gen7_gpgpu_kernel));
 
 	intel_bb_ptr_set(ibb, 0);
 
 	/* GPGPU pipeline */
 	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
 
-	gen7_emit_state_base_address_v2(ibb);
-	gen7_emit_vfe_state_v2(ibb, THREADS, GEN7_GPGPU_URB_ENTRIES,
+	gen7_emit_state_base_address(ibb);
+	gen7_emit_vfe_state(ibb, THREADS, GEN7_GPGPU_URB_ENTRIES,
 			       GPGPU_URB_SIZE, GPGPU_CURBE_SIZE,
 			       GEN7_VFE_STATE_GPGPU_MODE);
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
-	gen7_emit_gpgpu_walk_v2(ibb, x, y, width, height);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
+	gen7_emit_gpgpu_walk(ibb, x, y, width, height);
 
 	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
 	intel_bb_ptr_align(ibb, 32);
@@ -230,59 +180,11 @@ gen7_gpgpu_fillfunc_v2(int i915,
 }
 
 void
-gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
-		    unsigned int x, unsigned int y,
-		    unsigned int width, unsigned int height,
+gen8_gpgpu_fillfunc(int i915,
+		    struct intel_buf *buf,
+		    unsigned x, unsigned y,
+		    unsigned width, unsigned height,
 		    uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	/*
-	 * const buffer needs to fill for every thread, but as we have just 1
-	 * thread per every group, so need only one curbe data.
-	 * For each thread, just use thread group ID for buffer offset.
-	 */
-	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));
-
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	batch->ptr = batch->buffer;
-
-	/* GPGPU pipeline */
-	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
-
-	gen8_emit_state_base_address(batch);
-	gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
-			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
-	gen7_emit_curbe_load(batch, curbe_buffer);
-	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
-	gen8_emit_gpgpu_walk(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = intel_batchbuffer_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen7_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
-
-void
-gen8_gpgpu_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned x, unsigned y,
-		       unsigned width, unsigned height,
-		       uint8_t color)
 {
 	struct intel_bb *ibb;
 	uint32_t curbe_buffer, interface_descriptor;
@@ -297,9 +199,9 @@ gen8_gpgpu_fillfunc_v2(int i915,
 	 * thread per every group, so need only one curbe data.
 	 * For each thread, just use thread group ID for buffer offset.
 	 */
-	curbe_buffer = gen7_fill_curbe_buffer_data_v2(ibb, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(ibb, color);
 
-	interface_descriptor = gen8_fill_interface_descriptor_v2(ibb, buf,
+	interface_descriptor = gen8_fill_interface_descriptor(ibb, buf,
 				gen8_gpgpu_kernel, sizeof(gen8_gpgpu_kernel));
 
 	intel_bb_ptr_set(ibb, 0);
@@ -307,14 +209,14 @@ gen8_gpgpu_fillfunc_v2(int i915,
 	/* GPGPU pipeline */
 	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
 
-	gen8_emit_state_base_address_v2(ibb);
-	gen8_emit_vfe_state_v2(ibb, THREADS, GEN8_GPGPU_URB_ENTRIES,
-			       GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
+	gen8_emit_state_base_address(ibb);
+	gen8_emit_vfe_state(ibb, THREADS, GEN8_GPGPU_URB_ENTRIES,
+			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
 
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
 
-	gen8_emit_gpgpu_walk_v2(ibb, x, y, width, height);
+	gen8_emit_gpgpu_walk(ibb, x, y, width, height);
 
 	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
 	intel_bb_ptr_align(ibb, 32);
@@ -326,63 +228,12 @@ gen8_gpgpu_fillfunc_v2(int i915,
 }
 
 static void
-__gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		      const struct igt_buf *dst,
-		      unsigned int x, unsigned int y,
-		      unsigned int width, unsigned int height,
-		      uint8_t color, const uint32_t kernel[][4],
-		      size_t kernel_size)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	/*
-	 * const buffer needs to fill for every thread, but as we have just 1
-	 * thread per every group, so need only one curbe data.
-	 * For each thread, just use thread group ID for buffer offset.
-	 */
-	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-				kernel, kernel_size);
-
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	batch->ptr = batch->buffer;
-
-	/* GPGPU pipeline */
-	OUT_BATCH(GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
-		  PIPELINE_SELECT_GPGPU);
-
-	gen9_emit_state_base_address(batch);
-	gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
-			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
-	gen7_emit_curbe_load(batch, curbe_buffer);
-	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
-	gen8_emit_gpgpu_walk(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = intel_batchbuffer_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen7_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
-
-
-static void
-__gen9_gpgpu_fillfunc_v2(int i915,
-			 struct intel_buf *buf,
-			 unsigned x, unsigned y,
-			 unsigned width, unsigned height,
-			 uint8_t color,
-			 const uint32_t kernel[][4], size_t kernel_size)
+__gen9_gpgpu_fillfunc(int i915,
+		      struct intel_buf *buf,
+		      unsigned x, unsigned y,
+		      unsigned width, unsigned height,
+		      uint8_t color,
+		      const uint32_t kernel[][4], size_t kernel_size)
 {
 	struct intel_bb *ibb;
 	uint32_t curbe_buffer, interface_descriptor;
@@ -398,11 +249,11 @@ __gen9_gpgpu_fillfunc_v2(int i915,
 	 * For each thread, just use thread group ID for buffer offset.
 	 */
 	/* Fill curbe buffer data */
-	curbe_buffer = gen7_fill_curbe_buffer_data_v2(ibb, color);
+	curbe_buffer = gen7_fill_curbe_buffer_data(ibb, color);
 
-	interface_descriptor = gen8_fill_interface_descriptor_v2(ibb, buf,
-								 kernel,
-								 kernel_size);
+	interface_descriptor = gen8_fill_interface_descriptor(ibb, buf,
+							      kernel,
+							      kernel_size);
 
 	intel_bb_ptr_set(ibb, 0);
 
@@ -410,15 +261,15 @@ __gen9_gpgpu_fillfunc_v2(int i915,
 	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | GEN9_PIPELINE_SELECTION_MASK |
 		     PIPELINE_SELECT_GPGPU);
 
-	gen9_emit_state_base_address_v2(ibb);
+	gen9_emit_state_base_address(ibb);
 
-	gen8_emit_vfe_state_v2(ibb, THREADS, GEN8_GPGPU_URB_ENTRIES,
-			       GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
+	gen8_emit_vfe_state(ibb, THREADS, GEN8_GPGPU_URB_ENTRIES,
+			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
 
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
 
-	gen8_emit_gpgpu_walk_v2(ibb, x, y, width, height);
+	gen8_emit_gpgpu_walk(ibb, x, y, width, height);
 
 	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
 	intel_bb_ptr_align(ibb, 32);
@@ -429,66 +280,35 @@ __gen9_gpgpu_fillfunc_v2(int i915,
 	intel_bb_destroy(ibb);
 }
 
-void gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-			 const struct igt_buf *dst,
-			 unsigned int x, unsigned int y,
-			 unsigned int width, unsigned int height,
+void gen9_gpgpu_fillfunc(int i915,
+			 struct intel_buf *buf,
+			 unsigned x, unsigned y,
+			 unsigned width, unsigned height,
 			 uint8_t color)
 {
-	__gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
-			      gen9_gpgpu_kernel, sizeof(gen9_gpgpu_kernel));
-}
-
-void gen9_gpgpu_fillfunc_v2(int i915,
-			    struct intel_buf *buf,
-			    unsigned x, unsigned y,
-			    unsigned width, unsigned height,
-			    uint8_t color)
-{
-	__gen9_gpgpu_fillfunc_v2(i915, buf, x, y, width, height, color,
-				 gen9_gpgpu_kernel,
-				 sizeof(gen9_gpgpu_kernel));
+	__gen9_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
+			      gen9_gpgpu_kernel,
+			      sizeof(gen9_gpgpu_kernel));
 }
 
-
-void gen11_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-			  const struct igt_buf *dst,
-			  unsigned int x, unsigned int y,
-			  unsigned int width, unsigned int height,
+void gen11_gpgpu_fillfunc(int i915,
+			  struct intel_buf *buf,
+			  unsigned x, unsigned y,
+			  unsigned width, unsigned height,
 			  uint8_t color)
 {
-	__gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
-			      gen11_gpgpu_kernel, sizeof(gen11_gpgpu_kernel));
+	__gen9_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
+			      gen11_gpgpu_kernel,
+			      sizeof(gen11_gpgpu_kernel));
 }
 
-void gen11_gpgpu_fillfunc_v2(int i915,
-			     struct intel_buf *buf,
-			     unsigned x, unsigned y,
-			     unsigned width, unsigned height,
-			     uint8_t color)
-{
-	__gen9_gpgpu_fillfunc_v2(i915, buf, x, y, width, height, color,
-				 gen11_gpgpu_kernel,
-				 sizeof(gen11_gpgpu_kernel));
-}
-
-void gen12_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-			  const struct igt_buf *dst,
-			  unsigned int x, unsigned int y,
-			  unsigned int width, unsigned int height,
+void gen12_gpgpu_fillfunc(int i915,
+			  struct intel_buf *buf,
+			  unsigned x, unsigned y,
+			  unsigned width, unsigned height,
 			  uint8_t color)
 {
-	__gen9_gpgpu_fillfunc(batch, dst, x, y, width, height, color,
-			      gen12_gpgpu_kernel, sizeof(gen12_gpgpu_kernel));
-}
-
-void gen12_gpgpu_fillfunc_v2(int i915,
-			     struct intel_buf *buf,
-			     unsigned x, unsigned y,
-			     unsigned width, unsigned height,
-			     uint8_t color)
-{
-	__gen9_gpgpu_fillfunc_v2(i915, buf, x, y, width, height, color,
-				 gen12_gpgpu_kernel,
-				 sizeof(gen12_gpgpu_kernel));
+	__gen9_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
+			      gen12_gpgpu_kernel,
+			      sizeof(gen12_gpgpu_kernel));
 }
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index a387732b..25abe1fa 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -27,75 +27,38 @@
 #ifndef GPGPU_FILL_H
 #define GPGPU_FILL_H
 
-#include "intel_batchbuffer.h"
 #include "intel_bufops.h"
 
 void
-gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
-		    unsigned int x, unsigned int y,
-		    unsigned int width, unsigned int height,
+gen7_gpgpu_fillfunc(int i915,
+		    struct intel_buf *buf,
+		    unsigned x, unsigned y,
+		    unsigned width, unsigned height,
 		    uint8_t color);
 
 void
-gen7_gpgpu_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned x, unsigned y,
-		       unsigned width, unsigned height,
-		       uint8_t color);
-
-void
-gen8_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
-		    unsigned int x, unsigned int y,
-		    unsigned int width, unsigned int height,
-		    uint8_t color);
-
-void
-gen8_gpgpu_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned x, unsigned y,
-		       unsigned width, unsigned height,
-		       uint8_t color);
-
-void
-gen9_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
-		    unsigned int x, unsigned int y,
-		    unsigned int width, unsigned int height,
+gen8_gpgpu_fillfunc(int i915,
+		    struct intel_buf *buf,
+		    unsigned x, unsigned y,
+		    unsigned width, unsigned height,
 		    uint8_t color);
 
-void gen9_gpgpu_fillfunc_v2(int i915,
-			    struct intel_buf *buf,
-			    unsigned x, unsigned y,
-			    unsigned width, unsigned height,
-			    uint8_t color);
-
-void
-gen11_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		     const struct igt_buf *dst,
-		     unsigned int x, unsigned int y,
-		     unsigned int width, unsigned int height,
-		     uint8_t color);
-
-void gen11_gpgpu_fillfunc_v2(int i915,
-			     struct intel_buf *buf,
-			     unsigned x, unsigned y,
-			     unsigned width, unsigned height,
-			     uint8_t color);
+void gen9_gpgpu_fillfunc(int i915,
+			 struct intel_buf *buf,
+			 unsigned x, unsigned y,
+			 unsigned width, unsigned height,
+			 uint8_t color);
 
-void
-gen12_gpgpu_fillfunc(struct intel_batchbuffer *batch,
-		     const struct igt_buf *dst,
-		     unsigned int x, unsigned int y,
-		     unsigned int width, unsigned int height,
-		     uint8_t color);
+void gen11_gpgpu_fillfunc(int i915,
+			  struct intel_buf *buf,
+			  unsigned x, unsigned y,
+			  unsigned width, unsigned height,
+			  uint8_t color);
 
-void
-gen12_gpgpu_fillfunc_v2(int i915,
-			struct intel_buf *buf,
-			unsigned x, unsigned y,
-			unsigned width, unsigned height,
-			uint8_t color);
+void gen12_gpgpu_fillfunc(int i915,
+			  struct intel_buf *buf,
+			  unsigned x, unsigned y,
+			  unsigned width, unsigned height,
+			  uint8_t color);
 
 #endif /* GPGPU_FILL_H */
diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index 8c41d0f3..8c284eb1 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -24,773 +24,24 @@
 
 #include "gpu_cmds.h"
 
-void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	int ret;
-
-	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
-	if (ret == 0)
-		ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
-					    NULL, 0, 0, 0);
-	igt_assert(ret == 0);
-}
-
-void
-gen7_render_context_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
-{
-	igt_assert_eq(drm_intel_bo_subdata(batch->bo,
-					   0, 4096, batch->buffer),
-		      0);
-	igt_assert_eq(drm_intel_gem_bo_context_exec(batch->bo, batch->ctx,
-						    batch_end, 0),
-		      0);
-}
-
-uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			    uint8_t color)
-{
-	uint8_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = intel_batchbuffer_subdata_alloc(batch,
-						       sizeof(uint32_t) * 8,
-						       64);
-	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
-	*curbe_buffer = color;
-
-	return offset;
-}
-
-uint32_t
-gen11_fill_curbe_buffer_data(struct intel_bb *ibb)
-
-{
-	uint32_t *curbe_buffer;
-	uint32_t offset;
-
-	intel_bb_ptr_align(ibb, 64);
-	curbe_buffer = intel_bb_ptr(ibb);
-	offset = intel_bb_offset(ibb);
-
-	*curbe_buffer++ = 0;
-	*curbe_buffer = 1;
-	intel_bb_ptr_add(ibb, 64);
-
-	return offset;
-}
-
-uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			const struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen7_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
-	offset = intel_batchbuffer_subdata_offset(batch, ss);
-
-	ss->ss0.surface_type = SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss1.base_addr = buf->bo->offset;
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				intel_batchbuffer_subdata_offset(batch, ss) + 4,
-				buf->bo, 0,
-				read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-
-	ss->ss3.pitch  = buf->surface[0].stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			const struct igt_buf *dst)
-{
-	uint32_t *binding_table, offset;
-
-	binding_table = intel_batchbuffer_subdata_alloc(batch, 32, 64);
-	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
-	if (IS_GEN7(batch->devid))
-		binding_table[0] = gen7_fill_surface_state(batch, dst,
-						SURFACEFORMAT_R8_UNORM, 1);
-	else
-		binding_table[0] = gen8_fill_surface_state(batch, dst,
-						SURFACEFORMAT_R8_UNORM, 1);
-
-	return offset;
-}
-
-uint32_t
-gen11_fill_binding_table(struct intel_bb *ibb,
-			 const struct intel_buf *src,
-			 const struct intel_buf *dst)
-{
-	uint32_t binding_table_offset;
-	uint32_t *binding_table;
-
-	intel_bb_ptr_align(ibb, 64);
-	binding_table_offset = intel_bb_offset(ibb);
-	binding_table = intel_bb_ptr(ibb);
-	intel_bb_ptr_add(ibb, 64);
-
-	binding_table[0] = gen11_fill_surface_state(ibb, src,
-						    SURFACE_1D,
-						    SURFACEFORMAT_R32G32B32A32_FLOAT,
-						    0, 0, 0);
-	binding_table[1] = gen11_fill_surface_state(ibb, dst,
-						    SURFACE_BUFFER,
-						    SURFACEFORMAT_RAW,
-						    1, 1, 1);
-
-	return binding_table_offset;
-
-}
-
-uint32_t
-gen7_fill_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size)
-{
-	uint32_t offset;
-
-	offset = intel_batchbuffer_copy_data(batch, kernel, size, 64);
-
-	return offset;
-}
-
-uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
-			       const struct igt_buf *dst,
-			       const uint32_t kernel[][4],
-			       size_t size)
-{
-	struct gen7_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_kernel(batch, kernel, size);
-
-	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
-	offset = intel_batchbuffer_subdata_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc1.single_program_flow = 1;
-	idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
-
-	idd->desc2.sampler_count = 0;      /* 0 samplers used */
-	idd->desc2.sampler_state_pointer = 0;
-
-	idd->desc3.binding_table_entry_count = 0;
-	idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc4.constant_urb_entry_read_offset = 0;
-	idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	return offset;
-}
-
-void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
-
-	/* general */
-	OUT_BATCH(0);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  BASE_ADDRESS_MODIFY);
-
-	/* general/dynamic/indirect/instruction access Bound */
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-}
-
-void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
-		    uint32_t urb_entries, uint32_t urb_size,
-		    uint32_t curbe_size, uint32_t mode)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(threads << 16 |
-		urb_entries << 8 |
-		mode << 2); /* GPGPU vs media mode */
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(urb_size << 16 |	/* in 256 bits unit */
-		  curbe_size);		/* in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
-{
-	OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* curbe total data length */
-	OUT_BATCH(64);
-	/* curbe data start address, is relative to the dynamics base address */
-	OUT_BATCH(curbe_buffer);
-}
-
-void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
-				    uint32_t interface_descriptor)
-{
-	OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
-	OUT_BATCH(0);
-	/* interface descriptor data length */
-	if (IS_GEN7(batch->devid))
-		OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
-	else
-		OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
-	/* interface descriptor address, is relative to the dynamics base
-	 * address
-	 */
-	OUT_BATCH(interface_descriptor);
-}
-
-void
-gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height)
-{
-	int i, j;
-
-	for (i = 0; i < width / 16; i++) {
-		for (j = 0; j < height / 16; j++) {
-			gen_emit_media_object(batch, x + i * 16, y + j * 16);
-		}
-	}
-}
-
-void
-gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned int x, unsigned int y,
-		     unsigned int width, unsigned int height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 9);
-
-	/* interface descriptor offset */
-	OUT_BATCH(0);
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
-uint32_t
-gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
-			    uint32_t iters)
-{
-	uint32_t *curbe_buffer;
-	uint32_t offset;
-
-	curbe_buffer = intel_batchbuffer_subdata_alloc(batch, 64, 64);
-	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
-	*curbe_buffer = iters;
-
-	return offset;
-}
-
-uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			const struct igt_buf *buf,
-			uint32_t format,
-			int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	int ret;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
-	offset = intel_batchbuffer_subdata_offset(batch, ss);
-
-	ss->ss0.surface_type = SURFACE_2D;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = 1; /* align 4 */
-	ss->ss0.horizontal_alignment = 1; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-
-	ss->ss8.base_addr = buf->bo->offset;
-
-	ret = drm_intel_bo_emit_reloc(batch->bo,
-				intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
-				buf->bo, 0, read_domain, write_domain);
-	igt_assert(ret == 0);
-
-	ss->ss2.height = igt_buf_height(buf) - 1;
-	ss->ss2.width  = igt_buf_width(buf) - 1;
-	ss->ss3.pitch  = buf->surface[0].stride - 1;
-
-	ss->ss7.shader_chanel_select_r = 4;
-	ss->ss7.shader_chanel_select_g = 5;
-	ss->ss7.shader_chanel_select_b = 6;
-	ss->ss7.shader_chanel_select_a = 7;
-
-	return offset;
-}
-
-uint32_t
-gen11_fill_surface_state(struct intel_bb *ibb,
-			 const struct intel_buf *buf,
-			 uint32_t surface_type,
-			 uint32_t format,
-			 uint32_t vertical_alignment,
-			 uint32_t horizontal_alignment,
-			 int is_dst)
-{
-	struct gen8_surface_state *ss;
-	uint32_t write_domain, read_domain, offset;
-	uint64_t address;
-
-	if (is_dst) {
-		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
-	} else {
-		write_domain = 0;
-		read_domain = I915_GEM_DOMAIN_SAMPLER;
-	}
-
-	intel_bb_ptr_align(ibb, 64);
-	offset = intel_bb_offset(ibb);
-	ss = intel_bb_ptr(ibb);
-	intel_bb_ptr_add(ibb, 64);
-
-	ss->ss0.surface_type = surface_type;
-	ss->ss0.surface_format = format;
-	ss->ss0.render_cache_read_write = 1;
-	ss->ss0.vertical_alignment = vertical_alignment; /* align 4 */
-	ss->ss0.horizontal_alignment = horizontal_alignment; /* align 4 */
-
-	if (buf->tiling == I915_TILING_X)
-		ss->ss0.tiled_mode = 2;
-	else if (buf->tiling == I915_TILING_Y)
-		ss->ss0.tiled_mode = 3;
-	else
-		ss->ss0.tiled_mode = 0;
-
-	address = intel_bb_offset_reloc(ibb, buf->handle,
-					read_domain, write_domain,
-					offset + 4 * 8, 0x0);
-
-	ss->ss8.base_addr = (uint32_t) address;
-	ss->ss9.base_addr_hi = address >> 32;
-
-	if (is_dst) {
-		ss->ss1.memory_object_control = 2;
-		ss->ss2.height = 1;
-		ss->ss2.width  = 95;
-		ss->ss3.pitch  = 0;
-		ss->ss7.shader_chanel_select_r = 4;
-		ss->ss7.shader_chanel_select_g = 5;
-		ss->ss7.shader_chanel_select_b = 6;
-		ss->ss7.shader_chanel_select_a = 7;
-	}
-	else {
-		ss->ss1.qpitch = 4040;
-		ss->ss1.base_mip_level = 31;
-		ss->ss2.height = 9216;
-		ss->ss2.width  = 1019;
-		ss->ss3.pitch  = 64;
-		ss->ss5.mip_count = 2;
-	}
-
-	return offset;
-}
-
-uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
-			       const struct igt_buf *dst,
-			       const uint32_t kernel[][4],
-			       size_t size)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen7_fill_binding_table(batch, dst);
-	kernel_offset = gen7_fill_kernel(batch, kernel, size);
-
-	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
-	offset = intel_batchbuffer_subdata_offset(batch, idd);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	idd->desc6.num_threads_in_tg = 1;
-
-	return offset;
-}
-
-static uint32_t
-gen7_fill_kernel_v2(struct intel_bb *ibb,
-		    const uint32_t kernel[][4],
-		    size_t size);
-
-uint32_t
-gen11_fill_interface_descriptor(struct intel_bb *ibb,
-				struct intel_buf *src, struct intel_buf *dst,
-				const uint32_t kernel[][4],
-				size_t size)
-{
-	struct gen8_interface_descriptor_data *idd;
-	uint32_t offset;
-	uint32_t binding_table_offset, kernel_offset;
-
-	binding_table_offset = gen11_fill_binding_table(ibb, src, dst);
-	kernel_offset = gen7_fill_kernel_v2(ibb, kernel, size);
-
-	intel_bb_ptr_align(ibb, 64);
-	idd = intel_bb_ptr(ibb);
-	offset = intel_bb_offset(ibb);
-
-	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
-
-	idd->desc2.single_program_flow = 1;
-	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
-
-	idd->desc3.sampler_count = 0;      /* 0 samplers used */
-	idd->desc3.sampler_state_pointer = 0;
-
-	idd->desc4.binding_table_entry_count = 0;
-	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
-
-	idd->desc5.constant_urb_entry_read_offset = 0;
-	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
-
-	idd->desc6.num_threads_in_tg = 1;
-
-	return offset;
-}
-
-void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo,
-		  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		  0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  BASE_ADDRESS_MODIFY);
-
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* instruction buffer size, must set modify enable bit, otherwise it may
-	 * result in GPU hang
-	 */
-	OUT_BATCH(1 << 12 | 1);
-}
-
-void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
-		    uint32_t urb_entries, uint32_t urb_size,
-		    uint32_t curbe_size)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(threads << 16 |
-		urb_entries << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(urb_size << 16 |
-		curbe_size);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned int x, unsigned int y,
-		     unsigned int width, unsigned int height)
-{
-	uint32_t x_dim, y_dim, tmp, right_mask;
-
-	/*
-	 * Simply do SIMD16 based dispatch, so every thread uses
-	 * SIMD16 channels.
-	 *
-	 * Define our own thread group size, e.g 16x1 for every group, then
-	 * will have 1 thread each group in SIMD16 dispatch. So thread
-	 * width/height/depth are all 1.
-	 *
-	 * Then thread group X = width / 16 (aligned to 16)
-	 * thread group Y = height;
-	 */
-	x_dim = (width + 15) / 16;
-	y_dim = height;
-
-	tmp = width & 15;
-	if (tmp == 0)
-		right_mask = (1 << 16) - 1;
-	else
-		right_mask = (1 << tmp) - 1;
-
-	OUT_BATCH(GEN7_GPGPU_WALKER | 13);
-
-	OUT_BATCH(0); /* kernel offset */
-	OUT_BATCH(0); /* indirect data length */
-	OUT_BATCH(0); /* indirect data offset */
-
-	/* SIMD size, thread w/h/d */
-	OUT_BATCH(1 << 30 | /* SIMD16 */
-		  0 << 16 | /* depth:1 */
-		  0 << 8 | /* height:1 */
-		  0); /* width:1 */
-
-	/* thread group X */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(x_dim);
-
-	/* thread group Y */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(y_dim);
-
-	/* thread group Z */
-	OUT_BATCH(0);
-	OUT_BATCH(1);
-
-	/* right mask */
-	OUT_BATCH(right_mask);
-
-	/* bottom mask, height 1, always 0xffffffff */
-	OUT_BATCH(0xffffffff);
-}
-
-void
-gen_emit_media_object(struct intel_batchbuffer *batch,
-		       unsigned int xoffset, unsigned int yoffset)
-{
-	OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
-
-	/* interface descriptor offset */
-	OUT_BATCH(0);
-
-	/* without indirect data */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* inline data (xoffset, yoffset) */
-	OUT_BATCH(xoffset);
-	OUT_BATCH(yoffset);
-	if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
-		gen8_emit_media_state_flush(batch);
-}
-
-void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch)
+uint32_t
+gen7_fill_curbe_buffer_data(struct intel_bb *ibb, uint8_t color)
 {
-	OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
-
-	/* general */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-
-	/* stateless data port */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-
-	/* surface */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
-
-	/* dynamic */
-	OUT_RELOC(batch->bo,
-		  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
-		  0, BASE_ADDRESS_MODIFY);
-
-	/* indirect */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
+	uint32_t *curbe_buffer;
+	uint32_t offset;
 
-	/* instruction */
-	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		  BASE_ADDRESS_MODIFY);
+	intel_bb_ptr_align(ibb, 64);
+	curbe_buffer = intel_bb_ptr(ibb);
+	offset = intel_bb_offset(ibb);
 
-	/* general state buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* dynamic state buffer size */
-	OUT_BATCH(1 << 12 | 1);
-	/* indirect object buffer size */
-	OUT_BATCH(0xfffff000 | 1);
-	/* intruction buffer size, must set modify enable bit, otherwise it may
-	 * result in GPU hang
-	 */
-	OUT_BATCH(1 << 12 | 1);
+	*curbe_buffer = color;
+	intel_bb_ptr_add(ibb, 32);
 
-	/* Bindless surface state base address */
-	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
-	OUT_BATCH(0);
-	OUT_BATCH(0xfffff000);
+	return offset;
 }
 
-/*
- * Here we start version of the gpgpu fill pipeline creation which is based
- * on intel_bb.
- */
 uint32_t
-gen7_fill_curbe_buffer_data_v2(struct intel_bb *ibb, uint8_t color)
+gen11_fill_curbe_buffer_data(struct intel_bb *ibb)
 {
 	uint32_t *curbe_buffer;
 	uint32_t offset;
@@ -799,16 +50,17 @@ gen7_fill_curbe_buffer_data_v2(struct intel_bb *ibb, uint8_t color)
 	curbe_buffer = intel_bb_ptr(ibb);
 	offset = intel_bb_offset(ibb);
 
-	*curbe_buffer = color;
-	intel_bb_ptr_add(ibb, 32);
+	*curbe_buffer++ = 0;
+	*curbe_buffer = 1;
+	intel_bb_ptr_add(ibb, 64);
 
 	return offset;
 }
 
 static uint32_t
-gen7_fill_kernel_v2(struct intel_bb *ibb,
-		    const uint32_t kernel[][4],
-		    size_t size)
+gen7_fill_kernel(struct intel_bb *ibb,
+		const uint32_t kernel[][4],
+		size_t size)
 {
 	uint32_t *kernel_dst;
 	uint32_t offset;
@@ -825,10 +77,10 @@ gen7_fill_kernel_v2(struct intel_bb *ibb,
 }
 
 static uint32_t
-gen7_fill_surface_state_v2(struct intel_bb *ibb,
-			   struct intel_buf *buf,
-			   uint32_t format,
-			   int is_dst)
+gen7_fill_surface_state(struct intel_bb *ibb,
+			struct intel_buf *buf,
+			uint32_t format,
+			int is_dst)
 {
 	struct gen7_surface_state *ss;
 	uint32_t write_domain, read_domain, offset;
@@ -875,10 +127,10 @@ gen7_fill_surface_state_v2(struct intel_bb *ibb,
 }
 
 static uint32_t
-gen8_fill_surface_state_v2(struct intel_bb *ibb,
-			   struct intel_buf *buf,
-			   uint32_t format,
-			   int is_dst)
+gen8_fill_surface_state(struct intel_bb *ibb,
+			struct intel_buf *buf,
+			uint32_t format,
+			int is_dst)
 {
 	struct gen8_surface_state *ss;
 	uint32_t write_domain, read_domain, offset;
@@ -927,8 +179,75 @@ gen8_fill_surface_state_v2(struct intel_bb *ibb,
 }
 
 static uint32_t
-gen7_fill_binding_table_v2(struct intel_bb *ibb,
-			   struct intel_buf *buf)
+gen11_fill_surface_state(struct intel_bb *ibb,
+			 const struct intel_buf *buf,
+			 uint32_t surface_type,
+			 uint32_t format,
+			 uint32_t vertical_alignment,
+			 uint32_t horizontal_alignment,
+			 int is_dst)
+{
+	struct gen8_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	uint64_t address;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	intel_bb_ptr_align(ibb, 64);
+	offset = intel_bb_offset(ibb);
+	ss = intel_bb_ptr(ibb);
+	intel_bb_ptr_add(ibb, 64);
+
+	ss->ss0.surface_type = surface_type;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+	ss->ss0.vertical_alignment = vertical_alignment; /* encoding supplied by caller */
+	ss->ss0.horizontal_alignment = horizontal_alignment; /* encoding supplied by caller */
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+	else
+		ss->ss0.tiled_mode = 0;
+
+	address = intel_bb_offset_reloc(ibb, buf->handle,
+					read_domain, write_domain,
+					offset + 4 * 8, 0x0);
+
+	ss->ss8.base_addr = (uint32_t) address;
+	ss->ss9.base_addr_hi = address >> 32;
+
+	if (is_dst) {
+		ss->ss1.memory_object_control = 2;
+		ss->ss2.height = 1;
+		ss->ss2.width  = 95;
+		ss->ss3.pitch  = 0;
+		ss->ss7.shader_chanel_select_r = 4;
+		ss->ss7.shader_chanel_select_g = 5;
+		ss->ss7.shader_chanel_select_b = 6;
+		ss->ss7.shader_chanel_select_a = 7;
+	}
+	else {
+		ss->ss1.qpitch = 4040;
+		ss->ss1.base_mip_level = 31;
+		ss->ss2.height = 9216;
+		ss->ss2.width  = 1019;
+		ss->ss3.pitch  = 64;
+		ss->ss5.mip_count = 2;
+	}
+
+	return offset;
+}
+
+static uint32_t
+gen7_fill_binding_table(struct intel_bb *ibb,
+			struct intel_buf *buf)
 {
 	uint32_t binding_table_offset;
 	uint32_t *binding_table;
@@ -940,28 +259,53 @@ gen7_fill_binding_table_v2(struct intel_bb *ibb,
 	intel_bb_ptr_add(ibb, 64);
 
 	if (IS_GEN7(devid))
-		binding_table[0] = gen7_fill_surface_state_v2(ibb, buf,
-							      SURFACEFORMAT_R8_UNORM, 1);
+		binding_table[0] = gen7_fill_surface_state(ibb, buf,
+							   SURFACEFORMAT_R8_UNORM, 1);
 
 	else
-		binding_table[0] = gen8_fill_surface_state_v2(ibb, buf,
-							      SURFACEFORMAT_R8_UNORM, 1);
+		binding_table[0] = gen8_fill_surface_state(ibb, buf,
+							   SURFACEFORMAT_R8_UNORM, 1);
+
+	return binding_table_offset;
+}
+
+static uint32_t
+gen11_fill_binding_table(struct intel_bb *ibb,
+			 const struct intel_buf *src,
+			 const struct intel_buf *dst)
+{
+	uint32_t binding_table_offset;
+	uint32_t *binding_table;
+
+	intel_bb_ptr_align(ibb, 64);
+	binding_table_offset = intel_bb_offset(ibb);
+	binding_table = intel_bb_ptr(ibb);
+	intel_bb_ptr_add(ibb, 64);
+
+	binding_table[0] = gen11_fill_surface_state(ibb, src,
+						    SURFACE_1D,
+						    SURFACEFORMAT_R32G32B32A32_FLOAT,
+						    0, 0, 0);
+	binding_table[1] = gen11_fill_surface_state(ibb, dst,
+						    SURFACE_BUFFER,
+						    SURFACEFORMAT_RAW,
+						    1, 1, 1);
 
 	return binding_table_offset;
 }
 
 uint32_t
-gen7_fill_interface_descriptor_v2(struct intel_bb *ibb,
-				  struct intel_buf *buf,
-				  const uint32_t kernel[][4],
-				  size_t size)
+gen7_fill_interface_descriptor(struct intel_bb *ibb,
+			       struct intel_buf *buf,
+			       const uint32_t kernel[][4],
+			       size_t size)
 {
 	struct gen7_interface_descriptor_data *idd;
 	uint32_t offset;
 	uint32_t binding_table_offset, kernel_offset;
 
-	binding_table_offset = gen7_fill_binding_table_v2(ibb, buf);
-	kernel_offset = gen7_fill_kernel_v2(ibb, kernel, size);
+	binding_table_offset = gen7_fill_binding_table(ibb, buf);
+	kernel_offset = gen7_fill_kernel(ibb, kernel, size);
 
 	intel_bb_ptr_align(ibb, 64);
 	idd = intel_bb_ptr(ibb);
@@ -987,17 +331,53 @@ gen7_fill_interface_descriptor_v2(struct intel_bb *ibb,
 }
 
 uint32_t
-gen8_fill_interface_descriptor_v2(struct intel_bb *ibb,
-				  struct intel_buf *buf,
-				  const uint32_t kernel[][4],
-				  size_t size)
+gen8_fill_interface_descriptor(struct intel_bb *ibb,
+			       struct intel_buf *buf,
+			       const uint32_t kernel[][4],
+			       size_t size)
+{
+	struct gen8_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen7_fill_binding_table(ibb, buf);
+	kernel_offset = gen7_fill_kernel(ibb, kernel, size);
+
+	intel_bb_ptr_align(ibb, 64);
+	idd = intel_bb_ptr(ibb);
+	offset = intel_bb_offset(ibb);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc2.single_program_flow = 1;
+	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
+
+	idd->desc3.sampler_count = 0;      /* 0 samplers used */
+	idd->desc3.sampler_state_pointer = 0;
+
+	idd->desc4.binding_table_entry_count = 0;
+	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc5.constant_urb_entry_read_offset = 0;
+	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	idd->desc6.num_threads_in_tg = 1;
+
+	return offset;
+}
+
+uint32_t
+gen11_fill_interface_descriptor(struct intel_bb *ibb,
+				struct intel_buf *src, struct intel_buf *dst,
+				const uint32_t kernel[][4],
+				size_t size)
 {
 	struct gen8_interface_descriptor_data *idd;
 	uint32_t offset;
 	uint32_t binding_table_offset, kernel_offset;
 
-	binding_table_offset = gen7_fill_binding_table_v2(ibb, buf);
-	kernel_offset = gen7_fill_kernel_v2(ibb, kernel, size);
+	binding_table_offset = gen11_fill_binding_table(ibb, src, dst);
+	kernel_offset = gen7_fill_kernel(ibb, kernel, size);
 
 	intel_bb_ptr_align(ibb, 64);
 	idd = intel_bb_ptr(ibb);
@@ -1023,7 +403,7 @@ gen8_fill_interface_descriptor_v2(struct intel_bb *ibb,
 }
 
 void
-gen7_emit_state_base_address_v2(struct intel_bb *ibb)
+gen7_emit_state_base_address(struct intel_bb *ibb)
 {
 	intel_bb_out(ibb, GEN7_STATE_BASE_ADDRESS | (10 - 2));
 
@@ -1056,7 +436,7 @@ gen7_emit_state_base_address_v2(struct intel_bb *ibb)
 }
 
 void
-gen8_emit_state_base_address_v2(struct intel_bb *ibb)
+gen8_emit_state_base_address(struct intel_bb *ibb)
 {
 	intel_bb_out(ibb, GEN8_STATE_BASE_ADDRESS | (16 - 2));
 
@@ -1101,9 +481,8 @@ gen8_emit_state_base_address_v2(struct intel_bb *ibb)
 	intel_bb_out(ibb, 1 << 12 | 1);
 }
 
-
 void
-gen9_emit_state_base_address_v2(struct intel_bb *ibb)
+gen9_emit_state_base_address(struct intel_bb *ibb)
 {
 	intel_bb_out(ibb, GEN8_STATE_BASE_ADDRESS | (19 - 2));
 
@@ -1152,9 +531,9 @@ gen9_emit_state_base_address_v2(struct intel_bb *ibb)
 }
 
 void
-gen7_emit_vfe_state_v2(struct intel_bb *ibb, uint32_t threads,
-		       uint32_t urb_entries, uint32_t urb_size,
-		       uint32_t curbe_size, uint32_t mode)
+gen7_emit_vfe_state(struct intel_bb *ibb, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size, uint32_t mode)
 {
 	intel_bb_out(ibb, GEN7_MEDIA_VFE_STATE | (8 - 2));
 
@@ -1179,9 +558,9 @@ gen7_emit_vfe_state_v2(struct intel_bb *ibb, uint32_t threads,
 }
 
 void
-gen8_emit_vfe_state_v2(struct intel_bb *ibb, uint32_t threads,
-		       uint32_t urb_entries, uint32_t urb_size,
-		       uint32_t curbe_size)
+gen8_emit_vfe_state(struct intel_bb *ibb, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size)
 {
 	intel_bb_out(ibb, GEN7_MEDIA_VFE_STATE | (9 - 2));
 
@@ -1204,7 +583,7 @@ gen8_emit_vfe_state_v2(struct intel_bb *ibb, uint32_t threads,
 }
 
 void
-gen7_emit_curbe_load_v2(struct intel_bb *ibb, uint32_t curbe_buffer)
+gen7_emit_curbe_load(struct intel_bb *ibb, uint32_t curbe_buffer)
 {
 	intel_bb_out(ibb, GEN7_MEDIA_CURBE_LOAD | (4 - 2));
 	intel_bb_out(ibb, 0);
@@ -1215,8 +594,8 @@ gen7_emit_curbe_load_v2(struct intel_bb *ibb, uint32_t curbe_buffer)
 }
 
 void
-gen7_emit_interface_descriptor_load_v2(struct intel_bb *ibb,
-				       uint32_t interface_descriptor)
+gen7_emit_interface_descriptor_load(struct intel_bb *ibb,
+				    uint32_t interface_descriptor)
 {
 	intel_bb_out(ibb, GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
 	intel_bb_out(ibb, 0);
@@ -1232,9 +611,9 @@ gen7_emit_interface_descriptor_load_v2(struct intel_bb *ibb,
 }
 
 void
-gen7_emit_gpgpu_walk_v2(struct intel_bb *ibb,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height)
+gen7_emit_gpgpu_walk(struct intel_bb *ibb,
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height)
 {
 	uint32_t x_dim, y_dim, tmp, right_mask;
 
@@ -1289,9 +668,9 @@ gen7_emit_gpgpu_walk_v2(struct intel_bb *ibb,
 }
 
 void
-gen8_emit_gpgpu_walk_v2(struct intel_bb *ibb,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height)
+gen8_emit_gpgpu_walk(struct intel_bb *ibb,
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height)
 {
 	uint32_t x_dim, y_dim, tmp, right_mask;
 
@@ -1349,15 +728,15 @@ gen8_emit_gpgpu_walk_v2(struct intel_bb *ibb,
 }
 
 void
-gen8_emit_media_state_flush_v2(struct intel_bb *ibb)
+gen8_emit_media_state_flush(struct intel_bb *ibb)
 {
 	intel_bb_out(ibb, GEN8_MEDIA_STATE_FLUSH | (2 - 2));
 	intel_bb_out(ibb, 0);
 }
 
 void
-gen_emit_media_object_v2(struct intel_bb *ibb,
-			 unsigned int xoffset, unsigned int yoffset)
+gen_emit_media_object(struct intel_bb *ibb,
+		      unsigned int xoffset, unsigned int yoffset)
 {
 	intel_bb_out(ibb, GEN7_MEDIA_OBJECT | (8 - 2));
 
@@ -1376,17 +755,17 @@ gen_emit_media_object_v2(struct intel_bb *ibb,
 	intel_bb_out(ibb, xoffset);
 	intel_bb_out(ibb, yoffset);
 	if (AT_LEAST_GEN(ibb->devid, 8) && !IS_CHERRYVIEW(ibb->devid))
-		gen8_emit_media_state_flush_v2(ibb);
+		gen8_emit_media_state_flush(ibb);
 }
 
 void
-gen7_emit_media_objects_v2(struct intel_bb *ibb,
-			   unsigned int x, unsigned int y,
-			   unsigned int width, unsigned int height)
+gen7_emit_media_objects(struct intel_bb *ibb,
+			unsigned int x, unsigned int y,
+			unsigned int width, unsigned int height)
 {
 	int i, j;
 
 	for (i = 0; i < width / 16; i++)
 		for (j = 0; j < height / 16; j++)
-			gen_emit_media_object_v2(ibb, x + i * 16, y + j * 16);
+			gen_emit_media_object(ibb, x + i * 16, y + j * 16);
 }
diff --git a/lib/gpu_cmds.h b/lib/gpu_cmds.h
index 64abc513..56f09b6e 100644
--- a/lib/gpu_cmds.h
+++ b/lib/gpu_cmds.h
@@ -25,7 +25,6 @@
 #ifndef GPU_CMDS_H
 #define GPU_CMDS_H
 
-#include <intel_bufmgr.h>
 #include <i915_drm.h>
 
 #include "media_fill.h"
@@ -38,182 +37,74 @@
 #include "intel_bufops.h"
 #include <assert.h>
 
-void
-gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
-
-void
-gen7_render_context_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
-
 uint32_t
-gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
-			uint8_t color);
+gen7_fill_curbe_buffer_data(struct intel_bb *ibb, uint8_t color);
 
 uint32_t
 gen11_fill_curbe_buffer_data(struct intel_bb *ibb);
 
 uint32_t
-gen7_fill_surface_state(struct intel_batchbuffer *batch,
-			const struct igt_buf *buf,
-			uint32_t format,
-			int is_dst);
-
-uint32_t
-gen7_fill_binding_table(struct intel_batchbuffer *batch,
-			const struct igt_buf *dst);
-uint32_t
-gen11_fill_binding_table(struct intel_bb *ibb,
-			 const struct intel_buf *src,
-			 const struct intel_buf *dst);
-
-uint32_t
-gen7_fill_kernel(struct intel_batchbuffer *batch,
-		const uint32_t kernel[][4],
-		size_t size);
-
-uint32_t
-gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
-			       const struct igt_buf *dst,
+gen7_fill_interface_descriptor(struct intel_bb *ibb,
+			       struct intel_buf *buf,
 			       const uint32_t kernel[][4],
 			       size_t size);
 
-void
-gen7_emit_state_base_address(struct intel_batchbuffer *batch);
-
-void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
-		    uint32_t urb_entries, uint32_t urb_size,
-		    uint32_t curbe_size, uint32_t mode);
-
-void
-gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
-
-void
-gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
-				    uint32_t interface_descriptor);
-
-void
-gen7_emit_media_objects(struct intel_batchbuffer *batch,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height);
-
-void
-gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned int x, unsigned int y,
-		     unsigned int width, unsigned int height);
-
-uint32_t
-gen8_spin_curbe_buffer_data(struct intel_batchbuffer *batch,
-			    uint32_t iters);
-
-uint32_t
-gen8_fill_surface_state(struct intel_batchbuffer *batch,
-			const struct igt_buf *buf,
-			uint32_t format,
-			int is_dst);
-uint32_t
-gen11_fill_surface_state(struct intel_bb *ibb,
-			 const struct intel_buf *buf,
-			 uint32_t surface_type,
-			 uint32_t format,
-			 uint32_t vertical_alignment,
-			 uint32_t horizontal_alignment,
-			 int is_dst);
-
 uint32_t
-gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
-			       const struct igt_buf *dst,
+gen8_fill_interface_descriptor(struct intel_bb *ibb,
+			       struct intel_buf *buf,
 			       const uint32_t kernel[][4],
 			       size_t size);
+
 uint32_t
 gen11_fill_interface_descriptor(struct intel_bb *ibb,
 				struct intel_buf *src, struct intel_buf *dst,
 				const uint32_t kernel[][4],
 				size_t size);
-void
-gen8_emit_state_base_address(struct intel_batchbuffer *batch);
 
 void
-gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
+gen7_emit_state_base_address(struct intel_bb *ibb);
 
 void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
-		    uint32_t urb_entries, uint32_t urb_size,
-		    uint32_t curbe_size);
+gen8_emit_state_base_address(struct intel_bb *ibb);
 
 void
-gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
-		     unsigned int x, unsigned int y,
-		     unsigned int width, unsigned int height);
-
-void
-gen_emit_media_object(struct intel_batchbuffer *batch, unsigned int xoffset,
-		  unsigned int yoffset);
+gen9_emit_state_base_address(struct intel_bb *ibb);
 
 void
-gen9_emit_state_base_address(struct intel_batchbuffer *batch);
-
-
-/* No libdrm */
-uint32_t
-gen7_fill_curbe_buffer_data_v2(struct intel_bb *ibb,
-			       uint8_t color);
-
-uint32_t
-gen7_fill_interface_descriptor_v2(struct intel_bb *ibb,
-				  struct intel_buf *buf,
-				  const uint32_t kernel[][4],
-				  size_t size);
-
-uint32_t
-gen8_fill_interface_descriptor_v2(struct intel_bb *ibb,
-				  struct intel_buf *buf,
-				  const uint32_t kernel[][4],
-				  size_t size);
+gen7_emit_vfe_state(struct intel_bb *ibb, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size, uint32_t mode);
 
 void
-gen7_emit_state_base_address_v2(struct intel_bb *ibb);
-
+gen8_emit_vfe_state(struct intel_bb *ibb, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size);
 void
-gen8_emit_state_base_address_v2(struct intel_bb *ibb);
+gen7_emit_curbe_load(struct intel_bb *ibb, uint32_t curbe_buffer);
 
 void
-gen9_emit_state_base_address_v2(struct intel_bb *ibb);
+gen7_emit_interface_descriptor_load(struct intel_bb *ibb,
+				    uint32_t interface_descriptor);
 
 void
-gen7_emit_vfe_state_v2(struct intel_bb *ibb, uint32_t threads,
-		       uint32_t urb_entries, uint32_t urb_size,
-		       uint32_t curbe_size, uint32_t mode);
+gen7_emit_gpgpu_walk(struct intel_bb *ibb,
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height);
 
 void
-gen8_emit_vfe_state_v2(struct intel_bb *ibb, uint32_t threads,
-		       uint32_t urb_entries, uint32_t urb_size,
-		       uint32_t curbe_size);
-void
-gen7_emit_curbe_load_v2(struct intel_bb *ibb, uint32_t curbe_buffer);
+gen8_emit_gpgpu_walk(struct intel_bb *ibb,
+		     unsigned int x, unsigned int y,
+		     unsigned int width, unsigned int height);
 
 void
-gen7_emit_interface_descriptor_load_v2(struct intel_bb *ibb,
-				       uint32_t interface_descriptor);
+gen8_emit_media_state_flush(struct intel_bb *ibb);
 
 void
-gen7_emit_gpgpu_walk_v2(struct intel_bb *ibb,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height);
+gen_emit_media_object(struct intel_bb *ibb,
+		      unsigned int xoffset, unsigned int yoffset);
 
 void
-gen8_emit_gpgpu_walk_v2(struct intel_bb *ibb,
+gen7_emit_media_objects(struct intel_bb *ibb,
 			unsigned int x, unsigned int y,
 			unsigned int width, unsigned int height);
-
-void
-gen8_emit_media_state_flush_v2(struct intel_bb *ibb);
-
-void
-gen_emit_media_object_v2(struct intel_bb *ibb,
-			 unsigned int xoffset, unsigned int yoffset);
-
-void
-gen7_emit_media_objects_v2(struct intel_bb *ibb,
-			   unsigned int x, unsigned int y,
-			   unsigned int width, unsigned int height);
 #endif /* GPU_CMDS_H */
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 02c293be..49f2d0fe 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -1118,32 +1118,6 @@ igt_fillfunc_t igt_get_media_fillfunc(int devid)
 	return fill;
 }
 
-
-/**
- * igt_get_media_fillfunc_v2:
- * @devid: pci device id
- *
- * Returns:
- *
- * The platform-specific media fill function pointer for the device specified
- * with @devid. Will return NULL when no media fill function is implemented.
- */
-igt_fillfunc_v2_t igt_get_media_fillfunc_v2(int devid)
-{
-	igt_fillfunc_v2_t fill = NULL;
-
-	if (IS_GEN12(devid))
-		fill = gen12_media_fillfunc_v2;
-	else if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid))
-		fill = gen9_media_fillfunc_v2;
-	else if (IS_GEN8(devid))
-		fill = gen8_media_fillfunc_v2;
-	else if (IS_GEN7(devid))
-		fill = gen7_media_fillfunc_v2;
-
-	return fill;
-}
-
 igt_vme_func_t igt_get_media_vme_func(int devid)
 {
 	igt_vme_func_t fill = NULL;
@@ -1153,6 +1127,7 @@ igt_vme_func_t igt_get_media_vme_func(int devid)
 
 	return fill;
 }
+
 /**
  * igt_get_gpgpu_fillfunc:
  * @devid: pci device id
@@ -1168,7 +1143,7 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
 
 	if (IS_GEN7(devid))
 		fill = gen7_gpgpu_fillfunc;
-	else if (IS_BROADWELL(devid))
+	else if (IS_GEN8(devid))
 		fill = gen8_gpgpu_fillfunc;
 	else if (IS_GEN9(devid) || IS_GEN10(devid))
 		fill = gen9_gpgpu_fillfunc;
@@ -1180,33 +1155,6 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
 	return fill;
 }
 
-/**
- * igt_get_gpgpu_fillfunc_v2:
- * @devid: pci device id
- *
- * Returns:
- *
- * The platform-specific gpgpu fill function pointer for the device specified
- * with @devid. Will return NULL when no gpgpu fill function is implemented.
- */
-igt_fillfunc_v2_t igt_get_gpgpu_fillfunc_v2(int devid)
-{
-	igt_fillfunc_v2_t fill = NULL;
-
-	if (IS_GEN7(devid))
-		fill = gen7_gpgpu_fillfunc_v2;
-	else if (IS_GEN8(devid))
-		fill = gen8_gpgpu_fillfunc_v2;
-	else if (IS_GEN9(devid) || IS_GEN10(devid))
-		fill = gen9_gpgpu_fillfunc_v2;
-	else if (IS_GEN11(devid))
-		fill = gen11_gpgpu_fillfunc_v2;
-	else if (IS_GEN12(devid))
-		fill = gen12_gpgpu_fillfunc_v2;
-
-	return fill;
-}
-
 /**
  * igt_get_media_spinfunc:
  * @devid: pci device id
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 162e7f0c..ae052c17 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -370,8 +370,8 @@ igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid);
 
 /**
  * igt_fillfunc_t:
- * @batch: batchbuffer object
- * @dst: destination i-g-t buffer object
+ * @i915: drm fd
+ * @buf: destination intel_buf object
  * @x: destination pixel x-coordination
  * @y: destination pixel y-coordination
  * @width: width of the filled rectangle
@@ -385,23 +385,15 @@ igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid);
  * A fill function will emit a batchbuffer to the kernel which executes
  * the specified blit fill operation using the media/gpgpu engine.
  */
-typedef void (*igt_fillfunc_t)(struct intel_batchbuffer *batch,
-			       const struct igt_buf *dst,
+struct intel_buf;
+typedef void (*igt_fillfunc_t)(int i915,
+			       struct intel_buf *buf,
 			       unsigned x, unsigned y,
 			       unsigned width, unsigned height,
 			       uint8_t color);
 
-struct intel_buf;
-typedef void (*igt_fillfunc_v2_t)(int i915,
-				  struct intel_buf *buf,
-				  unsigned x, unsigned y,
-				  unsigned width, unsigned height,
-				  uint8_t color);
-
-igt_fillfunc_t igt_get_media_fillfunc(int devid);
 igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid);
-igt_fillfunc_v2_t igt_get_gpgpu_fillfunc_v2(int devid);
-igt_fillfunc_v2_t igt_get_media_fillfunc_v2(int devid);
+igt_fillfunc_t igt_get_media_fillfunc(int devid);
 
 typedef void (*igt_vme_func_t)(int i915,
 			       uint32_t ctx,
diff --git a/lib/media_fill.c b/lib/media_fill.c
index 53d63b4b..c21de54b 100644
--- a/lib/media_fill.c
+++ b/lib/media_fill.c
@@ -145,55 +145,11 @@ static const uint32_t gen12_media_kernel[][4] = {
 #define GEN7_VFE_STATE_MEDIA_MODE 0
 
 void
-gen7_media_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
+gen7_media_fillfunc(int i915,
+		    struct intel_buf *buf,
 		    unsigned int x, unsigned int y,
 		    unsigned int width, unsigned int height,
 		    uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen7_fill_interface_descriptor(batch, dst,
-					gen7_media_kernel,
-					sizeof(gen7_media_kernel));
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	/* media pipeline */
-	batch->ptr = batch->buffer;
-	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen7_emit_state_base_address(batch);
-
-	gen7_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
-			    MEDIA_CURBE_SIZE, GEN7_VFE_STATE_MEDIA_MODE);;
-
-	gen7_emit_curbe_load(batch, curbe_buffer);
-
-	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
-
-	gen7_emit_media_objects(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = intel_batchbuffer_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen7_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
-
-void
-gen7_media_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned int x, unsigned int y,
-		       unsigned int width, unsigned int height,
-		       uint8_t color)
 {
 	struct intel_bb *ibb;
 	uint32_t curbe_buffer, interface_descriptor;
@@ -203,24 +159,24 @@ gen7_media_fillfunc_v2(int i915,
 
 	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
 
-	curbe_buffer = gen7_fill_curbe_buffer_data_v2(ibb, color);
-	interface_descriptor = gen7_fill_interface_descriptor_v2(ibb, buf,
+	curbe_buffer = gen7_fill_curbe_buffer_data(ibb, color);
+	interface_descriptor = gen7_fill_interface_descriptor(ibb, buf,
 					gen7_media_kernel,
 					sizeof(gen7_media_kernel));
 	intel_bb_ptr_set(ibb, 0);
 
 	/* media pipeline */
 	intel_bb_out(ibb, GEN7_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen7_emit_state_base_address_v2(ibb);
+	gen7_emit_state_base_address(ibb);
 
-	gen7_emit_vfe_state_v2(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
-			       MEDIA_CURBE_SIZE, GEN7_VFE_STATE_MEDIA_MODE);
+	gen7_emit_vfe_state(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE, GEN7_VFE_STATE_MEDIA_MODE);
 
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
 
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
 
-	gen7_emit_media_objects_v2(ibb, x, y, width, height);
+	gen7_emit_media_objects(ibb, x, y, width, height);
 
 	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
 	intel_bb_ptr_align(ibb, 32);
@@ -232,55 +188,11 @@ gen7_media_fillfunc_v2(int i915,
 }
 
 void
-gen8_media_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
+gen8_media_fillfunc(int i915,
+		    struct intel_buf *buf,
 		    unsigned int x, unsigned int y,
 		    unsigned int width, unsigned int height,
 		    uint8_t color)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-					gen8_media_kernel,
-					sizeof(gen8_media_kernel));
-	igt_assert(batch->ptr < &batch->buffer[4095]);
-
-	/* media pipeline */
-	batch->ptr = batch->buffer;
-	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen8_emit_state_base_address(batch);
-
-	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
-			    MEDIA_CURBE_SIZE);
-
-	gen7_emit_curbe_load(batch, curbe_buffer);
-
-	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
-
-	gen7_emit_media_objects(batch, x, y, width, height);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = intel_batchbuffer_align(batch, 8);
-	igt_assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen7_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
-
-void
-gen8_media_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned int x, unsigned int y,
-		       unsigned int width, unsigned int height,
-		       uint8_t color)
 {
 	struct intel_bb *ibb;
 	uint32_t curbe_buffer, interface_descriptor;
@@ -290,24 +202,24 @@ gen8_media_fillfunc_v2(int i915,
 
 	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
 
-	curbe_buffer = gen7_fill_curbe_buffer_data_v2(ibb, color);
-	interface_descriptor = gen8_fill_interface_descriptor_v2(ibb, buf,
+	curbe_buffer = gen7_fill_curbe_buffer_data(ibb, color);
+	interface_descriptor = gen8_fill_interface_descriptor(ibb, buf,
 					gen8_media_kernel,
 					sizeof(gen8_media_kernel));
 	intel_bb_ptr_set(ibb, 0);
 
 	/* media pipeline */
 	intel_bb_out(ibb, GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
-	gen8_emit_state_base_address_v2(ibb);
+	gen8_emit_state_base_address(ibb);
 
-	gen8_emit_vfe_state_v2(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
-			       MEDIA_CURBE_SIZE);
+	gen8_emit_vfe_state(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE);
 
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
 
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
 
-	gen7_emit_media_objects_v2(ibb, x, y, width, height);
+	gen7_emit_media_objects(ibb, x, y, width, height);
 
 	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
 	intel_bb_ptr_align(ibb, 32);
@@ -319,81 +231,12 @@ gen8_media_fillfunc_v2(int i915,
 }
 
 static void
-__gen9_media_fillfunc(struct intel_batchbuffer *batch,
-		      const struct igt_buf *dst,
+__gen9_media_fillfunc(int i915,
+		      struct intel_buf *buf,
 		      unsigned int x, unsigned int y,
 		      unsigned int width, unsigned int height,
-		      uint8_t color, const uint32_t kernel[][4],
-		      size_t kernel_size)
-{
-	uint32_t curbe_buffer, interface_descriptor;
-	uint32_t batch_end;
-
-	intel_batchbuffer_flush(batch);
-
-	/* setup states */
-	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
-
-	curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
-	interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
-					kernel, kernel_size);
-	assert(batch->ptr < &batch->buffer[4095]);
-
-	/* media pipeline */
-	batch->ptr = batch->buffer;
-	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
-		  GEN9_FORCE_MEDIA_AWAKE_ENABLE |
-		  GEN9_SAMPLER_DOP_GATE_DISABLE |
-		  GEN9_PIPELINE_SELECTION_MASK |
-		  GEN9_SAMPLER_DOP_GATE_MASK |
-		  GEN9_FORCE_MEDIA_AWAKE_MASK);
-	gen9_emit_state_base_address(batch);
-
-	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
-			    MEDIA_CURBE_SIZE);
-
-	gen7_emit_curbe_load(batch, curbe_buffer);
-
-	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
-
-	gen7_emit_media_objects(batch, x, y, width, height);
-
-	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
-		  GEN9_FORCE_MEDIA_AWAKE_DISABLE |
-		  GEN9_SAMPLER_DOP_GATE_ENABLE |
-		  GEN9_PIPELINE_SELECTION_MASK |
-		  GEN9_SAMPLER_DOP_GATE_MASK |
-		  GEN9_FORCE_MEDIA_AWAKE_MASK);
-
-	OUT_BATCH(MI_BATCH_BUFFER_END);
-
-	batch_end = intel_batchbuffer_align(batch, 8);
-	assert(batch_end < BATCH_STATE_SPLIT);
-
-	gen7_render_flush(batch, batch_end);
-	intel_batchbuffer_reset(batch);
-}
-
-void
-gen9_media_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
-		    unsigned int x, unsigned int y,
-		    unsigned int width, unsigned int height,
-		    uint8_t color)
-{
-
-	__gen9_media_fillfunc(batch, dst, x, y, width, height, color,
-			      gen8_media_kernel, sizeof(gen8_media_kernel));
-
-}
-
-static void
-__gen9_media_fillfunc_v2(int i915,
-			 struct intel_buf *buf,
-			 unsigned int x, unsigned int y,
-			 unsigned int width, unsigned int height,
-			 uint8_t color,
-			 const uint32_t kernel[][4], size_t kernel_size)
+		      uint8_t color,
+		      const uint32_t kernel[][4], size_t kernel_size)
 {
 	struct intel_bb *ibb;
 	uint32_t curbe_buffer, interface_descriptor;
@@ -404,10 +247,10 @@ __gen9_media_fillfunc_v2(int i915,
 	/* setup states */
 	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
 
-	curbe_buffer = gen7_fill_curbe_buffer_data_v2(ibb, color);
-	interface_descriptor = gen8_fill_interface_descriptor_v2(ibb, buf,
-								 kernel,
-								 kernel_size);
+	curbe_buffer = gen7_fill_curbe_buffer_data(ibb, color);
+	interface_descriptor = gen8_fill_interface_descriptor(ibb, buf,
+							      kernel,
+							      kernel_size);
 	intel_bb_ptr_set(ibb, 0);
 
 	/* media pipeline */
@@ -417,16 +260,16 @@ __gen9_media_fillfunc_v2(int i915,
 		     GEN9_PIPELINE_SELECTION_MASK |
 		     GEN9_SAMPLER_DOP_GATE_MASK |
 		     GEN9_FORCE_MEDIA_AWAKE_MASK);
-	gen9_emit_state_base_address_v2(ibb);
+	gen9_emit_state_base_address(ibb);
 
-	gen8_emit_vfe_state_v2(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
-			       MEDIA_CURBE_SIZE);
+	gen8_emit_vfe_state(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE);
 
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
 
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
 
-	gen7_emit_media_objects_v2(ibb, x, y, width, height);
+	gen7_emit_media_objects(ibb, x, y, width, height);
 
 	intel_bb_out(ibb, GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
 		     GEN9_FORCE_MEDIA_AWAKE_DISABLE |
@@ -445,15 +288,14 @@ __gen9_media_fillfunc_v2(int i915,
 }
 
 void
-gen9_media_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned int x, unsigned int y,
-		       unsigned int width, unsigned int height,
-		       uint8_t color)
+gen9_media_fillfunc(int i915,
+		    struct intel_buf *buf,
+		    unsigned int x, unsigned int y,
+		    unsigned int width, unsigned int height,
+		    uint8_t color)
 {
-
-	__gen9_media_fillfunc_v2(i915, buf, x, y, width, height, color,
-				 gen8_media_kernel, sizeof(gen8_media_kernel));
+	__gen9_media_fillfunc(i915, buf, x, y, width, height, color,
+			      gen8_media_kernel, sizeof(gen8_media_kernel));
 }
 
 static void
@@ -488,16 +330,16 @@ __gen11_media_vme_func(int i915,
 		     GEN9_PIPELINE_SELECTION_MASK |
 		     GEN9_SAMPLER_DOP_GATE_MASK |
 		     GEN9_FORCE_MEDIA_AWAKE_MASK);
-	gen9_emit_state_base_address_v2(ibb);
+	gen9_emit_state_base_address(ibb);
 
-	gen8_emit_vfe_state_v2(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+	gen8_emit_vfe_state(ibb, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
 			    MEDIA_CURBE_SIZE);
 
-	gen7_emit_curbe_load_v2(ibb, curbe_buffer);
+	gen7_emit_curbe_load(ibb, curbe_buffer);
 
-	gen7_emit_interface_descriptor_load_v2(ibb, interface_descriptor);
+	gen7_emit_interface_descriptor_load(ibb, interface_descriptor);
 
-	gen7_emit_media_objects_v2(ibb, 0, 0, width, height);
+	gen7_emit_media_objects(ibb, 0, 0, width, height);
 
 	intel_bb_out(ibb, GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
 		     GEN9_FORCE_MEDIA_AWAKE_DISABLE |
@@ -531,23 +373,12 @@ gen11_media_vme_func(int i915,
 }
 
 void
-gen12_media_fillfunc(struct intel_batchbuffer *batch,
-		     const struct igt_buf *dst,
+gen12_media_fillfunc(int i915,
+		     struct intel_buf *buf,
 		     unsigned int x, unsigned int y,
 		     unsigned int width, unsigned int height,
 		     uint8_t color)
 {
-	__gen9_media_fillfunc(batch, dst, x, y, width, height, color,
+	__gen9_media_fillfunc(i915, buf, x, y, width, height, color,
 			      gen12_media_kernel, sizeof(gen12_media_kernel));
 }
-
-void
-gen12_media_fillfunc_v2(int i915,
-			struct intel_buf *buf,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height,
-			uint8_t color)
-{
-	__gen9_media_fillfunc_v2(i915, buf, x, y, width, height, color,
-				 gen12_media_kernel, sizeof(gen12_media_kernel));
-}
diff --git a/lib/media_fill.h b/lib/media_fill.h
index 851a9915..0d2d0db3 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -26,51 +26,29 @@
 #define RENDE_MEDIA_FILL_H
 
 #include <stdint.h>
-#include "intel_batchbuffer.h"
 #include "intel_bufops.h"
 
 void
-gen8_media_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
+gen7_media_fillfunc(int i915,
+		    struct intel_buf *buf,
 		    unsigned int x, unsigned int y,
 		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
-gen7_media_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
+gen8_media_fillfunc(int i915,
+		    struct intel_buf *buf,
 		    unsigned int x, unsigned int y,
 		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
 void
-gen7_media_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned int x, unsigned int y,
-		       unsigned int width, unsigned int height,
-		       uint8_t color);
-
-void
-gen8_media_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned int x, unsigned int y,
-		       unsigned int width, unsigned int height,
-		       uint8_t color);
-
-void
-gen9_media_fillfunc(struct intel_batchbuffer *batch,
-		    const struct igt_buf *dst,
+gen9_media_fillfunc(int i915,
+		    struct intel_buf *buf,
 		    unsigned int x, unsigned int y,
 		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
-void
-gen9_media_fillfunc_v2(int i915,
-		       struct intel_buf *buf,
-		       unsigned int x, unsigned int y,
-		       unsigned int width, unsigned int height,
-		       uint8_t color);
-
 void
 gen11_media_vme_func(int i915,
 		     uint32_t ctx,
@@ -79,17 +57,10 @@ gen11_media_vme_func(int i915,
 		     struct intel_buf *dst);
 
 void
-gen12_media_fillfunc(struct intel_batchbuffer *batch,
-		     const struct igt_buf *dst,
+gen12_media_fillfunc(int i915,
+		     struct intel_buf *buf,
 		     unsigned int x, unsigned int y,
 		     unsigned int width, unsigned int height,
 		     uint8_t color);
 
-void
-gen12_media_fillfunc_v2(int i915,
-			struct intel_buf *buf,
-			unsigned int x, unsigned int y,
-			unsigned int width, unsigned int height,
-			uint8_t color);
-
 #endif /* RENDE_MEDIA_FILL_H */
diff --git a/tests/i915/gem_gpgpu_fill.c b/tests/i915/gem_gpgpu_fill.c
index 39917d94..e1d3a288 100644
--- a/tests/i915/gem_gpgpu_fill.c
+++ b/tests/i915/gem_gpgpu_fill.c
@@ -45,7 +45,6 @@
 #include "drm.h"
 #include "i915/gem.h"
 #include "igt.h"
-#include "intel_bufmgr.h"
 #include "intel_bufops.h"
 
 #define WIDTH 64
@@ -59,32 +58,9 @@
 typedef struct {
 	int drm_fd;
 	uint32_t devid;
-	drm_intel_bufmgr *bufmgr;
-	uint8_t linear[WIDTH * HEIGHT];
 	struct buf_ops *bops;
 } data_t;
 
-static void scratch_buf_init(data_t *data, struct igt_buf *buf,
-			int width, int height, int stride, uint8_t color)
-{
-	drm_intel_bo *bo;
-	int i;
-
-	bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
-	for (i = 0; i < width * height; i++)
-		data->linear[i] = color;
-	gem_write(data->drm_fd, bo->handle, 0, data->linear,
-		sizeof(data->linear));
-
-	memset(buf, 0, sizeof(*buf));
-
-	buf->bo = bo;
-	buf->surface[0].stride = stride;
-	buf->tiling = I915_TILING_NONE;
-	buf->surface[0].size = SIZE;
-	buf->bpp = 32;
-}
-
 static struct intel_buf *
 create_buf(data_t *data, int width, int height, uint8_t color)
 {
@@ -113,20 +89,6 @@ create_buf(data_t *data, int width, int height, uint8_t color)
 	return buf;
 }
 
-static void
-scratch_buf_check(data_t *data, struct igt_buf *buf, int x, int y,
-		uint8_t color)
-{
-	uint8_t val;
-
-	gem_read(data->drm_fd, buf->bo->handle, 0,
-		data->linear, sizeof(data->linear));
-	val = data->linear[y * WIDTH + x];
-	igt_assert_f(val == color,
-		     "Expected 0x%02x, found 0x%02x at (%d,%d)\n",
-		     color, val, x, y);
-}
-
 static void buf_check(uint8_t *ptr, int x, int y, uint8_t color)
 {
 	uint8_t val;
@@ -137,7 +99,7 @@ static void buf_check(uint8_t *ptr, int x, int y, uint8_t color)
 		     color, val, x, y);
 }
 
-static void no_libdrm(data_t *data, igt_fillfunc_v2_t fill)
+static void gpgpu_fill(data_t *data, igt_fillfunc_t fill)
 {
 	struct intel_buf *buf;
 	uint8_t *ptr;
@@ -162,54 +124,19 @@ static void no_libdrm(data_t *data, igt_fillfunc_v2_t fill)
 	munmap(ptr, buf->size);
 }
 
-static void with_libdrm(data_t *data, igt_fillfunc_t fill)
-{
-	struct intel_batchbuffer *batch = NULL;
-	struct igt_buf dst;
-	int i, j;
-
-	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
-	igt_assert(batch);
-
-	scratch_buf_init(data, &dst, WIDTH, HEIGHT, STRIDE, COLOR_C4);
-
-	for (i = 0; i < WIDTH; i++)
-		for (j = 0; j < HEIGHT; j++)
-			scratch_buf_check(data, &dst, i, j, COLOR_C4);
-
-	fill(batch, &dst, 0, 0, WIDTH / 2, HEIGHT / 2, COLOR_4C);
-
-	for (i = 0; i < WIDTH; i++)
-		for (j = 0; j < HEIGHT; j++)
-			if (i < WIDTH / 2 && j < HEIGHT / 2)
-				scratch_buf_check(data, &dst, i, j, COLOR_4C);
-			else
-				scratch_buf_check(data, &dst, i, j, COLOR_C4);
-
-}
-
 igt_simple_main
 {
 	data_t data = {0, };
-	igt_fillfunc_t gpgpu_fill = NULL;
-	igt_fillfunc_v2_t gpgpu_fill_v2 = NULL;
+	igt_fillfunc_t fill_fn = NULL;
 
 	data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
 	data.devid = intel_get_drm_devid(data.drm_fd);
 	igt_require_gem(data.drm_fd);
 	data.bops = buf_ops_create(data.drm_fd);
 
-	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
-	igt_assert(data.bufmgr);
-
-	gpgpu_fill = igt_get_gpgpu_fillfunc(data.devid);
-	gpgpu_fill_v2 = igt_get_gpgpu_fillfunc_v2(data.devid);
+	fill_fn = igt_get_gpgpu_fillfunc(data.devid);
 
-	igt_require_f(gpgpu_fill || gpgpu_fill_v2,
-		      "no gpgpu-fill function\n");
+	igt_require_f(fill_fn, "no gpgpu-fill function\n");
 
-	if (gpgpu_fill_v2)
-		no_libdrm(&data, gpgpu_fill_v2);
-	else
-		with_libdrm(&data, gpgpu_fill);
+	gpgpu_fill(&data, fill_fn);
 }
diff --git a/tests/i915/gem_media_fill.c b/tests/i915/gem_media_fill.c
index 7c975577..934a9402 100644
--- a/tests/i915/gem_media_fill.c
+++ b/tests/i915/gem_media_fill.c
@@ -45,7 +45,6 @@
 #include "drm.h"
 #include "i915/gem.h"
 #include "igt.h"
-#include "intel_bufmgr.h"
 
 IGT_TEST_DESCRIPTION("Basic test for the media_fill() function, a very simple"
 		     " workload for the Media pipeline.");
@@ -61,32 +60,9 @@ IGT_TEST_DESCRIPTION("Basic test for the media_fill() function, a very simple"
 typedef struct {
 	int drm_fd;
 	uint32_t devid;
-	drm_intel_bufmgr *bufmgr;
-	uint8_t linear[WIDTH * HEIGHT];
 	struct buf_ops *bops;
 } data_t;
 
-static void scratch_buf_init(data_t *data, struct igt_buf *buf,
-			int width, int height, int stride, uint8_t color)
-{
-	drm_intel_bo *bo;
-	int i;
-
-	bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
-	for (i = 0; i < width * height; i++)
-		data->linear[i] = color;
-	gem_write(data->drm_fd, bo->handle, 0, data->linear,
-		sizeof(data->linear));
-
-	memset(buf, 0, sizeof(*buf));
-
-	buf->bo = bo;
-	buf->surface[0].stride = stride;
-	buf->tiling = I915_TILING_NONE;
-	buf->surface[0].size = SIZE;
-	buf->bpp = 32;
-}
-
 static struct intel_buf *
 create_buf(data_t *data, int width, int height, uint8_t color)
 {
@@ -115,20 +91,6 @@ create_buf(data_t *data, int width, int height, uint8_t color)
 	return buf;
 }
 
-static void
-scratch_buf_check(data_t *data, struct igt_buf *buf, int x, int y,
-		uint8_t color)
-{
-	uint8_t val;
-
-	gem_read(data->drm_fd, buf->bo->handle, 0,
-		data->linear, sizeof(data->linear));
-	val = data->linear[y * WIDTH + x];
-	igt_assert_f(val == color,
-		     "Expected 0x%02x, found 0x%02x at (%d,%d)\n",
-		     color, val, x, y);
-}
-
 static void buf_check(uint8_t *ptr, int x, int y, uint8_t color)
 {
 	uint8_t val;
@@ -139,7 +101,7 @@ static void buf_check(uint8_t *ptr, int x, int y, uint8_t color)
 		     color, val, x, y);
 }
 
-static void no_libdrm(data_t *data, igt_fillfunc_v2_t fill)
+static void media_fill(data_t *data, igt_fillfunc_t fill)
 {
 	struct intel_buf *buf;
 	uint8_t *ptr;
@@ -164,37 +126,10 @@ static void no_libdrm(data_t *data, igt_fillfunc_v2_t fill)
 	munmap(ptr, buf->size);
 }
 
-static void with_libdrm(data_t *data, igt_fillfunc_t fill)
-{
-	struct intel_batchbuffer *batch = NULL;
-	struct igt_buf dst;
-	int i, j;
-
-	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
-	igt_assert(batch);
-
-	scratch_buf_init(data, &dst, WIDTH, HEIGHT, STRIDE, COLOR_C4);
-
-	for (i = 0; i < WIDTH; i++)
-		for (j = 0; j < HEIGHT; j++)
-			scratch_buf_check(data, &dst, i, j, COLOR_C4);
-
-	fill(batch, &dst, 0, 0, WIDTH / 2, HEIGHT / 2, COLOR_4C);
-
-	for (i = 0; i < WIDTH; i++)
-		for (j = 0; j < HEIGHT; j++)
-			if (i < WIDTH / 2 && j < HEIGHT / 2)
-				scratch_buf_check(data, &dst, i, j, COLOR_4C);
-			else
-				scratch_buf_check(data, &dst, i, j, COLOR_C4);
-
-}
-
 igt_simple_main
 {
 	data_t data = {0, };
-	igt_fillfunc_t media_fill = NULL;
-	igt_fillfunc_v2_t media_fill_v2 = NULL;
+	igt_fillfunc_t fill_fn = NULL;
 
 	data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
 	igt_require_gem(data.drm_fd);
@@ -202,17 +137,9 @@ igt_simple_main
 	data.devid = intel_get_drm_devid(data.drm_fd);
 	data.bops = buf_ops_create(data.drm_fd);
 
-	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
-	igt_assert(data.bufmgr);
-
-	media_fill = igt_get_media_fillfunc(data.devid);
-	media_fill_v2 = igt_get_media_fillfunc_v2(data.devid);
+	fill_fn = igt_get_media_fillfunc(data.devid);
 
-	igt_require_f(media_fill || media_fill_v2,
-		      "no media-fill function\n");
+	igt_require_f(fill_fn, "no media-fill function\n");
 
-	if (media_fill_v2)
-		no_libdrm(&data, media_fill_v2);
-	else
-		with_libdrm(&data, media_fill);
+	media_fill(&data, fill_fn);
 }
-- 
2.26.0