[igt-dev] [PATCH i-g-t v2 3/3] igt: Remove duplicated macros

Tue Mar 7 10:45:19 UTC 2023

Introducing intel_gpu_commands.h requires removing all conflicting
macro definitions and altering the code that used them (mostly in how
the command length is encoded).

For all commands used in IGT but not (yet) in the kernel, add
intel_gpu_commands_staging.h, which keeps the commands that are used
only here. The next import of command macros can then be a verbatim
copy of the kernel header plus removal of the now-duplicated macros
from staging, done in a single commit so that the tree compiles cleanly.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Petri Latvala <adrinael at adrinael.net>
Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
---
 benchmarks/gem_wsim.c                    |  6 +--
 include/intel_gpu_commands_staging.h     | 18 +++++++
 include/linux/bitops.h                   |  2 +
 lib/gen4_render.h                        |  2 -
 lib/gen7_media.h                         |  2 -
 lib/gen7_render.h                        |  3 --
 lib/gen8_media.h                         |  2 -
 lib/i830_reg.h                           | 16 ------
 lib/i915/i915_blt.h                      |  4 +-
 lib/i915/i915_crc.c                      | 15 +++---
 lib/igt_draw.c                           |  4 +-
 lib/igt_dummyload.c                      |  2 +-
 lib/igt_store.c                          |  2 +-
 lib/intel_allocator.h                    |  8 +--
 lib/intel_aux_pgtable.c                  |  5 +-
 lib/intel_batchbuffer.c                  | 12 ++---
 lib/intel_bufops.c                       |  7 +++
 lib/intel_reg.h                          | 69 ++----------------------
 lib/ioctl_wrappers.h                     |  4 +-
 lib/rendercopy_gen9.c                    |  9 ++--
 tests/i915/api_intel_bb.c                |  2 +-
 tests/i915/gem_blits.c                   | 20 ++++---
 tests/i915/gem_busy.c                    |  8 +--
 tests/i915/gem_ccs.c                     |  2 +-
 tests/i915/gem_ctx_shared.c              |  4 +-
 tests/i915/gem_exec_async.c              |  2 +-
 tests/i915/gem_exec_balancer.c           | 23 +++-----
 tests/i915/gem_exec_capture.c            |  4 +-
 tests/i915/gem_exec_endless.c            | 13 +----
 tests/i915/gem_exec_fair.c               | 18 +++----
 tests/i915/gem_exec_fence.c              | 43 ++++++---------
 tests/i915/gem_exec_flush.c              |  6 +--
 tests/i915/gem_exec_gttfill.c            |  2 +-
 tests/i915/gem_exec_nop.c                |  4 +-
 tests/i915/gem_exec_parallel.c           |  2 +-
 tests/i915/gem_exec_params.c             |  4 +-
 tests/i915/gem_exec_reloc.c              | 29 ++++------
 tests/i915/gem_exec_schedule.c           | 43 ++++++---------
 tests/i915/gem_exec_store.c              |  6 +--
 tests/i915/gem_exec_suspend.c            |  2 +-
 tests/i915/gem_exec_whisper.c            |  2 +-
 tests/i915/gem_pipe_control_store_loop.c | 11 ++--
 tests/i915/gem_pxp.c                     |  7 +--
 tests/i915/gem_ringfill.c                |  2 +-
 tests/i915/gem_softpin.c                 | 16 +-----
 tests/i915/gem_sync.c                    | 16 +++---
 tests/i915/gem_userptr_blits.c           |  6 +--
 tests/i915/gem_vm_create.c               |  2 +-
 tests/i915/gem_watchdog.c                |  6 +--
 tests/i915/gem_workarounds.c             |  2 +-
 tests/i915/gen7_exec_parse.c             | 34 ++++++------
 tests/i915/gen9_exec_parse.c             | 47 +++++-----------
 tests/i915/i915_module_load.c            |  2 +-
 tests/i915/perf.c                        | 17 +-----
 tests/i915/perf_pmu.c                    | 18 +++----
 tests/i915/sysfs_timeslice_duration.c    | 17 ++----
 tests/prime_vgem.c                       |  2 +-
 tools/intel_audio_dump.c                 |  1 +
 tools/intel_reg.c                        |  2 +-
 59 files changed, 226 insertions(+), 413 deletions(-)
 create mode 100644 include/intel_gpu_commands_staging.h

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 2d60135817..7b5e62a3be 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -1426,7 +1426,7 @@ static unsigned int create_bb(struct w_step *w, int self)
 	cs = ptr = gem_mmap__wc(fd, w->bb_handle, 0, 4096, PROT_WRITE);
 
 	/* Store initial 64b timestamp: start */
-	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
+	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_CS_MMIO_DST;
 	*cs++ = CS_GPR(START_TS) + 4;
 	*cs++ = 0;
 	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
@@ -1441,7 +1441,7 @@ static unsigned int create_bb(struct w_step *w, int self)
 		*cs++ = MI_ARB_CHECK;
 
 	/* Store this 64b timestamp: now */
-	*cs++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST;
+	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_CS_MMIO_DST;
 	*cs++ = CS_GPR(NOW_TS) + 4;
 	*cs++ = 0;
 	*cs++ = MI_LOAD_REGISTER_REG | MI_CS_MMIO_DST | MI_CS_MMIO_SRC;
@@ -1456,7 +1456,7 @@ static unsigned int create_bb(struct w_step *w, int self)
 	*cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
 
 	/* Save delta for indirect read by COND_BBE */
-	*cs++ = MI_STORE_REGISTER_MEM | (1 + use_64b) | MI_CS_MMIO_DST;
+	*cs++ = MI_STORE_REGISTER_MEM_CMD | (1 + use_64b) | MI_CS_MMIO_DST;
 	*cs++ = CS_GPR(NOW_TS);
 	w->reloc[r].target_handle = self;
 	w->reloc[r].offset = offset_in_page(cs);
diff --git a/include/intel_gpu_commands_staging.h b/include/intel_gpu_commands_staging.h
new file mode 100644
index 0000000000..74b4fb6553
--- /dev/null
+++ b/include/intel_gpu_commands_staging.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT*/
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef _INTEL_GPU_COMMANDS_STAGING_H_
+#define _INTEL_GPU_COMMANDS_STAGING_H_
+
+#include "linux_scaffold.h"
+
+/* Length-free commands */
+#define MI_SEMAPHORE_WAIT_CMD		(0x1c << 23)
+#define MI_STORE_DWORD_IMM_CMD		(0x20 << 23)
+#define MI_STORE_REGISTER_MEM_CMD	(0x24 << 23)
+#define MI_FLUSH_DW_CMD			(0x26 << 23)
+#define MI_LOAD_REGISTER_MEM_CMD	(0x29 << 23)
+
+#endif /* _INTEL_GPU_COMMANDS_STAGING_H_ */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index fd73d510c6..b2ffcb50fb 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -17,4 +17,6 @@
 
 #include "linux_scaffold.h"
 
+#define REG_BIT(x) (1ul << (x))
+
 #endif /* _LINUX_BITOPS_H_ */
diff --git a/lib/gen4_render.h b/lib/gen4_render.h
index 7d8bc659a7..bbbddd346e 100644
--- a/lib/gen4_render.h
+++ b/lib/gen4_render.h
@@ -25,14 +25,12 @@
 #define GEN4_CS_URB_STATE			GEN4_3D(0, 0, 1)
 
 #define GEN4_STATE_BASE_ADDRESS			GEN4_3D(0, 1, 1)
-# define BASE_ADDRESS_MODIFY			(1 << 0)
 
 #define GEN4_STATE_SIP				GEN4_3D(0, 1, 2)
 
 #define GEN4_PIPELINE_SELECT			GEN4_3D(0, 1, 4)
 #define G4X_PIPELINE_SELECT			GEN4_3D(1, 1, 4)
 # define PIPELINE_SELECT_3D			0
-# define PIPELINE_SELECT_MEDIA			1
 
 #define GEN4_3DSTATE_PIPELINED_POINTERS		GEN4_3D(3, 0, 0)
 # define GEN4_GS_DISABLE			0
diff --git a/lib/gen7_media.h b/lib/gen7_media.h
index e81b5523a7..b5e49cae9e 100644
--- a/lib/gen7_media.h
+++ b/lib/gen7_media.h
@@ -14,11 +14,9 @@
 
 #define GEN7_PIPELINE_SELECT			GFXPIPE(1, 1, 4)
 # define PIPELINE_SELECT_3D			(0 << 0)
-# define PIPELINE_SELECT_MEDIA			(1 << 0)
 # define PIPELINE_SELECT_GPGPU			(2 << 0)
 
 #define GEN7_STATE_BASE_ADDRESS			GFXPIPE(0, 1, 1)
-# define BASE_ADDRESS_MODIFY			(1 << 0)
 
 #define GEN7_MEDIA_VFE_STATE			GFXPIPE(2, 0, 0)
 #define GEN7_MEDIA_CURBE_LOAD			GFXPIPE(2, 0, 1)
diff --git a/lib/gen7_render.h b/lib/gen7_render.h
index 5dfc04d4bc..d09ba6dad1 100644
--- a/lib/gen7_render.h
+++ b/lib/gen7_render.h
@@ -170,9 +170,6 @@
 /* DW1 */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
-/* for GEN7_STATE_BASE_ADDRESS */
-#define BASE_ADDRESS_MODIFY		(1 << 0)
-
 /* for GEN7_PIPE_CONTROL */
 #define GEN7_PIPE_CONTROL_CS_STALL      (1 << 20)
 #define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD   (1 << 1)
diff --git a/lib/gen8_media.h b/lib/gen8_media.h
index 1643794156..d2a049a1ec 100644
--- a/lib/gen8_media.h
+++ b/lib/gen8_media.h
@@ -14,10 +14,8 @@
 
 #define GEN8_PIPELINE_SELECT			GFXPIPE(1, 1, 4)
 # define PIPELINE_SELECT_3D			(0 << 0)
-# define PIPELINE_SELECT_MEDIA			(1 << 0)
 
 #define GEN8_STATE_BASE_ADDRESS			GFXPIPE(0, 1, 1)
-# define BASE_ADDRESS_MODIFY			(1 << 0)
 
 #define GEN8_MEDIA_VFE_STATE			GFXPIPE(2, 0, 0)
 #define GEN8_MEDIA_CURBE_LOAD			GFXPIPE(2, 0, 1)
diff --git a/lib/i830_reg.h b/lib/i830_reg.h
index b8ad2ac00f..3c0b9b5bd0 100644
--- a/lib/i830_reg.h
+++ b/lib/i830_reg.h
@@ -30,12 +30,7 @@
 
 #define I830_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
 
-/* Flush */
-#define MI_FLUSH			(0x04<<23)
-#define MI_FLUSH_DW			(0x26<<23)
-
 #define MI_WRITE_DIRTY_STATE		(1<<4)
-#define MI_END_SCENE			(1<<3)
 #define MI_GLOBAL_SNAPSHOT_COUNT_RESET	(1<<3)
 #define MI_INHIBIT_RENDER_CACHE_FLUSH	(1<<2)
 #define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
@@ -43,15 +38,11 @@
 /* broadwater flush bits */
 #define BRW_MI_GLOBAL_SNAPSHOT_RESET   (1 << 3)
 
-#define MI_BATCH_BUFFER_END	(0xA << 23)
-
 /* Noop */
-#define MI_NOOP				0x00
 #define MI_NOOP_WRITE_ID		(1<<22)
 #define MI_NOOP_ID_MASK			(1<<22 - 1)
 
 /* Wait for Events */
-#define MI_WAIT_FOR_EVENT			(0x03<<23)
 #define MI_WAIT_FOR_PIPEB_SVBLANK		(1<<18)
 #define MI_WAIT_FOR_PIPEA_SVBLANK		(1<<17)
 #define MI_WAIT_FOR_OVERLAY_FLIP		(1<<16)
@@ -61,12 +52,10 @@
 #define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW	(1<<1)
 
 /* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */
-#define MI_LOAD_SCAN_LINES_INCL			(0x12<<23)
 #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA	(0)
 #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB	(0x1<<20)
 
 /* BLT commands */
-#define COLOR_BLT_CMD		((2<<29)|(0x40<<22)|(0x3))
 #define COLOR_BLT_WRITE_ALPHA	(1<<21)
 #define COLOR_BLT_WRITE_RGB	(1<<20)
 
@@ -76,16 +65,11 @@
 
 #define XY_SETUP_CLIP_BLT_CMD		((2<<29)|(3<<22)|1)
 
-#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22))
 #define XY_SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
 #define XY_SRC_COPY_BLT_WRITE_RGB	(1<<20)
 #define XY_SRC_COPY_BLT_SRC_TILED	(1<<15)
 #define XY_SRC_COPY_BLT_DST_TILED	(1<<11)
 
-#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
-#define SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
-#define SRC_COPY_BLT_WRITE_RGB		(1<<20)
-
 #define XY_PAT_BLT_IMMEDIATE		((2<<29)|(0x72<<22))
 
 #define XY_MONO_PAT_BLT_CMD		((0x2<<29)|(0x52<<22)|0x7)
diff --git a/lib/i915/i915_blt.h b/lib/i915/i915_blt.h
index c535961e8a..63951db753 100644
--- a/lib/i915/i915_blt.h
+++ b/lib/i915/i915_blt.h
@@ -135,8 +135,8 @@ struct blt_block_copy_data_ext {
 };
 
 enum blt_access_type {
-	INDIRECT_ACCESS,
-	DIRECT_ACCESS,
+	BLT_INDIRECT_ACCESS,
+	BLT_DIRECT_ACCESS,
 };
 
 struct blt_ctrl_surf_copy_object {
diff --git a/lib/i915/i915_crc.c b/lib/i915/i915_crc.c
index 7d68f8e5c4..9564b7327d 100644
--- a/lib/i915/i915_crc.c
+++ b/lib/i915/i915_crc.c
@@ -9,7 +9,6 @@
 #include "gem_create.h"
 #include "gem_engine_topology.h"
 #include "gem_mman.h"
-#include "i830_reg.h"
 #include "i915_drm.h"
 #include "intel_reg.h"
 #include "intel_chipset.h"
@@ -36,13 +35,13 @@
 	} while (0)
 
 #define LOAD_REGISTER_IMM32(__reg, __imm1) do { \
-		*bb++ = MI_LOAD_REGISTER_IMM | MI_CS_MMIO_DST; \
+		*bb++ = MI_LOAD_REGISTER_IMM(1) | MI_CS_MMIO_DST; \
 		*bb++ = (__reg); \
 		*bb++ = (__imm1); \
 	} while (0)
 
 #define LOAD_REGISTER_IMM64(__reg, __imm1, __imm2) do { \
-		*bb++ = (MI_LOAD_REGISTER_IMM + 2) | MI_CS_MMIO_DST; \
+		*bb++ = MI_LOAD_REGISTER_IMM(2) | MI_CS_MMIO_DST; \
 		*bb++ = (__reg); \
 		*bb++ = (__imm1); \
 		*bb++ = (__reg) + 4; \
@@ -50,29 +49,29 @@
 	} while (0)
 
 #define LOAD_REGISTER_MEM(__reg, __offset) do { \
-		*bb++ = MI_LOAD_REGISTER_MEM | MI_CS_MMIO_DST | 2; \
+		*bb++ = MI_LOAD_REGISTER_MEM_CMD | MI_CS_MMIO_DST | 2; \
 		*bb++ = (__reg); \
 		*bb++ = (__offset); \
 		*bb++ = (__offset) >> 32; \
 	} while (0)
 
 #define LOAD_REGISTER_MEM_WPARID(__reg, __offset) do { \
-		*bb++ = MI_LOAD_REGISTER_MEM | MI_CS_MMIO_DST | MI_WPARID_ENABLE_GEN12 | 2; \
+		*bb++ = MI_LOAD_REGISTER_MEM_CMD | MI_CS_MMIO_DST | MI_WPARID_ENABLE_GEN12 | 2; \
 		*bb++ = (__reg); \
 		*bb++ = (__offset); \
 		*bb++ = (__offset) >> 32; \
 	} while (0)
 
 #define STORE_REGISTER_MEM(__reg, __offset) do { \
-		*bb++ = MI_STORE_REGISTER_MEM | MI_CS_MMIO_DST | 2; \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | MI_CS_MMIO_DST; \
 		*bb++ = (__reg); \
 		*bb++ = (__offset); \
 		*bb++ = (__offset) >> 32; \
 	} while (0)
 
 #define STORE_REGISTER_MEM_PREDICATED(__reg, __offset) do { \
-		*bb++ = MI_STORE_REGISTER_MEM | MI_CS_MMIO_DST | \
-			MI_STORE_PREDICATE_ENABLE_GEN12 | 2; \
+		*bb++ = MI_STORE_REGISTER_MEM_GEN8 | MI_CS_MMIO_DST | \
+			MI_STORE_PREDICATE_ENABLE_GEN12; \
 		*bb++ = (__reg); \
 		*bb++ = (__offset); \
 		*bb++ = (__offset) >> 32; \
diff --git a/lib/igt_draw.c b/lib/igt_draw.c
index 58ce0539be..ac512fac5a 100644
--- a/lib/igt_draw.c
+++ b/lib/igt_draw.c
@@ -385,12 +385,12 @@ static void switch_blt_tiling(struct intel_bb *ibb, uint32_t tiling, bool on)
 	/* To change the tile register, insert an MI_FLUSH_DW followed by an
 	 * MI_LOAD_REGISTER_IMM
 	 */
-	intel_bb_out(ibb, MI_FLUSH_DW | 2);
+	intel_bb_out(ibb, MI_FLUSH_DW_CMD | 2);
 	intel_bb_out(ibb, 0x0);
 	intel_bb_out(ibb, 0x0);
 	intel_bb_out(ibb, 0x0);
 
-	intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+	intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1));
 	intel_bb_out(ibb, 0x22200); /* BCS_SWCTRL */
 	intel_bb_out(ibb, bcs_swctrl);
 	intel_bb_out(ibb, MI_NOOP);
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index 5f3c6b10c7..b3dc18ee7d 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -256,7 +256,7 @@ emit_recursive_batch(igt_spin_t *spin,
 		r->offset = sizeof(uint32_t) * 1;
 		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;
 
-		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 
 		if (gen >= 8) {
 			*cs++ = r->presumed_offset + r->delta;
diff --git a/lib/igt_store.c b/lib/igt_store.c
index 98c6c4fbd1..538405e7f5 100644
--- a/lib/igt_store.c
+++ b/lib/igt_store.c
@@ -76,7 +76,7 @@ void igt_store_word(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
 		obj[BATCH].offset = bb_offset;
 		obj[BATCH].flags |= EXEC_OBJECT_PINNED;
 	}
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		uint64_t addr = target_gpu_addr + delta;
 		batch[++i] = lower_32_bits(addr);
diff --git a/lib/intel_allocator.h b/lib/intel_allocator.h
index 28e1165540..a6bf573e9d 100644
--- a/lib/intel_allocator.h
+++ b/lib/intel_allocator.h
@@ -12,6 +12,7 @@
 #include <stdint.h>
 #include <stdatomic.h>
 #include "i915/gem_submission.h"
+#include "intel_reg.h"
 
 /**
  * SECTION:intel_allocator
@@ -217,13 +218,6 @@ void intel_allocator_print(uint64_t allocator_handle);
 
 #define GEN8_GTT_ADDRESS_WIDTH 48
 
-static inline uint64_t sign_extend64(uint64_t x, int high)
-{
-	int shift = 63 - high;
-
-	return (int64_t)(x << shift) >> shift;
-}
-
 static inline uint64_t CANONICAL(uint64_t offset)
 {
 	return sign_extend64(offset, GEN8_GTT_ADDRESS_WIDTH - 1);
diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
index 7556351a02..5205687080 100644
--- a/lib/intel_aux_pgtable.c
+++ b/lib/intel_aux_pgtable.c
@@ -9,7 +9,6 @@
 
 #include "i915/gem_mman.h"
 
-#define BITS_PER_LONG_LONG	(sizeof(long long) * 8)
 #define BITMASK(e, s)		((~0ULL << (s)) & \
 				 (~0ULL >> (BITS_PER_LONG_LONG - 1 - (e))))
 
@@ -644,11 +643,11 @@ gen12_emit_aux_pgtable_state(struct intel_bb *ibb, uint32_t state, bool render)
 	if (!state)
 		return;
 
-	intel_bb_out(ibb, MI_LOAD_REGISTER_MEM | MI_MMIO_REMAP_ENABLE_GEN12 | 2);
+	intel_bb_out(ibb, MI_LOAD_REGISTER_MEM_CMD | MI_MMIO_REMAP_ENABLE_GEN12 | 2);
 	intel_bb_out(ibb, table_base_reg);
 	intel_bb_emit_reloc(ibb, ibb->handle, 0, 0, state, ibb->batch_offset);
 
-	intel_bb_out(ibb, MI_LOAD_REGISTER_MEM | MI_MMIO_REMAP_ENABLE_GEN12 | 2);
+	intel_bb_out(ibb, MI_LOAD_REGISTER_MEM_CMD | MI_MMIO_REMAP_ENABLE_GEN12 | 2);
 	intel_bb_out(ibb, table_base_reg + 4);
 	intel_bb_emit_reloc(ibb, ibb->handle, 0, 0, state + 4, ibb->batch_offset);
 }
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 59c788e683..8695f1b7ac 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -378,7 +378,7 @@ void igt_blitter_src_copy(int fd,
 	if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
 		unsigned int mask;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 
 		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
@@ -407,12 +407,12 @@ void igt_blitter_src_copy(int fd,
 
 	if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
 		igt_assert(gen >= 6);
-		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = MI_FLUSH_DW_CMD | 2;
 		batch[i++] = 0;
 		batch[i++] = 0;
 		batch[i++] = 0;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
 	}
@@ -2413,7 +2413,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
 	}
 
 	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
-		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1));
 		intel_bb_out(ibb, BCS_SWCTRL);
 
 		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
@@ -2450,12 +2450,12 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
 
 	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
 		igt_assert(ibb->gen >= 6);
-		intel_bb_out(ibb, MI_FLUSH_DW | 2);
+		intel_bb_out(ibb, MI_FLUSH_DW_CMD | 2);
 		intel_bb_out(ibb, 0);
 		intel_bb_out(ibb, 0);
 		intel_bb_out(ibb, 0);
 
-		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1));
 		intel_bb_out(ibb, BCS_SWCTRL);
 		intel_bb_out(ibb, (BCS_SRC_Y | BCS_DST_Y) << 16);
 	}
diff --git a/lib/intel_bufops.c b/lib/intel_bufops.c
index 72c2189e05..cdc7a1698b 100644
--- a/lib/intel_bufops.c
+++ b/lib/intel_bufops.c
@@ -83,6 +83,13 @@
 #define DEBUGFN()
 #endif
 
+#undef TILE_NONE
+#undef TILE_X
+#undef TILE_Y
+#undef TILE_Yf
+#undef TILE_Ys
+#undef TILE_4
+
 #define TILE_DEF(x) (1 << (x))
 #define TILE_NONE   TILE_DEF(I915_TILING_NONE)
 #define TILE_X      TILE_DEF(I915_TILING_X)
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 6f7559ad9f..3bf3676dc5 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -44,6 +44,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #ifndef _I810_REG_H
 #define _I810_REG_H
 
+#include "intel_gpu_commands.h"
+#include "intel_gpu_commands_staging.h"
+
 /* I/O register offsets
  */
 #define CRX_MDA		0x3B4
@@ -2534,7 +2537,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define I855_CLOCK_166_250			(3 << 0)
 
 /* BLT commands */
-#define COLOR_BLT_CMD		((2<<29)|(0x40<<22)|(0x3))
 #define COLOR_BLT_WRITE_ALPHA	(1<<21)
 #define COLOR_BLT_WRITE_RGB	(1<<20)
 
@@ -2545,15 +2547,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define XY_SETUP_CLIP_BLT_CMD		((2<<29)|(3<<22)|1)
 
-#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22))
 #define XY_SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
 #define XY_SRC_COPY_BLT_WRITE_RGB	(1<<20)
-#define XY_SRC_COPY_BLT_SRC_TILED	(1<<15)
-#define XY_SRC_COPY_BLT_DST_TILED	(1<<11)
-
-#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
-#define SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
-#define SRC_COPY_BLT_WRITE_RGB		(1<<20)
 
 #define XY_PAT_BLT_IMMEDIATE		((2<<29)|(0x72<<22))
 
@@ -2591,15 +2586,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   XY_FAST_COPY_COLOR_DEPTH_64			(4  << 24)
 #define   XY_FAST_COPY_COLOR_DEPTH_128			(5  << 24)
 
-#define MI_STORE_DWORD_IMM		((0x20<<23)|2)
-#define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
-
-#define MI_SET_CONTEXT			(0x18<<23)
 #define CTXT_NO_RESTORE			(1)
 #define CTXT_PALETTE_SAVE_DISABLE	(1<<3)
 #define CTXT_PALETTE_RESTORE_DISABLE	(1<<2)
 
-#define MI_SET_APPID                    (0x0E << 23)
 #define APPID_CTXREST_INHIBIT           (1 << 9)
 #define APPID_CTXSAVE_INHIBIT           (1 << 8)
 #define APPTYPE(n)                      ((n) << 7)
@@ -2616,36 +2606,26 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_VERTEX_BUFFER_DISABLE	(1)
 
 /* Overlay Flip */
-#define MI_OVERLAY_FLIP			(0x11<<23)
 #define MI_OVERLAY_FLIP_CONTINUE	(0<<21)
 #define MI_OVERLAY_FLIP_ON		(1<<21)
 #define MI_OVERLAY_FLIP_OFF		(2<<21)
 
 /* Wait for Events */
-#define MI_WAIT_FOR_EVENT		(0x03<<23)
 #define MI_WAIT_FOR_PIPEB_SVBLANK	(1<<18)
 #define MI_WAIT_FOR_PIPEA_SVBLANK	(1<<17)
-#define MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
 #define MI_WAIT_FOR_PIPEB_VBLANK	(1<<7)
 #define MI_WAIT_FOR_PIPEA_VBLANK	(1<<3)
 #define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW	(1<<5)
 #define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW	(1<<1)
 
-#define MI_LOAD_SCAN_LINES_INCL		(0x12<<23)
-#define MI_LOAD_REGISTER_IMM		((0x22 << 23) | 1)
-#define MI_LOAD_REGISTER_REG		((0x2A << 23) | 1)
-#define MI_LOAD_REGISTER_MEM		(0x29 << 23)
 #define   MI_CS_MMIO_DST		(1 << 19)
 #define   MI_CS_MMIO_SRC		(1 << 18)
 #define   MI_MMIO_REMAP_ENABLE_GEN12	(1 << 17)
 #define   MI_WPARID_ENABLE_GEN12	(1 << 16)
-#define MI_STORE_REGISTER_MEM		(0x24 << 23)
 #define   MI_STORE_PREDICATE_ENABLE_GEN12 (1 << 21)
 
 /* Flush */
-#define MI_FLUSH			(0x04<<23)
 #define MI_WRITE_DIRTY_STATE		(1<<4)
-#define MI_END_SCENE			(1<<3)
 #define MI_GLOBAL_SNAPSHOT_COUNT_RESET	(1<<3)
 #define MI_INHIBIT_RENDER_CACHE_FLUSH	(1<<2)
 #define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
@@ -2654,27 +2634,16 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define BRW_MI_GLOBAL_SNAPSHOT_RESET   (1 << 3)
 
 /* Noop */
-#define MI_NOOP				0x00
 #define MI_NOOP_WRITE_ID		(1<<22)
 #define MI_NOOP_ID_MASK			(1<<22 - 1)
 
-/* ARB Check */
-#define MI_ARB_CHECK                    (0x5 << 23)
-
 #define STATE3D_COLOR_FACTOR	((0x3<<29)|(0x1d<<24)|(0x01<<16))
 
 /* Atomics */
-#define MI_ATOMIC			((0x2f << 23) | 1)
-#define   MI_ATOMIC_INLINE_DATA         (1 << 18)
 #define   MI_ATOMIC_INC                 (0x5 << 8)
 #define   MI_ATOMIC_ADD                 (0x7 << 8)
 
 /* Batch */
-#define MI_BATCH_BUFFER		((0x30 << 23) | 1)
-#define MI_BATCH_BUFFER_START	(0x31 << 23)
-#define MI_BATCH_BUFFER_START_GEN8 ((0x31 << 13) | 1)
-#define   MI_BATCH_PREDICATE       (1 << 15) /* HSW+ on RCS only*/
-#define MI_BATCH_BUFFER_END	(0xA << 23)
 #define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
 #define   MAD_GT_IDD                    (0 << 12)
 #define   MAD_GT_OR_EQ_IDD              (1 << 12)
@@ -2682,45 +2651,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define   MAD_LT_OR_EQ_IDD              (3 << 12)
 #define   MAD_EQ_IDD                    (4 << 12)
 #define   MAD_NEQ_IDD                   (5 << 12)
-#define MI_DO_COMPARE                   (1 << 21)
-
-#define MI_BATCH_NON_SECURE		(1)
-#define MI_BATCH_NON_SECURE_I965	(1 << 8)
-#define MI_BATCH_NON_SECURE_HSW		(1<<13) /* Additional bit for RCS */
 
 /* Math */
-#define MI_INSTR(opcode, flags)         (((opcode) << 23) | (flags))
-#define MI_MATH(x)                      MI_INSTR(0x1a, (x) - 1)
-#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
-/* Opcodes for MI_MATH_INSTR */
-#define   MI_MATH_NOOP                  MI_MATH_INSTR(0x000, 0x0, 0x0)
-#define   MI_MATH_LOAD(op1, op2)        MI_MATH_INSTR(0x080, op1, op2)
-#define   MI_MATH_LOADINV(op1, op2)     MI_MATH_INSTR(0x480, op1, op2)
-#define   MI_MATH_LOAD0(op1)            MI_MATH_INSTR(0x081, op1)
-#define   MI_MATH_LOAD1(op1)            MI_MATH_INSTR(0x481, op1)
-#define   MI_MATH_ADD                   MI_MATH_INSTR(0x100, 0x0, 0x0)
-#define   MI_MATH_SUB                   MI_MATH_INSTR(0x101, 0x0, 0x0)
-#define   MI_MATH_AND                   MI_MATH_INSTR(0x102, 0x0, 0x0)
-#define   MI_MATH_OR                    MI_MATH_INSTR(0x103, 0x0, 0x0)
-#define   MI_MATH_XOR                   MI_MATH_INSTR(0x104, 0x0, 0x0)
-#define   MI_MATH_STORE(op1, op2)       MI_MATH_INSTR(0x180, op1, op2)
-#define   MI_MATH_STOREINV(op1, op2)    MI_MATH_INSTR(0x580, op1, op2)
 /* DG2+ */
 #define   MI_MATH_SHL                   MI_MATH_INSTR(0x105, 0x0, 0x0)
 #define   MI_MATH_SHR                   MI_MATH_INSTR(0x106, 0x0, 0x0)
 #define   MI_MATH_SAR                   MI_MATH_INSTR(0x107, 0x0, 0x0)
 
-/* Registers used as operands in MI_MATH_INSTR */
-#define   MI_MATH_REG(x)                (x)
-#define   MI_MATH_REG_SRCA              0x20
-#define   MI_MATH_REG_SRCB              0x21
-#define   MI_MATH_REG_ACCU              0x31
-#define   MI_MATH_REG_ZF                0x32
-#define   MI_MATH_REG_CF                0x33
-
-/* DG2+ */
-#define MI_SET_PREDICATE                MI_INSTR(0x1, 0)
-
 #define MAX_DISPLAY_PIPES	2
 
 typedef enum {
diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h
index cf228c2651..e4d7c0d408 100644
--- a/lib/ioctl_wrappers.h
+++ b/lib/ioctl_wrappers.h
@@ -173,9 +173,9 @@ static inline uint64_t to_user_pointer(const void *ptr)
  *
  * Casts a 64bit value from an ioctl into a pointer.
  */
-static inline void *from_user_pointer(uint64_t u64)
+static inline void *from_user_pointer(uint64_t u64p)
 {
-	return (void *)(uintptr_t)u64;
+	return (void *)(uintptr_t)u64p;
 }
 
 /**
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index d74f1c9996..650d095020 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -967,10 +967,7 @@ static void gen8_emit_primitive(struct intel_bb *ibb, uint32_t offset)
 	intel_bb_out(ibb, 0);	/* index buffer offset, ignored */
 }
 
-#define GFX_OP_PIPE_CONTROL    ((3 << 29) | (3 << 27) | (2 << 24))
-#define PIPE_CONTROL_CS_STALL	            (1 << 20)
 #define PIPE_CONTROL_RENDER_TARGET_FLUSH    (1 << 12)
-#define PIPE_CONTROL_FLUSH_ENABLE           (1 << 7)
 #define PIPE_CONTROL_DATA_CACHE_INVALIDATE  (1 << 5)
 #define PIPE_CONTROL_PROTECTEDPATH_DISABLE  (1 << 27)
 #define PIPE_CONTROL_PROTECTEDPATH_ENABLE   (1 << 22)
@@ -986,7 +983,7 @@ static void gen12_emit_pxp_state(struct intel_bb *ibb, bool enable,
 
 	if (enable) {
 		pipe_ctl_flags = PIPE_CONTROL_FLUSH_ENABLE;
-		intel_bb_out(ibb, GFX_OP_PIPE_CONTROL);
+		intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(2));
 		intel_bb_out(ibb, pipe_ctl_flags);
 
 		set_app_id =  MI_SET_APPID |
@@ -1005,7 +1002,7 @@ static void gen12_emit_pxp_state(struct intel_bb *ibb, bool enable,
 			   PIPE_CONTROL_RENDER_TARGET_FLUSH |
 			   PIPE_CONTROL_DATA_CACHE_INVALIDATE |
 			   PIPE_CONTROL_POST_SYNC_OP);
-	intel_bb_out(ibb, GFX_OP_PIPE_CONTROL | 4);
+	intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(6));
 	intel_bb_out(ibb, pipe_ctl_flags);
 	intel_bb_emit_reloc(ibb, ibb->handle, 0, I915_GEM_DOMAIN_COMMAND,
 			    (enable ? pxp_write_op_offset : (pxp_write_op_offset+8)),
@@ -1107,7 +1104,7 @@ void _gen9_render_op(struct intel_bb *ibb,
 
 	if (fast_clear) {
 		for (int i = 0; i < 4; i++) {
-			intel_bb_out(ibb, MI_STORE_DWORD_IMM);
+			intel_bb_out(ibb, MI_STORE_DWORD_IMM_GEN4);
 			intel_bb_emit_reloc(ibb, dst->handle,
 					    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                                             dst->cc.offset + i*sizeof(float),
diff --git a/tests/i915/api_intel_bb.c b/tests/i915/api_intel_bb.c
index 7ccc00aa25..46633b0385 100644
--- a/tests/i915/api_intel_bb.c
+++ b/tests/i915/api_intel_bb.c
@@ -1154,7 +1154,7 @@ static void delta_check(struct buf_ops *bops)
 	intel_bb_add_object(ibb, buf->handle, intel_buf_bo_size(buf),
 			    buf->addr.offset, 0, false);
 
-	intel_bb_out(ibb, MI_STORE_DWORD_IMM);
+	intel_bb_out(ibb, MI_STORE_DWORD_IMM_GEN4);
 	intel_bb_emit_reloc(ibb, buf->handle,
 			    I915_GEM_DOMAIN_RENDER,
 			    I915_GEM_DOMAIN_RENDER,
diff --git a/tests/i915/gem_blits.c b/tests/i915/gem_blits.c
index d9296cf2d1..9ea3925c38 100644
--- a/tests/i915/gem_blits.c
+++ b/tests/i915/gem_blits.c
@@ -27,8 +27,6 @@
 #include "igt.h"
 #include "igt_x86.h"
 
-#define MI_FLUSH_DW (0x26 << 23)
-
 #define BCS_SWCTRL 0x22200
 #define BCS_SRC_Y (1 << 0)
 #define BCS_DST_Y (1 << 1)
@@ -198,7 +196,7 @@ static void buffer_set_tiling(const struct device *device,
 	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
 		unsigned int mask;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 
 		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
@@ -248,12 +246,12 @@ static void buffer_set_tiling(const struct device *device,
 
 	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
 		igt_assert(device->gen >= 6);
-		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = MI_FLUSH_DW_CMD | 2;
 		batch[i++] = 0;
 		batch[i++] = 0;
 		batch[i++] = 0;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
 	}
@@ -345,7 +343,7 @@ static bool blit_to_linear(const struct device *device,
 	if (buffer->tiling >= I915_TILING_Y) {
 		unsigned int mask;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 
 		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
@@ -388,12 +386,12 @@ static bool blit_to_linear(const struct device *device,
 
 	if (buffer->tiling >= I915_TILING_Y) {
 		igt_assert(device->gen >= 6);
-		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = MI_FLUSH_DW_CMD | 2;
 		batch[i++] = 0;
 		batch[i++] = 0;
 		batch[i++] = 0;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
 	}
@@ -678,7 +676,7 @@ blit(const struct device *device,
 	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
 		unsigned int mask;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 
 		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
@@ -729,12 +727,12 @@ blit(const struct device *device,
 
 	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
 		igt_assert(device->gen >= 6);
-		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = MI_FLUSH_DW_CMD | 2;
 		batch[i++] = 0;
 		batch[i++] = 0;
 		batch[i++] = 0;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = BCS_SWCTRL;
 		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
 	}
diff --git a/tests/i915/gem_busy.c b/tests/i915/gem_busy.c
index f11fa877d3..08a500a9ec 100644
--- a/tests/i915/gem_busy.c
+++ b/tests/i915/gem_busy.c
@@ -235,10 +235,10 @@ static void one(int fd, const intel_ctx_t *ctx,
 
 static void xchg_u32(void *array, unsigned i, unsigned j)
 {
-	uint32_t *u32 = array;
-	uint32_t tmp = u32[i];
-	u32[i] = u32[j];
-	u32[j] = tmp;
+	uint32_t *ui32 = array;
+	uint32_t tmp = ui32[i];
+	ui32[i] = ui32[j];
+	ui32[j] = tmp;
 }
 
 static void close_race(int fd, const intel_ctx_t *ctx)
diff --git a/tests/i915/gem_ccs.c b/tests/i915/gem_ccs.c
index fcac191230..d25e00fc89 100644
--- a/tests/i915/gem_ccs.c
+++ b/tests/i915/gem_ccs.c
@@ -137,7 +137,7 @@ static void surf_copy(int i915,
 	surf.i915 = i915;
 	surf.print_bb = param.print_bb;
 	set_surf_object(&surf.src, mid->handle, mid->region, mid->size,
-			uc_mocs, INDIRECT_ACCESS);
+			uc_mocs, BLT_INDIRECT_ACCESS);
 	set_surf_object(&surf.dst, ccs, REGION_SMEM, ccssize,
 			uc_mocs, DIRECT_ACCESS);
 	bb_size = 4096;
diff --git a/tests/i915/gem_ctx_shared.c b/tests/i915/gem_ctx_shared.c
index 18d8cc013d..3d73db581c 100644
--- a/tests/i915/gem_ctx_shared.c
+++ b/tests/i915/gem_ctx_shared.c
@@ -309,7 +309,7 @@ static void exec_shared_gtt(int i915, const intel_ctx_cfg_t *cfg,
 	batch = gem_create(i915, 4096);
 
 	i = 0;
-	cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	cs[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		cs[++i] = obj.offset;
 		cs[++i] = obj.offset >> 32;
@@ -564,7 +564,7 @@ static void store_dword(int i915, uint64_t ahnd, const intel_ctx_t *ctx,
 	obj[2].relocation_count = !ahnd ? 1 : 0;
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = reloc.presumed_offset + reloc.delta;
 		batch[++i] = 0;
diff --git a/tests/i915/gem_exec_async.c b/tests/i915/gem_exec_async.c
index d50fe45ec5..173bc4648a 100644
--- a/tests/i915/gem_exec_async.c
+++ b/tests/i915/gem_exec_async.c
@@ -73,7 +73,7 @@ static void store_dword(int fd, int id, const intel_ctx_t *ctx,
 	obj[1].relocation_count = !id ? 1 : 0;
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = target_offset + offset;
 		batch[++i] = (target_offset + offset) >> 32;
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index d7acdca190..1c655e583c 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -41,15 +41,6 @@
 
 IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
 
-#define MI_SEMAPHORE_WAIT		(0x1c << 23)
-#define   MI_SEMAPHORE_POLL             (1 << 15)
-#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
-#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
-#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
-#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
-#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
-#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
-
 #define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
 
 static size_t sizeof_load_balance(int count)
@@ -589,7 +580,7 @@ static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
 
 	/* Wait until the spinner is running */
 	addr = spin->obj[0].offset + 4 * SPIN_POLL_START_IDX;
-	*cs++ = MI_SEMAPHORE_WAIT |
+	*cs++ = MI_SEMAPHORE_WAIT_CMD |
 		MI_SEMAPHORE_POLL |
 		MI_SEMAPHORE_SAD_NEQ_SDD |
 		(4 - 2);
@@ -600,7 +591,7 @@ static uint32_t create_semaphore_to_spinner(int i915, igt_spin_t *spin)
 	/* Then cancel the spinner */
 	addr = spin->obj[IGT_SPIN_BATCH].offset +
 		offset_in_page(spin->condition);
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = addr;
 	*cs++ = addr >> 32;
 	*cs++ = MI_BATCH_BUFFER_END;
@@ -1116,7 +1107,7 @@ static uint32_t sync_from(int i915, uint32_t addr, uint32_t target)
 	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
 
 	/* cancel target spinner */
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = target + 64;
 	*cs++ = 0;
 	*cs++ = 0;
@@ -1131,7 +1122,7 @@ static uint32_t sync_from(int i915, uint32_t addr, uint32_t target)
 	*cs++ = 0;
 
 	/* self-heal */
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = addr + 64;
 	*cs++ = 0;
 	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
@@ -1162,13 +1153,13 @@ static uint32_t sync_to(int i915, uint32_t addr, uint32_t target)
 	*cs++ = MI_NOOP;
 
 	/* cancel their spin as a compliment */
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = target + 64;
 	*cs++ = 0;
 	*cs++ = 0;
 
 	/* self-heal */
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = addr + 64;
 	*cs++ = 0;
 	*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
@@ -1906,7 +1897,7 @@ static uint32_t sema_create(int i915, uint64_t addr, uint32_t **x)
 	for (int n = 1; n <= 32; n++) {
 		uint32_t *cs = *x + n * 16;
 
-		*cs++ = MI_SEMAPHORE_WAIT |
+		*cs++ = MI_SEMAPHORE_WAIT_CMD |
 			MI_SEMAPHORE_POLL |
 			MI_SEMAPHORE_SAD_GTE_SDD |
 			(4 - 2);
diff --git a/tests/i915/gem_exec_capture.c b/tests/i915/gem_exec_capture.c
index 2db58266fd..d0499a8312 100644
--- a/tests/i915/gem_exec_capture.c
+++ b/tests/i915/gem_exec_capture.c
@@ -308,7 +308,7 @@ static void __capture1(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = obj[SCRATCH].offset;
 		batch[++i] = obj[SCRATCH].offset >> 32;
@@ -498,7 +498,7 @@ __captureN(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = obj[0].offset;
 		batch[++i] = obj[0].offset >> 32;
diff --git a/tests/i915/gem_exec_endless.c b/tests/i915/gem_exec_endless.c
index 2c56cc2120..77719de83b 100644
--- a/tests/i915/gem_exec_endless.c
+++ b/tests/i915/gem_exec_endless.c
@@ -33,15 +33,6 @@
 
 #define MAX_ENGINES 64
 
-#define MI_SEMAPHORE_WAIT		(0x1c << 23)
-#define   MI_SEMAPHORE_POLL             (1 << 15)
-#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
-#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
-#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
-#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
-#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
-#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
-
 static uint32_t batch_create(int i915)
 {
 	const uint32_t bbe = MI_BATCH_BUFFER_END;
@@ -133,7 +124,7 @@ static void __supervisor_run(struct supervisor *sv)
 
 	sv->semaphore = cs + 1000;
 
-	*cs++ = MI_SEMAPHORE_WAIT |
+	*cs++ = MI_SEMAPHORE_WAIT_CMD |
 		MI_SEMAPHORE_POLL |
 		MI_SEMAPHORE_SAD_EQ_SDD |
 		(4 - 2);
@@ -142,7 +133,7 @@ static void __supervisor_run(struct supervisor *sv)
 	*cs++ = 0;
 
 	sv->terminate = cs;
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = offset_in_page(sv->semaphore);
 	*cs++ = 0;
 	*cs++ = 0;
diff --git a/tests/i915/gem_exec_fair.c b/tests/i915/gem_exec_fair.c
index 93a138ba47..8208ab404e 100644
--- a/tests/i915/gem_exec_fair.c
+++ b/tests/i915/gem_exec_fair.c
@@ -131,7 +131,7 @@ static void delay(int i915,
 
 	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
 
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(START_TS) + 4;
 	*cs++ = 0;
 	*cs++ = MI_LOAD_REGISTER_REG;
@@ -144,7 +144,7 @@ static void delay(int i915,
 
 	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
 
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(NOW_TS) + 4;
 	*cs++ = 0;
 	*cs++ = MI_LOAD_REGISTER_REG;
@@ -166,7 +166,7 @@ static void delay(int i915,
 
 	/* Delay between SRM and COND_BBE to post the writes */
 	for (int n = 0; n < 8; n++) {
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		if (use_64b) {
 			*cs++ = addr + 4064;
 			*cs++ = addr >> 32;
@@ -244,25 +244,25 @@ static void tslog(int i915,
 	*cs++ = addr >> 32;
 
 	/* Load the address + inc & mask variables */
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(ADDR);
 	addr_lo = cs;
 	*cs++ = addr;
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(ADDR) + 4;
 	*cs++ = addr >> 32;
 
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(INC);
 	*cs++ = 4;
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(INC) + 4;
 	*cs++ = 0;
 
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(MASK);
 	*cs++ = 0xfffff7ff;
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(MASK) + 4;
 	*cs++ = 0xffffffff;
 
diff --git a/tests/i915/gem_exec_fence.c b/tests/i915/gem_exec_fence.c
index 6bf1cdb577..c2d874f84b 100644
--- a/tests/i915/gem_exec_fence.c
+++ b/tests/i915/gem_exec_fence.c
@@ -50,15 +50,6 @@ struct sync_merge_data {
 #define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
 #endif
 
-#define MI_SEMAPHORE_WAIT		(0x1c << 23)
-#define   MI_SEMAPHORE_POLL             (1 << 15)
-#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
-#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
-#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
-#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
-#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
-#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
-
 static bool fence_busy(int fence)
 {
 	return poll(&(struct pollfd){fence, POLLIN}, 1, 0) == 0;
@@ -345,7 +336,7 @@ static uint32_t timeslicing_batches(int i915, uint32_t *offset)
 		for (int step = 0; step < 8; step++) {
 			if (pair) {
 				cs[i++] =
-					MI_SEMAPHORE_WAIT |
+					MI_SEMAPHORE_WAIT_CMD |
 					MI_SEMAPHORE_POLL |
 					MI_SEMAPHORE_SAD_EQ_SDD |
 					(4 - 2);
@@ -354,14 +345,14 @@ static uint32_t timeslicing_batches(int i915, uint32_t *offset)
 				cs[i++] = 0;
 			}
 
-			cs[i++] = MI_STORE_DWORD_IMM;
+			cs[i++] = MI_STORE_DWORD_IMM_GEN4;
 			cs[i++] = *offset;
 			cs[i++] = 0;
 			cs[i++] = x++;
 
 			if (!pair) {
 				cs[i++] =
-					MI_SEMAPHORE_WAIT |
+					MI_SEMAPHORE_WAIT_CMD |
 					MI_SEMAPHORE_POLL |
 					MI_SEMAPHORE_SAD_EQ_SDD |
 					(4 - 2);
@@ -452,7 +443,7 @@ static uint32_t submitN_batches(int i915, uint32_t offset, int count)
 
 		for (int step = 0; step < 8; step++) {
 			cs[i++] =
-				MI_SEMAPHORE_WAIT |
+				MI_SEMAPHORE_WAIT_CMD |
 				MI_SEMAPHORE_POLL |
 				MI_SEMAPHORE_SAD_EQ_SDD |
 				(4 - 2);
@@ -460,7 +451,7 @@ static uint32_t submitN_batches(int i915, uint32_t offset, int count)
 			cs[i++] = offset;
 			cs[i++] = 0;
 
-			cs[i++] = MI_STORE_DWORD_IMM;
+			cs[i++] = MI_STORE_DWORD_IMM_GEN4;
 			cs[i++] = offset;
 			cs[i++] = 0;
 			cs[i++] = x + 1;
@@ -606,7 +597,7 @@ static void test_parallel(int i915, const intel_ctx_t *ctx,
 		}
 
 		i = 0;
-		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[++i] = scratch_offset + reloc.delta;
 			batch[++i] = scratch_offset >> 32;
@@ -726,7 +717,7 @@ static void test_concurrent(int i915, const intel_ctx_t *ctx,
 	close(fence);
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = target_offset + reloc.delta;
 		batch[++i] = target_offset >> 32;
@@ -2464,21 +2455,21 @@ build_wait_bb(int i915,
 	map = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_WRITE);
 	bb = map;
 
-	*bb++ = MI_LOAD_REGISTER_IMM;
+	*bb++ = MI_LOAD_REGISTER_IMM(1);
 	*bb++ = mmio_base + HSW_CS_GPR(0);
 	*bb++ = wait_value & 0xffffffff;
-	*bb++ = MI_LOAD_REGISTER_IMM;
+	*bb++ = MI_LOAD_REGISTER_IMM(1);
 	*bb++ = mmio_base + HSW_CS_GPR(0) + 4;
 	*bb++ = wait_value >> 32;
 
 	*bb++ = MI_LOAD_REGISTER_REG;
 	*bb++ = mmio_base + RING_TIMESTAMP;
 	*bb++ = mmio_base + HSW_CS_GPR(1);
-	*bb++ = MI_LOAD_REGISTER_IMM;
+	*bb++ = MI_LOAD_REGISTER_IMM(1);
 	*bb++ = mmio_base + HSW_CS_GPR(1) + 4;
 	*bb++ = 0;
 
-	*bb++ = MI_LOAD_REGISTER_IMM;
+	*bb++ = MI_LOAD_REGISTER_IMM(1);
 	*bb++ = mmio_base + HSW_CS_GPR(2) + 4;
 	*bb++ = 0;
 	relocs->delta = offset_in_page(bb);
@@ -2563,23 +2554,23 @@ static void build_increment_engine_bb(struct inter_engine_batches *batch,
 {
 	uint32_t *bb = batch->increment_bb = calloc(1, 4096);
 
-	*bb++ = MI_LOAD_REGISTER_MEM | 2;
+	*bb++ = MI_LOAD_REGISTER_MEM_CMD | 2;
 	*bb++ = mmio_base + HSW_CS_GPR(0);
 	batch->read0_ptrs[0] = bb;
 	*bb++ = 0;
 	*bb++ = 0;
-	*bb++ = MI_LOAD_REGISTER_MEM | 2;
+	*bb++ = MI_LOAD_REGISTER_MEM_CMD | 2;
 	*bb++ = mmio_base + HSW_CS_GPR(0) + 4;
 	batch->read0_ptrs[1] = bb;
 	*bb++ = 0;
 	*bb++ = 0;
 
-	*bb++ = MI_LOAD_REGISTER_MEM | 2;
+	*bb++ = MI_LOAD_REGISTER_MEM_CMD | 2;
 	*bb++ = mmio_base + HSW_CS_GPR(1);
 	batch->read1_ptrs[0] = bb;
 	*bb++ = 0;
 	*bb++ = 0;
-	*bb++ = MI_LOAD_REGISTER_MEM | 2;
+	*bb++ = MI_LOAD_REGISTER_MEM_CMD | 2;
 	*bb++ = mmio_base + HSW_CS_GPR(1) + 4;
 	batch->read1_ptrs[1] = bb;
 	*bb++ = 0;
@@ -2591,12 +2582,12 @@ static void build_increment_engine_bb(struct inter_engine_batches *batch,
 	*bb++ = MI_MATH_ADD;
 	*bb++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU);
 
-	*bb++ = MI_STORE_REGISTER_MEM | 2;
+	*bb++ = MI_STORE_REGISTER_MEM_GEN8;
 	*bb++ = mmio_base + HSW_CS_GPR(0);
 	batch->write_ptrs[0] = bb;
 	*bb++ = 0;
 	*bb++ = 0;
-	*bb++ = MI_STORE_REGISTER_MEM | 2;
+	*bb++ = MI_STORE_REGISTER_MEM_GEN8;
 	*bb++ = mmio_base + HSW_CS_GPR(0) + 4;
 	batch->write_ptrs[1] = bb;
 	*bb++ = 0;
diff --git a/tests/i915/gem_exec_flush.c b/tests/i915/gem_exec_flush.c
index 40c58db2bb..bb120e0d6c 100644
--- a/tests/i915/gem_exec_flush.c
+++ b/tests/i915/gem_exec_flush.c
@@ -208,7 +208,7 @@ static void run(int fd, unsigned ring, int nchild, int timeout,
 			reloc0[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 			offset = obj[0].offset + reloc0[i].delta;
-			*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+			*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 			if (gen >= 8) {
 				*b++ = offset;
 				*b++ = offset >> 32;
@@ -242,7 +242,7 @@ static void run(int fd, unsigned ring, int nchild, int timeout,
 			reloc1[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 			offset = obj[0].offset + reloc1[i].delta;
-			*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+			*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 			if (gen >= 8) {
 				*b++ = offset;
 				*b++ = offset >> 32;
@@ -496,7 +496,7 @@ static void batch(int fd, unsigned ring, int nchild, int timeout,
 				reloc.delta = i * sizeof(uint32_t);
 
 				offset = reloc.presumed_offset + reloc.delta;
-				*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+				*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 				if (gen >= 8) {
 					*b++ = offset;
 					*b++ = offset >> 32;
diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
index 137277fe53..d6c8f21920 100644
--- a/tests/i915/gem_exec_gttfill.c
+++ b/tests/i915/gem_exec_gttfill.c
@@ -70,7 +70,7 @@ static void submit(int fd, uint64_t ahnd, unsigned int gen,
 	reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 
 	n = 0;
-	batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[n] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[n] |= 1 << 21;
 		batch[n]++;
diff --git a/tests/i915/gem_exec_nop.c b/tests/i915/gem_exec_nop.c
index f35cc8401f..497f57f082 100644
--- a/tests/i915/gem_exec_nop.c
+++ b/tests/i915/gem_exec_nop.c
@@ -144,7 +144,7 @@ static void poll_ring(int fd, const intel_ctx_t *ctx,
 		r->delta = 4092;
 		r->read_domains = I915_GEM_DOMAIN_RENDER;
 
-		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		*b = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			*++b = r->delta;
 			*++b = 0;
@@ -272,7 +272,7 @@ static void poll_sequential(int fd, const intel_ctx_t *ctx,
 		r->read_domains = I915_GEM_DOMAIN_RENDER;
 		r->write_domain = I915_GEM_DOMAIN_RENDER;
 
-		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		*b = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			*++b = r->delta;
 			*++b = 0;
diff --git a/tests/i915/gem_exec_parallel.c b/tests/i915/gem_exec_parallel.c
index 429620884b..705b22cb9f 100644
--- a/tests/i915/gem_exec_parallel.c
+++ b/tests/i915/gem_exec_parallel.c
@@ -92,7 +92,7 @@ static void *thread(void *data)
 	}
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (t->gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (t->gen < 6 ? 1 << 22 : 0);
 	if (t->gen >= 8) {
 		batch[++i] = 4*t->id;
 		batch[++i] = 0;
diff --git a/tests/i915/gem_exec_params.c b/tests/i915/gem_exec_params.c
index fd86afa16d..d0805d330f 100644
--- a/tests/i915/gem_exec_params.c
+++ b/tests/i915/gem_exec_params.c
@@ -120,7 +120,7 @@ static void test_batch_first(int fd)
 	map = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_WRITE);
 	gem_set_domain(fd, obj[0].handle,
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
-	map[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	map[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		map[++i] = obj[1].offset;
 		map[++i] = obj[1].offset >> 32;
@@ -152,7 +152,7 @@ static void test_batch_first(int fd)
 	map = gem_mmap__cpu(fd, obj[2].handle, 0, 4096, PROT_WRITE);
 	gem_set_domain(fd, obj[2].handle,
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
-	map[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	map[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		map[++i] = obj[1].offset;
 		map[++i] = obj[1].offset >> 32;
diff --git a/tests/i915/gem_exec_reloc.c b/tests/i915/gem_exec_reloc.c
index 7a354a32a1..3ce89ca649 100644
--- a/tests/i915/gem_exec_reloc.c
+++ b/tests/i915/gem_exec_reloc.c
@@ -79,7 +79,7 @@ static void write_dword(int fd,
 	obj[1].handle = gem_create(fd, 4096);
 
 	i = 0;
-	buf[i++] = MI_STORE_DWORD_IMM | (gen < 6 ? 1<<22 : 0);
+	buf[i++] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1<<22 : 0);
 	if (gen >= 8) {
 		buf[i++] = target_offset;
 		buf[i++] = target_offset >> 32;
@@ -314,7 +314,7 @@ static void active(int fd, const intel_ctx_t *ctx, unsigned engine)
 	for (pass = 0; pass < 1024; pass++) {
 		uint32_t batch[16];
 		int i = 0;
-		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[++i] = 0;
 			batch[++i] = 0;
@@ -526,17 +526,6 @@ static void basic_reloc(int fd, unsigned before, unsigned after, unsigned flags)
 	gem_close(fd, obj.handle);
 }
 
-static inline uint64_t sign_extend(uint64_t x, int index)
-{
-	int shift = 63 - index;
-	return (int64_t)(x << shift) >> shift;
-}
-
-static uint64_t gen8_canonical_address(uint64_t address)
-{
-	return sign_extend(address, 47);
-}
-
 static void basic_range(int fd, unsigned flags)
 {
 	struct drm_i915_gem_relocation_entry reloc[128];
@@ -563,7 +552,7 @@ static void basic_range(int fd, unsigned flags)
 	for (int i = 0; i <= count; i++) {
 		obj[n].handle = gem_create(fd, 4096);
 		obj[n].offset = (1ull << (i + 12)) - 4096;
-		obj[n].offset = gen8_canonical_address(obj[n].offset);
+		obj[n].offset = gen8_canonical_addr(obj[n].offset);
 		obj[n].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 		gem_write(fd, obj[n].handle, 0, &bbe, sizeof(bbe));
 		execbuf.buffers_ptr = to_user_pointer(&obj[n]);
@@ -583,7 +572,7 @@ static void basic_range(int fd, unsigned flags)
 	for (int i = 1; i < count; i++) {
 		obj[n].handle = gem_create(fd, 4096);
 		obj[n].offset = 1ull << (i + 12);
-		obj[n].offset = gen8_canonical_address(obj[n].offset);
+		obj[n].offset = gen8_canonical_addr(obj[n].offset);
 		obj[n].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 		gem_write(fd, obj[n].handle, 0, &bbe, sizeof(bbe));
 		execbuf.buffers_ptr = to_user_pointer(&obj[n]);
@@ -714,10 +703,10 @@ static int flags_to_index(const struct intel_execution_engine2 *e)
 
 static void xchg_u32(void *array, unsigned i, unsigned j)
 {
-	uint32_t *u32 = array;
-	uint32_t tmp = u32[i];
-	u32[i] = u32[j];
-	u32[j] = tmp;
+	uint32_t *ui32 = array;
+	uint32_t tmp = ui32[i];
+	ui32[i] = ui32[j];
+	ui32[j] = tmp;
 }
 
 static void concurrent_child(int i915, const intel_ctx_t *ctx,
@@ -790,7 +779,7 @@ static uint32_t create_concurrent_batch(int i915, unsigned int count)
 	uint32_t *map, *cs;
 	uint32_t cmd;
 
-	cmd = MI_STORE_DWORD_IMM;
+	cmd = MI_STORE_DWORD_IMM_GEN4;
 	if (gen < 6)
 		cmd |= 1 << 22;
 	if (gen < 4)
diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 58b118c79e..ab1dd7749b 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -55,15 +55,6 @@
 #define MAX_CONTEXTS 1024
 #define MAX_ELSP_QLEN 16
 
-#define MI_SEMAPHORE_WAIT		(0x1c << 23)
-#define   MI_SEMAPHORE_POLL             (1 << 15)
-#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
-#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
-#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
-#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
-#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
-#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
-
 IGT_TEST_DESCRIPTION("Check that we can control the order of execution");
 
 static unsigned int offset_in_page(void *addr)
@@ -148,7 +139,7 @@ static uint32_t __store_dword(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
 	obj[2].relocation_count = !ahnd ? 1 : 0;
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = reloc.presumed_offset + reloc.delta;
 		batch[++i] = (reloc.presumed_offset + reloc.delta) >> 32;
@@ -521,7 +512,7 @@ static uint32_t timeslicing_batches(int i915, uint32_t *offset)
 		for (int step = 0; step < 8; step++) {
 			if (pair) {
 				cs[i++] =
-					MI_SEMAPHORE_WAIT |
+					MI_SEMAPHORE_WAIT_CMD |
 					MI_SEMAPHORE_POLL |
 					MI_SEMAPHORE_SAD_EQ_SDD |
 					(4 - 2);
@@ -530,14 +521,14 @@ static uint32_t timeslicing_batches(int i915, uint32_t *offset)
 				cs[i++] = 0;
 			}
 
-			cs[i++] = MI_STORE_DWORD_IMM;
+			cs[i++] = MI_STORE_DWORD_IMM_GEN4;
 			cs[i++] = *offset;
 			cs[i++] = 0;
 			cs[i++] = x++;
 
 			if (!pair) {
 				cs[i++] =
-					MI_SEMAPHORE_WAIT |
+					MI_SEMAPHORE_WAIT_CMD |
 					MI_SEMAPHORE_POLL |
 					MI_SEMAPHORE_SAD_EQ_SDD |
 					(4 - 2);
@@ -629,7 +620,7 @@ static uint32_t timesliceN_batches(int i915, uint32_t offset, int count)
 
 		for (int step = 0; step < 8; step++) {
 			cs[i++] =
-				MI_SEMAPHORE_WAIT |
+				MI_SEMAPHORE_WAIT_CMD |
 				MI_SEMAPHORE_POLL |
 				MI_SEMAPHORE_SAD_EQ_SDD |
 				(4 - 2);
@@ -637,7 +628,7 @@ static uint32_t timesliceN_batches(int i915, uint32_t offset, int count)
 			cs[i++] = offset;
 			cs[i++] = 0;
 
-			cs[i++] = MI_STORE_DWORD_IMM;
+			cs[i++] = MI_STORE_DWORD_IMM_GEN4;
 			cs[i++] = offset;
 			cs[i++] = 0;
 			cs[i++] = x + 1;
@@ -797,7 +788,7 @@ static void cancel_spinner(int i915,
 	map = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_WRITE);
 	cs = map;
 
-	*cs++ = MI_STORE_DWORD_IMM;
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
 	*cs++ = spin->obj[IGT_SPIN_BATCH].offset +
 		offset_in_page(spin->condition);
 	*cs++ = spin->obj[IGT_SPIN_BATCH].offset >> 32;
@@ -1108,13 +1099,13 @@ static void semaphore_resolve(int i915, const intel_ctx_cfg_t *cfg,
 		cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
 
 		/* Set semaphore initially to 1 for polling and signaling */
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		*cs++ = SEMAPHORE_ADDR;
 		*cs++ = 0;
 		*cs++ = 1;
 
 		/* Wait until another batch writes to our semaphore */
-		*cs++ = MI_SEMAPHORE_WAIT |
+		*cs++ = MI_SEMAPHORE_WAIT_CMD |
 			MI_SEMAPHORE_POLL |
 			MI_SEMAPHORE_SAD_EQ_SDD |
 			(4 - 2);
@@ -1123,7 +1114,7 @@ static void semaphore_resolve(int i915, const intel_ctx_cfg_t *cfg,
 		*cs++ = 0;
 
 		/* Then cancel the spinner */
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		*cs++ = spin->obj[IGT_SPIN_BATCH].offset +
 			offset_in_page(spin->condition);
 		*cs++ = 0;
@@ -1161,7 +1152,7 @@ static void semaphore_resolve(int i915, const intel_ctx_cfg_t *cfg,
 		/* Now the semaphore is spinning, cancel it */
 		cancel = gem_create(i915, 4096);
 		cs = map = gem_mmap__cpu(i915, cancel, 0, 4096, PROT_WRITE);
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		*cs++ = SEMAPHORE_ADDR;
 		*cs++ = 0;
 		*cs++ = 0;
@@ -1203,7 +1194,7 @@ static void semaphore_noskip(int i915, const intel_ctx_cfg_t *cfg,
 	const intel_ctx_t *ctx0, *ctx1;
 	uint64_t ahnd;
 
-	igt_require(gen >= 6); /* MI_STORE_DWORD_IMM convenience */
+	igt_require(gen >= 6); /* MI_STORE_DWORD_IMM_GEN4 convenience */
 
 	ctx0 = intel_ctx_create(i915, cfg);
 	ctx1 = intel_ctx_create(i915, cfg);
@@ -1233,7 +1224,7 @@ static void semaphore_noskip(int i915, const intel_ctx_cfg_t *cfg,
 		cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
 
 		/* Cancel the following spinner */
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		if (gen >= 8) {
 			*cs++ = spin->obj[IGT_SPIN_BATCH].offset +
 				offset_in_page(spin->condition);
@@ -1359,14 +1350,14 @@ noreorder(int i915, const intel_ctx_cfg_t *cfg,
 	addr = spin->obj[IGT_SPIN_BATCH].offset +
 		offset_in_page(spin->condition);
 	if (gen >= 8) {
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		*cs++ = addr;
 		addr >>= 32;
 	} else if (gen >= 4) {
-		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		*cs++ = 0;
 	} else {
-		*cs++ = (MI_STORE_DWORD_IMM | 1 << 22) - 1;
+		*cs++ = (MI_STORE_DWORD_IMM_GEN4 | 1 << 22) - 1;
 	}
 	*cs++ = addr;
 	*cs++ = MI_BATCH_BUFFER_END;
@@ -2294,7 +2285,7 @@ static void reorder_wide(int fd, const intel_ctx_cfg_t *cfg, unsigned ring)
 			addr = reloc.presumed_offset + reloc.delta;
 
 			i = execbuf.batch_start_offset / sizeof(uint32_t);
-			batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+			batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 			if (gen >= 8) {
 				batch[++i] = addr;
 				batch[++i] = addr >> 32;
diff --git a/tests/i915/gem_exec_store.c b/tests/i915/gem_exec_store.c
index efb9907ebb..7d23bcd5b4 100644
--- a/tests/i915/gem_exec_store.c
+++ b/tests/i915/gem_exec_store.c
@@ -94,7 +94,7 @@ static void store_dword(int fd, const intel_ctx_t *ctx,
 	}
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = obj[0].offset;
 		batch[++i] = obj[0].offset >> 32;
@@ -180,7 +180,7 @@ static void store_cachelines(int fd, const intel_ctx_t *ctx,
 		reloc[n].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 		dst_offset = CANONICAL(reloc[n].presumed_offset + reloc[n].delta);
 
-		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[++i] = dst_offset;
 			batch[++i] = dst_offset >> 32;
@@ -283,7 +283,7 @@ static void store_all(int fd, const intel_ctx_t *ctx)
 
 	offset = sizeof(uint32_t);
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[address = ++i] = 0;
 		batch[++i] = 0;
diff --git a/tests/i915/gem_exec_suspend.c b/tests/i915/gem_exec_suspend.c
index 3b59966a11..1dadf06df0 100644
--- a/tests/i915/gem_exec_suspend.c
+++ b/tests/i915/gem_exec_suspend.c
@@ -159,7 +159,7 @@ static void run_test(int fd, const intel_ctx_t *ctx,
 		}
 
 		b = 0;
-		buf[b] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		buf[b] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			buf[++b] = offset;
 			buf[++b] = offset >> 32;
diff --git a/tests/i915/gem_exec_whisper.c b/tests/i915/gem_exec_whisper.c
index 616231aa96..29d96cdcaa 100644
--- a/tests/i915/gem_exec_whisper.c
+++ b/tests/i915/gem_exec_whisper.c
@@ -312,7 +312,7 @@ static void whisper(int fd, const intel_ctx_t *ctx,
 		}
 
 		i = 0;
-		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[++i] = store.offset + loc;
 			batch[++i] = (store.offset + loc) >> 32;
diff --git a/tests/i915/gem_pipe_control_store_loop.c b/tests/i915/gem_pipe_control_store_loop.c
index df3da9f5b2..59959a3742 100644
--- a/tests/i915/gem_pipe_control_store_loop.c
+++ b/tests/i915/gem_pipe_control_store_loop.c
@@ -48,7 +48,6 @@ IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control QW writes.");
 
 static struct buf_ops *bops;
 
-#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
 #define   PIPE_CONTROL_WRITE_IMMEDIATE	(1<<14)
 #define   PIPE_CONTROL_WRITE_TIMESTAMP	(3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL (1<<13)
@@ -96,7 +95,7 @@ store_pipe_control_loop(bool preuse_buffer, int timeout)
 		 * support code will do that for us. */
 		if (ibb->gen >= 8) {
 			intel_bb_add_intel_buf(ibb, target_buf, true);
-			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL + 1);
+			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(5));
 			intel_bb_out(ibb, PIPE_CONTROL_WRITE_IMMEDIATE);
 			intel_bb_emit_reloc_fenced(ibb, target_buf->handle,
 						   I915_GEM_DOMAIN_INSTRUCTION,
@@ -108,13 +107,13 @@ store_pipe_control_loop(bool preuse_buffer, int timeout)
 			/* work-around hw issue, see intel_emit_post_sync_nonzero_flush
 			 * in mesa sources. */
 			intel_bb_add_intel_buf(ibb, target_buf, true);
-			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL);
+			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(4));
 			intel_bb_out(ibb, PIPE_CONTROL_CS_STALL |
 				     PIPE_CONTROL_STALL_AT_SCOREBOARD);
 			intel_bb_out(ibb, 0); /* address */
 			intel_bb_out(ibb, 0); /* write data */
 
-			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL);
+			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(4));
 			intel_bb_out(ibb, PIPE_CONTROL_WRITE_IMMEDIATE);
 			intel_bb_emit_reloc(ibb, target_buf->handle,
 					    I915_GEM_DOMAIN_INSTRUCTION,
@@ -124,10 +123,10 @@ store_pipe_control_loop(bool preuse_buffer, int timeout)
 			intel_bb_out(ibb, val); /* write data */
 		} else if (ibb->gen >= 4) {
 			intel_bb_add_intel_buf(ibb, target_buf, true);
-			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL |
+			intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(4) |
 				     PIPE_CONTROL_WC_FLUSH |
 				     PIPE_CONTROL_TC_FLUSH |
-				     PIPE_CONTROL_WRITE_IMMEDIATE | 2);
+				     PIPE_CONTROL_WRITE_IMMEDIATE);
 			intel_bb_emit_reloc(ibb, target_buf->handle,
 					    I915_GEM_DOMAIN_INSTRUCTION,
 					    I915_GEM_DOMAIN_INSTRUCTION,
diff --git a/tests/i915/gem_pxp.c b/tests/i915/gem_pxp.c
index 0c4224483f..af657d0e1b 100644
--- a/tests/i915/gem_pxp.c
+++ b/tests/i915/gem_pxp.c
@@ -748,10 +748,7 @@ static void test_pxp_pwrcycle_teardown_keychange(int i915, struct powermgt_data
 	igt_assert_eq(matched_after_keychange, 0);
 }
 
-#define GFX_OP_PIPE_CONTROL    ((3 << 29) | (3 << 27) | (2 << 24))
-#define PIPE_CONTROL_CS_STALL	            (1 << 20)
 #define PIPE_CONTROL_RENDER_TARGET_FLUSH    (1 << 12)
-#define PIPE_CONTROL_FLUSH_ENABLE           (1 << 7)
 #define PIPE_CONTROL_DATA_CACHE_INVALIDATE  (1 << 5)
 #define PIPE_CONTROL_PROTECTEDPATH_DISABLE  (1 << 27)
 #define PIPE_CONTROL_PROTECTEDPATH_ENABLE   (1 << 22)
@@ -765,7 +762,7 @@ static void emit_pipectrl(struct intel_bb *ibb, struct intel_buf *fenceb, bool b
 	uint32_t pipe_ctl_flags = 0;
 	uint32_t ps_op_id;
 
-	intel_bb_out(ibb, GFX_OP_PIPE_CONTROL);
+	intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(2));
 	intel_bb_out(ibb, pipe_ctl_flags);
 
 	if (before)
@@ -776,7 +773,7 @@ static void emit_pipectrl(struct intel_bb *ibb, struct intel_buf *fenceb, bool b
 	pipe_ctl_flags = (PIPE_CONTROL_FLUSH_ENABLE |
 			  PIPE_CONTROL_CS_STALL |
 			  PIPE_CONTROL_POST_SYNC_OP);
-	intel_bb_out(ibb, GFX_OP_PIPE_CONTROL | 4);
+	intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(6));
 	intel_bb_out(ibb, pipe_ctl_flags);
 	intel_bb_emit_reloc(ibb, fenceb->handle, 0, I915_GEM_DOMAIN_COMMAND, (before?0:8),
 			    fenceb->addr.offset);
diff --git a/tests/i915/gem_ringfill.c b/tests/i915/gem_ringfill.c
index 8ab00525ff..afcd7b73ed 100644
--- a/tests/i915/gem_ringfill.c
+++ b/tests/i915/gem_ringfill.c
@@ -158,7 +158,7 @@ static void setup_execbuf(int fd, const intel_ctx_t *ctx,
 		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 		offset = obj[0].offset + reloc[i].delta;
-		*b++ = MI_STORE_DWORD_IMM;
+		*b++ = MI_STORE_DWORD_IMM_GEN4;
 		if (gen >= 8) {
 			*b++ = offset;
 			*b++ = offset >> 32;
diff --git a/tests/i915/gem_softpin.c b/tests/i915/gem_softpin.c
index c29bfd43d9..7682f772a1 100644
--- a/tests/i915/gem_softpin.c
+++ b/tests/i915/gem_softpin.c
@@ -41,18 +41,6 @@ IGT_TEST_DESCRIPTION("Tests softpin feature with normal usage, invalid inputs"
 
 #define LIMIT_32b ((1ull << 32) - (1ull << 12))
 
-/* gen8_canonical_addr
- * Used to convert any address into canonical form, i.e. [63:48] == [47].
- * Based on kernel's sign_extend64 implementation.
- * @address - a virtual address
-*/
-#define GEN8_HIGH_ADDRESS_BIT 47
-static uint64_t gen8_canonical_addr(uint64_t address)
-{
-	__u8 shift = 63 - GEN8_HIGH_ADDRESS_BIT;
-	return (__s64)(address << shift) >> shift;
-}
-
 #define INTERRUPTIBLE 0x1
 
 static void test_invalid(int fd)
@@ -653,7 +641,7 @@ static void test_noreloc(int fd, enum sleep sleep, unsigned flags)
 	gem_set_domain(fd, object[i].handle,
 		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 	for (i = 0; i < ARRAY_SIZE(object) - 1; i++) {
-		*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			*b++ = object[i].offset;
 			*b++ = object[i].offset >> 32;
@@ -922,7 +910,7 @@ static void submit(int fd, unsigned int gen,
 						   BATCH_ALIGNMENT);
 		address = obj.offset + BATCH_SIZE - eb->batch_start_offset - 8;
 		n = 0;
-		batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[n] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[n] |= 1 << 21;
 			batch[n]++;
diff --git a/tests/i915/gem_sync.c b/tests/i915/gem_sync.c
index 07cabf7abc..e7dc6637ab 100644
--- a/tests/i915/gem_sync.c
+++ b/tests/i915/gem_sync.c
@@ -588,7 +588,7 @@ store_ring(int fd, const intel_ctx_t *ctx, unsigned ring,
 			reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 			offset = object[0].offset + reloc[i].delta;
-			*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+			*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 			if (gen >= 8) {
 				*b++ = offset;
 				*b++ = offset >> 32;
@@ -698,7 +698,7 @@ switch_ring(int fd, const intel_ctx_t *ctx, unsigned ring,
 				c->reloc[r].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 				offset = c->object[0].offset + c->reloc[r].delta;
-				*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+				*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 				if (gen >= 8) {
 					*b++ = offset;
 					*b++ = offset >> 32;
@@ -772,10 +772,10 @@ switch_ring(int fd, const intel_ctx_t *ctx, unsigned ring,
 
 static void xchg(void *array, unsigned i, unsigned j)
 {
-	uint32_t *u32 = array;
-	uint32_t tmp = u32[i];
-	u32[i] = u32[j];
-	u32[j] = tmp;
+	uint32_t *ui32 = array;
+	uint32_t tmp = ui32[i];
+	ui32[i] = ui32[j];
+	ui32[j] = tmp;
 }
 
 struct waiter {
@@ -859,7 +859,7 @@ __store_many(int fd, const intel_ctx_t *ctx, unsigned ring,
 		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 		offset = object[0].offset + reloc[i].delta;
-		*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			*b++ = offset;
 			*b++ = offset >> 32;
@@ -1080,7 +1080,7 @@ store_all(int fd, const intel_ctx_t *ctx, int num_children, int timeout)
 			reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 
 			offset = object[0].offset + reloc[i].delta;
-			*b++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+			*b++ = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 			if (gen >= 8) {
 				*b++ = offset;
 				*b++ = offset >> 32;
diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
index 483570d0ad..07a453229a 100644
--- a/tests/i915/gem_userptr_blits.c
+++ b/tests/i915/gem_userptr_blits.c
@@ -338,7 +338,7 @@ static void store_dword(int fd, uint32_t target,
 	obj[1].relocation_count = 1;
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = offset;
 		batch[++i] = 0;
@@ -1318,7 +1318,7 @@ static void store_dword_rand(int i915, const intel_ctx_t *ctx,
 
 		offset = reloc[n].presumed_offset + reloc[n].delta;
 
-		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[++i] = offset;
 			batch[++i] = offset >> 32;
@@ -1379,7 +1379,7 @@ static void test_readonly(int i915)
 
 	/*
 	 * We have only a 31bit delta which we use for generating
-	 * the target address for MI_STORE_DWORD_IMM, so our maximum
+	 * the target address for MI_STORE_DWORD_IMM_GEN4, so our maximum
 	 * usable object size is only 2GiB. For now.
 	 */
 	igt_nsec_elapsed(memset(&tv, 0, sizeof(tv)));
diff --git a/tests/i915/gem_vm_create.c b/tests/i915/gem_vm_create.c
index 3005d347c3..f47d8c5569 100644
--- a/tests/i915/gem_vm_create.c
+++ b/tests/i915/gem_vm_create.c
@@ -268,7 +268,7 @@ write_to_address(int fd, uint32_t ctx, uint64_t addr, uint32_t value)
 	int i;
 
 	i = 0;
-	cs[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	cs[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		cs[++i] = addr;
 		cs[++i] = addr >> 32;
diff --git a/tests/i915/gem_watchdog.c b/tests/i915/gem_watchdog.c
index 01eb007694..27f3a2d7fd 100644
--- a/tests/i915/gem_watchdog.c
+++ b/tests/i915/gem_watchdog.c
@@ -332,7 +332,7 @@ static void delay(int i915,
 
 	cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
 
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(START_TS) + 4;
 	*cs++ = 0;
 	*cs++ = MI_LOAD_REGISTER_REG;
@@ -345,7 +345,7 @@ static void delay(int i915,
 
 	*cs++ = 0x5 << 23; /* MI_ARB_CHECK */
 
-	*cs++ = MI_LOAD_REGISTER_IMM;
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = CS_GPR(NOW_TS) + 4;
 	*cs++ = 0;
 	*cs++ = MI_LOAD_REGISTER_REG;
@@ -367,7 +367,7 @@ static void delay(int i915,
 
 	/* Delay between SRM and COND_BBE to post the writes */
 	for (int n = 0; n < 8; n++) {
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		if (use_64b) {
 			*cs++ = addr + 4064;
 			*cs++ = addr >> 32;
diff --git a/tests/i915/gem_workarounds.c b/tests/i915/gem_workarounds.c
index 5fb2d73fdd..30c68d1ac9 100644
--- a/tests/i915/gem_workarounds.c
+++ b/tests/i915/gem_workarounds.c
@@ -121,7 +121,7 @@ static int workaround_fail_count(int i915, const intel_ctx_t *ctx)
 	out = base =
 		gem_mmap__cpu(i915, obj[1].handle, 0, batch_sz, PROT_WRITE);
 	for (int i = 0; i < num_wa_regs; i++) {
-		*out++ = MI_STORE_REGISTER_MEM | (1 + (gen >= 8));
+		*out++ = MI_STORE_REGISTER_MEM_CMD | (1 + (gen >= 8));
 		*out++ = wa_regs[i].addr;
 		reloc[i].target_handle = obj[0].handle;
 		reloc[i].offset = (out - base) * sizeof(*out);
diff --git a/tests/i915/gen7_exec_parse.c b/tests/i915/gen7_exec_parse.c
index 69b768ed29..e9751ea73f 100644
--- a/tests/i915/gen7_exec_parse.c
+++ b/tests/i915/gen7_exec_parse.c
@@ -48,10 +48,6 @@
 #define INSTR_CLIENT_SHIFT	29
 #define   INSTR_INVALID_CLIENT  0x7
 
-#define MI_ARB_ON_OFF (0x8 << 23)
-#define MI_DISPLAY_FLIP ((0x14 << 23) | 1)
-
-#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
 #define   PIPE_CONTROL_QW_WRITE	(1<<14)
 #define   PIPE_CONTROL_LRI_POST_OP (1<<23)
 
@@ -298,7 +294,7 @@ static void
 test_lri(int fd, uint32_t handle, struct test_lri *test)
 {
 	uint32_t lri[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		test->reg,
 		test->test_val,
 		MI_BATCH_BUFFER_END,
@@ -372,13 +368,13 @@ static void test_allocations(int fd)
 static void hsw_load_register_reg(void)
 {
 	uint32_t init_gpr0[16] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		HSW_CS_GPR0,
 		0xabcdabc0, /* leave [1:0] zero */
 		MI_BATCH_BUFFER_END,
 	};
 	uint32_t store_gpr0[16] = {
-		MI_STORE_REGISTER_MEM | (3 - 2),
+		MI_STORE_REGISTER_MEM_CMD | (3 - 2),
 		HSW_CS_GPR0,
 		0, /* reloc*/
 		MI_BATCH_BUFFER_END,
@@ -475,7 +471,7 @@ igt_main
 
 	igt_subtest("basic-allowed") {
 		uint32_t pc[] = {
-			GFX_OP_PIPE_CONTROL,
+			GFX_OP_PIPE_CONTROL(4),
 			PIPE_CONTROL_QW_WRITE,
 			0, /* To be patched */
 			0x12000000,
@@ -490,7 +486,7 @@ igt_main
 
 	igt_subtest("basic-offset") {
 		uint32_t pc[] = {
-			GFX_OP_PIPE_CONTROL,
+			GFX_OP_PIPE_CONTROL(4),
 			PIPE_CONTROL_QW_WRITE,
 			0, /* To be patched */
 			0x12000000,
@@ -597,7 +593,7 @@ igt_main
 
 	igt_subtest("bitmasks") {
 		uint32_t pc[] = {
-			GFX_OP_PIPE_CONTROL,
+			GFX_OP_PIPE_CONTROL(4),
 			(PIPE_CONTROL_QW_WRITE |
 			 PIPE_CONTROL_LRI_POST_OP),
 			0, /* To be patched */
@@ -631,13 +627,13 @@ igt_main
 
 	igt_subtest("cmd-crossing-page") {
 		uint32_t lri_ok[] = {
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			SO_WRITE_OFFSET_0, /* allowed register address */
 			0xdcbaabc0, /* [1:0] MBZ */
 			MI_BATCH_BUFFER_END,
 		};
 		uint32_t store_reg[] = {
-			MI_STORE_REGISTER_MEM | (3 - 2),
+			MI_STORE_REGISTER_MEM_CMD | (3 - 2),
 			SO_WRITE_OFFSET_0,
 			0, /* reloc */
 			MI_BATCH_BUFFER_END,
@@ -655,29 +651,29 @@ igt_main
 
 	igt_subtest("oacontrol-tracking") {
 		uint32_t lri_ok[] = {
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			OACONTROL,
 			0x31337000,
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			OACONTROL,
 			0x0,
 			MI_BATCH_BUFFER_END,
 			0
 		};
 		uint32_t lri_bad[] = {
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			OACONTROL,
 			0x31337000,
 			MI_BATCH_BUFFER_END,
 		};
 		uint32_t lri_extra_bad[] = {
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			OACONTROL,
 			0x31337000,
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			OACONTROL,
 			0x0,
-			MI_LOAD_REGISTER_IMM,
+			MI_LOAD_REGISTER_IMM(1),
 			OACONTROL,
 			0x31337000,
 			MI_BATCH_BUFFER_END,
@@ -701,7 +697,7 @@ igt_main
 
 	igt_subtest("chained-batch") {
 		uint32_t pc[] = {
-			GFX_OP_PIPE_CONTROL,
+			GFX_OP_PIPE_CONTROL(4),
 			PIPE_CONTROL_QW_WRITE,
 			0, /* To be patched */
 			0x12000000,
diff --git a/tests/i915/gen9_exec_parse.c b/tests/i915/gen9_exec_parse.c
index c8743a78a0..26b1517053 100644
--- a/tests/i915/gen9_exec_parse.c
+++ b/tests/i915/gen9_exec_parse.c
@@ -38,14 +38,6 @@
 #define INSTR_CLIENT_SHIFT	29
 #define   INSTR_INVALID_CLIENT  0x7
 
-#define MI_ARB_ON_OFF (0x8 << 23)
-#define MI_USER_INTERRUPT (0x02 << 23)
-#define MI_FLUSH_DW (0x26 << 23)
-#define MI_REPORT_HEAD (0x07 << 23)
-#define MI_SUSPEND_FLUSH (0x0b << 23)
-#define MI_LOAD_SCAN_LINES_EXCL (0x13 << 23)
-#define MI_UPDATE_GTT (0x23 << 23)
-
 #define BCS_SWCTRL     0x22200
 #define BCS_GPR_BASE   0x22600
 #define BCS_GPR(n)     (0x22600 + (n) * 8)
@@ -324,7 +316,7 @@ static const struct cmd allowed_cmds[] = {
 	CMD_N(MI_NOOP),
 	CMD_N(MI_USER_INTERRUPT),
 	CMD_N(MI_WAIT_FOR_EVENT),
-	CMD(MI_FLUSH_DW, 5),
+	CMD(MI_FLUSH_DW_CMD, 5),
 	CMD_N(MI_ARB_CHECK),
 	CMD_N(MI_REPORT_HEAD),
 	CMD_N(MI_FLUSH),
@@ -453,11 +445,11 @@ static void test_bb_start(const int i915, const uint32_t handle, int test)
 		MI_NOOP,
 		MI_NOOP,
 		MI_NOOP,
-		MI_STORE_DWORD_IMM,
+		MI_STORE_DWORD_IMM_GEN4,
 		0,
 		0,
 		1,
-		MI_STORE_DWORD_IMM,
+		MI_STORE_DWORD_IMM_GEN4,
 		4,
 		0,
 		2,
@@ -680,13 +672,13 @@ static void test_bb_chained(const int i915, const uint32_t handle)
 static void test_cmd_crossing_page(const int i915, const uint32_t handle)
 {
 	const uint32_t lri_ok[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		BCS_GPR(0),
 		0xbaadf00d,
 		MI_BATCH_BUFFER_END,
 	};
 	const uint32_t store_reg[] = {
-		MI_STORE_REGISTER_MEM | 2,
+		MI_STORE_REGISTER_MEM_CMD | 2,
 		BCS_GPR(0),
 		0, /* reloc */
 		0, /* reloc */
@@ -711,21 +703,21 @@ static void test_invalid_length(const int i915, const uint32_t handle)
 	const uint32_t noops[8192] = { 0, };
 
 	const uint32_t lri_ok[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		BCS_GPR(0),
 		ok_val,
 		MI_BATCH_BUFFER_END,
 	};
 
 	const uint32_t lri_bad[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		BCS_GPR(0),
 		bad_val,
 		MI_BATCH_BUFFER_END,
 	};
 
 	const uint32_t store_reg[] = {
-		MI_STORE_REGISTER_MEM | 2,
+		MI_STORE_REGISTER_MEM_CMD | 2,
 		BCS_GPR(0),
 		0, /* reloc */
 		0, /* reloc */
@@ -824,21 +816,21 @@ static void test_register(const int i915, const uint32_t handle,
 			  const struct reg *r)
 {
 	const uint32_t lri_zero[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		r->addr,
 		r->masked_write ? 0xffff0000 : 0,
 		MI_BATCH_BUFFER_END,
 	};
 
 	const uint32_t lri_mask[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		r->addr,
 		r->masked_write ? (r->mask << 16) | r->mask : r->mask,
 		MI_BATCH_BUFFER_END,
 	};
 
 	const uint32_t store_reg[] = {
-		MI_STORE_REGISTER_MEM | 2,
+		MI_STORE_REGISTER_MEM_CMD | 2,
 		r->addr,
 		0, /* reloc */
 		0, /* reloc */
@@ -877,7 +869,7 @@ static long int read_reg(const int i915, const uint32_t handle,
 			 const uint32_t addr)
 {
 	const uint32_t store_reg[] = {
-		MI_STORE_REGISTER_MEM | 2,
+		MI_STORE_REGISTER_MEM_CMD | 2,
 		addr,
 		0, /* reloc */
 		0, /* reloc */
@@ -911,7 +903,7 @@ static int write_reg(const int i915, const uint32_t handle,
 		     const uint32_t addr, const uint32_t val)
 {
 	const uint32_t lri[] = {
-		MI_LOAD_REGISTER_IMM,
+		MI_LOAD_REGISTER_IMM(1),
 		addr,
 		val,
 		MI_BATCH_BUFFER_END,
@@ -1088,17 +1080,6 @@ static inline uint32_t fill_and_copy_shadow(uint32_t *batch, uint32_t len,
 	return i * sizeof(uint32_t);
 }
 
-static inline uint64_t sign_extend(uint64_t x, int index)
-{
-	int shift = 63 - index;
-	return (int64_t)(x << shift) >> shift;
-}
-
-static uint64_t gen8_canonical_address(uint64_t address)
-{
-	return sign_extend(address, 47);
-}
-
 static void test_shadow_peek(int fd)
 {
 	uint64_t size = PAGE_SIZE;
@@ -1130,7 +1111,7 @@ static void test_shadow_peek(int fd)
 
 	exec[1].handle = gem_create(fd, size); /* batch */
 	exec[1].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-	exec[1].offset = gen8_canonical_address(exec[0].pad_to_size);
+	exec[1].offset = gen8_canonical_addr(exec[0].pad_to_size);
 
 	vaddr = gem_mmap__wc(fd, exec[1].handle, 0, size, PROT_WRITE);
 
diff --git a/tests/i915/i915_module_load.c b/tests/i915/i915_module_load.c
index d3a86b1133..725687dab4 100644
--- a/tests/i915/i915_module_load.c
+++ b/tests/i915/i915_module_load.c
@@ -80,7 +80,7 @@ static void store_all(int i915)
 	int i;
 
 	i = 0;
-	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+	batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
 		batch[++i] = 0;
 		batch[++i] = 0;
diff --git a/tests/i915/perf.c b/tests/i915/perf.c
index dd1f1ac399..6453354cfc 100644
--- a/tests/i915/perf.c
+++ b/tests/i915/perf.c
@@ -58,30 +58,17 @@ IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
 #define OAREPORT_REASON_GO             (1<<4)
 #define OAREPORT_REASON_CLK_RATIO      (1<<5)
 
-#define GFX_OP_PIPE_CONTROL     ((3 << 29) | (3 << 27) | (2 << 24))
-#define PIPE_CONTROL_CS_STALL	   (1 << 20)
 #define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET	(1 << 19)
-#define PIPE_CONTROL_TLB_INVALIDATE     (1 << 18)
 #define PIPE_CONTROL_SYNC_GFDT	  (1 << 17)
-#define PIPE_CONTROL_MEDIA_STATE_CLEAR  (1 << 16)
 #define PIPE_CONTROL_NO_WRITE	   (0 << 14)
 #define PIPE_CONTROL_WRITE_IMMEDIATE    (1 << 14)
 #define PIPE_CONTROL_WRITE_DEPTH_COUNT  (2 << 14)
-#define PIPE_CONTROL_WRITE_TIMESTAMP    (3 << 14)
-#define PIPE_CONTROL_DEPTH_STALL	(1 << 13)
 #define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
 #define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
-#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE   (1 << 10) /* GM45+ only */
 #define PIPE_CONTROL_ISP_DIS	    (1 << 9)
 #define PIPE_CONTROL_INTERRUPT_ENABLE   (1 << 8)
-#define PIPE_CONTROL_FLUSH_ENABLE       (1 << 7) /* Gen7+ only */
 /* GT */
 #define PIPE_CONTROL_DATA_CACHE_INVALIDATE      (1 << 5)
-#define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
-#define PIPE_CONTROL_CONST_CACHE_INVALIDATE     (1 << 3)
-#define PIPE_CONTROL_STATE_CACHE_INVALIDATE     (1 << 2)
-#define PIPE_CONTROL_STALL_AT_SCOREBOARD	(1 << 1)
-#define PIPE_CONTROL_DEPTH_CACHE_FLUSH	  (1 << 0)
 #define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
 #define PIPE_CONTROL_GLOBAL_GTT_WRITE   (1 << 2)
 
@@ -3242,9 +3229,9 @@ emit_stall_timestamp_and_rpc(struct intel_bb *ibb,
 	intel_bb_add_intel_buf(ibb, dst, true);
 
 	if (intel_gen(devid) >= 8)
-		intel_bb_out(ibb, GFX_OP_PIPE_CONTROL | (6 - 2));
+		intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(6));
 	else
-		intel_bb_out(ibb, GFX_OP_PIPE_CONTROL | (5 - 2));
+		intel_bb_out(ibb, GFX_OP_PIPE_CONTROL(5));
 
 	intel_bb_out(ibb, pipe_ctl_flags);
 	intel_bb_emit_reloc(ibb, dst->handle,
diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c
index df194c8ad2..197e7cd254 100644
--- a/tests/i915/perf_pmu.c
+++ b/tests/i915/perf_pmu.c
@@ -681,12 +681,6 @@ no_sema(int gem_fd, const intel_ctx_t *ctx,
 	assert_within_epsilon(val[0][1], 0.0f, tolerance);
 }
 
-#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
-#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
-#define   MI_SEMAPHORE_POLL		(1<<15)
-#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
-#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
-
 static void
 sema_wait(int gem_fd, const intel_ctx_t *ctx,
 	  const struct intel_execution_engine2 *e,
@@ -719,7 +713,7 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx,
 
 	obj_ptr = gem_mmap__device_coherent(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
 
-	batch[0] = MI_STORE_DWORD_IMM;
+	batch[0] = MI_STORE_DWORD_IMM_GEN4;
 	batch[1] = obj_offset + sizeof(*obj_ptr);
 	batch[2] = (obj_offset + sizeof(*obj_ptr)) >> 32;
 	batch[3] = 1;
@@ -807,7 +801,7 @@ create_sema(int gem_fd, uint64_t ahnd,
 {
 	uint32_t cs[] = {
 		/* Reset our semaphore wait */
-		MI_STORE_DWORD_IMM,
+		MI_STORE_DWORD_IMM_GEN4,
 		0,
 		0,
 		1,
@@ -1108,17 +1102,17 @@ event_wait(int gem_fd, const intel_ctx_t *ctx,
 	obj.handle = gem_create(gem_fd, 4096);
 
 	b = batch;
-	*b++ = MI_LOAD_REGISTER_IMM;
+	*b++ = MI_LOAD_REGISTER_IMM(1);
 	*b++ = FORCEWAKE_MT;
 	*b++ = 2 << 16 | 2;
-	*b++ = MI_LOAD_REGISTER_IMM;
+	*b++ = MI_LOAD_REGISTER_IMM(1);
 	*b++ = DERRMR;
 	*b++ = ~0u;
 	*b++ = MI_WAIT_FOR_EVENT;
-	*b++ = MI_LOAD_REGISTER_IMM;
+	*b++ = MI_LOAD_REGISTER_IMM(1);
 	*b++ = DERRMR;
 	*b++ = ~0u;
-	*b++ = MI_LOAD_REGISTER_IMM;
+	*b++ = MI_LOAD_REGISTER_IMM(1);
 	*b++ = FORCEWAKE_MT;
 	*b++ = 2 << 16;
 	*b++ = MI_BATCH_BUFFER_END;
diff --git a/tests/i915/sysfs_timeslice_duration.c b/tests/i915/sysfs_timeslice_duration.c
index 95dc377785..80d34285e2 100644
--- a/tests/i915/sysfs_timeslice_duration.c
+++ b/tests/i915/sysfs_timeslice_duration.c
@@ -46,15 +46,6 @@
 #define ATTR "timeslice_duration_ms"
 #define RESET_TIMEOUT 50 /* milliseconds, at least one jiffie for kworker */
 
-#define MI_SEMAPHORE_WAIT		(0x1c << 23)
-#define   MI_SEMAPHORE_POLL             (1 << 15)
-#define   MI_SEMAPHORE_SAD_GT_SDD       (0 << 12)
-#define   MI_SEMAPHORE_SAD_GTE_SDD      (1 << 12)
-#define   MI_SEMAPHORE_SAD_LT_SDD       (2 << 12)
-#define   MI_SEMAPHORE_SAD_LTE_SDD      (3 << 12)
-#define   MI_SEMAPHORE_SAD_EQ_SDD       (4 << 12)
-#define   MI_SEMAPHORE_SAD_NEQ_SDD      (5 << 12)
-
 static bool __enable_hangcheck(int dir, bool state)
 {
 	return igt_sysfs_set(dir, "enable_hangcheck", state ? "1" : "0");
@@ -214,7 +205,7 @@ static uint64_t __test_duration(int i915, int engine, unsigned int timeout)
 
 	cs = map;
 	for (i = 0; i < 10; i++) {
-		*cs++ = MI_SEMAPHORE_WAIT |
+		*cs++ = MI_SEMAPHORE_WAIT_CMD |
 			MI_SEMAPHORE_POLL |
 			MI_SEMAPHORE_SAD_NEQ_SDD |
 			(4 - 2 + (gen >= 12));
@@ -229,7 +220,7 @@ static uint64_t __test_duration(int i915, int engine, unsigned int timeout)
 		*cs++ = obj[1].offset + sizeof(uint32_t) * i;
 		*cs++ = 0;
 
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		*cs++ = obj[0].offset +
 			4096 - sizeof(uint32_t) * i - sizeof(uint32_t);
 		*cs++ = 0;
@@ -240,12 +231,12 @@ static uint64_t __test_duration(int i915, int engine, unsigned int timeout)
 	cs += 16 - ((cs - map) & 15);
 	start = (cs - map) * sizeof(*cs);
 	for (i = 0; i < 10; i++) {
-		*cs++ = MI_STORE_DWORD_IMM;
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
 		*cs++ = obj[0].offset + sizeof(uint32_t) * i;
 		*cs++ = 0;
 		*cs++ = 1;
 
-		*cs++ = MI_SEMAPHORE_WAIT |
+		*cs++ = MI_SEMAPHORE_WAIT_CMD |
 			MI_SEMAPHORE_POLL |
 			MI_SEMAPHORE_SAD_NEQ_SDD |
 			(4 - 2 + (gen >= 12));
diff --git a/tests/prime_vgem.c b/tests/prime_vgem.c
index 06be273c0b..7b473c03df 100644
--- a/tests/prime_vgem.c
+++ b/tests/prime_vgem.c
@@ -624,7 +624,7 @@ static void work(int i915, uint64_t ahnd, uint64_t scratch_offset, int dmabuf,
 		store[count].delta = sizeof(uint32_t) * count;
 		store[count].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 		store[count].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		batch[i] = MI_STORE_DWORD_IMM_GEN4 | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
 			batch[++i] = scratch_offset + store[count].delta;
 			batch[++i] = (scratch_offset + store[count].delta) >> 32;
diff --git a/tools/intel_audio_dump.c b/tools/intel_audio_dump.c
index 6d11659ec9..287dbd4759 100644
--- a/tools/intel_audio_dump.c
+++ b/tools/intel_audio_dump.c
@@ -48,6 +48,7 @@ static int disp_reg_base = 0;	/* base address of display registers */
 #define BITSTO(n)		(n >= sizeof(long) * 8 ? ~0 : (1UL << (n)) - 1)
 #define BITMASK(high, low)	(BITSTO(high+1) & ~BITSTO(low))
 #define REG_BITS(reg, high, low)	(((reg) & (BITMASK(high, low))) >> (low))
+#undef REG_BIT
 #define REG_BIT(reg, n)		REG_BITS(reg, n, n)
 
 #define min_t(type, x, y) ({                    \
diff --git a/tools/intel_reg.c b/tools/intel_reg.c
index b0d91473a8..6c37e14d12 100644
--- a/tools/intel_reg.c
+++ b/tools/intel_reg.c
@@ -322,7 +322,7 @@ static int register_srm(struct config *config, struct reg *reg,
 		batch[i++] = MI_NOOP;
 		batch[i++] = MI_NOOP;
 
-		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = MI_LOAD_REGISTER_IMM(1);
 		batch[i++] = reg->addr;
 		batch[i++] = *val_in;
 		batch[i++] = MI_NOOP;
-- 
2.34.1