[PATCH i-g-t 01/12] RELOC SQUASH

Matthew Auld matthew.auld at intel.com
Wed Jul 28 14:50:00 UTC 2021


From: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
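
Squash of the relocation -> softpin/allocator conversion across the
library and tests. Where the kernel no longer supports relocations,
tests now open an allocator handle (ahnd) and softpin their objects
(EXEC_OBJECT_PINNED + I915_EXEC_NO_RELOC); with ahnd == 0 they fall
back to the relocation path. Spinners (igt_dummyload), the hang
helpers (igt_gt), the blitter copy helpers (intel_batchbuffer) and the
HuC copy function now take an ahnd plus the object sizes needed for
softpin.

A minimal sketch of the pattern applied throughout, using the helpers
this patch adds to lib/intel_allocator.h:

	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id); /* 0 -> use relocs */
	igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx);

	/* ... exercise the spinner ... */

	igt_spin_free(fd, spin);
	put_ahnd(ahnd);

For raw execbuf the offsets come from the allocator and the objects
are pinned at them:

	obj.offset = get_offset(ahnd, obj.handle, size, 0);
	if (ahnd) /* softpin path, skipped when relocations are used */
		obj.flags |= EXEC_OBJECT_PINNED;
	/* ... gem_execbuf() ... */
	put_offset(ahnd, obj.handle);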

---
 lib/huc_copy.c                          |  27 ++-
 lib/huc_copy.h                          |   4 +-
 lib/igt_dummyload.c                     |  74 ++++++--
 lib/igt_dummyload.h                     |   3 +
 lib/igt_fb.c                            |  22 ++-
 lib/igt_gt.c                            |  20 ++-
 lib/igt_gt.h                            |   4 +
 lib/intel_allocator.h                   |  55 ++++++
 lib/intel_batchbuffer.c                 | 118 +++++++++----
 lib/intel_batchbuffer.h                 |  18 +-
 tests/i915/gem_busy.c                   |  34 +++-
 tests/i915/gem_create.c                 |  11 +-
 tests/i915/gem_ctx_engines.c            |  25 ++-
 tests/i915/gem_ctx_exec.c               |  44 ++++-
 tests/i915/gem_ctx_freq.c               |   4 +-
 tests/i915/gem_ctx_isolation.c          |  97 ++++++++---
 tests/i915/gem_ctx_param.c              |   7 +-
 tests/i915/gem_eio.c                    |  60 +++++--
 tests/i915/gem_exec_async.c             |  57 +++++--
 tests/i915/gem_exec_big.c               |   1 +
 tests/i915/gem_exec_capture.c           | 131 +++++++++++----
 tests/i915/gem_exec_gttfill.c           |   1 +
 tests/i915/gem_exec_parallel.c          |  33 +++-
 tests/i915/gem_exec_params.c            |  74 +++++---
 tests/i915/gem_exec_store.c             | 134 +++++++++++----
 tests/i915/gem_exec_suspend.c           |  44 +++--
 tests/i915/gem_huc_copy.c               |  12 +-
 tests/i915/gem_mmap.c                   |   4 +-
 tests/i915/gem_mmap_gtt.c               |  15 +-
 tests/i915/gem_mmap_offset.c            |   4 +-
 tests/i915/gem_mmap_wc.c                |   4 +-
 tests/i915/gem_request_retire.c         |  14 +-
 tests/i915/gem_ringfill.c               |  36 +++-
 tests/i915/gem_softpin.c                | 213 +++++++++++++++++++++++-
 tests/i915/gem_spin_batch.c             |  37 ++--
 tests/i915/gem_tiled_fence_blits.c      |  65 ++++++--
 tests/i915/gem_unfence_active_buffers.c |   5 +-
 tests/i915/gem_unref_active_buffers.c   |   5 +-
 tests/i915/gem_wait.c                   |   3 +
 tests/i915/gem_watchdog.c               |  11 +-
 tests/i915/gem_workarounds.c            |  17 +-
 tests/i915/i915_hangman.c               |  15 +-
 tests/i915/i915_module_load.c           |  23 ++-
 tests/i915/i915_pm_rc6_residency.c      |   8 +-
 tests/i915/i915_pm_rpm.c                |  27 ++-
 tests/i915/i915_pm_rps.c                |  19 ++-
 tests/i915/perf_pmu.c                   | 147 +++++++++++-----
 tests/i915/sysfs_heartbeat_interval.c   |  24 ++-
 tests/i915/sysfs_preempt_timeout.c      |  21 ++-
 tests/i915/sysfs_timeslice_duration.c   |  21 ++-
 tests/kms_busy.c                        |   9 +
 tests/kms_cursor_legacy.c               |   6 +
 tests/kms_flip.c                        |  14 +-
 tests/kms_vblank.c                      |  11 +-
 tests/prime_vgem.c                      | 120 +++++++++----
 55 files changed, 1626 insertions(+), 386 deletions(-)

diff --git a/lib/huc_copy.c b/lib/huc_copy.c
index bc98b1f9..6ec68864 100644
--- a/lib/huc_copy.c
+++ b/lib/huc_copy.c
@@ -23,7 +23,9 @@
  */
 
 #include <i915_drm.h>
+#include "drmtest.h"
 #include "huc_copy.h"
+#include "intel_allocator.h"
 
 static void
 gen9_emit_huc_virtual_addr_state(struct drm_i915_gem_exec_object2 *src,
@@ -40,6 +42,7 @@ gen9_emit_huc_virtual_addr_state(struct drm_i915_gem_exec_object2 *src,
 			buf[(*i)++] = src->offset;
 
 			reloc_src->target_handle = src->handle;
+			reloc_src->presumed_offset = src->offset;
 			reloc_src->delta = 0;
 			reloc_src->offset = (*i - 1) * sizeof(buf[0]);
 			reloc_src->read_domains = 0;
@@ -48,6 +51,7 @@ gen9_emit_huc_virtual_addr_state(struct drm_i915_gem_exec_object2 *src,
 			buf[(*i)++] = dst->offset;
 
 			reloc_dst->target_handle = dst->handle;
+			reloc_dst->presumed_offset = dst->offset;
 			reloc_dst->delta = 0;
 			reloc_dst->offset = (*i - 1) * sizeof(buf[0]);
 			reloc_dst->read_domains = 0;
@@ -61,8 +65,8 @@ gen9_emit_huc_virtual_addr_state(struct drm_i915_gem_exec_object2 *src,
 }
 
 void
-gen9_huc_copyfunc(int fd,
-		struct drm_i915_gem_exec_object2 *obj)
+gen9_huc_copyfunc(int fd, uint64_t ahnd,
+		  struct drm_i915_gem_exec_object2 *obj, uint64_t *objsize)
 {
 	struct drm_i915_gem_relocation_entry reloc[2];
 	struct drm_i915_gem_execbuffer2 execbuf;
@@ -86,6 +90,21 @@ gen9_huc_copyfunc(int fd,
 	buf[i++] = MFX_WAIT;
 
 	memset(reloc, 0, sizeof(reloc));
+
+	if (ahnd) {
+		obj[0].flags = EXEC_OBJECT_PINNED;
+		obj[1].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[2].flags = EXEC_OBJECT_PINNED;
+		obj[0].offset = get_offset(ahnd, obj[0].handle, objsize[0], 0);
+		obj[1].offset = get_offset(ahnd, obj[1].handle, objsize[1], 0);
+		obj[2].offset = get_offset(ahnd, obj[2].handle, objsize[2], 0);
+	} else {
+		obj[0].offset = 1 << 20;
+		obj[1].offset = ALIGN(obj[0].offset + objsize[0], 1 << 20);
+		obj[2].offset = ALIGN(obj[1].offset + objsize[1], 1 << 20);
+		obj[1].flags = EXEC_OBJECT_WRITE;
+	}
+
 	gen9_emit_huc_virtual_addr_state(&obj[0], &obj[1], &reloc[0], &reloc[1], buf, &i);
 
 	buf[i++] = HUC_START;
@@ -94,13 +113,13 @@ gen9_huc_copyfunc(int fd,
 	buf[i++] = MI_BATCH_BUFFER_END;
 
 	gem_write(fd, obj[2].handle, 0, buf, sizeof(buf));
-	obj[2].relocation_count = 2;
+	obj[2].relocation_count = !ahnd ? 2 : 0;
 	obj[2].relocs_ptr = to_user_pointer(reloc);
 
 	memset(&execbuf, 0, sizeof(execbuf));
 	execbuf.buffers_ptr = to_user_pointer(obj);
 	execbuf.buffer_count = 3;
-	execbuf.flags = I915_EXEC_BSD;
+	execbuf.flags = I915_EXEC_BSD | I915_EXEC_NO_RELOC;
 
 	gem_execbuf(fd, &execbuf);
 }
diff --git a/lib/huc_copy.h b/lib/huc_copy.h
index ac31d800..69d14093 100644
--- a/lib/huc_copy.h
+++ b/lib/huc_copy.h
@@ -43,7 +43,7 @@
 #define HUC_VIRTUAL_ADDR_REGION_DST	14
 
 void
-gen9_huc_copyfunc(int fd,
-		struct drm_i915_gem_exec_object2 *obj);
+gen9_huc_copyfunc(int fd, uint64_t ahnd,
+		  struct drm_i915_gem_exec_object2 *obj, uint64_t *objsize);
 
 #endif /* HUC_COPY_H */
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index 8a5ad5ee..645db922 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -35,6 +35,7 @@
 #include "i915/gem_engine_topology.h"
 #include "i915/gem_mman.h"
 #include "i915/gem_submission.h"
+#include "igt_aux.h"
 #include "igt_core.h"
 #include "igt_device.h"
 #include "igt_dummyload.h"
@@ -101,7 +102,7 @@ emit_recursive_batch(igt_spin_t *spin,
 	unsigned int flags[GEM_MAX_ENGINES];
 	unsigned int nengine;
 	int fence_fd = -1;
-	uint64_t addr;
+	uint64_t addr, addr_scratch, ahnd = opts->ahnd, objflags = 0;
 	uint32_t *cs;
 	int i;
 
@@ -119,11 +120,17 @@ emit_recursive_batch(igt_spin_t *spin,
 	 * are not allowed in the first 256KiB, for fear of negative relocations
 	 * that wrap.
 	 */
-	addr = gem_aperture_size(fd) / 2;
-	if (addr >> 31)
-		addr = 1u << 31;
-	addr += random() % addr / 2;
-	addr &= -4096;
+
+	if (!ahnd) {
+		addr = gem_aperture_size(fd) / 2;
+		if (addr >> 31)
+			addr = 1u << 31;
+		addr += random() % addr / 2;
+		addr &= -4096;
+	} else {
+		spin->ahnd = ahnd;
+		objflags |= EXEC_OBJECT_PINNED;
+	}
 
 	igt_assert(!(opts->ctx && opts->ctx_id));
 
@@ -164,16 +171,34 @@ emit_recursive_batch(igt_spin_t *spin,
 	execbuf->buffer_count++;
 	cs = spin->batch;
 
-	obj[BATCH].offset = addr;
+	if (ahnd)
+		addr = intel_allocator_alloc_with_strategy(ahnd, obj[BATCH].handle,
+							   BATCH_SIZE, 0,
+							   ALLOC_STRATEGY_LOW_TO_HIGH);
+	obj[BATCH].offset = CANONICAL(addr);
+	obj[BATCH].flags |= objflags;
+	if (obj[BATCH].offset >= (1ull << 32))
+		obj[BATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
 	addr += BATCH_SIZE;
 
 	if (opts->dependency) {
 		igt_assert(!(opts->flags & IGT_SPIN_POLL_RUN));
+		if (ahnd)
+			addr_scratch = intel_allocator_alloc_with_strategy(ahnd, opts->dependency,
+									   BATCH_SIZE, 0,
+									   ALLOC_STRATEGY_LOW_TO_HIGH);
+		else
+			addr_scratch = addr;
 
 		obj[SCRATCH].handle = opts->dependency;
-		obj[SCRATCH].offset = addr;
+		obj[SCRATCH].offset = CANONICAL(addr_scratch);
+		obj[SCRATCH].flags |= objflags;
+		if (obj[SCRATCH].offset >= (1ull << 32))
+			obj[SCRATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
 		if (!(opts->flags & IGT_SPIN_SOFTDEP)) {
-			obj[SCRATCH].flags = EXEC_OBJECT_WRITE;
+			obj[SCRATCH].flags |= EXEC_OBJECT_WRITE;
 
 			/* dummy write to dependency */
 			r = &relocs[obj[BATCH].relocation_count++];
@@ -212,8 +237,14 @@ emit_recursive_batch(igt_spin_t *spin,
 								       0, 4096,
 								       PROT_READ | PROT_WRITE);
 		}
+
+		if (ahnd)
+			addr = intel_allocator_alloc_with_strategy(ahnd,
+								   spin->poll_handle,
+								   BATCH_SIZE * 3, 0,
+								   ALLOC_STRATEGY_LOW_TO_HIGH);
 		addr += 4096; /* guard page */
-		obj[SCRATCH].offset = addr;
+		obj[SCRATCH].offset = CANONICAL(addr);
 		addr += 4096;
 
 		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);
@@ -223,11 +254,15 @@ emit_recursive_batch(igt_spin_t *spin,
 		r->offset = sizeof(uint32_t) * 1;
 		r->delta = sizeof(uint32_t) * SPIN_POLL_START_IDX;
 
+		obj[SCRATCH].flags |= objflags;
+		if (obj[SCRATCH].offset >= (1ull << 32))
+			obj[SCRATCH].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
 		*cs++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 
 		if (gen >= 8) {
 			*cs++ = r->presumed_offset + r->delta;
-			*cs++ = 0;
+			*cs++ = (r->presumed_offset + r->delta) >> 32;
 		} else if (gen >= 4) {
 			*cs++ = 0;
 			*cs++ = r->presumed_offset + r->delta;
@@ -314,10 +349,11 @@ emit_recursive_batch(igt_spin_t *spin,
 	r->offset = (cs + 1 - spin->batch) * sizeof(*cs);
 	r->read_domains = I915_GEM_DOMAIN_COMMAND;
 	r->delta = LOOP_START_OFFSET;
+
 	if (gen >= 8) {
 		*cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
 		*cs++ = r->presumed_offset + r->delta;
-		*cs++ = 0;
+		*cs++ = (r->presumed_offset + r->delta) >> 32;
 	} else if (gen >= 6) {
 		*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
 		*cs++ = r->presumed_offset + r->delta;
@@ -351,6 +387,10 @@ emit_recursive_batch(igt_spin_t *spin,
 		execbuf->flags &= ~ENGINE_MASK;
 		execbuf->flags |= flags[i];
 
+		/* When using the allocator we have to zero relocation_count */
+		for (int j = 0; j < ARRAY_SIZE(spin->obj) && ahnd; j++)
+			spin->obj[j].relocation_count = 0;
+
 		gem_execbuf_wr(fd, execbuf);
 
 		if (opts->flags & IGT_SPIN_FENCE_OUT) {
@@ -569,11 +609,17 @@ static void __igt_spin_free(int fd, igt_spin_t *spin)
 	if (spin->batch)
 		gem_munmap(spin->batch, BATCH_SIZE);
 
-	if (spin->poll_handle)
+	if (spin->poll_handle) {
 		gem_close(fd, spin->poll_handle);
+		if (spin->ahnd)
+			intel_allocator_free(spin->ahnd, spin->poll_handle);
+	}
 
-	if (spin->handle)
+	if (spin->handle) {
 		gem_close(fd, spin->handle);
+		if (spin->ahnd)
+			intel_allocator_free(spin->ahnd, spin->handle);
+	}
 
 	if (spin->out_fence >= 0)
 		close(spin->out_fence);
diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h
index 67e1a08e..f0205261 100644
--- a/lib/igt_dummyload.h
+++ b/lib/igt_dummyload.h
@@ -58,6 +58,8 @@ typedef struct igt_spin {
 
 	unsigned int flags;
 #define SPIN_CLFLUSH (1 << 0)
+
+	uint64_t ahnd;
 } igt_spin_t;
 
 /**
@@ -78,6 +80,7 @@ typedef struct igt_spin_factory {
 	unsigned int engine;
 	unsigned int flags;
 	int fence;
+	uint64_t ahnd;
 } igt_spin_factory_t;
 
 #define IGT_SPIN_FENCE_IN      (1 << 0)
diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index 75ab217b..c7dcadcf 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -2417,6 +2417,8 @@ static void blitcopy(const struct igt_fb *dst_fb,
 		     const struct igt_fb *src_fb)
 {
 	uint32_t src_tiling, dst_tiling;
+	uint32_t ctx = 0;
+	uint64_t ahnd = 0;
 
 	igt_assert_eq(dst_fb->fd, src_fb->fd);
 	igt_assert_eq(dst_fb->num_planes, src_fb->num_planes);
@@ -2424,6 +2426,12 @@ static void blitcopy(const struct igt_fb *dst_fb,
 	src_tiling = igt_fb_mod_to_tiling(src_fb->modifier);
 	dst_tiling = igt_fb_mod_to_tiling(dst_fb->modifier);
 
+	if (!gem_has_relocations(dst_fb->fd)) {
+		igt_require(gem_has_contexts(dst_fb->fd));
+		ctx = gem_context_create(dst_fb->fd);
+		ahnd = get_reloc_ahnd(dst_fb->fd, ctx);
+	}
+
 	for (int i = 0; i < dst_fb->num_planes; i++) {
 		igt_assert_eq(dst_fb->plane_bpp[i], src_fb->plane_bpp[i]);
 		igt_assert_eq(dst_fb->plane_width[i], src_fb->plane_width[i]);
@@ -2435,11 +2443,13 @@ static void blitcopy(const struct igt_fb *dst_fb,
 		 */
 		if (fast_blit_ok(src_fb) && fast_blit_ok(dst_fb)) {
 			igt_blitter_fast_copy__raw(dst_fb->fd,
+						   ahnd, ctx,
 						   src_fb->gem_handle,
 						   src_fb->offsets[i],
 						   src_fb->strides[i],
 						   src_tiling,
 						   0, 0, /* src_x, src_y */
+						   src_fb->size,
 						   dst_fb->plane_width[i],
 						   dst_fb->plane_height[i],
 						   dst_fb->plane_bpp[i],
@@ -2447,14 +2457,17 @@ static void blitcopy(const struct igt_fb *dst_fb,
 						   dst_fb->offsets[i],
 						   dst_fb->strides[i],
 						   dst_tiling,
-						   0, 0 /* dst_x, dst_y */);
+						   0, 0 /* dst_x, dst_y */,
+						   dst_fb->size);
 		} else {
 			igt_blitter_src_copy(dst_fb->fd,
+					     ahnd, ctx,
 					     src_fb->gem_handle,
 					     src_fb->offsets[i],
 					     src_fb->strides[i],
 					     src_tiling,
 					     0, 0, /* src_x, src_y */
+					     src_fb->size,
 					     dst_fb->plane_width[i],
 					     dst_fb->plane_height[i],
 					     dst_fb->plane_bpp[i],
@@ -2462,9 +2475,14 @@ static void blitcopy(const struct igt_fb *dst_fb,
 					     dst_fb->offsets[i],
 					     dst_fb->strides[i],
 					     dst_tiling,
-					     0, 0 /* dst_x, dst_y */);
+					     0, 0 /* dst_x, dst_y */,
+					     dst_fb->size);
 		}
 	}
+
+	if (ctx)
+		gem_context_destroy(dst_fb->fd, ctx);
+	put_ahnd(ahnd);
 }
 
 static void free_linear_mapping(struct fb_blit_upload *blit)
diff --git a/lib/igt_gt.c b/lib/igt_gt.c
index c049477d..a0ba04cc 100644
--- a/lib/igt_gt.c
+++ b/lib/igt_gt.c
@@ -269,7 +269,8 @@ static bool has_ctx_exec(int fd, unsigned ring, uint32_t ctx)
  * Returns:
  * Structure with helper internal state for igt_post_hang_ring().
  */
-igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags)
+static igt_hang_t __igt_hang_ctx(int fd, uint64_t ahnd, uint32_t ctx, int ring,
+				 unsigned flags)
 {
 	struct drm_i915_gem_context_param param;
 	igt_spin_t *spin;
@@ -298,6 +299,7 @@ igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags)
 		context_set_ban(fd, ctx, 0);
 
 	spin = __igt_spin_new(fd,
+			      .ahnd = ahnd,
 			      .ctx_id = ctx,
 			      .engine = ring,
 			      .flags = IGT_SPIN_NO_PREEMPTION);
@@ -305,6 +307,17 @@ igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags)
 	return (igt_hang_t){ spin, ctx, ban, flags };
 }
 
+igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags)
+{
+	return __igt_hang_ctx(fd, 0, ctx, ring, flags);
+}
+
+igt_hang_t igt_hang_ctx_with_ahnd(int fd, uint64_t ahnd, uint32_t ctx, int ring,
+				  unsigned flags)
+{
+	return __igt_hang_ctx(fd, ahnd, ctx, ring, flags);
+}
+
 /**
  * igt_hang_ring:
  * @fd: open i915 drm file descriptor
@@ -322,6 +335,11 @@ igt_hang_t igt_hang_ring(int fd, int ring)
 	return igt_hang_ctx(fd, 0, ring, 0);
 }
 
+igt_hang_t igt_hang_ring_with_ahnd(int fd, int ring, uint64_t ahnd)
+{
+	return igt_hang_ctx_with_ahnd(fd, ahnd, 0, ring, 0);
+}
+
 /**
  * igt_post_hang_ring:
  * @fd: open i915 drm file descriptor
diff --git a/lib/igt_gt.h b/lib/igt_gt.h
index 2ea360cc..fabb89cd 100644
--- a/lib/igt_gt.h
+++ b/lib/igt_gt.h
@@ -45,10 +45,14 @@ void igt_disallow_hang(int fd, igt_hang_t arg);
 #define HANG_POISON 0xc5c5c5c5
 
 igt_hang_t igt_hang_ctx(int fd, uint32_t ctx, int ring, unsigned flags);
+igt_hang_t igt_hang_ctx_with_ahnd(int fd, uint64_t ahnd, uint32_t ctx, int ring,
+				  unsigned flags);
+
 #define HANG_ALLOW_BAN 1
 #define HANG_ALLOW_CAPTURE 2
 
 igt_hang_t igt_hang_ring(int fd, int ring);
+igt_hang_t igt_hang_ring_with_ahnd(int fd, int ring, uint64_t ahnd);
 void igt_post_hang_ring(int fd, igt_hang_t arg);
 
 void igt_force_gpu_reset(int fd);
diff --git a/lib/intel_allocator.h b/lib/intel_allocator.h
index c14f57b4..f6511ffb 100644
--- a/lib/intel_allocator.h
+++ b/lib/intel_allocator.h
@@ -11,6 +11,7 @@
 #include <pthread.h>
 #include <stdint.h>
 #include <stdatomic.h>
+#include "i915/gem_submission.h"
 
 /**
  * SECTION:intel_allocator
@@ -228,4 +229,58 @@ static inline uint64_t CANONICAL(uint64_t offset)
 
 #define DECANONICAL(offset) (offset & ((1ull << GEN8_GTT_ADDRESS_WIDTH) - 1))
 
+static inline uint64_t get_simple_ahnd(int fd, uint32_t ctx)
+{
+	bool do_relocs = gem_has_relocations(fd);
+
+	return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_SIMPLE);
+}
+
+static inline uint64_t get_simple_l2h_ahnd(int fd, uint32_t ctx)
+{
+	bool do_relocs = gem_has_relocations(fd);
+
+	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
+							 INTEL_ALLOCATOR_SIMPLE,
+							 ALLOC_STRATEGY_LOW_TO_HIGH);
+}
+
+static inline uint64_t get_simple_h2l_ahnd(int fd, uint32_t ctx)
+{
+	bool do_relocs = gem_has_relocations(fd);
+
+	return do_relocs ? 0 : intel_allocator_open_full(fd, ctx, 0, 0,
+							 INTEL_ALLOCATOR_SIMPLE,
+							 ALLOC_STRATEGY_HIGH_TO_LOW);
+}
+
+static inline uint64_t get_reloc_ahnd(int fd, uint32_t ctx)
+{
+	bool do_relocs = gem_has_relocations(fd);
+
+	return do_relocs ? 0 : intel_allocator_open(fd, ctx, INTEL_ALLOCATOR_RELOC);
+}
+
+static inline bool put_ahnd(uint64_t ahnd)
+{
+	return !ahnd || intel_allocator_close(ahnd);
+}
+
+static inline uint64_t get_offset(uint64_t ahnd, uint32_t handle,
+				  uint64_t size, uint64_t alignment)
+{
+	if (!ahnd)
+		return 0;
+
+	return intel_allocator_alloc(ahnd, handle, size, alignment);
+}
+
+static inline bool put_offset(uint64_t ahnd, uint32_t handle)
+{
+	if (!ahnd)
+		return 0;
+
+	return intel_allocator_free(ahnd, handle);
+}
+
 #endif
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 2b8b903e..bbf8e0da 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -679,36 +679,40 @@ static uint32_t fast_copy_dword1(unsigned int src_tiling,
 
 static void
 fill_relocation(struct drm_i915_gem_relocation_entry *reloc,
-		uint32_t gem_handle, uint32_t delta, /* in bytes */
+		uint32_t gem_handle, uint64_t presumed_offset,
+		uint32_t delta, /* in bytes */
 		uint32_t offset, /* in dwords */
 		uint32_t read_domains, uint32_t write_domains)
 {
 	reloc->target_handle = gem_handle;
 	reloc->delta = delta;
 	reloc->offset = offset * sizeof(uint32_t);
-	reloc->presumed_offset = 0;
+	reloc->presumed_offset = presumed_offset;
 	reloc->read_domains = read_domains;
 	reloc->write_domain = write_domains;
 }
 
 static void
-fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle,
+fill_object(struct drm_i915_gem_exec_object2 *obj,
+	    uint32_t gem_handle, uint64_t gem_offset,
 	    struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
 {
 	memset(obj, 0, sizeof(*obj));
 	obj->handle = gem_handle;
+	obj->offset = gem_offset;
 	obj->relocation_count = count;
 	obj->relocs_ptr = to_user_pointer(relocs);
 }
 
 static void exec_blit(int fd,
 		      struct drm_i915_gem_exec_object2 *objs, uint32_t count,
-		      unsigned int gen)
+		      unsigned int gen, uint32_t ctx)
 {
 	struct drm_i915_gem_execbuffer2 exec = {
 		.buffers_ptr = to_user_pointer(objs),
 		.buffer_count = count,
-		.flags = gen >= 6 ? I915_EXEC_BLT : 0,
+		.flags = (gen >= 6 ? I915_EXEC_BLT : 0) | I915_EXEC_NO_RELOC,
+		.rsvd1 = ctx,
 	};
 
 	gem_execbuf(fd, &exec);
@@ -758,12 +762,15 @@ static uint32_t src_copy_dword1(uint32_t dst_pitch, uint32_t bpp)
 /**
  * igt_blitter_src_copy:
  * @fd: file descriptor of the i915 driver
+ * @ahnd: handle to an allocator
+ * @ctx: context in which to execute the copy blit
  * @src_handle: GEM handle of the source buffer
  * @src_delta: offset into the source GEM bo, in bytes
  * @src_stride: Stride (in bytes) of the source buffer
  * @src_tiling: Tiling mode of the source buffer
  * @src_x: X coordinate of the source region to copy
  * @src_y: Y coordinate of the source region to copy
+ * @src_size: size of the src bo, required for the allocator and softpin
  * @width: Width of the region to copy
  * @height: Height of the region to copy
  * @bpp: source and destination bits per pixel
@@ -773,16 +780,20 @@ static uint32_t src_copy_dword1(uint32_t dst_pitch, uint32_t bpp)
  * @dst_tiling: Tiling mode of the destination buffer
  * @dst_x: X coordinate of destination
  * @dst_y: Y coordinate of destination
+ * @dst_size: size of the dst bo, required for the allocator and softpin
  *
  * Copy @src into @dst using the XY_SRC blit command.
  */
 void igt_blitter_src_copy(int fd,
+			  uint64_t ahnd,
+			  uint32_t ctx,
 			  /* src */
 			  uint32_t src_handle,
 			  uint32_t src_delta,
 			  uint32_t src_stride,
 			  uint32_t src_tiling,
 			  uint32_t src_x, uint32_t src_y,
+			  uint64_t src_size,
 
 			  /* size */
 			  uint32_t width, uint32_t height,
@@ -795,7 +806,8 @@ void igt_blitter_src_copy(int fd,
 			  uint32_t dst_delta,
 			  uint32_t dst_stride,
 			  uint32_t dst_tiling,
-			  uint32_t dst_x, uint32_t dst_y)
+			  uint32_t dst_x, uint32_t dst_y,
+			  uint64_t dst_size)
 {
 	uint32_t batch[32];
 	struct drm_i915_gem_exec_object2 objs[3];
@@ -804,9 +816,21 @@ void igt_blitter_src_copy(int fd,
 	uint32_t src_pitch, dst_pitch;
 	uint32_t dst_reloc_offset, src_reloc_offset;
 	uint32_t gen = intel_gen(intel_get_drm_devid(fd));
+	uint64_t batch_offset, src_offset, dst_offset;
 	const bool has_64b_reloc = gen >= 8;
 	int i = 0;
 
+	batch_handle = gem_create(fd, 4096);
+	if (ahnd) {
+		src_offset = get_offset(ahnd, src_handle, src_size, 0);
+		dst_offset = get_offset(ahnd, dst_handle, dst_size, 0);
+		batch_offset = get_offset(ahnd, batch_handle, 4096, 0);
+	} else {
+		src_offset = 16 << 20;
+		dst_offset = ALIGN(src_offset + src_size, 1 << 20);
+		batch_offset = ALIGN(dst_offset + dst_size, 1 << 20);
+	}
+
 	memset(batch, 0, sizeof(batch));
 
 	igt_assert((src_tiling == I915_TILING_NONE) ||
@@ -851,15 +875,15 @@ void igt_blitter_src_copy(int fd,
 	batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
 	batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
 	dst_reloc_offset = i;
-	batch[i++] = dst_delta; /* dst address lower bits */
+	batch[i++] = dst_offset + dst_delta; /* dst address lower bits */
 	if (has_64b_reloc)
-		batch[i++] = 0;	/* dst address upper bits */
+		batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */
 	batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
 	batch[i++] = src_pitch;
 	src_reloc_offset = i;
-	batch[i++] = src_delta; /* src address lower bits */
+	batch[i++] = src_offset + src_delta; /* src address lower bits */
 	if (has_64b_reloc)
-		batch[i++] = 0;	/* src address upper bits */
+		batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */
 
 	if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
 		igt_assert(gen >= 6);
@@ -878,22 +902,29 @@ void igt_blitter_src_copy(int fd,
 
 	igt_assert(i <= ARRAY_SIZE(batch));
 
-	batch_handle = gem_create(fd, 4096);
 	gem_write(fd, batch_handle, 0, batch, sizeof(batch));
 
-	fill_relocation(&relocs[0], dst_handle, dst_delta, dst_reloc_offset,
+	fill_relocation(&relocs[0], dst_handle, dst_offset,
+			dst_delta, dst_reloc_offset,
 			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-	fill_relocation(&relocs[1], src_handle, src_delta, src_reloc_offset,
+	fill_relocation(&relocs[1], src_handle, src_offset,
+			src_delta, src_reloc_offset,
 			I915_GEM_DOMAIN_RENDER, 0);
 
-	fill_object(&objs[0], dst_handle, NULL, 0);
-	fill_object(&objs[1], src_handle, NULL, 0);
-	fill_object(&objs[2], batch_handle, relocs, 2);
+	fill_object(&objs[0], dst_handle, dst_offset, NULL, 0);
+	fill_object(&objs[1], src_handle, src_offset, NULL, 0);
+	fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0);
 
-	objs[0].flags |= EXEC_OBJECT_NEEDS_FENCE;
+	objs[0].flags |= EXEC_OBJECT_NEEDS_FENCE | EXEC_OBJECT_WRITE;
 	objs[1].flags |= EXEC_OBJECT_NEEDS_FENCE;
 
-	exec_blit(fd, objs, 3, gen);
+	if (ahnd) {
+		objs[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		objs[1].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		objs[2].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	}
+
+	exec_blit(fd, objs, 3, gen, ctx);
 
 	gem_close(fd, batch_handle);
 }
@@ -901,12 +932,15 @@ void igt_blitter_src_copy(int fd,
 /**
  * igt_blitter_fast_copy__raw:
  * @fd: file descriptor of the i915 driver
+ * @ahnd: handle to an allocator
+ * @ctx: context in which to execute the copy blit
  * @src_handle: GEM handle of the source buffer
  * @src_delta: offset into the source GEM bo, in bytes
  * @src_stride: Stride (in bytes) of the source buffer
  * @src_tiling: Tiling mode of the source buffer
  * @src_x: X coordinate of the source region to copy
  * @src_y: Y coordinate of the source region to copy
+ * @src_size: size of the src bo, required for the allocator and softpin
  * @width: Width of the region to copy
  * @height: Height of the region to copy
  * @bpp: source and destination bits per pixel
@@ -916,16 +950,20 @@ void igt_blitter_src_copy(int fd,
  * @dst_tiling: Tiling mode of the destination buffer
  * @dst_x: X coordinate of destination
  * @dst_y: Y coordinate of destination
+ * @dst_size: size of the dst bo, required for the allocator and softpin
  *
  * Like igt_blitter_fast_copy(), but talking to the kernel directly.
  */
 void igt_blitter_fast_copy__raw(int fd,
+				uint64_t ahnd,
+				uint32_t ctx,
 				/* src */
 				uint32_t src_handle,
 				unsigned int src_delta,
 				unsigned int src_stride,
 				unsigned int src_tiling,
 				unsigned int src_x, unsigned src_y,
+				uint64_t src_size,
 
 				/* size */
 				unsigned int width, unsigned int height,
@@ -938,7 +976,8 @@ void igt_blitter_fast_copy__raw(int fd,
 				unsigned dst_delta,
 				unsigned int dst_stride,
 				unsigned int dst_tiling,
-				unsigned int dst_x, unsigned dst_y)
+				unsigned int dst_x, unsigned dst_y,
+				uint64_t dst_size)
 {
 	uint32_t batch[12];
 	struct drm_i915_gem_exec_object2 objs[3];
@@ -946,8 +985,20 @@ void igt_blitter_fast_copy__raw(int fd,
 	uint32_t batch_handle;
 	uint32_t dword0, dword1;
 	uint32_t src_pitch, dst_pitch;
+	uint64_t batch_offset, src_offset, dst_offset;
 	int i = 0;
 
+	batch_handle = gem_create(fd, 4096);
+	if (ahnd) {
+		src_offset = get_offset(ahnd, src_handle, src_size, 0);
+		dst_offset = get_offset(ahnd, dst_handle, dst_size, 0);
+		batch_offset = get_offset(ahnd, batch_handle, 4096, 0);
+	} else {
+		src_offset = 16 << 20;
+		dst_offset = ALIGN(src_offset + src_size, 1 << 20);
+		batch_offset = ALIGN(dst_offset + dst_size, 1 << 20);
+	}
+
 	src_pitch = fast_copy_pitch(src_stride, src_tiling);
 	dst_pitch = fast_copy_pitch(dst_stride, dst_tiling);
 	dword0 = fast_copy_dword0(src_tiling, dst_tiling);
@@ -964,29 +1015,36 @@ void igt_blitter_fast_copy__raw(int fd,
 	batch[i++] = dword1 | dst_pitch;
 	batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
 	batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
-	batch[i++] = dst_delta; /* dst address lower bits */
-	batch[i++] = 0;	/* dst address upper bits */
+	batch[i++] = dst_offset + dst_delta; /* dst address lower bits */
+	batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */
 	batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
 	batch[i++] = src_pitch;
-	batch[i++] = src_delta; /* src address lower bits */
-	batch[i++] = 0;	/* src address upper bits */
+	batch[i++] = src_offset + src_delta; /* src address lower bits */
+	batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */
 	batch[i++] = MI_BATCH_BUFFER_END;
 	batch[i++] = MI_NOOP;
 
 	igt_assert(i == ARRAY_SIZE(batch));
 
-	batch_handle = gem_create(fd, 4096);
 	gem_write(fd, batch_handle, 0, batch, sizeof(batch));
 
-	fill_relocation(&relocs[0], dst_handle, dst_delta, 4,
+	fill_relocation(&relocs[0], dst_handle, dst_offset, dst_delta, 4,
 			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-	fill_relocation(&relocs[1], src_handle, src_delta, 8, I915_GEM_DOMAIN_RENDER, 0);
+	fill_relocation(&relocs[1], src_handle, src_offset, src_delta, 8,
+			I915_GEM_DOMAIN_RENDER, 0);
+
+	fill_object(&objs[0], dst_handle, dst_offset, NULL, 0);
+	objs[0].flags |= EXEC_OBJECT_WRITE;
+	fill_object(&objs[1], src_handle, src_offset, NULL, 0);
+	fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0);
 
-	fill_object(&objs[0], dst_handle, NULL, 0);
-	fill_object(&objs[1], src_handle, NULL, 0);
-	fill_object(&objs[2], batch_handle, relocs, 2);
+	if (ahnd) {
+		objs[0].flags |= EXEC_OBJECT_PINNED;
+		objs[1].flags |= EXEC_OBJECT_PINNED;
+		objs[2].flags |= EXEC_OBJECT_PINNED;
+	}
 
-	exec_blit(fd, objs, 3, intel_gen(intel_get_drm_devid(fd)));
+	exec_blit(fd, objs, 3, intel_gen(intel_get_drm_devid(fd)), ctx);
 
 	gem_close(fd, batch_handle);
 }
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index bd417e99..0839d761 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -271,12 +271,15 @@ unsigned int igt_buf_intel_ccs_height(unsigned int gen,
 				      const struct igt_buf *buf);
 
 void igt_blitter_src_copy(int fd,
+			  uint64_t ahnd,
+			  uint32_t ctx,
 			  /* src */
 			  uint32_t src_handle,
 			  uint32_t src_delta,
 			  uint32_t src_stride,
 			  uint32_t src_tiling,
 			  uint32_t src_x, uint32_t src_y,
+			  uint64_t src_size,
 
 			  /* size */
 			  uint32_t width, uint32_t height,
@@ -289,7 +292,8 @@ void igt_blitter_src_copy(int fd,
 			  uint32_t dst_delta,
 			  uint32_t dst_stride,
 			  uint32_t dst_tiling,
-			  uint32_t dst_x, uint32_t dst_y);
+			  uint32_t dst_x, uint32_t dst_y,
+			  uint64_t dst_size);
 
 void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
 			   const struct igt_buf *src, unsigned src_delta,
@@ -300,12 +304,15 @@ void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
 			   unsigned dst_x, unsigned dst_y);
 
 void igt_blitter_fast_copy__raw(int fd,
+				uint64_t ahnd,
+				uint32_t ctx,
 				/* src */
 				uint32_t src_handle,
 				unsigned int src_delta,
 				unsigned int src_stride,
 				unsigned int src_tiling,
 				unsigned int src_x, unsigned src_y,
+				uint64_t src_size,
 
 				/* size */
 				unsigned int width, unsigned int height,
@@ -318,7 +325,8 @@ void igt_blitter_fast_copy__raw(int fd,
 				unsigned int dst_delta,
 				unsigned int dst_stride,
 				unsigned int dst_tiling,
-				unsigned int dst_x, unsigned dst_y);
+				unsigned int dst_x, unsigned dst_y,
+				uint64_t dst_size);
 
 /**
  * igt_render_copyfunc_t:
@@ -671,10 +679,12 @@ void intel_bb_copy_intel_buf(struct intel_bb *ibb,
 /**
  * igt_huc_copyfunc_t:
  * @fd: drm fd
+ * @ahnd: allocator handle; if it is 0, relocations are used
  * @obj: drm_i915_gem_exec_object2 buffer array
  *       obj[0] is source buffer
  *       obj[1] is destination buffer
  *       obj[2] is execution buffer
+ * @objsize: buffer sizes corresponding to @obj
  *
  * This is the type of the per-platform huc copy functions.
  *
@@ -682,8 +692,8 @@ void intel_bb_copy_intel_buf(struct intel_bb *ibb,
  * invoke the HuC Copy kernel to copy 4K bytes from the source buffer
  * to the destination buffer.
  */
-typedef void (*igt_huc_copyfunc_t)(int fd,
-		struct drm_i915_gem_exec_object2 *obj);
+typedef void (*igt_huc_copyfunc_t)(int fd, uint64_t ahnd,
+		struct drm_i915_gem_exec_object2 *obj, uint64_t *objsize);
 
 igt_huc_copyfunc_t	igt_get_huc_copyfunc(int devid);
 #endif
diff --git a/tests/i915/gem_busy.c b/tests/i915/gem_busy.c
index f0fca0e8..51ec5ad0 100644
--- a/tests/i915/gem_busy.c
+++ b/tests/i915/gem_busy.c
@@ -108,6 +108,7 @@ static void semaphore(int fd, const intel_ctx_t *ctx,
 	uint32_t handle[3];
 	uint32_t read, write;
 	uint32_t active;
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 	unsigned i;
 
 	handle[TEST] = gem_create(fd, 4096);
@@ -117,6 +118,7 @@ static void semaphore(int fd, const intel_ctx_t *ctx,
 	/* Create a long running batch which we can use to hog the GPU */
 	handle[BUSY] = gem_create(fd, 4096);
 	spin = igt_spin_new(fd,
+			    .ahnd = ahnd,
 			    .ctx = ctx,
 			    .engine = e->flags,
 			    .dependency = handle[BUSY]);
@@ -171,8 +173,10 @@ static void one(int fd, const intel_ctx_t *ctx,
 	struct timespec tv;
 	igt_spin_t *spin;
 	int timeout;
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 
 	spin = igt_spin_new(fd,
+			    .ahnd = ahnd,
 			    .ctx = ctx,
 			    .engine = e->flags,
 			    .dependency = scratch,
@@ -225,6 +229,7 @@ static void one(int fd, const intel_ctx_t *ctx,
 
 	igt_spin_free(fd, spin);
 	gem_close(fd, scratch);
+	put_ahnd(ahnd);
 }
 
 static void xchg_u32(void *array, unsigned i, unsigned j)
@@ -298,11 +303,13 @@ static void close_race(int fd, const intel_ctx_t *ctx)
 		struct sched_param rt = {.sched_priority = 99 };
 		igt_spin_t *spin[nhandles];
 		unsigned long count = 0;
+		uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 
 		igt_assert(sched_setscheduler(getpid(), SCHED_RR, &rt) == 0);
 
 		for (i = 0; i < nhandles; i++) {
 			spin[i] = __igt_spin_new(fd,
+						 .ahnd = ahnd,
 						 .ctx = ctx,
 						 .engine = engines[rand() % nengine]);
 			handles[i] = spin[i]->handle;
@@ -312,6 +319,7 @@ static void close_race(int fd, const intel_ctx_t *ctx)
 			for (i = 0; i < nhandles; i++) {
 				igt_spin_free(fd, spin[i]);
 				spin[i] = __igt_spin_new(fd,
+							 .ahnd = ahnd,
 							 .ctx = ctx,
 							 .engine = engines[rand() % nengine]);
 				handles[i] = spin[i]->handle;
@@ -324,6 +332,7 @@ static void close_race(int fd, const intel_ctx_t *ctx)
 
 		for (i = 0; i < nhandles; i++)
 			igt_spin_free(fd, spin[i]);
+		put_ahnd(ahnd);
 	}
 	igt_waitchildren();
 
@@ -355,11 +364,13 @@ static bool has_semaphores(int fd)
 
 static bool has_extended_busy_ioctl(int fd)
 {
-	igt_spin_t *spin = igt_spin_new(fd, .engine = I915_EXEC_DEFAULT);
+	uint64_t ahnd = get_reloc_ahnd(fd, 0);
+	igt_spin_t *spin = igt_spin_new(fd, .ahnd = ahnd, .engine = I915_EXEC_DEFAULT);
 	uint32_t read, write;
 
 	__gem_busy(fd, spin->handle, &read, &write);
 	igt_spin_free(fd, spin);
+	put_ahnd(ahnd);
 
 	return read != 0;
 }
@@ -367,8 +378,10 @@ static bool has_extended_busy_ioctl(int fd)
 static void basic(int fd, const intel_ctx_t *ctx,
 		  const struct intel_execution_engine2 *e, unsigned flags)
 {
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 	igt_spin_t *spin =
 		igt_spin_new(fd,
+			     .ahnd = ahnd,
 			     .ctx = ctx,
 			     .engine = e->flags,
 			     .flags = flags & HANG ?
@@ -394,6 +407,7 @@ static void basic(int fd, const intel_ctx_t *ctx,
 	}
 
 	igt_spin_free(fd, spin);
+	put_ahnd(ahnd);
 }
 
 static void all(int i915, const intel_ctx_t *ctx)
@@ -428,6 +442,7 @@ igt_main
 
 	igt_subtest_group {
 		igt_fixture {
+			intel_allocator_multiprocess_start();
 			igt_fork_hang_detector(fd);
 		}
 
@@ -445,6 +460,20 @@
 			}
 		}
 
+		igt_subtest("close-race")
+			close_race(fd, ctx);
+
+		igt_fixture {
+			igt_stop_hang_detector();
+			intel_allocator_multiprocess_stop();
+		}
+	}
+
+	igt_subtest_group {
+		igt_fixture {
+			igt_fork_hang_detector(fd);
+		}
+
 		igt_subtest_group {
 			igt_fixture {
 				igt_require(has_extended_busy_ioctl(fd));
@@ -477,9 +506,6 @@
 			}
 		}
 
-		igt_subtest("close-race")
-			close_race(fd, ctx);
-
 		igt_fixture {
 			igt_stop_hang_detector();
 		}
diff --git a/tests/i915/gem_create.c b/tests/i915/gem_create.c
index 1acf8ee6..45804cde 100644
--- a/tests/i915/gem_create.c
+++ b/tests/i915/gem_create.c
@@ -249,12 +249,16 @@ static void busy_create(int i915, int timeout)
 	const intel_ctx_t *ctx;
 	igt_spin_t *spin[I915_EXEC_RING_MASK + 1];
 	unsigned long count = 0;
+	uint64_t ahnd;
 
 	ctx = intel_ctx_create_all_physical(i915);
+	ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	igt_fork_hang_detector(i915);
 	for_each_ctx_engine(i915, ctx, e)
-		spin[e->flags] = igt_spin_new(i915, .ctx = ctx,
+		spin[e->flags] = igt_spin_new(i915,
+					      .ahnd = ahnd,
+					      .ctx = ctx,
 					      .engine = e->flags);
 
 	igt_until_timeout(timeout) {
@@ -263,7 +267,9 @@ static void busy_create(int i915, int timeout)
 			igt_spin_t *next;
 
 			handle = gem_create(i915, 4096);
-			next = igt_spin_new(i915, .ctx = ctx,
+			next = igt_spin_new(i915,
+					    .ahnd = ahnd,
+					    .ctx = ctx,
 					    .engine = e->flags,
 					    .dependency = handle,
 					    .flags = IGT_SPIN_SOFTDEP);
@@ -277,6 +283,7 @@ static void busy_create(int i915, int timeout)
 	}
 
 	intel_ctx_destroy(i915, ctx);
+	put_ahnd(ahnd);
 
 	igt_info("Created %ld objects while busy\n", count);
 
diff --git a/tests/i915/gem_ctx_engines.c b/tests/i915/gem_ctx_engines.c
index bfa83f7e..003dd171 100644
--- a/tests/i915/gem_ctx_engines.c
+++ b/tests/i915/gem_ctx_engines.c
@@ -69,6 +69,7 @@ static void invalid_engines(int i915)
 	uint32_t handle;
 	igt_spin_t *spin;
 	void *ptr;
+	uint64_t ahnd;
 
 	param.size = 0;
 	igt_assert_eq(__set_param_fresh_context(i915, param), -EINVAL);
@@ -180,8 +181,10 @@ static void invalid_engines(int i915)
 
 	/* Test that we can't set engines after we've done an execbuf */
 	param.ctx_id = gem_context_create(i915);
-	spin = igt_spin_new(i915, .ctx_id = param.ctx_id);
+	ahnd = get_reloc_ahnd(i915, param.ctx_id);
+	spin = igt_spin_new(i915, .ahnd = ahnd, .ctx_id = param.ctx_id);
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 	igt_assert_eq(__gem_context_set_param(i915, &param), -EINVAL);
 	gem_context_destroy(i915, param.ctx_id);
 
@@ -283,14 +286,18 @@ static void execute_one(int i915)
 		for (int i = -1; i <= I915_EXEC_RING_MASK; i++) {
 			intel_ctx_cfg_t cfg = {};
 			const intel_ctx_t *ctx;
+			uint64_t ahnd;
 			igt_spin_t *spin;
 
 			cfg.num_engines = 1;
 			cfg.engines[0].engine_class = e->class;
 			cfg.engines[0].engine_instance = e->instance;
 			ctx = intel_ctx_create(i915, &cfg);
+			ahnd = get_reloc_ahnd(i915, ctx->id);
 
-			spin = igt_spin_new(i915, .ctx = ctx,
+			spin = igt_spin_new(i915,
+					    .ahnd = ahnd,
+					    .ctx = ctx,
 					    .flags = (IGT_SPIN_NO_PREEMPTION |
 						      IGT_SPIN_POLL_RUN));
 
@@ -324,6 +331,7 @@ static void execute_one(int i915)
 				      i != -1 ? 1 << e->class : 0);
 
 			igt_spin_free(i915, spin);
+			put_ahnd(ahnd);
 
 			gem_sync(i915, obj.handle);
 			intel_ctx_destroy(i915, ctx);
@@ -344,9 +352,11 @@ static void execute_oneforall(int i915)
 		.size = sizeof(engines),
 	};
 	const struct intel_execution_engine2 *e;
+	uint64_t ahnd;
 
 	for_each_physical_engine(i915, e) {
 		param.ctx_id = gem_context_create(i915);
+		ahnd = get_reloc_ahnd(i915, param.ctx_id);
 
 		memset(&engines, 0, sizeof(engines));
 		for (int i = 0; i <= I915_EXEC_RING_MASK; i++) {
@@ -360,6 +370,7 @@ static void execute_oneforall(int i915)
 			igt_spin_t *spin;
 
 			spin = __igt_spin_new(i915,
+					      .ahnd = ahnd,
 					      .ctx_id = param.ctx_id,
 					      .engine = i);
 
@@ -371,6 +382,7 @@ static void execute_oneforall(int i915)
 		}
 
 		gem_context_destroy(i915, param.ctx_id);
+		put_ahnd(ahnd);
 	}
 }
 
@@ -384,6 +396,7 @@ static void execute_allforone(int i915)
 	};
 	const struct intel_execution_engine2 *e;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(i915, param.ctx_id);
 
 	i = 0;
 	memset(&engines, 0, sizeof(engines));
@@ -401,6 +414,7 @@ static void execute_allforone(int i915)
 		igt_spin_t *spin;
 
 		spin = __igt_spin_new(i915,
+				      .ahnd = ahnd,
 				      .ctx_id = param.ctx_id,
 				      .engine = i++);
 
@@ -412,6 +426,7 @@ static void execute_allforone(int i915)
 	}
 
 	gem_context_destroy(i915, param.ctx_id);
+	put_ahnd(ahnd);
 }
 
 static uint32_t read_result(int timeline, uint32_t *map, int idx)
@@ -539,6 +554,7 @@ static void independent_all(int i915, const intel_ctx_t *ctx)
 	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
 	const struct intel_execution_engine2 *e;
 	igt_spin_t *spin = NULL;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	for_each_ctx_engine(i915, ctx, e) {
 		if (spin) {
@@ -546,7 +562,9 @@ static void independent_all(int i915, const intel_ctx_t *ctx)
 			spin->execbuf.flags |= e->flags;
 			gem_execbuf(i915, &spin->execbuf);
 		} else {
-			spin = igt_spin_new(i915, .ctx = ctx,
+			spin = igt_spin_new(i915,
+					    .ahnd = ahnd,
+					    .ctx = ctx,
 					    .engine = e->flags,
 					    .flags = (IGT_SPIN_NO_PREEMPTION |
 						      IGT_SPIN_POLL_RUN));
@@ -567,6 +585,7 @@ static void independent_all(int i915, const intel_ctx_t *ctx)
 	}
 	sched_yield();
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 	igt_waitchildren();
 }
 
diff --git a/tests/i915/gem_ctx_exec.c b/tests/i915/gem_ctx_exec.c
index 4d3d1c12..5691546c 100644
--- a/tests/i915/gem_ctx_exec.c
+++ b/tests/i915/gem_ctx_exec.c
@@ -178,6 +178,7 @@ static void norecovery(int i915)
 		};
 		int expect = pass == 0 ? -EIO : 0;
 		igt_spin_t *spin;
+		uint64_t ahnd = get_reloc_ahnd(i915, param.ctx_id);
 
 		gem_context_set_param(i915, &param);
 
@@ -185,7 +186,9 @@ static void norecovery(int i915)
 		gem_context_get_param(i915, &param);
 		igt_assert_eq(param.value, pass);
 
-		spin = __igt_spin_new(i915, .ctx = ctx,
+		spin = __igt_spin_new(i915,
+				      .ahnd = ahnd,
+				      .ctx = ctx,
 				      .flags = IGT_SPIN_POLL_RUN);
 		igt_spin_busywait_until_started(spin);
 
@@ -196,6 +199,7 @@ static void norecovery(int i915)
 		igt_spin_free(i915, spin);
 
 		intel_ctx_destroy(i915, ctx);
+		put_ahnd(ahnd);
 	}
 
 	 igt_disallow_hang(i915, hang);
@@ -271,6 +275,7 @@ static void nohangcheck_hostile(int i915)
 	const intel_ctx_t *ctx;
 	int err = 0;
 	int dir;
+	uint64_t ahnd;
 
 	/*
 	 * Even if the user disables hangcheck during their context,
@@ -284,6 +289,7 @@ static void nohangcheck_hostile(int i915)
 
 	ctx = intel_ctx_create_all_physical(i915);
 	hang = igt_allow_hang(i915, ctx->id, 0);
+	ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	igt_require(__enable_hangcheck(dir, false));
 
@@ -295,7 +301,9 @@ static void nohangcheck_hostile(int i915)
 		gem_engine_property_printf(i915, e->name,
 					   "preempt_timeout_ms", "%d", 50);
 
-		spin = __igt_spin_new(i915, .ctx = ctx,
+		spin = __igt_spin_new(i915,
+				      .ahnd = ahnd,
+				      .ctx = ctx,
 				      .engine = e->flags,
 				      .flags = (IGT_SPIN_NO_PREEMPTION |
 						IGT_SPIN_FENCE_OUT));
@@ -333,6 +341,7 @@ static void nohangcheck_hostile(int i915)
 
 	igt_assert_eq(sync_fence_status(fence), -EIO);
 	close(fence);
+	put_ahnd(ahnd);
 
 	close(dir);
 	close(i915);
@@ -345,10 +354,14 @@ static void close_race(int i915)
 	const intel_ctx_t **ctx;
 	uint32_t *ctx_id;
 	igt_spin_t *spin;
+	uint64_t ahnd;
 
 	/* Check we can execute a polling spinner */
 	base_ctx = intel_ctx_create(i915, NULL);
-	igt_spin_free(i915, igt_spin_new(i915, .ctx = base_ctx,
+	ahnd = get_reloc_ahnd(i915, base_ctx->id);
+	igt_spin_free(i915, igt_spin_new(i915,
+					 .ahnd = ahnd,
+					 .ctx = base_ctx,
 					 .flags = IGT_SPIN_POLL_RUN));
 
 	ctx = calloc(ncpus, sizeof(*ctx));
@@ -361,7 +374,10 @@ static void close_race(int i915)
 	}
 
 	igt_fork(child, ncpus) {
-		spin = __igt_spin_new(i915, .ctx = base_ctx,
+		ahnd = get_reloc_ahnd(i915, base_ctx->id);
+		spin = __igt_spin_new(i915,
+				      .ahnd = ahnd,
+				      .ctx = base_ctx,
 				      .flags = IGT_SPIN_POLL_RUN);
 		igt_spin_end(spin);
 		gem_sync(i915, spin->handle);
@@ -403,6 +419,7 @@ static void close_race(int i915)
 		}
 
 		igt_spin_free(i915, spin);
+		put_ahnd(ahnd);
 	}
 
 	igt_until_timeout(5) {
@@ -474,11 +491,22 @@ igt_main
 	igt_subtest("basic-nohangcheck")
 		nohangcheck_hostile(fd);
 
-	igt_subtest("basic-close-race")
-		close_race(fd);
+	igt_subtest_group {
+		igt_fixture {
+			intel_allocator_multiprocess_start();
+		}
+
+		igt_subtest("basic-close-race")
+			close_race(fd);
+
+		igt_fixture {
+			intel_allocator_multiprocess_stop();
+		}
+	}
 
 	igt_subtest("reset-pin-leak") {
 		int i;
+		uint64_t ahnd;
 
 		/*
 		 * Use an explicit context to isolate the test from
@@ -486,6 +514,7 @@ igt_main
 		 * default context (eg. if they would be eliminated).
 		 */
 		ctx_id = gem_context_create(fd);
+		ahnd = get_reloc_ahnd(fd, ctx_id);
 
 		/*
 		 * Iterate enough times that the kernel will
@@ -493,7 +522,7 @@
 		 * the last context is leaked at every reset.
 		 */
 		for (i = 0; i < 20; i++) {
-			igt_hang_t hang = igt_hang_ring(fd, 0);
+			igt_hang_t hang = igt_hang_ring_with_ahnd(fd, 0, ahnd);
 
 			igt_assert_eq(exec(fd, handle, 0, 0), 0);
 			igt_assert_eq(exec(fd, handle, 0, ctx_id), 0);
@@ -501,5 +530,6 @@
 		}
 
 		gem_context_destroy(fd, ctx_id);
+		put_ahnd(ahnd);
 	}
 }
diff --git a/tests/i915/gem_ctx_freq.c b/tests/i915/gem_ctx_freq.c
index a34472de..a29fe68b 100644
--- a/tests/i915/gem_ctx_freq.c
+++ b/tests/i915/gem_ctx_freq.c
@@ -124,6 +124,7 @@ static void sysfs_range(int i915)
 	igt_spin_t *spin;
 	double measured;
 	int pmu;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
 
 	/*
 	 * The sysfs interface sets the global limits and overrides the
@@ -145,7 +146,7 @@ static void sysfs_range(int i915)
 		uint32_t cur, discard;
 
 		gem_quiescent_gpu(i915);
-		spin = igt_spin_new(i915);
+		spin = igt_spin_new(i915, .ahnd = ahnd);
 		usleep(10000);
 
 		set_sysfs_freq(sys_freq, sys_freq);
@@ -164,6 +165,7 @@ static void sysfs_range(int i915)
 	gem_quiescent_gpu(i915);
 
 	close(pmu);
+	put_ahnd(ahnd);
 
 #undef N_STEPS
 }
diff --git a/tests/i915/gem_ctx_isolation.c b/tests/i915/gem_ctx_isolation.c
index 24ddde0b..c57e0507 100644
--- a/tests/i915/gem_ctx_isolation.c
+++ b/tests/i915/gem_ctx_isolation.c
@@ -277,6 +277,7 @@ static void tmpl_regs(int fd,
 }
 
 static uint32_t read_regs(int fd,
+			  uint64_t ahnd,
 			  const intel_ctx_t *ctx,
 			  const struct intel_execution_engine2 *e,
 			  unsigned int flags)
@@ -305,6 +306,12 @@ static uint32_t read_regs(int fd,
 	obj[0].handle = gem_create(fd, regs_size);
 	obj[1].handle = gem_create(fd, batch_size);
 	obj[1].relocs_ptr = to_user_pointer(reloc);
+	if (ahnd) {
+		obj[0].offset = get_offset(ahnd, obj[0].handle, regs_size, 0);
+		obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[1].offset = get_offset(ahnd, obj[1].handle, batch_size, 0);
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+	}
 
 	b = batch = gem_mmap__cpu(fd, obj[1].handle, 0, batch_size, PROT_WRITE);
 	gem_set_domain(fd, obj[1].handle,
@@ -334,14 +341,14 @@ static uint32_t read_regs(int fd,
 			reloc[n].delta = offset;
 			reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
 			reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
-			*b++ = offset;
+			*b++ = obj[0].offset + offset;
 			if (r64b)
-				*b++ = 0;
+				*b++ = (obj[0].offset + offset) >> 32;
 			n++;
 		}
 	}
 
-	obj[1].relocation_count = n;
+	obj[1].relocation_count = !ahnd ? n : 0;
 	*b++ = MI_BATCH_BUFFER_END;
 	munmap(batch, batch_size);
 
@@ -353,6 +360,7 @@ static uint32_t read_regs(int fd,
 	gem_execbuf(fd, &execbuf);
 	gem_close(fd, obj[1].handle);
 	free(reloc);
+	put_offset(ahnd, obj[1].handle);
 
 	return obj[0].handle;
 }
@@ -419,6 +427,7 @@ static void write_regs(int fd,
 }
 
 static void restore_regs(int fd,
+			 uint64_t ahnd,
 			 const intel_ctx_t *ctx,
 			 const struct intel_execution_engine2 *e,
 			 unsigned int flags,
@@ -434,6 +443,7 @@ static void restore_regs(int fd,
 	struct drm_i915_gem_relocation_entry *reloc;
 	unsigned int batch_size, n;
 	uint32_t *batch, *b;
+	uint32_t regs_size = NUM_REGS * sizeof(uint32_t);
 
 	if (gen < 7) /* no LRM */
 		return;
@@ -448,6 +458,12 @@ static void restore_regs(int fd,
 	obj[0].handle = regs;
 	obj[1].handle = gem_create(fd, batch_size);
 	obj[1].relocs_ptr = to_user_pointer(reloc);
+	if (ahnd) {
+		obj[0].offset = get_offset(ahnd, obj[0].handle, regs_size, 0);
+		obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[1].offset = get_offset(ahnd, obj[1].handle, batch_size, 0);
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+	}
 
 	b = batch = gem_mmap__cpu(fd, obj[1].handle, 0, batch_size, PROT_WRITE);
 	gem_set_domain(fd, obj[1].handle,
@@ -477,13 +493,13 @@ static void restore_regs(int fd,
 			reloc[n].delta = offset;
 			reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
 			reloc[n].write_domain = 0;
-			*b++ = offset;
+			*b++ = obj[0].offset + offset;
 			if (r64b)
-				*b++ = 0;
+				*b++ = (obj[0].offset + offset) >> 32;
 			n++;
 		}
 	}
-	obj[1].relocation_count = n;
+	obj[1].relocation_count = !ahnd ? n : 0;
 	*b++ = MI_BATCH_BUFFER_END;
 	munmap(batch, batch_size);
 
@@ -494,6 +510,7 @@ static void restore_regs(int fd,
 	execbuf.rsvd1 = ctx->id;
 	gem_execbuf(fd, &execbuf);
 	gem_close(fd, obj[1].handle);
+	put_offset(ahnd, obj[1].handle);
 }
 
 __attribute__((unused))
@@ -622,15 +639,20 @@ static void nonpriv(int fd, const intel_ctx_cfg_t *cfg,
 		igt_spin_t *spin = NULL;
 		const intel_ctx_t *ctx;
 		uint32_t regs[2], tmpl;
+		uint64_t ahnd;
 
 		ctx = intel_ctx_create(fd, cfg);
+		ahnd = get_reloc_ahnd(fd, ctx->id);
 
-		tmpl = read_regs(fd, ctx, e, flags);
-		regs[0] = read_regs(fd, ctx, e, flags);
+		tmpl = read_regs(fd, ahnd, ctx, e, flags);
+		regs[0] = read_regs(fd, ahnd, ctx, e, flags);
 
 		tmpl_regs(fd, e, tmpl, values[v]);
 
-		spin = igt_spin_new(fd, .ctx = ctx, .engine = e->flags);
+		spin = igt_spin_new(fd,
+				    .ahnd = ahnd,
+				    .ctx = ctx,
+				    .engine = e->flags);
 
 		igt_debug("%s[%d]: Setting all registers to 0x%08x\n",
 			  __func__, v, values[v]);
@@ -638,15 +660,18 @@ static void nonpriv(int fd, const intel_ctx_cfg_t *cfg,
 
 		if (flags & DIRTY2) {
 			const intel_ctx_t *sw = intel_ctx_create(fd, &ctx->cfg);
+			uint64_t ahnd_sw = get_reloc_ahnd(fd, sw->id);
 			igt_spin_t *syncpt, *dirt;
 
 			/* Explicit sync to keep the switch between write/read */
 			syncpt = igt_spin_new(fd,
+					      .ahnd = ahnd,
 					      .ctx = ctx,
 					      .engine = e->flags,
 					      .flags = IGT_SPIN_FENCE_OUT);
 
 			dirt = igt_spin_new(fd,
+					    .ahnd = ahnd_sw,
 					    .ctx = sw,
 					    .engine = e->flags,
 					    .fence = syncpt->out_fence,
@@ -655,6 +680,7 @@ static void nonpriv(int fd, const intel_ctx_cfg_t *cfg,
 			igt_spin_free(fd, syncpt);
 
 			syncpt = igt_spin_new(fd,
+					      .ahnd = ahnd,
 					      .ctx = ctx,
 					      .engine = e->flags,
 					      .fence = dirt->out_fence,
@@ -663,15 +689,16 @@ static void nonpriv(int fd, const intel_ctx_cfg_t *cfg,
 
 			igt_spin_free(fd, syncpt);
 			intel_ctx_destroy(fd, sw);
+			put_ahnd(ahnd_sw);
 		}
 
-		regs[1] = read_regs(fd, ctx, e, flags);
+		regs[1] = read_regs(fd, ahnd, ctx, e, flags);
 
 		/*
 		 * Restore the original register values before the HW idles.
 		 * Or else it may never restart!
 		 */
-		restore_regs(fd, ctx, e, flags, regs[0]);
+		restore_regs(fd, ahnd, ctx, e, flags, regs[0]);
 
 		igt_spin_free(fd, spin);
 
@@ -681,6 +708,7 @@ static void nonpriv(int fd, const intel_ctx_cfg_t *cfg,
 			gem_close(fd, regs[n]);
 		intel_ctx_destroy(fd, ctx);
 		gem_close(fd, tmpl);
+		put_ahnd(ahnd);
 	}
 }
 
@@ -706,11 +734,16 @@ static void isolation(int fd, const intel_ctx_cfg_t *cfg,
 		igt_spin_t *spin = NULL;
 		const intel_ctx_t *ctx[2];
 		uint32_t regs[2], tmp;
+		uint64_t ahnd[2];
 
 		ctx[0] = intel_ctx_create(fd, cfg);
-		regs[0] = read_regs(fd, ctx[0], e, flags);
+		ahnd[0] = get_reloc_ahnd(fd, ctx[0]->id);
+		regs[0] = read_regs(fd, ahnd[0], ctx[0], e, flags);
 
-		spin = igt_spin_new(fd, .ctx = ctx[0], .engine = e->flags);
+		spin = igt_spin_new(fd,
+				    .ahnd = ahnd[0],
+				    .ctx = ctx[0],
+				    .engine = e->flags);
 
 		if (flags & DIRTY1) {
 			igt_debug("%s[%d]: Setting all registers of ctx 0 to 0x%08x\n",
@@ -727,7 +760,8 @@ static void isolation(int fd, const intel_ctx_cfg_t *cfg,
 		 * see the corruption from the previous context instead!
 		 */
 		ctx[1] = intel_ctx_create(fd, cfg);
-		regs[1] = read_regs(fd, ctx[1], e, flags);
+		ahnd[1] = get_reloc_ahnd(fd, ctx[1]->id);
+		regs[1] = read_regs(fd, ahnd[1], ctx[1], e, flags);
 
 		if (flags & DIRTY2) {
 			igt_debug("%s[%d]: Setting all registers of ctx 1 to 0x%08x\n",
@@ -739,8 +773,8 @@ static void isolation(int fd, const intel_ctx_cfg_t *cfg,
 		 * Restore the original register values before the HW idles.
 		 * Or else it may never restart!
 		 */
-		tmp = read_regs(fd, ctx[0], e, flags);
-		restore_regs(fd, ctx[0], e, flags, regs[0]);
+		tmp = read_regs(fd, ahnd[0], ctx[0], e, flags);
+		restore_regs(fd, ahnd[0], ctx[0], e, flags, regs[0]);
 
 		igt_spin_free(fd, spin);
 
@@ -752,6 +786,7 @@ static void isolation(int fd, const intel_ctx_cfg_t *cfg,
 		for (int n = 0; n < ARRAY_SIZE(ctx); n++) {
 			gem_close(fd, regs[n]);
 			intel_ctx_destroy(fd, ctx[n]);
+			put_ahnd(ahnd[n]);
 		}
 		gem_close(fd, tmp);
 	}
@@ -781,7 +816,9 @@ static void inject_reset_context(int fd, const intel_ctx_cfg_t *cfg,
 				 const struct intel_execution_engine2 *e)
 {
 	const intel_ctx_t *ctx = create_reset_context(fd, cfg);
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 	struct igt_spin_factory opts = {
+		.ahnd = ahnd,
 		.ctx = ctx,
 		.engine = e->flags,
 		.flags = IGT_SPIN_FAST,
@@ -826,21 +863,27 @@ static void preservation(int fd, const intel_ctx_cfg_t *cfg,
 	const unsigned int num_values = ARRAY_SIZE(values);
 	const intel_ctx_t *ctx[num_values + 1];
 	uint32_t regs[num_values + 1][2];
+	uint64_t ahnd[num_values + 1];
 	igt_spin_t *spin;
 
 	gem_quiescent_gpu(fd);
 
 	ctx[num_values] = intel_ctx_create(fd, cfg);
-	spin = igt_spin_new(fd, .ctx = ctx[num_values], .engine = e->flags);
-	regs[num_values][0] = read_regs(fd, ctx[num_values], e, flags);
+	ahnd[num_values] = get_reloc_ahnd(fd, ctx[num_values]->id);
+	spin = igt_spin_new(fd,
+			    .ahnd = ahnd[num_values],
+			    .ctx = ctx[num_values],
+			    .engine = e->flags);
+	regs[num_values][0] = read_regs(fd, ahnd[num_values], ctx[num_values],
+					e, flags);
 	for (int v = 0; v < num_values; v++) {
 		ctx[v] = intel_ctx_create(fd, cfg);
+		ahnd[v] = get_reloc_ahnd(fd, ctx[v]->id);
 		write_regs(fd, ctx[v], e, flags, values[v]);
 
-		regs[v][0] = read_regs(fd, ctx[v], e, flags);
-
+		regs[v][0] = read_regs(fd, ahnd[v], ctx[v], e, flags);
 	}
-	gem_close(fd, read_regs(fd, ctx[num_values], e, flags));
+	gem_close(fd, read_regs(fd, ahnd[num_values], ctx[num_values], e, flags));
 	igt_spin_free(fd, spin);
 
 	if (flags & RESET)
@@ -871,10 +914,14 @@ static void preservation(int fd, const intel_ctx_cfg_t *cfg,
 		break;
 	}
 
-	spin = igt_spin_new(fd, .ctx = ctx[num_values], .engine = e->flags);
+	spin = igt_spin_new(fd,
+			    .ahnd = ahnd[num_values],
+			    .ctx = ctx[num_values],
+			    .engine = e->flags);
 	for (int v = 0; v < num_values; v++)
-		regs[v][1] = read_regs(fd, ctx[v], e, flags);
-	regs[num_values][1] = read_regs(fd, ctx[num_values], e, flags);
+		regs[v][1] = read_regs(fd, ahnd[v], ctx[v], e, flags);
+	regs[num_values][1] = read_regs(fd, ahnd[num_values], ctx[num_values],
+					e, flags);
 	igt_spin_free(fd, spin);
 
 	for (int v = 0; v < num_values; v++) {
@@ -886,9 +933,11 @@ static void preservation(int fd, const intel_ctx_cfg_t *cfg,
 		gem_close(fd, regs[v][0]);
 		gem_close(fd, regs[v][1]);
 		intel_ctx_destroy(fd, ctx[v]);
+		put_ahnd(ahnd[v]);
 	}
 	compare_regs(fd, e, regs[num_values][0], regs[num_values][1], "clean");
 	intel_ctx_destroy(fd, ctx[num_values]);
+	put_ahnd(ahnd[num_values]);
 }
 
 static unsigned int __has_context_isolation(int fd)
diff --git a/tests/i915/gem_ctx_param.c b/tests/i915/gem_ctx_param.c
index c795f1b4..11bc08e3 100644
--- a/tests/i915/gem_ctx_param.c
+++ b/tests/i915/gem_ctx_param.c
@@ -165,6 +165,7 @@ static void test_vm(int i915)
 	int err;
 	uint32_t parent, child;
 	igt_spin_t *spin;
+	uint64_t ahnd;
 
 	/*
 	 * Proving 2 contexts share the same GTT is quite tricky as we have no
@@ -190,7 +191,8 @@ static void test_vm(int i915)
 
 	/* Test that we can't set the VM after we've done an execbuf */
 	arg.ctx_id = gem_context_create(i915);
-	spin = igt_spin_new(i915, .ctx_id = arg.ctx_id);
+	ahnd = get_reloc_ahnd(i915, arg.ctx_id);
+	spin = igt_spin_new(i915, .ahnd = ahnd, .ctx_id = arg.ctx_id);
 	igt_spin_free(i915, spin);
 	arg.value = gem_vm_create(i915);
 	err = __gem_context_set_param(i915, &arg);
@@ -202,7 +204,7 @@ static void test_vm(int i915)
 	child = gem_context_create(i915);
 
 	/* Create a background spinner to keep the engines busy */
-	spin = igt_spin_new(i915);
+	spin = igt_spin_new(i915, .ahnd = ahnd);
 	for (int i = 0; i < 16; i++) {
 		spin->execbuf.rsvd1 = gem_context_create(i915);
 		__gem_context_set_priority(i915, spin->execbuf.rsvd1, 1023);
@@ -259,6 +261,7 @@ static void test_vm(int i915)
 	igt_spin_free(i915, spin);
 	gem_sync(i915, batch.handle);
 	gem_close(i915, batch.handle);
+	put_ahnd(ahnd);
 }
 
 static void test_set_invalid_param(int fd, uint64_t param, uint64_t value)
diff --git a/tests/i915/gem_eio.c b/tests/i915/gem_eio.c
index 76a15274..d9ff1981 100644
--- a/tests/i915/gem_eio.c
+++ b/tests/i915/gem_eio.c
@@ -174,10 +174,11 @@ static int __gem_wait(int fd, uint32_t handle, int64_t timeout)
 	return err;
 }
 
-static igt_spin_t * __spin_poll(int fd, const intel_ctx_t *ctx,
-				unsigned long flags)
+static igt_spin_t *__spin_poll(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
+			       unsigned long flags)
 {
 	struct igt_spin_factory opts = {
+		.ahnd = ahnd,
 		.ctx = ctx,
 		.engine = flags,
 		.flags = IGT_SPIN_NO_PREEMPTION | IGT_SPIN_FENCE_OUT,
@@ -206,10 +207,10 @@ static void __spin_wait(int fd, igt_spin_t *spin)
 	}
 }
 
-static igt_spin_t * spin_sync(int fd, const intel_ctx_t *ctx,
-			      unsigned long flags)
+static igt_spin_t *spin_sync(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
+			     unsigned long flags)
 {
-	igt_spin_t *spin = __spin_poll(fd, ctx, flags);
+	igt_spin_t *spin = __spin_poll(fd, ahnd, ctx, flags);
 
 	__spin_wait(fd, spin);
 
@@ -346,6 +347,7 @@ static void __test_banned(int fd)
 
 	igt_until_timeout(5) {
 		igt_spin_t *hang;
+		uint64_t ahnd;
 
 		if (__gem_execbuf(fd, &execbuf) == -EIO) {
 			uint32_t ctx = 0;
@@ -366,9 +368,11 @@ static void __test_banned(int fd)
 		}
 
 		/* Trigger a reset, making sure we are detected as guilty */
-		hang = spin_sync(fd, intel_ctx_0(fd), 0);
+		ahnd = get_reloc_ahnd(fd, 0);
+		hang = spin_sync(fd, ahnd, intel_ctx_0(fd), 0);
 		trigger_reset(fd);
 		igt_spin_free(fd, hang);
+		put_ahnd(ahnd);
 
 		count++;
 	}
@@ -428,6 +432,7 @@ static void test_banned(int fd)
 static void test_wait(int fd, unsigned int flags, unsigned int wait)
 {
 	igt_spin_t *hang;
+	uint64_t ahnd;
 
 	fd = reopen_device(fd);
 
@@ -441,12 +446,14 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 	else
 		igt_require(i915_reset_control(fd, true));
 
-	hang = spin_sync(fd, intel_ctx_0(fd), I915_EXEC_DEFAULT);
+	ahnd = get_reloc_ahnd(fd, 0);
+	hang = spin_sync(fd, ahnd, intel_ctx_0(fd), I915_EXEC_DEFAULT);
 
 	igt_debugfs_dump(fd, "i915_engine_info");
 	check_wait(fd, hang->handle, wait, NULL);
 
 	igt_spin_free(fd, hang);
+	put_ahnd(ahnd);
 
 	igt_require(i915_reset_control(fd, true));
 
@@ -490,8 +497,10 @@ static void test_inflight(int fd, unsigned int wait)
 		struct drm_i915_gem_exec_object2 obj[2];
 		struct drm_i915_gem_execbuffer2 execbuf;
 		igt_spin_t *hang;
+		uint64_t ahnd;
 
 		fd = reopen_device(parent_fd);
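+		/*
+		 * Allocator handles are tied to the fd, so grab a fresh one
+		 * after reopening the device.
+		 */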
+		ahnd = get_reloc_ahnd(fd, 0);
 
 		memset(obj, 0, sizeof(obj));
 		obj[0].flags = EXEC_OBJECT_WRITE;
@@ -502,7 +511,7 @@ static void test_inflight(int fd, unsigned int wait)
 		igt_debug("Starting %s on engine '%s'\n", __func__, e->name);
 		igt_require(i915_reset_control(fd, false));
 
-		hang = spin_sync(fd, intel_ctx_0(fd), eb_ring(e));
+		hang = spin_sync(fd, ahnd, intel_ctx_0(fd), eb_ring(e));
 		obj[0].handle = hang->handle;
 
 		memset(&execbuf, 0, sizeof(execbuf));
@@ -524,6 +533,7 @@ static void test_inflight(int fd, unsigned int wait)
 			close(fence[n]);
 		}
 		igt_spin_free(fd, hang);
+		put_ahnd(ahnd);
 
 		igt_assert(i915_reset_control(fd, true));
 		trigger_reset(fd);
@@ -541,6 +551,7 @@ static void test_inflight_suspend(int fd)
 	int fence[64]; /* mostly conservative estimate of ring size */
 	igt_spin_t *hang;
 	int max;
+	uint64_t ahnd;
 
 	/* Do a suspend first so that we don't skip inside the test */
 	igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_DEVICES);
@@ -553,13 +564,14 @@ static void test_inflight_suspend(int fd)
 	fd = reopen_device(fd);
 	igt_require(gem_has_exec_fence(fd));
 	igt_require(i915_reset_control(fd, false));
+	ahnd = get_reloc_ahnd(fd, 0);
 
 	memset(obj, 0, sizeof(obj));
 	obj[0].flags = EXEC_OBJECT_WRITE;
 	obj[1].handle = gem_create(fd, 4096);
 	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
 
-	hang = spin_sync(fd, intel_ctx_0(fd), 0);
+	hang = spin_sync(fd, ahnd, intel_ctx_0(fd), 0);
 	obj[0].handle = hang->handle;
 
 	memset(&execbuf, 0, sizeof(execbuf));
@@ -584,6 +596,7 @@ static void test_inflight_suspend(int fd)
 		close(fence[n]);
 	}
 	igt_spin_free(fd, hang);
+	put_ahnd(ahnd);
 
 	igt_assert(i915_reset_control(fd, true));
 	trigger_reset(fd);
@@ -624,6 +637,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 		igt_spin_t *hang;
 		const intel_ctx_t *ctx[64];
 		int fence[64];
+		uint64_t ahnd;
 
 		fd = reopen_device(parent_fd);
 
@@ -640,7 +654,8 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 		obj[1].handle = gem_create(fd, 4096);
 		gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
 
-		hang = spin_sync(fd, intel_ctx_0(fd), eb_ring(e));
+		ahnd = get_reloc_ahnd(fd, 0);
+		hang = spin_sync(fd, ahnd, intel_ctx_0(fd), eb_ring(e));
 		obj[0].handle = hang->handle;
 
 		memset(&execbuf, 0, sizeof(execbuf));
@@ -667,6 +682,7 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 		}
 		igt_spin_free(fd, hang);
 		gem_close(fd, obj[1].handle);
+		put_ahnd(ahnd);
 
 		igt_assert(i915_reset_control(fd, true));
 		trigger_reset(fd);
@@ -685,6 +701,7 @@ static void test_inflight_external(int fd)
 	struct drm_i915_gem_exec_object2 obj;
 	igt_spin_t *hang;
 	uint32_t fence;
+	uint64_t ahnd;
 	IGT_CORK_FENCE(cork);
 
 	fd = reopen_device(fd);
@@ -694,7 +711,8 @@ static void test_inflight_external(int fd)
 	fence = igt_cork_plug(&cork, fd);
 
 	igt_require(i915_reset_control(fd, false));
-	hang = __spin_poll(fd, intel_ctx_0(fd), 0);
+	ahnd = get_reloc_ahnd(fd, 0);
+	hang = __spin_poll(fd, ahnd, intel_ctx_0(fd), 0);
 
 	memset(&obj, 0, sizeof(obj));
 	obj.handle = gem_create(fd, 4096);
@@ -725,6 +743,7 @@ static void test_inflight_external(int fd)
 	close(fence);
 
 	igt_spin_free(fd, hang);
+	put_ahnd(ahnd);
 	igt_assert(i915_reset_control(fd, true));
 	trigger_reset(fd);
 	close(fd);
@@ -738,11 +757,13 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	int fences[I915_EXEC_RING_MASK + 1];
 	unsigned nfence = 0;
 	igt_spin_t *hang;
+	uint64_t ahnd;
 
 	fd = reopen_device(fd);
 	igt_require(gem_has_exec_fence(fd));
 	igt_require(i915_reset_control(fd, false));
-	hang = spin_sync(fd, intel_ctx_0(fd), 0);
+	ahnd = get_reloc_ahnd(fd, 0);
+	hang = spin_sync(fd, ahnd, intel_ctx_0(fd), 0);
 
 	memset(obj, 0, sizeof(obj));
 	obj[0].handle = hang->handle;
@@ -771,13 +792,14 @@ static void test_inflight_internal(int fd, unsigned int wait)
 		close(fences[nfence]);
 	}
 	igt_spin_free(fd, hang);
+	put_ahnd(ahnd);
 
 	igt_assert(i915_reset_control(fd, true));
 	trigger_reset(fd);
 	close(fd);
 }
 
-static void reset_stress(int fd, const intel_ctx_t *ctx0,
+static void reset_stress(int fd, uint64_t ahnd, const intel_ctx_t *ctx0,
 			 const char *name, unsigned int engine,
 			 unsigned int flags)
 {
@@ -815,7 +837,7 @@ static void reset_stress(int fd, const intel_ctx_t *ctx0,
 		 * Start executing a spin batch with some queued batches
 		 * against a different context after it.
 		 */
-		hang = spin_sync(fd, ctx0, engine);
+		hang = spin_sync(fd, ahnd, ctx0, engine);
 
 		execbuf.rsvd1 = ctx->id;
 		for (i = 0; i < max; i++)
@@ -863,11 +885,13 @@ static void reset_stress(int fd, const intel_ctx_t *ctx0,
 static void test_reset_stress(int fd, unsigned int flags)
 {
 	const intel_ctx_t *ctx0 = context_create_safe(fd);
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx0->id);
 
 	for_each_ring(e, fd)
-		reset_stress(fd, ctx0, e->name, eb_ring(e), flags);
+		reset_stress(fd, ahnd, ctx0, e->name, eb_ring(e), flags);
 
 	intel_ctx_destroy(fd, ctx0);
+	put_ahnd(ahnd);
 }
 
 /*
@@ -928,13 +952,15 @@ static void test_kms(int i915, igt_display_t *dpy)
 	test_inflight(i915, 0);
 	if (gem_has_contexts(i915)) {
 		const intel_ctx_t *ctx = context_create_safe(i915);
+		uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
 
-		reset_stress(i915, ctx,
+		reset_stress(i915, ahnd, ctx,
 			     "default", I915_EXEC_DEFAULT, 0);
-		reset_stress(i915, ctx,
+		reset_stress(i915, ahnd, ctx,
 			     "default", I915_EXEC_DEFAULT, TEST_WEDGE);
 
 		intel_ctx_destroy(i915, ctx);
+		put_ahnd(ahnd);
 	}
 
 	*shared = 1;
diff --git a/tests/i915/gem_exec_async.c b/tests/i915/gem_exec_async.c
index a3be6b3e..41f3b752 100644
--- a/tests/i915/gem_exec_async.c
+++ b/tests/i915/gem_exec_async.c
@@ -27,8 +27,11 @@
 
 IGT_TEST_DESCRIPTION("Check that we can issue concurrent writes across the engines.");
 
-static void store_dword(int fd, const intel_ctx_t *ctx, unsigned ring,
-			uint32_t target, uint32_t offset, uint32_t value)
+#define SZ_1M (1024 * 1024)
+
+static void store_dword(int fd, int id, const intel_ctx_t *ctx,
+			unsigned ring, uint32_t target, uint64_t target_offset,
+			uint32_t offset, uint32_t value)
 {
 	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
 	struct drm_i915_gem_exec_object2 obj[2];
@@ -50,6 +53,15 @@ static void store_dword(int fd, const intel_ctx_t *ctx, unsigned ring,
 	obj[0].flags = EXEC_OBJECT_ASYNC;
 	obj[1].handle = gem_create(fd, 4096);
 
+	if (id) {
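+		/*
+		 * Softpin: pin the scratch at its allocator offset and give
+		 * each engine's batch its own 1MiB slot so that concurrent
+		 * writers cannot collide.
+		 */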
+		obj[0].offset = target_offset;
+		obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
+				EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj[1].offset = (id + 1) * SZ_1M;
+		obj[1].flags |= EXEC_OBJECT_PINNED |
+				EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	}
+
 	memset(&reloc, 0, sizeof(reloc));
 	reloc.target_handle = obj[0].handle;
 	reloc.presumed_offset = 0;
@@ -58,13 +70,13 @@ static void store_dword(int fd, const intel_ctx_t *ctx, unsigned ring,
 	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 	obj[1].relocs_ptr = to_user_pointer(&reloc);
-	obj[1].relocation_count = 1;
+	obj[1].relocation_count = !id ? 1 : 0;
 
 	i = 0;
 	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		batch[++i] = offset;
-		batch[++i] = 0;
+		batch[++i] = target_offset + offset;
+		batch[++i] = (target_offset + offset) >> 32;
 	} else if (gen >= 4) {
 		batch[++i] = 0;
 		batch[++i] = offset;
@@ -89,6 +101,8 @@ static void one(int fd, const intel_ctx_t *ctx,
 	uint32_t scratch = gem_create(fd, 4096);
 	igt_spin_t *spin;
 	uint32_t *result;
+	uint64_t ahnd = get_simple_l2h_ahnd(fd, ctx->id);
+	uint64_t scratch_offset = get_offset(ahnd, scratch, 4096, 0);
 	int i;
 
 	/*
@@ -96,11 +110,16 @@ static void one(int fd, const intel_ctx_t *ctx,
 	 * the scratch for write. Then on the other rings try and
 	 * write into that target. If it blocks we hang the GPU...
 	 */
-	spin = igt_spin_new(fd, .ctx = ctx, .engine = engine,
+	spin = igt_spin_new(fd,
+			    .ahnd = ahnd,
+			    .ctx = ctx,
+			    .engine = engine,
 			    .dependency = scratch);
 
 	i = 0;
 	for_each_ctx_engine(fd, ctx, e) {
+		int id = ahnd ? (i + 1) : 0;
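+		/* A non-zero id selects the softpin path in store_dword(). */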
+
 		if (e->flags == engine)
 			continue;
 
@@ -108,10 +127,15 @@ static void one(int fd, const intel_ctx_t *ctx,
 			continue;
 
 		if (flags & FORKED) {
-			igt_fork(child, 1)
-				store_dword(fd, ctx, e->flags, scratch, 4*i, ~i);
+			igt_fork(child, 1) {
+				store_dword(fd, id, ctx, e->flags,
+					    scratch, scratch_offset,
+					    4*i, ~i);
+			}
 		} else {
-			store_dword(fd, ctx, e->flags, scratch, 4*i, ~i);
+			store_dword(fd, id, ctx, e->flags,
+				    scratch, scratch_offset,
+				    4*i, ~i);
 		}
 		i++;
 	}
@@ -124,6 +148,7 @@ static void one(int fd, const intel_ctx_t *ctx,
 
 	igt_spin_free(fd, spin);
 	gem_close(fd, scratch);
+	put_ahnd(ahnd);
 }
 
 static bool has_async_execbuf(int fd)
@@ -162,8 +187,18 @@ igt_main
 	test_each_engine("concurrent-writes", fd, ctx, e)
 		one(fd, ctx, e->flags, 0);
 
-	test_each_engine("forked-writes", fd, ctx, e)
-		one(fd, ctx, e->flags, FORKED);
+	igt_subtest_group {
+		igt_fixture {
+			intel_allocator_multiprocess_start();
+		}
+
+		test_each_engine("forked-writes", fd, ctx, e)
+			one(fd, ctx, e->flags, FORKED);
+
+		igt_fixture {
+			intel_allocator_multiprocess_stop();
+		}
+	}
 
 	igt_fixture {
 		igt_stop_hang_detector();
diff --git a/tests/i915/gem_exec_big.c b/tests/i915/gem_exec_big.c
index 1f8c720b..9ea49eec 100644
--- a/tests/i915/gem_exec_big.c
+++ b/tests/i915/gem_exec_big.c
@@ -303,6 +303,7 @@ igt_main
 	igt_fixture {
 		i915 = drm_open_driver(DRIVER_INTEL);
 		igt_require_gem(i915);
+		igt_require(gem_has_relocations(i915));
 
 		use_64bit_relocs = intel_gen(intel_get_drm_devid(i915)) >= 8;
 	}
diff --git a/tests/i915/gem_exec_capture.c b/tests/i915/gem_exec_capture.c
index f59cb09d..6e817c46 100644
--- a/tests/i915/gem_exec_capture.c
+++ b/tests/i915/gem_exec_capture.c
@@ -33,6 +33,8 @@
 
 IGT_TEST_DESCRIPTION("Check that we capture the user specified objects on a hang");
 
+#define ALIGNMENT (1 << 12)
+
 static void check_error_state(int dir, struct drm_i915_gem_exec_object2 *obj)
 {
 	char *error, *str;
@@ -53,7 +55,7 @@ static void check_error_state(int dir, struct drm_i915_gem_exec_object2 *obj)
 		addr = hi;
 		addr <<= 32;
 		addr |= lo;
-		igt_assert_eq_u64(addr, obj->offset);
+		igt_assert_eq_u64(addr, DECANONICAL(obj->offset));
 		found = true;
 	}
 
@@ -61,8 +63,8 @@ static void check_error_state(int dir, struct drm_i915_gem_exec_object2 *obj)
 	igt_assert(found);
 }
 
-static void __capture1(int fd, int dir, const intel_ctx_t *ctx,
-		       unsigned ring, uint32_t target)
+static void __capture1(int fd, int dir, uint64_t ahnd, const intel_ctx_t *ctx,
+		       unsigned ring, uint32_t target, uint64_t target_size)
 {
 	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
 	struct drm_i915_gem_exec_object2 obj[4];
@@ -74,32 +76,46 @@ static void __capture1(int fd, int dir, const intel_ctx_t *ctx,
 	struct drm_i915_gem_execbuffer2 execbuf;
 	uint32_t *batch, *seqno;
 	int i;
+	bool do_relocs = gem_has_relocations(fd);
 
 	memset(obj, 0, sizeof(obj));
 	obj[SCRATCH].handle = gem_create(fd, 4096);
+	obj[SCRATCH].flags = EXEC_OBJECT_WRITE;
 	obj[CAPTURE].handle = target;
 	obj[CAPTURE].flags = EXEC_OBJECT_CAPTURE;
 	obj[NOCAPTURE].handle = gem_create(fd, 4096);
 
 	obj[BATCH].handle = gem_create(fd, 4096);
-	obj[BATCH].relocs_ptr = (uintptr_t)reloc;
-	obj[BATCH].relocation_count = ARRAY_SIZE(reloc);
+
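+	/*
+	 * Assign an address to all four objects up front; without relocations
+	 * they must be pinned, and the offsets are canonicalised as execbuf
+	 * expects sign-extended 48b addresses.
+	 */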
+	for (i = 0; i < 4; i++) {
+		obj[i].offset = intel_allocator_alloc(ahnd, obj[i].handle,
+						      i == CAPTURE ? target_size : 4096,
+						      ALIGNMENT);
+		obj[i].offset = CANONICAL(obj[i].offset);
+		obj[i].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+				(do_relocs ? 0 : EXEC_OBJECT_PINNED);
+	}
 
 	memset(reloc, 0, sizeof(reloc));
 	reloc[0].target_handle = obj[BATCH].handle; /* recurse */
-	reloc[0].presumed_offset = 0;
+	reloc[0].presumed_offset = obj[BATCH].offset;
 	reloc[0].offset = 5*sizeof(uint32_t);
 	reloc[0].delta = 0;
 	reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
 	reloc[0].write_domain = 0;
 
 	reloc[1].target_handle = obj[SCRATCH].handle; /* breadcrumb */
-	reloc[1].presumed_offset = 0;
+	reloc[1].presumed_offset = obj[SCRATCH].offset;
 	reloc[1].offset = sizeof(uint32_t);
 	reloc[1].delta = 0;
 	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
 	reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
 
+	if (do_relocs) {
+		obj[BATCH].relocs_ptr = (uintptr_t)reloc;
+		obj[BATCH].relocation_count = ARRAY_SIZE(reloc);
+	}
+
 	seqno = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
 	gem_set_domain(fd, obj[SCRATCH].handle,
 			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
@@ -111,8 +127,8 @@ static void __capture1(int fd, int dir, const intel_ctx_t *ctx,
 	i = 0;
 	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[SCRATCH].offset;
+		batch[++i] = obj[SCRATCH].offset >> 32;
 	} else if (gen >= 4) {
 		batch[++i] = 0;
 		batch[++i] = 0;
@@ -128,8 +144,8 @@ static void __capture1(int fd, int dir, const intel_ctx_t *ctx,
 	batch[++i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
 	if (gen >= 8) {
 		batch[i] |= 1 << 8 | 1;
-		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[BATCH].offset;
+		batch[++i] = obj[BATCH].offset >> 32;
 	} else if (gen >= 6) {
 		batch[i] |= 1 << 8;
 		batch[++i] = 0;
@@ -165,6 +181,9 @@ static void __capture1(int fd, int dir, const intel_ctx_t *ctx,
 
 	gem_sync(fd, obj[BATCH].handle);
 
+	intel_allocator_free(ahnd, obj[BATCH].handle);
+	intel_allocator_free(ahnd, obj[NOCAPTURE].handle);
+	intel_allocator_free(ahnd, obj[SCRATCH].handle);
 	gem_close(fd, obj[BATCH].handle);
 	gem_close(fd, obj[NOCAPTURE].handle);
 	gem_close(fd, obj[SCRATCH].handle);
@@ -173,10 +192,16 @@ static void __capture1(int fd, int dir, const intel_ctx_t *ctx,
 static void capture(int fd, int dir, const intel_ctx_t *ctx, unsigned ring)
 {
 	uint32_t handle;
+	uint64_t ahnd;
 
 	handle = gem_create(fd, 4096);
-	__capture1(fd, dir, ctx, ring, handle);
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
+
+	__capture1(fd, dir, ahnd, ctx, ring, handle, 4096);
+
 	gem_close(fd, handle);
+	intel_allocator_free(ahnd, handle);
+	intel_allocator_close(ahnd);
 }
 
 static int cmp(const void *A, const void *B)
@@ -195,7 +220,7 @@ static int cmp(const void *A, const void *B)
 static struct offset {
 	uint64_t addr;
 	unsigned long idx;
-} *__captureN(int fd, int dir, unsigned ring,
+} *__captureN(int fd, int dir, uint64_t ahnd, unsigned ring,
 	      unsigned int size, int count,
 	      unsigned int flags)
 #define INCREMENTAL 0x1
@@ -208,18 +233,30 @@ static struct offset {
 	uint32_t *batch, *seqno;
 	struct offset *offsets;
 	int i;
+	bool do_relocs = gem_has_relocations(fd);
 
-	offsets = calloc(count , sizeof(*offsets));
+	offsets = calloc(count, sizeof(*offsets));
 	igt_assert(offsets);
 
 	obj = calloc(count + 2, sizeof(*obj));
 	igt_assert(obj);
 
 	obj[0].handle = gem_create(fd, 4096);
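+	/*
+	 * Pin every object when relocations are unavailable; the capture
+	 * objects keep EXEC_OBJECT_CAPTURE so they appear in the error state.
+	 */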
+	obj[0].offset = intel_allocator_alloc(ahnd, obj[0].handle,
+					      4096, ALIGNMENT);
+	obj[0].offset = CANONICAL(obj[0].offset);
+	obj[0].flags = EXEC_OBJECT_WRITE | EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+		       (do_relocs ? 0 : EXEC_OBJECT_PINNED);
+
 	for (i = 0; i < count; i++) {
 		obj[i + 1].handle = gem_create(fd, size);
-		obj[i + 1].flags =
-			EXEC_OBJECT_CAPTURE | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj[i + 1].offset = intel_allocator_alloc(ahnd, obj[i + 1].handle,
+							  size, ALIGNMENT);
+		obj[i + 1].offset = CANONICAL(obj[i + 1].offset);
+		obj[i + 1].flags = EXEC_OBJECT_CAPTURE |
+				   EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+				   (do_relocs ? 0 : EXEC_OBJECT_PINNED);
+
 		if (flags & INCREMENTAL) {
 			uint32_t *ptr;
 
@@ -232,23 +269,32 @@ static struct offset {
 	}
 
 	obj[count + 1].handle = gem_create(fd, 4096);
-	obj[count + 1].relocs_ptr = (uintptr_t)reloc;
-	obj[count + 1].relocation_count = ARRAY_SIZE(reloc);
+	obj[count + 1].offset = intel_allocator_alloc(ahnd, obj[count + 1].handle,
+						      4096, ALIGNMENT);
+	obj[count + 1].offset = CANONICAL(obj[count + 1].offset);
+	obj[count + 1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+			       (do_relocs ? 0 : EXEC_OBJECT_PINNED);
 
 	memset(reloc, 0, sizeof(reloc));
 	reloc[0].target_handle = obj[count + 1].handle; /* recurse */
-	reloc[0].presumed_offset = 0;
+	reloc[0].presumed_offset = obj[count + 1].offset;
 	reloc[0].offset = 5*sizeof(uint32_t);
 	reloc[0].delta = 0;
 	reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
 	reloc[0].write_domain = 0;
 
 	reloc[1].target_handle = obj[0].handle; /* breadcrumb */
-	reloc[1].presumed_offset = 0;
+	reloc[1].presumed_offset = obj[0].offset;
 	reloc[1].offset = sizeof(uint32_t);
 	reloc[1].delta = 0;
 	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
 	reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
+	if (do_relocs) {
+		obj[count + 1].relocs_ptr = (uintptr_t)reloc;
+		obj[count + 1].relocation_count = ARRAY_SIZE(reloc);
+	} else {
+		execbuf.flags = I915_EXEC_NO_RELOC;
+	}
 
 	seqno = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_READ);
 	gem_set_domain(fd, obj[0].handle,
@@ -261,8 +307,8 @@ static struct offset {
 	i = 0;
 	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[0].offset;
+		batch[++i] = obj[0].offset >> 32;
 	} else if (gen >= 4) {
 		batch[++i] = 0;
 		batch[++i] = 0;
@@ -278,8 +324,8 @@ static struct offset {
 	batch[++i] = MI_BATCH_BUFFER_START; /* not crashed? try again! */
 	if (gen >= 8) {
 		batch[i] |= 1 << 8 | 1;
-		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[count + 1].offset;
+		batch[++i] = obj[count + 1].offset >> 32;
 	} else if (gen >= 6) {
 		batch[i] |= 1 << 8;
 		batch[++i] = 0;
@@ -315,12 +361,16 @@ static struct offset {
 
 	gem_close(fd, obj[count + 1].handle);
 	for (i = 0; i < count; i++) {
-		offsets[i].addr = obj[i + 1].offset;
+		offsets[i].addr = DECANONICAL(obj[i + 1].offset);
 		offsets[i].idx = i;
 		gem_close(fd, obj[i + 1].handle);
+		intel_allocator_free(ahnd, obj[i + 1].handle);
 	}
 	gem_close(fd, obj[0].handle);
 
+	intel_allocator_free(ahnd, obj[0].handle);
+	intel_allocator_free(ahnd, obj[count + 1].handle);
+
 	qsort(offsets, count, sizeof(*offsets), cmp);
 	igt_assert(offsets[0].addr <= offsets[count-1].addr);
 	return offsets;
@@ -414,7 +464,7 @@ ascii85_decode(char *in, uint32_t **out, bool inflate, char **end)
 
 static void many(int fd, int dir, uint64_t size, unsigned int flags)
 {
-	uint64_t ram, gtt;
+	uint64_t ram, gtt, ahnd;
 	unsigned long count, blobs;
 	struct offset *offsets;
 	char *error, *str;
@@ -428,8 +478,9 @@ static void many(int fd, int dir, uint64_t size, unsigned int flags)
 	igt_require(count > 1);
 
 	intel_require_memory(count, size, CHECK_RAM);
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 
-	offsets = __captureN(fd, dir, 0, size, count, flags);
+	offsets = __captureN(fd, dir, ahnd, 0, size, count, flags);
 
 	error = igt_sysfs_get(dir, "error");
 	igt_sysfs_set(dir, "error", "Begone!");
@@ -496,6 +547,7 @@ static void many(int fd, int dir, uint64_t size, unsigned int flags)
 
 	free(error);
 	free(offsets);
+	intel_allocator_close(ahnd);
 }
 
 static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
@@ -512,10 +564,16 @@ static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
 		.rsvd1 = ctx->id,
 	};
 	int64_t timeout = NSEC_PER_SEC; /* 1s, feeling generous, blame debug */
-	uint64_t ram, gtt, size = 4 << 20;
+	uint64_t ram, gtt, ahnd, size = 4 << 20;
 	unsigned long count;
 	int link[2], dummy;
 
+	intel_allocator_multiprocess_start();
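+	/*
+	 * The forked child below reopens the allocator with its own handle,
+	 * hence multiprocess mode for the duration of this test.
+	 */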
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
+	obj.offset = intel_allocator_alloc(ahnd, obj.handle, 4096, ALIGNMENT);
+	obj.offset = CANONICAL(obj.offset);
+	obj.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
 	igt_require(gem_scheduler_enabled(fd));
 	igt_require(igt_params_set(fd, "reset", "%u", -1)); /* engine resets! */
 	igt_require(gem_gpu_reset_type(fd) > 1);
@@ -544,7 +602,13 @@ static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
 		fd = gem_reopen_driver(fd);
 		igt_debug("Submitting large capture [%ld x %dMiB objects]\n",
 			  count, (int)(size >> 20));
-		free(__captureN(fd, dir, ring, size, count, ASYNC));
+
+		/* Reopen the allocator in the new process. */
+		ahnd = intel_allocator_open(fd, child + 1, INTEL_ALLOCATOR_SIMPLE);
+
+		free(__captureN(fd, dir, ahnd, ring, size, count, ASYNC));
+		intel_allocator_close(ahnd);
+
 		write(link[1], &fd, sizeof(fd)); /* wake the parent up */
 		igt_force_gpu_reset(fd);
 		write(link[1], &fd, sizeof(fd)); /* wake the parent up */
@@ -567,19 +631,26 @@ static void prioinv(int fd, int dir, const intel_ctx_t *ctx,
 	close(link[1]);
 
 	gem_quiescent_gpu(fd);
+	intel_allocator_free(ahnd, obj.handle);
+	intel_allocator_close(ahnd);
+	intel_allocator_multiprocess_stop();
 }
 
 static void userptr(int fd, int dir)
 {
 	uint32_t handle;
+	uint64_t ahnd;
 	void *ptr;
 
 	igt_assert(posix_memalign(&ptr, 4096, 4096) == 0);
 	igt_require(__gem_userptr(fd, ptr, 4096, 0, 0, &handle) == 0);
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 
-	__capture1(fd, dir, intel_ctx_0(fd), 0, handle);
+	__capture1(fd, dir, ahnd, intel_ctx_0(fd), 0, handle, 4096);
 
 	gem_close(fd, handle);
+	intel_allocator_free(ahnd, handle);
+	intel_allocator_close(ahnd);
 	free(ptr);
 }
 
diff --git a/tests/i915/gem_exec_gttfill.c b/tests/i915/gem_exec_gttfill.c
index b8283eb8..f1642871 100644
--- a/tests/i915/gem_exec_gttfill.c
+++ b/tests/i915/gem_exec_gttfill.c
@@ -216,6 +216,7 @@ igt_main
 	igt_fixture {
 		i915 = drm_open_driver(DRIVER_INTEL);
 		igt_require_gem(i915);
+		igt_require(gem_has_relocations(i915));
 		ctx = intel_ctx_create_all_physical(i915);
 		igt_fork_hang_detector(i915);
 	}
diff --git a/tests/i915/gem_exec_parallel.c b/tests/i915/gem_exec_parallel.c
index 5920ac73..36bf5f74 100644
--- a/tests/i915/gem_exec_parallel.c
+++ b/tests/i915/gem_exec_parallel.c
@@ -49,6 +49,7 @@ static inline uint32_t hash32(uint32_t val)
 #define USERPTR 0x4
 
 #define NUMOBJ 16
+#define NUMTHREADS 1024
 
 struct thread {
 	pthread_t thread;
@@ -56,11 +57,13 @@ struct thread {
 	pthread_cond_t *cond;
 	unsigned flags;
 	uint32_t *scratch;
+	uint64_t *offsets;
 	unsigned id;
 	const intel_ctx_t *ctx;
 	unsigned engine;
 	uint32_t used;
 	int fd, gen, *go;
+	uint64_t ahnd;
 };
 
 static void *thread(void *data)
@@ -70,6 +73,7 @@ static void *thread(void *data)
 	struct drm_i915_gem_relocation_entry reloc;
 	struct drm_i915_gem_execbuffer2 execbuf;
 	const intel_ctx_t *tmp_ctx = NULL;
+	uint64_t offset;
 	uint32_t batch[16];
 	uint16_t used;
 	int fd, i;
@@ -112,7 +116,7 @@ static void *thread(void *data)
 	reloc.delta = 4*t->id;
 	obj[1].handle = gem_create(fd, 4096);
 	obj[1].relocs_ptr = to_user_pointer(&reloc);
-	obj[1].relocation_count = 1;
+	obj[1].relocation_count = !t->ahnd ? 1 : 0;
 	gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
 
 	memset(&execbuf, 0, sizeof(execbuf));
@@ -140,6 +144,18 @@ static void *thread(void *data)
 		if (t->flags & FDS)
 			obj[0].handle = gem_open(fd, obj[0].handle);
 
+		if (t->ahnd) {
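+			/*
+			 * Softpin path, assuming the gen8+ two-dword address
+			 * layout: rewrite the address dwords with the pinned
+			 * scratch offset and pin both objects.
+			 */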
+			offset = t->offsets[x];
+			i = 0;
+			batch[++i] = offset + 4*t->id;
+			batch[++i] = (offset + 4*t->id) >> 32;
+			obj[0].offset = offset;
+			obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+			obj[1].offset = get_offset(t->ahnd, obj[1].handle, 4096, 0);
+			obj[1].flags |= EXEC_OBJECT_PINNED;
+			gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+		}
+
 		gem_execbuf(fd, &execbuf);
 
 		if (t->flags & FDS)
@@ -158,7 +174,7 @@ static void *thread(void *data)
 
 static void check_bo(int fd, uint32_t *data, uint32_t handle, int pass, struct thread *threads)
 {
-	uint32_t x = hash32(handle * pass) % 1024;
+	uint32_t x = hash32(handle * pass) % NUMTHREADS;
 	uint32_t result;
 
 	if (!(threads[x].used & (1 << pass)))
@@ -213,6 +229,7 @@ static void all(int fd, const intel_ctx_t *ctx,
 	void *arg[NUMOBJ];
 	int go;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(fd, 0), offsets[NUMOBJ];
 
 	if (flags & CONTEXTS)
 		gem_require_contexts(fd);
@@ -238,9 +255,11 @@ static void all(int fd, const intel_ctx_t *ctx,
 		scratch[i] = handle[i] = handle_create(fd, flags, &arg[i]);
 		if (flags & FDS)
 			scratch[i] = gem_flink(fd, handle[i]);
+		offsets[i] = get_offset(ahnd, scratch[i], 4096, 0);
 	}
 
-	threads = calloc(1024, sizeof(struct thread));
+	threads = calloc(NUMTHREADS, sizeof(struct thread));
 	igt_assert(threads);
 
 	intel_detect_and_clear_missed_interrupts(fd);
@@ -248,7 +267,7 @@ static void all(int fd, const intel_ctx_t *ctx,
 	pthread_cond_init(&cond, 0);
 	go = 0;
 
-	for (i = 0; i < 1024; i++) {
+	for (i = 0; i < NUMTHREADS; i++) {
 		threads[i].id = i;
 		threads[i].fd = fd;
 		threads[i].gen = gen;
@@ -256,19 +275,21 @@ static void all(int fd, const intel_ctx_t *ctx,
 		threads[i].engine = engines[i % nengine];
 		threads[i].flags = flags;
 		threads[i].scratch = scratch;
+		threads[i].offsets = ahnd ? offsets : NULL;
 		threads[i].mutex = &mutex;
 		threads[i].cond = &cond;
 		threads[i].go = &go;
+		threads[i].ahnd = ahnd;
 
 		pthread_create(&threads[i].thread, 0, thread, &threads[i]);
 	}
 
 	pthread_mutex_lock(&mutex);
-	go = 1024;
+	go = NUMTHREADS;
 	pthread_cond_broadcast(&cond);
 	pthread_mutex_unlock(&mutex);
 
-	for (i = 0; i < 1024; i++)
+	for (i = 0; i < NUMTHREADS; i++)
 		pthread_join(threads[i].thread, NULL);
 
 	for (i = 0; i < NUMOBJ; i++) {
diff --git a/tests/i915/gem_exec_params.c b/tests/i915/gem_exec_params.c
index 729d38a4..ba79791a 100644
--- a/tests/i915/gem_exec_params.c
+++ b/tests/i915/gem_exec_params.c
@@ -45,6 +45,8 @@
 #include "igt_device.h"
 #include "sw_sync.h"
 
+#define ALIGNMENT (1 << 22)
+
 static bool has_exec_batch_first(int fd)
 {
 	int val = -1;
@@ -74,24 +76,45 @@ static void test_batch_first(int fd)
 	struct drm_i915_gem_exec_object2 obj[3];
 	struct drm_i915_gem_relocation_entry reloc[2];
 	uint32_t *map, value;
+	uint64_t ahnd;
+	bool do_relocs = !gem_uses_ppgtt(fd);
 	int i;
 
 	igt_require(gem_can_store_dword(fd, 0));
 	igt_require(has_exec_batch_first(fd));
 
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
+
 	memset(obj, 0, sizeof(obj));
 	memset(reloc, 0, sizeof(reloc));
 
 	obj[0].handle = gem_create(fd, 4096);
+	obj[0].offset = intel_allocator_alloc(ahnd, obj[0].handle,
+						4096, ALIGNMENT);
+	obj[0].offset = CANONICAL(obj[0].offset);
+	obj[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 	obj[1].handle = gem_create(fd, 4096);
+	obj[1].offset = intel_allocator_alloc(ahnd, obj[1].handle,
+						4096, ALIGNMENT);
+	obj[1].offset = CANONICAL(obj[1].offset);
+	obj[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 	obj[2].handle = gem_create(fd, 4096);
-
-	reloc[0].target_handle = obj[1].handle;
-	reloc[0].offset = sizeof(uint32_t);
-	reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc[0].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-	obj[0].relocs_ptr = to_user_pointer(&reloc[0]);
-	obj[0].relocation_count = 1;
+	obj[2].offset = intel_allocator_alloc(ahnd, obj[2].handle,
+						4096, ALIGNMENT);
+	obj[2].offset = CANONICAL(obj[2].offset);
+	obj[2].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
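+	/*
+	 * Without ppGTT we cannot rely on softpinned placement, so fall back
+	 * to relocations; otherwise pin at the allocator-chosen offsets.
+	 */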
+	if (do_relocs) {
+		reloc[0].target_handle = obj[1].handle;
+		reloc[0].offset = sizeof(uint32_t);
+		reloc[0].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		reloc[0].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+		obj[0].relocs_ptr = to_user_pointer(&reloc[0]);
+		obj[0].relocation_count = 1;
+	} else {
+		obj[0].flags |= EXEC_OBJECT_PINNED;
+		obj[1].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+	}
 
 	i = 0;
 	map = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_WRITE);
@@ -99,26 +122,31 @@ static void test_batch_first(int fd)
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 	map[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		map[++i] = 0;
-		map[++i] = 0;
+		map[++i] = obj[1].offset;
+		map[++i] = obj[1].offset >> 32;
 	} else if (gen >= 4) {
 		map[++i] = 0;
-		map[++i] = 0;
+		map[++i] = obj[1].offset;
 		reloc[0].offset += sizeof(uint32_t);
 	} else {
 		map[i]--;
-		map[++i] = 0;
+		map[++i] = obj[1].offset;
 	}
 	map[++i] = 1;
 	map[++i] = MI_BATCH_BUFFER_END;
 	munmap(map, 4096);
 
-	reloc[1].target_handle = obj[1].handle;
-	reloc[1].offset = sizeof(uint32_t);
-	reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc[1].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-	obj[2].relocs_ptr = to_user_pointer(&reloc[1]);
-	obj[2].relocation_count = 1;
+	if (do_relocs) {
+		reloc[1].target_handle = obj[1].handle;
+		reloc[1].offset = sizeof(uint32_t);
+		reloc[1].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		reloc[1].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+		obj[2].relocs_ptr = to_user_pointer(&reloc[1]);
+		obj[2].relocation_count = 1;
+	} else {
+		obj[1].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[2].flags |= EXEC_OBJECT_PINNED;
+	}
 
 	i = 0;
 	map = gem_mmap__cpu(fd, obj[2].handle, 0, 4096, PROT_WRITE);
@@ -126,15 +154,15 @@ static void test_batch_first(int fd)
 			I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 	map[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		map[++i] = 0;
-		map[++i] = 0;
+		map[++i] = obj[1].offset;
+		map[++i] = obj[1].offset >> 32;
 	} else if (gen >= 4) {
 		map[++i] = 0;
-		map[++i] = 0;
+		map[++i] = obj[1].offset;
 		reloc[1].offset += sizeof(uint32_t);
 	} else {
 		map[i]--;
-		map[++i] = 0;
+		map[++i] = obj[1].offset;
 	}
 	map[++i] = 2;
 	map[++i] = MI_BATCH_BUFFER_END;
@@ -158,8 +186,12 @@ static void test_batch_first(int fd)
 	igt_assert_eq_u32(value, 1);
 
 	gem_close(fd, obj[2].handle);
+	intel_allocator_free(ahnd, obj[2].handle);
 	gem_close(fd, obj[1].handle);
+	intel_allocator_free(ahnd, obj[1].handle);
 	gem_close(fd, obj[0].handle);
+	intel_allocator_free(ahnd, obj[0].handle);
+	intel_allocator_close(ahnd);
 }
 
 static int has_secure_batches(const int fd)
diff --git a/tests/i915/gem_exec_store.c b/tests/i915/gem_exec_store.c
index 0798f61d..38c595e3 100644
--- a/tests/i915/gem_exec_store.c
+++ b/tests/i915/gem_exec_store.c
@@ -37,6 +37,9 @@
 
 #define ENGINE_MASK  (I915_EXEC_RING_MASK | I915_EXEC_BSD_MASK)
 
+/* Without alignment detection we assume the worst-case scenario. */
+#define ALIGNMENT (1 << 21)
+
 static void store_dword(int fd, const intel_ctx_t *ctx,
 			const struct intel_execution_engine2 *e)
 {
@@ -45,6 +48,7 @@ static void store_dword(int fd, const intel_ctx_t *ctx,
 	struct drm_i915_gem_relocation_entry reloc;
 	struct drm_i915_gem_execbuffer2 execbuf;
 	uint32_t batch[16];
+	uint64_t ahnd;
 	int i;
 
 	intel_detect_and_clear_missed_interrupts(fd);
@@ -56,43 +60,63 @@ static void store_dword(int fd, const intel_ctx_t *ctx,
 		execbuf.flags |= I915_EXEC_SECURE;
 	execbuf.rsvd1 = ctx->id;
 
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
+
 	memset(obj, 0, sizeof(obj));
 	obj[0].handle = gem_create(fd, 4096);
+	obj[0].offset = intel_allocator_alloc(ahnd, obj[0].handle,
+					      4096, ALIGNMENT);
+	obj[0].offset = CANONICAL(obj[0].offset);
+	obj[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_WRITE;
 	obj[1].handle = gem_create(fd, 4096);
+	obj[1].offset = intel_allocator_alloc(ahnd, obj[1].handle,
+					      4096, ALIGNMENT);
+	obj[1].offset = CANONICAL(obj[1].offset);
+	obj[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
 
 	memset(&reloc, 0, sizeof(reloc));
 	reloc.target_handle = obj[0].handle;
-	reloc.presumed_offset = 0;
+	reloc.presumed_offset = obj[0].offset;
 	reloc.offset = sizeof(uint32_t);
 	reloc.delta = 0;
 	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-	obj[1].relocs_ptr = to_user_pointer(&reloc);
-	obj[1].relocation_count = 1;
+
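+	/*
+	 * The relocation entry is always prepared, but only handed to the
+	 * kernel when relocations are supported; otherwise both objects are
+	 * pinned at their allocator offsets and I915_EXEC_NO_RELOC is set.
+	 */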
+	if (gem_has_relocations(fd)) {
+		obj[1].relocs_ptr = to_user_pointer(&reloc);
+		obj[1].relocation_count = 1;
+	} else {
+		obj[0].flags |= EXEC_OBJECT_PINNED;
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+		execbuf.flags |= I915_EXEC_NO_RELOC;
+	}
 
 	i = 0;
 	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[0].offset;
+		batch[++i] = obj[0].offset >> 32;
 	} else if (gen >= 4) {
 		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[0].offset;
 		reloc.offset += sizeof(uint32_t);
 	} else {
 		batch[i]--;
-		batch[++i] = 0;
+		batch[++i] = obj[0].offset;
 	}
 	batch[++i] = 0xc0ffee;
 	batch[++i] = MI_BATCH_BUFFER_END;
 	gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
 	gem_execbuf(fd, &execbuf);
 	gem_close(fd, obj[1].handle);
+	intel_allocator_free(ahnd, obj[1].handle);
 
 	gem_read(fd, obj[0].handle, 0, batch, sizeof(batch));
 	gem_close(fd, obj[0].handle);
+	intel_allocator_free(ahnd, obj[0].handle);
 	igt_assert_eq(*batch, 0xc0ffee);
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+	intel_allocator_close(ahnd);
 }
 
 #define PAGES 1
@@ -106,7 +130,9 @@ static void store_cachelines(int fd, const intel_ctx_t *ctx,
 	struct drm_i915_gem_execbuffer2 execbuf;
 #define NCACHELINES (4096/64)
 	uint32_t *batch;
+	uint64_t ahnd, reloc_value;
 	int i;
+	bool do_relocs = gem_has_relocations(fd);
 
 	reloc = calloc(NCACHELINES, sizeof(*reloc));
 	igt_assert(reloc);
@@ -119,12 +145,25 @@ static void store_cachelines(int fd, const intel_ctx_t *ctx,
 		execbuf.flags |= I915_EXEC_SECURE;
 	execbuf.rsvd1 = ctx->id;
 
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
 	obj = calloc(execbuf.buffer_count, sizeof(*obj));
 	igt_assert(obj);
-	for (i = 0; i < execbuf.buffer_count; i++)
+	for (i = 0; i < execbuf.buffer_count; i++) {
 		obj[i].handle = gem_create(fd, 4096);
-	obj[i-1].relocs_ptr = to_user_pointer(reloc);
-	obj[i-1].relocation_count = NCACHELINES;
+		obj[i].offset = intel_allocator_alloc(ahnd, obj[i].handle,
+						      4096, ALIGNMENT);
+		obj[i].offset = CANONICAL(obj[i].offset);
+		obj[i].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
+			       (do_relocs ? 0 : EXEC_OBJECT_PINNED);
+		if (i + 1 < execbuf.buffer_count)
+			obj[i].flags |= EXEC_OBJECT_WRITE;
+	}
+	if (do_relocs) {
+		obj[i-1].relocs_ptr = to_user_pointer(reloc);
+		obj[i-1].relocation_count = NCACHELINES;
+	} else {
+		execbuf.flags |= I915_EXEC_NO_RELOC;
+	}
 	execbuf.buffers_ptr = to_user_pointer(obj);
 
 	batch = gem_mmap__cpu(fd, obj[i-1].handle, 0, 4096, PROT_WRITE);
@@ -132,23 +171,24 @@ static void store_cachelines(int fd, const intel_ctx_t *ctx,
 	i = 0;
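+	/*
+	 * With relocation_count == 0 the kernel patches nothing, so the
+	 * canonical target address is also written directly into the batch.
+	 */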
 	for (unsigned n = 0; n < NCACHELINES; n++) {
 		reloc[n].target_handle = obj[n % (execbuf.buffer_count-1)].handle;
-		reloc[n].presumed_offset = -1;
+		reloc[n].presumed_offset = obj[n % (execbuf.buffer_count-1)].offset;
 		reloc[n].offset = (i + 1)*sizeof(uint32_t);
 		reloc[n].delta = 4 * (n * 16 + n % 16);
 		reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 		reloc[n].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+		reloc_value = CANONICAL(reloc[n].presumed_offset + reloc[n].delta);
 
 		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
-			batch[++i] = 0;
-			batch[++i] = 0;
+			batch[++i] = reloc_value;
+			batch[++i] = reloc_value >> 32;
 		} else if (gen >= 4) {
 			batch[++i] = 0;
-			batch[++i] = 0;
+			batch[++i] = reloc_value;
 			reloc[n].offset += sizeof(uint32_t);
 		} else {
 			batch[i]--;
-			batch[++i] = 0;
+			batch[++i] = reloc_value;
 		}
 		batch[++i] = n | ~n << 16;
 		i++;
@@ -168,11 +208,14 @@ static void store_cachelines(int fd, const intel_ctx_t *ctx,
 	}
 	free(reloc);
 
-	for (unsigned n = 0; n < execbuf.buffer_count; n++)
+	for (unsigned n = 0; n < execbuf.buffer_count; n++) {
 		gem_close(fd, obj[n].handle);
+		intel_allocator_free(ahnd, obj[n].handle);
+	}
 	free(obj);
 
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+	intel_allocator_close(ahnd);
 }
 
 static void store_all(int fd, const intel_ctx_t *ctx)
@@ -184,10 +227,11 @@ static void store_all(int fd, const intel_ctx_t *ctx)
 	struct drm_i915_gem_execbuffer2 execbuf;
 	unsigned *engines, *permuted;
 	uint32_t batch[16];
-	uint64_t offset;
+	uint64_t offset, ahnd, reloc_value;
 	unsigned nengine;
-	int value;
+	int value, address;
 	int i, j;
+	bool do_relocs = gem_has_relocations(fd);
 
 	nengine = 0;
 	for_each_ctx_engine(fd, ctx, engine) {
@@ -213,24 +257,41 @@ static void store_all(int fd, const intel_ctx_t *ctx)
 		execbuf.flags |= I915_EXEC_SECURE;
 	execbuf.rsvd1 = ctx->id;
 
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_SIMPLE);
+
 	memset(obj, 0, sizeof(obj));
 	obj[0].handle = gem_create(fd, nengine*sizeof(uint32_t));
+	obj[0].offset = intel_allocator_alloc(ahnd, obj[0].handle,
+					      nengine*sizeof(uint32_t), ALIGNMENT);
+	obj[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_WRITE;
+	obj[0].offset = CANONICAL(obj[0].offset);
 	obj[1].handle = gem_create(fd, 2*nengine*sizeof(batch));
-	obj[1].relocation_count = 1;
+	obj[1].offset = intel_allocator_alloc(ahnd, obj[1].handle,
+					      2*nengine*sizeof(batch), ALIGNMENT);
+	obj[1].offset = CANONICAL(obj[1].offset);
+	obj[1].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+	if (do_relocs) {
+		obj[1].relocation_count = 1;
+	} else {
+		obj[0].flags |= EXEC_OBJECT_PINNED;
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+		execbuf.flags |= I915_EXEC_NO_RELOC;
+	}
 
 	offset = sizeof(uint32_t);
 	i = 0;
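+	/*
+	 * Remember which batch dwords hold the target address ('address') and
+	 * the payload ('value') so they can be patched for each engine below.
+	 */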
 	batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 	if (gen >= 8) {
-		batch[++i] = 0;
+		batch[address = ++i] = 0;
 		batch[++i] = 0;
 	} else if (gen >= 4) {
 		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[address = ++i] = 0;
 		offset += sizeof(uint32_t);
 	} else {
 		batch[i]--;
-		batch[++i] = 0;
+		batch[address = ++i] = 0;
 	}
 	batch[value = ++i] = 0xc0ffee;
 	batch[++i] = MI_BATCH_BUFFER_END;
@@ -246,12 +307,17 @@ static void store_all(int fd, const intel_ctx_t *ctx)
 
 		j = 2*nengine;
 		reloc[j].target_handle = obj[0].handle;
-		reloc[j].presumed_offset = ~0;
+		reloc[j].presumed_offset = obj[0].offset;
 		reloc[j].offset = j*sizeof(batch) + offset;
 		reloc[j].delta = nengine*sizeof(uint32_t);
 		reloc[j].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 		reloc[j].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-		obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
+		reloc_value = CANONICAL(obj[0].offset + nengine*sizeof(uint32_t));
+		batch[address] = reloc_value;
+		if (gen >= 8)
+			batch[address + 1] = reloc_value >> 32;
+		if (do_relocs)
+			obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
 
 		batch[value] = 0xdeadbeef;
 		gem_write(fd, obj[1].handle, j*sizeof(batch),
@@ -261,12 +327,17 @@ static void store_all(int fd, const intel_ctx_t *ctx)
 
 		j = 2*nengine + 1;
 		reloc[j].target_handle = obj[0].handle;
-		reloc[j].presumed_offset = ~0;
+		reloc[j].presumed_offset = obj[0].offset;
 		reloc[j].offset = j*sizeof(batch) + offset;
 		reloc[j].delta = nengine*sizeof(uint32_t);
 		reloc[j].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 		reloc[j].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-		obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
+		reloc_value = CANONICAL(obj[0].offset + nengine*sizeof(uint32_t));
+		batch[address] = reloc_value;
+		if (gen >= 8)
+			batch[address + 1] = reloc_value >> 32;
+		if (do_relocs)
+			obj[1].relocs_ptr = to_user_pointer(&reloc[j]);
 
 		batch[value] = nengine;
 		gem_write(fd, obj[1].handle, j*sizeof(batch),
@@ -279,30 +350,37 @@ static void store_all(int fd, const intel_ctx_t *ctx)
 	gem_sync(fd, obj[1].handle);
 
 	for (i = 0; i < nengine; i++) {
-		obj[1].relocs_ptr = to_user_pointer(&reloc[2*i]);
 		execbuf.batch_start_offset = 2*i*sizeof(batch);
 		memcpy(permuted, engines, nengine*sizeof(engines[0]));
 		igt_permute_array(permuted, nengine, igt_exchange_int);
+		if (do_relocs)
+			obj[1].relocs_ptr = to_user_pointer(&reloc[2*i]);
+
 		for (j = 0; j < nengine; j++) {
 			execbuf.flags &= ~ENGINE_MASK;
 			execbuf.flags |= permuted[j];
 			gem_execbuf(fd, &execbuf);
 		}
-		obj[1].relocs_ptr = to_user_pointer(&reloc[2*i+1]);
 		execbuf.batch_start_offset = (2*i+1)*sizeof(batch);
 		execbuf.flags &= ~ENGINE_MASK;
 		execbuf.flags |= engines[i];
+		if (do_relocs)
+			obj[1].relocs_ptr = to_user_pointer(&reloc[2*i+1]);
+
 		gem_execbuf(fd, &execbuf);
 	}
 	gem_close(fd, obj[1].handle);
+	intel_allocator_free(ahnd, obj[1].handle);
 
 	gem_read(fd, obj[0].handle, 0, engines, nengine*sizeof(engines[0]));
 	gem_close(fd, obj[0].handle);
+	intel_allocator_free(ahnd, obj[0].handle);
 
 	for (i = 0; i < nengine; i++)
 		igt_assert_eq_u32(engines[i], i);
 	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
 
+	intel_allocator_close(ahnd);
 	free(permuted);
 	free(engines);
 	free(reloc);
diff --git a/tests/i915/gem_exec_suspend.c b/tests/i915/gem_exec_suspend.c
index 0ef26ce1..dbe0c8a7 100644
--- a/tests/i915/gem_exec_suspend.c
+++ b/tests/i915/gem_exec_suspend.c
@@ -83,6 +83,7 @@ static void run_test(int fd, const intel_ctx_t *ctx,
 	unsigned engines[I915_EXEC_RING_MASK + 1];
 	unsigned nengine;
 	igt_spin_t *spin = NULL;
+	uint64_t ahnd = get_reloc_ahnd(fd, 0);
 
 	nengine = 0;
 	if (engine == ALL_ENGINES) {
@@ -120,27 +121,39 @@ static void run_test(int fd, const intel_ctx_t *ctx,
 	igt_require(__gem_execbuf(fd, &execbuf) == 0);
 	gem_close(fd, obj[1].handle);
 
-	memset(&reloc, 0, sizeof(reloc));
-	reloc.target_handle = obj[0].handle;
-	reloc.presumed_offset = obj[0].offset;
-	reloc.offset = sizeof(uint32_t);
-	if (gen >= 4 && gen < 8)
-		reloc.offset += sizeof(uint32_t);
-	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-
-	obj[1].relocs_ptr = to_user_pointer(&reloc);
-	obj[1].relocation_count = 1;
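+	/*
+	 * With relocations the kernel patches each batch; with an allocator
+	 * handle obj[0] is pinned once so that every store targets the same
+	 * object across suspend/resume.
+	 */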
+	if (!ahnd) {
+		memset(&reloc, 0, sizeof(reloc));
+		reloc.target_handle = obj[0].handle;
+		reloc.presumed_offset = obj[0].offset;
+		reloc.offset = sizeof(uint32_t);
+		if (gen >= 4 && gen < 8)
+			reloc.offset += sizeof(uint32_t);
+		reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+
+		obj[1].relocs_ptr = to_user_pointer(&reloc);
+		obj[1].relocation_count = 1;
+	} else {
+		/*
+		 * Ignore the offset assigned by the first execbuf; pin obj[0]
+		 * at the allocator's choice instead.
+		 */
+		obj[0].offset = get_offset(ahnd, obj[0].handle, 4096, 0);
+		obj[0].flags |= EXEC_OBJECT_PINNED;
+	}
 
 	for (int i = 0; i < 1024; i++) {
 		uint64_t offset;
 		uint32_t buf[16];
 		int b;
 
-		obj[1].handle = gem_create(fd, 4096);
-
 		reloc.delta = i * sizeof(uint32_t);
-		offset = reloc.presumed_offset + reloc.delta;
+
+		obj[1].handle = gem_create(fd, 4096);
+		if (ahnd) {
+			obj[1].offset = get_offset(ahnd, obj[1].handle, 4096, 0);
+			obj[1].flags |= EXEC_OBJECT_PINNED;
+			offset = obj[0].offset + reloc.delta;
+		} else {
+			offset = reloc.presumed_offset + reloc.delta;
+		}
 
 		b = 0;
 		buf[b] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
@@ -165,7 +178,7 @@ static void run_test(int fd, const intel_ctx_t *ctx,
 	}
 
 	if (flags & HANG)
-		spin = igt_spin_new(fd, .engine = engine);
+		spin = igt_spin_new(fd, .ahnd = ahnd, .engine = engine);
 
 	switch (mode(flags)) {
 	case NOSLEEP:
@@ -201,6 +214,7 @@ static void run_test(int fd, const intel_ctx_t *ctx,
 
 	check_bo(fd, obj[0].handle);
 	gem_close(fd, obj[0].handle);
+	put_ahnd(ahnd);
 
 	gem_quiescent_gpu(fd);
 
diff --git a/tests/i915/gem_huc_copy.c b/tests/i915/gem_huc_copy.c
index 9a32893e..ea32b705 100644
--- a/tests/i915/gem_huc_copy.c
+++ b/tests/i915/gem_huc_copy.c
@@ -89,6 +89,7 @@ igt_main
 	int drm_fd = -1;
 	uint32_t devid;
 	igt_huc_copyfunc_t huc_copy;
+	uint64_t ahnd;
 
 	igt_fixture {
 		drm_fd = drm_open_driver(DRIVER_INTEL);
@@ -97,6 +98,8 @@ igt_main
 		huc_copy = igt_get_huc_copyfunc(devid);
 
 		igt_require_f(huc_copy, "no huc_copy function\n");
+
+		ahnd = get_reloc_ahnd(drm_fd, 0);
 	}
 
 	igt_describe("Make sure that Huc firmware works"
@@ -106,6 +109,9 @@ igt_main
 	igt_subtest("huc-copy") {
 		char inputs[HUC_COPY_DATA_BUF_SIZE];
 		struct drm_i915_gem_exec_object2 obj[3];
+		uint64_t objsize[3] = { HUC_COPY_DATA_BUF_SIZE,
+					HUC_COPY_DATA_BUF_SIZE,
+					4096 };
 
 		test_huc_load(drm_fd);
 		/* Initialize src buffer randomly */
@@ -123,7 +129,7 @@ igt_main
 
 		gem_write(drm_fd, obj[0].handle, 0, inputs, HUC_COPY_DATA_BUF_SIZE);
 
-		huc_copy(drm_fd, obj);
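+		/*
+		 * huc_copy() pins src/dst/batch itself, using the allocator
+		 * handle and the per-object sizes.
+		 */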
+		huc_copy(drm_fd, ahnd, obj, objsize);
 		compare_huc_copy_result(drm_fd, obj[0].handle, obj[1].handle);
 
 		gem_close(drm_fd, obj[0].handle);
@@ -131,6 +137,8 @@ igt_main
 		gem_close(drm_fd, obj[2].handle);
 	}
 
-	igt_fixture
+	igt_fixture {
+		put_ahnd(ahnd);
 		close(drm_fd);
+	}
 }
diff --git a/tests/i915/gem_mmap.c b/tests/i915/gem_mmap.c
index a77c0ad6..61c9c5c1 100644
--- a/tests/i915/gem_mmap.c
+++ b/tests/i915/gem_mmap.c
@@ -123,8 +123,9 @@ test_pf_nonblock(int i915)
 {
 	igt_spin_t *spin;
 	uint32_t *ptr;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
 
-	spin = igt_spin_new(i915);
+	spin = igt_spin_new(i915, .ahnd = ahnd);
 
 	igt_set_timeout(1, "initial pagefaulting did not complete within 1s");
 
@@ -135,6 +136,7 @@ test_pf_nonblock(int i915)
 	igt_reset_timeout();
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
 
 static int mmap_ioctl(int i915, struct drm_i915_gem_mmap *arg)
diff --git a/tests/i915/gem_mmap_gtt.c b/tests/i915/gem_mmap_gtt.c
index 60282699..92bbb5d2 100644
--- a/tests/i915/gem_mmap_gtt.c
+++ b/tests/i915/gem_mmap_gtt.c
@@ -335,10 +335,12 @@ test_pf_nonblock(int i915)
 {
 	igt_spin_t *spin;
 	uint32_t *ptr;
+	uint64_t ahnd;
 
 	igt_require(mmap_gtt_version(i915) >= 3);
 
-	spin = igt_spin_new(i915);
+	ahnd = get_reloc_ahnd(i915, 0);
+	spin = igt_spin_new(i915, .ahnd = ahnd);
 
 	igt_set_timeout(1, "initial pagefaulting did not complete within 1s");
 
@@ -349,6 +351,7 @@ test_pf_nonblock(int i915)
 	igt_reset_timeout();
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
 
 static void
@@ -741,11 +744,13 @@ test_hang_busy(int i915)
 	igt_spin_t *spin;
 	igt_hang_t hang;
 	uint32_t handle;
+	uint64_t ahnd;
 
 	hang = igt_allow_hang(i915, ctx->id, 0);
 	igt_require(igt_params_set(i915, "reset", "1")); /* global */
 
-	spin = igt_spin_new(i915, .ctx = ctx,
+	ahnd = get_reloc_ahnd(i915, ctx->id);
+	spin = igt_spin_new(i915, .ctx = ctx, .ahnd = ahnd,
 			    .flags = IGT_SPIN_POLL_RUN |
 				     IGT_SPIN_FENCE_OUT |
 				     IGT_SPIN_NO_PREEMPTION);
@@ -788,6 +793,7 @@ test_hang_busy(int i915)
 	munmap(ptr, 4096);
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 	igt_disallow_hang(i915, hang);
 	intel_ctx_destroy(i915, ctx);
 }
@@ -800,11 +806,13 @@ test_hang_user(int i915)
 	igt_spin_t *spin;
 	igt_hang_t hang;
 	uint32_t handle;
+	uint64_t ahnd;
 
 	hang = igt_allow_hang(i915, ctx->id, 0);
 	igt_require(igt_params_set(i915, "reset", "1")); /* global */
 
-	spin = igt_spin_new(i915, .ctx = ctx,
+	ahnd = get_reloc_ahnd(i915, ctx->id);
+	spin = igt_spin_new(i915, .ctx = ctx, .ahnd = ahnd,
 			    .flags = IGT_SPIN_POLL_RUN |
 				     IGT_SPIN_FENCE_OUT |
 				     IGT_SPIN_NO_PREEMPTION);
@@ -843,6 +851,7 @@ test_hang_user(int i915)
 	munmap(ptr, 4096);
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 	igt_disallow_hang(i915, hang);
 	intel_ctx_destroy(i915, ctx);
 }
diff --git a/tests/i915/gem_mmap_offset.c b/tests/i915/gem_mmap_offset.c
index f1ba67b7..8148f0a2 100644
--- a/tests/i915/gem_mmap_offset.c
+++ b/tests/i915/gem_mmap_offset.c
@@ -248,7 +248,8 @@ static void isolation(int i915)
 
 static void pf_nonblock(int i915)
 {
-	igt_spin_t *spin = igt_spin_new(i915);
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
+	igt_spin_t *spin = igt_spin_new(i915, .ahnd = ahnd);
 
 	for_each_mmap_offset_type(i915, t) {
 		uint32_t *ptr;
@@ -268,6 +269,7 @@ static void pf_nonblock(int i915)
 	}
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
 
 static void *memchr_inv(const void *s, int c, size_t n)
diff --git a/tests/i915/gem_mmap_wc.c b/tests/i915/gem_mmap_wc.c
index abb89b8e..6dc7bae4 100644
--- a/tests/i915/gem_mmap_wc.c
+++ b/tests/i915/gem_mmap_wc.c
@@ -459,8 +459,9 @@ test_pf_nonblock(int i915)
 {
 	igt_spin_t *spin;
 	uint32_t *ptr;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
 
-	spin = igt_spin_new(i915);
+	spin = igt_spin_new(i915, .ahnd = ahnd);
 
 	igt_set_timeout(1, "initial pagefaulting did not complete within 1s");
 
@@ -471,6 +472,7 @@ test_pf_nonblock(int i915)
 	igt_reset_timeout();
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
 
 static int mmap_ioctl(int i915, struct drm_i915_gem_mmap *arg)
diff --git a/tests/i915/gem_request_retire.c b/tests/i915/gem_request_retire.c
index 3df54f2a..da9d405e 100644
--- a/tests/i915/gem_request_retire.c
+++ b/tests/i915/gem_request_retire.c
@@ -63,21 +63,29 @@ test_retire_vma_not_inactive(int fd)
 {
 	struct intel_execution_engine2 *e;
 	const intel_ctx_t *ctx;
+	uint64_t ahnd, ahndN;
 	igt_spin_t *bg = NULL;
 
 	ctx = intel_ctx_create_all_physical(fd);
+	ahnd = get_reloc_ahnd(fd, ctx->id);
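+	/*
+	 * Spinners on the freshly created contexts get their own allocator
+	 * handle (ahndN below), as offsets live in each context's ppGTT.
+	 */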
 
 	for_each_ctx_engine(fd, ctx, e) {
 		igt_spin_t *spin;
 		const intel_ctx_t *spin_ctx;
 
 		if (!bg) {
-			bg = igt_spin_new(fd, .ctx = ctx, .engine = e->flags);
+			bg = igt_spin_new(fd,
+					  .ahnd = ahnd,
+					  .ctx = ctx,
+					  .engine = e->flags);
 			continue;
 		}
 
 		spin_ctx = intel_ctx_create(fd, &ctx->cfg);
-		spin = igt_spin_new(fd, .ctx = spin_ctx,
+		ahndN = get_reloc_ahnd(fd, spin_ctx->id);
+		spin = igt_spin_new(fd,
+				    .ahnd = ahndN,
+				    .ctx = spin_ctx,
 				    .engine = e->flags,
 				    .dependency = bg->handle,
 				    .flags = IGT_SPIN_SOFTDEP);
@@ -86,11 +94,13 @@ test_retire_vma_not_inactive(int fd)
 
 		gem_sync(fd, spin->handle);
 		igt_spin_free(fd, spin);
+		put_ahnd(ahndN);
 	}
 
 	igt_drop_caches_set(fd, DROP_RETIRE);
 	igt_spin_free(fd, bg);
 	intel_ctx_destroy(fd, ctx);
+	put_ahnd(ahnd);
 }
 
 int fd;
diff --git a/tests/i915/gem_ringfill.c b/tests/i915/gem_ringfill.c
index d32d4799..1e2c8d2d 100644
--- a/tests/i915/gem_ringfill.c
+++ b/tests/i915/gem_ringfill.c
@@ -94,6 +94,7 @@ static void fill_ring(int fd,
 	}
 }
 
+#define NUMSTORES 1024
 static void setup_execbuf(int fd, const intel_ctx_t *ctx,
 			  struct drm_i915_gem_execbuffer2 *execbuf,
 			  struct drm_i915_gem_exec_object2 *obj,
@@ -104,10 +105,11 @@ static void setup_execbuf(int fd, const intel_ctx_t *ctx,
 	const uint32_t bbe = MI_BATCH_BUFFER_END;
 	uint32_t *batch, *b;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 
 	memset(execbuf, 0, sizeof(*execbuf));
 	memset(obj, 0, 2*sizeof(*obj));
-	memset(reloc, 0, 1024*sizeof(*reloc));
+	memset(reloc, 0, NUMSTORES * sizeof(*reloc));
 
 	execbuf->buffers_ptr = to_user_pointer(obj);
 	execbuf->flags = ring | (1 << 11) | (1 << 12);
@@ -118,23 +120,33 @@ static void setup_execbuf(int fd, const intel_ctx_t *ctx,
 	execbuf->rsvd1 = ctx->id;
 
 	obj[0].handle = gem_create(fd, 4096);
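+	/*
+	 * With an allocator handle both objects are pinned; the relocation
+	 * entries below are still filled in, but relocation_count is 0.
+	 */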
+	if (ahnd) {
+		obj[0].offset = get_offset(ahnd, obj[0].handle, 4096, 0);
+		obj[0].flags |= EXEC_OBJECT_PINNED;
+	}
+
 	gem_write(fd, obj[0].handle, 0, &bbe, sizeof(bbe));
 	execbuf->buffer_count = 1;
 	gem_execbuf(fd, execbuf);
 
 	obj[0].flags |= EXEC_OBJECT_WRITE;
-	obj[1].handle = gem_create(fd, 1024*16 + 4096);
-
+	obj[1].handle = gem_create(fd, NUMSTORES * 16 + 4096);
 	obj[1].relocs_ptr = to_user_pointer(reloc);
-	obj[1].relocation_count = 1024;
+	obj[1].relocation_count = !ahnd ? NUMSTORES : 0;
+
+	if (ahnd) {
+		obj[1].offset = get_offset(ahnd, obj[1].handle,
+				NUMSTORES * 16 + 4096, 0);
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+	}
 
-	batch = gem_mmap__cpu(fd, obj[1].handle, 0, 16*1024 + 4096,
+	batch = gem_mmap__cpu(fd, obj[1].handle, 0, NUMSTORES * 16 + 4096,
 			      PROT_WRITE | PROT_READ);
 	gem_set_domain(fd, obj[1].handle,
 		       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
 
 	b = batch;
-	for (i = 0; i < 1024; i++) {
+	for (i = 0; i < NUMSTORES; i++) {
 		uint64_t offset;
 
 		reloc[i].presumed_offset = obj[0].offset;
@@ -162,10 +174,11 @@ static void setup_execbuf(int fd, const intel_ctx_t *ctx,
 		*b++ = i;
 	}
 	*b++ = MI_BATCH_BUFFER_END;
-	munmap(batch, 16*1024+4096);
+	munmap(batch, NUMSTORES * 16 + 4096);
 
 	execbuf->buffer_count = 2;
 	gem_execbuf(fd, execbuf);
+	put_ahnd(ahnd);
 
 	check_bo(fd, obj[0].handle);
 }
@@ -177,6 +190,7 @@ static void run_test(int fd, const intel_ctx_t *ctx, unsigned ring,
 	struct drm_i915_gem_relocation_entry reloc[1024];
 	struct drm_i915_gem_execbuffer2 execbuf;
 	igt_hang_t hang;
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 
 	if (flags & (SUSPEND | HIBERNATE)) {
 		run_test(fd, ctx, ring, 0, 0);
@@ -187,7 +201,7 @@ static void run_test(int fd, const intel_ctx_t *ctx, unsigned ring,
 
 	memset(&hang, 0, sizeof(hang));
 	if (flags & HANG)
-		hang = igt_hang_ctx(fd, ctx->id, ring & ~(3<<13), 0);
+		hang = igt_hang_ring_with_ahnd(fd, ring & ~(3<<13), ahnd);
 
 	if (flags & (CHILD | FORKED | BOMB)) {
 		int nchild;
@@ -321,11 +335,13 @@ igt_main
 			for_each_ring(e, fd) {
 				igt_dynamic_f("%s", e->name) {
 					igt_require(gem_can_store_dword(fd, eb_ring(e)));
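+					/* Forking modes need the allocator
+					 * in multiprocess mode.
+					 */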
+					intel_allocator_multiprocess_start();
 					run_test(fd, intel_ctx_0(fd),
 						 eb_ring(e),
 						 m->flags,
 						 m->timeout);
 					gem_quiescent_gpu(fd);
+					intel_allocator_multiprocess_stop();
 				}
 			}
 		}
@@ -342,11 +358,13 @@ igt_main
 					continue;
 
 				igt_dynamic_f("%s", e->name) {
+					intel_allocator_multiprocess_start();
 					run_test(fd, ctx,
 						 e->flags,
 						 m->flags,
 						 m->timeout);
 					gem_quiescent_gpu(fd);
+					intel_allocator_multiprocess_stop();
 				}
 			}
 		}
@@ -354,6 +372,7 @@ igt_main
 
 	igt_subtest("basic-all") {
 		const struct intel_execution_engine2 *e;
+		intel_allocator_multiprocess_start();
 
 		for_each_ctx_engine(fd, ctx, e) {
 			if (!gem_class_can_store_dword(fd, e->class))
@@ -364,6 +383,7 @@ igt_main
 		}
 
 		igt_waitchildren();
+		intel_allocator_multiprocess_stop();
 	}
 
 	igt_fixture {
diff --git a/tests/i915/gem_softpin.c b/tests/i915/gem_softpin.c
index bdb04821..82d8a286 100644
--- a/tests/i915/gem_softpin.c
+++ b/tests/i915/gem_softpin.c
@@ -29,6 +29,7 @@
 #include "i915/gem.h"
 #include "i915/gem_create.h"
 #include "igt.h"
+#include "igt_rand.h"
 #include "intel_allocator.h"
 
 #define EXEC_OBJECT_PINNED	(1<<4)
@@ -877,9 +878,209 @@ static void test_allocator_fork(int fd)
 	intel_allocator_multiprocess_stop();
 }
 
+#define BATCH_SIZE (4096<<10)
+/* We don't have alignment detection yet, so assume the worst-case scenario. */
+#define BATCH_ALIGNMENT (1 << 21)
+
+struct batch {
+	uint32_t handle;
+	void *ptr;
+};
+
+static void xchg_batch(void *array, unsigned int i, unsigned int j)
+{
+	struct batch *batches = array;
+	struct batch tmp;
+
+	tmp = batches[i];
+	batches[i] = batches[j];
+	batches[j] = tmp;
+}
+
+static void submit(int fd, int gen,
+		   struct drm_i915_gem_execbuffer2 *eb,
+		   struct batch *batches, unsigned int count,
+		   uint64_t ahnd)
+{
+	struct drm_i915_gem_exec_object2 obj;
+	uint32_t batch[16];
+	uint64_t address;
+	unsigned n;
+
+	memset(&obj, 0, sizeof(obj));
+	obj.flags = EXEC_OBJECT_PINNED;
+
+	for (unsigned i = 0; i < count; i++) {
+		obj.handle = batches[i].handle;
+		obj.offset = intel_allocator_alloc(ahnd, obj.handle,
+						   BATCH_SIZE,
+						   BATCH_ALIGNMENT);
+		address = obj.offset + BATCH_SIZE - eb->batch_start_offset - 8;
+		n = 0;
+		batch[n] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		if (gen >= 8) {
+			batch[n] |= 1 << 21;
+			batch[n]++;
+			batch[++n] = address;
+			batch[++n] = address >> 32;
+		} else if (gen >= 4) {
+			batch[++n] = 0;
+			batch[++n] = address;
+		} else {
+			batch[n]--;
+			batch[++n] = address;
+		}
+		batch[++n] = obj.offset; /* lower_32_bits(value) */
+		batch[++n] = obj.offset >> 32; /* upper_32_bits(value) / nop */
+		batch[++n] = MI_BATCH_BUFFER_END;
+		eb->buffers_ptr = to_user_pointer(&obj);
+
+		memcpy(batches[i].ptr + eb->batch_start_offset,
+		       batch, sizeof(batch));
+
+		gem_execbuf(fd, eb);
+	}
+	/* As we have been lying about the write_domain, we need to do a sync */
+	gem_sync(fd, obj.handle);
+}
+
+static void test_allocator_evict(int fd, const intel_ctx_t *ctx,
+				 unsigned ring, int timeout)
+{
+	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
+	struct drm_i915_gem_execbuffer2 execbuf;
+	unsigned engines[I915_EXEC_RING_MASK + 1];
+	volatile uint64_t *shared;
+	struct timespec tv = {};
+	struct batch *batches;
+	unsigned nengine;
+	unsigned count;
+	uint64_t size, ahnd;
+
+	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(shared != MAP_FAILED);
+
+	nengine = 0;
+	if (ring == ALL_ENGINES) {
+		struct intel_execution_engine2 *e;
+
+		for_each_ctx_engine(fd, ctx, e) {
+			if (!gem_class_can_store_dword(fd, e->class))
+				continue;
+
+			engines[nengine++] = e->flags;
+		}
+	} else {
+		engines[nengine++] = ring;
+	}
+	igt_require(nengine);
+	igt_assert(nengine * 64 <= BATCH_SIZE);
+
+	size = gem_aperture_size(fd);
+	if (!gem_uses_full_ppgtt(fd))
+		size /= 2;
+	if (size > 1ull<<32) /* Limit to 4GiB as we do not use allow-48b */
+		size = 1ull << 32;
+	igt_require(size < (1ull<<32) * BATCH_SIZE);
+
+	count = size / BATCH_SIZE + 1;
+	igt_debug("Using %'d batches to fill %'llu aperture on %d engines\n",
+		  count, (long long)size, nengine);
+
+	intel_allocator_multiprocess_start();
+	/* Avoid allocating on the last page */
+	ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+					 INTEL_ALLOCATOR_RELOC,
+					 ALLOC_STRATEGY_HIGH_TO_LOW);
+
+	intel_require_memory(count, BATCH_SIZE, CHECK_RAM);
+	intel_detect_and_clear_missed_interrupts(fd);
+
+	igt_nsec_elapsed(&tv);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffer_count = 1;
+	if (gen < 6)
+		execbuf.flags |= I915_EXEC_SECURE;
+
+	batches = calloc(count, sizeof(*batches));
+	igt_assert(batches);
+	for (unsigned i = 0; i < count; i++) {
+		batches[i].handle = gem_create(fd, BATCH_SIZE);
+		batches[i].ptr =
+			gem_mmap__device_coherent(fd, batches[i].handle,
+						  0, BATCH_SIZE, PROT_WRITE);
+	}
+
+	/* Flush all memory before we start the timer */
+	submit(fd, gen, &execbuf, batches, count, ahnd);
+
+	igt_info("Setup %u batches in %.2fms\n",
+		 count, 1e-6 * igt_nsec_elapsed(&tv));
+
+	igt_fork(child, nengine) {
+		uint64_t dst, src, dst_offset, src_offset;
+		uint64_t cycles = 0;
+
+		hars_petruska_f54_1_random_perturb(child);
+		igt_permute_array(batches, count, xchg_batch);
+		execbuf.batch_start_offset = child * 64;
+		execbuf.flags |= engines[child];
+
+		dst_offset = BATCH_SIZE - child*64 - 8;
+		if (gen >= 8)
+			src_offset = child*64 + 3*sizeof(uint32_t);
+		else if (gen >= 4)
+			src_offset = child*64 + 4*sizeof(uint32_t);
+		else
+			src_offset = child*64 + 2*sizeof(uint32_t);
+
+		/* We need to open the allocator again in the new process */
+		ahnd = intel_allocator_open_full(fd, 0, 0, size - 4096,
+						 INTEL_ALLOCATOR_RELOC,
+						 ALLOC_STRATEGY_HIGH_TO_LOW);
+
+		igt_until_timeout(timeout) {
+			submit(fd, gen, &execbuf, batches, count, ahnd);
+			for (unsigned i = 0; i < count; i++) {
+				dst = *(uint64_t *)(batches[i].ptr + dst_offset);
+				src = *(uint64_t *)(batches[i].ptr + src_offset);
+				igt_assert_eq_u64(dst, src);
+			}
+			cycles++;
+		}
+		shared[child] = cycles;
+		igt_info("engine[%d]: %llu cycles\n", child, (long long)cycles);
+		intel_allocator_close(ahnd);
+	}
+	igt_waitchildren();
+
+	intel_allocator_close(ahnd);
+	intel_allocator_multiprocess_stop();
+
+	for (unsigned i = 0; i < count; i++) {
+		munmap(batches[i].ptr, BATCH_SIZE);
+		gem_close(fd, batches[i].handle);
+	}
+	free(batches);
+
+	shared[nengine] = 0;
+	for (unsigned i = 0; i < nengine; i++)
+		shared[nengine] += shared[i];
+	igt_info("Total: %llu cycles\n", (long long)shared[nengine]);
+
+	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
+}
+
+#define test_each_engine(T, i915, ctx, e) \
+	igt_subtest_with_dynamic(T) for_each_ctx_engine(i915, ctx, e) \
+		igt_dynamic_f("%s", e->name)
+
 igt_main
 {
+	const struct intel_execution_engine2 *e;
 	int fd = -1;
+	const intel_ctx_t *ctx;
 
 	igt_fixture {
 		fd = drm_open_driver_master(DRIVER_INTEL);
@@ -887,6 +1088,8 @@ igt_main
 		gem_require_blitter(fd);
 		igt_require(gem_has_softpin(fd));
 		igt_require(gem_can_store_dword(fd, 0));
+
+		ctx = intel_ctx_create_all_physical(fd);
 	}
 
 	igt_subtest("invalid")
@@ -922,6 +1125,12 @@ igt_main
 
 		igt_subtest("allocator-fork")
 			test_allocator_fork(fd);
+
+		test_each_engine("allocator-evict", fd, ctx, e)
+			test_allocator_evict(fd, ctx, e->flags, 20);
+
+		igt_subtest("allocator-evict-all-engines")
+			test_allocator_evict(fd, ctx, ALL_ENGINES, 20);
 	}
 
 	igt_subtest("softpin")
@@ -949,6 +1158,8 @@ igt_main
 	igt_subtest("evict-hang")
 		test_evict_hang(fd);
 
-	igt_fixture
+	igt_fixture {
+		intel_ctx_destroy(fd, ctx);
 		close(fd);
+	}
 }
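
The allocator-evict subtests added above compress to the following flow (same
assumptions as the sketch earlier; intel_allocator_open_full() bounds the VA
range, and the RELOC allocator with ALLOC_STRATEGY_HIGH_TO_LOW hands out
addresses top-down):

static void evict_in_children(int fd, uint64_t size, int nchildren)
{
	intel_allocator_multiprocess_start();

	igt_fork(child, nchildren) {
		/* Allocator handles are per-process: reopen in the child. */
		uint64_t ahnd = intel_allocator_open_full(fd, 0, 0,
							  size - 4096,
							  INTEL_ALLOCATOR_RELOC,
							  ALLOC_STRATEGY_HIGH_TO_LOW);

		/*
		 * The submit loop then takes each batch address from
		 * intel_allocator_alloc(ahnd, handle, BATCH_SIZE,
		 * BATCH_ALIGNMENT); handing out more addresses than the
		 * aperture holds is what forces the kernel to evict.
		 */

		intel_allocator_close(ahnd);
	}
	igt_waitchildren();

	intel_allocator_multiprocess_stop();
}
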
diff --git a/tests/i915/gem_spin_batch.c b/tests/i915/gem_spin_batch.c
index 4a9d6c2d..653812c7 100644
--- a/tests/i915/gem_spin_batch.c
+++ b/tests/i915/gem_spin_batch.c
@@ -45,13 +45,14 @@ static void spin(int fd, const intel_ctx_t *ctx_id,
 	struct timespec tv = { };
 	struct timespec itv = { };
 	uint64_t elapsed;
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx_id->id);
 
-	spin = __igt_spin_new(fd, .ctx = ctx_id, .engine = engine,
-			      .flags = flags);
+	spin = __igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx_id,
+			      .engine = engine, .flags = flags);
 	while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout_sec) {
 		igt_spin_t *next =
-			__igt_spin_new(fd, .ctx = ctx_id, .engine = engine,
-				       .flags = flags);
+			__igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx_id,
+				       .engine = engine, .flags = flags);
 
 		igt_spin_set_timeout(spin,
 				     timeout_100ms - igt_nsec_elapsed(&itv));
@@ -67,6 +68,7 @@ static void spin(int fd, const intel_ctx_t *ctx_id,
 		loops++;
 	}
 	igt_spin_free(fd, spin);
+	put_ahnd(ahnd);
 
 	igt_info("Completed %ld loops in %lld ns, target %ld\n",
 		 loops, (long long)elapsed, (long)(elapsed / timeout_100ms));
@@ -82,11 +84,12 @@ static void spin_resubmit(int fd, const intel_ctx_t *ctx,
 {
 	const intel_ctx_t *new_ctx = NULL;
 	igt_spin_t *spin;
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 
 	if (flags & RESUBMIT_NEW_CTX)
 		igt_require(gem_has_contexts(fd));
 
-	spin = __igt_spin_new(fd, .ctx = ctx, .engine = engine);
+	spin = __igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = engine);
 	if (flags & RESUBMIT_NEW_CTX) {
 		new_ctx = intel_ctx_create(fd, &ctx->cfg);
 		spin->execbuf.rsvd1 = new_ctx->id;
@@ -110,6 +113,7 @@ static void spin_resubmit(int fd, const intel_ctx_t *ctx,
 		intel_ctx_destroy(fd, new_ctx);
 
 	igt_spin_free(fd, spin);
+	put_ahnd(ahnd);
 }
 
 static void spin_exit_handler(int sig)
@@ -139,6 +143,7 @@ static void spin_all(int i915, const intel_ctx_t *ctx, unsigned int flags)
 	const struct intel_execution_engine2 *e;
 	intel_ctx_cfg_t cfg = ctx->cfg;
 	struct igt_spin *spin, *n;
+	uint64_t ahnd;
 	IGT_LIST_HEAD(list);
 
 	for_each_ctx_cfg_engine(i915, &cfg, e) {
@@ -147,9 +152,11 @@ static void spin_all(int i915, const intel_ctx_t *ctx, unsigned int flags)
 
 		if (flags & PARALLEL_SPIN_NEW_CTX)
 			ctx = intel_ctx_create(i915, &cfg);
+		ahnd = get_reloc_ahnd(i915, ctx->id);
 
 		/* Prevent preemption so only one is allowed on each engine */
 		spin = igt_spin_new(i915,
+				    .ahnd = ahnd,
 				    .ctx = ctx,
 				    .engine = e->flags,
 				    .flags = (IGT_SPIN_POLL_RUN |
@@ -163,9 +170,11 @@ static void spin_all(int i915, const intel_ctx_t *ctx, unsigned int flags)
 
 	igt_list_for_each_entry_safe(spin, n, &list, link) {
 		igt_assert(gem_bo_busy(i915, spin->handle));
+		ahnd = spin->ahnd;
 		igt_spin_end(spin);
 		gem_sync(i915, spin->handle);
 		igt_spin_free(i915, spin);
+		put_ahnd(ahnd);
 	}
 }
 
@@ -249,12 +258,20 @@ igt_main
 
 #undef test_each_engine
 
-	igt_subtest("spin-each")
-		spin_on_all_engines(fd, ctx, 0, 3);
+	igt_subtest_group {
+		igt_fixture
+			intel_allocator_multiprocess_start();
 
-	igt_subtest("user-each") {
-		igt_require(has_userptr(fd));
-		spin_on_all_engines(fd, ctx, IGT_SPIN_USERPTR, 3);
+		igt_subtest("spin-each")
+			spin_on_all_engines(fd, ctx, 0, 3);
+
+		igt_subtest("user-each") {
+			igt_require(has_userptr(fd));
+			spin_on_all_engines(fd, ctx, IGT_SPIN_USERPTR, 3);
+		}
+
+		igt_fixture
+			intel_allocator_multiprocess_stop();
 	}
 
 	igt_fixture {
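
The spinner conversions in this file (and most below) share one lifetime rule,
sketched here under the same assumptions: the allocator handle must outlive
the spinner, so put_ahnd() comes only after igt_spin_free():

static void spin_once(int fd, const intel_ctx_t *ctx, unsigned int engine)
{
	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
	igt_spin_t *spin;

	spin = igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx, .engine = engine);

	igt_spin_end(spin);
	gem_sync(fd, spin->handle);

	igt_spin_free(fd, spin);
	put_ahnd(ahnd);	/* only after the spinner is gone */
}

When walking a list of spinners, stash spin->ahnd before igt_spin_free()
invalidates the spinner, exactly as spin_all() does above; the same stash
reappears in gem_watchdog and i915_hangman below.
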
diff --git a/tests/i915/gem_tiled_fence_blits.c b/tests/i915/gem_tiled_fence_blits.c
index 6ce3a38d..9ea61f11 100644
--- a/tests/i915/gem_tiled_fence_blits.c
+++ b/tests/i915/gem_tiled_fence_blits.c
@@ -86,18 +86,18 @@ static void check_bo(int fd, uint32_t handle, uint32_t start_val)
 	}
 }
 
-static uint32_t
-create_batch(int fd, struct drm_i915_gem_relocation_entry *reloc)
+static void
+update_batch(int fd, uint32_t bb_handle,
+	     struct drm_i915_gem_relocation_entry *reloc,
+	     uint64_t dst_offset, uint64_t src_offset)
 {
 	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
 	const bool has_64b_reloc = gen >= 8;
 	uint32_t *batch;
-	uint32_t handle;
 	uint32_t pitch;
 	int i = 0;
 
-	handle = gem_create(fd, 4096);
-	batch = gem_mmap__cpu(fd, handle, 0, 4096, PROT_WRITE);
+	batch = gem_mmap__cpu(fd, bb_handle, 0, 4096, PROT_WRITE);
 
 	batch[i] = (XY_SRC_COPY_BLT_CMD |
 		    XY_SRC_COPY_BLT_WRITE_ALPHA |
@@ -117,22 +117,20 @@ create_batch(int fd, struct drm_i915_gem_relocation_entry *reloc)
 	reloc[0].offset = sizeof(*batch) * i;
 	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
 	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
-	batch[i++] = 0;
+	batch[i++] = dst_offset;
 	if (has_64b_reloc)
-		batch[i++] = 0;
+		batch[i++] = dst_offset >> 32;
 
 	batch[i++] = 0; /* src (x1, y1) */
 	batch[i++] = pitch;
 	reloc[1].offset = sizeof(*batch) * i;
 	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
-	batch[i++] = 0;
+	batch[i++] = src_offset;
 	if (has_64b_reloc)
-		batch[i++] = 0;
+		batch[i++] = src_offset >> 32;
 
 	batch[i++] = MI_BATCH_BUFFER_END;
 	munmap(batch, 4096);
-
-	return handle;
 }
 
 static void xchg_u32(void *array, unsigned i, unsigned j)
@@ -144,7 +142,7 @@ static void xchg_u32(void *array, unsigned i, unsigned j)
 	base[j] = tmp;
 }
 
-static void run_test(int fd, int count)
+static void run_test(int fd, int count, uint64_t end)
 {
 	struct drm_i915_gem_relocation_entry reloc[2];
 	struct drm_i915_gem_exec_object2 obj[3];
@@ -152,14 +150,27 @@ static void run_test(int fd, int count)
 	uint32_t *src_order, *dst_order;
 	uint32_t *bo, *bo_start_val;
 	uint32_t start = 0;
+	uint64_t ahnd = 0;
 
+	if (!gem_has_relocations(fd))
+		ahnd = intel_allocator_open_full(fd, 0, 0, end,
+						 INTEL_ALLOCATOR_RELOC,
+						 ALLOC_STRATEGY_LOW_TO_HIGH);
 	memset(reloc, 0, sizeof(reloc));
 	memset(obj, 0, sizeof(obj));
 	obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
 	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
-	obj[2].handle = create_batch(fd, reloc);
+	obj[2].handle = gem_create(fd, 4096);
+	obj[2].offset = get_offset(ahnd, obj[2].handle, 4096, 0);
+	if (ahnd) {
+		obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+		obj[2].flags |= EXEC_OBJECT_PINNED;
+	}
 	obj[2].relocs_ptr = to_user_pointer(reloc);
-	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	obj[2].relocation_count = !ahnd ? ARRAY_SIZE(reloc) : 0;
+	update_batch(fd, obj[2].handle, reloc,
+		     obj[0].offset, obj[1].offset);
 
 	memset(&eb, 0, sizeof(eb));
 	eb.buffers_ptr = to_user_pointer(obj);
@@ -198,7 +209,23 @@ static void run_test(int fd, int count)
 			reloc[0].target_handle = obj[0].handle = bo[dst];
 			reloc[1].target_handle = obj[1].handle = bo[src];
 
+			if (ahnd) {
+				obj[0].offset = get_offset(ahnd, obj[0].handle,
+						sizeof(linear), 0);
+				obj[1].offset = get_offset(ahnd, obj[1].handle,
+						sizeof(linear), 0);
+				obj[2].offset = get_offset(ahnd, obj[2].handle,
+						4096, 0);
+				update_batch(fd, obj[2].handle, reloc,
+					     obj[0].offset, obj[1].offset);
+			}
+
 			gem_execbuf(fd, &eb);
+			if (ahnd) {
+				gem_close(fd, obj[2].handle);
+				obj[2].handle = gem_create(fd, 4096);
+			}
+
 			bo_start_val[dst] = bo_start_val[src];
 		}
 	}
@@ -210,6 +237,7 @@ static void run_test(int fd, int count)
 	free(bo);
 
 	gem_close(fd, obj[2].handle);
+	put_ahnd(ahnd);
 }
 
 #define MAX_32b ((1ull << 32) - 4096)
@@ -217,7 +245,7 @@ static void run_test(int fd, int count)
 igt_main
 {
 	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
-	uint64_t count = 0;
+	uint64_t count = 0, end;
 	int fd;
 
 	igt_fixture {
@@ -229,6 +257,7 @@ igt_main
 		count = gem_mappable_aperture_size(fd); /* thrash fences! */
 		if (count >> 32)
 			count = MAX_32b;
+		end = count;
 		count = 3 + count / (1024 * 1024);
 		igt_require(count > 1);
 		intel_require_memory(count, 1024 * 1024, CHECK_RAM);
@@ -238,12 +267,14 @@ igt_main
 	}
 
 	igt_subtest("basic")
-		run_test (fd, 2);
+		run_test(fd, 2, end);
 
 	igt_subtest("normal") {
+		intel_allocator_multiprocess_start();
 		igt_fork(child, ncpus)
-			run_test(fd, count);
+			run_test(fd, count, end);
 		igt_waitchildren();
+		intel_allocator_multiprocess_stop();
 	}
 
 	igt_fixture
diff --git a/tests/i915/gem_unfence_active_buffers.c b/tests/i915/gem_unfence_active_buffers.c
index 2c9cebb6..532eed2e 100644
--- a/tests/i915/gem_unfence_active_buffers.c
+++ b/tests/i915/gem_unfence_active_buffers.c
@@ -69,11 +69,13 @@ igt_simple_main
 {
 	int i915, num_fences;
 	igt_spin_t *spin;
+	uint64_t ahnd;
 
 	i915 = drm_open_driver(DRIVER_INTEL);
 	igt_require_gem(i915);
 
-	spin = igt_spin_new(i915);
+	ahnd = get_reloc_ahnd(i915, 0);
+	spin = igt_spin_new(i915, .ahnd = ahnd);
 
 	num_fences = gem_available_fences(i915);
 	igt_require(num_fences);
@@ -96,4 +98,5 @@ igt_simple_main
 	}
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
diff --git a/tests/i915/gem_unref_active_buffers.c b/tests/i915/gem_unref_active_buffers.c
index 731190b3..3b8c981d 100644
--- a/tests/i915/gem_unref_active_buffers.c
+++ b/tests/i915/gem_unref_active_buffers.c
@@ -69,11 +69,13 @@ igt_simple_main
 	struct itimerval itv;
 	igt_spin_t *spin;
 	int i915;
+	uint64_t ahnd;
 
 	i915 = drm_open_driver(DRIVER_INTEL);
 	igt_require_gem(i915);
 
-	spin = igt_spin_new(i915);
+	ahnd = get_reloc_ahnd(i915, 0);
+	spin = igt_spin_new(i915, .ahnd = ahnd);
 	fcntl(i915, F_SETFL, fcntl(i915, F_GETFL) | O_NONBLOCK);
 
 	sigaction(SIGALRM, &sa, &old_sa);
@@ -118,4 +120,5 @@ igt_simple_main
 	sigaction(SIGALRM, &old_sa, NULL);
 
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 }
diff --git a/tests/i915/gem_wait.c b/tests/i915/gem_wait.c
index d56707ed..0d1fea99 100644
--- a/tests/i915/gem_wait.c
+++ b/tests/i915/gem_wait.c
@@ -78,11 +78,13 @@ static void invalid_buf(int fd)
 static void basic(int fd, const intel_ctx_t *ctx, unsigned engine,
 		  unsigned flags)
 {
+	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
 	IGT_CORK_HANDLE(cork);
 	uint32_t plug =
 		flags & (WRITE | AWAIT) ? igt_cork_plug(&cork, fd) : 0;
 	igt_spin_t *spin =
 		igt_spin_new(fd,
+			     .ahnd = ahnd,
 			     .ctx = ctx,
 			     .engine = engine,
 			     .dependency = plug,
@@ -147,6 +149,7 @@ static void basic(int fd, const intel_ctx_t *ctx, unsigned engine,
 	if (plug)
 		gem_close(fd, plug);
 	igt_spin_free(fd, spin);
+	put_ahnd(ahnd);
 }
 
 static void test_all_engines(const char *name, int i915, const intel_ctx_t *ctx,
diff --git a/tests/i915/gem_watchdog.c b/tests/i915/gem_watchdog.c
index 4d4aaee4..db562335 100644
--- a/tests/i915/gem_watchdog.c
+++ b/tests/i915/gem_watchdog.c
@@ -111,10 +111,13 @@ static void physical(int i915, const intel_ctx_t *ctx)
 	unsigned int num_engines, i, count;
 	const struct intel_execution_engine2 *e;
 	igt_spin_t *spin[GEM_MAX_ENGINES];
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	i = 0;
 	for_each_ctx_engine(i915, ctx, e) {
-		spin[i] = igt_spin_new(i915, .ctx = ctx,
+		spin[i] = igt_spin_new(i915,
+				       .ahnd = ahnd,
+				       .ctx = ctx,
 				       .engine = e->flags,
 				       .flags = spin_flags());
 		i++;
@@ -125,6 +128,7 @@ static void physical(int i915, const intel_ctx_t *ctx)
 
 	for (i = 0; i < num_engines; i++)
 		igt_spin_free(i915, spin[i]);
+	put_ahnd(ahnd);
 
 	igt_assert_eq(count, num_engines);
 }
@@ -216,6 +220,7 @@ static void virtual(int i915, const intel_ctx_cfg_t *base_cfg)
 	unsigned int expect = num_engines;
 	intel_ctx_cfg_t cfg = {};
 	const intel_ctx_t *ctx[num_engines];
+	uint64_t ahnd;
 
 	igt_require(gem_has_execlists(i915));
 
@@ -239,10 +244,12 @@ static void virtual(int i915, const intel_ctx_cfg_t *base_cfg)
 			igt_assert(i < num_engines);
 
 			ctx[i] = intel_ctx_create(i915, &cfg);
+			ahnd = get_reloc_ahnd(i915, ctx[i]->id);
 
 			set_load_balancer(i915, ctx[i]->id, ci, count, NULL);
 
 			spin[i] = igt_spin_new(i915,
+					       .ahnd = ahnd,
 					       .ctx = ctx[i],
 					       .flags = spin_flags());
 			i++;
@@ -254,8 +261,10 @@ static void virtual(int i915, const intel_ctx_cfg_t *base_cfg)
 	count = wait_timeout(i915, spin, num_engines, wait_us, expect);
 
 	for (i = 0; i < num_engines && spin[i]; i++) {
+		ahnd = spin[i]->ahnd;
 		igt_spin_free(i915, spin[i]);
 		intel_ctx_destroy(i915, ctx[i]);
+		put_ahnd(ahnd);
 	}
 
 	igt_assert_eq(count, expect);
diff --git a/tests/i915/gem_workarounds.c b/tests/i915/gem_workarounds.c
index e240901c..3d185127 100644
--- a/tests/i915/gem_workarounds.c
+++ b/tests/i915/gem_workarounds.c
@@ -94,6 +94,7 @@ static int workaround_fail_count(int i915, const intel_ctx_t *ctx)
 	uint32_t *base, *out;
 	igt_spin_t *spin;
 	int fw, fail = 0;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	reloc = calloc(num_wa_regs, sizeof(*reloc));
 	igt_assert(reloc);
@@ -109,7 +110,13 @@ static int workaround_fail_count(int i915, const intel_ctx_t *ctx)
 	gem_set_caching(i915, obj[0].handle, I915_CACHING_CACHED);
 	obj[1].handle = gem_create(i915, batch_sz);
 	obj[1].relocs_ptr = to_user_pointer(reloc);
-	obj[1].relocation_count = num_wa_regs;
+	obj[1].relocation_count = !ahnd ? num_wa_regs : 0;
+	if (ahnd) {
+		obj[0].offset = get_offset(ahnd, obj[0].handle, result_sz, 0);
+		obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[1].offset = get_offset(ahnd, obj[1].handle, batch_sz, 0);
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+	}
 
 	out = base =
 		gem_mmap__cpu(i915, obj[1].handle, 0, batch_sz, PROT_WRITE);
@@ -121,9 +128,9 @@ static int workaround_fail_count(int i915, const intel_ctx_t *ctx)
 		reloc[i].delta = i * sizeof(uint32_t);
 		reloc[i].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
 		reloc[i].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-		*out++ = reloc[i].delta;
+		*out++ = obj[0].offset + reloc[i].delta;
 		if (gen >= 8)
-			*out++ = 0;
+			*out++ = (obj[0].offset + reloc[i].delta) >> 32;
 	}
 	*out++ = MI_BATCH_BUFFER_END;
 	munmap(base, batch_sz);
@@ -136,7 +143,8 @@ static int workaround_fail_count(int i915, const intel_ctx_t *ctx)
 
 	gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
 
-	spin = igt_spin_new(i915, .ctx = ctx, .flags = IGT_SPIN_POLL_RUN);
+	spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx,
+			    .flags = IGT_SPIN_POLL_RUN);
 	igt_spin_busywait_until_started(spin);
 
 	fw = igt_open_forcewake_handle(i915);
@@ -172,6 +180,7 @@ static int workaround_fail_count(int i915, const intel_ctx_t *ctx)
 
 	close(fw);
 	igt_spin_free(i915, spin);
+	put_ahnd(ahnd);
 
 	gem_close(i915, obj[1].handle);
 	gem_close(i915, obj[0].handle);
diff --git a/tests/i915/i915_hangman.c b/tests/i915/i915_hangman.c
index ddead949..4c18c22d 100644
--- a/tests/i915/i915_hangman.c
+++ b/tests/i915/i915_hangman.c
@@ -211,10 +211,12 @@ static void test_error_state_capture(const intel_ctx_t *ctx, unsigned ring_id,
 	uint32_t *batch;
 	igt_hang_t hang;
 	uint64_t offset;
+	uint64_t ahnd = get_reloc_ahnd(device, ctx->id);
 
 	clear_error_state();
 
-	hang = igt_hang_ctx(device, ctx->id, ring_id, HANG_ALLOW_CAPTURE);
+	hang = igt_hang_ctx_with_ahnd(device, ahnd, ctx->id, ring_id,
+				      HANG_ALLOW_CAPTURE);
 	offset = hang.spin->obj[IGT_SPIN_BATCH].offset;
 
 	batch = gem_mmap__cpu(device, hang.spin->handle, 0, 4096, PROT_READ);
@@ -224,6 +226,7 @@ static void test_error_state_capture(const intel_ctx_t *ctx, unsigned ring_id,
 
 	check_error_state(ring_name, offset, batch);
 	munmap(batch, 4096);
+	put_ahnd(ahnd);
 }
 
 static void
@@ -234,6 +237,7 @@ test_engine_hang(const intel_ctx_t *ctx,
 	const intel_ctx_t *tmp_ctx;
 	igt_spin_t *spin, *next;
 	IGT_LIST_HEAD(list);
+	uint64_t ahnd = get_reloc_ahnd(device, ctx->id), ahndN;
 
 	igt_skip_on(flags & IGT_SPIN_INVALID_CS &&
 		    gem_engine_has_cmdparser(device, &ctx->cfg, e->flags));
@@ -244,7 +248,10 @@ test_engine_hang(const intel_ctx_t *ctx,
 			continue;
 
 		tmp_ctx = intel_ctx_create(device, &ctx->cfg);
-		spin = __igt_spin_new(device, .ctx = tmp_ctx,
+		ahndN = get_reloc_ahnd(device, tmp_ctx->id);
+		spin = __igt_spin_new(device,
+				      .ahnd = ahndN,
+				      .ctx = tmp_ctx,
 				      .engine = other->flags,
 				      .flags = IGT_SPIN_FENCE_OUT);
 		intel_ctx_destroy(device, tmp_ctx);
@@ -254,6 +261,7 @@ test_engine_hang(const intel_ctx_t *ctx,
 
 	/* And on the target engine, we hang */
 	spin = igt_spin_new(device,
+			    .ahnd = ahnd,
 			    .ctx = ctx,
 			    .engine = e->flags,
 			    .flags = (IGT_SPIN_FENCE_OUT |
@@ -267,13 +275,16 @@ test_engine_hang(const intel_ctx_t *ctx,
 
 	/* But no other engines/clients should be affected */
 	igt_list_for_each_entry_safe(spin, next, &list, link) {
+		ahndN = spin->ahnd;
 		igt_assert(sync_fence_wait(spin->out_fence, 0) == -ETIME);
 		igt_spin_end(spin);
 
 		igt_assert(sync_fence_wait(spin->out_fence, 500) == 0);
 		igt_assert_eq(sync_fence_status(spin->out_fence), 1);
 		igt_spin_free(device, spin);
+		put_ahnd(ahndN);
 	}
+	put_ahnd(ahnd);
 }
 
 /* This test covers the case where we end up in an uninitialised area of the
diff --git a/tests/i915/i915_module_load.c b/tests/i915/i915_module_load.c
index 98ceb5d8..993c7832 100644
--- a/tests/i915/i915_module_load.c
+++ b/tests/i915/i915_module_load.c
@@ -42,6 +42,7 @@ static void store_all(int i915)
 	const unsigned int gen = intel_gen(intel_get_drm_devid(i915));
 	uint32_t engines[I915_EXEC_RING_MASK + 1];
 	uint32_t batch[16];
+	uint64_t ahnd, offset, bb_offset;
 	unsigned int sz = ALIGN(sizeof(batch) * ARRAY_SIZE(engines), 4096);
 	struct drm_i915_gem_relocation_entry reloc = {
 		.offset = sizeof(uint32_t),
@@ -91,6 +92,12 @@ static void store_all(int i915)
 	cs = gem_mmap__device_coherent(i915, obj[1].handle, 0, sz, PROT_WRITE);
 
 	ctx = intel_ctx_create_all_physical(i915);
+	ahnd = get_reloc_ahnd(i915, ctx->id);
+	if (ahnd)
+		obj[1].relocation_count = 0;
+	bb_offset = get_offset(ahnd, obj[1].handle, sz, 4096);
+	offset = get_offset(ahnd, obj[0].handle, sizeof(engines), 0);
+
 	for_each_ctx_engine(i915, ctx, e) {
 		uint64_t addr;
 
@@ -100,6 +107,16 @@ static void store_all(int i915)
 		if (!gem_class_can_store_dword(i915, e->class))
 			continue;
 
+		if (ahnd) {
+			i = 1;
+			batch[i++] = offset + reloc.delta;
+			batch[i++] = (offset + reloc.delta) >> 32;
+			obj[0].offset = offset;
+			obj[0].flags |= EXEC_OBJECT_PINNED;
+			obj[1].offset = bb_offset;
+			obj[1].flags |= EXEC_OBJECT_PINNED;
+		}
+
 		batch[value] = nengine;
 
 		execbuf.flags = e->flags;
@@ -109,7 +126,8 @@ static void store_all(int i915)
 		execbuf.rsvd1 = ctx->id;
 
 		memcpy(cs + execbuf.batch_start_offset, batch, sizeof(batch));
-		memcpy(cs + reloc.offset, &addr, reloc_sz);
+		if (!ahnd)
+			memcpy(cs + reloc.offset, &addr, reloc_sz);
 		gem_execbuf(i915, &execbuf);
 
 		if (++nengine == ARRAY_SIZE(engines))
@@ -126,6 +144,9 @@ static void store_all(int i915)
 	gem_read(i915, obj[0].handle, 0, engines, nengine * sizeof(engines[0]));
 	gem_close(i915, obj[0].handle);
 	intel_ctx_destroy(i915, ctx);
+	put_offset(ahnd, obj[0].handle);
+	put_offset(ahnd, obj[1].handle);
+	put_ahnd(ahnd);
 
 	for (i = 0; i < nengine; i++)
 		igt_assert_eq_u32(engines[i], i);
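
store_all() is the one conversion here that pairs get_offset() with
put_offset(); the intended discipline, as a sketch under the same assumptions
(offsets stay reserved per handle until released):

static void use_and_release(int fd, uint64_t ahnd, uint32_t handle,
			    uint64_t size)
{
	uint64_t offset = get_offset(ahnd, handle, size, 0);

	/* ... execbufs that pin handle at offset ... */

	/* Release in this order, mirroring store_all() above. */
	put_offset(ahnd, handle);
	put_ahnd(ahnd);
}
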
diff --git a/tests/i915/i915_pm_rc6_residency.c b/tests/i915/i915_pm_rc6_residency.c
index d1cce474..96a95140 100644
--- a/tests/i915/i915_pm_rc6_residency.c
+++ b/tests/i915/i915_pm_rc6_residency.c
@@ -458,7 +458,7 @@ static void rc6_fence(int i915)
 	const intel_ctx_t *ctx;
 	struct power_sample sample[2];
 	unsigned long slept;
-	uint64_t rc6, ts[2];
+	uint64_t rc6, ts[2], ahnd;
 	struct rapl rapl;
 	int fd;
 
@@ -486,6 +486,7 @@ static void rc6_fence(int i915)
 
 	/* Submit but delay execution, we should be idle and conserving power */
 	ctx = intel_ctx_create_all_physical(i915);
+	ahnd = get_reloc_ahnd(i915, ctx->id);
 	for_each_ctx_engine(i915, ctx, e) {
 		igt_spin_t *spin;
 		int timeline;
@@ -493,7 +494,9 @@ static void rc6_fence(int i915)
 
 		timeline = sw_sync_timeline_create();
 		fence = sw_sync_timeline_create_fence(timeline, 1);
-		spin = igt_spin_new(i915, .ctx = ctx,
+		spin = igt_spin_new(i915,
+				    .ahnd = ahnd,
+				    .ctx = ctx,
 				    .engine = e->flags,
 				    .fence = fence,
 				    .flags = IGT_SPIN_FENCE_IN);
@@ -522,6 +525,7 @@ static void rc6_fence(int i915)
 		gem_quiescent_gpu(i915);
 	}
 	intel_ctx_destroy(i915, ctx);
+	put_ahnd(ahnd);
 
 	rapl_close(&rapl);
 	close(fd);
diff --git a/tests/i915/i915_pm_rpm.c b/tests/i915/i915_pm_rpm.c
index 39e0064a..62720d02 100644
--- a/tests/i915/i915_pm_rpm.c
+++ b/tests/i915/i915_pm_rpm.c
@@ -1178,7 +1178,8 @@ static void gem_pread_subtest(void)
 
 /* Paints a square of color $color, size $width x $height, at position $x x $y
  * of $dst_handle, which contains pitch $pitch. */
-static void submit_blt_cmd(uint32_t dst_handle, uint16_t x, uint16_t y,
+static void submit_blt_cmd(uint32_t dst_handle, int dst_size,
+			   uint16_t x, uint16_t y,
 			   uint16_t width, uint16_t height, uint32_t pitch,
 			   uint32_t color, uint32_t *presumed_dst_offset)
 {
@@ -1190,6 +1191,12 @@ static void submit_blt_cmd(uint32_t dst_handle, uint16_t x, uint16_t y,
 	struct drm_i915_gem_exec_object2 objs[2] = {{}, {}};
 	struct drm_i915_gem_relocation_entry relocs[1] = {{}};
 	struct drm_i915_gem_wait gem_wait;
+	uint64_t ahnd = get_reloc_ahnd(drm_fd, 0), dst_offset;
+
+	if (ahnd)
+		dst_offset = get_offset(ahnd, dst_handle, dst_size, 0);
+	else
+		dst_offset = *presumed_dst_offset;
 
 	i = 0;
 
@@ -1205,9 +1212,9 @@ static void submit_blt_cmd(uint32_t dst_handle, uint16_t x, uint16_t y,
 	batch_buf[i++] = (y << 16) | x;
 	batch_buf[i++] = ((y + height) << 16) | (x + width);
 	reloc_pos = i;
-	batch_buf[i++] = *presumed_dst_offset;
+	batch_buf[i++] = dst_offset;
 	if (intel_gen(ms_data.devid) >= 8)
-		batch_buf[i++] = 0;
+		batch_buf[i++] = dst_offset >> 32;
 	batch_buf[i++] = color;
 
 	batch_buf[i++] = MI_BATCH_BUFFER_END;
@@ -1230,9 +1237,16 @@ static void submit_blt_cmd(uint32_t dst_handle, uint16_t x, uint16_t y,
 	objs[0].alignment = 64;
 
 	objs[1].handle = batch_handle;
-	objs[1].relocation_count = 1;
+	objs[1].relocation_count = !ahnd ? 1 : 0;
 	objs[1].relocs_ptr = (uintptr_t)relocs;
 
+	if (ahnd) {
+		objs[0].offset = dst_offset;
+		objs[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		objs[1].offset = get_offset(ahnd, batch_handle, batch_size, 0);
+		objs[1].flags = EXEC_OBJECT_PINNED;
+	}
+
 	execbuf.buffers_ptr = (uintptr_t)objs;
 	execbuf.buffer_count = 2;
 	execbuf.batch_len = batch_size;
@@ -1253,6 +1267,7 @@ static void submit_blt_cmd(uint32_t dst_handle, uint16_t x, uint16_t y,
 	do_ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &gem_wait);
 
 	gem_close(drm_fd, batch_handle);
+	put_ahnd(ahnd);
 }
 
 /* Make sure we can submit a batch buffer and verify its result. */
@@ -1285,7 +1300,7 @@ static void gem_execbuf_subtest(void)
 	disable_all_screens_and_wait(&ms_data);
 
 	color = 0x12345678;
-	submit_blt_cmd(handle, sq_x, sq_y, sq_w, sq_h, pitch, color,
+	submit_blt_cmd(handle, dst_size, sq_x, sq_y, sq_w, sq_h, pitch, color,
 		       &presumed_offset);
 	igt_assert(wait_for_suspended());
 
@@ -1324,7 +1339,7 @@ static void gem_execbuf_subtest(void)
 	 * suspended. We use the same spot, but a different color. As a bonus,
 	 * we're testing the presumed_offset from the previous command. */
 	color = 0x87654321;
-	submit_blt_cmd(handle, sq_x, sq_y, sq_w, sq_h, pitch, color,
+	submit_blt_cmd(handle, dst_size, sq_x, sq_y, sq_w, sq_h, pitch, color,
 		       &presumed_offset);
 
 	disable_all_screens_and_wait(&ms_data);
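
submit_blt_cmd() now takes the destination size because the blit address can
come from two sources; the selection reduces to a helper like this (name
illustrative, same assumptions):

static uint64_t blt_dst_address(uint64_t ahnd, uint32_t handle,
				uint64_t size, uint64_t presumed)
{
	/*
	 * With an allocator the returned address is authoritative and the
	 * object gets pinned there; without one we emit the presumed offset
	 * and the relocation lets the kernel patch it on a miss.
	 */
	return ahnd ? get_offset(ahnd, handle, size, 0) : presumed;
}
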
diff --git a/tests/i915/i915_pm_rps.c b/tests/i915/i915_pm_rps.c
index f51a4747..ada06aa9 100644
--- a/tests/i915/i915_pm_rps.c
+++ b/tests/i915/i915_pm_rps.c
@@ -250,6 +250,10 @@ static void load_helper_run(enum load load)
 		igt_spin_t *spin[2] = {};
 		bool prev_load;
 		uint32_t handle;
+		uint64_t ahnd;
+
+		intel_allocator_init();
+		ahnd = get_reloc_ahnd(drm_fd, 0);
 
 		signal(SIGTERM, load_helper_signal_handler);
 		signal(SIGUSR2, load_helper_signal_handler);
@@ -257,9 +261,9 @@ static void load_helper_run(enum load load)
 		igt_debug("Applying %s load...\n", lh.load ? "high" : "low");
 
 		prev_load = lh.load == HIGH;
-		spin[0] = __igt_spin_new(drm_fd);
+		spin[0] = __igt_spin_new(drm_fd, .ahnd = ahnd);
 		if (prev_load)
-			spin[1] = __igt_spin_new(drm_fd);
+			spin[1] = __igt_spin_new(drm_fd, .ahnd = ahnd);
 		prev_load = !prev_load; /* send the initial signal */
 		while (!lh.exit) {
 			bool high_load;
@@ -279,7 +283,7 @@ static void load_helper_run(enum load load)
 			} else {
 				spin[0] = spin[1];
 			}
-			spin[high_load] = __igt_spin_new(drm_fd);
+			spin[high_load] = __igt_spin_new(drm_fd, .ahnd = ahnd);
 
 			if (lh.signal && high_load != prev_load) {
 				write(lh.link, &lh.signal, sizeof(lh.signal));
@@ -310,6 +314,7 @@ static void load_helper_run(enum load load)
 
 		igt_spin_free(drm_fd, spin[1]);
 		igt_spin_free(drm_fd, spin[0]);
+		put_ahnd(ahnd);
 	}
 
 	close(lh.link);
@@ -542,11 +547,13 @@ static void boost_freq(int fd, int *boost_freqs)
 {
 	int64_t timeout = 1;
 	igt_spin_t *load;
+	/* Keep the offset of load->handle stable for the dependency spin */
+	uint64_t ahnd = get_simple_l2h_ahnd(fd, 0);
 
-	load = igt_spin_new(fd);
+	load = igt_spin_new(fd, .ahnd = ahnd);
 
 	/* Strip off extra fences from the object, and keep it from starting */
-	igt_spin_free(fd, igt_spin_new(fd, .dependency = load->handle));
+	igt_spin_free(fd, igt_spin_new(fd, .ahnd = ahnd, .dependency = load->handle));
 
 	/* Waiting will grant us a boost to maximum */
 	gem_wait(fd, load->handle, &timeout);
@@ -558,6 +566,7 @@ static void boost_freq(int fd, int *boost_freqs)
 	igt_spin_end(load);
 	gem_sync(fd, load->handle);
 	igt_spin_free(fd, load);
+	put_ahnd(ahnd);
 }
 
 static void waitboost(int fd, bool reset)
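
The load helper above runs in a freshly forked process, hence the explicit
intel_allocator_init() before the first allocator call; the same ordering
appears again in perf_pmu's accuracy() below. As a sketch (assuming
intel_allocator_init() sets up the per-process allocator state):

static void forked_load_helper(int fd)
{
	igt_fork(child, 1) {
		igt_spin_t *spin;
		uint64_t ahnd;

		/* New process: initialise allocator state before first use. */
		intel_allocator_init();
		ahnd = get_reloc_ahnd(fd, 0);

		spin = __igt_spin_new(fd, .ahnd = ahnd);
		igt_spin_end(spin);
		igt_spin_free(fd, spin);
		put_ahnd(ahnd);
	}
	igt_waitchildren();
}
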
diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c
index 10dc3bf2..924f39d1 100644
--- a/tests/i915/perf_pmu.c
+++ b/tests/i915/perf_pmu.c
@@ -174,10 +174,11 @@ static unsigned int measured_usleep(unsigned int usec)
 #define FLAG_HANG (32)
 #define TEST_S3 (64)
 
-static igt_spin_t * __spin_poll(int fd, const intel_ctx_t *ctx,
-				const struct intel_execution_engine2 *e)
+static igt_spin_t *__spin_poll(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
+			       const struct intel_execution_engine2 *e)
 {
 	struct igt_spin_factory opts = {
+		.ahnd = ahnd,
 		.ctx = ctx,
 		.engine = e->flags,
 	};
@@ -217,25 +218,26 @@ static unsigned long __spin_wait(int fd, igt_spin_t *spin)
 	return igt_nsec_elapsed(&start);
 }
 
-static igt_spin_t * __spin_sync(int fd, const intel_ctx_t *ctx,
-				const struct intel_execution_engine2 *e)
+static igt_spin_t *__spin_sync(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
+			       const struct intel_execution_engine2 *e)
 {
-	igt_spin_t *spin = __spin_poll(fd, ctx, e);
+	igt_spin_t *spin = __spin_poll(fd, ahnd, ctx, e);
 
 	__spin_wait(fd, spin);
 
 	return spin;
 }
 
-static igt_spin_t * spin_sync(int fd, const intel_ctx_t *ctx,
-			      const struct intel_execution_engine2 *e)
+static igt_spin_t *spin_sync(int fd, uint64_t ahnd, const intel_ctx_t *ctx,
+			     const struct intel_execution_engine2 *e)
 {
 	igt_require_gem(fd);
 
-	return __spin_sync(fd, ctx, e);
+	return __spin_sync(fd, ahnd, ctx, e);
 }
 
-static igt_spin_t * spin_sync_flags(int fd, const intel_ctx_t *ctx, unsigned int flags)
+static igt_spin_t *spin_sync_flags(int fd, uint64_t ahnd,
+				   const intel_ctx_t *ctx, unsigned int flags)
 {
 	struct intel_execution_engine2 e = { };
 
@@ -244,7 +246,7 @@ static igt_spin_t * spin_sync_flags(int fd, const intel_ctx_t *ctx, unsigned int
 		     (I915_EXEC_BSD | I915_EXEC_BSD_RING2) ? 1 : 0;
 	e.flags = flags;
 
-	return spin_sync(fd, ctx, &e);
+	return spin_sync(fd, ahnd, ctx, &e);
 }
 
 static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
@@ -286,11 +288,12 @@ single(int gem_fd, const intel_ctx_t *ctx,
 	igt_spin_t *spin;
 	uint64_t val;
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance));
 
 	if (flags & TEST_BUSY)
-		spin = spin_sync(gem_fd, ctx, e);
+		spin = spin_sync(gem_fd, ahnd, ctx, e);
 	else
 		spin = NULL;
 
@@ -321,6 +324,7 @@ single(int gem_fd, const intel_ctx_t *ctx,
 
 	igt_spin_free(gem_fd, spin);
 	close(fd);
+	put_ahnd(ahnd);
 
 	gem_quiescent_gpu(gem_fd);
 }
@@ -333,6 +337,7 @@ busy_start(int gem_fd, const intel_ctx_t *ctx,
 	uint64_t val, ts[2];
 	igt_spin_t *spin;
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	/*
 	 * Defeat the busy stats delayed disable, we need to guarantee we are
@@ -340,7 +345,7 @@ busy_start(int gem_fd, const intel_ctx_t *ctx,
 	 */
 	sleep(2);
 
-	spin = __spin_sync(gem_fd, ctx, e);
+	spin = __spin_sync(gem_fd, ahnd, ctx, e);
 
 	fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance));
 
@@ -351,6 +356,7 @@ busy_start(int gem_fd, const intel_ctx_t *ctx,
 
 	igt_spin_free(gem_fd, spin);
 	close(fd);
+	put_ahnd(ahnd);
 
 	assert_within_epsilon(val, ts[1] - ts[0], tolerance);
 	gem_quiescent_gpu(gem_fd);
@@ -370,8 +376,10 @@ busy_double_start(int gem_fd, const intel_ctx_t *ctx,
 	igt_spin_t *spin[2];
 	const intel_ctx_t *tmp_ctx;
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id), ahndN;
 
 	tmp_ctx = intel_ctx_create(gem_fd, &ctx->cfg);
+	ahndN = get_reloc_ahnd(gem_fd, tmp_ctx->id);
 
 	/*
 	 * Defeat the busy stats delayed disable, we need to guarantee we are
@@ -384,9 +392,10 @@ busy_double_start(int gem_fd, const intel_ctx_t *ctx,
 	 * re-submission in execlists mode. Make sure busyness is correctly
 	 * reported with the engine busy, and after the engine went idle.
 	 */
-	spin[0] = __spin_sync(gem_fd, ctx, e);
+	spin[0] = __spin_sync(gem_fd, ahnd, ctx, e);
 	usleep(500e3);
 	spin[1] = __igt_spin_new(gem_fd,
+				 .ahnd = ahndN,
 				 .ctx = tmp_ctx,
 				 .engine = e->flags);
 
@@ -419,6 +428,8 @@ busy_double_start(int gem_fd, const intel_ctx_t *ctx,
 	close(fd);
 
 	intel_ctx_destroy(gem_fd, tmp_ctx);
+	put_ahnd(ahnd);
+	put_ahnd(ahndN);
 
 	assert_within_epsilon(val, ts[1] - ts[0], tolerance);
 	igt_assert_eq(val2, 0);
@@ -457,6 +468,7 @@ busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 	int fd[num_engines];
 	unsigned long slept;
 	igt_spin_t *spin;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	i = 0;
 	fd[0] = -1;
@@ -472,7 +484,7 @@ busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 
 	igt_assert_eq(i, num_engines);
 
-	spin = spin_sync(gem_fd, ctx, e);
+	spin = spin_sync(gem_fd, ahnd, ctx, e);
 	pmu_read_multi(fd[0], num_engines, tval[0]);
 	slept = measured_usleep(batch_duration_ns / 1000);
 	if (flags & TEST_TRAILING_IDLE)
@@ -483,6 +495,7 @@ busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 	igt_spin_free(gem_fd, spin);
 	for (i = 0; i < num_engines; i++)
 		close(fd[i]);
+	put_ahnd(ahnd);
 
 	for (i = 0; i < num_engines; i++)
 		val[i] = tval[1][i] - tval[0][i];
@@ -524,6 +537,7 @@ most_busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 	unsigned long slept;
 	igt_spin_t *spin = NULL;
 	unsigned int idle_idx, i;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	i = 0;
 	for_each_ctx_engine(gem_fd, ctx, e_) {
@@ -532,7 +546,7 @@ most_busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 		else if (spin)
 			__submit_spin(gem_fd, spin, e_, 64);
 		else
-			spin = __spin_poll(gem_fd, ctx, e_);
+			spin = __spin_poll(gem_fd, ahnd, ctx, e_);
 
 		val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance);
 	}
@@ -556,6 +570,7 @@ most_busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 	igt_spin_free(gem_fd, spin);
 	for (i = 0; i < num_engines; i++)
 		close(fd[i]);
+	put_ahnd(ahnd);
 
 	for (i = 0; i < num_engines; i++)
 		val[i] = tval[1][i] - tval[0][i];
@@ -583,13 +598,14 @@ all_busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 	unsigned long slept;
 	igt_spin_t *spin = NULL;
 	unsigned int i;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	i = 0;
 	for_each_ctx_engine(gem_fd, ctx, e) {
 		if (spin)
 			__submit_spin(gem_fd, spin, e, 64);
 		else
-			spin = __spin_poll(gem_fd, ctx, e);
+			spin = __spin_poll(gem_fd, ahnd, ctx, e);
 
 		val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance);
 	}
@@ -612,6 +628,7 @@ all_busy_check_all(int gem_fd, const intel_ctx_t *ctx,
 	igt_spin_free(gem_fd, spin);
 	for (i = 0; i < num_engines; i++)
 		close(fd[i]);
+	put_ahnd(ahnd);
 
 	for (i = 0; i < num_engines; i++)
 		val[i] = tval[1][i] - tval[0][i];
@@ -631,6 +648,7 @@ no_sema(int gem_fd, const intel_ctx_t *ctx,
 	igt_spin_t *spin;
 	uint64_t val[2][2];
 	int fd[2];
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	fd[0] = open_group(gem_fd, I915_PMU_ENGINE_SEMA(e->class, e->instance),
 			   -1);
@@ -638,7 +656,7 @@ no_sema(int gem_fd, const intel_ctx_t *ctx,
 			   fd[0]);
 
 	if (flags & TEST_BUSY)
-		spin = spin_sync(gem_fd, ctx, e);
+		spin = spin_sync(gem_fd, ahnd, ctx, e);
 	else
 		spin = NULL;
 
@@ -657,6 +675,7 @@ no_sema(int gem_fd, const intel_ctx_t *ctx,
 	}
 	close(fd[0]);
 	close(fd[1]);
+	put_ahnd(ahnd);
 
 	assert_within_epsilon(val[0][0], 0.0f, tolerance);
 	assert_within_epsilon(val[0][1], 0.0f, tolerance);
@@ -682,6 +701,8 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx,
 	uint32_t batch[16];
 	uint64_t val[2], ts[2];
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
+	uint64_t obj_offset, bb_offset;
 
 	igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
 
@@ -693,19 +714,21 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx,
 
 	bb_handle = gem_create(gem_fd, 4096);
 	obj_handle = gem_create(gem_fd, 4096);
+	bb_offset = get_offset(ahnd, bb_handle, 4096, 0);
+	obj_offset = get_offset(ahnd, obj_handle, 4096, 0);
 
 	obj_ptr = gem_mmap__wc(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
 
 	batch[0] = MI_STORE_DWORD_IMM;
-	batch[1] = sizeof(*obj_ptr);
-	batch[2] = 0;
+	batch[1] = obj_offset + sizeof(*obj_ptr);
+	batch[2] = (obj_offset + sizeof(*obj_ptr)) >> 32;
 	batch[3] = 1;
 	batch[4] = MI_SEMAPHORE_WAIT |
 		   MI_SEMAPHORE_POLL |
 		   MI_SEMAPHORE_SAD_GTE_SDD;
 	batch[5] = 1;
-	batch[6] = 0x0;
-	batch[7] = 0x0;
+	batch[6] = obj_offset;
+	batch[7] = obj_offset >> 32;
 	batch[8] = MI_BATCH_BUFFER_END;
 
 	gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
@@ -723,7 +746,7 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx,
 	obj[0].handle = obj_handle;
 
 	obj[1].handle = bb_handle;
-	obj[1].relocation_count = 2;
+	obj[1].relocation_count = !ahnd ? 2 : 0;
 	obj[1].relocs_ptr = to_user_pointer(reloc);
 
 	eb.buffer_count = 2;
@@ -731,6 +754,13 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx,
 	eb.flags = e->flags;
 	eb.rsvd1 = ctx->id;
 
+	if (ahnd) {
+		obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[0].offset = obj_offset;
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+		obj[1].offset = bb_offset;
+	}
+
 	/**
 	 * Start the semaphore wait PMU and after some known time let the above
 	 * semaphore wait command finish. Then check that the PMU is reporting
@@ -766,12 +796,14 @@ sema_wait(int gem_fd, const intel_ctx_t *ctx,
 	gem_close(gem_fd, obj_handle);
 	gem_close(gem_fd, bb_handle);
 	close(fd);
+	put_ahnd(ahnd);
 
 	assert_within_epsilon(val[1] - val[0], slept, tolerance);
 }
 
 static uint32_t
-create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
+create_sema(int gem_fd, uint64_t ahnd,
+	    struct drm_i915_gem_relocation_entry *reloc, __u64 *poffset)
 {
 	uint32_t cs[] = {
 		/* Reset our semaphore wait */
@@ -788,7 +820,12 @@ create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
 
 		MI_BATCH_BUFFER_END
 	};
-	uint32_t handle = gem_create(gem_fd, 4096);
+	uint32_t handle;
+
+	igt_assert(poffset);
+
+	handle = gem_create(gem_fd, 4096);
+	*poffset = get_offset(ahnd, handle, 4096, 0);
 
 	memset(reloc, 0, 2 * sizeof(*reloc));
 	reloc[0].target_handle = handle;
@@ -796,12 +833,19 @@ create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
 	reloc[1].target_handle = handle;
 	reloc[1].offset = 64 + 6 * sizeof(uint32_t);
 
+	if (ahnd) {
+		cs[1] = *poffset;
+		cs[2] = *poffset >> 32;
+		cs[6] = *poffset;
+		cs[7] = *poffset >> 32;
+	}
+
 	gem_write(gem_fd, handle, 64, cs, sizeof(cs));
 	return handle;
 }
 
 static void
-__sema_busy(int gem_fd, int pmu, const intel_ctx_t *ctx,
+__sema_busy(int gem_fd, uint64_t ahnd, int pmu, const intel_ctx_t *ctx,
 	    const struct intel_execution_engine2 *e,
 	    int sema_pct,
 	    int busy_pct)
@@ -814,8 +858,8 @@ __sema_busy(int gem_fd, int pmu, const intel_ctx_t *ctx,
 	uint64_t start[2], val[2];
 	struct drm_i915_gem_relocation_entry reloc[2];
 	struct drm_i915_gem_exec_object2 obj = {
-		.handle = create_sema(gem_fd, reloc),
-		.relocation_count = 2,
+		.handle = create_sema(gem_fd, ahnd, reloc, &obj.offset),
+		.relocation_count = !ahnd ? 2 : 0,
 		.relocs_ptr = to_user_pointer(reloc),
 	};
 	struct drm_i915_gem_execbuffer2 eb = {
@@ -835,7 +879,7 @@ __sema_busy(int gem_fd, int pmu, const intel_ctx_t *ctx,
 
 	map = gem_mmap__wc(gem_fd, obj.handle, 0, 4096, PROT_WRITE);
 	gem_execbuf(gem_fd, &eb);
-	spin = igt_spin_new(gem_fd, .ctx = ctx, .engine = e->flags);
+	spin = igt_spin_new(gem_fd, .ahnd = ahnd, .ctx = ctx, .engine = e->flags);
 
 	/* Wait until the batch is executed and the semaphore is busy-waiting */
 	while (!READ_ONCE(*map) && gem_bo_busy(gem_fd, obj.handle))
@@ -880,6 +924,7 @@ sema_busy(int gem_fd, const intel_ctx_t *ctx,
 	  unsigned int flags)
 {
 	int fd[2];
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
 
@@ -888,12 +933,13 @@ sema_busy(int gem_fd, const intel_ctx_t *ctx,
 	fd[1] = open_group(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance),
 			   fd[0]);
 
-	__sema_busy(gem_fd, fd[0], ctx, e, 50, 100);
-	__sema_busy(gem_fd, fd[0], ctx, e, 25, 50);
-	__sema_busy(gem_fd, fd[0], ctx, e, 75, 75);
+	__sema_busy(gem_fd, ahnd, fd[0], ctx, e, 50, 100);
+	__sema_busy(gem_fd, ahnd, fd[0], ctx, e, 25, 50);
+	__sema_busy(gem_fd, ahnd, fd[0], ctx, e, 75, 75);
 
 	close(fd[0]);
 	close(fd[1]);
+	put_ahnd(ahnd);
 }
 
 static void test_awake(int i915, const intel_ctx_t *ctx)
@@ -902,13 +948,14 @@ static void test_awake(int i915, const intel_ctx_t *ctx)
 	unsigned long slept;
 	uint64_t val;
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	fd = perf_i915_open(i915, I915_PMU_SOFTWARE_GT_AWAKE_TIME);
 	igt_skip_on(fd < 0);
 
 	/* Check that each engine is captured by the GT wakeref */
 	for_each_ctx_engine(i915, ctx, e) {
-		igt_spin_new(i915, .ctx = ctx, .engine = e->flags);
+		igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags);
 
 		val = pmu_read_single(fd);
 		slept = measured_usleep(batch_duration_ns / 1000);
@@ -920,7 +967,7 @@ static void test_awake(int i915, const intel_ctx_t *ctx)
 
 	/* And that the total GT wakeref matches walltime not summation */
 	for_each_ctx_engine(i915, ctx, e)
-		igt_spin_new(i915, .ctx = ctx, .engine = e->flags);
+		igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx, .engine = e->flags);
 
 	val = pmu_read_single(fd);
 	slept = measured_usleep(batch_duration_ns / 1000);
@@ -931,6 +978,7 @@ static void test_awake(int i915, const intel_ctx_t *ctx)
 
 	igt_free_spins(i915);
 	close(fd);
+	put_ahnd(ahnd);
 }
 
 #define   MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
@@ -1147,6 +1195,7 @@ multi_client(int gem_fd, const intel_ctx_t *ctx,
 	uint64_t val[2], ts[2], perf_slept[2];
 	igt_spin_t *spin;
 	int fd[2];
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
 
 	gem_quiescent_gpu(gem_fd);
 
@@ -1159,7 +1208,7 @@ multi_client(int gem_fd, const intel_ctx_t *ctx,
 	 */
 	fd[1] = open_pmu(gem_fd, config);
 
-	spin = spin_sync(gem_fd, ctx, e);
+	spin = spin_sync(gem_fd, ahnd, ctx, e);
 
 	val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]);
 	slept[1] = measured_usleep(batch_duration_ns / 1000);
@@ -1177,6 +1226,7 @@ multi_client(int gem_fd, const intel_ctx_t *ctx,
 	gem_sync(gem_fd, spin->handle);
 	igt_spin_free(gem_fd, spin);
 	close(fd[0]);
+	put_ahnd(ahnd);
 
 	assert_within_epsilon(val[0], perf_slept[0], tolerance);
 	assert_within_epsilon(val[1], perf_slept[1], tolerance);
@@ -1240,6 +1290,7 @@ static void cpu_hotplug(int gem_fd)
 	int fd, ret;
 	int cur = 0;
 	char buf;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
 
 	igt_require(cpu0_hotplug_support());
 
@@ -1250,8 +1301,10 @@ static void cpu_hotplug(int gem_fd)
 	 * Create two spinners so the test can ensure shorter gaps in engine
 	 * busyness as it is terminating one and re-starting the other.
 	 */
-	spin[0] = igt_spin_new(gem_fd, .engine = I915_EXEC_DEFAULT);
-	spin[1] = __igt_spin_new(gem_fd, .engine = I915_EXEC_DEFAULT);
+	spin[0] = igt_spin_new(gem_fd, .ahnd = ahnd,
+			       .engine = I915_EXEC_DEFAULT);
+	spin[1] = __igt_spin_new(gem_fd, .ahnd = ahnd,
+				 .engine = I915_EXEC_DEFAULT);
 
 	val = __pmu_read_single(fd, &ts[0]);
 
@@ -1334,7 +1387,7 @@ static void cpu_hotplug(int gem_fd)
 			break;
 
 		igt_spin_free(gem_fd, spin[cur]);
-		spin[cur] = __igt_spin_new(gem_fd,
+		spin[cur] = __igt_spin_new(gem_fd, .ahnd = ahnd,
 					   .engine = I915_EXEC_DEFAULT);
 		cur ^= 1;
 	}
@@ -1348,6 +1401,7 @@ static void cpu_hotplug(int gem_fd)
 	igt_waitchildren();
 	close(fd);
 	close(link[0]);
+	put_ahnd(ahnd);
 
 	/* Skip if child signals a problem with offlining a CPU. */
 	igt_skip_on(buf == 's');
@@ -1372,6 +1426,7 @@ test_interrupts(int gem_fd)
 	uint64_t idle, busy;
 	int fence_fd;
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
 
 	gem_quiescent_gpu(gem_fd);
 
@@ -1380,6 +1435,7 @@ test_interrupts(int gem_fd)
 	/* Queue spinning batches. */
 	for (int i = 0; i < target; i++) {
 		spin[i] = __igt_spin_new(gem_fd,
+					 .ahnd = ahnd,
 					 .engine = I915_EXEC_DEFAULT,
 					 .flags = IGT_SPIN_FENCE_OUT);
 		if (i == 0) {
@@ -1418,6 +1474,7 @@ test_interrupts(int gem_fd)
 	/* Free batches. */
 	for (int i = 0; i < target; i++)
 		igt_spin_free(gem_fd, spin[i]);
+	put_ahnd(ahnd);
 
 	/* Check at least as many interrupts have been generated. */
 	busy = pmu_read_single(fd) - idle;
@@ -1435,6 +1492,7 @@ test_interrupts_sync(int gem_fd)
 	struct pollfd pfd;
 	uint64_t idle, busy;
 	int fd;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
 
 	gem_quiescent_gpu(gem_fd);
 
@@ -1443,6 +1501,7 @@ test_interrupts_sync(int gem_fd)
 	/* Queue spinning batches. */
 	for (int i = 0; i < target; i++)
 		spin[i] = __igt_spin_new(gem_fd,
+					 .ahnd = ahnd,
 					 .flags = IGT_SPIN_FENCE_OUT);
 
 	/* Wait for idle state. */
@@ -1467,6 +1526,7 @@ test_interrupts_sync(int gem_fd)
 	/* Check at least as many interrupts have been generated. */
 	busy = pmu_read_single(fd) - idle;
 	close(fd);
+	put_ahnd(ahnd);
 
 	igt_assert_lte(target, busy);
 }
@@ -1479,6 +1539,7 @@ test_frequency(int gem_fd)
 	double min[2], max[2];
 	igt_spin_t *spin;
 	int fd[2], sysfs;
+	uint64_t ahnd = get_reloc_ahnd(gem_fd, 0);
 
 	sysfs = igt_sysfs_open(gem_fd);
 	igt_require(sysfs >= 0);
@@ -1506,7 +1567,7 @@ test_frequency(int gem_fd)
 	igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
 
 	gem_quiescent_gpu(gem_fd); /* Idle to be sure the change takes effect */
-	spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);
+	spin = spin_sync_flags(gem_fd, ahnd, 0, I915_EXEC_DEFAULT);
 
 	slept = pmu_read_multi(fd[0], 2, start);
 	measured_usleep(batch_duration_ns / 1000);
@@ -1532,7 +1593,7 @@ test_frequency(int gem_fd)
 	igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
 
 	gem_quiescent_gpu(gem_fd);
-	spin = spin_sync_flags(gem_fd, 0, I915_EXEC_DEFAULT);
+	spin = spin_sync_flags(gem_fd, ahnd, 0, I915_EXEC_DEFAULT);
 
 	slept = pmu_read_multi(fd[0], 2, start);
 	measured_usleep(batch_duration_ns / 1000);
@@ -1553,6 +1614,7 @@ test_frequency(int gem_fd)
 			 min_freq, igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz"));
 	close(fd[0]);
 	close(fd[1]);
+	put_ahnd(ahnd);
 
 	igt_info("Min frequency: requested %.1f, actual %.1f\n",
 		 min[0], min[1]);
@@ -1839,9 +1901,13 @@ accuracy(int gem_fd, const intel_ctx_t *ctx,
 		};
 		uint64_t total_busy_ns = 0, total_ns = 0;
 		igt_spin_t *spin;
+		uint64_t ahnd;
+
+		intel_allocator_init();
+		ahnd = get_reloc_ahnd(gem_fd, 0);
 
 		/* Allocate our spin batch and idle it. */
-		spin = igt_spin_new(gem_fd, .ctx = ctx, .engine = e->flags);
+		spin = igt_spin_new(gem_fd, .ahnd = ahnd, .ctx = ctx, .engine = e->flags);
 		igt_spin_end(spin);
 		gem_sync(gem_fd, spin->handle);
 
@@ -1912,6 +1978,7 @@ accuracy(int gem_fd, const intel_ctx_t *ctx,
 		}
 
 		igt_spin_free(gem_fd, spin);
+		put_ahnd(ahnd);
 	}
 
 	fd = open_pmu(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance));
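
Where a test juggles two contexts (busy_double_start() above, and the earlier
hangman/watchdog cases), each context carries its own handle; the pairing, as
a sketch under the same assumptions:

static void two_contexts(int fd, const intel_ctx_t *ctx)
{
	const intel_ctx_t *tmp_ctx = intel_ctx_create(fd, &ctx->cfg);
	uint64_t ahnd = get_reloc_ahnd(fd, ctx->id);
	uint64_t ahndN = get_reloc_ahnd(fd, tmp_ctx->id);
	igt_spin_t *spin0, *spin1;

	spin0 = igt_spin_new(fd, .ahnd = ahnd, .ctx = ctx);
	spin1 = igt_spin_new(fd, .ahnd = ahndN, .ctx = tmp_ctx);

	igt_spin_free(fd, spin1);
	igt_spin_free(fd, spin0);

	intel_ctx_destroy(fd, tmp_ctx);
	put_ahnd(ahndN);	/* one put per get, per context */
	put_ahnd(ahnd);
}
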
diff --git a/tests/i915/sysfs_heartbeat_interval.c b/tests/i915/sysfs_heartbeat_interval.c
index b70b653b..bc8d1b3c 100644
--- a/tests/i915/sysfs_heartbeat_interval.c
+++ b/tests/i915/sysfs_heartbeat_interval.c
@@ -36,6 +36,7 @@
 #include "i915/gem.h"
 #include "i915/gem_context.h"
 #include "i915/gem_engine_topology.h"
+#include "intel_allocator.h"
 #include "igt_debugfs.h"
 #include "igt_dummyload.h"
 #include "igt_sysfs.h"
@@ -149,6 +150,7 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 	igt_spin_t *spin[2];
 	uint64_t elapsed;
 	const intel_ctx_t *ctx[2];
+	uint64_t ahnd[2];
 
 	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
 	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
@@ -156,15 +158,18 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 	set_heartbeat(engine, timeout);
 
 	ctx[0] = create_ctx(i915, class, inst, 1023);
-	spin[0] = igt_spin_new(i915, .ctx = ctx[0],
+	ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id);
+	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0],
 			       .flags = (IGT_SPIN_NO_PREEMPTION |
 					 IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_OUT));
 	igt_spin_busywait_until_started(spin[0]);
 
 	ctx[1] = create_ctx(i915, class, inst, -1023);
+	ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id);
 	igt_nsec_elapsed(&ts);
-	spin[1] = igt_spin_new(i915, .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1],
+				.flags = IGT_SPIN_POLL_RUN);
 	igt_spin_busywait_until_started(spin[1]);
 	elapsed = igt_nsec_elapsed(&ts);
 
@@ -177,6 +182,8 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 
 	intel_ctx_destroy(i915, ctx[1]);
 	intel_ctx_destroy(i915, ctx[0]);
+	put_ahnd(ahnd[1]);
+	put_ahnd(ahnd[0]);
 	gem_quiescent_gpu(i915);
 
 	return elapsed;
@@ -292,17 +299,19 @@ static void client(int i915, int engine, int *ctl, int duration, int expect)
 	unsigned int class, inst;
 	unsigned long count = 0;
 	const intel_ctx_t *ctx;
+	uint64_t ahnd;
 
 	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
 	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
 
 	ctx = create_ctx(i915, class, inst, 0);
+	ahnd = get_reloc_ahnd(i915, ctx->id);
 
 	while (!READ_ONCE(*ctl)) {
 		unsigned int elapsed;
 		igt_spin_t *spin;
 
-		spin = igt_spin_new(i915, .ctx = ctx,
+		spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx,
 				    .flags = (IGT_SPIN_NO_PREEMPTION |
 					      IGT_SPIN_POLL_RUN |
 					      IGT_SPIN_FENCE_OUT));
@@ -331,6 +340,7 @@ static void client(int i915, int engine, int *ctl, int duration, int expect)
 	}
 
 	intel_ctx_destroy(i915, ctx);
+	put_ahnd(ahnd);
 	igt_info("%s client completed %lu spins\n",
 		 expect < 0 ? "Bad" : "Good", count);
 }
@@ -354,6 +364,8 @@ static void __test_mixed(int i915, int engine,
 	 * terminate the hog leaving the good client to run.
 	 */
 
+	intel_allocator_multiprocess_start();
+
 	igt_assert(igt_sysfs_scanf(engine, ATTR, "%u", &saved) == 1);
 	igt_debug("Initial %s:%u\n", ATTR, saved);
 	gem_quiescent_gpu(i915);
@@ -378,6 +390,7 @@ static void __test_mixed(int i915, int engine,
 
 	gem_quiescent_gpu(i915);
 	set_heartbeat(engine, saved);
+	intel_allocator_multiprocess_stop();
 }
 
 static void test_mixed(int i915, int engine)
@@ -414,6 +427,7 @@ static void test_off(int i915, int engine)
 	unsigned int saved;
 	igt_spin_t *spin;
 	const intel_ctx_t *ctx;
+	uint64_t ahnd;
 
 	/*
 	 * Some other clients request that there is never any interruption
@@ -433,8 +447,9 @@ static void test_off(int i915, int engine)
 	set_heartbeat(engine, 0);
 
 	ctx = create_ctx(i915, class, inst, 0);
+	ahnd = get_reloc_ahnd(i915, ctx->id);
 
-	spin = igt_spin_new(i915, .ctx = ctx,
+	spin = igt_spin_new(i915, .ahnd = ahnd, .ctx = ctx,
 			    .flags = (IGT_SPIN_POLL_RUN |
 				      IGT_SPIN_NO_PREEMPTION |
 				      IGT_SPIN_FENCE_OUT));
@@ -455,6 +470,7 @@ static void test_off(int i915, int engine)
 	gem_quiescent_gpu(i915);
 	set_heartbeat(engine, saved);
 	intel_ctx_destroy(i915, ctx);
+	put_ahnd(ahnd);
 }
 
 igt_main
diff --git a/tests/i915/sysfs_preempt_timeout.c b/tests/i915/sysfs_preempt_timeout.c
index 9f00093e..d176ae72 100644
--- a/tests/i915/sysfs_preempt_timeout.c
+++ b/tests/i915/sysfs_preempt_timeout.c
@@ -37,6 +37,7 @@
 #include "igt_debugfs.h"
 #include "igt_dummyload.h"
 #include "igt_sysfs.h"
+#include "intel_allocator.h"
 #include "sw_sync.h"
 
 #define ATTR "preempt_timeout_ms"
@@ -143,6 +144,7 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 	igt_spin_t *spin[2];
 	uint64_t elapsed;
 	const intel_ctx_t *ctx[2];
+	uint64_t ahnd[2];
 
 	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
 	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
@@ -150,15 +152,18 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 	set_preempt_timeout(engine, timeout);
 
 	ctx[0] = create_ctx(i915, class, inst, -1023);
-	spin[0] = igt_spin_new(i915, .ctx = ctx[0],
+	ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id);
+	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0],
 			       .flags = (IGT_SPIN_NO_PREEMPTION |
 					 IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_OUT));
 	igt_spin_busywait_until_started(spin[0]);
 
 	ctx[1] = create_ctx(i915, class, inst, 1023);
+	ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id);
 	igt_nsec_elapsed(&ts);
-	spin[1] = igt_spin_new(i915, .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1],
+				.flags = IGT_SPIN_POLL_RUN);
 	igt_spin_busywait_until_started(spin[1]);
 	elapsed = igt_nsec_elapsed(&ts);
 
@@ -171,6 +176,8 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 
 	intel_ctx_destroy(i915, ctx[1]);
 	intel_ctx_destroy(i915, ctx[0]);
+	put_ahnd(ahnd[1]);
+	put_ahnd(ahnd[0]);
 	gem_quiescent_gpu(i915);
 
 	return elapsed;
@@ -231,6 +238,7 @@ static void test_off(int i915, int engine)
 	igt_spin_t *spin[2];
 	unsigned int saved;
 	const intel_ctx_t *ctx[2];
+	uint64_t ahnd[2];
 
 	/*
 	 * We support setting the timeout to 0 to disable the reset on
@@ -252,14 +260,17 @@ static void test_off(int i915, int engine)
 	set_preempt_timeout(engine, 0);
 
 	ctx[0] = create_ctx(i915, class, inst, -1023);
-	spin[0] = igt_spin_new(i915, .ctx = ctx[0],
+	ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id);
+	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0],
 			       .flags = (IGT_SPIN_NO_PREEMPTION |
 					 IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_OUT));
 	igt_spin_busywait_until_started(spin[0]);
 
 	ctx[1] = create_ctx(i915, class, inst, 1023);
-	spin[1] = igt_spin_new(i915, .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id);
+	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1],
+				.flags = IGT_SPIN_POLL_RUN);
 
 	for (int i = 0; i < 150; i++) {
 		igt_assert_eq(sync_fence_status(spin[0]->out_fence), 0);
@@ -278,6 +289,8 @@ static void test_off(int i915, int engine)
 
 	intel_ctx_destroy(i915, ctx[1]);
 	intel_ctx_destroy(i915, ctx[0]);
+	put_ahnd(ahnd[1]);
+	put_ahnd(ahnd[0]);
 
 	igt_assert(enable_hangcheck(i915, true));
 	gem_quiescent_gpu(i915);
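
Both timeout tests in this file drive two contexts at once, so each
context gets its own allocator handle, and the handles are released only
after the spinners are freed and the contexts destroyed, matching the
ordering in the hunks above. In outline (create_ctx() is the file-local
helper already used here; the measurements are elided):

static void two_ctx_outline(int i915, int class, int inst)
{
	const intel_ctx_t *ctx[2];
	igt_spin_t *spin[2];
	uint64_t ahnd[2];

	ctx[0] = create_ctx(i915, class, inst, -1023);
	ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id);
	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0],
			       .flags = IGT_SPIN_NO_PREEMPTION);

	ctx[1] = create_ctx(i915, class, inst, 1023);
	ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id);
	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1],
			       .flags = IGT_SPIN_POLL_RUN);

	/* ... measure preemption, check fence status ... */

	igt_spin_free(i915, spin[1]);
	igt_spin_free(i915, spin[0]);
	intel_ctx_destroy(i915, ctx[1]);
	intel_ctx_destroy(i915, ctx[0]);
	put_ahnd(ahnd[1]);
	put_ahnd(ahnd[0]);
}
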
diff --git a/tests/i915/sysfs_timeslice_duration.c b/tests/i915/sysfs_timeslice_duration.c
index b73ee388..8a2f1c2f 100644
--- a/tests/i915/sysfs_timeslice_duration.c
+++ b/tests/i915/sysfs_timeslice_duration.c
@@ -37,6 +37,7 @@
 #include "i915/gem_mman.h"
 #include "igt_dummyload.h"
 #include "igt_sysfs.h"
+#include "intel_allocator.h"
 #include "ioctl_wrappers.h"
 #include "intel_chipset.h"
 #include "intel_reg.h"
@@ -371,6 +372,7 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 	igt_spin_t *spin[2];
 	uint64_t elapsed;
 	const intel_ctx_t *ctx[2];
+	uint64_t ahnd[2];
 
 	igt_assert(igt_sysfs_scanf(engine, "class", "%u", &class) == 1);
 	igt_assert(igt_sysfs_scanf(engine, "instance", "%u", &inst) == 1);
@@ -378,15 +380,18 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 	set_timeslice(engine, timeout);
 
 	ctx[0] = create_ctx(i915, class, inst, 0);
-	spin[0] = igt_spin_new(i915, .ctx = ctx[0],
+	ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id);
+	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0],
 			       .flags = (IGT_SPIN_NO_PREEMPTION |
 					 IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_OUT));
 	igt_spin_busywait_until_started(spin[0]);
 
 	ctx[1] = create_ctx(i915, class, inst, 0);
+	ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id);
 	igt_nsec_elapsed(&ts);
-	spin[1] = igt_spin_new(i915, .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1],
+				.flags = IGT_SPIN_POLL_RUN);
 	igt_spin_busywait_until_started(spin[1]);
 	elapsed = igt_nsec_elapsed(&ts);
 
@@ -399,6 +404,8 @@ static uint64_t __test_timeout(int i915, int engine, unsigned int timeout)
 
 	intel_ctx_destroy(i915, ctx[1]);
 	intel_ctx_destroy(i915, ctx[0]);
+	put_ahnd(ahnd[1]);
+	put_ahnd(ahnd[0]);
 	gem_quiescent_gpu(i915);
 
 	return elapsed;
@@ -460,6 +467,7 @@ static void test_off(int i915, int engine)
 	unsigned int saved;
 	igt_spin_t *spin[2];
 	const intel_ctx_t *ctx[2];
+	uint64_t ahnd[2];
 
 	/*
 	 * As always, there are some who must run uninterrupted and simply do
@@ -482,14 +490,17 @@ static void test_off(int i915, int engine)
 	set_timeslice(engine, 0);
 
 	ctx[0] = create_ctx(i915, class, inst, 0);
-	spin[0] = igt_spin_new(i915, .ctx = ctx[0],
+	ahnd[0] = get_reloc_ahnd(i915, ctx[0]->id);
+	spin[0] = igt_spin_new(i915, .ahnd = ahnd[0], .ctx = ctx[0],
 			       .flags = (IGT_SPIN_NO_PREEMPTION |
 					 IGT_SPIN_POLL_RUN |
 					 IGT_SPIN_FENCE_OUT));
 	igt_spin_busywait_until_started(spin[0]);
 
 	ctx[1] = create_ctx(i915, class, inst, 0);
-	spin[1] = igt_spin_new(i915, .ctx = ctx[1], .flags = IGT_SPIN_POLL_RUN);
+	ahnd[1] = get_reloc_ahnd(i915, ctx[1]->id);
+	spin[1] = igt_spin_new(i915, .ahnd = ahnd[1], .ctx = ctx[1],
+				.flags = IGT_SPIN_POLL_RUN);
 
 	for (int i = 0; i < 150; i++) {
 		igt_assert_eq(sync_fence_status(spin[0]->out_fence), 0);
@@ -508,6 +519,8 @@ static void test_off(int i915, int engine)
 
 	intel_ctx_destroy(i915, ctx[1]);
 	intel_ctx_destroy(i915, ctx[0]);
+	put_ahnd(ahnd[1]);
+	put_ahnd(ahnd[0]);
 
 	igt_assert(enable_hangcheck(i915, true));
 	gem_quiescent_gpu(i915);
diff --git a/tests/kms_busy.c b/tests/kms_busy.c
index a60ff6b0..6cbdfa66 100644
--- a/tests/kms_busy.c
+++ b/tests/kms_busy.c
@@ -78,11 +78,13 @@ static void flip_to_fb(igt_display_t *dpy, int pipe,
 	struct pollfd pfd = { .fd = dpy->drm_fd, .events = POLLIN };
 	struct drm_event_vblank ev;
 	IGT_CORK_FENCE(cork);
+	uint64_t ahnd = get_reloc_ahnd(dpy->drm_fd, 0);
 	igt_spin_t *t;
 	int fence;
 
 	fence = igt_cork_plug(&cork, dpy->drm_fd);
 	t = igt_spin_new(dpy->drm_fd,
+			 .ahnd = ahnd,
 			 .fence = fence,
 			 .dependency = fb->gem_handle,
 			 .flags = IGT_SPIN_FENCE_IN);
@@ -130,6 +132,7 @@ static void flip_to_fb(igt_display_t *dpy, int pipe,
 	}
 
 	igt_spin_free(dpy->drm_fd, t);
+	put_ahnd(ahnd);
 }
 
 static void test_flip(igt_display_t *dpy, int pipe, bool modeset)
@@ -183,7 +186,9 @@ static void test_flip(igt_display_t *dpy, int pipe, bool modeset)
 static void test_atomic_commit_hang(igt_display_t *dpy, igt_plane_t *primary,
 				    struct igt_fb *busy_fb)
 {
+	uint64_t ahnd = get_reloc_ahnd(dpy->drm_fd, 0);
 	igt_spin_t *t = igt_spin_new(dpy->drm_fd,
+				     .ahnd = ahnd,
 				     .dependency = busy_fb->gem_handle,
 				     .flags = IGT_SPIN_NO_PREEMPTION);
 	struct pollfd pfd = { .fd = dpy->drm_fd, .events = POLLIN };
@@ -214,6 +219,7 @@ static void test_atomic_commit_hang(igt_display_t *dpy, igt_plane_t *primary,
 	igt_assert(read(dpy->drm_fd, &ev, sizeof(ev)) == sizeof(ev));
 
 	igt_spin_end(t);
+	put_ahnd(ahnd);
 }
 
 static void test_hang(igt_display_t *dpy,
@@ -265,6 +271,7 @@ static void test_pageflip_modeset_hang(igt_display_t *dpy, enum pipe pipe)
 	igt_output_t *output;
 	igt_plane_t *primary;
 	igt_spin_t *t;
+	uint64_t ahnd = get_reloc_ahnd(dpy->drm_fd, 0);
 
 	output = set_fb_on_crtc(dpy, pipe, &fb);
 	primary = igt_output_get_plane_type(output, DRM_PLANE_TYPE_PRIMARY);
@@ -272,6 +279,7 @@ static void test_pageflip_modeset_hang(igt_display_t *dpy, enum pipe pipe)
 	igt_display_commit2(dpy, dpy->is_atomic ? COMMIT_ATOMIC : COMMIT_LEGACY);
 
 	t = igt_spin_new(dpy->drm_fd,
+			 .ahnd = ahnd,
 			 .dependency = fb.gem_handle,
 			 .flags = IGT_SPIN_NO_PREEMPTION);
 
@@ -285,6 +293,7 @@ static void test_pageflip_modeset_hang(igt_display_t *dpy, enum pipe pipe)
 	igt_assert(read(dpy->drm_fd, &ev, sizeof(ev)) == sizeof(ev));
 
 	igt_spin_end(t);
+	put_ahnd(ahnd);
 
 	igt_remove_fb(dpy->drm_fd, &fb);
 }
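
The display tests take the handle for context 0 on the DRM fd and key
each spinner to the framebuffer's GEM handle, so the flip or commit has
to wait for the spin batch. Reduced to its essentials (the wrapper name
is illustrative):

static void spin_on_fb(igt_display_t *dpy, struct igt_fb *fb)
{
	uint64_t ahnd = get_reloc_ahnd(dpy->drm_fd, 0);
	igt_spin_t *t;

	/* Holds fb busy until the spinner is ended. */
	t = igt_spin_new(dpy->drm_fd,
			 .ahnd = ahnd,
			 .dependency = fb->gem_handle,
			 .flags = IGT_SPIN_NO_PREEMPTION);

	/* ... issue the flip or atomic commit against fb ... */

	igt_spin_free(dpy->drm_fd, t);
	put_ahnd(ahnd);
}
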
diff --git a/tests/kms_cursor_legacy.c b/tests/kms_cursor_legacy.c
index 75a822c4..4f96c880 100644
--- a/tests/kms_cursor_legacy.c
+++ b/tests/kms_cursor_legacy.c
@@ -517,6 +517,7 @@ static void basic_flip_cursor(igt_display_t *display,
 	struct igt_fb fb_info, cursor_fb, cursor_fb2, argb_fb;
 	unsigned vblank_start;
 	enum pipe pipe = find_connected_pipe(display, false);
+	uint64_t ahnd = get_reloc_ahnd(display->drm_fd, 0);
 	igt_spin_t *spin;
 	int i, miss1 = 0, miss2 = 0, delta;
 
@@ -548,6 +549,7 @@ static void basic_flip_cursor(igt_display_t *display,
 		spin = NULL;
 		if (flags & BASIC_BUSY)
 			spin = igt_spin_new(display->drm_fd,
+					    .ahnd = ahnd,
 					    .dependency = fb_info.gem_handle);
 
 		/* Start with a synchronous query to align with the vblank */
@@ -631,6 +633,7 @@ static void basic_flip_cursor(igt_display_t *display,
 		igt_remove_fb(display->drm_fd, &argb_fb);
 	if (cursor_fb2.gem_handle)
 		igt_remove_fb(display->drm_fd, &cursor_fb2);
+	put_ahnd(ahnd);
 }
 
 static int
@@ -1319,6 +1322,7 @@ static void flip_vs_cursor_busy_crc(igt_display_t *display, bool atomic)
 	igt_pipe_t *pipe_connected = &display->pipes[pipe];
 	igt_plane_t *plane_primary = igt_pipe_get_plane_type(pipe_connected, DRM_PLANE_TYPE_PRIMARY);
 	igt_crc_t crcs[2], test_crc;
+	uint64_t ahnd = get_reloc_ahnd(display->drm_fd, 0);
 
 	if (atomic)
 		igt_require(display->is_atomic);
@@ -1366,6 +1370,7 @@ static void flip_vs_cursor_busy_crc(igt_display_t *display, bool atomic)
 		igt_spin_t *spin;
 
 		spin = igt_spin_new(display->drm_fd,
+				    .ahnd = ahnd,
 				    .dependency = fb_info[1].gem_handle);
 
 		vblank_start = kmstest_get_vblank(display->drm_fd, pipe, DRM_VBLANK_NEXTONMISS);
@@ -1394,6 +1399,7 @@ static void flip_vs_cursor_busy_crc(igt_display_t *display, bool atomic)
 	igt_remove_fb(display->drm_fd, &fb_info[1]);
 	igt_remove_fb(display->drm_fd, &fb_info[0]);
 	igt_remove_fb(display->drm_fd, &cursor_fb);
+	put_ahnd(ahnd);
 }
 
 igt_main
diff --git a/tests/kms_flip.c b/tests/kms_flip.c
index f2fce8d2..e87d571a 100755
--- a/tests/kms_flip.c
+++ b/tests/kms_flip.c
@@ -616,9 +616,9 @@ static void recreate_fb(struct test_output *o)
 	o->fb_info[o->current_fb_id].fb_id = new_fb_id;
 }
 
-static igt_hang_t hang_gpu(int fd)
+static igt_hang_t hang_gpu(int fd, uint64_t ahnd)
 {
-	return igt_hang_ring(fd, I915_EXEC_DEFAULT);
+	return igt_hang_ring_with_ahnd(fd, I915_EXEC_DEFAULT, ahnd);
 }
 
 static void unhang_gpu(int fd, igt_hang_t hang)
@@ -675,6 +675,7 @@ static bool run_test_step(struct test_output *o, unsigned int *events)
 	struct vblank_reply vbl_reply;
 	unsigned int target_seq;
 	igt_hang_t hang;
+	uint64_t ahnd = 0;
 
 	target_seq = o->vblank_state.seq_step;
 	/* Absolute waits only works once we have a frame counter. */
@@ -776,8 +777,11 @@ static bool run_test_step(struct test_output *o, unsigned int *events)
 	igt_print_activity();
 
 	memset(&hang, 0, sizeof(hang));
-	if (do_flip && (o->flags & TEST_HANG))
-		hang = hang_gpu(drm_fd);
+	if (do_flip && (o->flags & TEST_HANG)) {
+		if (is_i915_device(drm_fd))
+			ahnd = get_reloc_ahnd(drm_fd, 0);
+		hang = hang_gpu(drm_fd, ahnd);
+	}
 
 	/* try to make sure we can issue two flips during the same frame */
 	if (do_flip && (o->flags & TEST_EBUSY)) {
@@ -847,6 +851,8 @@ static bool run_test_step(struct test_output *o, unsigned int *events)
 		igt_assert(do_page_flip(o, new_fb_id, false) == expected_einval);
 
 	unhang_gpu(drm_fd, hang);
+	if (is_i915_device(drm_fd))
+		put_ahnd(ahnd);
 
 	*events = completed_events;
 
diff --git a/tests/kms_vblank.c b/tests/kms_vblank.c
index 83d5629f..4597f982 100644
--- a/tests/kms_vblank.c
+++ b/tests/kms_vblank.c
@@ -118,6 +118,7 @@ static void run_test(data_t *data, void (*testfunc)(data_t *, int, int))
 	igt_output_t *output = data->output;
 	int fd = display->drm_fd;
 	igt_hang_t hang;
+	uint64_t ahnd = 0;
 
 	prepare_crtc(data, fd, output);
 
@@ -128,8 +129,11 @@ static void run_test(data_t *data, void (*testfunc)(data_t *, int, int))
 		 igt_subtest_name(), kmstest_pipe_name(data->pipe),
 		 igt_output_name(output));
 
-	if (!(data->flags & NOHANG))
-		hang = igt_hang_ring(fd, I915_EXEC_DEFAULT);
+	if (!(data->flags & NOHANG)) {
+		if (is_i915_device(fd))
+			ahnd = get_reloc_ahnd(fd, 0);
+		hang = igt_hang_ring_with_ahnd(fd, I915_EXEC_DEFAULT, ahnd);
+	}
 
 	if (data->flags & BUSY) {
 		union drm_wait_vblank vbl;
@@ -166,6 +170,9 @@ static void run_test(data_t *data, void (*testfunc)(data_t *, int, int))
 	igt_info("\n%s on pipe %s, connector %s: PASSED\n\n",
 		 igt_subtest_name(), kmstest_pipe_name(data->pipe), igt_output_name(output));
 
+	if (is_i915_device(fd))
+		put_ahnd(ahnd);
+
 	/* cleanup what prepare_crtc() has done */
 	cleanup_crtc(data, fd, output);
 }
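
kms_flip and kms_vblank can also run against non-i915 drivers, so the
handle is only taken behind an is_i915_device() check, and
igt_hang_ring_with_ahnd() accepts ahnd == 0 for the relocation and
non-i915 paths. The guard, in outline (the wrapper name is illustrative;
igt_post_hang_ring() is the usual cleanup for an igt_hang_t):

static void hang_with_ahnd(int fd)
{
	igt_hang_t hang;
	uint64_t ahnd = 0;

	/* Allocator handles only make sense for i915; everyone else
	 * proceeds with ahnd == 0. */
	if (is_i915_device(fd))
		ahnd = get_reloc_ahnd(fd, 0);

	hang = igt_hang_ring_with_ahnd(fd, I915_EXEC_DEFAULT, ahnd);

	/* ... exercise flips or vblank waits against the hung ring ... */

	igt_post_hang_ring(fd, hang);
	if (is_i915_device(fd))
		put_ahnd(ahnd);
}
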
diff --git a/tests/prime_vgem.c b/tests/prime_vgem.c
index 25c5f42f..b837f2bf 100644
--- a/tests/prime_vgem.c
+++ b/tests/prime_vgem.c
@@ -207,10 +207,14 @@ static void test_fence_blt(int i915, int vgem)
 
 	igt_fork(child, 1) {
 		uint32_t native;
+		uint64_t ahnd;
 
 		close(master[0]);
 		close(slave[1]);
 
+		intel_allocator_init();
+		ahnd = get_reloc_ahnd(i915, 0);
+
 		native = gem_create(i915, scratch.size);
 
 		ptr = gem_mmap__wc(i915, native, 0, scratch.size, PROT_READ);
@@ -221,10 +225,11 @@ static void test_fence_blt(int i915, int vgem)
 		write(master[1], &child, sizeof(child));
 		read(slave[0], &child, sizeof(child));
 
-		igt_blitter_src_copy(i915, prime, 0, scratch.pitch,
-				     I915_TILING_NONE, 0, 0, scratch.width,
-				     scratch.height, scratch.bpp, native, 0,
-				     scratch.pitch, I915_TILING_NONE, 0, 0);
+		igt_blitter_src_copy(i915, ahnd, 0, prime, 0, scratch.pitch,
+				     I915_TILING_NONE, 0, 0, scratch.size,
+				     scratch.width, scratch.height, scratch.bpp,
+				     native, 0, scratch.pitch,
+				     I915_TILING_NONE, 0, 0, scratch.size);
 		gem_sync(i915, native);
 
 		for (i = 0; i < scratch.height; i++)
@@ -234,6 +239,7 @@ static void test_fence_blt(int i915, int vgem)
 		munmap(ptr, scratch.size);
 		gem_close(i915, native);
 		gem_close(i915, prime);
+		put_ahnd(ahnd);
 	}
 
 	close(master[1]);
@@ -375,6 +381,7 @@ static void test_blt(int vgem, int i915)
 	uint32_t prime, native;
 	uint32_t *ptr;
 	int dmabuf, i;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
 
 	scratch.width = 1024;
 	scratch.height = 1024;
@@ -391,9 +398,11 @@ static void test_blt(int vgem, int i915)
 		ptr[scratch.pitch * i / sizeof(*ptr)] = i;
 	munmap(ptr, scratch.size);
 
-	igt_blitter_src_copy(i915, native, 0, scratch.pitch, I915_TILING_NONE,
-			     0, 0, scratch.width, scratch.height, scratch.bpp,
-			     prime, 0, scratch.pitch, I915_TILING_NONE, 0, 0);
+	igt_blitter_src_copy(i915, ahnd, 0, native, 0, scratch.pitch,
+			     I915_TILING_NONE, 0, 0, scratch.size,
+			     scratch.width, scratch.height, scratch.bpp,
+			     prime, 0, scratch.pitch, I915_TILING_NONE, 0, 0,
+			     scratch.size);
 	prime_sync_start(dmabuf, true);
 	prime_sync_end(dmabuf, true);
 	close(dmabuf);
@@ -405,9 +414,11 @@ static void test_blt(int vgem, int i915)
 	}
 	munmap(ptr, scratch.size);
 
-	igt_blitter_src_copy(i915, prime, 0, scratch.pitch, I915_TILING_NONE,
-			     0, 0, scratch.width, scratch.height, scratch.bpp,
-			     native, 0, scratch.pitch, I915_TILING_NONE, 0, 0);
+	igt_blitter_src_copy(i915, ahnd, 0, prime, 0, scratch.pitch,
+			     I915_TILING_NONE, 0, 0, scratch.size,
+			     scratch.width, scratch.height, scratch.bpp,
+			     native, 0, scratch.pitch, I915_TILING_NONE, 0, 0,
+			     scratch.size);
 	gem_sync(i915, native);
 
 	ptr = gem_mmap__wc(i915, native, 0, scratch.size, PROT_READ);
@@ -418,6 +429,7 @@ static void test_blt(int vgem, int i915)
 	gem_close(i915, native);
 	gem_close(i915, prime);
 	gem_close(vgem, scratch.handle);
+	put_ahnd(ahnd);
 }
 
 static void test_shrink(int vgem, int i915)
@@ -509,6 +521,7 @@ static void test_blt_interleaved(int vgem, int i915)
 	uint32_t prime, native;
 	uint32_t *foreign, *local;
 	int dmabuf, i;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0);
 
 	scratch.width = 1024;
 	scratch.height = 1024;
@@ -525,20 +538,22 @@ static void test_blt_interleaved(int vgem, int i915)
 
 	for (i = 0; i < scratch.height; i++) {
 		local[scratch.pitch * i / sizeof(*local)] = i;
-		igt_blitter_src_copy(i915, native, 0, scratch.pitch,
-				     I915_TILING_NONE, 0, i, scratch.width, 1,
+		igt_blitter_src_copy(i915, ahnd, 0, native, 0, scratch.pitch,
+				     I915_TILING_NONE, 0, i, scratch.size,
+				     scratch.width, 1,
 				     scratch.bpp, prime, 0, scratch.pitch,
-				     I915_TILING_NONE, 0, i);
+				     I915_TILING_NONE, 0, i, scratch.size);
 		prime_sync_start(dmabuf, true);
 		igt_assert_eq_u32(foreign[scratch.pitch * i / sizeof(*foreign)],
 				  i);
 		prime_sync_end(dmabuf, true);
 
 		foreign[scratch.pitch * i / sizeof(*foreign)] = ~i;
-		igt_blitter_src_copy(i915, prime, 0, scratch.pitch,
-				     I915_TILING_NONE, 0, i, scratch.width, 1,
+		igt_blitter_src_copy(i915, ahnd, 0, prime, 0, scratch.pitch,
+				     I915_TILING_NONE, 0, i, scratch.size,
+				     scratch.width, 1,
 				     scratch.bpp, native, 0, scratch.pitch,
-				     I915_TILING_NONE, 0, i);
+				     I915_TILING_NONE, 0, i, scratch.size);
 		gem_sync(i915, native);
 		igt_assert_eq_u32(local[scratch.pitch * i / sizeof(*local)],
 				  ~i);
@@ -551,6 +566,7 @@ static void test_blt_interleaved(int vgem, int i915)
 	gem_close(i915, native);
 	gem_close(i915, prime);
 	gem_close(vgem, scratch.handle);
+	put_ahnd(ahnd);
 }
 
 static bool prime_busy(int fd, bool excl)
@@ -559,7 +575,8 @@ static bool prime_busy(int fd, bool excl)
 	return poll(&pfd, 1, 0) == 0;
 }
 
-static void work(int i915, int dmabuf, const intel_ctx_t *ctx, unsigned ring)
+static void work(int i915, uint64_t ahnd, uint64_t scratch_offset, int dmabuf,
+		 const intel_ctx_t *ctx, unsigned ring)
 {
 	const int SCRATCH = 0;
 	const int BATCH = 1;
@@ -584,10 +601,17 @@ static void work(int i915, int dmabuf, const intel_ctx_t *ctx, unsigned ring)
 	obj[SCRATCH].handle = prime_fd_to_handle(i915, dmabuf);
 
 	obj[BATCH].handle = gem_create(i915, size);
+	obj[BATCH].offset = get_offset(ahnd, obj[BATCH].handle, size, 0);
 	obj[BATCH].relocs_ptr = (uintptr_t)store;
-	obj[BATCH].relocation_count = ARRAY_SIZE(store);
+	obj[BATCH].relocation_count = !ahnd ? ARRAY_SIZE(store) : 0;
 	memset(store, 0, sizeof(store));
 
+	if (ahnd) {
+		obj[SCRATCH].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+		obj[SCRATCH].offset = scratch_offset;
+		obj[BATCH].flags = EXEC_OBJECT_PINNED;
+	}
+
 	batch = gem_mmap__wc(i915, obj[BATCH].handle, 0, size, PROT_WRITE);
 	gem_set_domain(i915, obj[BATCH].handle,
 		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
@@ -602,8 +626,8 @@ static void work(int i915, int dmabuf, const intel_ctx_t *ctx, unsigned ring)
 		store[count].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
 		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
 		if (gen >= 8) {
-			batch[++i] = 0;
-			batch[++i] = 0;
+			batch[++i] = scratch_offset + store[count].delta;
+			batch[++i] = (scratch_offset + store[count].delta) >> 32;
 		} else if (gen >= 4) {
 			batch[++i] = 0;
 			batch[++i] = 0;
@@ -626,8 +650,8 @@ static void work(int i915, int dmabuf, const intel_ctx_t *ctx, unsigned ring)
 	batch[i] = MI_BATCH_BUFFER_START;
 	if (gen >= 8) {
 		batch[i] |= 1 << 8 | 1;
-		batch[++i] = 0;
-		batch[++i] = 0;
+		batch[++i] = obj[BATCH].offset;
+		batch[++i] = obj[BATCH].offset >> 32;
 	} else if (gen >= 6) {
 		batch[i] |= 1 << 8;
 		batch[++i] = 0;
@@ -662,14 +686,18 @@ static void test_busy(int i915, int vgem, const intel_ctx_t *ctx, unsigned ring)
 	uint32_t *ptr;
 	int dmabuf;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id), scratch_offset;
 
 	scratch.width = 1024;
 	scratch.height = 1;
 	scratch.bpp = 32;
 	vgem_create(vgem, &scratch);
+	scratch_offset = get_offset(ahnd, scratch.handle, scratch.size, 0);
 	dmabuf = prime_handle_to_fd(vgem, scratch.handle);
 
-	work(i915, dmabuf, ctx, ring);
+	work(i915, ahnd, scratch_offset, dmabuf, ctx, ring);
+
+	put_ahnd(ahnd);
 
 	/* Calling busy in a loop should be enough to flush the rendering */
 	memset(&tv, 0, sizeof(tv));
@@ -691,14 +719,18 @@ static void test_wait(int i915, int vgem, const intel_ctx_t *ctx, unsigned ring)
 	struct pollfd pfd;
 	uint32_t *ptr;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id), scratch_offset;
 
 	scratch.width = 1024;
 	scratch.height = 1;
 	scratch.bpp = 32;
 	vgem_create(vgem, &scratch);
+	scratch_offset = get_offset(ahnd, scratch.handle, scratch.size, 0);
 	pfd.fd = prime_handle_to_fd(vgem, scratch.handle);
 
-	work(i915, pfd.fd, ctx, ring);
+	work(i915, ahnd, scratch_offset, pfd.fd, ctx, ring);
+
+	put_ahnd(ahnd);
 
 	pfd.events = POLLIN;
 	igt_assert_eq(poll(&pfd, 1, 10000), 1);
@@ -718,18 +750,22 @@ static void test_sync(int i915, int vgem, const intel_ctx_t *ctx, unsigned ring)
 	uint32_t *ptr;
 	int dmabuf;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id), scratch_offset;
 
 	scratch.width = 1024;
 	scratch.height = 1;
 	scratch.bpp = 32;
 	vgem_create(vgem, &scratch);
+	scratch_offset = get_offset(ahnd, scratch.handle, scratch.size, 0);
 	dmabuf = prime_handle_to_fd(vgem, scratch.handle);
 
 	ptr = mmap(NULL, scratch.size, PROT_READ, MAP_SHARED, dmabuf, 0);
 	igt_assert(ptr != MAP_FAILED);
 	gem_close(vgem, scratch.handle);
 
-	work(i915, dmabuf, ctx, ring);
+	work(i915, ahnd, scratch_offset, dmabuf, ctx, ring);
+
+	put_ahnd(ahnd);
 
 	prime_sync_start(dmabuf, false);
 	for (i = 0; i < 1024; i++)
@@ -746,12 +782,13 @@ static void test_fence_wait(int i915, int vgem, const intel_ctx_t *ctx, unsigned
 	uint32_t fence;
 	uint32_t *ptr;
 	int dmabuf;
+	uint64_t ahnd = get_reloc_ahnd(i915, ctx->id), scratch_offset;
 
 	scratch.width = 1024;
 	scratch.height = 1;
 	scratch.bpp = 32;
 	vgem_create(vgem, &scratch);
-
+	scratch_offset = get_offset(ahnd, scratch.handle, scratch.size, 0);
 	dmabuf = prime_handle_to_fd(vgem, scratch.handle);
 	fence = vgem_fence_attach(vgem, &scratch, VGEM_FENCE_WRITE);
 	igt_assert(prime_busy(dmabuf, false));
@@ -760,10 +797,14 @@ static void test_fence_wait(int i915, int vgem, const intel_ctx_t *ctx, unsigned
 	ptr = mmap(NULL, scratch.size, PROT_READ, MAP_SHARED, dmabuf, 0);
 	igt_assert(ptr != MAP_FAILED);
 
-	igt_fork(child, 1)
-		work(i915, dmabuf, ctx, ring);
+	igt_fork(child, 1) {
+		ahnd = get_reloc_ahnd(i915, ctx->id);
+		work(i915, ahnd, scratch_offset, dmabuf, ctx, ring);
+		put_ahnd(ahnd);
+	}
 
 	sleep(1);
+	put_ahnd(ahnd);
 
 	/* Check for invalidly completing the task early */
 	for (int i = 0; i < 1024; i++)
@@ -789,11 +830,13 @@ static void test_fence_hang(int i915, int vgem, unsigned flags)
 	uint32_t *ptr;
 	int dmabuf;
 	int i;
+	uint64_t ahnd = get_reloc_ahnd(i915, 0), scratch_offset;
 
 	scratch.width = 1024;
 	scratch.height = 1;
 	scratch.bpp = 32;
 	vgem_create(vgem, &scratch);
+	scratch_offset = get_offset(ahnd, scratch.handle, scratch.size, 0);
 	dmabuf = prime_handle_to_fd(vgem, scratch.handle);
 	vgem_fence_attach(vgem, &scratch, flags | WIP_VGEM_FENCE_NOTIMEOUT);
 
@@ -801,7 +844,9 @@ static void test_fence_hang(int i915, int vgem, unsigned flags)
 	igt_assert(ptr != MAP_FAILED);
 	gem_close(vgem, scratch.handle);
 
-	work(i915, dmabuf, intel_ctx_0(i915), 0);
+	work(i915, ahnd, scratch_offset, dmabuf, intel_ctx_0(i915), 0);
+
+	put_ahnd(ahnd);
 
 	/* The work should have been cancelled */
 
@@ -1146,8 +1191,6 @@ igt_main
 		igt_subtest("basic-fence-blt")
 			test_fence_blt(i915, vgem);
 
-		test_each_engine("fence-wait", vgem, i915, test_fence_wait);
-
 		igt_subtest("basic-fence-flip")
 			test_flip(i915, vgem, 0);
 
@@ -1166,6 +1209,20 @@
 		}
 	}
 
+	/* Fence testing, requires multiprocess allocator */
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(vgem_has_fences(vgem));
+			intel_allocator_multiprocess_start();
+		}
+
+		test_each_engine("fence-wait", vgem, i915, test_fence_wait);
+
+		igt_fixture {
+			intel_allocator_multiprocess_stop();
+		}
+	}
+
 	igt_fixture {
 		close(i915);
 		close(vgem);
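
The interesting part of the prime_vgem conversion is work(): once an
allocator handle is in play, both objects are softpinned and the batch
has to carry real GPU addresses, because no relocation pass is left to
patch them. Condensed from the hunks above (gen8+ addressing only; with
ahnd == 0 relocation_count stays populated and the inline addresses are
mere placeholders that the kernel's relocation pass overwrites):

	obj[BATCH].offset = get_offset(ahnd, obj[BATCH].handle, size, 0);
	obj[BATCH].relocs_ptr = (uintptr_t)store;
	obj[BATCH].relocation_count = !ahnd ? ARRAY_SIZE(store) : 0;

	if (ahnd) {
		/* Pin both objects at the allocator-chosen offsets. */
		obj[SCRATCH].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
		obj[SCRATCH].offset = scratch_offset;
		obj[BATCH].flags = EXEC_OBJECT_PINNED;
	}

	/* MI_STORE_DWORD_IMM takes the scratch address inline ... */
	batch[++i] = scratch_offset + store[count].delta;
	batch[++i] = (scratch_offset + store[count].delta) >> 32;

	/* ... and the spinning MI_BATCH_BUFFER_START jumps back to the
	 * batch's own pinned offset. */
	batch[++i] = obj[BATCH].offset;
	batch[++i] = obj[BATCH].offset >> 32;

Because test_fence_wait() hands work() off to an igt_fork()ed child that
opens its own handle, those subtests move into a subtest group bracketed
by intel_allocator_multiprocess_start()/stop(), and the forked child of
test_fence_blt() calls intel_allocator_init() before taking its first
handle.
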
-- 
2.26.3


