[igt-dev] [PATCH i-g-t v16 1/4] lib/intel_batchbuffer: Extend intel_bb

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Fri Jul 3 11:30:59 UTC 2020


As we move toward removing libdrm from the rendercopy tests, some
additional code in intel_bb is required. Add the new functions and fix
memory issues in the reset path.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
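Notes for reviewers: below is a minimal sketch of the no-reloc flow this
series targets (assuming the pre-existing intel_bb_exec() wrapper).
Offsets are proposed by __intel_bb_propose_offset() and execbuf now
collects an out-fence which intel_bb_sync() waits on:

	int i915 = drm_open_driver(DRIVER_INTEL);
	struct intel_bb *ibb = intel_bb_create(i915, 4096);

	intel_bb_out(ibb, MI_NOOP);
	intel_bb_emit_bbe(ibb);		/* MI_BATCH_BUFFER_END + alignment */
	intel_bb_exec(ibb, intel_bb_offset(ibb),
		      I915_EXEC_DEFAULT | I915_EXEC_NO_RELOC, false);
	igt_assert_eq(intel_bb_sync(ibb), 0);	/* wait on the out-fence */
	intel_bb_destroy(ibb);
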
 lib/intel_batchbuffer.c | 516 ++++++++++++++++++++++++++++++++++++----
 lib/intel_batchbuffer.h |  80 ++++++-
 2 files changed, 541 insertions(+), 55 deletions(-)
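
The new ref/unref pair guards the reset path: while an extra reference is
held, intel_bb_reset() keeps the objects cache and the batch bo intact. A
sketch of the intended pattern (the extra user here is hypothetical):

	intel_bb_ref(ibb);		/* e.g. a helper tracking the bb */
	intel_bb_reset(ibb, true);	/* refcount > 1: warns, keeps cache */
	intel_bb_unref(ibb);
	intel_bb_reset(ibb, true);	/* sole owner: purge + fresh batch bo */
	intel_bb_destroy(ibb);		/* asserts the last reference drops */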

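The blitter helpers reduce a copy between two intel_bufs to a single call
(src/dst and their byte pitches are assumed to come from lib/intel_bufops;
tiling bits and fenced relocations are emitted internally):

	intel_bb_blt_copy(ibb,
			  src, 0, 0, src_pitch,
			  dst, 0, 0, dst_pitch,
			  width, height, 32);	/* bpp in bits */
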
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 2a882627..9fc75dbe 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include <inttypes.h>
+#include <poll.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -42,6 +43,7 @@
 #include "rendercopy.h"
 #include "media_fill.h"
 #include "ioctl_wrappers.h"
+#include "sw_sync.h"
 #include "i915/gem_mman.h"
 #include "media_spin.h"
 #include "gpgpu_fill.h"
@@ -1045,8 +1047,6 @@ void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
 	intel_batchbuffer_flush(batch);
 }
 
-#undef CHECK_RANGE
-
 /**
  * igt_get_render_copyfunc:
  * @devid: pci device id
@@ -1202,6 +1202,21 @@ static void __reallocate_objects(struct intel_bb *ibb)
 	}
 }
 
+static inline uint64_t __intel_bb_propose_offset(struct intel_bb *ibb)
+{
+	uint64_t offset;
+
+	if (ibb->enforce_relocs)
+		return 0;
+
+	/* randomize the address, we try to avoid relocations */
+	offset = hars_petruska_f54_1_random64(&ibb->prng);
+	offset &= (ibb->gtt_size - 1);
+	offset &= ~(4096 - 1);
+
+	return offset;
+}
+
 /**
  * intel_bb_create:
  * @i915: drm fd
@@ -1211,22 +1226,26 @@ static void __reallocate_objects(struct intel_bb *ibb)
  *
 * Pointer to the intel_bb, asserts on failure.
  */
-struct intel_bb *intel_bb_create(int i915, uint32_t size)
+static struct intel_bb *
+__intel_bb_create(int i915, uint32_t size, bool do_relocs)
 {
 	struct intel_bb *ibb = calloc(1, sizeof(*ibb));
 	uint64_t gtt_size;
+	uint64_t bb_address;
 
 	igt_assert(ibb);
 
 	ibb->i915 = i915;
 	ibb->devid = intel_get_drm_devid(i915);
 	ibb->gen = intel_gen(ibb->devid);
+	ibb->enforce_relocs = do_relocs;
 	ibb->handle = gem_create(i915, size);
 	ibb->size = size;
 	ibb->batch = calloc(1, size);
 	igt_assert(ibb->batch);
 	ibb->ptr = ibb->batch;
 	ibb->prng = (uint32_t) to_user_pointer(ibb);
+	ibb->fence = -1;
 
 	gtt_size = gem_aperture_size(i915);
 	if (!gem_uses_full_ppgtt(i915))
@@ -1236,11 +1255,45 @@ struct intel_bb *intel_bb_create(int i915, uint32_t size)
 	ibb->gtt_size = gtt_size;
 
 	__reallocate_objects(ibb);
-	intel_bb_add_object(ibb, ibb->handle, 0, false);
+	bb_address = __intel_bb_propose_offset(ibb);
+	intel_bb_add_object(ibb, ibb->handle, bb_address, false);
+
+	ibb->refcount = 1;
 
 	return ibb;
 }
 
+/**
+ * intel_bb_create:
+ * @i915: drm fd
+ * @size: size of the batchbuffer
+ *
+ * Returns:
+ *
+ * Pointer to the intel_bb, asserts on failure.
+ */
+struct intel_bb *intel_bb_create(int i915, uint32_t size)
+{
+	return __intel_bb_create(i915, size, false);
+}
+
+/**
+ * intel_bb_create_with_relocs:
+ * @i915: drm fd
+ * @size: size of the batchbuffer
+ *
+ * Disable passing and randomizing of addresses. This will lead to
+ * relocations when objects are not previously pinned.
+ *
+ * Returns:
+ *
+ * Pointer to the intel_bb, asserts on failure.
+ */
+struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size)
+{
+	return __intel_bb_create(i915, size, true);
+}
+
 /*
 * tdestroy() calls the free function for each node, but the tree nodes
 * live in the objects array, so do nothing.
@@ -1250,24 +1303,32 @@ static void __do_nothing(void *node)
 	(void) node;
 }
 
-static void __intel_bb_destroy_objects(struct intel_bb *ibb)
+static void __intel_bb_destroy_relocations(struct intel_bb *ibb)
 {
 	uint32_t i;
 
 	/* Free relocations */
-	for (i = 0; i < ibb->num_objects; i++)
+	for (i = 0; i < ibb->num_objects; i++) {
 		free(from_user_pointer(ibb->objects[i].relocs_ptr));
+		ibb->objects[i].relocs_ptr = to_user_pointer(NULL);
+		ibb->objects[i].relocation_count = 0;
+	}
 
-	free(ibb->objects);
-	tdestroy(ibb->root, __do_nothing);
+	ibb->relocs = NULL;
+	ibb->num_relocs = 0;
+	ibb->allocated_relocs = 0;
+}
 
+static void __intel_bb_destroy_objects(struct intel_bb *ibb)
+{
+	free(ibb->objects);
 	ibb->objects = NULL;
+
+	tdestroy(ibb->root, __do_nothing);
 	ibb->root = NULL;
+
 	ibb->num_objects = 0;
-	ibb->num_relocs = 0;
 	ibb->allocated_objects = 0;
-	ibb->allocated_relocs = 0;
-	ibb->ptr = ibb->batch;
 }
 
 /**
@@ -1280,9 +1341,16 @@ void intel_bb_destroy(struct intel_bb *ibb)
 {
 	igt_assert(ibb);
 
+	ibb->refcount--;
+	igt_assert_f(ibb->refcount == 0, "Trying to destroy referenced bb!");
+
+	__intel_bb_destroy_relocations(ibb);
 	__intel_bb_destroy_objects(ibb);
 	gem_close(ibb->i915, ibb->handle);
 
+	if (ibb->fence >= 0)
+		close(ibb->fence);
+
 	free(ibb);
 }
 
@@ -1291,10 +1359,21 @@ void intel_bb_destroy(struct intel_bb *ibb)
  * @ibb: pointer to intel_bb
  * @purge_objects_cache: if true destroy internal execobj and relocs + cache
  *
- * Recreate batch bo.
+ * Recreate batch bo when there's no additional reference.
 */
 void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
 {
+	uint64_t bb_address;
+
+	if (purge_objects_cache && ibb->refcount > 1)
+		igt_warn("Cannot purge objects cache on bb, refcount > 1!");
+
+	/* Someone still holds a reference, just exit */
+	if (ibb->refcount > 1)
+		return;
+
+	__intel_bb_destroy_relocations(ibb);
+
 	if (purge_objects_cache) {
 		__intel_bb_destroy_objects(ibb);
 		__reallocate_objects(ibb);
@@ -1303,8 +1382,75 @@ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
 	gem_close(ibb->i915, ibb->handle);
 	ibb->handle = gem_create(ibb->i915, ibb->size);
 
-	intel_bb_add_object(ibb, ibb->handle, 0, false);
+	bb_address = __intel_bb_propose_offset(ibb);
+	intel_bb_add_object(ibb, ibb->handle, bb_address, false);
 	ibb->ptr = ibb->batch;
+	memset(ibb->batch, 0, ibb->size);
+}
+
+/**
+ * intel_bb_sync:
+ * @ibb: pointer to intel_bb
+ *
+ * Waits for bb completion. Returns 0 on success, otherwise a negative errno.
+ */
+int intel_bb_sync(struct intel_bb *ibb)
+{
+	int ret;
+
+	if (ibb->fence < 0)
+		return 0;
+
+	ret = sync_fence_wait(ibb->fence, -1);
+	if (ret == 0) {
+		close(ibb->fence);
+		ibb->fence = -1;
+	}
+
+	return ret;
+}
+
+/**
+ * intel_bb_print:
+ * @ibb: pointer to intel_bb
+ *
+ * Prints the intel_bb state (handles, sizes, counters) to stdout.
+ */
+void intel_bb_print(struct intel_bb *ibb)
+{
+	igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n",
+		 ibb->i915, ibb->gen, ibb->devid, ibb->debug);
+	igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n",
+		 ibb->handle, ibb->size, ibb->batch, ibb->ptr);
+	igt_info("prng: %u, gtt_size: %" PRIu64 ", supports 48bit: %d\n",
+		 ibb->prng, ibb->gtt_size, ibb->supports_48b_address);
+	igt_info("ctx: %u\n", ibb->ctx);
+	igt_info("root: %p\n", ibb->root);
+	igt_info("objects: %p, num_objects: %u, allocated obj: %u\n",
+		 ibb->objects, ibb->num_objects, ibb->allocated_objects);
+	igt_info("relocs: %p, num_relocs: %u, allocated_relocs: %u\n----\n",
+		 ibb->relocs, ibb->num_relocs, ibb->allocated_relocs);
+}
+
+/**
+ * intel_bb_dump:
+ * @ibb: pointer to intel_bb
+ * @filename: name of the file the bb is dumped to
+ *
+ * Dumps the batch bo contents to a file.
+ */
+void intel_bb_dump(struct intel_bb *ibb, const char *filename)
+{
+	FILE *out;
+	void *ptr;
+
+	ptr = gem_mmap__device_coherent(ibb->i915, ibb->handle, 0, ibb->size,
+					PROT_READ);
+	out = fopen(filename, "wb");
+	igt_assert(out);
+	fwrite(ptr, ibb->size, 1, out);
+	fclose(out);
+	munmap(ptr, ibb->size);
 }
 
 /**
@@ -1331,8 +1477,7 @@ static int __compare_objects(const void *p1, const void *p2)
  * intel_bb_add_object:
  * @ibb: pointer to intel_bb
  * @handle: which handle to add to objects array
- * @offset: presumed offset of the object when I915_EXEC_NO_RELOC flag is
- * used in execbuf call
+ * @offset: presumed offset of the object when no relocation is enforced
 * @write: whether the handle is a render target
  *
  * Function adds or updates execobj slot in bb objects array and
@@ -1352,6 +1497,7 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
 	i = ibb->num_objects;
 	object = &ibb->objects[i];
 	object->handle = handle;
+	object->offset = offset;
 
 	found = tsearch((void *) object, &ibb->root, __compare_objects);
 
@@ -1360,18 +1506,8 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
 	else
 		object = *found;
 
-	/* Assign address once */
-	if (object->offset == 0) {
-		if (offset) {
-			object->offset = offset;
-		} else {
-			/* randomize the address, we try to avoid relocations */
-			offset = hars_petruska_f54_1_random64(&ibb->prng);
-			offset &= (ibb->gtt_size - 1);
-			offset &= ~(4096 - 1);
-			object->offset = offset;
-		}
-	}
+	if (object->offset == INTEL_BUF_INVALID_ADDRESS)
+		object->offset = __intel_bb_propose_offset(ibb);
 
 	if (write)
 		object->flags |= EXEC_OBJECT_WRITE;
@@ -1382,6 +1518,23 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
 	return object;
 }
 
+static bool intel_bb_object_set_fence(struct intel_bb *ibb, uint32_t handle)
+{
+	struct drm_i915_gem_exec_object2 object = { .handle = handle };
+	struct drm_i915_gem_exec_object2 **found;
+
+	found = tfind((void *) &object, &ibb->root, __compare_objects);
+	if (!found) {
+		igt_warn("Trying to set fence on not found handle: %u\n",
+			 handle);
+		return false;
+	}
+
+	(*found)->flags |= EXEC_OBJECT_NEEDS_FENCE;
+
+	return true;
+}
+
 /*
  * intel_bb_add_reloc:
  * @ibb: pointer to intel_bb
@@ -1390,8 +1543,10 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
  * @write_domain: gem domain bit for the relocation
  * @delta: delta value to add to @buffer's gpu address
  * @offset: offset within bb to be patched
- * @presumed_offset: address of the object in address space, important for
- * I915_EXEC_NO_RELOC flag
+ * @presumed_offset: address of the object in the address space. If -1 is
+ * passed, the final offset of the object will be randomized (for a no-reloc
+ * bb) or 0 (for a reloc bb; reloc.presumed_offset will then be -1). If the
+ * address is known, it should be passed here (for the no-reloc case).
  *
  * Function allocates additional relocation slot in reloc array for a handle.
  * It also implicitly adds handle in the objects array if object doesn't
@@ -1426,14 +1581,18 @@ static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
 	relocs[i].write_domain = write_domain;
 	relocs[i].delta = delta;
 	relocs[i].offset = offset;
-	relocs[i].presumed_offset = object->offset;
+	if (ibb->enforce_relocs)
+		relocs[i].presumed_offset = -1;
+	else
+		relocs[i].presumed_offset = object->offset;
 
 	igt_debug("add reloc: handle: %u, r/w: 0x%x/0x%x, "
 		  "delta: 0x%" PRIx64 ", "
 		  "offset: 0x%" PRIx64 ", "
-		  "poffset: 0x%" PRIx64 "\n",
+		  "poffset: %p\n",
 		  handle, read_domains, write_domain,
-		  delta, offset, presumed_offset);
+		  delta, offset,
+		  from_user_pointer(relocs[i].presumed_offset));
 
 	return object->offset;
 }
@@ -1445,8 +1604,10 @@ static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
  * @read_domains: gem domain bits for the relocation
  * @write_domain: gem domain bit for the relocation
  * @delta: delta value to add to @buffer's gpu address
- * @presumed_offset: address of the object in address space, important for
- * I915_EXEC_NO_RELOC flag
+ * @presumed_offset: address of the object in the address space. If -1 is
+ * passed, the final offset of the object will be randomized (for a no-reloc
+ * bb) or 0 (for a reloc bb; reloc.presumed_offset will then be -1). If the
+ * address is known, it should be passed here (for the no-reloc case).
 * @write: whether the handle is a render target
  *
  * Function prepares relocation (execobj if required + reloc) and emits
@@ -1479,6 +1640,23 @@ uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
 	return address;
 }
 
+uint64_t intel_bb_emit_reloc_fenced(struct intel_bb *ibb,
+				    uint32_t handle,
+				    uint32_t read_domains,
+				    uint32_t write_domain,
+				    uint64_t delta,
+				    uint64_t presumed_offset)
+{
+	uint64_t address;
+
+	address = intel_bb_emit_reloc(ibb, handle, read_domains, write_domain,
+				      delta, presumed_offset);
+
+	intel_bb_object_set_fence(ibb, handle);
+
+	return address;
+}
+
 /**
  * intel_bb_offset_reloc:
  * @ibb: pointer to intel_bb
@@ -1486,8 +1664,10 @@ uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
  * @read_domains: gem domain bits for the relocation
  * @write_domain: gem domain bit for the relocation
  * @offset: offset within bb to be patched
- * @presumed_offset: address of the object in address space, important for
- * I915_EXEC_NO_RELOC flag
+ * @presumed_offset: address of the object in the address space. If -1 is
+ * passed, the final offset of the object will be randomized (for a no-reloc
+ * bb) or 0 (for a reloc bb; reloc.presumed_offset will then be -1). If the
+ * address is known, it should be passed here (for the no-reloc case).
  *
 * Function prepares relocation (execobj if required + reloc). It is used
 * for editing batchbuffer via modifying structures. It means when we're
@@ -1509,11 +1689,27 @@ uint64_t intel_bb_offset_reloc(struct intel_bb *ibb,
 				  0, offset, presumed_offset);
 }
 
-static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
+uint64_t intel_bb_offset_reloc_with_delta(struct intel_bb *ibb,
+					  uint32_t handle,
+					  uint32_t read_domains,
+					  uint32_t write_domain,
+					  uint32_t delta,
+					  uint32_t offset,
+					  uint64_t presumed_offset)
+{
+	igt_assert(ibb);
+
+	return intel_bb_add_reloc(ibb, handle, read_domains, write_domain,
+				  delta, offset, presumed_offset);
+}
+
+static void intel_bb_dump_execbuf(struct intel_bb *ibb,
+				  struct drm_i915_gem_execbuffer2 *execbuf)
 {
 	struct drm_i915_gem_exec_object2 *objects;
 	struct drm_i915_gem_relocation_entry *relocs, *reloc;
 	int i, j;
+	uint64_t address;
 
 	igt_info("execbuf batch len: %u, start offset: 0x%x, "
 		 "DR1: 0x%x, DR4: 0x%x, "
@@ -1529,26 +1725,33 @@ static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
 		objects = &((struct drm_i915_gem_exec_object2 *)
 			    from_user_pointer(execbuf->buffers_ptr))[i];
 		relocs = from_user_pointer(objects->relocs_ptr);
+		address = objects->offset;
+		if (address != INTEL_BUF_INVALID_ADDRESS)
+			address = address & (ibb->gtt_size - 1);
 		igt_info(" [%d] handle: %u, reloc_count: %d, reloc_ptr: %p, "
-			 "align: 0x%llx, offset: 0x%llx, flags: 0x%llx, "
+			 "align: 0x%llx, offset: 0x%" PRIx64 ", flags: 0x%llx, "
 			 "rsvd1: 0x%llx, rsvd2: 0x%llx\n",
 			 i, objects->handle, objects->relocation_count,
 			 relocs,
 			 objects->alignment,
-			 objects->offset, objects->flags,
+			 address,
+			 objects->flags,
 			 objects->rsvd1, objects->rsvd2);
 		if (objects->relocation_count) {
 			igt_info("\texecbuf relocs:\n");
 			for (j = 0; j < objects->relocation_count; j++) {
 				reloc = &relocs[j];
+				address = reloc->presumed_offset;
+				if (address != INTEL_BUF_INVALID_ADDRESS)
+					address = address & (ibb->gtt_size - 1);
 				igt_info("\t [%d] target handle: %u, "
 					 "offset: 0x%llx, delta: 0x%x, "
-					 "presumed_offset: 0x%llx, "
+					 "presumed_offset: 0x%" PRIx64 ", "
 					 "read_domains: 0x%x, "
 					 "write_domain: 0x%x\n",
 					 j, reloc->target_handle,
 					 reloc->offset, reloc->delta,
-					 reloc->presumed_offset,
+					 address,
 					 reloc->read_domains,
 					 reloc->write_domain);
 			}
@@ -1593,7 +1796,7 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
 		    uint32_t ctx, uint64_t flags, bool sync)
 {
 	struct drm_i915_gem_execbuffer2 execbuf;
-	int ret;
+	int ret, fence, new_fence;
 
 	ibb->objects[0].relocs_ptr = to_user_pointer(ibb->relocs);
 	ibb->objects[0].relocation_count = ibb->num_relocs;
@@ -1606,17 +1809,34 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
 	execbuf.buffer_count = ibb->num_objects;
 	execbuf.batch_len = end_offset;
 	execbuf.rsvd1 = ibb->ctx = ctx;
-	execbuf.flags = flags | I915_EXEC_BATCH_FIRST;
-
-	ret = __gem_execbuf(ibb->i915, &execbuf);
-	if (ret)
+	execbuf.flags = flags | I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_OUT;
+	if (ibb->enforce_relocs)
+		execbuf.flags &= ~I915_EXEC_NO_RELOC;
+	execbuf.rsvd2 = 0;
+
+	ret = __gem_execbuf_wr(ibb->i915, &execbuf);
+	if (ret) {
+		intel_bb_dump_execbuf(ibb, &execbuf);
 		return ret;
+	}
+
+	/* Save/merge fences */
+	fence = execbuf.rsvd2 >> 32;
+
+	if (ibb->fence < 0) {
+		ibb->fence = fence;
+	} else {
+		new_fence = sync_fence_merge(ibb->fence, fence);
+		close(ibb->fence);
+		close(fence);
+		ibb->fence = new_fence;
+	}
 
 	if (sync || ibb->debug)
-		gem_sync(ibb->i915, ibb->handle);
+		igt_assert(intel_bb_sync(ibb) == 0);
 
 	if (ibb->debug) {
-		intel_bb_dump_execbuf(&execbuf);
+		intel_bb_dump_execbuf(ibb, &execbuf);
 		if (intel_bb_debug_tree) {
 			igt_info("\nTree:\n");
 			twalk(ibb->root, print_node);
@@ -1672,14 +1892,20 @@ uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle)
 {
 	struct drm_i915_gem_exec_object2 object = { .handle = handle };
 	struct drm_i915_gem_exec_object2 **found;
+	uint64_t address;
 
 	igt_assert(ibb);
 
 	found = tfind((void *) &object, &ibb->root, __compare_objects);
 	if (!found)
-		return 0;
+		return INTEL_BUF_INVALID_ADDRESS;
+
+	address = (*found)->offset;
 
-	return (*found)->offset;
+	if (address == INTEL_BUF_INVALID_ADDRESS)
+		return address;
+
+	return address & (ibb->gtt_size - 1);
 }
 
 /**
@@ -1706,8 +1932,196 @@ bool intel_bb_object_offset_to_buf(struct intel_bb *ibb, struct intel_buf *buf)
 		return false;
 	}
 
-	buf->addr.offset = (*found)->offset;
+	buf->addr.offset = (*found)->offset & (ibb->gtt_size - 1);
 	buf->addr.ctx = ibb->ctx;
 
 	return true;
 }
+
+/**
+ * intel_bb_emit_bbe:
+ * @ibb: batchbuffer
+ *
+ * Outputs MI_BATCH_BUFFER_END and ensures batch is properly aligned.
+ */
+uint32_t intel_bb_emit_bbe(struct intel_bb *ibb)
+{
+	/* Mark the end of the buffer. */
+	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
+	intel_bb_ptr_align(ibb, 8);
+
+	return intel_bb_offset(ibb);
+}
+
+/*
+ * intel_bb_flush_with_context_ring:
+ * @ibb: batchbuffer
+ * @ctx: context id
+ * @ring: ring
+ *
+ * Submits the batch for execution on the @ring engine with the supplied
+ * hardware context @ctx.
+ */
+static void intel_bb_flush_with_context_ring(struct intel_bb *ibb,
+					     uint32_t ctx, uint32_t ring)
+{
+	intel_bb_exec_with_context(ibb, intel_bb_offset(ibb), ctx,
+				   ring | I915_EXEC_NO_RELOC,
+				   false);
+	intel_bb_reset(ibb, false);
+}
+
+void intel_bb_flush_render(struct intel_bb *ibb)
+{
+	uint32_t ring = I915_EXEC_RENDER;
+
+	intel_bb_flush_with_context_ring(ibb, ibb->ctx, ring);
+}
+
+void intel_bb_flush_blit(struct intel_bb *ibb)
+{
+	uint32_t ring = I915_EXEC_DEFAULT;
+
+	if (HAS_BLT_RING(ibb->devid))
+		ring = I915_EXEC_BLT;
+
+	intel_bb_flush_with_context_ring(ibb, ibb->ctx, ring);
+}
+
+uint32_t intel_bb_copy_data(struct intel_bb *ibb,
+			    const void *data, unsigned int bytes,
+			    uint32_t align)
+{
+	uint32_t *subdata, offset;
+
+	igt_assert((bytes & 3) == 0);
+
+	intel_bb_ptr_align(ibb, align);
+	offset = intel_bb_offset(ibb);
+	igt_assert(offset + bytes < ibb->size);
+
+	subdata = intel_bb_ptr(ibb);
+	memcpy(subdata, data, bytes);
+	intel_bb_ptr_add(ibb, bytes);
+
+	return offset;
+}
+
+void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags)
+{
+	intel_bb_out(ibb, XY_SRC_COPY_BLT_CMD |
+		     XY_SRC_COPY_BLT_WRITE_ALPHA |
+		     XY_SRC_COPY_BLT_WRITE_RGB |
+		     (flags) |
+		     (6 + 2*(ibb->gen >= 8)));
+}
+
+void intel_bb_emit_blt_copy(struct intel_bb *ibb,
+			    struct intel_buf *src,
+			    int src_x1, int src_y1, int src_pitch,
+			    struct intel_buf *dst,
+			    int dst_x1, int dst_y1, int dst_pitch,
+			    int width, int height, int bpp)
+{
+	const int gen = ibb->gen;
+	uint32_t cmd_bits = 0;
+	uint32_t br13_bits;
+	uint32_t mask;
+
+	igt_assert(bpp*(src_x1 + width) <= 8*src_pitch);
+	igt_assert(bpp*(dst_x1 + width) <= 8*dst_pitch);
+	igt_assert(src_pitch * (src_y1 + height) <= src->size);
+	igt_assert(dst_pitch * (dst_y1 + height) <= dst->size);
+
+	if (gen >= 4 && src->tiling != I915_TILING_NONE) {
+		src_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
+	}
+
+	if (gen >= 4 && dst->tiling != I915_TILING_NONE) {
+		dst_pitch /= 4;
+		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
+	}
+
+	CHECK_RANGE(src_x1); CHECK_RANGE(src_y1);
+	CHECK_RANGE(dst_x1); CHECK_RANGE(dst_y1);
+	CHECK_RANGE(width); CHECK_RANGE(height);
+	CHECK_RANGE(src_x1 + width); CHECK_RANGE(src_y1 + height);
+	CHECK_RANGE(dst_x1 + width); CHECK_RANGE(dst_y1 + height);
+	CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
+
+	br13_bits = 0;
+	switch (bpp) {
+	case 8:
+		break;
+	case 16:		/* supporting only RGB565, not ARGB1555 */
+		br13_bits |= 1 << 24;
+		break;
+	case 32:
+		br13_bits |= 3 << 24;
+		cmd_bits |= XY_SRC_COPY_BLT_WRITE_ALPHA |
+			    XY_SRC_COPY_BLT_WRITE_RGB;
+		break;
+	default:
+		igt_fail(IGT_EXIT_FAILURE);
+	}
+
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+		intel_bb_out(ibb, BCS_SWCTRL);
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (src->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (dst->tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		intel_bb_out(ibb, mask);
+	}
+
+	intel_bb_blit_start(ibb, cmd_bits);
+	intel_bb_out(ibb, (br13_bits) |
+		  (0xcc << 16) | /* copy ROP */
+		  dst_pitch);
+	intel_bb_out(ibb, (dst_y1 << 16) | dst_x1); /* dst x1,y1 */
+	intel_bb_out(ibb, ((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */
+	intel_bb_emit_reloc_fenced(ibb, dst->handle,
+				   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+				   0, dst->addr.offset);
+	intel_bb_out(ibb, (src_y1 << 16) | src_x1); /* src x1,y1 */
+	intel_bb_out(ibb, src_pitch);
+	intel_bb_emit_reloc_fenced(ibb, src->handle,
+				   I915_GEM_DOMAIN_RENDER, 0,
+				   0, src->addr.offset);
+
+	if (gen >= 6 && src->handle == dst->handle) {
+		intel_bb_out(ibb, XY_SETUP_CLIP_BLT_CMD);
+		intel_bb_out(ibb, 0);
+		intel_bb_out(ibb, 0);
+	}
+
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		igt_assert(ibb->gen >= 6);
+		intel_bb_out(ibb, MI_FLUSH_DW | 2);
+		intel_bb_out(ibb, 0);
+		intel_bb_out(ibb, 0);
+		intel_bb_out(ibb, 0);
+
+		intel_bb_out(ibb, MI_LOAD_REGISTER_IMM);
+		intel_bb_out(ibb, BCS_SWCTRL);
+		intel_bb_out(ibb, (BCS_SRC_Y | BCS_DST_Y) << 16);
+	}
+}
+
+void intel_bb_blt_copy(struct intel_bb *ibb,
+		       struct intel_buf *src,
+		       int src_x1, int src_y1, int src_pitch,
+		       struct intel_buf *dst,
+		       int dst_x1, int dst_y1, int dst_pitch,
+		       int width, int height, int bpp)
+{
+	intel_bb_emit_blt_copy(ibb, src, src_x1, src_y1, src_pitch,
+			       dst, dst_x1, dst_y1, dst_pitch,
+			       width, height, bpp);
+	intel_bb_emit_bbe(ibb);
+	intel_bb_flush_blit(ibb);
+}
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 0649fc22..02168b9d 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -433,11 +433,13 @@ struct intel_bb {
 	int i915;
 	int gen;
 	bool debug;
+	bool enforce_relocs;
 	uint32_t devid;
 	uint32_t handle;
 	uint32_t size;
 	uint32_t *batch;
 	uint32_t *ptr;
+	int fence;
 
 	uint32_t prng;
 	uint64_t gtt_size;
@@ -454,12 +456,34 @@ struct intel_bb {
 	struct drm_i915_gem_relocation_entry *relocs;
 	uint32_t num_relocs;
 	uint32_t allocated_relocs;
+
+	/*
+	 * The batch bo is recreated in the reset path only when no extra
+	 * reference is held (refcount == 1). Currently we don't need to use
+	 * atomics because intel_bb is not thread-safe.
+	 */
+	int32_t refcount;
 };
 
 struct intel_bb *intel_bb_create(int i915, uint32_t size);
-
+struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size);
 void intel_bb_destroy(struct intel_bb *ibb);
+
+static inline void intel_bb_ref(struct intel_bb *ibb)
+{
+	ibb->refcount++;
+}
+
+static inline void intel_bb_unref(struct intel_bb *ibb)
+{
+	igt_assert_f(ibb->refcount > 0, "intel_bb refcount is 0!");
+	ibb->refcount--;
+}
+
 void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache);
+int intel_bb_sync(struct intel_bb *ibb);
+void intel_bb_print(struct intel_bb *ibb);
+void intel_bb_dump(struct intel_bb *ibb, const char *filename);
 void intel_bb_set_debug(struct intel_bb *ibb, bool debug);
 
 static inline uint32_t intel_bb_offset(struct intel_bb *ibb)
@@ -471,7 +495,7 @@ static inline void intel_bb_ptr_set(struct intel_bb *ibb, uint32_t offset)
 {
 	ibb->ptr = (void *) ((uint8_t *) ibb->batch + offset);
 
-	igt_assert(intel_bb_offset(ibb) < ibb->size);
+	igt_assert(intel_bb_offset(ibb) <= ibb->size);
 }
 
 static inline void intel_bb_ptr_add(struct intel_bb *ibb, uint32_t offset)
@@ -479,10 +503,22 @@ static inline void intel_bb_ptr_add(struct intel_bb *ibb, uint32_t offset)
 	intel_bb_ptr_set(ibb, intel_bb_offset(ibb) + offset);
 }
 
-static inline void intel_bb_ptr_align(struct intel_bb *ibb,
+static inline uint32_t intel_bb_ptr_add_return_prev_offset(struct intel_bb *ibb,
+							   uint32_t offset)
+{
+	uint32_t previous_offset = intel_bb_offset(ibb);
+
+	intel_bb_ptr_set(ibb, previous_offset + offset);
+
+	return previous_offset;
+}
+
+static inline void *intel_bb_ptr_align(struct intel_bb *ibb,
 				      uint32_t alignment)
 {
 	intel_bb_ptr_set(ibb, ALIGN(intel_bb_offset(ibb), alignment));
+
+	return (void *) ibb->ptr;
 }
 
 static inline void *intel_bb_ptr(struct intel_bb *ibb)
@@ -495,7 +531,7 @@ static inline void intel_bb_out(struct intel_bb *ibb, uint32_t dword)
 	*ibb->ptr = dword;
 	ibb->ptr++;
 
-	igt_assert(intel_bb_offset(ibb) < ibb->size);
+	igt_assert(intel_bb_offset(ibb) <= ibb->size);
 }
 
 
@@ -510,6 +546,13 @@ uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
 			 uint64_t delta,
 			 uint64_t presumed_offset);
 
+uint64_t intel_bb_emit_reloc_fenced(struct intel_bb *ibb,
+				    uint32_t handle,
+				    uint32_t read_domains,
+				    uint32_t write_domain,
+				    uint64_t delta,
+				    uint64_t presumed_offset);
+
 uint64_t intel_bb_offset_reloc(struct intel_bb *ibb,
 			       uint32_t handle,
 			       uint32_t read_domains,
@@ -517,6 +560,14 @@ uint64_t intel_bb_offset_reloc(struct intel_bb *ibb,
 			       uint32_t offset,
 			       uint64_t presumed_offset);
 
+uint64_t intel_bb_offset_reloc_with_delta(struct intel_bb *ibb,
+					  uint32_t handle,
+					  uint32_t read_domains,
+					  uint32_t write_domain,
+					  uint32_t delta,
+					  uint32_t offset,
+					  uint64_t presumed_offset);
+
 int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
 		    uint32_t ctx, uint64_t flags, bool sync);
 
@@ -529,4 +580,25 @@ void intel_bb_exec_with_context(struct intel_bb *ibb, uint32_t end_offset,
 uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle);
 bool intel_bb_object_offset_to_buf(struct intel_bb *ibb, struct intel_buf *buf);
 
+uint32_t intel_bb_emit_bbe(struct intel_bb *ibb);
+void intel_bb_flush_render(struct intel_bb *ibb);
+void intel_bb_flush_blit(struct intel_bb *ibb);
+
+uint32_t intel_bb_copy_data(struct intel_bb *ibb,
+			    const void *data, unsigned int bytes,
+			    uint32_t align);
+
+void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags);
+void intel_bb_emit_blt_copy(struct intel_bb *ibb,
+			    struct intel_buf *src,
+			    int src_x1, int src_y1, int src_pitch,
+			    struct intel_buf *dst,
+			    int dst_x1, int dst_y1, int dst_pitch,
+			    int width, int height, int bpp);
+void intel_bb_blt_copy(struct intel_bb *ibb,
+		       struct intel_buf *src,
+		       int src_x1, int src_y1, int src_pitch,
+		       struct intel_buf *dst,
+		       int dst_x1, int dst_y1, int dst_pitch,
+		       int width, int height, int bpp);
 #endif
-- 
2.26.0


