[igt-dev] [PATCH i-g-t v3 3/7] lib/intel_batchbuffer: Introduce intel_bb

Mon May 18 09:09:15 UTC 2020

Simple batchbuffer facility which gathers and outputs relocations.

v2: make bb api more consistent and universal
v3: fix compiling issues on non-x86 arch

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
 lib/intel_batchbuffer.c | 385 ++++++++++++++++++++++++++++++++++++++++
 lib/intel_batchbuffer.h |  89 ++++++++++
 2 files changed, 474 insertions(+)

diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index f1a45b47..580feabb 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -41,6 +41,7 @@
 #include "rendercopy.h"
 #include "media_fill.h"
 #include "ioctl_wrappers.h"
+#include "i915/gem_mman.h"
 #include "media_spin.h"
 #include "gpgpu_fill.h"
 #include "igt_aux.h"
@@ -1171,3 +1172,387 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
 
 	return spin;
 }
+
+/* Intel batchbuffer v2 */
+
+/*
+ * __intel_bb_create:
+ * @i915: drm fd
+ * @size: size of the batchbuffer
+ *
+ * Returns:
+ *
+ * Pointer the intel_bb, asserts on failure.
+ */
+struct intel_bb *intel_bb_create(int i915, uint32_t size)
+{
+	struct intel_bb *ibb = calloc(1, sizeof(*ibb));
+
+	igt_assert(ibb);
+
+	ibb->i915 = i915;
+	ibb->devid = intel_get_drm_devid(i915);
+	ibb->gen = intel_gen(ibb->devid);
+	ibb->handle = gem_create(i915, size);
+	ibb->size = size;
+	ibb->batch = calloc(1, size);
+	igt_assert(ibb->batch);
+	ibb->ptr = ibb->batch;
+	ibb->objects = NULL;
+
+	return ibb;
+}
+
+/**
+ * intel_bb_destroy:
+ * @ibb: pointer to intel_bb
+ *
+ * Frees all relocations / objects allocated during filling the batch.
+ */
+void intel_bb_destroy(struct intel_bb *ibb)
+{
+	uint32_t i;
+	void *ptr;
+
+	igt_assert(ibb);
+
+	/* Free relocations */
+	for (i = 0; i < ibb->num_objects; i++) {
+		ptr = from_user_pointer(ibb->objects[i].relocs_ptr);
+		if (ptr)
+			free(ptr);
+	}
+
+	free(ibb->objects);
+
+	munmap(ibb->batch, ibb->size);
+	gem_close(ibb->i915, ibb->handle);
+
+	free(ibb);
+}
+
+/**
+ * intel_bb_set_debug:
+ * @ibb: pointer to intel_bb
+ * @debug: true / false
+ *
+ * Sets debug to true / false. Execbuf is then called synchronously and
+ * object/reloc arrays are printed after execution.
+ */
+void intel_bb_set_debug(struct intel_bb *ibb, bool debug)
+{
+	ibb->debug = debug;
+}
+
+/*
+ * intel_bb_add_handle:
+ * @ibb: pointer to intel_bb
+ * @handle: which handle to add to objects array
+ * @offset: presumed offset of the object when I915_EXEC_NO_RELOC flag is
+ * used in execbuf call
+ *
+ * Function allocates additional execobj slot in object array for a handle.
+ * For batchbuffer only presumed address is saved.
+ */
+static void intel_bb_add_handle(struct intel_bb *ibb,
+				uint32_t handle,
+				uint64_t offset)
+{
+	uint32_t i;
+
+	/* Skip bb as object, it will be added before exec */
+	if (ibb->handle == handle) {
+		igt_assert(ibb->batch_offset == 0 ||
+			   ibb->batch_offset == offset);
+		ibb->batch_offset = offset;
+		return;
+	}
+
+	for (i = 0; i < ibb->num_objects; i++)
+		if (ibb->objects[i].handle == handle)
+			return;
+
+	i = ibb->num_objects++;
+	ibb->objects = realloc(ibb->objects,
+			       sizeof(*ibb->objects) * (i + 1));
+	igt_assert(ibb->objects);
+
+	memset(&ibb->objects[i], 0, sizeof(*ibb->objects));
+	ibb->objects[i].handle = handle;
+	ibb->objects[i].offset = offset;
+}
+
+/*
+ * intel_bb_add_reloc:
+ * @ibb: pointer to intel_bb
+ * @handle: object handle which address will be taken to patch the bb
+ * @read_domains: gem domain bits for the relocation
+ * @write_domain: gem domain bit for the relocation
+ * @delta: delta value to add to @buffer's gpu address
+ * @offset: offset within bb to be patched
+ * @presumed_offset: address of the object in address space, important for
+ * I915_EXEC_NO_RELOC flag
+ *
+ * Function allocates additional relocation slot in reloc array for a handle.
+ */
+static void intel_bb_add_reloc(struct intel_bb *ibb,
+			       uint32_t handle,
+			       uint32_t read_domains,
+			       uint32_t write_domain,
+			       uint64_t delta,
+			       uint64_t offset,
+			       uint64_t presumed_offset)
+{
+	struct drm_i915_gem_relocation_entry *relocs;
+	uint32_t i;
+
+	intel_bb_add_handle(ibb, handle, presumed_offset);
+
+	relocs = ibb->relocs;
+	if (ibb->num_relocs == ibb->allocated_relocs) {
+		ibb->allocated_relocs += 4096 / sizeof(*relocs);
+		relocs = realloc(relocs, sizeof(*relocs) * ibb->allocated_relocs);
+		igt_assert(relocs);
+		ibb->relocs = relocs;
+	}
+
+	i = ibb->num_relocs++;
+	memset(&relocs[i], 0, sizeof(*relocs));
+	relocs[i].target_handle = handle;
+	relocs[i].read_domains = read_domains;
+	relocs[i].write_domain = write_domain;
+	relocs[i].delta = delta;
+	relocs[i].offset = offset;
+	relocs[i].presumed_offset = presumed_offset;
+
+	igt_debug("add reloc: handle: %u, r/w: 0x%x/0x%x, "
+		  "delta: 0x%" PRIx64 ", "
+		  "offset: 0x%" PRIx64 ", "
+		  "poffset: 0x%" PRIx64 "\n",
+		  handle, read_domains, write_domain,
+		  delta, offset, presumed_offset);
+}
+
+/**
+ * intel_bb_emit_reloc:
+ * @ibb: pointer to intel_bb
+ * @handle: object handle which address will be taken to patch the bb
+ * @read_domains: gem domain bits for the relocation
+ * @write_domain: gem domain bit for the relocation
+ * @delta: delta value to add to @buffer's gpu address
+ * @presumed_offset: address of the object in address space, important for
+ * I915_EXEC_NO_RELOC flag
+ *
+ * Function prepares relocation (execobj if required + reloc) and emits
+ * offset in bb. For I915_EXEC_NO_RELOC presumed_offset is a hint we already
+ * have object in valid place and relocation step can be skipped in this case.
+ *
+ * Note: delta is value added to address, mostly used when some instructions
+ * require modify-bit set to apply change. Which delta is valid depends
+ * on instruction (see instruction specification).
+ */
+void intel_bb_emit_reloc(struct intel_bb *ibb,
+			 uint32_t handle,
+			 uint32_t read_domains,
+			 uint32_t write_domain,
+			 uint64_t delta,
+			 uint64_t presumed_offset)
+{
+	igt_assert(ibb);
+
+	intel_bb_add_reloc(ibb, handle, read_domains, write_domain,
+			   delta, intel_bb_offset(ibb), presumed_offset);
+
+	intel_bb_out(ibb, delta + presumed_offset);
+	if (ibb->gen >= 8)
+		intel_bb_out(ibb, presumed_offset >> 32);
+}
+
+/**
+ * intel_bb_offset_reloc:
+ * @ibb: pointer to intel_bb
+ * @handle: object handle which address will be taken to patch the bb
+ * @read_domains: gem domain bits for the relocation
+ * @write_domain: gem domain bit for the relocation
+ * @offset: offset within bb to be patched
+ * @presumed_offset: address of the object in address space, important for
+ * I915_EXEC_NO_RELOC flag
+ *
+ * Function prepares relocation (execobj if required + reloc). It it used
+ * for editing batchbuffer via modifying structures. It means when we're
+ * preparing batchbuffer it is more descriptive to edit the structure
+ * than emitting dwords. But it require for some fields to point the
+ * relocation. For that case @offset is passed by the user and it points
+ * to the offset in bb where the relocation will be applied.
+ */
+void intel_bb_offset_reloc(struct intel_bb *ibb,
+			   uint32_t handle,
+			   uint32_t read_domains,
+			   uint32_t write_domain,
+			   uint32_t offset,
+			   uint64_t presumed_offset)
+{
+	igt_assert(ibb);
+
+	intel_bb_add_reloc(ibb, handle, read_domains, write_domain,
+			   0, offset, presumed_offset);
+}
+
+static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	struct drm_i915_gem_exec_object2 *objects;
+	struct drm_i915_gem_relocation_entry *relocs, *reloc;
+	int i, j;
+
+	igt_info("execbuf batch len: %u, start offset: 0x%x, "
+		 "DR1: 0x%x, DR4: 0x%x, "
+		 "num clip: %u, clipptr: 0x%llx, "
+		 "flags: 0x%llx, rsvd1: 0x%llx, rsvd2: 0x%llx\n",
+		 execbuf->batch_len, execbuf->batch_start_offset,
+		 execbuf->DR1, execbuf->DR4,
+		 execbuf->num_cliprects, execbuf->cliprects_ptr,
+		 execbuf->flags, execbuf->rsvd1, execbuf->rsvd2);
+
+	igt_info("execbuf buffer_count: %d\n", execbuf->buffer_count);
+	for (i = 0; i < execbuf->buffer_count; i++) {
+		objects = &((struct drm_i915_gem_exec_object2 *)
+			    from_user_pointer(execbuf->buffers_ptr))[i];
+		relocs = from_user_pointer(objects->relocs_ptr);
+		igt_info(" [%d] <handle: %u, reloc_count: %d, reloc_ptr: %p, "
+			 "align: 0x%llx, offset: 0x%llx, flags: 0x%llx, "
+			 "rsvd1: 0x%llx, rsvd2: 0x%llx\n",
+			 i, objects->handle, objects->relocation_count,
+			 relocs,
+			 objects->alignment,
+			 objects->offset, objects->flags,
+			 objects->rsvd1, objects->rsvd2);
+		if (objects->relocation_count) {
+			igt_info("execbuf relocs:\n");
+			for (j = 0; j < objects->relocation_count; j++) {
+				reloc = &relocs[j];
+				igt_info(" [%d] <target handle: %u, "
+					 "offset: 0x%llx, delta: 0x%x, "
+					 "presumed_offset: 0x%llx, "
+					 "read_domains: 0x%x, "
+					 "write_domain: 0x%x\n",
+					 j, reloc->target_handle,
+					 reloc->offset, reloc->delta,
+					 reloc->presumed_offset,
+					 reloc->read_domains,
+					 reloc->write_domain);
+			}
+		}
+	}
+}
+
+/*
+ * @__intel_bb_exec:
+ * @ibb: pointer to intel_bb
+ * @end_offset: offset of the last instruction in the bb
+ * @flags: flags passed directly to execbuf
+ * @ctx: context
+ * @sync: if true wait for execbuf completion, otherwise caller is responsible
+ * to wait for completion
+ *
+ * Returns: 0 on success, otherwise errno.
+ *
+ * Note: In this step execobj for bb is allocated and inserted to the objects
+ * array.
+*/
+int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
+		    uint32_t ctx, uint64_t flags, bool sync)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	uint32_t i;
+	int ret;
+
+	i = ibb->num_objects;
+
+	if (i == 0 || ibb->objects[i - 1].handle != ibb->handle) {
+		ibb->objects = realloc(ibb->objects, sizeof(*ibb->objects) * (i + 1));
+		igt_assert(ibb->objects);
+
+		gem_write(ibb->i915, ibb->handle, 0, ibb->batch, ibb->size);
+
+		memset(&ibb->objects[i], 0, sizeof(*ibb->objects));
+		ibb->objects[i].relocs_ptr = to_user_pointer(ibb->relocs);
+		ibb->objects[i].relocation_count = ibb->num_relocs;
+		ibb->objects[i].handle = ibb->handle;
+		ibb->objects[i].offset = ibb->batch_offset;
+		ibb->num_objects++;
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = (uintptr_t) ibb->objects;
+	execbuf.buffer_count = ibb->num_objects;
+	execbuf.batch_len = end_offset;
+	execbuf.rsvd1 = ctx;
+	execbuf.flags = flags;
+
+	ret = __gem_execbuf(ibb->i915, &execbuf);
+	if (ret)
+		return ret;
+
+	if (sync || ibb->debug)
+		gem_sync(ibb->i915, ibb->handle);
+
+	if (ibb->debug)
+		intel_bb_dump_execbuf(&execbuf);
+
+	return 0;
+}
+
+/**
+ * intel_bb_exec:
+ * @ibb: pointer to intel_bb
+ * @end_offset: offset of the last instruction in the bb
+ * @flags: flags passed directly to execbuf
+ * @sync: if true wait for execbuf completion, otherwise caller is responsible
+ * to wait for completion
+ *
+ * Do execbuf with default context. Asserts on failure.
+*/
+void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
+		   uint64_t flags, bool sync)
+{
+	igt_assert_eq(__intel_bb_exec(ibb, end_offset, 0, flags, sync), 0);
+}
+
+/*
+ * intel_bb_exec_with_context:
+ * @ibb: pointer to intel_bb
+ * @end_offset: offset of the last instruction in the bb
+ * @flags: flags passed directly to execbuf
+ * @ctx: context
+ * @sync: if true wait for execbuf completion, otherwise caller is responsible
+ * to wait for completion
+ *
+ * Do execbuf with context @context.
+*/
+void intel_bb_exec_with_context(struct intel_bb *ibb, uint32_t end_offset,
+				uint32_t ctx, uint64_t flags, bool sync)
+{
+	igt_assert_eq(__intel_bb_exec(ibb, end_offset, ctx, flags, sync), 0);
+}
+
+/**
+ * intel_bb_get_presumed_address:
+ * @ibb: pointer to intel_bb
+ * @handle: object handle
+ *
+ * When objects addresses are previously pinned and we don't want to relocate
+ * we need to acquire them from previous execbuf. Function checks previous
+ * relocation entry for @handle and returns presumed_offset field.
+ */
+uint64_t intel_bb_get_presumed_offset(struct intel_bb *ibb, uint32_t handle)
+{
+	uint32_t i;
+
+	igt_assert(ibb);
+
+	for (i = 0; i < ibb->num_relocs; i++)
+		if (ibb->relocs[i].target_handle == handle)
+			return ibb->relocs[i].presumed_offset;
+
+	return 0;
+}
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 442f3a18..5f3bd974 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -7,6 +7,7 @@
 
 #include "igt_core.h"
 #include "intel_reg.h"
+#include "drmtest.h"
 
 #define BATCH_SZ 4096
 #define BATCH_RESERVED 16
@@ -422,4 +423,92 @@ typedef void (*igt_media_spinfunc_t)(struct intel_batchbuffer *batch,
 
 igt_media_spinfunc_t igt_get_media_spinfunc(int devid);
 
+
+/*
+ * Batchbuffer without libdrm dependency
+ */
+struct intel_bb {
+	int i915;
+	int gen;
+	bool debug;
+	uint32_t devid;
+	uint32_t handle;
+	uint32_t size;
+	uint32_t *batch;
+	uint32_t *ptr;
+
+	struct drm_i915_gem_exec_object2 *objects;
+	uint32_t num_objects;
+	uint64_t batch_offset;
+
+	struct drm_i915_gem_relocation_entry *relocs;
+	uint32_t num_relocs;
+	uint32_t allocated_relocs;
+};
+
+struct intel_bb *intel_bb_create(int i915, uint32_t size);
+
+void intel_bb_destroy(struct intel_bb *ibb);
+void intel_bb_set_debug(struct intel_bb *ibb, bool debug);
+
+inline uint32_t intel_bb_offset(struct intel_bb *ibb)
+{
+	return (uint32_t) ((uint8_t *) ibb->ptr - (uint8_t *) ibb->batch);
+}
+
+inline void intel_bb_ptr_set(struct intel_bb *ibb, uint32_t offset)
+{
+	ibb->ptr = (void *) ((uint8_t *) ibb->batch + offset);
+
+	igt_assert(intel_bb_offset(ibb) < ibb->size);
+}
+
+inline void intel_bb_ptr_add(struct intel_bb *ibb, uint32_t offset)
+{
+	intel_bb_ptr_set(ibb, intel_bb_offset(ibb) + offset);
+}
+
+inline void intel_bb_ptr_align(struct intel_bb *ibb, uint32_t alignment)
+{
+	intel_bb_ptr_set(ibb, ALIGN(intel_bb_offset(ibb), alignment));
+}
+
+inline void *intel_bb_ptr(struct intel_bb *ibb)
+{
+	return (void *) ibb->ptr;
+}
+
+inline void intel_bb_out(struct intel_bb *ibb, uint32_t dword)
+{
+	*ibb->ptr = dword;
+	ibb->ptr++;
+
+	igt_assert(intel_bb_offset(ibb) < ibb->size);
+}
+
+void intel_bb_emit_reloc(struct intel_bb *ibb,
+			 uint32_t handle,
+			 uint32_t read_domains,
+			 uint32_t write_domain,
+			 uint64_t delta,
+			 uint64_t presumed_offset);
+
+void intel_bb_offset_reloc(struct intel_bb *ibb,
+			   uint32_t handle,
+			   uint32_t read_domains,
+			   uint32_t write_domain,
+			   uint32_t offset,
+			   uint64_t presumed_offset);
+
+int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
+		    uint32_t ctx, uint64_t flags, bool sync);
+
+void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
+		   uint64_t flags, bool sync);
+
+void intel_bb_exec_with_context(struct intel_bb *ibb, uint32_t end_offset,
+				uint32_t ctx, uint64_t flags, bool sync);
+
+uint64_t intel_bb_get_presumed_offset(struct intel_bb *ibb, uint32_t handle);
+
 #endif
-- 
2.26.0