[igt-dev] [PATCH i-g-t v4 06/10] lib/intel_batchbuffer: Add Xe support in intel-bb
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Fri Apr 21 05:54:53 UTC 2023
Intention of creating intel-bb was to replace libdrm for i915.
Due to many code relies on it (kms for example) most rational way
is to extend and add Xe path to it.
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Signed-off-by: Bhanuprakash Modem <bhanuprakash.modem at intel.com>
---
lib/gpu_cmds.c | 2 +-
lib/intel_aux_pgtable.c | 2 +-
lib/intel_batchbuffer.c | 430 +++++++++++++++++++++++++++++----------
lib/intel_batchbuffer.h | 28 ++-
tests/i915/gem_caching.c | 4 +-
tests/i915/gem_pxp.c | 2 +-
6 files changed, 353 insertions(+), 115 deletions(-)
diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index cee81555d8..afb26d2990 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -251,7 +251,7 @@ gen7_fill_binding_table(struct intel_bb *ibb,
{
uint32_t binding_table_offset;
uint32_t *binding_table;
- uint32_t devid = intel_get_drm_devid(ibb->i915);
+ uint32_t devid = intel_get_drm_devid(ibb->fd);
intel_bb_ptr_align(ibb, 64);
binding_table_offset = intel_bb_offset(ibb);
diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
index 5205687080..946ca60b97 100644
--- a/lib/intel_aux_pgtable.c
+++ b/lib/intel_aux_pgtable.c
@@ -481,7 +481,7 @@ intel_aux_pgtable_create(struct intel_bb *ibb,
intel_bb_add_intel_buf_with_alignment(ibb, pgt->buf,
pgt->max_align, false);
- pgt_map(ibb->i915, pgt);
+ pgt_map(ibb->fd, pgt);
pgt_populate_entries(pgt, bufs, buf_count);
pgt_unmap(pgt);
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index a4eb4c2bbc..a77c2a6055 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -29,6 +29,8 @@
#include <glib.h>
#include "i915/gem_create.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
#include "intel_batchbuffer.h"
#include "intel_bufops.h"
#include "intel_chipset.h"
@@ -38,6 +40,9 @@
#include "veboxcopy.h"
#include "sw_sync.h"
#include "gpgpu_fill.h"
+#include "igt_aux.h"
+#include "igt_syncobj.h"
+#include "i830_reg.h"
#include "huc_copy.h"
#include "i915/i915_blt.h"
@@ -828,21 +833,22 @@ static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
/**
* __intel_bb_create:
- * @i915: drm fd
+ * @fd: drm fd - i915 or xe
* @ctx: context id
- * @cfg: intel_ctx configuration, NULL for default context or legacy mode
+ * @cfg: for i915 intel_ctx configuration, NULL for default context or legacy mode,
+ * unused for xe
* @size: size of the batchbuffer
* @do_relocs: use relocations or allocator
* @allocator_type: allocator type, must be INTEL_ALLOCATOR_NONE for relocations
*
* intel-bb assumes it will work in one of two modes - with relocations or
- * with using allocator (currently RANDOM and SIMPLE are implemented).
+ * with using allocator (currently RELOC and SIMPLE are implemented).
* Some description is required to describe how they maintain the addresses.
*
* Before entering into each scenarios generic rule is intel-bb keeps objects
* and their offsets in the internal cache and reuses in subsequent execs.
*
- * 1. intel-bb with relocations
+ * 1. intel-bb with relocations (i915 only)
*
* Creating new intel-bb adds handle to cache implicitly and sets its address
* to 0. Objects added to intel-bb later also have address 0 set for first run.
@@ -850,14 +856,15 @@ static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
* works in reloc mode addresses are only suggestion to the driver and we
* cannot be sure they won't change at next exec.
*
- * 2. with allocator
+ * 2. with allocator (i915 or xe)
*
* This mode is valid only for ppgtt. Addresses are acquired from allocator
- * and softpinned. intel-bb cache must be then coherent with allocator
- * (simple is coherent, random is not due to fact we don't keep its state).
+ * and softpinned (i915) or vm-binded (xe). intel-bb cache must be then
+ * coherent with allocator (simple is coherent, reloc partially [doesn't
+ * support address reservation]).
* When we do intel-bb reset with purging cache it has to reacquire addresses
* from allocator (allocator should return same address - what is true for
- * simple allocator and false for random as mentioned before).
+ * simple and reloc allocators).
*
* If we do reset without purging caches we use addresses from intel-bb cache
* during execbuf objects construction.
@@ -873,7 +880,7 @@ static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
* Pointer the intel_bb, asserts on failure.
*/
static struct intel_bb *
-__intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
+__intel_bb_create(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
uint32_t size, bool do_relocs,
uint64_t start, uint64_t end,
uint8_t allocator_type, enum allocator_strategy strategy)
@@ -883,48 +890,83 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
igt_assert(ibb);
- ibb->uses_full_ppgtt = gem_uses_full_ppgtt(i915);
- ibb->devid = intel_get_drm_devid(i915);
+ ibb->devid = intel_get_drm_devid(fd);
ibb->gen = intel_gen(ibb->devid);
+ ibb->ctx = ctx;
+
+ ibb->fd = fd;
+ ibb->driver = is_i915_device(fd) ? INTEL_DRIVER_I915 :
+ is_xe_device(fd) ? INTEL_DRIVER_XE : 0;
+ igt_assert(ibb->driver);
/*
* If we don't have full ppgtt driver can change our addresses
* so allocator is useless in this case. Just enforce relocations
* for such gens and don't use allocator at all.
*/
- if (!ibb->uses_full_ppgtt)
- do_relocs = true;
+ if (ibb->driver == INTEL_DRIVER_I915) {
+ ibb->uses_full_ppgtt = gem_uses_full_ppgtt(fd);
- /*
- * For softpin mode allocator has full control over offsets allocation
- * so we want kernel to not interfere with this.
- */
- if (do_relocs)
- ibb->allows_obj_alignment = gem_allows_obj_alignment(i915);
+ if (!ibb->uses_full_ppgtt)
+ do_relocs = true;
- /* Use safe start offset instead assuming 0x0 is safe */
- start = max_t(uint64_t, start, gem_detect_safe_start_offset(i915));
+ /*
+ * For softpin mode allocator has full control over offsets allocation
+ * so we want kernel to not interfere with this.
+ */
+ if (do_relocs) {
+ ibb->allows_obj_alignment = gem_allows_obj_alignment(fd);
+ allocator_type = INTEL_ALLOCATOR_NONE;
+ } else {
+ /* Use safe start offset instead assuming 0x0 is safe */
+ start = max_t(uint64_t, start, gem_detect_safe_start_offset(fd));
+
+ /* if relocs are set we won't use an allocator */
+ ibb->allocator_handle =
+ intel_allocator_open_full(fd, ctx,
+ start, end,
+ allocator_type,
+ strategy, 0);
+ }
+
+ ibb->vm_id = 0;
+ } else {
+ igt_assert(!do_relocs);
+
+ if (!ctx)
+ ctx = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
+
+ ibb->uses_full_ppgtt = 1;
+ ibb->allocator_handle =
+ intel_allocator_open_full(fd, ctx, start, end,
+ allocator_type,
+ strategy,
+ xe_get_default_alignment(fd));
+ ibb->vm_id = ctx;
+ ibb->last_engine = ~0U;
+ }
- /* if relocs are set we won't use an allocator */
- if (do_relocs)
- allocator_type = INTEL_ALLOCATOR_NONE;
- else
- ibb->allocator_handle = intel_allocator_open_full(i915, ctx,
- start, end,
- allocator_type,
- strategy, 0);
ibb->allocator_type = allocator_type;
ibb->allocator_strategy = strategy;
ibb->allocator_start = start;
ibb->allocator_end = end;
-
- ibb->i915 = i915;
ibb->enforce_relocs = do_relocs;
- ibb->handle = gem_create(i915, size);
+
+ if (ibb->driver == INTEL_DRIVER_I915) {
+ ibb->handle = gem_create(fd, size);
+ ibb->alignment = gem_detect_safe_alignment(fd);
+ ibb->gtt_size = gem_aperture_size(fd);
+ } else {
+ ibb->alignment = xe_get_default_alignment(fd);
+ size = ALIGN(size, ibb->alignment);
+ ibb->handle = xe_bo_create_flags(fd, 0, size,
+ xe_has_vram(fd) ?
+ vram_memory(fd, 0) :
+ system_memory(fd));
+ ibb->gtt_size = 1ull << xe_va_bits(fd);
+ }
+
ibb->size = size;
- ibb->alignment = gem_detect_safe_alignment(i915);
- ibb->ctx = ctx;
- ibb->vm_id = 0;
ibb->batch = calloc(1, size);
igt_assert(ibb->batch);
ibb->ptr = ibb->batch;
@@ -937,7 +979,6 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
memcpy(ibb->cfg, cfg, sizeof(*cfg));
}
- ibb->gtt_size = gem_aperture_size(i915);
if ((ibb->gtt_size - 1) >> 32)
ibb->supports_48b_address = true;
@@ -961,13 +1002,13 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
/**
* intel_bb_create_full:
- * @i915: drm fd
+ * @fd: drm fd - i915 or xe
* @ctx: context
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
* @start: allocator vm start address
* @end: allocator vm start address
- * @allocator_type: allocator type, SIMPLE, RANDOM, ...
+ * @allocator_type: allocator type, SIMPLE, RELOC, ...
* @strategy: allocation strategy
*
* Creates bb with context passed in @ctx, size in @size and allocator type
@@ -980,19 +1021,19 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
*
* Pointer the intel_bb, asserts on failure.
*/
-struct intel_bb *intel_bb_create_full(int i915, uint32_t ctx,
+struct intel_bb *intel_bb_create_full(int fd, uint32_t ctx,
const intel_ctx_cfg_t *cfg, uint32_t size,
uint64_t start, uint64_t end,
uint8_t allocator_type,
enum allocator_strategy strategy)
{
- return __intel_bb_create(i915, ctx, cfg, size, false, start, end,
+ return __intel_bb_create(fd, ctx, cfg, size, false, start, end,
allocator_type, strategy);
}
/**
* intel_bb_create_with_allocator:
- * @i915: drm fd
+ * @fd: drm fd - i915 or xe
* @ctx: context
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
@@ -1006,18 +1047,18 @@ struct intel_bb *intel_bb_create_full(int i915, uint32_t ctx,
*
* Pointer the intel_bb, asserts on failure.
*/
-struct intel_bb *intel_bb_create_with_allocator(int i915, uint32_t ctx,
+struct intel_bb *intel_bb_create_with_allocator(int fd, uint32_t ctx,
const intel_ctx_cfg_t *cfg,
uint32_t size,
uint8_t allocator_type)
{
- return __intel_bb_create(i915, ctx, cfg, size, false, 0, 0,
+ return __intel_bb_create(fd, ctx, cfg, size, false, 0, 0,
allocator_type, ALLOC_STRATEGY_HIGH_TO_LOW);
}
-static bool aux_needs_softpin(int i915)
+static bool aux_needs_softpin(int fd)
{
- return intel_gen(intel_get_drm_devid(i915)) >= 12;
+ return intel_gen(intel_get_drm_devid(fd)) >= 12;
}
static bool has_ctx_cfg(struct intel_bb *ibb)
@@ -1027,7 +1068,7 @@ static bool has_ctx_cfg(struct intel_bb *ibb)
/**
* intel_bb_create:
- * @i915: drm fd
+ * @fd: drm fd - i915 or xe
* @size: size of the batchbuffer
*
* Creates bb with default context.
@@ -1045,19 +1086,19 @@ static bool has_ctx_cfg(struct intel_bb *ibb)
* connection to it inside intel_bb is not valid anymore.
* Trying to use it leads to catastrofic errors.
*/
-struct intel_bb *intel_bb_create(int i915, uint32_t size)
+struct intel_bb *intel_bb_create(int fd, uint32_t size)
{
- bool relocs = gem_has_relocations(i915);
+ bool relocs = is_i915_device(fd) && gem_has_relocations(fd);
- return __intel_bb_create(i915, 0, NULL, size,
- relocs && !aux_needs_softpin(i915), 0, 0,
+ return __intel_bb_create(fd, 0, NULL, size,
+ relocs && !aux_needs_softpin(fd), 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW);
}
/**
* intel_bb_create_with_context:
- * @i915: drm fd
+ * @fd: drm fd - i915 or xe
* @ctx: context id
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
@@ -1070,20 +1111,20 @@ struct intel_bb *intel_bb_create(int i915, uint32_t size)
* Pointer the intel_bb, asserts on failure.
*/
struct intel_bb *
-intel_bb_create_with_context(int i915, uint32_t ctx,
+intel_bb_create_with_context(int fd, uint32_t ctx,
const intel_ctx_cfg_t *cfg, uint32_t size)
{
- bool relocs = gem_has_relocations(i915);
+ bool relocs = is_i915_device(fd) && gem_has_relocations(fd);
- return __intel_bb_create(i915, ctx, cfg, size,
- relocs && !aux_needs_softpin(i915), 0, 0,
+ return __intel_bb_create(fd, ctx, cfg, size,
+ relocs && !aux_needs_softpin(fd), 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW);
}
/**
* intel_bb_create_with_relocs:
- * @i915: drm fd
+ * @fd: drm fd - i915
* @size: size of the batchbuffer
*
* Creates bb which will disable passing addresses.
@@ -1093,17 +1134,17 @@ intel_bb_create_with_context(int i915, uint32_t ctx,
*
* Pointer the intel_bb, asserts on failure.
*/
-struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size)
+struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size)
{
- igt_require(gem_has_relocations(i915));
+ igt_require(is_i915_device(fd) && gem_has_relocations(fd));
- return __intel_bb_create(i915, 0, NULL, size, true, 0, 0,
+ return __intel_bb_create(fd, 0, NULL, size, true, 0, 0,
INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE);
}
/**
* intel_bb_create_with_relocs_and_context:
- * @i915: drm fd
+ * @fd: drm fd - i915
* @ctx: context
* @cfg: intel_ctx configuration, NULL for default context or legacy mode
* @size: size of the batchbuffer
@@ -1116,19 +1157,19 @@ struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size)
* Pointer the intel_bb, asserts on failure.
*/
struct intel_bb *
-intel_bb_create_with_relocs_and_context(int i915, uint32_t ctx,
+intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx,
const intel_ctx_cfg_t *cfg,
uint32_t size)
{
- igt_require(gem_has_relocations(i915));
+ igt_require(is_i915_device(fd) && gem_has_relocations(fd));
- return __intel_bb_create(i915, ctx, cfg, size, true, 0, 0,
+ return __intel_bb_create(fd, ctx, cfg, size, true, 0, 0,
INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE);
}
/**
* intel_bb_create_no_relocs:
- * @i915: drm fd
+ * @fd: drm fd - i915 or xe
* @size: size of the batchbuffer
*
* Creates bb with disabled relocations.
@@ -1138,11 +1179,12 @@ intel_bb_create_with_relocs_and_context(int i915, uint32_t ctx,
*
* Pointer the intel_bb, asserts on failure.
*/
-struct intel_bb *intel_bb_create_no_relocs(int i915, uint32_t size)
+struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size)
{
- igt_require(gem_uses_full_ppgtt(i915));
+ igt_require(is_xe_device(fd) ||
+ (is_i915_device(fd) && gem_uses_full_ppgtt(fd)));
- return __intel_bb_create(i915, 0, NULL, size, false, 0, 0,
+ return __intel_bb_create(fd, 0, NULL, size, false, 0, 0,
INTEL_ALLOCATOR_SIMPLE,
ALLOC_STRATEGY_HIGH_TO_LOW);
}
@@ -1217,16 +1259,80 @@ void intel_bb_destroy(struct intel_bb *ibb)
intel_allocator_free(ibb->allocator_handle, ibb->handle);
intel_allocator_close(ibb->allocator_handle);
}
- gem_close(ibb->i915, ibb->handle);
+ gem_close(ibb->fd, ibb->handle);
if (ibb->fence >= 0)
close(ibb->fence);
+ if (ibb->engine_syncobj)
+ syncobj_destroy(ibb->fd, ibb->engine_syncobj);
+ if (ibb->vm_id && !ibb->ctx)
+ xe_vm_destroy(ibb->fd, ibb->vm_id);
free(ibb->batch);
free(ibb->cfg);
free(ibb);
}
+static struct drm_xe_vm_bind_op *xe_alloc_bind_ops(struct intel_bb *ibb,
+ uint32_t op, uint32_t region)
+{
+ struct drm_i915_gem_exec_object2 **objects = ibb->objects;
+ struct drm_xe_vm_bind_op *bind_ops, *ops;
+ bool set_obj = (op & 0xffff) == XE_VM_BIND_OP_MAP;
+
+ bind_ops = calloc(ibb->num_objects, sizeof(*bind_ops));
+ igt_assert(bind_ops);
+
+ igt_debug("bind_ops: %s\n", set_obj ? "MAP" : "UNMAP");
+ for (int i = 0; i < ibb->num_objects; i++) {
+ ops = &bind_ops[i];
+
+ if (set_obj)
+ ops->obj = objects[i]->handle;
+
+ ops->op = op;
+ ops->obj_offset = 0;
+ ops->addr = objects[i]->offset;
+ ops->range = objects[i]->rsvd1;
+ ops->region = region;
+
+ igt_debug(" [%d]: handle: %u, offset: %llx, size: %llx\n",
+ i, ops->obj, (long long)ops->addr, (long long)ops->range);
+ }
+
+ return bind_ops;
+}
+
+static void __unbind_xe_objects(struct intel_bb *ibb)
+{
+ struct drm_xe_sync syncs[2] = {
+ { .flags = DRM_XE_SYNC_SYNCOBJ },
+ { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+ };
+ int ret;
+
+ syncs[0].handle = ibb->engine_syncobj;
+ syncs[1].handle = syncobj_create(ibb->fd, 0);
+
+ if (ibb->num_objects > 1) {
+ struct drm_xe_vm_bind_op *bind_ops;
+ uint32_t op = XE_VM_BIND_OP_UNMAP | XE_VM_BIND_FLAG_ASYNC;
+
+ bind_ops = xe_alloc_bind_ops(ibb, op, 0);
+ xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops,
+ ibb->num_objects, syncs, 2);
+ free(bind_ops);
+ } else {
+ xe_vm_unbind_async(ibb->fd, ibb->vm_id, 0, 0,
+ ibb->batch_offset, ibb->size, syncs, 2);
+ }
+ ret = syncobj_wait_err(ibb->fd, &syncs[1].handle, 1, INT64_MAX, 0);
+ igt_assert_eq(ret, 0);
+ syncobj_destroy(ibb->fd, syncs[1].handle);
+
+ ibb->xe_bound = false;
+}
+
/*
* intel_bb_reset:
* @ibb: pointer to intel_bb
@@ -1238,7 +1344,6 @@ void intel_bb_destroy(struct intel_bb *ibb)
* from intel-bb tracking list. Removing intel_bufs releases their addresses
* in the allocator.
*/
-
void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
{
uint32_t i;
@@ -1258,6 +1363,9 @@ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
for (i = 0; i < ibb->num_objects; i++)
ibb->objects[i]->flags &= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+ if (is_xe_device(ibb->fd) && ibb->xe_bound)
+ __unbind_xe_objects(ibb);
+
__intel_bb_destroy_relocations(ibb);
__intel_bb_destroy_objects(ibb);
__reallocate_objects(ibb);
@@ -1277,11 +1385,16 @@ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset,
ibb->size);
- gem_close(ibb->i915, ibb->handle);
- ibb->handle = gem_create(ibb->i915, ibb->size);
+ gem_close(ibb->fd, ibb->handle);
+ if (is_i915_device(ibb->fd))
+ ibb->handle = gem_create(ibb->fd, ibb->size);
+ else
+ ibb->handle = xe_bo_create_flags(ibb->fd, 0, ibb->size,
+ vram_if_possible(ibb->fd, 0));
- /* Keep address for bb in reloc mode and RANDOM allocator */
- if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE)
+ /* Reacquire offset for RELOC and SIMPLE */
+ if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE ||
+ ibb->allocator_type == INTEL_ALLOCATOR_RELOC)
ibb->batch_offset = __intel_bb_get_offset(ibb,
ibb->handle,
ibb->size,
@@ -1304,13 +1417,19 @@ int intel_bb_sync(struct intel_bb *ibb)
{
int ret;
- if (ibb->fence < 0)
+ if (ibb->fence < 0 && !ibb->engine_syncobj)
return 0;
- ret = sync_fence_wait(ibb->fence, -1);
- if (ret == 0) {
- close(ibb->fence);
- ibb->fence = -1;
+ if (ibb->fence >= 0) {
+ ret = sync_fence_wait(ibb->fence, -1);
+ if (ret == 0) {
+ close(ibb->fence);
+ ibb->fence = -1;
+ }
+ } else {
+ igt_assert_neq(ibb->engine_syncobj, 0);
+ ret = syncobj_wait_err(ibb->fd, &ibb->engine_syncobj,
+ 1, INT64_MAX, 0);
}
return ret;
@@ -1325,7 +1444,7 @@ int intel_bb_sync(struct intel_bb *ibb)
void intel_bb_print(struct intel_bb *ibb)
{
igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n",
- ibb->i915, ibb->gen, ibb->devid, ibb->debug);
+ ibb->fd, ibb->gen, ibb->devid, ibb->debug);
igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n",
ibb->handle, ibb->size, ibb->batch, ibb->ptr);
igt_info("gtt_size: %" PRIu64 ", supports 48bit: %d\n",
@@ -1350,7 +1469,7 @@ void intel_bb_dump(struct intel_bb *ibb, const char *filename)
FILE *out;
void *ptr;
- ptr = gem_mmap__device_coherent(ibb->i915, ibb->handle, 0, ibb->size,
+ ptr = gem_mmap__device_coherent(ibb->fd, ibb->handle, 0, ibb->size,
PROT_READ);
out = fopen(filename, "wb");
igt_assert(out);
@@ -1501,7 +1620,7 @@ static void __remove_from_objects(struct intel_bb *ibb,
}
/**
- * intel_bb_add_object:
+ * __intel_bb_add_object:
* @ibb: pointer to intel_bb
* @handle: which handle to add to objects array
* @size: object size
@@ -1513,9 +1632,9 @@ static void __remove_from_objects(struct intel_bb *ibb,
* in the object tree. When object is a render target it has to
* be marked with EXEC_OBJECT_WRITE flag.
*/
-struct drm_i915_gem_exec_object2 *
-intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
- uint64_t offset, uint64_t alignment, bool write)
+static struct drm_i915_gem_exec_object2 *
+__intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
+ uint64_t offset, uint64_t alignment, bool write)
{
struct drm_i915_gem_exec_object2 *object;
@@ -1523,8 +1642,12 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
|| ALIGN(offset, alignment) == offset);
igt_assert(is_power_of_two(alignment));
+ if (ibb->driver == INTEL_DRIVER_I915)
+ alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->fd));
+ else
+ alignment = max_t(uint64_t, ibb->alignment, alignment);
+
object = __add_to_cache(ibb, handle);
- alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->i915));
__add_to_objects(ibb, object);
/*
@@ -1584,9 +1707,27 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
if (ibb->allows_obj_alignment)
object->alignment = alignment;
+ if (ibb->driver == INTEL_DRIVER_XE) {
+ object->alignment = alignment;
+ object->rsvd1 = size;
+ }
+
return object;
}
+struct drm_i915_gem_exec_object2 *
+intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
+ uint64_t offset, uint64_t alignment, bool write)
+{
+ struct drm_i915_gem_exec_object2 *obj = NULL;
+
+ obj = __intel_bb_add_object(ibb, handle, size, offset,
+ alignment, write);
+ igt_assert(obj);
+
+ return obj;
+}
+
bool intel_bb_remove_object(struct intel_bb *ibb, uint32_t handle,
uint64_t offset, uint64_t size)
{
@@ -1999,7 +2140,7 @@ static void intel_bb_dump_execbuf(struct intel_bb *ibb,
uint64_t address;
igt_debug("execbuf [pid: %ld, fd: %d, ctx: %u]\n",
- (long) getpid(), ibb->i915, ibb->ctx);
+ (long) getpid(), ibb->fd, ibb->ctx);
igt_debug("execbuf batch len: %u, start offset: 0x%x, "
"DR1: 0x%x, DR4: 0x%x, "
"num clip: %u, clipptr: 0x%llx, "
@@ -2135,6 +2276,82 @@ static void update_offsets(struct intel_bb *ibb,
}
#define LINELEN 76
+
+static int
+__xe_bb_exec(struct intel_bb *ibb, uint64_t flags, bool sync)
+{
+ uint32_t engine = flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK);
+ uint32_t engine_id;
+ struct drm_xe_sync syncs[2] = {
+ { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+ { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
+ };
+ struct drm_xe_vm_bind_op *bind_ops;
+ void *map;
+
+ igt_assert_eq(ibb->num_relocs, 0);
+ igt_assert_eq(ibb->xe_bound, false);
+
+ if (ibb->last_engine != engine) {
+ struct drm_xe_engine_class_instance inst = { };
+
+ inst.engine_instance =
+ (flags & I915_EXEC_BSD_MASK) >> I915_EXEC_BSD_SHIFT;
+
+ switch (flags & I915_EXEC_RING_MASK) {
+ case I915_EXEC_DEFAULT:
+ case I915_EXEC_BLT:
+ inst.engine_class = DRM_XE_ENGINE_CLASS_COPY;
+ break;
+ case I915_EXEC_BSD:
+ inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
+ break;
+ case I915_EXEC_RENDER:
+ inst.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
+ break;
+ case I915_EXEC_VEBOX:
+ inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
+ break;
+ default:
+ igt_assert_f(false, "Unknown engine: %x", (uint32_t) flags);
+ }
+ igt_debug("Run on %s\n", xe_engine_class_string(inst.engine_class));
+
+ ibb->engine_id = engine_id =
+ xe_engine_create(ibb->fd, ibb->vm_id, &inst, 0);
+ } else {
+ engine_id = ibb->engine_id;
+ }
+ ibb->last_engine = engine;
+
+ map = xe_bo_map(ibb->fd, ibb->handle, ibb->size);
+ memcpy(map, ibb->batch, ibb->size);
+ gem_munmap(map, ibb->size);
+
+ syncs[0].handle = syncobj_create(ibb->fd, 0);
+ if (ibb->num_objects > 1) {
+ bind_ops = xe_alloc_bind_ops(ibb, XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC, 0);
+ xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops,
+ ibb->num_objects, syncs, 1);
+ ibb->xe_bound = true;
+ free(bind_ops);
+ } else {
+ xe_vm_bind_async(ibb->fd, ibb->vm_id, 0, ibb->handle, 0,
+ ibb->batch_offset, ibb->size, syncs, 1);
+ }
+
+ syncs[0].flags &= ~DRM_XE_SYNC_SIGNAL;
+ ibb->engine_syncobj = syncobj_create(ibb->fd, 0);
+ syncs[1].handle = ibb->engine_syncobj;
+
+ xe_exec_sync(ibb->fd, engine_id, ibb->batch_offset, syncs, 2);
+
+ if (sync)
+ intel_bb_sync(ibb);
+
+ return 0;
+}
+
/*
* __intel_bb_exec:
* @ibb: pointer to intel_bb
@@ -2160,7 +2377,7 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
ibb->objects[0]->handle = ibb->handle;
ibb->objects[0]->offset = ibb->batch_offset;
- gem_write(ibb->i915, ibb->handle, 0, ibb->batch, ibb->size);
+ gem_write(ibb->fd, ibb->handle, 0, ibb->batch, ibb->size);
memset(&execbuf, 0, sizeof(execbuf));
objects = create_objects_array(ibb);
@@ -2173,13 +2390,10 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
execbuf.flags &= ~I915_EXEC_NO_RELOC;
execbuf.rsvd2 = 0;
- if (ibb->dump_base64)
- intel_bb_dump_base64(ibb, LINELEN);
-
/* For debugging on CI, remove in final series */
intel_bb_dump_execbuf(ibb, &execbuf);
- ret = __gem_execbuf_wr(ibb->i915, &execbuf);
+ ret = __gem_execbuf_wr(ibb->fd, &execbuf);
if (ret) {
intel_bb_dump_execbuf(ibb, &execbuf);
free(objects);
@@ -2220,7 +2434,7 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
/**
* intel_bb_exec:
* @ibb: pointer to intel_bb
- * @end_offset: offset of the last instruction in the bb
+ * @end_offset: offset of the last instruction in the bb (for i915)
* @flags: flags passed directly to execbuf
* @sync: if true wait for execbuf completion, otherwise caller is responsible
* to wait for completion
@@ -2230,7 +2444,13 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
uint64_t flags, bool sync)
{
- igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0);
+ if (ibb->dump_base64)
+ intel_bb_dump_base64(ibb, LINELEN);
+
+ if (ibb->driver == INTEL_DRIVER_I915)
+ igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0);
+ else
+ igt_assert_eq(__xe_bb_exec(ibb, flags, sync), 0);
}
/**
@@ -2409,13 +2629,13 @@ uint32_t intel_bb_copy_data(struct intel_bb *ibb,
*/
void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags)
{
- if (blt_has_xy_src_copy(ibb->i915))
+ if (blt_has_xy_src_copy(ibb->fd))
intel_bb_out(ibb, XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
XY_SRC_COPY_BLT_WRITE_RGB |
flags |
(6 + 2 * (ibb->gen >= 8)));
- else if (blt_has_fast_copy(ibb->i915))
+ else if (blt_has_fast_copy(ibb->fd))
intel_bb_out(ibb, XY_FAST_COPY_BLT | flags);
else
igt_assert_f(0, "No supported blit command found\n");
@@ -2456,9 +2676,9 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
if (gen >= 4 && src->tiling != I915_TILING_NONE) {
src_pitch /= 4;
- if (blt_has_xy_src_copy(ibb->i915))
+ if (blt_has_xy_src_copy(ibb->fd))
cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
- else if (blt_has_fast_copy(ibb->i915))
+ else if (blt_has_fast_copy(ibb->fd))
cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling);
else
igt_assert_f(0, "No supported blit command found\n");
@@ -2466,7 +2686,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
if (gen >= 4 && dst->tiling != I915_TILING_NONE) {
dst_pitch /= 4;
- if (blt_has_xy_src_copy(ibb->i915))
+ if (blt_has_xy_src_copy(ibb->fd))
cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
else
cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling);
@@ -2480,7 +2700,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
br13_bits = 0;
- if (blt_has_xy_src_copy(ibb->i915)) {
+ if (blt_has_xy_src_copy(ibb->fd)) {
switch (bpp) {
case 8:
break;
@@ -2496,7 +2716,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
igt_fail(IGT_EXIT_FAILURE);
}
} else {
- br13_bits = fast_copy_dword1(ibb->i915, src->tiling, dst->tiling, bpp);
+ br13_bits = fast_copy_dword1(ibb->fd, src->tiling, dst->tiling, bpp);
}
if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
@@ -2628,14 +2848,20 @@ void intel_bb_track(bool do_tracking)
static void __intel_bb_reinit_alloc(struct intel_bb *ibb)
{
+ uint64_t alignment = 0;
+
if (ibb->allocator_type == INTEL_ALLOCATOR_NONE)
return;
- ibb->allocator_handle = intel_allocator_open_full(ibb->i915, ibb->ctx,
+ if (ibb->driver == INTEL_DRIVER_XE)
+ alignment = xe_get_default_alignment(ibb->fd);
+
+ ibb->allocator_handle = intel_allocator_open_full(ibb->fd, ibb->ctx,
ibb->allocator_start, ibb->allocator_end,
ibb->allocator_type,
ibb->allocator_strategy,
- 0);
+ alignment);
+
intel_bb_reset(ibb, true);
}
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 10e4126606..e2566d8223 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -235,6 +235,11 @@ struct igt_pxp {
uint32_t appid;
};
+enum intel_driver {
+ INTEL_DRIVER_I915 = 1,
+ INTEL_DRIVER_XE,
+};
+
/*
* Batchbuffer without libdrm dependency
*/
@@ -246,7 +251,9 @@ struct intel_bb {
uint8_t allocator_type;
enum allocator_strategy allocator_strategy;
- int i915;
+ enum intel_driver driver;
+ int fd;
+
unsigned int gen;
bool debug;
bool dump_base64;
@@ -268,6 +275,11 @@ struct intel_bb {
uint32_t ctx;
uint32_t vm_id;
+ bool xe_bound;
+ uint32_t engine_syncobj;
+ uint32_t engine_id;
+ uint32_t last_engine;
+
/* Context configuration */
intel_ctx_cfg_t *cfg;
@@ -299,21 +311,21 @@ struct intel_bb {
};
struct intel_bb *
-intel_bb_create_full(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
+intel_bb_create_full(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
uint32_t size, uint64_t start, uint64_t end,
uint8_t allocator_type, enum allocator_strategy strategy);
struct intel_bb *
-intel_bb_create_with_allocator(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
+intel_bb_create_with_allocator(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
uint32_t size, uint8_t allocator_type);
-struct intel_bb *intel_bb_create(int i915, uint32_t size);
+struct intel_bb *intel_bb_create(int fd, uint32_t size);
struct intel_bb *
-intel_bb_create_with_context(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
+intel_bb_create_with_context(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
uint32_t size);
-struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size);
+struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size);
struct intel_bb *
-intel_bb_create_with_relocs_and_context(int i915, uint32_t ctx,
+intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx,
const intel_ctx_cfg_t *cfg, uint32_t size);
-struct intel_bb *intel_bb_create_no_relocs(int i915, uint32_t size);
+struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size);
void intel_bb_destroy(struct intel_bb *ibb);
/* make it safe to use intel_allocator after failed test */
diff --git a/tests/i915/gem_caching.c b/tests/i915/gem_caching.c
index b6ecd8346c..6e944f0acb 100644
--- a/tests/i915/gem_caching.c
+++ b/tests/i915/gem_caching.c
@@ -83,7 +83,7 @@ copy_bo(struct intel_bb *ibb, struct intel_buf *src, struct intel_buf *dst)
intel_bb_add_intel_buf(ibb, src, false);
intel_bb_add_intel_buf(ibb, dst, true);
- if (blt_has_xy_src_copy(ibb->i915)) {
+ if (blt_has_xy_src_copy(ibb->fd)) {
intel_bb_out(ibb,
XY_SRC_COPY_BLT_CMD |
XY_SRC_COPY_BLT_WRITE_ALPHA |
@@ -93,7 +93,7 @@ copy_bo(struct intel_bb *ibb, struct intel_buf *src, struct intel_buf *dst)
intel_bb_out(ibb, (3 << 24) | /* 32 bits */
(0xcc << 16) | /* copy ROP */
4096);
- } else if (blt_has_fast_copy(ibb->i915)) {
+ } else if (blt_has_fast_copy(ibb->fd)) {
intel_bb_out(ibb, XY_FAST_COPY_BLT);
intel_bb_out(ibb, XY_FAST_COPY_COLOR_DEPTH_32 | 4096);
} else {
diff --git a/tests/i915/gem_pxp.c b/tests/i915/gem_pxp.c
index af657d0e1b..2f27abd582 100644
--- a/tests/i915/gem_pxp.c
+++ b/tests/i915/gem_pxp.c
@@ -809,7 +809,7 @@ static int gem_execbuf_flush_store_dw(int i915, struct intel_bb *ibb, uint32_t c
ret = __intel_bb_exec(ibb, intel_bb_offset(ibb),
I915_EXEC_RENDER | I915_EXEC_NO_RELOC, false);
if (ret == 0) {
- gem_sync(ibb->i915, fence->handle);
+ gem_sync(ibb->fd, fence->handle);
assert_pipectl_storedw_done(i915, fence->handle);
}
return ret;
--
2.34.1
More information about the igt-dev
mailing list