[igt-dev] [PATCH i-g-t v3 06/10] lib/intel_batchbuffer: Add Xe support in intel-bb
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Thu Apr 20 20:12:24 UTC 2023
On Thu, Apr 20, 2023 at 02:48:33PM +0200, Manszewski, Christoph wrote:
> Hi Zbigniew,
>
> On 20.04.2023 08:39, Zbigniew Kempczyński wrote:
> > Intention of creating intel-bb was to replace libdrm for i915.
> > Due to many code relies on it (kms for example) most rational way
> > is to extend and add Xe path to it.
> >
> > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Signed-off-by: Bhanuprakash Modem <bhanuprakash.modem at intel.com>
> > ---
> > lib/gpu_cmds.c | 2 +-
> > lib/intel_aux_pgtable.c | 2 +-
> > lib/intel_batchbuffer.c | 430 ++++++++++++++++++++++++++++++---------
> > lib/intel_batchbuffer.h | 28 ++-
> > tests/i915/gem_caching.c | 4 +-
> > tests/i915/gem_pxp.c | 2 +-
> > 6 files changed, 355 insertions(+), 113 deletions(-)
> >
> > diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
> > index cee81555d8..afb26d2990 100644
> > --- a/lib/gpu_cmds.c
> > +++ b/lib/gpu_cmds.c
> > @@ -251,7 +251,7 @@ gen7_fill_binding_table(struct intel_bb *ibb,
> > {
> > uint32_t binding_table_offset;
> > uint32_t *binding_table;
> > - uint32_t devid = intel_get_drm_devid(ibb->i915);
> > + uint32_t devid = intel_get_drm_devid(ibb->fd);
> > intel_bb_ptr_align(ibb, 64);
> > binding_table_offset = intel_bb_offset(ibb);
> > diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
> > index 5205687080..946ca60b97 100644
> > --- a/lib/intel_aux_pgtable.c
> > +++ b/lib/intel_aux_pgtable.c
> > @@ -481,7 +481,7 @@ intel_aux_pgtable_create(struct intel_bb *ibb,
> > intel_bb_add_intel_buf_with_alignment(ibb, pgt->buf,
> > pgt->max_align, false);
> > - pgt_map(ibb->i915, pgt);
> > + pgt_map(ibb->fd, pgt);
> > pgt_populate_entries(pgt, bufs, buf_count);
> > pgt_unmap(pgt);
> > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> > index a4eb4c2bbc..47ab579daa 100644
> > --- a/lib/intel_batchbuffer.c
> > +++ b/lib/intel_batchbuffer.c
> > @@ -29,6 +29,8 @@
> > #include <glib.h>
> > #include "i915/gem_create.h"
> > +#include "xe/xe_ioctl.h"
> > +#include "xe/xe_query.h"
> > #include "intel_batchbuffer.h"
> > #include "intel_bufops.h"
> > #include "intel_chipset.h"
> > @@ -38,6 +40,9 @@
> > #include "veboxcopy.h"
> > #include "sw_sync.h"
> > #include "gpgpu_fill.h"
> > +#include "igt_aux.h"
> > +#include "igt_syncobj.h"
> > +#include "i830_reg.h"
> > #include "huc_copy.h"
> > #include "i915/i915_blt.h"
> > @@ -828,21 +833,22 @@ static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
> > /**
> > * __intel_bb_create:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915 or xe
> > * @ctx: context id
> > - * @cfg: intel_ctx configuration, NULL for default context or legacy mode
> > + * @cfg: for i915 intel_ctx configuration, NULL for default context or legacy mode,
> > + * unused for xe
> > * @size: size of the batchbuffer
> > * @do_relocs: use relocations or allocator
> > * @allocator_type: allocator type, must be INTEL_ALLOCATOR_NONE for relocations
> > *
> > * intel-bb assumes it will work in one of two modes - with relocations or
> > - * with using allocator (currently RANDOM and SIMPLE are implemented).
> > + * with using allocator (currently RELOC and SIMPLE are implemented).
> > * Some description is required to describe how they maintain the addresses.
> > *
> > * Before entering into each scenarios generic rule is intel-bb keeps objects
> > * and their offsets in the internal cache and reuses in subsequent execs.
> > *
> > - * 1. intel-bb with relocations
> > + * 1. intel-bb with relocations (i915 only)
> > *
> > * Creating new intel-bb adds handle to cache implicitly and sets its address
> > * to 0. Objects added to intel-bb later also have address 0 set for first run.
> > @@ -850,14 +856,15 @@ static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
> > * works in reloc mode addresses are only suggestion to the driver and we
> > * cannot be sure they won't change at next exec.
> > *
> > - * 2. with allocator
> > + * 2. with allocator (i915 or xe)
> > *
> > * This mode is valid only for ppgtt. Addresses are acquired from allocator
> > - * and softpinned. intel-bb cache must be then coherent with allocator
> > - * (simple is coherent, random is not due to fact we don't keep its state).
> > + * and softpinned (i915) or vm-binded (xe). intel-bb cache must be then
> > + * coherent with allocator (simple is coherent, reloc partially [doesn't
> > + * support address reservation]).
> > * When we do intel-bb reset with purging cache it has to reacquire addresses
> > * from allocator (allocator should return same address - what is true for
> > - * simple allocator and false for random as mentioned before).
> > + * simple and reloc allocators).
> > *
> > * If we do reset without purging caches we use addresses from intel-bb cache
> > * during execbuf objects construction.
> > @@ -873,7 +880,7 @@ static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb,
> > * Pointer the intel_bb, asserts on failure.
> > */
> > static struct intel_bb *
> > -__intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > +__intel_bb_create(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > uint32_t size, bool do_relocs,
> > uint64_t start, uint64_t end,
> > uint8_t allocator_type, enum allocator_strategy strategy)
> > @@ -883,48 +890,86 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > igt_assert(ibb);
> > - ibb->uses_full_ppgtt = gem_uses_full_ppgtt(i915);
> > - ibb->devid = intel_get_drm_devid(i915);
> > + ibb->devid = intel_get_drm_devid(fd);
> > ibb->gen = intel_gen(ibb->devid);
> > + ibb->ctx = ctx;
> > +
> > + ibb->fd = fd;
> > + ibb->driver = is_i915_device(fd) ? INTEL_DRIVER_I915 :
> > + is_xe_device(fd) ? INTEL_DRIVER_XE : 0;
> > + igt_assert(ibb->driver);
> > /*
> > * If we don't have full ppgtt driver can change our addresses
> > * so allocator is useless in this case. Just enforce relocations
> > * for such gens and don't use allocator at all.
> > */
> > - if (!ibb->uses_full_ppgtt)
> > - do_relocs = true;
> > + if (ibb->driver == INTEL_DRIVER_I915) {
> > + ibb->uses_full_ppgtt = gem_uses_full_ppgtt(fd);
> > - /*
> > - * For softpin mode allocator has full control over offsets allocation
> > - * so we want kernel to not interfere with this.
> > - */
> > - if (do_relocs)
> > - ibb->allows_obj_alignment = gem_allows_obj_alignment(i915);
> > + if (!ibb->uses_full_ppgtt)
> > + do_relocs = true;
> > - /* Use safe start offset instead assuming 0x0 is safe */
> > - start = max_t(uint64_t, start, gem_detect_safe_start_offset(i915));
>
> Previously start was adjusted before calling 'intel_allocator_open_full'
>
>
> > + /*
> > + * For softpin mode allocator has full control over offsets allocation
> > + * so we want kernel to not interfere with this.
> > + */
> > + if (do_relocs) {
> > + ibb->allows_obj_alignment = gem_allows_obj_alignment(fd);
> > + allocator_type = INTEL_ALLOCATOR_NONE;
> > + } else {
> > + /* if relocs are set we won't use an allocator */
> > + ibb->allocator_handle =
> > + intel_allocator_open_full(fd, ctx,
> > + start, end,
> > + allocator_type,
> > + strategy, 0);
> > + }
> > +
> > + /* Use safe start offset instead assuming 0x0 is safe */
> > + start = max_t(uint64_t, start, gem_detect_safe_start_offset(fd));
>
> now it is called after. I see that intel_alocator checks for the start
> value, though I'm not sure if this change is intended. It also looks like a
> refactor of i915 related code.
That wasn't my intention. I want to alter start before opening the
allocator. Thanks for spotting that.
>
>
> > +
> > + ibb->vm_id = 0;
> > + } else {
> > + igt_assert(!do_relocs);
> > +
> > + if (!ctx)
> > + ctx = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0);
> > +
> > + ibb->uses_full_ppgtt = 1;
> > + ibb->allocator_handle =
> > + intel_allocator_open_full(fd, ctx, start, end,
> > + allocator_type,
> > + strategy,
> > + xe_get_default_alignment(fd));
> > + ibb->vm_id = ctx;
> > + ibb->last_engine = ~0U;
> > + }
> > - /* if relocs are set we won't use an allocator */
> > - if (do_relocs)
> > - allocator_type = INTEL_ALLOCATOR_NONE;
> > - else
> > - ibb->allocator_handle = intel_allocator_open_full(i915, ctx,
> > - start, end,
> > - allocator_type,
> > - strategy, 0);
> > ibb->allocator_type = allocator_type;
> > ibb->allocator_strategy = strategy;
> > ibb->allocator_start = start;
> > ibb->allocator_end = end;
> > - ibb->i915 = i915;
> > ibb->enforce_relocs = do_relocs;
> > - ibb->handle = gem_create(i915, size);
> > + igt_assert(ibb->driver == INTEL_DRIVER_I915 ||
> > + (ibb->driver == INTEL_DRIVER_XE && !do_relocs));
>
> This assert seems redundant. You already ensure this with:
Agree, !relocs is catched in below 'else'. I'm going to fix this in next version.
> > + ibb->driver = is_i915_device(fd) ? INTEL_DRIVER_I915 :
> > + is_xe_device(fd) ? INTEL_DRIVER_XE : 0;
> > + igt_assert(ibb->driver);
>
> and
>
> > + if (ibb->driver == INTEL_DRIVER_I915) {
> > + ...
> > + } else {
> > + igt_assert(!do_relocs);
> > + ...
> > + }
>
>
> > +
> > + if (ibb->driver == INTEL_DRIVER_I915) {
> > + ibb->handle = gem_create(fd, size);
> > + ibb->alignment = gem_detect_safe_alignment(fd);
> > + ibb->gtt_size = gem_aperture_size(fd);
> > + } else {
> > + ibb->alignment = xe_get_default_alignment(fd);
> > + size = ALIGN(size, ibb->alignment);
> > + ibb->handle = xe_bo_create_flags(fd, 0, size,
> > + xe_has_vram(fd) ?
> > + vram_memory(fd, 0) :
> > + system_memory(fd));
> > + ibb->gtt_size = 1ull << xe_va_bits(fd);
> > + }
> > +
> > ibb->size = size;
> > - ibb->alignment = gem_detect_safe_alignment(i915);
> > - ibb->ctx = ctx;
> > - ibb->vm_id = 0;
> > ibb->batch = calloc(1, size);
> > igt_assert(ibb->batch);
> > ibb->ptr = ibb->batch;
> > @@ -937,7 +982,6 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > memcpy(ibb->cfg, cfg, sizeof(*cfg));
> > }
> > - ibb->gtt_size = gem_aperture_size(i915);
> > if ((ibb->gtt_size - 1) >> 32)
> > ibb->supports_48b_address = true;
> > @@ -961,13 +1005,13 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > /**
> > * intel_bb_create_full:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915 or xe
> > * @ctx: context
> > * @cfg: intel_ctx configuration, NULL for default context or legacy mode
> > * @size: size of the batchbuffer
> > * @start: allocator vm start address
> > * @end: allocator vm start address
> > - * @allocator_type: allocator type, SIMPLE, RANDOM, ...
> > + * @allocator_type: allocator type, SIMPLE, RELOC, ...
> > * @strategy: allocation strategy
> > *
> > * Creates bb with context passed in @ctx, size in @size and allocator type
> > @@ -980,19 +1024,19 @@ __intel_bb_create(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > *
> > * Pointer the intel_bb, asserts on failure.
> > */
> > -struct intel_bb *intel_bb_create_full(int i915, uint32_t ctx,
> > +struct intel_bb *intel_bb_create_full(int fd, uint32_t ctx,
> > const intel_ctx_cfg_t *cfg, uint32_t size,
> > uint64_t start, uint64_t end,
> > uint8_t allocator_type,
> > enum allocator_strategy strategy)
> > {
> > - return __intel_bb_create(i915, ctx, cfg, size, false, start, end,
> > + return __intel_bb_create(fd, ctx, cfg, size, false, start, end,
> > allocator_type, strategy);
> > }
> > /**
> > * intel_bb_create_with_allocator:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915 or xe
> > * @ctx: context
> > * @cfg: intel_ctx configuration, NULL for default context or legacy mode
> > * @size: size of the batchbuffer
> > @@ -1006,18 +1050,18 @@ struct intel_bb *intel_bb_create_full(int i915, uint32_t ctx,
> > *
> > * Pointer the intel_bb, asserts on failure.
> > */
> > -struct intel_bb *intel_bb_create_with_allocator(int i915, uint32_t ctx,
> > +struct intel_bb *intel_bb_create_with_allocator(int fd, uint32_t ctx,
> > const intel_ctx_cfg_t *cfg,
> > uint32_t size,
> > uint8_t allocator_type)
> > {
> > - return __intel_bb_create(i915, ctx, cfg, size, false, 0, 0,
> > + return __intel_bb_create(fd, ctx, cfg, size, false, 0, 0,
> > allocator_type, ALLOC_STRATEGY_HIGH_TO_LOW);
> > }
> > -static bool aux_needs_softpin(int i915)
> > +static bool aux_needs_softpin(int fd)
> > {
> > - return intel_gen(intel_get_drm_devid(i915)) >= 12;
> > + return intel_gen(intel_get_drm_devid(fd)) >= 12;
> > }
> > static bool has_ctx_cfg(struct intel_bb *ibb)
> > @@ -1027,7 +1071,7 @@ static bool has_ctx_cfg(struct intel_bb *ibb)
> > /**
> > * intel_bb_create:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915 or xe
> > * @size: size of the batchbuffer
> > *
> > * Creates bb with default context.
> > @@ -1045,19 +1089,19 @@ static bool has_ctx_cfg(struct intel_bb *ibb)
> > * connection to it inside intel_bb is not valid anymore.
> > * Trying to use it leads to catastrofic errors.
> > */
> > -struct intel_bb *intel_bb_create(int i915, uint32_t size)
> > +struct intel_bb *intel_bb_create(int fd, uint32_t size)
> > {
> > - bool relocs = gem_has_relocations(i915);
> > + bool relocs = is_i915_device(fd) && gem_has_relocations(fd);
> > - return __intel_bb_create(i915, 0, NULL, size,
> > - relocs && !aux_needs_softpin(i915), 0, 0,
> > + return __intel_bb_create(fd, 0, NULL, size,
> > + relocs && !aux_needs_softpin(fd), 0, 0,
> > INTEL_ALLOCATOR_SIMPLE,
> > ALLOC_STRATEGY_HIGH_TO_LOW);
> > }
> > /**
> > * intel_bb_create_with_context:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915 or xe
> > * @ctx: context id
> > * @cfg: intel_ctx configuration, NULL for default context or legacy mode
> > * @size: size of the batchbuffer
> > @@ -1070,20 +1114,20 @@ struct intel_bb *intel_bb_create(int i915, uint32_t size)
> > * Pointer the intel_bb, asserts on failure.
> > */
> > struct intel_bb *
> > -intel_bb_create_with_context(int i915, uint32_t ctx,
> > +intel_bb_create_with_context(int fd, uint32_t ctx,
> > const intel_ctx_cfg_t *cfg, uint32_t size)
> > {
> > - bool relocs = gem_has_relocations(i915);
> > + bool relocs = is_i915_device(fd) && gem_has_relocations(fd);
> > - return __intel_bb_create(i915, ctx, cfg, size,
> > - relocs && !aux_needs_softpin(i915), 0, 0,
> > + return __intel_bb_create(fd, ctx, cfg, size,
> > + relocs && !aux_needs_softpin(fd), 0, 0,
> > INTEL_ALLOCATOR_SIMPLE,
> > ALLOC_STRATEGY_HIGH_TO_LOW);
> > }
> > /**
> > * intel_bb_create_with_relocs:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915
> > * @size: size of the batchbuffer
> > *
> > * Creates bb which will disable passing addresses.
> > @@ -1093,17 +1137,17 @@ intel_bb_create_with_context(int i915, uint32_t ctx,
> > *
> > * Pointer the intel_bb, asserts on failure.
> > */
> > -struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size)
> > +struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size)
> > {
> > - igt_require(gem_has_relocations(i915));
> > + igt_require(is_i915_device(fd) && gem_has_relocations(fd));
> > - return __intel_bb_create(i915, 0, NULL, size, true, 0, 0,
> > + return __intel_bb_create(fd, 0, NULL, size, true, 0, 0,
> > INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE);
> > }
> > /**
> > * intel_bb_create_with_relocs_and_context:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915
> > * @ctx: context
> > * @cfg: intel_ctx configuration, NULL for default context or legacy mode
> > * @size: size of the batchbuffer
> > @@ -1116,19 +1160,19 @@ struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size)
> > * Pointer the intel_bb, asserts on failure.
> > */
> > struct intel_bb *
> > -intel_bb_create_with_relocs_and_context(int i915, uint32_t ctx,
> > +intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx,
> > const intel_ctx_cfg_t *cfg,
> > uint32_t size)
> > {
> > - igt_require(gem_has_relocations(i915));
> > + igt_require(is_i915_device(fd) && gem_has_relocations(fd));
> > - return __intel_bb_create(i915, ctx, cfg, size, true, 0, 0,
> > + return __intel_bb_create(fd, ctx, cfg, size, true, 0, 0,
> > INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE);
> > }
> > /**
> > * intel_bb_create_no_relocs:
> > - * @i915: drm fd
> > + * @fd: drm fd - i915 or xe
> > * @size: size of the batchbuffer
> > *
> > * Creates bb with disabled relocations.
> > @@ -1138,11 +1182,12 @@ intel_bb_create_with_relocs_and_context(int i915, uint32_t ctx,
> > *
> > * Pointer the intel_bb, asserts on failure.
> > */
> > -struct intel_bb *intel_bb_create_no_relocs(int i915, uint32_t size)
> > +struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size)
> > {
> > - igt_require(gem_uses_full_ppgtt(i915));
> > + igt_require(is_xe_device(fd) ||
> > + (is_i915_device(fd) && gem_uses_full_ppgtt(fd)));
> > - return __intel_bb_create(i915, 0, NULL, size, false, 0, 0,
> > + return __intel_bb_create(fd, 0, NULL, size, false, 0, 0,
> > INTEL_ALLOCATOR_SIMPLE,
> > ALLOC_STRATEGY_HIGH_TO_LOW);
> > }
> > @@ -1217,16 +1262,80 @@ void intel_bb_destroy(struct intel_bb *ibb)
> > intel_allocator_free(ibb->allocator_handle, ibb->handle);
> > intel_allocator_close(ibb->allocator_handle);
> > }
> > - gem_close(ibb->i915, ibb->handle);
> > + gem_close(ibb->fd, ibb->handle);
> > if (ibb->fence >= 0)
> > close(ibb->fence);
> > + if (ibb->engine_syncobj)
> > + syncobj_destroy(ibb->fd, ibb->engine_syncobj);
> > + if (ibb->vm_id && !ibb->ctx)
> > + xe_vm_destroy(ibb->fd, ibb->vm_id);
> > free(ibb->batch);
> > free(ibb->cfg);
> > free(ibb);
> > }
> > +static struct drm_xe_vm_bind_op *xe_alloc_bind_ops(struct intel_bb *ibb,
> > + uint32_t op, uint32_t region)
> > +{
> > + struct drm_i915_gem_exec_object2 **objects = ibb->objects;
> > + struct drm_xe_vm_bind_op *bind_ops, *ops;
> > + bool set_obj = (op & 0xffff) == XE_VM_BIND_OP_MAP;
> > +
> > + bind_ops = calloc(ibb->num_objects, sizeof(*bind_ops));
> > + igt_assert(bind_ops);
> > +
> > + igt_debug("bind_ops: %s\n", set_obj ? "MAP" : "UNMAP");
> > + for (int i = 0; i < ibb->num_objects; i++) {
> > + ops = &bind_ops[i];
> > +
> > + if (set_obj)
> > + ops->obj = objects[i]->handle;
> > +
> > + ops->op = op;
> > + ops->obj_offset = 0;
> > + ops->addr = objects[i]->offset;
> > + ops->range = objects[i]->rsvd1;
> > + ops->region = region;
> > +
> > + igt_debug(" [%d]: handle: %u, offset: %llx, size: %llx\n",
> > + i, ops->obj, (long long)ops->addr, (long long)ops->range);
> > + }
> > +
> > + return bind_ops;
> > +}
> > +
> > +static void __unbind_xe_objects(struct intel_bb *ibb)
> > +{
> > + struct drm_xe_sync syncs[2] = {
> > + { .flags = DRM_XE_SYNC_SYNCOBJ },
> > + { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
> > + };
> > + int ret;
> > +
> > + syncs[0].handle = ibb->engine_syncobj;
> > + syncs[1].handle = syncobj_create(ibb->fd, 0);
> > +
> > + if (ibb->num_objects > 1) {
> > + struct drm_xe_vm_bind_op *bind_ops;
> > + uint32_t op = XE_VM_BIND_OP_UNMAP | XE_VM_BIND_FLAG_ASYNC;
> > +
> > + bind_ops = xe_alloc_bind_ops(ibb, op, 0);
> > + xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops,
> > + ibb->num_objects, syncs, 2);
> > + free(bind_ops);
> > + } else {
> > + xe_vm_unbind_async(ibb->fd, ibb->vm_id, 0, 0,
> > + ibb->batch_offset, ibb->size, syncs, 2);
> > + }
> > + ret = syncobj_wait_err(ibb->fd, &syncs[1].handle, 1, INT64_MAX, 0);
> > + igt_assert_eq(ret, 0);
> > + syncobj_destroy(ibb->fd, syncs[1].handle);
> > +
> > + ibb->xe_bound = false;
> > +}
> > +
> > /*
> > * intel_bb_reset:
> > * @ibb: pointer to intel_bb
> > @@ -1238,7 +1347,6 @@ void intel_bb_destroy(struct intel_bb *ibb)
> > * from intel-bb tracking list. Removing intel_bufs releases their addresses
> > * in the allocator.
> > */
> > -
> > void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
> > {
> > uint32_t i;
> > @@ -1258,6 +1366,9 @@ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
> > for (i = 0; i < ibb->num_objects; i++)
> > ibb->objects[i]->flags &= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> > + if (is_xe_device(ibb->fd) && ibb->xe_bound)
> > + __unbind_xe_objects(ibb);
> > +
> > __intel_bb_destroy_relocations(ibb);
> > __intel_bb_destroy_objects(ibb);
> > __reallocate_objects(ibb);
> > @@ -1277,11 +1388,16 @@ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
> > intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset,
> > ibb->size);
> > - gem_close(ibb->i915, ibb->handle);
> > - ibb->handle = gem_create(ibb->i915, ibb->size);
> > + gem_close(ibb->fd, ibb->handle);
> > + if (is_i915_device(ibb->fd))
> > + ibb->handle = gem_create(ibb->fd, ibb->size);
> > + else
> > + ibb->handle = xe_bo_create_flags(ibb->fd, 0, ibb->size,
> > + vram_if_possible(ibb->fd, 0));
> > - /* Keep address for bb in reloc mode and RANDOM allocator */
> > - if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE)
> > + /* Reacquire offset for RELOC and SIMPLE */
> > + if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE ||
> > + ibb->allocator_type == INTEL_ALLOCATOR_RELOC)
> > ibb->batch_offset = __intel_bb_get_offset(ibb,
> > ibb->handle,
> > ibb->size,
> > @@ -1304,13 +1420,19 @@ int intel_bb_sync(struct intel_bb *ibb)
> > {
> > int ret;
> > - if (ibb->fence < 0)
> > + if (ibb->fence < 0 && !ibb->engine_syncobj)
> > return 0;
> > - ret = sync_fence_wait(ibb->fence, -1);
> > - if (ret == 0) {
> > - close(ibb->fence);
> > - ibb->fence = -1;
> > + if (ibb->fence >= 0) {
> > + ret = sync_fence_wait(ibb->fence, -1);
> > + if (ret == 0) {
> > + close(ibb->fence);
> > + ibb->fence = -1;
> > + }
> > + } else {
> > + igt_assert_neq(ibb->engine_syncobj, 0);
> > + ret = syncobj_wait_err(ibb->fd, &ibb->engine_syncobj,
> > + 1, INT64_MAX, 0);
> > }
> > return ret;
> > @@ -1325,7 +1447,7 @@ int intel_bb_sync(struct intel_bb *ibb)
> > void intel_bb_print(struct intel_bb *ibb)
> > {
> > igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n",
> > - ibb->i915, ibb->gen, ibb->devid, ibb->debug);
> > + ibb->fd, ibb->gen, ibb->devid, ibb->debug);
> > igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n",
> > ibb->handle, ibb->size, ibb->batch, ibb->ptr);
> > igt_info("gtt_size: %" PRIu64 ", supports 48bit: %d\n",
> > @@ -1350,7 +1472,7 @@ void intel_bb_dump(struct intel_bb *ibb, const char *filename)
> > FILE *out;
> > void *ptr;
> > - ptr = gem_mmap__device_coherent(ibb->i915, ibb->handle, 0, ibb->size,
> > + ptr = gem_mmap__device_coherent(ibb->fd, ibb->handle, 0, ibb->size,
> > PROT_READ);
> > out = fopen(filename, "wb");
> > igt_assert(out);
> > @@ -1501,7 +1623,7 @@ static void __remove_from_objects(struct intel_bb *ibb,
> > }
> > /**
> > - * intel_bb_add_object:
> > + * __intel_bb_add_object:
> > * @ibb: pointer to intel_bb
> > * @handle: which handle to add to objects array
> > * @size: object size
> > @@ -1513,9 +1635,9 @@ static void __remove_from_objects(struct intel_bb *ibb,
> > * in the object tree. When object is a render target it has to
> > * be marked with EXEC_OBJECT_WRITE flag.
> > */
> > -struct drm_i915_gem_exec_object2 *
> > -intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
> > - uint64_t offset, uint64_t alignment, bool write)
> > +static struct drm_i915_gem_exec_object2 *
> > +__intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
> > + uint64_t offset, uint64_t alignment, bool write)
> > {
> > struct drm_i915_gem_exec_object2 *object;
> > @@ -1523,8 +1645,12 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
> > || ALIGN(offset, alignment) == offset);
> > igt_assert(is_power_of_two(alignment));
> > + if (ibb->driver == INTEL_DRIVER_I915)
> > + alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->fd));
> > + else if (ibb->driver == INTEL_DRIVER_XE)
> > + alignment = max_t(uint64_t, ibb->alignment, alignment);
>
> I think we should always ensure we have one of the (now two) possible
> drivers. In other places of this series you used "if else" and asserts.
> Using 'if else if' and quietly doing nothing if we get a different value,
> looks a little bit unpredicatable.
Agree. In this case we have filtered out only i915 or xe are in game.
I'm going to change this to 'else' only.
>
>
> > +
> > object = __add_to_cache(ibb, handle);
> > - alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->i915));
> > __add_to_objects(ibb, object);
> > /*
> > @@ -1584,9 +1710,27 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
> > if (ibb->allows_obj_alignment)
> > object->alignment = alignment;
> > + if (ibb->driver == INTEL_DRIVER_XE) {
> > + object->alignment = alignment;
> > + object->rsvd1 = size;
> > + }
> > +
> > return object;
> > }
> > +struct drm_i915_gem_exec_object2 *
> > +intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size,
> > + uint64_t offset, uint64_t alignment, bool write)
> > +{
> > + struct drm_i915_gem_exec_object2 *obj = NULL;
> > +
> > + obj = __intel_bb_add_object(ibb, handle, size, offset,
> > + alignment, write);
> > + igt_assert(obj);
> > +
> > + return obj;
> > +}
> > +
> > bool intel_bb_remove_object(struct intel_bb *ibb, uint32_t handle,
> > uint64_t offset, uint64_t size)
> > {
> > @@ -1999,7 +2143,7 @@ static void intel_bb_dump_execbuf(struct intel_bb *ibb,
> > uint64_t address;
> > igt_debug("execbuf [pid: %ld, fd: %d, ctx: %u]\n",
> > - (long) getpid(), ibb->i915, ibb->ctx);
> > + (long) getpid(), ibb->fd, ibb->ctx);
> > igt_debug("execbuf batch len: %u, start offset: 0x%x, "
> > "DR1: 0x%x, DR4: 0x%x, "
> > "num clip: %u, clipptr: 0x%llx, "
> > @@ -2135,6 +2279,83 @@ static void update_offsets(struct intel_bb *ibb,
> > }
> > #define LINELEN 76
> > +
> > +static int
> > +__xe_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
> > + uint64_t flags, bool sync)
>
> Nit: function name and parameter list fits in 100 characters :)
>
Wherever possible we try to fit in 80 ;)
>
> > +{
> > + uint32_t engine = flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK);
> > + uint32_t engine_id;
> > + struct drm_xe_sync syncs[2] = {
> > + { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
> > + { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, },
> > + };
> > + struct drm_xe_vm_bind_op *bind_ops;
> > + void *map;
> > +
> > + igt_assert_eq(ibb->num_relocs, 0);
> > + igt_assert_eq(ibb->xe_bound, false);
> > +
> > + if (ibb->last_engine != engine) {
> > + struct drm_xe_engine_class_instance inst = { };
> > +
> > + inst.engine_instance =
> > + (flags & I915_EXEC_BSD_MASK) >> I915_EXEC_BSD_SHIFT;
> > +
> > + switch (flags & I915_EXEC_RING_MASK) {
> > + case I915_EXEC_DEFAULT:
> > + case I915_EXEC_BLT:
> > + inst.engine_class = DRM_XE_ENGINE_CLASS_COPY;
> > + break;
> > + case I915_EXEC_BSD:
> > + inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> > + break;
> > + case I915_EXEC_RENDER:
> > + inst.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
> > + break;
> > + case I915_EXEC_VEBOX:
> > + inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> > + break;
> > + default:
> > + igt_assert(false);
>
> Maybe we could use igt_assert_f and print some message.
>
> Lgtm so far, however I still have some homework to do.
Ok, I'm going to add some information about failure.
Thank you for the review.
--
Zbigniew
>
> Christoph
>
>
> > + }
> > + igt_debug("Run on %s\n", xe_engine_class_string(inst.engine_class));
> > +
> > + ibb->engine_id = engine_id =
> > + xe_engine_create(ibb->fd, ibb->vm_id, &inst, 0);
> > + } else {
> > + engine_id = ibb->engine_id;
> > + }
> > + ibb->last_engine = engine;
> > +
> > + map = xe_bo_map(ibb->fd, ibb->handle, ibb->size);
> > + memcpy(map, ibb->batch, ibb->size);
> > + gem_munmap(map, ibb->size);
> > +
> > + syncs[0].handle = syncobj_create(ibb->fd, 0);
> > + if (ibb->num_objects > 1) {
> > + bind_ops = xe_alloc_bind_ops(ibb, XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC, 0);
> > + xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops,
> > + ibb->num_objects, syncs, 1);
> > + ibb->xe_bound = true;
> > + free(bind_ops);
> > + } else {
> > + xe_vm_bind_async(ibb->fd, ibb->vm_id, 0, ibb->handle, 0,
> > + ibb->batch_offset, ibb->size, syncs, 1);
> > + }
> > +
> > + syncs[0].flags &= ~DRM_XE_SYNC_SIGNAL;
> > + ibb->engine_syncobj = syncobj_create(ibb->fd, 0);
> > + syncs[1].handle = ibb->engine_syncobj;
> > +
> > + xe_exec_sync(ibb->fd, engine_id, ibb->batch_offset, syncs, 2);
> > +
> > + if (sync)
> > + intel_bb_sync(ibb);
> > +
> > + return 0;
> > +}
> > +
> > /*
> > * __intel_bb_exec:
> > * @ibb: pointer to intel_bb
> > @@ -2160,7 +2381,7 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
> > ibb->objects[0]->handle = ibb->handle;
> > ibb->objects[0]->offset = ibb->batch_offset;
> > - gem_write(ibb->i915, ibb->handle, 0, ibb->batch, ibb->size);
> > + gem_write(ibb->fd, ibb->handle, 0, ibb->batch, ibb->size);
> > memset(&execbuf, 0, sizeof(execbuf));
> > objects = create_objects_array(ibb);
> > @@ -2173,13 +2394,10 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
> > execbuf.flags &= ~I915_EXEC_NO_RELOC;
> > execbuf.rsvd2 = 0;
> > - if (ibb->dump_base64)
> > - intel_bb_dump_base64(ibb, LINELEN);
> > -
> > /* For debugging on CI, remove in final series */
>
> PS I know this is not part of this patch but it left a smile on my face :)
>
>
> > intel_bb_dump_execbuf(ibb, &execbuf);
> > - ret = __gem_execbuf_wr(ibb->i915, &execbuf);
> > + ret = __gem_execbuf_wr(ibb->fd, &execbuf);
> > if (ret) {
> > intel_bb_dump_execbuf(ibb, &execbuf);
> > free(objects);
> > @@ -2230,7 +2448,13 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
> > void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
> > uint64_t flags, bool sync)
> > {
> > - igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0);
> > + if (ibb->dump_base64)
> > + intel_bb_dump_base64(ibb, LINELEN);
> > +
> > + if (ibb->driver == INTEL_DRIVER_I915)
> > + igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0);
> > + else
> > + igt_assert_eq(__xe_bb_exec(ibb, end_offset, flags, sync), 0);
> > }
> > /**
> > @@ -2409,13 +2633,13 @@ uint32_t intel_bb_copy_data(struct intel_bb *ibb,
> > */
> > void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags)
> > {
> > - if (blt_has_xy_src_copy(ibb->i915))
> > + if (blt_has_xy_src_copy(ibb->fd))
> > intel_bb_out(ibb, XY_SRC_COPY_BLT_CMD |
> > XY_SRC_COPY_BLT_WRITE_ALPHA |
> > XY_SRC_COPY_BLT_WRITE_RGB |
> > flags |
> > (6 + 2 * (ibb->gen >= 8)));
> > - else if (blt_has_fast_copy(ibb->i915))
> > + else if (blt_has_fast_copy(ibb->fd))
> > intel_bb_out(ibb, XY_FAST_COPY_BLT | flags);
> > else
> > igt_assert_f(0, "No supported blit command found\n");
> > @@ -2456,9 +2680,9 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
> > if (gen >= 4 && src->tiling != I915_TILING_NONE) {
> > src_pitch /= 4;
> > - if (blt_has_xy_src_copy(ibb->i915))
> > + if (blt_has_xy_src_copy(ibb->fd))
> > cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
> > - else if (blt_has_fast_copy(ibb->i915))
> > + else if (blt_has_fast_copy(ibb->fd))
> > cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling);
> > else
> > igt_assert_f(0, "No supported blit command found\n");
> > @@ -2466,7 +2690,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
> > if (gen >= 4 && dst->tiling != I915_TILING_NONE) {
> > dst_pitch /= 4;
> > - if (blt_has_xy_src_copy(ibb->i915))
> > + if (blt_has_xy_src_copy(ibb->fd))
> > cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
> > else
> > cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling);
> > @@ -2480,7 +2704,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
> > CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
> > br13_bits = 0;
> > - if (blt_has_xy_src_copy(ibb->i915)) {
> > + if (blt_has_xy_src_copy(ibb->fd)) {
> > switch (bpp) {
> > case 8:
> > break;
> > @@ -2496,7 +2720,7 @@ void intel_bb_emit_blt_copy(struct intel_bb *ibb,
> > igt_fail(IGT_EXIT_FAILURE);
> > }
> > } else {
> > - br13_bits = fast_copy_dword1(ibb->i915, src->tiling, dst->tiling, bpp);
> > + br13_bits = fast_copy_dword1(ibb->fd, src->tiling, dst->tiling, bpp);
> > }
> > if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> > @@ -2628,14 +2852,20 @@ void intel_bb_track(bool do_tracking)
> > static void __intel_bb_reinit_alloc(struct intel_bb *ibb)
> > {
> > + uint64_t alignment = 0;
> > +
> > if (ibb->allocator_type == INTEL_ALLOCATOR_NONE)
> > return;
> > - ibb->allocator_handle = intel_allocator_open_full(ibb->i915, ibb->ctx,
> > + if (ibb->driver == INTEL_DRIVER_XE)
> > + alignment = xe_get_default_alignment(ibb->fd);
> > +
> > + ibb->allocator_handle = intel_allocator_open_full(ibb->fd, ibb->ctx,
> > ibb->allocator_start, ibb->allocator_end,
> > ibb->allocator_type,
> > ibb->allocator_strategy,
> > - 0);
> > + alignment);
> > +
> > intel_bb_reset(ibb, true);
> > }
> > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> > index 10e4126606..e2566d8223 100644
> > --- a/lib/intel_batchbuffer.h
> > +++ b/lib/intel_batchbuffer.h
> > @@ -235,6 +235,11 @@ struct igt_pxp {
> > uint32_t appid;
> > };
> > +enum intel_driver {
> > + INTEL_DRIVER_I915 = 1,
> > + INTEL_DRIVER_XE,
> > +};
> > +
> > /*
> > * Batchbuffer without libdrm dependency
> > */
> > @@ -246,7 +251,9 @@ struct intel_bb {
> > uint8_t allocator_type;
> > enum allocator_strategy allocator_strategy;
> > - int i915;
> > + enum intel_driver driver;
> > + int fd;
> > +
> > unsigned int gen;
> > bool debug;
> > bool dump_base64;
> > @@ -268,6 +275,11 @@ struct intel_bb {
> > uint32_t ctx;
> > uint32_t vm_id;
> > + bool xe_bound;
> > + uint32_t engine_syncobj;
> > + uint32_t engine_id;
> > + uint32_t last_engine;
> > +
> > /* Context configuration */
> > intel_ctx_cfg_t *cfg;
> > @@ -299,21 +311,21 @@ struct intel_bb {
> > };
> > struct intel_bb *
> > -intel_bb_create_full(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > +intel_bb_create_full(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > uint32_t size, uint64_t start, uint64_t end,
> > uint8_t allocator_type, enum allocator_strategy strategy);
> > struct intel_bb *
> > -intel_bb_create_with_allocator(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > +intel_bb_create_with_allocator(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > uint32_t size, uint8_t allocator_type);
> > -struct intel_bb *intel_bb_create(int i915, uint32_t size);
> > +struct intel_bb *intel_bb_create(int fd, uint32_t size);
> > struct intel_bb *
> > -intel_bb_create_with_context(int i915, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > +intel_bb_create_with_context(int fd, uint32_t ctx, const intel_ctx_cfg_t *cfg,
> > uint32_t size);
> > -struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size);
> > +struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size);
> > struct intel_bb *
> > -intel_bb_create_with_relocs_and_context(int i915, uint32_t ctx,
> > +intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx,
> > const intel_ctx_cfg_t *cfg, uint32_t size);
> > -struct intel_bb *intel_bb_create_no_relocs(int i915, uint32_t size);
> > +struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size);
> > void intel_bb_destroy(struct intel_bb *ibb);
> > /* make it safe to use intel_allocator after failed test */
> > diff --git a/tests/i915/gem_caching.c b/tests/i915/gem_caching.c
> > index b6ecd8346c..6e944f0acb 100644
> > --- a/tests/i915/gem_caching.c
> > +++ b/tests/i915/gem_caching.c
> > @@ -83,7 +83,7 @@ copy_bo(struct intel_bb *ibb, struct intel_buf *src, struct intel_buf *dst)
> > intel_bb_add_intel_buf(ibb, src, false);
> > intel_bb_add_intel_buf(ibb, dst, true);
> > - if (blt_has_xy_src_copy(ibb->i915)) {
> > + if (blt_has_xy_src_copy(ibb->fd)) {
> > intel_bb_out(ibb,
> > XY_SRC_COPY_BLT_CMD |
> > XY_SRC_COPY_BLT_WRITE_ALPHA |
> > @@ -93,7 +93,7 @@ copy_bo(struct intel_bb *ibb, struct intel_buf *src, struct intel_buf *dst)
> > intel_bb_out(ibb, (3 << 24) | /* 32 bits */
> > (0xcc << 16) | /* copy ROP */
> > 4096);
> > - } else if (blt_has_fast_copy(ibb->i915)) {
> > + } else if (blt_has_fast_copy(ibb->fd)) {
> > intel_bb_out(ibb, XY_FAST_COPY_BLT);
> > intel_bb_out(ibb, XY_FAST_COPY_COLOR_DEPTH_32 | 4096);
> > } else {
> > diff --git a/tests/i915/gem_pxp.c b/tests/i915/gem_pxp.c
> > index af657d0e1b..2f27abd582 100644
> > --- a/tests/i915/gem_pxp.c
> > +++ b/tests/i915/gem_pxp.c
> > @@ -809,7 +809,7 @@ static int gem_execbuf_flush_store_dw(int i915, struct intel_bb *ibb, uint32_t c
> > ret = __intel_bb_exec(ibb, intel_bb_offset(ibb),
> > I915_EXEC_RENDER | I915_EXEC_NO_RELOC, false);
> > if (ret == 0) {
> > - gem_sync(ibb->i915, fence->handle);
> > + gem_sync(ibb->fd, fence->handle);
> > assert_pipectl_storedw_done(i915, fence->handle);
> > }
> > return ret;
More information about the igt-dev
mailing list