[Intel-gfx] [PATCH 17/20] drm/i915/uapi: add NEEDS_CPU_ACCESS hint
Thomas Hellström
thomas.hellstrom at linux.intel.com
Thu Feb 3 09:28:40 UTC 2022
On 1/26/22 16:21, Matthew Auld wrote:
> If set, force the allocation to be placed in the mappable portion of
> LMEM. One big restriction here is that system memory must be given as a
> potential placement for the object, that way we can always spill the
> object into system memory if we can't make space.
>
> XXX: Still very much WIP and needs IGTs. Including now just for the sake
> of having more complete picture.
>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gem/i915_gem_create.c | 28 ++++++++++++-------
> include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++-
> 2 files changed, 49 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index e7456443f163..98d63cb21e94 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -238,6 +238,7 @@ struct create_ext {
> struct drm_i915_private *i915;
> struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
> unsigned int n_placements;
> + unsigned int placement_mask;
> unsigned long flags;
> };
>
> @@ -334,6 +335,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
> for (i = 0; i < args->num_regions; i++)
> ext_data->placements[i] = placements[i];
>
> + ext_data->placement_mask = mask;
> return 0;
>
> out_dump:
> @@ -408,7 +410,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> struct drm_i915_gem_object *obj;
> int ret;
>
> - if (args->flags)
> + if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> return -EINVAL;
>
> ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
> @@ -424,14 +426,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> ext_data.n_placements = 1;
> }
>
> - /*
> - * TODO: add a userspace hint to force CPU_ACCESS for the object, which
> - * can override this.
> - */
> - if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> - ext_data.placements[0]->type !=
> - INTEL_MEMORY_SYSTEM))
> - ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
> + if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
> + if (ext_data.n_placements == 1)
> + return -EINVAL;
> +
> + /*
> + * We always need to be able to spill to system memory, if we
> + * can't place in the mappable part of LMEM.
> + */
> + if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
> + return -EINVAL;
> + } else {
> + if (!IS_DG1(i915) && (ext_data.n_placements > 1 ||
> + ext_data.placements[0]->type !=
> + INTEL_MEMORY_SYSTEM))
> + ext_data.flags |= I915_BO_ALLOC_TOPDOWN;
> + }
>
> obj = __i915_gem_object_create_user_ext(i915, args->size,
> ext_data.placements,
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 914ebd9290e5..ecfa805549a7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
> * Object handles are nonzero.
> */
> __u32 handle;
> - /** @flags: MBZ */
> + /**
> + * @flags: Optional flags.
> + *
> + * Supported values:
> + *
> + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
> + * the object will need to be accessed via the CPU.
> + *
> + * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
> + * only strictly required on platforms where only some of the device
> + * memory is directly visible or mappable through the CPU, like on DG2+.
> + *
> + * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
> + * ensure we can always spill the allocation to system memory, if we
> + * can't place the object in the mappable part of
> + * I915_MEMORY_CLASS_DEVICE.
> + *
> + * Note that buffers that need to be captured with EXEC_OBJECT_CAPTURE,
> + * will need to enable this hint, if the object can also be placed in
> + * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call will
> + * throw an error otherwise. This also means that such objects will need
> + * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
> + *
I wonder, should we try to migrate capture objects at execbuf time
instead on an on-demand basis? If migration fails, then we just skip
capturing that object, similar to how the capture code handles errors?
> + * Without this hint, the kernel will assume that non-mappable
> + * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
> + * kernel can still migrate the object to the mappable part, as a last
> + * resort, if userspace ever CPU faults this object, but this might be
> + * expensive, and so ideally should be avoided.
> + */
> +#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1<<0)
> __u32 flags;
> /**
> * @extensions: The chain of extensions to apply to this object.
More information about the Intel-gfx
mailing list