[PATCH 18/26] drm/i915/uapi: add NEEDS_CPU_ACCESS hint
Matthew Auld
matthew.auld at intel.com
Mon Feb 14 10:12:21 UTC 2022
If set, force the allocation to be placed in the mappable portion of
LMEM. One big restriction here is that system memory must be given as a
potential placement for the object, that way we can always spill the
object into system memory if we can't make space.
XXX: Still needs IGTs. Including now just for the sake of having more
complete picture.
Signed-off-by: Matthew Auld <matthew.auld at intel.com>
Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfield at intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_create.c | 26 ++++++++++++------
include/uapi/drm/i915_drm.h | 31 +++++++++++++++++++++-
2 files changed, 48 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index ce8d559b207e..95d880b5af99 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -239,6 +239,7 @@ struct create_ext {
struct drm_i915_private *i915;
struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
unsigned int n_placements;
+ unsigned int placement_mask;
unsigned long flags;
};
@@ -335,6 +336,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
for (i = 0; i < args->num_regions; i++)
ext_data->placements[i] = placements[i];
+ ext_data->placement_mask = mask;
return 0;
out_dump:
@@ -409,7 +411,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_object *obj;
int ret;
- if (args->flags)
+ if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
return -EINVAL;
ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
@@ -425,13 +427,21 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
ext_data.n_placements = 1;
}
- /*
- * TODO: add a userspace hint to force CPU_ACCESS for the object, which
- * can override this.
- */
- if (ext_data.n_placements > 1 ||
- ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)
- ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;
+ if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
+ if (ext_data.n_placements == 1)
+ return -EINVAL;
+
+ /*
+ * We always need to be able to spill to system memory, if we
+ * can't place in the mappable part of LMEM.
+ */
+ if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
+ return -EINVAL;
+ } else {
+ if (ext_data.n_placements > 1 ||
+ ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)
+ ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;
+ }
obj = __i915_gem_object_create_user_ext(i915, args->size,
ext_data.placements,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 914ebd9290e5..925685bd261e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3157,7 +3157,36 @@ struct drm_i915_gem_create_ext {
* Object handles are nonzero.
*/
__u32 handle;
- /** @flags: MBZ */
+ /**
+ * @flags: Optional flags.
+ *
+ * Supported values:
+ *
+ * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
+ * the object will need to be accessed via the CPU.
+ *
+ * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and
+ * only strictly required on platforms where only some of the device
+ * memory is directly visible or mappable through the CPU, like on DG2+.
+ *
+ * One of the placements MUST also be I915_MEMORY_CLASS_SYSTEM, to
+ * ensure we can always spill the allocation to system memory, if we
+ * can't place the object in the mappable part of
+ * I915_MEMORY_CLASS_DEVICE.
+ *
+ * Note that buffers that need to be captured with EXEC_OBJECT_CAPTURE,
+ * will need to enable this hint, if the object can also be placed in
+ * I915_MEMORY_CLASS_DEVICE, starting from DG2+. The execbuf call will
+ * throw an error otherwise. This also means that such objects will need
+ * I915_MEMORY_CLASS_SYSTEM set as a possible placement.
+ *
+ * Without this hint, the kernel will assume that non-mappable
+ * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
+ * kernel can still migrate the object to the mappable part, as a last
+ * resort, if userspace ever CPU faults this object, but this might be
+ * expensive, and so ideally should be avoided.
+ */
+#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
__u32 flags;
/**
* @extensions: The chain of extensions to apply to this object.
--
2.34.1
More information about the Intel-gfx-trybot
mailing list