[Intel-gfx] [PATCH] drm/915: Pad GTT views of exec objects up to user specified size
Daniel Vetter
daniel at ffwll.ch
Thu Oct 22 01:04:55 PDT 2015
On Wed, Oct 21, 2015 at 04:24:58PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
>
> v2: Long time, no see
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Usual "needs igt + open-source userspace" broken record from maintainer,
in case someone really wants to use this. Noises I've heard is that it's
for opencl, but who knows.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 6 ++-
> drivers/gpu/drm/i915/i915_gem.c | 77 +++++++++++++++---------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 ++++++-
> include/uapi/drm/i915_drm.h | 5 +-
> 4 files changed, 60 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2771b7edde02..a0ce011a5dc0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2786,11 +2786,13 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
> int __must_check
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags);
> int __must_check
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags);
>
> @@ -3048,8 +3050,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
> uint32_t alignment,
> unsigned flags)
> {
> - return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
> - alignment, flags | PIN_GLOBAL);
> + return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
> + flags | PIN_GLOBAL);
> }
>
> static inline int
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4f49c9dc24e2..d079111d15c5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1784,7 +1784,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> /* Now pin it into the GTT if needed */
> - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
> if (ret)
> goto unlock;
>
> @@ -3362,20 +3362,20 @@ static struct i915_vma *
> i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_ggtt_view *ggtt_view,
> + uint64_t size,
> unsigned alignment,
> uint64_t flags)
> {
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> - u32 fence_alignment, unfenced_alignment;
> u64 start, end;
> - u64 size, fence_size;
> u32 search_flag, alloc_flag;
> struct i915_vma *vma;
> int ret;
>
> if (i915_is_ggtt(vm)) {
> - u32 view_size;
> + u32 fence_size, fence_alignment, unfenced_alignment;
> + u64 view_size;
>
> if (WARN_ON(!ggtt_view))
> return ERR_PTR(-EINVAL);
> @@ -3393,21 +3393,22 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> view_size,
> obj->tiling_mode,
> false);
> - size = flags & PIN_MAPPABLE ? fence_size : view_size;
> + size = max(size, view_size);
> + if (flags & PIN_MAPPABLE)
> + size = max_t(u64, size, fence_size);
> +
> + if (alignment == 0)
> + alignment = flags & PIN_MAPPABLE ? fence_alignment :
> + unfenced_alignment;
> + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> + DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> + ggtt_view ? ggtt_view->type : 0,
> + alignment);
> + return ERR_PTR(-EINVAL);
> + }
> } else {
> - fence_size = i915_gem_get_gtt_size(dev,
> - obj->base.size,
> - obj->tiling_mode);
> - fence_alignment = i915_gem_get_gtt_alignment(dev,
> - obj->base.size,
> - obj->tiling_mode,
> - true);
> - unfenced_alignment =
> - i915_gem_get_gtt_alignment(dev,
> - obj->base.size,
> - obj->tiling_mode,
> - false);
> - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> + size = max_t(u64, size, obj->base.size);
> + alignment = 4096;
> }
>
> start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> @@ -3418,24 +3419,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> if (flags & PIN_ZONE_4G)
> end = min_t(u64, end, 1ULL << 32);
>
> - if (alignment == 0)
> - alignment = flags & PIN_MAPPABLE ? fence_alignment :
> - unfenced_alignment;
> - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> - ggtt_view ? ggtt_view->type : 0,
> - alignment);
> - return ERR_PTR(-EINVAL);
> - }
> -
> /* If binding the object/GGTT view requires more space than the entire
> * aperture has, reject it early before evicting everything in a vain
> * attempt to find space.
> */
> if (size > end) {
> - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> ggtt_view ? ggtt_view->type : 0,
> - size,
> + size, obj->base.size,
> flags & PIN_MAPPABLE ? "mappable" : "total",
> end);
> return ERR_PTR(-E2BIG);
> @@ -3892,7 +3883,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> * (e.g. libkms for the bootup splash), we have to ensure that we
> * always use map_and_fenceable for all scanout buffers.
> */
> - ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> view->type == I915_GGTT_VIEW_NORMAL ?
> PIN_MAPPABLE : 0);
> if (ret)
> @@ -4043,12 +4034,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> }
>
> static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma,
> + uint64_t size,
> + uint32_t alignment,
> + uint64_t flags)
> {
> struct drm_i915_gem_object *obj = vma->obj;
>
> - if (alignment &&
> - vma->node.start & (alignment - 1))
> + if (vma->node.size < size)
> + return true;
> +
> + if (alignment && vma->node.start & (alignment - 1))
> return true;
>
> if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -4088,6 +4084,7 @@ static int
> i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_ggtt_view *ggtt_view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> @@ -4118,7 +4115,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
> return -EBUSY;
>
> - if (i915_vma_misplaced(vma, alignment, flags)) {
> + if (i915_vma_misplaced(vma, size, alignment, flags)) {
> WARN(vma->pin_count,
> "bo is already pinned in %s with incorrect alignment:"
> " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> @@ -4139,8 +4136,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>
> bound = vma ? vma->bound : 0;
> if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> - flags);
> + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> + size, alignment, flags);
> if (IS_ERR(vma))
> return PTR_ERR(vma);
> } else {
> @@ -4162,17 +4159,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> int
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> return i915_gem_object_do_pin(obj, vm,
> i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> - alignment, flags);
> + size, alignment, flags);
> }
>
> int
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> @@ -4180,7 +4179,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> return -EINVAL;
>
> return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
> - alignment, flags | PIN_GLOBAL);
> + size, alignment, flags | PIN_GLOBAL);
> }
>
> void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 045a7631faa0..b6bc22d49628 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -605,10 +605,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> flags |= PIN_HIGH;
> }
>
> - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> + ret = i915_gem_object_pin(obj, vma->vm,
> + entry->rsvd1, /* pad_to_size */
> + entry->alignment,
> + flags);
> if ((ret == -ENOSPC || ret == -E2BIG) &&
> only_mappable_for_reloc(entry->flags))
> ret = i915_gem_object_pin(obj, vma->vm,
> + entry->rsvd1, /* pad_to_size */
> entry->alignment,
> flags & ~PIN_MAPPABLE);
> if (ret)
> @@ -672,6 +676,9 @@ eb_vma_misplaced(struct i915_vma *vma)
> vma->node.start & (entry->alignment - 1))
> return true;
>
> + if (vma->node.size < entry->rsvd1) /* pad_to_size */
> + return true;
> +
> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> vma->node.start < BATCH_OFFSET_BIAS)
> return true;
> @@ -988,6 +995,13 @@ validate_exec_list(struct drm_device *dev,
> if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
> return -EINVAL;
>
> + /* pad_to_size was once a reserved field, so sanitize it */
> + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> + if (offset_in_page(exec[i].rsvd1))
> + return -EINVAL;
> + } else
> + exec[i].rsvd1 = 0;
> +
> /* First check for malicious input causing overflow in
> * the worst case where we need to allocate the entire
> * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 08e047cba76a..678f7d5320ae 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
> #define EXEC_OBJECT_NEEDS_GTT (1<<1)
> #define EXEC_OBJECT_WRITE (1<<2)
> #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE (1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> __u64 flags;
>
> - __u64 rsvd1;
> + __u64 rsvd1; /* pad_to_size */
> __u64 rsvd2;
> };
>
> --
> 2.6.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list