[Intel-gfx] [PATCH 105/190] drm/i915: Pad GTT views of exec objects up to user specified size
David Weinehall
david.weinehall at linux.intel.com
Tue Mar 22 14:32:00 UTC 2016
On Mon, Jan 11, 2016 at 10:44:49AM +0000, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this means that they require
> a larger surface than would be naively computed from pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
>
> v2: Long time, no see
> v3: Try an anonymous union for uapi struct compatibility
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Testcase: gem_mmap_gtt --run-subtest huge-bo-tiledX
Tested-by: David Weinehall <david.weinehall at intel.com>
This patch fixes an OOPS on gen8+ triggered by the testcase mentioned above.
> ---
> drivers/gpu/drm/i915/i915_drv.h | 6 ++-
> drivers/gpu/drm/i915/i915_gem.c | 79 +++++++++++++++---------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
> include/uapi/drm/i915_drm.h | 8 ++-
> 4 files changed, 64 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4ada625b751e..49b126e4191e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2694,11 +2694,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
> int __must_check
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags);
> int __must_check
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags);
>
> @@ -2931,8 +2933,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
> uint32_t alignment,
> unsigned flags)
> {
> - return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
> - alignment, flags | PIN_GLOBAL);
> + return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
> + flags | PIN_GLOBAL);
> }
>
> static inline int
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a82a06a61262..2f14d2da75a5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1440,7 +1440,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> /* Now pin it into the GTT if needed */
> - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
> if (ret)
> goto unlock;
>
> @@ -2746,20 +2746,20 @@ static struct i915_vma *
> i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_ggtt_view *ggtt_view,
> + uint64_t size,
> unsigned alignment,
> uint64_t flags)
> {
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> - u32 fence_alignment, unfenced_alignment;
> - u32 search_flag, alloc_flag;
> u64 start, end;
> - u64 size, fence_size;
> + u32 search_flag, alloc_flag;
> struct i915_vma *vma;
> int ret;
>
> if (i915_is_ggtt(vm)) {
> - u32 view_size;
> + u32 fence_size, fence_alignment, unfenced_alignment;
> + u64 view_size;
>
> if (WARN_ON(!ggtt_view))
> return ERR_PTR(-EINVAL);
> @@ -2777,21 +2777,22 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> view_size,
> obj->tiling_mode,
> false);
> - size = flags & PIN_MAPPABLE ? fence_size : view_size;
> + size = max(size, view_size);
> + if (flags & PIN_MAPPABLE)
> + size = max_t(u64, size, fence_size);
> +
> + if (alignment == 0)
> + alignment = flags & PIN_MAPPABLE ? fence_alignment :
> + unfenced_alignment;
> + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> + DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> + ggtt_view ? ggtt_view->type : 0,
> + alignment);
> + return ERR_PTR(-EINVAL);
> + }
> } else {
> - fence_size = i915_gem_get_gtt_size(dev,
> - obj->base.size,
> - obj->tiling_mode);
> - fence_alignment = i915_gem_get_gtt_alignment(dev,
> - obj->base.size,
> - obj->tiling_mode,
> - true);
> - unfenced_alignment =
> - i915_gem_get_gtt_alignment(dev,
> - obj->base.size,
> - obj->tiling_mode,
> - false);
> - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> + size = max_t(u64, size, obj->base.size);
> + alignment = 4096;
> }
>
> start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> @@ -2801,24 +2802,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> if (flags & PIN_ZONE_4G)
> end = min_t(u64, end, (1ULL << 32));
>
> - if (alignment == 0)
> - alignment = flags & PIN_MAPPABLE ? fence_alignment :
> - unfenced_alignment;
> - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> - ggtt_view ? ggtt_view->type : 0,
> - alignment);
> - return ERR_PTR(-EINVAL);
> - }
> -
> /* If binding the object/GGTT view requires more space than the entire
> * aperture has, reject it early before evicting everything in a vain
> * attempt to find space.
> */
> if (size > end) {
> - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> ggtt_view ? ggtt_view->type : 0,
> - size,
> + size, obj->base.size,
> flags & PIN_MAPPABLE ? "mappable" : "total",
> end);
> return ERR_PTR(-E2BIG);
> @@ -3309,7 +3300,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> * (e.g. libkms for the bootup splash), we have to ensure that we
> * always use map_and_fenceable for all scanout buffers.
> */
> - ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> view->type == I915_GGTT_VIEW_NORMAL ?
> PIN_MAPPABLE : 0);
> if (ret)
> @@ -3459,12 +3450,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> }
>
> static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma,
> + uint64_t size,
> + uint32_t alignment,
> + uint64_t flags)
> {
> struct drm_i915_gem_object *obj = vma->obj;
>
> - if (alignment &&
> - vma->node.start & (alignment - 1))
> + if (vma->node.size < size)
> + return true;
> +
> + if (alignment && vma->node.start & (alignment - 1))
> return true;
>
> if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -3508,6 +3504,7 @@ static int
> i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_ggtt_view *ggtt_view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> @@ -3538,7 +3535,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
> return -EBUSY;
>
> - if (i915_vma_misplaced(vma, alignment, flags)) {
> + if (i915_vma_misplaced(vma, size, alignment, flags)) {
> WARN(vma->pin_count,
> "bo is already pinned in %s with incorrect alignment:"
> " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> @@ -3559,8 +3556,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>
> bound = vma ? vma->bound : 0;
> if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> - flags);
> + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> + size, alignment, flags);
> if (IS_ERR(vma))
> return PTR_ERR(vma);
> } else {
> @@ -3582,17 +3579,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> int
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> return i915_gem_object_do_pin(obj, vm,
> i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> - alignment, flags);
> + size, alignment, flags);
> }
>
> int
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> @@ -3600,7 +3599,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> return -EINVAL;
>
> return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
> - alignment, flags | PIN_GLOBAL);
> + size, alignment, flags | PIN_GLOBAL);
> }
>
> void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index d88be1d3cb86..899220139a8a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -642,10 +642,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> flags |= PIN_HIGH;
> }
>
> - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> + ret = i915_gem_object_pin(obj, vma->vm,
> + entry->pad_to_size,
> + entry->alignment,
> + flags);
> if ((ret == -ENOSPC || ret == -E2BIG) &&
> only_mappable_for_reloc(entry->flags))
> ret = i915_gem_object_pin(obj, vma->vm,
> + entry->pad_to_size,
> entry->alignment,
> flags & ~PIN_MAPPABLE);
> if (ret)
> @@ -708,6 +712,9 @@ eb_vma_misplaced(struct i915_vma *vma)
> vma->node.start & (entry->alignment - 1))
> return true;
>
> + if (vma->node.size < entry->pad_to_size)
> + return true;
> +
> if (entry->flags & EXEC_OBJECT_PINNED &&
> vma->node.start != entry->offset)
> return true;
> @@ -1044,6 +1051,13 @@ validate_exec_list(struct drm_device *dev,
> if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
> return -EINVAL;
>
> + /* pad_to_size was once a reserved field, so sanitize it */
> + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> + if (offset_in_page(exec[i].pad_to_size))
> + return -EINVAL;
> + } else
> + exec[i].pad_to_size = 0;
> +
> /* First check for malicious input causing overflow in
> * the worst case where we need to allocate the entire
> * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7fee4416dcc7..ff7b438059da 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -697,10 +697,14 @@ struct drm_i915_gem_exec_object2 {
> #define EXEC_OBJECT_WRITE (1<<2)
> #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> #define EXEC_OBJECT_PINNED (1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> __u64 flags;
>
> - __u64 rsvd1;
> + union {
> + __u64 rsvd1;
> + __u64 pad_to_size;
> + };
> __u64 rsvd2;
> };
>
> --
> 2.7.0.rc3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx mailing list