[Intel-gfx] [PATCH 06/38] drm/i915: Pad GTT views of exec objects up to user specified size
Daniel Vetter
daniel at ffwll.ch
Wed Jun 8 09:41:01 UTC 2016
On Fri, Jun 03, 2016 at 05:55:21PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
>
> v2: Long time, no see
> v3: Try an anonymous union for uapi struct compatability
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Hm, where's the userspace for this? Commit message should elaborate imo a
bit more on what's going on here ...
-Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 6 ++-
> drivers/gpu/drm/i915/i915_gem.c | 82 +++++++++++++++---------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++-
> include/uapi/drm/i915_drm.h | 8 ++-
> 4 files changed, 65 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a065325580d8..9520adba33f6 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2945,11 +2945,13 @@ void i915_gem_free_object(struct drm_gem_object *obj);
> int __must_check
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags);
> int __must_check
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags);
>
> @@ -3209,8 +3211,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
> struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> struct i915_ggtt *ggtt = &dev_priv->ggtt;
>
> - return i915_gem_object_pin(obj, &ggtt->base,
> - alignment, flags | PIN_GLOBAL);
> + return i915_gem_object_pin(obj, &ggtt->base, 0, alignment,
> + flags | PIN_GLOBAL);
> }
>
> void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 19b8d2ea7698..0f0101300b2b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1438,7 +1438,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> /* Now pin it into the GTT if needed */
> - ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> + ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
> if (ret)
> goto unlock;
>
> @@ -2678,21 +2678,20 @@ static struct i915_vma *
> i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_ggtt_view *ggtt_view,
> + uint64_t size,
> unsigned alignment,
> uint64_t flags)
> {
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = to_i915(dev);
> - struct i915_ggtt *ggtt = &dev_priv->ggtt;
> - u32 fence_alignment, unfenced_alignment;
> - u32 search_flag, alloc_flag;
> u64 start, end;
> - u64 size, fence_size;
> + u32 search_flag, alloc_flag;
> struct i915_vma *vma;
> int ret;
>
> if (i915_is_ggtt(vm)) {
> - u32 view_size;
> + u32 fence_size, fence_alignment, unfenced_alignment;
> + u64 view_size;
>
> if (WARN_ON(!ggtt_view))
> return ERR_PTR(-EINVAL);
> @@ -2710,48 +2709,39 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> view_size,
> obj->tiling_mode,
> false);
> - size = flags & PIN_MAPPABLE ? fence_size : view_size;
> + size = max(size, view_size);
> + if (flags & PIN_MAPPABLE)
> + size = max_t(u64, size, fence_size);
> +
> + if (alignment == 0)
> + alignment = flags & PIN_MAPPABLE ? fence_alignment :
> + unfenced_alignment;
> + if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> + DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> + ggtt_view ? ggtt_view->type : 0,
> + alignment);
> + return ERR_PTR(-EINVAL);
> + }
> } else {
> - fence_size = i915_gem_get_gtt_size(dev,
> - obj->base.size,
> - obj->tiling_mode);
> - fence_alignment = i915_gem_get_gtt_alignment(dev,
> - obj->base.size,
> - obj->tiling_mode,
> - true);
> - unfenced_alignment =
> - i915_gem_get_gtt_alignment(dev,
> - obj->base.size,
> - obj->tiling_mode,
> - false);
> - size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> + size = max_t(u64, size, obj->base.size);
> + alignment = 4096;
> }
>
> start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> end = vm->total;
> if (flags & PIN_MAPPABLE)
> - end = min_t(u64, end, ggtt->mappable_end);
> + end = min_t(u64, end, dev_priv->ggtt.mappable_end);
> if (flags & PIN_ZONE_4G)
> end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
>
> - if (alignment == 0)
> - alignment = flags & PIN_MAPPABLE ? fence_alignment :
> - unfenced_alignment;
> - if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> - DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> - ggtt_view ? ggtt_view->type : 0,
> - alignment);
> - return ERR_PTR(-EINVAL);
> - }
> -
> /* If binding the object/GGTT view requires more space than the entire
> * aperture has, reject it early before evicting everything in a vain
> * attempt to find space.
> */
> if (size > end) {
> - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
> ggtt_view ? ggtt_view->type : 0,
> - size,
> + size, obj->base.size,
> flags & PIN_MAPPABLE ? "mappable" : "total",
> end);
> return ERR_PTR(-E2BIG);
> @@ -3243,7 +3233,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> * (e.g. libkms for the bootup splash), we have to ensure that we
> * always use map_and_fenceable for all scanout buffers.
> */
> - ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> + ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
> view->type == I915_GGTT_VIEW_NORMAL ?
> PIN_MAPPABLE : 0);
> if (ret)
> @@ -3393,12 +3383,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
> }
>
> static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma,
> + uint64_t size,
> + uint32_t alignment,
> + uint64_t flags)
> {
> struct drm_i915_gem_object *obj = vma->obj;
>
> - if (alignment &&
> - vma->node.start & (alignment - 1))
> + if (vma->node.size < size)
> + return true;
> +
> + if (alignment && vma->node.start & (alignment - 1))
> return true;
>
> if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -3442,6 +3437,7 @@ static int
> i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> const struct i915_ggtt_view *ggtt_view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> @@ -3469,7 +3465,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
> return -EBUSY;
>
> - if (i915_vma_misplaced(vma, alignment, flags)) {
> + if (i915_vma_misplaced(vma, size, alignment, flags)) {
> WARN(vma->pin_count,
> "bo is already pinned in %s with incorrect alignment:"
> " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> @@ -3490,8 +3486,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>
> bound = vma ? vma->bound : 0;
> if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> - vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> - flags);
> + vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> + size, alignment, flags);
> if (IS_ERR(vma))
> return PTR_ERR(vma);
> } else {
> @@ -3513,17 +3509,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
> int
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> struct i915_address_space *vm,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> return i915_gem_object_do_pin(obj, vm,
> i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> - alignment, flags);
> + size, alignment, flags);
> }
>
> int
> i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> const struct i915_ggtt_view *view,
> + uint64_t size,
> uint32_t alignment,
> uint64_t flags)
> {
> @@ -3534,7 +3532,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
> BUG_ON(!view);
>
> return i915_gem_object_do_pin(obj, &ggtt->base, view,
> - alignment, flags | PIN_GLOBAL);
> + size, alignment, flags | PIN_GLOBAL);
> }
>
> void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 40937a09855d..c1e7ee212e7e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -652,10 +652,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> flags |= PIN_HIGH;
> }
>
> - ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> + ret = i915_gem_object_pin(obj, vma->vm,
> + entry->pad_to_size,
> + entry->alignment,
> + flags);
> if ((ret == -ENOSPC || ret == -E2BIG) &&
> only_mappable_for_reloc(entry->flags))
> ret = i915_gem_object_pin(obj, vma->vm,
> + entry->pad_to_size,
> entry->alignment,
> flags & ~PIN_MAPPABLE);
> if (ret)
> @@ -718,6 +722,9 @@ eb_vma_misplaced(struct i915_vma *vma)
> vma->node.start & (entry->alignment - 1))
> return true;
>
> + if (vma->node.size < entry->pad_to_size)
> + return true;
> +
> if (entry->flags & EXEC_OBJECT_PINNED &&
> vma->node.start != entry->offset)
> return true;
> @@ -1058,6 +1065,13 @@ validate_exec_list(struct drm_device *dev,
> if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
> return -EINVAL;
>
> + /* pad_to_size was once a reserved field, so sanitize it */
> + if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> + if (offset_in_page(exec[i].pad_to_size))
> + return -EINVAL;
> + } else
> + exec[i].pad_to_size = 0;
> +
> /* First check for malicious input causing overflow in
> * the worst case where we need to allocate the entire
> * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index d6c668e58426..3b861746ba7a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -701,10 +701,14 @@ struct drm_i915_gem_exec_object2 {
> #define EXEC_OBJECT_WRITE (1<<2)
> #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> #define EXEC_OBJECT_PINNED (1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> __u64 flags;
>
> - __u64 rsvd1;
> + union {
> + __u64 rsvd1;
> + __u64 pad_to_size;
> + };
> __u64 rsvd2;
> };
>
> --
> 2.8.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list