[Intel-gfx] [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size

Wed Mar 25 06:29:10 PDT 2015

Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as that they require
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
Tvrtko is working on an igt to exercise this feature. The original
request basically stems from trying to mix client allocations (userptr)
and tiling. But I see it as a nice extension of our lazy_fencing code to
reduce buffer sizes for tiled allocations (as we then do not need to
allocate the entire last tilerow) and for relaxing restrictions on
foriegn buffers like DRI3.
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 +++--
 drivers/gpu/drm/i915/i915_gem.c            | 41 +++++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++++++-
 include/uapi/drm/i915_drm.h                |  5 ++--
 4 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d1f12ccce4fa..0beac0062942 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2631,11 +2631,13 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    uint32_t size,
 		    uint32_t alignment,
 		    uint64_t flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 uint32_t size,
 			 uint32_t alignment,
 			 uint64_t flags);
 
@@ -2896,8 +2898,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
-				   alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
+				   flags | PIN_GLOBAL);
 }
 
 static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f47aa7e205b9..5e86dee12759 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3540,12 +3540,13 @@ static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   const struct i915_ggtt_view *ggtt_view,
+			   uint32_t size,
 			   unsigned alignment,
 			   uint64_t flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 size, fence_size, fence_alignment, unfenced_alignment;
+	u32 fence_size, fence_alignment, unfenced_alignment;
 	unsigned long start =
 		flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 	unsigned long end =
@@ -3575,14 +3576,16 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-EINVAL);
 	}
 
-	size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+	size = max_t(u32, size, obj->base.size);
+	if (flags & PIN_MAPPABLE)
+		size = max_t(u32, size, fence_size);
 
 	/* If the object is bigger than the entire aperture, reject it early
 	 * before evicting everything in a vain attempt to find space.
 	 */
-	if (obj->base.size > end) {
-		DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
-			  obj->base.size,
+	if (size > end) {
+		DRM_DEBUG("Attempting to bind an object larger than the aperture: requested=%u [object=%zd] > %s aperture=%lu\n",
+			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
 		return ERR_PTR(-E2BIG);
@@ -4023,7 +4026,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * put it anyway and hope that userspace can cope (but always first
 	 * try to preserve the existing ABI).
 	 */
-	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
+	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 				       view->type == I915_GGTT_VIEW_NORMAL ?
 				       PIN_MAPPABLE : 0);
 	if (ret)
@@ -4187,12 +4190,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma,
+		   uint32_t size,
+		   uint32_t alignment,
+		   uint64_t flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	if (alignment &&
-	    vma->node.start & (alignment - 1))
+	if (vma->node.size < size)
+		return true;
+
+	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -4209,6 +4217,7 @@ static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
 		       const struct i915_ggtt_view *ggtt_view,
+		       uint32_t size,
 		       uint32_t alignment,
 		       uint64_t flags)
 {
@@ -4239,7 +4248,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
-		if (i915_vma_misplaced(vma, alignment, flags)) {
+		if (i915_vma_misplaced(vma, size, alignment, flags)) {
 			unsigned long offset;
 			offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view->type) :
 					     i915_gem_obj_offset(obj, vm);
@@ -4266,8 +4275,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		 * means we must do a context switch before the GPU can
 		 * accurately read some of the VMAs.
 		 */
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
-						 flags);
+		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
+						 size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 	}
@@ -4290,7 +4299,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 							     obj->tiling_mode,
 							     true);
 
-		fenceable = (vma->node.size == fence_size &&
+		fenceable = (vma->node.size >= fence_size &&
 			     (vma->node.start & (fence_alignment - 1)) == 0);
 
 		mappable = (vma->node.start + fence_size <=
@@ -4311,17 +4320,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    uint32_t size,
 		    uint32_t alignment,
 		    uint64_t flags)
 {
 	return i915_gem_object_do_pin(obj, vm,
 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      alignment, flags);
+				      size, alignment, flags);
 }
 
 int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 uint32_t size,
 			 uint32_t alignment,
 			 uint64_t flags)
 {
@@ -4329,7 +4340,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 
 	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
-				      alignment, flags | PIN_GLOBAL);
+				      size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7f3d78e0eec2..d39bc24c6d1f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -595,10 +595,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	ret = i915_gem_object_pin(obj, vma->vm,
+				  entry->pad_to_size,
+				  entry->alignment,
+				  flags);
 	if ((ret == -ENOSPC  || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
 		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->pad_to_size,
 					  entry->alignment,
 					  flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
 	if (ret)
@@ -662,6 +666,9 @@ eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (vma->node.size < entry->pad_to_size)
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -948,6 +955,13 @@ validate_exec_list(struct drm_device *dev,
 		if (exec[i].flags & invalid_flags)
 			return -EINVAL;
 
+		/* pad_to_size was once a reserved field, so sanitize it */
+		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
+			if (offset_in_page(exec[i].pad_to_size))
+				return -EINVAL;
+		} else
+			exec[i].pad_to_size = 0;
+
 		/* First check for malicious input causing overflow in
 		 * the worst case where we need to allocate the entire
 		 * relocation tree as a single array.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8d1be9073380..94e9a8fb80bf 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -680,10 +680,11 @@ struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_PAD_TO_SIZE	(1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
 	__u64 flags;
 
-	__u64 rsvd1;
+	__u64 pad_to_size;
 	__u64 rsvd2;
 };
 
-- 
2.1.4