[Intel-gfx] [PATCH 41/48] drm/i915: Use multiple VMs -- the point of no return

Ben Widawsky benjamin.widawsky at intel.com
Fri Dec 6 23:12:14 CET 2013


From: Ben Widawsky <ben at bwidawsk.net>

As with processes which run on the CPU, the goal of multiple VMs is to
provide process isolation. Specific to GEN, there is also the ability to
map more objects per process (2GB each instead of 2GB-2k total).

For the most part, all the pipes have been laid, and all we need to do
is remove asserts and actually start changing address spaces with the
context switch. Since prior to this we've converted the setting of the
page tables to a streamed version, this is quite easy.

One important thing to point out (since it'd been hotly contested) is
that with this patch, every context created will have its own address
space (provided the HW can do it).

v2: Disable BDW on rebase

NOTE: I tried to make this commit as small as possible. I needed one
place where I could "turn everything on" and that is here. It could be
split into finer commits, but I didn't really see much point.

Cc: Eric Anholt <eric at anholt.net>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_dma.c            |  3 ++
 drivers/gpu/drm/i915/i915_drv.c            |  3 +-
 drivers/gpu/drm/i915/i915_drv.h            | 12 +++++-
 drivers/gpu/drm/i915/i915_gem.c            | 22 ++++-------
 drivers/gpu/drm/i915/i915_gem_context.c    | 60 +++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++---
 drivers/gpu/drm/i915/i915_gem_gtt.c        | 22 ++++++++---
 drivers/gpu/drm/i915/i915_gpu_error.c      |  5 ---
 include/uapi/drm/i915_drm.h                |  1 +
 9 files changed, 86 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 22a3a67..7a184b6 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1003,6 +1003,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_HANDLE_LUT:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_FULL_PPGTT:
+		value = USES_FULL_PPGTT(dev);
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 65b5c83..6cdaa78 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -116,7 +116,8 @@ MODULE_PARM_DESC(enable_hangcheck,
 int i915_enable_ppgtt __read_mostly = -1;
 module_param_named(i915_enable_ppgtt, i915_enable_ppgtt, int, 0400);
 MODULE_PARM_DESC(i915_enable_ppgtt,
-		"Enable PPGTT (default: true)");
+		"Override PPGTT usage. "
+		"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
 
 int i915_enable_psr __read_mostly = 0;
 module_param_named(enable_psr, i915_enable_psr, int, 0600);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fe13023..f404491 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1830,7 +1830,9 @@ struct drm_i915_file_private {
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
 #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev))
+#define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_BROADWELL(dev))
 #define USES_ALIASING_PPGTT(dev) intel_enable_ppgtt(dev, false)
+#define USES_FULL_PPGTT(dev)	intel_enable_ppgtt(dev, true)
 
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
@@ -2011,6 +2013,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_object_ops *ops);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
+void i915_init_vm(struct drm_i915_private *dev_priv,
+		  struct i915_address_space *vm);
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
@@ -2289,7 +2293,8 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
 	if (i915_enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
 		return false;
 
-	BUG_ON(full);
+	if (i915_enable_ppgtt == 1 && full)
+		return false;
 
 #ifdef CONFIG_INTEL_IOMMU
 	/* Disable ppgtt on SNB if VT-d is on. */
@@ -2299,7 +2304,10 @@ static inline bool intel_enable_ppgtt(struct drm_device *dev, bool full)
 	}
 #endif
 
-	return HAS_ALIASING_PPGTT(dev);
+	if (full)
+		return HAS_PPGTT(dev);
+	else
+		return HAS_ALIASING_PPGTT(dev);
 }
 
 static inline void ppgtt_release(struct kref *kref)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 63047da..f7114ae 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2247,7 +2247,10 @@ request_to_vm(struct drm_i915_gem_request *request)
 	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
 	struct i915_address_space *vm;
 
-	vm = &dev_priv->gtt.base;
+	if (request->ctx)
+		vm = request->ctx->vm;
+	else
+		vm = &dev_priv->gtt.base;
 
 	return vm;
 }
@@ -2718,9 +2721,6 @@ int i915_vma_unbind(struct i915_vma *vma)
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	/* For now we only ever use 1 vma per object */
-	WARN_ON(!list_is_singular(&obj->vma_list));
-
 	if (list_empty(&vma->vma_link))
 		return 0;
 
@@ -3268,17 +3268,12 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
-	BUG_ON(!i915_is_ggtt(vm));
-
 	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unpin;
 	}
 
-	/* For now we only ever use 1 vma per object */
-	WARN_ON(!list_is_singular(&obj->vma_list));
-
 search_free:
 	/* FIXME: Some tests are failing when they receive a reloc of 0. To
 	 * prevent this, we simply don't allow the 0th offset. */
@@ -4182,9 +4177,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->phys_obj)
 		i915_gem_detach_phys_object(dev, obj);
 
-	/* NB: 0 or 1 elements */
-	WARN_ON(!list_empty(&obj->vma_list) &&
-		!list_is_singular(&obj->vma_list));
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
 		int ret;
 
@@ -4580,9 +4572,11 @@ init_ring_lists(struct intel_ring_buffer *ring)
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
-static void i915_init_vm(struct drm_i915_private *dev_priv,
-			 struct i915_address_space *vm)
+void i915_init_vm(struct drm_i915_private *dev_priv,
+		  struct i915_address_space *vm)
 {
+	if (!i915_is_ggtt(vm))
+		drm_mm_init(&vm->mm, vm->start, vm->total);
 	vm->dev = dev_priv->dev;
 	INIT_LIST_HEAD(&vm->active_list);
 	INIT_LIST_HEAD(&vm->inactive_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 165a5c7..ebe0f67 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -288,17 +288,15 @@ i915_gem_create_context(struct drm_device *dev,
 				DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
 				goto err_destroy;
 			}
+
+			ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
 		}
 	} else if (USES_ALIASING_PPGTT(dev)) {
 		/* For platforms which only have aliasing PPGTT, we fake the
 		 * address space and refcounting. */
-		kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
-	}
-
-	/* TODO: Until full ppgtt... */
-	if (USES_ALIASING_PPGTT(dev))
 		ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
-	else
+		kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
+	} else
 		ctx->vm = &dev_priv->gtt.base;
 
 	return ctx;
@@ -500,7 +498,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 
 	mutex_lock(&dev->struct_mutex);
 	file_priv->private_default_ctx =
-		i915_gem_create_context(dev, file_priv, false);
+		i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 
 	if (IS_ERR(file_priv->private_default_ctx)) {
@@ -587,6 +585,7 @@ static int do_switch(struct intel_ring_buffer *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct i915_hw_context *from = ring->last_context;
+	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
 	u32 hw_flags = 0;
 	int ret, i;
 
@@ -598,17 +597,15 @@ static int do_switch(struct intel_ring_buffer *ring,
 	if (from == to && from->last_ring == ring && !to->remap_slice)
 		return 0;
 
-	if (ring != &dev_priv->ring[RCS]) {
-		if (from)
-			i915_gem_context_unreference(from);
-		goto done;
+	/* Trying to pin first makes error handling easier. */
+	if (ring == &dev_priv->ring[RCS]) {
+		ret = i915_gem_obj_ggtt_pin(to->obj,
+					    get_context_alignment(ring->dev),
+					    false, false);
+		if (ret)
+			return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(to->obj, get_context_alignment(ring->dev),
-				    false, false);
-	if (ret)
-		return ret;
-
 	/*
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
@@ -616,6 +613,18 @@ static int do_switch(struct intel_ring_buffer *ring,
 	 */
 	from = ring->last_context;
 
+	if (USES_FULL_PPGTT(ring->dev)) {
+		ret = ppgtt->switch_mm(ppgtt, ring, false);
+		if (ret)
+			goto unpin_out;
+	}
+
+	if (ring != &dev_priv->ring[RCS]) {
+		if (from)
+			i915_gem_context_unreference(from);
+		goto done;
+	}
+
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -625,10 +634,8 @@ static int do_switch(struct intel_ring_buffer *ring,
 	 * XXX: We need a real interface to do this instead of trickery.
 	 */
 	ret = i915_gem_object_set_to_gtt_domain(to->obj, false);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	if (!to->obj->has_global_gtt_mapping) {
 		struct i915_vma *vma = i915_gem_obj_to_vma(to->obj,
@@ -640,10 +647,8 @@ static int do_switch(struct intel_ring_buffer *ring,
 		hw_flags |= MI_RESTORE_INHIBIT;
 
 	ret = mi_set_context(ring, to, hw_flags);
-	if (ret) {
-		i915_gem_object_ggtt_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	for (i = 0; i < MAX_L3_SLICES; i++) {
 		if (!(to->remap_slice & (1<<i)))
@@ -688,6 +693,11 @@ done:
 	to->last_ring = ring;
 
 	return 0;
+
+unpin_out:
+	if (ring->id == RCS)
+		i915_gem_object_ggtt_unpin(to->obj);
+	return ret;
 }
 
 /**
@@ -736,7 +746,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(dev, file_priv, false);
+	ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7a34fc4..81fa1ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -998,7 +998,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_hw_context *ctx;
 	struct i915_address_space *vm;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-	u32 exec_start, exec_len;
+	u32 exec_start = args->batch_start_offset, exec_len;
 	u32 mask, flags;
 	int ret, mode, i;
 	bool need_relocs;
@@ -1119,9 +1119,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	i915_gem_context_reference(ctx);
 
-	/* HACK until we have full PPGTT */
-	/* vm = ctx->vm; */
-	vm = &dev_priv->gtt.base;
+	vm = ctx->vm;
+	if (!USES_FULL_PPGTT(dev))
+		vm = &dev_priv->gtt.base;
 
 	eb = eb_create(args);
 	if (eb == NULL) {
@@ -1177,6 +1177,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
 	}
 
+	if (flags & I915_DISPATCH_SECURE)
+		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
+	else
+		exec_start += i915_gem_obj_offset(batch_obj, vm);
+
 	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
 	if (ret)
 		goto err;
@@ -1206,8 +1211,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
-	exec_start = i915_gem_obj_offset(batch_obj, vm) +
-		args->batch_start_offset;
+
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 88e49b1..4143efd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -324,6 +324,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 		container_of(vm, struct i915_hw_ppgtt, base);
 	int i, j;
 
+	list_del(&vm->global_link);
 	drm_mm_takedown(&vm->mm);
 
 	for (i = 0; i < ppgtt->num_pd_pages ; i++) {
@@ -755,6 +756,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
+	list_del(&vm->global_link);
 	drm_mm_takedown(&ppgtt->base.mm);
 	drm_mm_remove_node(&ppgtt->node);
 
@@ -901,17 +903,22 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 		BUG();
 
 	if (!ret) {
+		struct drm_i915_private *dev_priv = dev->dev_private;
 		kref_init(&ppgtt->ref);
 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
 			    ppgtt->base.total);
-		if (INTEL_INFO(dev)->gen < 8)
+		i915_init_vm(dev_priv, &ppgtt->base);
+		if (INTEL_INFO(dev)->gen < 8) {
 			gen6_write_pdes(ppgtt);
+			DRM_DEBUG("Adding PPGTT at offset %x\n",
+				  ppgtt->pd_offset << 10);
+		}
 	}
 
 	return ret;
 }
 
-static void __always_unused
+static void
 ppgtt_bind_vma(struct i915_vma *vma,
 	       enum i915_cache_level cache_level,
 	       u32 flags)
@@ -923,7 +930,7 @@ ppgtt_bind_vma(struct i915_vma *vma,
 	vma->vm->insert_entries(vma->vm, vma->obj->pages, entry, cache_level);
 }
 
-static void __always_unused ppgtt_unbind_vma(struct i915_vma *vma)
+static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
 	const unsigned long entry = vma->node.start >> PAGE_SHIFT;
 
@@ -1719,8 +1726,13 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
 	case 8:
 	case 7:
 	case 6:
-		vma->unbind_vma = ggtt_unbind_vma;
-		vma->bind_vma = ggtt_bind_vma;
+		if (i915_is_ggtt(vm)) {
+			vma->unbind_vma = ggtt_unbind_vma;
+			vma->bind_vma = ggtt_bind_vma;
+		} else {
+			vma->unbind_vma = ppgtt_unbind_vma;
+			vma->bind_vma = ppgtt_bind_vma;
+		}
 		break;
 	case 5:
 	case 4:
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b553824..e4c923f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -908,11 +908,6 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
 		cnt++;
 
-	if (WARN(cnt > 1, "Multiple VMs not yet supported\n"))
-		cnt = 1;
-
-	vm = &dev_priv->gtt.base;
-
 	error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
 	error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
 	error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 52aed89..d5b5284 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -337,6 +337,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
+#define I915_PARAM_HAS_FULL_PPGTT	 28
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
1.8.4.2




More information about the Intel-gfx mailing list