[Intel-gfx] [PATCH 07/12] drm/i915: Use PDEs as the guard page

Ben Widawsky ben at bwidawsk.net
Wed Apr 24 08:15:35 CEST 2013


Scary alert.

AFAICT, we simply do not need the guard page if we have the PDEs at the
top since all prefetching is CS related, and it should always be safe to
prefetch into a PDE (provided the PDE is valid). The PDE fetching itself
should not be subject to the prefetching problem, though without full
PPGTT support, we may never know.

Potentially this is achievable even without using the PPGTT reworks I've
done prior to this, but I figure there is no point in rocking the boat
without the PPGTT generalizations I've submitted.

There is a very simple bool left to help test whether any breakage is
guard page related. Of course the commit is easily
revertible as well.

One reason why it's desirable to do this is with the drm_mm_node
allocation I've done with the PDEs, we fragment a bit of space finding
the first properly aligned address (if my math is right, we get 11 spare
pages fragmented at the top of our address space). This problem didn't
exist with the original implementation that stole the PDEs out from
under the drm_mm.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  3 +-
 drivers/gpu/drm/i915/i915_gem.c     |  2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c | 94 +++++++++++++++++++++----------------
 3 files changed, 56 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9ab6254..9717c69 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1723,7 +1723,8 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj);
 void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
 void i915_gem_init_global_gtt(struct drm_device *dev);
 void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
-			       unsigned long mappable_end, unsigned long end);
+			       unsigned long mappable_end, unsigned long end,
+			       bool guard_page);
 int i915_gem_gtt_init(struct drm_device *dev);
 static inline void i915_gem_chipset_flush(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index af56802..9c5eaf0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -161,7 +161,7 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
 
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_setup_global_gtt(dev, args->gtt_start, args->gtt_end,
-				  args->gtt_end);
+				  args->gtt_end, true);
 	dev_priv->gtt.mappable_end = args->gtt_end;
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b825d7b..39ac37d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -254,7 +254,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 						  ppgtt->node, GEN6_PD_SIZE,
 						  GEN6_PD_ALIGN, 0,
 						  dev_priv->gtt.mappable_end,
-						  dev_priv->gtt.total - PAGE_SIZE,
+						  dev_priv->gtt.total,
 						  DRM_MM_TOPDOWN);
 	if (ret)
 		return ret;
@@ -323,21 +323,14 @@ err_pt_alloc:
 	return ret;
 }
 
-static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
+int i915_gem_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt;
 	int ret;
 
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return -ENOMEM;
-
 	ppgtt->node = kzalloc(sizeof(*ppgtt->node), GFP_KERNEL);
-	if (!ppgtt->node) {
-		kfree(ppgtt);
+	if (!ppgtt->node)
 		return -ENOMEM;
-	}
 
 	ppgtt->dev = dev;
 	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
@@ -347,12 +340,8 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	else
 		BUG();
 
-	if (ret) {
+	if (ret)
 		drm_mm_put_block(ppgtt->node);
-		kfree(ppgtt);
-	} else {
-		dev_priv->mm.aliasing_ppgtt = ppgtt;
-	}
 
 	return ret;
 }
@@ -599,10 +588,26 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
 			*end -= 4096;
 	}
 }
+
+static bool intel_enable_ppgtt(struct drm_device *dev)
+{
+	if (i915_enable_ppgtt >= 0)
+		return i915_enable_ppgtt;
+
+#ifdef CONFIG_INTEL_IOMMU
+	/* Disable ppgtt on SNB if VT-d is on. */
+	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
+		return false;
+#endif
+
+	return true;
+}
+
 void i915_gem_setup_global_gtt(struct drm_device *dev,
 			       unsigned long start,
 			       unsigned long mappable_end,
-			       unsigned long end)
+			       unsigned long end,
+			       bool guard_page)
 {
 	/* Let GEM Manage all of the aperture.
 	 *
@@ -620,8 +625,12 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 
 	BUG_ON(mappable_end > end);
 
-	/* Subtract the guard page ... */
-	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
+	if (!guard_page)
+		drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
+	else
+		drm_mm_init(&dev_priv->mm.gtt_space, start,
+			    end - start - PAGE_SIZE); /* Guard page */
+
 	if (!HAS_LLC(dev))
 		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
 
@@ -650,46 +659,49 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 					      (hole_end-hole_start) / PAGE_SIZE);
 	}
 
-	/* And finally clear the reserved guard page */
-	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
-}
-
-static bool
-intel_enable_ppgtt(struct drm_device *dev)
-{
-	if (i915_enable_ppgtt >= 0)
-		return i915_enable_ppgtt;
-
-#ifdef CONFIG_INTEL_IOMMU
-	/* Disable ppgtt on SNB if VT-d is on. */
-	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
-		return false;
-#endif
-
-	return true;
+	/* And finally clear the reserved guard page (if exists) */
+	if (guard_page)
+		dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
 }
 
 void i915_gem_init_global_gtt(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long gtt_size, mappable_size;
+	int ret = 0;
 
 	gtt_size = dev_priv->gtt.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
-		int ret;
+		struct i915_hw_ppgtt *ppgtt;
 
-		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
+		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size, false);
 
-		ret = i915_gem_init_aliasing_ppgtt(dev);
-		if (!ret)
-			return;
+		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+		if (!ppgtt) {
+			ret = -ENOMEM;
+			goto ggtt_only;
+		}
+
+		ret = i915_gem_ppgtt_init(dev, ppgtt);
+		if (ret)
+			goto ggtt_only;
+		dev_priv->mm.aliasing_ppgtt = ppgtt;
 
+		return;
+	}
+
+/* XXX: We need to takedown the drm_mm and have this fall back because of the
+ * conditional use of the guard page.
+ * TODO: It's a bit hackish and could use cleanup.
+ */
+ggtt_only:
+	if (ret) {
 		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
 		drm_mm_takedown(&dev_priv->mm.gtt_space);
 	}
-	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
+	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size, true);
 }
 
 static int setup_scratch_page(struct drm_device *dev)
-- 
1.8.2.1




More information about the Intel-gfx mailing list