[Intel-gfx] [RFC] Who wants a bigger GTT mapping range?

Jesse Barnes jbarnes at virtuousgeek.org
Fri Dec 11 00:56:30 CET 2009


Most of our chips (at least since 965, possibly 915/945 too) support
a larger GPU-visible aperture than what's available to the CPU through
the PCI BAR.  This allows us to submit large batches and generally
handle more complex scenes more easily, which is especially important
on newer chips.

This patch adds some detection code for finding the max size, but is
still missing the other bits, namely:
  - GTT-mapped objects must reside in the CPU-visible range
  - eviction code needs to be smarter and only kick out CPU-range
    objects if we're evicting for a GTT map
  - batches could probably preferentially use higher addresses, since
    they won't be accessed (at least during the batch) by the CPU

We could either split our drm_mm into two ranges, or just handle
relocating things as needed (I think the latter will probably be
slightly easier).  I think Jerome has some code for this; I'll go check
it out now.

I'm curious about users of 965 and earlier chips, though; this code
should print out your GPU address space size, and if it's greater than
256M, a full solution to this problem should benefit you (less eviction,
less thrashing, etc. -- yay).

-- 
Jesse Barnes, Intel Open Source Technology Center

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 701bfea..2d02dae 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1194,6 +1194,7 @@ static int i915_load_modeset_init(struct drm_device *dev,
 				  unsigned long agp_size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long gtt_size = 256*1024*1024;
 	int fb_bar = IS_I9XX(dev) ? 2 : 0;
 	int ret = 0;
 
@@ -1208,6 +1209,14 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	if (IS_I965G(dev) || IS_G33(dev))
 		dev_priv->cursor_needs_physical = false;
 
+	/*
+	 * Truncate preallocated space to 16M.  This lets GEM have more
+	 * aperture space and can allow us to reclaim it for general system
+	 * use.
+	 */
+	if (prealloc_size > 16*1024*1024)
+		prealloc_size = 16*1024*1024;
+
 	/* Basic memrange allocator for stolen space (aka vram) */
 	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
 	DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
@@ -1215,6 +1224,39 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	/* We're off and running w/KMS */
 	dev_priv->mm.suspended = 0;
 
+	/* G33+ indicate GTT size in PGTBL_CTL */
+	if (IS_G33(dev) || IS_G4X(dev) || IS_IRONLAKE(dev)) {
+		u32 pgtbl_ctl = I915_READ(PGTBL_CTL);
+		if ((pgtbl_ctl & PGTBL_SIZE_MASK) == PGTBL_SIZE_512K) {
+			gtt_size = 512 << 20;
+		} else if ((pgtbl_ctl & PGTBL_SIZE_MASK) == PGTBL_SIZE_256K) {
+			gtt_size = 256 << 20;
+		} else if ((pgtbl_ctl & PGTBL_SIZE_MASK) == PGTBL_SIZE_128K) {
+			gtt_size = 128 << 20;
+		} else if ((pgtbl_ctl & PGTBL_SIZE_MASK) == PGTBL_SIZE_1024K) {
+			gtt_size = 1024 << 20;
+		} else if ((pgtbl_ctl & PGTBL_SIZE_MASK) == PGTBL_SIZE_2048K) {
+			gtt_size = 2048 << 20;
+		} else if ((pgtbl_ctl & PGTBL_SIZE_MASK) == PGTBL_SIZE_1536K) {
+			gtt_size = 1536 << 20;
+		}
+	} else { /* Previous chips use MSAC */
+		u8 msac;
+
+		pci_read_config_byte(dev->pdev, MSAC, &msac);
+		if ((msac & MSAC_GMADDR_SIZE_MASK) == MSAC_GMADDR_SIZE_512M) {
+			gtt_size = 512 << 20;
+		} else if  ((msac & MSAC_GMADDR_SIZE_MASK) ==
+			    MSAC_GMADDR_SIZE_256M) {
+			gtt_size = 256 << 20;
+		} else if  ((msac & MSAC_GMADDR_SIZE_MASK) ==
+			    MSAC_GMADDR_SIZE_128M) {
+			gtt_size = 128 << 20;
+		}
+	}
+
+	DRM_DEBUG_DRIVER("detected %dM GTT address space\n", gtt_size >> 20);
+
 	/* Let GEM Manage from end of prealloc space to end of aperture.
 	 *
 	 * However, leave one page at the end still bound to the scratch page.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 917b837..97356ec 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1198,6 +1198,14 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	/* Now bind it into the GTT if needed */
 	mutex_lock(&dev->struct_mutex);
+	/*
+	 * If the object isn't in the CPU visible aperture, unbind and move
+	 * it.
+	 */
+	if (obj_priv->gtt_space && obj_priv->gtt_offset + obj->size >
+	    dev->agp->agp_info.aper_size)
+		i915_gem_object_unbind(obj);
+
 	if (!obj_priv->gtt_space) {
 		ret = i915_gem_object_bind_to_gtt(obj, 0);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 974b3cf..1183c7e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -55,6 +55,12 @@
 
 /* PCI config space */
 
+#define MSAC	0x66
+#define   MSAC_GMADDR_SIZE_MASK		(0x0e)
+#define   MSAC_GMADDR_SIZE_512M 	(3<<1)
+#define   MSAC_GMADDR_SIZE_256M		(1<<1)
+#define   MSAC_GMADDR_SIZE_128M		(0<<1)
+
 #define HPLLCC	0xc0 /* 855 only */
 #define   GC_CLOCK_CONTROL_MASK		(0xf << 0)
 #define   GC_CLOCK_133_200		(0 << 0)
@@ -237,6 +243,14 @@
 /*
  * Instruction and interrupt control regs
  */
+#define PGTBL_CTL	0x02020
+#define   PGTBL_SIZE_MASK	(0x0000000e)
+#define   PGTBL_SIZE_512K	(0<<1)
+#define   PGTBL_SIZE_256K	(1<<1)
+#define   PGTBL_SIZE_128K	(2<<1)
+#define   PGTBL_SIZE_1024K	(3<<1) /* GM45+ */
+#define   PGTBL_SIZE_2048K	(4<<1) /* GM45+ */
+#define   PGTBL_SIZE_1536K	(5<<1) /* GM45+ */
 #define PGTBL_ER	0x02024
 #define PRB0_TAIL	0x02030
 #define PRB0_HEAD	0x02034



More information about the Intel-gfx mailing list