[Intel-gfx] [PATCH] drm/i915: use PIPE_CONTROL instruction on 965+

Jesse Barnes jbarnes at virtuousgeek.org
Wed Apr 21 00:35:08 CEST 2010


...and hang your box even faster.

-- 
Jesse Barnes, Intel Open Source Technology Center

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 790fef3..11c25ab 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -240,11 +240,14 @@ typedef struct drm_i915_private {
 
 	drm_dma_handle_t *status_page_dmah;
 	void *hw_status_page;
+	void *seqno_page;
 	dma_addr_t dma_status_page;
 	uint32_t counter;
 	unsigned int status_gfx_addr;
+	unsigned int seqno_gfx_addr;
 	drm_local_map_t hws_map;
 	struct drm_gem_object *hws_obj;
+	struct drm_gem_object *seqno_obj;
 	struct drm_gem_object *pwrctx;
 
 	struct resource mch_res;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b85727c..731eca5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1621,13 +1621,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 	if (dev_priv->mm.next_gem_seqno == 0)
 		dev_priv->mm.next_gem_seqno++;
 
-	BEGIN_LP_RING(4);
-	OUT_RING(MI_STORE_DWORD_INDEX);
-	OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	OUT_RING(seqno);
+	if (IS_I965G(dev)) {
+		BEGIN_LP_RING(4);
+		OUT_RING(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
+			 PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_IS_FLUSH |
+			 PIPE_CONTROL_NOTIFY);
+		OUT_RING(dev_priv->seqno_gfx_addr | PIPE_CONTROL_GLOBAL_GTT);
+		OUT_RING(seqno);
+		OUT_RING(0);
+		ADVANCE_LP_RING();
+	} else {
+		BEGIN_LP_RING(4);
+		OUT_RING(MI_STORE_DWORD_INDEX);
+		OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
+		OUT_RING(seqno);
 
-	OUT_RING(MI_USER_INTERRUPT);
-	ADVANCE_LP_RING();
+		OUT_RING(MI_USER_INTERRUPT);
+		ADVANCE_LP_RING();
+	}
 
 	DRM_DEBUG_DRIVER("%d\n", seqno);
 
@@ -1666,17 +1677,24 @@ static uint32_t
 i915_retire_commands(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
 	uint32_t flush_domains = 0;
 	RING_LOCALS;
 
 	/* The sampler always gets flushed on i965 (sigh) */
-	if (IS_I965G(dev))
+	if (IS_I965G(dev)) {
 		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
-	BEGIN_LP_RING(2);
-	OUT_RING(cmd);
-	OUT_RING(0); /* noop */
-	ADVANCE_LP_RING();
+		BEGIN_LP_RING(4);
+		OUT_RING(GFX_OP_PIPE_CONTROL);
+		OUT_RING(0); /* unused addr */
+		OUT_RING(0); /* unused data */
+		OUT_RING(0); /* unused data */
+		ADVANCE_LP_RING();
+	} else {
+		BEGIN_LP_RING(2);
+		OUT_RING(MI_FLUSH | MI_NO_WRITE_FLUSH);
+		OUT_RING(0); /* noop */
+		ADVANCE_LP_RING();
+	}
 	return flush_domains;
 }
 
@@ -1751,7 +1769,10 @@ i915_get_gem_seqno(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
-	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
+	if (IS_I965G(dev))
+		return ((volatile u32 *)(dev_priv->seqno_page))[0];
+	else
+		return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
 }
 
 /**
@@ -1930,29 +1951,45 @@ i915_gem_flush(struct drm_device *dev,
 		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
 		 * are flushed at any MI_FLUSH.
 		 */
-
-		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-		if ((invalidate_domains|flush_domains) &
-		    I915_GEM_DOMAIN_RENDER)
-			cmd &= ~MI_NO_WRITE_FLUSH;
-		if (!IS_I965G(dev)) {
+		if (IS_I965G(dev)) {
+			cmd = GFX_OP_PIPE_CONTROL;
+			if ((invalidate_domains|flush_domains) &
+			    I915_GEM_DOMAIN_RENDER)
+				cmd |= PIPE_CONTROL_WC_FLUSH;
+			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+				cmd |= PIPE_CONTROL_TC_FLUSH;
+			if (IS_GEN6(dev) &&
+			    invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+				cmd |= PIPE_CONTROL_IS_FLUSH;
+			BEGIN_LP_RING(4);
+			OUT_RING(cmd);
+			OUT_RING(0); /* unused addr */
+			OUT_RING(0); /* unused data */
+			OUT_RING(0); /* unused data */
+			ADVANCE_LP_RING();
+		} else {
+			cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
+			if ((invalidate_domains|flush_domains) &
+			    I915_GEM_DOMAIN_RENDER)
+				cmd &= ~MI_NO_WRITE_FLUSH;
 			/*
 			 * On the 965, the sampler cache always gets flushed
 			 * and this bit is reserved.
 			 */
 			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
 				cmd |= MI_READ_FLUSH;
-		}
-		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
-			cmd |= MI_EXE_FLUSH;
+			if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+				cmd |= MI_EXE_FLUSH;
 
 #if WATCH_EXEC
-		DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
+			DRM_INFO("%s: queue flush %08x to ring\n", __func__,
+				 cmd);
 #endif
-		BEGIN_LP_RING(2);
-		OUT_RING(cmd);
-		OUT_RING(MI_NOOP);
-		ADVANCE_LP_RING();
+			BEGIN_LP_RING(2);
+			OUT_RING(cmd);
+			OUT_RING(MI_NOOP);
+			ADVANCE_LP_RING();
+		}
 	}
 }
 
@@ -4594,6 +4631,37 @@ i915_gem_init_hws(struct drm_device *dev)
 	}
 	DRM_DEBUG_DRIVER("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
 
+	/* Set up pipe control qword area */
+	/* FIXME: cleanup hws above if this fails */
+	if (IS_I965G(dev)) {
+		obj = drm_gem_object_alloc(dev, 4096);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate seqno page\n");
+			return -ENOMEM;
+		}
+		obj_priv = to_intel_bo(obj);
+		obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
+
+		ret = i915_gem_object_pin(obj, 4096);
+		if (ret != 0) {
+			drm_gem_object_unreference(obj);
+			return ret;
+		}
+
+		dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
+		dev_priv->seqno_page =  kmap(obj_priv->pages[0]);
+		if (dev_priv->seqno_page == NULL) {
+			DRM_ERROR("Failed to map seqno page.\n");
+			i915_gem_object_unpin(obj);
+			drm_gem_object_unreference(obj);
+			return -EINVAL;
+		}
+		dev_priv->seqno_obj = obj;
+		memset(dev_priv->seqno_page, 0, PAGE_SIZE);
+		DRM_ERROR("seqno gfx offset: 0x%08x, kmap: %p\n",
+			  dev_priv->seqno_gfx_addr, dev_priv->seqno_page);
+	}
+
 	return 0;
 }
 
@@ -4618,6 +4686,16 @@ i915_gem_cleanup_hws(struct drm_device *dev)
 	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
 	dev_priv->hw_status_page = NULL;
 
+	if (IS_I965G(dev)) {
+		obj = dev_priv->seqno_obj;
+		obj_priv = to_intel_bo(obj);
+		kunmap(obj_priv->pages[0]);
+		i915_gem_object_unpin(obj);
+		drm_gem_object_unreference(obj);
+		dev_priv->seqno_obj = NULL;
+
+		dev_priv->seqno_page = NULL;
+	}
 	/* Write high address into HWS_PGA when disabling. */
 	I915_WRITE(HWS_PGA, 0x1ffff000);
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7701cbd..13283ed 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -51,9 +51,6 @@
 	 I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT |	\
 	 I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
 
-/** Interrupts that we mask and unmask at runtime. */
-#define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT)
-
 #define I915_PIPE_VBLANK_STATUS	(PIPE_START_VBLANK_INTERRUPT_STATUS |\
 				 PIPE_VBLANK_INTERRUPT_STATUS)
 
@@ -352,7 +349,7 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 				READ_BREADCRUMB(dev_priv);
 	}
 
-	if (gt_iir & GT_USER_INTERRUPT) {
+	if (gt_iir & GT_PIPE_NOTIFY) {
 		u32 seqno = i915_get_gem_seqno(dev);
 		dev_priv->mm.irq_gem_seqno = seqno;
 		trace_i915_gem_request_complete(dev, seqno);
@@ -361,6 +358,9 @@ irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 		mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
 	}
 
+	if (gt_iir & GT_SYNC_STATUS)
+		DRM_ERROR("sync status interrupt\n");
+
 	if (de_iir & DE_GSE)
 		ironlake_opregion_gse_intr(dev);
 
@@ -1005,13 +1005,19 @@ void i915_user_irq_get(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	u32 irq;
+
+	if (IS_I965G(dev))
+		irq = I915_PIPE_CONTROL_NOTIFY_INTERRUPT;
+	else
+		irq = I915_USER_INTERRUPT;
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	if (dev->irq_enabled && (++dev_priv->user_irq_refcount == 1)) {
 		if (HAS_PCH_SPLIT(dev))
-			ironlake_enable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
+			ironlake_enable_graphics_irq(dev_priv, GT_SYNC_STATUS | GT_PIPE_NOTIFY);
 		else
-			i915_enable_irq(dev_priv, I915_USER_INTERRUPT);
+			i915_enable_irq(dev_priv, irq);
 	}
 	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
 }
@@ -1020,14 +1026,20 @@ void i915_user_irq_put(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	u32 irq;
+
+	if (IS_I965G(dev))
+		irq = I915_PIPE_CONTROL_NOTIFY_INTERRUPT;
+	else
+		irq = I915_USER_INTERRUPT;
 
 	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags);
 	BUG_ON(dev->irq_enabled && dev_priv->user_irq_refcount <= 0);
 	if (dev->irq_enabled && (--dev_priv->user_irq_refcount == 0)) {
 		if (HAS_PCH_SPLIT(dev))
-			ironlake_disable_graphics_irq(dev_priv, GT_USER_INTERRUPT);
+			ironlake_disable_graphics_irq(dev_priv, GT_SYNC_STATUS | GT_PIPE_NOTIFY);
 		else
-			i915_disable_irq(dev_priv, I915_USER_INTERRUPT);
+			i915_disable_irq(dev_priv, irq);
 	}
 	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags);
 }
@@ -1309,7 +1321,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 	/* enable kind of interrupts always enabled */
 	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
 			   DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE;
-	u32 render_mask = GT_USER_INTERRUPT;
+	u32 render_mask = GT_SYNC_STATUS | GT_PIPE_NOTIFY;
 	u32 hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
 			   SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
 
@@ -1383,7 +1395,7 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
 int i915_driver_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 enable_mask = I915_INTERRUPT_ENABLE_FIX | I915_INTERRUPT_ENABLE_VAR;
+	u32 enable_mask = I915_INTERRUPT_ENABLE_FIX;
 	u32 error_mask;
 
 	DRM_INIT_WAITQUEUE(&dev_priv->irq_queue);
@@ -1393,6 +1405,11 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
 	if (HAS_PCH_SPLIT(dev))
 		return ironlake_irq_postinstall(dev);
 
+	if (IS_I965G(dev))
+		enable_mask |= I915_PIPE_CONTROL_NOTIFY_INTERRUPT;
+	else
+		enable_mask |= I915_USER_INTERRUPT;
+
 	/* Unmask the interrupts that we always want on. */
 	dev_priv->irq_mask_reg = ~I915_INTERRUPT_ENABLE_FIX;
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 527d30a..6ecf53c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -230,6 +230,14 @@
 #define   ASYNC_FLIP                (1<<22)
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
+#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2) /* dword 0 of 4: cmd, addr, data, data */
+#define   PIPE_CONTROL_QW_WRITE	(1<<14) /* set when addr/data dwords carry the seqno write */
+#define   PIPE_CONTROL_WC_FLUSH	(1<<12) /* set for the RENDER domain in i915_gem_flush() */
+#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
+#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_ISP_DIS	(1<<9)
+#define   PIPE_CONTROL_NOTIFY	(1<<8) /* presumably raises the pipe notify irq; confirm in PRM */
+#define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
 /*
  * Fence registers
@@ -2339,6 +2347,7 @@
 #define DEIER   0x4400c
 
 /* GT interrupt */
+#define GT_PIPE_NOTIFY		(1 << 4)
 #define GT_SYNC_STATUS          (1 << 2)
 #define GT_USER_INTERRUPT       (1 << 0)
 



More information about the Intel-gfx mailing list