[Intel-gfx] [PATCH 2/3] drm/i915/vlv: Valleyview support for forcewake Individual power wells.

deepak.s at intel.com deepak.s at intel.com
Sat Nov 23 10:25:43 CET 2013


From: Deepak S <deepak.s at intel.com>

Split vlv force wake routines to help individually control Media/Render
well based on the register access.

We've seen power savings in the lower sub-1W range on workloads that
only need on of the power wells, e.g. glbenchmark, media playback

Signed-off-by: Deepak S <deepak.s at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  17 +++
 drivers/gpu/drm/i915/i915_reg.h         |   4 +-
 drivers/gpu/drm/i915/intel_pm.c         |   7 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |  10 +-
 drivers/gpu/drm/i915/intel_uncore.c     | 183 +++++++++++++++++++++++++++-----
 5 files changed, 186 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7ec24eb..40d0288 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -464,6 +464,9 @@ struct intel_uncore {
 	unsigned fifo_count;
 	unsigned forcewake_count;
 
+	unsigned fw_rendercount;
+	unsigned fw_mediacount;
+
 	struct delayed_work force_wake_work;
 };
 
@@ -2461,6 +2464,20 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
 int vlv_gpu_freq(struct drm_i915_private *dev_priv, int val);
 int vlv_freq_opcode(struct drm_i915_private *dev_priv, int val);
 
+void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine);
+void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine);
+
+#define FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg) \
+	(((reg) >= 0x2000 && (reg) < 0x4000) ||\
+	((reg) >= 0x5000 && (reg) < 0x8000) ||\
+	((reg) >= 0xB000 && (reg) < 0x12000) ||\
+	((reg) >= 0x2E000 && (reg) < 0x30000))
+
+#define FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)\
+	(((reg) >= 0x12000 && (reg) < 0x14000) ||\
+	((reg) >= 0x22000 && (reg) < 0x24000) ||\
+	((reg) >= 0x30000 && (reg) < 0x40000))
+
 #define FORCEWAKE_RENDER	(1 << 0)
 #define FORCEWAKE_MEDIA		(1 << 1)
 #define FORCEWAKE_ALL		(FORCEWAKE_RENDER | FORCEWAKE_MEDIA)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9292c9e..04d46b2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4840,8 +4840,8 @@
 #define  EDP_LINK_TRAIN_VOL_EMP_MASK_IVB	(0x3f<<22)
 
 #define  FORCEWAKE				0xA18C
-#define  FORCEWAKE_VLV			0x1300b0
-#define  FORCEWAKE_ACK_VLV		0x1300b4
+#define  FORCEWAKE_VLV				0x1300b0
+#define  FORCEWAKE_ACK_VLV			0x1300b4
 #define  FORCEWAKE_MEDIA_VLV			0x1300b8
 #define  FORCEWAKE_ACK_MEDIA_VLV		0x1300bc
 #define  FORCEWAKE_ACK_HSW			0x130044
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index c2dff4a..c00e987 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -191,7 +191,10 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	u32 blt_ecoskpd;
 
 	/* Make sure blitter notifies FBC of writes */
-	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
+	/* Blitter is part of Media powerwell on VLV. No impact of
+	 * his param in other platforms for now */
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA);
 
 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
@@ -204,7 +207,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
 
-	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA);
 }
 
 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6c7ce7b..3079853 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1033,7 +1033,10 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
 	/* It looks like we need to prevent the gt from suspending while waiting
 	 * for an notifiy irq, otherwise irqs seem to get lost on at least the
 	 * blt/bsd rings on ivb. */
-	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+	if (ring->id == RCS)
+		gen6_gt_force_wake_get(dev_priv, FORCEWAKE_RENDER);
+	else
+		gen6_gt_force_wake_get(dev_priv, FORCEWAKE_MEDIA);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 	if (ring->irq_refcount++ == 0) {
@@ -1067,7 +1070,10 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
-	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
+	if (ring->id == RCS)
+		gen6_gt_force_wake_put(dev_priv, FORCEWAKE_RENDER);
+	else
+		gen6_gt_force_wake_put(dev_priv, FORCEWAKE_MEDIA);
 }
 
 static bool
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index c6acbb2..7619d87 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -175,38 +175,112 @@ static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
 	__raw_posting_read(dev_priv, FORCEWAKE_ACK_VLV);
 }
 
-static void vlv_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine)
-{
-	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
-
-	__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
-			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
-	__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
-			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
-
-	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
+static void __vlv_force_wake_get(struct drm_i915_private *dev_priv,
+						int fw_engine)
+{
+	/* Check for Render Engine */
+	if (FORCEWAKE_RENDER & fw_engine) {
+		if (wait_for_atomic((__raw_i915_read32(dev_priv,
+						FORCEWAKE_ACK_VLV) &
+						FORCEWAKE_KERNEL) == 0,
+					FORCEWAKE_ACK_TIMEOUT_MS))
+			DRM_ERROR("Timed out: Render forcewake old ack to clear.\n");
+
+		__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
+				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+
+		if (wait_for_atomic((__raw_i915_read32(dev_priv,
+						FORCEWAKE_ACK_VLV) &
+						FORCEWAKE_KERNEL),
+					FORCEWAKE_ACK_TIMEOUT_MS))
+			DRM_ERROR("Timed out: waiting for Render to ack.\n");
+	}
 
-	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) &
-			     FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
+	/* Check for Media Engine */
+	if (FORCEWAKE_MEDIA & fw_engine) {
+		if (wait_for_atomic((__raw_i915_read32(dev_priv,
+						FORCEWAKE_ACK_MEDIA_VLV) &
+						FORCEWAKE_KERNEL) == 0,
+					FORCEWAKE_ACK_TIMEOUT_MS))
+			DRM_ERROR("Timed out: Media forcewake old ack to clear.\n");
+
+		__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
+				   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+
+		if (wait_for_atomic((__raw_i915_read32(dev_priv,
+						FORCEWAKE_ACK_MEDIA_VLV) &
+						FORCEWAKE_KERNEL),
+					FORCEWAKE_ACK_TIMEOUT_MS))
+			DRM_ERROR("Timed out: waiting for media to ack.\n");
+	}
 
 	/* WaRsForcewakeWaitTC0:vlv */
 	__gen6_gt_wait_for_thread_c0(dev_priv);
+
 }
 
-static void vlv_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine)
+static void __vlv_force_wake_put(struct drm_i915_private *dev_priv,
+					int fw_engine)
 {
-	__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
-			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
-	__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
-			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+
+	/* Check for Render Engine */
+	if (FORCEWAKE_RENDER & fw_engine)
+		__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
+					_MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+
+
+	/* Check for Media Engine */
+	if (FORCEWAKE_MEDIA & fw_engine)
+		__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
+				_MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+
 	/* The below doubles as a POSTING_READ */
 	gen6_gt_check_fifodbg(dev_priv);
+
+}
+
+void vlv_force_wake_get(struct drm_i915_private *dev_priv,
+						int fw_engine)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	if (FORCEWAKE_RENDER & fw_engine) {
+		if (dev_priv->uncore.fw_rendercount++ == 0)
+			dev_priv->uncore.funcs.force_wake_get(dev_priv,
+							FORCEWAKE_RENDER);
+	}
+	if (FORCEWAKE_MEDIA & fw_engine) {
+		if (dev_priv->uncore.fw_mediacount++ == 0)
+			dev_priv->uncore.funcs.force_wake_get(dev_priv,
+							FORCEWAKE_MEDIA);
+	}
+
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+void vlv_force_wake_put(struct drm_i915_private *dev_priv,
+						int fw_engine)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+	if (FORCEWAKE_RENDER & fw_engine) {
+		WARN_ON(dev_priv->uncore.fw_rendercount == 0);
+		if (--dev_priv->uncore.fw_rendercount == 0)
+			dev_priv->uncore.funcs.force_wake_put(dev_priv,
+							FORCEWAKE_RENDER);
+	}
+
+	if (FORCEWAKE_MEDIA & fw_engine) {
+		WARN_ON(dev_priv->uncore.fw_mediacount == 0);
+		if (--dev_priv->uncore.fw_mediacount == 0)
+			dev_priv->uncore.funcs.force_wake_put(dev_priv,
+							FORCEWAKE_MEDIA);
+	}
+
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
 static void gen6_force_wake_work(struct work_struct *work)
@@ -292,6 +366,10 @@ void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv, int fw_engine)
 	if (!dev_priv->uncore.funcs.force_wake_get)
 		return;
 
+	/* Redirect to VLV specific routine */
+	if (IS_VALLEYVIEW(dev_priv->dev))
+		return vlv_force_wake_get(dev_priv, fw_engine);
+
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 	if (dev_priv->uncore.forcewake_count++ == 0)
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
@@ -308,6 +386,11 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv, int fw_engine)
 	if (!dev_priv->uncore.funcs.force_wake_put)
 		return;
 
+	/* Redirect to VLV specific routine */
+	if (IS_VALLEYVIEW(dev_priv->dev))
+		return vlv_force_wake_put(dev_priv, fw_engine);
+
+
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 	if (--dev_priv->uncore.forcewake_count == 0) {
 		dev_priv->uncore.forcewake_count++;
@@ -395,6 +478,39 @@ gen6_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
 	REG_READ_FOOTER; \
 }
 
+#define __vlv_read(x) \
+static u##x \
+vlv_read##x(struct drm_i915_private *dev_priv, off_t reg, bool trace) { \
+	unsigned fwengine = 0; \
+	unsigned *fwcount = 0; \
+	REG_READ_HEADER(x); \
+	if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg)) {   \
+		fwengine = FORCEWAKE_RENDER;            \
+		fwcount = &dev_priv->uncore.fw_rendercount;    \
+	}                                               \
+	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(reg)) {       \
+		fwengine = FORCEWAKE_MEDIA;             \
+		fwcount = &dev_priv->uncore.fw_mediacount;     \
+	}  \
+	if (fwengine != 0) {		\
+		if ((*fwcount)++ == 0) \
+			(dev_priv)->uncore.funcs.force_wake_get(dev_priv, \
+								fwengine); \
+		val = __raw_i915_read##x(dev_priv, reg); \
+		if (--(*fwcount) == 0) \
+			(dev_priv)->uncore.funcs.force_wake_put(dev_priv, \
+							FORCEWAKE_ALL); \
+	} else { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+	REG_READ_FOOTER; \
+}
+
+
+__vlv_read(8)
+__vlv_read(16)
+__vlv_read(32)
+__vlv_read(64)
 __gen6_read(8)
 __gen6_read(16)
 __gen6_read(32)
@@ -408,6 +524,7 @@ __gen4_read(16)
 __gen4_read(32)
 __gen4_read(64)
 
+#undef __vlv_read
 #undef __gen6_read
 #undef __gen5_read
 #undef __gen4_read
@@ -542,8 +659,8 @@ void intel_uncore_init(struct drm_device *dev)
 			  gen6_force_wake_work);
 
 	if (IS_VALLEYVIEW(dev)) {
-		dev_priv->uncore.funcs.force_wake_get = vlv_force_wake_get;
-		dev_priv->uncore.funcs.force_wake_put = vlv_force_wake_put;
+		dev_priv->uncore.funcs.force_wake_get = __vlv_force_wake_get;
+		dev_priv->uncore.funcs.force_wake_put = __vlv_force_wake_put;
 	} else if (IS_HASWELL(dev) || IS_GEN8(dev)) {
 		dev_priv->uncore.funcs.force_wake_get = __gen6_gt_force_wake_mt_get;
 		dev_priv->uncore.funcs.force_wake_put = __gen6_gt_force_wake_mt_put;
@@ -609,10 +726,18 @@ void intel_uncore_init(struct drm_device *dev)
 			dev_priv->uncore.funcs.mmio_writel  = gen6_write32;
 			dev_priv->uncore.funcs.mmio_writeq  = gen6_write64;
 		}
-		dev_priv->uncore.funcs.mmio_readb  = gen6_read8;
-		dev_priv->uncore.funcs.mmio_readw  = gen6_read16;
-		dev_priv->uncore.funcs.mmio_readl  = gen6_read32;
-		dev_priv->uncore.funcs.mmio_readq  = gen6_read64;
+
+		if (IS_VALLEYVIEW(dev)) {
+			dev_priv->uncore.funcs.mmio_readb  = vlv_read8;
+			dev_priv->uncore.funcs.mmio_readw  = vlv_read16;
+			dev_priv->uncore.funcs.mmio_readl  = vlv_read32;
+			dev_priv->uncore.funcs.mmio_readq  = vlv_read64;
+		} else {
+			dev_priv->uncore.funcs.mmio_readb  = gen6_read8;
+			dev_priv->uncore.funcs.mmio_readw  = gen6_read16;
+			dev_priv->uncore.funcs.mmio_readl  = gen6_read32;
+			dev_priv->uncore.funcs.mmio_readq  = gen6_read64;
+		}
 		break;
 	case 5:
 		dev_priv->uncore.funcs.mmio_writeb  = gen5_write8;
-- 
1.8.4.2




More information about the Intel-gfx mailing list