[PATCH 2/2] drm/i915/bxt: Broxton decoupled MMIO

Praveen Paneri praveen.paneri at intel.com
Fri Sep 30 09:46:42 UTC 2016


Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives advantage over the forcewake as this new
mechanism “decouples” CPU cycles and allow them to complete even
when GT is in a CPD (frequency change) or C6 state.

This can co-exist with forcewake and we will continue to use forcewake
as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
separately and land into funny situations.

v2:
- Moved platform check out of the function and got rid of duplicate
 functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
 access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)

v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
 uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)

Cc: "Goel, Akash <akash.goel at intel.com>"
Cc: "Tvrtko Ursulin <tursulin at ursulin.net>"
Signed-off-by: Zhe Wang <zhe1.wang at intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
 drivers/gpu/drm/i915/i915_pci.c     |   1 +
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
 4 files changed, 138 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ec8d4c1..54e0cc2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -559,6 +559,18 @@ enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum decoupled_power_domains {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+	GEN9_DECOUPLED_OP_WRITE = 0,
+	GEN9_DECOUPLED_OP_READ
+};
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -680,7 +692,8 @@ struct intel_csr {
 	func(has_snoop) sep \
 	func(has_ddi) sep \
 	func(has_fpga_dbg) sep \
-	func(has_pooled_eu)
+	func(has_pooled_eu) sep \
+	func(has_decoupled_mmio)
 
 #define DEFINE_FLAG(name) u8 name:1
 #define SEP_SEMICOLON ;
@@ -2862,6 +2875,9 @@ struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+/* fixme: Removed BXT C0 check just for for trybot */
+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio)
+
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 31e6edd..5c56c0c 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
 	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_guc = 1,
+	.has_decoupled_mmio = 1,
 	.ddb_size = 512,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8d44cee..bf7b4c9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7398,6 +7398,13 @@ enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 6cd1e78..f78b197 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -808,6 +808,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+static const enum decoupled_power_domains fw2dpd_engine[] = {
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_BLITTER,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 u32 reg,
+					 enum forcewake_domains fw_engine,
+					 enum decoupled_ops operation)
+{
+	enum decoupled_power_domains dpd_engine;
+	u32 ctrl_reg_data = 0;
+
+	dpd_engine = fw2dpd_engine[fw_engine - 1];
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
+			FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+}
+
+static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
+                                      u32 reg,
+                                      enum forcewake_domains fw_engine)
+{
+	__gen9_decoupled_mmio_access(dev_priv,
+			reg,
+			fw_engine,
+			GEN9_DECOUPLED_OP_READ);
+
+	return __raw_i915_read32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW0);
+}
+
+static inline void __gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
+                                      u32 reg, u32 data,
+                                      enum forcewake_domains fw_engine)
+{
+
+	__raw_i915_write32(dev_priv,
+			GEN9_DECOUPLED_REG0_DW0,
+			data);
+
+	__gen9_decoupled_mmio_access(dev_priv,
+			reg,
+			fw_engine,
+			GEN9_DECOUPLED_OP_WRITE);
+}
+
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -932,6 +998,27 @@ gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	GEN6_READ_FOOTER; \
 }
 
+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_READ_HEADER(x); \
+	fw_engine = __gen9_reg_read_fw_domains(offset); \
+	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} else { \
+		unsigned i; \
+		u32 *ptr_data = (u32 *) &val; \
+		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
+			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
+						     offset, \
+						     fw_engine); \
+	} \
+	GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __gen9_read(8)
 __gen9_read(16)
 __gen9_read(32)
@@ -1099,6 +1186,24 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 	GEN6_WRITE_FOOTER; \
 }
 
+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
+		bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_WRITE_HEADER; \
+	fw_engine = __gen9_reg_write_fw_domains(offset); \
+	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	else \
+		__gen9_decoupled_mmio_write(dev_priv, \
+					     offset, \
+					     val, \
+					     fw_engine); \
+	GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(32)
 __gen9_write(8)
 __gen9_write(16)
 __gen9_write(32)
@@ -1322,6 +1427,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
 	case 9:
 		ASSIGN_WRITE_MMIO_VFUNCS(gen9);
 		ASSIGN_READ_MMIO_VFUNCS(gen9);
+		if (HAS_DECOUPLED_MMIO(dev_priv)) {
+			dev_priv->uncore.funcs.mmio_readl =
+						gen9_decoupled_read32;
+			dev_priv->uncore.funcs.mmio_readq =
+						gen9_decoupled_read64;
+			dev_priv->uncore.funcs.mmio_writel =
+						gen9_decoupled_write32;
+		}
 		break;
 	case 8:
 		if (IS_CHERRYVIEW(dev_priv)) {
-- 
1.9.1



More information about the Intel-gfx-trybot mailing list