FW: [PATCH 2/2] drm/i915/bxt: Broxton decoupled MMIO

Paneri, Praveen praveen.paneri at intel.com
Mon Oct 3 12:53:00 UTC 2016


> On 30/09/2016 10:46, Praveen Paneri wrote:
>> Decoupled MMIO is an alternative way to access forcewake domain
>> registers, which requires less cycles for a single read/write and
>> avoids frequent software forcewake.
>
> How it is for multiple read/writes? We would pay the busy spin cost for every access with decoupled mmio?
In the driver code, wherever there are multiple back-to-back register
reads/writes, they are mostly covered by an explicit FW get/put. In
that case direct register access takes place, bypassing the decoupled
MMIO method.
For a single register access there will be some spinning, but that should
finish almost immediately if the domain is already active. The same kind
of spinning also happens for a forcewake access (on the ACK register)
unless the domain timer is active.
>
> Would it be slower than waking up the forcewake domain in case of a few accesses close together?
Yes. Due to the implementation, it could be slower for a few accesses
close together (if an explicit FW get/put is not used there), because
every access will enter the spinning loop, whereas with the forcewake
method the timer could still be active on the next register access, so
a direct access would take place.
>
> What is the response time for it versus the forcewake times?
This would have to be determined empirically; so far I haven't come
across such data.
>
>> This certainly gives advantage over the forcewake as this new
>> mechanism “decouples” CPU cycles and allow them to complete even when
>> GT is in a CPD (frequency change) or C6 state.
>>
>> This can co-exist with forcewake and we will continue to use forcewake
>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>> separately and land into funny situations.
>>
>> v2:
>> - Moved platform check out of the function and got rid of duplicate
>>   functions to find out decoupled power domain (Chris)
>> - Added a check for forcewake already held and skipped decoupled
>>   access (Chris)
>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>
>> v3:
>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>> - Changed decoupled operation to enum and used u32 instead of
>>   uint_32 data type for register offset (Tvrtko)
>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>> - Improved __gen9_decoupled_read and __gen9_decoupled_write routines
>> (Tvrtko)
>>
>> Cc: "Goel, Akash <akash.goel at intel.com>"
>> Cc: "Tvrtko Ursulin <tursulin at ursulin.net>"
>> Signed-off-by: Zhe Wang <zhe1.wang at intel.com>
>> Signed-off-by: Praveen Paneri <praveen.paneri at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>   drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>   drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>   drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
>>   4 files changed, 138 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h index ec8d4c1..54e0cc2 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -559,6 +559,18 @@ enum forcewake_domains {
>>   #define FW_REG_READ  (1)
>>   #define FW_REG_WRITE (2)
>>
>> +enum decoupled_power_domains {
>> +	GEN9_DECOUPLED_PD_BLITTER = 0,
>> +	GEN9_DECOUPLED_PD_RENDER,
>> +	GEN9_DECOUPLED_PD_MEDIA,
>> +	GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +enum decoupled_ops {
>> +	GEN9_DECOUPLED_OP_WRITE = 0,
>> +	GEN9_DECOUPLED_OP_READ
>> +};
>> +
>>   enum forcewake_domains
>>   intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>   			       i915_reg_t reg, unsigned int op); @@ -680,7 +692,8 @@
>> struct intel_csr {
>>   	func(has_snoop) sep \
>>   	func(has_ddi) sep \
>>   	func(has_fpga_dbg) sep \
>> -	func(has_pooled_eu)
>> +	func(has_pooled_eu) sep \
>> +	func(has_decoupled_mmio)
>>
>>   #define DEFINE_FLAG(name) u8 name:1
>>   #define SEP_SEMICOLON ;
>> @@ -2862,6 +2875,9 @@ struct drm_i915_cmd_table {
>>   #define GT_FREQUENCY_MULTIPLIER 50
>>   #define GEN9_FREQ_SCALER 3
>>
>> +/* fixme: Removed BXT C0 check just for for trybot */ #define
>> +HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio)
>> +
>>   #include "i915_trace.h"
>>
>>   static inline bool intel_scanout_needs_vtd_wa(struct
>> drm_i915_private *dev_priv) diff --git
>> a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 31e6edd..5c56c0c 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
>>   	.has_hw_contexts = 1,
>>   	.has_logical_ring_contexts = 1,
>>   	.has_guc = 1,
>> +	.has_decoupled_mmio = 1,
>>   	.ddb_size = 512,
>>   	GEN_DEFAULT_PIPEOFFSETS,
>>   	IVB_CURSOR_OFFSETS,
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h index 8d44cee..bf7b4c9 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7398,6 +7398,13 @@ enum {
>>   #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>   #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>
>> +/* Decoupled MMIO register pair for kernel driver */
>> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
>> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
>> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
>> +#define GEN9_DECOUPLED_PD_SHIFT			28
>> +#define GEN9_DECOUPLED_OP_SHIFT			24
>> +
>>   /* Per-pipe DDI Function Control */
>>   #define _TRANS_DDI_FUNC_CTL_A		0x60400
>>   #define _TRANS_DDI_FUNC_CTL_B		0x61400
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index 6cd1e78..f78b197 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -808,6 +808,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
>>   	__unclaimed_reg_debug(dev_priv, reg, read, before);
>>   }
>>
>> +static const enum decoupled_power_domains fw2dpd_engine[] = {
>> +	GEN9_DECOUPLED_PD_RENDER,
>> +	GEN9_DECOUPLED_PD_BLITTER,
>> +	GEN9_DECOUPLED_PD_ALL,
>> +	GEN9_DECOUPLED_PD_MEDIA,
>> +	GEN9_DECOUPLED_PD_ALL,
>> +	GEN9_DECOUPLED_PD_ALL,
>> +	GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +/*
>> + * Decoupled MMIO access for only 1 DWORD  */ static void
>> +__gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
>> +					 u32 reg,
>> +					 enum forcewake_domains fw_engine,
>> +					 enum decoupled_ops operation)
>> +{
>> +	enum decoupled_power_domains dpd_engine;
>> +	u32 ctrl_reg_data = 0;
>> +
>> +	dpd_engine = fw2dpd_engine[fw_engine - 1];
>> +
>> +	ctrl_reg_data |= reg;
>> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +	ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> +ctrl_reg_data);
>> +
>> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> +ctrl_reg_data);
>> +
>> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
>> +			FORCEWAKE_ACK_TIMEOUT_MS))
>> +		DRM_ERROR("Decoupled MMIO wait timed out\n"); }
>> +
>> +static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
>> +                                      u32 reg,
>> +                                      enum forcewake_domains
>> +fw_engine) {
>> +	__gen9_decoupled_mmio_access(dev_priv,
>> +			reg,
>> +			fw_engine,
>> +			GEN9_DECOUPLED_OP_READ);
>> +
>> +	return __raw_i915_read32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW0);
>> +}
>> +
>> +static inline void __gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>> +                                      u32 reg, u32 data,
>> +                                      enum forcewake_domains
>> +fw_engine) {
>> +
>> +	__raw_i915_write32(dev_priv,
>> +			GEN9_DECOUPLED_REG0_DW0,
>> +			data);
>> +
>> +	__gen9_decoupled_mmio_access(dev_priv,
>> +			reg,
>> +			fw_engine,
>> +			GEN9_DECOUPLED_OP_WRITE);
>> +}
>> +
>> +
>>   #define GEN2_READ_HEADER(x) \
>>   	u##x val = 0; \
>>   	assert_rpm_wakelock_held(dev_priv);
>> @@ -932,6 +998,27 @@ gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>>   	GEN6_READ_FOOTER; \
>>   }
>>
>> +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
>> +	enum forcewake_domains fw_engine; \
>> +	GEN6_READ_HEADER(x); \
>> +	fw_engine = __gen9_reg_read_fw_domains(offset); \
>> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
>> +		val = __raw_i915_read##x(dev_priv, reg); \
>> +	} else { \
>> +		unsigned i; \
>> +		u32 *ptr_data = (u32 *) &val; \
>> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>> +			*ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
>> +						     offset, \
>> +						     fw_engine); \
>> +	} \
>> +	GEN6_READ_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_read(32)
>> +__gen9_decoupled_read(64)
>>   __gen9_read(8)
>>   __gen9_read(16)
>>   __gen9_read(32)
>> @@ -1099,6 +1186,24 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
>>   	GEN6_WRITE_FOOTER; \
>>   }
>>
>> +#define __gen9_decoupled_write(x) \
>> +static void \
>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
>> +		bool trace) { \
>> +	enum forcewake_domains fw_engine; \
>> +	GEN6_WRITE_HEADER; \
>> +	fw_engine = __gen9_reg_write_fw_domains(offset); \
>> +	if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) \
>> +		__raw_i915_write##x(dev_priv, reg, val); \
>> +	else \
>> +		__gen9_decoupled_mmio_write(dev_priv, \
>> +					     offset, \
>> +					     val, \
>> +					     fw_engine); \
>> +	GEN6_WRITE_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_write(32)
>>   __gen9_write(8)
>>   __gen9_write(16)
>>   __gen9_write(32)
>> @@ -1322,6 +1427,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>>   	case 9:
>>   		ASSIGN_WRITE_MMIO_VFUNCS(gen9);
>>   		ASSIGN_READ_MMIO_VFUNCS(gen9);
>> +		if (HAS_DECOUPLED_MMIO(dev_priv)) {
>> +			dev_priv->uncore.funcs.mmio_readl =
>> +						gen9_decoupled_read32;
>> +			dev_priv->uncore.funcs.mmio_readq =
>> +						gen9_decoupled_read64;
>> +			dev_priv->uncore.funcs.mmio_writel =
>> +						gen9_decoupled_write32;
>> +		}
>>   		break;
>>   	case 8:
>>   		if (IS_CHERRYVIEW(dev_priv)) {
>
> Minus the high level questions above, the patch looks good to me.
Thanks,
Praveen
>
> Regards,
>
> Tvrtko
>


More information about the Intel-gfx-trybot mailing list