[PATCH 2/2] drm/i915/bxt: Broxton decoupled MMIO
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Sep 30 13:49:32 UTC 2016
On 30/09/2016 10:46, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires fewer cycles for a single read/write and
> avoids frequent software forcewake.
How is it for multiple reads/writes? Would we pay the busy-spin cost for
every access with decoupled MMIO?
Would it be slower than waking up the forcewake domain in case of a few
accesses close together?
What is the response time for it versus the forcewake times?
> This certainly gives an advantage over forcewake, as this new
> mechanism "decouples" CPU cycles and allows them to complete even
> when GT is in a CPD (frequency change) or C6 state.
>
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate, e.g. for 64-bit register writes, to avoid writing 2 dwords
> separately and landing in funny situations.
>
> v2:
> - Moved platform check out of the function and got rid of duplicate
> functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
> access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
> uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)
>
> Cc: "Goel, Akash <akash.goel at intel.com>"
> Cc: "Tvrtko Ursulin <tursulin at ursulin.net>"
> Signed-off-by: Zhe Wang <zhe1.wang at intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 18 +++++-
> drivers/gpu/drm/i915/i915_pci.c | 1 +
> drivers/gpu/drm/i915/i915_reg.h | 7 +++
> drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
> 4 files changed, 138 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ec8d4c1..54e0cc2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -559,6 +559,18 @@ enum forcewake_domains {
> #define FW_REG_READ (1)
> #define FW_REG_WRITE (2)
>
> +enum decoupled_power_domains {
> + GEN9_DECOUPLED_PD_BLITTER = 0,
> + GEN9_DECOUPLED_PD_RENDER,
> + GEN9_DECOUPLED_PD_MEDIA,
> + GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> + GEN9_DECOUPLED_OP_WRITE = 0,
> + GEN9_DECOUPLED_OP_READ
> +};
> +
> enum forcewake_domains
> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> i915_reg_t reg, unsigned int op);
> @@ -680,7 +692,8 @@ struct intel_csr {
> func(has_snoop) sep \
> func(has_ddi) sep \
> func(has_fpga_dbg) sep \
> - func(has_pooled_eu)
> + func(has_pooled_eu) sep \
> + func(has_decoupled_mmio)
>
> #define DEFINE_FLAG(name) u8 name:1
> #define SEP_SEMICOLON ;
> @@ -2862,6 +2875,9 @@ struct drm_i915_cmd_table {
> #define GT_FREQUENCY_MULTIPLIER 50
> #define GEN9_FREQ_SCALER 3
>
> +/* fixme: Removed BXT C0 check just for for trybot */
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio)
> +
> #include "i915_trace.h"
>
> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 31e6edd..5c56c0c 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
> .has_hw_contexts = 1,
> .has_logical_ring_contexts = 1,
> .has_guc = 1,
> + .has_decoupled_mmio = 1,
> .ddb_size = 512,
> GEN_DEFAULT_PIPEOFFSETS,
> IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8d44cee..bf7b4c9 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7398,6 +7398,13 @@ enum {
> #define SKL_FUSE_PG1_DIST_STATUS (1<<26)
> #define SKL_FUSE_PG2_DIST_STATUS (1<<25)
>
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO (1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT 28
> +#define GEN9_DECOUPLED_OP_SHIFT 24
> +
> /* Per-pipe DDI Function Control */
> #define _TRANS_DDI_FUNC_CTL_A 0x60400
> #define _TRANS_DDI_FUNC_CTL_B 0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 6cd1e78..f78b197 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -808,6 +808,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
> __unclaimed_reg_debug(dev_priv, reg, read, before);
> }
>
> +static const enum decoupled_power_domains fw2dpd_engine[] = {
> + GEN9_DECOUPLED_PD_RENDER,
> + GEN9_DECOUPLED_PD_BLITTER,
> + GEN9_DECOUPLED_PD_ALL,
> + GEN9_DECOUPLED_PD_MEDIA,
> + GEN9_DECOUPLED_PD_ALL,
> + GEN9_DECOUPLED_PD_ALL,
> + GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> + u32 reg,
> + enum forcewake_domains fw_engine,
> + enum decoupled_ops operation)
> +{
> + enum decoupled_power_domains dpd_engine;
> + u32 ctrl_reg_data = 0;
> +
> + dpd_engine = fw2dpd_engine[fw_engine - 1];
> +
> + ctrl_reg_data |= reg;
> + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> + ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> + if (wait_for_atomic((__raw_i915_read32(dev_priv,
> + GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> + FORCEWAKE_ACK_TIMEOUT_MS))
> + DRM_ERROR("Decoupled MMIO wait timed out\n");
> +}
> +
> +static inline u32 __gen9_decoupled_mmio_read(struct drm_i915_private *dev_priv,
> + u32 reg,
> + enum forcewake_domains fw_engine)
> +{
> + __gen9_decoupled_mmio_access(dev_priv,
> + reg,
> + fw_engine,
> + GEN9_DECOUPLED_OP_READ);
> +
> + return __raw_i915_read32(dev_priv,
> + GEN9_DECOUPLED_REG0_DW0);
> +}
> +
> +static inline void __gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
> + u32 reg, u32 data,
> + enum forcewake_domains fw_engine)
> +{
> +
> + __raw_i915_write32(dev_priv,
> + GEN9_DECOUPLED_REG0_DW0,
> + data);
> +
> + __gen9_decoupled_mmio_access(dev_priv,
> + reg,
> + fw_engine,
> + GEN9_DECOUPLED_OP_WRITE);
> +}
> +
> +
> #define GEN2_READ_HEADER(x) \
> u##x val = 0; \
> assert_rpm_wakelock_held(dev_priv);
> @@ -932,6 +998,27 @@ gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
> GEN6_READ_FOOTER; \
> }
>
> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
> + enum forcewake_domains fw_engine; \
> + GEN6_READ_HEADER(x); \
> + fw_engine = __gen9_reg_read_fw_domains(offset); \
> + if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) { \
> + val = __raw_i915_read##x(dev_priv, reg); \
> + } else { \
> + unsigned i; \
> + u32 *ptr_data = (u32 *) &val; \
> + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
> + *ptr_data = __gen9_decoupled_mmio_read(dev_priv, \
> + offset, \
> + fw_engine); \
> + } \
> + GEN6_READ_FOOTER; \
> +}
> +
> +__gen9_decoupled_read(32)
> +__gen9_decoupled_read(64)
> __gen9_read(8)
> __gen9_read(16)
> __gen9_read(32)
> @@ -1099,6 +1186,24 @@ gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
> GEN6_WRITE_FOOTER; \
> }
>
> +#define __gen9_decoupled_write(x) \
> +static void \
> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
> + bool trace) { \
> + enum forcewake_domains fw_engine; \
> + GEN6_WRITE_HEADER; \
> + fw_engine = __gen9_reg_write_fw_domains(offset); \
> + if (!fw_engine || !(fw_engine & ~dev_priv->uncore.fw_domains_active)) \
> + __raw_i915_write##x(dev_priv, reg, val); \
> + else \
> + __gen9_decoupled_mmio_write(dev_priv, \
> + offset, \
> + val, \
> + fw_engine); \
> + GEN6_WRITE_FOOTER; \
> +}
> +
> +__gen9_decoupled_write(32)
> __gen9_write(8)
> __gen9_write(16)
> __gen9_write(32)
> @@ -1322,6 +1427,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
> case 9:
> ASSIGN_WRITE_MMIO_VFUNCS(gen9);
> ASSIGN_READ_MMIO_VFUNCS(gen9);
> + if (HAS_DECOUPLED_MMIO(dev_priv)) {
> + dev_priv->uncore.funcs.mmio_readl =
> + gen9_decoupled_read32;
> + dev_priv->uncore.funcs.mmio_readq =
> + gen9_decoupled_read64;
> + dev_priv->uncore.funcs.mmio_writel =
> + gen9_decoupled_write32;
> + }
> break;
> case 8:
> if (IS_CHERRYVIEW(dev_priv)) {
Minus the high-level questions above, the patch looks good to me.
Regards,
Tvrtko
More information about the Intel-gfx-trybot
mailing list