[Intel-gfx] [PATCH v4] drm/i915/bxt: Broxton decoupled MMIO

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Nov 15 10:59:54 UTC 2016


On 15/11/2016 10:56, Praveen Paneri wrote:
> Hi Tvrtko,
>
> On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote:
>>
>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>> Decoupled MMIO is an alternative way to access forcewake domain
>>> registers, which requires less cycles for a single read/write and
>>> avoids frequent software forcewake.
>>> This certainly gives advantage over the forcewake as this new
>>> mechanism “decouples” CPU cycles and allow them to complete even
>>> when GT is in a CPD (frequency change) or C6 state.
>>>
>>> This can co-exist with forcewake and we will continue to use forcewake
>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>> separately and land into funny situations.
>>>
>>> v2:
>>> - Moved platform check out of the function and got rid of duplicate
>>>  functions to find out decoupled power domain (Chris)
>>> - Added a check for forcewake already held and skipped decoupled
>>>  access (Chris)
>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>
>>> v3:
>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>> - Changed decoupled operation to enum and used u32 instead of
>>>  uint_32 data type for register offset (Tvrtko)
>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>>  routines (Tvrtko)
>>>
>>> v4:
>>> - Fixed alignment and variable names (Chris)
>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>
>>> Signed-off-by: Zhe Wang <zhe1.wang at intel.com>
>>> Signed-off-by: Praveen Paneri <praveen.paneri at intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>>  drivers/gpu/drm/i915/intel_uncore.c | 109
>>> ++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 134 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 4e7148a..158f05c 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>>  #define FW_REG_READ  (1)
>>>  #define FW_REG_WRITE (2)
>>>
>>> +enum decoupled_power_domain {
>>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +enum decoupled_ops {
>>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>>> +    GEN9_DECOUPLED_OP_READ
>>> +};
>>> +
>>>  enum forcewake_domains
>>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>                     i915_reg_t reg, unsigned int op);
>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>>      func(cursor_needs_physical); \
>>>      func(hws_needs_physical); \
>>>      func(overlay_needs_physical); \
>>> -    func(supports_tv)
>>> +    func(supports_tv); \
>>> +    func(has_decoupled_mmio)
>>>
>>>  struct sseu_dev_info {
>>>      u8 slice_mask;
>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>>  #define GT_FREQUENCY_MULTIPLIER 50
>>>  #define GEN9_FREQ_SCALER 3
>>>
>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>> +
>>
>> Looks like I've missed this before, but Would you mind renaming the dev
>> argument to dev_priv?
>>
>>>  #include "i915_trace.h"
>>>
>>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>>> *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 70a99ce..fce8e19 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -363,6 +363,7 @@
>>>      .has_hw_contexts = 1,
>>>      .has_logical_ring_contexts = 1,
>>>      .has_guc = 1,
>>> +    .has_decoupled_mmio = 1,
>>>      .ddb_size = 512,
>>>      GEN_DEFAULT_PIPEOFFSETS,
>>>      IVB_CURSOR_OFFSETS,
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index 3361d7f..c70c07a 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -7342,6 +7342,13 @@ enum {
>>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>>
>>> +/* Decoupled MMIO register pair for kernel driver */
>>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>>> +
>>>  /* Per-pipe DDI Function Control */
>>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>>> b/drivers/gpu/drm/i915/intel_uncore.c
>>> index e2b188d..e4c50cb 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>>>      __unclaimed_reg_debug(dev_priv, reg, read, before);
>>>  }
>>>
>>> +static const enum decoupled_power_domain fw2dpd_domain[] = {
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_BLITTER,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +/*
>>> + * Decoupled MMIO access for only 1 DWORD
>>> + */
>>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>>> *dev_priv,
>>> +                     u32 reg,
>>> +                     enum forcewake_domains fw_domain,
>>> +                     enum decoupled_ops operation)
>>> +{
>>> +    enum decoupled_power_domain dp_domain;
>>> +    u32 ctrl_reg_data = 0;
>>> +
>>> +    dp_domain = fw2dpd_domain[fw_domain - 1];
>>> +
>>> +    ctrl_reg_data |= reg;
>>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>>> +    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
>>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>>> ctrl_reg_data);
>>> +
>>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>>> +                GEN9_DECOUPLED_REG0_DW1) &
>>> +                GEN9_DECOUPLED_DW1_GO) == 0,
>>> +                FORCEWAKE_ACK_TIMEOUT_MS))
>>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>>> +}
>>> +
>>> +static inline u32
>>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
>>> +                 u32 reg,
>>> +                 enum forcewake_domains fw_domain)
>>> +{
>>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>>> +                     GEN9_DECOUPLED_OP_READ);
>>> +
>>> +    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
>>> +}
>>> +
>>> +static inline void
>>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>>> +                u32 reg, u32 data,
>>> +                enum forcewake_domains fw_domain)
>>> +{
>>> +
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
>>> +
>>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>>> +                     GEN9_DECOUPLED_OP_WRITE);
>>> +}
>>> +
>>> +
>>>  #define GEN2_READ_HEADER(x) \
>>>      u##x val = 0; \
>>>      assert_rpm_wakelock_held(dev_priv);
>>> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>      GEN6_READ_FOOTER; \
>>>  }
>>>
>>> +#define __gen9_decoupled_read(x) \
>>> +static u##x \
>>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
>>> +               i915_reg_t reg, bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>> +    GEN6_READ_HEADER(x); \
>>> +    fw_engine = __fwtable_reg_read_fw_domains(offset); \
>>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
>>> +        unsigned i; \
>>> +        u32 *ptr_data = (u32 *) &val; \
>>> +        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>>> +            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
>>> +                                 offset, \
>>> +                                 fw_engine); \
>>> +    } else { \
>>> +        val = __raw_i915_read##x(dev_priv, reg); \
>>> +    } \
>>> +    GEN6_READ_FOOTER; \
>>> +}
>>> +
>>> +__gen9_decoupled_read(32)
>>> +__gen9_decoupled_read(64)
>>>  __fwtable_read(8)
>>>  __fwtable_read(16)
>>>  __fwtable_read(32)
>>> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>      GEN6_WRITE_FOOTER; \
>>>  }
>>>
>>> +#define __gen9_decoupled_write(x) \
>>> +static void \
>>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
>>> +            i915_reg_t reg, u##x val, \
>>> +        bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>> +    GEN6_WRITE_HEADER; \
>>> +    fw_engine = __fwtable_reg_write_fw_domains(offset); \
>>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
>>> +        __gen9_decoupled_mmio_write(dev_priv, \
>>> +                        offset, \
>>> +                        val, \
>>> +                        fw_engine); \
>>> +    else \
>>> +        __raw_i915_write##x(dev_priv, reg, val); \
>>> +    GEN6_WRITE_FOOTER; \
>>> +}
>>> +
>>> +__gen9_decoupled_write(32)
>>>  __fwtable_write(8)
>>>  __fwtable_write(16)
>>>  __fwtable_write(32)
>>> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private
>>> *dev_priv)
>>>          ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>>>          ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>>>          ASSIGN_READ_MMIO_VFUNCS(fwtable);
>>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>>> +            dev_priv->uncore.funcs.mmio_readl =
>>> +                        gen9_decoupled_read32;
>>> +            dev_priv->uncore.funcs.mmio_readq =
>>> +                        gen9_decoupled_read64;
>>> +            dev_priv->uncore.funcs.mmio_writel =
>>> +                        gen9_decoupled_write32;
>>> +        }
>>>          break;
>>>      case 8:
>>>          if (IS_CHERRYVIEW(dev_priv)) {
>>>
>>
>> With the rename fix above:
>>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Thank you for your review. Will fix the name and re send.

Chris also made a good suggestion since the HAS_DECOUPLED_MMIO is 
currently different from info->has_decoupled_mmio which is confusing.

I think his idea was to edit info->has_decoupled_mmio in 
intel_device_runtime_init using the stepping data and remove the same 
from the macro.

Regards,

Tvrtko


More information about the Intel-gfx mailing list