[Intel-gfx] [RFC] drm/i915: Simplify waiting for registers
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Apr 12 10:42:55 UTC 2017
On 12/04/2017 11:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> I was thinking if we could get away with simplifying the API
> a bit by getting rid of the _fw variant and only have these
> three functions with a common implementation:
>
> intel_wait_for_register
> intel_wait_for_register_atomic
> __intel_wait_for_register
>
> The fast/busy loop in all cases grabs it's own forcewake and
> is done under the uncore lock. The extra overhead for call
> sites which already have the forcewake, or do not need it is
> there, but not sure that it matters for where wait_for_register
> functions are used.
This is probably quite bad for pcode, since AFAIR those can be quite
slow. So scratch this idea I think..
Regards,
Tvrtko
> This makes the difference between the normal and atomic API
> just in the fact atomic doesn't sleep while the normal can.
>
> I've also put in the change to move the BUILD_BUG_ON from
> _wait_for_atomic to wait_for_atomic(_us) macros, as Michal
> suggested at one point, which should fix the GCC 4.4 build
> issue.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 75 ++++++++++++++-----
> drivers/gpu/drm/i915/intel_drv.h | 18 ++++-
> drivers/gpu/drm/i915/intel_i2c.c | 4 +-
> drivers/gpu/drm/i915/intel_pm.c | 10 ++-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 9 ++-
> drivers/gpu/drm/i915/intel_uc.c | 10 +--
> drivers/gpu/drm/i915/intel_uncore.c | 123 +++++++++-----------------------
> 7 files changed, 121 insertions(+), 128 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ed079c244b5d..ba95a54b9dfa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3083,27 +3083,68 @@ u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
>
> void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
>
> -int intel_wait_for_register(struct drm_i915_private *dev_priv,
> - i915_reg_t reg,
> - u32 mask,
> - u32 value,
> - unsigned int timeout_ms);
> -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> - i915_reg_t reg,
> - u32 mask,
> - u32 value,
> - unsigned int fast_timeout_us,
> - unsigned int slow_timeout_ms,
> - u32 *out_value);
> -static inline
> -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> +int __intel_wait_for_register(struct drm_i915_private *dev_priv,
> + i915_reg_t reg,
> + u32 mask,
> + u32 value,
> + unsigned int fast_timeout_us,
> + unsigned int slow_timeout_ms,
> + u32 *out_value);
> +
> +/**
> + * intel_wait_for_register - wait until register matches expected state
> + * @dev_priv: the i915 device
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: expected value
> + * @timeout_ms: timeout in millisecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + * (I915_READ(reg) & mask) == value
> + *
> + * Otherwise, the wait will timeout after @timeout_ms milliseconds.
> + *
> + * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> + */
> +static inline int
> +intel_wait_for_register(struct drm_i915_private *dev_priv,
> + i915_reg_t reg,
> + u32 mask,
> + u32 value,
> + unsigned int timeout_ms)
> +{
> + return __intel_wait_for_register(dev_priv, reg, mask, value,
> + 2, timeout_ms, NULL);
> +}
> +
> +/**
> + * intel_wait_for_register_atomic - wait until register matches expected state
> + * @dev_priv: the i915 device
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: expected value
> + * @timeout_us: timeout in microsecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + * (I915_READ(reg) & mask) == value
> + *
> + * The wait is done with busy-looping.
> + *
> + * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> + */
> +static inline int
> +intel_wait_for_register_atomic(struct drm_i915_private *dev_priv,
> i915_reg_t reg,
> u32 mask,
> u32 value,
> - unsigned int timeout_ms)
> + unsigned int timeout_us)
> {
> - return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
> - 2, timeout_ms, NULL);
> + return __intel_wait_for_register(dev_priv, reg, mask, value,
> + timeout_us, 0, NULL);
> }
>
> static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 43ea74882f9a..d7018d44371c 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -88,7 +88,6 @@
> int cpu, ret, timeout = (US) * 1000; \
> u64 base; \
> _WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
> - BUILD_BUG_ON((US) > 50000); \
> if (!(ATOMIC)) { \
> preempt_disable(); \
> cpu = smp_processor_id(); \
> @@ -130,8 +129,21 @@
> ret__; \
> })
>
> -#define wait_for_atomic(COND, MS) _wait_for_atomic((COND), (MS) * 1000, 1)
> -#define wait_for_atomic_us(COND, US) _wait_for_atomic((COND), (US), 1)
> +#define wait_for_atomic(COND, MS) \
> +({ \
> + int ret__; \
> + BUILD_BUG_ON((MS) > 50); \
> + ret__ = _wait_for_atomic((COND), (MS) * 1000, 1); \
> + ret__; \
> +})
> +
> +#define wait_for_atomic_us(COND, US) \
> +({ \
> + int ret__; \
> + BUILD_BUG_ON((US) > 50000); \
> + ret__ = _wait_for_atomic((COND), (US), 1); \
> + ret__; \
> +})
>
> #define KHz(x) (1000 * (x))
> #define MHz(x) KHz(1000 * (x))
> diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
> index b6401e8f1bd6..ddc615d3d40d 100644
> --- a/drivers/gpu/drm/i915/intel_i2c.c
> +++ b/drivers/gpu/drm/i915/intel_i2c.c
> @@ -297,9 +297,7 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
> add_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
> I915_WRITE_FW(GMBUS4, irq_enable);
>
> - ret = intel_wait_for_register_fw(dev_priv,
> - GMBUS2, GMBUS_ACTIVE, 0,
> - 10);
> + ret = intel_wait_for_register(dev_priv, GMBUS2, GMBUS_ACTIVE, 0, 10);
>
> I915_WRITE_FW(GMBUS4, 0);
> remove_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index cacb65fa2dd5..a93e6427aabf 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -8135,9 +8135,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
> I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
> I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>
> - if (__intel_wait_for_register_fw(dev_priv,
> - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> - 500, 0, NULL)) {
> + if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
> + GEN6_PCODE_READY, 0, 500)) {
> DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
> return -ETIMEDOUT;
> }
> @@ -8180,9 +8179,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
> I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
> I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>
> - if (__intel_wait_for_register_fw(dev_priv,
> - GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> - 500, 0, NULL)) {
> + if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
> + GEN6_PCODE_READY, 0, 500)) {
> DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
> return -ETIMEDOUT;
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 97d5fcca8805..af31d786a517 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1729,11 +1729,10 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
> I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
>
> /* Wait for the ring not to be idle, i.e. for it to wake up. */
> - if (__intel_wait_for_register_fw(dev_priv,
> - GEN6_BSD_SLEEP_PSMI_CONTROL,
> - GEN6_BSD_SLEEP_INDICATOR,
> - 0,
> - 1000, 0, NULL))
> + if (intel_wait_for_register_atomic(dev_priv,
> + GEN6_BSD_SLEEP_PSMI_CONTROL,
> + GEN6_BSD_SLEEP_INDICATOR, 0,
> + 1000))
> DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
>
> /* Now that the ring is fully powered up, update the tail */
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 4364b1a9064e..bae60f5c52d6 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -389,11 +389,11 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
> * No GuC command should ever take longer than 10ms.
> * Fast commands should still complete in 10us.
> */
> - ret = __intel_wait_for_register_fw(dev_priv,
> - SOFT_SCRATCH(0),
> - INTEL_GUC_RECV_MASK,
> - INTEL_GUC_RECV_MASK,
> - 10, 10, &status);
> + ret = __intel_wait_for_register(dev_priv,
> + SOFT_SCRATCH(0),
> + INTEL_GUC_RECV_MASK,
> + INTEL_GUC_RECV_MASK,
> + 10, 10, &status);
> if (status != INTEL_GUC_STATUS_SUCCESS) {
> /*
> * Either the GuC explicitly returned an error (which
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index fb38c7692fc2..215fbed8808c 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1535,9 +1535,8 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
> __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
>
> /* Spin waiting for the device to ack the reset requests */
> - return intel_wait_for_register_fw(dev_priv,
> - GEN6_GDRST, hw_domain_mask, 0,
> - 500);
> + return intel_wait_for_register(dev_priv, GEN6_GDRST, hw_domain_mask, 0,
> + 500);
> }
>
> /**
> @@ -1585,7 +1584,7 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
> }
>
> /**
> - * __intel_wait_for_register_fw - wait until register matches expected state
> + * __intel_wait_for_register - wait until register matches expected state
> * @dev_priv: the i915 device
> * @reg: the register to read
> * @mask: mask to apply to register value
> @@ -1597,90 +1596,47 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
> * This routine waits until the target register @reg contains the expected
> * @value after applying the @mask, i.e. it waits until ::
> *
> - * (I915_READ_FW(reg) & mask) == value
> + * (I915_READ(reg) & mask) == value
> *
> * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
> - * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
> - * must be not larger than 20,0000 microseconds.
> - *
> - * Note that this routine assumes the caller holds forcewake asserted, it is
> - * not suitable for very long waits. See intel_wait_for_register() if you
> - * wish to wait without holding forcewake for the duration (i.e. you expect
> - * the wait to be slow).
> *
> * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> */
> -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> - i915_reg_t reg,
> - u32 mask,
> - u32 value,
> - unsigned int fast_timeout_us,
> - unsigned int slow_timeout_ms,
> - u32 *out_value)
> -{
> +int __intel_wait_for_register(struct drm_i915_private *dev_priv,
> + i915_reg_t reg,
> + u32 mask,
> + u32 value,
> + unsigned int fast_timeout_us,
> + unsigned int slow_timeout_ms,
> + u32 *out_value)
> +{
> + unsigned long flags;
> + unsigned int fw_domains;
> u32 reg_value;
> -#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
> int ret;
>
> /* Catch any overuse of this function */
> might_sleep_if(slow_timeout_ms);
> - GEM_BUG_ON(fast_timeout_us > 20000);
> -
> - ret = -ETIMEDOUT;
> - if (fast_timeout_us && fast_timeout_us <= 20000)
> - ret = _wait_for_atomic(done, fast_timeout_us, 0);
> - if (ret)
> - ret = wait_for(done, slow_timeout_ms);
> + GEM_BUG_ON(fast_timeout_us > 1000);
>
> - if (out_value)
> - *out_value = reg_value;
> + spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
> + intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
>
> - return ret;
> +#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
> + ret = _wait_for_atomic(done, fast_timeout_us, 1);
> #undef done
> -}
>
> -/**
> - * intel_wait_for_register - wait until register matches expected state
> - * @dev_priv: the i915 device
> - * @reg: the register to read
> - * @mask: mask to apply to register value
> - * @value: expected value
> - * @timeout_ms: timeout in millisecond
> - *
> - * This routine waits until the target register @reg contains the expected
> - * @value after applying the @mask, i.e. it waits until ::
> - *
> - * (I915_READ(reg) & mask) == value
> - *
> - * Otherwise, the wait will timeout after @timeout_ms milliseconds.
> - *
> - * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> - */
> -int intel_wait_for_register(struct drm_i915_private *dev_priv,
> - i915_reg_t reg,
> - u32 mask,
> - u32 value,
> - unsigned int timeout_ms)
> -{
> - unsigned fw =
> - intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
> - int ret;
> -
> - might_sleep();
> -
> - spin_lock_irq(&dev_priv->uncore.lock);
> - intel_uncore_forcewake_get__locked(dev_priv, fw);
> -
> - ret = __intel_wait_for_register_fw(dev_priv,
> - reg, mask, value,
> - 2, 0, NULL);
> + intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>
> - intel_uncore_forcewake_put__locked(dev_priv, fw);
> - spin_unlock_irq(&dev_priv->uncore.lock);
> +#define done (((reg_value = I915_READ_NOTRACE(reg)) & mask) == value)
> + if (ret && slow_timeout_ms)
> + ret = wait_for(done, slow_timeout_ms);
> +#undef done
>
> - if (ret)
> - ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
> - timeout_ms);
> + if (out_value)
> + *out_value = reg_value;
>
> return ret;
> }
> @@ -1693,11 +1649,11 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
> I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
> _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
>
> - ret = intel_wait_for_register_fw(dev_priv,
> - RING_RESET_CTL(engine->mmio_base),
> - RESET_CTL_READY_TO_RESET,
> - RESET_CTL_READY_TO_RESET,
> - 700);
> + ret = intel_wait_for_register(dev_priv,
> + RING_RESET_CTL(engine->mmio_base),
> + RESET_CTL_READY_TO_RESET,
> + RESET_CTL_READY_TO_RESET,
> + 700);
> if (ret)
> DRM_ERROR("%s: reset request timeout\n", engine->name);
>
> @@ -1780,21 +1736,10 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
>
> int intel_guc_reset(struct drm_i915_private *dev_priv)
> {
> - int ret;
> - unsigned long irqflags;
> -
> if (!HAS_GUC(dev_priv))
> return -EINVAL;
>
> - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
> -
> - ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
> -
> - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
> - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -
> - return ret;
> + return gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
> }
>
> bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
>
More information about the Intel-gfx
mailing list