[Intel-gfx] [RFC] drm/i915: Simplify waiting for registers

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Apr 12 10:42:55 UTC 2017


On 12/04/2017 11:36, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> I was thinking if we could get away with simplifying the API
> a bit by getting rid of the _fw variant and only have these
> three functions with a common implementation:
>
>   intel_wait_for_register
>   intel_wait_for_register_atomic
>   __intel_wait_for_register
>
> The fast/busy loop in all cases grabs it's own forcewake and
> is done under the uncore lock. The extra overhead for call
> sites which already have the forcewake, or do not need it is
> there, but not sure that it matters for where wait_for_register
> functions are used.

This is probably quite bad for pcode, since AFAIR those can be quite 
slow. So scratch this idea I think..

Regards,

Tvrtko

> This makes the difference between the normal and atomic API
> just in the fact atomic doesn't sleep while the normal can.
>
> I've also put in the change to move the BUILD_BUG_ON from
> _wait_for_atomic to wait_for_atomic(_us) macros, as Michal
> suggested at one point, which should fix the GCC 4.4 build
> issue.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  75 ++++++++++++++-----
>  drivers/gpu/drm/i915/intel_drv.h        |  18 ++++-
>  drivers/gpu/drm/i915/intel_i2c.c        |   4 +-
>  drivers/gpu/drm/i915/intel_pm.c         |  10 ++-
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   9 ++-
>  drivers/gpu/drm/i915/intel_uc.c         |  10 +--
>  drivers/gpu/drm/i915/intel_uncore.c     | 123 +++++++++-----------------------
>  7 files changed, 121 insertions(+), 128 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ed079c244b5d..ba95a54b9dfa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3083,27 +3083,68 @@ u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
>
>  void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
>
> -int intel_wait_for_register(struct drm_i915_private *dev_priv,
> -			    i915_reg_t reg,
> -			    u32 mask,
> -			    u32 value,
> -			    unsigned int timeout_ms);
> -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> -				 i915_reg_t reg,
> -				 u32 mask,
> -				 u32 value,
> -				 unsigned int fast_timeout_us,
> -				 unsigned int slow_timeout_ms,
> -				 u32 *out_value);
> -static inline
> -int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> +int __intel_wait_for_register(struct drm_i915_private *dev_priv,
> +			      i915_reg_t reg,
> +			      u32 mask,
> +			      u32 value,
> +			      unsigned int fast_timeout_us,
> +			      unsigned int slow_timeout_ms,
> +			      u32 *out_value);
> +
> +/**
> + * intel_wait_for_register - wait until register matches expected state
> + * @dev_priv: the i915 device
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: expected value
> + * @timeout_ms: timeout in millisecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + *     (I915_READ(reg) & mask) == value
> + *
> + * Otherwise, the wait will timeout after @timeout_ms milliseconds.
> + *
> + * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> + */
> +static inline int
> +intel_wait_for_register(struct drm_i915_private *dev_priv,
> +			i915_reg_t reg,
> +			u32 mask,
> +			u32 value,
> +			unsigned int timeout_ms)
> +{
> +	return __intel_wait_for_register(dev_priv, reg, mask, value,
> +					 2, timeout_ms, NULL);
> +}
> +
> +/**
> + * intel_wait_for_register_atomic - wait until register matches expected state
> + * @dev_priv: the i915 device
> + * @reg: the register to read
> + * @mask: mask to apply to register value
> + * @value: expected value
> + * @timeout_us: timeout in microsecond
> + *
> + * This routine waits until the target register @reg contains the expected
> + * @value after applying the @mask, i.e. it waits until ::
> + *
> + *     (I915_READ(reg) & mask) == value
> + *
> + * The wait is done with busy-looping.
> + *
> + * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> + */
> +static inline int
> +intel_wait_for_register_atomic(struct drm_i915_private *dev_priv,
>  			       i915_reg_t reg,
>  			       u32 mask,
>  			       u32 value,
> -			       unsigned int timeout_ms)
> +			       unsigned int timeout_us)
>  {
> -	return __intel_wait_for_register_fw(dev_priv, reg, mask, value,
> -					    2, timeout_ms, NULL);
> +	return __intel_wait_for_register(dev_priv, reg, mask, value,
> +					 timeout_us, 0, NULL);
>  }
>
>  static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 43ea74882f9a..d7018d44371c 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -88,7 +88,6 @@
>  	int cpu, ret, timeout = (US) * 1000; \
>  	u64 base; \
>  	_WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
> -	BUILD_BUG_ON((US) > 50000); \
>  	if (!(ATOMIC)) { \
>  		preempt_disable(); \
>  		cpu = smp_processor_id(); \
> @@ -130,8 +129,21 @@
>  	ret__; \
>  })
>
> -#define wait_for_atomic(COND, MS)	_wait_for_atomic((COND), (MS) * 1000, 1)
> -#define wait_for_atomic_us(COND, US)	_wait_for_atomic((COND), (US), 1)
> +#define wait_for_atomic(COND, MS) \
> +({ \
> +	int ret__; \
> +	BUILD_BUG_ON((MS) > 50); \
> +	ret__ = _wait_for_atomic((COND), (MS) * 1000, 1); \
> +	ret__; \
> +})
> +
> +#define wait_for_atomic_us(COND, US) \
> +({ \
> +	int ret__; \
> +	BUILD_BUG_ON((US) > 50000); \
> +	ret__ = _wait_for_atomic((COND), (US), 1); \
> +	ret__; \
> +})
>
>  #define KHz(x) (1000 * (x))
>  #define MHz(x) KHz(1000 * (x))
> diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
> index b6401e8f1bd6..ddc615d3d40d 100644
> --- a/drivers/gpu/drm/i915/intel_i2c.c
> +++ b/drivers/gpu/drm/i915/intel_i2c.c
> @@ -297,9 +297,7 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
>  	add_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
>  	I915_WRITE_FW(GMBUS4, irq_enable);
>
> -	ret = intel_wait_for_register_fw(dev_priv,
> -					 GMBUS2, GMBUS_ACTIVE, 0,
> -					 10);
> +	ret = intel_wait_for_register(dev_priv, GMBUS2, GMBUS_ACTIVE, 0, 10);
>
>  	I915_WRITE_FW(GMBUS4, 0);
>  	remove_wait_queue(&dev_priv->gmbus_wait_queue, &wait);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index cacb65fa2dd5..a93e6427aabf 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -8135,9 +8135,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val
>  	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
>  	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> -					 500, 0, NULL)) {
> +	if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
> +					   GEN6_PCODE_READY, 0, 500)) {
>  		DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox);
>  		return -ETIMEDOUT;
>  	}
> @@ -8180,9 +8179,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
>  	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
>  	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
>
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
> -					 500, 0, NULL)) {
> +	if (intel_wait_for_register_atomic(dev_priv, GEN6_PCODE_MAILBOX,
> +					   GEN6_PCODE_READY, 0, 500)) {
>  		DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox);
>  		return -ETIMEDOUT;
>  	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 97d5fcca8805..af31d786a517 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1729,11 +1729,10 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
>  	I915_WRITE64_FW(GEN6_BSD_RNCID, 0x0);
>
>  	/* Wait for the ring not to be idle, i.e. for it to wake up. */
> -	if (__intel_wait_for_register_fw(dev_priv,
> -					 GEN6_BSD_SLEEP_PSMI_CONTROL,
> -					 GEN6_BSD_SLEEP_INDICATOR,
> -					 0,
> -					 1000, 0, NULL))
> +	if (intel_wait_for_register_atomic(dev_priv,
> +					   GEN6_BSD_SLEEP_PSMI_CONTROL,
> +					   GEN6_BSD_SLEEP_INDICATOR, 0,
> +					   1000))
>  		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
>
>  	/* Now that the ring is fully powered up, update the tail */
> diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
> index 4364b1a9064e..bae60f5c52d6 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -389,11 +389,11 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
>  	 * No GuC command should ever take longer than 10ms.
>  	 * Fast commands should still complete in 10us.
>  	 */
> -	ret = __intel_wait_for_register_fw(dev_priv,
> -					   SOFT_SCRATCH(0),
> -					   INTEL_GUC_RECV_MASK,
> -					   INTEL_GUC_RECV_MASK,
> -					   10, 10, &status);
> +	ret = __intel_wait_for_register(dev_priv,
> +					SOFT_SCRATCH(0),
> +					INTEL_GUC_RECV_MASK,
> +					INTEL_GUC_RECV_MASK,
> +					10, 10, &status);
>  	if (status != INTEL_GUC_STATUS_SUCCESS) {
>  		/*
>  		 * Either the GuC explicitly returned an error (which
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index fb38c7692fc2..215fbed8808c 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1535,9 +1535,8 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
>  	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
>
>  	/* Spin waiting for the device to ack the reset requests */
> -	return intel_wait_for_register_fw(dev_priv,
> -					  GEN6_GDRST, hw_domain_mask, 0,
> -					  500);
> +	return intel_wait_for_register(dev_priv, GEN6_GDRST, hw_domain_mask, 0,
> +				       500);
>  }
>
>  /**
> @@ -1585,7 +1584,7 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
>  }
>
>  /**
> - * __intel_wait_for_register_fw - wait until register matches expected state
> + * __intel_wait_for_register - wait until register matches expected state
>   * @dev_priv: the i915 device
>   * @reg: the register to read
>   * @mask: mask to apply to register value
> @@ -1597,90 +1596,47 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
>   * This routine waits until the target register @reg contains the expected
>   * @value after applying the @mask, i.e. it waits until ::
>   *
> - *     (I915_READ_FW(reg) & mask) == value
> + *     (I915_READ(reg) & mask) == value
>   *
>   * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
> - * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
> - * must be not larger than 20,0000 microseconds.
> - *
> - * Note that this routine assumes the caller holds forcewake asserted, it is
> - * not suitable for very long waits. See intel_wait_for_register() if you
> - * wish to wait without holding forcewake for the duration (i.e. you expect
> - * the wait to be slow).
>   *
>   * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
>   */
> -int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
> -				 i915_reg_t reg,
> -				 u32 mask,
> -				 u32 value,
> -				 unsigned int fast_timeout_us,
> -				 unsigned int slow_timeout_ms,
> -				 u32 *out_value)
> -{
> +int __intel_wait_for_register(struct drm_i915_private *dev_priv,
> +			      i915_reg_t reg,
> +			      u32 mask,
> +			      u32 value,
> +			      unsigned int fast_timeout_us,
> +			      unsigned int slow_timeout_ms,
> +			      u32 *out_value)
> +{
> +	unsigned long flags;
> +	unsigned int fw_domains;
>  	u32 reg_value;
> -#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
>  	int ret;
>
>  	/* Catch any overuse of this function */
>  	might_sleep_if(slow_timeout_ms);
> -	GEM_BUG_ON(fast_timeout_us > 20000);
> -
> -	ret = -ETIMEDOUT;
> -	if (fast_timeout_us && fast_timeout_us <= 20000)
> -		ret = _wait_for_atomic(done, fast_timeout_us, 0);
> -	if (ret)
> -		ret = wait_for(done, slow_timeout_ms);
> +	GEM_BUG_ON(fast_timeout_us > 1000);
>
> -	if (out_value)
> -		*out_value = reg_value;
> +	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> +	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
> +	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
>
> -	return ret;
> +#define done (((reg_value = I915_READ_FW(reg)) & mask) == value)
> +	ret = _wait_for_atomic(done, fast_timeout_us, 1);
>  #undef done
> -}
>
> -/**
> - * intel_wait_for_register - wait until register matches expected state
> - * @dev_priv: the i915 device
> - * @reg: the register to read
> - * @mask: mask to apply to register value
> - * @value: expected value
> - * @timeout_ms: timeout in millisecond
> - *
> - * This routine waits until the target register @reg contains the expected
> - * @value after applying the @mask, i.e. it waits until ::
> - *
> - *     (I915_READ(reg) & mask) == value
> - *
> - * Otherwise, the wait will timeout after @timeout_ms milliseconds.
> - *
> - * Returns 0 if the register matches the desired condition, or -ETIMEOUT.
> - */
> -int intel_wait_for_register(struct drm_i915_private *dev_priv,
> -			    i915_reg_t reg,
> -			    u32 mask,
> -			    u32 value,
> -			    unsigned int timeout_ms)
> -{
> -	unsigned fw =
> -		intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ);
> -	int ret;
> -
> -	might_sleep();
> -
> -	spin_lock_irq(&dev_priv->uncore.lock);
> -	intel_uncore_forcewake_get__locked(dev_priv, fw);
> -
> -	ret = __intel_wait_for_register_fw(dev_priv,
> -					   reg, mask, value,
> -					   2, 0, NULL);
> +	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
> +	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>
> -	intel_uncore_forcewake_put__locked(dev_priv, fw);
> -	spin_unlock_irq(&dev_priv->uncore.lock);
> +#define done (((reg_value = I915_READ_NOTRACE(reg)) & mask) == value)
> +	if (ret && slow_timeout_ms)
> +		ret = wait_for(done, slow_timeout_ms);
> +#undef done
>
> -	if (ret)
> -		ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value,
> -			       timeout_ms);
> +	if (out_value)
> +		*out_value = reg_value;
>
>  	return ret;
>  }
> @@ -1693,11 +1649,11 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
>  	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
>  		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
>
> -	ret = intel_wait_for_register_fw(dev_priv,
> -					 RING_RESET_CTL(engine->mmio_base),
> -					 RESET_CTL_READY_TO_RESET,
> -					 RESET_CTL_READY_TO_RESET,
> -					 700);
> +	ret = intel_wait_for_register(dev_priv,
> +				      RING_RESET_CTL(engine->mmio_base),
> +				      RESET_CTL_READY_TO_RESET,
> +				      RESET_CTL_READY_TO_RESET,
> +				      700);
>  	if (ret)
>  		DRM_ERROR("%s: reset request timeout\n", engine->name);
>
> @@ -1780,21 +1736,10 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
>
>  int intel_guc_reset(struct drm_i915_private *dev_priv)
>  {
> -	int ret;
> -	unsigned long irqflags;
> -
>  	if (!HAS_GUC(dev_priv))
>  		return -EINVAL;
>
> -	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
> -
> -	ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
> -
> -	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
> -	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> -
> -	return ret;
> +	return gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
>  }
>
>  bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
>


More information about the Intel-gfx mailing list