[Intel-gfx] [PATCH v2] drm/i915/vlv: WA for Turbo and RC6 to work together.

S, Deepak deepak.s at intel.com
Tue Mar 4 15:20:02 CET 2014


Hi Ville,

Please review the patch and share your comments.

Thanks
Deepak

On 3/3/2014 11:35 AM, deepak.s at intel.com wrote:
> From: Deepak S <deepak.s at intel.com>
>
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
> on increasing/decreasing the freq. This logic will monitor the C0
> counters of the render/media power wells over the EI period and take
> necessary action based on these values
>
> v2: Refactor duplicate code. (ville)
>
> Signed-off-by: Deepak S <deepak.s at intel.com>
>
> ---
>   drivers/gpu/drm/i915/i915_drv.h |  19 ++++++
>   drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
>   drivers/gpu/drm/i915/i915_reg.h |  15 +++++
>   drivers/gpu/drm/i915/intel_pm.c |  50 ++++++++++----
>   4 files changed, 213 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 728b9c3..2baeeef 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
>   	u32 savePCH_PORT_HOTPLUG;
>   };
>
> +struct intel_rps_ei_calc {
> +	u32 cz_ts_ei;
> +	u32 render_ei_c0;
> +	u32 media_ei_c0;
> +};
> +
>   struct intel_gen6_power_mgmt {
>   	/* work and pm_iir are protected by dev_priv->irq_lock */
>   	struct work_struct work;
> @@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
>   	u8 rp1_delay;
>   	u8 rp0_delay;
>   	u8 hw_max;
> +	u8 hw_min;
>
>   	bool rp_up_masked;
>   	bool rp_down_masked;
>
> +	u32 cz_freq;
> +	u32 ei_interrupt_count;
> +
> +	bool use_RC0_residency_for_turbo;
> +
>   	int last_adj;
>   	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>
> @@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
>   	/* gen6+ rps state */
>   	struct intel_gen6_power_mgmt rps;
>
> +	/* rps wa up ei calculation */
> +	struct intel_rps_ei_calc rps_up_ei;
> +
> +	/* rps wa down ei calculation */
> +	struct intel_rps_ei_calc rps_down_ei;
> +
> +
>   	/* ilk-only ips/rps state. Everything in here is protected by the global
>   	 * mchdev_lock in intel_pm.c */
>   	struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 56edff3..93b6ebf 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>   	}
>   }
>
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> +				struct  intel_rps_ei_calc *rps_ei)
> +{
> +	u32 cz_ts, cz_freq_khz;
> +	u32 render_count, media_count;
> +	u32 elapsed_render, elapsed_media, elapsed_time;
> +	u32 residency = 0;
> +
> +	cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> +	cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> +	render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> +	media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> +	if (rps_ei->cz_ts_ei == 0) {
> +		rps_ei->cz_ts_ei = cz_ts;
> +		rps_ei->render_ei_c0 = render_count;
> +		rps_ei->media_ei_c0 = media_count;
> +
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +	elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> +	rps_ei->cz_ts_ei = cz_ts;
> +
> +	elapsed_render = render_count - rps_ei->render_ei_c0;
> +	rps_ei->render_ei_c0 = render_count;
> +
> +	elapsed_media = media_count - rps_ei->media_ei_c0;
> +	rps_ei->media_ei_c0 = media_count;
> +
> +	/* Convert all the counters into common unit of milli sec */
> +	elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> +	elapsed_render /=  cz_freq_khz;
> +	elapsed_media /= cz_freq_khz;
> +
> +	/* Calculate overall C0 residency percentage only
> +	* if elapsed time is non-zero
> +	*/
> +	if (elapsed_time) {
> +		residency =
> +			((max(elapsed_render, elapsed_media) * 100)
> +				/ elapsed_time);
> +	}
> +
> +	return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> +	u32 residency_C0_up = 0, residency_C0_down = 0;
> +	u8 new_delay;
> +
> +	dev_priv->rps.ei_interrupt_count++;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> +	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> +		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> +		return dev_priv->rps.cur_delay;
> +	}
> +
> +
> +	/* To down throttle, C0 residency should be less than down threshold
> +	* for continuous EI intervals. So calculate down EI counters
> +	* once in VLV_INT_COUNT_FOR_DOWN_EI
> +	*/
> +	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> +		dev_priv->rps.ei_interrupt_count = 0;
> +
> +		residency_C0_down =  vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_down_ei);
> +	} else {
> +		residency_C0_up =  vlv_c0_residency(dev_priv,
> +						&dev_priv->rps_up_ei);
> +	}
> +
> +	new_delay = dev_priv->rps.cur_delay;
> +
> +	/* C0 residency is greater than UP threshold. Increase Frequency */
> +	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +
> +		if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
> +			new_delay = dev_priv->rps.cur_delay + 1;
> +
> +		/*
> +		 * For better performance, jump directly
> +		 * to RPe if we're below it.
> +		 */
> +		if (new_delay < dev_priv->rps.rpe_delay)
> +			new_delay = dev_priv->rps.rpe_delay;
> +
> +	} else if (!dev_priv->rps.ei_interrupt_count &&
> +			(residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> +		/* This means, C0 residency is less than down threshold over
> +		* a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> +		*/
> +		if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
> +			new_delay = dev_priv->rps.cur_delay - 1;
> +	}
> +
> +	return new_delay;
> +}
> +
>   static void gen6_pm_rps_work(struct work_struct *work)
>   {
>   	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
>   	pm_iir = dev_priv->rps.pm_iir;
>   	dev_priv->rps.pm_iir = 0;
>   	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> +	else
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
>   	spin_unlock_irq(&dev_priv->irq_lock);
>
>   	/* Make sure we didn't queue anything we're not going to process. */
> -	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
> +	WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
>
> -	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
> +	if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
>   		return;
>
>   	mutex_lock(&dev_priv->rps.hw_lock);
> @@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>   		else
>   			new_delay = dev_priv->rps.min_delay;
>   		adj = 0;
> +	} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>   	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>   		if (adj < 0)
>   			adj *= 2;
> @@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>   		queue_work(dev_priv->wq, &dev_priv->rps.work);
>   	}
>
> +	if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> +		spin_lock(&dev_priv->irq_lock);
> +		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
> +		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
> +		spin_unlock(&dev_priv->irq_lock);
> +		DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
> +
> +		queue_work(dev_priv->wq, &dev_priv->rps.work);
> +	}
> +
>   	if (HAS_VEBOX(dev_priv->dev)) {
>   		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>   			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
> @@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>   		if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
>   			gmbus_irq_handler(dev);
>
> -		if (pm_iir)
> +		if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
>   			gen6_rps_irq_handler(dev_priv, pm_iir);
>
>   		I915_WRITE(GTIIR, gt_iir);
> @@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>   			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>
>   		dev_priv->pm_irq_mask = 0xffffffff;
> +
> +		if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
> +			pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
> +		} else {
> +			dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
> +			pm_irqs |= GEN6_PM_RPS_EVENTS;
> +		}
> +
>   		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>   		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>   		I915_WRITE(GEN6_PMIER, pm_irqs);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f73a49d..e58b37e 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -391,6 +391,7 @@
>   #define PUNIT_REG_GPU_FREQ_STS			0xd8
>   #define   GENFREQSTATUS				(1<<0)
>   #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP			0xce
>
>   #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
>   #define PUNIT_FUSE_BUS1				0xf5 /* bits 55:48 */
> @@ -406,6 +407,11 @@
>   #define   FB_FMAX_VMIN_FREQ_LO_SHIFT		27
>   #define   FB_FMAX_VMIN_FREQ_LO_MASK		0xf8000000
>
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC		100000
> +#define VLV_RP_UP_EI_THRESHOLD			90
> +#define VLV_RP_DOWN_EI_THRESHOLD		70
> +#define VLV_INT_COUNT_FOR_DOWN_EI		5
> +
>   /* vlv2 north clock has */
>   #define CCK_FUSE_REG				0x8
>   #define  CCK_FUSE_HPLL_FREQ_MASK		0x3
> @@ -4857,6 +4863,7 @@
>   #define  VLV_GTLC_PW_STATUS			0x130094
>   #define VLV_GTLC_PW_RENDER_STATUS_MASK		0x80
>   #define VLV_GTLC_PW_MEDIA_STATUS_MASK		0x20
> +#define VLV_GTLC_SURVIVABILITY_REG              0x130098
>   #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
>   #define   FORCEWAKE_KERNEL			0x1
>   #define   FORCEWAKE_USER			0x2
> @@ -4864,6 +4871,11 @@
>   #define  ECOBUS					0xa180
>   #define    FORCEWAKE_MT_ENABLE			(1<<5)
>
> +#define VLV_GFX_CLK_FORCE_ON_BIT                (1<<2)
> +#define VLV_GFX_CLK_STATUS_BIT                  (1<<3)
> +
> +#define VLV_RC_COUNTER_CONTROL                  0xFFFF00FF
> +
>   #define  GTFIFODBG				0x120000
>   #define    GT_FIFO_SBDROPERR			(1<<6)
>   #define    GT_FIFO_BLOBDROPERR			(1<<5)
> @@ -4979,6 +4991,9 @@
>   #define VLV_GFX_CLK_STATUS_BIT			(1<<3)
>   #define VLV_GFX_CLK_FORCE_ON_BIT		(1<<2)
>
> +#define VLV_RENDER_C0_COUNT_REG		0x138118
> +#define VLV_MEDIA_C0_COUNT_REG			0x13811C
> +
>   #define GEN6_GT_GFX_RC6_LOCKED			0x138104
>   #define VLV_COUNTER_CONTROL			0x138104
>   #define   VLV_COUNT_RANGE_HIGH			(1<<15)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 9ab3883..8002ac7 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>   		I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>   				~VLV_GFX_CLK_FORCE_ON_BIT);
>
> -	/* Unmask Up interrupts */
> -	dev_priv->rps.rp_up_masked = true;
> -	gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> +	/* Unmask Turbo interrupts */
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
> +	else {
> +		dev_priv->rps.rp_up_masked = true;
> +		gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>   						dev_priv->rps.min_delay);
> +	}
>   }
>
>   void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>
>   	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> -	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +						~GEN6_PM_RP_UP_EI_EXPIRED);
> +	} else {
> +		I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> +						~GEN6_PM_RPS_EVENTS);
> +	}
>   	/* Complete PM interrupt masking here doesn't race with the rps work
>   	 * item again unmasking PM interrupts because that is using a different
>   	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
> @@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>   	dev_priv->rps.pm_iir = 0;
>   	spin_unlock_irq(&dev_priv->irq_lock);
>
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> +	else
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>   }
>
>   static void gen6_disable_rps(struct drm_device *dev)
> @@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
>   	struct drm_i915_private *dev_priv = dev->dev_private;
>   	u32 enabled_intrs;
>
> +	/* Clear out any stale interrupts first */
>   	spin_lock_irq(&dev_priv->irq_lock);
>   	WARN_ON(dev_priv->rps.pm_iir);
> -	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> -	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	if (dev_priv->rps.use_RC0_residency_for_turbo) {
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> +	} else {
> +		snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> +		I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> +	}
>   	spin_unlock_irq(&dev_priv->irq_lock);
>
>   	/* only unmask PM interrupts we need. Mask all others. */
> -	enabled_intrs = GEN6_PM_RPS_EVENTS;
> +	if (dev_priv->rps.use_RC0_residency_for_turbo)
> +		enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
> +	else
> +		enabled_intrs = GEN6_PM_RPS_EVENTS;
>
>   	/* IVB and SNB hard hangs on looping batchbuffer
>   	 * if GEN6_PM_UP_EI_EXPIRED is masked.
>   	 */
> -	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
> +	if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
> +			!dev_priv->rps.use_RC0_residency_for_turbo)
>   		enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
>
>   	I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
> @@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>   	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>
>   	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> +	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>
>   	I915_WRITE(GEN6_RP_CONTROL,
>   		   GEN6_RP_MEDIA_TURBO |
> @@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>   	I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>
>   	/* allows RC6 residency counter to work */
> -	I915_WRITE(VLV_COUNTER_CONTROL,
> -		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> -				      VLV_MEDIA_RC6_COUNT_EN |
> -				      VLV_RENDER_RC6_COUNT_EN));
> +	I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>   	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>   		rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>
> @@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
>   	dev_priv->rps.rp_up_masked = false;
>   	dev_priv->rps.rp_down_masked = false;
>
> +	/* enable WA for RC6+turbo to work together */
> +	dev_priv->rps.use_RC0_residency_for_turbo = true;
> +
>   	gen6_enable_rps_interrupts(dev);
>
>   	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
>



More information about the Intel-gfx mailing list