[Intel-gfx] [PATCH v5] drm/i915/vlv: WA for Turbo and RC6 to work together.
Ville Syrjälä
ville.syrjala at linux.intel.com
Fri Mar 28 13:53:48 CET 2014
On Thu, Mar 27, 2014 at 12:05:01PM +0530, deepak.s at linux.intel.com wrote:
> From: Deepak S <deepak.s at linux.intel.com>
>
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
> on increasing/decreasing the freq. This logic monitors the C0
> counters of the render/media power wells over the EI period and takes
> the necessary action based on these values.
>
> v2: Refactor duplicate code. (Ville)
>
> v3: Reformat the comments. (Ville)
>
> v4: Enable required counters and remove unwanted code (Ville)
>
> v5: Resolved comments and remove kernel-doc style comments. (Ville)
That doesn't tell me that you added the frequency change acceleration
support.
>
> Signed-off-by: Deepak S <deepak.s at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 15 +++++
> drivers/gpu/drm/i915/i915_irq.c | 135 +++++++++++++++++++++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_reg.h | 12 +++-
> drivers/gpu/drm/i915/intel_pm.c | 13 +++-
> 4 files changed, 170 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7c212f3..c48ea93 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -816,6 +816,12 @@ struct i915_suspend_saved_registers {
> u32 savePCH_PORT_HOTPLUG;
> };
>
> +struct intel_rps_ei_calc {
> + u32 cz_ts_ei;
> + u32 render_ei_c0;
> + u32 media_ei_c0;
> +};
> +
> struct intel_gen6_power_mgmt {
> /* work and pm_iir are protected by dev_priv->irq_lock */
> struct work_struct work;
> @@ -843,6 +849,8 @@ struct intel_gen6_power_mgmt {
> bool rp_up_masked;
> bool rp_down_masked;
>
> + u32 ei_interrupt_count;
> +
> int last_adj;
> enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>
> @@ -1403,6 +1411,13 @@ typedef struct drm_i915_private {
> /* gen6+ rps state */
> struct intel_gen6_power_mgmt rps;
>
> + /* rps wa up ei calculation */
> + struct intel_rps_ei_calc rps_up_ei;
> +
> + /* rps wa down ei calculation */
> + struct intel_rps_ei_calc rps_down_ei;
> +
> +
> /* ilk-only ips/rps state. Everything in here is protected by the global
> * mchdev_lock in intel_pm.c */
> struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 300f127..4b421b4 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1121,6 +1121,132 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
> }
> }
>
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> + struct intel_rps_ei_calc *rps_ei)
> +{
> + u32 cz_ts, cz_freq_khz;
> + u32 render_count, media_count;
> + u32 elapsed_render, elapsed_media, elapsed_time;
> + u32 residency = 0;
> +
> + cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> + cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> + render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> + media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> + if (rps_ei->cz_ts_ei == 0) {
> + rps_ei->cz_ts_ei = cz_ts;
> + rps_ei->render_ei_c0 = render_count;
> + rps_ei->media_ei_c0 = media_count;
> +
> + return dev_priv->rps.cur_freq;
> + }
> +
> + elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> + rps_ei->cz_ts_ei = cz_ts;
> +
> + elapsed_render = render_count - rps_ei->render_ei_c0;
> + rps_ei->render_ei_c0 = render_count;
> +
> + elapsed_media = media_count - rps_ei->media_ei_c0;
> + rps_ei->media_ei_c0 = media_count;
> +
> + /* Convert all the counters into common unit of milli sec */
> + elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> + elapsed_render /= cz_freq_khz;
> + elapsed_media /= cz_freq_khz;
> +
> + /*
> + * Calculate overall C0 residency percentage
> + * only if elapsed time is non zero
> + */
> + if (elapsed_time) {
> + residency =
> + ((max(elapsed_render, elapsed_media) * 100)
> + / elapsed_time);
> + }
> +
> + return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> + u32 residency_C0_up = 0, residency_C0_down = 0;
> + u8 new_delay, adj;
> +
> + dev_priv->rps.ei_interrupt_count++;
> +
> + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> + if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> + vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> + vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> + return dev_priv->rps.cur_freq;
> + }
> +
> +
> + /*
> + * To down throttle, C0 residency should be less than down threshold
> + * for continuous EI intervals. So calculate down EI counters
> + * once in VLV_INT_COUNT_FOR_DOWN_EI
> + */
> + if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> + dev_priv->rps.ei_interrupt_count = 0;
> +
> + residency_C0_down = vlv_c0_residency(dev_priv,
> + &dev_priv->rps_down_ei);
> + } else {
> + residency_C0_up = vlv_c0_residency(dev_priv,
> + &dev_priv->rps_up_ei);
> + }
> +
> + new_delay = dev_priv->rps.cur_freq;
> +
> + adj = dev_priv->rps.last_adj;
> + /* C0 residency is greater than UP threshold. Increase Frequency */
> + if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> + if (adj > 0)
> + adj *= 2;
> + else
> + adj = 1;
> +
> + if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> + new_delay = dev_priv->rps.cur_freq + adj;
> +
> + /*
> + * For better performance, jump directly
> + * to RPe if we're below it.
> + */
> + if (new_delay < dev_priv->rps.efficient_freq)
> + new_delay = dev_priv->rps.efficient_freq;
> +
> + } else if (!dev_priv->rps.ei_interrupt_count &&
> + (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> + if (adj < 0)
> + adj *= 2;
> + else
> + adj = -1;
> + /*
> + * This means, C0 residency is less than down threshold over
> + * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> + */
> + if (dev_priv->rps.cur_freq > dev_priv->rps.max_freq_softlimit)
> + new_delay = dev_priv->rps.cur_freq + adj;
> + }
> +
> + return new_delay;
> +}
> +
> static void gen6_pm_rps_work(struct work_struct *work)
> {
> drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1163,6 +1289,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
> else
> new_delay = dev_priv->rps.min_freq_softlimit;
> adj = 0;
> + } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> + new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
> } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> if (adj < 0)
> adj *= 2;
> @@ -3053,6 +3181,7 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
> pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>
> dev_priv->pm_irq_mask = 0xffffffff;
> +
> I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
> I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
> I915_WRITE(GEN6_PMIER, pm_irqs);
> @@ -4095,7 +4224,11 @@ void intel_irq_init(struct drm_device *dev)
> INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
>
> /* Let's track the enabled rps events */
> - dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
> + if (IS_VALLEYVIEW(dev))
> + /* WAUseRC0ResidenncyTurbo:VLV */
Looks like you forgot to fix the w/a comment.
Otherwise the patch looks good to me, so if you fix those two small
issues you can add:
Reviewed-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
> + else
> + dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
>
> setup_timer(&dev_priv->gpu_error.hangcheck_timer,
> i915_hangcheck_elapsed,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 927a7c1..e334bf1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -506,6 +506,7 @@ enum punit_power_well {
> #define PUNIT_REG_GPU_FREQ_STS 0xd8
> #define GENFREQSTATUS (1<<0)
> #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP 0xce
>
> #define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */
> #define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */
> @@ -521,6 +522,11 @@ enum punit_power_well {
> #define FB_FMAX_VMIN_FREQ_LO_SHIFT 27
> #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
>
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
> +#define VLV_RP_UP_EI_THRESHOLD 90
> +#define VLV_RP_DOWN_EI_THRESHOLD 70
> +#define VLV_INT_COUNT_FOR_DOWN_EI 5
> +
> /* vlv2 north clock has */
> #define CCK_FUSE_REG 0x8
> #define CCK_FUSE_HPLL_FREQ_MASK 0x3
> @@ -4984,6 +4990,7 @@ enum punit_power_well {
> #define VLV_GTLC_PW_STATUS 0x130094
> #define VLV_GTLC_PW_RENDER_STATUS_MASK 0x80
> #define VLV_GTLC_PW_MEDIA_STATUS_MASK 0x20
> +#define VLV_GTLC_SURVIVABILITY_REG 0x130098
> #define FORCEWAKE_MT 0xa188 /* multi-threaded */
> #define FORCEWAKE_KERNEL 0x1
> #define FORCEWAKE_USER 0x2
> @@ -5112,12 +5119,15 @@ enum punit_power_well {
> #define GEN6_GT_GFX_RC6_LOCKED 0x138104
> #define VLV_COUNTER_CONTROL 0x138104
> #define VLV_COUNT_RANGE_HIGH (1<<15)
> +#define VLV_MEDIA_RC0_COUNT_EN (1<<5)
> +#define VLV_RENDER_RC0_COUNT_EN (1<<4)
> #define VLV_MEDIA_RC6_COUNT_EN (1<<1)
> #define VLV_RENDER_RC6_COUNT_EN (1<<0)
> #define GEN6_GT_GFX_RC6 0x138108
> #define GEN6_GT_GFX_RC6p 0x13810C
> #define GEN6_GT_GFX_RC6pp 0x138110
> -
> +#define VLV_RENDER_C0_COUNT_REG 0x138118
> +#define VLV_MEDIA_C0_COUNT_REG 0x13811C
> #define GEN6_PCODE_MAILBOX 0x138124
> #define GEN6_PCODE_READY (1<<31)
> #define GEN6_READ_OC_PARAMS 0xc
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index b66a43b..30730be 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3097,9 +3097,13 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> ~VLV_GFX_CLK_FORCE_ON_BIT);
>
> /* Unmask Up interrupts */
> - dev_priv->rps.rp_up_masked = true;
> - gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> + I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
> + else {
> + dev_priv->rps.rp_up_masked = true;
> + gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> dev_priv->rps.min_freq_softlimit);
> + }
> }
>
> void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3619,6 +3623,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
> I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>
> I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>
> I915_WRITE(GEN6_RP_CONTROL,
> GEN6_RP_MEDIA_TURBO |
> @@ -3639,9 +3644,11 @@ static void valleyview_enable_rps(struct drm_device *dev)
>
> /* allows RC6 residency counter to work */
> I915_WRITE(VLV_COUNTER_CONTROL,
> - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> + _MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
> + VLV_RENDER_RC0_COUNT_EN |
> VLV_MEDIA_RC6_COUNT_EN |
> VLV_RENDER_RC6_COUNT_EN));
> +
> if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
> rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
More information about the Intel-gfx
mailing list