[Intel-gfx] [RFC] drm/i915/bdw: Initialize BDW workarounds in render ring init fn

Ville Syrjälä ville.syrjala at linux.intel.com
Tue Jul 29 11:56:08 CEST 2014


On Mon, Jul 28, 2014 at 09:46:33PM +0100, Siluvery, Arun wrote:
> On 28/07/2014 18:00, Ville Syrjälä wrote:
> > On Mon, Jul 28, 2014 at 05:31:46PM +0100, arun.siluvery at linux.intel.com wrote:
> >> From: Arun Siluvery <arun.siluvery at linux.intel.com>
> >>
> >> The workarounds at the moment are initialized in init_clock_gating() but
> >> they are lost during reset; In case of execlists some workarounds modify
> >> registers that are part of register state context, since these are not
> >> initialized until init_clock_gating() default context ends up with
> >> incorrect values as render context is restored and saved before updated
> >> by workarounds hence move them to render ring init fn. This should be
> >> ok as these workarounds are not related to display clock gating.
> >>
> >> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_debugfs.c     | 46 ++++++++++++++++++++++
> >>   drivers/gpu/drm/i915/intel_pm.c         | 59 ----------------------------
> >>   drivers/gpu/drm/i915/intel_ringbuffer.c | 68 +++++++++++++++++++++++++++++++++
> >>   3 files changed, 114 insertions(+), 59 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> >> index 083683c..cf7da30 100644
> >> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> >> @@ -2397,20 +2397,65 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
> >>   		seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md);
> >>   		seq_printf(m, " fp0:     0x%08x\n", pll->hw_state.fp0);
> >>   		seq_printf(m, " fp1:     0x%08x\n", pll->hw_state.fp1);
> >>   		seq_printf(m, " wrpll:   0x%08x\n", pll->hw_state.wrpll);
> >>   	}
> >>   	drm_modeset_unlock_all(dev);
> >>
> >>   	return 0;
> >>   }
> >>
> >> +static int i915_workaround_info(struct seq_file *m, void *unused)
> >> +{
> >> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
> >> +	struct drm_device *dev = node->minor->dev;
> >> +	struct drm_i915_private *dev_priv = dev->dev_private;
> >> +	int ret;
> >> +
> >> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	if (IS_BROADWELL(dev)) {
> >> +		seq_printf(m, "GEN8_ROW_CHICKEN:\t0x%08x\n",
> >> +			   I915_READ(GEN8_ROW_CHICKEN));
> >> +		seq_printf(m, "HALF_SLICE_CHICKEN3:\t0x%08x\n",
> >> +			   I915_READ(HALF_SLICE_CHICKEN3));
> >> +		seq_printf(m, "GAMTARBMODE:\t0x%08x\n", I915_READ(GAMTARBMODE));
> >> +		seq_printf(m, "_3D_CHICKEN3:\t0x%08x\n",
> >> +			   I915_READ(_3D_CHICKEN3));
> >> +		seq_printf(m, "COMMON_SLICE_CHICKEN2:\t0x%08x\n",
> >> +			   I915_READ(COMMON_SLICE_CHICKEN2));
> >> +		seq_printf(m, "GEN7_HALF_SLICE_CHICKEN1:\t0x%08x\n",
> >> +			   I915_READ(GEN7_HALF_SLICE_CHICKEN1));
> >> +		seq_printf(m, "GEN7_ROW_CHICKEN2:\t0x%08x\n",
> >> +			   I915_READ(GEN7_ROW_CHICKEN2));
> >> +		seq_printf(m, "GAM_ECOCHK:\t0x%08x\n",
> >> +			   I915_READ(GAM_ECOCHK));
> >> +		seq_printf(m, "HDC_CHICKEN0:\t0x%08x\n",
> >> +			   I915_READ(HDC_CHICKEN0));
> >> +		seq_printf(m, "GEN7_FF_THREAD_MODE:\t0x%08x\n",
> >> +			   I915_READ(GEN7_FF_THREAD_MODE));
> >> +		seq_printf(m, "GEN8_UCGCTL6:\t0x%08x\n",
> >> +			   I915_READ(GEN8_UCGCTL6));
> >> +		seq_printf(m, "GEN6_RC_SLEEP_PSMI_CONTROL:\t0x%08x\n",
> >> +			   I915_READ(GEN6_RC_SLEEP_PSMI_CONTROL));
> >> +		seq_printf(m, "CACHE_MODE_1:\t0x%08x\n",
> >> +			   I915_READ(CACHE_MODE_1));
> >> +	} else
> >> +		DRM_DEBUG_DRIVER("Not available for Gen%d\n",
> >> +				 INTEL_INFO(dev)->gen);
> >> +
> >> +	mutex_unlock(&dev->struct_mutex);
> >> +	return 0;
> >> +}
> >> +
> >
> > This smells like a separate patch. But I'm not sure we want at all since
> > intel_reg_read will provide the same information.
> >
> >>   struct pipe_crc_info {
> >>   	const char *name;
> >>   	struct drm_device *dev;
> >>   	enum pipe pipe;
> >>   };
> >>
> >>   static int i915_pipe_crc_open(struct inode *inode, struct file *filep)
> >>   {
> >>   	struct pipe_crc_info *info = inode->i_private;
> >>   	struct drm_i915_private *dev_priv = info->dev->dev_private;
> >> @@ -3904,20 +3949,21 @@ static const struct drm_info_list i915_debugfs_list[] = {
> >>   	{"i915_ppgtt_info", i915_ppgtt_info, 0},
> >>   	{"i915_llc", i915_llc, 0},
> >>   	{"i915_edp_psr_status", i915_edp_psr_status, 0},
> >>   	{"i915_sink_crc_eDP1", i915_sink_crc, 0},
> >>   	{"i915_energy_uJ", i915_energy_uJ, 0},
> >>   	{"i915_pc8_status", i915_pc8_status, 0},
> >>   	{"i915_power_domain_info", i915_power_domain_info, 0},
> >>   	{"i915_display_info", i915_display_info, 0},
> >>   	{"i915_semaphore_status", i915_semaphore_status, 0},
> >>   	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
> >> +	{"i915_workaround_info", i915_workaround_info, 0},
> >>   };
> >>   #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
> >>
> >>   static const struct i915_debugfs_files {
> >>   	const char *name;
> >>   	const struct file_operations *fops;
> >>   } i915_debugfs_files[] = {
> >>   	{"i915_wedged", &i915_wedged_fops},
> >>   	{"i915_max_freq", &i915_max_freq_fops},
> >>   	{"i915_min_freq", &i915_min_freq_fops},
> >> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> >> index 3f88f29..9597f95 100644
> >> --- a/drivers/gpu/drm/i915/intel_pm.c
> >> +++ b/drivers/gpu/drm/i915/intel_pm.c
> >> @@ -5393,101 +5393,42 @@ static void gen8_init_clock_gating(struct drm_device *dev)
> >>   	struct drm_i915_private *dev_priv = dev->dev_private;
> >>   	enum pipe pipe;
> >>
> >>   	I915_WRITE(WM3_LP_ILK, 0);
> >>   	I915_WRITE(WM2_LP_ILK, 0);
> >>   	I915_WRITE(WM1_LP_ILK, 0);
> >>
> >>   	/* FIXME(BDW): Check all the w/a, some might only apply to
> >>   	 * pre-production hw. */
> >>
> >> -	/* WaDisablePartialInstShootdown:bdw */
> >> -	I915_WRITE(GEN8_ROW_CHICKEN,
> >> -		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
> >> -
> >> -	/* WaDisableThreadStallDopClockGating:bdw */
> >> -	/* FIXME: Unclear whether we really need this on production bdw. */
> >> -	I915_WRITE(GEN8_ROW_CHICKEN,
> >> -		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
> >> -
> >> -	/*
> >> -	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
> >> -	 * pre-production hardware
> >> -	 */
> >> -	I915_WRITE(HALF_SLICE_CHICKEN3,
> >> -		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
> >> -	I915_WRITE(HALF_SLICE_CHICKEN3,
> >> -		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
> >> -	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
> >> -
> >> -	I915_WRITE(_3D_CHICKEN3,
> >> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
> >> -
> >> -	I915_WRITE(COMMON_SLICE_CHICKEN2,
> >> -		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
> >> -
> >> -	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
> >> -		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
> >> -
> >> -	/* WaDisableDopClockGating:bdw May not be needed for production */
> >> -	I915_WRITE(GEN7_ROW_CHICKEN2,
> >> -		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
> >> -
> >> -	/* WaSwitchSolVfFArbitrationPriority:bdw */
> >> -	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
> >>
> >>   	/* WaPsrDPAMaskVBlankInSRD:bdw */
> >>   	I915_WRITE(CHICKEN_PAR1_1,
> >>   		   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
> >>
> >>   	/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
> >>   	for_each_pipe(pipe) {
> >>   		I915_WRITE(CHICKEN_PIPESL_1(pipe),
> >>   			   I915_READ(CHICKEN_PIPESL_1(pipe)) |
> >>   			   BDW_DPRS_MASK_VBLANK_SRD);
> >>   	}
> >>
> >> -	/* Use Force Non-Coherent whenever executing a 3D context. This is a
> >> -	 * workaround for for a possible hang in the unlikely event a TLB
> >> -	 * invalidation occurs during a PSD flush.
> >> -	 */
> >> -	I915_WRITE(HDC_CHICKEN0,
> >> -		   I915_READ(HDC_CHICKEN0) |
> >> -		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
> >> -
> >> -	/* WaVSRefCountFullforceMissDisable:bdw */
> >> -	/* WaDSRefCountFullforceMissDisable:bdw */
> >> -	I915_WRITE(GEN7_FF_THREAD_MODE,
> >> -		   I915_READ(GEN7_FF_THREAD_MODE) &
> >> -		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
> >> -
> >>   	/*
> >>   	 * BSpec recommends 8x4 when MSAA is used,
> >>   	 * however in practice 16x4 seems fastest.
> >>   	 *
> >>   	 * Note that PS/WM thread counts depend on the WIZ hashing
> >>   	 * disable bit, which we don't touch here, but it's good
> >>   	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
> >>   	 */
> >>   	I915_WRITE(GEN7_GT_MODE,
> >>   		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> >
> > GT_MODE doesn't get clobbered by a reset? I would have expected it to
> > behave the same way as CACHE_MODE_{0,1).
> >
> >> -
> >> -	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
> >> -		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
> >> -
> >> -	/* WaDisableSDEUnitClockGating:bdw */
> >> -	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
> >> -		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
> >
> > And I would have expected the UCGCTL registers to not be clobbered. IIRC
> > they didn't get clobbered on my IVB, but my memory might be wrong here.
> 
> I could complete testing because of the problem with CACHE_MODE_1, will 
> double check these registers and move/unmove them accordingly.

I would try to identify the approriate regiters by simply using
intel_reg_write to first write some non-reset value to the suspect
register(s), then perform GPU reset (w/ intel_reg_write as well)
and afterwards check if the register value(s) changed.

As long as there are no drm clients around that should work out
nicely. It would avoid uncertainty about context saved registers
getting restored from the context at "random" times, and also
avoids the kernel's hangcheck getting in the way.

-- 
Ville Syrjälä
Intel OTC



More information about the Intel-gfx mailing list