[Intel-gfx] [RFC] drm/i915/bdw: Initialize BDW workarounds in render ring init fn

Siluvery, Arun arun.siluvery at linux.intel.com
Mon Jul 28 22:46:33 CEST 2014


On 28/07/2014 18:00, Ville Syrjälä wrote:
> On Mon, Jul 28, 2014 at 05:31:46PM +0100, arun.siluvery at linux.intel.com wrote:
>> From: Arun Siluvery <arun.siluvery at linux.intel.com>
>>
>> The workarounds at the moment are initialized in init_clock_gating() but
>> they are lost during reset; In case of execlists some workarounds modify
>> registers that are part of register state context, since these are not
>> initialized until init_clock_gating() default context ends up with
>> incorrect values as render context is restored and saved before updated
>> by workarounds hence move them to render ring init fn. This should be
>> ok as these workarounds are not related to display clock gating.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c     | 46 ++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_pm.c         | 59 ----------------------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 68 +++++++++++++++++++++++++++++++++
>>   3 files changed, 114 insertions(+), 59 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 083683c..cf7da30 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2397,20 +2397,65 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
>>   		seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md);
>>   		seq_printf(m, " fp0:     0x%08x\n", pll->hw_state.fp0);
>>   		seq_printf(m, " fp1:     0x%08x\n", pll->hw_state.fp1);
>>   		seq_printf(m, " wrpll:   0x%08x\n", pll->hw_state.wrpll);
>>   	}
>>   	drm_modeset_unlock_all(dev);
>>
>>   	return 0;
>>   }
>>
>> +static int i915_workaround_info(struct seq_file *m, void *unused)
>> +{
>> +	struct drm_info_node *node = (struct drm_info_node *) m->private;
>> +	struct drm_device *dev = node->minor->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +	int ret;
>> +
>> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
>> +	if (ret)
>> +		return ret;
>> +
>> +	if (IS_BROADWELL(dev)) {
>> +		seq_printf(m, "GEN8_ROW_CHICKEN:\t0x%08x\n",
>> +			   I915_READ(GEN8_ROW_CHICKEN));
>> +		seq_printf(m, "HALF_SLICE_CHICKEN3:\t0x%08x\n",
>> +			   I915_READ(HALF_SLICE_CHICKEN3));
>> +		seq_printf(m, "GAMTARBMODE:\t0x%08x\n", I915_READ(GAMTARBMODE));
>> +		seq_printf(m, "_3D_CHICKEN3:\t0x%08x\n",
>> +			   I915_READ(_3D_CHICKEN3));
>> +		seq_printf(m, "COMMON_SLICE_CHICKEN2:\t0x%08x\n",
>> +			   I915_READ(COMMON_SLICE_CHICKEN2));
>> +		seq_printf(m, "GEN7_HALF_SLICE_CHICKEN1:\t0x%08x\n",
>> +			   I915_READ(GEN7_HALF_SLICE_CHICKEN1));
>> +		seq_printf(m, "GEN7_ROW_CHICKEN2:\t0x%08x\n",
>> +			   I915_READ(GEN7_ROW_CHICKEN2));
>> +		seq_printf(m, "GAM_ECOCHK:\t0x%08x\n",
>> +			   I915_READ(GAM_ECOCHK));
>> +		seq_printf(m, "HDC_CHICKEN0:\t0x%08x\n",
>> +			   I915_READ(HDC_CHICKEN0));
>> +		seq_printf(m, "GEN7_FF_THREAD_MODE:\t0x%08x\n",
>> +			   I915_READ(GEN7_FF_THREAD_MODE));
>> +		seq_printf(m, "GEN8_UCGCTL6:\t0x%08x\n",
>> +			   I915_READ(GEN8_UCGCTL6));
>> +		seq_printf(m, "GEN6_RC_SLEEP_PSMI_CONTROL:\t0x%08x\n",
>> +			   I915_READ(GEN6_RC_SLEEP_PSMI_CONTROL));
>> +		seq_printf(m, "CACHE_MODE_1:\t0x%08x\n",
>> +			   I915_READ(CACHE_MODE_1));
>> +	} else
>> +		DRM_DEBUG_DRIVER("Not available for Gen%d\n",
>> +				 INTEL_INFO(dev)->gen);
>> +
>> +	mutex_unlock(&dev->struct_mutex);
>> +	return 0;
>> +}
>> +
>
> This smells like a separate patch. But I'm not sure we want at all since
> intel_reg_read will provide the same information.
>
>>   struct pipe_crc_info {
>>   	const char *name;
>>   	struct drm_device *dev;
>>   	enum pipe pipe;
>>   };
>>
>>   static int i915_pipe_crc_open(struct inode *inode, struct file *filep)
>>   {
>>   	struct pipe_crc_info *info = inode->i_private;
>>   	struct drm_i915_private *dev_priv = info->dev->dev_private;
>> @@ -3904,20 +3949,21 @@ static const struct drm_info_list i915_debugfs_list[] = {
>>   	{"i915_ppgtt_info", i915_ppgtt_info, 0},
>>   	{"i915_llc", i915_llc, 0},
>>   	{"i915_edp_psr_status", i915_edp_psr_status, 0},
>>   	{"i915_sink_crc_eDP1", i915_sink_crc, 0},
>>   	{"i915_energy_uJ", i915_energy_uJ, 0},
>>   	{"i915_pc8_status", i915_pc8_status, 0},
>>   	{"i915_power_domain_info", i915_power_domain_info, 0},
>>   	{"i915_display_info", i915_display_info, 0},
>>   	{"i915_semaphore_status", i915_semaphore_status, 0},
>>   	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
>> +	{"i915_workaround_info", i915_workaround_info, 0},
>>   };
>>   #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
>>
>>   static const struct i915_debugfs_files {
>>   	const char *name;
>>   	const struct file_operations *fops;
>>   } i915_debugfs_files[] = {
>>   	{"i915_wedged", &i915_wedged_fops},
>>   	{"i915_max_freq", &i915_max_freq_fops},
>>   	{"i915_min_freq", &i915_min_freq_fops},
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
>> index 3f88f29..9597f95 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -5393,101 +5393,42 @@ static void gen8_init_clock_gating(struct drm_device *dev)
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>   	enum pipe pipe;
>>
>>   	I915_WRITE(WM3_LP_ILK, 0);
>>   	I915_WRITE(WM2_LP_ILK, 0);
>>   	I915_WRITE(WM1_LP_ILK, 0);
>>
>>   	/* FIXME(BDW): Check all the w/a, some might only apply to
>>   	 * pre-production hw. */
>>
>> -	/* WaDisablePartialInstShootdown:bdw */
>> -	I915_WRITE(GEN8_ROW_CHICKEN,
>> -		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>> -
>> -	/* WaDisableThreadStallDopClockGating:bdw */
>> -	/* FIXME: Unclear whether we really need this on production bdw. */
>> -	I915_WRITE(GEN8_ROW_CHICKEN,
>> -		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>> -
>> -	/*
>> -	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
>> -	 * pre-production hardware
>> -	 */
>> -	I915_WRITE(HALF_SLICE_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
>> -	I915_WRITE(HALF_SLICE_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>> -	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
>> -
>> -	I915_WRITE(_3D_CHICKEN3,
>> -		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
>> -
>> -	I915_WRITE(COMMON_SLICE_CHICKEN2,
>> -		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
>> -
>> -	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
>> -		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
>> -
>> -	/* WaDisableDopClockGating:bdw May not be needed for production */
>> -	I915_WRITE(GEN7_ROW_CHICKEN2,
>> -		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>> -
>> -	/* WaSwitchSolVfFArbitrationPriority:bdw */
>> -	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>>
>>   	/* WaPsrDPAMaskVBlankInSRD:bdw */
>>   	I915_WRITE(CHICKEN_PAR1_1,
>>   		   I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
>>
>>   	/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
>>   	for_each_pipe(pipe) {
>>   		I915_WRITE(CHICKEN_PIPESL_1(pipe),
>>   			   I915_READ(CHICKEN_PIPESL_1(pipe)) |
>>   			   BDW_DPRS_MASK_VBLANK_SRD);
>>   	}
>>
>> -	/* Use Force Non-Coherent whenever executing a 3D context. This is a
>> -	 * workaround for for a possible hang in the unlikely event a TLB
>> -	 * invalidation occurs during a PSD flush.
>> -	 */
>> -	I915_WRITE(HDC_CHICKEN0,
>> -		   I915_READ(HDC_CHICKEN0) |
>> -		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>> -
>> -	/* WaVSRefCountFullforceMissDisable:bdw */
>> -	/* WaDSRefCountFullforceMissDisable:bdw */
>> -	I915_WRITE(GEN7_FF_THREAD_MODE,
>> -		   I915_READ(GEN7_FF_THREAD_MODE) &
>> -		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
>> -
>>   	/*
>>   	 * BSpec recommends 8x4 when MSAA is used,
>>   	 * however in practice 16x4 seems fastest.
>>   	 *
>>   	 * Note that PS/WM thread counts depend on the WIZ hashing
>>   	 * disable bit, which we don't touch here, but it's good
>>   	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>>   	 */
>>   	I915_WRITE(GEN7_GT_MODE,
>>   		   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>
> GT_MODE doesn't get clobbered by a reset? I would have expected it to
> behave the same way as CACHE_MODE_{0,1).
>
>> -
>> -	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
>> -		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
>> -
>> -	/* WaDisableSDEUnitClockGating:bdw */
>> -	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>> -		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>
> And I would have expected the UCGCTL registers to not be clobbered. IIRC
> they didn't get clobbered on my IVB, but my memory might be wrong here.

I could complete testing because of the problem with CACHE_MODE_1, will 
double check these registers and move/unmove them accordingly.

>
>> -
>> -	/* Wa4x4STCOptimizationDisable:bdw */
>> -	I915_WRITE(CACHE_MODE_1,
>> -		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>>   }
>>
>>   static void haswell_init_clock_gating(struct drm_device *dev)
>>   {
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>
>>   	ilk_init_lp_watermarks(dev);
>>
>>   	/* L3 caching of data atomics doesn't work -- disable it. */
>>   	I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index b3d8f76..6818e34 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -591,20 +591,85 @@ init_pipe_control(struct intel_engine_cs *ring)
>>   	return 0;
>>
>>   err_unpin:
>>   	i915_gem_object_ggtt_unpin(ring->scratch.obj);
>>   err_unref:
>>   	drm_gem_object_unreference(&ring->scratch.obj->base);
>>   err:
>>   	return ret;
>>   }
>>
>> +static void bdw_init_workarounds(struct drm_device *dev)
>> +{
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +	/* WaDisablePartialInstShootdown:bdw */
>> +	I915_WRITE(GEN8_ROW_CHICKEN,
>> +		   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>> +
>> +	/* WaDisableThreadStallDopClockGating:bdw */
>> +	/* FIXME: Unclear whether we really need these on production bdw. */
>> +	I915_WRITE(GEN8_ROW_CHICKEN,
>> +		   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>> +
>> +	/*
>> +	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
>> +	 * pre-production hardware
>> +	 */
>> +	I915_WRITE(HALF_SLICE_CHICKEN3,
>> +		   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS));
>> +	I915_WRITE(HALF_SLICE_CHICKEN3,
>> +		   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>> +	I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE));
>> +
>> +	I915_WRITE(_3D_CHICKEN3,
>> +		   _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)));
>> +
>> +	I915_WRITE(COMMON_SLICE_CHICKEN2,
>> +		   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
>> +
>> +	I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
>> +		   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
>> +
>> +	/* WaDisableDopClockGating:bdw May not be needed for production */
>> +	I915_WRITE(GEN7_ROW_CHICKEN2,
>> +		   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>> +
>> +	/* WaSwitchSolVfFArbitrationPriority:bdw */
>> +	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
>> +
>> +	/* Use Force Non-Coherent whenever executing a 3D context. This is a
>> +	 * workaround for for a possible hang in the unlikely event a TLB
>> +	 * invalidation occurs during a PSD flush.
>> +	 */
>> +	I915_WRITE(HDC_CHICKEN0,
>> +		   I915_READ(HDC_CHICKEN0) |
>
> You might kill this read since it's a masked register. Maybe as a
> separate prep-patch.
>
>> +		   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>> +
>> +	/* WaVSRefCountFullforceMissDisable:bdw */
>> +	/* WaDSRefCountFullforceMissDisable:bdw */
>> +	I915_WRITE(GEN7_FF_THREAD_MODE,
>> +		   I915_READ(GEN7_FF_THREAD_MODE) &
>> +		   ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
>> +
>> +	/* WaDisableSDEUnitClockGating:bdw */
>> +	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
>> +		   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
>> +
>> +	I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL,
>> +		   _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
>> +
>> +	/* Wa4x4STCOptimizationDisable:bdw */
>> +	I915_WRITE(CACHE_MODE_1,
>> +		   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>
> I think it would be nice to reorder this function a bit to keep similar registers
> close to each other. The current mismash makes it easy to overlook a
> workaround and then we may end up with duplicates (wouldn't be the first
> time that happened).
>
sure, I will take care of this in the next version.

>> +}
>> +
>>   static int init_render_ring(struct intel_engine_cs *ring)
>>   {
>>   	struct drm_device *dev = ring->dev;
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>   	int ret = init_ring_common(ring);
>>   	if (ret)
>>   		return ret;
>>
>>   	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
>>   	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
>> @@ -646,20 +711,23 @@ static int init_render_ring(struct intel_engine_cs *ring)
>>   		I915_WRITE(CACHE_MODE_0,
>>   			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
>>   	}
>>
>>   	if (INTEL_INFO(dev)->gen >= 6)
>>   		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
>>
>>   	if (HAS_L3_DPF(dev))
>>   		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
>>
>> +	if (IS_BROADWELL(dev))
>> +		bdw_init_workarounds(dev);
>> +
>>   	return ret;
>>   }
>>
>>   static void render_ring_cleanup(struct intel_engine_cs *ring)
>>   {
>>   	struct drm_device *dev = ring->dev;
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>>
>>   	if (dev_priv->semaphore_obj) {
>>   		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
>> --
>> 1.9.2
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>




More information about the Intel-gfx mailing list