[Intel-gfx] [PATCH 06/39] drm/i915: Always try to reset the GPU on takeover

Mika Kuoppala mika.kuoppala at linux.intel.com
Wed Jan 2 13:19:52 UTC 2019


Chris Wilson <chris at chris-wilson.co.uk> writes:

> When we first introduced the reset to sanitize the GPU on taking over
> from the BIOS and before returning control to third parties (the BIOS!),
> we restricted it to only systems utilizing HW contexts as we were
> uncertain of how stable our reset mechanism truly was. We now have
> reasonable coverage across all machines that expose a GPU reset method,
> and so we should be safe to sanitize the GPU state everywhere.
>
> v2: We _have_ to skip the reset if it would clobber the display.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.c           |  2 +-
>  drivers/gpu/drm/i915/i915_gem.c           | 11 ++---------
>  drivers/gpu/drm/i915/i915_pci.c           |  5 +++++
>  drivers/gpu/drm/i915/intel_device_info.h  |  1 +
>  drivers/gpu/drm/i915/intel_display.c      |  4 ++--
>  drivers/gpu/drm/i915/intel_engine_cs.c    | 14 +++++++++++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h   |  2 +-
>  drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +-
>  8 files changed, 26 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index dcb935338c63..a96169acdb07 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -2174,7 +2174,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
>  
>  	intel_power_domains_resume(dev_priv);
>  
> -	intel_engines_sanitize(dev_priv);
> +	intel_engines_sanitize(dev_priv, true);
>  
>  	enable_rpm_wakeref_asserts(dev_priv);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e46a25747ab7..0ebde13620cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3432,8 +3432,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	i915_retire_requests(i915);
>  	GEM_BUG_ON(i915->gt.active_requests);
>  
> -	if (!intel_gpu_reset(i915, ALL_ENGINES))
> -		intel_engines_sanitize(i915);
> +	intel_engines_sanitize(i915, false);
>  
>  	/*
>  	 * Undo nop_submit_request. We prevent all new i915 requests from
> @@ -5037,8 +5036,6 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
>  
>  void i915_gem_sanitize(struct drm_i915_private *i915)
>  {
> -	int err;
> -
>  	GEM_TRACE("\n");
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> @@ -5063,11 +5060,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  	 * it may impact the display and we are uncertain about the stability
>  	 * of the reset, so this could be applied to even earlier gen.
>  	 */
> -	err = -ENODEV;
> -	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
> -		err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
> -	if (!err)
> -		intel_engines_sanitize(i915);
> +	intel_engines_sanitize(i915, false);
>  
>  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
>  	intel_runtime_pm_put(i915);
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 0d342f2b44a5..dd4aff2b256e 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -82,6 +82,7 @@
>  	.display.has_overlay = 1, \
>  	.display.overlay_needs_physical = 1, \
>  	.display.has_gmch_display = 1, \
> +	.gpu_reset_clobbers_display = true, \
>  	.hws_needs_physical = 1, \
>  	.unfenced_needs_alignment = 1, \
>  	.ring_mask = RENDER_RING, \
> @@ -122,6 +123,7 @@ static const struct intel_device_info intel_i865g_info = {
>  	GEN(3), \
>  	.num_pipes = 2, \
>  	.display.has_gmch_display = 1, \
> +	.gpu_reset_clobbers_display = true, \
>  	.ring_mask = RENDER_RING, \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = true, \
> @@ -198,6 +200,7 @@ static const struct intel_device_info intel_pineview_info = {
>  	.num_pipes = 2, \
>  	.display.has_hotplug = 1, \
>  	.display.has_gmch_display = 1, \
> +	.gpu_reset_clobbers_display = true, \
>  	.ring_mask = RENDER_RING, \
>  	.has_snoop = true, \
>  	.has_coherent_ggtt = true, \
> @@ -228,6 +231,7 @@ static const struct intel_device_info intel_g45_info = {
>  	GEN4_FEATURES,
>  	PLATFORM(INTEL_G45),
>  	.ring_mask = RENDER_RING | BSD_RING,
> +	.gpu_reset_clobbers_display = false,
>  };
>  
>  static const struct intel_device_info intel_gm45_info = {
> @@ -237,6 +241,7 @@ static const struct intel_device_info intel_gm45_info = {
>  	.display.has_fbc = 1,
>  	.display.supports_tv = 1,
>  	.ring_mask = RENDER_RING | BSD_RING,
> +	.gpu_reset_clobbers_display = false,

Why not explicitly set this on rest of gen >= 5?
-Mika


>  };
>  
>  #define GEN5_FEATURES \
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index dd34f974a857..8fd683497956 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -89,6 +89,7 @@ enum intel_ppgtt {
>  	func(is_alpha_support); \
>  	/* Keep has_* in alphabetical order */ \
>  	func(has_64bit_reloc); \
> +	func(gpu_reset_clobbers_display); \
>  	func(has_reset_engine); \
>  	func(has_fpga_dbg); \
>  	func(has_guc); \
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index ab3b4d02d499..1679c04280eb 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -3746,8 +3746,8 @@ __intel_display_resume(struct drm_device *dev,
>  
>  static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
>  {
> -	return intel_has_gpu_reset(dev_priv) &&
> -		INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
> +	return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display &&
> +		intel_has_gpu_reset(dev_priv));
>  }
>  
>  void intel_prepare_reset(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 189a934a63e9..eefa55000818 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1043,22 +1043,34 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
>  		engine->set_default_submission(engine);
>  }
>  
> +static bool reset_engines(struct drm_i915_private *i915)
> +{
> +	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
> +		return false;
> +
> +	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
> +}
> +
>  /**
>   * intel_engines_sanitize: called after the GPU has lost power
>   * @i915: the i915 device
> + * @force: ignore a failed reset and sanitize engine state anyway
>   *
>   * Anytime we reset the GPU, either with an explicit GPU reset or through a
>   * PCI power cycle, the GPU loses state and we must reset our state tracking
>   * to match. Note that calling intel_engines_sanitize() if the GPU has not
>   * been reset results in much confusion!
>   */
> -void intel_engines_sanitize(struct drm_i915_private *i915)
> +void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
>  {
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  
>  	GEM_TRACE("\n");
>  
> +	if (!reset_engines(i915) && !force)
> +		return;
> +
>  	for_each_engine(engine, i915, id) {
>  		if (engine->reset.reset)
>  			engine->reset.reset(engine, NULL);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 91ef00d34e91..a62f09ffcfc9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -1019,7 +1019,7 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
>  	return cs;
>  }
>  
> -void intel_engines_sanitize(struct drm_i915_private *i915);
> +void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
>  
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
>  bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
> index d0aa19d17653..bdcc53e15e75 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
> @@ -121,7 +121,7 @@ static void pm_resume(struct drm_i915_private *i915)
>  	 */
>  	intel_runtime_pm_get(i915);
>  
> -	intel_engines_sanitize(i915);
> +	intel_engines_sanitize(i915, false);
>  	i915_gem_sanitize(i915);
>  	i915_gem_resume(i915);
>  
> -- 
> 2.20.1


More information about the Intel-gfx mailing list