[Intel-gfx] [PATCH 21/31] drm/i915/slpc: Send RESET event to enable SLPC during Load/TDR

Michal Wajdeczko michal.wajdeczko at intel.com
Thu Sep 21 14:06:20 UTC 2017


On Tue, 19 Sep 2017 19:41:57 +0200, Sagar Arun Kamble  
<sagar.a.kamble at intel.com> wrote:

> Send host2guc SLPC reset event to GuC post GuC load.
> Post this, i915 can ascertain if SLPC has started running successfully
> through shared data. This check is done during intel_init_gt_powersave.
> This allows to get initial configuration setup by SLPC and if needed
> move to Host RPS if SLPC runs into issues.
> On TDR/Engine reset i915 should send extra flag
> SLPC_RESET_FLAG_TDR_OCCURRED to clear SLPC state as appropriate.
>
> v1: Extract host2guc_slpc to handle slpc status code
>     coding style changes (Paulo)
>     Removed WARN_ON for checking msb of gtt address of
>     shared gem obj. (ChrisW)
>     host2guc_action to i915_guc_action change.(Sagar)
>     Updating SLPC enabled status. (Sagar)
>
> v2: Commit message update. (David)
>
> v3: Rebase.
>
> v4: Added DRM_INFO message when SLPC is enabled.
>
> v5: Updated patch as host2guc_slpc is moved to earlier patch.
>     SLPC activation status message put after checking the
>     state from shared data during intel_init_gt_powersave.
>
> v6: Added definition of host2guc_slpc and clflush the shared data only
>     for required size. Setting state to NOT_RUNNING before sending RESET
>     event. Output data for SLPC actions is to be retrieved during
>     intel_guc_send with lock protection so created wrapper
>     __intel_guc_send that outputs GuC output data if needed. Clearing
>     pm_rps_events on confirming SLPC RUNNING status so that even if
>     host touches any of the PM registers by mistake it should not have
>     any effect. (Sagar)
>
> v7: Added save/restore_default_rps as Uncore sanitize will clear the
>     RP_CONTROL setup by BIOS. s/i915_ggtt_offset/guc_ggtt_offset.
>
> v8: Added support for handling TDR based SLPC reset. Added functions
>     host2guc_slpc_tdr_reset, intel_slpc_reset_prepare and
>     intel_slpc_tdr_reset to handle TDR based SLPC reset.
>
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Signed-off-by: Tom O'Rourke <Tom.O'Rourke at intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c   |   2 +
>  drivers/gpu/drm/i915/i915_irq.c   |   7 +-
>  drivers/gpu/drm/i915/intel_pm.c   |  10 +++
>  drivers/gpu/drm/i915/intel_slpc.c | 170  
> ++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_slpc.h |   9 ++
>  drivers/gpu/drm/i915/intel_uc.c   |   1 +
>  6 files changed, 198 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c  
> b/drivers/gpu/drm/i915/i915_drv.c
> index f13a3de..932f9ef 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1074,6 +1074,8 @@ static int i915_driver_init_hw(struct  
> drm_i915_private *dev_priv)
> 	intel_sanitize_options(dev_priv);
> +	intel_slpc_save_default_rps(&dev_priv->guc.slpc);
> +
>  	ret = i915_ggtt_probe_hw(dev_priv);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c  
> b/drivers/gpu/drm/i915/i915_irq.c
> index 4a1554c..2d5ad13 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2838,8 +2838,13 @@ void i915_handle_error(struct drm_i915_private  
> *dev_priv,
>  		}
>  	}
> -	if (!engine_mask)
> +	if (!engine_mask) {
> +		if (intel_slpc_active(&dev_priv->guc.slpc)) {
> +			intel_slpc_reset_prepare(&dev_priv->guc.slpc);
> +			intel_slpc_tdr_reset(&dev_priv->guc.slpc);
> +		}

Can you just call a single SLPC function that hides the SLPC internals?

>  		goto out;
> +	}
> 	/* Full reset needs the mutex, stop any other user trying to do so. */
>  	if (test_and_set_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags)) {
> diff --git a/drivers/gpu/drm/i915/intel_pm.c  
> b/drivers/gpu/drm/i915/intel_pm.c
> index 6b2b7f8..c2065f2 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7918,6 +7918,16 @@ void intel_init_gt_powersave(struct  
> drm_i915_private *dev_priv)
>  		intel_runtime_pm_get(dev_priv);
>  	}
> +	if (intel_slpc_enabled()) {
> +		dev_priv->guc.slpc.active =
> +			intel_slpc_get_status(&dev_priv->guc.slpc);
> +		if (!intel_slpc_active(&dev_priv->guc.slpc)) {
> +			i915.enable_slpc = 0;
> +			intel_sanitize_gt_powersave(dev_priv);
> +		} else
> +			dev_priv->pm_rps_events = 0;
> +	}
> +

Hmm, on one hand you're trying to use friendly wrappers like
enabled() and active(), but at the same time you're directly modifying
the data that these helpers were meant to hide ...

>  	mutex_lock(&dev_priv->drm.struct_mutex);
>  	mutex_lock(&dev_priv->pm.pcu_lock);
> diff --git a/drivers/gpu/drm/i915/intel_slpc.c  
> b/drivers/gpu/drm/i915/intel_slpc.c
> index f47d81e..57e69d4 100644
> --- a/drivers/gpu/drm/i915/intel_slpc.c
> +++ b/drivers/gpu/drm/i915/intel_slpc.c
> @@ -390,6 +390,140 @@ static void slpc_shared_data_init(struct  
> intel_slpc *slpc)
>  	kunmap_atomic(data);
>  }
> +static void host2guc_slpc_reset(struct intel_slpc *slpc)
> +{
> +	struct slpc_event_input data = {0};
> +	u32 shared_data_gtt_offset = guc_ggtt_offset(slpc->vma);
> +
> +	data.header.value = SLPC_EVENT(SLPC_EVENT_RESET, 2);
> +	data.args[0] = shared_data_gtt_offset;
> +	data.args[1] = 0;
> +
> +	host2guc_slpc(slpc, &data, 4);
> +}
> +
> +static void host2guc_slpc_tdr_reset(struct intel_slpc *slpc)
> +{
> +	struct slpc_event_input data = {0};
> +	u32 shared_data_gtt_offset = guc_ggtt_offset(slpc->vma);
> +
> +	data.header.value = SLPC_EVENT(SLPC_EVENT_RESET, 3);
> +	data.args[0] = shared_data_gtt_offset;
> +	data.args[1] = 0;
> +	data.args[2] = SLPC_RESET_FLAG_TDR_OCCURRED;
> +
> +	host2guc_slpc(slpc, &data, 5);
> +}
> +
> +static void host2guc_slpc_query_task_state(struct intel_slpc *slpc)
> +{
> +	struct slpc_event_input data = {0};
> +	u32 shared_data_gtt_offset = guc_ggtt_offset(slpc->vma);
> +
> +	data.header.value = SLPC_EVENT(SLPC_EVENT_QUERY_TASK_STATE, 2);
> +	data.args[0] = shared_data_gtt_offset;
> +	data.args[1] = 0;
> +
> +	host2guc_slpc(slpc, &data, 4);
> +}
> +
> +void intel_slpc_query_task_state(struct intel_slpc *slpc)
> +{
> +	if (slpc->active)
> +		host2guc_slpc_query_task_state(slpc);
> +}
> +
> +/*
> + * This function reads the state updates from GuC SLPC into shared  
> data
> + * by invoking H2G action. Returns current state of GuC SLPC.
> + */
> +void intel_slpc_read_shared_data(struct intel_slpc *slpc,
> +				 struct slpc_shared_data *data)
> +{
> +	struct page *page;
> +	void *pv = NULL;
> +
> +	intel_slpc_query_task_state(slpc);
> +
> +	page = i915_vma_first_page(slpc->vma);
> +	pv = kmap_atomic(page);
> +
> +	drm_clflush_virt_range(pv, sizeof(struct slpc_shared_data));
> +	memcpy(data, pv, sizeof(struct slpc_shared_data));
> +
> +	kunmap_atomic(pv);
> +}
> +
> +const char *intel_slpc_get_state_str(enum slpc_global_state state)
> +{
> +	if (state == SLPC_GLOBAL_STATE_NOT_RUNNING)
> +		return "not running";
> +	else if (state == SLPC_GLOBAL_STATE_INITIALIZING)
> +		return "initializing";
> +	else if (state == SLPC_GLOBAL_STATE_RESETTING)
> +		return "resetting";
> +	else if (state == SLPC_GLOBAL_STATE_RUNNING)
> +		return "running";
> +	else if (state == SLPC_GLOBAL_STATE_SHUTTING_DOWN)
> +		return "shutting down";
> +	else if (state == SLPC_GLOBAL_STATE_ERROR)
> +		return "error";
> +	else
> +		return "unknown";

s/if..else/switch..case

> +}
> +
> +bool intel_slpc_get_status(struct intel_slpc *slpc)
> +{
> +	struct slpc_shared_data data;
> +	bool ret = false;
> +
> +	intel_slpc_read_shared_data(slpc, &data);
> +	DRM_INFO("SLPC state: %s\n",
> +		 intel_slpc_get_state_str(data.global_state));
> +
> +	switch (data.global_state) {
> +	case SLPC_GLOBAL_STATE_RUNNING:
> +		/* Capture required state from SLPC here */
> +		ret = true;
> +		break;
> +	case SLPC_GLOBAL_STATE_ERROR:
> +		DRM_ERROR("SLPC in error state.\n");
> +		break;
> +	case SLPC_GLOBAL_STATE_RESETTING:
> +		/*
> +		 * SLPC enabling in GuC should be completing fast.
> +		 * If SLPC is taking time to initialize (unlikely as we are
> +		 * sending reset event during GuC load itself).
> +		 * TODO: Need to wait till state changes to RUNNING.
> +		 */
> +		ret = true;
> +		DRM_ERROR("SLPC not running yet.!!!");
> +		break;
> +	default:
> +		break;
> +	}
> +	return ret;
> +}

Hmm, this function is trying to do much more than a simple 'get' status.
Is it necessary to print that many messages here?

> +
> +/*
> + * Uncore sanitize clears RPS state in Host GTPM flows set by BIOS,  
> Save the
> + * initial BIOS programmed RPS state that is needed by SLPC and not set  
> by SLPC.
> + * Set this state while enabling SLPC.
> + */
> +void intel_slpc_save_default_rps(struct intel_slpc *slpc)
> +{
> +	struct drm_i915_private *dev_priv = slpc_to_i915(slpc);
> +
> +	slpc->rp_control = I915_READ(GEN6_RP_CONTROL);
> +}
> +
> +static void intel_slpc_restore_default_rps(struct intel_slpc *slpc)
> +{
> +	struct drm_i915_private *dev_priv = slpc_to_i915(slpc);
> +
> +	I915_WRITE(GEN6_RP_CONTROL, slpc->rp_control);
> +}
> +
>  void intel_slpc_init(struct intel_slpc *slpc)
>  {
>  	struct intel_guc *guc = slpc_to_guc(slpc);
> @@ -426,6 +560,42 @@ void intel_slpc_cleanup(struct intel_slpc *slpc)
> void intel_slpc_enable(struct intel_slpc *slpc)
>  {
> +	struct page *page;
> +	struct slpc_shared_data *data;
> +
> +	intel_slpc_restore_default_rps(slpc);
> +
> +	page = i915_vma_first_page(slpc->vma);
> +	data = kmap_atomic(page);
> +	data->global_state = SLPC_GLOBAL_STATE_NOT_RUNNING;
> +	kunmap_atomic(data);
> +
> +	if (slpc->tdr_reset) {
> +		host2guc_slpc_tdr_reset(slpc);
> +		slpc->tdr_reset = false;
> +	} else {
> +		host2guc_slpc_reset(slpc);
> +	}
> +
> +	slpc->active = true;
> +}
> +
> +void intel_slpc_reset_prepare(struct intel_slpc *slpc)
> +{
> +	if (intel_slpc_active(slpc)) {
> +		intel_slpc_disable(slpc);
> +		slpc->tdr_reset = true;
> +	}
> +}
> +
> +void intel_slpc_tdr_reset(struct intel_slpc *slpc)
> +{
> +	intel_slpc_restore_default_rps(slpc);
> +	slpc_shared_data_init(slpc);
> +
> +	host2guc_slpc_tdr_reset(slpc);
> +	slpc->active = true;
> +	slpc->tdr_reset = false;
>  }
> void intel_slpc_disable(struct intel_slpc *slpc)
> diff --git a/drivers/gpu/drm/i915/intel_slpc.h  
> b/drivers/gpu/drm/i915/intel_slpc.h
> index 0ff17f0..20c342b 100644
> --- a/drivers/gpu/drm/i915/intel_slpc.h
> +++ b/drivers/gpu/drm/i915/intel_slpc.h
> @@ -26,7 +26,9 @@
> struct intel_slpc {
>  	bool active;
> +	bool tdr_reset;
>  	struct i915_vma *vma;
> +	u32 rp_control;
>  };
> static inline int intel_slpc_enabled(void)
> @@ -255,9 +257,16 @@ int intel_slpc_task_control(struct intel_slpc  
> *slpc, u64 val,
>  			    u32 enable_id, u32 disable_id);
>  int intel_slpc_task_status(struct intel_slpc *slpc, u64 *val,
>  			   u32 enable_id, u32 disable_id);
> +void intel_slpc_read_shared_data(struct intel_slpc *slpc,
> +				 struct slpc_shared_data *data);
> +const char *intel_slpc_get_state_str(enum slpc_global_state state);
> +bool intel_slpc_get_status(struct intel_slpc *slpc);
> +void intel_slpc_save_default_rps(struct intel_slpc *slpc);
>  void intel_slpc_init(struct intel_slpc *slpc);
>  void intel_slpc_cleanup(struct intel_slpc *slpc);
>  void intel_slpc_enable(struct intel_slpc *slpc);
>  void intel_slpc_disable(struct intel_slpc *slpc);
> +void intel_slpc_reset_prepare(struct intel_slpc *slpc);
> +void intel_slpc_tdr_reset(struct intel_slpc *slpc);
> #endif
> diff --git a/drivers/gpu/drm/i915/intel_uc.c  
> b/drivers/gpu/drm/i915/intel_uc.c
> index 990d84a..d8582b8 100644
> --- a/drivers/gpu/drm/i915/intel_uc.c
> +++ b/drivers/gpu/drm/i915/intel_uc.c
> @@ -502,5 +502,6 @@ int intel_uc_resume(struct drm_i915_private  
> *dev_priv)
> int intel_uc_reset_prepare(struct drm_i915_private *dev_priv)
>  {
> +	intel_slpc_reset_prepare(&dev_priv->guc.slpc);
>  	return intel_uc_suspend(dev_priv);
>  }


More information about the Intel-gfx mailing list