[Intel-gfx] [PATCH 7/8] drm/i915/guc: Update GuC messages in intel_guc_submission.c

Mon Jan 23 23:11:39 UTC 2023

On 1/20/2023 08:40, Michal Wajdeczko wrote:
> Use new macros to have common prefix that also include GT#.
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: John Harrison <John.C.Harrison at Intel.com>
> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 60 ++++++++-----------
>   1 file changed, 26 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index b436dd7f12e4..bb98206304ee 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -27,6 +27,7 @@
>   
>   #include "intel_guc_ads.h"
>   #include "intel_guc_capture.h"
> +#include "intel_guc_print.h"
>   #include "intel_guc_submission.h"
>   
>   #include "i915_drv.h"
> @@ -1443,8 +1444,7 @@ static void guc_init_engine_stats(struct intel_guc *guc)
>   		int ret = guc_action_enable_usage_stats(guc);
>   
>   		if (ret)
> -			drm_err(&gt->i915->drm,
> -				"Failed to enable usage stats: %d!\n", ret);
> +			guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
>   	}
>   }
>   
> @@ -3585,8 +3585,7 @@ static int guc_request_alloc(struct i915_request *rq)
>   		intel_context_sched_disable_unpin(ce);
>   	else if (intel_context_is_closed(ce))
>   		if (wait_for(context_close_done(ce), 1500))
> -			drm_warn(&guc_to_gt(guc)->i915->drm,
> -				 "timed out waiting on context sched close before realloc\n");
> +			guc_warn(guc, "timed out waiting on context sched close before realloc\n");
>   	/*
>   	 * Call pin_guc_id here rather than in the pinning step as with
>   	 * dma_resv, contexts can be repeatedly pinned / unpinned trashing the
> @@ -4349,11 +4348,14 @@ static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
>   
>   	ret = intel_guc_send(guc, (u32 *)&policy->h2g,
>   			     __guc_scheduling_policy_action_size(policy));
> -	if (ret < 0)
> +	if (ret < 0) {
> +		guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n",
> +				ERR_PTR(ret));
>   		return ret;
> +	}
>   
>   	if (ret != policy->count) {
> -		drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
> +		guc_warn(guc, "global scheduler policy processed %d of %d KLVs!",
>   			 ret, policy->count);
>   		if (ret > policy->count)
>   			return -EPROTO;
> @@ -4367,7 +4369,7 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
>   	struct scheduling_policy policy;
>   	struct intel_gt *gt = guc_to_gt(guc);
>   	intel_wakeref_t wakeref;
> -	int ret = 0;
> +	int ret;
>   
>   	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
>   		return 0;
> @@ -4385,10 +4387,6 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
>   						yield, ARRAY_SIZE(yield));
>   
>   		ret = __guc_action_set_scheduling_policies(guc, &policy);
> -		if (ret)
> -			i915_probe_error(gt->i915,
> -					 "Failed to configure global scheduling policies: %pe!\n",
> -					 ERR_PTR(ret));
>   	}
>   
>   	return ret;
> @@ -4487,21 +4485,18 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
>   	struct intel_context *ce;
>   
>   	if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
> -		drm_err(&guc_to_gt(guc)->i915->drm,
> -			"Invalid ctx_id %u\n", ctx_id);
> +		guc_err(guc, "Invalid ctx_id %u\n", ctx_id);
>   		return NULL;
>   	}
>   
>   	ce = __get_context(guc, ctx_id);
>   	if (unlikely(!ce)) {
> -		drm_err(&guc_to_gt(guc)->i915->drm,
> -			"Context is NULL, ctx_id %u\n", ctx_id);
> +		guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id);
>   		return NULL;
>   	}
>   
>   	if (unlikely(intel_context_is_child(ce))) {
> -		drm_err(&guc_to_gt(guc)->i915->drm,
> -			"Context is child, ctx_id %u\n", ctx_id);
> +		guc_err(guc, "Context is child, ctx_id %u\n", ctx_id);
>   		return NULL;
>   	}
>   
> @@ -4516,7 +4511,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>   	u32 ctx_id;
>   
>   	if (unlikely(len < 1)) {
> -		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
> +		guc_err(guc, "Invalid length %u\n", len);
>   		return -EPROTO;
>   	}
>   	ctx_id = msg[0];
> @@ -4568,7 +4563,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
>   	u32 ctx_id;
>   
>   	if (unlikely(len < 2)) {
> -		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
> +		guc_err(guc, "Invalid length %u\n", len);
>   		return -EPROTO;
>   	}
>   	ctx_id = msg[0];
> @@ -4580,8 +4575,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
>   	if (unlikely(context_destroyed(ce) ||
>   		     (!context_pending_enable(ce) &&
>   		     !context_pending_disable(ce)))) {
> -		drm_err(&guc_to_gt(guc)->i915->drm,
> -			"Bad context sched_state 0x%x, ctx_id %u\n",
> +		guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n",
>   			ce->guc_state.sched_state, ctx_id);
>   		return -EPROTO;
>   	}
> @@ -4669,7 +4663,7 @@ static void guc_handle_context_reset(struct intel_guc *guc,
>   		capture_error_state(guc, ce);
>   		guc_context_replay(ce);
>   	} else {
> -		drm_info(&guc_to_gt(guc)->i915->drm,
> +		guc_info(guc,
>   			 "Ignoring context reset notification of exiting context 0x%04X on %s",
Could unwrap this line now.

>   			 ce->guc_id.id, ce->engine->name);
>   	}
> @@ -4683,7 +4677,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
>   	int ctx_id;
>   
>   	if (unlikely(len != 1)) {
> -		drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
> +		guc_err(guc, "Invalid length %u", len);
>   		return -EPROTO;
>   	}
>   
> @@ -4716,13 +4710,13 @@ int intel_guc_error_capture_process_msg(struct intel_guc *guc,
>   	u32 status;
>   
>   	if (unlikely(len != 1)) {
> -		drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
> +		guc_dbg(guc, "Invalid length %u", len);
>   		return -EPROTO;
>   	}
>   
>   	status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
>   	if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
> -		drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
> +		guc_warn(guc, "G2H-Error capture no space");
Maybe improve the English on this one? "Received 'no space for error 
capture' notification"? Or maybe just "No space for error capture"? I 
don't think you can get a similar error from anywhere on the i915 side.

>   
>   	intel_guc_capture_process(guc);
>   
> @@ -4765,13 +4759,12 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
>   					 const u32 *msg, u32 len)
>   {
>   	struct intel_engine_cs *engine;
> -	struct intel_gt *gt = guc_to_gt(guc);
>   	u8 guc_class, instance;
>   	u32 reason;
>   	unsigned long flags;
>   
>   	if (unlikely(len != 3)) {
> -		drm_err(&gt->i915->drm, "Invalid length %u", len);
> +		guc_err(guc, "Invalid length %u", len);
>   		return -EPROTO;
>   	}
>   
> @@ -4781,8 +4774,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
>   
>   	engine = intel_guc_lookup_engine(guc, guc_class, instance);
>   	if (unlikely(!engine)) {
> -		drm_err(&gt->i915->drm,
> -			"Invalid engine %d:%d", guc_class, instance);
> +		guc_err(guc, "Invalid engine %d:%d", guc_class, instance);
>   		return -EPROTO;
>   	}
>   
> @@ -4790,7 +4782,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
>   	 * This is an unexpected failure of a hardware feature. So, log a real
>   	 * error message not just the informational that comes with the reset.
>   	 */
> -	drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
> +	guc_err(guc, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
>   		guc_class, instance, engine->name, reason);
Again, redundant 'GuC' string. Also, maybe drop the 'request' given that 
this is a GuC generated reset not a reset request from i915. This 
message has resulted in confused bug reports in the past. So, maybe go 
with just "Engine reset failed on ...".

John.

>   
>   	spin_lock_irqsave(&guc->submission_state.lock, flags);
> @@ -5342,8 +5334,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
>   
>   		GEM_BUG_ON(!is_power_of_2(sibling->mask));
>   		if (sibling->mask & ve->base.mask) {
> -			DRM_DEBUG("duplicate %s entry in load balancer\n",
> -				  sibling->name);
> +			guc_dbg(guc, "duplicate %s entry in load balancer\n",
> +				sibling->name);
>   			err = -EINVAL;
>   			goto err_put;
>   		}
> @@ -5352,8 +5344,8 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
>   		ve->base.logical_mask |= sibling->logical_mask;
>   
>   		if (n != 0 && ve->base.class != sibling->class) {
> -			DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
> -				  sibling->class, ve->base.class);
> +			guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
> +				sibling->class, ve->base.class);
>   			err = -EINVAL;
>   			goto err_put;
>   		} else if (n == 0) {