[Intel-gfx] [PATCH 2/2] drm/i915/guc: Add a selftest for FAST_REQUEST errors

Daniele Ceraolo Spurio daniele.ceraolospurio at intel.com
Thu Nov 9 20:33:05 UTC 2023



On 11/6/2023 3:59 PM, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> There is a mechanism for reporting errors from fire and forget H2G
> messages. This is the only way to find out about almost any error in
> the GuC backend submission path. So it would be useful to know that it
> is working.
>
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_guc.h    |   4 +
>   drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c |   9 ++
>   drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 122 ++++++++++++++++++++++
>   3 files changed, 135 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index 2b6dfe62c8f2a..e22c12ce245ad 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -297,6 +297,10 @@ struct intel_guc {
>   	 * @number_guc_id_stolen: The number of guc_ids that have been stolen
>   	 */
>   	int number_guc_id_stolen;
> +	/**
> +	 * @fast_response_selftest: Backdoor to CT handler for fast response selftest
> +	 */
> +	u32 fast_response_selftest;
>   #endif
>   };
>   
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index 89e314b3756bb..9d958afb78b7f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
>   		found = true;
>   		break;
>   	}
> +
> +#ifdef CONFIG_DRM_I915_SELFTEST
> +	if (!found && ct_to_guc(ct)->fast_response_selftest) {
> +		CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
> +		ct_to_guc(ct)->fast_response_selftest++;
> +		found = 1;

found = true? It's the same thing, but it's cleaner to assign boolean 
values to bool variables.

> +	}
> +#endif
> +
>   	if (!found) {
>   		CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
>   			 len, hxg[0], fence, ct->requests.last_fence);
> diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> index bfb72143566f6..97fbbb396336c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
> @@ -286,11 +286,133 @@ static int intel_guc_steal_guc_ids(void *arg)
>   	return ret;
>   }
>   
> +/*
> + * Send a context schedule H2G message with an invalid context id.
> + * This should generate a GUC_RESULT_INVALID_CONTEXT response.
> + */
> +static int bad_h2g(struct intel_guc *guc)
> +{
> +	u32 action[3], len = 0;

AFAICS this is a 2-dword (DW) command, so you can use action[2].

> +
> +	action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
> +	action[len++] = 0x12345678;
> +
> +	return intel_guc_send_nb(guc, action, len, 0);
> +}
> +
> +/*
> + * Set a spinner running to make sure the system is alive and active,
> + * then send a bad but asynchronous H2G command and wait to see if an
> + * error response is returned. If no response is received or if the
> + * spinner dies then the test will fail.
> + */
> +#define FAST_RESPONSE_TIMEOUT_MS	1000
> +static int intel_guc_fast_request(void *arg)
> +{
> +	struct intel_gt *gt = arg;
> +	struct intel_context *ce;
> +	struct igt_spinner spin;
> +	struct i915_request *rq;
> +	intel_wakeref_t wakeref;
> +	struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
> +	ktime_t before, now, delta;
> +	bool spinning = false;
> +	u64 delta_ms;
> +	int ret = 0;
> +
> +	if (!engine)
> +		return 0;
> +
> +	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce)) {
> +		ret = PTR_ERR(ce);
> +		gt_err(gt, "Failed to create spinner request: %pe\n", ce);
> +		goto err_pm;
> +	}
> +
> +	ret = igt_spinner_init(&spin, engine->gt);
> +	if (ret) {
> +		gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
> +		goto err_pm;
> +	}
> +	spinning = true;
> +
> +	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
> +	intel_context_put(ce);
> +	if (IS_ERR(rq)) {
> +		ret = PTR_ERR(rq);
> +		gt_err(gt, "Failed to create spinner request: %pe\n", rq);
> +		goto err_spin;
> +	}
> +
> +	ret = request_add_spin(rq, &spin);
> +	if (ret) {
> +		gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +	gt->uc.guc.fast_response_selftest = 1;
> +
> +	ret = bad_h2g(&gt->uc.guc);
> +	if (ret) {
> +		gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +	before = ktime_get();
> +	while (gt->uc.guc.fast_response_selftest == 1) {
> +		ret = i915_request_wait(rq, 0, 1);
> +		if (ret != -ETIME) {
> +			gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
> +			goto err_rq;
> +		}
> +		now = ktime_get();
> +		delta = ktime_sub(now, before);
> +		delta_ms = ktime_to_ms(delta);
> +
> +		if (delta_ms > FAST_RESPONSE_TIMEOUT_MS) {
> +			gt_err(gt, "Timed out waiting for fast request error!\n");
> +			ret = -ETIME;
> +			goto err_rq;
> +		}
> +	}


This seems a bit convoluted. Why not just wait for either the request 
to complete or the fast_response_selftest value to change?
Something like:

ret = wait_for(fast_response_selftest != 1 || i915_request_completed(rq),
                FAST_RESPONSE_TIMEOUT_MS);

if (ret || i915_request_completed(rq))
     // error


Daniele

> +
> +	if (gt->uc.guc.fast_response_selftest != 2) {
> +		gt_err(gt, "Unexpected fast response count: %d\n",
> +		       gt->uc.guc.fast_response_selftest);
> +		goto err_rq;
> +	}
> +
> +	igt_spinner_end(&spin);
> +	spinning = false;
> +
> +	ret = intel_selftest_wait_for_rq(rq);
> +	if (ret) {
> +		gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
> +		goto err_rq;
> +	}
> +
> +err_rq:
> +	i915_request_put(rq);
> +
> +err_spin:
> +	if (spinning)
> +		igt_spinner_end(&spin);
> +	igt_spinner_fini(&spin);
> +
> +err_pm:
> +	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
> +	return ret;
> +}
> +
>   int intel_guc_live_selftests(struct drm_i915_private *i915)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(intel_guc_scrub_ctbs),
>   		SUBTEST(intel_guc_steal_guc_ids),
> +		SUBTEST(intel_guc_fast_request),
>   	};
>   	struct intel_gt *gt = to_gt(i915);
>   



More information about the Intel-gfx mailing list