[PATCH v8 04/26] drm/xe: Error handling in xe_force_wake_get()

Michal Wajdeczko michal.wajdeczko at intel.com
Tue Oct 8 18:05:12 UTC 2024



On 08.10.2024 09:10, Himal Prasad Ghimiray wrote:
> If an acknowledgment timeout occurs for a forcewake domain awake
> request, do not increment the reference count for the domain. This
> ensures that subsequent _get calls do not incorrectly assume the domain
> is awake. The return value is a mask of domains that got refcounted,
> and these domains need to be provided for subsequent xe_force_wake_put
> call.
> 
> While at it, add simple kernel-doc for xe_force_wake_get()
> 
> v3
> - Use explicit type for mask (Michal/Badal)
> - Improve kernel-doc (Michal)
> - Use unsigned int instead of abusing enum (Michal)
> 
> v5
> - Use unsigned int for return (MattB/Badal/Rodrigo)
> - use xe_gt_WARN for domain awake ack failure (Badal/Rodrigo)
> 
> v6
> - Change XE_FORCEWAKE_ALL to single bit, this helps accommodate
> actually refcounted domains in return. (Michal)
> - Modify commit message and warn message (Badal)
> - Remove unnecessary information in kernel-doc (Michal)
> 
> v7
> - Add assert condition for valid input domains (Badal)
> 
> Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Badal Nilawar <badal.nilawar at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Nirmoy Das <nirmoy.das at intel.com>
> Reviewed-by: Badal Nilawar <badal.nilawar at intel.com>(#rev5)
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_force_wake.c       | 53 +++++++++++++++++++-----
>  drivers/gpu/drm/xe/xe_force_wake.h       |  4 +-
>  drivers/gpu/drm/xe/xe_force_wake_types.h |  2 +-
>  3 files changed, 46 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
> index ac0419da7173..bfba276c48ac 100644
> --- a/drivers/gpu/drm/xe/xe_force_wake.c
> +++ b/drivers/gpu/drm/xe/xe_force_wake.c
> @@ -154,29 +154,62 @@ static int domain_sleep_wait(struct xe_gt *gt,
>  					 (ffs(tmp__) - 1))) && \
>  					 domain__->reg_ctl.addr)
>  
> -int xe_force_wake_get(struct xe_force_wake *fw,
> -		      enum xe_force_wake_domains domains)
> +/**
> + * xe_force_wake_get() : Increase the domain refcount
> + * @fw: struct xe_force_wake
> + * @domains: forcewake domains to get refcount on
> + *
> + * This function takes references for the input @domains and wakes them if

hmm, 'taking a reference' is an implementation detail compared to 'waking
the domain up from sleep', so I would swap those two statements

> + * they are asleep.If requested domain is XE_FORCEWAKE_ALL then only
                     ^
missing space after dot

> + * applicable/initialized domains will be considered for refcount and it is
> + * a caller responsibilty to check returned ref if it includes any specific

typo: s/responsibilty/responsibility/

> + * domain by using xe_force_wake_ref_has_domain() function. caller must call

s/caller/Caller

> + * xe_force_wake_put() function to decrease incremented refcounts.
> + *
> + * Return: opaque reference to woken domains or zero if none of requested
> + * domains were awake.
> + */
> +unsigned int xe_force_wake_get(struct xe_force_wake *fw,
> +			       enum xe_force_wake_domains domains)
>  {
>  	struct xe_gt *gt = fw->gt;
>  	struct xe_force_wake_domain *domain;
> -	enum xe_force_wake_domains tmp, woken = 0;
> +	unsigned int ref_incr = 0, awake_rqst = 0, awake_failed = 0;
> +	unsigned int tmp, ref_rqst;
>  	unsigned long flags;
> -	int ret = 0;
> +
> +	xe_gt_assert(gt, is_power_of_2(domains) && domains <= XE_FORCEWAKE_ALL);

better to split this into two asserts, so it is clear which condition failed

> +
> +	if (domains != XE_FORCEWAKE_ALL) {
> +		xe_gt_assert(gt, fw->initialized_domains & domains);

can we keep all asserts together at the top of the function as:

xe_gt_assert(gt, domains == XE_FORCEWAKE_ALL ||
	     fw->initialized_domains & domains);

> +		ref_rqst = domains;
> +	} else {
> +		ref_rqst = fw->initialized_domains;
> +	}
>  
>  	spin_lock_irqsave(&fw->lock, flags);
> -	for_each_fw_domain_masked(domain, domains, fw, tmp) {
> +	for_each_fw_domain_masked(domain, ref_rqst, fw, tmp) {
>  		if (!domain->ref++) {
> -			woken |= BIT(domain->id);
> +			awake_rqst |= BIT(domain->id);
>  			domain_wake(gt, domain);
>  		}
> +		ref_incr |= BIT(domain->id);
>  	}
> -	for_each_fw_domain_masked(domain, woken, fw, tmp) {
> -		ret |= domain_wake_wait(gt, domain);
> +	for_each_fw_domain_masked(domain, awake_rqst, fw, tmp) {
> +		if (domain_wake_wait(gt, domain) == 0) {
> +			fw->awake_domains |= BIT(domain->id);
> +		} else {
> +			awake_failed |= BIT(domain->id);
> +			--domain->ref;
> +		}
>  	}
> -	fw->awake_domains |= woken;
> +	ref_incr &= ~awake_failed;
>  	spin_unlock_irqrestore(&fw->lock, flags);
>  
> -	return ret;
> +	xe_gt_WARN(gt, awake_failed, "Forcewake domain%s %#x failed to acknowledge awake request\n",
> +		   str_plural(hweight_long(awake_failed)), awake_failed);
> +
> +	return (ref_incr == fw->initialized_domains) ? ref_incr | XE_FORCEWAKE_ALL : ref_incr;

maybe simpler:

	if (ref_incr == fw->initialized_domains)
		ref_incr |= XE_FORCEWAKE_ALL;

	return ref_incr;

>  }
>  
>  int xe_force_wake_put(struct xe_force_wake *fw,
> diff --git a/drivers/gpu/drm/xe/xe_force_wake.h b/drivers/gpu/drm/xe/xe_force_wake.h
> index 1608a55edc84..75fa1a19797c 100644
> --- a/drivers/gpu/drm/xe/xe_force_wake.h
> +++ b/drivers/gpu/drm/xe/xe_force_wake.h
> @@ -15,8 +15,8 @@ void xe_force_wake_init_gt(struct xe_gt *gt,
>  			   struct xe_force_wake *fw);
>  void xe_force_wake_init_engines(struct xe_gt *gt,
>  				struct xe_force_wake *fw);
> -int xe_force_wake_get(struct xe_force_wake *fw,
> -		      enum xe_force_wake_domains domains);
> +unsigned int xe_force_wake_get(struct xe_force_wake *fw,
> +			       enum xe_force_wake_domains domains);
>  int xe_force_wake_put(struct xe_force_wake *fw,
>  		      enum xe_force_wake_domains domains);
>  
> diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
> index fde17dc3d01e..899fbbcb3ea9 100644
> --- a/drivers/gpu/drm/xe/xe_force_wake_types.h
> +++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
> @@ -48,7 +48,7 @@ enum xe_force_wake_domains {
>  	XE_FW_MEDIA_VEBOX2	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX2),
>  	XE_FW_MEDIA_VEBOX3	= BIT(XE_FW_DOMAIN_ID_MEDIA_VEBOX3),
>  	XE_FW_GSC		= BIT(XE_FW_DOMAIN_ID_GSC),
> -	XE_FORCEWAKE_ALL	= BIT(XE_FW_DOMAIN_ID_COUNT) - 1
> +	XE_FORCEWAKE_ALL	= BIT(XE_FW_DOMAIN_ID_COUNT)
>  };
>  
>  /**

but overall LGTM so with above nits fixed,

	Reviewed-by: Michal Wajdeczko <michal.wajdeczko at intel.com>



More information about the Intel-xe mailing list