[Intel-xe] [PATCH] drm/xe: Use atomic instead of mutex for xe_device_mem_access_ongoing

Maarten Lankhorst maarten at lankhorst.se
Thu Mar 2 11:26:18 UTC 2023


On 2023-03-02 00:14, Rodrigo Vivi wrote:
> On Wed, Mar 01, 2023 at 08:36:29AM -0800, Lucas De Marchi wrote:
>> On Tue, Feb 28, 2023 at 11:17:30AM +0100, Maarten Lankhorst wrote:
>>> xe_guc_ct_fast_path() is called from an irq context, and cannot lock
>>> the mutex used by xe_device_mem_access_ongoing().
>>>
>>> Fortunately this is easy to fix, and the atomic guarantees are good enough
>>> to ensure xe->mem_access.hold_rpm is set before the last ref is dropped.
>>>
>>> As far as I can tell, the runtime ref in device access should be
>>> killable, but I don't dare to do it yet.
>> I don't follow this last paragraph. Could you point it out in the code?
> I also didn't understand this... if we remove that we will end up doing
> memory accesses with the device asleep...
I may be misreading the code, but without error checking by the 
callers, and without changing the prototype to return int, is there any 
way this can be guaranteed to work regardless?
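
As a purely hypothetical sketch of that calling convention (the killable
resume helper below does not exist, it is only to show the churn this
would impose on callers):

        int xe_device_mem_access_get(struct xe_device *xe)
        {
                int err;

                /* hypothetical: wait for resume, interruptible by fatal signals */
                err = xe_pm_runtime_resume_killable(xe);
                if (err)
                        return err;     /* e.g. -EINTR; every caller must now check this */

                atomic_inc(&xe->mem_access.ref);
                return 0;
        }

Propagating that error through every existing caller is the part I don't
dare to touch yet.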
>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>>> ---
>>> drivers/gpu/drm/xe/xe_device.c       | 17 ++++++++---------
>>> drivers/gpu/drm/xe/xe_device.h       | 14 ++++----------
>>> drivers/gpu/drm/xe/xe_device_types.h |  4 +---
>>> 3 files changed, 13 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
>>> index 4eb6786b11f0..ab179b1e24c1 100644
>>> --- a/drivers/gpu/drm/xe/xe_device.c
>>> +++ b/drivers/gpu/drm/xe/xe_device.c
>>> @@ -237,7 +237,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
>>> 	if (err)
>>> 		goto err;
>>>
>>> -	mutex_init(&xe->mem_access.lock);
>>> 	return xe;
>>>
>>> err_put:
>>> @@ -424,25 +423,25 @@ u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
>>> void xe_device_mem_access_get(struct xe_device *xe)
>>> {
>>> 	bool resumed = xe_pm_runtime_resume_if_suspended(xe);
>>> +	int ref = atomic_inc_return(&xe->mem_access.ref);
>>
>> +Matt Brost
>>
>> Any reason for not using kref?
> hmmm... my bad actually...
>
> I did consider the kref, but I can't remember why I didn't use it.
> I was recently asking myself the same question.

I looked at it; by design, I don't think a kref can go from 0 to 1.
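
A minimal illustration of why not (assuming a trivial release()
callback): the refcount_t underneath kref saturates and WARNs on any
increment from zero, since that normally means use-after-free:

        #include <linux/kref.h>

        static void release(struct kref *kref)
        {
                /* would free the containing object */
        }

        static void demo(void)
        {
                struct kref k;

                kref_init(&k);          /* refcount == 1 */
                kref_put(&k, release);  /* refcount == 0, release() runs */
                kref_get(&k);           /* WARNs: increment from 0 looks like use-after-free */
        }

So a counter that legitimately cycles 0 -> 1 -> 0, like mem_access.ref
here, can't be a kref.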

xe_device_mem_access_get() is usually called with force wake held 
explicitly or implicitly, so we shouldn't need the runtime pm ref there.
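
If that holds, a sketch of what the getter could shrink to (the assert
helper is hypothetical, only to show the shape):

        void xe_device_mem_access_get(struct xe_device *xe)
        {
                /* hypothetical: a held force wake implies the device is awake */
                xe_device_assert_awake(xe);
                atomic_inc(&xe->mem_access.ref);
        }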

>> Lucas De Marchi
>>
>>> -	mutex_lock(&xe->mem_access.lock);
>>> -	if (xe->mem_access.ref++ == 0)
>>> +	if (ref == 1)
>>> 		xe->mem_access.hold_rpm = xe_pm_runtime_get_if_active(xe);
> hmmm... I'm afraid this can be tricky without locks...
>
> if we have 3 simultaneous threads calling this:
> get
> get
> put
> get
>
> and they happen in this order, but the resume hasn't finished yet
> for the first one, then you will:
> 1. end up taking the runtime pm twice.
> 2. the second get will pass through thinking the gpu is already awake,
> but it might still be asleep.

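Alternatively, if we do keep the rpm ref, one way to close that window
would be to take it unconditionally before publishing the new count. A
minimal sketch (assuming xe_pm_runtime_get() resumes synchronously, that
every caller may sleep, and ignoring the recursion with the resume path
that the resume_if_suspended/get_if_active split seems designed around):

        void xe_device_mem_access_get(struct xe_device *xe)
        {
                /*
                 * Resume and take the rpm ref before the new count is
                 * visible, so anyone observing ref > 0 also observes
                 * an awake device.
                 */
                xe_pm_runtime_get(xe);
                atomic_inc(&xe->mem_access.ref);
        }

        void xe_device_mem_access_put(struct xe_device *xe)
        {
                /*
                 * Drop the count before the rpm ref, never the reverse,
                 * so ref > 0 always implies the rpm ref is still held.
                 */
                atomic_dec(&xe->mem_access.ref);
                xe_pm_runtime_put(xe);
        }

That would also drop the hold_rpm bookkeeping entirely, at the cost of
an rpm get/put on every call.
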
>>> -	mutex_unlock(&xe->mem_access.lock);
>>>
>>> 	/* The usage counter increased if device was immediately resumed */
>>> 	if (resumed)
>>> 		xe_pm_runtime_put(xe);
>>>
>>> -	XE_WARN_ON(xe->mem_access.ref == S32_MAX);
>>> +	XE_WARN_ON(ref == S32_MAX);
>>> }
>>>
>>> void xe_device_mem_access_put(struct xe_device *xe)
>>> {
>>> -	mutex_lock(&xe->mem_access.lock);
>>> -	if (--xe->mem_access.ref == 0 && xe->mem_access.hold_rpm)
>>> +	bool hold = xe->mem_access.hold_rpm;
>>> +	int ref = atomic_dec_return(&xe->mem_access.ref);
>>> +
>>> +	if (!ref && hold)
>>> 		xe_pm_runtime_put(xe);
>>> -	mutex_unlock(&xe->mem_access.lock);
>>>
>>> -	XE_WARN_ON(xe->mem_access.ref < 0);
>>> +	XE_WARN_ON(ref < 0);
>>> }
>>> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
>>> index 263620953c3b..96b4f3d7969e 100644
>>> --- a/drivers/gpu/drm/xe/xe_device.h
>>> +++ b/drivers/gpu/drm/xe/xe_device.h
>>> @@ -90,20 +90,14 @@ static inline struct xe_force_wake * gt_to_fw(struct xe_gt *gt)
>>> void xe_device_mem_access_get(struct xe_device *xe);
>>> void xe_device_mem_access_put(struct xe_device *xe);
>>>
>>> -static inline void xe_device_assert_mem_access(struct xe_device *xe)
>>> +static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
>>> {
>>> -	XE_WARN_ON(!xe->mem_access.ref);
>>> +	return atomic_read(&xe->mem_access.ref);
>>> }
>>>
>>> -static inline bool xe_device_mem_access_ongoing(struct xe_device *xe)
>>> +static inline void xe_device_assert_mem_access(struct xe_device *xe)
>>> {
>>> -	bool ret;
>>> -
>>> -	mutex_lock(&xe->mem_access.lock);
>>> -	ret = xe->mem_access.ref;
>>> -	mutex_unlock(&xe->mem_access.lock);
>>> -
>>> -	return ret;
>>> +	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
>>> }
>>>
>>> static inline bool xe_device_in_fault_mode(struct xe_device *xe)
>>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>>> index 9743987fc883..0b8c4ee0ad48 100644
>>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>>> @@ -230,10 +230,8 @@ struct xe_device {
>>> 	 * triggering additional actions when they occur.
>>> 	 */
>>> 	struct {
>>> -		/** @lock: protect the ref count */
>>> -		struct mutex lock;
>>> 		/** @ref: ref count of memory accesses */
>>> -		s32 ref;
>>> +		atomic_t ref;
>>> 		/** @hold_rpm: need to put rpm ref back at the end */
>>> 		bool hold_rpm;
>>> 	} mem_access;
>>> -- 
>>> 2.34.1
>>>

