[PATCH v2 3/7] drm/amdgpu: Add new function to put GPU power profile

Tue Aug 22 04:51:10 UTC 2023

On 8/21/2023 12:17 PM, Arvind Yadav wrote:
> This patch adds a function which will clear the GPU
> power profile after job finished.
> 
> This is how it works:
> - schedular will set the GPU power profile based on ring_type.
> - Schedular will clear the GPU Power profile once job finished.
> - Here, the *_workload_profile_set function will set the GPU
>    power profile and the *_workload_profile_put function will
>    schedule the smu_delayed_work task after 100ms delay. This
>    smu_delayed_work task will clear a GPU power profile if any
>    new jobs are not scheduled within 100 ms. But if any new job
>    comes within 100ms then the *_workload_profile_set function
>    will cancel this work and set the GPU power profile based on
>    preferences.
> 
> v2:
> - Splitting workload_profile_set and workload_profile_put
>    into two separate patches.
> - Addressed review comment.
> 
> Cc: Shashank Sharma <shashank.sharma at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Arvind Yadav <Arvind.Yadav at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c  | 97 +++++++++++++++++++
>   drivers/gpu/drm/amd/include/amdgpu_workload.h |  3 +
>   2 files changed, 100 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
> index e661cc5b3d92..6367eb88a44d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_workload.c
> @@ -24,6 +24,9 @@
>   
>   #include "amdgpu.h"
>   
> +/* 100 millsecond timeout */
> +#define SMU_IDLE_TIMEOUT	msecs_to_jiffies(100)
> +
>   static enum PP_SMC_POWER_PROFILE
>   ring_to_power_profile(uint32_t ring_type)
>   {
> @@ -59,6 +62,80 @@ amdgpu_power_profile_set(struct amdgpu_device *adev,
>   	return ret;
>   }
>   
> +static int
> +amdgpu_power_profile_clear(struct amdgpu_device *adev,
> +			   enum PP_SMC_POWER_PROFILE profile)
> +{
> +	int ret = amdgpu_dpm_switch_power_profile(adev, profile, false);
> +
> +	if (!ret) {
> +		/* Clear the bit for the submitted workload profile */
> +		adev->smu_workload.submit_workload_status &= ~(1 << profile);
> +	}
> +
> +	return ret;
> +}
> +
> +static void
> +amdgpu_power_profile_idle_work_handler(struct work_struct *work)
> +{
> +
> +	struct amdgpu_smu_workload *workload = container_of(work,
> +						      struct amdgpu_smu_workload,
> +						      smu_delayed_work.work);
> +	struct amdgpu_device *adev = workload->adev;
> +	bool reschedule = false;
> +	int index  = fls(workload->submit_workload_status);
> +	int ret;
> +
> +	mutex_lock(&workload->workload_lock);
> +	for (; index > 0; index--) {

Why not use for_each_set_bit?

> +		int val = atomic_read(&workload->power_profile_ref[index]);
> +
> +		if (val) {
> +			reschedule = true;

Why do you need to do reschedule? For each put(), a schedule is called. 
If refcount is not zero, that means some other job has already set the 
profile. It is supposed to call put() and at that time, this job will be 
run to clear it anyway, right?

> +		} else {
> +			if (workload->submit_workload_status &
> +			    (1 << index)) {
> +				ret = amdgpu_power_profile_clear(adev, index);
> +				if (ret) {
> +					DRM_WARN("Failed to clear workload %s,error = %d\n",
> +						 amdgpu_workload_mode_name[index], ret);
> +					goto exit;
> +				}
> +			}
> +		}
> +	}
> +	if (reschedule)
> +		schedule_delayed_work(&workload->smu_delayed_work,
> +				      SMU_IDLE_TIMEOUT);
> +exit:
> +	mutex_unlock(&workload->workload_lock);
> +}
> +
> +void amdgpu_workload_profile_put(struct amdgpu_device *adev,
> +				 uint32_t ring_type)
> +{
> +	struct amdgpu_smu_workload *workload = &adev->smu_workload;
> +	enum PP_SMC_POWER_PROFILE profile = ring_to_power_profile(ring_type);
> +
> +	if (profile == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT)
> +		return;
> +
> +	mutex_lock(&workload->workload_lock);
> +
> +	if (!atomic_read(&workload->power_profile_ref[profile])) {
> +		DRM_WARN("Power profile %s ref. count error\n",
> +			 amdgpu_workload_mode_name[profile]);
> +	} else {
> +		atomic_dec(&workload->power_profile_ref[profile]);
> +		schedule_delayed_work(&workload->smu_delayed_work,
> +				      SMU_IDLE_TIMEOUT);
> +	}
> +
> +	mutex_unlock(&workload->workload_lock);
> +}
> +
>   void amdgpu_workload_profile_set(struct amdgpu_device *adev,
>   				 uint32_t ring_type)
>   {
> @@ -70,13 +147,30 @@ void amdgpu_workload_profile_set(struct amdgpu_device *adev,
>   		return;
>   
>   	mutex_lock(&workload->workload_lock);
> +	cancel_delayed_work_sync(&workload->smu_delayed_work);

This is a potential deadlock. You already hold the mutex and then 
waiting for idle work to finish. Idle work could now be at the point 
where it is waiting for the same mutex. Suggest not to call cancel here 
and let the mutex take care of the sequence.

>   
>   	ret = amdgpu_power_profile_set(adev, profile);
>   	if (ret) {
>   		DRM_WARN("Failed to set workload profile to %s, error = %d\n",
>   			 amdgpu_workload_mode_name[profile], ret);
> +		goto exit;
> +	}
> +
> +	/* Clear the already finished jobs of higher power profile*/
> +	for (int index = fls(workload->submit_workload_status);
> +	     index > profile; index--) {
> +		if (!atomic_read(&workload->power_profile_ref[index]) &&
> +		    workload->submit_workload_status & (1 << index)) {
> +			ret = amdgpu_power_profile_clear(adev, index);
> +			if (ret) {
> +				DRM_WARN("Failed to clear workload %s, err = %d\n",
> +					 amdgpu_workload_mode_name[profile], ret);
> +				goto exit;
> +			}
> +		}

If you follow the earlier comment, that will keep this logic only at one 
place - i.e, at idle work handler. Basically just let the idle work 
handle its duty. If some job starts running during the clear call, it's 
just unfortunate timing and let the next set() take the lock and request 
profile again.

Thanks,
Lijo

>   	}
>   
> +exit:
>   	mutex_unlock(&workload->workload_lock);
>   }
>   
> @@ -87,6 +181,8 @@ void amdgpu_workload_profile_init(struct amdgpu_device *adev)
>   	adev->smu_workload.initialized = true;
>   
>   	mutex_init(&adev->smu_workload.workload_lock);
> +	INIT_DELAYED_WORK(&adev->smu_workload.smu_delayed_work,
> +			  amdgpu_power_profile_idle_work_handler);
>   }
>   
>   void amdgpu_workload_profile_fini(struct amdgpu_device *adev)
> @@ -94,6 +190,7 @@ void amdgpu_workload_profile_fini(struct amdgpu_device *adev)
>   	if (!adev->smu_workload.initialized)
>   		return;
>   
> +	cancel_delayed_work_sync(&adev->smu_workload.smu_delayed_work);
>   	adev->smu_workload.submit_workload_status = 0;
>   	adev->smu_workload.initialized = false;
>   	mutex_destroy(&adev->smu_workload.workload_lock);
> diff --git a/drivers/gpu/drm/amd/include/amdgpu_workload.h b/drivers/gpu/drm/amd/include/amdgpu_workload.h
> index 5022f28fc2f9..ee1f87257f2d 100644
> --- a/drivers/gpu/drm/amd/include/amdgpu_workload.h
> +++ b/drivers/gpu/drm/amd/include/amdgpu_workload.h
> @@ -46,6 +46,9 @@ static const char * const amdgpu_workload_mode_name[] = {
>   	"Window3D"
>   };
>   
> +void amdgpu_workload_profile_put(struct amdgpu_device *adev,
> +				 uint32_t ring_type);
> +
>   void amdgpu_workload_profile_set(struct amdgpu_device *adev,
>   				 uint32_t ring_type);
>