[PATCH] drm/amdgpu: XGMI pstate switch initial support

Christian König ckoenig.leichtzumerken at gmail.com
Fri Mar 22 15:50:19 UTC 2019


Am 20.03.19 um 21:21 schrieb Liu, Shaoyun:
> Driver vote low to high pstate switch whenever there is an outstanding
> XGMI mapping request. Driver vote high to low pstate when all the
> outstanding XGMI mapping is terminated.

Only a few style nit picks, but apart from that reviewed-by: Christian 
König <christian.koenig at amd.com>

>
> Change-Id: I197501f853c47f844055c0e28c0ac00a1ff06607
> Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 +++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 26 ++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c   | 16 +++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h   |  2 ++
>   6 files changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 1db1921..c9c24d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -932,6 +932,11 @@ struct amdgpu_device {
>   	struct work_struct		xgmi_reset_work;
>   
>   	bool                            in_baco_reset;
> +
> +	/* counter of mapped memory through xgmi */
> +	atomic_t			xgmi_map_counter;

This doesn't need to be an atomic any more when you protect it with a 
mutex anyway.

> +	struct mutex  			lock_pstate;

Better put both fields that into the VM manager structure.

> +
>   };
>   
>   static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 2065837..a5af885 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2018,6 +2018,10 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
>   	r = amdgpu_device_enable_mgpu_fan_boost();
>   	if (r)
>   		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
> +
> +	/*set to low pstate by default */
> +	amdgpu_xgmi_set_pstate(adev, 0);
> +
>   }
>   
>   static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
> @@ -2467,6 +2471,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	mutex_init(&adev->virt.vf_errors.lock);
>   	hash_init(adev->mn_hash);
>   	mutex_init(&adev->lock_reset);
> +	mutex_init(&adev->lock_pstate);
>   
>   	amdgpu_device_check_arguments(adev);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 220a6a7..c430e82 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -72,6 +72,8 @@ struct amdgpu_bo_va {
>   
>   	/* If the mappings are cleared or filled */
>   	bool				cleared;
> +
> +	bool				is_xgmi;
>   };
>   
>   struct amdgpu_bo {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index c8f0e4c..3639cf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -34,6 +34,7 @@
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gmc.h"
> +#include "amdgpu_xgmi.h"
>   
>   /**
>    * DOC: GPUVM
> @@ -2352,6 +2353,14 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> +static bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
> +		struct amdgpu_device *bo_adev)
> +{
> +	return (adev != bo_adev &&
> +		adev->gmc.xgmi.hive_id &&
> +		adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id);
> +}

Maybe put that into amdgpu_xgmi.h, could be useful in other cases as well.

Regards,
Christian.

> +
>   /**
>    * amdgpu_vm_bo_add - add a bo to a specific vm
>    *
> @@ -2383,6 +2392,15 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>   	INIT_LIST_HEAD(&bo_va->valids);
>   	INIT_LIST_HEAD(&bo_va->invalids);
>   
> +	if (amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev))) {
> +		bo_va->is_xgmi = true;
> +		mutex_lock(&adev->lock_pstate);
> +		/* Power up XGMI if it can be potentially used */
> +		if (atomic_inc_return(&adev->xgmi_map_counter) == 1)
> +			amdgpu_xgmi_set_pstate(adev, 1);
> +		mutex_unlock(&adev->lock_pstate);
> +	}
> +
>   	return bo_va;
>   }
>   
> @@ -2801,6 +2819,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>   	}
>   
>   	dma_fence_put(bo_va->last_pt_update);
> +
> +	if (bo_va->is_xgmi) {
> +		mutex_lock(&adev->lock_pstate);
> +		if (atomic_dec_return(&adev->xgmi_map_counter) == 0)
> +			amdgpu_xgmi_set_pstate(adev, 0);
> +		mutex_unlock(&adev->lock_pstate);
> +	}
> +
>   	kfree(bo_va);
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index fcc4b05..3368347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -200,12 +200,26 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>   
>   	if (lock)
>   		mutex_lock(&tmp->hive_lock);
> -
> +	tmp->pstate = -1;
>   	mutex_unlock(&xgmi_mutex);
>   
>   	return tmp;
>   }
>   
> +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
> +{
> +	int ret = 0;
> +	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
> +
> +	if (!hive)
> +		return 0;
> +
> +	if (hive->pstate == pstate)
> +		return 0;
> +	/* Todo : sent the message to SMU for pstate change */
> +	return ret;
> +}
> +
>   int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
>   {
>   	int ret = -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index 24a3b03..7e1710fc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -33,11 +33,13 @@ struct amdgpu_hive_info {
>   	struct kobject *kobj;
>   	struct device_attribute dev_attr;
>   	struct amdgpu_device *adev;
> +	int pstate; /*0 -- low , 1 -- high , -1 unknown*/
>   };
>   
>   struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
>   int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
>   int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
>   void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
> +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
>   
>   #endif



More information about the amd-gfx mailing list