[PATCH] drm/amdgpu: fix race between pstate and remote buffer map

Felix Kuehling felix.kuehling at amd.com
Fri Apr 17 04:52:14 UTC 2020


Am 2020-04-16 um 7:59 a.m. schrieb Jonathan Kim:
> Vega20 arbitrates pstate at the hive level, not the device level. The last
> peer to unmap a remote buffer could drop the P-state while another process
> still has a remote buffer mapped.
>
> With this fix, P-state switching still needs to be disabled for now, as an
> SMU bug was discovered on synchronous P2P transfers. This should be fixed
> in the next FW update.
>
> Signed-off-by: Jonathan Kim <Jonathan.Kim at amd.com>

This looks reasonable. I have some inline suggestions for clearer
variable names. With those fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

See inline ...
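
As a summary for other readers: the core of the fix is moving the
reference count for remote mappings from each amdgpu_device into the
shared hive, so the last unmap on one device can no longer drop the
P-state while a peer still holds a mapping. A toy model of the idea
(locking elided, names hypothetical, not the literal patch):

	/* one shared counter per hive instead of one per device */
	struct hive {
		int hi_req_count;	/* outstanding high-pstate requests */
		int pstate;		/* current hive pstate */
	};

	/* map: the first requester raises the whole hive */
	static void hive_map(struct hive *h)
	{
		if (h->hi_req_count++ == 0)
			h->pstate = 1;
	}

	/* unmap: pstate drops only when the hive-wide count hits zero */
	static void hive_unmap(struct hive *h)
	{
		if (--h->hi_req_count == 0)
			h->pstate = 0;
	}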


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  2 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 16 ++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h   |  4 --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 66 ++++++++++++------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  6 +++
>  5 files changed, 43 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4e1d4cfe7a9f..94dff899248d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -982,8 +982,6 @@ struct amdgpu_device {
>  	uint64_t			unique_id;
>  	uint64_t	df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
>  
> -	/* device pstate */
> -	int				pstate;
>  	/* enable runtime pm on the device */
>  	bool                            runpm;
>  	bool                            in_runpm;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index accbb34ea670..95560eea61c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2135,11 +2135,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>  	if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
>  	    (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)) {
>  		bo_va->is_xgmi = true;
> -		mutex_lock(&adev->vm_manager.lock_pstate);
>  		/* Power up XGMI if it can be potentially used */
> -		if (++adev->vm_manager.xgmi_map_counter == 1)
> -			amdgpu_xgmi_set_pstate(adev, 1);
> -		mutex_unlock(&adev->vm_manager.lock_pstate);
> +		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);
>  	}
>  
>  	return bo_va;
> @@ -2562,12 +2559,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
>  
>  	dma_fence_put(bo_va->last_pt_update);
>  
> -	if (bo && bo_va->is_xgmi) {
> -		mutex_lock(&adev->vm_manager.lock_pstate);
> -		if (--adev->vm_manager.xgmi_map_counter == 0)
> -			amdgpu_xgmi_set_pstate(adev, 0);
> -		mutex_unlock(&adev->vm_manager.lock_pstate);
> -	}
> +	if (bo && bo_va->is_xgmi)
> +		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN);
>  
>  	kfree(bo_va);
>  }
> @@ -3177,9 +3170,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>  
>  	idr_init(&adev->vm_manager.pasid_idr);
>  	spin_lock_init(&adev->vm_manager.pasid_lock);
> -
> -	adev->vm_manager.xgmi_map_counter = 0;
> -	mutex_init(&adev->vm_manager.lock_pstate);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index ea771d84bf2b..c8e68d7890bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -357,10 +357,6 @@ struct amdgpu_vm_manager {
>  	 */
>  	struct idr				pasid_idr;
>  	spinlock_t				pasid_lock;
> -
> -	/* counter of mapped memory through xgmi */
> -	uint32_t				xgmi_map_counter;
> -	struct mutex				lock_pstate;
>  };
>  
>  #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> index 8c3215505e78..52f45b9fe271 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> @@ -373,7 +373,13 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo
>  
>  	if (lock)
>  		mutex_lock(&tmp->hive_lock);
> -	tmp->pstate = -1;
> +	tmp->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
> +	tmp->high_gpu = NULL;
> +	/*
> +	 * hive pstate on boot is high in vega20 so we have to go to low
> +	 * pstate after boot.
> +	 */
> +	tmp->map_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
>  	mutex_unlock(&xgmi_mutex);
>  
>  	return tmp;
> @@ -383,50 +389,49 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
>  {
>  	int ret = 0;
>  	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
> -	struct amdgpu_device *tmp_adev;
> -	bool update_hive_pstate = true;
> -	bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20;
> +	struct amdgpu_device *tar_adev = hive->high_gpu ?
> +							hive->high_gpu : adev;

I'm not sure what "tar" stands for here. I guess the "r" stands for
"request"?


> +	bool map = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
> +	bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
>  
> -	if (!hive)
> +	if (!hive || adev->asic_type == CHIP_VEGA20)
>  		return 0;
>  
>  	mutex_lock(&hive->hive_lock);
>  
> -	if (hive->pstate == pstate) {
> -		adev->pstate = is_high_pstate ? pstate : adev->pstate;
> +	if (map)
> +		hive->map_count++;
> +	else
> +		hive->map_count--;
> +
> +	/*
> +	 * Vega20 only needs a single peer to request pstate high for the hive to
> +	 * go high but all peers must request pstate low for the hive to go low
> +	 */
> +	if (hive->pstate == pstate || (!map && hive->map_count && !init_low))
>  		goto out;
> -	}
>  
> -	dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
> +	dev_dbg(tar_adev->dev, "Set xgmi pstate %d.\n", pstate);
>  
> -	ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate);
> +	ret = amdgpu_dpm_set_xgmi_pstate(tar_adev, pstate);
>  	if (ret) {
> -		dev_err(adev->dev,
> +		dev_err(tar_adev->dev,
>  			"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
> -			adev->gmc.xgmi.node_id,
> -			adev->gmc.xgmi.hive_id, ret);
> +			tar_adev->gmc.xgmi.node_id,
> +			tar_adev->gmc.xgmi.hive_id, ret);
>  		goto out;
>  	}
>  
> -	/* Update device pstate */
> -	adev->pstate = pstate;
> -
> -	/*
> -	 * Update the hive pstate only all devices of the hive
> -	 * are in the same pstate
> -	 */
> -	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> -		if (tmp_adev->pstate != adev->pstate) {
> -			update_hive_pstate = false;
> -			break;
> -		}
> -	}
> -	if (update_hive_pstate || is_high_pstate)
> +	if (init_low)
> +		hive->pstate = hive->map_count ?
> +					hive->pstate : AMDGPU_XGMI_PSTATE_MIN;
> +	else {
>  		hive->pstate = pstate;
> -
> +		hive->high_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ?
> +							adev : NULL;
> +	}
>  out:
>  	mutex_unlock(&hive->hive_lock);
> -
>  	return ret;
>  }
>  
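
Side note for other readers, not a requested change: ignoring the
one-time init_low case, the early-out condition above implements this
rule (a sketch with the renamed fields I suggest below):

	static bool pstate_change_needed(int hive_pstate, int new_pstate,
					 int hi_req_count, bool is_hi_req)
	{
		/* nothing to do if the hive is already there */
		if (hive_pstate == new_pstate)
			return false;
		/* a single high request is enough to raise the hive ... */
		if (is_hi_req)
			return true;
		/* ... but it goes low only when the last requester drops out */
		return hi_req_count == 0;
	}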
> @@ -507,9 +512,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
>  		goto exit;
>  	}
>  
> -	/* Set default device pstate */
> -	adev->pstate = -1;
> -
>  	top_info = &adev->psp.xgmi_context.top_info;
>  
>  	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> index d5a63904ec33..b5c4acf2316d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
> @@ -25,6 +25,10 @@
>  #include <drm/task_barrier.h>
>  #include "amdgpu_psp.h"
>  
> +#define AMDGPU_XGMI_PSTATE_UNKNOWN	-1
> +#define AMDGPU_XGMI_PSTATE_MIN		0
> +#define AMDGPU_XGMI_PSTATE_MAX_VEGA20	1

You could define this as an enum and use that below.
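
Something like this, for example:

	enum amdgpu_xgmi_pstate {
		AMDGPU_XGMI_PSTATE_UNKNOWN = -1,
		AMDGPU_XGMI_PSTATE_MIN = 0,
		AMDGPU_XGMI_PSTATE_MAX_VEGA20 = 1,
	};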


> +
>  struct amdgpu_hive_info {
>  	uint64_t		hive_id;
>  	struct list_head	device_list;
> @@ -34,6 +38,8 @@ struct amdgpu_hive_info {
>  	struct device_attribute dev_attr;
>  	struct amdgpu_device *adev;
>  	int pstate; /*0 -- low , 1 -- high , -1 unknown*/

The comment here isn't really needed any more with the definitions
above. But if you make that an enum, you can make the connection clear
by using the enum type here instead of int.


> +	int map_count;
> +	struct amdgpu_device *high_gpu;

The name "map_count" doesn't make much sense in the context of the XGMI
hive. And it doesn't show how the map_count relates to high_gpu. I'd
rename these to high_request_count and high_request_gpu. Or some
abreviation if that's to verbose (hi_req_count, hi_req_gpu).
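
Combined with the enum suggestion above, the struct members could then
look something like this:

	enum amdgpu_xgmi_pstate pstate;
	int hi_req_count;
	struct amdgpu_device *hi_req_gpu;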

Regards,
  Felix


>  	struct task_barrier tb;
>  };
>  

