[PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Tao, Yintian Yintian.Tao at amd.com
Thu Apr 9 13:42:46 UTC 2020


Hi Christian,


Many thanks for your review. I will submit a new patch according to your suggestions.


Best Regards
Yintian Tao
-----Original Message-----
From: Koenig, Christian <Christian.Koenig at amd.com> 
Sent: 2020年4月9日 20:42
To: Tao, Yintian <Yintian.Tao at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; Deng, Emily <Emily.Deng at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Am 09.04.20 um 08:01 schrieb Yintian Tao:
> On bare metal, nothing else needs to be taken care of when accessing
> GPU registers through MMIO.
> Under virtualization, GPU register access is implemented through
> KIQ at run time because of world switches.
>
> Therefore, under SR-IOV the user can only use debugfs to read/write GPU
> registers when all three conditions below are met.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> Signed-off-by: Yintian Tao <yttao at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  7 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 23 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>   4 files changed, 114 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..4f9780aabf5a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +

It would be better to merge these two functions together.

E.g. have amdgpu_virt_enable_access_debugfs() return an error if we can't allow this.

And -EINVAL is maybe not the right thing here, since this is not caused by an invalid value.

Maybe use -EPERM instead.

Regards,
Christian.

>   	if (use_bank) {
>   		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>   		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return -EINVAL;
>   		}
>   		mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +213,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -453,6 +488,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	while (size) {
>   		uint32_t value;
>   
> @@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   		if (r) {
>   			pm_runtime_mark_last_busy(adev->ddev->dev);
>   			pm_runtime_put_autosuspend(adev->ddev->dev);
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
>   		}
>   
> @@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>   
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> -	if (r)
> +	if (r) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return r;
> +	}
>   
> -	if (size > valuesize)
> +	if (size > valuesize) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return -EINVAL;
> +	}
>   
>   	outsize = 0;
>   	x = 0;
> @@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>   		}
>   	}
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return !r ? outsize : r;
>   }
>   
> @@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	/* switch to the specific se/sh/cu */
>   	mutex_lock(&adev->grbm_idx_mutex);
>   	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -734,16 +798,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   	pm_runtime_mark_last_busy(adev->ddev->dev);
>   	pm_runtime_put_autosuspend(adev->ddev->dev);
>   
> -	if (!x)
> +	if (!x) {
> +		amdgpu_virt_disable_access_debugfs(adev);
>   		return -EINVAL;
> +	}
>   
>   	while (size && (offset < x * 4)) {
>   		uint32_t value;
>   
>   		value = data[offset >> 2];
>   		r = put_user(value, (uint32_t *)buf);
> -		if (r)
> +		if (r) {
> +			amdgpu_virt_disable_access_debugfs(adev);
>   			return r;
> +		}
>   
>   		result += 4;
>   		buf += 4;
> @@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>   		size -= 4;
>   	}
>   
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> @@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>   	if (r < 0)
>   		return r;
>   
> +	if (!amdgpu_virt_can_access_debugfs(adev))
> +		return -EINVAL;
> +	else
> +		amdgpu_virt_enable_access_debugfs(adev);
> +
>   	/* switch to the specific se/sh/cu */
>   	mutex_lock(&adev->grbm_idx_mutex);
>   	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -840,6 +914,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>   
>   err:
>   	kfree(data);
> +	amdgpu_virt_disable_access_debugfs(adev);
>   	return result;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..993b75dde5d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>   	struct amdgpu_job *job = to_amdgpu_job(s_job);
>   	struct amdgpu_task_info ti;
> +	struct amdgpu_device *adev = ring->adev;
>   
>   	memset(&ti, 0, sizeof(struct amdgpu_task_info));
>   
> @@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>   	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>   		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
>   
> -	if (amdgpu_device_should_recover_gpu(ring->adev))
> +	if (amdgpu_device_should_recover_gpu(ring->adev)) {
>   		amdgpu_device_gpu_recover(ring->adev, job);
> -	else
> +	} else {
>   		drm_sched_suspend_timeout(&ring->sched);
> +		adev->virt.tdr_debug = true;
> +	}
>   }
>   
>   int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..d0dfe99ebc75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>   			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>   	}
>   }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
> +{
> +	if (!amdgpu_sriov_vf(adev))
> +		return true;
> +
> +	if (amdgpu_sriov_is_debug(adev))
> +		return true;
> +
> +	return false;
> +}
> +
> +void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
> +{
> +	if (amdgpu_sriov_vf(adev))
> +		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
> +{
> +	if (amdgpu_sriov_vf(adev))
> +		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..a01742b7bf12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>   	uint32_t gim_feature;
>   	uint32_t reg_access_mode;
>   	int req_init_data_ver;
> +	bool tdr_debug;
>   };
>   
>   #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>   
>   #define amdgpu_sriov_is_pp_one_vf(adev) \
>   	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>   
>   bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>   void amdgpu_virt_init_setting(struct amdgpu_device *adev);
> @@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>   					unsigned int chksum);
>   void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
>   void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
> +void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>   #endif



More information about the amd-gfx mailing list