[PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

Alex Deucher alexdeucher at gmail.com
Thu Apr 9 15:21:02 UTC 2020


On Thu, Apr 9, 2020 at 10:54 AM Yintian Tao <yttao at amd.com> wrote:
>
> Under bare metal, there is no more else to take
> care of the GPU register access through MMIO.
> Under Virtualization, to access GPU register is
> implemented through KIQ during run-time due to
> world-switch.
>
> Therefore, under SR-IOV user can only access
> debugfs to r/w GPU registers when meets all
> three conditions below.
> - amdgpu_gpu_recovery=0
> - TDR happened
> - in_gpu_reset=0
>
> v2: merge amdgpu_virt_can_access_debugfs() into
>     amdgpu_virt_enable_access_debugfs()
>
> Signed-off-by: Yintian Tao <yttao at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 73 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  8 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 26 ++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
>  4 files changed, 108 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> index c0f9a651dc06..1a4894fa3693 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
> @@ -152,11 +152,16 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         if (use_bank) {
>                 if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
>                     (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return -EINVAL;
>                 }
>                 mutex_lock(&adev->grbm_idx_mutex);
> @@ -207,6 +212,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -255,6 +261,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -263,6 +273,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -275,6 +286,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -304,6 +316,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -311,6 +327,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -325,6 +342,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -354,6 +372,10 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -362,6 +384,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -374,6 +397,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -403,6 +427,10 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -410,6 +438,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -424,6 +453,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -453,6 +483,10 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -461,6 +495,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -473,6 +508,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -502,6 +538,10 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         while (size) {
>                 uint32_t value;
>
> @@ -509,6 +549,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>                 if (r) {
>                         pm_runtime_mark_last_busy(adev->ddev->dev);
>                         pm_runtime_put_autosuspend(adev->ddev->dev);
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
>                 }
>
> @@ -523,6 +564,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -651,16 +693,24 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
>
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (r)
> +       if (r) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return r;
> +       }
>
> -       if (size > valuesize)
> +       if (size > valuesize) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         outsize = 0;
>         x = 0;
> @@ -673,6 +723,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
>                 }
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return !r ? outsize : r;
>  }
>
> @@ -720,6 +771,10 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -734,16 +789,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>         pm_runtime_mark_last_busy(adev->ddev->dev);
>         pm_runtime_put_autosuspend(adev->ddev->dev);
>
> -       if (!x)
> +       if (!x) {
> +               amdgpu_virt_disable_access_debugfs(adev);
>                 return -EINVAL;
> +       }
>
>         while (size && (offset < x * 4)) {
>                 uint32_t value;
>
>                 value = data[offset >> 2];
>                 r = put_user(value, (uint32_t *)buf);
> -               if (r)
> +               if (r) {
> +                       amdgpu_virt_disable_access_debugfs(adev);
>                         return r;
> +               }
>
>                 result += 4;
>                 buf += 4;
> @@ -751,6 +810,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
>                 size -= 4;
>         }
>
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> @@ -805,6 +865,10 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>         if (r < 0)
>                 return r;
>
> +       r = amdgpu_virt_enable_access_debugfs(adev);
> +       if (r < 0)
> +               return r;
> +
>         /* switch to the specific se/sh/cu */
>         mutex_lock(&adev->grbm_idx_mutex);
>         amdgpu_gfx_select_se_sh(adev, se, sh, cu);
> @@ -840,6 +904,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
>
>  err:
>         kfree(data);
> +       amdgpu_virt_disable_access_debugfs(adev);
>         return result;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 2b99f5952375..35c381ec0423 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>         struct amdgpu_job *job = to_amdgpu_job(s_job);
>         struct amdgpu_task_info ti;
> +       struct amdgpu_device *adev = ring->adev;
>
>         memset(&ti, 0, sizeof(struct amdgpu_task_info));
>
> @@ -49,10 +50,13 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
>         DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
>                   ti.process_name, ti.tgid, ti.task_name, ti.pid);
>
> -       if (amdgpu_device_should_recover_gpu(ring->adev))
> +       if (amdgpu_device_should_recover_gpu(ring->adev)) {
>                 amdgpu_device_gpu_recover(ring->adev, job);
> -       else
> +       } else {
>                 drm_sched_suspend_timeout(&ring->sched);
> +               if (amdgpu_sriov_vf(adev))
> +                       adev->virt.tdr_debug = true;
> +       }
>  }
>
>  int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 4d06c79065bf..e8266847675b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -334,3 +334,29 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
>                         adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
>         }
>  }
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
> +{
> +       return amdgpu_sriov_is_debug(adev) ? true : false;
> +}
> +
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
> +{
> +       int ret = 0;
> +
> +       if (!amdgpu_sriov_vf(adev))
> +               return ret;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               ret = -EPERM;
> +
> +       return ret;

You can drop the ret variable in this function and just return
constants for each case.  E.g.,

> +       if (!amdgpu_sriov_vf(adev))
> +               return 0;
> +
> +       if (amdgpu_virt_can_access_debugfs(adev))
> +               adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
> +       else
> +               return -EPERM;
> +
> +       return 0;

Other than that the patch looks good to me.
Acked-by: Alex Deucher <alexander.deucher at amd.com>

Alex

> +}
> +
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
> +{
> +       if (amdgpu_sriov_vf(adev))
> +               adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index f6ae3c656304..8f20e6dbd7a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -265,6 +265,7 @@ struct amdgpu_virt {
>         uint32_t gim_feature;
>         uint32_t reg_access_mode;
>         int req_init_data_ver;
> +       bool tdr_debug;
>  };
>
>  #define amdgpu_sriov_enabled(adev) \
> @@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
>
>  #define amdgpu_sriov_is_pp_one_vf(adev) \
>         ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
> +#define amdgpu_sriov_is_debug(adev) \
> +       ((!adev->in_gpu_reset) && adev->virt.tdr_debug)
>
>  bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
>  void amdgpu_virt_init_setting(struct amdgpu_device *adev);
> @@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
>                                         unsigned int chksum);
>  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
>  void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +
> +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
> +int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
>  #endif
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list