[PATCH 2/4] drm/amdgpu: return -ENODEV to user space when vram is lost v2
Deucher, Alexander
Alexander.Deucher at amd.com
Tue May 23 15:08:06 UTC 2017
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
> Of Chunming Zhou
> Sent: Tuesday, May 16, 2017 5:26 AM
> To: amd-gfx at lists.freedesktop.org
> Cc: Zhou, David(ChunMing)
> Subject: [PATCH 2/4] drm/amdgpu: return -ENODEV to user space when
> vram is lost v2
>
> The ioctls below will return -ENODEV:
> amdgpu_cs_ioctl
> amdgpu_cs_wait_ioctl
> amdgpu_cs_wait_fences_ioctl
> amdgpu_gem_va_ioctl
> amdgpu_info_ioctl
Do we want to block the info ioctl? Isn't that where the lost context query is?
Alex
>
> v2: only check for the map and replace cases in amdgpu_gem_va_ioctl
>
> Change-Id: I8970cde3301b7cfeb4263cc0f0e54aece215c98e
> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 +++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++++++
> 5 files changed, 31 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f9da215..dcd6203 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -855,6 +855,7 @@ struct amdgpu_fpriv {
> struct amdgpu_ctx_mgr ctx_mgr;
> spinlock_t sem_handles_lock;
> struct idr sem_handles;
> + u32 vram_lost_counter;
> };
>
> /*
> @@ -1607,6 +1608,7 @@ struct amdgpu_device {
> atomic64_t num_bytes_moved;
> atomic64_t num_evictions;
> atomic_t gpu_reset_counter;
> + atomic_t vram_lost_counter;
>
> /* data for buffer migration throttling */
> struct {
> @@ -2005,6 +2007,8 @@ static inline void
> amdgpu_unregister_atpx_handler(void) {}
> extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
> extern const int amdgpu_max_kms_ioctl;
>
> +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
> + struct amdgpu_fpriv *fpriv);
> int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
> int amdgpu_driver_unload_kms(struct drm_device *dev);
> void amdgpu_driver_lastclose_kms(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index b803412..911aa02 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct
> amdgpu_cs_parser *p,
> int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file
> *filp)
> {
> struct amdgpu_device *adev = dev->dev_private;
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> union drm_amdgpu_cs *cs = data;
> struct amdgpu_cs_parser parser = {};
> bool reserved_buffers = false;
> @@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void
> *data, struct drm_file *filp)
>
> if (!adev->accel_working)
> return -EBUSY;
> + if (amdgpu_kms_vram_lost(adev, fpriv))
> + return -ENODEV;
>
> parser.adev = adev;
> parser.filp = filp;
> @@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device
> *dev, void *data,
> {
> union drm_amdgpu_wait_cs *wait = data;
> struct amdgpu_device *adev = dev->dev_private;
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
> struct amdgpu_ring *ring = NULL;
> struct amdgpu_ctx *ctx;
> struct fence *fence;
> long r;
>
> + if (amdgpu_kms_vram_lost(adev, fpriv))
> + return -ENODEV;
> r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait-
> >in.ip_instance,
> wait->in.ring, &ring);
> if (r)
> @@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct
> drm_device *dev, void *data,
> struct drm_file *filp)
> {
> struct amdgpu_device *adev = dev->dev_private;
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> union drm_amdgpu_wait_fences *wait = data;
> uint32_t fence_count = wait->in.fence_count;
> struct drm_amdgpu_fence *fences_user;
> struct drm_amdgpu_fence *fences;
> int r;
>
> + if (amdgpu_kms_vram_lost(adev, fpriv))
> + return -ENODEV;
> /* Get the fences from userspace */
> fences = kmalloc_array(fence_count, sizeof(struct
> drm_amdgpu_fence),
> GFP_KERNEL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index c56ae4a..2f0fcf8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2913,8 +2913,10 @@ int amdgpu_gpu_reset(struct amdgpu_device
> *adev)
> if (r)
> goto out;
> vram_lost = amdgpu_check_vram_lost(adev);
> - if (vram_lost)
> + if (vram_lost) {
> DRM_ERROR("VRAM is lost!\n");
> + atomic_inc(&adev->vram_lost_counter);
> + }
> r = amdgpu_ttm_recover_gart(adev);
> if (r)
> goto out;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index d8275ef..83bc94c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -802,6 +802,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
> void *data,
> args->operation);
> return -EINVAL;
> }
> + if ((args->operation == AMDGPU_VA_OP_MAP) ||
> + (args->operation == AMDGPU_VA_OP_REPLACE)) {
> + if (amdgpu_kms_vram_lost(adev, fpriv))
> + return -ENODEV;
> + }
>
> INIT_LIST_HEAD(&list);
> if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 368829a..a231aa1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct
> drm_amdgpu_info_firmware *fw_info,
> static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct
> drm_file *filp)
> {
> struct amdgpu_device *adev = dev->dev_private;
> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
> struct drm_amdgpu_info *info = data;
> struct amdgpu_mode_info *minfo = &adev->mode_info;
> void __user *out = (void __user *)(uintptr_t)info->return_pointer;
> @@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev,
> void *data, struct drm_file
>
> if (!info->return_size || !info->return_pointer)
> return -EINVAL;
> + if (amdgpu_kms_vram_lost(adev, fpriv))
> + return -ENODEV;
>
> switch (info->query) {
> case AMDGPU_INFO_VIRTUAL_RANGE: {
> @@ -779,6 +782,12 @@ void amdgpu_driver_lastclose_kms(struct
> drm_device *dev)
> vga_switcheroo_process_delayed_switch();
> }
>
> +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
> + struct amdgpu_fpriv *fpriv)
> +{
> + return fpriv->vram_lost_counter != atomic_read(&adev-
> >vram_lost_counter);
> +}
> +
> /**
> * amdgpu_driver_open_kms - drm callback for open
> *
> @@ -833,6 +842,7 @@ int amdgpu_driver_open_kms(struct drm_device
> *dev, struct drm_file *file_priv)
>
> amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>
> + fpriv->vram_lost_counter = atomic_read(&adev-
> >vram_lost_counter);
> file_priv->driver_priv = fpriv;
>
> out_suspend:
> --
> 1.9.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx mailing list