[PATCH 2/4] drm/amdgpu: return -ENODEV to user space when vram is lost v2

zhoucm1 david1.zhou at amd.com
Wed May 24 02:20:46 UTC 2017



On 2017年05月23日 23:16, Christian König wrote:
> Am 23.05.2017 um 17:08 schrieb Deucher, Alexander:
>>> -----Original Message-----
>>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
>>> Of Chunming Zhou
>>> Sent: Tuesday, May 16, 2017 5:26 AM
>>> To: amd-gfx at lists.freedesktop.org
>>> Cc: Zhou, David(ChunMing)
>>> Subject: [PATCH 2/4] drm/amdgpu: return -ENODEV to user space when
>>> vram is lost v2
>>>
>>> The ioctls below will return -ENODEV:
>>> amdgpu_cs_ioctl
>>> amdgpu_cs_wait_ioctl
>>> amdgpu_cs_wait_fences_ioctl
>>> amdgpu_gem_va_ioctl
>>> amdgpu_info_ioctl
>> Do we want to block the info ioctl?  Isn't that where the lost 
>> context query is?
>
> No, that's amdgpu_ctx_ioctl.
>
> But I think the conclusion is that we want to move the vram_lost 
> counter to be per CTX and not per device.
Yes, Monk is working on it for the virt case; once that is done, I think we can reuse it.

Regards,
David zhou
>
> Christian.
>
>>
>> Alex
>>
>>> v2: only for map and replace cases in amdgpu_gem_va_ioctl
>>>
>>> Change-Id: I8970cde3301b7cfeb4263cc0f0e54aece215c98e
>>> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  4 ++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |  9 +++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  4 +++-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |  5 +++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    | 10 ++++++++++
>>>   5 files changed, 31 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index f9da215..dcd6203 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -855,6 +855,7 @@ struct amdgpu_fpriv {
>>>       struct amdgpu_ctx_mgr    ctx_mgr;
>>>       spinlock_t        sem_handles_lock;
>>>       struct idr        sem_handles;
>>> +    u32            vram_lost_counter;
>>>   };
>>>
>>>   /*
>>> @@ -1607,6 +1608,7 @@ struct amdgpu_device {
>>>       atomic64_t            num_bytes_moved;
>>>       atomic64_t            num_evictions;
>>>       atomic_t            gpu_reset_counter;
>>> +    atomic_t            vram_lost_counter;
>>>
>>>       /* data for buffer migration throttling */
>>>       struct {
>>> @@ -2005,6 +2007,8 @@ static inline void
>>> amdgpu_unregister_atpx_handler(void) {}
>>>   extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
>>>   extern const int amdgpu_max_kms_ioctl;
>>>
>>> +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
>>> +              struct amdgpu_fpriv *fpriv);
>>>   int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long 
>>> flags);
>>>   int amdgpu_driver_unload_kms(struct drm_device *dev);
>>>   void amdgpu_driver_lastclose_kms(struct drm_device *dev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index b803412..911aa02 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct
>>> amdgpu_cs_parser *p,
>>>   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct 
>>> drm_file
>>> *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>       union drm_amdgpu_cs *cs = data;
>>>       struct amdgpu_cs_parser parser = {};
>>>       bool reserved_buffers = false;
>>> @@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *filp)
>>>
>>>       if (!adev->accel_working)
>>>           return -EBUSY;
>>> +    if (amdgpu_kms_vram_lost(adev, fpriv))
>>> +        return -ENODEV;
>>>
>>>       parser.adev = adev;
>>>       parser.filp = filp;
>>> @@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device
>>> *dev, void *data,
>>>   {
>>>       union drm_amdgpu_wait_cs *wait = data;
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>       unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
>>>       struct amdgpu_ring *ring = NULL;
>>>       struct amdgpu_ctx *ctx;
>>>       struct fence *fence;
>>>       long r;
>>>
>>> +    if (amdgpu_kms_vram_lost(adev, fpriv))
>>> +        return -ENODEV;
>>>       r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait-
>>>> in.ip_instance,
>>>                      wait->in.ring, &ring);
>>>       if (r)
>>> @@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct
>>> drm_device *dev, void *data,
>>>                   struct drm_file *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>       union drm_amdgpu_wait_fences *wait = data;
>>>       uint32_t fence_count = wait->in.fence_count;
>>>       struct drm_amdgpu_fence *fences_user;
>>>       struct drm_amdgpu_fence *fences;
>>>       int r;
>>>
>>> +    if (amdgpu_kms_vram_lost(adev, fpriv))
>>> +        return -ENODEV;
>>>       /* Get the fences from userspace */
>>>       fences = kmalloc_array(fence_count, sizeof(struct
>>> drm_amdgpu_fence),
>>>               GFP_KERNEL);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index c56ae4a..2f0fcf8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2913,8 +2913,10 @@ int amdgpu_gpu_reset(struct amdgpu_device
>>> *adev)
>>>               if (r)
>>>                   goto out;
>>>               vram_lost = amdgpu_check_vram_lost(adev);
>>> -            if (vram_lost)
>>> +            if (vram_lost) {
>>>                   DRM_ERROR("VRAM is lost!\n");
>>> +                atomic_inc(&adev->vram_lost_counter);
>>> +            }
>>>               r = amdgpu_ttm_recover_gart(adev);
>>>               if (r)
>>>                   goto out;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d8275ef..83bc94c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -802,6 +802,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>>               args->operation);
>>>           return -EINVAL;
>>>       }
>>> +    if ((args->operation == AMDGPU_VA_OP_MAP) ||
>>> +        (args->operation == AMDGPU_VA_OP_REPLACE)) {
>>> +        if (amdgpu_kms_vram_lost(adev, fpriv))
>>> +            return -ENODEV;
>>> +    }
>>>
>>>       INIT_LIST_HEAD(&list);
>>>       if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index 368829a..a231aa1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct
>>> drm_amdgpu_info_firmware *fw_info,
>>>   static int amdgpu_info_ioctl(struct drm_device *dev, void *data, 
>>> struct
>>> drm_file *filp)
>>>   {
>>>       struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>       struct drm_amdgpu_info *info = data;
>>>       struct amdgpu_mode_info *minfo = &adev->mode_info;
>>>       void __user *out = (void __user 
>>> *)(uintptr_t)info->return_pointer;
>>> @@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device 
>>> *dev,
>>> void *data, struct drm_file
>>>
>>>       if (!info->return_size || !info->return_pointer)
>>>           return -EINVAL;
>>> +    if (amdgpu_kms_vram_lost(adev, fpriv))
>>> +        return -ENODEV;
>>>
>>>       switch (info->query) {
>>>       case AMDGPU_INFO_VIRTUAL_RANGE: {
>>> @@ -779,6 +782,12 @@ void amdgpu_driver_lastclose_kms(struct
>>> drm_device *dev)
>>>       vga_switcheroo_process_delayed_switch();
>>>   }
>>>
>>> +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
>>> +              struct amdgpu_fpriv *fpriv)
>>> +{
>>> +    return fpriv->vram_lost_counter != atomic_read(&adev-
>>>> vram_lost_counter);
>>> +}
>>> +
>>>   /**
>>>    * amdgpu_driver_open_kms - drm callback for open
>>>    *
>>> @@ -833,6 +842,7 @@ int amdgpu_driver_open_kms(struct drm_device
>>> *dev, struct drm_file *file_priv)
>>>
>>>       amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>>>
>>> +    fpriv->vram_lost_counter = atomic_read(&adev-
>>>> vram_lost_counter);
>>>       file_priv->driver_priv = fpriv;
>>>
>>>   out_suspend:
>>> -- 
>>> 1.9.1
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>



More information about the amd-gfx mailing list