[PATCH 2/4] drm/amdgpu: return -ENODEV to user space when vram is lost v2
zhoucm1
david1.zhou at amd.com
Wed May 24 02:20:46 UTC 2017
On 2017年05月23日 23:16, Christian König wrote:
> Am 23.05.2017 um 17:08 schrieb Deucher, Alexander:
>>> -----Original Message-----
>>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
>>> Of Chunming Zhou
>>> Sent: Tuesday, May 16, 2017 5:26 AM
>>> To: amd-gfx at lists.freedesktop.org
>>> Cc: Zhou, David(ChunMing)
>>> Subject: [PATCH 2/4] drm/amdgpu: return -ENODEV to user space when
>>> vram is lost v2
>>>
>>> The following ioctls will return -ENODEV:
>>> amdgpu_cs_ioctl
>>> amdgpu_cs_wait_ioctl
>>> amdgpu_cs_wait_fences_ioctl
>>> amdgpu_gem_va_ioctl
>>> amdgpu_info_ioctl
>> Do we want to block the info ioctl? Isn't that where the lost
>> context query is?
>
> No, that's amdgpu_ctx_ioctl.
>
> But I think the conclusion is that we want to move the vram_lost
> counter to be per CTX and not per device.
Yes, Monk is working on that for the virt case; once it is done, I think we can reuse it.
Regards,
David zhou
>
> Christian.
>
>>
>> Alex
>>
>>> v2: only for map and replace cases in amdgpu_gem_va_ioctl
>>>
>>> Change-Id: I8970cde3301b7cfeb4263cc0f0e54aece215c98e
>>> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 +++++++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 +++++
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++++++
>>> 5 files changed, 31 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index f9da215..dcd6203 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -855,6 +855,7 @@ struct amdgpu_fpriv {
>>> struct amdgpu_ctx_mgr ctx_mgr;
>>> spinlock_t sem_handles_lock;
>>> struct idr sem_handles;
>>> + u32 vram_lost_counter;
>>> };
>>>
>>> /*
>>> @@ -1607,6 +1608,7 @@ struct amdgpu_device {
>>> atomic64_t num_bytes_moved;
>>> atomic64_t num_evictions;
>>> atomic_t gpu_reset_counter;
>>> + atomic_t vram_lost_counter;
>>>
>>> /* data for buffer migration throttling */
>>> struct {
>>> @@ -2005,6 +2007,8 @@ static inline void
>>> amdgpu_unregister_atpx_handler(void) {}
>>> extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
>>> extern const int amdgpu_max_kms_ioctl;
>>>
>>> +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
>>> + struct amdgpu_fpriv *fpriv);
>>> int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long
>>> flags);
>>> int amdgpu_driver_unload_kms(struct drm_device *dev);
>>> void amdgpu_driver_lastclose_kms(struct drm_device *dev);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index b803412..911aa02 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -1097,6 +1097,7 @@ static int amdgpu_cs_submit(struct
>>> amdgpu_cs_parser *p,
>>> int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct
>>> drm_file
>>> *filp)
>>> {
>>> struct amdgpu_device *adev = dev->dev_private;
>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> union drm_amdgpu_cs *cs = data;
>>> struct amdgpu_cs_parser parser = {};
>>> bool reserved_buffers = false;
>>> @@ -1104,6 +1105,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void
>>> *data, struct drm_file *filp)
>>>
>>> if (!adev->accel_working)
>>> return -EBUSY;
>>> + if (amdgpu_kms_vram_lost(adev, fpriv))
>>> + return -ENODEV;
>>>
>>> parser.adev = adev;
>>> parser.filp = filp;
>>> @@ -1165,12 +1168,15 @@ int amdgpu_cs_wait_ioctl(struct drm_device
>>> *dev, void *data,
>>> {
>>> union drm_amdgpu_wait_cs *wait = data;
>>> struct amdgpu_device *adev = dev->dev_private;
>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
>>> struct amdgpu_ring *ring = NULL;
>>> struct amdgpu_ctx *ctx;
>>> struct fence *fence;
>>> long r;
>>>
>>> + if (amdgpu_kms_vram_lost(adev, fpriv))
>>> + return -ENODEV;
>>> r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
>>> wait->in.ring, &ring);
>>> if (r)
>>> @@ -1344,12 +1350,15 @@ int amdgpu_cs_wait_fences_ioctl(struct
>>> drm_device *dev, void *data,
>>> struct drm_file *filp)
>>> {
>>> struct amdgpu_device *adev = dev->dev_private;
>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> union drm_amdgpu_wait_fences *wait = data;
>>> uint32_t fence_count = wait->in.fence_count;
>>> struct drm_amdgpu_fence *fences_user;
>>> struct drm_amdgpu_fence *fences;
>>> int r;
>>>
>>> + if (amdgpu_kms_vram_lost(adev, fpriv))
>>> + return -ENODEV;
>>> /* Get the fences from userspace */
>>> fences = kmalloc_array(fence_count, sizeof(struct
>>> drm_amdgpu_fence),
>>> GFP_KERNEL);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index c56ae4a..2f0fcf8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -2913,8 +2913,10 @@ int amdgpu_gpu_reset(struct amdgpu_device
>>> *adev)
>>> if (r)
>>> goto out;
>>> vram_lost = amdgpu_check_vram_lost(adev);
>>> - if (vram_lost)
>>> + if (vram_lost) {
>>> DRM_ERROR("VRAM is lost!\n");
>>> + atomic_inc(&adev->vram_lost_counter);
>>> + }
>>> r = amdgpu_ttm_recover_gart(adev);
>>> if (r)
>>> goto out;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d8275ef..83bc94c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -802,6 +802,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev,
>>> void *data,
>>> args->operation);
>>> return -EINVAL;
>>> }
>>> + if ((args->operation == AMDGPU_VA_OP_MAP) ||
>>> + (args->operation == AMDGPU_VA_OP_REPLACE)) {
>>> + if (amdgpu_kms_vram_lost(adev, fpriv))
>>> + return -ENODEV;
>>> + }
>>>
>>> INIT_LIST_HEAD(&list);
>>> if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> index 368829a..a231aa1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>> @@ -235,6 +235,7 @@ static int amdgpu_firmware_info(struct
>>> drm_amdgpu_info_firmware *fw_info,
>>> static int amdgpu_info_ioctl(struct drm_device *dev, void *data,
>>> struct
>>> drm_file *filp)
>>> {
>>> struct amdgpu_device *adev = dev->dev_private;
>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> struct drm_amdgpu_info *info = data;
>>> struct amdgpu_mode_info *minfo = &adev->mode_info;
>>> void __user *out = (void __user
>>> *)(uintptr_t)info->return_pointer;
>>> @@ -247,6 +248,8 @@ static int amdgpu_info_ioctl(struct drm_device
>>> *dev,
>>> void *data, struct drm_file
>>>
>>> if (!info->return_size || !info->return_pointer)
>>> return -EINVAL;
>>> + if (amdgpu_kms_vram_lost(adev, fpriv))
>>> + return -ENODEV;
>>>
>>> switch (info->query) {
>>> case AMDGPU_INFO_VIRTUAL_RANGE: {
>>> @@ -779,6 +782,12 @@ void amdgpu_driver_lastclose_kms(struct
>>> drm_device *dev)
>>> vga_switcheroo_process_delayed_switch();
>>> }
>>>
>>> +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
>>> + struct amdgpu_fpriv *fpriv)
>>> +{
>>> + return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
>>> +}
>>> +
>>> /**
>>> * amdgpu_driver_open_kms - drm callback for open
>>> *
>>> @@ -833,6 +842,7 @@ int amdgpu_driver_open_kms(struct drm_device
>>> *dev, struct drm_file *file_priv)
>>>
>>> amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>>>
>>> + fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
>>> file_priv->driver_priv = fpriv;
>>>
>>> out_suspend:
>>> --
>>> 1.9.1
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
More information about the amd-gfx mailing list