[PATCH 07/12] drm/amdgpu/sriov:implement strict gpu reset
Liu, Monk
Monk.Liu at amd.com
Mon Oct 9 08:30:10 UTC 2017
Why shouldn't we return an error code? Any particular reason?
If we wake up a thread waiting on some fence and return 0, the UMD won't be aware that a GPU hang occurred just now ...
for this strict mode reset the policy is aligned with vulkan spec:
it defines that the VK runtime should return a VK_ERROR_DEVICE_LOST error to the app from those waiting functions, and the kernel should let the VK UMD be aware of it, so the kernel must return an error to the UMD in the cs_wait IOCTL
The UMD cannot invoke amdgpu_query_ctx for no reason; it should be notified by some error fence, e.g. via cs_wait
BR Monk
-----Original Message-----
From: Christian König [mailto:ckoenig.leichtzumerken at gmail.com]
Sent: 2017年10月9日 16:20
To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 07/12] drm/amdgpu/sriov:implement strict gpu reset
Am 30.09.2017 um 08:03 schrieb Monk Liu:
> changes:
> 1)implement strict mode sriov gpu reset 2)always call
> sriov_gpu_reset_strict if hypervisor notify FLR 3)in strict reset
> mode, set error to all fences.
> 4)change fence_wait/cs_wait functions to return -ENODEV if fence
> signaled with error == -ETIME,
>
> Since after strict gpu reset we consider the VRAM were lost, and since
> assuming VRAM lost there is little help to recover shadow BO because
> all textures/resources/shaders cannot recovered (if they resident in
> VRAM)
NAK, we shouldn't return an error code on the wait function and instead handle the fence error code in amdgpu_ctx_query().
Regards,
Christian.
>
> Change-Id: I50d9b8b5185ba92f137f07c9deeac19d740d753b
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 25 ++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 90 +++++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 +
> drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 4 +-
> drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 60 ++++++++++++++++++
> drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 +
> 10 files changed, 188 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index de11527..de9c164 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -123,6 +123,7 @@ extern int amdgpu_cntl_sb_buf_per_se;
> extern int amdgpu_param_buf_per_se;
> extern int amdgpu_job_hang_limit;
> extern int amdgpu_lbpw;
> +extern int amdgpu_sriov_reset_level;
>
> #ifdef CONFIG_DRM_AMDGPU_SI
> extern int amdgpu_si_support;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c6a214f..9467cf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1262,6 +1262,7 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
> struct amdgpu_ctx *ctx;
> struct dma_fence *fence;
> long r;
> + int fence_err = 0;
>
> if (amdgpu_kms_vram_lost(adev, fpriv))
> return -ENODEV;
> @@ -1283,6 +1284,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
> r = PTR_ERR(fence);
> else if (fence) {
> r = dma_fence_wait_timeout(fence, true, timeout);
> + /* gpu hang and this fence is signaled by gpu reset if fence_err < 0 */
> + fence_err = dma_fence_get_status(fence);
> dma_fence_put(fence);
> } else
> r = 1;
> @@ -1292,7 +1295,10 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
> return r;
>
> memset(wait, 0, sizeof(*wait));
> - wait->out.status = (r == 0);
> + wait->out.status = (fence_err < 0);
> +
> + if (fence_err < 0)
> + return -ENODEV;
>
> return 0;
> }
> @@ -1346,6 +1352,7 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
> uint32_t fence_count = wait->in.fence_count;
> unsigned int i;
> long r = 1;
> + int fence_err = 0;
>
> for (i = 0; i < fence_count; i++) {
> struct dma_fence *fence;
> @@ -1358,16 +1365,20 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
> continue;
>
> r = dma_fence_wait_timeout(fence, true, timeout);
> + fence_err = dma_fence_get_status(fence);
> dma_fence_put(fence);
> if (r < 0)
> return r;
>
> - if (r == 0)
> + if (r == 0 || fence_err < 0)
> break;
> }
>
> memset(wait, 0, sizeof(*wait));
> - wait->out.status = (r > 0);
> + wait->out.status = (r > 0 && fence_err == 0);
> +
> + if (fence_err < 0)
> + return -ENODEV;
>
> return 0;
> }
> @@ -1391,6 +1402,7 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
> struct dma_fence **array;
> unsigned int i;
> long r;
> + int fence_err = 0;
>
> /* Prepare the fence array */
> array = kcalloc(fence_count, sizeof(struct dma_fence *),
> GFP_KERNEL); @@ -1418,10 +1430,12 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
> &first);
> if (r < 0)
> goto err_free_fence_array;
> + else
> + fence_err = dma_fence_get_status(array[first]);
>
> out:
> memset(wait, 0, sizeof(*wait));
> - wait->out.status = (r > 0);
> + wait->out.status = (r > 0 && fence_err == 0);
> wait->out.first_signaled = first;
> /* set return value 0 to indicate success */
> r = 0;
> @@ -1431,6 +1445,9 @@ static int amdgpu_cs_wait_any_fence(struct amdgpu_device *adev,
> dma_fence_put(array[i]);
> kfree(array);
>
> + if (fence_err < 0)
> + return -ENODEV;
> +
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 9efbb33..122e2e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2734,6 +2734,96 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
> }
>
> /**
> + * amdgpu_sriov_gpu_reset_strict - reset the asic under strict mode
> + *
> + * @adev: amdgpu device pointer
> + * @job: which job trigger hang
> + *
> + * Attempt the reset the GPU if it has hung (all asics).
> + * for SRIOV case.
> + * Returns 0 for success or an error on failure.
> + *
> + * this function will deny all process/fence created before this
> +reset,
> + * and drop all jobs unfinished during this reset.
> + *
> + * Application should take the responsibility to re-open the FD to
> +re-create
> + * the VM page table and recover all resources as well
> + *
> + **/
> +int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct
> +amdgpu_job *job) {
> + int i, r = 0;
> + int resched;
> + struct amdgpu_ring *ring;
> +
> + /* other thread is already into the gpu reset so just quit and come later */
> + if (!atomic_add_unless(&adev->in_sriov_reset, 1, 1))
> + return -EAGAIN;
> +
> + atomic_inc(&adev->gpu_reset_counter);
> +
> + /* block TTM */
> + resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
> +
> + /* fake signal jobs already scheduled */
> + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> + ring = adev->rings[i];
> +
> + if (!ring || !ring->sched.thread)
> + continue;
> +
> + kthread_park(ring->sched.thread);
> + amd_sched_set_sched_hang(&ring->sched);
> + amdgpu_fence_driver_force_completion_ring(ring);
> + amd_sched_set_queue_hang(&ring->sched);
> + }
> +
> + /* request to take full control of GPU before re-initialization */
> + if (job)
> + amdgpu_virt_reset_gpu(adev);
> + else
> + amdgpu_virt_request_full_gpu(adev, true);
> +
> + /* Resume IP prior to SMC */
> + amdgpu_sriov_reinit_early(adev);
> +
> + /* we need recover gart prior to run SMC/CP/SDMA resume */
> + amdgpu_ttm_recover_gart(adev);
> +
> + /* now we are okay to resume SMC/CP/SDMA */
> + amdgpu_sriov_reinit_late(adev);
> +
> + /* resume IRQ status */
> + amdgpu_irq_gpu_reset_resume_helper(adev);
> +
> + if (amdgpu_ib_ring_tests(adev))
> + dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
> +
> + /* release full control of GPU after ib test */
> + amdgpu_virt_release_full_gpu(adev, true);
> +
> + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> + ring = adev->rings[i];
> +
> + if (!ring || !ring->sched.thread)
> + continue;
> +
> + kthread_unpark(ring->sched.thread);
> + }
> +
> + drm_helper_resume_force_mode(adev->ddev);
> +
> + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
> + if (r)
> + dev_info(adev->dev, "Strict mode GPU reset failed\n");
> + else
> + dev_info(adev->dev, "Strict mode GPU reset successed!\n");
> +
> + atomic_set(&adev->in_sriov_reset, 0);
> + return 0;
> +}
> +
> +/**
> * amdgpu_sriov_gpu_reset - reset the asic
> *
> * @adev: amdgpu device pointer
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 8f5211c..eee67dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -123,6 +123,7 @@ int amdgpu_cntl_sb_buf_per_se = 0;
> int amdgpu_param_buf_per_se = 0;
> int amdgpu_job_hang_limit = 0;
> int amdgpu_lbpw = -1;
> +int amdgpu_sriov_reset_level = 0;
>
> MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
> module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@
> -269,6 +270,9 @@ module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
> MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
> module_param_named(lbpw, amdgpu_lbpw, int, 0444);
>
> +MODULE_PARM_DESC(sriov_reset_level, "what level will gpu reset on, 0:
> +loose, 1:strict, other:disable (default 0))");
> +module_param_named(sriov_reset_level, amdgpu_sriov_reset_level, int
> +,0444);
> +
> #ifdef CONFIG_DRM_AMDGPU_SI
>
> #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 0db81a4..933823a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -41,7 +41,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
> int r;
>
> try_again:
> - r = amdgpu_sriov_gpu_reset(job->adev, job);
> + if (amdgpu_sriov_reset_level == 1)
> + r = amdgpu_sriov_gpu_reset_strict(job->adev, job);
> + else
> + r = amdgpu_sriov_gpu_reset(job->adev, job);
> +
> if (r == -EAGAIN) {
> /* maye two different schedulers all have hang job, try later */
> schedule();
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index a3cbd5a..5664a10 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -100,5 +100,6 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
> int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
> int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
> void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
> +int amdgpu_sriov_gpu_reset_strict(struct amdgpu_device *adev, struct
> +amdgpu_job *job);
>
> #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index 2812d88..00a9629 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -247,8 +247,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
> return;
> }
>
> - /* Trigger recovery due to world switch failure */
> - amdgpu_sriov_gpu_reset(adev, NULL);
> + /* use strict mode if FLR triggered from hypervisor */
> + amdgpu_sriov_gpu_reset_strict(adev, NULL);
> }
>
> static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> index c25a831..c94b6e9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> @@ -513,8 +513,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
> return;
> }
>
> - /* Trigger recovery due to world switch failure */
> - amdgpu_sriov_gpu_reset(adev, NULL);
> + /* use strict mode if FLR triggered from hypervisor */
> + amdgpu_sriov_gpu_reset_strict(adev, NULL);
> }
>
> static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 97c94f9..12c3092 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -430,6 +430,66 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
> spin_unlock(&sched->job_list_lock);
> }
>
> +/**
> + * amd_sched_set_sched_hang
> + * @sched: the scheduler need to set all pending jobs hang
> + *
> + * this routine set all unfinished jobs pending in the sched to
> + * an error -ETIME statues
> + *
> + **/
> +void amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched) {
> + struct amd_sched_job *s_job;
> +
> + spin_lock(&sched->job_list_lock);
> + list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node)
> + dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
> +
> + spin_unlock(&sched->job_list_lock);
> +}
> +
> +/**
> + * amd_sched_set_queue_hang
> + * @sched: the scheduler need to set all job in kfifo hang
> + *
> + * this routine set all jobs in the KFIFO of @sched to an error
> + * -ETIME status and signal those jobs.
> + *
> + **/
> +
> +void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched) {
> + struct amd_sched_entity *entity, *tmp;
> + struct amd_sched_job *s_job;
> + struct amd_sched_rq *rq;
> + int i;
> +
> + /* set HANG status on all jobs queued and fake signal them */
> + for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++) {
> + rq = &sched->sched_rq[i];
> +
> + spin_lock(&rq->lock);
> + list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
> + if (entity->dependency) {
> + dma_fence_remove_callback(entity->dependency, &entity->cb);
> + dma_fence_put(entity->dependency);
> + entity->dependency = NULL;
> + }
> +
> + spin_lock(&entity->queue_lock);
> + while(kfifo_out(&entity->job_queue, &s_job, sizeof(s_job)) == sizeof(s_job)) {
> + dma_fence_set_error(&s_job->s_fence->finished, -ETIME);
> + amd_sched_fence_scheduled(s_job->s_fence);
> + amd_sched_fence_finished(s_job->s_fence);
> + }
> + spin_unlock(&entity->queue_lock);
> + }
> + spin_unlock(&rq->lock);
> + }
> + wake_up(&sched->job_scheduled);
> +}
> +
> void amd_sched_job_kickout(struct amd_sched_job *s_job)
> {
> struct amd_gpu_scheduler *sched = s_job->sched; diff --git
> a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index f9d8f28..f0242aa 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -167,4 +167,6 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
> bool amd_sched_dependency_optimized(struct dma_fence* fence,
> struct amd_sched_entity *entity);
> void amd_sched_job_kickout(struct amd_sched_job *s_job);
> +void amd_sched_set_queue_hang(struct amd_gpu_scheduler *sched); void
> +amd_sched_set_sched_hang(struct amd_gpu_scheduler *sched);
> #endif
More information about the amd-gfx
mailing list