[PATCH] drm/radeon: fence virtual address and free it once idle v3

Mon Aug 6 09:06:00 PDT 2012

On 04.08.2012 11:07, Christian König wrote:
> On 03.08.2012 23:26, j.glisse at gmail.com wrote:
>> From: Jerome Glisse <jglisse at redhat.com>
>>
>> Virtual address need to be fenced to know when we can safely remove it.
>> This patch also properly clear the pagetable. Previously it was
>> serouisly broken.
>>
>> Kernel 3.5/3.4 need a similar patch but adapted for difference in 
>> mutex locking.
>>
>> v2: For to update pagetable when unbinding bo (don't bailout if
>>      bo_va->valid is true).
>> v3: Add kernel 3.5/3.4 comment.
>>
>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
> It should fix the problem, going to test it as soon as I find some 
> 5min spare in my vacation. Till then it is:
>
> Reviewed-by: Christian König <christian.koenig at amd.com>
>
> In the future I would really like to make the updating of the PTE also 
> async, that should save us allot of problems here.

Had time today to test that a bit more. Found two minor notes in the 
patch, see below.

>
>
>> ---
>>   drivers/gpu/drm/radeon/radeon.h        |    1 +
>>   drivers/gpu/drm/radeon/radeon_cs.c     |   32 
>> +++++++++++++++++++++++++++++---
>>   drivers/gpu/drm/radeon/radeon_gart.c   |   24 ++++++++++++++++++++++--
>>   drivers/gpu/drm/radeon/radeon_gem.c    |   13 ++-----------
>>   drivers/gpu/drm/radeon/radeon_object.c |    6 +-----
>>   5 files changed, 55 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h 
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 5431af2..8d75c65 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>>       uint64_t            soffset;
>>       uint64_t            eoffset;
>>       uint32_t            flags;
>> +    struct radeon_fence        *fence;
>>       bool                valid;
>>   };
>>   diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
>> b/drivers/gpu/drm/radeon/radeon_cs.c
>> index 8a4c49e..995f3ab 100644
>> --- a/drivers/gpu/drm/radeon/radeon_cs.c
>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct 
>> radeon_cs_parser *p, void *data)
>>       return 0;
>>   }
>>   +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
>> +                  struct radeon_fence *fence)
>> +{
>> +    struct radeon_fpriv *fpriv = parser->filp->driver_priv;
>> +    struct radeon_vm *vm = &fpriv->vm;
>> +    struct radeon_bo_list *lobj;

>> +    int r;
"r" is unused here, please remove.

>> +
>> +    if (parser->chunk_ib_idx == -1)
>> +        return;
>> +    if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>> +        return;
>> +
>> +    list_for_each_entry(lobj, &parser->validated, tv.head) {
>> +        struct radeon_bo_va *bo_va;
>> +        struct radeon_bo *rbo = lobj->bo;
>> +
>> +        bo_va = radeon_bo_va(rbo, vm);
>> +        radeon_fence_unref(&bo_va->fence);
>> +        bo_va->fence = radeon_fence_ref(fence);
>> +    }

>> +    return 0;
The function doesn't return an error value, so this should just be a 
"return" without value.

>> +}
>> +
>>   /**
>>    * cs_parser_fini() - clean parser states
>>    * @parser:    parser structure holding parsing context.
>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct 
>> radeon_cs_parser *parser, int error)
>>   {
>>       unsigned i;
>>   -    if (!error)
>> +    if (!error) {
>> +        /* fence all bo va before ttm_eu_fence_buffer_objects so bo 
>> are still reserved */
>> +        radeon_bo_vm_fence_va(parser, parser->ib.fence);
>>           ttm_eu_fence_buffer_objects(&parser->validated,
>>                           parser->ib.fence);
>> -    else
>> +    } else {
>>           ttm_eu_backoff_reservation(&parser->validated);
>> +    }
>>         if (parser->relocs != NULL) {
>>           for (i = 0; i < parser->nrelocs; i++) {
>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct 
>> radeon_device *rdev,
>>         if (parser->chunk_ib_idx == -1)
>>           return 0;
>> -
>>       if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>           return 0;
>>   diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
>> b/drivers/gpu/drm/radeon/radeon_gart.c
>> index b372005..9912182 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gart.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device 
>> *rdev,
>>           return -EINVAL;
>>       }
>>   -    if (bo_va->valid)
>> +    if (bo_va->valid && mem)
>>           return 0;
>>         ngpu_pages = radeon_bo_ngpu_pages(bo);
>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>                struct radeon_bo *bo)
>>   {
>>       struct radeon_bo_va *bo_va;
>> +    int r;
>>         bo_va = radeon_bo_va(bo, vm);
>>       if (bo_va == NULL)
>>           return 0;
>>   +    /* wait for va use to end */
>> +    while (bo_va->fence) {
>> +        r = radeon_fence_wait(bo_va->fence, false);
>> +        if (r) {
>> +            DRM_ERROR("error while waiting for fence: %d\n", r);
>> +        }
>> +        if (r == -EDEADLK) {
>> +            r = radeon_gpu_reset(rdev);
>> +            if (!r)
>> +                continue;
>> +        }
>> +        break;
>> +    }
>> +    radeon_fence_unref(&bo_va->fence);
>> +
>>       mutex_lock(&rdev->vm_manager.lock);
>>       mutex_lock(&vm->mutex);
>>       radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, 
>> struct radeon_vm *vm)
>>       radeon_vm_unbind_locked(rdev, vm);
>>       mutex_unlock(&rdev->vm_manager.lock);
>>   -    /* remove all bo */
>> +    /* remove all bo at this point non are busy any more because unbind
>> +     * waited for the last vm fence to signal
>> +     */
>>       r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>>       if (!r) {
>>           bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>>           list_del_init(&bo_va->bo_list);
>>           list_del_init(&bo_va->vm_list);
>> +        radeon_fence_unref(&bo_va->fence);
>>           radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>>           kfree(bo_va);
>>       }
>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, 
>> struct radeon_vm *vm)
>>           r = radeon_bo_reserve(bo_va->bo, false);
>>           if (!r) {
>>               list_del_init(&bo_va->bo_list);
>> +            radeon_fence_unref(&bo_va->fence);
>>               radeon_bo_unreserve(bo_va->bo);
>>               kfree(bo_va);
>>           }
>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c 
>> b/drivers/gpu/drm/radeon/radeon_gem.c
>> index 84d0452..1b57b00 100644
>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct 
>> drm_gem_object *obj,
>>       struct radeon_device *rdev = rbo->rdev;
>>       struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>       struct radeon_vm *vm = &fpriv->vm;
>> -    struct radeon_bo_va *bo_va, *tmp;
>>         if (rdev->family < CHIP_CAYMAN) {
>>           return;
>>       }
>>         if (radeon_bo_reserve(rbo, false)) {
>> +        dev_err(rdev->dev, "leaking bo va because we fail to reserve 
>> bo\n");
>>           return;
>>       }
>> -    list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
>> -        if (bo_va->vm == vm) {
>> -            /* remove from this vm address space */
>> -            mutex_lock(&vm->mutex);
>> -            list_del(&bo_va->vm_list);
>> -            mutex_unlock(&vm->mutex);
>> -            list_del(&bo_va->bo_list);
>> -            kfree(bo_va);
>> -        }
>> -    }
>> +    radeon_vm_bo_rmv(rdev, vm, rbo);
>>       radeon_bo_unreserve(rbo);
>>   }
>>   diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
>> b/drivers/gpu/drm/radeon/radeon_object.c
>> index 1f1a4c8..1cb014b 100644
>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>>         list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>>           /* remove from all vm address space */
>> -        mutex_lock(&bo_va->vm->mutex);
>> -        list_del(&bo_va->vm_list);
>> -        mutex_unlock(&bo_va->vm->mutex);
>> -        list_del(&bo_va->bo_list);
>> -        kfree(bo_va);
>> +        radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>>       }
>>   }
>

Additional to that patch we still need a minor fix to mesa (just move 
freeing the VM range after closing the handle). Going to send that in 
the next minute to the mesa-dev list.

Otherwise the patch indeed fixes the problem, but also isn't the best 
idea for performance.

Cheers,
Christian.