[PATCH] drm/radeon: fence virtual address and free it once idle v3
Jerome Glisse
j.glisse at gmail.com
Mon Aug 6 09:30:41 PDT 2012
On Mon, Aug 6, 2012 at 12:06 PM, Christian König
<deathsimple at vodafone.de> wrote:
> On 04.08.2012 11:07, Christian König wrote:
>>
>> On 03.08.2012 23:26, j.glisse at gmail.com wrote:
>>>
>>> From: Jerome Glisse <jglisse at redhat.com>
>>>
>>> Virtual addresses need to be fenced so we know when it is safe to
>>> remove them. This patch also properly clears the pagetable; previously
>>> it was seriously broken.
>>>
>>> Kernels 3.5/3.4 need a similar patch, adapted for the differences in
>>> mutex locking.
>>>
>>> v2: Force pagetable update when unbinding bo (don't bail out if
>>> bo_va->valid is true).
>>> v3: Add kernel 3.5/3.4 comment.
>>>
>>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>>
>> It should fix the problem; I'm going to test it as soon as I find a spare
>> 5 minutes during my vacation. Until then it is:
>>
>> Reviewed-by: Christian König <christian.koenig at amd.com>
>>
>> In the future I would really like to make the PTE updates asynchronous as
>> well; that should save us a lot of problems here.
>
>
> Had time today to test this a bit more and found two minor issues in the
> patch, see below.
>
>
>>
>>
>>> ---
>>> drivers/gpu/drm/radeon/radeon.h | 1 +
>>> drivers/gpu/drm/radeon/radeon_cs.c | 32 +++++++++++++++++++++++++++++---
>>> drivers/gpu/drm/radeon/radeon_gart.c | 24 ++++++++++++++++++++++--
>>> drivers/gpu/drm/radeon/radeon_gem.c | 13 ++-----------
>>> drivers/gpu/drm/radeon/radeon_object.c | 6 +-----
>>> 5 files changed, 55 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
>>> index 5431af2..8d75c65 100644
>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>> @@ -300,6 +300,7 @@ struct radeon_bo_va {
>>> uint64_t soffset;
>>> uint64_t eoffset;
>>> uint32_t flags;
>>> + struct radeon_fence *fence;
>>> bool valid;
>>> };
>>> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
>>> index 8a4c49e..995f3ab 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_cs.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
>>> @@ -278,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
>>> return 0;
>>> }
>>> +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
>>> + struct radeon_fence *fence)
>>> +{
>>> + struct radeon_fpriv *fpriv = parser->filp->driver_priv;
>>> + struct radeon_vm *vm = &fpriv->vm;
>>> + struct radeon_bo_list *lobj;
>
>
>>> + int r;
>
> "r" is unused here, please remove.
>
>
>>> +
>>> + if (parser->chunk_ib_idx == -1)
>>> + return;
>>> + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>> + return;
>>> +
>>> + list_for_each_entry(lobj, &parser->validated, tv.head) {
>>> + struct radeon_bo_va *bo_va;
>>> + struct radeon_bo *rbo = lobj->bo;
>>> +
>>> + bo_va = radeon_bo_va(rbo, vm);
>>> + radeon_fence_unref(&bo_va->fence);
>>> + bo_va->fence = radeon_fence_ref(fence);
>>> + }
>
>
>>> + return 0;
>
> The function doesn't return an error value, so this should just be a
> "return" without value.
>
>
>>> +}
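
For reference, with both notes folded in (the unused "r" dropped, and the stray
"return 0" simply removed since the function is void and just falls off the end)
the helper above would look roughly like this; a sketch against the patch as
posted, not necessarily the final committed version:

    static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser,
                                      struct radeon_fence *fence)
    {
            struct radeon_fpriv *fpriv = parser->filp->driver_priv;
            struct radeon_vm *vm = &fpriv->vm;
            struct radeon_bo_list *lobj;

            if (parser->chunk_ib_idx == -1)
                    return;
            if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
                    return;

            /* every validated bo takes a reference on the ib fence, so the
             * va mapping is known to be idle once that fence signals
             */
            list_for_each_entry(lobj, &parser->validated, tv.head) {
                    struct radeon_bo_va *bo_va = radeon_bo_va(lobj->bo, vm);

                    radeon_fence_unref(&bo_va->fence);
                    bo_va->fence = radeon_fence_ref(fence);
            }
    }
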
>>> +
>>> /**
>>> * cs_parser_fini() - clean parser states
>>> * @parser: parser structure holding parsing context.
>>> @@ -290,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
>>> {
>>> unsigned i;
>>> - if (!error)
>>> + if (!error) {
>>> + /* fence all bo va before ttm_eu_fence_buffer_objects so bo are still reserved */
>>> + radeon_bo_vm_fence_va(parser, parser->ib.fence);
>>> ttm_eu_fence_buffer_objects(&parser->validated,
>>> parser->ib.fence);
>>> - else
>>> + } else {
>>> ttm_eu_backoff_reservation(&parser->validated);
>>> + }
>>> if (parser->relocs != NULL) {
>>> for (i = 0; i < parser->nrelocs; i++) {
>>> @@ -388,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
>>> if (parser->chunk_ib_idx == -1)
>>> return 0;
>>> -
>>> if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
>>> return 0;
>>> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
>>> index b372005..9912182 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gart.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
>>> @@ -814,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
>>> return -EINVAL;
>>> }
>>> - if (bo_va->valid)
>>> + if (bo_va->valid && mem)
>>> return 0;
>>> ngpu_pages = radeon_bo_ngpu_pages(bo);
>>> @@ -859,11 +859,27 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
>>> struct radeon_bo *bo)
>>> {
>>> struct radeon_bo_va *bo_va;
>>> + int r;
>>> bo_va = radeon_bo_va(bo, vm);
>>> if (bo_va == NULL)
>>> return 0;
>>> + /* wait for va use to end */
>>> + while (bo_va->fence) {
>>> + r = radeon_fence_wait(bo_va->fence, false);
>>> + if (r) {
>>> + DRM_ERROR("error while waiting for fence: %d\n", r);
>>> + }
>>> + if (r == -EDEADLK) {
>>> + r = radeon_gpu_reset(rdev);
>>> + if (!r)
>>> + continue;
>>> + }
>>> + break;
>>> + }
>>> + radeon_fence_unref(&bo_va->fence);
>>> +
>>> mutex_lock(&rdev->vm_manager.lock);
>>> mutex_lock(&vm->mutex);
>>> radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
>>> @@ -952,12 +968,15 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
>>> radeon_vm_unbind_locked(rdev, vm);
>>> mutex_unlock(&rdev->vm_manager.lock);
>>> - /* remove all bo */
>>> + /* remove all bo, at this point none are busy any more because unbind
>>> + * waited for the last vm fence to signal
>>> + */
>>> r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
>>> if (!r) {
>>> bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm);
>>> list_del_init(&bo_va->bo_list);
>>> list_del_init(&bo_va->vm_list);
>>> + radeon_fence_unref(&bo_va->fence);
>>> radeon_bo_unreserve(rdev->ring_tmp_bo.bo);
>>> kfree(bo_va);
>>> }
>>> @@ -969,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
>>> r = radeon_bo_reserve(bo_va->bo, false);
>>> if (!r) {
>>> list_del_init(&bo_va->bo_list);
>>> + radeon_fence_unref(&bo_va->fence);
>>> radeon_bo_unreserve(bo_va->bo);
>>> kfree(bo_va);
>>> }
>>> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
>>> index 84d0452..1b57b00 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_gem.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
>>> @@ -134,25 +134,16 @@ void radeon_gem_object_close(struct drm_gem_object *obj,
>>> struct radeon_device *rdev = rbo->rdev;
>>> struct radeon_fpriv *fpriv = file_priv->driver_priv;
>>> struct radeon_vm *vm = &fpriv->vm;
>>> - struct radeon_bo_va *bo_va, *tmp;
>>> if (rdev->family < CHIP_CAYMAN) {
>>> return;
>>> }
>>> if (radeon_bo_reserve(rbo, false)) {
>>> + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n");
>>> return;
>>> }
>>> - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
>>> - if (bo_va->vm == vm) {
>>> - /* remove from this vm address space */
>>> - mutex_lock(&vm->mutex);
>>> - list_del(&bo_va->vm_list);
>>> - mutex_unlock(&vm->mutex);
>>> - list_del(&bo_va->bo_list);
>>> - kfree(bo_va);
>>> - }
>>> - }
>>> + radeon_vm_bo_rmv(rdev, vm, rbo);
>>> radeon_bo_unreserve(rbo);
>>> }
>>> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
>>> index 1f1a4c8..1cb014b 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_object.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_object.c
>>> @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo)
>>> list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
>>> /* remove from all vm address space */
>>> - mutex_lock(&bo_va->vm->mutex);
>>> - list_del(&bo_va->vm_list);
>>> - mutex_unlock(&bo_va->vm->mutex);
>>> - list_del(&bo_va->bo_list);
>>> - kfree(bo_va);
>>> + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
>>> }
>>> }
>>
>>
>
> In addition to that patch we still need a minor fix to mesa (just move
> freeing of the VM range to after closing the handle). I'm going to send
> that to the mesa-dev list in the next minute.
>
> Otherwise the patch indeed fixes the problem, but it also isn't the best
> idea for performance.
>
> Cheers,
> Christian.
>
This does not impact performance; it's all about destroying the bo/va, so
it's not a performance path. Or am I missing something here?
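
To make that concrete, condensed from the hunks above, the only callers that
now reach the fence wait in radeon_vm_bo_rmv() are the GEM handle close and
the final bo destruction, roughly:

    /* GEM handle close (radeon_gem.c hunk above) */
    void radeon_gem_object_close(struct drm_gem_object *obj,
                                 struct drm_file *file_priv)
    {
            ...
            radeon_vm_bo_rmv(rdev, vm, rbo);   /* may wait on bo_va->fence */
            radeon_bo_unreserve(rbo);
    }

    /* final bo destruction (radeon_object.c hunk above) */
    void radeon_bo_clear_va(struct radeon_bo *bo)
    {
            list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list)
                    radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo);
    }

Neither of those sits on the command submission path.
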
Cheers,
Jerome