[PATCH] drm/amdgpu: Verify root PD is mapped into kernel address space.

Christian König christian.koenig at amd.com
Thu Jul 5 07:17:54 UTC 2018


Am 05.07.2018 um 04:09 schrieb zhoucm1:
>
>
> On 2018年07月05日 03:49, Andrey Grodzovsky wrote:
>> Problem: When PD/PT update made by CPU root PD was not yet mapped 
>> causing
>> page fault.
>>
>> Fix: Move amdgpu_bo_kmap into amdgpu_vm_bo_base_init to cover
>> all cases and avoid code duplication with amdgpu_vm_alloc_levels.
>>
>> Link: https://bugs.freedesktop.org/show_bug.cgi?id=107065
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 47 
>> ++++++++++++++++++++++------------
>>   1 file changed, 31 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 845f73a..f546afa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -143,25 +143,36 @@ struct amdgpu_prt_cb {
>>    * Initialize a bo_va_base structure and add it to the appropriate 
>> lists
>>    *
>>    */
>> -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>> +static int amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>                      struct amdgpu_vm *vm,
>>                      struct amdgpu_bo *bo)
>>   {
>> +    int r = 0;
>>       base->vm = vm;
>>       base->bo = bo;
>>       INIT_LIST_HEAD(&base->bo_list);
>>       INIT_LIST_HEAD(&base->vm_status);
>>         if (!bo)
>> -        return;
>> +        return r;
>> +
>>       list_add_tail(&base->bo_list, &bo->va);
>>   +    if (vm->use_cpu_for_update && bo->tbo.type == 
>> ttm_bo_type_kernel) {
>> +        r = amdgpu_bo_kmap(bo, NULL);
>> +        if (r) {
>> +            amdgpu_bo_unref(&bo->shadow);
>> +            amdgpu_bo_unref(&bo);
> I feel these two lines should move out of helper function, ref/unref 
> should appear in where used with pair.

Yeah agree, but there is an approach which is even cleaner I think.

>
> Regards,
> David Zhou
>> +            return r;
>> +        }
>> +    }
>> +
>>       if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
>> -        return;
>> +        return r;
>>         if (bo->preferred_domains &
>>           amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
>> -        return;
>> +        return r;
>>         /*
>>        * we checked all the prerequisites, but it looks like this per 
>> vm bo
>> @@ -169,6 +180,8 @@ static void amdgpu_vm_bo_base_init(struct 
>> amdgpu_vm_bo_base *base,
>>        * is validated on next vm use to avoid fault.
>>        * */
>>       list_move_tail(&base->vm_status, &vm->evicted);
>> +
>> +    return r;

Here we move it into the evicted status when the placement isn't ok.

Now for PDs/PTs the caller moves it immediately into the relocated 
status to make sure that the parent entries are updated:
> amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>                         list_move(&entry->base.vm_status, &vm->relocated);

And amdgpu_vm_update_directories() maps all relocated BOs anyway before 
it begins:
>                 list_for_each_entry(bo_base, &vm->relocated, vm_status) {
>                         r = amdgpu_bo_kmap(bo_base->bo, NULL);
>                         if (unlikely(r))
>                                 return r;
>                 }

So all we need to do is to make sure that we add the root PD to the 
relocated list as well.

Cleanest approach would probably be to move the 
"list_move(&entry->base.vm_status, &vm->relocated);" into 
amdgpu_vm_bo_base_init().

Regards,
Christian.

>>   }
>>     /**
>> @@ -525,21 +538,15 @@ static int amdgpu_vm_alloc_levels(struct 
>> amdgpu_device *adev,
>>                   return r;
>>               }
>>   -            if (vm->use_cpu_for_update) {
>> -                r = amdgpu_bo_kmap(pt, NULL);
>> -                if (r) {
>> -                    amdgpu_bo_unref(&pt->shadow);
>> -                    amdgpu_bo_unref(&pt);
>> -                    return r;
>> -                }
>> -            }
>> -
>>               /* Keep a reference to the root directory to avoid
>>               * freeing them up in the wrong order.
>>               */
>>               pt->parent = amdgpu_bo_ref(parent->base.bo);
>>   -            amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +            r = amdgpu_vm_bo_base_init(&entry->base, vm, pt);
>> +            if (r)
>> +                return r;
>> +
>>               list_move(&entry->base.vm_status, &vm->relocated);
>>           }
>>   @@ -1992,12 +1999,17 @@ struct amdgpu_bo_va 
>> *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>>                         struct amdgpu_bo *bo)
>>   {
>>       struct amdgpu_bo_va *bo_va;
>> +    int r;
>>         bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL);
>>       if (bo_va == NULL) {
>>           return NULL;
>>       }
>> -    amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
>> +
>> +    if ((r = amdgpu_vm_bo_base_init(&bo_va->base, vm, bo))) {
>> +        WARN_ONCE(1,"r = %d\n", r);
>> +        return NULL;
>> +    }
>>         bo_va->ref_count = 1;
>>       INIT_LIST_HEAD(&bo_va->valids);
>> @@ -2613,7 +2625,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>       if (r)
>>           goto error_unreserve;
>>   -    amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
>> +    r = amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
>> +    if (r)
>> +            goto error_unreserve;
>> +
>>       amdgpu_bo_unreserve(vm->root.base.bo);
>>         if (pasid) {
>



More information about the amd-gfx mailing list