[PATCH 1/2] drm/amdgpu: Dump PDEs and PTEs on VM faults (v3)

Tue Jul 16 19:57:11 UTC 2019

On 2019-07-16 5:27 a.m., Christian König wrote:
> Am 13.07.19 um 08:42 schrieb Kuehling, Felix:
>> Walk page table for the faulting address and dump PDEs and PTEs at
>> all levels. Also flag discrepancies where a PDE points to a different
>> address than the next level PDB or PTB BO.
>>
>> v2:
>> * Fix address shift for GFXv8
>> * Redo PDB/PTB address checking to work on all generations
>>
>> v3:
>> * Simplified pde address and flag check
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  5 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 79 ++++++++++++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  7 ++-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   |  6 +-
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   |  5 +-
>>   6 files changed, 95 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index bbbf069efb77..78440748c87f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -1505,9 +1505,8 @@ static bool 
>> amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>>    * This is used to access VRAM that backs a buffer object via MMIO
>>    * access for debugging purposes.
>>    */
>> -static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>> -                    unsigned long offset,
>> -                    void *buf, int len, int write)
>> +int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned 
>> long offset,
>> +                 void *buf, int len, int write)
>>   {
>>       struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>>       struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index bccb8c49e597..cffbafffa9d7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -83,6 +83,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev);
>>   void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
>>                       bool enable);
>>   +int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, 
>> unsigned long offset,
>> +                 void *buf, int len, int write);
>>   int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>>                  uint64_t dst_offset, uint32_t byte_count,
>>                  struct reservation_object *resv,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 1951f2abbdbc..64ee46eaa041 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -544,6 +544,78 @@ static void amdgpu_vm_pt_next_dfs(struct 
>> amdgpu_device *adev,
>>            amdgpu_vm_pt_continue_dfs((start), (entry));            \
>>            (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), 
>> &(cursor)))
>>   +/**
>> + * amdgpu_vm_dump_pte - dump PTEs along a page table walk
>> + *
>> + * @adev: amdgpu device pointer
>> + * @vm: VM address space
>> + * @addr: virtual address
>> + *
>> + * Walks the page table of @vm at the given @addr and prints the PDEs
>> + * and PTEs along the way on a single line.
>> + */
>> +void amdgpu_vm_dump_pte(struct amdgpu_device *adev, struct amdgpu_vm 
>> *vm,
>> +            uint64_t addr)
>> +{
>> +    static const char *level_entry[4] = {"PDE2", "PDE1", "PDE0", 
>> "PTE"};
>> +    static const char *level_block[4] = {"PDB2", "PDB1", "PDB0", 
>> "PTB"};
>> +    struct amdgpu_vm_pt_cursor cursor;
>> +    uint64_t pde_addr, pde_flags, last_pde;
>> +    char buf[128];
>> +    int i = 0;
>> +
>> +    amdgpu_gmc_get_pde_for_bo(vm->root.base.bo, 
>> adev->vm_manager.root_level,
>> +                  &pde_addr, &pde_flags);
>> +    last_pde = pde_addr | pde_flags;
>> +
>> +    amdgpu_vm_pt_start(adev, vm, addr >> PAGE_SHIFT, &cursor);
>
> Walking the VM structure without a lock is dangerous, but we can only 
> take the lock in the fault worker on Vega10.

Not sure what you mean by fault worker. As far as I can tell, the VM 
structure is protected by the page table reservation lock, which I can't 
take in an interrupt handler. You're referring to a fault worker, which 
could take the lock, but I don't see that in staging yet.

>
>> +
>> +    do {
>> +        unsigned int mask, shift, idx;
>> +        struct amdgpu_bo *bo;
>> +        uint64_t pte;
>> +
>> +        mask = amdgpu_vm_entries_mask(adev, cursor.level);
>> +        shift = amdgpu_vm_level_shift(adev, cursor.level);
>> +        idx = (cursor.pfn >> shift) & mask;
>> +
>> +        bo = cursor.entry->base.bo;
>> +        if (bo) {
>> +            /* Flag discrepancy between previous level PDE
>> +             * and the actual address of this PTB or PDB.
>> +             */
>> +            amdgpu_gmc_get_pde_for_bo(bo, cursor.level,
>> +                          &pde_addr, &pde_flags);
>> +            if ((pde_addr | pde_flags) != last_pde)
>> +                i += snprintf(buf + i, sizeof(buf) - i, "!");
>> +
>> +            amdgpu_ttm_access_memory(&bo->tbo, idx * sizeof(pte),
>> +                         &pte, sizeof(pte), false);
>> +            i += snprintf(buf + i, sizeof(buf) - i,
>> +                      "%s[%d]=0x%llx ",
>> +                      level_entry[cursor.level], idx, pte);
>> +            last_pde = pte;
>> +        } else {
>> +            /* Flag discrepancy if previous level PDE had
>> +             * a valid entry but there is no PTB or PDB BO.
>> +             */
>> +            if ((last_pde & AMDGPU_PTE_VALID) &&
>> +                !(last_pde & AMDGPU_PDE_PTE))
>> +                i += snprintf(buf + i, sizeof(buf) - i, "!");
>> +            i += snprintf(buf + i, sizeof(buf) - i,
>> +                      "no %s ", level_block[cursor.level]);
>> +            last_pde = 0;
>> +        }
>> +
>> +        ++cursor.level;
>> +        cursor.parent = cursor.entry;
>> +        if (!cursor.entry->entries)
>> +            break;
>> +        cursor.entry = &cursor.entry->entries[idx];
>> +    } while (cursor.entry);
>> +    dev_err(adev->dev, "%s", buf);
>> +}
>> +
>>   /**
>>    * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
>>    *
>> @@ -3081,8 +3153,9 @@ int amdgpu_vm_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>>    * @pasid: PASID identifier for VM
>>    * @task_info: task_info to fill.
>>    */
>> -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned 
>> int pasid,
>> -             struct amdgpu_task_info *task_info)
>> +struct amdgpu_vm *amdgpu_vm_get_task_info(struct amdgpu_device *adev,
>> +                      unsigned int pasid,
>> +                      struct amdgpu_task_info *task_info)
>>   {
>>       struct amdgpu_vm *vm;
>>       unsigned long flags;
>> @@ -3094,6 +3167,8 @@ void amdgpu_vm_get_task_info(struct 
>> amdgpu_device *adev, unsigned int pasid,
>>           *task_info = vm->task_info;
>> spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> +
>> +    return vm;
>
> This is dangerous as well when we are in the interrupt handler.
>
> As soon as the spinlock is dropped the VM structure can be freed by 
> another thread.

OK, that's easier to fix. I was trying to avoid looking up the VM from 
the PASID twice. But I could do it in amdgpu_vm_dump_pte and hold the 
spin lock as long as I'm accessing the VM.

The reservation lock is the bigger problem. I'll keep this patch around, 
because I find it useful, but I won't submit it until I find a safe 
place to access the VM structure in a fault handler or worker.

Regards,
   Felix

>
> Christian.
>
>
>>   }
>>     /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 489a162ca620..6a8b833d180e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -348,6 +348,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm,
>>   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct 
>> amdgpu_vm *vm, unsigned int pasid);
>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct 
>> amdgpu_vm *vm);
>>   void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>> +void amdgpu_vm_dump_pte(struct amdgpu_device *adev, struct amdgpu_vm 
>> *vm,
>> +            uint64_t addr);
>>   void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>>                struct list_head *validated,
>>                struct amdgpu_bo_list_entry *entry);
>> @@ -401,8 +403,9 @@ bool amdgpu_vm_need_pipeline_sync(struct 
>> amdgpu_ring *ring,
>>                     struct amdgpu_job *job);
>>   void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>>   -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned 
>> int pasid,
>> -                 struct amdgpu_task_info *task_info);
>> +struct amdgpu_vm *amdgpu_vm_get_task_info(struct amdgpu_device *adev,
>> +                      unsigned int pasid,
>> +                      struct amdgpu_task_info *task_info);
>>     void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 8bf2ba310fd9..18207ecfd85c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -1448,9 +1448,10 @@ static int gmc_v8_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>         if (printk_ratelimit()) {
>>           struct amdgpu_task_info task_info;
>> +        struct amdgpu_vm *vm;
>>             memset(&task_info, 0, sizeof(struct amdgpu_task_info));
>> -        amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
>> +        vm = amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
>>             dev_err(adev->dev, "GPU fault detected: %d 0x%08x for 
>> process %s pid %d thread %s pid %d\n",
>>               entry->src_id, entry->src_data[0], task_info.process_name,
>> @@ -1461,6 +1462,9 @@ static int gmc_v8_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>               status);
>>           gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
>>                        entry->pasid);
>> +        if (vm)
>> +            amdgpu_vm_dump_pte(adev, vm, (uint64_t)addr
>> +                       << AMDGPU_GPU_PAGE_SHIFT);
>>       }
>>         vmid = REG_GET_FIELD(status, 
>> VM_CONTEXT1_PROTECTION_FAULT_STATUS,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index bd5d36944481..f27e88af4016 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -331,9 +331,10 @@ static int gmc_v9_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>         if (printk_ratelimit()) {
>>           struct amdgpu_task_info task_info;
>> +        struct amdgpu_vm *vm;
>>             memset(&task_info, 0, sizeof(struct amdgpu_task_info));
>> -        amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
>> +        vm = amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
>>             dev_err(adev->dev,
>>               "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
>> @@ -349,6 +350,8 @@ static int gmc_v9_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>               dev_err(adev->dev,
>>                   "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>                   status);
>> +        if (vm)
>> +            amdgpu_vm_dump_pte(adev, vm, addr);
>>       }
>>         return 0;
>