[PATCH] drm/amdgpu: flush TLB if valid PDE turns into PTE

Christian König ckoenig.leichtzumerken at gmail.com
Thu May 13 08:04:17 UTC 2021


Am 12.05.21 um 20:40 schrieb philip yang:
>
> On 2021-05-12 11:54 a.m., Felix Kuehling wrote:
>
>> Am 2021-05-12 um 8:38 a.m. schrieb Christian König:
>>> Am 12.05.21 um 14:34 schrieb Philip Yang:
>>>> Mapping huge page, 2MB aligned address with 2MB size, uses PDE0 as PTE.
>>>> If previously valid PDE0, PDE0.V=1 and PDE0.P=0 turns into PTE, this
>>>> requires TLB flush, otherwise page table walker will not read updated
>>>> PDE0.
>>>>
>>>> Change page table update mapping to return free_table flag to indicate
>>>> the previously valid PDE may have turned into a PTE if page table is
>>>> freed.
>>>>
>>>> Signed-off-by: Philip Yang<Philip.Yang at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++++++++++------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 12 ++++++++++--
>>>>    3 files changed, 22 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 3dcdcc9ff522..27418f9407f1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1457,7 +1457,7 @@ static void amdgpu_vm_fragment(struct
>>>> amdgpu_vm_update_params *params,
>>>>     */
>>>>    static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params
>>>> *params,
>>>>                     uint64_t start, uint64_t end,
>>>> -                 uint64_t dst, uint64_t flags)
>>>> +                 uint64_t dst, uint64_t flags, bool *free_table)
>>> Please put that flag into the params structure instead.
>>>
>>> Christian.
>> I agree. Christian, I think we also need to keep track of this for
>> graphics command submissions. amdgpu_cs_vm_handling needs get this flag
>> from amdgpu_vm_bo_update, and amdgpu_cs_submit needs to update
>> job->vm_needs_flushed based on this.
>
> amdgpu_vm_bo_update to pass NULL to ignore the free_table return flag.
>
> This new flag only used by SVM APIs. Old KFD and graphics command 
> submission path is not changed.
>

In the CS path we always flush, no matter if it is a mapping or 
unmapping operation.

But we should still move this into the parmeters structure instead. It 
is less overhead than the if(ptr) *p = true;

Christian.

> Thanks,
>
> Philip
>
>> Regards,
>>    Felix
>>
>>
>>>>    {
>>>>        struct amdgpu_device *adev = params->adev;
>>>>        struct amdgpu_vm_pt_cursor cursor;
>>>> @@ -1583,6 +1583,8 @@ static int amdgpu_vm_update_ptes(struct
>>>> amdgpu_vm_update_params *params,
>>>>                while (cursor.pfn < frag_start) {
>>>>                    amdgpu_vm_free_pts(adev, params->vm, &cursor);
>>>>                    amdgpu_vm_pt_next(adev, &cursor);
>>>> +                if (free_table)
>>>> +                    *free_table = true;
>>>>                }
>>>>              } else if (frag >= shift) {
>>>> @@ -1610,6 +1612,7 @@ static int amdgpu_vm_update_ptes(struct
>>>> amdgpu_vm_update_params *params,
>>>>     * @nodes: array of drm_mm_nodes with the MC addresses
>>>>     * @pages_addr: DMA addresses to use for mapping
>>>>     * @fence: optional resulting fence
>>>> + * @free_table: return true if page table is freed
>>>>     *
>>>>     * Fill in the page table entries between @start and @last.
>>>>     *
>>>> @@ -1624,7 +1627,8 @@ int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>>                    uint64_t flags, uint64_t offset,
>>>>                    struct drm_mm_node *nodes,
>>>>                    dma_addr_t *pages_addr,
>>>> -                struct dma_fence **fence)
>>>> +                struct dma_fence **fence,
>>>> +                bool *free_table)
>>>>    {
>>>>        struct amdgpu_vm_update_params params;
>>>>        enum amdgpu_sync_mode sync_mode;
>>>> @@ -1721,7 +1725,8 @@ int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>>            }
>>>>              tmp = start + num_entries;
>>>> -        r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
>>>> +        r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags,
>>>> +                      free_table);
>>>>            if (r)
>>>>                goto error_unlock;
>>>>    @@ -1879,7 +1884,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>                            resv, mapping->start,
>>>>                            mapping->last, update_flags,
>>>>                            mapping->offset, nodes,
>>>> -                        pages_addr, last_update);
>>>> +                        pages_addr, last_update, NULL);
>>>>            if (r)
>>>>                return r;
>>>>        }
>>>> @@ -2090,7 +2095,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device
>>>> *adev,
>>>>            r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
>>>>                            resv, mapping->start,
>>>>                            mapping->last, init_pte_value,
>>>> -                        0, NULL, NULL, &f);
>>>> +                        0, NULL, NULL, &f, NULL);
>>>>            amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>>            if (r) {
>>>>                dma_fence_put(f);
>>>> @@ -3428,7 +3433,7 @@ bool amdgpu_vm_handle_fault(struct
>>>> amdgpu_device *adev, u32 pasid,
>>>>        }
>>>>          r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false,
>>>> NULL, addr,
>>>> -                    addr, flags, value, NULL, NULL,
>>>> +                    addr, flags, value, NULL, NULL, NULL,
>>>>                        NULL);
>>>>        if (r)
>>>>            goto error_unlock;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index ea60ec122b51..f61c20b70b79 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -404,7 +404,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>>                    uint64_t flags, uint64_t offset,
>>>>                    struct drm_mm_node *nodes,
>>>>                    dma_addr_t *pages_addr,
>>>> -                struct dma_fence **fence);
>>>> +                struct dma_fence **fence, bool *free_table);
>>>>    int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>                struct amdgpu_bo_va *bo_va,
>>>>                bool clear);
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> index b665e9ff77e3..4f28052d44bf 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>> @@ -1084,7 +1084,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device
>>>> *adev, struct amdgpu_vm *vm,
>>>>          return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>>>> true, NULL,
>>>>                           start, last, init_pte_value, 0,
>>>> -                       NULL, NULL, fence);
>>>> +                       NULL, NULL, fence, NULL);
>>>>    }
>>>>      static int
>>>> @@ -1137,12 +1137,15 @@ svm_range_map_to_gpu(struct amdgpu_device
>>>> *adev, struct amdgpu_vm *vm,
>>>>                 struct amdgpu_device *bo_adev, struct dma_fence **fence)
>>>>    {
>>>>        struct amdgpu_bo_va bo_va;
>>>> +    bool free_table = false;
>>>> +    struct kfd_process *p;
>>>>        uint64_t pte_flags;
>>>>        int r = 0;
>>>>          pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
>>>> prange->start,
>>>>             prange->last);
>>>>    +    p = container_of(prange->svms, struct kfd_process, svms);
>>>>        if (prange->svm_bo && prange->ttm_res) {
>>>>            bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
>>>>            prange->mapping.bo_va = &bo_va;
>>>> @@ -1159,7 +1162,8 @@ svm_range_map_to_gpu(struct amdgpu_device
>>>> *adev, struct amdgpu_vm *vm,
>>>>                        prange->mapping.offset,
>>>>                        prange->ttm_res ?
>>>>                            prange->ttm_res->mm_node : NULL,
>>>> -                    dma_addr, &vm->last_update);
>>>> +                    dma_addr, &vm->last_update,
>>>> +                    &free_table);
>>>>        if (r) {
>>>>            pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
>>>>            goto out;
>>>> @@ -1175,6 +1179,10 @@ svm_range_map_to_gpu(struct amdgpu_device
>>>> *adev, struct amdgpu_vm *vm,
>>>>        if (fence)
>>>>            *fence = dma_fence_get(vm->last_update);
>>>>    +    if (free_table)
>>>> +        amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
>>>> +                          p->pasid);
>>>> +
>>>>    out:
>>>>        prange->mapping.bo_va = NULL;
>>>>        return r;
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



More information about the amd-gfx mailing list