[PATCH 2/3] drm/amdgpu: rework how PTE flags are generated
Felix Kuehling
felix.kuehling at amd.com
Wed Jul 2 16:21:18 UTC 2025
On 2025-06-06 08:57, Christian König wrote:
> Previously we tried to keep the HW specific PTE flags in each mapping,
> but for CRIU that isn't sufficient any more since the original value is
> needed for the checkpoint procedure.
>
> So rework the whole handling, nuke the early mapping function, keep the
> UAPI flags in each mapping instead of the HW flags and translate them to
> the HW flags while filling in the PTEs.
>
> Only tested on Navi 23 for now, so probably needs quite a bit more
> work.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 37 +----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 1 -
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 15 ++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 8 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +-
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 57 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 57 +++++++++--------
> drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 56 ++++++++---------
> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 4 +-
> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 ++-
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 61 +++++++++----------
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 +-
> 16 files changed, 161 insertions(+), 187 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 260165bbe373..9e66d6108a9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -494,17 +494,20 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
> return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
> }
>
> -static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
> +static uint64_t get_pte_flags(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> + struct kgd_mem *mem)
> {
> uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
> AMDGPU_VM_MTYPE_DEFAULT;
> + uint64_t pte_flags = 0;
>
> if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
> mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
> if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
> mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>
> - return amdgpu_gem_va_map_flags(adev, mapping_flags);
> + amdgpu_gmc_get_vm_pte(adev, vm, NULL, mapping_flags, &pte_flags);
> + return pte_flags;
> }
>
> /**
> @@ -961,7 +964,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
> goto unwind;
> }
> attachment[i]->va = va;
> - attachment[i]->pte_flags = get_pte_flags(adev, mem);
> + attachment[i]->pte_flags = get_pte_flags(adev, vm, mem);
> attachment[i]->adev = adev;
> list_add(&attachment[i]->list, &mem->attachments);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 0ecc88df7208..1e335ffbf2df 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -791,36 +791,6 @@ amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> return fence;
> }
>
> -/**
> - * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
> - *
> - * @adev: amdgpu_device pointer
> - * @flags: GEM UAPI flags
> - *
> - * Returns the GEM UAPI flags mapped into hardware for the ASIC.
> - */
> -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
> -{
> - uint64_t pte_flag = 0;
> -
> - if (flags & AMDGPU_VM_PAGE_EXECUTABLE)
> - pte_flag |= AMDGPU_PTE_EXECUTABLE;
> - if (flags & AMDGPU_VM_PAGE_READABLE)
> - pte_flag |= AMDGPU_PTE_READABLE;
> - if (flags & AMDGPU_VM_PAGE_WRITEABLE)
> - pte_flag |= AMDGPU_PTE_WRITEABLE;
I was trying to see where AMDGPU_PTE_READABLE and AMDGPU_PTE_WRITEABLE
are being set now. I think it depends on amdgpu_ttm_tt_pte_flags. But
that never gets called for SVM ranges. That's probably why David found
SVM to be broken.
Regards,
Felix
> - if (flags & AMDGPU_VM_PAGE_PRT)
> - pte_flag |= AMDGPU_PTE_PRT_FLAG(adev);
> - if (flags & AMDGPU_VM_PAGE_NOALLOC)
> - pte_flag |= AMDGPU_PTE_NOALLOC;
> -
> - if (adev->gmc.gmc_funcs->map_mtype)
> - pte_flag |= amdgpu_gmc_map_mtype(adev,
> - flags & AMDGPU_VM_MTYPE_MASK);
> -
> - return pte_flag;
> -}
> -
> int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp)
> {
> @@ -841,7 +811,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> struct dma_fence_chain *timeline_chain = NULL;
> struct dma_fence *fence;
> struct drm_exec exec;
> - uint64_t va_flags;
> uint64_t vm_size;
> int r = 0;
>
> @@ -945,10 +914,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>
> switch (args->operation) {
> case AMDGPU_VA_OP_MAP:
> - va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
> args->offset_in_bo, args->map_size,
> - va_flags);
> + args->flags);
> break;
> case AMDGPU_VA_OP_UNMAP:
> r = amdgpu_vm_bo_unmap(adev, bo_va, args->va_address);
> @@ -960,10 +928,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> args->map_size);
> break;
> case AMDGPU_VA_OP_REPLACE:
> - va_flags = amdgpu_gem_va_map_flags(adev, args->flags);
> r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
> args->offset_in_bo, args->map_size,
> - va_flags);
> + args->flags);
> break;
> default:
> break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
> index 3a8f57900a3a..b51e8f95ee86 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
> @@ -63,7 +63,6 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp);
> int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp);
> -uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags);
> int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> struct drm_file *filp);
> int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index 46b2bcbd5025..c69317804071 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -152,15 +152,15 @@ struct amdgpu_gmc_funcs {
> unsigned pasid);
> /* enable/disable PRT support */
> void (*set_prt)(struct amdgpu_device *adev, bool enable);
> - /* map mtype to hardware flags */
> - uint64_t (*map_mtype)(struct amdgpu_device *adev, uint32_t flags);
> /* get the pde for a given mc addr */
> void (*get_vm_pde)(struct amdgpu_device *adev, int level,
> u64 *dst, u64 *flags);
> - /* get the pte flags to use for a BO VA mapping */
> + /* get the pte flags to use for PTEs */
> void (*get_vm_pte)(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> - uint64_t *flags);
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> + uint64_t *pte_flags);
> /* override per-page pte flags */
> void (*override_vm_pte_flags)(struct amdgpu_device *dev,
> struct amdgpu_vm *vm,
> @@ -354,9 +354,10 @@ struct amdgpu_gmc {
>
> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
> -#define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
> -#define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
> +#define amdgpu_gmc_get_vm_pte(adev, vm, bo, vm_flags, pte_flags) \
> + (adev)->gmc.gmc_funcs->get_vm_pte((adev), (vm), (bo), (vm_flags), \
> + (pte_flags))
> #define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \
> (adev)->gmc.gmc_funcs->override_vm_pte_flags \
> ((adev), (vm), (addr), (pte_flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index c316920f3450..87523fcd4386 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -69,7 +69,7 @@ struct amdgpu_bo_va_mapping {
> uint64_t last;
> uint64_t __subtree_last;
> uint64_t offset;
> - uint64_t flags;
> + uint32_t flags;
> };
>
> /* User space allocated BO in a VM */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> index d45ebfb642ca..a0b479d5fff1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> @@ -67,9 +67,9 @@ static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
> int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> struct amdgpu_bo_va **bo_va)
> {
> - u64 seq64_addr, va_flags;
> struct amdgpu_bo *bo;
> struct drm_exec exec;
> + u64 seq64_addr;
> int r;
>
> bo = adev->seq64.sbo;
> @@ -94,9 +94,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
> seq64_addr = amdgpu_seq64_get_va_base(adev) & AMDGPU_GMC_HOLE_MASK;
>
> - va_flags = amdgpu_gem_va_map_flags(adev, AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
> - r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE,
> - va_flags);
> + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0,
> + AMDGPU_VA_RESERVED_SEQ64_SIZE,
> + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_MTYPE_UC);
> if (r) {
> DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r);
> amdgpu_vm_bo_del(adev, *bo_va);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 3911c78f8282..10d135e71088 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1328,13 +1328,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
> /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
> * but in case of something, we filter the flags in first place
> */
> - if (!(mapping->flags & AMDGPU_PTE_READABLE))
> + if (!(mapping->flags & AMDGPU_VM_PAGE_READABLE))
> update_flags &= ~AMDGPU_PTE_READABLE;
> - if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
> + if (!(mapping->flags & AMDGPU_VM_PAGE_WRITEABLE))
> update_flags &= ~AMDGPU_PTE_WRITEABLE;
>
> /* Apply ASIC specific mapping flags */
> - amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
> + amdgpu_gmc_get_vm_pte(adev, vm, bo, mapping->flags,
> + &update_flags);
>
> trace_amdgpu_vm_bo_update(mapping);
>
> @@ -1475,7 +1476,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
> struct amdgpu_bo_va_mapping *mapping,
> struct dma_fence *fence)
> {
> - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
> + if (mapping->flags & AMDGPU_VM_PAGE_PRT)
> amdgpu_vm_add_prt_cb(adev, fence);
> kfree(mapping);
> }
> @@ -1754,7 +1755,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
> list_add(&mapping->list, &bo_va->invalids);
> amdgpu_vm_it_insert(mapping, &vm->va);
>
> - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
> + if (mapping->flags & AMDGPU_VM_PAGE_PRT)
> amdgpu_vm_prt_get(adev);
>
> if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
> @@ -1814,7 +1815,7 @@ static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
> int amdgpu_vm_bo_map(struct amdgpu_device *adev,
> struct amdgpu_bo_va *bo_va,
> uint64_t saddr, uint64_t offset,
> - uint64_t size, uint64_t flags)
> + uint64_t size, uint32_t flags)
> {
> struct amdgpu_bo_va_mapping *mapping, *tmp;
> struct amdgpu_bo *bo = bo_va->base.bo;
> @@ -1873,7 +1874,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
> int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
> struct amdgpu_bo_va *bo_va,
> uint64_t saddr, uint64_t offset,
> - uint64_t size, uint64_t flags)
> + uint64_t size, uint32_t flags)
> {
> struct amdgpu_bo_va_mapping *mapping;
> struct amdgpu_bo *bo = bo_va->base.bo;
> @@ -2730,7 +2731,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> dma_fence_put(vm->last_tlb_flush);
>
> list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
> - if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) {
> + if (mapping->flags & AMDGPU_VM_PAGE_PRT && prt_fini_needed) {
> amdgpu_vm_prt_fini(adev, vm);
> prt_fini_needed = false;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index f3ad687125ad..f9549f6b3d1f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -539,11 +539,11 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
> int amdgpu_vm_bo_map(struct amdgpu_device *adev,
> struct amdgpu_bo_va *bo_va,
> uint64_t addr, uint64_t offset,
> - uint64_t size, uint64_t flags);
> + uint64_t size, uint32_t flags);
> int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
> struct amdgpu_bo_va *bo_va,
> uint64_t addr, uint64_t offset,
> - uint64_t size, uint64_t flags);
> + uint64_t size, uint32_t flags);
> int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
> struct amdgpu_bo_va *bo_va,
> uint64_t addr);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index a3e2787501f1..846b67d52dfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -469,24 +469,6 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
> * 0 valid
> */
>
> -static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
> -{
> - switch (flags) {
> - case AMDGPU_VM_MTYPE_DEFAULT:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_NC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_WC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC);
> - case AMDGPU_VM_MTYPE_CC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC);
> - case AMDGPU_VM_MTYPE_UC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC);
> - default:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
> - }
> -}
> -
> static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
> uint64_t *addr, uint64_t *flags)
> {
> @@ -511,21 +493,39 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
> }
>
> static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> - struct amdgpu_bo *bo = mapping->bo_va->base.bo;
> -
> - *flags &= ~AMDGPU_PTE_EXECUTABLE;
> - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
> + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
> + *flags |= AMDGPU_PTE_EXECUTABLE;
> + else
> + *flags &= ~AMDGPU_PTE_EXECUTABLE;
>
> - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
> - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
> + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
> + case AMDGPU_VM_MTYPE_DEFAULT:
> + case AMDGPU_VM_MTYPE_NC:
> + default:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
> + break;
> + case AMDGPU_VM_MTYPE_WC:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
> + break;
> + case AMDGPU_VM_MTYPE_CC:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
> + break;
> + case AMDGPU_VM_MTYPE_UC:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
> + break;
> + }
>
> - *flags &= ~AMDGPU_PTE_NOALLOC;
> - *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
> + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
> + *flags |= AMDGPU_PTE_NOALLOC;
> + else
> + *flags &= ~AMDGPU_PTE_NOALLOC;
>
> - if (mapping->flags & AMDGPU_PTE_PRT) {
> + if (vm_flags & AMDGPU_VM_PAGE_PRT) {
> *flags |= AMDGPU_PTE_PRT;
> *flags |= AMDGPU_PTE_SNOOPED;
> *flags |= AMDGPU_PTE_LOG;
> @@ -566,7 +566,6 @@ static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
> .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
> - .map_mtype = gmc_v10_0_map_mtype,
> .get_vm_pde = gmc_v10_0_get_vm_pde,
> .get_vm_pte = gmc_v10_0_get_vm_pte,
> .get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index 72211409227b..36720d1c07a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -433,24 +433,6 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int
> * 0 valid
> */
>
> -static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
> -{
> - switch (flags) {
> - case AMDGPU_VM_MTYPE_DEFAULT:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_NC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_WC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC);
> - case AMDGPU_VM_MTYPE_CC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC);
> - case AMDGPU_VM_MTYPE_UC:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC);
> - default:
> - return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
> - }
> -}
> -
> static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
> uint64_t *addr, uint64_t *flags)
> {
> @@ -475,21 +457,39 @@ static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level,
> }
>
> static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> - struct amdgpu_bo *bo = mapping->bo_va->base.bo;
> -
> - *flags &= ~AMDGPU_PTE_EXECUTABLE;
> - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
> + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
> + *flags |= AMDGPU_PTE_EXECUTABLE;
> + else
> + *flags &= ~AMDGPU_PTE_EXECUTABLE;
>
> - *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
> - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
> + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
> + case AMDGPU_VM_MTYPE_DEFAULT:
> + case AMDGPU_VM_MTYPE_NC:
> + default:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_NC);
> + break;
> + case AMDGPU_VM_MTYPE_WC:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_WC);
> + break;
> + case AMDGPU_VM_MTYPE_CC:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_CC);
> + break;
> + case AMDGPU_VM_MTYPE_UC:
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
> + break;
> + }
>
> - *flags &= ~AMDGPU_PTE_NOALLOC;
> - *flags |= (mapping->flags & AMDGPU_PTE_NOALLOC);
> + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
> + *flags |= AMDGPU_PTE_NOALLOC;
> + else
> + *flags &= ~AMDGPU_PTE_NOALLOC;
>
> - if (mapping->flags & AMDGPU_PTE_PRT) {
> + if (vm_flags & AMDGPU_VM_PAGE_PRT) {
> *flags |= AMDGPU_PTE_PRT;
> *flags |= AMDGPU_PTE_SNOOPED;
> *flags |= AMDGPU_PTE_LOG;
> @@ -530,7 +530,6 @@ static const struct amdgpu_gmc_funcs gmc_v11_0_gmc_funcs = {
> .flush_gpu_tlb_pasid = gmc_v11_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v11_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v11_0_emit_pasid_mapping,
> - .map_mtype = gmc_v11_0_map_mtype,
> .get_vm_pde = gmc_v11_0_get_vm_pde,
> .get_vm_pte = gmc_v11_0_get_vm_pte,
> .get_vbios_fb_size = gmc_v11_0_get_vbios_fb_size,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> index b645d3e6a6c8..fb3fb31724a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
> @@ -456,20 +456,6 @@ static void gmc_v12_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
> * 0 valid
> */
>
> -static uint64_t gmc_v12_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
> -{
> - switch (flags) {
> - case AMDGPU_VM_MTYPE_DEFAULT:
> - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_NC:
> - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_UC:
> - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_UC);
> - default:
> - return AMDGPU_PTE_MTYPE_GFX12(0ULL, MTYPE_NC);
> - }
> -}
> -
> static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
> uint64_t *addr, uint64_t *flags)
> {
> @@ -493,30 +479,45 @@ static void gmc_v12_0_get_vm_pde(struct amdgpu_device *adev, int level,
> }
>
> static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> - struct amdgpu_bo *bo = mapping->bo_va->base.bo;
> + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
> + *flags |= AMDGPU_PTE_EXECUTABLE;
> + else
> + *flags &= ~AMDGPU_PTE_EXECUTABLE;
>
> - *flags &= ~AMDGPU_PTE_EXECUTABLE;
> - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
> + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
> + case AMDGPU_VM_MTYPE_DEFAULT:
> + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
> + break;
> + case AMDGPU_VM_MTYPE_NC:
> + default:
> + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
> + break;
> + case AMDGPU_VM_MTYPE_UC:
> + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
> + break;
> + }
>
> - *flags &= ~AMDGPU_PTE_MTYPE_GFX12_MASK;
> - *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_GFX12_MASK);
> + if (vm_flags & AMDGPU_VM_PAGE_NOALLOC)
> + *flags |= AMDGPU_PTE_NOALLOC;
> + else
> + *flags &= ~AMDGPU_PTE_NOALLOC;
>
> - if (mapping->flags & AMDGPU_PTE_PRT_GFX12) {
> - *flags |= AMDGPU_PTE_PRT_GFX12;
> + if (vm_flags & AMDGPU_VM_PAGE_PRT) {
> *flags |= AMDGPU_PTE_SNOOPED;
> *flags |= AMDGPU_PTE_SYSTEM;
> *flags |= AMDGPU_PTE_IS_PTE;
> *flags &= ~AMDGPU_PTE_VALID;
> }
>
> - if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
> - *flags |= AMDGPU_PTE_DCC;
> -
> - if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
> - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
> + if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
> + AMDGPU_GEM_CREATE_EXT_COHERENT |
> + AMDGPU_GEM_CREATE_UNCACHED))
> + *flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
> }
>
> static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
> @@ -546,7 +547,6 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = {
> .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v12_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v12_0_emit_pasid_mapping,
> - .map_mtype = gmc_v12_0_map_mtype,
> .get_vm_pde = gmc_v12_0_get_vm_pde,
> .get_vm_pte = gmc_v12_0_get_vm_pte,
> .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> index 8030fcd64210..f6ad7911f1e6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
> @@ -382,7 +382,9 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
> }
>
> static void gmc_v6_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> *flags &= ~AMDGPU_PTE_EXECUTABLE;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index a8d5795084fc..93d7ccb7d013 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -504,7 +504,9 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
> }
>
> static void gmc_v7_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> *flags &= ~AMDGPU_PTE_EXECUTABLE;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 99ca08e9bdb5..1ea198fcb51f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -716,11 +716,15 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
> }
>
> static void gmc_v8_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> - *flags &= ~AMDGPU_PTE_EXECUTABLE;
> - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
> + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
> + *flags |= AMDGPU_PTE_EXECUTABLE;
> + else
> + *flags &= ~AMDGPU_PTE_EXECUTABLE;
> *flags &= ~AMDGPU_PTE_PRT;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index cad014c3bbf7..aa2ea1078cb8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1084,27 +1084,6 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int v
> * 0 valid
> */
>
> -static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
> -
> -{
> - switch (flags) {
> - case AMDGPU_VM_MTYPE_DEFAULT:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_NC:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
> - case AMDGPU_VM_MTYPE_WC:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_WC);
> - case AMDGPU_VM_MTYPE_RW:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_RW);
> - case AMDGPU_VM_MTYPE_CC:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_CC);
> - case AMDGPU_VM_MTYPE_UC:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC);
> - default:
> - return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
> - }
> -}
> -
> static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
> uint64_t *addr, uint64_t *flags)
> {
> @@ -1134,6 +1113,7 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
> static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
> @@ -1247,25 +1227,43 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
> }
>
> static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
> - struct amdgpu_bo_va_mapping *mapping,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo *bo,
> + uint32_t vm_flags,
> uint64_t *flags)
> {
> - struct amdgpu_bo *bo = mapping->bo_va->base.bo;
> -
> - *flags &= ~AMDGPU_PTE_EXECUTABLE;
> - *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
> + if (vm_flags & AMDGPU_VM_PAGE_EXECUTABLE)
> + *flags |= AMDGPU_PTE_EXECUTABLE;
> + else
> + *flags &= ~AMDGPU_PTE_EXECUTABLE;
>
> - *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
> - *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
> + switch (vm_flags & AMDGPU_VM_MTYPE_MASK) {
> + case AMDGPU_VM_MTYPE_DEFAULT:
> + case AMDGPU_VM_MTYPE_NC:
> + default:
> + *flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_NC);
> + break;
> + case AMDGPU_VM_MTYPE_WC:
> + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_WC);
> + break;
> + case AMDGPU_VM_MTYPE_RW:
> + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_RW);
> + break;
> + case AMDGPU_VM_MTYPE_CC:
> + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
> + break;
> + case AMDGPU_VM_MTYPE_UC:
> + *flags |= AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_UC);
> + break;
> + }
>
> - if (mapping->flags & AMDGPU_PTE_PRT) {
> + if (vm_flags & AMDGPU_VM_PAGE_PRT) {
> *flags |= AMDGPU_PTE_PRT;
> *flags &= ~AMDGPU_PTE_VALID;
> }
>
> if ((*flags & AMDGPU_PTE_VALID) && bo)
> - gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.vm, bo,
> - flags);
> + gmc_v9_0_get_coherence_flags(adev, vm, bo, vm_flags, flags);
> }
>
> static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
> @@ -1442,7 +1440,6 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
> .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
> - .map_mtype = gmc_v9_0_map_mtype,
> .get_vm_pde = gmc_v9_0_get_vm_pde,
> .get_vm_pte = gmc_v9_0_get_vm_pte,
> .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 865dca2547de..4cdd585de0be 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1190,7 +1190,7 @@ svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b)
> }
>
> static uint64_t
> -svm_range_get_pte_flags(struct kfd_node *node,
> +svm_range_get_pte_flags(struct kfd_node *node, struct amdgpu_vm *vm,
> struct svm_range *prange, int domain)
> {
> struct kfd_node *bo_node;
> @@ -1306,7 +1306,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
> if (gc_ip_version >= IP_VERSION(12, 0, 0))
> pte_flags |= AMDGPU_PTE_IS_PTE;
>
> - pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags);
> + amdgpu_gmc_get_vm_pte(node->adev, vm, NULL, mapping_flags, &pte_flags);
> return pte_flags;
> }
>
> @@ -1413,7 +1413,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
> pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
> last_start, prange->start + i, last_domain ? "GPU" : "CPU");
>
> - pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain);
> + pte_flags = svm_range_get_pte_flags(pdd->dev, vm, prange, last_domain);
> if (readonly)
> pte_flags &= ~AMDGPU_PTE_WRITEABLE;
>
More information about the amd-gfx
mailing list