[PATCH 3/5] drm/amdkfd: Postpone memory mapping flags calculation to mapping time
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Aug 9 14:43:08 UTC 2019
Am 09.08.19 um 16:21 schrieb Zeng, Oak:
>
> Regards,
> Oak
>
> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig at amd.com>
> Sent: Friday, August 9, 2019 8:31 AM
> To: Zeng, Oak <Oak.Zeng at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Keely, Sean <Sean.Keely at amd.com>
> Subject: Re: [PATCH 3/5] drm/amdkfd: Postpone memory mapping flags calculation to mapping time
>
> Am 09.08.19 um 04:15 schrieb Zeng, Oak:
>> Some mapping flags are decided by memory mapping destination which is
>> not know at memory object allocation time. So it is reasonable to
>> decide memory mapping flags at mapping time, instead of alloc time.
>> Record memory allocation flags during allocation time and calculate
>> mapping flags during memory mapping time.
>>
>> Also made the memory mapping flage calculation to be asic-specific,
>> because different asic can have different mapping scheme.
>>
>> The new memory mapping flag is decided by both memory allocation flags
>> and whether the memory mapping is to a remote device.
>>
>> This is preparation work to introduce more sophisticated mapping scheme.
>>
>> Change-Id: I98e7c47d850585ad7f0a9e11617c92b7a9aced3b
>> Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 +++++-----------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++++
>> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 20 +++++++++++++++++++-
>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 19 ++++++++++++++++++-
>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 20 +++++++++++++++++++-
>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 21 ++++++++++++++++++++-
>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++++++++-
>> 8 files changed, 106 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index e519df3..026475a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -58,6 +58,7 @@ struct kgd_mem {
>> uint64_t va;
>>
>> uint32_t mapping_flags;
>> + uint32_t alloc_flags;
>>
>> atomic_t invalid;
>> struct amdkfd_process_info *process_info; diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 44a52b0..f91ef48 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -1081,7 +1081,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>> int byte_align;
>> u32 domain, alloc_domain;
>> u64 alloc_flags;
>> - uint32_t mapping_flags;
>> int ret;
>>
>> /*
>> @@ -1143,16 +1142,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>> adev->asic_type != CHIP_VEGAM) ?
>> VI_BO_SIZE_ALIGN : 1;
>>
>> - mapping_flags = AMDGPU_VM_PAGE_READABLE;
>> - if (flags & ALLOC_MEM_FLAGS_WRITABLE)
>> - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>> - if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>> - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>> - if (flags & ALLOC_MEM_FLAGS_COHERENT)
>> - mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> - else
>> - mapping_flags |= AMDGPU_VM_MTYPE_NC;
>> - (*mem)->mapping_flags = mapping_flags;
>> + (*mem)->alloc_flags = flags;
>>
>> amdgpu_sync_create(&(*mem)->sync);
>>
>> @@ -1298,6 +1288,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>> struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
>> {
>> struct amdgpu_device *adev = get_amdgpu_device(kgd);
>> + struct amdgpu_device *bo_adev;
>> struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
>> int ret;
>> struct amdgpu_bo *bo;
>> @@ -1315,6 +1306,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>> return -EINVAL;
>> }
>>
>> + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> + mem->mapping_flags = amdgpu_gmc_get_vm_mapping_flags(adev,
>> +mem->alloc_flags, bo_adev != adev);
>> +
>> /* Make sure restore is not running concurrently. Since we
>> * don't map invalid userptr BOs, we rely on the next restore
>> * worker to do the mapping
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> index 071145a..6bd0c28 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
>> @@ -105,6 +105,9 @@ struct amdgpu_gmc_funcs {
>> /* get the pde for a given mc addr */
>> void (*get_vm_pde)(struct amdgpu_device *adev, int level,
>> u64 *dst, u64 *flags);
>> + /* get per asic vm mapping flags */
>> + uint32_t (*get_vm_mapping_flags)(struct amdgpu_device *adev,
>> + uint32_t alloc_flags, bool remote_mapping);
>> };
>>
>> struct amdgpu_xgmi {
>> @@ -185,6 +188,7 @@ struct amdgpu_gmc {
>> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
>> #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
>> #define amdgpu_gmc_get_pte_flags(adev, flags)
>> (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
>> +#define amdgpu_gmc_get_vm_mapping_flags(adev, alloc_flags,
>> +remote_mapping) ((adev)->gmc.gmc_funcs->get_vm_mapping_flags((adev),
>> +(alloc_flags), (remote_mapping)))
>>
>> /**
>> * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible
>> through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 4e3ac10..7e420e0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -415,12 +415,30 @@ static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
>> }
>> }
>>
>> +static uint32_t gmc_v10_0_get_vm_mapping_flags(struct amdgpu_device *adev,
>> + uint32_t alloc_flags, bool remote_mapping) {
>> + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE;
>> +
>> + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT)
>> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> + else
>> + mapping_flags |= AMDGPU_VM_MTYPE_NC;
>> +
>> + return mapping_flags;
>> +}
>> +
>> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
>> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
>> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
>> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
>> .get_vm_pte_flags = gmc_v10_0_get_vm_pte_flags,
>> - .get_vm_pde = gmc_v10_0_get_vm_pde
>> + .get_vm_pde = gmc_v10_0_get_vm_pde,
>> + .get_vm_mapping_flags = gmc_v10_0_get_vm_mapping_flags
>> };
>>
>> static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) diff
>> --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> index b06d876..2b2af6a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> @@ -404,6 +404,22 @@ static void gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, int level,
>> BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
>> }
>>
>> +static uint32_t gmc_v6_0_get_vm_mapping_flags(struct amdgpu_device *adev,
>> + uint32_t alloc_flags, bool remote_mapping) {
>> + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE;
>> +
>> + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT)
>> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> + else
>> + mapping_flags |= AMDGPU_VM_MTYPE_NC;
>> +
>> + return mapping_flags;
>> +}
>> static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev,
>> bool value)
>> {
>> @@ -1157,7 +1173,8 @@ static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
>> .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
>> .set_prt = gmc_v6_0_set_prt,
>> .get_vm_pde = gmc_v6_0_get_vm_pde,
>> - .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
>> + .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags,
>> + .get_vm_mapping_flags = gmc_v6_0_get_vm_mapping_flags
> That looks like a duplication of what get_vm_pte_flags is supposed to be doing.
>
> [Oak] There are two layers of definition here: general user-accessible AMDGPU_VM_xxx definitions (in amdgpu_drm.h) and asic-specific AMDGPU_PTE_xxx definitions (defined in amdgpu_vm.h). For amdgpu drm user, user pass in AMDGPU_VM_XX flags and get_vm_pte_flags translate those flags to AMDGPU_PTE_xxx flags to program page table.
>
> For compute stack, there are two layers of translation: we will first generate the AMDGPU_VM_xxx flags from memory object allocation flags/memory physical location/memory mapping destination/memory access path (pcie or xgmi). Then get_vm_pte_flags is called to translate AMDGPU_VM_xx flags to AMDGPU_PTE_xx flags. The second layer translation is the same with the drm usage.
>
> For compute stack user, due the API definition, user can't directly pass in the AMDGPU_VM_xxx flags, instead it pass in an allocation flags. Also it is not possible for user to pass in the AMDGPU_VM flags as some of the flags is decided by factors that user is unaware, for example whether the access is through pcie or xgmi.
>
> Due to above reasons, We had to do two layers of translations. The two layers of translation exists before this patch seriers (the first layer was in amdgpu_amdkfd_gpuvm.c). When I do this patch series, I had a need to make the first layers of translation also to be asic-specific as Arcturus introduce different mapping scheme. Then I moved the first layer translation to asic-specifc gmc functions.
Yeah, but this is a really bad design. We are essentially leaking the
problem that we have two upper layers into the hardware specific lower
layer and that is a rather big no-go.
We need to find a different approach for this which makes it possible to
handle this with a single callback or the design is a rather clear NAK.
Christian.
>
> Christian.
>
>> };
>>
>> static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { diff
>> --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 75aa333..619862e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -481,6 +481,23 @@ static void gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, int level,
>> BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
>> }
>>
>> +static uint32_t gmc_v7_0_get_vm_mapping_flags(struct amdgpu_device *adev,
>> + uint32_t alloc_flags, bool remote_mapping) {
>> + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE;
>> +
>> + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT)
>> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> + else
>> + mapping_flags |= AMDGPU_VM_MTYPE_NC;
>> +
>> + return mapping_flags;
>> +}
>> +
>> /**
>> * gmc_v8_0_set_fault_enable_default - update VM fault handling
>> *
>> @@ -1353,7 +1370,8 @@ static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
>> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
>> .set_prt = gmc_v7_0_set_prt,
>> .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
>> - .get_vm_pde = gmc_v7_0_get_vm_pde
>> + .get_vm_pde = gmc_v7_0_get_vm_pde,
>> + .get_vm_mapping_flags = gmc_v7_0_get_vm_mapping_flags
>> };
>>
>> static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff
>> --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 8bf2ba3..d2cecb30 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -706,6 +706,24 @@ static void gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, int level,
>> BUG_ON(*addr & 0xFFFFFF0000000FFFULL);
>> }
>>
>> +static uint32_t gmc_v8_0_get_vm_mapping_flags(struct amdgpu_device *adev,
>> + uint32_t alloc_flags, bool remote_mapping) {
>> + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE;
>> +
>> + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT)
>> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> + else
>> + mapping_flags |= AMDGPU_VM_MTYPE_NC;
>> +
>> + return mapping_flags;
>> +}
>> +
>> +
>> /**
>> * gmc_v8_0_set_fault_enable_default - update VM fault handling
>> *
>> @@ -1721,7 +1739,8 @@ static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
>> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
>> .set_prt = gmc_v8_0_set_prt,
>> .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
>> - .get_vm_pde = gmc_v8_0_get_vm_pde
>> + .get_vm_pde = gmc_v8_0_get_vm_pde,
>> + .get_vm_mapping_flags = gmc_v8_0_get_vm_mapping_flags
>> };
>>
>> static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff
>> --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index e6adc86..d709902 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -626,12 +626,31 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
>> }
>> }
>>
>> +static uint32_t gmc_v9_0_get_vm_mapping_flags(struct amdgpu_device *adev,
>> + uint32_t alloc_flags, bool remote_mapping) {
>> + uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE;
>> +
>> + if (alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
>> + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
>> + if (alloc_flags & ALLOC_MEM_FLAGS_COHERENT)
>> + mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> + else
>> + mapping_flags |= AMDGPU_VM_MTYPE_NC;
>> +
>> + return mapping_flags;
>> +}
>> +
>> +
>> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
>> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
>> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
>> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
>> .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
>> - .get_vm_pde = gmc_v9_0_get_vm_pde
>> + .get_vm_pde = gmc_v9_0_get_vm_pde,
>> + .get_vm_mapping_flags = gmc_v9_0_get_vm_mapping_flags
>> };
>>
>> static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list