[PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Dec 12 09:28:25 UTC 2017
Am 12.12.2017 um 08:58 schrieb Chunming Zhou:
>
>
> On 2017年12月09日 00:41, Christian König wrote:
>> Instead of falling back to 2 level and very limited address space use
>> 2+1 PD support and 128TB + 512GB of virtual address space.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++
>> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42
>> ++++++++++++++++++---------
>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 ++++++++++++++---
>> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 49
>> ++++++++++++++++++++------------
>> 5 files changed, 86 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index e5e0fbd43273..9517c0f76d27 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -541,6 +541,7 @@ struct amdgpu_mc {
>> u64 private_aperture_end;
>> /* protects concurrent invalidation */
>> spinlock_t invalidate_lock;
>> + bool translate_further;
>> };
>> /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 228f63e9ac5e..79134f0c26d9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
>> /* PDE is handled as PTE for VEGA10 */
>> #define AMDGPU_PDE_PTE (1ULL << 54)
>> +/* PTE is handled as PDE for VEGA10 */
>> +#define AMDGPU_PTE_TRANSLATE_FURTHER (1ULL << 56)
>> +
>> /* VEGA10 only */
>> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
>> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> index f1effadfbaa6..a56f77259130 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct
>> amdgpu_device *adev)
>> WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>> tmp = mmVM_L2_CNTL3_DEFAULT;
>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> + if (adev->mc.translate_further) {
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> + } else {
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>> + }
>> WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>> tmp = mmVM_L2_CNTL4_DEFAULT;
>> @@ -183,31 +190,40 @@ static void
>> gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>> static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device
>> *adev)
>> {
>> - int i;
>> + unsigned num_level, block_size;
>> uint32_t tmp;
>> + int i;
>> +
>> + num_level = adev->vm_manager.num_level;
>> + block_size = adev->vm_manager.block_size;
>> + if (adev->mc.translate_further)
>> + num_level -= 1;
>> + else
>> + block_size -= 9;
>> for (i = 0; i <= 14; i++) {
>> tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
>> - adev->vm_manager.num_level);
>> + num_level);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>> + 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - PAGE_TABLE_BLOCK_SIZE,
>> - adev->vm_manager.block_size - 9);
>> + PAGE_TABLE_BLOCK_SIZE,
>> + block_size);
>> /* Send no-retry XNACK on fault to suppress VM fault storm. */
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 0fe2a4e782ff..d6a19514c92b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct
>> amdgpu_device *adev, int level,
>> *addr = adev->vm_manager.vram_base_offset + *addr -
>> adev->mc.vram_start;
>> BUG_ON(*addr & 0xFFFF00000000003FULL);
>> +
>> + if (!adev->mc.translate_further)
>> + return;
>> +
>> + if (level == 0) {
> it's better to check if (level == num_level - 1 -1)
> Or as you posted in 'reverse PDBs order', we can use enumerate here
> and below for level checking.
That is a very good argument for your change, yes.
>
>> + /* Set the block size */
>> + if (!(*flags & AMDGPU_PDE_PTE))
>> + *flags |= 9ULL << 59;
> here native page size is 9, why the
> VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE is 6?
Take a look below. I change L2_CACHE_BIGK_FRAGMENT_SIZE to 9 when this
is active.
But it reminds me that I wanted to add a define for this to amdgpu_vm.h.
>
>> +
>> + } else if (level == 1) {
> it's better to check if (level == num_level -1)
>
> BTW: when we enable TF bit, the shift and num_entries of PTB is
> different by different native page size, I didn't see it in your
> patches, but since you select 2^9 * 2MB as block size, which avoid
> many trouble things.
> I tested it with 2+1, it works, but when I change a bit for 4+1, It
> failed.
> Anyway, it works for RV2 case.
Correct, yes that is only implemented for the 2+1 case. With the
cleanups you suggested 3+1 should be working as well, but I don't see
much use for it.
Any other native page size than 2MB also doesn't make to much sense,
cause we want to reduce the usage of VRAM on APUs as much as possible
and the huge pages we get from the OS for system memory are also 2MB in
size.
Regards,
Christian.
>
> Regards,
> David Zhou
>
>> + if (*flags & AMDGPU_PDE_PTE)
>> + *flags &= ~AMDGPU_PDE_PTE;
>> + else
>> + *flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
>> + }
>> }
>> static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>> @@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
>> switch (adev->asic_type) {
>> case CHIP_RAVEN:
>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
>> - if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
>> + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
>> amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
>> - else
>> - /* vm_size is 64GB for legacy 2-level page support */
>> - amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
>> + } else {
>> + /* vm_size is 128TB + 512GB for legacy 3-level page
>> support */
>> + amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
>> + adev->mc.translate_further =
>> + adev->vm_manager.num_level > 1;
>> + }
>> break;
>> case CHIP_VEGA10:
>> /* XXX Don't know how to get VRAM type yet. */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> index bd160d8700e0..a88f43b097dc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> @@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct
>> amdgpu_device *adev)
>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>> WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>> - tmp = mmVM_L2_CNTL3_DEFAULT;
>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
>> + if (adev->mc.translate_further) {
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>> + } else {
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>> + }
>> tmp = mmVM_L2_CNTL4_DEFAULT;
>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
>> VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
>> @@ -197,32 +202,40 @@ static void
>> mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>> static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>> {
>> - int i;
>> + unsigned num_level, block_size;
>> uint32_t tmp;
>> + int i;
>> +
>> + num_level = adev->vm_manager.num_level;
>> + block_size = adev->vm_manager.block_size;
>> + if (adev->mc.translate_further)
>> + num_level -= 1;
>> + else
>> + block_size -= 9;
>> for (i = 0; i <= 14; i++) {
>> tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
>> + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
>> + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
>> + num_level);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - ENABLE_CONTEXT, 1);
>> - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
>> - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>> + 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> - PAGE_TABLE_BLOCK_SIZE,
>> - adev->vm_manager.block_size - 9);
>> + PAGE_TABLE_BLOCK_SIZE,
>> + block_size);
>> /* Send no-retry XNACK on fault to suppress VM fault storm. */
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>
More information about the amd-gfx
mailing list