[PATCH 8/8] drm/amdgpu: implement 2+1 PD support for Raven
Chunming Zhou
zhoucm1 at amd.com
Tue Dec 12 07:58:29 UTC 2017
On 2017年12月09日 00:41, Christian König wrote:
> Instead of falling back to 2 level and very limited address space use
> 2+1 PD support and 128TB + 512GB of virtual address space.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 ++
> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42 ++++++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 ++++++++++++++---
> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 49 ++++++++++++++++++++------------
> 5 files changed, 86 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e5e0fbd43273..9517c0f76d27 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -541,6 +541,7 @@ struct amdgpu_mc {
> u64 private_aperture_end;
> /* protects concurrent invalidation */
> spinlock_t invalidate_lock;
> + bool translate_further;
> };
>
> /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 228f63e9ac5e..79134f0c26d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -69,6 +69,9 @@ struct amdgpu_bo_list_entry;
> /* PDE is handled as PTE for VEGA10 */
> #define AMDGPU_PDE_PTE (1ULL << 54)
>
> +/* PTE is handled as PDE for VEGA10 */
> +#define AMDGPU_PTE_TRANSLATE_FURTHER (1ULL << 56)
> +
> /* VEGA10 only */
> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> index f1effadfbaa6..a56f77259130 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> @@ -144,8 +144,15 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
> WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>
> tmp = mmVM_L2_CNTL3_DEFAULT;
> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> + if (adev->mc.translate_further) {
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> + L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> + } else {
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> + L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
> + }
> WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>
> tmp = mmVM_L2_CNTL4_DEFAULT;
> @@ -183,31 +190,40 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>
> static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
> {
> - int i;
> + unsigned num_level, block_size;
> uint32_t tmp;
> + int i;
> +
> + num_level = adev->vm_manager.num_level;
> + block_size = adev->vm_manager.block_size;
> + if (adev->mc.translate_further)
> + num_level -= 1;
> + else
> + block_size -= 9;
>
> for (i = 0; i <= 14; i++) {
> tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
> - adev->vm_manager.num_level);
> + num_level);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
> + 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - PAGE_TABLE_BLOCK_SIZE,
> - adev->vm_manager.block_size - 9);
> + PAGE_TABLE_BLOCK_SIZE,
> + block_size);
> /* Send no-retry XNACK on fault to suppress VM fault storm. */
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 0fe2a4e782ff..d6a19514c92b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -481,6 +481,21 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
> *addr = adev->vm_manager.vram_base_offset + *addr -
> adev->mc.vram_start;
> BUG_ON(*addr & 0xFFFF00000000003FULL);
> +
> + if (!adev->mc.translate_further)
> + return;
> +
> + if (level == 0) {
it's better to check if (level == num_level - 1 -1)
Or as you posted in 'reverse PDBs order', we can use enumerate here and
below for level checking.
> + /* Set the block size */
> + if (!(*flags & AMDGPU_PDE_PTE))
> + *flags |= 9ULL << 59;
here native page size is 9, why the
VM_L2_CNTL3.L2_CACHE_BIGK_FRAGMENT_SIZE is 6?
> +
> + } else if (level == 1) {
it's better to check if (level == num_level -1)
BTW: when we enable TF bit, the shift and num_entries of PTB is
different by different native page size, I didn't see it in your
patches, but since you select 2^9 * 2MB as block size, which avoid many
trouble things.
I tested it with 2+1, it works, but when I change a bit for 4+1, It failed.
Anyway, it works for RV2 case.
Regards,
David Zhou
> + if (*flags & AMDGPU_PDE_PTE)
> + *flags &= ~AMDGPU_PDE_PTE;
> + else
> + *flags |= AMDGPU_PTE_TRANSLATE_FURTHER;
> + }
> }
>
> static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
> @@ -771,11 +786,14 @@ static int gmc_v9_0_sw_init(void *handle)
> switch (adev->asic_type) {
> case CHIP_RAVEN:
> adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
> - if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
> + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
> amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
> - else
> - /* vm_size is 64GB for legacy 2-level page support */
> - amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
> + } else {
> + /* vm_size is 128TB + 512GB for legacy 3-level page support */
> + amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
> + adev->mc.translate_further =
> + adev->vm_manager.num_level > 1;
> + }
> break;
> case CHIP_VEGA10:
> /* XXX Don't know how to get VRAM type yet. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index bd160d8700e0..a88f43b097dc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -156,10 +156,15 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
> WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>
> - tmp = mmVM_L2_CNTL3_DEFAULT;
> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
> + if (adev->mc.translate_further) {
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> + L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
> + } else {
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
> + L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
> + }
>
> tmp = mmVM_L2_CNTL4_DEFAULT;
> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
> @@ -197,32 +202,40 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>
> static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
> {
> - int i;
> + unsigned num_level, block_size;
> uint32_t tmp;
> + int i;
> +
> + num_level = adev->vm_manager.num_level;
> + block_size = adev->vm_manager.block_size;
> + if (adev->mc.translate_further)
> + num_level -= 1;
> + else
> + block_size -= 9;
>
> for (i = 0; i <= 14; i++) {
> tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i);
> + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
> + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
> + num_level);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - ENABLE_CONTEXT, 1);
> - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - PAGE_TABLE_DEPTH, adev->vm_manager.num_level);
> - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
> + 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> - PAGE_TABLE_BLOCK_SIZE,
> - adev->vm_manager.block_size - 9);
> + PAGE_TABLE_BLOCK_SIZE,
> + block_size);
> /* Send no-retry XNACK on fault to suppress VM fault storm. */
> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
More information about the amd-gfx
mailing list