[PATCH] drm/amdgpu: implement 2+1 PD support for Raven v2
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Dec 15 10:04:14 UTC 2017
Am 15.12.2017 um 08:30 schrieb Chunming Zhou:
>
>
> On 2017年12月15日 02:04, Alex Deucher wrote:
>> On Thu, Dec 14, 2017 at 7:03 AM, Christian König
>> <ckoenig.leichtzumerken at gmail.com> wrote:
>>> Instead of falling back to 2 level and very limited address space use
>>> 2+1 PD support and 128TB + 512GB of virtual address space.
>>>
>>> v2: cleanup defines, rebase on top of level enum
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 ++++
>>> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 42
>>> ++++++++++++++++++---------
>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 26 ++++++++++++++---
>>> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 49
>>> ++++++++++++++++++++------------
>>> 5 files changed, 89 insertions(+), 35 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 0cb2235f4798..8ac5875472bd 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -541,6 +541,7 @@ struct amdgpu_mc {
>>> u64 private_aperture_end;
>>> /* protects concurrent invalidation */
>>> spinlock_t invalidate_lock;
>>> + bool translate_further;
>>> };
>>>
>>> /*
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 1056484de0e3..edd2ea52dc00 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -70,6 +70,12 @@ struct amdgpu_bo_list_entry;
>>> /* PDE is handled as PTE for VEGA10 */
>>> #define AMDGPU_PDE_PTE (1ULL << 54)
>>>
>>> +/* PTE is handled as PDE for VEGA10 (Translate Further) */
>>> +#define AMDGPU_PTE_TF (1ULL << 56)
>>> +
>>> +/* PDE Block Fragment Size for VEGA10 */
>>> +#define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59)
>> Safer to put parens around the "a" in case someone passes in an
>> expression.
>>
>>
>>> +
>>> /* VEGA10 only */
>>> #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57)
>>> #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>> index 1d392f186e0d..197005e54d78 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>> @@ -143,8 +143,15 @@ static void gfxhub_v1_0_init_cache_regs(struct
>>> amdgpu_device *adev)
>>> WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
>>>
>>> tmp = mmVM_L2_CNTL3_DEFAULT;
>>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>>> + if (adev->mc.translate_further) {
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>>> + } else {
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>>> + }
>> Is this correct? Aren't the cases reversed here? Won't this change 2
>> and 4 level?
> obviously it's reversed.
Ah, crap yeah. New patch is on the list, please review.
>
> In addition, I wonder whether we should set L2_CACHE_BIGK_FRAGMENT_SIZE
> to 0 for TF usage? For the TF case, the page size pointed to by the PTE
> is still 4KB.
> If the TF bit is disabled, it certainly should be 9, the same as
> BLOCK_FRAGMENT_SIZE of the PDE.
No, that needs to be 6 when translate further is disabled. With the P
bit set we have a different handling in the hardware than with the TF
bit cleared.
>
> And when I tested it, it failed on Vega10. I think there are some points we need to address:
> 0. set root_level if tf is enabled.
> 1. alloc SUBPTB
> 2. get entry for SUBPTB
> 3. shift for tf case.
>
> I made a draft (attached), but have had no time to try it yet.
NAK, complete overkill. Just tested it and both 2+1 as well as 3+1 work
perfectly fine.
Please review the new patch on the mailing list.
Regards,
Christian.
>
> Regards,
> David Zhou
>
>>
>>
>>> WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
>>>
>>> tmp = mmVM_L2_CNTL4_DEFAULT;
>>> @@ -182,31 +189,40 @@ static void
>>> gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>>>
>>> static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>>> {
>>> - int i;
>>> + unsigned num_level, block_size;
>>> uint32_t tmp;
>>> + int i;
>>> +
>>> + num_level = adev->vm_manager.num_level;
>>> + block_size = adev->vm_manager.block_size;
>>> + if (adev->mc.translate_further)
>>> + num_level -= 1;
>>> + else
>>> + block_size -= 9;
>>>
>>> for (i = 0; i <= 14; i++) {
>>> tmp = RREG32_SOC15_OFFSET(GC, 0,
>>> mmVM_CONTEXT1_CNTL, i);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> ENABLE_CONTEXT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> PAGE_TABLE_DEPTH,
>>> - adev->vm_manager.num_level);
>>> + num_level);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>>> + 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - PAGE_TABLE_BLOCK_SIZE,
>>> - adev->vm_manager.block_size - 9);
>>> + PAGE_TABLE_BLOCK_SIZE,
>>> + block_size);
>>> /* Send no-retry XNACK on fault to suppress VM
>>> fault storm. */
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index 1b5dfccfd5d5..ab0a74b0d30f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -476,6 +476,21 @@ static void gmc_v9_0_get_vm_pde(struct
>>> amdgpu_device *adev, int level,
>>> *addr = adev->vm_manager.vram_base_offset + *addr -
>>> adev->mc.vram_start;
>>> BUG_ON(*addr & 0xFFFF00000000003FULL);
>>> +
>>> + if (!adev->mc.translate_further)
>>> + return;
>>> +
>>> + if (level == AMDGPU_VM_PDB1) {
>>> + /* Set the block fragment size */
>>> + if (!(*flags & AMDGPU_PDE_PTE))
>>> + *flags |= AMDGPU_PDE_BFS(0x9);
>>> +
>>> + } else if (level == AMDGPU_VM_PDB0) {
>>> + if (*flags & AMDGPU_PDE_PTE)
>>> + *flags &= ~AMDGPU_PDE_PTE;
>>> + else
>>> + *flags |= AMDGPU_PTE_TF;
>>> + }
>>> }
>>>
>>> static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
>>> @@ -765,11 +780,14 @@ static int gmc_v9_0_sw_init(void *handle)
>>> switch (adev->asic_type) {
>>> case CHIP_RAVEN:
>>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
>>> - if (adev->rev_id == 0x0 || adev->rev_id == 0x1)
>>> + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
>>> amdgpu_vm_adjust_size(adev, 256 * 1024, 9,
>>> 3, 48);
>>> - else
>>> - /* vm_size is 64GB for legacy 2-level page
>>> support */
>>> - amdgpu_vm_adjust_size(adev, 64, 9, 1, 48);
>>> + } else {
>>> + /* vm_size is 128TB + 512GB for legacy
>>> 3-level page support */
>>> + amdgpu_vm_adjust_size(adev, 128 * 1024 +
>>> 512, 9, 2, 48);
>>> + adev->mc.translate_further =
>>> + adev->vm_manager.num_level > 1;
>>> + }
>>> break;
>>> case CHIP_VEGA10:
>>> /* XXX Don't know how to get VRAM type yet. */
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>> index 0c5a76f88d35..ba1dfc7fb79e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>> @@ -155,10 +155,15 @@ static void mmhub_v1_0_init_cache_regs(struct
>>> amdgpu_device *adev)
>>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
>>> WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
>>>
>>> - tmp = mmVM_L2_CNTL3_DEFAULT;
>>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>>> - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>>> L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>>> - WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp);
>>> + if (adev->mc.translate_further) {
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 9);
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
>>> + } else {
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
>>> + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
>>> + L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
>>> + }
>> Same here.
>>
>>> tmp = mmVM_L2_CNTL4_DEFAULT;
>>> tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4,
>>> VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
>>> @@ -196,32 +201,40 @@ static void
>>> mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
>>>
>>> static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
>>> {
>>> - int i;
>>> + unsigned num_level, block_size;
>>> uint32_t tmp;
>>> + int i;
>>> +
>>> + num_level = adev->vm_manager.num_level;
>>> + block_size = adev->vm_manager.block_size;
>>> + if (adev->mc.translate_further)
>>> + num_level -= 1;
>>> + else
>>> + block_size -= 9;
>>>
>>> for (i = 0; i <= 14; i++) {
>>> tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
>>> mmVM_CONTEXT1_CNTL, i);
>>> + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> ENABLE_CONTEXT, 1);
>>> + tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> PAGE_TABLE_DEPTH,
>>> + num_level);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - ENABLE_CONTEXT, 1);
>>> - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - PAGE_TABLE_DEPTH,
>>> adev->vm_manager.num_level);
>>> - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
>>> + 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> - PAGE_TABLE_BLOCK_SIZE,
>>> - adev->vm_manager.block_size - 9);
>>> + PAGE_TABLE_BLOCK_SIZE,
>>> + block_size);
>>> /* Send no-retry XNACK on fault to suppress VM
>>> fault storm. */
>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>> RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
>>> --
>>> 2.11.0
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
More information about the amd-gfx
mailing list