[PATCH] drm/amdgpu: fix vm size and block size for VMPT (v3)
Zhang, Jerry (Junwei)
Jerry.Zhang at amd.com
Wed Apr 5 02:41:32 UTC 2017
On 04/05/2017 10:27 AM, Alex Deucher wrote:
> On Tue, Apr 4, 2017 at 10:11 PM, Zhang, Jerry (Junwei)
> <Jerry.Zhang at amd.com> wrote:
>> On 04/04/2017 03:34 AM, Deucher, Alexander wrote:
>>>>
>>>> -----Original Message-----
>>>> From: Junwei Zhang [mailto:Jerry.Zhang at amd.com]
>>>> Sent: Friday, March 31, 2017 10:44 PM
>>>> To: Deucher, Alexander; Koenig, Christian
>>>> Cc: amd-gfx at lists.freedesktop.org; Zhang, Jerry
>>>> Subject: [PATCH] drm/amdgpu: fix vm size and block size for VMPT (v3)
>>>>
>>>> From: "Zhang, Jerry" <Jerry.Zhang at amd.com>
>>>>
>>>> v2: set both of them in gmc
>>>> v3: move vm size and block size in vm manager
>>>>
>>>> Signed-off-by: Junwei Zhang <Jerry.Zhang at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 --------
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 ++++++++++++---------
>>>> -
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++-
>>>> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 +-
>>>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 10 ++++++++--
>>>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 9 +++++++--
>>>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 9 +++++++--
>>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 +++++++++++++--------
>>>> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 +-
>>>> 9 files changed, 52 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index fbb4afb..1d0c742 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -1041,14 +1041,6 @@ static bool amdgpu_check_pot_argument(int arg)
>>>>
>>>> static void amdgpu_get_block_size(struct amdgpu_device *adev)
>>>> {
>>>> - /* from AI, asic starts to support multiple level VMPT */
>>>> - if (adev->asic_type >= CHIP_VEGA10) {
>>>> - if (amdgpu_vm_block_size != 9)
>>>> - dev_warn(adev->dev,
>>>> - "Multi-VMPT limits block size to one
>>>> page!\n");
>>>> - amdgpu_vm_block_size = 9;
>>>> - return;
>>>> - }
>>>> /* defines number of bits in page table versus page directory,
>>>> * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
>>>> * page table and the remaining bits are in the page directory */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 43adc4b..46d3f1a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -100,13 +100,14 @@ static unsigned amdgpu_vm_num_entries(struct
>>>> amdgpu_device *adev,
>>>> if (level == 0)
>>>> /* For the root directory */
>>>> return adev->vm_manager.max_pfn >>
>>>> - (amdgpu_vm_block_size * adev-
>>>>> vm_manager.num_level);
>>>> + (adev->vm_manager.block_size *
>>>> + adev->vm_manager.num_level);
>>>> else if (level == adev->vm_manager.num_level)
>>>> /* For the page tables on the leaves */
>>>> - return AMDGPU_VM_PTE_COUNT;
>>>> + return AMDGPU_VM_PTE_COUNT(adev);
>>>> else
>>>> /* Everything in between */
>>>> - return 1 << amdgpu_vm_block_size;
>>>> + return 1 << adev->vm_manager.block_size;
>>>> }
>>>>
>>>> /**
>>>> @@ -271,7 +272,7 @@ static int amdgpu_vm_alloc_levels(struct
>>>> amdgpu_device *adev,
>>>> unsigned level)
>>>> {
>>>> unsigned shift = (adev->vm_manager.num_level - level) *
>>>> - amdgpu_vm_block_size;
>>>> + adev->vm_manager.block_size;
>>>> unsigned pt_idx, from, to;
>>>> int r;
>>>>
>>>> @@ -970,7 +971,7 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct
>>>> amdgpu_pte_update_params *p,
>>>> unsigned idx, level = p->adev->vm_manager.num_level;
>>>>
>>>> while (entry->entries) {
>>>> - idx = addr >> (amdgpu_vm_block_size * level--);
>>>> + idx = addr >> (p->adev->vm_manager.block_size * level--);
>>>> idx %= amdgpu_bo_size(entry->bo) / 8;
>>>> entry = &entry->entries[idx];
>>>> }
>>>> @@ -997,7 +998,8 @@ static void amdgpu_vm_update_ptes(struct
>>>> amdgpu_pte_update_params *params,
>>>> uint64_t start, uint64_t end,
>>>> uint64_t dst, uint64_t flags)
>>>> {
>>>> - const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
>>>> + struct amdgpu_device *adev = params->adev;
>>>> + const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
>>>>
>>>> uint64_t cur_pe_start, cur_nptes, cur_dst;
>>>> uint64_t addr; /* next GPU address to be updated */
>>>> @@ -1021,7 +1023,7 @@ static void amdgpu_vm_update_ptes(struct
>>>> amdgpu_pte_update_params *params,
>>>> if ((addr & ~mask) == (end & ~mask))
>>>> nptes = end - addr;
>>>> else
>>>> - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
>>>> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>>>
>>>> cur_pe_start = amdgpu_bo_gpu_offset(pt);
>>>> cur_pe_start += (addr & mask) * 8;
>>>> @@ -1049,7 +1051,7 @@ static void amdgpu_vm_update_ptes(struct
>>>> amdgpu_pte_update_params *params,
>>>> if ((addr & ~mask) == (end & ~mask))
>>>> nptes = end - addr;
>>>> else
>>>> - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
>>>> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr &
>>>> mask);
>>>>
>>>> next_pe_start = amdgpu_bo_gpu_offset(pt);
>>>> next_pe_start += (addr & mask) * 8;
>>>> @@ -1196,7 +1198,7 @@ static int amdgpu_vm_bo_update_mapping(struct
>>>> amdgpu_device *adev,
>>>> * reserve space for one command every (1 << BLOCK_SIZE)
>>>> * entries or 2k dwords (whatever is smaller)
>>>> */
>>>> - ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;
>>>> + ncmds = (nptes >> min(adev->vm_manager.block_size, 11)) + 1;
>>>>
>>>> /* padding, etc. */
>>>> ndw = 64;
>>>> @@ -2067,7 +2069,7 @@ void amdgpu_vm_bo_invalidate(struct
>>>> amdgpu_device *adev,
>>>> int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>>> {
>>>> const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
>>>> - AMDGPU_VM_PTE_COUNT * 8);
>>>> + AMDGPU_VM_PTE_COUNT(adev) * 8);
>>>> unsigned ring_instance;
>>>> struct amdgpu_ring *ring;
>>>> struct amd_sched_rq *rq;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index 102b1f7..1242264 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -45,7 +45,7 @@
>>>> #define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
>>>>
>>>> /* number of entries in page table */
>>>> -#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
>>>> +#define AMDGPU_VM_PTE_COUNT(adev) (1 << adev-
>>>>> vm_manager.block_size)
>>>
>>>
>>> For safety, put parens around adev. E.g.,
>>> #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
>>
>>
>> Yeah, thanks to point it out.
>>
>>>
>>> With that fixed:
>>> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
>>>
>>> As a follow on patch, it would be nice to add some reasonable clamping in
>>> the
>>> gmc files so we can still override the default settings and not cause too
>>> much
>>> problem with multiple asics in the same system.
>>
>>
>> Could you elaborate it a bit more, about "clamping"?
>
> e.g., setting amdgpu_vm_block_size to -1 (auto) bt default and setting
> a reasonable default for each gmc if it's set to 0, or overriding if
> it's not set to -1. That way we can have different defaults on each
> family and still override for testing/debugging.
Thanks for your explanation.
Now I got the "clamping" :)
Jerry
>
> Alex
>
>
>>
>> Jerry
>>
>>>
>>> Alex
>>>
>>>>
>>>> /* PTBs (Page Table Blocks) need to be aligned to 32K */
>>>> #define AMDGPU_VM_PTB_ALIGN_SIZE 32768
>>>> @@ -155,6 +155,8 @@ struct amdgpu_vm_manager {
>>>>
>>>> uint64_t max_pfn;
>>>> uint32_t num_level;
>>>> + uint64_t vm_size;
>>>> + uint32_t block_size;
>>>> /* vram base address for page table entry */
>>>> u64 vram_base_offset;
>>>> /* is vm enabled? */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>>> index 30ef312..9223c2b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>>>> @@ -222,7 +222,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device
>>>> *adev)
>>>>
>>>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> PAGE_TABLE_BLOCK_SIZE,
>>>> - amdgpu_vm_block_size - 9);
>>>> + adev->vm_manager.block_size - 9);
>>>> WREG32(SOC15_REG_OFFSET(GC, 0,
>>>> mmVM_CONTEXT1_CNTL) + i, tmp);
>>>> WREG32(SOC15_REG_OFFSET(GC, 0,
>>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
>>>> WREG32(SOC15_REG_OFFSET(GC, 0,
>>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>>>> index d958660..30d5c42 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>>>> @@ -543,7 +543,8 @@ static int gmc_v6_0_gart_enable(struct
>>>> amdgpu_device *adev)
>>>> WREG32(mmVM_CONTEXT1_CNTL,
>>>> VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
>>>> (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
>>>> - ((amdgpu_vm_block_size - 9) <<
>>>> VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
>>>> + ((adev->vm_manager.block_size - 9)
>>>> + << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
>>>> if (amdgpu_vm_fault_stop ==
>>>> AMDGPU_VM_FAULT_STOP_ALWAYS)
>>>> gmc_v6_0_set_fault_enable_default(adev, false);
>>>> else
>>>> @@ -848,7 +849,12 @@ static int gmc_v6_0_sw_init(void *handle)
>>>> if (r)
>>>> return r;
>>>>
>>>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>>>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>>>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>>>> +
>>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>>>> + adev->vm_manager.vm_size, adev-
>>>>> vm_manager.block_size);
>>>>
>>>> adev->mc.mc_mask = 0xffffffffffULL;
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> index 0c0a601..9427c7d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>>>> @@ -639,7 +639,7 @@ static int gmc_v7_0_gart_enable(struct
>>>> amdgpu_device *adev)
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> ENABLE_CONTEXT, 1);
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> PAGE_TABLE_DEPTH, 1);
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> PAGE_TABLE_BLOCK_SIZE,
>>>> - amdgpu_vm_block_size - 9);
>>>> + adev->vm_manager.block_size - 9);
>>>> WREG32(mmVM_CONTEXT1_CNTL, tmp);
>>>> if (amdgpu_vm_fault_stop ==
>>>> AMDGPU_VM_FAULT_STOP_ALWAYS)
>>>> gmc_v7_0_set_fault_enable_default(adev, false);
>>>> @@ -998,7 +998,12 @@ static int gmc_v7_0_sw_init(void *handle)
>>>> * Currently set to 4GB ((1 << 20) 4k pages).
>>>> * Max GPUVM size for cayman and SI is 40 bits.
>>>> */
>>>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>>>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>>>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>>>> +
>>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>>>> + adev->vm_manager.vm_size, adev-
>>>>> vm_manager.block_size);
>>>>
>>>> /* Set the internal MC address mask
>>>> * This is the max address of the GPU's
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>> index d19d1c5..16742be 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>>>> @@ -848,7 +848,7 @@ static int gmc_v8_0_gart_enable(struct
>>>> amdgpu_device *adev)
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> PAGE_TABLE_BLOCK_SIZE,
>>>> - amdgpu_vm_block_size - 9);
>>>> + adev->vm_manager.block_size - 9);
>>>> WREG32(mmVM_CONTEXT1_CNTL, tmp);
>>>> if (amdgpu_vm_fault_stop ==
>>>> AMDGPU_VM_FAULT_STOP_ALWAYS)
>>>> gmc_v8_0_set_fault_enable_default(adev, false);
>>>> @@ -1082,7 +1082,12 @@ static int gmc_v8_0_sw_init(void *handle)
>>>> * Currently set to 4GB ((1 << 20) 4k pages).
>>>> * Max GPUVM size for cayman and SI is 40 bits.
>>>> */
>>>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>>>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>>>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>>>> +
>>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>>>> + adev->vm_manager.vm_size, adev-
>>>>> vm_manager.block_size);
>>>>
>>>> /* Set the internal MC address mask
>>>> * This is the max address of the GPU's
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> index df69aae..2180edb 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>>> @@ -543,11 +543,23 @@ static int gmc_v9_0_sw_init(void *handle)
>>>>
>>>> if (adev->flags & AMD_IS_APU) {
>>>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
>>>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>>>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>>>> } else {
>>>> /* XXX Don't know how to get VRAM type yet. */
>>>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
>>>> + /*
>>>> + * To fulfill 4-level page support,
>>>> + * vm size is 256TB (48bit), maximum size of Vega10,
>>>> + * block size 512 (9bit)
>>>> + */
>>>> + adev->vm_manager.vm_size = 1U << 18;
>>>> + adev->vm_manager.block_size = 9;
>>>> }
>>>>
>>>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>>>> + adev->vm_manager.vm_size, adev-
>>>>> vm_manager.block_size);
>>>> +
>>>> /* This interrupt is VMC page fault.*/
>>>> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
>>>> &adev->mc.vm_fault);
>>>> @@ -557,14 +569,7 @@ static int gmc_v9_0_sw_init(void *handle)
>>>> if (r)
>>>> return r;
>>>>
>>>> - /* Because of four level VMPTs, vm size is at least 512GB.
>>>> - * The maximum size is 256TB (48bit).
>>>> - */
>>>> - if (amdgpu_vm_size < 512) {
>>>> - DRM_WARN("VM size is at least 512GB!\n");
>>>> - amdgpu_vm_size = 512;
>>>> - }
>>>> - adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>>>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>>>>
>>>> /* Set the internal MC address mask
>>>> * This is the max address of the GPU's
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>>> index 266a0f4..2cc1d58 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>>>> @@ -242,7 +242,7 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device
>>>> *adev)
>>>>
>>>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>>>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>>>> PAGE_TABLE_BLOCK_SIZE,
>>>> - amdgpu_vm_block_size - 9);
>>>> + adev->vm_manager.block_size - 9);
>>>> WREG32(SOC15_REG_OFFSET(MMHUB, 0,
>>>> mmVM_CONTEXT1_CNTL) + i, tmp);
>>>> WREG32(SOC15_REG_OFFSET(MMHUB, 0,
>>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
>>>> WREG32(SOC15_REG_OFFSET(MMHUB, 0,
>>>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
>>>> --
>>>> 1.9.1
>>>
>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list