[PATCH] drm/amdgpu: fix vm size and block size for VMPT (v3)
Zhang, Jerry (Junwei)
Jerry.Zhang at amd.com
Wed Apr 5 02:11:09 UTC 2017
On 04/04/2017 03:34 AM, Deucher, Alexander wrote:
>> -----Original Message-----
>> From: Junwei Zhang [mailto:Jerry.Zhang at amd.com]
>> Sent: Friday, March 31, 2017 10:44 PM
>> To: Deucher, Alexander; Koenig, Christian
>> Cc: amd-gfx at lists.freedesktop.org; Zhang, Jerry
>> Subject: [PATCH] drm/amdgpu: fix vm size and block size for VMPT (v3)
>>
>> From: "Zhang, Jerry" <Jerry.Zhang at amd.com>
>>
>> v2: set both of them in gmc
>> v3: move vm size and block size in vm manager
>>
>> Signed-off-by: Junwei Zhang <Jerry.Zhang at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 --------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 ++++++++++++---------
>> -
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++-
>> drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 10 ++++++++--
>> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 9 +++++++--
>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 9 +++++++--
>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 +++++++++++++--------
>> drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 +-
>> 9 files changed, 52 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index fbb4afb..1d0c742 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1041,14 +1041,6 @@ static bool amdgpu_check_pot_argument(int arg)
>>
>> static void amdgpu_get_block_size(struct amdgpu_device *adev)
>> {
>> - /* from AI, asic starts to support multiple level VMPT */
>> - if (adev->asic_type >= CHIP_VEGA10) {
>> - if (amdgpu_vm_block_size != 9)
>> - dev_warn(adev->dev,
>> - "Multi-VMPT limits block size to one
>> page!\n");
>> - amdgpu_vm_block_size = 9;
>> - return;
>> - }
>> /* defines number of bits in page table versus page directory,
>> * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
>> * page table and the remaining bits are in the page directory */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 43adc4b..46d3f1a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -100,13 +100,14 @@ static unsigned amdgpu_vm_num_entries(struct
>> amdgpu_device *adev,
>> if (level == 0)
>> /* For the root directory */
>> return adev->vm_manager.max_pfn >>
>> - (amdgpu_vm_block_size * adev-
>> >vm_manager.num_level);
>> + (adev->vm_manager.block_size *
>> + adev->vm_manager.num_level);
>> else if (level == adev->vm_manager.num_level)
>> /* For the page tables on the leaves */
>> - return AMDGPU_VM_PTE_COUNT;
>> + return AMDGPU_VM_PTE_COUNT(adev);
>> else
>> /* Everything in between */
>> - return 1 << amdgpu_vm_block_size;
>> + return 1 << adev->vm_manager.block_size;
>> }
>>
>> /**
>> @@ -271,7 +272,7 @@ static int amdgpu_vm_alloc_levels(struct
>> amdgpu_device *adev,
>> unsigned level)
>> {
>> unsigned shift = (adev->vm_manager.num_level - level) *
>> - amdgpu_vm_block_size;
>> + adev->vm_manager.block_size;
>> unsigned pt_idx, from, to;
>> int r;
>>
>> @@ -970,7 +971,7 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct
>> amdgpu_pte_update_params *p,
>> unsigned idx, level = p->adev->vm_manager.num_level;
>>
>> while (entry->entries) {
>> - idx = addr >> (amdgpu_vm_block_size * level--);
>> + idx = addr >> (p->adev->vm_manager.block_size * level--);
>> idx %= amdgpu_bo_size(entry->bo) / 8;
>> entry = &entry->entries[idx];
>> }
>> @@ -997,7 +998,8 @@ static void amdgpu_vm_update_ptes(struct
>> amdgpu_pte_update_params *params,
>> uint64_t start, uint64_t end,
>> uint64_t dst, uint64_t flags)
>> {
>> - const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
>> + struct amdgpu_device *adev = params->adev;
>> + const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
>>
>> uint64_t cur_pe_start, cur_nptes, cur_dst;
>> uint64_t addr; /* next GPU address to be updated */
>> @@ -1021,7 +1023,7 @@ static void amdgpu_vm_update_ptes(struct
>> amdgpu_pte_update_params *params,
>> if ((addr & ~mask) == (end & ~mask))
>> nptes = end - addr;
>> else
>> - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
>> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
>>
>> cur_pe_start = amdgpu_bo_gpu_offset(pt);
>> cur_pe_start += (addr & mask) * 8;
>> @@ -1049,7 +1051,7 @@ static void amdgpu_vm_update_ptes(struct
>> amdgpu_pte_update_params *params,
>> if ((addr & ~mask) == (end & ~mask))
>> nptes = end - addr;
>> else
>> - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
>> + nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr &
>> mask);
>>
>> next_pe_start = amdgpu_bo_gpu_offset(pt);
>> next_pe_start += (addr & mask) * 8;
>> @@ -1196,7 +1198,7 @@ static int amdgpu_vm_bo_update_mapping(struct
>> amdgpu_device *adev,
>> * reserve space for one command every (1 << BLOCK_SIZE)
>> * entries or 2k dwords (whatever is smaller)
>> */
>> - ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;
>> + ncmds = (nptes >> min(adev->vm_manager.block_size, 11)) + 1;
>>
>> /* padding, etc. */
>> ndw = 64;
>> @@ -2067,7 +2069,7 @@ void amdgpu_vm_bo_invalidate(struct
>> amdgpu_device *adev,
>> int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>> {
>> const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
>> - AMDGPU_VM_PTE_COUNT * 8);
>> + AMDGPU_VM_PTE_COUNT(adev) * 8);
>> unsigned ring_instance;
>> struct amdgpu_ring *ring;
>> struct amd_sched_rq *rq;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 102b1f7..1242264 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -45,7 +45,7 @@
>> #define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
>>
>> /* number of entries in page table */
>> -#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
>> +#define AMDGPU_VM_PTE_COUNT(adev) (1 << adev-
>> >vm_manager.block_size)
>
> For safety, put parens around adev. E.g.,
> #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
Yeah, thanks to point it out.
>
> With that fixed:
> Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
>
> As a follow on patch, it would be nice to add some reasonable clamping in the
> gmc files so we can still override the default settings and not cause too much
> problem with multiple asics in the same system.
Could you elaborate it a bit more, about "clamping"?
Jerry
>
> Alex
>
>>
>> /* PTBs (Page Table Blocks) need to be aligned to 32K */
>> #define AMDGPU_VM_PTB_ALIGN_SIZE 32768
>> @@ -155,6 +155,8 @@ struct amdgpu_vm_manager {
>>
>> uint64_t max_pfn;
>> uint32_t num_level;
>> + uint64_t vm_size;
>> + uint32_t block_size;
>> /* vram base address for page table entry */
>> u64 vram_base_offset;
>> /* is vm enabled? */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> index 30ef312..9223c2b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
>> @@ -222,7 +222,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device
>> *adev)
>>
>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> PAGE_TABLE_BLOCK_SIZE,
>> - amdgpu_vm_block_size - 9);
>> + adev->vm_manager.block_size - 9);
>> WREG32(SOC15_REG_OFFSET(GC, 0,
>> mmVM_CONTEXT1_CNTL) + i, tmp);
>> WREG32(SOC15_REG_OFFSET(GC, 0,
>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
>> WREG32(SOC15_REG_OFFSET(GC, 0,
>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> index d958660..30d5c42 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
>> @@ -543,7 +543,8 @@ static int gmc_v6_0_gart_enable(struct
>> amdgpu_device *adev)
>> WREG32(mmVM_CONTEXT1_CNTL,
>> VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
>> (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
>> - ((amdgpu_vm_block_size - 9) <<
>> VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
>> + ((adev->vm_manager.block_size - 9)
>> + << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
>> if (amdgpu_vm_fault_stop ==
>> AMDGPU_VM_FAULT_STOP_ALWAYS)
>> gmc_v6_0_set_fault_enable_default(adev, false);
>> else
>> @@ -848,7 +849,12 @@ static int gmc_v6_0_sw_init(void *handle)
>> if (r)
>> return r;
>>
>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>> +
>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>> + adev->vm_manager.vm_size, adev-
>> >vm_manager.block_size);
>>
>> adev->mc.mc_mask = 0xffffffffffULL;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> index 0c0a601..9427c7d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
>> @@ -639,7 +639,7 @@ static int gmc_v7_0_gart_enable(struct
>> amdgpu_device *adev)
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> ENABLE_CONTEXT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> PAGE_TABLE_DEPTH, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> PAGE_TABLE_BLOCK_SIZE,
>> - amdgpu_vm_block_size - 9);
>> + adev->vm_manager.block_size - 9);
>> WREG32(mmVM_CONTEXT1_CNTL, tmp);
>> if (amdgpu_vm_fault_stop ==
>> AMDGPU_VM_FAULT_STOP_ALWAYS)
>> gmc_v7_0_set_fault_enable_default(adev, false);
>> @@ -998,7 +998,12 @@ static int gmc_v7_0_sw_init(void *handle)
>> * Currently set to 4GB ((1 << 20) 4k pages).
>> * Max GPUVM size for cayman and SI is 40 bits.
>> */
>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>> +
>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>> + adev->vm_manager.vm_size, adev-
>> >vm_manager.block_size);
>>
>> /* Set the internal MC address mask
>> * This is the max address of the GPU's
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index d19d1c5..16742be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -848,7 +848,7 @@ static int gmc_v8_0_gart_enable(struct
>> amdgpu_device *adev)
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> PAGE_TABLE_BLOCK_SIZE,
>> - amdgpu_vm_block_size - 9);
>> + adev->vm_manager.block_size - 9);
>> WREG32(mmVM_CONTEXT1_CNTL, tmp);
>> if (amdgpu_vm_fault_stop ==
>> AMDGPU_VM_FAULT_STOP_ALWAYS)
>> gmc_v8_0_set_fault_enable_default(adev, false);
>> @@ -1082,7 +1082,12 @@ static int gmc_v8_0_sw_init(void *handle)
>> * Currently set to 4GB ((1 << 20) 4k pages).
>> * Max GPUVM size for cayman and SI is 40 bits.
>> */
>> - adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>> +
>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>> + adev->vm_manager.vm_size, adev-
>> >vm_manager.block_size);
>>
>> /* Set the internal MC address mask
>> * This is the max address of the GPU's
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index df69aae..2180edb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -543,11 +543,23 @@ static int gmc_v9_0_sw_init(void *handle)
>>
>> if (adev->flags & AMD_IS_APU) {
>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
>> + adev->vm_manager.vm_size = amdgpu_vm_size;
>> + adev->vm_manager.block_size = amdgpu_vm_block_size;
>> } else {
>> /* XXX Don't know how to get VRAM type yet. */
>> adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
>> + /*
>> + * To fulfill 4-level page support,
>> + * vm size is 256TB (48bit), maximum size of Vega10,
>> + * block size 512 (9bit)
>> + */
>> + adev->vm_manager.vm_size = 1U << 18;
>> + adev->vm_manager.block_size = 9;
>> }
>>
>> + DRM_INFO("vm size is %d GB, block size is %d-bit\n",
>> + adev->vm_manager.vm_size, adev-
>> >vm_manager.block_size);
>> +
>> /* This interrupt is VMC page fault.*/
>> r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
>> &adev->mc.vm_fault);
>> @@ -557,14 +569,7 @@ static int gmc_v9_0_sw_init(void *handle)
>> if (r)
>> return r;
>>
>> - /* Because of four level VMPTs, vm size is at least 512GB.
>> - * The maximum size is 256TB (48bit).
>> - */
>> - if (amdgpu_vm_size < 512) {
>> - DRM_WARN("VM size is at least 512GB!\n");
>> - amdgpu_vm_size = 512;
>> - }
>> - adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
>> + adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
>>
>> /* Set the internal MC address mask
>> * This is the max address of the GPU's
>> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> index 266a0f4..2cc1d58 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
>> @@ -242,7 +242,7 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device
>> *adev)
>>
>> EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
>> tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
>> PAGE_TABLE_BLOCK_SIZE,
>> - amdgpu_vm_block_size - 9);
>> + adev->vm_manager.block_size - 9);
>> WREG32(SOC15_REG_OFFSET(MMHUB, 0,
>> mmVM_CONTEXT1_CNTL) + i, tmp);
>> WREG32(SOC15_REG_OFFSET(MMHUB, 0,
>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
>> WREG32(SOC15_REG_OFFSET(MMHUB, 0,
>> mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
>> --
>> 1.9.1
>
More information about the amd-gfx
mailing list