[PATCH 6/8] drm/amdgpu: meld together VM fragment and huge page handling
Koenig, Christian
Christian.Koenig at amd.com
Fri Nov 9 12:13:02 UTC 2018
Adding Aaron, who initially reported the problem with this patch and
suspend/resume.
On 08.11.18 at 20:35, Samuel Pitoiset wrote:
> On 11/8/18 5:50 PM, Christian König wrote:
>> Hi Samuel,
>>
>> yeah, that is a known issue and I was already looking into that all
>> day today. The problem is that I can't reproduce it reliably.
>>
>> Do you have any hint on how to trigger that? E.g. memory exhaustion, maybe?
>
> Hi Christian,
>
> Good to hear that you are already working on it.
>
> The GPU hang with Rise of the Tomb Raider is reliable on my side. It
> always hangs while loading the benchmark. I use the Ultra High preset.
>
> Do you have access to that game? As mentioned, F1 2017 is also
> affected but I used RoTR during my bisect.
I was able to reproduce the problem with a memory stress test, but I still
don't have the slightest idea what is going wrong here.
Going to investigate further,
Christian.
>
> Thanks.
>
>>
>> Thanks,
>> Christian.
>>
>> On 08.11.18 at 17:17, Samuel Pitoiset wrote:
>>> Hi,
>>>
>>> This change introduces GPU hangs with F1 2017 and Rise of the Tomb
>>> Raider on RADV/GFX9. I bisected the issue.
>>>
>>> Can you have a look?
>>>
>>> Thanks!
>>>
>>> On 9/12/18 10:54 AM, Christian König wrote:
>>>> This optimizes the generation of PTEs by walking the hierarchy only once
>>>> for a range and making changes as necessary.
>>>>
>>>> It allows for both huge (2MB) as well as giant (1GB) pages to be used on
>>>> Vega and Raven.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>> Acked-by: Junwei Zhang <Jerry.Zhang at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 267 ++++++++++++++++++---------------
>>>> 1 file changed, 147 insertions(+), 120 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 747b6ff6fea7..328325324a1d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1488,46 +1488,76 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
>>>> }
>>>> /**
>>>> - * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
>>>> + * amdgpu_vm_update_huge - figure out parameters for PTE updates
>>>> *
>>>> - * @p: see amdgpu_pte_update_params definition
>>>> - * @entry: vm_pt entry to check
>>>> - * @parent: parent entry
>>>> - * @nptes: number of PTEs updated with this operation
>>>> - * @dst: destination address where the PTEs should point to
>>>> - * @flags: access flags fro the PTEs
>>>> - *
>>>> - * Check if we can update the PD with a huge page.
>>>> + * Make sure to set the right flags for the PTEs at the desired level.
>>>> */
>>>> -static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>>>> - struct amdgpu_vm_pt *entry,
>>>> - struct amdgpu_vm_pt *parent,
>>>> - unsigned nptes, uint64_t dst,
>>>> - uint64_t flags)
>>>> -{
>>>> - uint64_t pde;
>>>> +static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
>>>> + struct amdgpu_bo *bo, unsigned level,
>>>> + uint64_t pe, uint64_t addr,
>>>> + unsigned count, uint32_t incr,
>>>> + uint64_t flags)
>>>> - /* In the case of a mixed PT the PDE must point to it*/
>>>> - if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
>>>> - nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
>>>> - /* Set the huge page flag to stop scanning at this PDE */
>>>> +{
>>>> + if (level != AMDGPU_VM_PTB) {
>>>> flags |= AMDGPU_PDE_PTE;
>>>> + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
>>>> }
>>>> - if (!(flags & AMDGPU_PDE_PTE)) {
>>>> - if (entry->huge) {
>>>> - /* Add the entry to the relocated list to update it. */
>>>> - entry->huge = false;
>>>> - amdgpu_vm_bo_relocated(&entry->base);
>>>> - }
>>>> + amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
>>>> +}
>>>> +
>>>> +/**
>>>> + * amdgpu_vm_fragment - get fragment for PTEs
>>>> + *
>>>> + * @params: see amdgpu_pte_update_params definition
>>>> + * @start: first PTE to handle
>>>> + * @end: last PTE to handle
>>>> + * @flags: hw mapping flags
>>>> + * @frag: resulting fragment size
>>>> + * @frag_end: end of this fragment
>>>> + *
>>>> + * Returns the first possible fragment for the start and end address.
>>>> + */
>>>> +static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params,
>>>> + uint64_t start, uint64_t end, uint64_t flags,
>>>> + unsigned int *frag, uint64_t *frag_end)
>>>> +{
>>>> + /**
>>>> + * The MC L1 TLB supports variable sized pages, based on a fragment
>>>> + * field in the PTE. When this field is set to a non-zero value, page
>>>> + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
>>>> + * flags are considered valid for all PTEs within the fragment range
>>>> + * and corresponding mappings are assumed to be physically contiguous.
>>>> + *
>>>> + * The L1 TLB can store a single PTE for the whole fragment,
>>>> + * significantly increasing the space available for translation
>>>> + * caching. This leads to large improvements in throughput when the
>>>> + * TLB is under pressure.
>>>> + *
>>>> + * The L2 TLB distributes small and large fragments into two
>>>> + * asymmetric partitions. The large fragment cache is significantly
>>>> + * larger. Thus, we try to use large fragments wherever possible.
>>>> + * Userspace can support this by aligning virtual base address and
>>>> + * allocation size to the fragment size.
>>>> + */
>>>> + unsigned max_frag = params->adev->vm_manager.fragment_size;
>>>> +
>>>> + /* system pages are non continuously */
>>>> + if (params->src || !(flags & AMDGPU_PTE_VALID)) {
>>>> + *frag = 0;
>>>> + *frag_end = end;
>>>> return;
>>>> }
>>>> - entry->huge = true;
>>>> - amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
>>>> -
>>>> - pde = (entry - parent->entries) * 8;
>>>> - amdgpu_vm_update_func(p, parent->base.bo, pde, dst, 1, 0, flags);
>>>> + /* This intentionally wraps around if no bit is set */
>>>> + *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
>>>> + if (*frag >= max_frag) {
>>>> + *frag = max_frag;
>>>> + *frag_end = end & ~((1ULL << max_frag) - 1);
>>>> + } else {
>>>> + *frag_end = start + (1 << *frag);
>>>> + }
>>>> }
>>>> /**
>>>> @@ -1545,108 +1575,105 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
>>>> * 0 for success, -EINVAL for failure.
>>>> */
>>>> static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
>>>> - uint64_t start, uint64_t end,
>>>> - uint64_t dst, uint64_t flags)
>>>> + uint64_t start, uint64_t end,
>>>> + uint64_t dst, uint64_t flags)
>>>> {
>>>> struct amdgpu_device *adev = params->adev;
>>>> - const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
>>>> struct amdgpu_vm_pt_cursor cursor;
>>>> + uint64_t frag_start = start, frag_end;
>>>> + unsigned int frag;
>>>> - /* walk over the address space and update the page tables */
>>>> - for_each_amdgpu_vm_pt_leaf(adev, params->vm, start, end - 1, cursor) {
>>>> + /* figure out the initial fragment */
>>>> + amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
>>>> +
>>>> + /* walk over the address space and update the PTs */
>>>> + amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>>> + while (cursor.pfn < end) {
>>>> struct amdgpu_bo *pt = cursor.entry->base.bo;
>>>> - uint64_t pe_start;
>>>> - unsigned nptes;
>>>> + unsigned shift, parent_shift, num_entries;
>>>> + uint64_t incr, entry_end, pe_start;
>>>> - if (!pt || cursor.level != AMDGPU_VM_PTB)
>>>> + if (!pt)
>>>> return -ENOENT;
>>>> - if ((cursor.pfn & ~mask) == (end & ~mask))
>>>> - nptes = end - cursor.pfn;
>>>> - else
>>>> - nptes = AMDGPU_VM_PTE_COUNT(adev) - (cursor.pfn & mask);
>>>> -
>>>> - amdgpu_vm_handle_huge_pages(params, cursor.entry, cursor.parent,
>>>> - nptes, dst, flags);
>>>> - /* We don't need to update PTEs for huge pages */
>>>> - if (cursor.entry->huge) {
>>>> - dst += nptes * AMDGPU_GPU_PAGE_SIZE;
>>>> + /* The root level can't be a huge page */
>>>> + if (cursor.level == adev->vm_manager.root_level) {
>>>> + if (!amdgpu_vm_pt_descendant(adev, &cursor))
>>>> + return -ENOENT;
>>>> continue;
>>>> }
>>>> - pe_start = (cursor.pfn & mask) * 8;
>>>> - amdgpu_vm_update_func(params, pt, pe_start, dst, nptes,
>>>> - AMDGPU_GPU_PAGE_SIZE, flags);
>>>> - dst += nptes * AMDGPU_GPU_PAGE_SIZE;
>>>> - }
>>>> -
>>>> - return 0;
>>>> -}
>>>> + /* First check if the entry is already handled */
>>>> + if (cursor.pfn < frag_start) {
>>>> + cursor.entry->huge = true;
>>>> + amdgpu_vm_pt_next(adev, &cursor);
>>>> + continue;
>>>> + }
>>>> -/*
>>>> - * amdgpu_vm_frag_ptes - add fragment information to PTEs
>>>> - *
>>>> - * @params: see amdgpu_pte_update_params definition
>>>> - * @vm: requested vm
>>>> - * @start: first PTE to handle
>>>> - * @end: last PTE to handle
>>>> - * @dst: addr those PTEs should point to
>>>> - * @flags: hw mapping flags
>>>> - *
>>>> - * Returns:
>>>> - * 0 for success, -EINVAL for failure.
>>>> - */
>>>> -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
>>>> - uint64_t start, uint64_t end,
>>>> - uint64_t dst, uint64_t flags)
>>>> -{
>>>> - /**
>>>> - * The MC L1 TLB supports variable sized pages, based on a fragment
>>>> - * field in the PTE. When this field is set to a non-zero value, page
>>>> - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
>>>> - * flags are considered valid for all PTEs within the fragment range
>>>> - * and corresponding mappings are assumed to be physically contiguous.
>>>> - *
>>>> - * The L1 TLB can store a single PTE for the whole fragment,
>>>> - * significantly increasing the space available for translation
>>>> - * caching. This leads to large improvements in throughput when the
>>>> - * TLB is under pressure.
>>>> - *
>>>> - * The L2 TLB distributes small and large fragments into two
>>>> - * asymmetric partitions. The large fragment cache is significantly
>>>> - * larger. Thus, we try to use large fragments wherever possible.
>>>> - * Userspace can support this by aligning virtual base address and
>>>> - * allocation size to the fragment size.
>>>> - */
>>>> - unsigned max_frag = params->adev->vm_manager.fragment_size;
>>>> - int r;
>>>> + /* If it isn't already handled it can't be a huge page */
>>>> + if (cursor.entry->huge) {
>>>> + /* Add the entry to the relocated list to update it. */
>>>> + cursor.entry->huge = false;
>>>> + amdgpu_vm_bo_relocated(&cursor.entry->base);
>>>> + }
>>>> - /* system pages are non continuously */
>>>> - if (params->src || !(flags & AMDGPU_PTE_VALID))
>>>> - return amdgpu_vm_update_ptes(params, start, end, dst, flags);
>>>> -
>>>> - while (start != end) {
>>>> - uint64_t frag_flags, frag_end;
>>>> - unsigned frag;
>>>> -
>>>> - /* This intentionally wraps around if no bit is set */
>>>> - frag = min((unsigned)ffs(start) - 1,
>>>> - (unsigned)fls64(end - start) - 1);
>>>> - if (frag >= max_frag) {
>>>> - frag_flags = AMDGPU_PTE_FRAG(max_frag);
>>>> - frag_end = end & ~((1ULL << max_frag) - 1);
>>>> - } else {
>>>> - frag_flags = AMDGPU_PTE_FRAG(frag);
>>>> - frag_end = start + (1 << frag);
>>>> + shift = amdgpu_vm_level_shift(adev, cursor.level);
>>>> + parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
>>>> + if (adev->asic_type < CHIP_VEGA10) {
>>>> + /* No huge page support before GMC v9 */
>>>> + if (cursor.level != AMDGPU_VM_PTB) {
>>>> + if (!amdgpu_vm_pt_descendant(adev, &cursor))
>>>> + return -ENOENT;
>>>> + continue;
>>>> + }
>>>> + } else if (frag < shift) {
>>>> + /* We can't use this level when the fragment size is
>>>> + * smaller than the address shift. Go to the next
>>>> + * child entry and try again.
>>>> + */
>>>> + if (!amdgpu_vm_pt_descendant(adev, &cursor))
>>>> + return -ENOENT;
>>>> + continue;
>>>> + } else if (frag >= parent_shift) {
>>>> + /* If the fragment size is even larger than the parent
>>>> + * shift we should go up one level and check it again.
>>>> + */
>>>> + if (!amdgpu_vm_pt_ancestor(&cursor))
>>>> + return -ENOENT;
>>>> + continue;
>>>> }
>>>> - r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
>>>> - flags | frag_flags);
>>>> - if (r)
>>>> - return r;
>>>> + /* Looks good so far, calculate parameters for the update */
>>>> + incr = AMDGPU_GPU_PAGE_SIZE << shift;
>>>> + num_entries = amdgpu_vm_num_entries(adev, cursor.level);
>>>> + pe_start = ((cursor.pfn >> shift) & (num_entries - 1)) * 8;
>>>> + entry_end = num_entries << shift;
>>>> + entry_end += cursor.pfn & ~(entry_end - 1);
>>>> + entry_end = min(entry_end, end);
>>>> +
>>>> + do {
>>>> + uint64_t upd_end = min(entry_end, frag_end);
>>>> + unsigned nptes = (upd_end - frag_start) >> shift;
>>>> +
>>>> + amdgpu_vm_update_huge(params, pt, cursor.level,
>>>> + pe_start, dst, nptes, incr,
>>>> + flags | AMDGPU_PTE_FRAG(frag));
>>>> +
>>>> + pe_start += nptes * 8;
>>>> + dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
>>>> +
>>>> + frag_start = upd_end;
>>>> + if (frag_start >= frag_end) {
>>>> + /* figure out the next fragment */
>>>> + amdgpu_vm_fragment(params, frag_start, end,
>>>> + flags, &frag, &frag_end);
>>>> + if (frag < shift)
>>>> + break;
>>>> + }
>>>> + } while (frag_start < entry_end);
>>>> - dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
>>>> - start = frag_end;
>>>> + if (frag >= shift)
>>>> + amdgpu_vm_pt_next(adev, &cursor);
>>>> }
>>>> return 0;
>>>> @@ -1708,8 +1735,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>> params.func = amdgpu_vm_cpu_set_ptes;
>>>> params.pages_addr = pages_addr;
>>>> - return amdgpu_vm_frag_ptes(&params, start, last + 1,
>>>> - addr, flags);
>>>> + return amdgpu_vm_update_ptes(&params, start, last + 1,
>>>> + addr, flags);
>>>> }
>>>> ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
>>>> @@ -1788,7 +1815,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
>>>> if (r)
>>>> goto error_free;
>>>> - r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags);
>>>> + r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
>>>> if (r)
>>>> goto error_free;
>>>>
>>