drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole (v2)
Felix Kuehling
felix.kuehling at amd.com
Mon Jan 29 14:51:56 UTC 2024
On 2024-01-29 8:58, Shengyu Qu wrote:
> Hi,
>
> Seems rocm-opengl interop hang problem still exists[1]. Btw have you
>
> discovered into this problem?
>
> Best regards,
>
> Shengyu
>
> [1]
> https://projects.blender.org/blender/blender/issues/100353#issuecomment-1111599
Maybe you're having a different problem. Do you see this issue also
without any version of the "Relocate TBA/TMA ..." patch?
Regards,
Felix
>
> 在 2024/1/27 03:15, Shengyu Qu 写道:
>> Hello Felix,
>>
>> This patch seems working on my system, also it seems fixes the
>> ROCM/OpenGL
>> interop problem.
>>
>> Is this intended to happen or not? Maybe we need more users to test it.
>>
>> Besides,
>>
>> Tested-by: Shengyu Qu <wiagn233 at outlook.com>
>>
>> Best Regards,
>>
>> Shengyu
>>
>> 在 2024/1/26 06:27, Felix Kuehling 写道:
>>> The TBA and TMA, along with an unused IB allocation, reside at low
>>> addresses in the VM address space. A stray VM fault which hits these
>>> pages must be serviced by making their page table entries invalid.
>>> The scheduler depends upon these pages being resident and fails,
>>> preventing a debugger from inspecting the failure state.
>>>
>>> By relocating these pages above 47 bits in the VM address space they
>>> can only be reached when bits [63:48] are set to 1. This makes it much
>>> less likely for a misbehaving program to generate accesses to them.
>>> The current placement at VA (PAGE_SIZE*2) is readily hit by a NULL
>>> access with a small offset.
>>>
>>> v2:
>>> - Move it to the reserved space to avoid concflicts with Mesa
>>> - Add macros to make reserved space management easier
>>>
>>> Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>> Signed-off-by: Jay Cornwall <jay.cornwall at amd.com>
>>> Signed-off-by: Felix Kuehling <felix.kuehling at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 4 +--
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 7 ++---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 12 ++++++--
>>> drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 30
>>> +++++++++++---------
>>> 4 files changed, 30 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> index 823d31f4a2a3..53d0a458d78e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>>> @@ -28,9 +28,9 @@
>>> uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>>> {
>>> - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
>>> + uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(
>>> + adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT);
>>> - addr -= AMDGPU_VA_RESERVED_CSA_SIZE;
>>> addr = amdgpu_gmc_sign_extend(addr);
>>> return addr;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>>> index 3d0d56087d41..9e769ef50f2e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>>> @@ -45,11 +45,8 @@
>>> */
>>> static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device
>>> *adev)
>>> {
>>> - u64 addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
>>> -
>>> - addr -= AMDGPU_VA_RESERVED_TOP;
>>> -
>>> - return addr;
>>> + return AMDGPU_VA_RESERVED_SEQ64_START(
>>> + adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT);
>>> }
>>> /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 666698a57192..f23b6153d310 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -135,11 +135,19 @@ struct amdgpu_mem_stats;
>>> #define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) <
>>> AMDGPU_MMHUB1_START)
>>> #define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) <
>>> AMDGPU_MAX_VMHUBS)
>>> -/* Reserve 2MB at top/bottom of address space for kernel use */
>>> +/* Reserve space at top/bottom of address space for kernel use */
>>> #define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
>>> +#define AMDGPU_VA_RESERVED_CSA_START(top) ((top) \
>>> + - AMDGPU_VA_RESERVED_CSA_SIZE)
>>> #define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
>>> +#define AMDGPU_VA_RESERVED_SEQ64_START(top)
>>> (AMDGPU_VA_RESERVED_CSA_START(top) \
>>> + - AMDGPU_VA_RESERVED_SEQ64_SIZE)
>>> +#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
>>> +#define AMDGPU_VA_RESERVED_TRAP_START(top)
>>> (AMDGPU_VA_RESERVED_SEQ64_START(top) \
>>> + - AMDGPU_VA_RESERVED_TRAP_SIZE)
>>> #define AMDGPU_VA_RESERVED_BOTTOM (2ULL << 20)
>>> -#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_SEQ64_SIZE + \
>>> +#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
>>> + AMDGPU_VA_RESERVED_SEQ64_SIZE + \
>>> AMDGPU_VA_RESERVED_CSA_SIZE)
>>> /* See vm_update_mode */
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>>> index 6604a3f99c5e..f899cce25b2a 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>>> @@ -36,6 +36,7 @@
>>> #include <linux/mm.h>
>>> #include <linux/mman.h>
>>> #include <linux/processor.h>
>>> +#include "amdgpu_vm.h"
>>> /*
>>> * The primary memory I/O features being added for revisions of gfxip
>>> @@ -326,10 +327,16 @@ static void kfd_init_apertures_vi(struct
>>> kfd_process_device *pdd, uint8_t id)
>>> * with small reserved space for kernel.
>>> * Set them to CANONICAL addresses.
>>> */
>>> - pdd->gpuvm_base = SVM_USER_BASE;
>>> + pdd->gpuvm_base = max(SVM_USER_BASE, AMDGPU_VA_RESERVED_BOTTOM);
>>> pdd->gpuvm_limit =
>>> pdd->dev->kfd->shared_resources.gpuvm_size - 1;
>>> + /* dGPUs: the reserved space for kernel
>>> + * before SVM
>>> + */
>>> + pdd->qpd.cwsr_base = SVM_CWSR_BASE;
>>> + pdd->qpd.ib_base = SVM_IB_BASE;
>>> +
>>> pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
>>> pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
>>> }
>>> @@ -339,18 +346,19 @@ static void kfd_init_apertures_v9(struct
>>> kfd_process_device *pdd, uint8_t id)
>>> pdd->lds_base = MAKE_LDS_APP_BASE_V9();
>>> pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
>>> - /* Raven needs SVM to support graphic handle, etc. Leave
>>> the small
>>> - * reserved space before SVM on Raven as well, even though
>>> we don't
>>> - * have to.
>>> - * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so
>>> that they
>>> - * are used in Thunk to reserve SVM.
>>> - */
>>> - pdd->gpuvm_base = SVM_USER_BASE;
>>> + pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
>>> pdd->gpuvm_limit =
>>> pdd->dev->kfd->shared_resources.gpuvm_size - 1;
>>> pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
>>> pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
>>> +
>>> + /*
>>> + * Place TBA/TMA on opposite side of VM hole to prevent
>>> + * stray faults from triggering SVM on these pages.
>>> + */
>>> + pdd->qpd.cwsr_base = AMDGPU_VA_RESERVED_TRAP_START(
>>> + pdd->dev->adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT);
>>> }
>>> int kfd_init_apertures(struct kfd_process *process)
>>> @@ -407,12 +415,6 @@ int kfd_init_apertures(struct kfd_process
>>> *process)
>>> return -EINVAL;
>>> }
>>> }
>>> -
>>> - /* dGPUs: the reserved space for kernel
>>> - * before SVM
>>> - */
>>> - pdd->qpd.cwsr_base = SVM_CWSR_BASE;
>>> - pdd->qpd.ib_base = SVM_IB_BASE;
>>> }
>>> dev_dbg(kfd_device, "node id %u\n", id);
>>
More information about the amd-gfx
mailing list