drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole (v2)

Mon Jan 29 13:58:30 UTC 2024

Hi,

Seems rocm-opengl interop hang problem still exists[1]. Btw have you

discovered into this problem?

Best regards,

Shengyu

[1] 
https://projects.blender.org/blender/blender/issues/100353#issuecomment-1111599

在 2024/1/27 03:15, Shengyu Qu 写道:
> Hello Felix,
>
> This patch seems working on my system, also it seems fixes the 
> ROCM/OpenGL
> interop problem.
>
> Is this intended to happen or not? Maybe we need more users to test it.
>
> Besides,
>
> Tested-by: Shengyu Qu <wiagn233 at outlook.com>
>
> Best Regards,
>
> Shengyu
>
> 在 2024/1/26 06:27, Felix Kuehling 写道:
>> The TBA and TMA, along with an unused IB allocation, reside at low
>> addresses in the VM address space. A stray VM fault which hits these
>> pages must be serviced by making their page table entries invalid.
>> The scheduler depends upon these pages being resident and fails,
>> preventing a debugger from inspecting the failure state.
>>
>> By relocating these pages above 47 bits in the VM address space they
>> can only be reached when bits [63:48] are set to 1. This makes it much
>> less likely for a misbehaving program to generate accesses to them.
>> The current placement at VA (PAGE_SIZE*2) is readily hit by a NULL
>> access with a small offset.
>>
>> v2:
>> - Move it to the reserved space to avoid concflicts with Mesa
>> - Add macros to make reserved space management easier
>>
>> Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
>> Cc: Christian Koenig <christian.koenig at amd.com>
>> Signed-off-by: Jay Cornwall <jay.cornwall at amd.com>
>> Signed-off-by: Felix Kuehling <felix.kuehling at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c      |  4 +--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c    |  7 ++---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h       | 12 ++++++--
>>   drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 30 +++++++++++---------
>>   4 files changed, 30 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> index 823d31f4a2a3..53d0a458d78e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
>> @@ -28,9 +28,9 @@
>>     uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
>>   {
>> -    uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
>> +    uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(
>> +        adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT);
>>   -    addr -= AMDGPU_VA_RESERVED_CSA_SIZE;
>>       addr = amdgpu_gmc_sign_extend(addr);
>>         return addr;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>> index 3d0d56087d41..9e769ef50f2e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
>> @@ -45,11 +45,8 @@
>>    */
>>   static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
>>   {
>> -    u64 addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
>> -
>> -    addr -= AMDGPU_VA_RESERVED_TOP;
>> -
>> -    return addr;
>> +    return AMDGPU_VA_RESERVED_SEQ64_START(
>> +        adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT);
>>   }
>>     /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 666698a57192..f23b6153d310 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -135,11 +135,19 @@ struct amdgpu_mem_stats;
>>   #define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < 
>> AMDGPU_MMHUB1_START)
>>   #define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < 
>> AMDGPU_MAX_VMHUBS)
>>   -/* Reserve 2MB at top/bottom of address space for kernel use */
>> +/* Reserve space at top/bottom of address space for kernel use */
>>   #define AMDGPU_VA_RESERVED_CSA_SIZE        (2ULL << 20)
>> +#define AMDGPU_VA_RESERVED_CSA_START(top)    ((top) \
>> +                         - AMDGPU_VA_RESERVED_CSA_SIZE)
>>   #define AMDGPU_VA_RESERVED_SEQ64_SIZE        (2ULL << 20)
>> +#define AMDGPU_VA_RESERVED_SEQ64_START(top) 
>> (AMDGPU_VA_RESERVED_CSA_START(top) \
>> +                         - AMDGPU_VA_RESERVED_SEQ64_SIZE)
>> +#define AMDGPU_VA_RESERVED_TRAP_SIZE        (2ULL << 12)
>> +#define AMDGPU_VA_RESERVED_TRAP_START(top) 
>> (AMDGPU_VA_RESERVED_SEQ64_START(top) \
>> +                         - AMDGPU_VA_RESERVED_TRAP_SIZE)
>>   #define AMDGPU_VA_RESERVED_BOTTOM        (2ULL << 20)
>> -#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_SEQ64_SIZE + \
>> +#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
>> +                         AMDGPU_VA_RESERVED_SEQ64_SIZE + \
>>                            AMDGPU_VA_RESERVED_CSA_SIZE)
>>     /* See vm_update_mode */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> index 6604a3f99c5e..f899cce25b2a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> @@ -36,6 +36,7 @@
>>   #include <linux/mm.h>
>>   #include <linux/mman.h>
>>   #include <linux/processor.h>
>> +#include "amdgpu_vm.h"
>>     /*
>>    * The primary memory I/O features being added for revisions of gfxip
>> @@ -326,10 +327,16 @@ static void kfd_init_apertures_vi(struct 
>> kfd_process_device *pdd, uint8_t id)
>>        * with small reserved space for kernel.
>>        * Set them to CANONICAL addresses.
>>        */
>> -    pdd->gpuvm_base = SVM_USER_BASE;
>> +    pdd->gpuvm_base = max(SVM_USER_BASE, AMDGPU_VA_RESERVED_BOTTOM);
>>       pdd->gpuvm_limit =
>>           pdd->dev->kfd->shared_resources.gpuvm_size - 1;
>>   +    /* dGPUs: the reserved space for kernel
>> +     * before SVM
>> +     */
>> +    pdd->qpd.cwsr_base = SVM_CWSR_BASE;
>> +    pdd->qpd.ib_base = SVM_IB_BASE;
>> +
>>       pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
>>       pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
>>   }
>> @@ -339,18 +346,19 @@ static void kfd_init_apertures_v9(struct 
>> kfd_process_device *pdd, uint8_t id)
>>       pdd->lds_base = MAKE_LDS_APP_BASE_V9();
>>       pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
>>   -        /* Raven needs SVM to support graphic handle, etc. Leave 
>> the small
>> -         * reserved space before SVM on Raven as well, even though 
>> we don't
>> -         * have to.
>> -         * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so 
>> that they
>> -         * are used in Thunk to reserve SVM.
>> -         */
>> -        pdd->gpuvm_base = SVM_USER_BASE;
>> +    pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
>>       pdd->gpuvm_limit =
>>           pdd->dev->kfd->shared_resources.gpuvm_size - 1;
>>         pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
>>       pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
>> +
>> +    /*
>> +     * Place TBA/TMA on opposite side of VM hole to prevent
>> +     * stray faults from triggering SVM on these pages.
>> +     */
>> +    pdd->qpd.cwsr_base = AMDGPU_VA_RESERVED_TRAP_START(
>> +        pdd->dev->adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT);
>>   }
>>     int kfd_init_apertures(struct kfd_process *process)
>> @@ -407,12 +415,6 @@ int kfd_init_apertures(struct kfd_process *process)
>>                       return -EINVAL;
>>                   }
>>               }
>> -
>> -                        /* dGPUs: the reserved space for kernel
>> -                         * before SVM
>> -                         */
>> -                        pdd->qpd.cwsr_base = SVM_CWSR_BASE;
>> -                        pdd->qpd.ib_base = SVM_IB_BASE;
>>           }
>>             dev_dbg(kfd_device, "node id %u\n", id);
>