[PATCH v3] drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole
Christian König
ckoenig.leichtzumerken at gmail.com
Thu Feb 15 14:27:38 UTC 2024
Am 13.02.24 um 23:12 schrieb Felix Kuehling:
> The TBA and TMA, along with an unused IB allocation, reside at low
> addresses in the VM address space. A stray VM fault which hits these
> pages must be serviced by making their page table entries invalid.
> The scheduler depends upon these pages being resident and fails,
> preventing a debugger from inspecting the failure state.
>
> By relocating these pages above 47 bits in the VM address space they
> can only be reached when bits [63:48] are set to 1. This makes it much
> less likely for a misbehaving program to generate accesses to them.
> The current placement at VA (PAGE_SIZE*2) is readily hit by a NULL
> access with a small offset.
>
> v2:
> - Move it to the reserved space to avoid conflicts with Mesa
> - Add macros to make reserved space management easier
>
> v3:
> - Move VM max PFN calculation into AMDGPU_VA_RESERVED macros
>
> Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Signed-off-by: Jay Cornwall <jay.cornwall at amd.com>
> Signed-off-by: Felix Kuehling <felix.kuehling at amd.com>
Can't fully judge the KFD VI changes, but the rest looks good to me.
Reviewed-by: Christian König <christian.koenig at amd.com>
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c | 6 +---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++++++-
> drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 29 ++++++++++----------
> 4 files changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> index 823d31f4a2a3..b0fb14a4b43c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
> @@ -28,9 +28,8 @@
>
> uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
> {
> - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
> + uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev);
>
> - addr -= AMDGPU_VA_RESERVED_CSA_SIZE;
> addr = amdgpu_gmc_sign_extend(addr);
>
> return addr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> index 3d0d56087d41..4b9afc4df031 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c
> @@ -45,11 +45,7 @@
> */
> static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev)
> {
> - u64 addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
> -
> - addr -= AMDGPU_VA_RESERVED_TOP;
> -
> - return addr;
> + return AMDGPU_VA_RESERVED_SEQ64_START(adev);
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 2c4053b29bb3..42f6ddec50c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -137,9 +137,18 @@ struct amdgpu_mem_stats;
>
> /* Reserve space at top/bottom of address space for kernel use */
> #define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20)
> +#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \
> + << AMDGPU_GPU_PAGE_SHIFT) \
> + - AMDGPU_VA_RESERVED_CSA_SIZE)
> #define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20)
> +#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \
> + - AMDGPU_VA_RESERVED_SEQ64_SIZE)
> +#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12)
> +#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \
> + - AMDGPU_VA_RESERVED_TRAP_SIZE)
> #define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16)
> -#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_SEQ64_SIZE + \
> +#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \
> + AMDGPU_VA_RESERVED_SEQ64_SIZE + \
> AMDGPU_VA_RESERVED_CSA_SIZE)
>
> /* See vm_update_mode */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> index 6604a3f99c5e..4a64307bc438 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
> @@ -36,6 +36,7 @@
> #include <linux/mm.h>
> #include <linux/mman.h>
> #include <linux/processor.h>
> +#include "amdgpu_vm.h"
>
> /*
> * The primary memory I/O features being added for revisions of gfxip
> @@ -326,10 +327,16 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
> * with small reserved space for kernel.
> * Set them to CANONICAL addresses.
> */
> - pdd->gpuvm_base = SVM_USER_BASE;
> + pdd->gpuvm_base = max(SVM_USER_BASE, AMDGPU_VA_RESERVED_BOTTOM);
> pdd->gpuvm_limit =
> pdd->dev->kfd->shared_resources.gpuvm_size - 1;
>
> + /* dGPUs: the reserved space for kernel
> + * before SVM
> + */
> + pdd->qpd.cwsr_base = SVM_CWSR_BASE;
> + pdd->qpd.ib_base = SVM_IB_BASE;
> +
> pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
> pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> }
> @@ -339,18 +346,18 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
> pdd->lds_base = MAKE_LDS_APP_BASE_V9();
> pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
>
> - /* Raven needs SVM to support graphic handle, etc. Leave the small
> - * reserved space before SVM on Raven as well, even though we don't
> - * have to.
> - * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
> - * are used in Thunk to reserve SVM.
> - */
> - pdd->gpuvm_base = SVM_USER_BASE;
> + pdd->gpuvm_base = AMDGPU_VA_RESERVED_BOTTOM;
> pdd->gpuvm_limit =
> pdd->dev->kfd->shared_resources.gpuvm_size - 1;
>
> pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
> pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
> +
> + /*
> + * Place TBA/TMA on opposite side of VM hole to prevent
> + * stray faults from triggering SVM on these pages.
> + */
> + pdd->qpd.cwsr_base = AMDGPU_VA_RESERVED_TRAP_START(pdd->dev->adev);
> }
>
> int kfd_init_apertures(struct kfd_process *process)
> @@ -407,12 +414,6 @@ int kfd_init_apertures(struct kfd_process *process)
> return -EINVAL;
> }
> }
> -
> - /* dGPUs: the reserved space for kernel
> - * before SVM
> - */
> - pdd->qpd.cwsr_base = SVM_CWSR_BASE;
> - pdd->qpd.ib_base = SVM_IB_BASE;
> }
>
> dev_dbg(kfd_device, "node id %u\n", id);
More information about the amd-gfx
mailing list