[PATCH] drm/amdkfd: Increase the size of the memory reserved for the TBA
Felix Kuehling
felix.kuehling at amd.com
Fri Feb 23 22:55:49 UTC 2024
On 2024-02-23 14:05, Laurent Morichetti wrote:
> In a future commit, the cwsr trap handler code size for gfx10.1 will
> increase to slightly above the one page mark. Since the TMA does not
> need to be page aligned, and only 2 pointers are stored in it, push
> the TMA offset by 2 KiB and keep the TBA+TMA reserved memory size
> at two pages.
>
> Signed-off-by: Laurent Morichetti <laurent.morichetti at amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 23 ++++++++++++++++-------
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++---
> 2 files changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 4d399c0c8a57..041ec3de55e7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -466,34 +466,43 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
> {
> if (cwsr_enable && kfd->device_info.supports_cwsr) {
> if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_gfx8_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
> } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_arcturus_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
> } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
> } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
> } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_gfx9_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
> } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_nv1x_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
> } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
> - BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
> + BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex)
> + > KFD_CWSR_TMA_OFFSET);
> kfd->cwsr_isa = cwsr_trap_gfx10_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
> } else {
> + /* The gfx11 cwsr trap handler must fit inside a single
> + page. */
> BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
> kfd->cwsr_isa = cwsr_trap_gfx11_hex;
> kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 80320b8603fc..42d40560cd30 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -99,11 +99,11 @@
> /*
> * Size of the per-process TBA+TMA buffer: 2 pages
> *
> - * The first page is the TBA used for the CWSR ISA code. The second
> - * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
> + * The first chunk is the TBA used for the CWSR ISA code. The second
> + * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
> */
> #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
> -#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
> +#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)
>
> #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
> (KFD_MAX_NUM_OF_PROCESSES * \
>
> base-commit: 420b2460a743b320868f30e407d4c4685958ea2c
More information about the amd-gfx
mailing list