[PATCH] drm/amdkfd: Increase the size of the memory reserved for the TBA

Felix Kuehling felix.kuehling at amd.com
Fri Feb 23 22:55:49 UTC 2024


On 2024-02-23 14:05, Laurent Morichetti wrote:
> In a future commit, the cwsr trap handler code size for gfx10.1 will
> increase to slightly above the one-page mark. Since the TMA does not
> need to be page-aligned, and only 2 pointers are stored in it, push
> the TMA offset out by 2 KiB and keep the TBA+TMA reserved memory size
> at two pages.
>
> Signed-off-by: Laurent Morichetti <laurent.morichetti at amd.com>

Reviewed-by: Felix Kuehling <felix.kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c | 23 ++++++++++++++++-------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  6 +++---
>   2 files changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 4d399c0c8a57..041ec3de55e7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -466,34 +466,43 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
>   {
>   	if (cwsr_enable && kfd->device_info.supports_cwsr) {
>   		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
>   		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
>   		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
>   		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_4_3_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
>   		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
>   		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
>   		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
> -			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
> +			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex)
> +					     > KFD_CWSR_TMA_OFFSET);
>   			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
>   		} else {
> +			/* The gfx11 cwsr trap handler must fit inside a single
> +			   page. */
>   			BUILD_BUG_ON(sizeof(cwsr_trap_gfx11_hex) > PAGE_SIZE);
>   			kfd->cwsr_isa = cwsr_trap_gfx11_hex;
>   			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 80320b8603fc..42d40560cd30 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -99,11 +99,11 @@
>   /*
>    * Size of the per-process TBA+TMA buffer: 2 pages
>    *
> - * The first page is the TBA used for the CWSR ISA code. The second
> - * page is used as TMA for user-mode trap handler setup in daisy-chain mode.
> + * The first chunk is the TBA used for the CWSR ISA code. The second
> + * chunk is used as TMA for user-mode trap handler setup in daisy-chain mode.
>    */
>   #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
> -#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
> +#define KFD_CWSR_TMA_OFFSET (PAGE_SIZE + 2048)
>   
>   #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
>   	(KFD_MAX_NUM_OF_PROCESSES *			\
>
> base-commit: 420b2460a743b320868f30e407d4c4685958ea2c


More information about the amd-gfx mailing list