[PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10

Kuehling, Felix Felix.Kuehling at amd.com
Thu Nov 7 04:52:33 UTC 2019


On 2019-10-30 20:17, Zhao, Yong wrote:
> The kernel queue functions for v9 and v10 are the same except
> pm_map_process_v* which have small difference, so they should be reused.
> This eliminates the need of reapplying several patches which were
> applied on v9 but not on v10, such as bigger GWS and more than 2
> SDMA engine support which were introduced on Arcturus.

This looks reasonable in principle. See one suggestion inline to 
simplify it further.


>
> Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/Makefile           |   1 -
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |   4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |   1 -
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------
>   .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  |  49 ++-
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |   3 -
>   6 files changed, 44 insertions(+), 331 deletions(-)
>   delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 48155060a57c..017a8b7156da 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -41,7 +41,6 @@ AMDKFD_FILES	:= $(AMDKFD_PATH)/kfd_module.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
>   		$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
> -		$(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
>   		$(AMDKFD_PATH)/kfd_packet_manager.o \
>   		$(AMDKFD_PATH)/kfd_process_queue_manager.o \
>   		$(AMDKFD_PATH)/kfd_device_queue_manager.o \
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 11d244891393..0d966408ea87 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
>   	case CHIP_RAVEN:
>   	case CHIP_RENOIR:
>   	case CHIP_ARCTURUS:
> -		kernel_queue_init_v9(&kq->ops_asic_specific);
> -		break;
>   	case CHIP_NAVI10:
>   	case CHIP_NAVI12:
>   	case CHIP_NAVI14:
> -		kernel_queue_init_v10(&kq->ops_asic_specific);
> +		kernel_queue_init_v9(&kq->ops_asic_specific);
>   		break;
>   	default:
>   		WARN(1, "Unexpected ASIC family %u",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 365fc674fea4..a7116a939029 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> @@ -102,6 +102,5 @@ struct kernel_queue {
>   void kernel_queue_init_cik(struct kernel_queue_ops *ops);
>   void kernel_queue_init_vi(struct kernel_queue_ops *ops);
>   void kernel_queue_init_v9(struct kernel_queue_ops *ops);
> -void kernel_queue_init_v10(struct kernel_queue_ops *ops);
>   
>   #endif /* KFD_KERNEL_QUEUE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> deleted file mode 100644
> index bfd6221acae9..000000000000
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ /dev/null
[snip]
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index f0e4910a8865..d8f7343bfe71 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops)
>   	ops->submit_packet = submit_packet_v9;
>   }
>   
> -static int pm_map_process_v9(struct packet_manager *pm,
> -		uint32_t *buffer, struct qcm_process_device *qpd)
> +static int pm_map_process_v9_base(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd,
> +		unsigned int sq_shader_tba_hi_trap_en_shift)
>   {
>   	struct pm4_mes_map_process *packet;
>   	uint64_t vm_page_table_base_addr = qpd->page_table_base;
> @@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   
>   	packet->sh_mem_config = qpd->sh_mem_config;
>   	packet->sh_mem_bases = qpd->sh_mem_bases;
> -	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> -	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> -	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> -	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> +	if (qpd->tba_addr) {
> +		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> +		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> +		if (sq_shader_tba_hi_trap_en_shift) {
> +			packet->sq_shader_tba_hi |=
> +					1 << sq_shader_tba_hi_trap_en_shift;

If you pass in a mask instead of a shift, you don't need the 
conditional. I.e.

     packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8) | 
sq_shader_tba_hi_trap_en_mask;

> +		}
> +		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> +		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> +	}
>   
>   	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
>   	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
> @@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
>   	return 0;
>   }
>   
> +static int pm_map_process_v9(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd) {
> +	return pm_map_process_v9_base(pm, buffer, qpd, 0);
> +}
> +
>   static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
>   			uint64_t ib, size_t ib_size_in_dwords, bool chain)
>   {
> @@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
>   	.query_status_size	= sizeof(struct pm4_mes_query_status),
>   	.release_mem_size	= 0,
>   };
> +
> +#include "gc/gc_10_1_0_sh_mask.h"
> +
> +static int pm_map_process_v10(struct packet_manager *pm,
> +		uint32_t *buffer, struct qcm_process_device *qpd) {
> +	return pm_map_process_v9_base(pm, buffer, qpd,
> +			SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);

With my suggestion above, pass in (1 << 
SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) here.

Regards,
   Felix

> +}
> +
> +const struct packet_manager_funcs kfd_v10_pm_funcs = {
> +	.map_process		= pm_map_process_v10,
> +	.runlist		= pm_runlist_v9,
> +	.set_resources		= pm_set_resources_v9,
> +	.map_queues		= pm_map_queues_v9,
> +	.unmap_queues		= pm_unmap_queues_v9,
> +	.query_status		= pm_query_status_v9,
> +	.release_mem		= NULL,
> +	.map_process_size	= sizeof(struct pm4_mes_map_process),
> +	.runlist_size		= sizeof(struct pm4_mes_runlist),
> +	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
> +	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
> +	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
> +	.query_status_size	= sizeof(struct pm4_mes_query_status),
> +	.release_mem_size	= 0,
> +};
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 62db4d20ed32..5127ddee24c8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm);
>   
>   /* Following PM funcs can be shared among VI and AI */
>   unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
> -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
> -			struct scheduling_resources *res);
> -
>   
>   uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
>   


More information about the amd-gfx mailing list