[PATCH 3/3] drm/amdkfd: Use kernel queue v9 functions for v10
Kuehling, Felix
Felix.Kuehling at amd.com
Thu Nov 7 04:52:33 UTC 2019
On 2019-10-30 20:17, Zhao, Yong wrote:
> The kernel queue functions for v9 and v10 are the same except
> pm_map_process_v* which have small difference, so they should be reused.
> This eliminates the need of reapplying several patches which were
> applied on v9 but not on v10, such as bigger GWS and more than 2
> SDMA engine support which were introduced on Arcturus.
This looks reasonable in principle. See one suggestion inline to
simplify it further.
>
> Change-Id: I2d385961e3c884db14e30b5afc98d0d9e4cb1802
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/Makefile | 1 -
> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 4 +-
> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 1 -
> .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c | 317 ------------------
> .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 49 ++-
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 -
> 6 files changed, 44 insertions(+), 331 deletions(-)
> delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
> index 48155060a57c..017a8b7156da 100644
> --- a/drivers/gpu/drm/amd/amdkfd/Makefile
> +++ b/drivers/gpu/drm/amd/amdkfd/Makefile
> @@ -41,7 +41,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \
> $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
> $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
> $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
> - $(AMDKFD_PATH)/kfd_kernel_queue_v10.o \
> $(AMDKFD_PATH)/kfd_packet_manager.o \
> $(AMDKFD_PATH)/kfd_process_queue_manager.o \
> $(AMDKFD_PATH)/kfd_device_queue_manager.o \
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 11d244891393..0d966408ea87 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -332,12 +332,10 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
> case CHIP_RAVEN:
> case CHIP_RENOIR:
> case CHIP_ARCTURUS:
> - kernel_queue_init_v9(&kq->ops_asic_specific);
> - break;
> case CHIP_NAVI10:
> case CHIP_NAVI12:
> case CHIP_NAVI14:
> - kernel_queue_init_v10(&kq->ops_asic_specific);
> + kernel_queue_init_v9(&kq->ops_asic_specific);
> break;
> default:
> WARN(1, "Unexpected ASIC family %u",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> index 365fc674fea4..a7116a939029 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
> @@ -102,6 +102,5 @@ struct kernel_queue {
> void kernel_queue_init_cik(struct kernel_queue_ops *ops);
> void kernel_queue_init_vi(struct kernel_queue_ops *ops);
> void kernel_queue_init_v9(struct kernel_queue_ops *ops);
> -void kernel_queue_init_v10(struct kernel_queue_ops *ops);
>
> #endif /* KFD_KERNEL_QUEUE_H_ */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> deleted file mode 100644
> index bfd6221acae9..000000000000
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v10.c
> +++ /dev/null
[snip]
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> index f0e4910a8865..d8f7343bfe71 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
> @@ -62,8 +62,9 @@ void kernel_queue_init_v9(struct kernel_queue_ops *ops)
> ops->submit_packet = submit_packet_v9;
> }
>
> -static int pm_map_process_v9(struct packet_manager *pm,
> - uint32_t *buffer, struct qcm_process_device *qpd)
> +static int pm_map_process_v9_base(struct packet_manager *pm,
> + uint32_t *buffer, struct qcm_process_device *qpd,
> + unsigned int sq_shader_tba_hi_trap_en_shift)
> {
> struct pm4_mes_map_process *packet;
> uint64_t vm_page_table_base_addr = qpd->page_table_base;
> @@ -85,10 +86,16 @@ static int pm_map_process_v9(struct packet_manager *pm,
>
> packet->sh_mem_config = qpd->sh_mem_config;
> packet->sh_mem_bases = qpd->sh_mem_bases;
> - packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> - packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> - packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> - packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> + if (qpd->tba_addr) {
> + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
> + packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
> + if (sq_shader_tba_hi_trap_en_shift) {
> + packet->sq_shader_tba_hi |=
> + 1 << sq_shader_tba_hi_trap_en_shift;
If you pass in a mask instead of a shift, you don't need the
conditional. I.e.
packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8) |
sq_shader_tba_hi_trap_en_mask;
> + }
> + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
> + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
> + }
>
> packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
> packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
> @@ -101,6 +108,11 @@ static int pm_map_process_v9(struct packet_manager *pm,
> return 0;
> }
>
> +static int pm_map_process_v9(struct packet_manager *pm,
> + uint32_t *buffer, struct qcm_process_device *qpd) {
> + return pm_map_process_v9_base(pm, buffer, qpd, 0);
> +}
> +
> static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
> uint64_t ib, size_t ib_size_in_dwords, bool chain)
> {
> @@ -352,3 +364,28 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = {
> .query_status_size = sizeof(struct pm4_mes_query_status),
> .release_mem_size = 0,
> };
> +
> +#include "gc/gc_10_1_0_sh_mask.h"
> +
> +static int pm_map_process_v10(struct packet_manager *pm,
> + uint32_t *buffer, struct qcm_process_device *qpd) {
> + return pm_map_process_v9_base(pm, buffer, qpd,
> + SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);
With my suggestion above, pass in (1 <<
SQ_SHADER_TBA_HI__TRAP_EN__SHIFT) here.
Regards,
Felix
> +}
> +
> +const struct packet_manager_funcs kfd_v10_pm_funcs = {
> + .map_process = pm_map_process_v10,
> + .runlist = pm_runlist_v9,
> + .set_resources = pm_set_resources_v9,
> + .map_queues = pm_map_queues_v9,
> + .unmap_queues = pm_unmap_queues_v9,
> + .query_status = pm_query_status_v9,
> + .release_mem = NULL,
> + .map_process_size = sizeof(struct pm4_mes_map_process),
> + .runlist_size = sizeof(struct pm4_mes_runlist),
> + .set_resources_size = sizeof(struct pm4_mes_set_resources),
> + .map_queues_size = sizeof(struct pm4_mes_map_queues),
> + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
> + .query_status_size = sizeof(struct pm4_mes_query_status),
> + .release_mem_size = 0,
> +};
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 62db4d20ed32..5127ddee24c8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -996,9 +996,6 @@ void pm_release_ib(struct packet_manager *pm);
>
> /* Following PM funcs can be shared among VI and AI */
> unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
> -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
> - struct scheduling_resources *res);
> -
>
> uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
>
More information about the amd-gfx
mailing list