[PATCH] drm/amd/amdgpu: Fix MES init sequence
Alex Deucher
alexdeucher at gmail.com
Mon Mar 10 21:09:09 UTC 2025
On Mon, Mar 10, 2025 at 1:58 PM Shaoyun Liu <shaoyun.liu at amd.com> wrote:
>
> When MES is being used, the set_hw_resources_1 API is required to
> initialize the MES internal context correctly.
>
> Signed-off-by: Shaoyun Liu <shaoyun.liu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 6 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 6 +--
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 52 +++++++++++-------------
> drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 40 ++++++++----------
> 4 files changed, 48 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 4391b3383f0c..78362a838212 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -143,9 +143,9 @@ struct amdgpu_mes {
> const struct amdgpu_mes_funcs *funcs;
>
> /* mes resource_1 bo*/
> - struct amdgpu_bo *resource_1;
> - uint64_t resource_1_gpu_addr;
> - void *resource_1_addr;
> + struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES];
> + uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES];
> + void *resource_1_addr[AMDGPU_MAX_MES_PIPES];
>
> };
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index ab7e73d0e7b1..980dfb8935b6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -614,10 +614,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
> vf2pf_info->decode_usage = 0;
>
> vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
> - vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr;
> + vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr[0];
>
> - if (adev->mes.resource_1) {
> - vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size;
> + if (adev->mes.resource_1[0]) {
> + vf2pf_info->mes_info_size = adev->mes.resource_1[0]->tbo.base.size;
> }
> vf2pf_info->checksum =
> amd_sriov_msg_checksum(
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index a569d09a1a74..299f17868822 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -751,10 +751,10 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
> mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
> mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
> - mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr;
> + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr[0];
> mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE;
> mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr =
> - mes->resource_1_gpu_addr + MES11_HW_RESOURCE_1_SIZE;
> + mes->resource_1_gpu_addr[0] + MES11_HW_RESOURCE_1_SIZE;
This offset will need to be adjusted if the MES11_HW_RESOURCE_1_SIZE
region is only included in the allocation for SR-IOV. See below.
>
> return mes_v11_0_submit_pkt_and_poll_completion(mes,
> &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
> @@ -1392,7 +1392,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev,
> static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
> {
> struct amdgpu_device *adev = ip_block->adev;
> - int pipe, r;
> + int pipe, r, bo_size;
>
> adev->mes.funcs = &mes_v11_0_funcs;
> adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
> @@ -1427,19 +1427,21 @@ static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
> if (r)
> return r;
>
> - if (amdgpu_sriov_is_mes_info_enable(adev) ||
> - adev->gfx.enable_cleaner_shader) {
> - r = amdgpu_bo_create_kernel(adev,
> - MES11_HW_RESOURCE_1_SIZE + AMDGPU_GPU_PAGE_SIZE,
> - PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->mes.resource_1,
> - &adev->mes.resource_1_gpu_addr,
> - &adev->mes.resource_1_addr);
> - if (r) {
> - dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
> - return r;
> - }
> + bo_size = AMDGPU_GPU_PAGE_SIZE;
> + if (amdgpu_sriov_is_mes_info_enable(adev)
> + bo_size += MES11_HW_RESOURCE_1_SIZE;
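If you make the size depend on amdgpu_sriov_is_mes_info_enable(), it
will break the address for
mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr above when SR-IOV is
not enabled: that address is still computed as
resource_1_gpu_addr[0] + MES11_HW_RESOURCE_1_SIZE, but the
MES11_HW_RESOURCE_1_SIZE region is then no longer part of the
allocation.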
> +
> + /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/
> + r = amdgpu_bo_create_kernel(adev,
> + bo_size,
> + PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->mes.resource_1[0],
> + &adev->mes.resource_1_gpu_addr[0],
> + &adev->mes.resource_1_addr[0]);
> + if (r) {
> + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
> + return r;
> }
>
> return 0;
> @@ -1450,11 +1452,8 @@ static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int pipe;
>
> - if (amdgpu_sriov_is_mes_info_enable(adev) ||
> - adev->gfx.enable_cleaner_shader) {
> - amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr,
> - &adev->mes.resource_1_addr);
> - }
> + amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0],
> + &adev->mes.resource_1_addr[0]);
>
> for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
> kfree(adev->mes.mqd_backup[pipe]);
> @@ -1643,13 +1642,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block)
> if (r)
> goto failure;
>
> - if (amdgpu_sriov_is_mes_info_enable(adev) ||
> - adev->gfx.enable_cleaner_shader) {
> - r = mes_v11_0_set_hw_resources_1(&adev->mes);
> - if (r) {
> - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
> - goto failure;
> - }
> + r = mes_v11_0_set_hw_resources_1(&adev->mes);
> + if (r) {
> + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
> + goto failure;
> }
>
> r = mes_v11_0_query_sched_status(&adev->mes);
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> index 96336652d14c..abe8592170b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
> @@ -687,7 +687,7 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe)
> mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
> mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa;
> mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr =
> - mes->resource_1_gpu_addr;
> + mes->resource_1_gpu_addr[pipe];
>
> return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe,
> &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt),
> @@ -1530,21 +1530,19 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block)
>
> if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE)
> r = mes_v12_0_kiq_ring_init(adev);
> - else
> + else {
Per kernel coding style, the if branch of this statement also needs {}
once you add braces to the else branch.
Alex
> r = mes_v12_0_ring_init(adev, pipe);
> - if (r)
> - return r;
> - }
> -
> - if (adev->enable_uni_mes) {
> - r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
> - AMDGPU_GEM_DOMAIN_VRAM,
> - &adev->mes.resource_1,
> - &adev->mes.resource_1_gpu_addr,
> - &adev->mes.resource_1_addr);
> - if (r) {
> - dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r);
> - return r;
> + if (r)
> + return r;
> + r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_VRAM,
> + &adev->mes.resource_1[pipe],
> + &adev->mes.resource_1_gpu_addr[pipe],
> + &adev->mes.resource_1_addr[pipe]);
> + if (r) {
> + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe);
> + return r;
> + }
> }
> }
>
> @@ -1556,12 +1554,11 @@ static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block)
> struct amdgpu_device *adev = ip_block->adev;
> int pipe;
>
> - if (adev->enable_uni_mes)
> - amdgpu_bo_free_kernel(&adev->mes.resource_1,
> - &adev->mes.resource_1_gpu_addr,
> - &adev->mes.resource_1_addr);
> -
> for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
> + amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe],
> + &adev->mes.resource_1_gpu_addr[pipe],
> + &adev->mes.resource_1_addr[pipe]);
> +
> kfree(adev->mes.mqd_backup[pipe]);
>
> amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe],
> @@ -1760,8 +1757,7 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block)
> if (r)
> goto failure;
>
> - if (adev->enable_uni_mes)
> - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
> + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE);
>
> mes_v12_0_init_aggregated_doorbell(&adev->mes);
>
> --
> 2.34.1
>
More information about the amd-gfx
mailing list