[PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
Dhume, Samir
Samir.Dhume at amd.com
Tue Aug 8 16:31:05 UTC 2023
Thanks Leo. I'll restore the amdgpu_sriov_vf() check before the call to amdgpu_virt_alloc_mm_table(), which keeps it consistent with the other VCN IP versions. I'll also retain the check inside amdgpu_virt_alloc_mm_table() itself, as a conservative safeguard.
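For reference, a minimal sketch of the guarded calls in sw_init/sw_fini (untested; it just mirrors the pattern used by the other VCN IP versions):

	/* sw_init: the MM table is only used for the MMSCH handshake,
	 * so allocate it only when running as an SRIOV VF
	 */
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

	/* sw_fini: matching guard before freeing the table */
	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);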
Thanks,
Samir
-----Original Message-----
From: Liu, Leo <Leo.Liu at amd.com>
Sent: Tuesday, August 8, 2023 8:29 AM
To: Dhume, Samir <Samir.Dhume at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Luo, Zhigang <Zhigang.Luo at amd.com>; Chen, Guchun <Guchun.Chen at amd.com>; Wan, Gavin <Gavin.Wan at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>; Min, Frank <Frank.Min at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>
Subject: Re: [PATCH v3 3/7] drm/amdgpu/vcn: sriov support for vcn_v4_0_3
On 2023-07-28 15:15, Samir Dhume wrote:
> initialization table handshake with mmsch
>
> Signed-off-by: Samir Dhume <samir.dhume at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 257 +++++++++++++++++++++---
> 1 file changed, 233 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index 411c1d802823..b978265b2d77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -31,6 +31,7 @@
> #include "soc15d.h"
> #include "soc15_hw_ip.h"
> #include "vcn_v2_0.h"
> +#include "mmsch_v4_0_3.h"
>
> #include "vcn/vcn_4_0_3_offset.h"
> #include "vcn/vcn_4_0_3_sh_mask.h"
> @@ -44,6 +45,7 @@
> #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
> #define VCN1_VID_SOC_ADDRESS_3_0 0x48300
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
> static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev);
> static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev);
> static int vcn_v4_0_3_set_powergating_state(void *handle,
> @@ -130,6 +132,10 @@ static int vcn_v4_0_3_sw_init(void *handle)
> amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
> }
>
> + r = amdgpu_virt_alloc_mm_table(adev);
Since this function is not for bare-metal, please move the amdgpu_sriov_vf() check from inside the function to here, to avoid confusion.
> + if (r)
> + return r;
> +
> if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
> adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode;
>
> @@ -167,6 +173,8 @@ static int vcn_v4_0_3_sw_fini(void *handle)
> drm_dev_exit(idx);
> }
>
> + amdgpu_virt_free_mm_table(adev);
Same as above.
Regards,
Leo
> +
> r = amdgpu_vcn_suspend(adev);
> if (r)
> return r;
> @@ -189,33 +197,47 @@ static int vcn_v4_0_3_hw_init(void *handle)
> struct amdgpu_ring *ring;
> int i, r, vcn_inst;
>
> - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> - vcn_inst = GET_INST(VCN, i);
> - ring = &adev->vcn.inst[i].ring_enc[0];
> + if (amdgpu_sriov_vf(adev)) {
> + r = vcn_v4_0_3_start_sriov(adev);
> + if (r)
> + goto done;
>
> - if (ring->use_doorbell) {
> - adev->nbio.funcs->vcn_doorbell_range(
> - adev, ring->use_doorbell,
> - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> - 9 * vcn_inst,
> - adev->vcn.inst[i].aid_id);
> -
> - WREG32_SOC15(
> - VCN, GET_INST(VCN, ring->me),
> - regVCN_RB1_DB_CTRL,
> - ring->doorbell_index
> - << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> - VCN_RB1_DB_CTRL__EN_MASK);
> -
> - /* Read DB_CTRL to flush the write DB_CTRL command. */
> - RREG32_SOC15(
> - VCN, GET_INST(VCN, ring->me),
> - regVCN_RB1_DB_CTRL);
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + ring = &adev->vcn.inst[i].ring_enc[0];
> + ring->wptr = 0;
> + ring->wptr_old = 0;
> + vcn_v4_0_3_unified_ring_set_wptr(ring);
> + ring->sched.ready = true;
> }
> + } else {
> + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
> + vcn_inst = GET_INST(VCN, i);
> + ring = &adev->vcn.inst[i].ring_enc[0];
> +
> + if (ring->use_doorbell) {
> + adev->nbio.funcs->vcn_doorbell_range(
> + adev, ring->use_doorbell,
> + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
> + 9 * vcn_inst,
> + adev->vcn.inst[i].aid_id);
> +
> + WREG32_SOC15(
> + VCN, GET_INST(VCN, ring->me),
> + regVCN_RB1_DB_CTRL,
> + ring->doorbell_index
> + << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
> + VCN_RB1_DB_CTRL__EN_MASK);
> +
> + /* Read DB_CTRL to flush the write DB_CTRL command. */
> + RREG32_SOC15(
> + VCN, GET_INST(VCN, ring->me),
> + regVCN_RB1_DB_CTRL);
> + }
>
> - r = amdgpu_ring_test_helper(ring);
> - if (r)
> - goto done;
> + r = amdgpu_ring_test_helper(ring);
> + if (r)
> + goto done;
> + }
> }
>
> done:
> @@ -813,6 +835,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
> return 0;
> }
>
> +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev)
> +{
> + int i, vcn_inst;
> + struct amdgpu_ring *ring_enc;
> + uint64_t cache_addr;
> + uint64_t rb_enc_addr;
> + uint64_t ctx_addr;
> + uint32_t param, resp, expected;
> + uint32_t offset, cache_size;
> + uint32_t tmp, timeout;
> +
> + struct amdgpu_mm_table *table = &adev->virt.mm_table;
> + uint32_t *table_loc;
> + uint32_t table_size;
> + uint32_t size, size_dw;
> + uint32_t init_status;
> + uint32_t enabled_vcn;
> +
> + struct mmsch_v4_0_cmd_direct_write
> + direct_wt = { {0} };
> + struct mmsch_v4_0_cmd_direct_read_modify_write
> + direct_rd_mod_wt = { {0} };
> + struct mmsch_v4_0_cmd_end end = { {0} };
> + struct mmsch_v4_0_3_init_header header;
> +
> + volatile struct amdgpu_vcn4_fw_shared *fw_shared;
> + volatile struct amdgpu_fw_shared_rb_setup *rb_setup;
> +
> + direct_wt.cmd_header.command_type =
> + MMSCH_COMMAND__DIRECT_REG_WRITE;
> + direct_rd_mod_wt.cmd_header.command_type =
> + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
> + end.cmd_header.command_type = MMSCH_COMMAND__END;
> +
> + for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
> + vcn_inst = GET_INST(VCN, i);
> +
> + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header));
> + header.version = MMSCH_VERSION;
> + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2;
> +
> + table_loc = (uint32_t *)table->cpu_addr;
> + table_loc += header.total_size;
> +
> + table_size = 0;
> +
> + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS),
> + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
> +
> + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
> +
> + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
> +
> + offset = 0;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET0), 0);
> + } else {
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
> + lower_32_bits(adev->vcn.inst[i].gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
> + upper_32_bits(adev->vcn.inst[i].gpu_addr));
> + offset = cache_size;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET0),
> + AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
> + }
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE0),
> + cache_size);
> +
> + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset;
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET1), 0);
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE);
> +
> + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset +
> + AMDGPU_VCN_STACK_SIZE;
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr));
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr));
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_OFFSET2), 0);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE);
> +
> + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr;
> + rb_setup = &fw_shared->rb_setup;
> +
> + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0];
> + ring_enc->wptr = 0;
> + rb_enc_addr = ring_enc->gpu_addr;
> +
> + rb_setup->is_rb_enabled_flags |= RB_ENABLED;
> + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
> + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
> + rb_setup->rb_size = ring_enc->ring_size / 4;
> + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);
> +
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
> + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
> + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr));
> + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0,
> + regUVD_VCPU_NONCACHE_SIZE0),
> + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
> + MMSCH_V4_0_INSERT_END();
> +
> + header.vcn0.init_status = 0;
> + header.vcn0.table_offset = header.total_size;
> + header.vcn0.table_size = table_size;
> + header.total_size += table_size;
> +
> + /* Send init table to mmsch */
> + size = sizeof(struct mmsch_v4_0_3_init_header);
> + table_loc = (uint32_t *)table->cpu_addr;
> + memcpy((void *)table_loc, &header, size);
> +
> + ctx_addr = table->gpu_addr;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
> +
> + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID);
> + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
> + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp);
> +
> + size = header.total_size;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size);
> +
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0);
> +
> + param = 0x00000001;
> + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param);
> + tmp = 0;
> + timeout = 1000;
> + resp = 0;
> + expected = MMSCH_VF_MAILBOX_RESP__OK;
> + while (resp != expected) {
> + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP);
> + if (resp != 0)
> + break;
> +
> + udelay(10);
> + tmp = tmp + 10;
> + if (tmp >= timeout) {
> + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
> + " waiting for regMMSCH_VF_MAILBOX_RESP "\
> + "(expected=0x%08x, readback=0x%08x)\n",
> + tmp, expected, resp);
> + return -EBUSY;
> + }
> + }
> +
> + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
> + init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status;
> + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
> + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) {
> + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
> + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);
> + }
> + }
> +
> + return 0;
> +}
> +
> /**
> * vcn_v4_0_3_start - VCN start
> *