[PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
Zhu, Changfeng
Changfeng.Zhu at amd.com
Wed Nov 6 08:15:51 UTC 2019
Thanks, Chris.
I double-checked that all engines on gfx10 implement the emit_reg_write_reg_wait callback.
You're right, I missed .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait for gfx_v10_0_ring_funcs_kiq, so I have added it.
For sdma5, there is one amdgpu_ring_funcs engine: sdma_v5_0_ring_funcs.
We have defined:
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
For vcn/uvd/vce, their engines have all already defined:
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
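For reference, my understanding is that the generic helper simply emits
the write followed by the wait on the ack register (a sketch from memory
of amdgpu_ring.c, please double check the actual code):

void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
						uint32_t reg0, uint32_t reg1,
						uint32_t ref, uint32_t mask)
{
	/* write the request register, then poll the ack register */
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}

That is also why sdma_v5_0 needs its own callback with the extra dummy
read of reg0 in between, as in the patch below.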
So after adding .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait
for gfx_v10_0_ring_funcs_kiq, I think there should be no NULL pointer deref problem.
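The change for gfx_v10_0_ring_funcs_kiq is just the extra callback line; here
is an abridged sketch with only a few of the existing entries kept to show
where the new line goes:

static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	/* ... existing entries unchanged ... */
	.emit_wreg = gfx_v10_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
	/* new: reuse the same WAIT_REG_MEM based path as gfx/compute */
	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};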
BR,
Changfeng.
-----Original Message-----
From: Koenig, Christian <Christian.Koenig at amd.com>
Sent: Tuesday, November 5, 2019 7:58 PM
To: Zhu, Changfeng <Changfeng.Zhu at amd.com>; amd-gfx at lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov at amd.com>; Huang, Ray <Ray.Huang at amd.com>; Huang, Shimmer <Xinmei.Huang at amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
On 05.11.19 12:42, Zhu, Changfeng wrote:
> From: changzhu <Changfeng.Zhu at amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd, much faster than the previous multi-cycle
> per-transaction interface. This has caused a problem where status
> registers requiring HW to update have a 1 cycle delay, due to the
> register update having to go through GRBM.
>
> For cp ucode, the dummy read has been implemented in the cp firmware. It
> covers the WAIT_REG_MEM operation 1 case only, so the driver needs to
> call gfx_v10_0_wait_reg_mem in gfx10. Besides that, it also needs to add
> a warning to update the firmware in case the firmware is too old to
> implement the dummy read in cp firmware.
>
> For sdma ucode, the dummy read has not been implemented in the sdma
> firmware, and sdma is moved to gfxhub in gfx10. So the driver needs to
> add the dummy read between amdgpu_ring_emit_wreg and
> amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 47 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 ++---
> drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 13 ++++++-
> 4 files changed, 63 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
> uint32_t mec2_feature_version;
> bool mec_fw_write_wait;
> bool me_fw_write_wait;
> + bool cp_fw_write_wait;
> struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
> unsigned num_gfx_rings;
> struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..e82b6d796b69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
> kfree(adev->gfx.rlc.register_list_format);
> }
>
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> + adev->gfx.cp_fw_write_wait = false;
> +
> + switch (adev->asic_type) {
> + case CHIP_NAVI10:
> + case CHIP_NAVI12:
> + case CHIP_NAVI14:
> + if ((adev->gfx.me_fw_version >= 0x00000046) &&
> + (adev->gfx.me_feature_version >= 27) &&
> + (adev->gfx.pfp_fw_version >= 0x00000068) &&
> + (adev->gfx.pfp_feature_version >= 27) &&
> + (adev->gfx.mec_fw_version >= 0x0000005b) &&
> + (adev->gfx.mec_feature_version >= 27))
> + adev->gfx.cp_fw_write_wait = true;
> + break;
> + default:
> + break;
> + }
> +
> + if (adev->gfx.cp_fw_write_wait == false)
> + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> + GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
> static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
> {
> const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
> }
>
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> + uint32_t reg0, uint32_t reg1,
> + uint32_t ref, uint32_t mask)
> +{
> + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> + struct amdgpu_device *adev = ring->adev;
> + bool fw_version_ok = false;
> +
> + gfx_v10_0_check_fw_write_wait(adev);
> + fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> + if (fw_version_ok)
> + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> + ref, mask, 0x20);
> + else
> + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> + ref, mask);
> +}
> +
> static void
> gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
> uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
> .emit_tmz = gfx_v10_0_ring_emit_tmz,
> .emit_wreg = gfx_v10_0_ring_emit_wreg,
> .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> };
>
> static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_wreg = gfx_v10_0_ring_emit_wreg,
> .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
> };
>
> static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
> upper_32_bits(pd_addr));
>
> - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> - /* wait for the invalidate to complete */
> - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> - 1 << vmid, 1 << vmid);
> + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> + hub->vm_inv_eng0_ack + eng,
> + req, 1 << vmid);
You need to double check that all engines on gfx10 implement the emit_reg_write_reg_wait callback (probably as dummy with .emit_reg_write_reg_wait=amdgpu_ring_emit_reg_write_reg_wait_helper).
Otherwise that could quickly result in a NULL pointer deref here for the multimedia engines.
Apart from that the patch looks good to me now.
Thanks,
Christian.
>
> return pd_addr;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
> SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
> }
>
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> + uint32_t reg0, uint32_t reg1,
> + uint32_t ref, uint32_t mask)
> +{
> + amdgpu_ring_emit_wreg(ring, reg0, ref);
> + /* wait for a cycle to reset vm_inv_eng*_ack */
> + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
> static int sdma_v5_0_early_init(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
> 6 + /* sdma_v5_0_ring_emit_pipeline_sync */
> /* sdma_v5_0_ring_emit_vm_flush */
> SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
> 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
> .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
> .emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
> .pad_ib = sdma_v5_0_ring_pad_ib,
> .emit_wreg = sdma_v5_0_ring_emit_wreg,
> .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
> .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
> .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
> .preempt_ib = sdma_v5_0_ring_preempt_ib,