[PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov in TLB flush

Jian, Jane Jane.Jian at amd.com
Mon Jun 24 09:56:33 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

Hi Christian,
The reason that why I moved around amdgpu_asic_funcs because the new function that I declared in amdgpu_asic_funcs used the enum amd_hw_ip_block_type, therefore I move it after the definition of the amd_hw_ip_block_type.

Thanks,
Jane

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken at gmail.com>
Sent: Monday, June 24, 2024 5:35 PM
To: Jian, Jane <Jane.Jian at amd.com>; Lazar, Lijo <Lijo.Lazar at amd.com>; Chang, HaiJun <HaiJun.Chang at amd.com>; Zhao, Victor <Victor.Zhao at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov in TLB flush

Am 24.06.24 um 11:13 schrieb Jane Jian:
> [WHY]
> sriov has the higher bit violation when flushing tlb
>
> [HOW]
> normalize the registers to keep lower 16-bit(dword aligned) to aviod
> higher bit violation RLCG will mask xcd out and always assume it's
> accessing its own xcd
>
> [TODO]
> later will add the normalization in sriovw/rreg after fixing bugs
>
> v2
> rename the normalized macro, add ip block type for further use move
> asics func declaration after ip block type since new func refers ip
> block type add normalization in emit flush tlb as well
>
> v3
> declare the new func in the asic specific header
>
> Signed-off-by: Jane Jian <Jane.Jian at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 112 +++++++++++----------
>   drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c |  17 ++++
>   drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h |  28 ++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  32 ++++--
>   drivers/gpu/drm/amd/amdgpu/soc15.c         |   2 +
>   drivers/gpu/drm/amd/amdgpu/soc15_common.h  |   5 +-
>   6 files changed, 130 insertions(+), 66 deletions(-)
>   create mode 100644 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 083f353cff6e..070fd9e601fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -583,61 +583,6 @@ struct amdgpu_video_codecs {
>       const struct amdgpu_video_codec_info *codec_array;
>   };
>
> -/*
> - * ASIC specific functions.
> - */
> -struct amdgpu_asic_funcs {
> -     bool (*read_disabled_bios)(struct amdgpu_device *adev);
> -     bool (*read_bios_from_rom)(struct amdgpu_device *adev,
> -                                u8 *bios, u32 length_bytes);
> -     int (*read_register)(struct amdgpu_device *adev, u32 se_num,
> -                          u32 sh_num, u32 reg_offset, u32 *value);
> -     void (*set_vga_state)(struct amdgpu_device *adev, bool state);
> -     int (*reset)(struct amdgpu_device *adev);
> -     enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
> -     /* get the reference clock */
> -     u32 (*get_xclk)(struct amdgpu_device *adev);
> -     /* MM block clocks */
> -     int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
> -     int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
> -     /* static power management */
> -     int (*get_pcie_lanes)(struct amdgpu_device *adev);
> -     void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
> -     /* get config memsize register */
> -     u32 (*get_config_memsize)(struct amdgpu_device *adev);
> -     /* flush hdp write queue */
> -     void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
> -     /* invalidate hdp read cache */
> -     void (*invalidate_hdp)(struct amdgpu_device *adev,
> -                            struct amdgpu_ring *ring);
> -     /* check if the asic needs a full reset of if soft reset will work */
> -     bool (*need_full_reset)(struct amdgpu_device *adev);
> -     /* initialize doorbell layout for specific asic*/
> -     void (*init_doorbell_index)(struct amdgpu_device *adev);
> -     /* PCIe bandwidth usage */
> -     void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
> -                            uint64_t *count1);
> -     /* do we need to reset the asic at init time (e.g., kexec) */
> -     bool (*need_reset_on_init)(struct amdgpu_device *adev);
> -     /* PCIe replay counter */
> -     uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
> -     /* device supports BACO */
> -     int (*supports_baco)(struct amdgpu_device *adev);
> -     /* pre asic_init quirks */
> -     void (*pre_asic_init)(struct amdgpu_device *adev);
> -     /* enter/exit umd stable pstate */
> -     int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
> -     /* query video codecs */
> -     int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
> -                               const struct amdgpu_video_codecs **codecs);
> -     /* encode "> 32bits" smn addressing */
> -     u64 (*encode_ext_smn_addressing)(int ext_id);
> -
> -     ssize_t (*get_reg_state)(struct amdgpu_device *adev,
> -                              enum amdgpu_reg_state reg_state, void *buf,
> -                              size_t max_size);
> -};
> -
>   /*
>    * IOCTL.
>    */
> @@ -728,6 +673,63 @@ enum amd_hw_ip_block_type {
>       MAX_HWIP
>   };
>
> +/*
> + * ASIC specific functions.
> + */
> +struct amdgpu_asic_funcs {
> +     bool (*read_disabled_bios)(struct amdgpu_device *adev);
> +     bool (*read_bios_from_rom)(struct amdgpu_device *adev,
> +                                u8 *bios, u32 length_bytes);
> +     int (*read_register)(struct amdgpu_device *adev, u32 se_num,
> +                          u32 sh_num, u32 reg_offset, u32 *value);
> +     void (*set_vga_state)(struct amdgpu_device *adev, bool state);
> +     int (*reset)(struct amdgpu_device *adev);
> +     enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
> +     /* get the reference clock */
> +     u32 (*get_xclk)(struct amdgpu_device *adev);
> +     /* MM block clocks */
> +     int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
> +     int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
> +     /* static power management */
> +     int (*get_pcie_lanes)(struct amdgpu_device *adev);
> +     void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
> +     /* get config memsize register */
> +     u32 (*get_config_memsize)(struct amdgpu_device *adev);
> +     /* flush hdp write queue */
> +     void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
> +     /* invalidate hdp read cache */
> +     void (*invalidate_hdp)(struct amdgpu_device *adev,
> +                            struct amdgpu_ring *ring);
> +     /* check if the asic needs a full reset of if soft reset will work */
> +     bool (*need_full_reset)(struct amdgpu_device *adev);
> +     /* initialize doorbell layout for specific asic*/
> +     void (*init_doorbell_index)(struct amdgpu_device *adev);
> +     /* PCIe bandwidth usage */
> +     void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
> +                            uint64_t *count1);
> +     /* do we need to reset the asic at init time (e.g., kexec) */
> +     bool (*need_reset_on_init)(struct amdgpu_device *adev);
> +     /* PCIe replay counter */
> +     uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
> +     /* device supports BACO */
> +     int (*supports_baco)(struct amdgpu_device *adev);
> +     /* pre asic_init quirks */
> +     void (*pre_asic_init)(struct amdgpu_device *adev);
> +     /* enter/exit umd stable pstate */
> +     int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
> +     /* query video codecs */
> +     int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
> +                               const struct amdgpu_video_codecs **codecs);
> +     /* encode "> 32bits" smn addressing */
> +     u64 (*encode_ext_smn_addressing)(int ext_id);
> +
> +     ssize_t (*get_reg_state)(struct amdgpu_device *adev,
> +                              enum amdgpu_reg_state reg_state, void *buf,
> +                              size_t max_size);
> +     /* normalize offset to keep in lower 16-bit */
> +     u32 (*normalize_reg_offset)(enum amd_hw_ip_block_type hwip, u32
> +offset); };
> +

Why are you moving this definition around?

Regards,
Christian.

>   #define HWIP_MAX_INSTANCE   44
>
>   #define HW_ID_MAX           300
> diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> index 2c9a0aa41e2d..7cdd4b9d08ba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> @@ -29,6 +29,7 @@
>   #include "gfx_v9_4_3.h"
>   #include "gfxhub_v1_2.h"
>   #include "sdma_v4_4_2.h"
> +#include "aqua_vanjaram.h"
>
>   #define XCP_INST_MASK(num_inst, xcp_id)                                        \
>       (num_inst ? GENMASK(num_inst - 1, 0) << (xcp_id * num_inst) : 0) @@
> -1085,3 +1086,19 @@ ssize_t aqua_vanjaram_get_reg_state(struct
> amdgpu_device *adev,
>
>       return size;
>   }
> +
> +u32 aqua_vanjaram_normalize_reg_offset(enum amd_hw_ip_block_type
> +hwip, u32 offset) {
> +     u32 normalized_offset;
> +
> +     switch (hwip) {
> +     case GC_HWIP:
> +             normalized_offset = offset & 0xffff;
> +             break;
> +     default:
> +             normalized_offset = offset;
> +             break;
> +     }
> +
> +     return normalized_offset;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h
> b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h
> new file mode 100644
> index 000000000000..8d1b7a89cb71
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.h
> @@ -0,0 +1,28 @@
> +/*
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person
> +obtaining a
> + * copy of this software and associated documentation files (the
> +"Software"),
> + * to deal in the Software without restriction, including without
> +limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> +sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom
> +the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> +included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> +EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> +MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> +SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> +DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> +OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
> +OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +#ifndef __AQUA_VANJARAM_H__
> +#define __AQUA_VANJARAM_H__
> +
> +u32 aqua_vanjaram_normalize_reg_offset(enum amd_hw_ip_block_type
> +hwip, u32 offset);
> +
> +#endif
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 88b4644f8e96..19e4429db37c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -853,8 +853,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>        */
>       if (adev->gfx.kiq[inst].ring.sched.ready &&
>           (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
> -             uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> -             uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> +             /* Select lower 16 bits to write in local xcc */
> +             if (AMDGPU_IS_GFXHUB(vmhub)) {
> +                     req = NORMALIZE_XCC_REG_OFFSET(req);
> +                     ack = NORMALIZE_XCC_REG_OFFSET(ack);
> +             }
>
>               amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
>                                                1 << vmid, inst);
> @@ -979,6 +983,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>       struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
>       uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
>       unsigned int eng = ring->vm_inv_eng;
> +     u32 low_distance, high_distance, req_offset, ack;
>
>       /*
>        * It may lose gpuvm invalidate acknowldege state across
> power-gating @@ -986,7 +991,18 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>        * release after invalidation to avoid entering power gated state
>        * to WA the Issue
>        */
> +     low_distance = hub->ctx0_ptb_addr_lo32 + (hub->ctx_addr_distance * vmid);
> +     high_distance = hub->ctx0_ptb_addr_hi32 + (hub->ctx_addr_distance * vmid);
> +     req_offset = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +     ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>
> +     /* Select lower 16 bits to write in local xcc */
> +     if (AMDGPU_IS_GFXHUB(ring->vm_hub)) {
> +             low_distance = NORMALIZE_XCC_REG_OFFSET(low_distance);
> +             high_distance = NORMALIZE_XCC_REG_OFFSET(high_distance);
> +             req_offset = NORMALIZE_XCC_REG_OFFSET(req_offset);
> +             ack = NORMALIZE_XCC_REG_OFFSET(ack);
> +     }
>       /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>       if (use_semaphore)
>               /* a read return value of 1 means semaphore acuqire */ @@ -994,18
> +1010,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>                                         hub->vm_inv_eng0_sem +
>                                         hub->eng_distance * eng, 0x1, 0x1);
>
> -     amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
> -                           (hub->ctx_addr_distance * vmid),
> +     amdgpu_ring_emit_wreg(ring, low_distance,
>                             lower_32_bits(pd_addr));
>
> -     amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
> -                           (hub->ctx_addr_distance * vmid),
> +     amdgpu_ring_emit_wreg(ring, high_distance,
>                             upper_32_bits(pd_addr));
>
> -     amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
> -                                         hub->eng_distance * eng,
> -                                         hub->vm_inv_eng0_ack +
> -                                         hub->eng_distance * eng,
> +     amdgpu_ring_emit_reg_write_reg_wait(ring, req_offset,
> +                                         ack,
>                                           req, 1 << vmid);
>
>       /* TODO: It needs to continue working on debugging with semaphore
> for GFXHUB as well. */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8d16dacdc172..3a1fa2797f02 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -78,6 +78,7 @@
>   #include "mxgpu_ai.h"
>   #include "amdgpu_ras.h"
>   #include "amdgpu_xgmi.h"
> +#include "aqua_vanjaram.h"
>   #include <uapi/linux/kfd_ioctl.h>
>
>   #define mmMP0_MISC_CGTT_CTRL0                                                                   0x01b9
> @@ -927,6 +928,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
>       .query_video_codecs = &soc15_query_video_codecs,
>       .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
>       .get_reg_state = &aqua_vanjaram_get_reg_state,
> +     .normalize_reg_offset = &aqua_vanjaram_normalize_reg_offset,
>   };
>
>   static int soc15_common_early_init(void *handle) diff --git
> a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> index 242b24f73c17..01afd1a24e8b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> @@ -210,4 +210,7 @@
>   #define WREG64_MCA(ext, mca_base, idx, val) \
>       WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) +
> mca_base + (idx * 8), val)
>
> -#endif
> +#define NORMALIZE_XCC_REG_OFFSET(offset) \
> +     ((amdgpu_sriov_vf(adev) && adev->asic_funcs->normalize_reg_offset) ? \
> +     adev->asic_funcs->normalize_reg_offset(GC_HWIP, offset) : offset)
> +#endif
> \ No newline at end of file



More information about the amd-gfx mailing list