[PATCH] drm/amdgpu: normalize registers as local xcc to read/write under sriov in TLB flush

Lazar, Lijo lijo.lazar at amd.com
Mon Jun 24 08:19:05 UTC 2024



On 6/21/2024 1:45 PM, Jane Jian wrote:
> [WHY]
> sriov has the higher bit violation when flushing tlb
> 
> [HOW]
> normalize the registers to keep lower 16-bit(dword aligned) to aviod higher bit violation
> RLCG will mask xcd out and always assume it's accessing its own xcd
> 
> [TODO]
> later will add the normalization in sriovw/rreg after fixing bugs
> 
> v2
> rename the normalized macro, add ip block type for further use
> move asics func declaration after ip block type since new func refers ip block type
> add normalization in emit flush tlb as well
> 
> Signed-off-by: Jane Jian <Jane.Jian at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 112 +++++++++++----------
>  drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c |  16 +++
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  32 ++++--
>  drivers/gpu/drm/amd/amdgpu/soc15.c         |   1 +
>  drivers/gpu/drm/amd/amdgpu/soc15.h         |   1 +
>  drivers/gpu/drm/amd/amdgpu/soc15_common.h  |   5 +-
>  6 files changed, 101 insertions(+), 66 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 083f353cff6e..070fd9e601fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -583,61 +583,6 @@ struct amdgpu_video_codecs {
>  	const struct amdgpu_video_codec_info *codec_array;
>  };
>  
> -/*
> - * ASIC specific functions.
> - */
> -struct amdgpu_asic_funcs {
> -	bool (*read_disabled_bios)(struct amdgpu_device *adev);
> -	bool (*read_bios_from_rom)(struct amdgpu_device *adev,
> -				   u8 *bios, u32 length_bytes);
> -	int (*read_register)(struct amdgpu_device *adev, u32 se_num,
> -			     u32 sh_num, u32 reg_offset, u32 *value);
> -	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
> -	int (*reset)(struct amdgpu_device *adev);
> -	enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
> -	/* get the reference clock */
> -	u32 (*get_xclk)(struct amdgpu_device *adev);
> -	/* MM block clocks */
> -	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
> -	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
> -	/* static power management */
> -	int (*get_pcie_lanes)(struct amdgpu_device *adev);
> -	void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
> -	/* get config memsize register */
> -	u32 (*get_config_memsize)(struct amdgpu_device *adev);
> -	/* flush hdp write queue */
> -	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
> -	/* invalidate hdp read cache */
> -	void (*invalidate_hdp)(struct amdgpu_device *adev,
> -			       struct amdgpu_ring *ring);
> -	/* check if the asic needs a full reset of if soft reset will work */
> -	bool (*need_full_reset)(struct amdgpu_device *adev);
> -	/* initialize doorbell layout for specific asic*/
> -	void (*init_doorbell_index)(struct amdgpu_device *adev);
> -	/* PCIe bandwidth usage */
> -	void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
> -			       uint64_t *count1);
> -	/* do we need to reset the asic at init time (e.g., kexec) */
> -	bool (*need_reset_on_init)(struct amdgpu_device *adev);
> -	/* PCIe replay counter */
> -	uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
> -	/* device supports BACO */
> -	int (*supports_baco)(struct amdgpu_device *adev);
> -	/* pre asic_init quirks */
> -	void (*pre_asic_init)(struct amdgpu_device *adev);
> -	/* enter/exit umd stable pstate */
> -	int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
> -	/* query video codecs */
> -	int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
> -				  const struct amdgpu_video_codecs **codecs);
> -	/* encode "> 32bits" smn addressing */
> -	u64 (*encode_ext_smn_addressing)(int ext_id);
> -
> -	ssize_t (*get_reg_state)(struct amdgpu_device *adev,
> -				 enum amdgpu_reg_state reg_state, void *buf,
> -				 size_t max_size);
> -};
> -
>  /*
>   * IOCTL.
>   */
> @@ -728,6 +673,63 @@ enum amd_hw_ip_block_type {
>  	MAX_HWIP
>  };
>  
> +/*
> + * ASIC specific functions.
> + */
> +struct amdgpu_asic_funcs {
> +	bool (*read_disabled_bios)(struct amdgpu_device *adev);
> +	bool (*read_bios_from_rom)(struct amdgpu_device *adev,
> +				   u8 *bios, u32 length_bytes);
> +	int (*read_register)(struct amdgpu_device *adev, u32 se_num,
> +			     u32 sh_num, u32 reg_offset, u32 *value);
> +	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
> +	int (*reset)(struct amdgpu_device *adev);
> +	enum amd_reset_method (*reset_method)(struct amdgpu_device *adev);
> +	/* get the reference clock */
> +	u32 (*get_xclk)(struct amdgpu_device *adev);
> +	/* MM block clocks */
> +	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
> +	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
> +	/* static power management */
> +	int (*get_pcie_lanes)(struct amdgpu_device *adev);
> +	void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
> +	/* get config memsize register */
> +	u32 (*get_config_memsize)(struct amdgpu_device *adev);
> +	/* flush hdp write queue */
> +	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
> +	/* invalidate hdp read cache */
> +	void (*invalidate_hdp)(struct amdgpu_device *adev,
> +			       struct amdgpu_ring *ring);
> +	/* check if the asic needs a full reset of if soft reset will work */
> +	bool (*need_full_reset)(struct amdgpu_device *adev);
> +	/* initialize doorbell layout for specific asic*/
> +	void (*init_doorbell_index)(struct amdgpu_device *adev);
> +	/* PCIe bandwidth usage */
> +	void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
> +			       uint64_t *count1);
> +	/* do we need to reset the asic at init time (e.g., kexec) */
> +	bool (*need_reset_on_init)(struct amdgpu_device *adev);
> +	/* PCIe replay counter */
> +	uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
> +	/* device supports BACO */
> +	int (*supports_baco)(struct amdgpu_device *adev);
> +	/* pre asic_init quirks */
> +	void (*pre_asic_init)(struct amdgpu_device *adev);
> +	/* enter/exit umd stable pstate */
> +	int (*update_umd_stable_pstate)(struct amdgpu_device *adev, bool enter);
> +	/* query video codecs */
> +	int (*query_video_codecs)(struct amdgpu_device *adev, bool encode,
> +				  const struct amdgpu_video_codecs **codecs);
> +	/* encode "> 32bits" smn addressing */
> +	u64 (*encode_ext_smn_addressing)(int ext_id);
> +
> +	ssize_t (*get_reg_state)(struct amdgpu_device *adev,
> +				 enum amdgpu_reg_state reg_state, void *buf,
> +				 size_t max_size);
> +	/* normalize offset to keep in lower 16-bit */
> +	u32 (*normalize_reg_offset)(enum amd_hw_ip_block_type hwip, u32 offset);
> +};
> +
>  #define HWIP_MAX_INSTANCE	44
>  
>  #define HW_ID_MAX		300
> diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> index 2c9a0aa41e2d..98b00c0e522f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
> @@ -1085,3 +1085,19 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
>  
>  	return size;
>  }
> +
> +u32 aqua_vanjaram_normalize_reg_offset(enum amd_hw_ip_block_type hwip, u32 offset)
> +{
> +	u32 normalized_offset;
> +
> +	switch (hwip) {
> +	case GC_HWIP:
> +		normalized_offset = offset & 0xffff;
> +		break;
> +	default:
> +		normalized_offset = offset;
> +		break;
> +	}
> +
> +	return normalized_offset;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 88b4644f8e96..7542a75f068b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -853,8 +853,12 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>  	 */
>  	if (adev->gfx.kiq[inst].ring.sched.ready &&
>  	    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) {
> -		uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> -		uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
> +
> +		/* Select lower 16 bits to write in local xcc */
> +		if (AMDGPU_IS_GFXHUB(vmhub)) {
> +			req = NORMALIZE_XCC_REG_OFFSET(GC, req);
> +			ack = NORMALIZE_XCC_REG_OFFSET(GC, ack);
> +		}
>  
>  		amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req,
>  						 1 << vmid, inst);
> @@ -979,6 +983,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  	struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
>  	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
>  	unsigned int eng = ring->vm_inv_eng;
> +	u32 low_distance, high_distance, req_offset, ack;
>  
>  	/*
>  	 * It may lose gpuvm invalidate acknowldege state across power-gating
> @@ -986,7 +991,18 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  	 * release after invalidation to avoid entering power gated state
>  	 * to WA the Issue
>  	 */
> +	low_distance = hub->ctx0_ptb_addr_lo32 + (hub->ctx_addr_distance * vmid);
> +	high_distance = hub->ctx0_ptb_addr_hi32 + (hub->ctx_addr_distance * vmid);
> +	req_offset = hub->vm_inv_eng0_req + hub->eng_distance * eng;
> +	ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
>  
> +	/* Select lower 16 bits to write in local xcc */
> +	if (AMDGPU_IS_GFXHUB(ring->vm_hub)) {
> +		low_distance = NORMALIZE_XCC_REG_OFFSET(GC, low_distance);
> +		high_distance = NORMALIZE_XCC_REG_OFFSET(GC, high_distance);
> +		req_offset = NORMALIZE_XCC_REG_OFFSET(GC, req_offset);
> +		ack = NORMALIZE_XCC_REG_OFFSET(GC, ack);
> +	}
>  	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
>  	if (use_semaphore)
>  		/* a read return value of 1 means semaphore acuqire */
> @@ -994,18 +1010,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>  					  hub->vm_inv_eng0_sem +
>  					  hub->eng_distance * eng, 0x1, 0x1);
>  
> -	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
> -			      (hub->ctx_addr_distance * vmid),
> +	amdgpu_ring_emit_wreg(ring, low_distance,
>  			      lower_32_bits(pd_addr));
>  
> -	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
> -			      (hub->ctx_addr_distance * vmid),
> +	amdgpu_ring_emit_wreg(ring, high_distance,
>  			      upper_32_bits(pd_addr));
>  
> -	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
> -					    hub->eng_distance * eng,
> -					    hub->vm_inv_eng0_ack +
> -					    hub->eng_distance * eng,
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, req_offset,
> +					    ack,
>  					    req, 1 << vmid);
>  
>  	/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8d16dacdc172..e6e61fc77080 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -927,6 +927,7 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs =
>  	.query_video_codecs = &soc15_query_video_codecs,
>  	.encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing,
>  	.get_reg_state = &aqua_vanjaram_get_reg_state,
> +	.normalize_reg_offset = &aqua_vanjaram_normalize_reg_offset,
>  };
>  
>  static int soc15_common_early_init(void *handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
> index 282584a48be0..f1e974604e3e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.h
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
> @@ -124,4 +124,5 @@ ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev,
>  void vega10_doorbell_index_init(struct amdgpu_device *adev);
>  void vega20_doorbell_index_init(struct amdgpu_device *adev);
>  void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev);
> +u32 aqua_vanjaram_normalize_reg_offset(u32 hwip, u32 offset);

There is a mismatch in prototype in declaration vs definition.

>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> index 242b24f73c17..ddf0aad51821 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
> @@ -210,4 +210,7 @@
>  #define WREG64_MCA(ext, mca_base, idx, val) \
>  	WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val)
>  
> -#endif
> +#define NORMALIZE_XCC_REG_OFFSET(ip, offset) \

This naming is specific to XCC while what it does is not specific to XCC.

If it's specific to XCC, you may want only this way

NORMALIZE_XCC_REG_OFFSET(offset)
adev->asic_funcs->normalize_reg_offset(GC_HWIP, offset)

Thanks,
Lijo
> +	((amdgpu_sriov_vf(adev) && adev->asic_funcs->normalize_reg_offset) ? \
> +	adev->asic_funcs->normalize_reg_offset(ip##_HWIP, offset) : offset)
> +#endif
> \ No newline at end of file


More information about the amd-gfx mailing list