[PATCH] drm/amdgpu: add dummy read by engines for some GCVM status registers

Koenig, Christian Christian.Koenig at amd.com
Tue Nov 5 11:17:14 UTC 2019


Am 05.11.19 um 11:21 schrieb Zhu, Changfeng:
> Hi Chris,
>
> Maybe it's better to use amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); to replace
> amdgpu_ring_emit_reg_wait(ring, reg1, 0, 0); ?

Good point. I've mixed up the request and acknowledge registers.

What is important is that you need 0 as both mask and value; otherwise we
could potentially wait forever.

Regards,
Christian.

>
> http://ontrack-internal.amd.com/browse/SWDEV-192660
> Jira ticket recommends to read VM_INVALIDATE_ENG*_REQ.
>
> BR,
> Changfeng.
>
> -----Original Message-----
> From: Koenig, Christian <Christian.Koenig at amd.com>
> Sent: Tuesday, November 5, 2019 5:13 PM
> To: Zhu, Changfeng <Changfeng.Zhu at amd.com>; amd-gfx at lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov at amd.com>; Huang, Ray <Ray.Huang at amd.com>; Huang, Shimmer <Xinmei.Huang at amd.com>
> Subject: Re: [PATCH] drm/amdgpu: add dummy read by engines for some GCVM status registers
>
> Am 05.11.19 um 07:32 schrieb Zhu, Changfeng:
>> From: changzhu <Changfeng.Zhu at amd.com>
>>
>> The GRBM register interface is now capable of bursting 1 cycle per
>> register wr->wr, wr->rd much faster than previous multicycle per
>> transaction done interface.  This has caused a problem where status
>> registers requiring HW to update have a 1 cycle delay, due to the
>> register update having to go through GRBM.
>>
>> For cp ucode, it has realized dummy read in cp firmware. It covers the
>> use of WAIT_REG_MEM operation 1 case only. So it needs to call
>> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning
>> to update firmware in case firmware is too old to have function to
>> realize dummy read in cp firmware.
>>
>> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma
>> is moved to gfxhub in gfx10. So it needs to add dummy read in driver
>> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
> First of all, thanks for getting your environment set up properly; we are finally making progress with that issue.
>
> A bunch of nice-to-have comments below and two major bugs/typos which really need to be fixed.
>
>> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
>> Signed-off-by: changzhu <Changfeng.Zhu at amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>>    drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 50 +++++++++++++++++++++++++
>>    drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   |  7 ++++
>>    drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++--
>>    drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>>    5 files changed, 73 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 459aa9059542..a74ecd449775 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>>    	uint32_t			mec2_feature_version;
>>    	bool				mec_fw_write_wait;
>>    	bool				me_fw_write_wait;
>> +	bool				cp_fw_write_wait;
>>    	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>>    	unsigned			num_gfx_rings;
>>    	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index 17a5cbfd0024..814764723c26 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>>    	kfree(adev->gfx.rlc.register_list_format);
>>    }
>>    
>> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
>> +{
>> +	adev->gfx.cp_fw_write_wait = false;
>> +
>> +	switch (adev->asic_type) {
>> +	case CHIP_NAVI10:
>> +	case CHIP_NAVI12:
>> +	case CHIP_NAVI14:
>> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
>> +		    (adev->gfx.me_feature_version >= 27) &&
>> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
>> +		    (adev->gfx.pfp_feature_version >= 27) &&
>> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
>> +		    (adev->gfx.mec_feature_version >= 27))
>> +			adev->gfx.cp_fw_write_wait = true;
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	if (adev->gfx.cp_fw_write_wait == false)
>> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
>> +				  GRBM requires 1-cycle delay in cp firmware\n"); }
>> +
>> +
>>    static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>>    {
>>    	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -4768,6 +4794,28
>> @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>>    	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>>    }
>>    
>> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
>> +						  uint32_t reg0, uint32_t reg1,
>> +						  uint32_t ref, uint32_t mask)
>> +{
>> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
>> +	struct amdgpu_device *adev = ring->adev;
>> +	bool fw_version_ok = false;
>> +
>> +	gfx_v10_0_check_fw_write_wait(adev);
>> +
>> +	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
>> +		ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
> That check is probably superfluous. A few lines below you are using the function in the gfx_v10_0_ring_funcs_gfx and gfx_v10_0_ring_funcs_compute, so the ring->funcs->type is always constant.
>
>> +		fw_version_ok = adev->gfx.cp_fw_write_wait;
>> +
>> +	if (fw_version_ok)
>> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
>> +				      ref, mask, 0x20);
>> +	else
>> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
>> +							   ref, mask);
>> +}
>> +
>>    static void
>>    gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>>    				      uint32_t me, uint32_t pipe, @@ -5158,6 +5206,7 @@ static
>> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>>    	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>>    	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>>    	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>>    };
>>    
>>    static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute =
>> { @@ -5191,6 +5240,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>>    	.pad_ib = amdgpu_ring_generic_pad_ib,
>>    	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>>    	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>>    };
>>    
>>    static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 9d5f900e3e1c..f52fcb895d51 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -982,6 +982,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
>>    	adev->gfx.me_fw_write_wait = false;
>>    	adev->gfx.mec_fw_write_wait = false;
>>    
>> +	if ((adev->gfx.mec_fw_version < 0x000001a5) ||
>> +	    (adev->gfx.mec_feature_version < 46) ||
>> +	    (adev->gfx.pfp_fw_version < 0x000000b7) ||
>> +	    (adev->gfx.pfp_feature_version < 46))
>> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
>> +					   GRBM requires 1-cycle delay in cp firmware\n");
>> +
> Not a hard requirement, but it would be nice to have this in a separate patch. So that gfx9 and gfx10 changes are clearly separated.
>
>>    	switch (adev->asic_type) {
>>    	case CHIP_VEGA10:
>>    		if ((adev->gfx.me_fw_version >= 0x0000009c) && diff --git
>> a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index 3b00bce14cfb..9ff3ec1531ed 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>    	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>>    			      upper_32_bits(pd_addr));
>>    
>> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>> -
>> -	/* wait for the invalidate to complete */
>> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
>> -				  1 << vmid, 1 << vmid);
>> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
>> +						hub->vm_inv_eng0_ack,
> That register offset is wrong! This needs to be "hub->vm_inv_eng0_ack + eng".
>
>> +						req, 1 << vmid);
>>    
>>    	return pd_addr;
>>    }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> index 3460c00f3eaa..4cf8e3d23c60 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
>> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>>    			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>>    }
>>    
>> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
>> +												 uint32_t reg0, uint32_t reg1,
>> +												 uint32_t ref, uint32_t mask) {
>> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
>> +	/* wait for a cycle to reset vm_inv_eng*_ack */
>> +	amdgpu_ring_emit_reg_wait(ring, reg0, mask, mask);
> Well, that's exactly what won't work. Please use the following instead:
>
> amdgpu_ring_emit_reg_wait(ring, reg1, 0, 0);
>
> Regards,
> Christian.
>
>> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
>> +
>>    static int sdma_v5_0_early_init(void *handle)
>>    {
>>    	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
>> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>>    		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>>    		/* sdma_v5_0_ring_emit_vm_flush */
>>    		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
>> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
>> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>>    		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>>    	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>>    	.emit_ib = sdma_v5_0_ring_emit_ib,
>> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>>    	.pad_ib = sdma_v5_0_ring_pad_ib,
>>    	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>>    	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
>> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>>    	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>>    	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>>    	.preempt_ib = sdma_v5_0_ring_preempt_ib,



More information about the amd-gfx mailing list