[PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting

Lazar, Lijo Lijo.Lazar at amd.com
Tue Jun 22 06:08:11 UTC 2021


[Public]

AFAIK, that expression is legal (some code analyzer may warn on value of 4*max_wgp_per_sh); similar kind is used in rotate shift operations.

Thanks,
Lijo

-----Original Message-----
From: Quan, Evan <Evan.Quan at amd.com> 
Sent: Tuesday, June 22, 2021 7:56 AM
To: Lazar, Lijo <Lijo.Lazar at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
Subject: RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting

[AMD Official Use Only]

Thanks Lijo.
However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a valid expression since it kind of triggers some overflow.
Can that work for non-x86 platform or even work reliably for x86 platform?

BR
Evan
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar at amd.com>
> Sent: Monday, June 21, 2021 8:08 PM
> To: Quan, Evan <Evan.Quan at amd.com>; amd-gfx at lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher at amd.com>
> Subject: Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> 
> One minor comment below, apart from that the series looks good.
> 
> Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
> 
> On 6/21/2021 12:30 PM, Evan Quan wrote:
> > Add missing settings for SQC bits. And correct some confusing logics 
> > around active wgp bitmap calculation.
> >
> > Change-Id: If4992e175fd61d5609b00328cbe21f487517d039
> > Signed-off-by: Evan Quan <evan.quan at amd.com>
> > ---
> > V1->V2:
> >    - restore correct tcp_harvest setting for NV10 and NV12
> >    - move asic type guard upper layer for better readability
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++------
> -----
> >   1 file changed, 57 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 15ae9e33b925..384b95fbad8b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct
> amdgpu_device *adev)
> >   		4 + /* RMI */
> >   		1); /* SQG */
> >
> > -	if (adev->asic_type == CHIP_NAVI10 ||
> > -	    adev->asic_type == CHIP_NAVI14 ||
> > -	    adev->asic_type == CHIP_NAVI12) {
> > -		mutex_lock(&adev->grbm_idx_mutex);
> > -		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > -			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > -				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > -				wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > -				/*
> > -				 * Set corresponding TCP bits for the inactive
> WGPs in
> > -				 * GCRD_SA_TARGETS_DISABLE
> > -				 */
> > -				gcrd_targets_disable_tcp = 0;
> > -				/* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > -				utcl_invreq_disable = 0;
> > -
> > -				for (k = 0; k < max_wgp_per_sh; k++) {
> > -					if (!(wgp_active_bitmap & (1 << k))) {
> > -						gcrd_targets_disable_tcp |=
> 3 << (2 * k);
> > -						utcl_invreq_disable |= (3 <<
> (2 * k)) |
> > -							(3 << (2 *
> (max_wgp_per_sh + k)));
> > -					}
> > +	mutex_lock(&adev->grbm_idx_mutex);
> > +	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > +		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > +			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > +			wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > +			/*
> > +			 * Set corresponding TCP bits for the inactive WGPs in
> > +			 * GCRD_SA_TARGETS_DISABLE
> > +			 */
> > +			gcrd_targets_disable_tcp = 0;
> > +			/* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > +			utcl_invreq_disable = 0;
> > +
> > +			for (k = 0; k < max_wgp_per_sh; k++) {
> > +				if (!(wgp_active_bitmap & (1 << k))) {
> > +					gcrd_targets_disable_tcp |= 3 << (2 *
> k);
> > +					gcrd_targets_disable_tcp |= 1 << (k +
> (max_wgp_per_sh * 2));
> > +					utcl_invreq_disable |= (3 << (2 * k)) |
> > +						(3 << (2 * (max_wgp_per_sh
> + k)));
> >   				}
> > -
> > -				tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > -				/* only override TCP & SQC bits */
> > -				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> > -				tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > -				WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > -
> > -				tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > -				/* only override TCP bits */
> > -				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
> > -				tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > -				WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> >   			}
> > -		}
> >
> > -		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > -		mutex_unlock(&adev->grbm_idx_mutex);
> > +			tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > +			/* only override TCP & SQC bits */
> > +			if (adev->asic_type == CHIP_NAVI14)
> > +				tmp &= 0xff000000;
> > +			else
> > +				tmp &=0xfff00000;
> 
> For the disable field mask calculation (which is the value that is 
> applied finally), there is no ASIC check. The above code may utilize 
> the same method as in the original code without ASIC check.
> 
> tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> 
> Same for below case also - 3*max_wgp_per_sh.
> 
> Thanks,
> Lijo
> 
> > +			tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > +			WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > +
> > +			tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > +			/* only override TCP & SQC bits */
> > +			if (adev->asic_type == CHIP_NAVI14)
> > +				tmp &= 0xfffc0000;
> > +			else
> > +				tmp &= 0xffff8000;
> > +			tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > +			WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> > +		}
> >   	}
> > +
> > +	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > +	mutex_unlock(&adev->grbm_idx_mutex);
> >   }
> >
> >   static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@
> > -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle)
> >   	 * init golden registers and rlc resume may override some registers,
> >   	 * reconfig them here
> >   	 */
> > -	gfx_v10_0_tcp_harvest(adev);
> > +	if (adev->asic_type == CHIP_NAVI10 ||
> > +	    adev->asic_type == CHIP_NAVI14 ||
> > +	    adev->asic_type == CHIP_NAVI12)
> > +		gfx_v10_0_tcp_harvest(adev);
> >
> >   	r = gfx_v10_0_cp_resume(adev);
> >   	if (r)
> > @@ -9328,17 +9334,22 @@ static void
> > gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device
> *
> >
> >   static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct
> amdgpu_device *adev)
> >   {
> > -	u32 data, wgp_bitmask;
> > -	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
> > -	data |= RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +	u32 disabled_mask =
> > +		~amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +	u32 efuse_setting = 0;
> > +	u32 vbios_setting = 0;
> > +
> > +	efuse_setting = RREG32_SOC15(GC, 0,
> mmCC_GC_SHADER_ARRAY_CONFIG);
> > +	efuse_setting &=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +	efuse_setting >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > -	data >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> > +	vbios_setting = RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +	vbios_setting &=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +	vbios_setting >>=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -	wgp_bitmask =
> > -		amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +	disabled_mask |= efuse_setting | vbios_setting;
> >
> > -	return (~data) & wgp_bitmask;
> > +	return (~disabled_mask);
> >   }
> >
> >   static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct
> > amdgpu_device *adev)
> >


More information about the amd-gfx mailing list