[PATCH 2/2] drm/amdgpu: fix vcn sw init failed

Lazar, Lijo lijo.lazar at amd.com
Tue Nov 12 14:54:11 UTC 2024



On 11/12/2024 8:00 PM, Jesse.zhang at amd.com wrote:
> [ 2875.870277] [drm:amdgpu_device_init [amdgpu]] *ERROR* sw_init of IP block <vcn_v4_0_3> failed -22
> [ 2875.880494] amdgpu 0000:01:00.0: amdgpu: amdgpu_device_ip_init failed
> [ 2875.887689] amdgpu 0000:01:00.0: amdgpu: Fatal error during GPU init
> [ 2875.894791] amdgpu 0000:01:00.0: amdgpu: amdgpu: finishing device.
> 
> Add irqs with different IRQ source pointer for vcn0 and vcn1.
> 
> Signed-off-by: Jesse Zhang <jesse.zhang at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> index ef3dfd44a022..82b90f1e6f33 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
> @@ -83,6 +83,10 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = {
>  
>  #define NORMALIZE_VCN_REG_OFFSET(offset) \
>  		(offset & 0x1FFFF)
> +static int amdgpu_ih_clientid_vcns[] = {
> +	SOC15_IH_CLIENTID_VCN,
> +	SOC15_IH_CLIENTID_VCN1

This is not valid for VCN 4.0.3. All instances use the same client id and
are distinguished only by node_id. Also, there can be up to 4 instances.

I would say that the entire IP instance series was done in haste without
enough thought, and it breaks other things, including the IP block mask.

Thanks,
Lijo

> +};
>  
>  static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev);
>  static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev, int inst);
> @@ -150,9 +154,9 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
>  	if (r)
>  		return r;
>  
> -	/* VCN DEC TRAP */
> -	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
> -		VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq);
> +	/* VCN UNIFIED TRAP */
> +	r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[inst],
> +			VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[inst].irq);
>  	if (r)
>  		return r;
>  
> @@ -174,7 +178,7 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
>  
>  	ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[inst].aid_id);
>  	sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[inst].aid_id);
> -	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
> +	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[inst].irq, 0,
>  				 AMDGPU_RING_PRIO_DEFAULT,
>  				 &adev->vcn.inst[inst].sched_score);
>  	if (r)
> @@ -1734,9 +1738,12 @@ static const struct amdgpu_irq_src_funcs vcn_v4_0_3_irq_funcs = {
>   */
>  static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev, int inst)
>  {
> -	adev->vcn.inst->irq.num_types++;
> +	if (adev->vcn.harvest_config & (1 << inst))
> +		return;
> +
> +	adev->vcn.inst[inst].irq.num_types = adev->vcn.num_enc_rings + 1;
>  
> -	adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs;
> +	adev->vcn.inst[inst].irq.funcs = &vcn_v4_0_3_irq_funcs;
>  }
>  
>  static void vcn_v4_0_3_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)


More information about the amd-gfx mailing list