[PATCH 3/3] drm/amdgpu: Add plumbing for handling SQ EDC/ECC interrupts.

Nicolai Hähnle nicolai.haehnle at amd.com
Tue Jun 5 17:08:58 UTC 2018


On 05.06.2018 15:17, Andrey Grodzovsky wrote:
> From: David Panariti <David.Panariti at amd.com>
> 
> SQ can generate interrupts; this patch installs the ISR to
> handle them.
> 
> Add parsing of SQ data in the interrupt handler.
> 
> Signed-off-by: David Panariti <David.Panariti at amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 110 +++++++++++++++++++++++++++++++++-
>   1 file changed, 109 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index a19fcc6..c4a2c3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -2058,6 +2058,15 @@ static int gfx_v8_0_sw_init(void *handle)
>   		}
>   	}
>   
> +	/* SQ interrupts. */
> +	/* @todo XXX is this CZ only? */

The SQ interrupt source in general is the same on all GCN chips as far as I know.

EDC/ECC (which is the context for this change) is only available on some 
chips, and I don't remember off the top of my head which ones, but it 
really doesn't matter for this commit: we can install the IRQ on all 
chips, and the EDC/ECC path simply never triggers on hardware without ECC 
enabled. The other potential IRQ causes for SQ still apply.

Cheers,
Nicolai



> +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239,
> +			      &adev->gfx.sq_irq);
> +	if (r) {
> +		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
> +		return r;
> +	}
> +
>   	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
>   
>   	gfx_v8_0_scratch_init(adev);
> @@ -5122,6 +5131,8 @@ static int gfx_v8_0_hw_fini(void *handle)
>   	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
>   	if (adev->asic_type == CHIP_CARRIZO)
>   		amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
> +	/* @todo XXX Is this CZ only? */
> +	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
>   
>   	/* disable KCQ to avoid CPC touch memory not valid anymore */
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++)
> @@ -5561,6 +5572,14 @@ static int gfx_v8_0_late_init(void *handle)
>   			return r;
>   		}
>   	}
> +	/* @todo XXX Is this CZ only? */
> +	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
> +	if (r) {
> +		DRM_ERROR(
> +			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
> +			r);
> +		return r;
> +	}
>   
>   	amdgpu_device_ip_set_powergating_state(adev,
>   					       AMD_IP_BLOCK_TYPE_GFX,
> @@ -6852,6 +6871,32 @@ static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
>   	return 0;
>   }
>   
> +static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
> +				     struct amdgpu_irq_src *source,
> +				     unsigned int type,
> +				     enum amdgpu_interrupt_state state)
> +{
> +	int enable_flag;
> +
> +	switch (state) {
> +	case AMDGPU_IRQ_STATE_DISABLE:
> +		enable_flag = 1;
> +		break;
> +
> +	case AMDGPU_IRQ_STATE_ENABLE:
> +		enable_flag = 0;
> +		break;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
> +		     enable_flag);
> +
> +	return 0;
> +}
> +
>   static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
>   			    struct amdgpu_irq_src *source,
>   			    struct amdgpu_iv_entry *entry)
> @@ -6906,7 +6951,62 @@ static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
>   				     struct amdgpu_irq_src *source,
>   				     struct amdgpu_iv_entry *entry)
>   {
> -	DRM_ERROR("ECC error detected.");
> +	DRM_ERROR("CP EDC/ECC error detected.");
> +	return 0;
> +}
> +
> +static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
> +			   struct amdgpu_irq_src *source,
> +			   struct amdgpu_iv_entry *entry)
> +{
> +	u8 enc, se_id;
> +	char type[20];
> +
> +	/* Parse all fields according to SQ_INTERRUPT* registers */
> +	enc = (entry->src_data[0] >> 26) & 0x3;
> +	se_id = (entry->src_data[0] >> 24) & 0x3;
> +
> +	switch (enc) {
> +		case 0:
> +			DRM_INFO("SQ general purpose intr detected:"
> +					"se_id %d, immed_overflow %d, host_reg_overflow %d,"
> +					"host_cmd_overflow %d, cmd_timestamp %d,"
> +					"reg_timestamp %d, thread_trace_buff_full %d,"
> +					"wlt %d, thread_trace %d.\n",
> +					se_id,
> +					(entry->src_data[0] >> 7) & 0x1,
> +					(entry->src_data[0] >> 6) & 0x1,
> +					(entry->src_data[0] >> 5) & 0x1,
> +					(entry->src_data[0] >> 4) & 0x1,
> +					(entry->src_data[0] >> 3) & 0x1,
> +					(entry->src_data[0] >> 2) & 0x1,
> +					(entry->src_data[0] >> 1) & 0x1,
> +					entry->src_data[0] & 0x1
> +					);
> +			break;
> +		case 1:
> +		case 2:
> +
> +			if (enc == 1)
> +				sprintf(type, "instruction intr");
> +			else
> +				sprintf(type, "EDC/ECC error");
> +
> +			DRM_INFO(
> +				"SQ %s detected: "
> +					"se_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d\n",
> +					type, se_id,
> +					(entry->src_data[0] >> 20) & 0xf,
> +					(entry->src_data[0] >> 18) & 0x3,
> +					(entry->src_data[0] >> 14) & 0xf,
> +					(entry->src_data[0] >> 10) & 0xf
> +					);
> +			break;
> +		default:
> +			DRM_ERROR("SQ invalid encoding type\n.");
> +			return -EINVAL;
> +	}
> +
>   	return 0;
>   }
>   
> @@ -7115,6 +7215,11 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
>   	.process = gfx_v8_0_cp_ecc_error_irq,
>   };
>   
> +static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
> +	.set = gfx_v8_0_set_sq_int_state,
> +	.process = gfx_v8_0_sq_irq,
> +};
> +
>   static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
>   {
>   	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
> @@ -7131,6 +7236,9 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
>   
>   	adev->gfx.cp_ecc_error_irq.num_types = 1;
>   	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;
> +
> +	adev->gfx.sq_irq.num_types = 1;
> +	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
>   }
>   
>   static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
> 



More information about the amd-gfx mailing list