[PATCH] drm/amdkfd: check both client id and src id in interrupt handlers

Felix Kuehling felix.kuehling at amd.com
Fri Dec 18 21:54:33 UTC 2020


On 2020-12-18 at 4:34 p.m., Alex Deucher wrote:
> We can have the same source ids for different client ids, so make sure to
> check both the client id and the source id when handling interrupts.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>

Looks reasonable to me. Does this fix a real problem, e.g. KFD
intercepting an interrupt meant for another client?

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>

> ---
>  .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   | 46 ++++++++++++++-----
>  1 file changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> index 241bd6ff79f4..0ca0327a39e5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
> @@ -44,6 +44,21 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
>  	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
>  	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
>  
> +	/* Only handle clients we care about */
> +	if (client_id != SOC15_IH_CLIENTID_GRBM_CP &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA0 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA1 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA2 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA3 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA4 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA5 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA6 &&
> +	    client_id != SOC15_IH_CLIENTID_SDMA7 &&
> +	    client_id != SOC15_IH_CLIENTID_VMC &&
> +	    client_id != SOC15_IH_CLIENTID_VMC1 &&
> +	    client_id != SOC15_IH_CLIENTID_UTCL2)
> +		return false;
> +
>  	/* This is a known issue for gfx9. Under non HWS, pasid is not set
>  	 * in the interrupt payload, so we need to find out the pasid on our
>  	 * own.
> @@ -96,17 +111,26 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
>  	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
>  	context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
>  
> -	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
> -		kfd_signal_event_interrupt(pasid, context_id, 32);
> -	else if (source_id == SOC15_INTSRC_SDMA_TRAP)
> -		kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
> -	else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
> -		kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
> -	else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
> -		kfd_signal_hw_exception_event(pasid);
> -	else if (client_id == SOC15_IH_CLIENTID_VMC ||
> -		client_id == SOC15_IH_CLIENTID_VMC1 ||
> -		 client_id == SOC15_IH_CLIENTID_UTCL2) {
> +	if (client_id == SOC15_IH_CLIENTID_GRBM_CP) {
> +		if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
> +			kfd_signal_event_interrupt(pasid, context_id, 32);
> +		else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
> +			kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
> +		else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
> +			kfd_signal_hw_exception_event(pasid);
> +	} else if (client_id == SOC15_IH_CLIENTID_SDMA0 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA1 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA2 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA3 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA4 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA5 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA6 ||
> +		   client_id == SOC15_IH_CLIENTID_SDMA7) {
> +		if (source_id == SOC15_INTSRC_SDMA_TRAP)
> +			kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
> +	} else if (client_id == SOC15_IH_CLIENTID_VMC ||
> +		   client_id == SOC15_IH_CLIENTID_VMC1 ||
> +		   client_id == SOC15_IH_CLIENTID_UTCL2) {
>  		struct kfd_vm_fault_info info = {0};
>  		uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry);
>  


More information about the amd-gfx mailing list