[PATCH 14/32] drm/amdgpu: expose debug api for mes

Felix Kuehling felix.kuehling at amd.com
Mon Mar 20 20:47:23 UTC 2023


On 2023-01-25 14:53, Jonathan Kim wrote:
> Similar to the F32 HWS, the RS64 HWS for GFX11 now supports a multi-process
> debug API.
>
> The skip_process_ctx_clear flag in ADD_QUEUE prevents the MES from
> clearing the process context when the first queue is added to the
> scheduler, so that debug mode settings are maintained across queue
> preemption and restore.  The MES clears the process context in this case
> due to an unresolved FW caching bug during normal mode operations.
> During debug mode, the KFD holds a reference to the target process, so
> the process context should never go stale and the MES can afford to skip
> the clear.
>
> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c       | 32 +++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h       | 20 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/mes_v11_0.c        | 12 +++++++
>   drivers/gpu/drm/amd/include/mes_v11_api_def.h | 21 +++++++++++-
>   4 files changed, 84 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 82e27bd4f038..4916e0b0156f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -924,6 +924,38 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
>   	return r;
>   }
>   
> +int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
> +				uint64_t process_context_addr,
> +				uint32_t spi_gdbg_per_vmid_cntl,
> +				const uint32_t *tcp_watch_cntl,
> +				uint32_t flags)
> +{
> +	struct mes_misc_op_input op_input = {0};
> +	int r;
> +
> +	if (!adev->mes.funcs->misc_op) {
> +		DRM_ERROR("mes set shader debugger is not supported!\n");
> +		return -EINVAL;
> +	}
> +
> +	op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
> +	op_input.set_shader_debugger.process_context_addr = process_context_addr;
> +	op_input.set_shader_debugger.flags.u32all = flags;
> +	op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl;
> +	memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
> +			sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
> +
> +	amdgpu_mes_lock(&adev->mes);
> +
> +	r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
> +	if (r)
> +		DRM_ERROR("failed to set_shader_debugger\n");
> +
> +	amdgpu_mes_unlock(&adev->mes);
> +
> +	return r;
> +}
> +
>   static void
>   amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
>   			       struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 547ec35691fa..d20df0cf0d88 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -256,6 +256,7 @@ enum mes_misc_opcode {
>   	MES_MISC_OP_READ_REG,
>   	MES_MISC_OP_WRM_REG_WAIT,
>   	MES_MISC_OP_WRM_REG_WR_WAIT,
> +	MES_MISC_OP_SET_SHADER_DEBUGGER,
>   };
>   
>   struct mes_misc_op_input {
> @@ -278,6 +279,20 @@ struct mes_misc_op_input {
>   			uint32_t                   reg0;
>   			uint32_t                   reg1;
>   		} wrm_reg;
> +
> +		struct {
> +			uint64_t process_context_addr;
> +			union {
> +				struct {
> +					uint64_t single_memop : 1;
> +					uint64_t single_alu_op : 1;
> +					uint64_t reserved: 30;
> +				};
> +				uint32_t u32all;
> +			} flags;
> +			uint32_t spi_gdbg_per_vmid_cntl;
> +			uint32_t tcp_watch_cntl[4];
> +		} set_shader_debugger;
>   	};
>   };
>   
> @@ -340,6 +355,11 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
>   int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
>   				  uint32_t reg0, uint32_t reg1,
>   				  uint32_t ref, uint32_t mask);
> +int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
> +				uint64_t process_context_addr,
> +				uint32_t spi_gdbg_per_vmid_cntl,
> +				const uint32_t *tcp_watch_cntl,
> +				uint32_t flags);
>   
>   int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
>   			int queue_type, int idx,
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index 62cdd2113135..fbacdc42efac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -334,6 +334,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
>   		misc_pkt.wait_reg_mem.reg_offset1 = input->wrm_reg.reg0;
>   		misc_pkt.wait_reg_mem.reg_offset2 = input->wrm_reg.reg1;
>   		break;
> +	case MES_MISC_OP_SET_SHADER_DEBUGGER:
> +		misc_pkt.opcode = MESAPI_MISC__SET_SHADER_DEBUGGER;
> +		misc_pkt.set_shader_debugger.process_context_addr =
> +				input->set_shader_debugger.process_context_addr;
> +		misc_pkt.set_shader_debugger.flags.u32all =
> +				input->set_shader_debugger.flags.u32all;
> +		misc_pkt.set_shader_debugger.spi_gdbg_per_vmid_cntl =
> +				input->set_shader_debugger.spi_gdbg_per_vmid_cntl;
> +		memcpy(misc_pkt.set_shader_debugger.tcp_watch_cntl,
> +				input->set_shader_debugger.tcp_watch_cntl,
> +				sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl));
> +		break;
>   	default:
>   		DRM_ERROR("unsupported misc op (%d) \n", input->op);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> index dc694cb246d9..f3c15f18ddb5 100644
> --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
> @@ -274,7 +274,8 @@ union MESAPI__ADD_QUEUE {
>   			uint32_t is_kfd_process		: 1;
>   			uint32_t trap_en		: 1;
>   			uint32_t is_aql_queue		: 1;
> -			uint32_t reserved		: 20;
> +			uint32_t skip_process_ctx_clear : 1;
> +			uint32_t reserved		: 19;
>   		};
>   		struct MES_API_STATUS		api_status;
>   		uint64_t                        tma_addr;
> @@ -523,6 +524,7 @@ enum MESAPI_MISC_OPCODE {
>   	MESAPI_MISC__QUERY_STATUS,
>   	MESAPI_MISC__READ_REG,
>   	MESAPI_MISC__WAIT_REG_MEM,
> +	MESAPI_MISC__SET_SHADER_DEBUGGER,
>   	MESAPI_MISC__MAX,
>   };
>   
> @@ -561,6 +563,20 @@ struct QUERY_STATUS {
>   	uint32_t context_id;
>   };
>   
> +struct SET_SHADER_DEBUGGER {
> +	uint64_t process_context_addr;
> +	union {
> +		struct {
> +			uint32_t single_memop : 1;  /* SQ_DEBUG.single_memop */
> +			uint32_t single_alu_op : 1; /* SQ_DEBUG.single_alu_op */
> +			uint32_t reserved : 30;
> +		};
> +		uint32_t u32all;
> +	} flags;
> +	uint32_t spi_gdbg_per_vmid_cntl;
> +	uint32_t tcp_watch_cntl[4]; /* TCP_WATCHx_CNTL */
> +};
> +
>   union MESAPI__MISC {
>   	struct {
>   		union MES_API_HEADER	header;
> @@ -573,6 +589,9 @@ union MESAPI__MISC {
>   			struct		QUERY_STATUS query_status;
>   			struct		READ_REG read_reg;
>   			struct          WAIT_REG_MEM wait_reg_mem;
> +			struct		SET_SHADER_DEBUGGER set_shader_debugger;
> +			enum MES_AMD_PRIORITY_LEVEL queue_sch_level;
> +
>   			uint32_t	data[MISC_DATA_MAX_SIZE_IN_DWORDS];
>   		};
>   	};


More information about the amd-gfx mailing list