[PATCH v3 23/24] drm/amdkfd: set debug trap bit when enabling PC Sampling

James Zhu jamesz at amd.com
Tue Jan 2 20:35:56 UTC 2024


On 2023-12-15 10:59, James Zhu wrote:
> From: David Yat Sin<David.YatSin at amd.com>
>
> We need the SPI_GDBG_PER_VMID_CNTL.TRAP_EN bit to be set during PC
> Sampling so that the TTMP registers are valid inside the sampling data.
> runtime_info.ttmp_setup will be cleared when the user application
> does the AMDKFD_IOC_RUNTIME_ENABLE ioctl without
> KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK flag on exit.
>
> It is also not valid to have the debugger attached to a process while PC
> sampling is enabled so adding some checks to prevent this.
>
> Signed-off-by: David Yat Sin<David.YatSin at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     | 31 ++++----------
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.c       | 22 ++++++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.h       |  3 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c | 43 +++++++++++++++++---
>   drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h |  4 +-
>   5 files changed, 75 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 1a3a8ded9c93..f7a8794c2bde 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1775,7 +1775,7 @@ static int kfd_ioctl_pc_sample(struct file *filep,
>   			pr_debug("failed to bind process %p with gpu id 0x%x", p, args->gpu_id);
>   			ret = -ESRCH;
>   		} else {
> -			ret = kfd_pc_sample(pdd, args);
> +			ret = kfd_pc_sample(p, pdd, args);
>   		}
>   	}
>   	mutex_unlock(&p->mutex);
> @@ -2808,26 +2808,9 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>   
>   	p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
>   	p->runtime_info.r_debug = r_debug;
> -	p->runtime_info.ttmp_setup = enable_ttmp_setup;
>   
> -	if (p->runtime_info.ttmp_setup) {
> -		for (i = 0; i < p->n_pdds; i++) {
> -			struct kfd_process_device *pdd = p->pdds[i];
> -
> -			if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
> -				amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> -				pdd->dev->kfd2kgd->enable_debug_trap(
> -						pdd->dev->adev,
> -						true,
> -						pdd->dev->vm_info.last_vmid_kfd);
> -			} else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> -				pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
> -						pdd->dev->adev,
> -						false,
> -						0);
> -			}
> -		}
> -	}
> +	if (enable_ttmp_setup)
> +		kfd_dbg_enable_ttmp_setup(p);
>   
>   retry:
>   	if (p->debug_trap_enabled) {
> @@ -2976,9 +2959,13 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
>   		goto out;
>   	}
>   
> -	/* Check if target is still PTRACED. */
>   	rcu_read_lock();
> -	if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
> +
> +	if (kfd_pc_sampling_enabled(target)) {
> +		pr_debug("Cannot enable debug trap on PID:%d because PC Sampling active\n", args->pid);
> +		r = -EBUSY;
> +	/* Check if target is still PTRACED. */
> +	} else if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
>   				&& ptrace_parent(target->lead_thread) != current) {
>   		pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
>   		r = -EPERM;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 9ec750666382..092c2dc84d24 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -1118,3 +1118,25 @@ void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
>   
>   	mutex_unlock(&target->event_mutex);
>   }
> +
> +void kfd_dbg_enable_ttmp_setup(struct kfd_process *p)
> +{
> +	int i;
> +	p->runtime_info.ttmp_setup = true;
> +	for (i = 0; i < p->n_pdds; i++) {
> +		struct kfd_process_device *pdd = p->pdds[i];
> +
> +		if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
> +			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> +			pdd->dev->kfd2kgd->enable_debug_trap(
> +					pdd->dev->adev,
> +					true,
> +					pdd->dev->vm_info.last_vmid_kfd);
> +		} else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> +			pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
> +					pdd->dev->adev,
> +					false,
> +					0);
> +		}
> +	}
> +}
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index fd0ff64d4184..d7ce0b119dd0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -90,6 +90,9 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
>   
>   void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
>   					uint64_t exception_set_mask);
> +
> +void kfd_dbg_enable_ttmp_setup(struct kfd_process *p);
> +
>   /*
>    * If GFX off is enabled, chips that do not support RLC restore for the debug
>    * registers will disable GFX off temporarily for the entire debug session.
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> index d8286aabd5a7..6870548fc42c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> @@ -24,6 +24,7 @@
>   #include "kfd_priv.h"
>   #include "amdgpu_amdkfd.h"
>   #include "kfd_pc_sampling.h"
> +#include "kfd_debug.h"
>   #include "kfd_device_queue_manager.h"
>   
>   struct supported_pc_sample_info {
> @@ -39,6 +40,19 @@ struct supported_pc_sample_info supported_formats[] = {
>   	{ IP_VERSION(9, 4, 2), &sample_info_hosttrap_9_0_0 },
>   };
>   
> +/* Checks whether PC Sampling is enabled on any devices in use by this process */
> +bool kfd_pc_sampling_enabled(struct kfd_process *p) {

[JZ] why not add flag p->enabled_trap_type eg0x1. for debug, 0x2 for 
host trap 0x4 for stochastic. ...

.... update p->enabled_trap_type  after pcs creation successfully, clear 
when at the end of pcs destroy, then we needn't this function.

> +	int i;
> +
> +	for (i = 0; i < p->n_pdds; i++) {
> +		struct kfd_process_device *pdd = p->pdds[i];
> +
> +		if (pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method)
> +			return true;
> +	}
> +	return false;
> +}
> +
>   static int kfd_pc_sample_thread(void *param)
>   {
>   	struct amdgpu_device *adev;
> @@ -99,13 +113,19 @@ static int kfd_pc_sample_thread_start(struct kfd_node *node)
>   	return ret;
>   }
>   
> -static int kfd_pc_sample_query_cap(struct kfd_process_device *pdd,
> +static int kfd_pc_sample_query_cap(struct kfd_process *p, struct kfd_process_device *pdd,
>   					struct kfd_ioctl_pc_sample_args __user *user_args)
>   {
>   	uint64_t sample_offset;
>   	int num_method = 0;
>   	int i;
>   
> +	if (p->debug_trap_enabled) {
[JZ] Move to kfd_pc_sample(...)
> +		pr_debug("Cannot have PC Sampling and debug trap simultaneously");
> +		user_args->num_sample_info = 0;
> +		return 0;
> +	}
> +
>   	for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
>   		if (KFD_GC_VERSION(pdd->dev) == supported_formats[i].ip_version)
>   			num_method++;
> @@ -205,7 +225,7 @@ static int kfd_pc_sample_stop(struct kfd_process_device *pdd,
>   	return 0;
>   }
>   
> -static int kfd_pc_sample_create(struct kfd_process_device *pdd,
> +static int kfd_pc_sample_create(struct kfd_process *p, struct kfd_process_device *pdd,
>   					struct kfd_ioctl_pc_sample_args __user *user_args)
>   {
>   	struct kfd_pc_sample_info *supported_format = NULL;
> @@ -217,6 +237,11 @@ static int kfd_pc_sample_create(struct kfd_process_device *pdd,
>   	if (user_args->num_sample_info != 1)
>   		return -EINVAL;
>   
> +	if (p->debug_trap_enabled) {
[JZ] Move to kfd_pc_sample(...)
> +		pr_debug("Cannot have PC Sampling and debug trap simultaneously");
> +		return -EBUSY;
> +	}
> +
>   	ret = copy_from_user(&user_info, (void __user *) user_args->sample_info_ptr,
>   				sizeof(struct kfd_pc_sample_info));
>   	if (ret) {
> @@ -275,6 +300,14 @@ static int kfd_pc_sample_create(struct kfd_process_device *pdd,
>   	pcs_entry->pdd = pdd;
>   	user_args->trace_id = (uint32_t)i;
>   
> +	/*
> +	 * Set SPI_GDBG_PER_VMID_CNTL.TRAP_EN so that TTMP registers are valid in the sampling data
> +	 * p->runtime_info.ttmp_setup will be cleared when user application calls runtime_disable
[JZ] why don't put enable into runtime_enable with the same code level.
> +	 * on exit.
> +	 */
> +	if (!p->runtime_info.ttmp_setup)
[JZ] this check can be put into kfd_dbg_enable_ttmp_setup
> +		kfd_dbg_enable_ttmp_setup(p);
>   	pr_debug("alloc pcs_entry = %p, trace_id = 0x%x on gpu 0x%x", pcs_entry, i, pdd->dev->id);
>   
>   	return 0;
> @@ -321,7 +354,7 @@ void kfd_pc_sample_release(struct kfd_process_device *pdd)
>   	mutex_unlock(&pdd->dev->pcs_data.mutex);
>   }
>   
> -int kfd_pc_sample(struct kfd_process_device *pdd,
> +int kfd_pc_sample(struct kfd_process *p, struct kfd_process_device *pdd,
>   					struct kfd_ioctl_pc_sample_args __user *args)
>   {
>   	struct pc_sampling_entry *pcs_entry;
> @@ -344,10 +377,10 @@ int kfd_pc_sample(struct kfd_process_device *pdd,
>   
>   	switch (args->op) {
>   	case KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES:
> -		return kfd_pc_sample_query_cap(pdd, args);
> +		return kfd_pc_sample_query_cap(p, pdd, args);
>   
>   	case KFD_IOCTL_PCS_OP_CREATE:
> -		return kfd_pc_sample_create(pdd, args);
> +		return kfd_pc_sample_create(p, pdd, args);
>   
>   	case KFD_IOCTL_PCS_OP_DESTROY:
>   		if (pcs_entry->enabled)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> index 6175563ca9be..42525feefb85 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> @@ -28,8 +28,10 @@
>   #include "amdgpu.h"
>   #include "kfd_priv.h"
>   
> -int kfd_pc_sample(struct kfd_process_device *pdd,
> +int kfd_pc_sample(struct kfd_process *p, struct kfd_process_device *pdd,
>   					struct kfd_ioctl_pc_sample_args __user *args);
>   void kfd_pc_sample_release(struct kfd_process_device *pdd);
>   
> +bool kfd_pc_sampling_enabled(struct kfd_process *p);
> +
>   #endif /* KFD_PC_SAMPLING_H_ */
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20240102/84e82a87/attachment-0001.htm>


More information about the amd-gfx mailing list