[PATCH v3 23/24] drm/amdkfd: set debug trap bit when enabling PC Sampling
James Zhu
jamesz at amd.com
Tue Jan 2 20:35:56 UTC 2024
On 2023-12-15 10:59, James Zhu wrote:
> From: David Yat Sin<David.YatSin at amd.com>
>
> We need the SPI_GDBG_PER_VMID_CNTL.TRAP_EN bit to be set during PC
> Sampling so that the TTMP registers are valid inside the sampling data.
> runtime_info.ttmp_setup will be cleared when the user application
> does the AMDKFD_IOC_RUNTIME_ENABLE ioctl without
> KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK flag on exit.
>
> It is also not valid to have the debugger attached to a process while PC
> sampling is enabled so adding some checks to prevent this.
>
> Signed-off-by: David Yat Sin<David.YatSin at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 31 ++++----------
> drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 22 ++++++++++
> drivers/gpu/drm/amd/amdkfd/kfd_debug.h | 3 ++
> drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c | 43 +++++++++++++++++---
> drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h | 4 +-
> 5 files changed, 75 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 1a3a8ded9c93..f7a8794c2bde 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1775,7 +1775,7 @@ static int kfd_ioctl_pc_sample(struct file *filep,
> pr_debug("failed to bind process %p with gpu id 0x%x", p, args->gpu_id);
> ret = -ESRCH;
> } else {
> - ret = kfd_pc_sample(pdd, args);
> + ret = kfd_pc_sample(p, pdd, args);
> }
> }
> mutex_unlock(&p->mutex);
> @@ -2808,26 +2808,9 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
>
> p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
> p->runtime_info.r_debug = r_debug;
> - p->runtime_info.ttmp_setup = enable_ttmp_setup;
>
> - if (p->runtime_info.ttmp_setup) {
> - for (i = 0; i < p->n_pdds; i++) {
> - struct kfd_process_device *pdd = p->pdds[i];
> -
> - if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
> - amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> - pdd->dev->kfd2kgd->enable_debug_trap(
> - pdd->dev->adev,
> - true,
> - pdd->dev->vm_info.last_vmid_kfd);
> - } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> - pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
> - pdd->dev->adev,
> - false,
> - 0);
> - }
> - }
> - }
> + if (enable_ttmp_setup)
> + kfd_dbg_enable_ttmp_setup(p);
>
> retry:
> if (p->debug_trap_enabled) {
> @@ -2976,9 +2959,13 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
> goto out;
> }
>
> - /* Check if target is still PTRACED. */
> rcu_read_lock();
> - if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
> +
> + if (kfd_pc_sampling_enabled(target)) {
> + pr_debug("Cannot enable debug trap on PID:%d because PC Sampling active\n", args->pid);
> + r = -EBUSY;
> + /* Check if target is still PTRACED. */
> + } else if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
> && ptrace_parent(target->lead_thread) != current) {
> pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
> r = -EPERM;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 9ec750666382..092c2dc84d24 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -1118,3 +1118,25 @@ void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
>
> mutex_unlock(&target->event_mutex);
> }
> +
> +void kfd_dbg_enable_ttmp_setup(struct kfd_process *p)
> +{
> + int i;
> + p->runtime_info.ttmp_setup = true;
> + for (i = 0; i < p->n_pdds; i++) {
> + struct kfd_process_device *pdd = p->pdds[i];
> +
> + if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
> + amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> + pdd->dev->kfd2kgd->enable_debug_trap(
> + pdd->dev->adev,
> + true,
> + pdd->dev->vm_info.last_vmid_kfd);
> + } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> + pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
> + pdd->dev->adev,
> + false,
> + 0);
> + }
> + }
> +}
> \ No newline at end of file
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index fd0ff64d4184..d7ce0b119dd0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -90,6 +90,9 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
>
> void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
> uint64_t exception_set_mask);
> +
> +void kfd_dbg_enable_ttmp_setup(struct kfd_process *p);
> +
> /*
> * If GFX off is enabled, chips that do not support RLC restore for the debug
> * registers will disable GFX off temporarily for the entire debug session.
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> index d8286aabd5a7..6870548fc42c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
> @@ -24,6 +24,7 @@
> #include "kfd_priv.h"
> #include "amdgpu_amdkfd.h"
> #include "kfd_pc_sampling.h"
> +#include "kfd_debug.h"
> #include "kfd_device_queue_manager.h"
>
> struct supported_pc_sample_info {
> @@ -39,6 +40,19 @@ struct supported_pc_sample_info supported_formats[] = {
> { IP_VERSION(9, 4, 2), &sample_info_hosttrap_9_0_0 },
> };
>
> +/* Checks whether PC Sampling is enabled on any devices in use by this process */
> +bool kfd_pc_sampling_enabled(struct kfd_process *p) {
[JZ] why not add flag p->enabled_trap_type eg0x1. for debug, 0x2 for
host trap 0x4 for stochastic. ...
.... update p->enabled_trap_type after pcs creation successfully, clear
when at the end of pcs destroy, then we needn't this function.
> + int i;
> +
> + for (i = 0; i < p->n_pdds; i++) {
> + struct kfd_process_device *pdd = p->pdds[i];
> +
> + if (pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method)
> + return true;
> + }
> + return false;
> +}
> +
> static int kfd_pc_sample_thread(void *param)
> {
> struct amdgpu_device *adev;
> @@ -99,13 +113,19 @@ static int kfd_pc_sample_thread_start(struct kfd_node *node)
> return ret;
> }
>
> -static int kfd_pc_sample_query_cap(struct kfd_process_device *pdd,
> +static int kfd_pc_sample_query_cap(struct kfd_process *p, struct kfd_process_device *pdd,
> struct kfd_ioctl_pc_sample_args __user *user_args)
> {
> uint64_t sample_offset;
> int num_method = 0;
> int i;
>
> + if (p->debug_trap_enabled) {
[JZ] Move to kfd_pc_sample(...)
> + pr_debug("Cannot have PC Sampling and debug trap simultaneously");
> + user_args->num_sample_info = 0;
> + return 0;
> + }
> +
> for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
> if (KFD_GC_VERSION(pdd->dev) == supported_formats[i].ip_version)
> num_method++;
> @@ -205,7 +225,7 @@ static int kfd_pc_sample_stop(struct kfd_process_device *pdd,
> return 0;
> }
>
> -static int kfd_pc_sample_create(struct kfd_process_device *pdd,
> +static int kfd_pc_sample_create(struct kfd_process *p, struct kfd_process_device *pdd,
> struct kfd_ioctl_pc_sample_args __user *user_args)
> {
> struct kfd_pc_sample_info *supported_format = NULL;
> @@ -217,6 +237,11 @@ static int kfd_pc_sample_create(struct kfd_process_device *pdd,
> if (user_args->num_sample_info != 1)
> return -EINVAL;
>
> + if (p->debug_trap_enabled) {
[JZ] Move to kfd_pc_sample(...)
> + pr_debug("Cannot have PC Sampling and debug trap simultaneously");
> + return -EBUSY;
> + }
> +
> ret = copy_from_user(&user_info, (void __user *) user_args->sample_info_ptr,
> sizeof(struct kfd_pc_sample_info));
> if (ret) {
> @@ -275,6 +300,14 @@ static int kfd_pc_sample_create(struct kfd_process_device *pdd,
> pcs_entry->pdd = pdd;
> user_args->trace_id = (uint32_t)i;
>
> + /*
> + * Set SPI_GDBG_PER_VMID_CNTL.TRAP_EN so that TTMP registers are valid in the sampling data
> + * p->runtime_info.ttmp_setup will be cleared when user application calls runtime_disable
[JZ] why don't put enable into runtime_enable with the same code level.
> + * on exit.
> + */
> + if (!p->runtime_info.ttmp_setup)
[JZ] this check can be put into kfd_dbg_enable_ttmp_setup
> + kfd_dbg_enable_ttmp_setup(p);
> pr_debug("alloc pcs_entry = %p, trace_id = 0x%x on gpu 0x%x", pcs_entry, i, pdd->dev->id);
>
> return 0;
> @@ -321,7 +354,7 @@ void kfd_pc_sample_release(struct kfd_process_device *pdd)
> mutex_unlock(&pdd->dev->pcs_data.mutex);
> }
>
> -int kfd_pc_sample(struct kfd_process_device *pdd,
> +int kfd_pc_sample(struct kfd_process *p, struct kfd_process_device *pdd,
> struct kfd_ioctl_pc_sample_args __user *args)
> {
> struct pc_sampling_entry *pcs_entry;
> @@ -344,10 +377,10 @@ int kfd_pc_sample(struct kfd_process_device *pdd,
>
> switch (args->op) {
> case KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES:
> - return kfd_pc_sample_query_cap(pdd, args);
> + return kfd_pc_sample_query_cap(p, pdd, args);
>
> case KFD_IOCTL_PCS_OP_CREATE:
> - return kfd_pc_sample_create(pdd, args);
> + return kfd_pc_sample_create(p, pdd, args);
>
> case KFD_IOCTL_PCS_OP_DESTROY:
> if (pcs_entry->enabled)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> index 6175563ca9be..42525feefb85 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
> @@ -28,8 +28,10 @@
> #include "amdgpu.h"
> #include "kfd_priv.h"
>
> -int kfd_pc_sample(struct kfd_process_device *pdd,
> +int kfd_pc_sample(struct kfd_process *p, struct kfd_process_device *pdd,
> struct kfd_ioctl_pc_sample_args __user *args);
> void kfd_pc_sample_release(struct kfd_process_device *pdd);
>
> +bool kfd_pc_sampling_enabled(struct kfd_process *p);
> +
> #endif /* KFD_PC_SAMPLING_H_ */
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20240102/84e82a87/attachment-0001.htm>
More information about the amd-gfx
mailing list