<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2023-12-15 10:59, James Zhu wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">From: David Yat Sin <a class="moz-txt-link-rfc2396E" href="mailto:David.YatSin@amd.com"><David.YatSin@amd.com></a>

We need the SPI_GDBG_PER_VMID_CNTL.TRAP_EN bit to be set during PC
Sampling so that the TTMP registers are valid inside the sampling data.
runtime_info.ttmp_setup will be cleared when the user application
does the AMDKFD_IOC_RUNTIME_ENABLE ioctl without
KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK flag on exit.

It is also not valid to have the debugger attached to a process while PC
sampling is enabled so adding some checks to prevent this.

Signed-off-by: David Yat Sin <a class="moz-txt-link-rfc2396E" href="mailto:David.YatSin@amd.com"><David.YatSin@amd.com></a>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     | 31 ++++----------
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c       | 22 ++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_debug.h       |  3 ++
 drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c | 43 +++++++++++++++++---
 drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h |  4 +-
 5 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 1a3a8ded9c93..f7a8794c2bde 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1775,7 +1775,7 @@ static int kfd_ioctl_pc_sample(struct file *filep,
                        pr_debug("failed to bind process %p with gpu id 0x%x", p, args->gpu_id);
                        ret = -ESRCH;
                } else {
-                       ret = kfd_pc_sample(pdd, args);
+                       ret = kfd_pc_sample(p, pdd, args);
                }
        }
        mutex_unlock(&p->mutex);
@@ -2808,26 +2808,9 @@ static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
 
        p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
        p->runtime_info.r_debug = r_debug;
-       p->runtime_info.ttmp_setup = enable_ttmp_setup;
 
-       if (p->runtime_info.ttmp_setup) {
-               for (i = 0; i < p->n_pdds; i++) {
-                       struct kfd_process_device *pdd = p->pdds[i];
-
-                       if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
-                               amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
-                               pdd->dev->kfd2kgd->enable_debug_trap(
-                                               pdd->dev->adev,
-                                               true,
-                                               pdd->dev->vm_info.last_vmid_kfd);
-                       } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
-                               pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
-                                               pdd->dev->adev,
-                                               false,
-                                               0);
-                       }
-               }
-       }
+       if (enable_ttmp_setup)
+               kfd_dbg_enable_ttmp_setup(p);
 
 retry:
        if (p->debug_trap_enabled) {
@@ -2976,9 +2959,13 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
                goto out;
        }
 
-       /* Check if target is still PTRACED. */
        rcu_read_lock();
-       if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
+
+       if (kfd_pc_sampling_enabled(target)) {
+               pr_debug("Cannot enable debug trap on PID:%d because PC Sampling active\n", args->pid);
+               r = -EBUSY;
+       /* Check if target is still PTRACED. */
+       } else if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE
                                && ptrace_parent(target->lead_thread) != current) {
                pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid);
                r = -EPERM;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 9ec750666382..092c2dc84d24 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -1118,3 +1118,25 @@ void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
 
        mutex_unlock(&target->event_mutex);
 }
+
+void kfd_dbg_enable_ttmp_setup(struct kfd_process *p)
+{
+       int i;
+       p->runtime_info.ttmp_setup = true;
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+
+               if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
+                       amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+                       pdd->dev->kfd2kgd->enable_debug_trap(
+                                       pdd->dev->adev,
+                                       true,
+                                       pdd->dev->vm_info.last_vmid_kfd);
+               } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+                       pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+                                       pdd->dev->adev,
+                                       false,
+                                       0);
+               }
+       }
+}
\ No newline at end of file
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index fd0ff64d4184..d7ce0b119dd0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -90,6 +90,9 @@ int kfd_dbg_trap_device_snapshot(struct kfd_process *target,
 
 void kfd_dbg_set_enabled_debug_exception_mask(struct kfd_process *target,
                                        uint64_t exception_set_mask);
+
+void kfd_dbg_enable_ttmp_setup(struct kfd_process *p);
+
 /*
  * If GFX off is enabled, chips that do not support RLC restore for the debug
  * registers will disable GFX off temporarily for the entire debug session.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
index d8286aabd5a7..6870548fc42c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
@@ -24,6 +24,7 @@
 #include "kfd_priv.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_pc_sampling.h"
+#include "kfd_debug.h"
 #include "kfd_device_queue_manager.h"
 
 struct supported_pc_sample_info {
@@ -39,6 +40,19 @@ struct supported_pc_sample_info supported_formats[] = {
        { IP_VERSION(9, 4, 2), &sample_info_hosttrap_9_0_0 },
 };
 
+/* Checks whether PC Sampling is enabled on any devices in use by this process */
+bool kfd_pc_sampling_enabled(struct kfd_process *p) {</pre>
    </blockquote>
    <p>[JZ] why not add flag  <span style="white-space: pre-wrap">p->enabled</span>_trap_type
      eg<span style="white-space: pre-wrap"> 0x1. for debug,  0x2 for host trap 0x4 for stochastic. ...</span></p>
    <p><span style="white-space: pre-wrap"> ....</span> update <span style="white-space: pre-wrap">p-></span><span style="white-space: pre-wrap">enabled</span>_trap_type  after pcs
      creation successfully, clear when at the end of pcs destroy, then
      we needn't this function.</p>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">+    int i;
+
+       for (i = 0; i < p->n_pdds; i++) {
+               struct kfd_process_device *pdd = p->pdds[i];
+
+               if (pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method)</pre>
    </blockquote>
    <span style="white-space: pre-wrap">
</span>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">+                    return true;
+       }
+       return false;
+}
+
 static int kfd_pc_sample_thread(void *param)
 {
        struct amdgpu_device *adev;
@@ -99,13 +113,19 @@ static int kfd_pc_sample_thread_start(struct kfd_node *node)
        return ret;
 }
 
-static int kfd_pc_sample_query_cap(struct kfd_process_device *pdd,
+static int kfd_pc_sample_query_cap(struct kfd_process *p, struct kfd_process_device *pdd,
                                        struct kfd_ioctl_pc_sample_args __user *user_args)
 {
        uint64_t sample_offset;
        int num_method = 0;
        int i;
 
+       if (p->debug_trap_enabled) {</pre>
    </blockquote>
    [JZ] Move to kfd_pc_sample(...)<br>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">+            pr_debug("Cannot have PC Sampling and debug trap simultaneously");
+               user_args->num_sample_info = 0;
+               return 0;
+       }
+
        for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
                if (KFD_GC_VERSION(pdd->dev) == supported_formats[i].ip_version)
                        num_method++;
@@ -205,7 +225,7 @@ static int kfd_pc_sample_stop(struct kfd_process_device *pdd,
        return 0;
 }
 
-static int kfd_pc_sample_create(struct kfd_process_device *pdd,
+static int kfd_pc_sample_create(struct kfd_process *p, struct kfd_process_device *pdd,
                                        struct kfd_ioctl_pc_sample_args __user *user_args)
 {
        struct kfd_pc_sample_info *supported_format = NULL;
@@ -217,6 +237,11 @@ static int kfd_pc_sample_create(struct kfd_process_device *pdd,
        if (user_args->num_sample_info != 1)
                return -EINVAL;
 
+       if (p->debug_trap_enabled) {</pre>
    </blockquote>
    [JZ] Move to kfd_pc_sample(...)
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">+            pr_debug("Cannot have PC Sampling and debug trap simultaneously");
+               return -EBUSY;
+       }
+
        ret = copy_from_user(&user_info, (void __user *) user_args->sample_info_ptr,
                                sizeof(struct kfd_pc_sample_info));
        if (ret) {
@@ -275,6 +300,14 @@ static int kfd_pc_sample_create(struct kfd_process_device *pdd,
        pcs_entry->pdd = pdd;
        user_args->trace_id = (uint32_t)i;
 
+       /*
+        * Set SPI_GDBG_PER_VMID_CNTL.TRAP_EN so that TTMP registers are valid in the sampling data
+        * p->runtime_info.ttmp_setup will be cleared when user application calls runtime_disable</pre>
    </blockquote>
    [JZ] why don't put enable into <span style="white-space: pre-wrap">runtime_enable with the same code level.  </span>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">
+        * on exit.
+        */
+       if (!p->runtime_info.ttmp_setup)</pre>
    </blockquote>
    [JZ] this check can be put into <span style="white-space: pre-wrap">kfd_dbg_enable_ttmp_setup</span>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">+            kfd_dbg_enable_ttmp_setup(p);</pre>
    </blockquote>
    <blockquote type="cite" cite="mid:20231215155951.811884-24-James.Zhu@amd.com">
      <pre class="moz-quote-pre" wrap="">     pr_debug("alloc pcs_entry = %p, trace_id = 0x%x on gpu 0x%x", pcs_entry, i, pdd->dev->id);
 
        return 0;
@@ -321,7 +354,7 @@ void kfd_pc_sample_release(struct kfd_process_device *pdd)
        mutex_unlock(&pdd->dev->pcs_data.mutex);
 }
 
-int kfd_pc_sample(struct kfd_process_device *pdd,
+int kfd_pc_sample(struct kfd_process *p, struct kfd_process_device *pdd,
                                        struct kfd_ioctl_pc_sample_args __user *args)
 {
        struct pc_sampling_entry *pcs_entry;
@@ -344,10 +377,10 @@ int kfd_pc_sample(struct kfd_process_device *pdd,
 
        switch (args->op) {
        case KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES:
-               return kfd_pc_sample_query_cap(pdd, args);
+               return kfd_pc_sample_query_cap(p, pdd, args);
 
        case KFD_IOCTL_PCS_OP_CREATE:
-               return kfd_pc_sample_create(pdd, args);
+               return kfd_pc_sample_create(p, pdd, args);
 
        case KFD_IOCTL_PCS_OP_DESTROY:
                if (pcs_entry->enabled)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
index 6175563ca9be..42525feefb85 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.h
@@ -28,8 +28,10 @@
 #include "amdgpu.h"
 #include "kfd_priv.h"
 
-int kfd_pc_sample(struct kfd_process_device *pdd,
+int kfd_pc_sample(struct kfd_process *p, struct kfd_process_device *pdd,
                                        struct kfd_ioctl_pc_sample_args __user *args);
 void kfd_pc_sample_release(struct kfd_process_device *pdd);
 
+bool kfd_pc_sampling_enabled(struct kfd_process *p);
+
 #endif /* KFD_PC_SAMPLING_H_ */
</pre>
    </blockquote>
  </body>
</html>