<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0078D7;margin:15pt;" align="Left">
[AMD Official Use Only - Internal Distribution Only]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
I spotted two cosmetic issues while browsing it. See them inline, marked [yz].</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Yong</div>
<div id="appendonsend"></div>
<div style="font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<br>
</div>
<hr tabindex="-1" style="display:inline-block; width:98%">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" color="#000000" style="font-size:11pt"><b>From:</b> amd-gfx <amd-gfx-bounces@lists.freedesktop.org> on behalf of Gang Ba <gaba@amd.com><br>
<b>Sent:</b> Tuesday, April 28, 2020 9:58 AM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org <amd-gfx@lists.freedesktop.org><br>
<b>Subject:</b> [PATCH] drm/amd: add Streaming Performance Monitor feature</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt">
<div class="PlainText">Signed-off-by: Gang Ba <gaba@amperd.com><br>
Change-Id: If83ee0a14ef834699de6733eeab0f140159bbd6e<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/Makefile                |   3 +-<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h         |  10 +<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c | 165 ++++++<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h            |  27 +<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c             | 175 +++++++<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c              | 176 +++++++<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c              | 168 ++++++<br>
 drivers/gpu/drm/amd/amdkfd/Makefile                |   3 +-<br>
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c   |  10 +-<br>
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           |  17 +<br>
 drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c    |  11 +-<br>
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  20 +<br>
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           |   1 +<br>
 drivers/gpu/drm/amd/amdkfd/kfd_spm.c               | 577 +++++++++++++++++++++<br>
 drivers/gpu/drm/amd/amdkfd/soc15_int.h             |   1 +<br>
 include/uapi/linux/kfd_ioctl.h                     |  55 +-<br>
 16 files changed, 1413 insertions(+), 6 deletions(-)<br>
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c<br>
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_spm.c<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile<br>
index 210d57a..1498b18 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/Makefile<br>
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile<br>
@@ -177,7 +177,8 @@ amdgpu-y += \<br>
          amdgpu_amdkfd_gfx_v8.o \<br>
          amdgpu_amdkfd_gfx_v9.o \<br>
          amdgpu_amdkfd_arcturus.o \<br>
-        amdgpu_amdkfd_gfx_v10.o<br>
+        amdgpu_amdkfd_gfx_v10.o \<br>
+        amdgpu_amdkfd_rlc_spm.o<br>
 <br>
 ifneq ($(CONFIG_DRM_AMDGPU_CIK),)<br>
 amdgpu-y += amdgpu_amdkfd_gfx_v7.o<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h<br>
index d065c50..fdc438a 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h<br>
@@ -246,6 +246,16 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);<br>
 int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,<br>
                                 struct tile_config *config);<br>
 <br>
+void amdgpu_amdkfd_rlc_spm_cntl(struct kgd_dev *kgd, bool cntl);<br>
+int amdgpu_amdkfd_rlc_spm(struct kgd_dev *kgd, void *args);<br>
+void amdgpu_amdkfd_rlc_spm_acquire(struct kgd_dev *kgd, u64 gpu_addr, u32 size);<br>
+int amdgpu_amdkfd_rlc_spm_release(struct kgd_dev *kgd);<br>
+u32 amdgpu_amdkfd_rlc_spm_get_rdptr(struct kgd_dev *kgd);<br>
+void amdgpu_amdkfd_rlc_spm_set_rdptr(struct kgd_dev *kgd, u32 rptr);<br>
+u32 amdgpu_amdkfd_rlc_spm_get_wrptr(struct kgd_dev *kgd);<br>
+void amdgpu_amdkfd_rlc_spm_set_wrptr(struct kgd_dev *kgd, u32 wptr);<br>
+int amdgpu_amdkfd_rlc_spm_set_reg(struct kgd_dev *kgd, u64 uReg, u32 value);<br>
+<br>
 /* KGD2KFD callbacks */<br>
 int kgd2kfd_init(void);<br>
 void kgd2kfd_exit(void);<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c<br>
new file mode 100644<br>
index 0000000..b492c1e<br>
--- /dev/null<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_rlc_spm.c<br>
@@ -0,0 +1,165 @@<br>
+/*<br>
+ * Copyright 2014-2020 Advanced Micro Devices, Inc.<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice shall be included in<br>
+ * all copies or substantial portions of the Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL<br>
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR<br>
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,<br>
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR<br>
+ * OTHER DEALINGS IN THE SOFTWARE.<br>
+ */<br>
+<br>
+#include <linux/dma-buf.h><br>
+#include <linux/list.h><br>
+#include <linux/pagemap.h><br>
+#include <linux/sched/mm.h><br>
+#include <linux/sched/task.h><br>
+<br>
+#include "amdgpu_object.h"<br>
+#include "amdgpu_vm.h"<br>
+#include "amdgpu_amdkfd.h"<br>
+#include <uapi/linux/kfd_ioctl.h><br>
+<br>
+<br>
+<br>
+void amdgpu_amdkfd_rlc_spm_cntl(struct kgd_dev *kgd, bool cntl)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        cntl ? adev->gfx.spm.spmf->start(adev) : adev->gfx.spm.spmf->stop(adev);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+}<br>
+<br>
+u32 amdgpu_amdkfd_rlc_spm_get_rdptr(struct kgd_dev *kgd)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+        u32 rptr = 0;<br>
+<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        rptr = adev->gfx.spm.spmf->get_rdptr(adev);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+<br>
+        return rptr;<br>
+}<br>
+<br>
+void amdgpu_amdkfd_rlc_spm_set_rdptr(struct kgd_dev *kgd, u32 rptr)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        adev->gfx.spm.spmf->set_rdptr(adev, rptr);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+<br>
+}<br>
+<br>
+u32 amdgpu_amdkfd_rlc_spm_get_wrptr(struct kgd_dev *kgd)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+        u32 wptr;<br>
+<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        wptr = adev->gfx.spm.spmf->get_wrptr(adev);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+        return wptr;<br>
+}<br>
+<br>
+void amdgpu_amdkfd_rlc_spm_set_wrptr(struct kgd_dev *kgd, u32 wptr)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        adev->gfx.spm.spmf->set_wrptr(adev, wptr);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+}<br>
+<br>
+void amdgpu_amdkfd_rlc_spm_acquire(struct kgd_dev *kgd, u64 gpu_addr, u32 size)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+        /* init spm vmid with 0x0 */<br>
+        if (adev->gfx.rlc.funcs->update_spm_vmid)<br>
+                adev->gfx.rlc.funcs->update_spm_vmid(adev, 0x0);<br>
+<br>
+        /* set spm ring registers */<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        adev->gfx.spm.spmf->set_spm_porfmon_ring_buf(adev, gpu_addr, size);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+}<br>
+<br>
+<br>
+int amdgpu_amdkfd_rlc_spm_release(struct kgd_dev *kgd)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+<br>
+        /* stop spm stream and interrupt */<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        adev->gfx.spm.spmf->stop(adev);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+<br>
+        amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0);<br>
+<br>
+        /* revert spm vmid with 0xf */<br>
+        if (adev->gfx.rlc.funcs->update_spm_vmid)<br>
+             adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);<br>
+<br>
+        return 0;<br>
+}<br>
+<br>
+int amdgpu_amdkfd_rlc_spm_set_reg(struct kgd_dev *kgd, u64 uReg, u32 value)<br>
+{<br>
+        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;<br>
+        struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+        struct amdgpu_spm *spm = &adev->gfx.spm;<br>
+<br>
+        pr_debug("[%s]\n", __func__);<br>
+<br>
+        /* set spm register via the kiq ring */<br>
+        spin_lock(&adev->gfx.kiq.ring_lock);<br>
+        amdgpu_ring_alloc(kiq_ring, spm->spmf->get_spm_data_size);<br>
+        adev->gfx.spm.spmf->set_reg(adev, uReg, value);<br>
+        amdgpu_ring_commit(kiq_ring);<br>
+        spin_unlock(&adev->gfx.kiq.ring_lock);<br>
+<br>
+        return 0;<br>
+}<br>
+<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
index ee698f0..ba4da52 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h<br>
@@ -105,6 +105,31 @@ struct amdgpu_kiq {<br>
         const struct kiq_pm4_funcs *pmf;<br>
 };<br>
 <br>
+struct spm_funcs {<br>
+       void (*start)(struct amdgpu_device *adev);<br>
+       void (*stop)(struct amdgpu_device *adev);<br>
+       void (*set_reg)(struct amdgpu_device *adev, uint64_t reg, uint32_t val);<br>
+       u32 (*get_rdptr)(struct amdgpu_device *adev);<br>
+       void (*set_rdptr)(struct amdgpu_device *adev, u32 rptr);<br>
+       u32 (*get_wrptr)(struct amdgpu_device *adev);<br>
+       void (*set_wrptr)(struct amdgpu_device *adev, u32 wptr);<br>
+       void (*set_spm_porfmon_ring_buf)(struct amdgpu_device *adev, u64 gpu_rptr, u32 size);<br>
+<br>
+       /* Packet sizes */<br>
+       int set_spm_config_size;<br>
+       int get_spm_data_size;<br>
+};<br>
+<br>
+struct amdgpu_spm {<br>
+       const struct spm_funcs *spmf;<br>
+       u64              spm_gpu_addr;<br>
+       u32              spm_gpu_size;<br>
+       u32              spm_ring_rptr;<br>
+       u32              spm_ring_rptrsize_to_read;<br>
+       struct amdgpu_bo *spm_obj;<br>
+       void             *spm_cpu_addr;<br>
+};<br>
+<br>
 /*<br>
  * GPU scratch registers structures, functions & helpers<br>
  */<br>
@@ -256,6 +281,7 @@ struct amdgpu_gfx {<br>
         struct amdgpu_me                me;<br>
         struct amdgpu_mec               mec;<br>
         struct amdgpu_kiq               kiq;<br>
+       struct amdgpu_spm               spm;<br>
         struct amdgpu_scratch           scratch;<br>
         const struct firmware           *me_fw; /* ME firmware */<br>
         uint32_t                        me_fw_version;<br>
@@ -292,6 +318,7 @@ struct amdgpu_gfx {<br>
         struct amdgpu_irq_src           priv_reg_irq;<br>
         struct amdgpu_irq_src           priv_inst_irq;<br>
         struct amdgpu_irq_src           cp_ecc_error_irq;<br>
+       struct amdgpu_irq_src           spm_irq;<br>
         struct amdgpu_irq_src           sq_irq;<br>
         struct sq_work                  sq_work;<br>
 <br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
index 63ac430..9c507d2 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
@@ -4121,6 +4121,13 @@ static int gfx_v10_0_sw_init(void *handle)<br>
         if (r)<br>
                 return r;<br>
 <br>
+       /* KIQ SPM */<br>
+       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_RLC,<br>
+                             GFX_10_1__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT,<br>
+                             &adev->gfx.spm_irq);<br>
+       if (r)<br>
+               return r;<br>
+<br>
         /* EOP Event */<br>
         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,<br>
                               GFX_10_1__SRCID__CP_EOP_INTERRUPT,<br>
@@ -6603,6 +6610,7 @@ static int gfx_v10_0_hw_fini(void *handle)<br>
 <br>
         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);<br>
         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);<br>
+       amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0);<br>
 #ifndef BRING_UP_DEBUG<br>
         if (amdgpu_async_gfx_ring) {<br>
                 r = gfx_v10_0_kiq_disable_kgq(adev);<br>
@@ -6768,6 +6776,126 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,<br>
                                     (1 << (oa_size + oa_base)) - (1 << oa_base));<br>
 }<br>
 <br>
+static void gfx_v10_0_spm_start(struct amdgpu_device *adev)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t data = 0;<br>
+       u8 se;<br>
+<br>
+       for (se = 0; se < adev->gfx.config.max_shader_engines + 1; ++se)<br>
+       {<br>
+               uint32_t mux_addr_reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_ADDR);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       INSTANCE_BROADCAST_WRITES, 1);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       SA_BROADCAST_WRITES, 1);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       SE_BROADCAST_WRITES, 1);<br>
+<br>
+               if (se < adev->gfx.config.max_shader_engines) /* SE, else global */<br>
+               {<br>
+                       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se);<br>
+                       mux_addr_reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);<br>
+               }<br>
+               gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);<br>
+               gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, mux_addr_reg, 0);<br>
+       }<br>
+<br>
+       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, 0);<br>
+       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);<br>
+<br>
+       data = 0;<br>
+       data |= CP_PERFMON_STATE_DISABLE_AND_RESET << 4;<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       data = 0;<br>
+       data |= STRM_PERFMON_STATE_START_COUNTING << 4;<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 1);<br>
+}<br>
+<br>
+static void gfx_v10_0_spm_stop(struct amdgpu_device *adev)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t data = 0;<br>
+<br>
+       data = CP_PERFMON_STATE_STOP_COUNTING;<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       data |= (1 << 10);<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 0);<br>
+}<br>
+<br>
+static u32 gfx_v10_0_spm_get_rdptr(struct amdgpu_device *adev)<br>
+{<br>
+       return RREG32_SOC15(GC, 0, mmRLC_SPM_RING_RDPTR);<br>
+}<br>
+<br>
+static void gfx_v10_0_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), rptr);<br>
+}<br>
+<br>
+static u32 gfx_v10_0_spm_get_wrptr(struct amdgpu_device *adev)<br>
+{<br>
+       return RREG32_SOC15(GC, 0, mmRLC_SPM_RING_WRPTR);<br>
+}<br>
+<br>
+static void gfx_v10_0_spm_set_wrptr(struct amdgpu_device *adev, u32 wptr)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_WRPTR), wptr);<br>
+}<br>
+<br>
+static void gfx_v10_0_set_spm_porfmon_ring_buf(struct amdgpu_device *adev, u64 gpu_addr, u32 size)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0,<br>
+                       mmRLC_SPM_PERFMON_RING_BASE_LO), lower_32_bits(gpu_addr));<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false,<br>
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_HI), upper_32_bits(gpu_addr));<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false,<br>
+                       SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE), size);<br>
+}<br>
+<br>
+static void gfx_v10_0_spm_set_reg(struct amdgpu_device *adev, u64 reg, u32 value)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t grbm_cntl;<br>
+       grbm_cntl = adev->reg_offset[GC_HWIP][0][1] + reg;<br>
+<br>
+       gfx_v10_0_write_data_to_reg(kiq_ring, 0, false, grbm_cntl, value);<br>
+}<br>
+<br>
+static const struct spm_funcs gfx_v10_0_spm_funcs = {<br>
+       .start = gfx_v10_0_spm_start,<br>
+       .stop = gfx_v10_0_spm_stop,<br>
+       .get_rdptr = gfx_v10_0_spm_get_rdptr,<br>
+       .set_rdptr = gfx_v10_0_spm_set_rdptr,<br>
+       .get_wrptr = gfx_v10_0_spm_get_wrptr,<br>
+       .set_wrptr = gfx_v10_0_spm_set_wrptr,<br>
+       .set_spm_porfmon_ring_buf = gfx_v10_0_set_spm_porfmon_ring_buf,<br>
+       .set_reg = gfx_v10_0_spm_set_reg,<br>
+       .set_spm_config_size = 3,<br>
+       .get_spm_data_size = 128,<br>
+};<br>
+<br>
+static void gfx_v10_0_set_spm_funcs(struct amdgpu_device *adev)<br>
+{<br>
+       adev->gfx.spm.spmf = &gfx_v10_0_spm_funcs;<br>
+}<br>
+<br>
+<br>
 static int gfx_v10_0_early_init(void *handle)<br>
 {<br>
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;<br>
@@ -6776,6 +6904,7 @@ static int gfx_v10_0_early_init(void *handle)<br>
 <br>
         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
 <br>
+       gfx_v10_0_set_spm_funcs(adev);<br>
         gfx_v10_0_set_kiq_pm4_funcs(adev);<br>
         gfx_v10_0_set_ring_funcs(adev);<br>
         gfx_v10_0_set_irq_funcs(adev);<br>
@@ -6794,6 +6923,10 @@ static int gfx_v10_0_late_init(void *handle)<br>
         if (r)<br>
                 return r;<br>
 <br>
+       r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0);<br>
+       if (r)<br>
+               return r;<br>
+<br>
         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);<br>
         if (r)<br>
                 return r;<br>
@@ -6860,6 +6993,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade<br>
                 if (def != data)<br>
                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);<br>
 <br>
+<br>
                 /* MGLS is a global flag to control all MGLS in GFX */<br>
                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {<br>
                         /* 2 - RLC memory Light sleep */<br>
@@ -8018,6 +8152,39 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)<br>
         amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */<br>
 }<br>
 <br>
+static int gfx_v10_0_spm_set_interrupt_state(struct amdgpu_device *adev,<br>
+                                            struct amdgpu_irq_src *src,<br>
+                                            unsigned int type,<br>
+                                            enum amdgpu_interrupt_state state)<br>
+{<br>
+       switch (state) {<br>
+       case AMDGPU_IRQ_STATE_DISABLE:<br>
+               WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 0);<br>
+               break;<br>
+       case AMDGPU_IRQ_STATE_ENABLE:<br>
+               WREG32_SOC15(GC, 0, mmRLC_SPM_INT_CNTL, 1);<br>
+               break;<br>
+       default:<br>
+               break;<br>
+       }<br>
+       return 0;<br>
+}<br>
+<br>
+static int gfx_v10_0_spm_irq(struct amdgpu_device *adev,<br>
+                            struct amdgpu_irq_src *source,<br>
+                            struct amdgpu_iv_entry *entry)<br>
+{<br>
+       u8 me_id, pipe_id, queue_id;<br>
+<br>
+       me_id = (entry->ring_id & 0x0c) >> 2;<br>
+       pipe_id = (entry->ring_id & 0x03) >> 0;<br>
+       queue_id = (entry->ring_id & 0x70) >> 4;<br>
+       pr_debug ("IH: RLC_RPM_INT, me:%d, pipe:%d, queue:%d\n",<br>
+                                         me_id, pipe_id, queue_id);<br>
+       return  0; /* This prevents sending it to KFD */<br>
+}<br>
+<br>
+<br>
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {<br>
         .name = "gfx_v10_0",<br>
         .early_init = gfx_v10_0_early_init,<br>
@@ -8189,6 +8356,11 @@ static const struct amdgpu_irq_src_funcs gfx_v10_0_kiq_irq_funcs = {<br>
         .process = gfx_v10_0_kiq_irq,<br>
 };<br>
 <br>
+static const struct amdgpu_irq_src_funcs gfx_v10_0_spm_irq_funcs = {<br>
+       .set = gfx_v10_0_spm_set_interrupt_state,<br>
+       .process = gfx_v10_0_spm_irq,<br>
+};<br>
+<br>
 static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)<br>
 {<br>
         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;<br>
@@ -8197,6 +8369,9 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev)<br>
         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;<br>
         adev->gfx.kiq.irq.funcs = &gfx_v10_0_kiq_irq_funcs;<br>
 <br>
+       adev->gfx.spm_irq.num_types = 1;<br>
+       adev->gfx.spm_irq.funcs = &gfx_v10_0_spm_irq_funcs;<br>
+<br>
         adev->gfx.priv_reg_irq.num_types = 1;<br>
         adev->gfx.priv_reg_irq.funcs = &gfx_v10_0_priv_reg_irq_funcs;<br>
 <br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
index 14790f8..1125b91 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
@@ -1955,6 +1955,12 @@ static int gfx_v8_0_sw_init(void *handle)<br>
         adev->gfx.mec.num_pipe_per_mec = 4;<br>
         adev->gfx.mec.num_queue_per_pipe = 8;<br>
 <br>
+       /* KIQ SPM */<br>
+       r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_RLC_STRM_PERF_MONITOR,<br>
+                             &adev->gfx.spm_irq);<br>
+       if (r)<br>
+               return r;<br>
+<br>
         /* EOP Event */<br>
         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);<br>
         if (r)<br>
@@ -4927,6 +4933,7 @@ static int gfx_v8_0_hw_fini(void *handle)<br>
         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);<br>
 <br>
         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);<br>
+       amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0);<br>
 <br>
         /* disable KCQ to avoid CPC touch memory not valid anymore */<br>
         gfx_v8_0_kcq_disable(adev);<br>
@@ -5291,6 +5298,126 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {<br>
         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q<br>
 };<br>
 <br>
+static void gfx_v8_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,<br>
+                                      bool wc, uint32_t reg, uint32_t val)<br>
+{<br>
+       amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));<br>
+       amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |<br>
+                               WRITE_DATA_DST_SEL(0) |<br>
+                               (wc ? WR_CONFIRM : 0));<br>
+       amdgpu_ring_write(ring, reg);<br>
+       amdgpu_ring_write(ring, 0);<br>
+       amdgpu_ring_write(ring, val);<br>
+}<br>
+<br>
+static void gfx_v8_0_spm_start(struct amdgpu_device *adev)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t data = 0;<br>
+       u8 se;<br>
+<br>
+       for (se = 0; se < adev->gfx.config.max_shader_engines + 1; ++se)<br>
+       {<br>
+               uint32_t mux_addr_reg = mmRLC_SPM_GLOBAL_MUXSEL_ADDR;<br>
+<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       INSTANCE_BROADCAST_WRITES, 1);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       SH_BROADCAST_WRITES, 1);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       SE_BROADCAST_WRITES, 1);<br>
+<br>
+               if (se < adev->gfx.config.max_shader_engines) /* SE, else global */<br>
+               {<br>
+                       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se);<br>
+<br>
+                       mux_addr_reg = mmRLC_SPM_SE_MUXSEL_ADDR;<br>
+               }<br>
+               gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmGRBM_GFX_INDEX, data);<br>
+               /* init addr */<br>
+               gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mux_addr_reg, data);<br>
+       }<br>
+<br>
+       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, 0);<br>
+       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmGRBM_GFX_INDEX, data);<br>
+<br>
+       data = 0;<br>
+       data |= CP_PERFMON_STATE_DISABLE_AND_RESET << 4;<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data);<br>
+<br>
+       data = 0;<br>
+       data |= STRM_PERFMON_STATE_START_COUNTING << 4;<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL,       data);</div>
<div class="PlainText">[yz] unnecessary spaces here<br>
+<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_INT_CNTL, 1);<br>
+}<br>
+<br>
+static void gfx_v8_0_spm_stop(struct amdgpu_device *adev)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t data = 0;<br>
+<br>
+       data = CP_PERFMON_STATE_STOP_COUNTING;<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data);<br>
+       data |= (1 << 10);<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmCP_PERFMON_CNTL, data);<br>
+<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_INT_CNTL, 0);<br>
+}<br>
+<br>
+<br>
+static u32 gfx_v8_0_spm_get_rdptr(struct amdgpu_device *adev)<br>
+{<br>
+       return RREG32(mmRLC_SPM_RING_RDPTR);<br>
+}<br>
+<br>
+static void gfx_v8_0_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_RING_RDPTR, rptr);<br>
+}<br>
+<br>
+static u32 gfx_v8_0_spm_get_wrptr(struct amdgpu_device *adev)<br>
+{<br>
+       return -1;<br>
+}<br>
+<br>
+static void gfx_v8_0_set_spm_porfmon_ring_buf(struct amdgpu_device *adev, u64 gpu_addr, u32 size)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_PERFMON_RING_BASE_LO, lower_32_bits(gpu_addr));<br>
+<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_PERFMON_RING_BASE_HI, upper_32_bits(gpu_addr));<br>
+<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, mmRLC_SPM_PERFMON_RING_SIZE, size);<br>
+}<br>
+<br>
+static void gfx_v8_0_spm_set_reg(struct amdgpu_device *adev, u64 reg, u32 value)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       gfx_v8_0_write_data_to_reg(kiq_ring, 0, false, reg, value);<br>
+}<br>
+<br>
+static const struct spm_funcs gfx_v8_0_spm_funcs = {<br>
+       .start = gfx_v8_0_spm_start,<br>
+       .stop = gfx_v8_0_spm_stop,<br>
+       .get_rdptr = gfx_v8_0_spm_get_rdptr,<br>
+       .set_rdptr = gfx_v8_0_spm_set_rdptr,<br>
+       .get_wrptr = gfx_v8_0_spm_get_wrptr,<br>
+       .set_spm_porfmon_ring_buf = gfx_v8_0_set_spm_porfmon_ring_buf,<br>
+       .set_reg = gfx_v8_0_spm_set_reg,<br>
+       .set_spm_config_size = 3,<br>
+       .get_spm_data_size = 128,<br>
+};<br>
+<br>
+static void gfx_v8_0_set_spm_funcs(struct amdgpu_device *adev)<br>
+{<br>
+       adev->gfx.spm.spmf = &gfx_v8_0_spm_funcs;<br>
+}<br>
+<br>
 static int gfx_v8_0_early_init(void *handle)<br>
 {<br>
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;<br>
@@ -5298,6 +5425,8 @@ static int gfx_v8_0_early_init(void *handle)<br>
         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;<br>
         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;<br>
+<br>
+       gfx_v8_0_set_spm_funcs(adev);<br>
         gfx_v8_0_set_ring_funcs(adev);<br>
         gfx_v8_0_set_irq_funcs(adev);<br>
         gfx_v8_0_set_gds_init(adev);<br>
@@ -5338,6 +5467,10 @@ static int gfx_v8_0_late_init(void *handle)<br>
                 return r;<br>
         }<br>
 <br>
+       r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0);<br>
+       if (r)<br>
+               return r;<br>
+<br>
         return 0;<br>
 }<br>
 <br>
@@ -6845,6 +6978,41 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)<br>
         amdgpu_ring_write(ring, 0x0000000A);    /* poll interval */<br>
 }<br>
 <br>
+static int gfx_v8_0_spm_set_interrupt_state(struct amdgpu_device *adev,<br>
+                                            struct amdgpu_irq_src *src,<br>
+                                            unsigned int type,<br>
+                                            enum amdgpu_interrupt_state state)<br>
+{<br>
+       switch (state) {<br>
+       case AMDGPU_IRQ_STATE_DISABLE:<br>
+               WREG32(mmRLC_SPM_INT_CNTL, 0);<br>
+               break;<br>
+       case AMDGPU_IRQ_STATE_ENABLE:<br>
+               WREG32(mmRLC_SPM_INT_CNTL, 1);<br>
+               break;<br>
+       default:<br>
+               break;<br>
+       }<br>
+       return 0;<br>
+}<br>
+<br>
+static int gfx_v8_0_spm_irq(struct amdgpu_device *adev,<br>
+                            struct amdgpu_irq_src *source,<br>
+                            struct amdgpu_iv_entry *entry)<br>
+{<br>
+       u8 me_id, pipe_id, queue_id;<br>
+       struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);<br>
+<br>
+       me_id = (entry->ring_id & 0x0c) >> 2;<br>
+       pipe_id = (entry->ring_id & 0x03) >> 0;<br>
+       queue_id = (entry->ring_id & 0x70) >> 4;<br>
+       pr_debug("IH: RLC_RPM_INT, me:%d, pipe:%d, queue:%d\n",<br>
+                       me_id, pipe_id, queue_id);<br>
+<br>
+       amdgpu_fence_process(ring);<br>
+       return 0;<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {<br>
         .name = "gfx_v8_0",<br>
         .early_init = gfx_v8_0_early_init,<br>
@@ -7005,11 +7173,19 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {<br>
         .process = gfx_v8_0_sq_irq,<br>
 };<br>
 <br>
+static const struct amdgpu_irq_src_funcs gfx_v8_0_spm_irq_funcs = {<br>
+       .set = gfx_v8_0_spm_set_interrupt_state,<br>
+       .process = gfx_v8_0_spm_irq,<br>
+};<br>
+<br>
 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)<br>
 {<br>
         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;<br>
         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;<br>
 <br>
+       adev->gfx.spm_irq.num_types = 1;<br>
+       adev->gfx.spm_irq.funcs = &gfx_v8_0_spm_irq_funcs;<br>
+<br>
         adev->gfx.priv_reg_irq.num_types = 1;<br>
         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;<br>
 <br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
index 2767c6d..bfde274 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
@@ -2249,6 +2249,13 @@ static int gfx_v9_0_sw_init(void *handle)<br>
         adev->gfx.mec.num_pipe_per_mec = 4;<br>
         adev->gfx.mec.num_queue_per_pipe = 8;<br>
 <br>
+       /* KIQ SPM */<br>
+       r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_RLC,<br>
+                             GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT,<br>
+                             &adev->gfx.spm_irq);<br>
+       if (r)<br>
+               return r;<br>
+<br>
         /* EOP Event */<br>
         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);<br>
         if (r)<br>
@@ -3907,6 +3914,7 @@ static int gfx_v9_0_hw_fini(void *handle)<br>
 <br>
         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);<br>
         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);<br>
+       amdgpu_irq_put(adev, &adev->gfx.spm_irq, 0);<br>
         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);<br>
 <br>
         /* DF freeze and kcq disable will fail */<br>
@@ -4617,6 +4625,121 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)<br>
         return r;<br>
 }<br>
 <br>
+static void gfx_v9_0_spm_start(struct amdgpu_device *adev)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t data = 0;<br>
+       u8 se;<br>
+<br>
+       for (se = 0; se < adev->gfx.config.max_shader_engines + 1; ++se)<br>
+       {<br>
+               uint32_t mux_addr_reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_GLOBAL_MUXSEL_ADDR);<br>
+<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       INSTANCE_BROADCAST_WRITES, 1);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       SH_BROADCAST_WRITES, 1);<br>
+               data = REG_SET_FIELD(data, GRBM_GFX_INDEX,<br>
+                       SE_BROADCAST_WRITES, 1);<br>
+<br>
+               if (se < adev->gfx.config.max_shader_engines) /* SE, else global */<br>
+               {<br>
+                       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se);<br>
+<br>
+                       mux_addr_reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_SE_MUXSEL_ADDR);<br>
+               }<br>
+               gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);<br>
+               /* init addr */<br>
+               gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, mux_addr_reg, data);<br>
+       }<br>
+<br>
+       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, 0);<br>
+       data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);<br>
+<br>
+       data = 0;<br>
+       data |= CP_PERFMON_STATE_DISABLE_AND_RESET << 4;<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       data = 0;<br>
+       data |= STRM_PERFMON_STATE_START_COUNTING << 4;<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 1);<br>
+}<br>
+<br>
+<br>
+static void gfx_v9_0_spm_stop(struct amdgpu_device *adev)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t data = 0;<br>
+<br>
+<br>
+       data = CP_PERFMON_STATE_STOP_COUNTING;<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+       data |= (1 << 10);<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL), data);<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_INT_CNTL), 0);<br>
+}<br>
+<br>
+static u32 gfx_v9_0_spm_get_rdptr(struct amdgpu_device *adev)<br>
+{<br>
+       return RREG32_SOC15(GC, 0, mmRLC_SPM_RING_RDPTR);<br>
+}<br>
+<br>
+static void gfx_v9_0_spm_set_rdptr(struct amdgpu_device *adev, u32 rptr)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_RING_RDPTR), rptr);<br>
+}<br>
+<br>
+static u32 gfx_v9_0_spm_get_wrptr(struct amdgpu_device *adev)<br>
+{<br>
+       return -1;<br>
+}<br>
+<br>
+static void gfx_v9_0_set_spm_porfmon_ring_buf(struct amdgpu_device *adev, u64 gpu_addr, u32 size)</div>
<div class="PlainText">[yz] <span style="font-family: "Segoe UI", "Segoe UI Web (West European)", "Segoe UI", -apple-system, BlinkMacSystemFont, Roboto, "Helvetica Neue", sans-serif; background-color: rgb(255, 255, 255); display: inline !important">porfmon
 -> perfmon</span><br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, SOC15_REG_OFFSET(GC, 0,<br>
+                               mmRLC_SPM_PERFMON_RING_BASE_LO), lower_32_bits(gpu_addr));<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,<br>
+                               SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_BASE_HI), upper_32_bits(gpu_addr));<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false,<br>
+                               SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_PERFMON_RING_SIZE), size);<br>
+}<br>
+<br>
+static void gfx_v9_0_spm_set_reg(struct amdgpu_device *adev, u64 reg, u32 value)<br>
+{<br>
+       struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;<br>
+       uint32_t grbm_cntl;<br>
+       grbm_cntl = adev->reg_offset[GC_HWIP][0][1] + reg;<br>
+<br>
+       gfx_v9_0_write_data_to_reg(kiq_ring, 0, false, grbm_cntl, value);<br>
+}<br>
+<br>
+static const struct spm_funcs gfx_v9_0_spm_funcs = {<br>
+       .start = gfx_v9_0_spm_start,<br>
+       .stop = gfx_v9_0_spm_stop,<br>
+       .get_rdptr = gfx_v9_0_spm_get_rdptr,<br>
+       .set_rdptr = gfx_v9_0_spm_set_rdptr,<br>
+       .get_wrptr = gfx_v9_0_spm_get_wrptr,<br>
+       .set_spm_porfmon_ring_buf = gfx_v9_0_set_spm_porfmon_ring_buf,<br>
+       .set_reg = gfx_v9_0_spm_set_reg,<br>
+       .set_spm_config_size = 3,<br>
+       .get_spm_data_size = 128,<br>
+};<br>
+<br>
+static void gfx_v9_0_set_spm_funcs(struct amdgpu_device *adev)<br>
+{<br>
+       adev->gfx.spm.spmf = &gfx_v9_0_spm_funcs;<br>
+}<br>
+<br>
 static int gfx_v9_0_early_init(void *handle)<br>
 {<br>
         struct amdgpu_device *adev = (struct amdgpu_device *)handle;<br>
@@ -4626,6 +4749,7 @@ static int gfx_v9_0_early_init(void *handle)<br>
         else<br>
                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;<br>
         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;<br>
+       gfx_v9_0_set_spm_funcs(adev);<br>
         gfx_v9_0_set_kiq_pm4_funcs(adev);<br>
         gfx_v9_0_set_ring_funcs(adev);<br>
         gfx_v9_0_set_irq_funcs(adev);<br>
@@ -4677,6 +4801,10 @@ static int gfx_v9_0_late_init(void *handle)<br>
         if (r)<br>
                 return r;<br>
 <br>
+       r = amdgpu_irq_get(adev, &adev->gfx.spm_irq, 0);<br>
+       if (r)<br>
+               return r;<br>
+<br>
         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);<br>
         if (r)<br>
                 return r;<br>
@@ -6657,6 +6785,39 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)<br>
         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */<br>
 }<br>
 <br>
+static int gfx_v9_0_spm_set_interrupt_state(struct amdgpu_device *adev,<br>
+                                            struct amdgpu_irq_src *src,<br>
+                                            unsigned int type,<br>
+                                            enum amdgpu_interrupt_state state)<br>
+{<br>
+       switch (state) {<br>
+       case AMDGPU_IRQ_STATE_DISABLE:<br>
+               WREG32(mmRLC_SPM_INT_CNTL, 0);<br>
+               break;<br>
+       case AMDGPU_IRQ_STATE_ENABLE:<br>
+               WREG32(mmRLC_SPM_INT_CNTL, 1);<br>
+               break;<br>
+       default:<br>
+               break;<br>
+       }<br>
+       return 0;<br>
+}<br>
+<br>
+static int gfx_v9_0_spm_irq(struct amdgpu_device *adev,<br>
+                            struct amdgpu_irq_src *source,<br>
+                            struct amdgpu_iv_entry *entry)<br>
+{<br>
+       u8 me_id, pipe_id, queue_id;<br>
+<br>
+       me_id = (entry->ring_id & 0x0c) >> 2;<br>
+       pipe_id = (entry->ring_id & 0x03) >> 0;<br>
+       queue_id = (entry->ring_id & 0x70) >> 4;<br>
+       pr_debug("IH: RLC_RPM_INT, me:%d, pipe:%d, queue:%d\n",<br>
+                       me_id, pipe_id, queue_id);<br>
+<br>
+       return 0; /* This also prevents sending it to KFD */<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {<br>
         .name = "gfx_v9_0",<br>
         .early_init = gfx_v9_0_early_init,<br>
@@ -6825,12 +6986,19 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {<br>
         .process = amdgpu_gfx_cp_ecc_error_irq,<br>
 };<br>
 <br>
+static const struct amdgpu_irq_src_funcs gfx_v9_0_spm_irq_funcs = {<br>
+       .set = gfx_v9_0_spm_set_interrupt_state,<br>
+       .process = gfx_v9_0_spm_irq,<br>
+};<br>
 <br>
 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)<br>
 {<br>
         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;<br>
         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;<br>
 <br>
+       adev->gfx.spm_irq.num_types = 1;<br>
+       adev->gfx.spm_irq.funcs = &gfx_v9_0_spm_irq_funcs;<br>
+<br>
         adev->gfx.priv_reg_irq.num_types = 1;<br>
         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;<br>
 <br>
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile<br>
index 6147462..43edba0 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/Makefile<br>
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile<br>
@@ -53,7 +53,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \<br>
                 $(AMDKFD_PATH)/kfd_int_process_v9.o \<br>
                 $(AMDKFD_PATH)/kfd_dbgdev.o \<br>
                 $(AMDKFD_PATH)/kfd_dbgmgr.o \<br>
-               $(AMDKFD_PATH)/kfd_crat.o<br>
+               $(AMDKFD_PATH)/kfd_crat.o \<br>
+               $(AMDKFD_PATH)/kfd_spm.o<br>
 <br>
 ifneq ($(CONFIG_AMD_IOMMU_V2),)<br>
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c<br>
index 9f59ba9..cd394cd 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c<br>
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c<br>
@@ -24,6 +24,7 @@<br>
 #include "kfd_events.h"<br>
 #include "cik_int.h"<br>
 #include "amdgpu_amdkfd.h"<br>
+#include "ivsrcid/ivsrcid_vislands30.h"<br>
 <br>
 static bool cik_event_interrupt_isr(struct kfd_dev *dev,<br>
                                         const uint32_t *ih_ring_entry,<br>
@@ -37,6 +38,11 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,<br>
         uint16_t pasid;<br>
         bool ret;<br>
 <br>
+       vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);<br>
+<br>
+       if ((ihre->source_id == VISLANDS30_IV_SRCID_RLC_STRM_PERF_MONITOR) && (vmid == 0))<br>
+               return kfd_spm_interrupt_isr(dev, ihre->source_id, ihre->source_id);<br>
+<br>
         /* This workaround is due to HW/FW limitation on Hawaii that<br>
          * VMID and PASID are not written into ih_ring_entry<br>
          */<br>
@@ -49,7 +55,6 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,<br>
                 *patched_flag = true;<br>
                 *tmp_ihre = *ihre;<br>
 <br>
-               vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);<br>
                 ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid);<br>
 <br>
                 tmp_ihre->ring_id &= 0x000000ff;<br>
@@ -95,6 +100,9 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,<br>
         if (pasid == 0)<br>
                 return;<br>
 <br>
+       if ((ihre->source_id == VISLANDS30_IV_SRCID_RLC_STRM_PERF_MONITOR) && (vmid == 0))<br>
+               kfd_spm_interrupt_wq(dev, ihre->source_id);<br>
+<br>
         if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)<br>
                 kfd_signal_event_interrupt(pasid, context_id, 28);<br>
         else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c<br>
index f8fa03a..bfc83beb 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c<br>
@@ -1732,6 +1732,20 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,<br>
         return r;<br>
 }<br>
 <br>
+static int kfd_ioctl_rlc_spm(struct file *filep,<br>
+                                  struct kfd_process *p, void *data)<br>
+{<br>
+       struct kfd_ioctl_spm_args *args = data;<br>
+       int err;<br>
+<br>
+       err = kfd_rlc_spm(p,<br>
+                         (void __user *)args,<br>
+                         args->buf_size,<br>
+                         args->op);<br>
+<br>
+       return err;<br>
+}<br>
+<br>
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \<br>
         [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \<br>
                             .cmd_drv = 0, .name = #ioctl}<br>
@@ -1827,6 +1841,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {<br>
 <br>
         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,<br>
                         kfd_ioctl_alloc_queue_gws, 0),<br>
+<br>
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_RLC_SPM,<br>
+                       kfd_ioctl_rlc_spm, 0),<br>
 };<br>
 <br>
 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c<br>
index e05d75e..481f0ae 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c<br>
@@ -24,6 +24,7 @@<br>
 #include "kfd_events.h"<br>
 #include "soc15_int.h"<br>
 #include "kfd_device_queue_manager.h"<br>
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"<br>
 <br>
 static bool event_interrupt_isr_v9(struct kfd_dev *dev,<br>
                                         const uint32_t *ih_ring_entry,<br>
@@ -35,12 +36,15 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,<br>
 <br>
         /* Only handle interrupts from KFD VMIDs */<br>
         vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);<br>
+       source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);<br>
+       client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);<br>
+       if ((source_id == GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT) && (vmid == 0))<br>
+               return kfd_spm_interrupt_isr(dev, source_id, client_id);<br>
+<br>
         if (vmid < dev->vm_info.first_vmid_kfd ||<br>
             vmid > dev->vm_info.last_vmid_kfd)<br>
                 return 0;<br>
 <br>
-       source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);<br>
-       client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);<br>
         pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);<br>
 <br>
         /* This is a known issue for gfx9. Under non HWS, pasid is not set<br>
@@ -95,6 +99,9 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,<br>
         vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);<br>
         context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);<br>
 <br>
+       if ((source_id == GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT) && (vmid == 0))<br>
+               kfd_spm_interrupt_wq(dev, source_id);<br>
+<br>
         if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)<br>
                 kfd_signal_event_interrupt(pasid, context_id, 32);<br>
         else if (source_id == SOC15_INTSRC_SDMA_TRAP)<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h<br>
index 43b888b..707d672 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h<br>
@@ -309,6 +309,9 @@ struct kfd_dev {<br>
 <br>
         /* Global GWS resource shared b/t processes*/<br>
         void *gws;<br>
+<br>
+       /* spm process id */<br>
+       unsigned int spm_pasid;<br>
 };<br>
 <br>
 enum kfd_mempool {<br>
@@ -740,6 +743,13 @@ struct kfd_process {<br>
         struct kobject *kobj;<br>
         struct kobject *kobj_queues;<br>
         struct attribute attr_pasid;<br>
+       /* spm data */<br>
+       struct kfd_spm_cntr *spm_cntr;<br>
+       bool is_spm_acquired;<br>
+       /* Work item to transfer data to user */<br>
+       struct delayed_work copy_to_user_work;<br>
+       /* protects spm data */<br>
+       struct mutex spm_mutex;<br>
 };<br>
 <br>
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */<br>
@@ -1045,10 +1055,20 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);<br>
 <br>
 bool kfd_is_locked(void);<br>
 <br>
+void kfd_spm_init_process(struct kfd_process *p);<br>
+int kfd_rlc_spm(struct kfd_process *p, void __user *data,<br>
+                      uint32_t buf_size, __u32 op);<br>
+<br>
 /* Compute profile */<br>
 void kfd_inc_compute_active(struct kfd_dev *dev);<br>
 void kfd_dec_compute_active(struct kfd_dev *dev);<br>
 <br>
+/* spm interrupt */<br>
+bool kfd_spm_interrupt_isr(struct kfd_dev *dev,<br>
+                                       uint16_t source_id, uint32_t client_id);<br>
+void kfd_spm_interrupt_wq(struct kfd_dev *dev, uint16_t source_id);<br>
+<br>
+<br>
 /* Cgroup Support */<br>
 /* Check with device cgroup if @kfd device is accessible */<br>
 static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c<br>
index fe0cd49..338868d 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c<br>
@@ -746,6 +746,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)<br>
         INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);<br>
         process->last_restore_timestamp = get_jiffies_64();<br>
         kfd_event_init_process(process);<br>
+       kfd_spm_init_process(process);<br>
         process->is_32bit_user_mode = in_compat_syscall();<br>
 <br>
         process->pasid = kfd_pasid_alloc();<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_spm.c b/drivers/gpu/drm/amd/amdkfd/kfd_spm.c<br>
new file mode 100644<br>
index 0000000..773e2ee<br>
--- /dev/null<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_spm.c<br>
@@ -0,0 +1,577 @@<br>
+/*<br>
+ * Copyright 2020 Advanced Micro Devices, Inc.<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice shall be included in<br>
+ * all copies or substantial portions of the Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL<br>
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR<br>
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,<br>
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR<br>
+ * OTHER DEALINGS IN THE SOFTWARE.<br>
+ */<br>
+<br>
+#include <linux/device.h><br>
+#include "kfd_priv.h"<br>
+#include "amdgpu_amdkfd.h"<br>
+#include "soc15_int.h"<br>
+#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"<br>
+#include "ivsrcid/ivsrcid_vislands30.h"<br>
+#include <linux/delay.h><br>
+<br>
+#define SAMPLING_MAX 4<br>
+<br>
+struct user_buf {<br>
+        uint64_t __user *user_addr;<br>
+        u32 ubufsize;<br>
+<br>
+};<br>
+<br>
+struct kfd_spm_cntr {<br>
+        struct kgd_dev *kgd;<br>
+        u64    spm_gpu_addr;<br>
+        u32    spm_ring_buf_size;<br>
+        u32    spm_ring_buf_mark;<br>
+        u32    spm_ring_rptr;<br>
+        u32    spm_ring_wptr;<br>
+        u32    spm_ring_size_copied;<br>
+        void   *spm_obj;<br>
+        u32    *spm_cpu_addr;<br>
+        struct user_buf ubuf[SAMPLING_MAX];<br>
+        u32    buf_count;<br>
+        bool   is_uesr_buf_filled;<br>
+        bool   is_nowait;<br>
+        struct task_struct *thread;<br>
+        bool   thread_ready;<br>
+        char   *name;<br>
+        wait_queue_head_t spm_wq;<br>
+        int    wanted_cluster;<br>
+        bool   is_timeout;<br>
+};<br>
+<br>
+static int kfd_spm_data_cocy(struct kfd_spm_cntr *spm_cntr, u32 size_to_copy)<br>
+{<br>
+        u32 user_buf_space_left;<br>
+        int ret = 0;<br>
+        u32 buf_size;<br>
+        uint64_t __user *user_address;<br>
+        uint64_t *ring_buf;<br>
+<br>
+        pr_debug("[%s]\n", __func__);<br>
+<br>
+        buf_size = spm_cntr->ubuf[0].ubufsize;<br>
+        user_address = (uint64_t __user *)((uint64_t)spm_cntr->ubuf[0].user_addr + spm_cntr->spm_ring_size_copied);<br>
+        ring_buf = (uint64_t *)((uint64_t)spm_cntr->spm_cpu_addr + spm_cntr->spm_ring_rptr);<br>
+<br>
+        if (spm_cntr->ubuf[0].user_addr == NULL)<br>
+               return -EFAULT;<br>
+<br>
+        user_buf_space_left = buf_size - spm_cntr->spm_ring_size_copied;<br>
+<br>
+       if (size_to_copy <= user_buf_space_left) {<br>
+               ret = copy_to_user(user_address, ring_buf, size_to_copy);<br>
+               if (ret) {<br>
+                       pr_err("copy_to_user failed, ret = %x\n", ret);<br>
+                       return -EFAULT;<br>
+               }<br>
+               spm_cntr->spm_ring_size_copied += size_to_copy;<br>
+       } else {<br>
+               size_to_copy = spm_cntr->spm_ring_buf_size - spm_cntr->spm_ring_size_copied;<br>
+               ret = copy_to_user(user_address, ring_buf, user_buf_space_left);<br>
+               if (ret)<br>
+                       return -EFAULT;<br>
+<br>
+               spm_cntr->spm_ring_size_copied = buf_size;<br>
+               spm_cntr->is_uesr_buf_filled = true;<br>
+       }<br>
+<br>
+       return ret;<br>
+}<br>
+<br>
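+/*<br>
+ * Drain the SPM ring into the current user buffer until the buffer fills<br>
+ * or the timeout worker fires.  The first 0x20 bytes of the ring appear<br>
+ * to be reserved for the ring header, so the read pointer is pinned at or<br>
+ * above that offset; wptr < rptr means the ring has wrapped and must be<br>
+ * copied in two chunks.<br>
+ */<br>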
+static int kfd_spm_read_ring_buf_polling(struct kfd_spm_cntr *spm_cntr, long timeout)<br>
+{<br>
+       u32 size_to_copy;<br>
+       int ret = 0;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       while (!spm_cntr->is_user_buf_filled) {<br>
+               spm_cntr->spm_ring_rptr = amdgpu_amdkfd_rlc_spm_get_rdptr(spm_cntr->kgd) & spm_cntr->spm_ring_buf_mark;<br>
+               spm_cntr->spm_ring_wptr = amdgpu_amdkfd_rlc_spm_get_wrptr(spm_cntr->kgd) & spm_cntr->spm_ring_buf_mark;<br>
+<br>
+               /* skip the ring header in the first 0x20 bytes */<br>
+               if (spm_cntr->spm_ring_rptr < 0x20)<br>
+                       spm_cntr->spm_ring_rptr = 0x20;<br>
+<br>
+               if (spm_cntr->spm_ring_wptr > spm_cntr->spm_ring_rptr) {<br>
+                       size_to_copy = spm_cntr->spm_ring_wptr - spm_cntr->spm_ring_rptr;<br>
+                       ret = kfd_spm_data_copy(spm_cntr, size_to_copy);<br>
+               } else if (spm_cntr->spm_ring_wptr < spm_cntr->spm_ring_rptr) {<br>
+                       /* the ring wrapped: copy the tail, then restart at<br>
+                        * the beginning of the data area<br>
+                        */<br>
+                       size_to_copy = spm_cntr->spm_ring_buf_size - spm_cntr->spm_ring_rptr;<br>
+                       ret = kfd_spm_data_copy(spm_cntr, size_to_copy);<br>
+<br>
+                       spm_cntr->spm_ring_rptr = 0x20;<br>
+                       size_to_copy = spm_cntr->spm_ring_wptr;<br>
+                       if (!ret)<br>
+                               ret = kfd_spm_data_copy(spm_cntr, size_to_copy);<br>
+               } else {<br>
+                       /* no new data yet; back off instead of busy-waiting */<br>
+                       usleep_range(100, 200);<br>
+               }<br>
+<br>
+               if (ret)<br>
+                       break;<br>
+<br>
+               if (spm_cntr->is_user_buf_filled || spm_cntr->is_timeout) {<br>
+                       /* stop sampling and rewind the ring */<br>
+                       amdgpu_amdkfd_rlc_spm_cntl(spm_cntr->kgd, 0);<br>
+                       amdgpu_amdkfd_rlc_spm_set_rdptr(spm_cntr->kgd, 0);<br>
+                       /* the write pointer is reset through its shadow in<br>
+                        * the ring header<br>
+                        */<br>
+                       spm_cntr->spm_cpu_addr[0] = 0;<br>
+                       break;<br>
+               }<br>
+<br>
+               amdgpu_amdkfd_rlc_spm_set_rdptr(spm_cntr->kgd, spm_cntr->spm_ring_wptr);<br>
+       }<br>
+<br>
+       return ret;<br>
+}<br>
+<br>
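+/*<br>
+ * Single-pass variant of the drain loop above, run from the SPM reader<br>
+ * thread when the RLC raises a streaming performance monitor interrupt.<br>
+ */<br>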
+static int kfd_spm_read_ring_buf(struct kfd_spm_cntr *spm_cntr)<br>
+{<br>
+       u32 size_to_copy;<br>
+       int ret = 0;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       spm_cntr->spm_ring_rptr = amdgpu_amdkfd_rlc_spm_get_rdptr(spm_cntr->kgd) & spm_cntr->spm_ring_buf_mark;<br>
+       spm_cntr->spm_ring_wptr = amdgpu_amdkfd_rlc_spm_get_wrptr(spm_cntr->kgd) & spm_cntr->spm_ring_buf_mark;<br>
+<br>
+       /* skip the ring header in the first 0x20 bytes */<br>
+       if (spm_cntr->spm_ring_rptr < 0x20)<br>
+               spm_cntr->spm_ring_rptr = 0x20;<br>
+<br>
+       if (spm_cntr->is_user_buf_filled)<br>
+               return ret;<br>
+<br>
+       if (spm_cntr->spm_ring_wptr > spm_cntr->spm_ring_rptr) {<br>
+               size_to_copy = spm_cntr->spm_ring_wptr - spm_cntr->spm_ring_rptr;<br>
+               ret = kfd_spm_data_copy(spm_cntr, size_to_copy);<br>
+       } else if (spm_cntr->spm_ring_wptr < spm_cntr->spm_ring_rptr) {<br>
+               /* the ring wrapped: copy the tail, then the head */<br>
+               size_to_copy = spm_cntr->spm_ring_buf_size - spm_cntr->spm_ring_rptr;<br>
+               ret = kfd_spm_data_copy(spm_cntr, size_to_copy);<br>
+<br>
+               spm_cntr->spm_ring_rptr = 0x20;<br>
+               size_to_copy = spm_cntr->spm_ring_wptr;<br>
+               if (!ret)<br>
+                       ret = kfd_spm_data_copy(spm_cntr, size_to_copy);<br>
+       }<br>
+<br>
+       if (!ret) {<br>
+               if (spm_cntr->is_user_buf_filled) {<br>
+                       /* stop sampling and rewind the ring */<br>
+                       amdgpu_amdkfd_rlc_spm_cntl(spm_cntr->kgd, 0);<br>
+                       amdgpu_amdkfd_rlc_spm_set_rdptr(spm_cntr->kgd, 0);<br>
+                       /* the write pointer is reset through its shadow in<br>
+                        * the ring header<br>
+                        */<br>
+                       spm_cntr->spm_cpu_addr[0] = 0;<br>
+               } else {<br>
+                       amdgpu_amdkfd_rlc_spm_set_rdptr(spm_cntr->kgd, spm_cntr->spm_ring_wptr);<br>
+               }<br>
+       }<br>
+<br>
+       return ret;<br>
+}<br>
+<br>
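+/*<br>
+ * Main loop of the per-process SPM reader thread: sleep until the<br>
+ * interrupt bottom half requests a drain (or the thread is stopped),<br>
+ * then copy the new ring contents to the user buffer.<br>
+ */<br>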
+static int kfd_spm_sched_main(void *param)<br>
+{<br>
+       struct kfd_spm_cntr *spm_cntr = (struct kfd_spm_cntr *)param;<br>
+<br>
+       while (!kthread_should_stop()) {<br>
+               wait_event_interruptible(spm_cntr->spm_wq,<br>
+                               spm_cntr->wanted_cluster ||<br>
+                               kthread_should_stop());<br>
+<br>
+               kfd_spm_read_ring_buf(spm_cntr);<br>
+<br>
+               spm_cntr->wanted_cluster = false;<br>
+       }<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
+static void transfer_data_process_worker(struct work_struct *work)<br>
+{<br>
+       struct kfd_process *p;<br>
+       struct delayed_work *dwork;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       dwork = to_delayed_work(work);<br>
+<br>
+       /* Process termination destroys this worker thread. So during the<br>
+        * lifetime of this thread, kfd_process p will be valid<br>
+        */<br>
+       p = container_of(dwork, struct kfd_process, copy_to_user_work);<br>
+<br>
+       if (p->spm_cntr)<br>
+               p->spm_cntr->is_timeout = true;<br>
+}<br>
+<br>
+/**<br>
+ * kfd_spm_shed_init - set up the SPM reader thread for a process.<br>
+ * Returns 0 on success, error on failure.<br>
+ */<br>
+int kfd_spm_shed_init(struct kfd_process *p, struct kgd_dev *kgd)<br>
+{<br>
+       int ret = 0;<br>
+<br>
+       init_waitqueue_head(&p->spm_cntr->spm_wq);<br>
+       p->spm_cntr->wanted_cluster = false;<br>
+       p->spm_cntr->kgd = kgd;<br>
+       INIT_DELAYED_WORK(&p->copy_to_user_work, transfer_data_process_worker);<br>
+<br>
+       /* Each reader runs on a separate kernel thread */<br>
+       p->spm_cntr->thread = kthread_run(kfd_spm_sched_main, p->spm_cntr, p->spm_cntr->name);<br>
+       if (IS_ERR(p->spm_cntr->thread)) {<br>
+               ret = PTR_ERR(p->spm_cntr->thread);<br>
+               p->spm_cntr->thread = NULL;<br>
+               DRM_ERROR("Failed to create SPM reader thread %s.\n", p->spm_cntr->name);<br>
+               return ret;<br>
+       }<br>
+<br>
+       p->spm_cntr->thread_ready = true;<br>
+       return ret;<br>
+}<br>
+<br>
+/**<br>
+ * kfd_spm_shed_fini - tear down the SPM reader thread for a process.<br>
+ *<br>
+ * @p: kfd_process whose reader thread and timeout work are stopped<br>
+ */<br>
+void kfd_spm_shed_fini(struct kfd_process *p)<br>
+{<br>
+       if (p->spm_cntr->thread)<br>
+               kthread_stop(p->spm_cntr->thread);<br>
+<br>
+       cancel_delayed_work_sync(&p->copy_to_user_work);<br>
+       p->spm_cntr->thread = NULL;<br>
+       p->spm_cntr->thread_ready = false;<br>
+}<br>
+<br>
+void kfd_spm_init_process(struct kfd_process *p)<br>
+{<br>
+       mutex_init(&p->spm_mutex);<br>
+       p->spm_cntr = NULL;<br>
+       p->is_spm_acquired = false;<br>
+}<br>
+<br>
+static struct kfd_spm_cntr *allocate_spm_cntr_data(void)<br>
+{<br>
+       return kzalloc(sizeof(struct kfd_spm_cntr), GFP_KERNEL);<br>
+}<br>
+<br>
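+/*<br>
+ * Acquire SPM for a process: allocate the 128 KB GTT ring buffer, hand it<br>
+ * to the RLC and start the reader thread.  Only one acquisition per<br>
+ * process is allowed at a time.<br>
+ */<br>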
+int kfd_acquire_spm(struct kfd_process *p, struct kgd_dev *kgd)<br>
+{<br>
+       int retval;<br>
+       int count;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       if (p->is_spm_acquired)<br>
+               return -EINVAL;<br>
+<br>
+       if (!p->spm_cntr) {<br>
+               p->spm_cntr = allocate_spm_cntr_data();<br>
+               if (!p->spm_cntr)<br>
+                       return -ENOMEM;<br>
+               /* round the 128 KB SPM ring buffer up to a power of two dwords */<br>
+               p->spm_cntr->spm_ring_buf_size = order_base_2(128 * 1024 / 4);<br>
+               p->spm_cntr->spm_ring_buf_size = (1 << p->spm_cntr->spm_ring_buf_size) * 4;<br>
+               p->spm_cntr->spm_ring_buf_mark = p->spm_cntr->spm_ring_buf_size - 1;<br>
+               for (count = 0; count < SAMPLING_MAX; ++count) {<br>
+                       p->spm_cntr->ubuf[count].user_addr = NULL;<br>
+                       p->spm_cntr->ubuf[count].ubufsize = 0;<br>
+               }<br>
+               p->spm_cntr->buf_count = 0;<br>
+               p->spm_cntr->is_user_buf_filled = false;<br>
+               p->spm_cntr->is_nowait = false;<br>
+               p->spm_cntr->thread_ready = false;<br>
+       }<br>
+<br>
+       retval = amdgpu_amdkfd_alloc_gtt_mem(kgd,<br>
+                       p->spm_cntr->spm_ring_buf_size, &p->spm_cntr->spm_obj,<br>
+                       &p->spm_cntr->spm_gpu_addr, (void *)&p->spm_cntr->spm_cpu_addr,<br>
+                       false);<br>
+       if (retval)<br>
+               return -ENOMEM;<br>
+<br>
+       memset(p->spm_cntr->spm_cpu_addr, 0, p->spm_cntr->spm_ring_buf_size);<br>
+<br>
+       amdgpu_amdkfd_rlc_spm_acquire(kgd, p->spm_cntr->spm_gpu_addr, p->spm_cntr->spm_ring_buf_size);<br>
+<br>
+       if (!p->spm_cntr->thread_ready) {<br>
+               p->spm_cntr->name = "spm";<br>
+               retval = kfd_spm_shed_init(p, kgd);<br>
+               if (retval) {<br>
+                       DRM_ERROR("Failed to create SPM thread %s.\n", p->spm_cntr->name);<br>
+                       return retval;<br>
+               }<br>
+       }<br>
+       p->is_spm_acquired = true;<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
+int kfd_release_spm(struct kfd_process *p, struct kgd_dev *kgd)<br>
+{<br>
+       if (!p->spm_cntr)<br>
+               return -EINVAL;<br>
+<br>
+       kfd_spm_shed_fini(p);<br>
+<br>
+       amdgpu_amdkfd_free_gtt_mem(kgd, p->spm_cntr->spm_obj);<br>
+<br>
+       kfree(p->spm_cntr);<br>
+       p->spm_cntr = NULL;<br>
+       p->is_spm_acquired = false;<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
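+/*<br>
+ * Handle KFD_IOCTL_SPM_OP_SET_DEST_BUF in polling mode.  A NULL destptr<br>
+ * stops sampling and reports the fill level of the previous buffer; a<br>
+ * non-NULL destptr queues the buffer, starts sampling and drains the<br>
+ * ring until the buffer fills or the timeout work fires.<br>
+ */<br>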
+int set_dest_buf_polling(struct kfd_spm_cntr *spm, struct kgd_dev *kgd, void __user *data)<br>
+{<br>
+       struct kfd_ioctl_spm_args __user *user_spm_data =<br>
+                       (struct kfd_ioctl_spm_args __user *) data;<br>
+       u32 i;<br>
+       int ret = 0;<br>
+       unsigned long timeout;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       timeout = msecs_to_jiffies(user_spm_data->timeout) + 1;<br>
+<br>
+       /* if the destination buffer is NULL, stop spm */<br>
+       if (!user_spm_data->destptr) {<br>
+               amdgpu_amdkfd_rlc_spm_cntl(kgd, 0);<br>
+<br>
+               user_spm_data->bytes_copied = spm->spm_ring_size_copied;<br>
+               spm->spm_ring_size_copied = 0;<br>
+               user_spm_data->spmtptr = (uint64_t)spm->ubuf[0].user_addr;<br>
+<br>
+               /* retire the drained buffer and shift the queue down */<br>
+               for (i = 0; i + 1 < SAMPLING_MAX && i < spm->buf_count; ++i)<br>
+                       spm->ubuf[i] = spm->ubuf[i + 1];<br>
+               spm->ubuf[i].user_addr = NULL;<br>
+               spm->ubuf[i].ubufsize = 0;<br>
+               if (spm->buf_count > 0)<br>
+                       spm->buf_count--;<br>
+               return ret;<br>
+       }<br>
+<br>
+       if (!spm->buf_count) {<br>
+               /* First time: save the user spm buffer, then start spm sampling */<br>
+               spm->ubuf[0].user_addr = (uint64_t *)user_spm_data->destptr;<br>
+               spm->ubuf[0].ubufsize = user_spm_data->buf_size;<br>
+               user_spm_data->bytes_copied = 0;<br>
+               spm->spm_ring_size_copied = 0;<br>
+               spm->buf_count++;<br>
+               spm->is_user_buf_filled = false;<br>
+               amdgpu_amdkfd_rlc_spm_cntl(kgd, 1);<br>
+<br>
+               ret = kfd_spm_read_ring_buf_polling(spm, timeout);<br>
+               user_spm_data->bytes_copied = spm->spm_ring_size_copied;<br>
+               user_spm_data->spmtptr = (uint64_t)spm->ubuf[0].user_addr;<br>
+               spm->buf_count--;<br>
+       } else {<br>
+               spm->spm_ring_size_copied = 0;<br>
+               spm->is_user_buf_filled = false;<br>
+               amdgpu_amdkfd_rlc_spm_cntl(kgd, 1);<br>
+               ret = kfd_spm_read_ring_buf_polling(spm, timeout);<br>
+<br>
+               user_spm_data->bytes_copied = spm->spm_ring_size_copied;<br>
+               user_spm_data->spmtptr = (uint64_t)spm->ubuf[0].user_addr;<br>
+               spm->buf_count--;<br>
+<br>
+               /* queue the new buffer unless it is already queued */<br>
+               for (i = 0; i < spm->buf_count; ++i)<br>
+                       if (spm->ubuf[i].user_addr == (uint64_t *)user_spm_data->destptr)<br>
+                               break;<br>
+               if (i == spm->buf_count && spm->buf_count < SAMPLING_MAX) {<br>
+                       spm->ubuf[i].user_addr = (uint64_t *)user_spm_data->destptr;<br>
+                       spm->ubuf[i].ubufsize = user_spm_data->buf_size;<br>
+                       spm->buf_count++;<br>
+               }<br>
+<br>
+               for (i = 0; i + 1 < SAMPLING_MAX && i < spm->buf_count; ++i)<br>
+                       spm->ubuf[i] = spm->ubuf[i + 1];<br>
+               spm->ubuf[i].user_addr = NULL;<br>
+       }<br>
+<br>
+       user_spm_data->bytes_copied = spm->spm_ring_size_copied;<br>
+       return ret;<br>
+}<br>
+<br>
+int kfd_set_dest_buffer(struct kfd_process *p, struct kgd_dev *kgd, void __user *data)<br>
+{<br>
+       struct kfd_ioctl_spm_args __user *user_spm_data =<br>
+                       (struct kfd_ioctl_spm_args __user *) data;<br>
+       struct kfd_spm_cntr *spm = p->spm_cntr;<br>
+       unsigned long timeout;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       if (!p->is_spm_acquired)<br>
+               return -EINVAL;<br>
+<br>
+       timeout = msecs_to_jiffies(user_spm_data->timeout) + 1;<br>
+       spm->is_timeout = false;<br>
+       schedule_delayed_work(&p->copy_to_user_work, timeout);<br>
+       return set_dest_buf_polling(spm, kgd, data);<br>
+}<br>
+<br>
+int kfd_config_spm(struct kfd_process *p, struct kgd_dev *kgd, struct kfd_ioctl_spm_args *data)<br>
+{<br>
+       struct kfd_ioctl_spm_args __user *user_spm_data =<br>
+                       (struct kfd_ioctl_spm_args __user *) data;<br>
+       struct kfd_spm_set_reg *spm_reg;<br>
+       int ret = 0;<br>
+<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       spm_reg = kvmalloc_array(1, sizeof(struct kfd_spm_set_reg), GFP_KERNEL);<br>
+       if (!spm_reg)<br>
+               return -ENOMEM;<br>
+<br>
+       if (copy_from_user(spm_reg, (void __user *)user_spm_data->destptr, sizeof(struct kfd_spm_set_reg))) {<br>
+               pr_err("copy_from_user failed\n");<br>
+               ret = -EFAULT;<br>
+               goto exit;<br>
+       }<br>
+       amdgpu_amdkfd_rlc_spm_set_reg(kgd, spm_reg->reg, spm_reg->value);<br>
+exit:<br>
+       kvfree(spm_reg);<br>
+       return ret;<br>
+}<br>
+<br>
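+/*<br>
+ * Ioctl entry point: dispatch the requested SPM operation to the handlers<br>
+ * above on the device identified by gpu_id.<br>
+ */<br>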
+int kfd_rlc_spm(struct kfd_process *p, void __user *data,<br>
+                      uint32_t buf_size, __u32 operation)<br>
+{<br>
+       struct kfd_ioctl_spm_args *args = data;<br>
+       struct kfd_dev *dev;<br>
+       int r;<br>
+<br>
+       dev = kfd_device_by_id(args->gpu_id);<br>
+       if (!dev)<br>
+               return -EINVAL;<br>
+<br>
+       switch (operation) {<br>
+       case KFD_IOCTL_SPM_OP_ACQUIRE:<br>
+               dev->spm_pasid = p->pasid;<br>
+               r = kfd_acquire_spm(p, dev->kgd);<br>
+               break;<br>
+<br>
+       case KFD_IOCTL_SPM_OP_RELEASE:<br>
+               r = kfd_release_spm(p, dev->kgd);<br>
+               break;<br>
+<br>
+       case KFD_IOCTL_SPM_OP_SET_DEST_BUF:<br>
+               r = kfd_set_dest_buffer(p, dev->kgd, data);<br>
+               break;<br>
+<br>
+       case KFD_IOCTL_SPM_OP_CONFIG:<br>
+               r = kfd_config_spm(p, dev->kgd, args);<br>
+               break;<br>
+<br>
+       default:<br>
+               r = -EINVAL;<br>
+               break;<br>
+       }<br>
+       return r;<br>
+}<br>
+<br>
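+/*<br>
+ * Bottom half of the SPM interrupt: look up the process that owns SPM by<br>
+ * pasid and wake its reader thread to drain the ring buffer.<br>
+ */<br>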
+void kfd_spm_interrupt(unsigned int pasid)<br>
+{<br>
+       struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);<br>
+<br>
+       if (!p) {<br>
+               pr_debug("%s: no process for pasid %d\n", __func__, pasid);<br>
+               return; /* Presumably process exited. */<br>
+       }<br>
+<br>
+       mutex_lock(&p->spm_mutex);<br>
+<br>
+       if (p->spm_cntr) {<br>
+               p->spm_cntr->wanted_cluster = true;<br>
+               wake_up_interruptible(&p->spm_cntr->spm_wq);<br>
+       }<br>
+<br>
+       mutex_unlock(&p->spm_mutex);<br>
+<br>
+       kfd_unref_process(p);<br>
+}<br>
+<br>
+bool kfd_spm_interrupt_isr(struct kfd_dev *dev,<br>
+                                       uint16_t source_id, uint32_t client_id)<br>
+{<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+       /* Only the RLC streaming performance monitor interrupts are of<br>
+        * interest; they are forwarded to a work queue (see below).<br>
+        */<br>
+       return source_id == GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT ||<br>
+               source_id == VISLANDS30_IV_SRCID_RLC_STRM_PERF_MONITOR;<br>
+}<br>
+<br>
+void kfd_spm_interrupt_wq(struct kfd_dev *dev, uint16_t source_id)<br>
+{<br>
+       pr_debug("[%s]\n", __func__);<br>
+<br>
+#if 0  /* Forwarding to the reader thread is disabled for now; the polling<br>
+        * path in set_dest_buf_polling() drains the ring instead.<br>
+        */<br>
+       if (source_id == GFX_9_0__SRCID__RLC_STRM_PERF_MONITOR_INTERRUPT ||<br>
+                       source_id == VISLANDS30_IV_SRCID_RLC_STRM_PERF_MONITOR)<br>
+               kfd_spm_interrupt(dev->spm_pasid);<br>
+#endif<br>
+}<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h<br>
index 0bc0b25..fb4ad60 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h<br>
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h<br>
@@ -30,6 +30,7 @@<br>
 #define SOC15_INTSRC_SQ_INTERRUPT_MSG   239<br>
 #define SOC15_INTSRC_VMC_FAULT          0<br>
 #define SOC15_INTSRC_SDMA_TRAP          224<br>
+#define AMDGPU_IRQ_CLIENTID_LEGACY     0<br>
 <br>
 <br>
 #define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)<br>
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h<br>
index 4f66764..f7cb7d4 100644<br>
--- a/include/uapi/linux/kfd_ioctl.h<br>
+++ b/include/uapi/linux/kfd_ioctl.h<br>
@@ -442,6 +442,56 @@ struct kfd_ioctl_import_dmabuf_args {<br>
         __u32 dmabuf_fd;        /* to KFD */<br>
 };<br>
 <br>
+/**<br>
+ * kfd_ioctl_spm_op - SPM ioctl operations<br>
+ *<br>
+ * @KFD_IOCTL_SPM_OP_ACQUIRE: acquire exclusive access to SPM<br>
+ * @KFD_IOCTL_SPM_OP_RELEASE: release exclusive access to SPM<br>
+ * @KFD_IOCTL_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming<br>
+ * @KFD_IOCTL_SPM_OP_CONFIG: write an SPM configuration register<br>
+ */<br>
+enum kfd_ioctl_spm_op {<br>
+       KFD_IOCTL_SPM_OP_ACQUIRE,<br>
+       KFD_IOCTL_SPM_OP_RELEASE,<br>
+       KFD_IOCTL_SPM_OP_SET_DEST_BUF,<br>
+       KFD_IOCTL_SPM_OP_CONFIG<br>
+};<br>
+<br>
+/* Don't wait for previous buffer to fill up */<br>
+#define KFD_IOCTL_SPM_FLAG_POLLING 1<br>
+<br>
+/**<br>
+ * kfd_ioctl_spm_args - Arguments for SPM ioctl<br>
+ *<br>
+ * @op: specifies the operation to perform<br>
+ * @destptr: address of the destination buffer for @KFD_IOCTL_SPM_OP_SET_DEST_BUF<br>
+ * @spmtptr: address of the previously filled buffer, returned by KFD<br>
+ * @buf_size: size of the destination buffer for @KFD_IOCTL_SPM_OP_SET_DEST_BUF<br>
+ * @timeout: timeout in milliseconds to wait for the buffer to fill<br>
+ * @gpu_id: GPU ID<br>
+ * @bytes_copied: bytes copied from the streaming performance ring buffer<br>
+ *<br>
+ * If @destptr is NULL, the destination buffer address is unset and copying of<br>
+ * counters is stopped.<br>
+ *<br>
+ * Returns negative error code on failure. On success, @KFD_IOCTL_SPM_OP_ACQUIRE and<br>
+ * @KFD_IOCTL_SPM_OP_RELEASE return 0, @KFD_IOCTL_SPM_OP_SET_DEST_BUF returns the fill<br>
+ * level of the previous buffer.<br>
+ */<br>
+struct kfd_ioctl_spm_args {<br>
+       __u64 destptr;<br>
+       __u64 spmtptr;<br>
+       __u32 buf_size;<br>
+       __u32 op;<br>
+       __u32 timeout;<br>
+       __u32 gpu_id;   /* to KFD */<br>
+       /* from KFD: total amount of bytes copied */<br>
+       __u64 bytes_copied;<br>
+};<br>
+<br>
+struct kfd_spm_set_reg {<br>
+       __u64 reg;              /* to KFD */<br>
+       __u32 value;<br>
+};<br>
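+<br>
+/*<br>
+ * Illustrative usage sketch (not part of this patch): a profiler would<br>
+ * acquire SPM, stream samples into a user buffer, then release it.  The<br>
+ * kfd_fd, gpu_id, buf and buf_size below are assumed to exist in the<br>
+ * caller; error handling is elided.<br>
+ *<br>
+ *     struct kfd_ioctl_spm_args args = {0};<br>
+ *<br>
+ *     args.op = KFD_IOCTL_SPM_OP_ACQUIRE;<br>
+ *     args.gpu_id = gpu_id;<br>
+ *     ioctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);<br>
+ *<br>
+ *     args.op = KFD_IOCTL_SPM_OP_SET_DEST_BUF;<br>
+ *     args.destptr = (__u64)buf;<br>
+ *     args.buf_size = buf_size;<br>
+ *     args.timeout = 1000;            // milliseconds<br>
+ *     ioctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);<br>
+ *     // args.bytes_copied now holds the fill level of buf<br>
+ *<br>
+ *     args.op = KFD_IOCTL_SPM_OP_SET_DEST_BUF;<br>
+ *     args.destptr = 0;               // stop streaming<br>
+ *     ioctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);<br>
+ *<br>
+ *     args.op = KFD_IOCTL_SPM_OP_RELEASE;<br>
+ *     ioctl(kfd_fd, AMDKFD_IOC_RLC_SPM, &args);<br>
+ */<br>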
+<br>
 /* Register offset inside the remapped mmio page<br>
  */<br>
 enum kfd_mmio_remap {<br>
@@ -546,7 +596,10 @@ enum kfd_mmio_remap {<br>
 #define AMDKFD_IOC_ALLOC_QUEUE_GWS              \<br>
                 AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)<br>
 <br>
+#define AMDKFD_IOC_RLC_SPM             \<br>
+               AMDKFD_IOWR(0x1F, struct kfd_ioctl_spm_args)<br>
+<br>
 #define AMDKFD_COMMAND_START            0x01<br>
-#define AMDKFD_COMMAND_END             0x1F<br>
+#define AMDKFD_COMMAND_END             0x20<br>
 <br>
 #endif<br>
-- <br>
2.7.4<br>
<br>
_______________________________________________<br>
amd-gfx mailing list<br>
amd-gfx@lists.freedesktop.org<br>
<a href="https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C326c22b09293444c1a3f08d7eb7c3c2b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637236791554300056&amp;sdata=70Ud36avjI1hPHwGX0Enfo8YiRokubqepxdL3HLsocU%3D&amp;reserved=0">https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Cyong.zhao%40amd.com%7C326c22b09293444c1a3f08d7eb7c3c2b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637236791554300056&amp;sdata=70Ud36avjI1hPHwGX0Enfo8YiRokubqepxdL3HLsocU%3D&amp;reserved=0</a><br>
</div>
</span></font></div>
</div>
</body>
</html>