<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Felix,</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Can you review the 4th and 5th patches? They have been kept unchanged. </div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Regards,</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Yong</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Zhao, Yong<br>
<b>Sent:</b> Tuesday, February 5, 2019 3:31 PM<br>
<b>To:</b> amd-gfx@lists.freedesktop.org<br>
<b>Cc:</b> Zhao, Yong<br>
<b>Subject:</b> [PATCH 4/5] drm/amdkfd: Fix bugs regarding CP user queue doorbells mask on SOC15</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">Reserved doorbells for SDMA, IH and VCN were not properly masked out<br>
when allocating doorbells for CP user queues. This patch fixes that.<br>
<br>
Change-Id: I670adfc3fd7725d2ed0bd9665cb7f69f8b9023c2<br>
Signed-off-by: Yong Zhao &lt;Yong.Zhao@amd.com&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 17 +++++++++++----<br>
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h  |  4 ++++<br>
 drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c  |  3 +++<br>
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c  |  3 +++<br>
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  9 ++++++++<br>
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 21 ++++++++++++++-----<br>
 .../gpu/drm/amd/include/kgd_kfd_interface.h   | 19 ++++++-----------<br>
 7 files changed, 54 insertions(+), 22 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c<br>
index e957e42c539a..ee8527701731 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c<br>
@@ -196,11 +196,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)<br>
                         gpu_resources.sdma_doorbell[1][i+1] =<br>
                                 adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);<br>
                 }<br>
-               /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for<br>
-                * SDMA, IH and VCN. So don't use them for the CP.<br>
+<br>
+               /* Because of the setting in registers like<br>
+                * SDMA0_DOORBELL_RANGE etc., BIF statically uses the<br>
+                * lower 12 bits of doorbell address for routing. In<br>
+                * order to route the CP queue doorbells to CP engine,<br>
+                * the doorbells allocated to CP queues have to be<br>
+                * outside the range set for SDMA, VCN, and IH blocks<br>
+                * Prior to SOC15, all queues use queue ID to<br>
+                * determine doorbells.<br>
                  */<br>
-               gpu_resources.reserved_doorbell_mask = 0x1e0;<br>
-               gpu_resources.reserved_doorbell_val  = 0x0e0;<br>
+               gpu_resources.reserved_doorbells_start =<br>
+                               adev->doorbell_index.sdma_engine[0];<br>
+               gpu_resources.reserved_doorbells_end =<br>
+                               adev->doorbell_index.last_non_cp;<br>
 <br>
                 kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);<br>
         }<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h<br>
index 59c41841cbce..74b8e2bfabd3 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h<br>
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h<br>
@@ -70,6 +70,7 @@ struct amdgpu_doorbell_index {<br>
                         uint32_t vce_ring6_7;<br>
                 } uvd_vce;<br>
         };<br>
+       uint32_t last_non_cp;<br>
         uint32_t max_assignment;<br>
         uint32_t last_idx;<br>
         /* Per engine SDMA doorbell size in dword */<br>
@@ -141,6 +142,7 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT<br>
         AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3             = 0x18D,<br>
         AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5             = 0x18E,<br>
         AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7             = 0x18F,<br>
+       AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP             = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,<br>
         AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT            = 0x18F,<br>
         AMDGPU_VEGA20_DOORBELL_INVALID                   = 0xFFFF<br>
 } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;<br>
@@ -216,6 +218,8 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT<br>
         AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,<br>
         AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,<br>
 <br>
+       AMDGPU_DOORBELL64_LAST_NON_CP             = AMDGPU_DOORBELL64_VCE_RING6_7,<br>
+<br>
         AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,<br>
         AMDGPU_DOORBELL64_INVALID                 = 0xFFFF<br>
 } AMDGPU_DOORBELL64_ASSIGNMENT;<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c<br>
index 65214c7b0b20..76166c0ec509 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c<br>
@@ -80,6 +80,9 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)<br>
         adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3;<br>
         adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5;<br>
         adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7;<br>
+<br>
+       adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;<br>
+<br>
         /* In unit of dword doorbell */<br>
         adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1;<br>
         adev->doorbell_index.dw_range_per_sdma_eng =<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c<br>
index a388d306391a..10df2fed5a99 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c<br>
@@ -84,6 +84,9 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)<br>
         adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;<br>
         adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;<br>
         adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;<br>
+<br>
+       adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;<br>
+<br>
         adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1;<br>
         adev->doorbell_index.dw_range_per_sdma_eng =<br>
                         (adev->doorbell_index.sdma_engine[1]<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h<br>
index e5ebcca7f031..6b8459f852cc 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h<br>
@@ -103,6 +103,15 @@<br>
 <br>
 #define KFD_KERNEL_QUEUE_SIZE 2048<br>
 <br>
+/* 512 = 0x200<br>
+ * On SOC15, the doorbell index distance for SDMA RLC i and (i + 1) in the<br>
+ * same SDMA engine, where i is an even number.<br>
+ * For 8-bytes doorbells, it ensures that the mirror doorbell range (in terms<br>
+ * of low 12 bit address for each HW engine) on the second doorbell page is<br>
+ * the same as the range of the first doorbell page.*/<br>
+#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512<br>
+<br>
+<br>
 /*<br>
  * Kernel module parameter to specify maximum number of supported queues per<br>
  * device<br>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c<br>
index 80b36e860a0a..e904d6036b3d 100644<br>
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c<br>
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c<br>
@@ -607,13 +607,24 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,<br>
         if (!qpd->doorbell_bitmap)<br>
                 return -ENOMEM;<br>
 <br>
-       /* Mask out any reserved doorbells */<br>
-       for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)<br>
-               if ((dev->shared_resources.reserved_doorbell_mask & i) ==<br>
-                   dev->shared_resources.reserved_doorbell_val) {<br>
+       /* Mask out all reserved doorbells for SDMA, IH, and VCN on SOC15.<br>
+        * Because of the setting in registers like SDMA0_DOORBELL_RANGE etc.,<br>
+        * BIF statically uses the lower 12 bits of doorbell address for<br>
+        * routing. In order to route the CP queue doorbells to CP engine,<br>
+        * the doorbells allocated to CP queues have to be outside the range<br>
+        * set for SDMA, VCN, and IH blocks.<br>
+        * Prior to SOC15, all queues use queue ID to<br>
+        * determine doorbells. */<br>
+       for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {<br>
+               if (i >= dev->shared_resources.reserved_doorbells_start<br>
+                       && i <= dev->shared_resources.reserved_doorbells_end) {<br>
                         set_bit(i, qpd->doorbell_bitmap);<br>
-                       pr_debug("reserved doorbell 0x%03x\n", i);<br>
+                       set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,<br>
+                               qpd->doorbell_bitmap);<br>
+                       pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,<br>
+                               i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);<br>
                 }<br>
+       }<br>
 <br>
         return 0;<br>
 }<br>
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h<br>
index 83d960110d23..b1bf45419d93 100644<br>
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h<br>
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h<br>
@@ -137,20 +137,13 @@ struct kgd2kfd_shared_resources {<br>
         /* Bit n == 1 means Queue n is available for KFD */<br>
         DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);<br>
 <br>
-       /* Doorbell assignments (SOC15 and later chips only). Only<br>
-        * specific doorbells are routed to each SDMA engine. Others<br>
-        * are routed to IH and VCN. They are not usable by the CP.<br>
-        *<br>
-        * Any doorbell number D that satisfies the following condition<br>
-        * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val<br>
-        *<br>
-        * KFD currently uses 1024 (= 0x3ff) doorbells per process. If<br>
-        * doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means<br>
-        * mask would be set to 0x1e0 and val set to 0x0e0.<br>
-        */<br>
         unsigned int sdma_doorbell[2][8];<br>
-       unsigned int reserved_doorbell_mask;<br>
-       unsigned int reserved_doorbell_val;<br>
+<br>
+       /* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,<br>
+        * and VCN<br>
+        */<br>
+       unsigned int reserved_doorbells_start;<br>
+       unsigned int reserved_doorbells_end;<br>
 <br>
         /* Base address of doorbell aperture. */<br>
         phys_addr_t doorbell_physical_address;<br>
-- <br>
2.17.1<br>
<br>
</div>
</span></font></div>
</body>
</html>