<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<!--[if !mso]><style>v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style><![endif]--><style><!--
/* Font Definitions */
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"Segoe UI";
        panose-1:2 11 5 2 4 2 4 2 2 3;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#0563C1;
        text-decoration:underline;}
span.EmailStyle18
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
p.msipheadera4477989, li.msipheadera4477989, div.msipheadera4477989
        {mso-style-name:msipheadera4477989;
        mso-margin-top-alt:auto;
        margin-right:0in;
        mso-margin-bottom-alt:auto;
        margin-left:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-US" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<div class="WordSection1">
<p class="msipheadera4477989" style="margin:0in"><span style="font-size:10.0pt;font-family:'Arial',sans-serif;color:blue">[AMD Official Use Only]</span><o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal">This was a bug in the original definition, but functionally it makes no difference (in both cases the macros resolve to the same value).<o:p></o:p></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div style="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="MsoNormal"><b>From:</b> Nieto, David M &lt;David.Nieto@amd.com&gt; <br>
<b>Sent:</b> Wednesday, December 15, 2021 2:16 PM<br>
<b>To:</b> Skvortsov, Victor &lt;Victor.Skvortsov@amd.com&gt;; amd-gfx@lists.freedesktop.org; Deng, Emily &lt;Emily.Deng@amd.com&gt;; Liu, Monk &lt;Monk.Liu@amd.com&gt;; Ming, Davis &lt;Davis.Ming@amd.com&gt;; Liu, Shaoyun &lt;Shaoyun.Liu@amd.com&gt;; Zhou, Peng Ju &lt;PengJu.Zhou@amd.com&gt;;
 Chen, JingWen &lt;JingWen.Chen2@amd.com&gt;; Chen, Horace &lt;Horace.Chen@amd.com&gt;<br>
<b>Subject:</b> Re: [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov<o:p></o:p></p>
</div>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<p style="margin:5.0pt"><span style="font-size:10.0pt;font-family:'Arial',sans-serif;color:blue">[AMD Official Use Only]<o:p></o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
<div>
<div>
<p class="MsoNormal" style="background:white"><span style="font-family:"Segoe UI",sans-serif;color:black;background:white">         scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;</span><span style="font-size:12.0pt;color:black"><br>
</span><span style="font-family:"Segoe UI",sans-serif;color:black;background:white">         scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;</span><span style="font-family:"Segoe UI",sans-serif;color:black"><br>
<span style="background:white">-       scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;</span><br>
<span style="background:white">-       scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;</span><br>
<span style="background:white">+       scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;</span><br>
<span style="background:white">+       scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;</span><br>
<span style="background:white">         spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;</span></span><span style="font-size:12.0pt;color:black"><o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="background:white"><span style="font-size:12.0pt;color:black"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="background:white"><span style="font-family:'Segoe UI',sans-serif;color:black;background:white">the definition of scratch_reg2 and 3 has changed here.... will this be backwards compatible? Was it a bug in the definition?</span><span style="font-size:12.0pt;color:black"><o:p></o:p></span></p>
</div>
<div class="MsoNormal" align="center" style="text-align:center">
<hr size="1" width="98%" align="center">
</div>
<div id="divRplyFwdMsg">
<p class="MsoNormal"><b><span style="color:black">From:</span></b><span style="color:black"> Skvortsov, Victor <<a href="mailto:Victor.Skvortsov@amd.com">Victor.Skvortsov@amd.com</a>><br>
<b>Sent:</b> Wednesday, December 15, 2021 10:55 AM<br>
<b>To:</b> <a href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a> <<a href="mailto:amd-gfx@lists.freedesktop.org">amd-gfx@lists.freedesktop.org</a>>; Deng, Emily <<a href="mailto:Emily.Deng@amd.com">Emily.Deng@amd.com</a>>; Liu, Monk
 <<a href="mailto:Monk.Liu@amd.com">Monk.Liu@amd.com</a>>; Ming, Davis <<a href="mailto:Davis.Ming@amd.com">Davis.Ming@amd.com</a>>; Liu, Shaoyun <<a href="mailto:Shaoyun.Liu@amd.com">Shaoyun.Liu@amd.com</a>>; Zhou, Peng Ju <<a href="mailto:PengJu.Zhou@amd.com">PengJu.Zhou@amd.com</a>>;
 Chen, JingWen <<a href="mailto:JingWen.Chen2@amd.com">JingWen.Chen2@amd.com</a>>; Chen, Horace <<a href="mailto:Horace.Chen@amd.com">Horace.Chen@amd.com</a>>; Nieto, David M <<a href="mailto:David.Nieto@amd.com">David.Nieto@amd.com</a>><br>
<b>Cc:</b> Skvortsov, Victor <<a href="mailto:Victor.Skvortsov@amd.com">Victor.Skvortsov@amd.com</a>><br>
<b>Subject:</b> [PATCH 5/5] drm/amdgpu: Modify indirect register access for gfx9 sriov</span>
<o:p></o:p></p>
<div>
<p class="MsoNormal"> <o:p></o:p></p>
</div>
</div>
<div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt">Expand RLCG interface for new GC read & write commands.<br>
New interface will only be used if the PF enables the flag in pf2vf msg.<br>
<br>
Signed-off-by: Victor Skvortsov <<a href="mailto:victor.skvortsov@amd.com">victor.skvortsov@amd.com</a>><br>
---<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 111 +++++++++++++++++++-------<br>
 1 file changed, 83 insertions(+), 28 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
index d252b06efa43..bce6ab52cae0 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
@@ -63,6 +63,13 @@<br>
 #define mmGCEA_PROBE_MAP                        0x070c<br>
 #define mmGCEA_PROBE_MAP_BASE_IDX               0<br>
 <br>
+#define GFX9_RLCG_GC_WRITE_OLD                 (0x8 << 28)<br>
+#define GFX9_RLCG_GC_WRITE                     (0x0 << 28)<br>
+#define GFX9_RLCG_GC_READ                      (0x1 << 28)<br>
+#define GFX9_RLCG_VFGATE_DISABLED              0x4000000<br>
+#define GFX9_RLCG_WRONG_OPERATION_TYPE         0x2000000<br>
+#define GFX9_RLCG_NOT_IN_RANGE                 0x1000000<br>
+<br>
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");<br>
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");<br>
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");<br>
@@ -739,7 +746,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =<br>
         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,<br>
 };<br>
 <br>
-static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag)<br>
+static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag)<br>
 {<br>
         static void *scratch_reg0;<br>
         static void *scratch_reg1;<br>
@@ -748,21 +755,20 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 f<br>
         static void *spare_int;<br>
         static uint32_t grbm_cntl;<br>
         static uint32_t grbm_idx;<br>
+       uint32_t i = 0;<br>
+       uint32_t retries = 50000;<br>
+       u32 ret = 0;<br>
+       u32 tmp;<br>
 <br>
         scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;<br>
         scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;<br>
-       scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;<br>
-       scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;<br>
+       scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4;<br>
+       scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4;<br>
         spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;<br>
 <br>
         grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;<br>
         grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;<br>
 <br>
-       if (amdgpu_sriov_runtime(adev)) {<br>
-               pr_err("shouldn't call rlcg write register during runtime\n");<br>
-               return;<br>
-       }<br>
-<br>
         if (offset == grbm_cntl || offset == grbm_idx) {<br>
                 if (offset  == grbm_cntl)<br>
                         writel(v, scratch_reg2);<br>
@@ -771,41 +777,89 @@ static void gfx_v9_0_rlcg_w(struct amdgpu_device *adev, u32 offset, u32 v, u32 f<br>
 <br>
                 writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));<br>
         } else {<br>
-               uint32_t i = 0;<br>
-               uint32_t retries = 50000;<br>
-<br>
                 writel(v, scratch_reg0);<br>
-               writel(offset | 0x80000000, scratch_reg1);<br>
+               writel(offset | flag, scratch_reg1);<br>
                 writel(1, spare_int);<br>
-               for (i = 0; i < retries; i++) {<br>
-                       u32 tmp;<br>
 <br>
+               for (i = 0; i < retries; i++) {<br>
                         tmp = readl(scratch_reg1);<br>
-                       if (!(tmp & 0x80000000))<br>
+                       if (!(tmp & flag))<br>
                                 break;<br>
 <br>
                         udelay(10);<br>
                 }<br>
-               if (i >= retries)<br>
-                       pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);<br>
+<br>
+               if (i >= retries) {<br>
+                       if (amdgpu_sriov_reg_indirect_gc(adev)) {<br>
+                               if (tmp & GFX9_RLCG_VFGATE_DISABLED)<br>
+                                       pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset);<br>
+                               else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE)<br>
+                                       pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset);<br>
+                               else if (tmp & GFX9_RLCG_NOT_IN_RANGE)<br>
+                                       pr_err("The register is not in range, program reg:0x%05x failed!\n", offset);<br>
+                               else<br>
+                                       pr_err("Unknown error type, program reg:0x%05x failed!\n", offset);<br>
+                       } else<br>
+                               pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset);<br>
+               }<br>
+       }<br>
+<br>
+       ret = readl(scratch_reg0);<br>
+<br>
+       return ret;<br>
+}<br>
+<br>
+static bool gfx_v9_0_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip,<br>
+                               int write, u32 *rlcg_flag)<br>
+{<br>
+<br>
+       switch (hwip) {<br>
+       case GC_HWIP:<br>
+               if (amdgpu_sriov_reg_indirect_gc(adev)) {<br>
+                       *rlcg_flag = write ? GFX9_RLCG_GC_WRITE : GFX9_RLCG_GC_READ;<br>
+<br>
+                       return true;<br>
+               /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */<br>
+               } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {<br>
+                       *rlcg_flag = GFX9_RLCG_GC_WRITE_OLD;<br>
+                       return true;<br>
+               }<br>
+<br>
+               break;<br>
+       default:<br>
+               return false;<br>
         }<br>
 <br>
+       return false;<br>
+}<br>
+<br>
+static u32 gfx_v9_0_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip)<br>
+{<br>
+       u32 rlcg_flag;<br>
+<br>
+       if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag))<br>
+               return gfx_v9_0_rlcg_rw(adev, offset, 0, rlcg_flag);<br>
+<br>
+       if (acc_flags & AMDGPU_REGS_NO_KIQ)<br>
+               return RREG32_NO_KIQ(offset);<br>
+       else<br>
+               return RREG32(offset);<br>
 }<br>
 <br>
 static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset,<br>
-                              u32 v, u32 acc_flags, u32 hwip)<br>
+                              u32 value, u32 acc_flags, u32 hwip)<br>
 {<br>
-       if ((acc_flags & AMDGPU_REGS_RLC) &&<br>
-           amdgpu_sriov_fullaccess(adev)) {<br>
-               gfx_v9_0_rlcg_w(adev, offset, v, acc_flags);<br>
+       u32 rlcg_flag;<br>
 <br>
+       if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) {<br>
+               gfx_v9_0_rlcg_rw(adev, offset, value, rlcg_flag);<br>
                 return;<br>
         }<br>
 <br>
         if (acc_flags & AMDGPU_REGS_NO_KIQ)<br>
-               WREG32_NO_KIQ(offset, v);<br>
+               WREG32_NO_KIQ(offset, value);<br>
         else<br>
-               WREG32(offset, v);<br>
+               WREG32(offset, value);<br>
 }<br>
 <br>
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042<br>
@@ -5134,7 +5188,7 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)<br>
         if (amdgpu_sriov_is_pp_one_vf(adev))<br>
                 data = RREG32_NO_KIQ(reg);<br>
         else<br>
-               data = RREG32(reg);<br>
+               data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);<br>
 <br>
         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;<br>
         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;<br>
@@ -5190,6 +5244,7 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {<br>
         .start = gfx_v9_0_rlc_start,<br>
         .update_spm_vmid = gfx_v9_0_update_spm_vmid,<br>
         .sriov_wreg = gfx_v9_0_sriov_wreg,<br>
+       .sriov_rreg = gfx_v9_0_sriov_rreg,<br>
         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,<br>
 };<br>
 <br>
@@ -5795,16 +5850,16 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,<br>
 <br>
         switch (state) {<br>
         case AMDGPU_IRQ_STATE_DISABLE:<br>
-               mec_int_cntl = RREG32(mec_int_cntl_reg);<br>
+               mec_int_cntl = RREG32_SOC15_IP(GC,mec_int_cntl_reg);<br>
                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,<br>
                                              TIME_STAMP_INT_ENABLE, 0);<br>
-               WREG32(mec_int_cntl_reg, mec_int_cntl);<br>
+               WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);<br>
                 break;<br>
         case AMDGPU_IRQ_STATE_ENABLE:<br>
-               mec_int_cntl = RREG32(mec_int_cntl_reg);<br>
+               mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);<br>
                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,<br>
                                              TIME_STAMP_INT_ENABLE, 1);<br>
-               WREG32(mec_int_cntl_reg, mec_int_cntl);<br>
+               WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);<br>
                 break;<br>
         default:<br>
                 break;<br>
-- <br>
2.25.1<o:p></o:p></p>
</div>
</div>
</div>
</div>
</body>
</html>