<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0078D7;margin:15pt;" align="Left">
[AMD Official Use Only - Internal Distribution Only]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Hi Ken,</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
The KMD has never used ACQUIRE_MEM before. It has to be requested explicitly, because it's a change in KMD behavior.<br>
</div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
Marek<br>
</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Qiao, Ken &lt;Ken.Qiao@amd.com&gt;<br>
<b>Sent:</b> March 26, 2020 02:08<br>
<b>To:</b> Grodzovsky, Andrey &lt;Andrey.Grodzovsky@amd.com&gt;; amd-gfx@lists.freedesktop.org &lt;amd-gfx@lists.freedesktop.org&gt;<br>
<b>Cc:</b> Olsak, Marek &lt;Marek.Olsak@amd.com&gt;; Grodzovsky, Andrey &lt;Andrey.Grodzovsky@amd.com&gt;; Zhou, David(ChunMing) &lt;David1.Zhou@amd.com&gt;<br>
<b>Subject:</b> 回复: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">Hi Andrey,<br>
<br>
Why not set the default value of the sync_mem flag to true in the KMD? That way the legacy UMD driver stays compatible with it without any change. And if a UMD doesn’t need ACQUIRE_MEM, it can explicitly disable it by setting the flag to false.<br>
<br>
Thanks,<br>
Ken<br>
<br>
-----邮件原件-----<br>
发件人: Andrey Grodzovsky &lt;andrey.grodzovsky@amd.com&gt; <br>
发送时间: 2020年3月25日 22:30<br>
收件人: amd-gfx@lists.freedesktop.org<br>
抄送: Qiao, Ken &lt;Ken.Qiao@amd.com&gt;; Olsak, Marek &lt;Marek.Olsak@amd.com&gt;; Grodzovsky, Andrey &lt;Andrey.Grodzovsky@amd.com&gt;<br>
主题: [PATCH 3/4] drm/amdgpu: Add mem_sync implementation for all the ASICs.<br>
<br>
Implement the .mem_sync hook defined earlier.<br>
<br>
Signed-off-by: Andrey Grodzovsky &lt;andrey.grodzovsky@amd.com&gt;<br>
---<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 27 ++++++++++++++++++++++++++-<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c  | 16 +++++++++++++++-<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c  | 16 +++++++++++++++-<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 17 ++++++++++++++++-<br>
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 22 +++++++++++++++++++++-<br>
 5 files changed, 93 insertions(+), 5 deletions(-)<br>
<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
index 7f9ac1a1..d7f3177 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
@@ -5236,6 +5236,29 @@ static int gfx_v10_0_kiq_irq(struct amdgpu_device *adev,<br>
         return 0;<br>
 }<br>
 <br>
+static void gfx_v10_0_mem_sync(struct amdgpu_ring *ring)<br>
+{<br>
+       unsigned gcr_cntl = PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_INV(1) |<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLM_WB(1) |<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GL1_INV(1) |<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |<br>
+                           /* TODO is this equivalent to V_586_GLI_ALL ? */<br>
+                           PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);<br>
+<br>
+       /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */<br>
+       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));<br>
+       amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */<br>
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */<br>
+       amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */<br>
+       amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */<br>
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */<br>
+       amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */<br>
+       amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {<br>
         .name = "gfx_v10_0",<br>
         .early_init = gfx_v10_0_early_init,<br>
@@ -5283,7 +5306,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {<br>
                 3 + /* CNTX_CTRL */<br>
                 5 + /* HDP_INVL */<br>
                 8 + 8 + /* FENCE x2 */<br>
-               2, /* SWITCH_BUFFER */<br>
+               2 + /* SWITCH_BUFFER */<br>
+               8, /* gfx_v10_0_mem_sync */<br>
         .emit_ib_size = 4, /* gfx_v10_0_ring_emit_ib_gfx */<br>
         .emit_ib = gfx_v10_0_ring_emit_ib_gfx,<br>
         .emit_fence = gfx_v10_0_ring_emit_fence,<br>
@@ -5304,6 +5328,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {<br>
         .emit_wreg = gfx_v10_0_ring_emit_wreg,<br>
         .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,<br>
         .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,<br>
+       .mem_sync = gfx_v10_0_mem_sync,<br>
 };<br>
 <br>
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c<br>
index 31f44d0..ced6459 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c<br>
@@ -3466,6 +3466,18 @@ static int gfx_v6_0_set_powergating_state(void *handle,<br>
         return 0;<br>
 }<br>
 <br>
+static void gfx_v6_0_mem_sync(struct amdgpu_ring *ring)<br>
+{<br>
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));<br>
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |<br>
+                         PACKET3_TC_ACTION_ENA |<br>
+                         PACKET3_SH_KCACHE_ACTION_ENA |<br>
+                         PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */<br>
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */<br>
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */<br>
+       amdgpu_ring_write(ring, 0x0000000A); /* poll interval */<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {<br>
         .name = "gfx_v6_0",<br>
         .early_init = gfx_v6_0_early_init,<br>
@@ -3496,7 +3508,8 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {<br>
                 14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */<br>
                 7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */<br>
                 SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */<br>
-               3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */<br>
+               3 + 2 + /* gfx_v6_ring_emit_cntxcntl including vgt flush */<br>
+               5, /* SURFACE_SYNC */<br>
         .emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */<br>
         .emit_ib = gfx_v6_0_ring_emit_ib,<br>
         .emit_fence = gfx_v6_0_ring_emit_fence,<br>
@@ -3507,6 +3520,7 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {<br>
         .insert_nop = amdgpu_ring_insert_nop,<br>
         .emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,<br>
         .emit_wreg = gfx_v6_0_ring_emit_wreg,<br>
+       .mem_sync = gfx_v6_0_mem_sync,<br>
 };<br>
 <br>
 static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
index 733d398..88c54c4 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c<br>
@@ -5001,6 +5001,18 @@ static int gfx_v7_0_set_powergating_state(void *handle,<br>
         return 0;<br>
 }<br>
 <br>
+static void gfx_v7_0_mem_sync(struct amdgpu_ring *ring)<br>
+{<br>
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));<br>
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |<br>
+                         PACKET3_TC_ACTION_ENA |<br>
+                         PACKET3_SH_KCACHE_ACTION_ENA |<br>
+                         PACKET3_SH_ICACHE_ACTION_ENA);  /* CP_COHER_CNTL */<br>
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */<br>
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */<br>
+       amdgpu_ring_write(ring, 0x0000000A); /* poll interval */<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {<br>
         .name = "gfx_v7_0",<br>
         .early_init = gfx_v7_0_early_init,<br>
@@ -5033,7 +5045,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {<br>
                 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */<br>
                 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */<br>
                 CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */<br>
-               3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/<br>
+               3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/<br>
+               5, /* SURFACE_SYNC */<br>
         .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */<br>
         .emit_ib = gfx_v7_0_ring_emit_ib_gfx,<br>
         .emit_fence = gfx_v7_0_ring_emit_fence_gfx,<br>
@@ -5048,6 +5061,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {<br>
         .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,<br>
         .emit_wreg = gfx_v7_0_ring_emit_wreg,<br>
         .soft_recovery = gfx_v7_0_ring_soft_recovery,<br>
+       .mem_sync = gfx_v7_0_mem_sync,<br>
 };<br>
 <br>
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
index fc32586..0b1d3a8 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c<br>
@@ -6815,6 +6815,19 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,<br>
         return 0;<br>
 }<br>
 <br>
+static void gfx_v8_0_mem_sync(struct amdgpu_ring *ring)<br>
+{<br>
+       amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));<br>
+       amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |<br>
+                         PACKET3_TC_ACTION_ENA |<br>
+                         PACKET3_SH_KCACHE_ACTION_ENA |<br>
+                         PACKET3_SH_ICACHE_ACTION_ENA |<br>
+                         PACKET3_TC_WB_ACTION_ENA);  /* CP_COHER_CNTL */<br>
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */<br>
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE */<br>
+       amdgpu_ring_write(ring, 0x0000000A); /* poll interval */<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {<br>
         .name = "gfx_v8_0",<br>
         .early_init = gfx_v8_0_early_init,<br>
@@ -6861,7 +6874,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {<br>
                 3 + /* CNTX_CTRL */<br>
                 5 + /* HDP_INVL */<br>
                 12 + 12 + /* FENCE x2 */<br>
-               2, /* SWITCH_BUFFER */<br>
+               2 + /* SWITCH_BUFFER */<br>
+               5, /* SURFACE_SYNC */<br>
         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */<br>
         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,<br>
         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,<br>
@@ -6879,6 +6893,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {<br>
         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,<br>
         .emit_wreg = gfx_v8_0_ring_emit_wreg,<br>
         .soft_recovery = gfx_v8_0_ring_soft_recovery,<br>
+       .mem_sync = gfx_v8_0_mem_sync,<br>
 };<br>
 <br>
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {<br>
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
index fb567cf..f851e80 100644<br>
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c<br>
@@ -6613,6 +6613,24 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,<br>
         return 0;<br>
 }<br>
 <br>
+static void gfx_v9_0_mem_sync(struct amdgpu_ring *ring)<br>
+{<br>
+       unsigned cp_coher_cntl = PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |<br>
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |<br>
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |<br>
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |<br>
+                                PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);<br>
+<br>
+       /* ACQUIRE_MEM -make one or more surfaces valid for use by the subsequent operations */<br>
+       amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));<br>
+       amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */<br>
+       amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */<br>
+       amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */<br>
+       amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */<br>
+       amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */<br>
+       amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */<br>
+}<br>
+<br>
 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {<br>
         .name = "gfx_v9_0",<br>
         .early_init = gfx_v9_0_early_init,<br>
@@ -6659,7 +6677,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {<br>
                 3 + /* CNTX_CTRL */<br>
                 5 + /* HDP_INVL */<br>
                 8 + 8 + /* FENCE x2 */<br>
-               2, /* SWITCH_BUFFER */<br>
+               2 + /* SWITCH_BUFFER */<br>
+               7, /* gfx_v9_0_mem_sync */<br>
         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */<br>
         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,<br>
         .emit_fence = gfx_v9_0_ring_emit_fence,<br>
@@ -6680,6 +6699,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {<br>
         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,<br>
         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,<br>
         .soft_recovery = gfx_v9_0_ring_soft_recovery,<br>
+       .mem_sync = gfx_v9_0_mem_sync,<br>
 };<br>
 <br>
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {<br>
--<br>
2.7.4<br>
<br>
</div>
</span></font></div>
</div>
</body>
</html>