[PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

Zhu, Changfeng Changfeng.Zhu at amd.com
Fri Oct 25 03:20:21 UTC 2019


Inline.


-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Tuikov, Luben
Sent: Friday, October 25, 2019 5:17 AM
To: amd-gfx at lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher at amd.com>; Pelloux-prayer, Pierre-eric <Pierre-eric.Pelloux-prayer at amd.com>; Tuikov, Luben <Luben.Tuikov at amd.com>; Koenig, Christian <Christian.Koenig at amd.com>
Subject: [PATCH] drm/amdgpu: GFX9, GFX10: GRBM requires 1-cycle delay

The GRBM interface is now capable of bursting 1-cycle op per register, a WRITE followed by another WRITE, or a WRITE followed by a READ--much faster than the previous multi-cycle per completed-transaction interface. This causes a problem, whereby status registers requiring a read/write by hardware have a 1-cycle delay, due to the register update having to go through the GRBM interface.

This patch adds this delay.

A one cycle read op is added after updating the invalidate request and before reading the invalidate-ACK status.

See also commit
534991731cb5fa94b5519957646cf849ca10d17d.

Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 9 +++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index ac43b1af69e3..0042868dbd53 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5129,7 +5129,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 		5 + /* COND_EXEC */
 		7 + /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
 		2 + /* VM_FLUSH */
 		8 + /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
@@ -5182,7 +5182,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 		5 + /* hdp invalidate */
 		7 + /* gfx_v10_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
 		2 + /* gfx_v10_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v10_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	7, /* gfx_v10_0_ring_emit_ib_compute */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9fe95e7693d5..9a7a717208de 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -6218,7 +6218,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 		5 +  /* COND_EXEC */
 		7 +  /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
 		2 + /* VM_FLUSH */
 		8 +  /* FENCE for VM_FLUSH */
 		20 + /* GDS switch */
@@ -6271,7 +6271,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 		5 + /* hdp invalidate */
 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 * 2 +
 		2 + /* gfx_v9_0_ring_emit_vm_flush */
 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 6e1b25bd1fe7..100d526e9a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -346,6 +346,15 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 
 	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
 
+	/* Insert a dummy read to delay one cycle before the ACK
+	 * inquiry.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+		amdgpu_ring_emit_reg_wait(ring,
+					  hub->vm_inv_eng0_req + eng, 0, 0);
+
 	/* wait for the invalidate to complete */
 	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
 				  1 << vmid, 1 << vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 9f2a893871ec..8f3097e45299 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -495,6 +495,14 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
+	/* Insert a dummy read to delay one cycle before the ACK
+	 * inquiry.
+	 */
+	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX  ||
+	    ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+		amdgpu_ring_emit_reg_wait(ring,
+					  hub->vm_inv_eng0_req + eng, 0, 0);
+


	Why do we add amdgpu_ring_emit_reg_wait here? Unlike in gmc10, there is no amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); before it.
	In gmc9, amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait are called in amdgpu_ring_emit_reg_write_reg_wait.
	I think it may be more reasonable to add the dummy amdgpu_ring_emit_reg_wait in amdgpu_ring_emit_reg_write_reg_wait.
	Besides, we should also think about the influence of the SR-IOV patch:
	drm/amdgpu: Remove the sriov checking and add firmware checking



 	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
 					    hub->vm_inv_eng0_ack + eng,
 					    req, 1 << vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index b8fdb192f6d6..0c41b4fdc58b 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1588,7 +1588,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
--
2.23.0.385.gbc12974a89

_______________________________________________
amd-gfx mailing list
amd-gfx at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list