[PATCH 2/2] drm/amdgpu: invalidate semaphore mmhub workaround for gfx9/gfx10

Changfeng.Zhu changfeng.zhu at amd.com
Thu Nov 14 10:17:05 UTC 2019


From: changzhu <Changfeng.Zhu at amd.com>

MMHUB may lose its GPUVM invalidate acknowledge state across a power-gating
off cycle while it is doing invalidation req/ack work.

So we must acquire/release one of the vm_invalidate_eng*_sem around the
invalidation req/ack.

Besides, vm_invalidate_eng*_sem becomes read-only once it has been
acquired. So a deadlock may occur when one process acquires
vm_invalidate_eng*_sem and another process immediately tries to acquire
the same vm_invalidate_eng*_sem.

To avoid this deadlock, a spinlock must be held while doing the
invalidation req/ack.

Change-Id: Ica63593e1dc26444ac9c05cced0988515082def3
Signed-off-by: changzhu <Changfeng.Zhu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 60 ++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 90 +++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c |  8 ++-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c  |  8 ++-
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c  | 12 +++-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c  | 12 +++-
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c  | 12 +++-
 8 files changed, 190 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index af2615ba52aa..b7948c63ad0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -29,6 +29,7 @@
 #include "hdp/hdp_5_0_0_sh_mask.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "mmhub/mmhub_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_offset.h"
 #include "dcn/dcn_2_0_0_sh_mask.h"
 #include "oss/osssys_5_0_0_offset.h"
@@ -232,7 +233,30 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 	u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
-	unsigned int i;
+	unsigned int i, j;
+	uint32_t vm_inv_eng0_sem = SOC15_REG_OFFSET(MMHUB, 0,
+						    mmMMVM_INVALIDATE_ENG0_SEM);
+
+	spin_lock(&adev->gmc.invalidate_lock);
+
+	/*
+	 * mmhub loses gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid mmhub entering power gated
+	 * state to WA the Issue
+	 */
+	if (vmhub == AMDGPU_MMHUB_0 || vmhub == AMDGPU_MMHUB_1) {
+		for (j = 0; j < adev->usec_timeout; j++) {
+			/* a read return value of 1 means semaphore acuqire */
+			tmp = RREG32_NO_KIQ(vm_inv_eng0_sem + eng);
+			if (tmp & 0x1)
+				break;
+			udelay(1);
+		}
+
+	if (j >= adev->usec_timeout)
+		DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+	}
 
 	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
@@ -253,6 +277,15 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
 		udelay(1);
 	}
 
+	/*
+	 * add semaphore release after invalidation,
+	 * write with 0 means semaphore release
+	 */
+	if (vmhub == AMDGPU_MMHUB_0 || vmhub == AMDGPU_MMHUB_1)
+		WREG32_NO_KIQ(vm_inv_eng0_sem + eng, 0);
+
+	spin_unlock(&adev->gmc.invalidate_lock);
+
 	if (i < adev->usec_timeout)
 		return;
 
@@ -334,9 +367,26 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					     unsigned vmid, uint64_t pd_addr)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
 	unsigned eng = ring->vm_inv_eng;
+	uint32_t vm_inv_eng0_sem = SOC15_REG_OFFSET(MMHUB, 0,
+						    mmMMVM_INVALIDATE_ENG0_SEM);
+
+	spin_lock(&adev->gmc.invalidate_lock);
+
+	/*
+	 * mmhub loses gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid mmhub entering power gated
+	 * state to WA the Issue
+	 */
+
+	if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || ring->funcs->vmhub == AMDGPU_MMHUB_1)
+		/* a read return value of 1 means semaphore acuqire */
+		amdgpu_ring_emit_reg_wait(ring,
+					  vm_inv_eng0_sem + eng, 0x1, 0x1);
 
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
 			      lower_32_bits(pd_addr));
@@ -347,6 +397,14 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
 					    hub->vm_inv_eng0_ack + eng,
 					    req, 1 << vmid);
+	/*
+	 * add semaphore release after invalidation,
+	 * write with 0 means semaphore release
+	 */
+	if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || ring->funcs->vmhub == AMDGPU_MMHUB_1)
+		amdgpu_ring_emit_wreg(ring, vm_inv_eng0_sem + eng, 0);
+
+	spin_unlock(&adev->gmc.invalidate_lock);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e8e1e6e86e77..9291d1af743e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -34,6 +34,7 @@
 #include "dce/dce_12_0_sh_mask.h"
 #include "vega10_enum.h"
 #include "mmhub/mmhub_1_0_offset.h"
+#include "mmhub/mmhub_2_0_0_offset.h"
 #include "athub/athub_1_0_offset.h"
 #include "oss/osssys_4_0_offset.h"
 
@@ -432,8 +433,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 					uint32_t vmhub, uint32_t flush_type)
 {
 	const unsigned eng = 17;
-	u32 j, tmp;
+	u32 i, j, tmp;
 	struct amdgpu_vmhub *hub;
+	int32_t vm_inv_eng0_sem = SOC15_REG_OFFSET(MMHUB, 0,
+						   mmMMVM_INVALIDATE_ENG0_SEM);
 
 	BUG_ON(vmhub >= adev->num_vmhubs);
 
@@ -449,12 +452,63 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 		uint32_t req = hub->vm_inv_eng0_req + eng;
 		uint32_t ack = hub->vm_inv_eng0_ack + eng;
 
+	spin_lock(&adev->gmc.invalidate_lock);
+
+	/*
+	 * mmhub loses gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid mmhub entering power gated
+	 * state to WA the Issue
+	 */
+	if (vmhub == AMDGPU_MMHUB_0 || vmhub == AMDGPU_MMHUB_1) {
+		for (i = 0; i < adev->usec_timeout; i++) {
+			/* a read return value of 1 means semaphore acuqire */
+			tmp = RREG32_NO_KIQ(vm_inv_eng0_sem + eng);
+			if (tmp & 0x1)
+				break;
+			udelay(1);
+		}
+
+	if (i >= adev->usec_timeout)
+		DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+	}
+
 		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
 				1 << vmid);
+
+	/*
+	 * add semaphore release after invalidation,
+	 * write with 0 means semaphore release
+	 */
+	if (vmhub == AMDGPU_MMHUB_0 || vmhub == AMDGPU_MMHUB_1)
+		WREG32_NO_KIQ(vm_inv_eng0_sem + eng, 0);
+
+	spin_unlock(&adev->gmc.invalidate_lock);
+
 		return;
 	}
 
 	spin_lock(&adev->gmc.invalidate_lock);
+
+	/*
+	 * mmhub loses gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid mmhub entering power gated
+	 * state to WA the Issue
+	 */
+	if (vmhub == AMDGPU_MMHUB_0 || vmhub == AMDGPU_MMHUB_1) {
+		for (i = 0; i < adev->usec_timeout; i++) {
+			/* a read return value of 1 means semaphore acuqire */
+			tmp = RREG32_NO_KIQ(vm_inv_eng0_sem + eng);
+			if (tmp & 0x1)
+				break;
+			udelay(1);
+		}
+
+	if (i >= adev->usec_timeout)
+		DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
+	}
+
 	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
 	/*
@@ -470,7 +524,16 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 			break;
 		udelay(1);
 	}
+
+	/*
+	 * add semaphore release after invalidation,
+	 * write with 0 means semaphore release
+	 */
+	if (vmhub == AMDGPU_MMHUB_0 || vmhub == AMDGPU_MMHUB_1)
+		WREG32_NO_KIQ(vm_inv_eng0_sem + eng, 0);
+
 	spin_unlock(&adev->gmc.invalidate_lock);
+
 	if (j < adev->usec_timeout)
 		return;
 
@@ -484,6 +547,22 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
 	uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
 	unsigned eng = ring->vm_inv_eng;
+	uint32_t vm_inv_eng0_sem = SOC15_REG_OFFSET(MMHUB, 0,
+						    mmMMVM_INVALIDATE_ENG0_SEM);
+
+	spin_lock(&adev->gmc.invalidate_lock);
+
+	/*
+	 * mmhub loses gpuvm invalidate acknowldege state across power-gating
+	 * off cycle, add semaphore acquire before invalidation and semaphore
+	 * release after invalidation to avoid mmhub entering power gated
+	 * state to WA the Issue
+	 */
+
+	if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || ring->funcs->vmhub == AMDGPU_MMHUB_1)
+		/* a read return value of 1 means semaphore acuqire */
+		amdgpu_ring_emit_reg_wait(ring,
+					  vm_inv_eng0_sem + eng, 0x1, 0x1);
 
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
 			      lower_32_bits(pd_addr));
@@ -495,6 +574,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 					    hub->vm_inv_eng0_ack + eng,
 					    req, 1 << vmid);
 
+	/*
+	 * add semaphore release after invalidation,
+	 * write with 0 means semaphore release
+	 */
+	if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || ring->funcs->vmhub == AMDGPU_MMHUB_1)
+		amdgpu_ring_emit_wreg(ring, vm_inv_eng0_sem + eng, 0);
+
+	spin_unlock(&adev->gmc.invalidate_lock);
+
 	return pd_addr;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 482d3e224067..cba8bf958fc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2210,7 +2210,9 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
 		/* sdma_v4_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
-		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+		10 + 10 + 10 + /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+		2 * (3 + 6), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
 	.emit_ib = sdma_v4_0_ring_emit_ib,
 	.emit_fence = sdma_v4_0_ring_emit_fence,
@@ -2242,7 +2244,9 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = {
 		/* sdma_v4_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
-		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+		10 + 10 + 10 + /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+		2 * (3 + 6), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
 	.emit_ib = sdma_v4_0_ring_emit_ib,
 	.emit_fence = sdma_v4_0_ring_emit_fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 8c2b31d4017e..0bf4388d8975 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -1784,7 +1784,9 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* uvd_v7_0_ring_emit_vm_flush */
-		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
+		14 + 14 + /* uvd_v7_0_ring_emit_fence x2 vm fence */
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
 	.emit_ib = uvd_v7_0_ring_emit_ib,
 	.emit_fence = uvd_v7_0_ring_emit_fence,
@@ -1817,7 +1819,9 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
 		4 + /* uvd_v7_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
-		1, /* uvd_v7_0_enc_ring_insert_end */
+		1 + /* uvd_v7_0_enc_ring_insert_end */
+		2 * (3 + 4), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
 	.emit_ib = uvd_v7_0_enc_ring_emit_ib,
 	.emit_fence = uvd_v7_0_enc_ring_emit_fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 93d80ef17685..69abb9bfa6bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1080,7 +1080,9 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
 		4 + /* vce_v4_0_emit_vm_flush */
 		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
-		1, /* vce_v4_0_ring_insert_end */
+		1 + /* vce_v4_0_ring_insert_end */
+		2 * (3 + 4), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
 	.emit_ib = vce_v4_0_ring_emit_ib,
 	.emit_vm_flush = vce_v4_0_emit_vm_flush,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index b23362102e51..23270deea788 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -2209,7 +2209,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* vcn_v1_0_dec_ring_emit_vm_flush */
 		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
-		6,
+		6 +
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
 	.emit_ib = vcn_v1_0_dec_ring_emit_ib,
 	.emit_fence = vcn_v1_0_dec_ring_emit_fence,
@@ -2242,7 +2244,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
 		4 + /* vcn_v1_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
-		1, /* vcn_v1_0_enc_ring_insert_end */
+		1 + /* vcn_v1_0_enc_ring_insert_end */
+		2 * (3 + 4), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
 	.emit_ib = vcn_v1_0_enc_ring_emit_ib,
 	.emit_fence = vcn_v1_0_enc_ring_emit_fence,
@@ -2276,7 +2280,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */
 		26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */
-		6,
+		6 +
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */
 	.emit_ib = vcn_v1_0_jpeg_ring_emit_ib,
 	.emit_fence = vcn_v1_0_jpeg_ring_emit_fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 16f192f6c967..83cc0da37cb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -2171,7 +2171,9 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
 		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
-		6,
+		6 +
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
 	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
 	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
@@ -2202,7 +2204,9 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
-		1, /* vcn_v2_0_enc_ring_insert_end */
+		1 + /* vcn_v2_0_enc_ring_insert_end */
+		2 * (3 + 4), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
@@ -2231,7 +2235,9 @@ static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
 		18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
-		8 + 16,
+		8 + 16 +
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
 	.emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
 	.emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index ff6cc77ad0b0..82a190185bba 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1008,7 +1008,9 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
 		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
-		6,
+		6 +
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
 	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
 	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
@@ -1108,7 +1110,9 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
 		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
 		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
-		1, /* vcn_v2_0_enc_ring_insert_end */
+		1 + /* vcn_v2_0_enc_ring_insert_end */
+		2 * (3 + 4), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
 	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
 	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
@@ -1187,7 +1191,9 @@ static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
 		18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
-		8 + 16,
+		8 + 16 +
+		2 * (6 + 8), /* semaphore acquire/release WA for mmhub invalidation
+			      * adds more packets in gmc flush */
 	.emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
 	.emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
 	.emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
-- 
2.17.1



More information about the amd-gfx mailing list