[PATCH 06/11] drm/radeon: make VM flushs a ring operation

Christian König deathsimple at vodafone.de
Mon Aug 20 01:08:18 PDT 2012


Move flushing the VMs as function into the rings.
First step to make VM operations async.

Signed-off-by: Christian König <deathsimple at vodafone.de>
Reviewed-by: Jerome Glisse <jglisse at redhat.com>
---
 drivers/gpu/drm/radeon/ni.c          |   31 ++++++++++++++++---------------
 drivers/gpu/drm/radeon/radeon.h      |    6 ++++--
 drivers/gpu/drm/radeon/radeon_asic.c |   12 +++++++++---
 drivers/gpu/drm/radeon/radeon_asic.h |    3 +--
 drivers/gpu/drm/radeon/radeon_cs.c   |    1 +
 drivers/gpu/drm/radeon/radeon_gart.c |    4 +++-
 drivers/gpu/drm/radeon/radeon_ring.c |    8 ++++++++
 drivers/gpu/drm/radeon/si.c          |   15 ---------------
 8 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3b1aab3..ad337e8 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1494,24 +1494,9 @@ int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
-	/* flush hdp cache */
-	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-	/* bits 0-7 are the VM contexts0-7 */
-	WREG32(VM_INVALIDATE_REQUEST, 1 << id);
 	return 0;
 }
 
-void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	if (vm->id == -1)
-		return;
-
-	/* flush hdp cache */
-	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-	/* bits 0-7 are the VM contexts0-7 */
-	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
-}
-
 #define R600_PTE_VALID     (1 << 0)
 #define R600_PTE_SYSTEM    (1 << 1)
 #define R600_PTE_SNOOPED   (1 << 2)
@@ -1543,3 +1528,19 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
 	addr |= flags;
 	writeq(addr, ptr + (pfn * 8));
 }
+
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (!ib->vm || ib->vm->id == -1)
+		return;
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	radeon_ring_write(ring, 0x1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
+	radeon_ring_write(ring, 1 << ib->vm->id);
+}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index aeb2d1f..1228778 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -658,6 +658,8 @@ struct radeon_vm {
 	struct mutex			mutex;
 	/* last fence for cs using this vm */
 	struct radeon_fence		*fence;
+	/* last flush or NULL if we still need to flush */
+	struct radeon_fence		*last_flush;
 };
 
 struct radeon_vm_manager {
@@ -1135,7 +1137,6 @@ struct radeon_asic {
 		int (*init)(struct radeon_device *rdev);
 		void (*fini)(struct radeon_device *rdev);
 		int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
-		void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm);
 		uint32_t (*page_flags)(struct radeon_device *rdev,
 				       struct radeon_vm *vm,
 				       uint32_t flags);
@@ -1154,6 +1155,7 @@ struct radeon_asic {
 		int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
 		int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
 		bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
+		void (*vm_flush)(struct radeon_device *rdev, struct radeon_ib *ib);
 	} ring[RADEON_NUM_RINGS];
 	/* irqs */
 	struct {
@@ -1696,7 +1698,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id))
-#define radeon_asic_vm_tlb_flush(rdev, v) (rdev)->asic->vm.tlb_flush((rdev), (v))
 #define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags))
 #define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags))
 #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
@@ -1705,6 +1706,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
 #define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
 #define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp))
+#define radeon_ring_vm_flush(rdev, r, ib) (rdev)->asic->ring[(r)].vm_flush((rdev), (ib))
 #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 9d5f4f3..b44ab96 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1360,7 +1360,6 @@ static struct radeon_asic cayman_asic = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
 		.bind = &cayman_vm_bind,
-		.tlb_flush = &cayman_vm_tlb_flush,
 		.page_flags = &cayman_vm_page_flags,
 		.set_page = &cayman_vm_set_page,
 	},
@@ -1374,6 +1373,7 @@ static struct radeon_asic cayman_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		},
 		[CAYMAN_RING_TYPE_CP1_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1384,6 +1384,7 @@ static struct radeon_asic cayman_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		},
 		[CAYMAN_RING_TYPE_CP2_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1394,6 +1395,7 @@ static struct radeon_asic cayman_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		}
 	},
 	.irq = {
@@ -1462,7 +1464,6 @@ static struct radeon_asic trinity_asic = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
 		.bind = &cayman_vm_bind,
-		.tlb_flush = &cayman_vm_tlb_flush,
 		.page_flags = &cayman_vm_page_flags,
 		.set_page = &cayman_vm_set_page,
 	},
@@ -1476,6 +1477,7 @@ static struct radeon_asic trinity_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		},
 		[CAYMAN_RING_TYPE_CP1_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1486,6 +1488,7 @@ static struct radeon_asic trinity_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		},
 		[CAYMAN_RING_TYPE_CP2_INDEX] = {
 			.ib_execute = &cayman_ring_ib_execute,
@@ -1496,6 +1499,7 @@ static struct radeon_asic trinity_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		}
 	},
 	.irq = {
@@ -1564,7 +1568,6 @@ static struct radeon_asic si_asic = {
 		.init = &si_vm_init,
 		.fini = &si_vm_fini,
 		.bind = &si_vm_bind,
-		.tlb_flush = &si_vm_tlb_flush,
 		.page_flags = &cayman_vm_page_flags,
 		.set_page = &cayman_vm_set_page,
 	},
@@ -1578,6 +1581,7 @@ static struct radeon_asic si_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &si_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		},
 		[CAYMAN_RING_TYPE_CP1_INDEX] = {
 			.ib_execute = &si_ring_ib_execute,
@@ -1588,6 +1592,7 @@ static struct radeon_asic si_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &si_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		},
 		[CAYMAN_RING_TYPE_CP2_INDEX] = {
 			.ib_execute = &si_ring_ib_execute,
@@ -1598,6 +1603,7 @@ static struct radeon_asic si_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &si_gpu_is_lockup,
+			.vm_flush = &cayman_vm_flush,
 		}
 	},
 	.irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index f4af243..87466d3 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -440,7 +440,7 @@ int cayman_vm_init(struct radeon_device *rdev);
 void cayman_vm_fini(struct radeon_device *rdev);
 int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
-void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm);
+void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
 uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
 			      struct radeon_vm *vm,
 			      uint32_t flags);
@@ -470,7 +470,6 @@ int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
 int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
 void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
-void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 85a80e4..d4a804b 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -484,6 +484,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 		goto out;
 	}
 	radeon_cs_sync_rings(parser);
+	radeon_cs_sync_to(parser, vm->last_flush);
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 18a03ab..a6a3dca 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -520,6 +520,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
 		break;
 	}
 	radeon_fence_unref(&vm->fence);
+	radeon_fence_unref(&vm->last_flush);
 
 	/* hw unbind */
 	rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
@@ -639,6 +640,7 @@ retry_id:
 
 	/* do hw bind */
 	r = radeon_asic_vm_bind(rdev, vm, id);
+	radeon_fence_unref(&vm->last_flush);
 	if (r) {
 		radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
 		return r;
@@ -836,7 +838,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		}
 		radeon_asic_vm_set_page(rdev, bo_va->vm, i + pfn, addr, flags);
 	}
-	radeon_asic_vm_tlb_flush(rdev, bo_va->vm);
+	radeon_fence_unref(&vm->last_flush);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 9ab3a3c..ceb334b 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -160,6 +160,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 	if (!need_sync) {
 		radeon_semaphore_free(rdev, &ib->semaphore, NULL);
 	}
+	/* if we can't remember our last VM flush then flush now! */
+	if (ib->vm && !ib->vm->last_flush) {
+		radeon_ring_vm_flush(rdev, ib->ring, ib);
+	}
 	if (const_ib) {
 		radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
 		radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
@@ -174,6 +178,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 	if (const_ib) {
 		const_ib->fence = radeon_fence_ref(ib->fence);
 	}
+	/* we just flushed the VM, remember that */
+	if (ib->vm && !ib->vm->last_flush) {
+		ib->vm->last_flush = radeon_fence_ref(ib->fence);
+	}
 	radeon_ring_unlock_commit(rdev, ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 40a9a85..7ef16d6 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2788,24 +2788,9 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
 	else
 		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
 		       vm->pt_gpu_addr >> 12);
-	/* flush hdp cache */
-	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-	/* bits 0-15 are the VM contexts0-15 */
-	WREG32(VM_INVALIDATE_REQUEST, 1 << id);
 	return 0;
 }
 
-void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	if (vm->id == -1)
-		return;
-
-	/* flush hdp cache */
-	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
-	/* bits 0-15 are the VM contexts0-15 */
-	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
-}
-
 /*
  * RLC
  */
-- 
1.7.9.5



More information about the dri-devel mailing list