[PATCH 11/11] drm/radeon: make page table updates async v2

Christian König deathsimple at vodafone.de
Mon Aug 20 01:08:23 PDT 2012


Currently doing the update with the CP.

v2: Rebased on Jeromes bugfix. Make validity comparison
    more human readable.

Signed-off-by: Christian König <deathsimple at vodafone.de>
---
 drivers/gpu/drm/radeon/ni.c          |   20 ++++++----
 drivers/gpu/drm/radeon/nid.h         |    1 +
 drivers/gpu/drm/radeon/radeon.h      |    2 +
 drivers/gpu/drm/radeon/radeon_asic.c |    3 ++
 drivers/gpu/drm/radeon/radeon_gart.c |   70 +++++++++++++++++++++++++---------
 5 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 1fd2e41..78d9cfb 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1513,20 +1513,24 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
 			unsigned pfn, struct ttm_mem_reg *mem,
 			unsigned npages, uint32_t flags)
 {
-	void __iomem *ptr = (void *)vm->pt;
-	uint64_t addr;
+	struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
+	uint64_t addr, pt = vm->pt_gpu_addr + pfn * 8;
 	int i;
 
 	addr = flags = cayman_vm_page_flags(rdev, flags);
 
-        for (i = 0; i < npages; ++i, ++pfn) {
-                if (mem) {
-                        addr = radeon_vm_get_addr(rdev, mem, i);
+	radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, 1 + npages * 2));
+	radeon_ring_write(ring, pt & 0xffffffff);
+	radeon_ring_write(ring, (pt >> 32) & 0xff);
+	for (i = 0; i < npages; ++i) {
+		if (mem) {
+			addr = radeon_vm_get_addr(rdev, mem, i);
 			addr = addr & 0xFFFFFFFFFFFFF000ULL;
 			addr |= flags;
-                }
-		writeq(addr, ptr + (pfn * 8));
-        }
+		}
+		radeon_ring_write(ring, addr & 0xffffffff);
+		radeon_ring_write(ring, (addr >> 32) & 0xffffffff);
+	}
 }
 
 void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 870db34..2423d1b 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -585,6 +585,7 @@
 #define	PACKET3_SET_CONTEXT_REG_INDIRECT		0x73
 #define	PACKET3_SET_RESOURCE_INDIRECT			0x74
 #define	PACKET3_SET_APPEND_CNT			        0x75
+#define	PACKET3_ME_WRITE				0x7A
 
 #endif
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7d37cb2..3de0f08 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1154,6 +1154,8 @@ struct radeon_asic {
 	struct {
 		int (*init)(struct radeon_device *rdev);
 		void (*fini)(struct radeon_device *rdev);
+
+		u32 pt_ring_index;
 		void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm,
 				 unsigned pfn, struct ttm_mem_reg *mem,
 				 unsigned npages, uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 4b99a24..d0b4e50 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1359,6 +1359,7 @@ static struct radeon_asic cayman_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
+		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
@@ -1461,6 +1462,7 @@ static struct radeon_asic trinity_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
+		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
@@ -1563,6 +1565,7 @@ static struct radeon_asic si_asic = {
 	.vm = {
 		.init = &si_vm_init,
 		.fini = &si_vm_fini,
+		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index bf5378b..3ef6698 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -464,15 +464,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
 			continue;
 
 		list_for_each_entry(bo_va, &vm->va, vm_list) {
-			struct ttm_mem_reg *mem = NULL;
-			if (bo_va->valid)
-				mem = &bo_va->bo->tbo.mem;
-
 			bo_va->valid = false;
-			r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem);
-			if (r) {
-				DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
-			}
 		}
 	}
 	return 0;
@@ -801,7 +793,6 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev,
 	return addr;
 }
 
-/* object have to be reserved & global and local mutex must be locked */
 /**
  * radeon_vm_bo_update_pte - map a bo into the vm page table
  *
@@ -812,15 +803,21 @@ u64 radeon_vm_get_addr(struct radeon_device *rdev,
  *
  * Fill in the page table entries for @bo (cayman+).
  * Returns 0 for success, -EINVAL for failure.
+ *
+ * Object have to be reserved & global and local mutex must be locked!
  */
 int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 			    struct radeon_vm *vm,
 			    struct radeon_bo *bo,
 			    struct ttm_mem_reg *mem)
 {
+	unsigned ridx = rdev->asic->vm.pt_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ridx];
+	struct radeon_semaphore *sem = NULL;
 	struct radeon_bo_va *bo_va;
-	unsigned ngpu_pages;
+	unsigned ngpu_pages, ndw;
 	uint64_t pfn;
+	int r;
 
 	/* nothing to do if vm isn't bound */
 	if (vm->sa_bo == NULL)
@@ -832,7 +829,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		return -EINVAL;
 	}
 
-	if (bo_va->valid && mem)
+	if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
 		return 0;
 
 	ngpu_pages = radeon_bo_ngpu_pages(bo);
@@ -846,12 +843,50 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
 		if (mem->mem_type == TTM_PL_TT) {
 			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
 		}
-	}
-	if (!bo_va->valid) {
-		mem = NULL;
+		if (!bo_va->valid) {
+			mem = NULL;
+		}
+	} else {
+		bo_va->valid = false;
 	}
 	pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE;
-	radeon_asic_vm_set_page(rdev, bo_va->vm, pfn, mem, ngpu_pages, bo_va->flags);
+
+	if (vm->fence && radeon_fence_signaled(vm->fence)) {
+		radeon_fence_unref(&vm->fence);
+	}
+
+	if (vm->fence && vm->fence->ring != ridx) {
+		r = radeon_semaphore_create(rdev, &sem);
+		if (r) {
+			return r;
+		}
+	}
+
+	/* estimate number of dw needed */
+	ndw = 32;
+	ndw += (ngpu_pages >> 12) * 3;
+	ndw += ngpu_pages * 2;
+
+	r = radeon_ring_lock(rdev, ring, ndw);
+	if (r) {
+		return r;
+	}
+
+	if (sem && radeon_fence_need_sync(vm->fence, ridx)) {
+		radeon_semaphore_sync_rings(rdev, sem, vm->fence->ring, ridx);
+		radeon_fence_note_sync(vm->fence, ridx);
+	}
+
+	radeon_asic_vm_set_page(rdev, vm, pfn, mem, ngpu_pages, bo_va->flags);
+
+	radeon_fence_unref(&vm->fence);
+	r = radeon_fence_emit(rdev, &vm->fence, ridx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, vm->fence);
 	radeon_fence_unref(&vm->last_flush);
 	return 0;
 }
@@ -875,6 +910,7 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 		     struct radeon_bo *bo)
 {
 	struct radeon_bo_va *bo_va;
+	int r;
 
 	bo_va = radeon_bo_va(bo, vm);
 	if (bo_va == NULL)
@@ -882,14 +918,14 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev,
 
 	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
-	radeon_vm_free_pt(rdev, vm);
+	r = radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
 	mutex_unlock(&rdev->vm_manager.lock);
 	list_del(&bo_va->vm_list);
 	mutex_unlock(&vm->mutex);
 	list_del(&bo_va->bo_list);
 
 	kfree(bo_va);
-	return 0;
+	return r;
 }
 
 /**
-- 
1.7.9.5



More information about the dri-devel mailing list