[PATCH] drm/radeon: use an intervall tree to manage the VMA v2
Christian König
deathsimple at vodafone.de
Fri Jul 18 01:29:51 PDT 2014
From: Christian König <christian.koenig at amd.com>
Scales much better than scanning the address range linearly.
v2: store pfn instead of address
Signed-off-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/radeon/radeon.h | 7 ++-
drivers/gpu/drm/radeon/radeon_gem.c | 4 +-
drivers/gpu/drm/radeon/radeon_trace.h | 4 +-
drivers/gpu/drm/radeon/radeon_vm.c | 93 +++++++++++++++++------------------
5 files changed, 52 insertions(+), 57 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index f512004..9b2eedc 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -114,6 +114,7 @@ config DRM_RADEON
select POWER_SUPPLY
select HWMON
select BACKLIGHT_CLASS_DEVICE
+ select INTERVAL_TREE
help
Choose this option if you have an ATI Radeon graphics card. There
are both PCI and AGP versions. You don't need to choose this to
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 0ec7864..aa8721b 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -64,6 +64,7 @@
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/kref.h>
+#include <linux/interval_tree.h>
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
@@ -444,14 +445,12 @@ struct radeon_mman {
struct radeon_bo_va {
/* protected by bo being reserved */
struct list_head bo_list;
- uint64_t soffset;
- uint64_t eoffset;
uint32_t flags;
uint64_t addr;
unsigned ref_count;
/* protected by vm mutex */
- struct list_head vm_list;
+ struct interval_tree_node it;
struct list_head vm_status;
/* constant after initialization */
@@ -868,7 +867,7 @@ struct radeon_vm_pt {
};
struct radeon_vm {
- struct list_head va;
+ struct rb_root va;
unsigned id;
/* BOs moved, but not yet updated in the PT */
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 07a13c9..30c5c7f 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -502,9 +502,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
switch (args->operation) {
case RADEON_VA_MAP:
- if (bo_va->soffset) {
+ if (bo_va->it.start) {
args->operation = RADEON_VA_RESULT_VA_EXIST;
- args->offset = bo_va->soffset;
+ args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
goto out;
}
r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index f749f2c..2ae4eaa 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
),
TP_fast_assign(
- __entry->soffset = bo_va->soffset;
- __entry->eoffset = bo_va->eoffset;
+ __entry->soffset = bo_va->it.start;
+ __entry->eoffset = bo_va->it.last + 1;
__entry->flags = bo_va->flags;
),
TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index f1030fc..ab08a56 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -325,17 +325,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
}
bo_va->vm = vm;
bo_va->bo = bo;
- bo_va->soffset = 0;
- bo_va->eoffset = 0;
+ bo_va->it.start = 0;
+ bo_va->it.last = 0;
bo_va->flags = 0;
bo_va->addr = 0;
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->bo_list);
- INIT_LIST_HEAD(&bo_va->vm_list);
INIT_LIST_HEAD(&bo_va->vm_status);
mutex_lock(&vm->mutex);
- list_add(&bo_va->vm_list, &vm->va);
list_add_tail(&bo_va->bo_list, &bo->va);
mutex_unlock(&vm->mutex);
@@ -419,11 +417,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
uint32_t flags)
{
uint64_t size = radeon_bo_size(bo_va->bo);
- uint64_t eoffset, last_offset = 0;
struct radeon_vm *vm = bo_va->vm;
- struct radeon_bo_va *tmp;
- struct list_head *head;
unsigned last_pfn, pt_idx;
+ uint64_t eoffset;
int r;
if (soffset) {
@@ -445,47 +441,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
}
mutex_lock(&vm->mutex);
- head = &vm->va;
- last_offset = 0;
- list_for_each_entry(tmp, &vm->va, vm_list) {
- if (bo_va == tmp) {
- /* skip over currently modified bo */
- continue;
+ if (bo_va->it.start || bo_va->it.last) {
+ if (bo_va->addr) {
+ /* add a clone of the bo_va to clear the old address */
+ struct radeon_bo_va *tmp;
+ tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+ tmp->it.start = bo_va->it.start;
+ tmp->it.last = bo_va->it.last;
+ tmp->vm = vm;
+ tmp->addr = bo_va->addr;
+ list_add(&tmp->vm_status, &vm->freed);
}
- if (soffset >= last_offset && eoffset <= tmp->soffset) {
- /* bo can be added before this one */
- break;
- }
- if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
+ interval_tree_remove(&bo_va->it, &vm->va);
+ bo_va->it.start = 0;
+ bo_va->it.last = 0;
+ }
+
+ soffset /= RADEON_GPU_PAGE_SIZE;
+ eoffset /= RADEON_GPU_PAGE_SIZE;
+ if (soffset || eoffset) {
+ struct interval_tree_node *it;
+ it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+ if (it) {
+ struct radeon_bo_va *tmp;
+ tmp = container_of(it, struct radeon_bo_va, it);
/* bo and tmp overlap, invalid offset */
- dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
- bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
- (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
+ dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+ "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+ soffset, tmp->bo, tmp->it.start, tmp->it.last);
mutex_unlock(&vm->mutex);
return -EINVAL;
}
- last_offset = tmp->eoffset;
- head = &tmp->vm_list;
- }
-
- if (bo_va->soffset) {
- /* add a clone of the bo_va to clear the old address */
- tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
- tmp->soffset = bo_va->soffset;
- tmp->eoffset = bo_va->eoffset;
- tmp->vm = vm;
- list_add(&tmp->vm_status, &vm->freed);
+ bo_va->it.start = soffset;
+ bo_va->it.last = eoffset - 1;
+ interval_tree_insert(&bo_va->it, &vm->va);
}
- bo_va->soffset = soffset;
- bo_va->eoffset = eoffset;
bo_va->flags = flags;
bo_va->addr = 0;
- list_move(&bo_va->vm_list, head);
- soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
- eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+ soffset >>= radeon_vm_block_size;
+ eoffset >>= radeon_vm_block_size;
BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
@@ -773,9 +770,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
unsigned count = 0;
uint64_t addr;
- start = start / RADEON_GPU_PAGE_SIZE;
- end = end / RADEON_GPU_PAGE_SIZE;
-
/* walk over the address space and update the page tables */
for (addr = start; addr < end; ) {
uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -842,7 +836,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
uint64_t addr;
int r;
- if (!bo_va->soffset) {
+ if (!bo_va->it.start) {
dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
bo_va->bo, vm);
return -EINVAL;
@@ -872,7 +866,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
trace_radeon_vm_bo_update(bo_va);
- nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+ nptes = bo_va->it.last - bo_va->it.start + 1;
/* padding, etc. */
ndw = 64;
@@ -897,8 +891,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
return r;
ib.length_dw = 0;
- radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
- addr, radeon_vm_page_flags(bo_va->flags));
+ radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+ bo_va->it.last + 1, addr,
+ radeon_vm_page_flags(bo_va->flags));
radeon_semaphore_sync_to(ib.semaphore, vm->fence);
r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -984,7 +979,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
list_del(&bo_va->bo_list);
mutex_lock(&vm->mutex);
- list_del(&bo_va->vm_list);
+ interval_tree_remove(&bo_va->it, &vm->va);
list_del(&bo_va->vm_status);
if (bo_va->addr) {
@@ -1041,7 +1036,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
vm->last_flush = NULL;
vm->last_id_use = NULL;
mutex_init(&vm->mutex);
- INIT_LIST_HEAD(&vm->va);
+ vm->va = RB_ROOT;
INIT_LIST_HEAD(&vm->invalidated);;
INIT_LIST_HEAD(&vm->freed);
@@ -1086,11 +1081,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
struct radeon_bo_va *bo_va, *tmp;
int i, r;
- if (!list_empty(&vm->va)) {
+ if (!RB_EMPTY_ROOT(&vm->va)) {
dev_err(rdev->dev, "still active bo inside vm\n");
}
- list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
- list_del_init(&bo_va->vm_list);
+ rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+ interval_tree_remove(&bo_va->it, &vm->va);
r = radeon_bo_reserve(bo_va->bo, false);
if (!r) {
list_del_init(&bo_va->bo_list);
--
1.9.1
More information about the dri-devel
mailing list