[PATCH 07/11] drm/radeon: rework VMID handling
Christian König
deathsimple at vodafone.de
Mon Aug 20 01:08:19 PDT 2012
Move binding onto the ring, simplifying handling a bit.
Signed-off-by: Christian König <deathsimple at vodafone.de>
Reviewed-by: Jerome Glisse <jglisse at redhat.com>
---
drivers/gpu/drm/radeon/ni.c | 20 ++---
drivers/gpu/drm/radeon/radeon.h | 30 +++++++-
drivers/gpu/drm/radeon/radeon_asic.c | 9 +--
drivers/gpu/drm/radeon/radeon_asic.h | 4 +-
drivers/gpu/drm/radeon/radeon_cs.c | 9 +--
drivers/gpu/drm/radeon/radeon_device.c | 1 -
drivers/gpu/drm/radeon/radeon_gart.c | 126 +++++++++++++++++++++-----------
drivers/gpu/drm/radeon/si.c | 30 ++++++--
8 files changed, 153 insertions(+), 76 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index ad337e8..88d5713 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1489,14 +1489,6 @@ void cayman_vm_fini(struct radeon_device *rdev)
{
}
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
-{
- WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
- WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
- WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
- return 0;
-}
-
#define R600_PTE_VALID (1 << 0)
#define R600_PTE_SYSTEM (1 << 1)
#define R600_PTE_SNOOPED (1 << 2)
@@ -1532,10 +1524,20 @@ void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
+ struct radeon_vm *vm = ib->vm;
- if (!ib->vm || ib->vm->id == -1)
+ if (vm == NULL)
return;
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0));
+ radeon_ring_write(ring, 0);
+
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
+ radeon_ring_write(ring, vm->last_pfn);
+
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
+ radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
+
/* flush hdp cache */
radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
radeon_ring_write(ring, 0x1);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 1228778..e0c6673 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -268,6 +268,22 @@ static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a,
}
}
+static inline bool radeon_fence_is_earlier(struct radeon_fence *a,
+ struct radeon_fence *b)
+{
+ if (!a) {
+ return false;
+ }
+
+ if (!b) {
+ return true;
+ }
+
+ BUG_ON(a->ring != b->ring);
+
+ return a->seq < b->seq;
+}
+
/*
* Tiling registers
*/
@@ -647,10 +663,13 @@ struct radeon_ring {
/*
* VM
*/
+
+#define RADEON_NUM_VM 16
+
struct radeon_vm {
struct list_head list;
struct list_head va;
- int id;
+ unsigned id;
unsigned last_pfn;
u64 pt_gpu_addr;
u64 *pt;
@@ -665,7 +684,7 @@ struct radeon_vm {
struct radeon_vm_manager {
struct mutex lock;
struct list_head lru_vm;
- uint32_t use_bitmap;
+ struct radeon_fence *active[RADEON_NUM_VM];
struct radeon_sa_manager sa_manager;
uint32_t max_pfn;
/* number of VMIDs */
@@ -1136,7 +1155,6 @@ struct radeon_asic {
struct {
int (*init)(struct radeon_device *rdev);
void (*fini)(struct radeon_device *rdev);
- int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
uint32_t (*page_flags)(struct radeon_device *rdev,
struct radeon_vm *vm,
uint32_t flags);
@@ -1697,7 +1715,6 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p))
#define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
#define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
-#define radeon_asic_vm_bind(rdev, v, id) (rdev)->asic->vm.bind((rdev), (v), (id))
#define radeon_asic_vm_page_flags(rdev, v, flags) (rdev)->asic->vm.page_flags((rdev), (v), (flags))
#define radeon_asic_vm_set_page(rdev, v, pfn, addr, flags) (rdev)->asic->vm.set_page((rdev), (v), (pfn), (addr), (flags))
#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
@@ -1779,6 +1796,11 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm);
void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+ struct radeon_vm *vm, int ring);
+void radeon_vm_fence(struct radeon_device *rdev,
+ struct radeon_vm *vm,
+ struct radeon_fence *fence);
int radeon_vm_bo_update_pte(struct radeon_device *rdev,
struct radeon_vm *vm,
struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index b44ab96..5486674 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1359,7 +1359,6 @@ static struct radeon_asic cayman_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
- .bind = &cayman_vm_bind,
.page_flags = &cayman_vm_page_flags,
.set_page = &cayman_vm_set_page,
},
@@ -1463,7 +1462,6 @@ static struct radeon_asic trinity_asic = {
.vm = {
.init = &cayman_vm_init,
.fini = &cayman_vm_fini,
- .bind = &cayman_vm_bind,
.page_flags = &cayman_vm_page_flags,
.set_page = &cayman_vm_set_page,
},
@@ -1567,7 +1565,6 @@ static struct radeon_asic si_asic = {
.vm = {
.init = &si_vm_init,
.fini = &si_vm_fini,
- .bind = &si_vm_bind,
.page_flags = &cayman_vm_page_flags,
.set_page = &cayman_vm_set_page,
},
@@ -1581,7 +1578,7 @@ static struct radeon_asic si_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
- .vm_flush = &cayman_vm_flush,
+ .vm_flush = &si_vm_flush,
},
[CAYMAN_RING_TYPE_CP1_INDEX] = {
.ib_execute = &si_ring_ib_execute,
@@ -1592,7 +1589,7 @@ static struct radeon_asic si_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
- .vm_flush = &cayman_vm_flush,
+ .vm_flush = &si_vm_flush,
},
[CAYMAN_RING_TYPE_CP2_INDEX] = {
.ib_execute = &si_ring_ib_execute,
@@ -1603,7 +1600,7 @@ static struct radeon_asic si_asic = {
.ring_test = &r600_ring_test,
.ib_test = &r600_ib_test,
.is_lockup = &si_gpu_is_lockup,
- .vm_flush = &cayman_vm_flush,
+ .vm_flush = &si_vm_flush,
}
},
.irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 87466d3..6f8002b 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -438,7 +438,6 @@ int cayman_asic_reset(struct radeon_device *rdev);
void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
int cayman_vm_init(struct radeon_device *rdev);
void cayman_vm_fini(struct radeon_device *rdev);
-int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
@@ -468,8 +467,7 @@ int si_irq_set(struct radeon_device *rdev);
int si_irq_process(struct radeon_device *rdev);
int si_vm_init(struct radeon_device *rdev);
void si_vm_fini(struct radeon_device *rdev);
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
-void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib);
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d4a804b..dc4554e 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -485,6 +485,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
}
radeon_cs_sync_rings(parser);
radeon_cs_sync_to(parser, vm->last_flush);
+ radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
if ((rdev->family >= CHIP_TAHITI) &&
(parser->chunk_const_ib_idx != -1)) {
@@ -493,13 +494,11 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
r = radeon_ib_schedule(rdev, &parser->ib, NULL);
}
-out:
if (!r) {
- if (vm->fence) {
- radeon_fence_unref(&vm->fence);
- }
- vm->fence = radeon_fence_ref(parser->ib.fence);
+ radeon_vm_fence(rdev, vm, parser->ib.fence);
}
+
+out:
mutex_unlock(&vm->mutex);
mutex_unlock(&rdev->vm_manager.lock);
return r;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 742af82..9909d1f 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1018,7 +1018,6 @@ int radeon_device_init(struct radeon_device *rdev,
return r;
/* initialize vm here */
mutex_init(&rdev->vm_manager.lock);
- rdev->vm_manager.use_bitmap = 1;
rdev->vm_manager.max_pfn = 1 << 20;
INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a6a3dca..d668733 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -437,7 +437,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
int r;
if (!rdev->vm_manager.enabled) {
- /* mark first vm as always in use, it's the system one */
/* allocate enough for 2 full VM pts */
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
rdev->vm_manager.max_pfn * 8 * 2,
@@ -461,7 +460,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
/* restore page table */
list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
- if (vm->id == -1)
+ if (vm->sa_bo == NULL)
continue;
list_for_each_entry(bo_va, &vm->va, vm_list) {
@@ -475,11 +474,6 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
DRM_ERROR("Failed to update pte for vm %d!\n", vm->id);
}
}
-
- r = radeon_asic_vm_bind(rdev, vm, vm->id);
- if (r) {
- DRM_ERROR("Failed to bind vm %d!\n", vm->id);
- }
}
return 0;
}
@@ -500,10 +494,6 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
{
struct radeon_bo_va *bo_va;
- if (vm->id == -1) {
- return;
- }
-
/* wait for vm use to end */
while (vm->fence) {
int r;
@@ -523,9 +513,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
radeon_fence_unref(&vm->last_flush);
/* hw unbind */
- rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
list_del_init(&vm->list);
- vm->id = -1;
radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
vm->pt = NULL;
@@ -544,6 +532,7 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev,
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
struct radeon_vm *vm, *tmp;
+ int i;
if (!rdev->vm_manager.enabled)
return;
@@ -553,6 +542,9 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
radeon_vm_unbind_locked(rdev, vm);
}
+ for (i = 0; i < RADEON_NUM_VM; ++i) {
+ radeon_fence_unref(&rdev->vm_manager.active[i]);
+ }
radeon_asic_vm_fini(rdev);
mutex_unlock(&rdev->vm_manager.lock);
@@ -593,14 +585,13 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
{
struct radeon_vm *vm_evict;
- unsigned i;
- int id = -1, r;
+ int r;
if (vm == NULL) {
return -EINVAL;
}
- if (vm->id != -1) {
+ if (vm->sa_bo != NULL) {
/* update lru */
list_del_init(&vm->list);
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
@@ -623,33 +614,86 @@ retry:
vm->pt_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
-retry_id:
- /* search for free vm */
- for (i = 0; i < rdev->vm_manager.nvm; i++) {
- if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
- id = i;
- break;
+ list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
+ return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
+ &rdev->ring_tmp_bo.bo->tbo.mem);
+}
+
+/**
+ * radeon_vm_grab_id - allocate the next free VMID
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ *
+ * Allocate an id for the vm (cayman+).
+ * Returns the fence we need to sync to (if any).
+ *
+ * Global and local mutex must be locked!
+ */
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+ struct radeon_vm *vm, int ring)
+{
+ struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+ unsigned choices[2] = {};
+ unsigned i;
+
+ /* check if the id is still valid */
+ if (vm->fence && vm->fence == rdev->vm_manager.active[vm->id])
+ return NULL;
+
+ /* we definately need to flush */
+ radeon_fence_unref(&vm->last_flush);
+
+ /* skip over VMID 0, since it is the system VM */
+ for (i = 1; i < rdev->vm_manager.nvm; ++i) {
+ struct radeon_fence *fence = rdev->vm_manager.active[i];
+
+ if (fence == NULL) {
+ /* found a free one */
+ vm->id = i;
+ return NULL;
+ }
+
+ if (radeon_fence_is_earlier(fence, best[fence->ring])) {
+ best[fence->ring] = fence;
+ choices[fence->ring == ring ? 0 : 1] = i;
}
- }
- /* evict vm if necessary */
- if (id == -1) {
- vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
- radeon_vm_unbind(rdev, vm_evict);
- goto retry_id;
}
- /* do hw bind */
- r = radeon_asic_vm_bind(rdev, vm, id);
- radeon_fence_unref(&vm->last_flush);
- if (r) {
- radeon_sa_bo_free(rdev, &vm->sa_bo, NULL);
- return r;
+ for (i = 0; i < 2; ++i) {
+ if (choices[i]) {
+ vm->id = choices[i];
+ return rdev->vm_manager.active[choices[i]];
+ }
}
- rdev->vm_manager.use_bitmap |= 1 << id;
- vm->id = id;
- list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
- return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
- &rdev->ring_tmp_bo.bo->tbo.mem);
+
+ /* should never happen */
+ BUG();
+ return NULL;
+}
+
+/**
+ * radeon_vm_fence - remember fence for vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm we want to fence
+ * @fence: fence to remember
+ *
+ * Fence the vm (cayman+).
+ * Set the fence used to protect page table and id.
+ *
+ * Global and local mutex must be locked!
+ */
+void radeon_vm_fence(struct radeon_device *rdev,
+ struct radeon_vm *vm,
+ struct radeon_fence *fence)
+{
+ radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
+ rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+
+ radeon_fence_unref(&vm->fence);
+ vm->fence = radeon_fence_ref(fence);
}
/* object have to be reserved */
@@ -806,7 +850,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
uint32_t flags;
/* nothing to do if vm isn't bound */
- if (vm->id == -1)
+ if (vm->sa_bo == NULL)
return 0;
bo_va = radeon_bo_va(bo, vm);
@@ -928,7 +972,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
int r;
- vm->id = -1;
+ vm->id = 0;
vm->fence = NULL;
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 7ef16d6..af115b8 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2781,14 +2781,30 @@ void si_vm_fini(struct radeon_device *rdev)
{
}
-int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
+void si_vm_flush(struct radeon_device *rdev, struct radeon_ib *ib)
{
- if (id < 8)
- WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
- else
- WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
- vm->pt_gpu_addr >> 12);
- return 0;
+ struct radeon_ring *ring = &rdev->ring[ib->ring];
+ struct radeon_vm *vm = ib->vm;
+
+ if (vm == NULL)
+ return;
+
+ if (vm->id < 8) {
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ + (vm->id << 2), 0));
+ } else {
+ radeon_ring_write(ring, PACKET0(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ + ((vm->id - 8) << 2), 0));
+ }
+ radeon_ring_write(ring, vm->pt_gpu_addr >> 12);
+
+ /* flush hdp cache */
+ radeon_ring_write(ring, PACKET0(HDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+ radeon_ring_write(ring, 0x1);
+
+ /* bits 0-7 are the VM contexts0-7 */
+ radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0));
+ radeon_ring_write(ring, 1 << ib->vm->id);
}
/*
--
1.7.9.5
More information about the dri-devel
mailing list