[PATCH v3 16/22] drm/xe: Update PT layer with better error handling
Matthew Brost
matthew.brost at intel.com
Tue Feb 6 23:37:23 UTC 2024
Update the PT layer so that if a memory allocation for a PTE fails, the
error can be propagated to the user without requiring the VM to be killed.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
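Note for reviewers: a minimal, illustrative sketch of the prepare/commit/abort
split the PT code moves to with this patch. All names below (demo_*) are made
up for illustration and are not part of the driver; the real code operates on
xe_pt trees and xe_vm_pgtable_update entries, and defers frees through
pt_update_ops->deferred rather than calling free() directly.

/* Illustrative only -- not driver code. */
#include <stdlib.h>

#define DEMO_SLOTS 8

struct demo_table {
	void *slots[DEMO_SLOTS];
};

struct demo_update {
	unsigned int ofs;		/* first slot touched */
	unsigned int count;		/* number of slots touched */
	void *staged[DEMO_SLOTS];	/* new entries to install */
	void *saved[DEMO_SLOTS];	/* displaced entries, kept for commit/abort */
};

/* Prepare: install the new entries but stash the displaced ones. */
static void demo_commit_prepare(struct demo_table *t, struct demo_update *u)
{
	unsigned int i;

	for (i = 0; i < u->count; i++) {
		u->saved[i] = t->slots[u->ofs + i];
		t->slots[u->ofs + i] = u->staged[i];
	}
}

/* Commit: point of no return, release the displaced entries. */
static void demo_commit(struct demo_update *u)
{
	unsigned int i;

	for (i = 0; i < u->count; i++)
		free(u->saved[i]);
}

/* Abort: put the displaced entries back and release the new ones. */
static void demo_abort(struct demo_table *t, struct demo_update *u)
{
	unsigned int i;

	for (i = 0; i < u->count; i++) {
		free(t->slots[u->ofs + i]);
		t->slots[u->ofs + i] = u->saved[i];
	}
}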
drivers/gpu/drm/xe/xe_pt.c | 210 ++++++++++++++++++++++++-------
drivers/gpu/drm/xe/xe_pt_types.h | 2 +
2 files changed, 165 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index 7b7b3f99321d..8160a295dd84 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -796,19 +796,27 @@ xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *t
}
}
-static void xe_pt_abort_bind(struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries,
- u32 num_entries)
+static void xe_pt_cancel_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
{
u32 i, j;
for (i = 0; i < num_entries; i++) {
- if (!entries[i].pt_entries)
+ struct xe_pt *pt = entries[i].pt;
+
+ if (!pt)
continue;
- for (j = 0; j < entries[i].qwords; j++)
- xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL);
+ if (pt->level) {
+ for (j = 0; j < entries[i].qwords; j++)
+ xe_pt_destroy(entries[i].pt_entries[j].pt,
+ xe_vma_vm(vma)->flags, NULL);
+ }
+
kfree(entries[i].pt_entries);
+ entries[i].pt_entries = NULL;
+ entries[i].qwords = 0;
}
}
@@ -824,10 +832,61 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
xe_vm_assert_held(vm);
}
-static void xe_pt_commit_bind(struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries,
- u32 num_entries, bool rebind,
- struct llist_head *deferred)
+static void xe_pt_commit(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, struct llist_head *deferred)
+{
+ u32 i, j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (i = 0; i < num_entries; i++) {
+ struct xe_pt *pt = entries[i].pt;
+
+ if (!pt->level)
+ continue;
+
+ for (j = 0; j < entries[i].qwords; j++) {
+ struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
+
+ xe_pt_destroy(oldpte, xe_vma_vm(vma)->flags, deferred);
+ }
+ }
+}
+
+static void xe_pt_abort_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, bool rebind)
+{
+ int i, j;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (i = num_entries - 1; i >= 0; --i) {
+ struct xe_pt *pt = entries[i].pt;
+ struct xe_pt_dir *pt_dir;
+
+ if (!rebind)
+ pt->num_live -= entries[i].qwords;
+
+ if (!pt->level)
+ continue;
+
+ pt_dir = as_xe_pt_dir(pt);
+ for (j = 0; j < entries[i].qwords; j++) {
+ u32 j_ = j + entries[i].ofs;
+ struct xe_pt *newpte = xe_pt_entry(pt_dir, j_);
+ struct xe_pt *oldpte = entries[i].pt_entries[j].pt;
+
+ pt_dir->dir.entries[j_] = oldpte ? &oldpte->base : 0;
+ xe_pt_destroy(newpte, xe_vma_vm(vma)->flags, NULL);
+ }
+ }
+}
+
+static void xe_pt_commit_prepare_bind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries, bool rebind)
{
u32 i, j;
@@ -847,12 +906,13 @@ static void xe_pt_commit_bind(struct xe_vma *vma,
for (j = 0; j < entries[i].qwords; j++) {
u32 j_ = j + entries[i].ofs;
struct xe_pt *newpte = entries[i].pt_entries[j].pt;
+ struct xe_pt *oldpte = NULL;
if (xe_pt_entry(pt_dir, j_))
- xe_pt_destroy(xe_pt_entry(pt_dir, j_),
- xe_vma_vm(vma)->flags, deferred);
+ oldpte = xe_pt_entry(pt_dir, j_);
pt_dir->dir.entries[j_] = &newpte->base;
+ entries[i].pt_entries[j].pt = oldpte;
}
}
}
@@ -876,8 +936,6 @@ xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma,
err = xe_pt_stage_bind(tile, vma, entries, num_entries);
if (!err)
xe_tile_assert(tile, *num_entries);
- else /* abort! */
- xe_pt_abort_bind(vma, entries, *num_entries);
return err;
}
@@ -1366,7 +1424,7 @@ xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset,
&end_offset))
return 0;
- (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false);
+ (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, true);
xe_walk->wupd.updates[level].update->qwords = end_offset - offset;
return 0;
@@ -1434,32 +1492,58 @@ xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update,
memset64(ptr, empty, num_qwords);
}
+static void xe_pt_abort_unbind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
+{
+ int j, i;
+
+ xe_pt_commit_locks_assert(vma);
+
+ for (j = num_entries - 1; j >= 0; --j) {
+ struct xe_vm_pgtable_update *entry = &entries[j];
+ struct xe_pt *pt = entry->pt;
+ struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
+
+ pt->num_live += entry->qwords;
+
+ if (!pt->level)
+ continue;
+
+ for (i = entry->ofs; i < entry->ofs + entry->qwords; i++)
+ pt_dir->dir.entries[i] =
+ entries[j].pt_entries[i - entry->ofs].pt ?
+ &entries[j].pt_entries[i - entry->ofs].pt->base : 0;
+ }
+}
+
static void
-xe_pt_commit_unbind(struct xe_vma *vma,
- struct xe_vm_pgtable_update *entries, u32 num_entries,
- struct llist_head *deferred)
+xe_pt_commit_prepare_unbind(struct xe_vma *vma,
+ struct xe_vm_pgtable_update *entries,
+ u32 num_entries)
{
- u32 j;
+ int j, i;
xe_pt_commit_locks_assert(vma);
for (j = 0; j < num_entries; ++j) {
struct xe_vm_pgtable_update *entry = &entries[j];
struct xe_pt *pt = entry->pt;
+ struct xe_pt_dir *pt_dir;
pt->num_live -= entry->qwords;
- if (pt->level) {
- struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
- u32 i;
+ if (!pt->level)
+ continue;
- for (i = entry->ofs; i < entry->ofs + entry->qwords;
- i++) {
- if (xe_pt_entry(pt_dir, i))
- xe_pt_destroy(xe_pt_entry(pt_dir, i),
- xe_vma_vm(vma)->flags, deferred);
+ pt_dir = as_xe_pt_dir(pt);
+ for (i = entry->ofs; i < entry->ofs + entry->qwords; i++) {
+ if (xe_pt_entry(pt_dir, i))
+ entries[j].pt_entries[i - entry->ofs].pt =
+ xe_pt_entry(pt_dir, i);
+ else
+ entries[j].pt_entries[i - entry->ofs].pt = NULL;
- pt_dir->dir.entries[i] = NULL;
- }
+ pt_dir->dir.entries[i] = NULL;
}
}
}
@@ -1496,7 +1580,6 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
{
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
- struct llist_head *deferred = &pt_update_ops->deferred;
int err;
xe_bo_assert_held(xe_vma_bo(vma));
@@ -1505,6 +1588,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
"Preparing bind, with range [%llx...%llx)\n",
xe_vma_start(vma), xe_vma_end(vma) - 1);
+ pt_op->vma = NULL;
pt_op->bind = true;
pt_op->rebind = BIT(tile->id) & vma->tile_present;
@@ -1538,9 +1622,11 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
(!pt_op->rebind && vm->scratch_pt[tile->id] &&
xe_vm_in_preempt_fence_mode(vm));
- /* FIXME: Don't commit right away */
- xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries,
- pt_op->rebind, deferred);
+ pt_op->vma = vma;
+ xe_pt_commit_prepare_bind(vma, pt_op->entries,
+ pt_op->num_entries, pt_op->rebind);
+ } else {
+ xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries);
}
return err;
@@ -1552,7 +1638,6 @@ static int unbind_op_prepare(struct xe_tile *tile,
{
u32 current_op = pt_update_ops->current_op;
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op];
- struct llist_head *deferred = &pt_update_ops->deferred;
xe_bo_assert_held(xe_vma_bo(vma));
@@ -1560,6 +1645,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
"Preparing unbind, with range [%llx...%llx)\n",
xe_vma_start(vma), xe_vma_end(vma) - 1);
+ pt_op->vma = vma;
pt_op->bind = false;
pt_op->rebind = false;
@@ -1570,9 +1656,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
pt_update_ops->needs_invalidation = true;
- /* FIXME: Don't commit right away */
- xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries,
- deferred);
+ xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
return 0;
}
@@ -1782,7 +1866,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
struct invalidation_fence *ifence = NULL;
struct xe_range_fence *rfence;
struct xe_vma_op *op;
- int err = 0;
+ int err = 0, i;
struct xe_migrate_pt_update update = {
.ops = pt_update_ops->needs_userptr_lock ?
&userptr_migrate_ops :
@@ -1796,8 +1880,10 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
if (pt_update_ops->needs_invalidation) {
ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
- if (!ifence)
- return ERR_PTR(-ENOMEM);
+ if (!ifence) {
+ err = -ENOMEM;
+ goto kill_vm_tile1;
+ }
}
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
@@ -1806,10 +1892,19 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
goto free_ifence;
}
+ /* FIXME: Point of no return - VM killed if failure after this */
+ for (i = 0; i < pt_update_ops->num_ops; ++i) {
+ struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];
+
+ xe_pt_commit(pt_op->vma, pt_op->entries,
+ pt_op->num_entries, &pt_update_ops->deferred);
+ pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */
+ }
+
fence = xe_migrate_update_pgtables(tile->migrate, &update);
if (IS_ERR(fence)) {
err = PTR_ERR(fence);
- goto free_rfence;
+ goto kill_vm_tile0;
}
err = xe_range_fence_insert(&vm->rftree[tile->id], rfence,
@@ -1847,10 +1942,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
if (pt_update_ops->needs_userptr_lock)
up_read(&vm->userptr.notifier_lock);
dma_fence_put(fence);
-free_rfence:
+kill_vm_tile0:
+ if (!tile->id)
+ xe_vm_kill(vops->vm, false);
kfree(rfence);
free_ifence:
kfree(ifence);
+kill_vm_tile1:
+ if (tile->id)
+ xe_vm_kill(vops->vm, false);
return ERR_PTR(err);
}
@@ -1871,12 +1971,10 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
lockdep_assert_held(&vops->vm->lock);
xe_vm_assert_held(vops->vm);
- /* FIXME: Not 100% correct */
for (i = 0; i < pt_update_ops->num_ops; ++i) {
struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i];
- if (pt_op->bind)
- xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
+ xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
}
xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
}
@@ -1890,10 +1988,28 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops)
*/
void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
{
+ struct xe_vm_pgtable_update_ops *pt_update_ops =
+ &vops->pt_update_ops[tile->id];
+ int i;
+
lockdep_assert_held(&vops->vm->lock);
xe_vm_assert_held(vops->vm);
- /* FIXME: Just kill VM for now + cleanup PTs */
+ for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
+ struct xe_vm_pgtable_update_op *pt_op =
+ &pt_update_ops->ops[i];
+
+ if (!pt_op->vma || i >= pt_update_ops->current_op)
+ continue;
+
+ if (pt_op->bind)
+ xe_pt_abort_bind(pt_op->vma, pt_op->entries,
+ pt_op->num_entries,
+ pt_op->rebind);
+ else
+ xe_pt_abort_unbind(pt_op->vma, pt_op->entries,
+ pt_op->num_entries);
+ }
+
xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
- xe_vm_kill(vops->vm, false);
}
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 16252f1be055..384cc04de719 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -78,6 +78,8 @@ struct xe_vm_pgtable_update {
struct xe_vm_pgtable_update_op {
/** @entries: entries to update for this operation */
struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1];
+ /** @vma: VMA for operation, operation not valid if NULL */
+ struct xe_vma *vma;
/** @num_entries: number of entries for this update operation */
u32 num_entries;
/** @bind: is a bind */
--
2.34.1