[Intel-gfx] [PATCH] drm/i915: Fix random -ENOSPC eviction errors due to locked vma objects
Sviatoslav Peleshko
sviatoslav.peleshko at globallogic.com
Wed Aug 17 06:55:41 UTC 2022
The i915_gem_object_trylock we had in the grab_vma() makes it return false
when the vma->obj is already locked. In this case we'll skip this vma
during eviction, and eventually might be forced to return -ENOSPC even
though we could've evicted this vma if we waited for the lock a bit.
To fix this, replace the i915_gem_object_trylock with i915_gem_object_lock.
And because we have to worry about the potential deadlock now, bubble-up
the error code, so it will be correctly handled by the WW mechanism.
This fixes the issue https://gitlab.freedesktop.org/drm/intel/-/issues/6564
Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.")
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko at globallogic.com>
---
drivers/gpu/drm/i915/i915_gem_evict.c | 69 ++++++++++++++++++---------
1 file changed, 46 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index f025ee4fa526..9d43f213f68f 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -55,49 +55,58 @@ static int ggtt_flush(struct intel_gt *gt)
return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
}
-static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
+static int grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
+ int ret = 0;
+
/*
* We add the extra refcount so the object doesn't drop to zero until
* after ungrab_vma(), this way trylock is always paired with unlock.
*/
if (i915_gem_object_get_rcu(vma->obj)) {
- if (!i915_gem_object_trylock(vma->obj, ww)) {
+ ret = i915_gem_object_lock(vma->obj, ww);
+ if (ret)
i915_gem_object_put(vma->obj);
- return false;
- }
} else {
/* Dead objects don't need pins */
atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
}
- return true;
+ return ret;
}
-static void ungrab_vma(struct i915_vma *vma)
+static void ungrab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
if (dying_vma(vma))
return;
- i915_gem_object_unlock(vma->obj);
+ if (!ww)
+ i915_gem_object_unlock(vma->obj);
+
i915_gem_object_put(vma->obj);
}
-static bool
+static int
mark_free(struct drm_mm_scan *scan,
struct i915_gem_ww_ctx *ww,
struct i915_vma *vma,
unsigned int flags,
struct list_head *unwind)
{
+ int err;
+
if (i915_vma_is_pinned(vma))
- return false;
+ return -ENOSPC;
- if (!grab_vma(vma, ww))
- return false;
+ err = grab_vma(vma, ww);
+ if (err)
+ return err;
list_add(&vma->evict_link, unwind);
- return drm_mm_scan_add_block(scan, &vma->node);
+ if (!drm_mm_scan_add_block(scan, &vma->node))
+ return -ENOSPC;
+
+ return 0;
}
static bool defer_evict(struct i915_vma *vma)
@@ -150,6 +159,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
enum drm_mm_insert_mode mode;
struct i915_vma *active;
int ret;
+ int err = 0;
lockdep_assert_held(&vm->mutex);
trace_i915_gem_evict(vm, min_size, alignment, flags);
@@ -210,17 +220,23 @@ i915_gem_evict_something(struct i915_address_space *vm,
continue;
}
- if (mark_free(&scan, ww, vma, flags, &eviction_list))
+ err = mark_free(&scan, ww, vma, flags, &eviction_list);
+ if (!err)
goto found;
+ if (err == -EDEADLK)
+ break;
}
/* Nothing found, clean up and bail out! */
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
ret = drm_mm_scan_remove_block(&scan, &vma->node);
BUG_ON(ret);
- ungrab_vma(vma);
+ ungrab_vma(vma, ww);
}
+ if (err == -EDEADLK)
+ return err;
+
/*
* Can we unpin some objects such as idle hw contents,
* or pending flips? But since only the GGTT has global entries
@@ -267,7 +283,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
__i915_vma_pin(vma);
} else {
list_del(&vma->evict_link);
- ungrab_vma(vma);
+ ungrab_vma(vma, ww);
}
}
@@ -277,17 +293,21 @@ i915_gem_evict_something(struct i915_address_space *vm,
__i915_vma_unpin(vma);
if (ret == 0)
ret = __i915_vma_unbind(vma);
- ungrab_vma(vma);
+ ungrab_vma(vma, ww);
}
while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
vma = container_of(node, struct i915_vma, node);
/* If we find any non-objects (!vma), we cannot evict them */
- if (vma->node.color != I915_COLOR_UNEVICTABLE &&
- grab_vma(vma, ww)) {
- ret = __i915_vma_unbind(vma);
- ungrab_vma(vma);
+ if (vma->node.color != I915_COLOR_UNEVICTABLE) {
+ ret = grab_vma(vma, ww);
+ if (!ret) {
+ ret = __i915_vma_unbind(vma);
+ ungrab_vma(vma, ww);
+ } else if (ret != -EDEADLK) {
+ ret = -ENOSPC;
+ }
} else {
ret = -ENOSPC;
}
@@ -382,8 +402,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
break;
}
- if (!grab_vma(vma, ww)) {
- ret = -ENOSPC;
+ ret = grab_vma(vma, ww);
+ if (ret) {
+ if (ret != -EDEADLK)
+ ret = -ENOSPC;
+
break;
}
@@ -405,7 +428,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
if (ret == 0)
ret = __i915_vma_unbind(vma);
- ungrab_vma(vma);
+ ungrab_vma(vma, ww);
}
return ret;
--
2.37.1
More information about the Intel-gfx
mailing list