i915_vma_reopen checked whether the vma is closed without first taking the lock, so multiple threads could attempt to remove the same vma.
Instead the lock needs to be taken before actually checking.
v2: move struct declaration
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst kherbst@redhat.com --- drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 162e8d83691b..2efdad2b43fa 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1615,17 +1615,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma) { - struct intel_gt *gt = vma->vm->gt; - - spin_lock_irq(&gt->closed_lock); list_del_init(&vma->closed_link); - spin_unlock_irq(&gt->closed_lock); }
void i915_vma_reopen(struct i915_vma *vma) { + struct intel_gt *gt = vma->vm->gt; + + spin_lock_irq(&gt->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma); + spin_unlock_irq(&gt->closed_lock); }
static void force_unbind(struct i915_vma *vma) @@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1651,7 +1652,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
spin_unlock(&obj->vma.lock);
+ spin_lock_irq(&gt->closed_lock); __i915_vma_remove_closed(vma); + spin_unlock_irq(&gt->closed_lock);
if (vm_ddestroy) i915_vm_resv_put(vma->vm);
friendly ping.
I am not completely convinced myself that this is the correct patch; it might just work around some issues. However, list_debug.c does check whether a list entry was already deleted and throws an error if it was, and this patch does fix that one issue, as multiple threads could enter __i915_vma_remove_closed on the same vma.
On Wed, Apr 20, 2022 at 11:57 AM Karol Herbst kherbst@redhat.com wrote:
i915_vma_reopen checked if the vma is closed before without taking the lock. So multiple threads could attempt removing the vma.
Instead the lock needs to be taken before actually checking.
v2: move struct declaration
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst kherbst@redhat.com
drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 162e8d83691b..2efdad2b43fa 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1615,17 +1615,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma) {
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock); list_del_init(&vma->closed_link);
spin_unlock_irq(&gt->closed_lock);
}
void i915_vma_reopen(struct i915_vma *vma) {
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock);
}
static void force_unbind(struct i915_vma *vma) @@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj;
struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1651,7 +1652,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
spin_unlock(&obj->vma.lock);
spin_lock_irq(&gt->closed_lock); __i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock); if (vm_ddestroy) i915_vm_resv_put(vma->vm);
-- 2.35.1
On 20/04/2022 10:57, Karol Herbst wrote:
i915_vma_reopen checked if the vma is closed before without taking the lock. So multiple threads could attempt removing the vma.
Instead the lock needs to be taken before actually checking.
v2: move struct declaration
Fix looks correct to me. In which case it seems breakage was introduced with:
Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock") Cc: Matthew Auld matthew.auld@intel.com Cc: stable@vger.kernel.org # v5.3+
AFAICT at least. I will add these tags and pull it in unless someone shouts a correction.
Regards,
Tvrtko
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst kherbst@redhat.com
drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 162e8d83691b..2efdad2b43fa 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1615,17 +1615,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma) {
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock); list_del_init(&vma->closed_link);
spin_unlock_irq(&gt->closed_lock); }
void i915_vma_reopen(struct i915_vma *vma) {
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock); }
static void force_unbind(struct i915_vma *vma)
@@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj;
struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1651,7 +1652,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
spin_unlock(&obj->vma.lock);
spin_lock_irq(&gt->closed_lock); __i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock);
if (vm_ddestroy) i915_vm_resv_put(vma->vm);
On 20/04/2022 10:57, Karol Herbst wrote:
i915_vma_reopen checked if the vma is closed before without taking the lock. So multiple threads could attempt removing the vma.
Instead the lock needs to be taken before actually checking.
v2: move struct declaration
Cc: Chris Wilson chris@chris-wilson.co.uk Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5732 Signed-off-by: Karol Herbst kherbst@redhat.com
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
Regards,
Tvrtko
drivers/gpu/drm/i915/i915_vma.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 162e8d83691b..2efdad2b43fa 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1615,17 +1615,17 @@ void i915_vma_close(struct i915_vma *vma)
static void __i915_vma_remove_closed(struct i915_vma *vma) {
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock); list_del_init(&vma->closed_link);
spin_unlock_irq(&gt->closed_lock); }
void i915_vma_reopen(struct i915_vma *vma) {
struct intel_gt *gt = vma->vm->gt;
spin_lock_irq(&gt->closed_lock); if (i915_vma_is_closed(vma)) __i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock); }
static void force_unbind(struct i915_vma *vma)
@@ -1641,6 +1641,7 @@ static void force_unbind(struct i915_vma *vma) static void release_references(struct i915_vma *vma, bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj;
struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
@@ -1651,7 +1652,9 @@ static void release_references(struct i915_vma *vma, bool vm_ddestroy)
spin_unlock(&obj->vma.lock);
spin_lock_irq(&gt->closed_lock); __i915_vma_remove_closed(vma);
spin_unlock_irq(&gt->closed_lock);
if (vm_ddestroy) i915_vm_resv_put(vma->vm);
dri-devel@lists.freedesktop.org