[Intel-gfx] [RFC PATCH] drm/ttm: Allow the driver to resolve a WW transaction rollback

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri May 5 14:17:19 UTC 2023


Allow drivers to resolve a WW transaction rollback. This allows for
1) Putting a lower-priority transaction to sleep allowing another to
succeed instead both fighting using trylocks.
2) Letting the driver know whether a received -ENOMEM is the result of
competition with another WW transaction, which can be resolved using
rollback and retry or a real -ENOMEM which should be propagated back
to user-space as a failure.

Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 17 +++++++++++++++--
 include/drm/ttm/ttm_bo.h     |  2 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index bd5dae4d1624..c3ccbea2be3e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -561,6 +561,10 @@ static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo,
 	if (!busy_bo || !ticket)
 		return -EBUSY;
 
+	/* We want to resolve contention before trying to lock again. */
+	if (ctx->propagate_edeadlk && ctx->contended_bo)
+		return  -EDEADLK;
+
 	if (ctx->interruptible)
 		r = dma_resv_lock_interruptible(busy_bo->base.resv,
 							  ticket);
@@ -575,7 +579,15 @@ static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo,
 	if (!r)
 		dma_resv_unlock(busy_bo->base.resv);
 
-	return r == -EDEADLK ? -EBUSY : r;
+	if (r == -EDEADLK) {
+		if (ctx->propagate_edeadlk) {
+			ttm_bo_get(busy_bo);
+			ctx->contended_bo = busy_bo;
+		}
+		r = -EBUSY;
+	}
+
+	return r;
 }
 
 int ttm_mem_evict_first(struct ttm_device *bdev,
@@ -816,7 +828,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			goto error;
 	}
 
-	ret = -ENOMEM;
+	ret = (ctx->propagate_edeadlk && ctx->contended_bo) ? -EDEADLK : -ENOMEM;
 	if (!type_found) {
 		pr_err(TTM_PFX "No compatible memory type found\n");
 		ret = -EINVAL;
@@ -913,6 +925,7 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_validate);
diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h
index 8b113c384236..d8e35a794ce5 100644
--- a/include/drm/ttm/ttm_bo.h
+++ b/include/drm/ttm/ttm_bo.h
@@ -181,8 +181,10 @@ struct ttm_operation_ctx {
 	bool gfp_retry_mayfail;
 	bool allow_res_evict;
 	bool force_alloc;
+	bool propagate_edeadlk;
 	struct dma_resv *resv;
 	uint64_t bytes_moved;
+	struct ttm_buffer_object *contended_bo;
 };
 
 /**
-- 
2.39.2



More information about the Intel-gfx mailing list