[PATCH 2/6] drm/i915: Slightly rework EXEC_OBJECT_CAPTURE handling
Maarten Lankhorst
maarten.lankhorst at linux.intel.com
Thu Jul 22 13:52:36 UTC 2021
Use a single null-terminated array for simplicity instead of a linked
list. This might slightly speed up execbuf when many vma's may be marked
as capture, but definitely removes an allocation from a signaling path.
We are not allowed to allocate memory in eb_move_to_gpu, but we can't
enforce it yet through annotations.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 26 ++++++++++++-------
drivers/gpu/drm/i915/i915_gpu_error.c | 9 ++++---
drivers/gpu/drm/i915/i915_request.c | 9 ++-----
drivers/gpu/drm/i915/i915_request.h | 7 +----
4 files changed, 24 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1ed7475de454..3e17290948ca 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -255,6 +255,9 @@ struct i915_execbuffer {
/** actual size of execobj[] as we may extend it for the cmdparser */
unsigned int buffer_count;
+ /* Number of objects with EXEC_OBJECT_CAPTURE set */
+ unsigned int capture_count;
+
/** list of vma not yet bound during reservation phase */
struct list_head unbound;
@@ -867,6 +870,9 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
goto err;
}
+ if (eb->exec[i].flags & EXEC_OBJECT_CAPTURE)
+ eb->capture_count++;
+
err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err)) {
i915_vma_put(vma);
@@ -2243,16 +2249,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
assert_vma_held(vma);
- if (flags & EXEC_OBJECT_CAPTURE) {
- struct i915_capture_list *capture;
-
- capture = kmalloc(sizeof(*capture), GFP_KERNEL);
- if (capture) {
- capture->next = eb->request->capture_list;
- capture->vma = vma;
- eb->request->capture_list = capture;
- }
- }
+ if (flags & EXEC_OBJECT_CAPTURE && eb->request->capture_list)
+ eb->request->capture_list[--eb->capture_count] = vma;
/*
* If the GPU is not _reading_ through the CPU cache, we need
@@ -3182,6 +3180,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.fences = NULL;
eb.num_fences = 0;
+ eb.capture_count = 0;
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
@@ -3313,6 +3312,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
}
+ if (eb.capture_count) {
+ eb.request->capture_list =
+ kvcalloc(eb.capture_count + 1,
+ sizeof(*eb.request->capture_list),
+ GFP_KERNEL | __GFP_NOWARN);
+ }
+
/*
* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a2c58b54a592..cd398bb93653 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1326,10 +1326,10 @@ capture_user(struct intel_engine_capture_vma *capture,
const struct i915_request *rq,
gfp_t gfp)
{
- struct i915_capture_list *c;
+ int i;
- for (c = rq->capture_list; c; c = c->next)
- capture = capture_vma(capture, c->vma, "user", gfp);
+ for (i = 0; rq->capture_list[i]; i++)
+ capture = capture_vma(capture, rq->capture_list[i], "user", gfp);
return capture;
}
@@ -1377,7 +1377,8 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
* by userspace.
*/
vma = capture_vma(vma, rq->batch, "batch", gfp);
- vma = capture_user(vma, rq, gfp);
+ if (rq->capture_list)
+ vma = capture_user(vma, rq, gfp);
vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
vma = capture_vma(vma, rq->context->state, "HW context", gfp);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d3de9f60e03a..4bdb4a6594a0 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -211,15 +211,10 @@ static void __notify_execute_cb_imm(struct i915_request *rq)
static void free_capture_list(struct i915_request *request)
{
- struct i915_capture_list *capture;
+ struct i915_vma **capture;
capture = fetch_and_zero(&request->capture_list);
- while (capture) {
- struct i915_capture_list *next = capture->next;
-
- kfree(capture);
- capture = next;
- }
+ kvfree(capture);
}
static void __i915_request_fill(struct i915_request *rq, u8 val)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 5deb65ec5fa5..421da501a5a3 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -48,11 +48,6 @@ struct drm_i915_gem_object;
struct drm_printer;
struct i915_request;
-struct i915_capture_list {
- struct i915_capture_list *next;
- struct i915_vma *vma;
-};
-
#define RQ_TRACE(rq, fmt, ...) do { \
const struct i915_request *rq__ = (rq); \
ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \
@@ -271,7 +266,7 @@ struct i915_request {
* active reference - all objects on this list must also be
* on the active_list (of their final request).
*/
- struct i915_capture_list *capture_list;
+ struct i915_vma **capture_list;
/** Time at which this request was emitted, in jiffies. */
unsigned long emitted_jiffies;
--
2.31.0
More information about the Intel-gfx-trybot
mailing list