[Intel-gfx] [PATCH 17/24] drm/i915: Stash a pointer to the obj's resv in the vma

Chris Wilson chris at chris-wilson.co.uk
Thu May 18 09:46:31 UTC 2017


During execbuf, a mandatory step is that we add this request (this
fence) to each object's reservation_object. Inside execbuf, we track the
vma, so adding the fence to the reservation_object means first having to
chase the obj pointer, incurring another cache miss. We can reduce the
number of cache misses by stashing a pointer to the reservation_object
in the vma itself.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 ++++++++++++-------------
 drivers/gpu/drm/i915/i915_vma.c            |  1 +
 drivers/gpu/drm/i915/i915_vma.h            |  3 ++-
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9715099664ac..f5a0e419bfec 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1207,17 +1207,17 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	i915_vma_move_to_active(batch, rq, 0);
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
+	reservation_object_lock(batch->resv, NULL);
+	reservation_object_add_excl_fence(batch->resv, &rq->fence);
+	reservation_object_unlock(batch->resv);
 	i915_vma_unpin(batch);
 
 	i915_vma_move_to_active(vma, rq, true);
-	reservation_object_lock(vma->obj->resv, NULL);
-	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
-	reservation_object_unlock(vma->obj->resv);
+	reservation_object_lock(vma->resv, NULL);
+	reservation_object_add_excl_fence(vma->resv, &rq->fence);
+	reservation_object_unlock(vma->resv);
 
 	rq->batch = batch;
 
@@ -1267,7 +1267,6 @@ relocate_entry(struct i915_vma *vma,
 	       struct i915_execbuffer *eb,
 	       const struct i915_vma *target)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
 	u64 offset = reloc->offset;
 	u64 target_offset = relocation_target(reloc, target);
 	bool wide = eb->reloc_cache.use_64bit_reloc;
@@ -1275,7 +1274,7 @@ relocate_entry(struct i915_vma *vma,
 
 	if (!eb->reloc_cache.vaddr &&
 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-	     !reservation_object_test_signaled_rcu(obj->resv, true))) {
+	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
 		const unsigned int gen = eb->reloc_cache.gen;
 		unsigned int len;
 		u32 *batch;
@@ -1335,7 +1334,7 @@ relocate_entry(struct i915_vma *vma,
 	}
 
 repeat:
-	vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
+	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
 
@@ -1802,11 +1801,11 @@ static int eb_relocate_slow(struct i915_execbuffer *eb)
 	return err ?: have_copy;
 }
 
-static void eb_export_fence(struct drm_i915_gem_object *obj,
+static void eb_export_fence(struct i915_vma *vma,
 			    struct drm_i915_gem_request *req,
 			    unsigned int flags)
 {
-	struct reservation_object *resv = obj->resv;
+	struct reservation_object *resv = vma->resv;
 
 	/*
 	 * Ignore errors from failing to allocate the new fence, we can't
@@ -1866,7 +1865,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
 		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 		struct i915_vma *vma = exec_to_vma(entry);
 
-		eb_export_fence(vma->obj, eb->request, entry->flags);
+		eb_export_fence(vma, eb->request, entry->flags);
 		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 09b00e9de2f0..fc1a8412b0a0 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -90,6 +90,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->resv = obj->resv;
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 062addfee6ef..3840b8cdc6b1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -50,6 +50,7 @@ struct i915_vma {
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
 	struct drm_i915_fence_reg *fence;
+	struct reservation_object *resv;
 	struct sg_table *pages;
 	void __iomem *iomap;
 	u64 size;
@@ -111,8 +112,8 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct hlist_node exec_node;
 	struct drm_i915_gem_exec_object2 *exec_entry;
+	struct hlist_node exec_node;
 	u32 exec_handle;
 
 	struct i915_gem_context *ctx;
-- 
2.11.0



More information about the Intel-gfx mailing list