[Intel-gfx] [PATCH] drm/i915: Optimise VMA lookup slightly
Tvrtko Ursulin
tursulin at ursulin.net
Tue Dec 13 12:22:18 UTC 2016
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
A few details to hopefully make a very hot function a tiny bit
more efficient:
1. Cast VM pointers to integers before subtraction to save the compiler
doing a true pointer subtraction, which includes a multiplication.
2. Use smaller type for comparison since we only care about
the sign.
3. Prefer the ppgtt lookup branch and inline it, allowing the
compiler to optimise out the second part of i915_vma_compare
and save one call indirection.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 23 +++++++++++++++++------
drivers/gpu/drm/i915/i915_vma.h | 9 ++++++---
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ef00d36680c9..aa81945a608b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3338,17 +3338,17 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
i915_ggtt_flush(dev_priv);
}
-struct i915_vma *
-i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- const struct i915_ggtt_view *view)
+static inline struct i915_vma *
+__i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
{
struct rb_node *rb;
rb = obj->vma_tree.rb_node;
while (rb) {
struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
- long cmp;
+ int cmp;
cmp = i915_vma_compare(vma, vm, view);
if (cmp == 0)
@@ -3363,6 +3363,14 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
return NULL;
}
+noinline struct i915_vma *
+i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view)
+{
+ return __i915_gem_obj_to_vma(obj, vm, view);
+}
+
struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -3373,7 +3381,10 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
lockdep_assert_held(&obj->base.dev->struct_mutex);
GEM_BUG_ON(view && !i915_is_ggtt(vm));
- vma = i915_gem_obj_to_vma(obj, vm, view);
+ if (likely(!view))
+ vma = __i915_gem_obj_to_vma(obj, vm, NULL);
+ else
+ vma = i915_gem_obj_to_vma(obj, vm, view);
if (!vma) {
vma = i915_vma_create(obj, vm, view);
GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 21be74c61065..098f206c1a4d 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -178,15 +178,18 @@ static inline void i915_vma_put(struct i915_vma *vma)
i915_gem_object_put(vma->obj);
}
-static inline long
+static inline int
i915_vma_compare(struct i915_vma *vma,
struct i915_address_space *vm,
const struct i915_ggtt_view *view)
{
+ long cmp;
+
GEM_BUG_ON(view && !i915_is_ggtt(vm));
- if (vma->vm != vm)
- return vma->vm - vm;
+ cmp = (unsigned long)vma->vm - (unsigned long)vm;
+ if (cmp)
+ return cmp;
if (!view)
return vma->ggtt_view.type;
--
2.7.4
More information about the Intel-gfx
mailing list