[Bug 71029] [ilk] igt/gem_close_race is slow due to mutex contention vs clflush

Fri Jul 11 15:34:17 PDT 2014

https://bugs.freedesktop.org/show_bug.cgi?id=71029

--- Comment #20 from Chris Wilson <chris at chris-wilson.co.uk> ---
One idea is to recognise that we have some extremely long mutex hold times, and
to disable spinning for those, i.e.:

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 338f80c..ed45276 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3769,7 +3769,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
                return false;

        trace_i915_gem_object_clflush(obj);
+
+       mutex_spin_disable(&obj->base.dev->struct_mutex);
        drm_clflush_sg(obj->pages);
+       mutex_spin_enable(&obj->base.dev->struct_mutex);

        return true;
 }

That's obviously not going to fly upstream. A better suggestion would perhaps be
to autotune the spinning by timing out. So something like:

diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index 838dc9e..5e0f27e 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -63,6 +63,7 @@ bool osq_lock(struct optimistic_spin_queue **lock)
 {
        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
        struct optimistic_spin_queue *prev, *next;
+       unsigned long timeout;

        node->locked = 0;
        node->next = NULL;
@@ -82,7 +83,11 @@ bool osq_lock(struct optimistic_spin_queue **lock)
         * cmpxchg in an attempt to undo our queueing.
         */

+       timeout = jiffies + 1;
        while (!smp_load_acquire(&node->locked)) {
+               if (time_after(jiffies, timeout))
+                       goto unqueue;
+
                /*
                 * If we need to reschedule bail... so we can block.
                 */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index bc73d33..ac8c435 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -147,8 +147,17 @@ static inline bool owner_running(struct mutex *lock,
struct task_struct *owner)
 static noinline
 int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+       unsigned long timeout;
+
+       if (need_resched())
+               return false;
+
        rcu_read_lock();
+       timeout = jiffies + 1;
        while (owner_running(lock, owner)) {
+               if (time_after(jiffies, timeout))
+                       break;
+
                if (need_resched())

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/intel-gfx-bugs/attachments/20140711/1eed72b7/attachment-0001.html>