<html>
    <head>
      <base href="https://bugs.freedesktop.org/" />
    </head>
    <body>
      <p>
        <div>
            <b><a class="bz_bug_link 
          bz_status_REOPENED "
   title="REOPENED --- - [ilk] igt/gem_close_race is slow due to mutex contention vs clflush"
   href="https://bugs.freedesktop.org/show_bug.cgi?id=71029#c20">Comment # 20</a>
              on <a class="bz_bug_link 
          bz_status_REOPENED "
   title="REOPENED --- - [ilk] igt/gem_close_race is slow due to mutex contention vs clflush"
   href="https://bugs.freedesktop.org/show_bug.cgi?id=71029">bug 71029</a>
              from <span class="vcard"><a class="email" href="mailto:chris@chris-wilson.co.uk" title="Chris Wilson &lt;chris@chris-wilson.co.uk&gt;"> <span class="fn">Chris Wilson</span></a>
</span></b>
        <pre>One idea is to recognise that we have some extremely long mutex hold times, and
disable spinning for those, i.e.:

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 338f80c..ed45276 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3769,7 +3769,10 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
                return false;

        trace_i915_gem_object_clflush(obj);
+
+       mutex_spin_disable(&obj->base.dev->struct_mutex);
        drm_clflush_sg(obj->pages);
+       mutex_spin_enable(&obj->base.dev->struct_mutex);

        return true;
 }

That's obviously not going to fly upstream. A better suggestion would perhaps be
to autotune the spinning by timing out. So something like:

diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index 838dc9e..5e0f27e 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -63,6 +63,7 @@ bool osq_lock(struct optimistic_spin_queue **lock)
 {
        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
        struct optimistic_spin_queue *prev, *next;
+       unsigned long timeout;

        node->locked = 0;
        node->next = NULL;
@@ -82,7 +83,11 @@ bool osq_lock(struct optimistic_spin_queue **lock)
         * cmpxchg in an attempt to undo our queueing.
         */

+       timeout = jiffies + 1;
        while (!smp_load_acquire(&node->locked)) {
+               if (time_after(jiffies, timeout))
+                       goto unqueue;
+
                /*
                 * If we need to reschedule bail... so we can block.
                 */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index bc73d33..ac8c435 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -147,8 +147,17 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
 static noinline
 int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+       unsigned long timeout;
+
+       if (need_resched())
+               return false;
+
        rcu_read_lock();
+       timeout = jiffies + 1;
        while (owner_running(lock, owner)) {
+               if (time_after(jiffies, timeout))
+                       break;
+
                if (need_resched())</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are the QA Contact for the bug.</li>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>