[Nouveau] [RFC PATCH] nouveau: add locking

Ilia Mirkin imirkin at alum.mit.edu
Sun Jun 5 03:47:48 UTC 2016


---

This is still a WIP; I just wanted to get people's opinions. It's also not bullet-proof: nouveau_bo_wait [which is in turn called by nouveau_bo_map] can trigger a kick, so strictly speaking we need to hold the lock around every nouveau_bo_map call as well.
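
To make that concrete, here is a minimal sketch (illustrative only, not part of the patch; the helper name and error handling are made up, but nouveau_bo_map/nouveau_bo_wait and push_mutex are the real pieces):

static void *
map_bo_locked(struct nouveau_screen *screen, struct nouveau_bo *bo)
{
   void *map = NULL;

   /* nouveau_bo_map can end up in nouveau_bo_wait, which can kick the
    * pushbuf, so the push lock has to be held across the map. */
   pipe_mutex_lock(screen->push_mutex);
   if (!nouveau_bo_map(bo, NOUVEAU_BO_RD, screen->client))
      map = bo->map;
   pipe_mutex_unlock(screen->push_mutex);

   return map;
}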

My strategy here was to add locks around all the user-accessible APIs while leaving all the internal stuff unlocked. When waiting for a fence, we actually allow other threads to proceed by releasing the lock while the CPU is spinning.
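
Roughly, every user-facing entry point ends up with this shape (a sketch, not the real code; nvXX_entry_point is a stand-in name, the actual hooks are in the diff below):

static void
nvXX_entry_point(struct pipe_context *pipe)
{
   struct nouveau_screen *screen = nouveau_context(pipe)->screen;

   pipe_mutex_lock(screen->push_mutex);
   /* ... validate state and emit methods into screen->pushbuf ... */
   pipe_mutex_unlock(screen->push_mutex);
}

nouveau_fence_wait() is the one exception: it is entered with push_mutex held, drops it while the CPU spins, and re-takes it before returning (see the nouveau_fence.c hunk).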

Long term, we probably have to rethink how libdrm works (right now it's very easy to accidentally trigger a pushbuf submission), or perhaps how nouveau in mesa uses libdrm. Either way, that's a larger change.

With this, the various glx-multithread tests pass on nv50 and nvc0. I still need to hook this up for nv30, and also for the vdec (video decoding) bits.

I would be curious if people could test this out and see what effect it has on their workloads, and whether I've messed anything up.

 src/gallium/drivers/nouveau/nouveau_buffer.c       | 29 +++++++++--
 src/gallium/drivers/nouveau/nouveau_fence.c        | 18 +++++++
 src/gallium/drivers/nouveau/nouveau_fence.h        |  5 +-
 src/gallium/drivers/nouveau/nouveau_screen.c       | 12 ++++-
 src/gallium/drivers/nouveau/nouveau_screen.h       |  3 ++
 src/gallium/drivers/nouveau/nv50/nv50_compute.c    |  4 ++
 src/gallium/drivers/nouveau/nv50/nv50_context.c    | 11 +++++
 src/gallium/drivers/nouveau/nv50/nv50_context.h    |  5 ++
 src/gallium/drivers/nouveau/nv50/nv50_query.c      | 14 +++++-
 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c   | 20 ++++++--
 .../drivers/nouveau/nv50/nv50_query_hw_sm.c        |  8 ++++
 src/gallium/drivers/nouveau/nv50/nv50_surface.c    | 43 +++++++++++++++--
 src/gallium/drivers/nouveau/nv50/nv50_transfer.c   |  6 +++
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c        |  5 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    |  5 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c    | 15 ++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  5 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c      | 14 +++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c   | 16 ++++++-
 .../drivers/nouveau/nvc0/nvc0_query_hw_sm.c        |  8 ++++
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c    | 49 ++++++++++++++++---
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c   | 56 +++++++++++++++++-----
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c        |  4 ++
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |  4 ++
 25 files changed, 321 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 2db538c..2af6c5f 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -380,6 +380,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
                             struct pipe_transfer **ptransfer)
 {
    struct nouveau_context *nv = nouveau_context(pipe);
+   struct nouveau_screen *screen = nv->screen;
    struct nv04_resource *buf = nv04_resource(resource);
    struct nouveau_transfer *tx = MALLOC_STRUCT(nouveau_transfer);
    uint8_t *map;
@@ -426,15 +427,20 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
                align_free(buf->data);
                buf->data = NULL;
             }
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_transfer_staging(nv, tx, false);
+            pipe_mutex_unlock(screen->push_mutex);
             nouveau_transfer_read(nv, tx);
          } else {
             /* The buffer is currently idle. Create a staging area for writes,
              * and make sure that the cached data is up-to-date. */
             if (usage & PIPE_TRANSFER_WRITE)
                nouveau_transfer_staging(nv, tx, true);
-            if (!buf->data)
+            if (!buf->data) {
+               pipe_mutex_lock(screen->push_mutex);
                nouveau_buffer_cache(nv, buf);
+               pipe_mutex_unlock(screen->push_mutex);
+            }
          }
       }
       return buf->data ? (buf->data + box->x) : tx->map;
@@ -479,7 +485,9 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
       if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
          /* Discarding was not possible, must sync because
           * subsequent transfers might use UNSYNCHRONIZED. */
+         pipe_mutex_lock(screen->push_mutex);
          nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
+         pipe_mutex_unlock(screen->push_mutex);
       } else
       if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
          /* The whole range is being discarded, so it doesn't matter what was
@@ -488,10 +496,13 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
          map = tx->map;
       } else
       if (nouveau_buffer_busy(buf, PIPE_TRANSFER_READ)) {
-         if (usage & PIPE_TRANSFER_DONTBLOCK)
+         if (usage & PIPE_TRANSFER_DONTBLOCK) {
             map = NULL;
-         else
+         } else {
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
+            pipe_mutex_unlock(screen->push_mutex);
+         }
       } else {
          /* It is expected that the returned buffer be a representation of the
           * data in question, so we must copy it over from the buffer. */
@@ -515,9 +526,13 @@ nouveau_buffer_transfer_flush_region(struct pipe_context *pipe,
 {
    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    struct nv04_resource *buf = nv04_resource(transfer->resource);
+   struct nouveau_screen *screen = nouveau_context(pipe)->screen;
 
-   if (tx->map)
+   if (tx->map) {
+      pipe_mutex_lock(screen->push_mutex);
       nouveau_transfer_write(nouveau_context(pipe), tx, box->x, box->width);
+      pipe_mutex_unlock(screen->push_mutex);
+   }
 
    util_range_add(&buf->valid_buffer_range,
                   tx->base.box.x + box->x,
@@ -537,11 +552,15 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
    struct nouveau_context *nv = nouveau_context(pipe);
    struct nouveau_transfer *tx = nouveau_transfer(transfer);
    struct nv04_resource *buf = nv04_resource(transfer->resource);
+   struct nouveau_screen *screen = nouveau_context(pipe)->screen;
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
       if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
-         if (tx->map)
+         if (tx->map) {
+            pipe_mutex_lock(screen->push_mutex);
             nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+            pipe_mutex_unlock(screen->push_mutex);
+         }
 
          util_range_add(&buf->valid_buffer_range,
                         tx->base.box.x, tx->base.box.x + tx->base.box.width);
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index 691553a..9cbfc2a 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -71,12 +71,14 @@ nouveau_fence_emit(struct nouveau_fence *fence)
 
    ++fence->ref;
 
+   pipe_mutex_lock(screen->fence.list_mutex);
    if (screen->fence.tail)
       screen->fence.tail->next = fence;
    else
       screen->fence.head = fence;
 
    screen->fence.tail = fence;
+   pipe_mutex_unlock(screen->fence.list_mutex);
 
    screen->fence.emit(&screen->base, &fence->sequence);
 
@@ -90,6 +92,9 @@ nouveau_fence_del(struct nouveau_fence *fence)
    struct nouveau_fence *it;
    struct nouveau_screen *screen = fence->screen;
 
+   /* XXX This can race against fence_update. But fence_update can also call
+    * into this, so ... we have to be careful.
+    */
    if (fence->state == NOUVEAU_FENCE_STATE_EMITTED ||
        fence->state == NOUVEAU_FENCE_STATE_FLUSHED) {
       if (fence == screen->fence.head) {
@@ -123,6 +128,7 @@ nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
       return;
    screen->fence.sequence_ack = sequence;
 
+   pipe_mutex_lock(screen->fence.list_mutex);
    for (fence = screen->fence.head; fence; fence = next) {
       next = fence->next;
       sequence = fence->sequence;
@@ -144,6 +150,7 @@ nouveau_fence_update(struct nouveau_screen *screen, bool flushed)
          if (fence->state == NOUVEAU_FENCE_STATE_EMITTED)
             fence->state = NOUVEAU_FENCE_STATE_FLUSHED;
    }
+   pipe_mutex_unlock(screen->fence.list_mutex);
 }
 
 #define NOUVEAU_FENCE_MAX_SPINS (1 << 31)
@@ -198,18 +205,27 @@ nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debu
    uint32_t spins = 0;
    int64_t start = 0;
 
+   /* Fast-path for the case where the fence is already signaled to avoid
+    * messing around with mutexes and timing.
+    */
+   if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
+      return true;
+
    if (debug && debug->debug_message)
       start = os_time_get_nano();
 
    if (!nouveau_fence_kick(fence))
       return false;
 
+   pipe_mutex_unlock(screen->push_mutex);
+
    do {
       if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
          if (debug && debug->debug_message)
             pipe_debug_message(debug, PERF_INFO,
                                "stalled %.3f ms waiting for fence",
                                (os_time_get_nano() - start) / 1000000.f);
+         pipe_mutex_lock(screen->push_mutex);
          return true;
       }
       if (!spins)
@@ -227,6 +243,8 @@ nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debu
                 fence->sequence,
                 screen->fence.sequence_ack, screen->fence.sequence);
 
+   pipe_mutex_lock(screen->push_mutex);
+
    return false;
 }
 
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index f10016d..98d021e 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -2,6 +2,7 @@
 #ifndef __NOUVEAU_FENCE_H__
 #define __NOUVEAU_FENCE_H__
 
+#include "util/u_atomic.h"
 #include "util/u_inlines.h"
 #include "util/list.h"
 
@@ -47,10 +48,10 @@ static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
    if (fence)
-      ++fence->ref;
+      p_atomic_inc(&fence->ref);
 
    if (*ref) {
-      if (--(*ref)->ref == 0)
+      if (p_atomic_dec_zero(&(*ref)->ref))
          nouveau_fence_del(*ref);
    }
 
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 2c421cc..634fdc3 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -73,10 +73,14 @@ nouveau_screen_fence_finish(struct pipe_screen *screen,
                             struct pipe_fence_handle *pfence,
                             uint64_t timeout)
 {
+   bool ret;
    if (!timeout)
       return nouveau_fence_signalled(nouveau_fence(pfence));
 
-   return nouveau_fence_wait(nouveau_fence(pfence), NULL);
+   pipe_mutex_lock(nouveau_screen(screen)->push_mutex);
+   ret = nouveau_fence_wait(nouveau_fence(pfence), NULL);
+   pipe_mutex_unlock(nouveau_screen(screen)->push_mutex);
+   return ret;
 }
 
 
@@ -153,6 +157,9 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
    if (nv_dbg)
       nouveau_mesa_debug = atoi(nv_dbg);
 
+   pipe_mutex_init(screen->push_mutex);
+   pipe_mutex_init(screen->fence.list_mutex);
+
    /* These must be set before any failure is possible, as the cleanup
     * paths assume they're responsible for deleting them.
     */
@@ -253,6 +260,9 @@ nouveau_screen_fini(struct nouveau_screen *screen)
    nouveau_device_del(&screen->device);
    nouveau_drm_del(&screen->drm);
    close(fd);
+
+   pipe_mutex_destroy(screen->push_mutex);
+   pipe_mutex_destroy(screen->fence.list_mutex);
 }
 
 static void
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 28c4760..28c8620 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -3,6 +3,7 @@
 
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
+#include "os/os_thread.h"
 
 #ifdef DEBUG
 # define NOUVEAU_ENABLE_DRIVER_STATISTICS
@@ -22,6 +23,7 @@ struct nouveau_screen {
    struct nouveau_object *channel;
    struct nouveau_client *client;
    struct nouveau_pushbuf *pushbuf;
+   pipe_mutex push_mutex;
 
    int refcount;
 
@@ -39,6 +41,7 @@ struct nouveau_screen {
       struct nouveau_fence *head;
       struct nouveau_fence *tail;
       struct nouveau_fence *current;
+      pipe_mutex list_mutex;
       u32 sequence;
       u32 sequence_ack;
       void (*emit)(struct pipe_screen *, u32 *sequence);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
index d781f6f..3c174e4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c
@@ -249,9 +249,11 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    struct nv50_program *cp = nv50->compprog;
    bool ret;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    ret = !nv50_state_validate_cp(nv50, ~0);
    if (ret) {
       NOUVEAU_ERR("Failed to launch grid !\n");
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -284,6 +286,8 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
 
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    /* bind a compute shader clobbers fragment shader state */
    nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
 }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 5af0e9b..8a148fc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -37,7 +37,9 @@ nv50_flush(struct pipe_context *pipe,
    if (fence)
       nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
 
+   pipe_mutex_lock(screen->push_mutex);
    PUSH_KICK(screen->pushbuf);
+   pipe_mutex_unlock(screen->push_mutex);
 
    nouveau_context_update_frame_stats(nouveau_context(pipe));
 }
@@ -47,10 +49,12 @@ nv50_texture_barrier(struct pipe_context *pipe)
 {
    struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
 
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
    BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
    BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
    PUSH_DATA (push, 0x20);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
 }
 
 static void
@@ -107,6 +111,7 @@ nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       data_words = string_words;
    else
       data_words = string_words + !!(len & 3);
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
    BEGIN_NI04(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
    if (string_words)
       PUSH_DATAp(push, str, string_words);
@@ -115,6 +120,7 @@ nv50_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       memcpy(&data, &str[string_words * 4], len & 3);
       PUSH_DATA (push, data);
    }
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
 }
 
 void
@@ -291,6 +297,8 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
       return NULL;
    pipe = &nv50->base.pipe;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    if (!nv50_blitctx_create(nv50))
       goto out_err;
 
@@ -391,9 +399,12 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
 
    util_dynarray_init(&nv50->global_residents);
 
+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;
 
 out_err:
+   pipe_mutex_unlock(screen->base.push_mutex);
    if (nv50->bufctx_3d)
       nouveau_bufctx_del(&nv50->bufctx_3d);
    if (nv50->bufctx_cp)
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 2317fa2..b7963a4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -217,6 +217,11 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
 /* nv50_draw.c */
 extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
 
+/* nv50_query.c */
+void nv50_render_condition(struct pipe_context *pipe,
+                           struct pipe_query *pq,
+                           boolean condition, uint mode);
+
 /* nv50_shader_state.c */
 void nv50_vertprog_validate(struct nv50_context *);
 void nv50_gmtyprog_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c b/src/gallium/drivers/nouveau/nv50/nv50_query.c
index 9a1397a..c90e20e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -70,7 +70,7 @@ nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq,
    return q->funcs->get_query_result(nv50_context(pipe), q, wait, result);
 }
 
-static void
+void
 nv50_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -145,6 +145,16 @@ nv50_render_condition(struct pipe_context *pipe,
 }
 
 static void
+nv50_render_condition_locked(struct pipe_context *pipe,
+                             struct pipe_query *pq,
+                             boolean condition, uint mode)
+{
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
+   nv50_render_condition(pipe, pq, condition, mode);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
+}
+
+static void
 nv50_set_active_query_state(struct pipe_context *pipe, boolean enable)
 {
 }
@@ -160,7 +170,7 @@ nv50_init_query_functions(struct nv50_context *nv50)
    pipe->end_query = nv50_end_query;
    pipe->get_query_result = nv50_get_query_result;
    pipe->set_active_query_state = nv50_set_active_query_state;
-   pipe->render_condition = nv50_render_condition;
+   pipe->render_condition = nv50_render_condition_locked;
    nv50->cond_condmode = NV50_3D_COND_MODE_ALWAYS;
 }
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index 727b509..9067bcc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -129,6 +129,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
 {
    struct nouveau_pushbuf *push = nv50->base.pushbuf;
    struct nv50_hw_query *hq = nv50_hw_query(q);
+   bool ret = true;
 
    if (hq->funcs && hq->funcs->begin_query)
       return hq->funcs->begin_query(nv50, hq);
@@ -154,6 +155,7 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
    if (!hq->is64bit)
       hq->data[0] = hq->sequence++; /* the previously used one */
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -193,10 +195,13 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q)
       break;
    default:
       assert(0);
-      return false;
+      ret = false;
+      break;
    }
-   hq->state = NV50_HW_QUERY_STATE_ACTIVE;
-   return true;
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+   if (ret)
+      hq->state = NV50_HW_QUERY_STATE_ACTIVE;
+   return ret;
 }
 
 static void
@@ -212,6 +217,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
 
    hq->state = NV50_HW_QUERY_STATE_ENDED;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -264,6 +270,7 @@ nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q)
       assert(0);
       break;
    }
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
    if (hq->is64bit)
       nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence);
 }
@@ -286,16 +293,21 @@ nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q,
       nv50_hw_query_update(q);
 
    if (hq->state != NV50_HW_QUERY_STATE_READY) {
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       if (!wait) {
          /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */
          if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) {
             hq->state = NV50_HW_QUERY_STATE_FLUSHED;
             PUSH_KICK(nv50->base.pushbuf);
          }
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return false;
       }
-      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client))
+      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) {
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return false;
+      }
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
    }
    hq->state = NV50_HW_QUERY_STATE_READY;
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index bcfba9f..31445eb 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -176,6 +176,7 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
       return false;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 4);
    PUSH_SPACE(push, 4 * 4);
 
@@ -208,6 +209,7 @@ nv50_hw_sm_begin_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
       BEGIN_NV04(push, NV50_CP(MP_PM_SET(c)), 1);
       PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }
 
@@ -237,6 +239,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
       screen->pm.prog = prog;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* disable all counting */
    PUSH_SPACE(push, 8);
    for (c = 0; c < 4; c++) {
@@ -260,6 +263,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
    PUSH_SPACE(push, 2);
    BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
    PUSH_DATA (push, 0);
+   pipe_mutex_unlock(screen->base.push_mutex);
 
    pipe->bind_compute_state(pipe, screen->pm.prog);
    input[0] = hq->bo->offset + hq->base_offset;
@@ -276,6 +280,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
 
    nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY);
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* re-active other counters */
    PUSH_SPACE(push, 8);
    mask = 0;
@@ -302,6 +307,7 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
                     | cfg->ctr[i].unit | cfg->ctr[i].mode);
       }
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
 
 static inline bool
@@ -343,7 +349,9 @@ nv50_hw_sm_get_query_result(struct nv50_context *nv50, struct nv50_hw_query *hq,
 
    cfg = nv50_hw_sm_query_get_cfg(nv50, hq);
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    ret = nv50_hw_sm_query_read_data(count, nv50, wait, hq, cfg, mp_count);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
    if (!ret)
       return false;
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 61dec3f..d6b9de0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -204,10 +204,13 @@ nv50_resource_copy_region(struct pipe_context *pipe,
    bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nv50->base,
                           nv04_resource(dst), dstx,
                           nv04_resource(src), src_box->x, src_box->width);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -247,6 +250,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
          else
             srect.base += src_mt->layer_stride;
       }
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -270,6 +274,7 @@ nv50_resource_copy_region(struct pipe_context *pipe,
          break;
    }
    nouveau_bufctx_reset(nv50->bufctx, NV50_BIND_2D);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
 
 static void
@@ -288,14 +293,18 @@ nv50_clear_render_target(struct pipe_context *pipe,
 
    assert(dst->texture->target != PIPE_BUFFER);
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
    PUSH_DATAf(push, color->f[0]);
    PUSH_DATAf(push, color->f[1]);
    PUSH_DATAf(push, color->f[2]);
    PUSH_DATAf(push, color->f[3]);
 
-   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
 
@@ -353,6 +362,8 @@ nv50_clear_render_target(struct pipe_context *pipe,
    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
    PUSH_DATA (push, nv50->cond_condmode);
 
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }
 
@@ -376,6 +387,8 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    assert(dst->texture->target != PIPE_BUFFER);
    assert(nouveau_bo_memtype(bo)); /* ZETA cannot be linear */
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (clear_flags & PIPE_CLEAR_DEPTH) {
       BEGIN_NV04(push, NV50_3D(CLEAR_DEPTH), 1);
       PUSH_DATAf(push, depth);
@@ -388,8 +401,10 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
       mode |= NV50_3D_CLEAR_BUFFERS_S;
    }
 
-   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
 
@@ -436,6 +451,8 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
    PUSH_DATA (push, nv50->cond_condmode);
 
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }
 
@@ -524,9 +541,12 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
    unsigned i, j, k;
    uint32_t mode = 0;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
-   if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER))
+   if (!nv50_state_validate_3d(nv50, NV50_NEW_3D_FRAMEBUFFER)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    /* We have to clear ALL of the layers, not up to the min number of layers
     * of any attachment. */
@@ -592,6 +612,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
    /* restore the array mode */
    BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
    PUSH_DATA (push, nv50->rt_array_mode);
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
 
 static void
@@ -719,14 +740,18 @@ nv50_clear_buffer(struct pipe_context *pipe,
 
    assert(size % data_size == 0);
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (offset & 0xff) {
       unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
       assert(fixup_size % data_size == 0);
       nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
       offset += fixup_size;
       size -= fixup_size;
-      if (!size)
+      if (!size) {
+         pipe_mutex_unlock(nv50->screen->base.push_mutex);
          return;
+      }
    }
 
    elements = size / data_size;
@@ -742,8 +767,10 @@ nv50_clear_buffer(struct pipe_context *pipe,
    PUSH_DATAf(push, color.f[2]);
    PUSH_DATAf(push, color.f[3]);
 
-   if (nouveau_pushbuf_space(push, 32, 1, 0))
+   if (nouveau_pushbuf_space(push, 32, 1, 0)) {
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
 
@@ -798,6 +825,8 @@ nv50_clear_buffer(struct pipe_context *pipe,
                              data, data_size);
    }
 
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
+
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER | NV50_NEW_3D_SCISSOR;
 }
 
@@ -1700,6 +1729,8 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
         info->src.box.height != -info->dst.box.height))
       eng3d = true;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    if (nv50->screen->num_occlusion_queries_active) {
       BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
       PUSH_DATA (push, 0);
@@ -1714,6 +1745,8 @@ nv50_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
       BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
       PUSH_DATA (push, 1);
    }
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
 
 static void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
index 86a8c15..f5c7c57 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -304,6 +304,7 @@ nv50_miptree_transfer_map(struct pipe_context *pctx,
       unsigned base = tx->rect[0].base;
       unsigned z = tx->rect[0].z;
       unsigned i;
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       for (i = 0; i < box->depth; ++i) {
          nv50_m2mf_transfer_rect(nv50, &tx->rect[1], &tx->rect[0],
                                  tx->nblocksx, tx->nblocksy);
@@ -313,6 +314,9 @@ nv50_miptree_transfer_map(struct pipe_context *pctx,
             tx->rect[0].base += mt->layer_stride;
          tx->rect[1].base += size;
       }
+      /* Kick these reads out so we don't have to reacquire a lock below */
+      PUSH_KICK(nv50->base.pushbuf);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       tx->rect[0].z = z;
       tx->rect[0].base = base;
       tx->rect[1].base = 0;
@@ -349,6 +353,7 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
    unsigned i;
 
    if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+      pipe_mutex_lock(nv50->screen->base.push_mutex);
       for (i = 0; i < tx->base.box.depth; ++i) {
          nv50_m2mf_transfer_rect(nv50, &tx->rect[0], &tx->rect[1],
                                  tx->nblocksx, tx->nblocksy);
@@ -362,6 +367,7 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
       /* Allow the copies above to finish executing before freeing the source */
       nouveau_fence_work(nv50->screen->base.fence.current,
                          nouveau_fence_unref_bo, tx->rect[1].bo);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
    } else {
       nouveau_bo_ref(NULL, &tx->rect[1].bo);
    }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index a11cdf8..f73329c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -767,6 +767,8 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    bool tex_dirty = false;
    int s;
 
+   pipe_mutex_lock(nv50->screen->base.push_mutex);
+
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nv50->vb_elt_first = info->min_index + info->index_bias;
    nv50->vb_elt_limit = info->max_index - info->min_index;
@@ -827,6 +829,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nv50_push_vbo(nv50, info);
       push->kick_notify = nv50_default_kick_notify;
       nouveau_pushbuf_bufctx(push, NULL);
+      pipe_mutex_unlock(nv50->screen->base.push_mutex);
       return;
    }
 
@@ -886,4 +889,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    nv50_release_user_vbufs(nv50);
 
    nouveau_pushbuf_bufctx(push, NULL);
+
+   pipe_mutex_unlock(nv50->screen->base.push_mutex);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 8c88225..33b68c7 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -387,13 +387,17 @@ void
 nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nvc0_program *cp = nvc0->compprog;
    int ret;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    ret = !nvc0_state_validate_cp(nvc0, ~0);
    if (ret) {
       NOUVEAU_ERR("Failed to launch grid !\n");
+      pipe_mutex_unlock(screen->base.push_mutex);
       return;
    }
 
@@ -458,4 +462,5 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 
    /* TODO: Not sure if this is really necessary. */
    nvc0_compute_invalidate_surfaces(nvc0, 5);
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 1137e6c..28d6fec 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -38,7 +38,9 @@ nvc0_flush(struct pipe_context *pipe,
    if (fence)
       nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
 
+   pipe_mutex_lock(screen->push_mutex);
    PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */
+   pipe_mutex_unlock(screen->push_mutex);
 
    nouveau_context_update_frame_stats(&nvc0->base);
 }
@@ -48,8 +50,10 @@ nvc0_texture_barrier(struct pipe_context *pipe)
 {
    struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;
 
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
    IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
    IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0);
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }
 
 static void
@@ -59,6 +63,8 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    int i, s;
 
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
+
    if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
       for (i = 0; i < nvc0->num_vtxbufs; ++i) {
          if (!nvc0->vtxbuf[i].buffer)
@@ -108,6 +114,8 @@ nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
       nvc0->cb_dirty = true;
    if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_INDEX_BUFFER))
       nvc0->base.vbo_dirty = true;
+
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }
 
 static void
@@ -124,6 +132,7 @@ nvc0_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       data_words = string_words;
    else
       data_words = string_words + !!(len & 3);
+   pipe_mutex_lock(nvc0_context(pipe)->screen->base.push_mutex);
    BEGIN_NIC0(push, SUBC_3D(NV04_GRAPH_NOP), data_words);
    if (string_words)
       PUSH_DATAp(push, str, string_words);
@@ -132,6 +141,7 @@ nvc0_emit_string_marker(struct pipe_context *pipe, const char *str, int len)
       memcpy(&data, &str[string_words * 4], len & 3);
       PUSH_DATA (push, data);
    }
+   pipe_mutex_unlock(nvc0_context(pipe)->screen->base.push_mutex);
 }
 
 static void
@@ -362,6 +372,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
       return NULL;
    pipe = &nvc0->base.pipe;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    if (!nvc0_blitctx_create(nvc0))
       goto out_err;
 
@@ -465,9 +477,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
 
    util_dynarray_init(&nvc0->global_residents);
 
+   pipe_mutex_unlock(screen->base.push_mutex);
+
    return pipe;
 
 out_err:
+   pipe_mutex_unlock(screen->base.push_mutex);
    if (nvc0) {
       if (nvc0->bufctx_3d)
          nouveau_bufctx_del(&nvc0->bufctx_3d);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 1b3f88b..8d27300 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -292,6 +292,11 @@ uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
                                     uint32_t label);
 void nvc0_program_init_tcp_empty(struct nvc0_context *);
 
+/* nvc0_query.c */
+void nvc0_render_condition(struct pipe_context *pipe,
+                           struct pipe_query *pq,
+                           boolean condition, uint mode);
+
 /* nvc0_shader_state.c */
 void nvc0_vertprog_validate(struct nvc0_context *);
 void nvc0_tctlprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 91fb72f..55d1dc1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -92,7 +92,7 @@ nvc0_get_query_result_resource(struct pipe_context *pipe,
                                        index, resource, offset);
 }
 
-static void
+void
 nvc0_render_condition(struct pipe_context *pipe,
                       struct pipe_query *pq,
                       boolean condition, uint mode)
@@ -161,6 +161,16 @@ nvc0_render_condition(struct pipe_context *pipe,
    PUSH_DATA (push, hq->bo->offset + hq->offset);
 }
 
+static void
+nvc0_render_condition_locked(struct pipe_context *pipe,
+                             struct pipe_query *pq,
+                             boolean condition, uint mode)
+{
+   pipe_mutex_lock(nouveau_context(pipe)->screen->push_mutex);
+   nvc0_render_condition(pipe, pq, condition, mode);
+   pipe_mutex_unlock(nouveau_context(pipe)->screen->push_mutex);
+}
+
 int
 nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
                                   unsigned id,
@@ -272,6 +282,6 @@ nvc0_init_query_functions(struct nvc0_context *nvc0)
    pipe->get_query_result = nvc0_get_query_result;
    pipe->get_query_result_resource = nvc0_get_query_result_resource;
    pipe->set_active_query_state = nvc0_set_active_query_state;
-   pipe->render_condition = nvc0_render_condition;
+   pipe->render_condition = nvc0_render_condition_locked;
    nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 4c34593..f2584cb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -154,6 +154,7 @@ nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    }
    hq->sequence++;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -198,6 +199,7 @@ nvc0_hw_begin_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    default:
       break;
    }
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
    hq->state = NVC0_HW_QUERY_STATE_ACTIVE;
    return ret;
 }
@@ -221,6 +223,7 @@ nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    }
    hq->state = NVC0_HW_QUERY_STATE_ENDED;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
    switch (q->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
    case PIPE_QUERY_OCCLUSION_PREDICATE:
@@ -276,6 +279,7 @@ nvc0_hw_end_query(struct nvc0_context *nvc0, struct nvc0_query *q)
    default:
       break;
    }
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
    if (hq->is64bit)
       nouveau_fence_ref(nvc0->screen->base.fence.current, &hq->fence);
 }
@@ -298,16 +302,21 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
       nvc0_hw_query_update(nvc0->screen->base.client, q);
 
    if (hq->state != NVC0_HW_QUERY_STATE_READY) {
+      pipe_mutex_lock(nvc0->screen->base.push_mutex);
       if (!wait) {
          if (hq->state != NVC0_HW_QUERY_STATE_FLUSHED) {
             hq->state = NVC0_HW_QUERY_STATE_FLUSHED;
             /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
             PUSH_KICK(nvc0->base.pushbuf);
          }
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return false;
       }
-      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
+      if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nvc0->screen->base.client)) {
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return false;
+      }
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
    }
    hq->state = NVC0_HW_QUERY_STATE_READY;
@@ -374,6 +383,8 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
 
    assert(!hq->funcs || !hq->funcs->get_query_result);
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (index == -1) {
       /* TODO: Use a macro to write the availability of the query */
       if (hq->state != NVC0_HW_QUERY_STATE_READY)
@@ -382,6 +393,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
       nvc0->base.push_cb(&nvc0->base, buf, offset,
                          result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
                          ready);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
 
@@ -469,6 +481,8 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
                            4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
    }
 
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
+
    if (buf->mm) {
       nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
       nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 27cbbc4..d416e00 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -1662,6 +1662,7 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       return false;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 4);
    PUSH_SPACE(push, 4 * 8 * + 6);
 
@@ -1710,6 +1711,7 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
      BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1);
      PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }
 
@@ -1733,6 +1735,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       return false;
    }
 
+   pipe_mutex_lock(screen->base.push_mutex);
    assert(cfg->num_counters <= 8);
    PUSH_SPACE(push, 8 * 8 + 2);
 
@@ -1779,6 +1782,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
       BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1);
       PUSH_DATA (push, 0);
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
    return true;
 }
 
@@ -1866,6 +1870,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
    if (unlikely(!screen->pm.prog))
       screen->pm.prog = nvc0_hw_sm_get_program(screen);
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* disable all counting */
    PUSH_SPACE(push, 8);
    for (c = 0; c < 8; ++c)
@@ -1893,6 +1898,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 
    /* upload input data for the compute shader which reads MP counters */
    nvc0_hw_sm_upload_input(nvc0, hq);
+   pipe_mutex_unlock(screen->base.push_mutex);
 
    pipe->bind_compute_state(pipe, screen->pm.prog);
    for (i = 0; i < 3; i++) {
@@ -1906,6 +1912,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
 
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
 
+   pipe_mutex_lock(screen->base.push_mutex);
    /* re-activate other counters */
    PUSH_SPACE(push, 16);
    mask = 0;
@@ -1930,6 +1937,7 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
          PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
       }
    }
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
 
 static inline bool
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index b9437b2..558b5c5 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -491,7 +491,9 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
+      pipe_mutex_lock(screen->base.push_mutex);
       nouveau_fence_wait(current, NULL);
+      pipe_mutex_unlock(screen->base.push_mutex);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index ddcb66f..94b1972 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -206,11 +206,14 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
    bool m2mf;
    unsigned dst_layer = dstz, src_layer = src_box->z;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
       nouveau_copy_buffer(&nvc0->base,
                           nv04_resource(dst), dstx,
                           nv04_resource(src), src_box->x, src_box->width);
       NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
    NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);
@@ -251,6 +254,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
          else
             srect.base += src_mt->layer_stride;
       }
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
 
@@ -273,6 +277,7 @@ nvc0_resource_copy_region(struct pipe_context *pipe,
          break;
    }
    nouveau_bufctx_reset(nvc0->bufctx, 0);
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 static void
@@ -290,8 +295,12 @@ nvc0_clear_render_target(struct pipe_context *pipe,
 
    assert(dst->texture->target != PIPE_BUFFER);
 
-   if (!PUSH_SPACE(push, 32 + sf->depth))
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
+   if (!PUSH_SPACE(push, 32 + sf->depth)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR);
 
@@ -354,6 +363,8 @@ nvc0_clear_render_target(struct pipe_context *pipe,
    IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
 
    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 static void
@@ -541,8 +552,11 @@ nvc0_clear_buffer(struct pipe_context *pipe,
 
    assert(size % data_size == 0);
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (data_size == 12) {
       nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
    }
 
@@ -552,8 +566,10 @@ nvc0_clear_buffer(struct pipe_context *pipe,
       nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
       offset += fixup_size;
       size -= fixup_size;
-      if (!size)
+      if (!size) {
+         pipe_mutex_unlock(nvc0->screen->base.push_mutex);
          return;
+      }
    }
 
    elements = size / data_size;
@@ -563,8 +579,10 @@ nvc0_clear_buffer(struct pipe_context *pipe,
       width &= ~0xff;
    assert(width > 0);
 
-   if (!PUSH_SPACE(push, 40))
+   if (!PUSH_SPACE(push, 40)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
 
@@ -612,6 +630,8 @@ nvc0_clear_buffer(struct pipe_context *pipe,
    }
 
    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 static void
@@ -633,8 +653,11 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
 
    assert(dst->texture->target != PIPE_BUFFER);
 
-   if (!PUSH_SPACE(push, 32 + sf->depth))
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+   if (!PUSH_SPACE(push, 32 + sf->depth)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
 
@@ -681,6 +704,8 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
    IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
 
    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 void
@@ -694,9 +719,13 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
    unsigned i, j, k;
    uint32_t mode = 0;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
-   if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER))
+   if (!nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER)) {
+      pipe_mutex_unlock(nvc0->screen->base.push_mutex);
       return;
+   }
 
    if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
       BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
@@ -755,6 +784,8 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
                     (j << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
       }
    }
+
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
 }
 
 
@@ -1148,8 +1179,8 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
    nvc0->samplers_dirty[4] |= 3;
 
    if (nvc0->cond_query && !blit->render_condition_enable)
-      nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
-                                       nvc0->cond_cond, nvc0->cond_mode);
+      nvc0_render_condition(&nvc0->base.pipe, nvc0->cond_query,
+                            nvc0->cond_cond, nvc0->cond_mode);
 
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP);
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
@@ -1608,6 +1639,8 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
         info->src.box.height != -info->dst.box.height))
       eng3d = true;
 
+   pipe_mutex_lock(nvc0->screen->base.push_mutex);
+
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
 
@@ -1619,6 +1652,8 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
    if (nvc0->screen->num_occlusion_queries_active)
       IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
 
+   pipe_mutex_unlock(nvc0->screen->base.push_mutex);
+
    NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
 }
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index 67ebdb2..02b2c18 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -344,16 +344,18 @@ nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
    return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr, &nvc0->base.debug);
 }
 
-void *
-nvc0_miptree_transfer_map(struct pipe_context *pctx,
-                          struct pipe_resource *res,
-                          unsigned level,
-                          unsigned usage,
-                          const struct pipe_box *box,
-                          struct pipe_transfer **ptransfer)
+static void *
+nvc0_miptree_transfer_map_unlocked(
+      struct pipe_context *pctx,
+      struct pipe_resource *res,
+      unsigned level,
+      unsigned usage,
+      const struct pipe_box *box,
+      struct pipe_transfer **ptransfer)
 {
    struct nvc0_context *nvc0 = nvc0_context(pctx);
-   struct nouveau_device *dev = nvc0->screen->base.device;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct nouveau_device *dev = screen->base.device;
    struct nv50_miptree *mt = nv50_miptree(res);
    struct nvc0_transfer *tx;
    uint32_t size;
@@ -460,9 +462,29 @@ nvc0_miptree_transfer_map(struct pipe_context *pctx,
    return tx->rect[1].bo->map;
 }
 
-void
-nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
-                            struct pipe_transfer *transfer)
+void *
+nvc0_miptree_transfer_map(
+      struct pipe_context *pctx,
+      struct pipe_resource *res,
+      unsigned level,
+      unsigned usage,
+      const struct pipe_box *box,
+      struct pipe_transfer **ptransfer)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   pipe_mutex_lock(screen->base.push_mutex);
+   void *ret = nvc0_miptree_transfer_map_unlocked(
+         pctx, res, level, usage, box, ptransfer);
+   pipe_mutex_unlock(screen->base.push_mutex);
+
+   return ret;
+}
+
+static void
+nvc0_miptree_transfer_unmap_unlocked(struct pipe_context *pctx,
+                                     struct pipe_transfer *transfer)
 {
    struct nvc0_context *nvc0 = nvc0_context(pctx);
    struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
@@ -502,6 +524,18 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
    FREE(tx);
 }
 
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+                            struct pipe_transfer *transfer)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pctx);
+   struct nvc0_screen *screen = nvc0->screen;
+
+   pipe_mutex_lock(screen->base.push_mutex);
+   nvc0_miptree_transfer_unmap_unlocked(pctx, transfer);
+   pipe_mutex_unlock(screen->base.push_mutex);
+}
+
 /* This happens rather often with DTD9/st. */
 static void
 nvc0_cb_push(struct nouveau_context *nv,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 888c094..7662bdd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -938,6 +938,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    struct nvc0_screen *screen = nvc0->screen;
    int s;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nvc0->vb_elt_first = info->min_index + info->index_bias;
    nvc0->vb_elt_limit = info->max_index - info->min_index;
@@ -1031,6 +1033,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       nvc0_push_vbo(nvc0, info);
       push->kick_notify = nvc0_default_kick_notify;
       nouveau_pushbuf_bufctx(push, NULL);
+      pipe_mutex_unlock(screen->base.push_mutex);
       return;
    }
 
@@ -1083,4 +1086,5 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    nvc0_release_user_vbufs(nvc0);
 
    nouveau_pushbuf_bufctx(push, NULL);
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 60508d8..edc4ad3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -545,12 +545,15 @@ void
 nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 {
    struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nvc0_screen *screen = nvc0->screen;
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct nve4_cp_launch_desc *desc;
    uint64_t desc_gpuaddr;
    struct nouveau_bo *desc_bo;
    int ret;
 
+   pipe_mutex_lock(screen->base.push_mutex);
+
    desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
    if (!desc) {
       ret = -1;
@@ -636,6 +639,7 @@ out:
       NOUVEAU_ERR("Failed to launch grid !\n");
    nouveau_scratch_done(&nvc0->base);
    nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+   pipe_mutex_unlock(screen->base.push_mutex);
 }
 
 
-- 
2.7.3


