[Mesa-dev] [PATCH 46/84] st/nine: Implement gallium nine CSMT

Axel Davy axel.davy at ens.fr
Wed Dec 7 22:55:19 UTC 2016


From: Patrick Rudolph <siro at das-labor.org>

Use an offloading thread for all nine_context functions.
Macros are used to ease the reading of the code.

Signed-off-by: Patrick Rudolph <siro at das-labor.org>
Signed-off-by: Axel Davy <axel.davy at ens.fr>
---
 src/gallium/auxiliary/os/os_thread.h               |  11 +
 src/gallium/state_trackers/nine/Makefile.sources   |   1 +
 src/gallium/state_trackers/nine/adapter9.h         |   1 +
 src/gallium/state_trackers/nine/device9.c          |  33 +-
 src/gallium/state_trackers/nine/device9.h          |   5 +
 src/gallium/state_trackers/nine/nine_csmt_helper.h | 377 ++++++++++++++
 src/gallium/state_trackers/nine/nine_queue.c       |  15 +
 src/gallium/state_trackers/nine/nine_queue.h       |   3 +
 src/gallium/state_trackers/nine/nine_state.c       | 554 ++++++++++++++-------
 src/gallium/state_trackers/nine/nine_state.h       |  28 +-
 src/gallium/state_trackers/nine/pixelshader9.c     |   2 +-
 src/gallium/state_trackers/nine/surface9.c         |   1 +
 src/gallium/state_trackers/nine/swapchain9.c       |   2 +
 src/gallium/state_trackers/nine/vertexshader9.c    |   2 +-
 src/gallium/targets/d3dadapter9/drm.c              |   6 +
 src/mesa/drivers/dri/common/xmlpool/t_options.h    |   5 +
 16 files changed, 871 insertions(+), 175 deletions(-)
 create mode 100644 src/gallium/state_trackers/nine/nine_csmt_helper.h

diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index ec8adbc..21faf4b 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -97,6 +97,17 @@ static inline void pipe_thread_setname( const char *name )
 }
 
 
+static inline int pipe_thread_is_self( pipe_thread thread )
+{
+   /* Returns non-zero when `thread` is the calling thread. pthread_self() and
+    * pthread_equal() are plain POSIX, so no glibc version gate is required. */
+#if defined(HAVE_PTHREAD)
+   return pthread_equal(pthread_self(), thread);
+#else
+   return 0; /* no pthreads: identity cannot be checked, report "not self" */
+#endif
+}
+
 /* pipe_mutex
  */
 typedef mtx_t pipe_mutex;
diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources
index 1264908..2bb08a2 100644
--- a/src/gallium/state_trackers/nine/Makefile.sources
+++ b/src/gallium/state_trackers/nine/Makefile.sources
@@ -23,6 +23,7 @@ C_SOURCES := \
 	indexbuffer9.h \
 	iunknown.c \
 	iunknown.h \
+	nine_csmt_helper.h \
 	nine_debug.c \
 	nine_debug.h \
 	nine_defines.h \
diff --git a/src/gallium/state_trackers/nine/adapter9.h b/src/gallium/state_trackers/nine/adapter9.h
index 4a71540..60be056 100644
--- a/src/gallium/state_trackers/nine/adapter9.h
+++ b/src/gallium/state_trackers/nine/adapter9.h
@@ -41,6 +41,7 @@ struct d3dadapter9_context
     BOOL thread_submit;
     BOOL discard_delayed_release;
     BOOL tearfree_discard;
+    int csmt_force;
 
     void (*destroy)( struct d3dadapter9_context *ctx );
 };
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index df96e88..8ac3fc1 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -263,6 +263,24 @@ NineDevice9_ctor( struct NineDevice9 *This,
         nine_bind(&This->context.rt[i], This->state.rt[i]);
     }
 
+    /* Initialize CSMT */
+    if (pCTX->csmt_force == 1)
+        This->csmt_active = true;
+    else if (pCTX->csmt_force == 0)
+        This->csmt_active = false;
+    else
+        /* r600 and radeonsi are thread safe. */
+        This->csmt_active = strstr(pScreen->get_name(pScreen), "AMD") != NULL;
+
+    if (This->csmt_active) {
+        This->csmt_ctx = nine_csmt_create(This);
+        if (!This->csmt_ctx)
+            return E_OUTOFMEMORY;
+    }
+
+    if (This->csmt_active)
+        DBG("\033[1;32mCSMT is active\033[0m\n");
+
     /* Initialize a dummy VBO to be used when a vertex declaration does not
      * specify all the inputs needed by vertex shader, on win default behavior
      * is to pass 0,0,0,0 to the shader */
@@ -444,8 +462,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
 
     /* Allocate upload helper for drivers that suck (from st pov ;). */
 
-    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS);
-    This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS);
+    This->driver_caps.user_vbufs = GET_PCAP(USER_VERTEX_BUFFERS) && !This->csmt_active;
+    This->driver_caps.user_ibufs = GET_PCAP(USER_INDEX_BUFFERS) && !This->csmt_active;
     This->driver_caps.user_cbufs = GET_PCAP(USER_CONSTANT_BUFFERS);
     This->driver_caps.user_sw_vbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_VERTEX_BUFFERS);
     This->driver_caps.user_sw_cbufs = This->screen_sw->get_param(This->screen_sw, PIPE_CAP_USER_CONSTANT_BUFFERS);
@@ -487,6 +505,8 @@ NineDevice9_ctor( struct NineDevice9 *This,
     nine_state_init_sw(This);
 
     ID3DPresentGroup_Release(This->present);
+    if (This->csmt_active)
+        nine_csmt_process(This);
 
     return D3D_OK;
 }
@@ -499,6 +519,13 @@ NineDevice9_dtor( struct NineDevice9 *This )
 
     DBG("This=%p\n", This);
 
+    /* Do not call nine_csmt_process here. The device is dead! */
+    if (This->csmt_active && This->csmt_ctx) {
+        nine_csmt_destroy(This, This->csmt_ctx);
+        This->csmt_active = FALSE;
+        This->csmt_ctx = NULL;
+    }
+
     nine_ff_fini(This);
     nine_state_destroy_sw(This);
     nine_state_clear(&This->state, TRUE);
@@ -564,7 +591,7 @@ NineDevice9_GetScreen( struct NineDevice9 *This )
 struct pipe_context *
 NineDevice9_GetPipe( struct NineDevice9 *This )
 {
-    return This->context.pipe;
+    return nine_context_get_pipe(This);
 }
 
 const D3DCAPS9 *
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index 21e045c..4539cda 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -38,6 +38,7 @@ struct pipe_context;
 struct cso_context;
 struct hud_context;
 struct u_upload_mgr;
+struct csmt_context;
 
 struct NineSwapChain9;
 struct NineStateBlock9;
@@ -56,6 +57,10 @@ struct NineDevice9
     struct pipe_context *pipe_sw;
     struct cso_context *cso_sw;
 
+    /* CSMT context */
+    struct csmt_context *csmt_ctx;
+    BOOL csmt_active;
+
     /* creation parameters */
     D3DCAPS9 caps;
     D3DDEVICE_CREATION_PARAMETERS params;
diff --git a/src/gallium/state_trackers/nine/nine_csmt_helper.h b/src/gallium/state_trackers/nine/nine_csmt_helper.h
new file mode 100644
index 0000000..92e5a9f
--- /dev/null
+++ b/src/gallium/state_trackers/nine/nine_csmt_helper.h
@@ -0,0 +1,377 @@
+/*
+ * Copyright 2016 Patrick Rudolph <siro at das-labor.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* __NARG__(...) expands to the number of arguments it was given (at most 63): the arguments push the descending list from __RSEQ_N() to the right and __ARG_N picks the 64th entry. An empty argument list still counts as 1. */
+#define __NARG__(...)  __NARG_I_(__VA_ARGS__,__RSEQ_N())
+#define __NARG_I_(...) __ARG_N(__VA_ARGS__)
+#define __ARG_N( \
+      _1, _2, _3, _4, _5, _6, _7, _8, _9,_10, \
+     _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \
+     _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \
+     _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \
+     _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \
+     _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \
+     _61,_62,_63,N,...) N
+#define __RSEQ_N() \
+     63,62,61,60,                   \
+     59,58,57,56,55,54,53,52,51,50, \
+     49,48,47,46,45,44,43,42,41,40, \
+     39,38,37,36,35,34,33,32,31,30, \
+     29,28,27,26,25,24,23,22,21,20, \
+     19,18,17,16,15,14,13,12,11,10, \
+     9,8,7,6,5,4,3,2,1,0
+
+/* ARGS_FOR_BYPASS: from every group of 7 macro arguments (a..g) keep the 7th (g), comma-prefixed. Used as the argument list of the direct name##_priv call when CSMT is inactive. The _1 variant handles a call without ARG_* items. */
+#define _args_for_bypass_1(a) a
+#define _args_for_bypass_7(a, b, c, d, e, f, g) ,g
+#define _args_for_bypass_14(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_7(__VA_ARGS__)
+#define _args_for_bypass_21(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_14(__VA_ARGS__)
+#define _args_for_bypass_28(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_21(__VA_ARGS__)
+#define _args_for_bypass_35(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_28(__VA_ARGS__)
+#define _args_for_bypass_42(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_35(__VA_ARGS__)
+#define _args_for_bypass_49(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_42(__VA_ARGS__)
+#define _args_for_bypass_56(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_49(__VA_ARGS__)
+#define _args_for_bypass_63(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_56(__VA_ARGS__)
+#define _args_for_bypass_70(a, b, c, d, e, f, g, ...) ,g _args_for_bypass_63(__VA_ARGS__)
+
+#define _GFUNC_(n) _args_for_bypass_##n
+#define _GFUNC(n) _GFUNC_(n)
+
+#define ARGS_FOR_BYPASS(...) _GFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+/* ARGS_FOR_MEM: from every group of 7 macro arguments (a..g) keep the 6th (f), `;`-terminated. These statements run in the caller before nine_queue_alloc (ARG_MEM_SIZE uses the slot to set memsize2). */
+#define _args_for_mem_1(a) a;
+#define _args_for_mem_7(a, b, c, d, e, f, g) f;
+#define _args_for_mem_14(a, b, c, d, e, f, g, ...) f; _args_for_mem_7(__VA_ARGS__)
+#define _args_for_mem_21(a, b, c, d, e, f, g, ...) f; _args_for_mem_14(__VA_ARGS__)
+#define _args_for_mem_28(a, b, c, d, e, f, g, ...) f; _args_for_mem_21(__VA_ARGS__)
+#define _args_for_mem_35(a, b, c, d, e, f, g, ...) f; _args_for_mem_28(__VA_ARGS__)
+#define _args_for_mem_42(a, b, c, d, e, f, g, ...) f; _args_for_mem_35(__VA_ARGS__)
+#define _args_for_mem_49(a, b, c, d, e, f, g, ...) f; _args_for_mem_42(__VA_ARGS__)
+#define _args_for_mem_56(a, b, c, d, e, f, g, ...) f; _args_for_mem_49(__VA_ARGS__)
+#define _args_for_mem_63(a, b, c, d, e, f, g, ...) f; _args_for_mem_56(__VA_ARGS__)
+#define _args_for_mem_70(a, b, c, d, e, f, g, ...) f; _args_for_mem_63(__VA_ARGS__)
+
+#define _FFUNC_(n) _args_for_mem_##n
+#define _FFUNC(n) _FFUNC_(n)
+
+#define ARGS_FOR_MEM(...) _FFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+/* ARGS_FOR_UNBIND: from every group of 7 macro arguments (a..g) keep the 5th (e), `;`-terminated. These statements run in name##_rx right after the name##_priv call (the ARG_BIND_* items use the slot to drop their references). */
+#define _args_for_unbind_1(a) a;
+#define _args_for_unbind_7(a, b, c, d, e, f, g) e;
+#define _args_for_unbind_14(a, b, c, d, e, f, g, ...) e; _args_for_unbind_7(__VA_ARGS__)
+#define _args_for_unbind_21(a, b, c, d, e, f, g, ...) e; _args_for_unbind_14(__VA_ARGS__)
+#define _args_for_unbind_28(a, b, c, d, e, f, g, ...) e; _args_for_unbind_21(__VA_ARGS__)
+#define _args_for_unbind_35(a, b, c, d, e, f, g, ...) e; _args_for_unbind_28(__VA_ARGS__)
+#define _args_for_unbind_42(a, b, c, d, e, f, g, ...) e; _args_for_unbind_35(__VA_ARGS__)
+#define _args_for_unbind_49(a, b, c, d, e, f, g, ...) e; _args_for_unbind_42(__VA_ARGS__)
+#define _args_for_unbind_56(a, b, c, d, e, f, g, ...) e; _args_for_unbind_49(__VA_ARGS__)
+#define _args_for_unbind_63(a, b, c, d, e, f, g, ...) e; _args_for_unbind_56(__VA_ARGS__)
+#define _args_for_unbind_70(a, b, c, d, e, f, g, ...) e; _args_for_unbind_63(__VA_ARGS__)
+
+#define _EFUNC_(n) _args_for_unbind_##n
+#define _EFUNC(n) _EFUNC_(n)
+
+#define ARGS_FOR_UNBIND(...) _EFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+/* ARGS_FOR_CALL: from every group of 7 macro arguments (a..g) keep the 4th (d), comma-prefixed. Used in name##_rx as the argument list of name##_priv, read back from the queued `args` record. */
+#define _args_for_call_1(a) a
+#define _args_for_call_7(a, b, c, d, e, f, g) ,d
+#define _args_for_call_14(a, b, c, d, e, f, g, ...) ,d _args_for_call_7(__VA_ARGS__)
+#define _args_for_call_21(a, b, c, d, e, f, g, ...) ,d _args_for_call_14(__VA_ARGS__)
+#define _args_for_call_28(a, b, c, d, e, f, g, ...) ,d _args_for_call_21(__VA_ARGS__)
+#define _args_for_call_35(a, b, c, d, e, f, g, ...) ,d _args_for_call_28(__VA_ARGS__)
+#define _args_for_call_42(a, b, c, d, e, f, g, ...) ,d _args_for_call_35(__VA_ARGS__)
+#define _args_for_call_49(a, b, c, d, e, f, g, ...) ,d _args_for_call_42(__VA_ARGS__)
+#define _args_for_call_56(a, b, c, d, e, f, g, ...) ,d _args_for_call_49(__VA_ARGS__)
+#define _args_for_call_63(a, b, c, d, e, f, g, ...) ,d _args_for_call_56(__VA_ARGS__)
+#define _args_for_call_70(a, b, c, d, e, f, g, ...) ,d _args_for_call_63(__VA_ARGS__)
+
+#define _DFUNC_(n) _args_for_call_##n
+#define _DFUNC(n) _DFUNC_(n)
+
+#define ARGS_FOR_CALL(...) _DFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+/* ARGS_FOR_DECLARATION: from every group of 7 macro arguments (a..g) keep the 3rd (c), comma-prefixed. Appended after `struct NineDevice9 *device` to form the parameter list of name and name##_priv. */
+#define _args_for_decl_1(a) a
+#define _args_for_decl_7(a, b, c, d, e, f, g) ,c
+#define _args_for_decl_14(a, b, c, d, e, f, g, ...) ,c _args_for_decl_7(__VA_ARGS__)
+#define _args_for_decl_21(a, b, c, d, e, f, g, ...) ,c _args_for_decl_14(__VA_ARGS__)
+#define _args_for_decl_28(a, b, c, d, e, f, g, ...) ,c _args_for_decl_21(__VA_ARGS__)
+#define _args_for_decl_35(a, b, c, d, e, f, g, ...) ,c _args_for_decl_28(__VA_ARGS__)
+#define _args_for_decl_42(a, b, c, d, e, f, g, ...) ,c _args_for_decl_35(__VA_ARGS__)
+#define _args_for_decl_49(a, b, c, d, e, f, g, ...) ,c _args_for_decl_42(__VA_ARGS__)
+#define _args_for_decl_56(a, b, c, d, e, f, g, ...) ,c _args_for_decl_49(__VA_ARGS__)
+#define _args_for_decl_63(a, b, c, d, e, f, g, ...) ,c _args_for_decl_56(__VA_ARGS__)
+#define _args_for_decl_70(a, b, c, d, e, f, g, ...) ,c _args_for_decl_63(__VA_ARGS__)
+
+#define _CFUNC_(n) _args_for_decl_##n
+#define _CFUNC(n) _CFUNC_(n)
+
+#define ARGS_FOR_DECLARATION(...) _CFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+/* ARGS_FOR_ASSIGN: from every group of 7 macro arguments (a..g) keep the 2nd (b), `;`-terminated. These statements run in the caller after nine_queue_alloc and store each argument into the queued `args` record. */
+#define _args_for_assign_1(a) a
+#define _args_for_assign_7(a, b, c, d, e, f, g) b;
+#define _args_for_assign_14(a, b, c, d, e, f, g, ...) b; _args_for_assign_7(__VA_ARGS__)
+#define _args_for_assign_21(a, b, c, d, e, f, g, ...) b; _args_for_assign_14(__VA_ARGS__)
+#define _args_for_assign_28(a, b, c, d, e, f, g, ...) b; _args_for_assign_21(__VA_ARGS__)
+#define _args_for_assign_35(a, b, c, d, e, f, g, ...) b; _args_for_assign_28(__VA_ARGS__)
+#define _args_for_assign_42(a, b, c, d, e, f, g, ...) b; _args_for_assign_35(__VA_ARGS__)
+#define _args_for_assign_49(a, b, c, d, e, f, g, ...) b; _args_for_assign_42(__VA_ARGS__)
+#define _args_for_assign_56(a, b, c, d, e, f, g, ...) b; _args_for_assign_49(__VA_ARGS__)
+#define _args_for_assign_63(a, b, c, d, e, f, g, ...) b; _args_for_assign_56(__VA_ARGS__)
+#define _args_for_assign_70(a, b, c, d, e, f, g, ...) b; _args_for_assign_63(__VA_ARGS__)
+
+#define _BFUNC_(n) _args_for_assign_##n
+#define _BFUNC(n) _BFUNC_(n)
+
+#define ARGS_FOR_ASSIGN(...) _BFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+/* ARGS_FOR_STRUCT: from every group of 7 macro arguments (a..g) keep the 1st (a), `;`-terminated. These are the member declarations of struct s_<name>_private that follow the csmt_instruction header. */
+#define _args_for_struct_1(a) a;
+#define _args_for_struct_7(a, b, c, d, e, f, g) a;
+#define _args_for_struct_14(a, b, c, d, e, f, g, ...) a; _args_for_struct_7(__VA_ARGS__)
+#define _args_for_struct_21(a, b, c, d, e, f, g, ...) a; _args_for_struct_14(__VA_ARGS__)
+#define _args_for_struct_28(a, b, c, d, e, f, g, ...) a; _args_for_struct_21(__VA_ARGS__)
+#define _args_for_struct_35(a, b, c, d, e, f, g, ...) a; _args_for_struct_28(__VA_ARGS__)
+#define _args_for_struct_42(a, b, c, d, e, f, g, ...) a; _args_for_struct_35(__VA_ARGS__)
+#define _args_for_struct_49(a, b, c, d, e, f, g, ...) a; _args_for_struct_42(__VA_ARGS__)
+#define _args_for_struct_56(a, b, c, d, e, f, g, ...) a; _args_for_struct_49(__VA_ARGS__)
+#define _args_for_struct_63(a, b, c, d, e, f, g, ...) a; _args_for_struct_56(__VA_ARGS__)
+#define _args_for_struct_70(a, b, c, d, e, f, g, ...) a; _args_for_struct_63(__VA_ARGS__)
+
+#define _AFUNC_(n) _args_for_struct_##n
+#define _AFUNC(n) _AFUNC_(n)
+
+#define ARGS_FOR_STRUCT(...) _AFUNC(__NARG__(__VA_ARGS__)) (__VA_ARGS__)
+
+/* Serialization and deserialization.
+ * CSMT_ITEM_NO_WAIT(name, ARG_*...) defines a queued function whose caller does not wait for the worker. */
+#define CSMT_ITEM_NO_WAIT(name, ...) \
+/* Queue record: the instruction header followed by one slot per argument. */ \
+struct s_##name##_private { \
+    struct csmt_instruction instr; \
+    ARGS_FOR_STRUCT( __VA_ARGS__ ) \
+}; \
+/* Forward declaration of the real implementation, defined at the end of the macro. */ \
+static void \
+name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ); \
+/* Worker side: unpack the record, call name##_priv, then run the unbind statements. */ \
+static int \
+name##_rx( struct NineDevice9 *device, struct csmt_instruction *instr ) \
+{ \
+    struct csmt_context *ctx = device->csmt_ctx; \
+    struct s_##name##_private *args = (struct s_##name##_private *)instr; \
+    \
+    (void) args; \
+    (void) ctx; \
+    name##_priv( \
+        device ARGS_FOR_CALL( __VA_ARGS__ ) \
+    ); \
+    ARGS_FOR_UNBIND( __VA_ARGS__ ) \
+    return 0; /* 0: the worker does not signal event_processed for this item */ \
+} \
+/* Caller side: run name##_priv directly when CSMT is inactive, otherwise only enqueue. */ \
+void \
+name( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ) \
+{ \
+    struct csmt_context *ctx = device->csmt_ctx; \
+    struct s_##name##_private *args; \
+    unsigned memsize = sizeof(struct s_##name##_private); \
+    unsigned memsize2 = 0; /* extra trailing bytes, set by ARG_MEM_SIZE */ \
+    \
+    if (!device->csmt_active) { \
+        name##_priv( \
+            device ARGS_FOR_BYPASS( __VA_ARGS__ ) \
+        ); \
+        return; \
+    } \
+    ARGS_FOR_MEM ( __VA_ARGS__ ) \
+    args = nine_queue_alloc(ctx->pool, memsize + memsize2); \
+    assert(args); \
+    args->instr.func = &name##_rx; \
+    ARGS_FOR_ASSIGN( __VA_ARGS__ ) \
+} \
+/* Opens the definition of name##_priv: the braces following the macro use are its body. */ \
+static void \
+name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) )
+/* CSMT_ITEM_DO_WAIT(name, ARG_*...): like CSMT_ITEM_NO_WAIT, but the caller flushes the queue and blocks until the worker has executed the item. */
+#define CSMT_ITEM_DO_WAIT(name, ...) \
+/* Queue record: the instruction header followed by one slot per argument. */ \
+struct s_##name##_private { \
+    struct csmt_instruction instr; \
+    ARGS_FOR_STRUCT( __VA_ARGS__ ) \
+}; \
+static void \
+name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ); \
+/* Worker side: unpack the record, call name##_priv, then run the unbind statements. */ \
+static int \
+name##_rx( struct NineDevice9 *device, struct csmt_instruction *instr) \
+{ \
+    struct csmt_context *ctx = device->csmt_ctx; \
+    struct s_##name##_private *args = (struct s_##name##_private *)instr; \
+    \
+    (void) args; \
+    (void) ctx; \
+    name##_priv( \
+        device ARGS_FOR_CALL( __VA_ARGS__ ) \
+    ); \
+    ARGS_FOR_UNBIND( __VA_ARGS__ ) \
+    return 1; /* non-zero: the worker sets `processed` and signals event_processed */ \
+} \
+/* Caller side: run directly when CSMT is inactive, otherwise enqueue, flush and wait. */ \
+void \
+name( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) ) \
+{ \
+    struct csmt_context *ctx = device->csmt_ctx; \
+    struct s_##name##_private *args; \
+    unsigned memsize = sizeof(struct s_##name##_private); \
+    unsigned memsize2 = 0; /* extra trailing bytes, set by ARG_MEM_SIZE */ \
+    \
+    if (!device->csmt_active) { \
+        name##_priv( \
+            device ARGS_FOR_BYPASS( __VA_ARGS__ ) \
+        ); \
+        return; \
+    } \
+    ARGS_FOR_MEM ( __VA_ARGS__ ) \
+    args = nine_queue_alloc(ctx->pool, memsize + memsize2); \
+    assert(args); \
+    args->instr.func = &name##_rx; \
+    ARGS_FOR_ASSIGN( __VA_ARGS__ ) \
+    p_atomic_set(&ctx->processed, FALSE); /* same accessor as every other use of the flag */ \
+    nine_queue_flush(ctx->pool); \
+    nine_csmt_wait_processed(ctx); \
+} \
+/* Opens the definition of name##_priv: the braces following the macro use are its body. */ \
+static void \
+name##_priv( struct NineDevice9 *device ARGS_FOR_DECLARATION( __VA_ARGS__ ) )
+
+/* Every ARG_* macro expands to 7 comma-separated slots, consumed in this order by: ARGS_FOR_STRUCT, ARGS_FOR_ASSIGN, ARGS_FOR_DECLARATION, ARGS_FOR_CALL, ARGS_FOR_UNBIND, ARGS_FOR_MEM, ARGS_FOR_BYPASS */
+#define ARG_VAL(x, y) /* argument of type x stored by value in the queue record */ \
+        x _##y ; ,\
+        args->_##y = y ; ,\
+        x y ,\
+        args->_##y ,\
+        ,\
+        ,\
+        y
+/* ARG_REF: stores only the pointer in the queue record; the pointee is neither copied nor reference-counted by this macro. */
+#define ARG_REF(x, y) \
+        x* _##y ; ,\
+        args->_##y = y; ,\
+        x *y ,\
+        args->_##y ,\
+        ,\
+        ,\
+        y
+/* ARG_COPY_REF: copies the pointee into the queue record (member __y) and hands the worker a pointer to that copy; a NULL argument stays NULL. */
+#define ARG_COPY_REF(x, y) \
+        x * _##y ; x __##y ; ,\
+        if ( y ) { args->_##y = &args->__##y ; args->__##y = *y ; } else { args->_##y = NULL; } ,\
+        const x *y ,\
+        (const x *)args->_##y ,\
+        ,\
+        ,\
+        (const x *)y
+/* ARG_BIND_REF: keeps a NineUnknown bind on the object for as long as the item sits in the queue. */
+#define ARG_BIND_REF(x, y) \
+        x * _##y ,\
+        /* Enqueue: the slot holds no previous object so there is */ \
+        /* nothing to unbind. Store the pointer and bind it if set. */ \
+        args->_##y = y ; \
+        if ( y ) \
+            NineUnknown_Bind( (void *)y ); ,\
+        x *y ,\
+        args->_##y,\
+        /* Worker: release the bind taken at enqueue time */ \
+        /* and clear the slot. */ \
+        if (args->_##y) \
+            NineUnknown_Unbind((void *)(args->_##y)); \
+        args->_##y = NULL; ,\
+        ,\
+        y
+/* ARG_BIND_RES: takes a pipe_resource reference when the item is queued and drops it in the unbind slot after the worker has called the function. */
+#define ARG_BIND_RES(x, y) \
+        x * _##y ,\
+        args->_##y = NULL; \
+        if (y) \
+            pipe_resource_reference(&args->_##y, y); ,\
+        x *y ,\
+        args->_##y ,\
+        if (args->_##y) \
+            pipe_resource_reference(&args->_##y, NULL); ,\
+        ,\
+        y
+/* ARG_MEM: copies memsize2 bytes (set by the matching ARG_MEM_SIZE) from y into the space allocated right after the queue record. Uses char * arithmetic, as arithmetic on void * is not ISO C. */
+#define ARG_MEM(x, y) \
+        x * _##y ,\
+        args->_##y = (void *)((char *)args + memsize);\
+        memcpy(args->_##y, y, memsize2); ,\
+        const x *y ,\
+        (const x *)args->_##y ,\
+        ,\
+        ,\
+        (const x *)y
+/* ARG_MEM_SIZE: by-value size argument that also sets memsize2 (slot 6), i.e. the number of extra bytes allocated behind the queue record and copied by ARG_MEM. */
+#define ARG_MEM_SIZE(x, y) \
+        x _##y ,\
+        args->_##y = y; ,\
+        x y ,\
+        args->_##y ,\
+        ,\
+        memsize2 = y, \
+        y
+/* ARG_BIND_BLIT: copies the pointed-to struct into the queue record and holds references on its src.resource and dst.resource until the worker has called the function. */
+#define ARG_BIND_BLIT(x, y) \
+        x _##y ,\
+        memcpy(&args->_##y , y, sizeof(x)); \
+        args->_##y.src.resource = NULL; \
+        args->_##y.dst.resource = NULL; \
+        pipe_resource_reference(&args->_##y.src.resource, y->src.resource); \
+        pipe_resource_reference(&args->_##y.dst.resource, y->dst.resource);,\
+        x *y ,\
+        &args->_##y ,\
+        pipe_resource_reference(&args->_##y.src.resource, NULL); \
+        pipe_resource_reference(&args->_##y.dst.resource, NULL);,\
+        ,\
+        y
+/* ARG_BIND_BUF: copies the pointed-to struct into the queue record and holds a reference on its `buffer` resource until the worker has called the function. */
+#define ARG_BIND_BUF(x, y) \
+        x _##y ,\
+        memcpy(&args->_##y , y, sizeof(x)); \
+        args->_##y.buffer = NULL; \
+        pipe_resource_reference(&args->_##y.buffer, y->buffer); ,\
+        x *y ,\
+        &args->_##y ,\
+        pipe_resource_reference(&args->_##y.buffer, NULL); ,\
+        ,\
+        y
+/* ARG_BIND_VIEW: takes a pipe_sampler_view reference when the item is queued and drops it in the unbind slot after the worker has called the function. */
+#define ARG_BIND_VIEW(x, y) \
+        x * _##y ,\
+        args->_##y = NULL; \
+        if (y) \
+            pipe_sampler_view_reference(&args->_##y, y); ,\
+        x *y ,\
+        args->_##y ,\
+        if (args->_##y) \
+            pipe_sampler_view_reference(&args->_##y, NULL); ,\
+        ,\
+        y
+
diff --git a/src/gallium/state_trackers/nine/nine_queue.c b/src/gallium/state_trackers/nine/nine_queue.c
index 31f9ce7..885b0ab 100644
--- a/src/gallium/state_trackers/nine/nine_queue.c
+++ b/src/gallium/state_trackers/nine/nine_queue.c
@@ -143,6 +143,10 @@ nine_queue_flush(struct nine_queue_pool* ctx)
     DBG("flushing cmdbuf=%p instr=%d size=%d\n",
            cmdbuf, cmdbuf->num_instr, cmdbuf->offset);
 
+    /* Nothing to flush */
+    if (!cmdbuf->num_instr)
+        return;
+
     /* signal waiting worker */
     pipe_mutex_lock(ctx->mutex_push);
     cmdbuf->full = 1;
@@ -200,6 +204,17 @@ nine_queue_alloc(struct nine_queue_pool* ctx, unsigned space)
     return cmdbuf->mem_pool + offset;
 }
 
+/* Reports whether the queue currently holds no work.
+ * TRUE: tail equals head and the head cmdbuf holds no instruction.
+ * FALSE: one or more instructions are queued. */
+bool
+nine_queue_isempty(struct nine_queue_pool* ctx)
+{
+    struct nine_cmdbuf *head_buf = &ctx->pool[ctx->head];
+    if (ctx->tail != ctx->head) return false;
+    return !head_buf->num_instr;
+}
+
 struct nine_queue_pool*
 nine_queue_create(void)
 {
diff --git a/src/gallium/state_trackers/nine/nine_queue.h b/src/gallium/state_trackers/nine/nine_queue.h
index 259978e..cc15bd8 100644
--- a/src/gallium/state_trackers/nine/nine_queue.h
+++ b/src/gallium/state_trackers/nine/nine_queue.h
@@ -39,6 +39,9 @@ nine_queue_flush(struct nine_queue_pool* ctx);
 void *
 nine_queue_alloc(struct nine_queue_pool* ctx, unsigned space);
 
+bool
+nine_queue_isempty(struct nine_queue_pool* ctx);
+
 struct nine_queue_pool*
 nine_queue_create(void);
 
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 076a959..ef19374 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -21,7 +21,10 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#define NINE_STATE
+
 #include "device9.h"
+#include "swapchain9.h"
 #include "basetexture9.h"
 #include "buffer9.h"
 #include "indexbuffer9.h"
@@ -36,13 +39,213 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "cso_cache/cso_context.h"
+#include "util/u_atomic.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_math.h"
 #include "util/u_box.h"
 #include "util/u_simple_shaders.h"
 
+/* CSMT headers */
+#include "nine_queue.h"
+#include "nine_csmt_helper.h"
+#include "os/os_thread.h"
+
 #define DBG_CHANNEL DBG_DEVICE
 
+/* Nine CSMT */
+
+struct csmt_instruction {
+    int (* func)(struct NineDevice9 *This, struct csmt_instruction *instr); /* run by the worker; a non-zero result makes it signal event_processed */
+};
+/* State shared between the threads that queue instructions and the CSMT worker. */
+struct csmt_context {
+    pipe_thread worker;            /* thread running nine_csmt_worker */
+    struct nine_queue_pool* pool;  /* queue the instructions are allocated from */
+    BOOL terminate;                /* set by nine_csmt_destroy to stop the worker */
+    pipe_condvar event_processed;  /* signalled after `processed` was set to TRUE */
+    pipe_mutex mutex_processed;    /* held while waiting on / signalling event_processed */
+    struct NineDevice9 *device;    /* passed to every instruction's func */
+    BOOL processed;                /* cleared before a flush, set again by the worker */
+};
+
+/* Blocks the calling thread until the worker has set ctx->processed.
+ * The caller has to ensure that only one thread waits at a time. */
+static void
+nine_csmt_wait_processed(struct csmt_context *ctx)
+{
+    pipe_mutex_lock(ctx->mutex_processed);
+    /* Re-check the flag after every wakeup of the condition variable. */
+    while (p_atomic_read(&ctx->processed) == FALSE) {
+        pipe_condvar_wait(ctx->event_processed, ctx->mutex_processed);
+    }
+    pipe_mutex_unlock(ctx->mutex_processed);
+}
+
+/* CSMT worker thread: runs queued instructions on ctx->device until ctx->terminate is set. */
+static
+PIPE_THREAD_ROUTINE(nine_csmt_worker, arg)
+{
+    struct csmt_context *ctx = arg;
+    struct csmt_instruction *instr;
+    DBG("CSMT worker spawned\n");
+
+    pipe_thread_setname("CSMT-Worker");
+
+    while (1) {
+        nine_queue_wait_flush(ctx->pool); /* presumably blocks until a cmdbuf was flushed; see nine_queue.c */
+
+        /* Get instruction. NULL on empty cmdbuf. Stop fetching once terminate is set. */
+        while (!p_atomic_read(&ctx->terminate) &&
+               (instr = (struct csmt_instruction *)nine_queue_get(ctx->pool))) {
+
+            /* Run the instruction; a non-zero result (DO_WAIT items, nop_func) means a caller may be waiting. */
+            if (instr->func(ctx->device, instr)) {
+                pipe_mutex_lock(ctx->mutex_processed);
+                p_atomic_set(&ctx->processed, TRUE);
+                pipe_condvar_signal(ctx->event_processed);
+                pipe_mutex_unlock(ctx->mutex_processed);
+            }
+        }
+        if (p_atomic_read(&ctx->terminate)) { /* wake the thread waiting in nine_csmt_destroy, then exit */
+            pipe_mutex_lock(ctx->mutex_processed);
+            p_atomic_set(&ctx->processed, TRUE);
+            pipe_condvar_signal(ctx->event_processed);
+            pipe_mutex_unlock(ctx->mutex_processed);
+            break;
+        }
+    }
+
+    DBG("CSMT worker destroyed\n");
+    return 0;
+}
+
+/* Create a CSMT context for `This` and spawn its worker thread.
+ * Returns the new context, or NULL when an allocation or the thread creation fails.
+ */
+struct csmt_context *
+nine_csmt_create( struct NineDevice9 *This )
+{
+    struct csmt_context *ctx;
+
+    ctx = CALLOC_STRUCT(csmt_context);
+    if (!ctx)
+        return NULL;
+
+    ctx->pool = nine_queue_create();
+    if (!ctx->pool) {
+        FREE(ctx);
+        return NULL;
+    }
+    pipe_condvar_init(ctx->event_processed);
+    pipe_mutex_init(ctx->mutex_processed);
+
+#if DEBUG
+    pipe_thread_setname("Main thread");
+#endif
+
+    ctx->device = This; /* set before the worker starts: it reads ctx->device */
+    ctx->worker = pipe_thread_create(nine_csmt_worker, ctx);
+    if (!ctx->worker) {
+        pipe_mutex_destroy(ctx->mutex_processed); /* undo the sync objects initialised above */
+        pipe_condvar_destroy(ctx->event_processed);
+        nine_queue_delete(ctx->pool);
+        FREE(ctx);
+        return NULL;
+    }
+
+    DBG("Returning context %p\n", ctx);
+    return ctx;
+}
+
+static int
+nop_func( struct NineDevice9 *This, struct csmt_instruction *instr )
+{
+    /* No work to do; the non-zero result makes the worker signal completion. */
+    (void) instr;
+    (void) This;
+    return 1;
+}
+
+/* Synchronize with the worker: enqueue a nop, flush the queue and
+ * block until the worker has executed it. */
+void
+nine_csmt_process( struct NineDevice9 *device )
+{
+    struct csmt_context *ctx = device->csmt_ctx;
+    struct csmt_instruction *nop;
+
+    /* Nothing to wait for when CSMT is off or no work is queued. */
+    if (!device->csmt_active || nine_queue_isempty(ctx->pool))
+        return;
+
+    DBG("device=%p\n", device);
+
+    /* nop_func returns 1, which makes the worker signal completion
+     * when it executes this instruction. */
+    nop = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
+    assert(nop);
+    nop->func = nop_func;
+
+    /* Clear the flag before flushing; the worker sets it again
+     * once the nop has run. */
+    p_atomic_set(&ctx->processed, FALSE);
+    nine_queue_flush(ctx->pool);
+    nine_csmt_wait_processed(ctx);
+}
+
+/* Destroys a CSMT context.
+ * Stops and joins the worker thread before releasing anything it uses.
+ */
+void
+nine_csmt_destroy( struct NineDevice9 *device, struct csmt_context *ctx )
+{
+    struct csmt_instruction* instr;
+    pipe_thread render_thread = ctx->worker;
+
+    DBG("device=%p ctx=%p\n", device, ctx);
+
+    /* Push nop and flush the queue, so the worker wakes up. */
+    instr = nine_queue_alloc(ctx->pool, sizeof(struct csmt_instruction));
+    assert(instr);
+    instr->func = nop_func;
+
+    p_atomic_set(&ctx->processed, FALSE);
+    /* Signal worker to terminate. */
+    p_atomic_set(&ctx->terminate, TRUE);
+    nine_queue_flush(ctx->pool);
+
+    nine_csmt_wait_processed(ctx);
+    /* Join first: the worker may still be releasing mutex_processed. */
+    pipe_thread_wait(render_thread);
+    nine_queue_delete(ctx->pool);
+    pipe_mutex_destroy(ctx->mutex_processed);
+    pipe_condvar_destroy(ctx->event_processed);
+    FREE(ctx);
+}
+
+struct pipe_context *
+nine_context_get_pipe( struct NineDevice9 *device )
+{
+    /* With CSMT active, let the worker finish the queued work first. */
+    if (device->csmt_active) nine_csmt_process(device);
+    return device->context.pipe;
+}
+
+struct pipe_context *
+nine_context_get_pipe_multithread( struct NineDevice9 *device )
+{
+    struct csmt_context *ctx = device->csmt_ctx;
+
+    /* Only a thread other than the CSMT worker waits for the queued
+     * work to be processed; the worker itself skips the wait. Without
+     * CSMT there is nothing to wait for. */
+    if (device->csmt_active && !pipe_thread_is_self(ctx->worker))
+        nine_csmt_process(device);
+
+    return device->context.pipe;
+}
+/* Nine state functions */
+
 /* Check if some states need to be set dirty */
 
 static inline DWORD
@@ -1094,11 +1297,44 @@ NineDevice9_ResolveZ( struct NineDevice9 *device )
 #define ALPHA_TO_COVERAGE_ENABLE   MAKEFOURCC('A', '2', 'M', '1')
 #define ALPHA_TO_COVERAGE_DISABLE  MAKEFOURCC('A', '2', 'M', '0')
 
+/* Nine_context functions.
+ * Serialized through CSMT macros.
+ */
 
-void
-nine_context_set_render_state(struct NineDevice9 *device,
-                              D3DRENDERSTATETYPE State,
-                              DWORD Value)
+static void
+nine_context_set_texture_apply(struct NineDevice9 *device,
+                               DWORD stage,
+                               BOOL enabled,
+                               BOOL shadow,
+                               DWORD lod,
+                               D3DRESOURCETYPE type,
+                               uint8_t pstype,
+                               struct pipe_resource *res,
+                               struct pipe_sampler_view *view0,
+                               struct pipe_sampler_view *view1);
+static void
+nine_context_set_stream_source_apply(struct NineDevice9 *device,
+                                    UINT StreamNumber,
+                                    struct pipe_resource *res,
+                                    UINT OffsetInBytes,
+                                    UINT Stride);
+
+static void
+nine_context_set_indices_apply(struct NineDevice9 *device,
+                               struct pipe_resource *res,
+                               UINT IndexSize,
+                               UINT OffsetInBytes);
+
+static void
+nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
+                                                     UINT StartRegister,
+                                                     const int *pConstantData,
+                                                     unsigned pConstantData_size,
+                                                     UINT Vector4iCount);
+
+CSMT_ITEM_NO_WAIT(nine_context_set_render_state,
+                  ARG_VAL(D3DRENDERSTATETYPE, State),
+                  ARG_VAL(DWORD, Value))
 {
     struct nine_context *context = &device->context;
 
@@ -1137,17 +1373,16 @@ nine_context_set_render_state(struct NineDevice9 *device,
     context->changed.group |= nine_render_state_group[State];
 }
 
-static void
-nine_context_set_texture_apply(struct NineDevice9 *device,
-                               DWORD stage,
-                               BOOL enabled,
-                               BOOL shadow,
-                               DWORD lod,
-                               D3DRESOURCETYPE type,
-                               uint8_t pstype,
-                               struct pipe_resource *res,
-                               struct pipe_sampler_view *view0,
-                               struct pipe_sampler_view *view1)
+CSMT_ITEM_NO_WAIT(nine_context_set_texture_apply,
+                  ARG_VAL(DWORD, stage),
+                  ARG_VAL(BOOL, enabled),
+                  ARG_VAL(BOOL, shadow),
+                  ARG_VAL(DWORD, lod),
+                  ARG_VAL(D3DRESOURCETYPE, type),
+                  ARG_VAL(uint8_t, pstype),
+                  ARG_BIND_RES(struct pipe_resource, res),
+                  ARG_BIND_VIEW(struct pipe_sampler_view, view0),
+                  ARG_BIND_VIEW(struct pipe_sampler_view, view1))
 {
     struct nine_context *context = &device->context;
 
@@ -1197,11 +1432,10 @@ nine_context_set_texture(struct NineDevice9 *device,
                                    res, view0, view1);
 }
 
-void
-nine_context_set_sampler_state(struct NineDevice9 *device,
-                               DWORD Sampler,
-                               D3DSAMPLERSTATETYPE Type,
-                               DWORD Value)
+CSMT_ITEM_NO_WAIT(nine_context_set_sampler_state,
+                  ARG_VAL(DWORD, Sampler),
+                  ARG_VAL(D3DSAMPLERSTATETYPE, Type),
+                  ARG_VAL(DWORD, Value))
 {
     struct nine_context *context = &device->context;
 
@@ -1213,12 +1447,11 @@ nine_context_set_sampler_state(struct NineDevice9 *device,
     context->changed.sampler[Sampler] |= 1 << Type;
 }
 
-static void
-nine_context_set_stream_source_apply(struct NineDevice9 *device,
-                                    UINT StreamNumber,
-                                    struct pipe_resource *res,
-                                    UINT OffsetInBytes,
-                                    UINT Stride)
+CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_apply,
+                  ARG_VAL(UINT, StreamNumber),
+                  ARG_BIND_RES(struct pipe_resource, res),
+                  ARG_VAL(UINT, OffsetInBytes),
+                  ARG_VAL(UINT, Stride))
 {
     struct nine_context *context = &device->context;
     const unsigned i = StreamNumber;
@@ -1249,10 +1482,9 @@ nine_context_set_stream_source(struct NineDevice9 *device,
                                          Stride);
 }
 
-void
-nine_context_set_stream_source_freq(struct NineDevice9 *device,
-                                    UINT StreamNumber,
-                                    UINT Setting)
+CSMT_ITEM_NO_WAIT(nine_context_set_stream_source_freq,
+                  ARG_VAL(UINT, StreamNumber),
+                  ARG_VAL(UINT, Setting))
 {
     struct nine_context *context = &device->context;
 
@@ -1267,11 +1499,10 @@ nine_context_set_stream_source_freq(struct NineDevice9 *device,
         context->changed.group |= NINE_STATE_STREAMFREQ;
 }
 
-static void
-nine_context_set_indices_apply(struct NineDevice9 *device,
-                               struct pipe_resource *res,
-                               UINT IndexSize,
-                               UINT OffsetInBytes)
+CSMT_ITEM_NO_WAIT(nine_context_set_indices_apply,
+                  ARG_BIND_RES(struct pipe_resource, res),
+                  ARG_VAL(UINT, IndexSize),
+                  ARG_VAL(UINT, OffsetInBytes))
 {
     struct nine_context *context = &device->context;
 
@@ -1302,9 +1533,8 @@ nine_context_set_indices(struct NineDevice9 *device,
     nine_context_set_indices_apply(device, res, IndexSize, OffsetInBytes);
 }
 
-void
-nine_context_set_vertex_declaration(struct NineDevice9 *device,
-                                    struct NineVertexDeclaration9 *vdecl)
+CSMT_ITEM_NO_WAIT(nine_context_set_vertex_declaration,
+                  ARG_BIND_REF(struct NineVertexDeclaration9, vdecl))
 {
     struct nine_context *context = &device->context;
     BOOL was_programmable_vs = context->programmable_vs;
@@ -1320,9 +1550,8 @@ nine_context_set_vertex_declaration(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_VDECL;
 }
 
-void
-nine_context_set_vertex_shader(struct NineDevice9 *device,
-                               struct NineVertexShader9 *pShader)
+CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader,
+                  ARG_BIND_REF(struct NineVertexShader9, pShader))
 {
     struct nine_context *context = &device->context;
     BOOL was_programmable_vs = context->programmable_vs;
@@ -1338,12 +1567,11 @@ nine_context_set_vertex_shader(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_VS;
 }
 
-void
-nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device,
-                                          UINT StartRegister,
-                                          const float *pConstantData,
-                                          const unsigned pConstantData_size,
-                                          UINT Vector4fCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_f,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(float, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, Vector4fCount))
 {
     struct nine_context *context = &device->context;
     float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f;
@@ -1364,13 +1592,11 @@ nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_VS_CONST;
 }
 
-
-void
-nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device,
-                                          UINT StartRegister,
-                                          const int *pConstantData,
-                                          const unsigned pConstantData_size,
-                                          UINT Vector4iCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_i,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(int, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, Vector4iCount))
 {
     struct nine_context *context = &device->context;
     int i;
@@ -1392,12 +1618,11 @@ nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_VS_CONST;
 }
 
-void
-nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device,
-                                          UINT StartRegister,
-                                          const BOOL *pConstantData,
-                                          const unsigned pConstantData_size,
-                                          UINT BoolCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_vertex_shader_constant_b,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(BOOL, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, BoolCount))
 {
     struct nine_context *context = &device->context;
     int i;
@@ -1412,9 +1637,8 @@ nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_VS_CONST;
 }
 
-void
-nine_context_set_pixel_shader(struct NineDevice9 *device,
-                              struct NinePixelShader9* ps)
+CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader,
+                  ARG_BIND_REF(struct NinePixelShader9, ps))
 {
     struct nine_context *context = &device->context;
     unsigned old_mask = context->ps ? context->ps->rt_mask : 1;
@@ -1435,12 +1659,11 @@ nine_context_set_pixel_shader(struct NineDevice9 *device,
         context->changed.group |= NINE_STATE_FB;
 }
 
-void
-nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device,
-                                        UINT StartRegister,
-                                        const float *pConstantData,
-                                        const unsigned pConstantData_size,
-                                        UINT Vector4fCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_f,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(float, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, Vector4fCount))
 {
     struct nine_context *context = &device->context;
 
@@ -1453,11 +1676,11 @@ nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device,
 }
 
 /* For stateblocks */
-static void
-nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
-                                                     UINT StartRegister,
-                                                     const int *pConstantData,
-                                                     UINT Vector4iCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i_transformed,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(int, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, Vector4iCount))
 {
     struct nine_context *context = &device->context;
 
@@ -1469,12 +1692,11 @@ nine_context_set_pixel_shader_constant_i_transformed(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_PS_CONST;
 }
 
-void
-nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device,
-                                         UINT StartRegister,
-                                         const int *pConstantData,
-                                         const unsigned pConstantData_size,
-                                         UINT Vector4iCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_i,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(int, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, Vector4iCount))
 {
     struct nine_context *context = &device->context;
     int i;
@@ -1495,12 +1717,11 @@ nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_PS_CONST;
 }
 
-void
-nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device,
-                                         UINT StartRegister,
-                                         const BOOL *pConstantData,
-                                         const unsigned pConstantData_size,
-                                         UINT BoolCount)
+CSMT_ITEM_NO_WAIT(nine_context_set_pixel_shader_constant_b,
+                  ARG_VAL(UINT, StartRegister),
+                  ARG_MEM(BOOL, pConstantData),
+                  ARG_MEM_SIZE(unsigned, pConstantData_size),
+                  ARG_VAL(UINT, BoolCount))
 {
     struct nine_context *context = &device->context;
     int i;
@@ -1515,10 +1736,10 @@ nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_PS_CONST;
 }
 
-void
-nine_context_set_render_target(struct NineDevice9 *device,
-                               DWORD RenderTargetIndex,
-                               struct NineSurface9 *rt)
+/* XXX: use resource, as resource might change */
+CSMT_ITEM_NO_WAIT(nine_context_set_render_target,
+                  ARG_VAL(DWORD, RenderTargetIndex),
+                  ARG_BIND_REF(struct NineSurface9, rt))
 {
     struct nine_context *context = &device->context;
     const unsigned i = RenderTargetIndex;
@@ -1550,9 +1771,9 @@ nine_context_set_render_target(struct NineDevice9 *device,
     }
 }
 
-void
-nine_context_set_depth_stencil(struct NineDevice9 *device,
-                               struct NineSurface9 *ds)
+/* XXX: use resource instead of ds, as resource might change */
+CSMT_ITEM_NO_WAIT(nine_context_set_depth_stencil,
+                  ARG_BIND_REF(struct NineSurface9, ds))
 {
     struct nine_context *context = &device->context;
 
@@ -1560,9 +1781,8 @@ nine_context_set_depth_stencil(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_FB;
 }
 
-void
-nine_context_set_viewport(struct NineDevice9 *device,
-                          const D3DVIEWPORT9 *viewport)
+CSMT_ITEM_NO_WAIT(nine_context_set_viewport,
+                  ARG_COPY_REF(D3DVIEWPORT9, viewport))
 {
     struct nine_context *context = &device->context;
 
@@ -1570,9 +1790,8 @@ nine_context_set_viewport(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_VIEWPORT;
 }
 
-void
-nine_context_set_scissor(struct NineDevice9 *device,
-                         const struct pipe_scissor_state *scissor)
+CSMT_ITEM_NO_WAIT(nine_context_set_scissor,
+                  ARG_COPY_REF(struct pipe_scissor_state, scissor))
 {
     struct nine_context *context = &device->context;
 
@@ -1580,10 +1799,9 @@ nine_context_set_scissor(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_SCISSOR;
 }
 
-void
-nine_context_set_transform(struct NineDevice9 *device,
-                           D3DTRANSFORMSTATETYPE State,
-                           const D3DMATRIX *pMatrix)
+CSMT_ITEM_NO_WAIT(nine_context_set_transform,
+                  ARG_VAL(D3DTRANSFORMSTATETYPE, State),
+                  ARG_COPY_REF(D3DMATRIX, pMatrix))
 {
     struct nine_context *context = &device->context;
     D3DMATRIX *M = nine_state_access_transform(&context->ff, State, TRUE);
@@ -1593,9 +1811,8 @@ nine_context_set_transform(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_FF;
 }
 
-void
-nine_context_set_material(struct NineDevice9 *device,
-                          const D3DMATERIAL9 *pMaterial)
+CSMT_ITEM_NO_WAIT(nine_context_set_material,
+                  ARG_COPY_REF(D3DMATERIAL9, pMaterial))
 {
     struct nine_context *context = &device->context;
 
@@ -1603,10 +1820,9 @@ nine_context_set_material(struct NineDevice9 *device,
     context->changed.group |= NINE_STATE_FF_MATERIAL;
 }
 
-void
-nine_context_set_light(struct NineDevice9 *device,
-                       DWORD Index,
-                       const D3DLIGHT9 *pLight)
+CSMT_ITEM_NO_WAIT(nine_context_set_light,
+                  ARG_VAL(DWORD, Index),
+                  ARG_COPY_REF(D3DLIGHT9, pLight))
 {
     struct nine_context *context = &device->context;
 
@@ -1623,25 +1839,25 @@ nine_context_light_enable_stateblock(struct NineDevice9 *device,
 {
     struct nine_context *context = &device->context;
 
+    if (device->csmt_active) /* TODO: fix */
+        nine_csmt_process(device);
     memcpy(&context->ff.active_light, &active_light, NINE_MAX_LIGHTS_ACTIVE * sizeof(context->ff.active_light[0]));
     context->ff.num_lights_active = num_lights_active;
 }
 
-void
-nine_context_light_enable(struct NineDevice9 *device,
-                          DWORD Index,
-                          BOOL Enable)
+CSMT_ITEM_NO_WAIT(nine_context_light_enable,
+                  ARG_VAL(DWORD, Index),
+                  ARG_VAL(BOOL, Enable))
 {
     struct nine_context *context = &device->context;
 
     nine_state_light_enable(&context->ff, &context->changed.group, Index, Enable);
 }
 
-void
-nine_context_set_texture_stage_state(struct NineDevice9 *device,
-                                     DWORD Stage,
-                                     D3DTEXTURESTAGESTATETYPE Type,
-                                     DWORD Value)
+CSMT_ITEM_NO_WAIT(nine_context_set_texture_stage_state,
+                  ARG_VAL(DWORD, Stage),
+                  ARG_VAL(D3DTEXTURESTAGESTATETYPE, Type),
+                  ARG_VAL(DWORD, Value))
 {
     struct nine_context *context = &device->context;
     int bumpmap_index = -1;
@@ -1682,10 +1898,9 @@ nine_context_set_texture_stage_state(struct NineDevice9 *device,
     context->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
 }
 
-void
-nine_context_set_clip_plane(struct NineDevice9 *device,
-                            DWORD Index,
-                            struct nine_clipplane *pPlane)
+CSMT_ITEM_NO_WAIT(nine_context_set_clip_plane,
+                  ARG_VAL(DWORD, Index),
+                  ARG_COPY_REF(struct nine_clipplane, pPlane))
 {
     struct nine_context *context = &device->context;
 
@@ -1693,9 +1908,8 @@ nine_context_set_clip_plane(struct NineDevice9 *device,
     context->changed.ucp = TRUE;
 }
 
-void
-nine_context_set_swvp(struct NineDevice9 *device,
-                      boolean swvp)
+CSMT_ITEM_NO_WAIT(nine_context_set_swvp,
+                  ARG_VAL(boolean, swvp))
 {
     struct nine_context *context = &device->context;
 
@@ -2015,14 +2229,17 @@ nine_context_apply_stateblock(struct NineDevice9 *device,
         for (r = src->changed.vs_const_f; r; r = r->next)
             nine_context_set_vertex_shader_constant_f(device, r->bgn,
                                                       &src->vs_const_f[r->bgn * 4],
+                                                      sizeof(float[4]) * (r->end - r->bgn),
                                                       r->end - r->bgn);
         for (r = src->changed.vs_const_i; r; r = r->next)
             nine_context_set_vertex_shader_constant_i(device, r->bgn,
                                                       &src->vs_const_i[r->bgn * 4],
+                                                      sizeof(int[4]) * (r->end - r->bgn),
                                                       r->end - r->bgn);
         for (r = src->changed.vs_const_b; r; r = r->next)
             nine_context_set_vertex_shader_constant_b(device, r->bgn,
                                                       &src->vs_const_b[r->bgn * 4],
+                                                      sizeof(BOOL) * (r->end - r->bgn),
                                                       r->end - r->bgn);
     }
 
@@ -2032,20 +2249,21 @@ nine_context_apply_stateblock(struct NineDevice9 *device,
         for (r = src->changed.ps_const_f; r; r = r->next)
             nine_context_set_pixel_shader_constant_f(device, r->bgn,
                                                      &src->ps_const_f[r->bgn * 4],
+                                                     sizeof(float[4]) * (r->end - r->bgn),
                                                      r->end - r->bgn);
         if (src->changed.ps_const_i) {
             uint16_t m = src->changed.ps_const_i;
             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
                 if (m & 1)
                     nine_context_set_pixel_shader_constant_i_transformed(device, i,
-                                                                         src->ps_const_i[i], 1);
+                                                                         src->ps_const_i[i], sizeof(int[4]), 1);
         }
         if (src->changed.ps_const_b) {
             uint16_t m = src->changed.ps_const_b;
             for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
                 if (m & 1)
                     nine_context_set_pixel_shader_constant_b(device, i,
-                                                             &src->ps_const_b[i], 1);
+                                                             &src->ps_const_b[i], sizeof(BOOL), 1);
         }
     }
 
@@ -2112,15 +2330,13 @@ nine_update_state_framebuffer_clear(struct NineDevice9 *device)
         update_framebuffer(device, TRUE);
 }
 
-/* Checks were already done before the call */
-void
-nine_context_clear_fb(struct NineDevice9 *device,
-              DWORD Count,
-              const D3DRECT *pRects,
-              DWORD Flags,
-              D3DCOLOR Color,
-              float Z,
-              DWORD Stencil)
+CSMT_ITEM_NO_WAIT(nine_context_clear_fb,
+                  ARG_VAL(DWORD, Count),
+                  ARG_COPY_REF(D3DRECT, pRects),
+                  ARG_VAL(DWORD, Flags),
+                  ARG_VAL(D3DCOLOR, Color),
+                  ARG_VAL(float, Z),
+                  ARG_VAL(DWORD, Stencil))
 {
     struct nine_context *context = &device->context;
     const int sRGB = context->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
@@ -2275,11 +2491,10 @@ init_draw_info(struct pipe_draw_info *info,
     info->indirect_params = NULL;
 }
 
-void
-nine_context_draw_primitive(struct NineDevice9 *device,
-                            D3DPRIMITIVETYPE PrimitiveType,
-                            UINT StartVertex,
-                            UINT PrimitiveCount)
+CSMT_ITEM_NO_WAIT(nine_context_draw_primitive,
+                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
+                  ARG_VAL(UINT, StartVertex),
+                  ARG_VAL(UINT, PrimitiveCount))
 {
     struct nine_context *context = &device->context;
     struct pipe_draw_info info;
@@ -2296,14 +2511,13 @@ nine_context_draw_primitive(struct NineDevice9 *device,
     context->pipe->draw_vbo(context->pipe, &info);
 }
 
-void
-nine_context_draw_indexed_primitive(struct NineDevice9 *device,
-                                    D3DPRIMITIVETYPE PrimitiveType,
-                                    INT BaseVertexIndex,
-                                    UINT MinVertexIndex,
-                                    UINT NumVertices,
-                                    UINT StartIndex,
-                                    UINT PrimitiveCount)
+CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive,
+                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
+                   ARG_VAL(INT, BaseVertexIndex),
+                   ARG_VAL(UINT, MinVertexIndex),
+                   ARG_VAL(UINT, NumVertices),
+                   ARG_VAL(UINT, StartIndex),
+                   ARG_VAL(UINT, PrimitiveCount))
 {
     struct nine_context *context = &device->context;
     struct pipe_draw_info info;
@@ -2321,11 +2535,10 @@ nine_context_draw_indexed_primitive(struct NineDevice9 *device,
     context->pipe->draw_vbo(context->pipe, &info);
 }
 
-void
-nine_context_draw_primitive_from_vtxbuf(struct NineDevice9 *device,
-                                        D3DPRIMITIVETYPE PrimitiveType,
-                                        UINT PrimitiveCount,
-                                        struct pipe_vertex_buffer *vtxbuf)
+CSMT_ITEM_NO_WAIT(nine_context_draw_primitive_from_vtxbuf,
+                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
+                  ARG_VAL(UINT, PrimitiveCount),
+                  ARG_BIND_BUF(struct pipe_vertex_buffer, vtxbuf))
 {
     struct nine_context *context = &device->context;
     struct pipe_draw_info info;
@@ -2346,14 +2559,13 @@ nine_context_draw_primitive_from_vtxbuf(struct NineDevice9 *device,
     pipe_resource_reference(&vtxbuf->buffer, NULL);
 }
 
-void
-nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf(struct NineDevice9 *device,
-                                                       D3DPRIMITIVETYPE PrimitiveType,
-                                                       UINT MinVertexIndex,
-                                                       UINT NumVertices,
-                                                       UINT PrimitiveCount,
-                                                       struct pipe_vertex_buffer *vbuf,
-                                                       struct pipe_index_buffer *ibuf)
+CSMT_ITEM_NO_WAIT(nine_context_draw_indexed_primitive_from_vtxbuf_idxbuf,
+                  ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType),
+                  ARG_VAL(UINT, MinVertexIndex),
+                  ARG_VAL(UINT, NumVertices),
+                  ARG_VAL(UINT, PrimitiveCount),
+                  ARG_BIND_BUF(struct pipe_vertex_buffer, vbuf),
+                  ARG_BIND_BUF(struct pipe_index_buffer, ibuf))
 {
     struct nine_context *context = &device->context;
     struct pipe_draw_info info;
@@ -2380,27 +2592,29 @@ nine_context_create_query(struct NineDevice9 *device, unsigned query_type)
 {
     struct nine_context *context = &device->context;
 
+    if (device->csmt_active)
+        nine_csmt_process(device);
     return context->pipe->create_query(context->pipe, query_type, 0);
 }
 
-void
-nine_context_destroy_query(struct NineDevice9 *device, struct pipe_query *query)
+CSMT_ITEM_DO_WAIT(nine_context_destroy_query,
+                  ARG_REF(struct pipe_query, query))
 {
     struct nine_context *context = &device->context;
 
     context->pipe->destroy_query(context->pipe, query);
 }
 
-void
-nine_context_begin_query(struct NineDevice9 *device, struct pipe_query *query)
+CSMT_ITEM_NO_WAIT(nine_context_begin_query,
+                  ARG_REF(struct pipe_query, query))
 {
     struct nine_context *context = &device->context;
 
     (void) context->pipe->begin_query(context->pipe, query);
 }
 
-void
-nine_context_end_query(struct NineDevice9 *device, struct pipe_query *query)
+CSMT_ITEM_NO_WAIT(nine_context_end_query,
+                  ARG_REF(struct pipe_query, query))
 {
     struct nine_context *context = &device->context;
 
@@ -2415,6 +2629,8 @@ nine_context_get_query_result(struct NineDevice9 *device, struct pipe_query *que
     struct nine_context *context = &device->context;
 
     (void) flush;
+    if (device->csmt_active)
+        nine_csmt_process(device);
     return context->pipe->get_query_result(context->pipe, query, wait, result);
 }
 
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index cd3afed..ee88a94 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -337,6 +337,10 @@ extern const uint32_t nine_render_states_vertex[(NINED3DRS_COUNT + 31) / 32];
 
 struct NineDevice9;
 
+/* Internal multithreading: When enabled, the nine_context functions
+ * will append work to a worker thread when possible. Only the worker
+ * thread can access struct nine_context. */
+
 void
 nine_context_set_render_state(struct NineDevice9 *device,
                               D3DRENDERSTATETYPE State,
@@ -468,7 +472,7 @@ nine_context_set_depth_stencil(struct NineDevice9 *device,
 void
 nine_context_set_clip_plane(struct NineDevice9 *device,
                             DWORD Index,
-                            struct nine_clipplane *pPlane);
+                            const struct nine_clipplane *pPlane);
 
 void
 nine_context_set_swvp(struct NineDevice9 *device,
@@ -561,4 +565,26 @@ nine_state_light_enable(struct nine_ff_state *, uint32_t *,
 
 const char *nine_d3drs_to_string(DWORD State);
 
+/* CSMT functions */
+struct csmt_context;
+
+struct csmt_context *
+nine_csmt_create( struct NineDevice9 *This );
+
+void
+nine_csmt_destroy( struct NineDevice9 *This, struct csmt_context *ctx );
+
+void
+nine_csmt_process( struct NineDevice9 *This );
+
+
+/* Get the pipe_context (should not be called from the worker thread).
+ * All the work in the worker thread is finished before returning. */
+struct pipe_context *
+nine_context_get_pipe( struct NineDevice9 *device );
+
+/* Can be called from all threads */
+struct pipe_context *
+nine_context_get_pipe_multithread( struct NineDevice9 *device );
+
 #endif /* _NINE_STATE_H_ */
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 92980af..cb970ea 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -94,7 +94,7 @@ NinePixelShader9_dtor( struct NinePixelShader9 *This )
     DBG("This=%p\n", This);
 
     if (This->base.device) {
-        struct pipe_context *pipe = NineDevice9_GetPipe(This->base.device);
+        struct pipe_context *pipe = nine_context_get_pipe_multithread(This->base.device);
         struct nine_shader_variant *var = &This->variant;
 
         do {
diff --git a/src/gallium/state_trackers/nine/surface9.c b/src/gallium/state_trackers/nine/surface9.c
index 0ce4122..f348f50 100644
--- a/src/gallium/state_trackers/nine/surface9.c
+++ b/src/gallium/state_trackers/nine/surface9.c
@@ -31,6 +31,7 @@
 #include "nine_helpers.h"
 #include "nine_pipe.h"
 #include "nine_dump.h"
+#include "nine_state.h"
 
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index 502736c..d1d23c2 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -807,6 +807,8 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
         }
     }
 
+    nine_csmt_process(This->base.device);
+
     hr = present(This, pSourceRect, pDestRect,
                  hDestWindowOverride, pDirtyRegion, dwFlags);
     if (hr == D3DERR_WASSTILLDRAWING)
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index 71a56f4..79a49d1 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -112,7 +112,7 @@ NineVertexShader9_dtor( struct NineVertexShader9 *This )
     DBG("This=%p\n", This);
 
     if (This->base.device) {
-        struct pipe_context *pipe = NineDevice9_GetPipe(This->base.device);
+        struct pipe_context *pipe = nine_context_get_pipe_multithread(This->base.device);
         struct nine_shader_variant *var = &This->variant;
         struct nine_shader_variant_so *var_so = &This->variant_so;
 
diff --git a/src/gallium/targets/d3dadapter9/drm.c b/src/gallium/targets/d3dadapter9/drm.c
index e0c2e26..6163734 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -58,6 +58,7 @@ DRI_CONF_BEGIN
         DRI_CONF_NINE_THREADSUBMIT("false")
         DRI_CONF_NINE_ALLOWDISCARDDELAYEDRELEASE("true")
         DRI_CONF_NINE_TEARFREEDISCARD("false")
+        DRI_CONF_NINE_CSMT(-1)
     DRI_CONF_SECTION_END
 DRI_CONF_END;
 
@@ -301,6 +302,11 @@ drm_create_adapter( int fd,
         ctx->base.tearfree_discard = FALSE;
     }
 
+    if (driCheckOption(&userInitOptions, "csmt_force", DRI_INT))
+        ctx->base.csmt_force = driQueryOptioni(&userInitOptions, "csmt_force");
+    else
+        ctx->base.csmt_force = -1;
+
     driDestroyOptionCache(&userInitOptions);
     driDestroyOptionInfo(&defaultInitOptions);
 
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
index 0ab2efc..a189bbe 100644
--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -386,3 +386,8 @@ DRI_CONF_OPT_END
 DRI_CONF_OPT_BEGIN_B(tearfree_discard, def) \
         DRI_CONF_DESC(en,gettext("Whether to make d3d's presentation mode DISCARD (games usually use that mode) Tear Free. If rendering above screen refresh, some frames will get skipped. false by default.")) \
 DRI_CONF_OPT_END
+
+#define DRI_CONF_NINE_CSMT(def) \
+DRI_CONF_OPT_BEGIN(csmt_force, int, def) \
+        DRI_CONF_DESC(en,gettext("If set to 1, force gallium nine CSMT. If set to 0, disable it. By default (-1) CSMT is enabled on known thread-safe drivers.")) \
+DRI_CONF_OPT_END
-- 
2.10.2



More information about the mesa-dev mailing list