[Mesa-dev] [PATCH 45/71] st/nine: Rework constant buffer state handling

Axel Davy axel.davy at ens.fr
Sun Aug 16 08:28:09 PDT 2015


We have two paths:
. One that uses a fixed constant buffer, and updates it when needed
. One that uses a user constant buffer, and upload it when needed.

This patch separates the preparation of the constant buffer
and the commit.

It also removes NineDevice9_RestoreNonCSOState, which was
used to restore all states. Instead the commit of the constant
buffer is moved to nine_state, and the other field settings
moved to other functions where more appropriate.

Signed-off-by: Axel Davy <axel.davy at ens.fr>
---
 src/gallium/state_trackers/nine/device9.c    |  69 +--
 src/gallium/state_trackers/nine/device9.h    |   4 -
 src/gallium/state_trackers/nine/nine_state.c | 795 ++++++++++++++-------------
 src/gallium/state_trackers/nine/nine_state.h |   5 +
 src/gallium/state_trackers/nine/swapchain9.c |   2 +-
 5 files changed, 435 insertions(+), 440 deletions(-)

diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 1416a38..c568761 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -119,68 +119,6 @@ NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
             This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
 }
 
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask )
-{
-    struct pipe_context *pipe = This->pipe;
-
-    DBG("This=%p mask=%u\n", This, mask);
-
-    if (mask & 0x1) {
-        struct pipe_constant_buffer cb;
-        cb.buffer_offset = 0;
-        cb.buffer_size = This->vs_const_size;
-
-        if (This->prefer_user_constbuf) {
-            cb.buffer = NULL;
-            cb.user_buffer = This->state.vs_const_f;
-            if (!This->driver_caps.user_cbufs) {
-                u_upload_data(This->constbuf_uploader,
-                              0,
-                              cb.buffer_size,
-                              cb.user_buffer,
-                              &cb.buffer_offset,
-                              &cb.buffer);
-                u_upload_unmap(This->constbuf_uploader);
-                cb.user_buffer = NULL;
-            }
-        } else {
-            cb.buffer = This->constbuf_vs;
-            cb.user_buffer = NULL;
-        }
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
-
-        cb.buffer_size = This->ps_const_size;
-        if (This->prefer_user_constbuf) {
-            cb.user_buffer = This->state.ps_const_f;
-            if (!This->driver_caps.user_cbufs) {
-                u_upload_data(This->constbuf_uploader,
-                              0,
-                              cb.buffer_size,
-                              cb.user_buffer,
-                              &cb.buffer_offset,
-                              &cb.buffer);
-                u_upload_unmap(This->constbuf_uploader);
-                cb.user_buffer = NULL;
-            }
-        } else {
-            cb.buffer = This->constbuf_ps;
-        }
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
-    }
-
-    if (mask & 0x2) {
-        struct pipe_poly_stipple stipple;
-        memset(&stipple, ~0, sizeof(stipple));
-        pipe->set_polygon_stipple(pipe, &stipple);
-    }
-
-    This->state.changed.group = NINE_STATE_ALL;
-    This->state.changed.vtxbuf = (1ULL << This->caps.MaxStreams) - 1;
-    This->state.changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
-    This->state.changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
-}
-
 #define GET_PCAP(n) pScreen->get_param(pScreen, PIPE_CAP_##n)
 HRESULT
 NineDevice9_ctor( struct NineDevice9 *This,
@@ -455,7 +393,12 @@ NineDevice9_ctor( struct NineDevice9 *This,
     nine_ff_init(This); /* initialize fixed function code */
 
     NineDevice9_SetDefaultState(This, FALSE);
-    NineDevice9_RestoreNonCSOState(This, ~0);
+
+    {
+        struct pipe_poly_stipple stipple;
+        memset(&stipple, ~0, sizeof(stipple));
+        This->pipe->set_polygon_stipple(This->pipe, &stipple);
+    }
 
     This->update = &This->state;
     nine_update_state(This);
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index 8b955a7..f109f3c 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -184,10 +184,6 @@ NineDevice9_GetCSO( struct NineDevice9 *This );
 const D3DCAPS9 *
 NineDevice9_GetCaps( struct NineDevice9 *This );
 
-/* Mask: 0x1 = constant buffers, 0x2 = stipple */
-void
-NineDevice9_RestoreNonCSOState( struct NineDevice9 *This, unsigned mask );
-
 /*** Direct3D public ***/
 
 HRESULT WINAPI
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 8c2b6eb..1b9622b 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -61,6 +61,275 @@ prepare_rasterizer(struct NineDevice9 *device)
     device->state.commit |= NINE_STATE_COMMIT_RASTERIZER;
 }
 
+#define DO_UPLOAD_CONST_F(buf,p,c,d) \
+    do { \
+        DBG("upload ConstantF [%u .. %u]\n", x, (x) + (c) - 1); \
+        box.x = (p) * 4 * sizeof(float); \
+        box.width = (c) * 4 * sizeof(float); \
+        pipe->transfer_inline_write(pipe, buf, 0, usage, &box, &((d)[p * 4]), \
+                                    0, 0); \
+    } while(0)
+
+/* OK, this is a bit ugly ... */
+static void
+upload_constants(struct NineDevice9 *device, unsigned shader_type)
+{
+    struct pipe_context *pipe = device->pipe;
+    struct pipe_resource *buf;
+    struct pipe_box box;
+    const void *data;
+    const float *const_f;
+    const int *const_i;
+    const BOOL *const_b;
+    uint32_t data_b[NINE_MAX_CONST_B];
+    uint16_t dirty_i;
+    uint16_t dirty_b;
+    const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE;
+    unsigned x = 0; /* silence warning */
+    unsigned i, c;
+    struct nine_range *r, *p, *lconstf_ranges;
+    float *lconstf_data;
+
+    box.y = 0;
+    box.z = 0;
+    box.height = 1;
+    box.depth = 1;
+
+    if (shader_type == PIPE_SHADER_VERTEX) {
+        DBG("VS\n");
+        buf = device->constbuf_vs;
+
+        const_f = device->state.vs_const_f;
+        for (p = r = device->state.changed.vs_const_f; r; p = r, r = r->next)
+            DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f);
+        if (p) {
+            nine_range_pool_put_chain(&device->range_pool,
+                                      device->state.changed.vs_const_f, p);
+            device->state.changed.vs_const_f = NULL;
+        }
+
+        dirty_i = device->state.changed.vs_const_i;
+        device->state.changed.vs_const_i = 0;
+        const_i = &device->state.vs_const_i[0][0];
+
+        dirty_b = device->state.changed.vs_const_b;
+        device->state.changed.vs_const_b = 0;
+        const_b = device->state.vs_const_b;
+
+        lconstf_ranges = device->state.vs->lconstf.ranges;
+        lconstf_data = device->state.vs->lconstf.data;
+
+        device->state.ff.clobber.vs_const = TRUE;
+        device->state.changed.group &= ~NINE_STATE_VS_CONST;
+    } else {
+        DBG("PS\n");
+        buf = device->constbuf_ps;
+
+        const_f = device->state.ps_const_f;
+        for (p = r = device->state.changed.ps_const_f; r; p = r, r = r->next)
+            DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f);
+        if (p) {
+            nine_range_pool_put_chain(&device->range_pool,
+                                      device->state.changed.ps_const_f, p);
+            device->state.changed.ps_const_f = NULL;
+        }
+
+        dirty_i = device->state.changed.ps_const_i;
+        device->state.changed.ps_const_i = 0;
+        const_i = &device->state.ps_const_i[0][0];
+
+        dirty_b = device->state.changed.ps_const_b;
+        device->state.changed.ps_const_b = 0;
+        const_b = device->state.ps_const_b;
+
+        lconstf_ranges = NULL;
+        lconstf_data = NULL;
+
+        device->state.ff.clobber.ps_const = TRUE;
+        device->state.changed.group &= ~NINE_STATE_PS_CONST;
+    }
+
+    /* write range from min to max changed, it's not much data */
+    /* bool1 */
+    if (dirty_b) {
+       c = util_last_bit(dirty_b);
+       i = ffs(dirty_b) - 1;
+       x = buf->width0 - (NINE_MAX_CONST_B - i) * 4;
+       c -= i;
+       memcpy(data_b, &(const_b[i]), c * sizeof(uint32_t));
+       box.x = x;
+       box.width = c * 4;
+       DBG("upload ConstantB [%u .. %u]\n", x, x + c - 1);
+       pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data_b, 0, 0);
+    }
+
+    /* int4 */
+    for (c = 0, i = 0; dirty_i; i++, dirty_i >>= 1) {
+        if (dirty_i & 1) {
+            if (!c)
+                x = i;
+            ++c;
+        } else
+        if (c) {
+            DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1);
+            data = &const_i[x * 4];
+            box.x  = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4;
+            box.x += x * 4 * sizeof(int);
+            box.width = c * 4 * sizeof(int);
+            c = 0;
+            pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
+        }
+    }
+    if (c) {
+        DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1);
+        data = &const_i[x * 4];
+        box.x  = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4;
+        box.x += x * 4 * sizeof(int);
+        box.width = c * 4 * sizeof(int);
+        pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
+    }
+
+    /* TODO: only upload these when shader itself changes */
+    if (lconstf_ranges) {
+        unsigned n = 0;
+        struct nine_range *r = lconstf_ranges;
+        while (r) {
+            box.x = r->bgn * 4 * sizeof(float);
+            n += r->end - r->bgn;
+            box.width = (r->end - r->bgn) * 4 * sizeof(float);
+            data = &lconstf_data[4 * n];
+            pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
+            r = r->next;
+        }
+    }
+}
+
+static void
+prepare_vs_constants_userbuf(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+    struct pipe_constant_buffer cb;
+    cb.buffer = NULL;
+    cb.buffer_offset = 0;
+    cb.buffer_size = device->state.vs->const_used_size;
+    cb.user_buffer = device->state.vs_const_f;
+
+    if (!cb.buffer_size)
+        return;
+
+    if (state->changed.vs_const_i) {
+        int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
+        memcpy(idst, state->vs_const_i, sizeof(state->vs_const_i));
+        state->changed.vs_const_i = 0;
+    }
+    if (state->changed.vs_const_b) {
+        int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
+        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
+        memcpy(bdst, state->vs_const_b, sizeof(state->vs_const_b));
+        state->changed.vs_const_b = 0;
+    }
+
+    if (device->state.vs->lconstf.ranges) {
+        /* TODO: Can we make it so that we don't have to copy everything ? */
+        const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
+        const struct nine_range *r = lconstf->ranges;
+        unsigned n = 0;
+        float *dst = device->state.vs_lconstf_temp;
+        float *src = (float *)cb.user_buffer;
+        memcpy(dst, src, cb.buffer_size);
+        while (r) {
+            unsigned p = r->bgn;
+            unsigned c = r->end - r->bgn;
+            memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
+            n += c;
+            r = r->next;
+        }
+        cb.user_buffer = dst;
+    }
+
+    if (!device->driver_caps.user_cbufs) {
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb.buffer_size,
+                      cb.user_buffer,
+                      &cb.buffer_offset,
+                      &cb.buffer);
+        u_upload_unmap(device->constbuf_uploader);
+        cb.user_buffer = NULL;
+    }
+
+    state->pipe.cb_vs = cb;
+
+    if (device->state.changed.vs_const_f) {
+        struct nine_range *r = device->state.changed.vs_const_f;
+        struct nine_range *p = r;
+        while (p->next)
+            p = p->next;
+        nine_range_pool_put_chain(&device->range_pool, r, p);
+        device->state.changed.vs_const_f = NULL;
+    }
+    state->changed.group &= ~NINE_STATE_VS_CONST;
+    state->commit |= NINE_STATE_COMMIT_CONST_VS;
+}
+
+static void
+prepare_ps_constants_userbuf(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+    struct pipe_constant_buffer cb;
+    cb.buffer = NULL;
+    cb.buffer_offset = 0;
+    cb.buffer_size = device->state.ps->const_used_size;
+    cb.user_buffer = device->state.ps_const_f;
+
+    if (!cb.buffer_size)
+        return;
+
+    if (state->changed.ps_const_i) {
+        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
+        memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i));
+        state->changed.ps_const_i = 0;
+    }
+    if (state->changed.ps_const_b) {
+        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
+        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
+        memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b));
+        state->changed.ps_const_b = 0;
+    }
+
+    /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
+    if (device->state.ps->bumpenvmat_needed) {
+        memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+        memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars));
+
+        cb.user_buffer = device->state.ps_lconstf_temp;
+    }
+
+    if (!device->driver_caps.user_cbufs) {
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb.buffer_size,
+                      cb.user_buffer,
+                      &cb.buffer_offset,
+                      &cb.buffer);
+        u_upload_unmap(device->constbuf_uploader);
+        cb.user_buffer = NULL;
+    }
+
+    state->pipe.cb_ps = cb;
+
+    if (device->state.changed.ps_const_f) {
+        struct nine_range *r = device->state.changed.ps_const_f;
+        struct nine_range *p = r;
+        while (p->next)
+            p = p->next;
+        nine_range_pool_put_chain(&device->range_pool, r, p);
+        device->state.changed.ps_const_f = NULL;
+    }
+    state->changed.group &= ~NINE_STATE_PS_CONST;
+    state->commit |= NINE_STATE_COMMIT_CONST_PS;
+}
+
 /* State preparation incremental */
 
 /* State preparation + State commit */
@@ -252,404 +521,134 @@ update_vertex_elements(struct NineDevice9 *device)
         for (i = 0; i < device->caps.MaxStreams; i++ ) {
             if (!used_streams[i]) {
                 dummy_vbo_stream = i;
-                break;
-            }
-        }
-    }
-    /* there are less vertex shader inputs than stream slots,
-     * so if we need a slot for the dummy vbo, we should have found one */
-    assert (!need_dummy_vbo || dummy_vbo_stream != -1);
-
-    for (n = 0; n < vs->num_inputs; ++n) {
-        index = vdecl_index_map[n];
-        if (index >= 0) {
-            ve[n] = vdecl->elems[index];
-            b = ve[n].vertex_buffer_index;
-            state->stream_usage_mask |= 1 << b;
-            /* XXX wine just uses 1 here: */
-            if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
-                ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
-        } else {
-            /* if the vertex declaration is incomplete compared to what the
-             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
-             * This is not precised by the spec, but is the behaviour
-             * tested on win */
-            ve[n].vertex_buffer_index = dummy_vbo_stream;
-            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-            ve[n].src_offset = 0;
-            ve[n].instance_divisor = 0;
-        }
-    }
-
-    if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
-        if (state->dummy_vbo_bound_at >= 0)
-            state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
-        if (dummy_vbo_stream >= 0) {
-            state->changed.vtxbuf |= 1 << dummy_vbo_stream;
-            state->vbo_bound_done = FALSE;
-        }
-        state->dummy_vbo_bound_at = dummy_vbo_stream;
-    }
-
-    cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
-
-    state->changed.stream_freq = 0;
-}
-
-static inline uint32_t
-update_shader_variant_keys(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    uint32_t mask = 0;
-    uint32_t vs_key = state->samplers_shadow;
-    uint32_t ps_key = state->samplers_shadow;
-
-    vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0);
-    ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0);
-
-    if (state->vs) vs_key &= state->vs->sampler_mask;
-    if (state->ps) {
-        if (unlikely(state->ps->byte_code.version < 0x20)) {
-            /* no depth textures, but variable targets */
-            uint32_t m = state->ps->sampler_mask;
-            ps_key = 0;
-            while (m) {
-                int s = ffs(m) - 1;
-                m &= ~(1 << s);
-                ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2);
-            }
-        } else {
-            ps_key &= state->ps->sampler_mask;
-        }
-    }
-
-    if (state->vs && state->vs_key != vs_key) {
-        state->vs_key = vs_key;
-        mask |= NINE_STATE_VS;
-    }
-    if (state->ps && state->ps_key != ps_key) {
-        state->ps_key = ps_key;
-        mask |= NINE_STATE_PS;
-    }
-    return mask;
-}
-
-static inline uint32_t
-update_vs(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    struct NineVertexShader9 *vs = state->vs;
-    uint32_t changed_group = 0;
-
-    /* likely because we dislike FF */
-    if (likely(vs)) {
-        state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key);
-    } else {
-        vs = device->ff.vs;
-        state->cso.vs = vs->variant.cso;
-    }
-    device->pipe->bind_vs_state(device->pipe, state->cso.vs);
-
-    if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
-        state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
-        changed_group |= NINE_STATE_RASTERIZER;
-    }
-
-    if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
-        /* Bound dummy sampler. */
-        changed_group |= NINE_STATE_SAMPLER;
-    return changed_group;
-}
-
-static inline uint32_t
-update_ps(struct NineDevice9 *device)
-{
-    struct nine_state *state = &device->state;
-    struct NinePixelShader9 *ps = state->ps;
-    uint32_t changed_group = 0;
-
-    if (likely(ps)) {
-        state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key);
-    } else {
-        ps = device->ff.ps;
-        state->cso.ps = ps->variant.cso;
-    }
-    device->pipe->bind_fs_state(device->pipe, state->cso.ps);
-
-    if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
-        /* Bound dummy sampler. */
-        changed_group |= NINE_STATE_SAMPLER;
-    return changed_group;
-}
-
-#define DO_UPLOAD_CONST_F(buf,p,c,d) \
-    do { \
-        DBG("upload ConstantF [%u .. %u]\n", x, (x) + (c) - 1); \
-        box.x = (p) * 4 * sizeof(float); \
-        box.width = (c) * 4 * sizeof(float); \
-        pipe->transfer_inline_write(pipe, buf, 0, usage, &box, &((d)[p * 4]), \
-                                    0, 0); \
-    } while(0)
-
-/* OK, this is a bit ugly ... */
-static void
-update_constants(struct NineDevice9 *device, unsigned shader_type)
-{
-    struct pipe_context *pipe = device->pipe;
-    struct pipe_resource *buf;
-    struct pipe_box box;
-    const void *data;
-    const float *const_f;
-    const int *const_i;
-    const BOOL *const_b;
-    uint32_t data_b[NINE_MAX_CONST_B];
-    uint16_t dirty_i;
-    uint16_t dirty_b;
-    const unsigned usage = PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE;
-    unsigned x = 0; /* silence warning */
-    unsigned i, c;
-    struct nine_range *r, *p, *lconstf_ranges;
-    float *lconstf_data;
-
-    box.y = 0;
-    box.z = 0;
-    box.height = 1;
-    box.depth = 1;
-
-    if (shader_type == PIPE_SHADER_VERTEX) {
-        DBG("VS\n");
-        buf = device->constbuf_vs;
-
-        const_f = device->state.vs_const_f;
-        for (p = r = device->state.changed.vs_const_f; r; p = r, r = r->next)
-            DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f);
-        if (p) {
-            nine_range_pool_put_chain(&device->range_pool,
-                                      device->state.changed.vs_const_f, p);
-            device->state.changed.vs_const_f = NULL;
-        }
-
-        dirty_i = device->state.changed.vs_const_i;
-        device->state.changed.vs_const_i = 0;
-        const_i = &device->state.vs_const_i[0][0];
-
-        dirty_b = device->state.changed.vs_const_b;
-        device->state.changed.vs_const_b = 0;
-        const_b = device->state.vs_const_b;
-
-        lconstf_ranges = device->state.vs->lconstf.ranges;
-        lconstf_data = device->state.vs->lconstf.data;
-
-        device->state.ff.clobber.vs_const = TRUE;
-        device->state.changed.group &= ~NINE_STATE_VS_CONST;
-    } else {
-        DBG("PS\n");
-        buf = device->constbuf_ps;
-
-        const_f = device->state.ps_const_f;
-        for (p = r = device->state.changed.ps_const_f; r; p = r, r = r->next)
-            DO_UPLOAD_CONST_F(buf, r->bgn, r->end - r->bgn, const_f);
-        if (p) {
-            nine_range_pool_put_chain(&device->range_pool,
-                                      device->state.changed.ps_const_f, p);
-            device->state.changed.ps_const_f = NULL;
-        }
-
-        dirty_i = device->state.changed.ps_const_i;
-        device->state.changed.ps_const_i = 0;
-        const_i = &device->state.ps_const_i[0][0];
-
-        dirty_b = device->state.changed.ps_const_b;
-        device->state.changed.ps_const_b = 0;
-        const_b = device->state.ps_const_b;
-
-        lconstf_ranges = NULL;
-        lconstf_data = NULL;
-
-        device->state.ff.clobber.ps_const = TRUE;
-        device->state.changed.group &= ~NINE_STATE_PS_CONST;
-    }
-
-    /* write range from min to max changed, it's not much data */
-    /* bool1 */
-    if (dirty_b) {
-       c = util_last_bit(dirty_b);
-       i = ffs(dirty_b) - 1;
-       x = buf->width0 - (NINE_MAX_CONST_B - i) * 4;
-       c -= i;
-       memcpy(data_b, &(const_b[i]), c * sizeof(uint32_t));
-       box.x = x;
-       box.width = c * 4;
-       DBG("upload ConstantB [%u .. %u]\n", x, x + c - 1);
-       pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data_b, 0, 0);
+                break;
+            }
+        }
     }
+    /* there are less vertex shader inputs than stream slots,
+     * so if we need a slot for the dummy vbo, we should have found one */
+    assert (!need_dummy_vbo || dummy_vbo_stream != -1);
 
-    /* int4 */
-    for (c = 0, i = 0; dirty_i; i++, dirty_i >>= 1) {
-        if (dirty_i & 1) {
-            if (!c)
-                x = i;
-            ++c;
-        } else
-        if (c) {
-            DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1);
-            data = &const_i[x * 4];
-            box.x  = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4;
-            box.x += x * 4 * sizeof(int);
-            box.width = c * 4 * sizeof(int);
-            c = 0;
-            pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
+    for (n = 0; n < vs->num_inputs; ++n) {
+        index = vdecl_index_map[n];
+        if (index >= 0) {
+            ve[n] = vdecl->elems[index];
+            b = ve[n].vertex_buffer_index;
+            state->stream_usage_mask |= 1 << b;
+            /* XXX wine just uses 1 here: */
+            if (state->stream_freq[b] & D3DSTREAMSOURCE_INSTANCEDATA)
+                ve[n].instance_divisor = state->stream_freq[b] & 0x7FFFFF;
+        } else {
+            /* if the vertex declaration is incomplete compared to what the
+             * vertex shader needs, we bind a dummy vbo with 0 0 0 0.
+             * This is not precised by the spec, but is the behaviour
+             * tested on win */
+            ve[n].vertex_buffer_index = dummy_vbo_stream;
+            ve[n].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+            ve[n].src_offset = 0;
+            ve[n].instance_divisor = 0;
         }
     }
-    if (c) {
-        DBG("upload ConstantI [%u .. %u]\n", x, x + c - 1);
-        data = &const_i[x * 4];
-        box.x  = buf->width0 - (NINE_MAX_CONST_I * 4 + NINE_MAX_CONST_B) * 4;
-        box.x += x * 4 * sizeof(int);
-        box.width = c * 4 * sizeof(int);
-        pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
-    }
 
-    /* TODO: only upload these when shader itself changes */
-    if (lconstf_ranges) {
-        unsigned n = 0;
-        struct nine_range *r = lconstf_ranges;
-        while (r) {
-            box.x = r->bgn * 4 * sizeof(float);
-            n += r->end - r->bgn;
-            box.width = (r->end - r->bgn) * 4 * sizeof(float);
-            data = &lconstf_data[4 * n];
-            pipe->transfer_inline_write(pipe, buf, 0, usage, &box, data, 0, 0);
-            r = r->next;
+    if (state->dummy_vbo_bound_at != dummy_vbo_stream) {
+        if (state->dummy_vbo_bound_at >= 0)
+            state->changed.vtxbuf |= 1 << state->dummy_vbo_bound_at;
+        if (dummy_vbo_stream >= 0) {
+            state->changed.vtxbuf |= 1 << dummy_vbo_stream;
+            state->vbo_bound_done = FALSE;
         }
+        state->dummy_vbo_bound_at = dummy_vbo_stream;
     }
+
+    cso_set_vertex_elements(device->cso, vs->num_inputs, ve);
+
+    state->changed.stream_freq = 0;
 }
 
-static void
-update_vs_constants_userbuf(struct NineDevice9 *device)
+static inline uint32_t
+update_shader_variant_keys(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
-    struct pipe_context *pipe = device->pipe;
-    struct pipe_constant_buffer cb;
-    cb.buffer = NULL;
-    cb.buffer_offset = 0;
-    cb.buffer_size = device->state.vs->const_used_size;
-    cb.user_buffer = device->state.vs_const_f;
-
-    if (!cb.buffer_size)
-        return;
+    uint32_t mask = 0;
+    uint32_t vs_key = state->samplers_shadow;
+    uint32_t ps_key = state->samplers_shadow;
 
-    if (state->changed.vs_const_i) {
-        int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
-        memcpy(idst, state->vs_const_i, sizeof(state->vs_const_i));
-        state->changed.vs_const_i = 0;
-    }
-    if (state->changed.vs_const_b) {
-        int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
-        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
-        memcpy(bdst, state->vs_const_b, sizeof(state->vs_const_b));
-        state->changed.vs_const_b = 0;
-    }
+    vs_key = (vs_key & NINE_VS_SAMPLERS_MASK) >> NINE_SAMPLER_VS(0);
+    ps_key = (ps_key & NINE_PS_SAMPLERS_MASK) >> NINE_SAMPLER_PS(0);
 
-    if (device->state.vs->lconstf.ranges) {
-        /* TODO: Can we make it so that we don't have to copy everything ? */
-        const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
-        const struct nine_range *r = lconstf->ranges;
-        unsigned n = 0;
-        float *dst = device->state.vs_lconstf_temp;
-        float *src = (float *)cb.user_buffer;
-        memcpy(dst, src, cb.buffer_size);
-        while (r) {
-            unsigned p = r->bgn;
-            unsigned c = r->end - r->bgn;
-            memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
-            n += c;
-            r = r->next;
+    if (state->vs) vs_key &= state->vs->sampler_mask;
+    if (state->ps) {
+        if (unlikely(state->ps->byte_code.version < 0x20)) {
+            /* no depth textures, but variable targets */
+            uint32_t m = state->ps->sampler_mask;
+            ps_key = 0;
+            while (m) {
+                int s = ffs(m) - 1;
+                m &= ~(1 << s);
+                ps_key |= (state->texture[s] ? state->texture[s]->pstype : 1) << (s * 2);
+            }
+        } else {
+            ps_key &= state->ps->sampler_mask;
         }
-        cb.user_buffer = dst;
     }
 
-    if (!device->driver_caps.user_cbufs) {
-        u_upload_data(device->constbuf_uploader,
-                      0,
-                      cb.buffer_size,
-                      cb.user_buffer,
-                      &cb.buffer_offset,
-                      &cb.buffer);
-        u_upload_unmap(device->constbuf_uploader);
-        cb.user_buffer = NULL;
+    if (state->vs && state->vs_key != vs_key) {
+        state->vs_key = vs_key;
+        mask |= NINE_STATE_VS;
     }
-
-    pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
-
-    if (device->state.changed.vs_const_f) {
-        struct nine_range *r = device->state.changed.vs_const_f;
-        struct nine_range *p = r;
-        while (p->next)
-            p = p->next;
-        nine_range_pool_put_chain(&device->range_pool, r, p);
-        device->state.changed.vs_const_f = NULL;
+    if (state->ps && state->ps_key != ps_key) {
+        state->ps_key = ps_key;
+        mask |= NINE_STATE_PS;
     }
-    state->changed.group &= ~NINE_STATE_VS_CONST;
+    return mask;
 }
 
-static void
-update_ps_constants_userbuf(struct NineDevice9 *device)
+static inline uint32_t
+update_vs(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
-    struct pipe_context *pipe = device->pipe;
-    struct pipe_constant_buffer cb;
-    int i;
-    cb.buffer = NULL;
-    cb.buffer_offset = 0;
-    cb.buffer_size = device->state.ps->const_used_size;
-    cb.user_buffer = device->state.ps_const_f;
-
-    if (!cb.buffer_size)
-        return;
+    struct NineVertexShader9 *vs = state->vs;
+    uint32_t changed_group = 0;
 
-    if (state->changed.ps_const_i) {
-        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
-        memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i));
-        state->changed.ps_const_i = 0;
-    }
-    if (state->changed.ps_const_b) {
-        int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
-        uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
-        memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b));
-        state->changed.ps_const_b = 0;
+    /* likely because we dislike FF */
+    if (likely(vs)) {
+        state->cso.vs = NineVertexShader9_GetVariant(vs, state->vs_key);
+    } else {
+        vs = device->ff.vs;
+        state->cso.vs = vs->variant.cso;
     }
+    device->pipe->bind_vs_state(device->pipe, state->cso.vs);
 
-    /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
-    if (device->state.ps->bumpenvmat_needed) {
-        memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
-        memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars));
-
-        cb.user_buffer = device->state.ps_lconstf_temp;
+    if (state->rs[NINED3DRS_VSPOINTSIZE] != vs->point_size) {
+        state->rs[NINED3DRS_VSPOINTSIZE] = vs->point_size;
+        changed_group |= NINE_STATE_RASTERIZER;
     }
 
-    if (!device->driver_caps.user_cbufs) {
-        u_upload_data(device->constbuf_uploader,
-                      0,
-                      cb.buffer_size,
-                      cb.user_buffer,
-                      &cb.buffer_offset,
-                      &cb.buffer);
-        u_upload_unmap(device->constbuf_uploader);
-        cb.user_buffer = NULL;
-    }
+    if ((state->bound_samplers_mask_vs & vs->sampler_mask) != vs->sampler_mask)
+        /* Bound dummy sampler. */
+        changed_group |= NINE_STATE_SAMPLER;
+    return changed_group;
+}
 
-    pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
+static inline uint32_t
+update_ps(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+    struct NinePixelShader9 *ps = state->ps;
+    uint32_t changed_group = 0;
 
-    if (device->state.changed.ps_const_f) {
-        struct nine_range *r = device->state.changed.ps_const_f;
-        struct nine_range *p = r;
-        while (p->next)
-            p = p->next;
-        nine_range_pool_put_chain(&device->range_pool, r, p);
-        device->state.changed.ps_const_f = NULL;
+    if (likely(ps)) {
+        state->cso.ps = NinePixelShader9_GetVariant(ps, state->ps_key);
+    } else {
+        ps = device->ff.ps;
+        state->cso.ps = ps->variant.cso;
     }
-    state->changed.group &= ~NINE_STATE_PS_CONST;
+    device->pipe->bind_fs_state(device->pipe, state->cso.ps);
+
+    if ((state->bound_samplers_mask_ps & ps->sampler_mask) != ps->sampler_mask)
+        /* Bound dummy sampler. */
+        changed_group |= NINE_STATE_SAMPLER;
+    return changed_group;
 }
 
 static void
@@ -905,6 +904,22 @@ commit_index_buffer(struct NineDevice9 *device)
         pipe->set_index_buffer(pipe, NULL);
 }
 
+static inline void
+commit_vs_constants(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+
+    pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+}
+
+static inline void
+commit_ps_constants(struct NineDevice9 *device)
+{
+    struct pipe_context *pipe = device->pipe;
+
+    pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &device->state.pipe.cb_ps);
+}
+
 /* State Update */
 
 #define NINE_STATE_FREQ_GROUP_0 \
@@ -1034,14 +1049,14 @@ nine_update_state(struct NineDevice9 *device)
 
         if (device->prefer_user_constbuf) {
             if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
-                update_vs_constants_userbuf(device);
+                prepare_vs_constants_userbuf(device);
             if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
-                update_ps_constants_userbuf(device);
+                prepare_ps_constants_userbuf(device);
         } else {
             if ((group & NINE_STATE_VS_CONST) && state->vs)
-                update_constants(device, PIPE_SHADER_VERTEX);
+                upload_constants(device, PIPE_SHADER_VERTEX);
             if ((group & NINE_STATE_PS_CONST) && state->ps)
-                update_constants(device, PIPE_SHADER_FRAGMENT);
+                upload_constants(device, PIPE_SHADER_FRAGMENT);
         }
     }
     if (state->changed.vtxbuf)
@@ -1053,6 +1068,10 @@ nine_update_state(struct NineDevice9 *device)
         commit_dsa(device);
     if (state->commit & NINE_STATE_COMMIT_RASTERIZER)
         commit_rasterizer(device);
+    if (state->commit & NINE_STATE_COMMIT_CONST_VS)
+        commit_vs_constants(device);
+    if (state->commit & NINE_STATE_COMMIT_CONST_PS)
+        commit_ps_constants(device);
 
     state->commit = 0;
 
@@ -1219,6 +1238,18 @@ static const DWORD nine_samp_state_defaults[NINED3DSAMP_LAST + 1] =
     [NINED3DSAMP_MINLOD] = 0,
     [NINED3DSAMP_SHADOW] = 0
 };
+
+void nine_state_restore_non_cso(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+
+    state->changed.group = NINE_STATE_ALL;
+    state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
+    state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
+    state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
+    state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
+}
+
 void
 nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
                         boolean is_reset)
@@ -1256,6 +1287,9 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
     /* Set changed flags to initialize driver.
      */
     state->changed.group = NINE_STATE_ALL;
+    state->changed.vtxbuf = (1ULL << device->caps.MaxStreams) - 1;
+    state->changed.ucp = (1 << PIPE_MAX_CLIP_PLANES) - 1;
+    state->changed.texture = NINE_PS_SAMPLERS_MASK | NINE_VS_SAMPLERS_MASK;
 
     state->ff.changed.transform[0] = ~0;
     state->ff.changed.transform[D3DTS_WORLD / 32] |= 1 << (D3DTS_WORLD % 32);
@@ -1272,6 +1306,23 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
         state->dummy_vbo_bound_at = -1;
         state->vbo_bound_done = FALSE;
     }
+
+    if (!device->prefer_user_constbuf) {
+        /* fill cb_vs and cb_ps for the non user constbuf path */
+        struct pipe_constant_buffer cb;
+
+        cb.buffer_offset = 0;
+        cb.buffer_size = device->vs_const_size;
+        cb.buffer = device->constbuf_vs;
+        cb.user_buffer = NULL;
+        state->pipe.cb_vs = cb;
+
+        cb.buffer_size = device->ps_const_size;
+        cb.buffer = device->constbuf_ps;
+        state->pipe.cb_ps = cb;
+
+        state->commit |= NINE_STATE_COMMIT_CONST_VS | NINE_STATE_COMMIT_CONST_PS;
+    }
 }
 
 void
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 60e5d8f..ff6c180 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -81,6 +81,8 @@
 #define NINE_STATE_COMMIT_DSA  (1 << 0)
 #define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
 #define NINE_STATE_COMMIT_BLEND (1 << 2)
+#define NINE_STATE_COMMIT_CONST_VS (1 << 3)
+#define NINE_STATE_COMMIT_CONST_PS (1 << 4)
 
 
 #define NINE_MAX_SIMULTANEOUS_RENDERTARGETS 4
@@ -218,6 +220,8 @@ struct nine_state
         struct pipe_depth_stencil_alpha_state dsa;
         struct pipe_rasterizer_state rast;
         struct pipe_blend_state blend;
+        struct pipe_constant_buffer cb_vs;
+        struct pipe_constant_buffer cb_ps;
     } pipe;
 };
 
@@ -235,6 +239,7 @@ struct NineDevice9;
 void nine_update_state_framebuffer(struct NineDevice9 *);
 boolean nine_update_state(struct NineDevice9 *);
 
+void nine_state_restore_non_cso(struct NineDevice9 *device);
 void nine_state_set_defaults(struct NineDevice9 *, const D3DCAPS9 *,
                              boolean is_reset);
 void nine_state_clear(struct nine_state *, const boolean device);
diff --git a/src/gallium/state_trackers/nine/swapchain9.c b/src/gallium/state_trackers/nine/swapchain9.c
index 6f8e066..3f5be26 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -597,7 +597,7 @@ handle_draw_cursor_and_hud( struct NineSwapChain9 *This, struct pipe_resource *r
     if (device->hud && resource) {
         hud_draw(device->hud, resource); /* XXX: no offset */
         /* HUD doesn't clobber stipple */
-        NineDevice9_RestoreNonCSOState(device, ~0x2);
+        nine_state_restore_non_cso(device);
     }
 }
 
-- 
2.1.0



More information about the mesa-dev mailing list