[Mesa-dev] [PATCH 19/75] st/nine: Partial software vertex processing support

Axel Davy axel.davy at ens.fr
Wed Oct 5 20:08:52 UTC 2016


Software Vertex Processing (swvp) allows:
. Fewer limitations for shaders (more loops, etc.)
. Fewer limitations for fixed function (more enabled lights, 255
matrices for VertexBlend)

In particular, shaders can use more constants.
This patch implements support for that. It does not use software
rendering: the hardware is still used, since llvmpipe and dx10+ hw
have the same limits.

This is considered a second-class path. Even apps asking for
"Mixed Vertex Processing" (i.e. the ability to switch to swvp
on demand) do not use the feature much. Some just initialize
more constants than the normal limit at the start of the
application, but never use more than the normal limit.
When apps do not need the software vertex processing
features, they do not seem to turn it on, so it is
acceptable for that path to be slow.
Thus no care has been taken to optimize this path.

Signed-off-by: Axel Davy <axel.davy at ens.fr>
---
 src/gallium/state_trackers/nine/device9.c       |  85 +++++++++---
 src/gallium/state_trackers/nine/device9.h       |   1 +
 src/gallium/state_trackers/nine/nine_shader.c   |   5 +-
 src/gallium/state_trackers/nine/nine_state.c    | 170 ++++++++++++++++++++++--
 src/gallium/state_trackers/nine/nine_state.h    |  13 +-
 src/gallium/state_trackers/nine/stateblock9.c   |  85 ++++++++----
 src/gallium/state_trackers/nine/vertexshader9.c |  15 ++-
 src/gallium/state_trackers/nine/vertexshader9.h |   8 +-
 8 files changed, 322 insertions(+), 60 deletions(-)

diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index d350873..012cabf 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -168,12 +168,31 @@ NineDevice9_ctor( struct NineDevice9 *This,
     if (This->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) {
         DBG("Application asked full Software Vertex Processing.\n");
         This->swvp = true;
+        This->may_swvp = true;
     } else
         This->swvp = false;
-    if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)
+    if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
         DBG("Application asked mixed Software Vertex Processing.\n");
+        This->may_swvp = true;
+    }
     /* TODO: check if swvp is resetted by device Resets */
 
+    if (This->may_swvp &&
+        (This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+                                        PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE)
+                                     < (NINE_MAX_CONST_F_SWVP/2) * sizeof(float[4]) ||
+         This->screen->get_shader_param(This->screen, PIPE_SHADER_VERTEX,
+                                        PIPE_SHADER_CAP_MAX_CONST_BUFFERS) < 5)) {
+        /* Note: We just go on, some apps never use the abilities of
+         * swvp, and just set more constants than allowed at init.
+         * Only cards we support that are affected are the r500 */
+        WARN("Card unable to handle Software Vertex Processing. Game may fail\n");
+    }
+
+    /* When may_swvp, SetConstant* limits are different */
+    if (This->may_swvp)
+        This->caps.MaxVertexShaderConst = NINE_MAX_CONST_F_SWVP;
+
     This->pipe = This->screen->context_create(This->screen, NULL, 0);
     if (!This->pipe) { return E_OUTOFMEMORY; } /* guess */
 
@@ -320,12 +339,22 @@ NineDevice9_ctor( struct NineDevice9 *This,
         This->vs_const_size = max_const_vs * sizeof(float[4]);
         This->ps_const_size = max_const_ps * sizeof(float[4]);
         /* Include space for I,B constants for user constbuf. */
+        if (This->may_swvp) {
+            This->state.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+            if (!This->state.vs_const_f_swvp)
+                return E_OUTOFMEMORY;
+            This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+            This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
+            This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
+        } else {
+            This->state.vs_const_f_swvp = NULL;
+            This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+            This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
+            This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
+        }
         This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
         This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
-        This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
         This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
-        This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
-        This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
         if (!This->state.vs_const_f || !This->state.ps_const_f ||
             !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp ||
             !This->state.vs_const_i || !This->state.vs_const_b)
@@ -461,6 +490,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
     FREE(This->state.ps_lconstf_temp);
     FREE(This->state.vs_const_i);
     FREE(This->state.vs_const_b);
+    FREE(This->state.vs_const_f_swvp);
 
     if (This->swapchains) {
         for (i = 0; i < This->nswapchains; ++i)
@@ -2486,11 +2516,11 @@ NineDevice9_CreateStateBlock( struct NineDevice9 *This,
        /* TODO: texture/sampler state */
        memcpy(dst->changed.rs,
               nine_render_states_vertex, sizeof(dst->changed.rs));
-       nine_ranges_insert(&dst->changed.vs_const_f, 0, This->max_vs_const_f,
+       nine_ranges_insert(&dst->changed.vs_const_f, 0, This->may_swvp ? NINE_MAX_CONST_F_SWVP : This->max_vs_const_f,
                           &This->range_pool);
-       nine_ranges_insert(&dst->changed.vs_const_i, 0, NINE_MAX_CONST_I,
+       nine_ranges_insert(&dst->changed.vs_const_i, 0, This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I,
                           &This->range_pool);
-       nine_ranges_insert(&dst->changed.vs_const_b, 0, NINE_MAX_CONST_B,
+       nine_ranges_insert(&dst->changed.vs_const_b, 0, This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B,
                           &This->range_pool);
        for (s = 0; s < NINE_MAX_SAMPLERS; ++s)
            dst->changed.sampler[s] |= 1 << D3DSAMP_DMAPOFFSET;
@@ -2886,6 +2916,7 @@ NineDevice9_SetSoftwareVertexProcessing( struct NineDevice9 *This,
 {
     if (This->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING) {
         This->swvp = bSoftware;
+        This->state.changed.group |= NINE_STATE_SWVP;
         return D3D_OK;
     } else
         return D3DERR_INVALIDCALL; /* msdn. TODO: check in practice */
@@ -3371,6 +3402,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
                                       UINT Vector4fCount )
 {
     struct nine_state *state = This->update;
+    float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
 
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n",
         This, StartRegister, pConstantData, Vector4fCount);
@@ -3383,12 +3415,12 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (!This->is_recording) {
-        if (!memcmp(&state->vs_const_f[StartRegister * 4], pConstantData,
+        if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData,
                     Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
             return D3D_OK;
     }
 
-    memcpy(&state->vs_const_f[StartRegister * 4],
+    memcpy(&vs_const_f[StartRegister * 4],
            pConstantData,
            Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
 
@@ -3396,6 +3428,14 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
                        StartRegister, StartRegister + Vector4fCount,
                        &This->range_pool);
 
+    if (This->may_swvp) {
+        Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
+        if (StartRegister < NINE_MAX_CONST_F)
+            memcpy(&state->vs_const_f[StartRegister * 4],
+                   pConstantData,
+                   Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
+    }
+
     state->changed.group |= NINE_STATE_VS_CONST;
 
     return D3D_OK;
@@ -3408,13 +3448,14 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
                                       UINT Vector4fCount )
 {
     const struct nine_state *state = &This->state;
+    float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
 
     user_assert(StartRegister                  < This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
     user_assert(StartRegister + Vector4fCount <= This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     memcpy(pConstantData,
-           &state->vs_const_f[StartRegister * 4],
+           &vs_const_f[StartRegister * 4],
            Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
 
     return D3D_OK;
@@ -3432,8 +3473,10 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
     DBG("This=%p StartRegister=%u pConstantData=%p Vector4iCount=%u\n",
         This, StartRegister, pConstantData, Vector4iCount);
 
-    user_assert(StartRegister                  < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (This->driver_caps.vs_integer) {
@@ -3471,8 +3514,10 @@ NineDevice9_GetVertexShaderConstantI( struct NineDevice9 *This,
     const struct nine_state *state = &This->state;
     int i;
 
-    user_assert(StartRegister                  < NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + Vector4iCount <= NINE_MAX_CONST_I, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + Vector4iCount <= (This->may_swvp ? NINE_MAX_CONST_I_SWVP : NINE_MAX_CONST_I),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (This->driver_caps.vs_integer) {
@@ -3504,8 +3549,10 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
     DBG("This=%p StartRegister=%u pConstantData=%p BoolCount=%u\n",
         This, StartRegister, pConstantData, BoolCount);
 
-    user_assert(StartRegister              < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     if (!This->is_recording) {
@@ -3538,8 +3585,10 @@ NineDevice9_GetVertexShaderConstantB( struct NineDevice9 *This,
     const struct nine_state *state = &This->state;
     int i;
 
-    user_assert(StartRegister              < NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
-    user_assert(StartRegister + BoolCount <= NINE_MAX_CONST_B, D3DERR_INVALIDCALL);
+    user_assert(StartRegister < (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
+    user_assert(StartRegister + BoolCount <= (This->may_swvp ? NINE_MAX_CONST_B_SWVP : NINE_MAX_CONST_B),
+                D3DERR_INVALIDCALL);
     user_assert(pConstantData, D3DERR_INVALIDCALL);
 
     for (i = 0; i < BoolCount; i++)
diff --git a/src/gallium/state_trackers/nine/device9.h b/src/gallium/state_trackers/nine/device9.h
index f2fd164..b6aa5e0 100644
--- a/src/gallium/state_trackers/nine/device9.h
+++ b/src/gallium/state_trackers/nine/device9.h
@@ -48,6 +48,7 @@ struct NineDevice9
 {
     struct NineUnknown base;
     boolean ex;
+    boolean may_swvp;
 
     /* G3D context */
     struct pipe_screen *screen;
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 480f096..3a2bfa8 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -3499,7 +3499,10 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
     tx->parse++; /* for byte_size */
 
     if (tx->failure) {
-        ERR("Encountered buggy shader\n");
+        /* For VS shaders, we print the warning later,
+         * we first try with swvp. */
+        if (IS_PS)
+            ERR("Encountered buggy shader\n");
         ureg_destroy(tx->ureg);
         hr = D3DERR_INVALIDCALL;
         goto out;
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 2faca12..024e639 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -79,6 +79,143 @@ prepare_rasterizer(struct NineDevice9 *device)
 }
 
 static void
+prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
+{
+    struct nine_state *state = &device->state;
+
+    if (state->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) {
+        struct pipe_constant_buffer cb;
+
+        cb.buffer = NULL;
+        cb.buffer_offset = 0;
+        cb.buffer_size = 4096 * sizeof(float[4]);
+        cb.user_buffer = state->vs_const_f_swvp;
+
+        if (state->vs->lconstf.ranges) {
+            const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
+            const struct nine_range *r = lconstf->ranges;
+            unsigned n = 0;
+            float *dst = device->state.vs_lconstf_temp;
+            float *src = (float *)cb.user_buffer;
+            memcpy(dst, src, cb.buffer_size);
+            while (r) {
+                unsigned p = r->bgn;
+                unsigned c = r->end - r->bgn;
+                memcpy(&dst[p * 4], &lconstf->data[n * 4], c * 4 * sizeof(float));
+                n += c;
+                r = r->next;
+            }
+            cb.user_buffer = dst;
+        }
+
+        state->pipe.cb0_swvp = cb;
+
+        cb.user_buffer = (char *)cb.user_buffer + 4096 * sizeof(float[4]);
+        state->pipe.cb1_swvp = cb;
+    }
+
+    if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
+        struct pipe_constant_buffer cb;
+
+        cb.buffer = NULL;
+        cb.buffer_offset = 0;
+        cb.buffer_size = 2048 * sizeof(float[4]);
+        cb.user_buffer = state->vs_const_i;
+
+        state->pipe.cb2_swvp = cb;
+        state->changed.vs_const_i = 0;
+    }
+
+    if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
+        struct pipe_constant_buffer cb;
+
+        cb.buffer = NULL;
+        cb.buffer_offset = 0;
+        cb.buffer_size = 512 * sizeof(float[4]);
+        cb.user_buffer = state->vs_const_b;
+
+        state->pipe.cb3_swvp = cb;
+        state->changed.vs_const_b = 0;
+    }
+
+    if (!device->driver_caps.user_cbufs) {
+        struct pipe_constant_buffer *cb = &(state->pipe.cb0_swvp);
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb->buffer_size,
+                      device->constbuf_alignment,
+                      cb->user_buffer,
+                      &(cb->buffer_offset),
+                      &(cb->buffer));
+        u_upload_unmap(device->constbuf_uploader);
+        cb->user_buffer = NULL;
+
+        cb = &(state->pipe.cb1_swvp);
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb->buffer_size,
+                      device->constbuf_alignment,
+                      cb->user_buffer,
+                      &(cb->buffer_offset),
+                      &(cb->buffer));
+        u_upload_unmap(device->constbuf_uploader);
+        cb->user_buffer = NULL;
+
+        cb = &(state->pipe.cb2_swvp);
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb->buffer_size,
+                      device->constbuf_alignment,
+                      cb->user_buffer,
+                      &(cb->buffer_offset),
+                      &(cb->buffer));
+        u_upload_unmap(device->constbuf_uploader);
+        cb->user_buffer = NULL;
+
+        cb = &(state->pipe.cb3_swvp);
+        u_upload_data(device->constbuf_uploader,
+                      0,
+                      cb->buffer_size,
+                      device->constbuf_alignment,
+                      cb->user_buffer,
+                      &(cb->buffer_offset),
+                      &(cb->buffer));
+        u_upload_unmap(device->constbuf_uploader);
+        cb->user_buffer = NULL;
+    }
+
+    if (device->state.changed.vs_const_f) {
+        struct nine_range *r = device->state.changed.vs_const_f;
+        struct nine_range *p = r;
+        while (p->next)
+            p = p->next;
+        nine_range_pool_put_chain(&device->range_pool, r, p);
+        device->state.changed.vs_const_f = NULL;
+    }
+
+    if (device->state.changed.vs_const_i) {
+        struct nine_range *r = device->state.changed.vs_const_i;
+        struct nine_range *p = r;
+        while (p->next)
+            p = p->next;
+        nine_range_pool_put_chain(&device->range_pool, r, p);
+        device->state.changed.vs_const_i = NULL;
+    }
+
+    if (device->state.changed.vs_const_b) {
+        struct nine_range *r = device->state.changed.vs_const_b;
+        struct nine_range *p = r;
+        while (p->next)
+            p = p->next;
+        nine_range_pool_put_chain(&device->range_pool, r, p);
+        device->state.changed.vs_const_b = NULL;
+    }
+
+    state->changed.group &= ~NINE_STATE_VS_CONST;
+    state->commit |= NINE_STATE_COMMIT_CONST_VS;
+}
+
+static void
 prepare_vs_constants_userbuf(struct NineDevice9 *device)
 {
     struct nine_state *state = &device->state;
@@ -88,21 +225,27 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device)
     cb.buffer_size = device->state.vs->const_used_size;
     cb.user_buffer = device->state.vs_const_f;
 
-    if (!cb.buffer_size)
+    if (device->swvp) {
+        prepare_vs_constants_userbuf_swvp(device);
         return;
+    }
 
-    if (state->changed.vs_const_i) {
+    if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
         int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
         memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
         state->changed.vs_const_i = 0;
     }
-    if (state->changed.vs_const_b) {
+
+    if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
         int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
         uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
         memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
         state->changed.vs_const_b = 0;
     }
 
+    if (!cb.buffer_size)
+        return;
+
     if (device->state.vs->lconstf.ranges) {
         /* TODO: Can we make it so that we don't have to copy everything ? */
         const struct nine_lconstf *lconstf =  &device->state.vs->lconstf;
@@ -251,7 +394,7 @@ prepare_vs(struct NineDevice9 *device, uint8_t shader_changed)
     int has_key_changed = 0;
 
     if (likely(state->programmable_vs))
-        has_key_changed = NineVertexShader9_UpdateKey(vs, state);
+        has_key_changed = NineVertexShader9_UpdateKey(vs, device);
 
     if (!shader_changed && !has_key_changed)
         return 0;
@@ -740,8 +883,16 @@ commit_vs_constants(struct NineDevice9 *device)
 
     if (unlikely(!device->state.programmable_vs))
         pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
-    else
-        pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+    else {
+        if (device->swvp) {
+            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb0_swvp);
+            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 1, &device->state.pipe.cb1_swvp);
+            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 2, &device->state.pipe.cb2_swvp);
+            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 3, &device->state.pipe.cb3_swvp);
+        } else {
+            pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
+        }
+    }
 }
 
 static inline void
@@ -777,7 +928,8 @@ commit_ps(struct NineDevice9 *device)
    (NINE_STATE_VS |         \
     NINE_STATE_TEXTURE |    \
     NINE_STATE_FOG_SHADER | \
-    NINE_STATE_POINTSIZE_SHADER)
+    NINE_STATE_POINTSIZE_SHADER | \
+    NINE_STATE_SWVP)
 
 #define NINE_STATE_SHADER_CHANGE_PS \
    (NINE_STATE_PS |         \
@@ -886,14 +1038,14 @@ nine_update_state(struct NineDevice9 *device)
             commit_index_buffer(device);
     }
 
-    if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS))) {
+    if (likely(group & (NINE_STATE_FREQUENT | NINE_STATE_VS | NINE_STATE_PS | NINE_STATE_SWVP))) {
         if (group & NINE_STATE_MULTISAMPLE)
             group |= check_multisample(device);
         if (group & NINE_STATE_RASTERIZER)
             prepare_rasterizer(device);
         if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
             update_textures_and_samplers(device);
-        if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs)
+        if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS | NINE_STATE_SWVP)) && state->programmable_vs)
             prepare_vs_constants_userbuf(device);
         if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
             prepare_ps_constants_userbuf(device);
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 8c94832..2aa424d 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -84,8 +84,9 @@
 #define NINE_STATE_PS1X_SHADER (1 << 26)
 #define NINE_STATE_POINTSIZE_SHADER (1 << 27)
 #define NINE_STATE_MULTISAMPLE (1 << 28)
-#define NINE_STATE_ALL          0x1fffffff
-#define NINE_STATE_UNHANDLED   (1 << 29)
+#define NINE_STATE_SWVP        (1 << 29)
+#define NINE_STATE_ALL          0x3fffffff
+#define NINE_STATE_UNHANDLED   (1 << 30)
 
 #define NINE_STATE_COMMIT_DSA  (1 << 0)
 #define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
@@ -101,6 +102,9 @@
 #define NINE_MAX_CONST_F   256
 #define NINE_MAX_CONST_I   16
 #define NINE_MAX_CONST_B   16
+#define NINE_MAX_CONST_F_SWVP   8192
+#define NINE_MAX_CONST_I_SWVP   2048
+#define NINE_MAX_CONST_B_SWVP   2048
 #define NINE_MAX_CONST_ALL 276 /* B consts count only 1/4 th */
 
 #define NINE_CONST_I_BASE(nconstf) \
@@ -157,6 +161,7 @@ struct nine_state
      */
     struct NineVertexShader9 *vs;
     float *vs_const_f;
+    float *vs_const_f_swvp;
     int   *vs_const_i;
     BOOL  *vs_const_b;
     float *vs_lconstf_temp;
@@ -229,6 +234,10 @@ struct nine_state
         struct pipe_rasterizer_state rast;
         struct pipe_blend_state blend;
         struct pipe_constant_buffer cb_vs;
+        struct pipe_constant_buffer cb0_swvp;
+        struct pipe_constant_buffer cb1_swvp;
+        struct pipe_constant_buffer cb2_swvp;
+        struct pipe_constant_buffer cb3_swvp;
         struct pipe_constant_buffer cb_ps;
         struct pipe_constant_buffer cb_vs_ff;
         struct pipe_constant_buffer cb_ps_ff;
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 19c3766..102213e 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -30,8 +30,9 @@
 
 /* XXX TODO: handling of lights is broken */
 
-#define VS_CONST_I_SIZE (NINE_MAX_CONST_I * sizeof(int[4]))
-#define VS_CONST_B_SIZE (NINE_MAX_CONST_B * sizeof(BOOL))
+#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4])))
+#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL)))
+#define VS_CONST_F_SWVP_SIZE    (NINE_MAX_CONST_F_SWVP * sizeof(float[4]))
 
 HRESULT
 NineStateBlock9_ctor( struct NineStateBlock9 *This,
@@ -49,12 +50,19 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This,
 
     This->state.vs_const_f = MALLOC(This->base.device->vs_const_size);
     This->state.ps_const_f = MALLOC(This->base.device->ps_const_size);
-    This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE);
-    This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE);
+    This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE(This->base.device));
+    This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE(This->base.device));
     if (!This->state.vs_const_f || !This->state.ps_const_f ||
         !This->state.vs_const_i || !This->state.vs_const_b)
         return E_OUTOFMEMORY;
 
+    if (This->base.device->may_swvp) {
+        This->state.vs_const_f_swvp = MALLOC(VS_CONST_F_SWVP_SIZE);
+        if (!This->state.vs_const_f_swvp)
+            return E_OUTOFMEMORY;
+    } else
+        This->state.vs_const_f_swvp = NULL;
+
     return D3D_OK;
 }
 
@@ -71,6 +79,7 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
     FREE(state->ps_const_f);
     FREE(state->vs_const_i);
     FREE(state->vs_const_b);
+    FREE(state->vs_const_f_swvp);
 
     FREE(state->ff.light);
 
@@ -101,7 +110,8 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
  * TODO: compare ?
  */
 static void
-nine_state_copy_common(struct nine_state *dst,
+nine_state_copy_common(struct NineDevice9 *device,
+                       struct nine_state *dst,
                        struct nine_state *src,
                        struct nine_state *mask, /* aliases either src or dst */
                        const boolean apply,
@@ -130,13 +140,32 @@ nine_state_copy_common(struct nine_state *dst,
      */
     if (mask->changed.group & NINE_STATE_VS_CONST) {
         struct nine_range *r;
-        for (r = mask->changed.vs_const_f; r; r = r->next) {
-            memcpy(&dst->vs_const_f[r->bgn * 4],
-                   &src->vs_const_f[r->bgn * 4],
-                   (r->end - r->bgn) * 4 * sizeof(float));
-            if (apply)
-                nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
-                                   pool);
+        if (device->may_swvp) {
+            for (r = mask->changed.vs_const_f; r; r = r->next) {
+                int bgn = r->bgn;
+                int end = r->end;
+                memcpy(&dst->vs_const_f_swvp[bgn * 4],
+                       &src->vs_const_f_swvp[bgn * 4],
+                       (end - bgn) * 4 * sizeof(float));
+                if (apply)
+                    nine_ranges_insert(&dst->changed.vs_const_f, bgn, end,
+                                       pool);
+                if (bgn < device->max_vs_const_f) {
+                    end = MIN2(end, device->max_vs_const_f);
+                    memcpy(&dst->vs_const_f[bgn * 4],
+                           &src->vs_const_f[bgn * 4],
+                           (end - bgn) * 4 * sizeof(float));
+                }
+            }
+        } else {
+            for (r = mask->changed.vs_const_f; r; r = r->next) {
+                memcpy(&dst->vs_const_f[r->bgn * 4],
+                       &src->vs_const_f[r->bgn * 4],
+                       (r->end - r->bgn) * 4 * sizeof(float));
+                if (apply)
+                    nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
+                                       pool);
+            }
         }
         for (r = mask->changed.vs_const_i; r; r = r->next) {
             memcpy(&dst->vs_const_i[r->bgn * 4],
@@ -342,7 +371,8 @@ nine_state_copy_common(struct nine_state *dst,
 }
 
 static void
-nine_state_copy_common_all(struct nine_state *dst,
+nine_state_copy_common_all(struct NineDevice9 *device,
+                           struct nine_state *dst,
                            const struct nine_state *src,
                            struct nine_state *help,
                            const boolean apply,
@@ -369,12 +399,15 @@ nine_state_copy_common_all(struct nine_state *dst,
     if (1) {
         struct nine_range *r = help->changed.vs_const_f;
         memcpy(&dst->vs_const_f[0],
-               &src->vs_const_f[0], (r->end - r->bgn) * 4 * sizeof(float));
+               &src->vs_const_f[0], device->max_vs_const_f * 4 * sizeof(float));
+        if (device->may_swvp)
+            memcpy(dst->vs_const_f_swvp,
+                   src->vs_const_f_swvp, VS_CONST_F_SWVP_SIZE);
         if (apply)
             nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool);
 
-        memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE);
-        memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE);
+        memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE(device));
+        memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE(device));
         if (apply) {
             r = help->changed.vs_const_i;
             nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, pool);
@@ -491,17 +524,18 @@ nine_state_copy_common_all(struct nine_state *dst,
 HRESULT NINE_WINAPI
 NineStateBlock9_Capture( struct NineStateBlock9 *This )
 {
+    struct NineDevice9 *device = This->base.device;
     struct nine_state *dst = &This->state;
-    struct nine_state *src = &This->base.device->state;
-    const int MaxStreams = This->base.device->caps.MaxStreams;
+    struct nine_state *src = &device->state;
+    const int MaxStreams = device->caps.MaxStreams;
     unsigned s;
 
     DBG("This=%p\n", This);
 
     if (This->type == NINESBT_ALL)
-        nine_state_copy_common_all(dst, src, dst, FALSE, NULL, MaxStreams);
+        nine_state_copy_common_all(device, dst, src, dst, FALSE, NULL, MaxStreams);
     else
-        nine_state_copy_common(dst, src, dst, FALSE, NULL);
+        nine_state_copy_common(device, dst, src, dst, FALSE, NULL);
 
     if (dst->changed.group & NINE_STATE_VDECL)
         nine_bind(&dst->vdecl, src->vdecl);
@@ -521,18 +555,19 @@ NineStateBlock9_Capture( struct NineStateBlock9 *This )
 HRESULT NINE_WINAPI
 NineStateBlock9_Apply( struct NineStateBlock9 *This )
 {
-    struct nine_state *dst = &This->base.device->state;
+    struct NineDevice9 *device = This->base.device;
+    struct nine_state *dst = &device->state;
     struct nine_state *src = &This->state;
-    struct nine_range_pool *pool = &This->base.device->range_pool;
-    const int MaxStreams = This->base.device->caps.MaxStreams;
+    struct nine_range_pool *pool = &device->range_pool;
+    const int MaxStreams = device->caps.MaxStreams;
     unsigned s;
 
     DBG("This=%p\n", This);
 
     if (This->type == NINESBT_ALL)
-        nine_state_copy_common_all(dst, src, src, TRUE, pool, MaxStreams);
+        nine_state_copy_common_all(device, dst, src, src, TRUE, pool, MaxStreams);
     else
-        nine_state_copy_common(dst, src, src, TRUE, pool);
+        nine_state_copy_common(device, dst, src, src, TRUE, pool);
 
     if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
         NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl);
diff --git a/src/gallium/state_trackers/nine/vertexshader9.c b/src/gallium/state_trackers/nine/vertexshader9.c
index bc09a41..92f8f6b 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.c
+++ b/src/gallium/state_trackers/nine/vertexshader9.c
@@ -63,12 +63,21 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
     info.fog_enable = 0;
     info.point_size_min = 0;
     info.point_size_max = 0;
-    info.swvp_on = false;
+    info.swvp_on = !!(device->params.BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING);
 
     hr = nine_translate_shader(device, &info);
+    if (hr == D3DERR_INVALIDCALL &&
+        (device->params.BehaviorFlags & D3DCREATE_MIXED_VERTEXPROCESSING)) {
+        /* Retry with a swvp shader. It will require swvp to be on. */
+        info.swvp_on = true;
+        hr = nine_translate_shader(device, &info);
+    }
+    if (hr == D3DERR_INVALIDCALL)
+        ERR("Encountered buggy shader\n");
     if (FAILED(hr))
         return hr;
     This->byte_code.version = info.version;
+    This->swvp_only = info.swvp_on;
 
     This->byte_code.tokens = mem_dup(pFunction, info.byte_size);
     if (!This->byte_code.tokens)
@@ -77,7 +86,7 @@ NineVertexShader9_ctor( struct NineVertexShader9 *This,
 
     This->variant.cso = info.cso;
     This->last_cso = info.cso;
-    This->last_key = 0;
+    This->last_key = (uint32_t) (info.swvp_on << 9);
 
     This->const_used_size = info.const_used_size;
     This->lconstf = info.lconstf;
@@ -168,7 +177,7 @@ NineVertexShader9_GetVariant( struct NineVertexShader9 *This )
         info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
         info.point_size_min = asfloat(device->state.rs[D3DRS_POINTSIZE_MIN]);
         info.point_size_max = asfloat(device->state.rs[D3DRS_POINTSIZE_MAX]);
-        info.swvp_on = false;
+        info.swvp_on = device->swvp;
 
         hr = nine_translate_shader(This->base.device, &info);
         if (FAILED(hr))
diff --git a/src/gallium/state_trackers/nine/vertexshader9.h b/src/gallium/state_trackers/nine/vertexshader9.h
index 3c9db79..823c71a 100644
--- a/src/gallium/state_trackers/nine/vertexshader9.h
+++ b/src/gallium/state_trackers/nine/vertexshader9.h
@@ -26,6 +26,7 @@
 #include "util/u_half.h"
 
 #include "iunknown.h"
+#include "device9.h"
 #include "nine_helpers.h"
 #include "nine_shader.h"
 #include "nine_state.h"
@@ -50,6 +51,7 @@ struct NineVertexShader9
 
     boolean position_t; /* if true, disable vport transform */
     boolean point_size; /* if true, set rasterizer.point_size_per_vertex to 1 */
+    boolean swvp_only;
 
     unsigned const_used_size; /* in bytes */
 
@@ -73,8 +75,9 @@ NineVertexShader9( void *data )
 
 static inline BOOL
 NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
-                             struct nine_state *state )
+                             struct NineDevice9 *device )
 {
+    struct nine_state *state = &(device->state);
     uint8_t samplers_shadow;
     uint64_t key;
     BOOL res;
@@ -84,7 +87,8 @@ NineVertexShader9_UpdateKey( struct NineVertexShader9 *vs,
     key = samplers_shadow;
 
     if (vs->byte_code.version < 0x30)
-        key |= (uint32_t) (state->rs[D3DRS_FOGENABLE] << 8);
+        key |= (uint32_t) ((!!state->rs[D3DRS_FOGENABLE]) << 8);
+    key |= (uint32_t) (device->swvp << 9);
 
     /* We want to use a 64 bits key for performance.
      * Use compressed float16 values for the pointsize min/max in the key.
-- 
2.10.0



More information about the mesa-dev mailing list