[Mesa-dev] [PATCH 13/84] st/nine: Back all shader constants to nine_context
Axel Davy
axel.davy at ens.fr
Wed Dec 7 22:54:46 UTC 2016
For the device's vertex shader (vs) float constants with may_swvp,
the same approach as for the other constant types is
used.
Also memset the constants properly.
Signed-off-by: Axel Davy <axel.davy at ens.fr>
---
src/gallium/state_trackers/nine/device9.c | 138 ++++++----
src/gallium/state_trackers/nine/nine_state.c | 349 +++++++++++++++++++-------
src/gallium/state_trackers/nine/nine_state.h | 68 ++++-
src/gallium/state_trackers/nine/stateblock9.c | 82 +-----
4 files changed, 411 insertions(+), 226 deletions(-)
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 7dbd2e4..8eeddb2 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -345,24 +345,36 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->ps_const_size = max_const_ps * sizeof(float[4]);
/* Include space for I,B constants for user constbuf. */
if (This->may_swvp) {
- This->state.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
- if (!This->state.vs_const_f_swvp)
+ This->state.vs_const_f = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ This->context.vs_const_f_swvp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ if (!This->context.vs_const_f_swvp)
return E_OUTOFMEMORY;
This->state.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
+ This->context.vs_lconstf_temp = CALLOC(NINE_MAX_CONST_F_SWVP * sizeof(float[4]),1);
This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
+ This->context.vs_const_i = CALLOC(NINE_MAX_CONST_I_SWVP * sizeof(int[4]), 1);
This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
+ This->context.vs_const_b = CALLOC(NINE_MAX_CONST_B_SWVP * sizeof(BOOL), 1);
} else {
- This->state.vs_const_f_swvp = NULL;
+ This->state.vs_const_f = CALLOC(NINE_MAX_CONST_F * sizeof(float[4]), 1);
+ This->context.vs_const_f_swvp = NULL;
This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+ This->context.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
This->state.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
+ This->context.vs_const_i = CALLOC(NINE_MAX_CONST_I * sizeof(int[4]), 1);
This->state.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
+ This->context.vs_const_b = CALLOC(NINE_MAX_CONST_B * sizeof(BOOL), 1);
}
- This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
+ This->context.vs_const_f = CALLOC(This->vs_const_size, 1);
This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
- This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
- if (!This->state.vs_const_f || !This->state.ps_const_f ||
- !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp ||
- !This->state.vs_const_i || !This->state.vs_const_b)
+ This->context.ps_const_f = CALLOC(This->ps_const_size, 1);
+ This->context.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
+ if (!This->state.vs_const_f || !This->context.vs_const_f ||
+ !This->state.ps_const_f || !This->context.ps_const_f ||
+ !This->state.vs_lconstf_temp || !This->context.vs_lconstf_temp ||
+ !This->context.ps_lconstf_temp ||
+ !This->state.vs_const_i || !This->context.vs_const_i ||
+ !This->state.vs_const_b || !This->context.vs_const_b)
return E_OUTOFMEMORY;
if (strstr(pScreen->get_name(pScreen), "AMD") ||
@@ -505,12 +517,17 @@ NineDevice9_dtor( struct NineDevice9 *This )
pipe_resource_reference(&This->dummy_texture, NULL);
pipe_resource_reference(&This->dummy_vbo, NULL);
FREE(This->state.vs_const_f);
+ FREE(This->context.vs_const_f);
FREE(This->state.ps_const_f);
+ FREE(This->context.ps_const_f);
FREE(This->state.vs_lconstf_temp);
- FREE(This->state.ps_lconstf_temp);
+ FREE(This->context.vs_lconstf_temp);
+ FREE(This->context.ps_lconstf_temp);
FREE(This->state.vs_const_i);
+ FREE(This->context.vs_const_i);
FREE(This->state.vs_const_b);
- FREE(This->state.vs_const_f_swvp);
+ FREE(This->context.vs_const_b);
+ FREE(This->context.vs_const_f_swvp);
pipe_resource_reference(&This->cursor.image, NULL);
FREE(This->cursor.hw_upload_temp);
@@ -3190,7 +3207,7 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
UINT Vector4fCount )
{
struct nine_state *state = This->update;
- float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
+ float *vs_const_f = state->vs_const_f;
DBG("This=%p StartRegister=%u pConstantData=%p Vector4fCount=%u\n",
This, StartRegister, pConstantData, Vector4fCount);
@@ -3202,29 +3219,29 @@ NineDevice9_SetVertexShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
user_assert(pConstantData, D3DERR_INVALIDCALL);
- if (!This->is_recording) {
- if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData,
- Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
- return D3D_OK;
+ if (unlikely(This->is_recording)) {
+ memcpy(&vs_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
+
+ nine_ranges_insert(&state->changed.vs_const_f,
+ StartRegister, StartRegister + Vector4fCount,
+ &This->range_pool);
+
+ state->changed.group |= NINE_STATE_VS_CONST;
+
+ return D3D_OK;
}
+ if (!memcmp(&vs_const_f[StartRegister * 4], pConstantData,
+ Vector4fCount * 4 * sizeof(state->vs_const_f[0])))
+ return D3D_OK;
+
memcpy(&vs_const_f[StartRegister * 4],
pConstantData,
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
- nine_ranges_insert(&state->changed.vs_const_f,
- StartRegister, StartRegister + Vector4fCount,
- &This->range_pool);
-
- if (This->may_swvp) {
- Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
- if (StartRegister < NINE_MAX_CONST_F)
- memcpy(&state->vs_const_f[StartRegister * 4],
- pConstantData,
- Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
- }
-
- state->changed.group |= NINE_STATE_VS_CONST;
+ nine_context_set_vertex_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount);
return D3D_OK;
}
@@ -3236,14 +3253,13 @@ NineDevice9_GetVertexShaderConstantF( struct NineDevice9 *This,
UINT Vector4fCount )
{
const struct nine_state *state = &This->state;
- float *vs_const_f = This->may_swvp ? state->vs_const_f_swvp : state->vs_const_f;
user_assert(StartRegister < This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
user_assert(StartRegister + Vector4fCount <= This->caps.MaxVertexShaderConst, D3DERR_INVALIDCALL);
user_assert(pConstantData, D3DERR_INVALIDCALL);
memcpy(pConstantData,
- &vs_const_f[StartRegister * 4],
+ &state->vs_const_f[StartRegister * 4],
Vector4fCount * 4 * sizeof(state->vs_const_f[0]));
return D3D_OK;
@@ -3285,10 +3301,13 @@ NineDevice9_SetVertexShaderConstantI( struct NineDevice9 *This,
}
}
- nine_ranges_insert(&state->changed.vs_const_i,
- StartRegister, StartRegister + Vector4iCount,
- &This->range_pool);
- state->changed.group |= NINE_STATE_VS_CONST;
+ if (unlikely(This->is_recording)) {
+ nine_ranges_insert(&state->changed.vs_const_i,
+ StartRegister, StartRegister + Vector4iCount,
+ &This->range_pool);
+ state->changed.group |= NINE_STATE_VS_CONST;
+ } else
+ nine_context_set_vertex_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount);
return D3D_OK;
}
@@ -3356,10 +3375,13 @@ NineDevice9_SetVertexShaderConstantB( struct NineDevice9 *This,
for (i = 0; i < BoolCount; i++)
state->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
- nine_ranges_insert(&state->changed.vs_const_b,
- StartRegister, StartRegister + BoolCount,
- &This->range_pool);
- state->changed.group |= NINE_STATE_VS_CONST;
+ if (unlikely(This->is_recording)) {
+ nine_ranges_insert(&state->changed.vs_const_b,
+ StartRegister, StartRegister + BoolCount,
+ &This->range_pool);
+ state->changed.group |= NINE_STATE_VS_CONST;
+ } else
+ nine_context_set_vertex_shader_constant_b(This, StartRegister, pConstantData, BoolCount);
return D3D_OK;
}
@@ -3599,21 +3621,28 @@ NineDevice9_SetPixelShaderConstantF( struct NineDevice9 *This,
return D3D_OK;
user_assert(pConstantData, D3DERR_INVALIDCALL);
- if (!This->is_recording) {
- if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
- Vector4fCount * 4 * sizeof(state->ps_const_f[0])))
- return D3D_OK;
+ if (unlikely(This->is_recording)) {
+ memcpy(&state->ps_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
+
+ nine_ranges_insert(&state->changed.ps_const_f,
+ StartRegister, StartRegister + Vector4fCount,
+ &This->range_pool);
+
+ state->changed.group |= NINE_STATE_PS_CONST;
+ return D3D_OK;
}
+ if (!memcmp(&state->ps_const_f[StartRegister * 4], pConstantData,
+ Vector4fCount * 4 * sizeof(state->ps_const_f[0])))
+ return D3D_OK;
+
memcpy(&state->ps_const_f[StartRegister * 4],
pConstantData,
Vector4fCount * 4 * sizeof(state->ps_const_f[0]));
- nine_ranges_insert(&state->changed.ps_const_f,
- StartRegister, StartRegister + Vector4fCount,
- &This->range_pool);
-
- state->changed.group |= NINE_STATE_PS_CONST;
+ nine_context_set_pixel_shader_constant_f(This, StartRegister, pConstantData, Vector4fCount);
return D3D_OK;
}
@@ -3670,8 +3699,12 @@ NineDevice9_SetPixelShaderConstantI( struct NineDevice9 *This,
state->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
}
}
- state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
- state->changed.group |= NINE_STATE_PS_CONST;
+
+ if (unlikely(This->is_recording)) {
+ state->changed.ps_const_i |= ((1 << Vector4iCount) - 1) << StartRegister;
+ state->changed.group |= NINE_STATE_PS_CONST;
+ } else
+ nine_context_set_pixel_shader_constant_i(This, StartRegister, pConstantData, Vector4iCount);
return D3D_OK;
}
@@ -3735,8 +3768,11 @@ NineDevice9_SetPixelShaderConstantB( struct NineDevice9 *This,
for (i = 0; i < BoolCount; i++)
state->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
- state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister;
- state->changed.group |= NINE_STATE_PS_CONST;
+ if (unlikely(This->is_recording)) {
+ state->changed.ps_const_b |= ((1 << BoolCount) - 1) << StartRegister;
+ state->changed.group |= NINE_STATE_PS_CONST;
+ } else
+ nine_context_set_pixel_shader_constant_b(This, StartRegister, pConstantData, BoolCount);
return D3D_OK;
}
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 39bdac9..9217529 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -88,18 +88,18 @@ prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
struct nine_state *state = &device->state;
struct nine_context *context = &device->context;
- if (state->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) {
+ if (context->changed.vs_const_f || state->changed.group & NINE_STATE_SWVP) {
struct pipe_constant_buffer cb;
cb.buffer_offset = 0;
cb.buffer_size = 4096 * sizeof(float[4]);
- cb.user_buffer = state->vs_const_f_swvp;
+ cb.user_buffer = context->vs_const_f_swvp;
if (context->vs->lconstf.ranges) {
const struct nine_lconstf *lconstf = &(context->vs->lconstf);
const struct nine_range *r = lconstf->ranges;
unsigned n = 0;
- float *dst = device->state.vs_lconstf_temp;
+ float *dst = context->vs_lconstf_temp;
float *src = (float *)cb.user_buffer;
memcpy(dst, src, cb.buffer_size);
while (r) {
@@ -123,30 +123,34 @@ prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
context->pipe.cb1_swvp.buffer_offset = cb.buffer_offset;
context->pipe.cb1_swvp.buffer_size = cb.buffer_size;
context->pipe.cb1_swvp.user_buffer = cb.user_buffer;
+
+ context->changed.vs_const_f = 0;
}
- if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
+ if (context->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
struct pipe_constant_buffer cb;
cb.buffer_offset = 0;
cb.buffer_size = 2048 * sizeof(float[4]);
- cb.user_buffer = state->vs_const_i;
+ cb.user_buffer = context->vs_const_i;
context->pipe.cb2_swvp.buffer_offset = cb.buffer_offset;
context->pipe.cb2_swvp.buffer_size = cb.buffer_size;
context->pipe.cb2_swvp.user_buffer = cb.user_buffer;
+ context->changed.vs_const_i = 0;
}
- if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
+ if (context->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
struct pipe_constant_buffer cb;
cb.buffer_offset = 0;
cb.buffer_size = 512 * sizeof(float[4]);
- cb.user_buffer = state->vs_const_b;
+ cb.user_buffer = context->vs_const_b;
context->pipe.cb3_swvp.buffer_offset = cb.buffer_offset;
context->pipe.cb3_swvp.buffer_size = cb.buffer_size;
context->pipe.cb3_swvp.user_buffer = cb.user_buffer;
+ context->changed.vs_const_b = 0;
}
if (!device->driver_caps.user_cbufs) {
@@ -195,33 +199,6 @@ prepare_vs_constants_userbuf_swvp(struct NineDevice9 *device)
cb->user_buffer = NULL;
}
- if (device->state.changed.vs_const_f) {
- struct nine_range *r = device->state.changed.vs_const_f;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.vs_const_f = NULL;
- }
-
- if (device->state.changed.vs_const_i) {
- struct nine_range *r = device->state.changed.vs_const_i;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.vs_const_i = NULL;
- }
-
- if (device->state.changed.vs_const_b) {
- struct nine_range *r = device->state.changed.vs_const_b;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.vs_const_b = NULL;
- }
-
state->changed.group &= ~NINE_STATE_VS_CONST;
context->commit |= NINE_STATE_COMMIT_CONST_VS;
}
@@ -235,40 +212,24 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device)
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = context->vs->const_used_size;
- cb.user_buffer = device->state.vs_const_f;
+ cb.user_buffer = context->vs_const_f;
if (device->swvp) {
prepare_vs_constants_userbuf_swvp(device);
return;
}
- if (state->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
- int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
- memcpy(idst, state->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
+ if (context->changed.vs_const_i || state->changed.group & NINE_STATE_SWVP) {
+ int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
+ memcpy(idst, context->vs_const_i, NINE_MAX_CONST_I * sizeof(int[4]));
+ context->changed.vs_const_i = 0;
}
- if (state->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
- int *idst = (int *)&state->vs_const_f[4 * device->max_vs_const_f];
+ if (context->changed.vs_const_b || state->changed.group & NINE_STATE_SWVP) {
+ int *idst = (int *)&context->vs_const_f[4 * device->max_vs_const_f];
uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
- memcpy(bdst, state->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
- }
-
- if (device->state.changed.vs_const_i) {
- struct nine_range *r = device->state.changed.vs_const_i;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.vs_const_i = NULL;
- }
-
- if (device->state.changed.vs_const_b) {
- struct nine_range *r = device->state.changed.vs_const_b;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.vs_const_b = NULL;
+ memcpy(bdst, context->vs_const_b, NINE_MAX_CONST_B * sizeof(BOOL));
+ context->changed.vs_const_b = 0;
}
if (!cb.buffer_size)
@@ -279,7 +240,7 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device)
const struct nine_lconstf *lconstf = &(context->vs->lconstf);
const struct nine_range *r = lconstf->ranges;
unsigned n = 0;
- float *dst = device->state.vs_lconstf_temp;
+ float *dst = context->vs_lconstf_temp;
float *src = (float *)cb.user_buffer;
memcpy(dst, src, cb.buffer_size);
while (r) {
@@ -306,14 +267,7 @@ prepare_vs_constants_userbuf(struct NineDevice9 *device)
} else
context->pipe.cb_vs = cb;
- if (device->state.changed.vs_const_f) {
- struct nine_range *r = device->state.changed.vs_const_f;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.vs_const_f = NULL;
- }
+ context->changed.vs_const_f = 0;
state->changed.group &= ~NINE_STATE_VS_CONST;
context->commit |= NINE_STATE_COMMIT_CONST_VS;
@@ -328,34 +282,34 @@ prepare_ps_constants_userbuf(struct NineDevice9 *device)
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = device->state.ps->const_used_size;
- cb.user_buffer = device->state.ps_const_f;
+ cb.user_buffer = context->ps_const_f;
- if (state->changed.ps_const_i) {
- int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
- memcpy(idst, state->ps_const_i, sizeof(state->ps_const_i));
- state->changed.ps_const_i = 0;
+ if (context->changed.ps_const_i) {
+ int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
+ memcpy(idst, context->ps_const_i, sizeof(context->ps_const_i));
+ context->changed.ps_const_i = 0;
}
- if (state->changed.ps_const_b) {
- int *idst = (int *)&state->ps_const_f[4 * device->max_ps_const_f];
+ if (context->changed.ps_const_b) {
+ int *idst = (int *)&context->ps_const_f[4 * device->max_ps_const_f];
uint32_t *bdst = (uint32_t *)&idst[4 * NINE_MAX_CONST_I];
- memcpy(bdst, state->ps_const_b, sizeof(state->ps_const_b));
- state->changed.ps_const_b = 0;
+ memcpy(bdst, context->ps_const_b, sizeof(context->ps_const_b));
+ context->changed.ps_const_b = 0;
}
/* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
if (device->state.ps->bumpenvmat_needed) {
- memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
- memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars));
+ memcpy(context->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+ memcpy(&context->ps_lconstf_temp[4 * 8], &device->context.bumpmap_vars, sizeof(device->context.bumpmap_vars));
- cb.user_buffer = device->state.ps_lconstf_temp;
+ cb.user_buffer = context->ps_lconstf_temp;
}
if (state->ps->byte_code.version < 0x30 &&
context->rs[D3DRS_FOGENABLE]) {
- float *dst = &state->ps_lconstf_temp[4 * 32];
- if (cb.user_buffer != state->ps_lconstf_temp) {
- memcpy(state->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
- cb.user_buffer = state->ps_lconstf_temp;
+ float *dst = &context->ps_lconstf_temp[4 * 32];
+ if (cb.user_buffer != context->ps_lconstf_temp) {
+ memcpy(context->ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+ cb.user_buffer = context->ps_lconstf_temp;
}
d3dcolor_to_rgba(dst, context->rs[D3DRS_FOGCOLOR]);
@@ -385,14 +339,8 @@ prepare_ps_constants_userbuf(struct NineDevice9 *device)
} else
context->pipe.cb_ps = cb;
- if (device->state.changed.ps_const_f) {
- struct nine_range *r = device->state.changed.ps_const_f;
- struct nine_range *p = r;
- while (p->next)
- p = p->next;
- nine_range_pool_put_chain(&device->range_pool, r, p);
- device->state.changed.ps_const_f = NULL;
- }
+ context->changed.ps_const_f = 0;
+
state->changed.group &= ~NINE_STATE_PS_CONST;
context->commit |= NINE_STATE_COMMIT_CONST_PS;
}
@@ -1331,6 +1279,139 @@ nine_context_set_vertex_shader(struct NineDevice9 *device,
}
void
+nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device,
+ UINT StartRegister,
+ const float *pConstantData,
+ UINT Vector4fCount)
+{
+ struct nine_state *state = &device->state;
+ struct nine_context *context = &device->context;
+ float *vs_const_f = device->may_swvp ? context->vs_const_f_swvp : context->vs_const_f;
+
+ memcpy(&vs_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(context->vs_const_f[0]));
+
+ if (device->may_swvp) {
+ Vector4fCount = MIN2(StartRegister + Vector4fCount, NINE_MAX_CONST_F) - StartRegister;
+ if (StartRegister < NINE_MAX_CONST_F)
+ memcpy(&context->vs_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(context->vs_const_f[0]));
+ }
+
+ context->changed.vs_const_f = TRUE;
+ state->changed.group |= NINE_STATE_VS_CONST;
+}
+
+
+void
+nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device,
+ UINT StartRegister,
+ const int *pConstantData,
+ UINT Vector4iCount)
+{
+ struct nine_state *state = &device->state;
+ struct nine_context *context = &device->context;
+ int i;
+
+ if (device->driver_caps.vs_integer) {
+ memcpy(&context->vs_const_i[4 * StartRegister],
+ pConstantData,
+ Vector4iCount * sizeof(int[4]));
+ } else {
+ for (i = 0; i < Vector4iCount; i++) {
+ context->vs_const_i[4 * (StartRegister + i)] = fui((float)(pConstantData[4 * i]));
+ context->vs_const_i[4 * (StartRegister + i) + 1] = fui((float)(pConstantData[4 * i + 1]));
+ context->vs_const_i[4 * (StartRegister + i) + 2] = fui((float)(pConstantData[4 * i + 2]));
+ context->vs_const_i[4 * (StartRegister + i) + 3] = fui((float)(pConstantData[4 * i + 3]));
+ }
+ }
+
+ context->changed.vs_const_i = TRUE;
+ state->changed.group |= NINE_STATE_VS_CONST;
+}
+
+void
+nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device,
+ UINT StartRegister,
+ const BOOL *pConstantData,
+ UINT BoolCount)
+{
+ struct nine_state *state = &device->state;
+ struct nine_context *context = &device->context;
+ int i;
+ uint32_t bool_true = device->driver_caps.vs_integer ? 0xFFFFFFFF : fui(1.0f);
+
+ for (i = 0; i < BoolCount; i++)
+ context->vs_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
+
+ context->changed.vs_const_b = TRUE;
+ state->changed.group |= NINE_STATE_VS_CONST;
+}
+
+void
+nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device,
+ UINT StartRegister,
+ const float *pConstantData,
+ UINT Vector4fCount)
+{
+ struct nine_state *state = &device->state;
+ struct nine_context *context = &device->context;
+
+ memcpy(&context->ps_const_f[StartRegister * 4],
+ pConstantData,
+ Vector4fCount * 4 * sizeof(context->ps_const_f[0]));
+
+ context->changed.ps_const_f = TRUE;
+ state->changed.group |= NINE_STATE_PS_CONST;
+}
+
+void
+nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device,
+ UINT StartRegister,
+ const int *pConstantData,
+ UINT Vector4iCount)
+{
+ struct nine_state *state = &device->state;
+ struct nine_context *context = &device->context;
+ int i;
+
+ if (device->driver_caps.ps_integer) {
+ memcpy(&context->ps_const_i[StartRegister][0],
+ pConstantData,
+ Vector4iCount * sizeof(context->ps_const_i[0]));
+ } else {
+ for (i = 0; i < Vector4iCount; i++) {
+ context->ps_const_i[StartRegister+i][0] = fui((float)(pConstantData[4*i]));
+ context->ps_const_i[StartRegister+i][1] = fui((float)(pConstantData[4*i+1]));
+ context->ps_const_i[StartRegister+i][2] = fui((float)(pConstantData[4*i+2]));
+ context->ps_const_i[StartRegister+i][3] = fui((float)(pConstantData[4*i+3]));
+ }
+ }
+ context->changed.ps_const_i = TRUE;
+ state->changed.group |= NINE_STATE_PS_CONST;
+}
+
+void
+nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device,
+ UINT StartRegister,
+ const BOOL *pConstantData,
+ UINT BoolCount)
+{
+ struct nine_state *state = &device->state;
+ struct nine_context *context = &device->context;
+ int i;
+ uint32_t bool_true = device->driver_caps.ps_integer ? 0xFFFFFFFF : fui(1.0f);
+
+ for (i = 0; i < BoolCount; i++)
+ context->ps_const_b[StartRegister + i] = pConstantData[i] ? bool_true : 0;
+
+ context->changed.ps_const_b = TRUE;
+ state->changed.group |= NINE_STATE_PS_CONST;
+}
+
+void
nine_context_apply_stateblock(struct NineDevice9 *device,
const struct nine_state *src)
{
@@ -1411,6 +1492,70 @@ nine_context_apply_stateblock(struct NineDevice9 *device,
nine_bind(&context->vs, src->vs);
context->programmable_vs = context->vs && !(context->vdecl && context->vdecl->position_t);
+
+ /* Vertex constants */
+ if (src->changed.group & NINE_STATE_VS_CONST) {
+ struct nine_range *r;
+ if (device->may_swvp) {
+ for (r = src->changed.vs_const_f; r; r = r->next) {
+ int bgn = r->bgn;
+ int end = r->end;
+ memcpy(&context->vs_const_f_swvp[bgn * 4],
+ &src->vs_const_f[bgn * 4],
+ (end - bgn) * 4 * sizeof(float));
+ if (bgn < device->max_vs_const_f) {
+ end = MIN2(end, device->max_vs_const_f);
+ memcpy(&context->vs_const_f[bgn * 4],
+ &src->vs_const_f[bgn * 4],
+ (end - bgn) * 4 * sizeof(float));
+ }
+ }
+ } else {
+ for (r = src->changed.vs_const_f; r; r = r->next) {
+ memcpy(&context->vs_const_f[r->bgn * 4],
+ &src->vs_const_f[r->bgn * 4],
+ (r->end - r->bgn) * 4 * sizeof(float));
+ }
+ }
+ for (r = src->changed.vs_const_i; r; r = r->next) {
+ memcpy(&context->vs_const_i[r->bgn * 4],
+ &src->vs_const_i[r->bgn * 4],
+ (r->end - r->bgn) * 4 * sizeof(int));
+ }
+ for (r = src->changed.vs_const_b; r; r = r->next) {
+ memcpy(&context->vs_const_b[r->bgn],
+ &src->vs_const_b[r->bgn],
+ (r->end - r->bgn) * sizeof(int));
+ }
+ context->changed.vs_const_f = !!src->changed.vs_const_f;
+ context->changed.vs_const_i = !!src->changed.vs_const_i;
+ context->changed.vs_const_b = !!src->changed.vs_const_b;
+ }
+
+ /* Pixel constants */
+ if (src->changed.group & NINE_STATE_PS_CONST) {
+ struct nine_range *r;
+ for (r = src->changed.ps_const_f; r; r = r->next) {
+ memcpy(&context->ps_const_f[r->bgn * 4],
+ &src->ps_const_f[r->bgn * 4],
+ (r->end - r->bgn) * 4 * sizeof(float));
+ }
+ if (src->changed.ps_const_i) {
+ uint16_t m = src->changed.ps_const_i;
+ for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
+ if (m & 1)
+ memcpy(context->ps_const_i[i], src->ps_const_i[i], 4 * sizeof(int));
+ }
+ if (src->changed.ps_const_b) {
+ uint16_t m = src->changed.ps_const_b;
+ for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
+ if (m & 1)
+ context->ps_const_b[i] = src->ps_const_b[i];
+ }
+ context->changed.ps_const_f = !!src->changed.ps_const_f;
+ context->changed.ps_const_i = !!src->changed.ps_const_i;
+ context->changed.ps_const_b = !!src->changed.ps_const_b;
+ }
}
static void
@@ -1879,10 +2024,20 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
sizeof(state->samp_advertised[s]));
}
- if (state->vs_const_f)
- memset(state->vs_const_f, 0, device->vs_const_size);
- if (state->ps_const_f)
- memset(state->ps_const_f, 0, device->ps_const_size);
+ memset(state->vs_const_f, 0, VS_CONST_F_SIZE(device));
+ memset(context->vs_const_f, 0, device->vs_const_size);
+ if (context->vs_const_f_swvp)
+ memset(context->vs_const_f_swvp, 0, NINE_MAX_CONST_F_SWVP * sizeof(float[4]));
+ memset(state->vs_const_i, 0, VS_CONST_I_SIZE(device));
+ memset(context->vs_const_i, 0, VS_CONST_I_SIZE(device));
+ memset(state->vs_const_b, 0, VS_CONST_B_SIZE(device));
+ memset(context->vs_const_b, 0, VS_CONST_B_SIZE(device));
+ memset(state->ps_const_f, 0, device->ps_const_size);
+ memset(context->ps_const_f, 0, device->ps_const_size);
+ memset(state->ps_const_i, 0, sizeof(state->ps_const_i));
+ memset(context->ps_const_i, 0, sizeof(context->ps_const_i));
+ memset(state->ps_const_b, 0, sizeof(state->ps_const_b));
+ memset(context->ps_const_b, 0, sizeof(context->ps_const_b));
/* Cap dependent initial state:
*/
@@ -2122,7 +2277,7 @@ update_vs_constants_sw(struct NineDevice9 *device)
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = 4096 * sizeof(float[4]);
- cb.user_buffer = state->vs_const_f_swvp;
+ cb.user_buffer = state->vs_const_f;
if (state->vs->lconstf.ranges) {
const struct nine_lconstf *lconstf = &device->state.vs->lconstf;
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index b6b8d76..e8519e9 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -116,6 +116,10 @@
((nconstf) * 4 * sizeof(float) + \
NINE_MAX_CONST_I * 4 * sizeof(int))
+#define VS_CONST_F_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_F_SWVP * sizeof(float[4])) : (NINE_MAX_CONST_F * sizeof(float[4])))
+#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4])))
+#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL)))
+
#define NINE_MAX_TEXTURE_STAGES 8
@@ -142,11 +146,11 @@ struct nine_state
uint32_t stream_freq; /* stateblocks only */
uint32_t texture; /* stateblocks only */
uint16_t sampler[NINE_MAX_SAMPLERS]; /* stateblocks only */
- struct nine_range *vs_const_f;
- struct nine_range *ps_const_f;
- struct nine_range *vs_const_i;
+ struct nine_range *vs_const_f; /* stateblocks only */
+ struct nine_range *ps_const_f; /* stateblocks only */
+ struct nine_range *vs_const_i; /* stateblocks only */
uint16_t ps_const_i; /* NINE_MAX_CONST_I == 16 */
- struct nine_range *vs_const_b;
+ struct nine_range *vs_const_b; /* stateblocks only */
uint16_t ps_const_b; /* NINE_MAX_CONST_B == 16 */
uint8_t ucp;
} changed;
@@ -164,16 +168,14 @@ struct nine_state
*/
struct NineVertexShader9 *vs;
float *vs_const_f;
- float *vs_const_f_swvp;
int *vs_const_i;
BOOL *vs_const_b;
- float *vs_lconstf_temp;
+ float *vs_lconstf_temp; /* ProcessVertices */
struct NinePixelShader9 *ps;
float *ps_const_f;
int ps_const_i[NINE_MAX_CONST_I][4];
BOOL ps_const_b[NINE_MAX_CONST_B];
- float *ps_lconstf_temp;
struct NineVertexDeclaration9 *vdecl;
@@ -217,6 +219,12 @@ struct nine_context {
struct {
uint16_t sampler[NINE_MAX_SAMPLERS];
uint32_t vtxbuf;
+ BOOL vs_const_f;
+ BOOL vs_const_i;
+ BOOL vs_const_b;
+ BOOL ps_const_f;
+ BOOL ps_const_i;
+ BOOL ps_const_b;
} changed;
uint32_t bumpmap_vars[6 * NINE_MAX_TEXTURE_STAGES];
@@ -230,6 +238,16 @@ struct nine_context {
struct NineVertexShader9 *vs;
BOOL programmable_vs;
+ float *vs_const_f;
+ float *vs_const_f_swvp;
+ int *vs_const_i;
+ BOOL *vs_const_b;
+ float *vs_lconstf_temp;
+
+ float *ps_const_f;
+ int ps_const_i[NINE_MAX_CONST_I][4];
+ BOOL ps_const_b[NINE_MAX_CONST_B];
+ float *ps_lconstf_temp;
struct NineVertexDeclaration9 *vdecl;
@@ -321,6 +339,42 @@ nine_context_set_vertex_shader(struct NineDevice9 *device,
struct NineVertexShader9 *pShader);
void
+nine_context_set_vertex_shader_constant_f(struct NineDevice9 *device,
+ UINT StartRegister,
+ const float *pConstantData,
+ UINT Vector4fCount);
+
+void
+nine_context_set_vertex_shader_constant_i(struct NineDevice9 *device,
+ UINT StartRegister,
+ const int *pConstantData,
+ UINT Vector4iCount);
+
+void
+nine_context_set_vertex_shader_constant_b(struct NineDevice9 *device,
+ UINT StartRegister,
+ const BOOL *pConstantData,
+ UINT BoolCount);
+
+void
+nine_context_set_pixel_shader_constant_f(struct NineDevice9 *device,
+ UINT StartRegister,
+ const float *pConstantData,
+ UINT Vector4fCount);
+
+void
+nine_context_set_pixel_shader_constant_i(struct NineDevice9 *device,
+ UINT StartRegister,
+ const int *pConstantData,
+ UINT Vector4iCount);
+
+void
+nine_context_set_pixel_shader_constant_b(struct NineDevice9 *device,
+ UINT StartRegister,
+ const BOOL *pConstantData,
+ UINT BoolCount);
+
+void
nine_context_apply_stateblock(struct NineDevice9 *device,
const struct nine_state *src);
diff --git a/src/gallium/state_trackers/nine/stateblock9.c b/src/gallium/state_trackers/nine/stateblock9.c
index 0c19703..0aa69be 100644
--- a/src/gallium/state_trackers/nine/stateblock9.c
+++ b/src/gallium/state_trackers/nine/stateblock9.c
@@ -30,10 +30,6 @@
/* XXX TODO: handling of lights is broken */
-#define VS_CONST_I_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_I_SWVP * sizeof(int[4])) : (NINE_MAX_CONST_I * sizeof(int[4])))
-#define VS_CONST_B_SIZE(device) (device->may_swvp ? (NINE_MAX_CONST_B_SWVP * sizeof(BOOL)) : (NINE_MAX_CONST_B * sizeof(BOOL)))
-#define VS_CONST_F_SWVP_SIZE (NINE_MAX_CONST_F_SWVP * sizeof(float[4]))
-
HRESULT
NineStateBlock9_ctor( struct NineStateBlock9 *This,
struct NineUnknownParams *pParams,
@@ -48,7 +44,7 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This,
This->type = type;
- This->state.vs_const_f = MALLOC(This->base.device->vs_const_size);
+ This->state.vs_const_f = MALLOC(VS_CONST_F_SIZE(This->base.device));
This->state.ps_const_f = MALLOC(This->base.device->ps_const_size);
This->state.vs_const_i = MALLOC(VS_CONST_I_SIZE(This->base.device));
This->state.vs_const_b = MALLOC(VS_CONST_B_SIZE(This->base.device));
@@ -56,13 +52,6 @@ NineStateBlock9_ctor( struct NineStateBlock9 *This,
!This->state.vs_const_i || !This->state.vs_const_b)
return E_OUTOFMEMORY;
- if (This->base.device->may_swvp) {
- This->state.vs_const_f_swvp = MALLOC(VS_CONST_F_SWVP_SIZE);
- if (!This->state.vs_const_f_swvp)
- return E_OUTOFMEMORY;
- } else
- This->state.vs_const_f_swvp = NULL;
-
return D3D_OK;
}
@@ -79,7 +68,6 @@ NineStateBlock9_dtor( struct NineStateBlock9 *This )
FREE(state->ps_const_f);
FREE(state->vs_const_i);
FREE(state->vs_const_b);
- FREE(state->vs_const_f_swvp);
FREE(state->ff.light);
@@ -138,51 +126,28 @@ nine_state_copy_common(struct NineDevice9 *device,
* Various possibilities for optimization here, like creating a per-SB
* constant buffer, or memcmp'ing for changes.
* Will do that later depending on what works best for specific apps.
+ *
+ * Note: Currently, stateblocks are always applied to the device state.
+ * Should applying one also affect a stateblock being recorded? Since the target
+ * is the device state, there is no need to copy which ranges are dirty. If it
+ * turns out recording stateblocks should be affected, that info must be copied.
*/
if (mask->changed.group & NINE_STATE_VS_CONST) {
struct nine_range *r;
- if (device->may_swvp) {
- for (r = mask->changed.vs_const_f; r; r = r->next) {
- int bgn = r->bgn;
- int end = r->end;
- memcpy(&dst->vs_const_f_swvp[bgn * 4],
- &src->vs_const_f_swvp[bgn * 4],
- (end - bgn) * 4 * sizeof(float));
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_f, bgn, end,
- pool);
- if (bgn < device->max_vs_const_f) {
- end = MIN2(end, device->max_vs_const_f);
- memcpy(&dst->vs_const_f[bgn * 4],
- &src->vs_const_f[bgn * 4],
- (end - bgn) * 4 * sizeof(float));
- }
- }
- } else {
- for (r = mask->changed.vs_const_f; r; r = r->next) {
- memcpy(&dst->vs_const_f[r->bgn * 4],
- &src->vs_const_f[r->bgn * 4],
- (r->end - r->bgn) * 4 * sizeof(float));
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end,
- pool);
- }
+ for (r = mask->changed.vs_const_f; r; r = r->next) {
+ memcpy(&dst->vs_const_f[r->bgn * 4],
+ &src->vs_const_f[r->bgn * 4],
+ (r->end - r->bgn) * 4 * sizeof(float));
}
for (r = mask->changed.vs_const_i; r; r = r->next) {
memcpy(&dst->vs_const_i[r->bgn * 4],
&src->vs_const_i[r->bgn * 4],
(r->end - r->bgn) * 4 * sizeof(int));
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end,
- pool);
}
for (r = mask->changed.vs_const_b; r; r = r->next) {
memcpy(&dst->vs_const_b[r->bgn],
&src->vs_const_b[r->bgn],
(r->end - r->bgn) * sizeof(int));
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_b, r->bgn, r->end,
- pool);
}
}
@@ -193,25 +158,18 @@ nine_state_copy_common(struct NineDevice9 *device,
memcpy(&dst->ps_const_f[r->bgn * 4],
&src->ps_const_f[r->bgn * 4],
(r->end - r->bgn) * 4 * sizeof(float));
- if (apply)
- nine_ranges_insert(&dst->changed.ps_const_f, r->bgn, r->end,
- pool);
}
if (mask->changed.ps_const_i) {
uint16_t m = mask->changed.ps_const_i;
for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
if (m & 1)
memcpy(dst->ps_const_i[i], src->ps_const_i[i], 4 * sizeof(int));
- if (apply)
- dst->changed.ps_const_i |= mask->changed.ps_const_i;
}
if (mask->changed.ps_const_b) {
uint16_t m = mask->changed.ps_const_b;
for (i = ffs(m) - 1, m >>= i; m; ++i, m >>= 1)
if (m & 1)
dst->ps_const_b[i] = src->ps_const_b[i];
- if (apply)
- dst->changed.ps_const_b |= mask->changed.ps_const_b;
}
}
@@ -395,23 +353,11 @@ nine_state_copy_common_all(struct NineDevice9 *device,
* Will do that later depending on what works best for specific apps.
*/
if (1) {
- struct nine_range *r = help->changed.vs_const_f;
memcpy(&dst->vs_const_f[0],
- &src->vs_const_f[0], device->max_vs_const_f * 4 * sizeof(float));
- if (device->may_swvp)
- memcpy(dst->vs_const_f_swvp,
- src->vs_const_f_swvp, VS_CONST_F_SWVP_SIZE);
- if (apply)
- nine_ranges_insert(&dst->changed.vs_const_f, r->bgn, r->end, pool);
+ &src->vs_const_f[0], VS_CONST_F_SIZE(device));
memcpy(dst->vs_const_i, src->vs_const_i, VS_CONST_I_SIZE(device));
memcpy(dst->vs_const_b, src->vs_const_b, VS_CONST_B_SIZE(device));
- if (apply) {
- r = help->changed.vs_const_i;
- nine_ranges_insert(&dst->changed.vs_const_i, r->bgn, r->end, pool);
- r = help->changed.vs_const_b;
- nine_ranges_insert(&dst->changed.vs_const_b, r->bgn, r->end, pool);
- }
}
/* Pixel constants. */
@@ -419,15 +365,9 @@ nine_state_copy_common_all(struct NineDevice9 *device,
struct nine_range *r = help->changed.ps_const_f;
memcpy(&dst->ps_const_f[0],
&src->ps_const_f[0], (r->end - r->bgn) * 4 * sizeof(float));
- if (apply)
- nine_ranges_insert(&dst->changed.ps_const_f, r->bgn, r->end, pool);
memcpy(dst->ps_const_i, src->ps_const_i, sizeof(dst->ps_const_i));
memcpy(dst->ps_const_b, src->ps_const_b, sizeof(dst->ps_const_b));
- if (apply) {
- dst->changed.ps_const_i |= src->changed.ps_const_i;
- dst->changed.ps_const_b |= src->changed.ps_const_b;
- }
}
/* Render states. */
--
2.10.2
More information about the mesa-dev
mailing list