[Mesa-dev] [PATCH 37/71] st/nine: Implement TEXBEM, TEXBEML and BEM
Axel Davy
axel.davy at ens.fr
Sun Aug 16 08:28:01 PDT 2015
From: Tiziano Bacocco <tizbac2 at gmail.com>
Signed-off-by: Tiziano Bacocco <tizbac2 at gmail.com>
---
src/gallium/state_trackers/nine/device9.c | 32 ++++++-
src/gallium/state_trackers/nine/nine_shader.c | 126 +++++++++++++++++++++++--
src/gallium/state_trackers/nine/nine_shader.h | 1 +
src/gallium/state_trackers/nine/nine_state.c | 10 ++
src/gallium/state_trackers/nine/nine_state.h | 2 +
src/gallium/state_trackers/nine/pixelshader9.c | 1 +
src/gallium/state_trackers/nine/pixelshader9.h | 1 +
7 files changed, 165 insertions(+), 8 deletions(-)
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index e0f3e39..1ca04a4 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -342,8 +342,9 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+ This->state.ps_lconstf_temp = CALLOC(This->ps_const_size,1);
if (!This->state.vs_const_f || !This->state.ps_const_f ||
- !This->state.vs_lconstf_temp)
+ !This->state.vs_lconstf_temp || !This->state.ps_lconstf_temp)
return E_OUTOFMEMORY;
if (strstr(pScreen->get_name(pScreen), "AMD") ||
@@ -466,6 +467,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
FREE(This->state.vs_const_f);
FREE(This->state.ps_const_f);
FREE(This->state.vs_lconstf_temp);
+ FREE(This->state.ps_lconstf_temp);
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
@@ -2636,6 +2638,7 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
DWORD Value )
{
struct nine_state *state = This->update;
+ int bumpmap_index = -1;
DBG("Stage=%u Type=%u Value=%08x\n", Stage, Type, Value);
nine_dump_D3DTSS_value(DBG_FF, Type, Value);
@@ -2644,6 +2647,33 @@ NineDevice9_SetTextureStageState( struct NineDevice9 *This,
user_assert(Type < Elements(state->ff.tex_stage[0]), D3DERR_INVALIDCALL);
state->ff.tex_stage[Stage][Type] = Value;
+ /* Mirror the bump-environment stage states into state->bumpmap_vars, which
+ * update_ps_constants_userbuf copies to float index 4 * 8 of the pixel
+ * shader constants (constant slot 8) when the bound shader needs them.
+ * Layout: one vec4 per stage holding X=MAT00, Y=MAT01, Z=MAT10, W=MAT11,
+ * which is the order TEXBEM/BEM read from constant 8 + stage, followed
+ * from index 4 * 8 on by (LSCALE, LOFFSET) pairs, two stages per vec4.
+ * Value is stored bit-for-bit and later memcpy'd into the float buffer. */
+ switch (Type) {
+ case D3DTSS_BUMPENVMAT00:
+ bumpmap_index = 4 * Stage;
+ break;
+ case D3DTSS_BUMPENVMAT01:
+ /* Y component: the shader reads MAT01 from .y */
+ bumpmap_index = 4 * Stage + 1;
+ break;
+ case D3DTSS_BUMPENVMAT10:
+ /* Z component: the shader reads MAT10 from .z */
+ bumpmap_index = 4 * Stage + 2;
+ break;
+ case D3DTSS_BUMPENVMAT11:
+ bumpmap_index = 4 * Stage + 3;
+ break;
+ case D3DTSS_BUMPENVLSCALE:
+ bumpmap_index = 4 * 8 + 2 * Stage;
+ break;
+ case D3DTSS_BUMPENVLOFFSET:
+ bumpmap_index = 4 * 8 + 2 * Stage + 1;
+ break;
+ default:
+ /* Not a bump-environment state: bumpmap_index stays negative. */
+ break;
+ }
+
+ if (bumpmap_index >= 0) {
+ state->bumpmap_vars[bumpmap_index] = Value;
+ /* The values live in the pixel shader constant buffer, so flag it dirty. */
+ state->changed.group |= NINE_STATE_PS_CONST;
+ }
state->changed.group |= NINE_STATE_FF_PSSTAGES;
state->ff.changed.tex_stage[Stage][Type / 32] |= 1 << (Type % 32);
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 754f5af..a11c4c7 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -89,6 +89,15 @@ static inline const char *d3dsio_to_string(unsigned opcode);
#define NINE_SWIZZLE4(x,y,z,w) \
TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
+/* Source operand referring to register `index` of TGSI_FILE_CONSTANT. */
+#define NINE_CONSTANT_SRC(index) \
+ ureg_src_register(TGSI_FILE_CONSTANT, index)
+
+/* Replicate component `s` of `src` into all four channels. `s` must be a bare
+ * X, Y, Z or W token because NINE_SWIZZLE4 pastes it onto TGSI_SWIZZLE_. */
+#define NINE_APPLY_SWIZZLE(src, s) \
+ ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
+
+/* Constant register `index` with its component `s` replicated to all channels. */
+#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
+ NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
+
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
@@ -2135,12 +2144,76 @@ DECL_SPECIAL(TEXKILL)
DECL_SPECIAL(TEXBEM)
{
- STUB(D3DERR_INVALIDCALL);
-}
+ /* Shared translation for TEXBEM and TEXBEML (inst_table routes both opcodes
+ * here). Perturbs the stage-m texture coordinates with the bump-environment
+ * matrix applied to the R/G channels of t(n), samples stage m at the
+ * perturbed coordinates and, for TEXBEML only, multiplies the sample by a
+ * luminance factor derived from t(n).B. Returns D3D_OK. */
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_dst tmp, tmp2;
+ struct ureg_src sample, m00, m01, m10, m11;
+ struct ureg_src bumpenvlscale, bumpenvloffset;
+ const int m = tx->insn.dst[0].idx; /* destination register index, used as stage m */
+ const int n = tx->insn.src[0].idx; /* source register index, used as stage n */
-DECL_SPECIAL(TEXBEML)
-{
- STUB(D3DERR_INVALIDCALL);
+ assert(tx->version.major == 1);
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m;
+
+ tx_texcoord_alloc(tx, m);
+
+ /* tmp: perturbed coordinates, then (TEXBEML) the fetched texel.
+ * tmp2: luminance factor, only written on the TEXBEML path. */
+ tmp = tx_scratch(tx);
+ tmp2 = tx_scratch(tx);
+ /*
+ * Bump-env-matrix of stage m, read from float constant 8 + m:
+ * 00 is X
+ * 01 is Y
+ * 10 is Z
+ * 11 is W
+ */
+ /* Record use of float constant 8 + 8 + m/2, the luminance slot read below. */
+ nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
+ m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
+ m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
+ m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
+ m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
+
+ /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
+ if (m % 2 == 0) {
+ bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
+ bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
+ } else {
+ bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
+ bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
+ }
+
+ /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]);
+ /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
+ NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
+
+ /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), tx->regs.vT[m]);
+ /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
+ NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
+ NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
+
+ /* Now the texture coordinates are in tmp.xy */
+
+ if (tx->insn.opcode == D3DSIO_TEXBEM) {
+ /* Plain lookup at the perturbed coordinates, written straight to dst. */
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+ } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
+ /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
+ /* The texel overwrites tmp; the coordinates are not needed afterwards. */
+ ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+ ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
+ bumpenvlscale, bumpenvloffset);
+ ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
+ }
+
+ /* Checked by update_ps_constants_userbuf to upload the bump-env constants. */
+ tx->info->bumpenvmat_needed = 1;
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXREG2AR)
@@ -2421,7 +2494,43 @@ DECL_SPECIAL(TEXDEPTH)
DECL_SPECIAL(BEM)
{
- STUB(D3DERR_INVALIDCALL);
+ /* Translates BEM: dst.rg = src0.rg + bump-env-matrix(stage m) * src1.rg,
+ * where m is the index of the destination register. Only the X and Y
+ * channels of dst are written. Returns D3D_OK. */
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+ struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+ struct ureg_src m00, m01, m10, m11;
+ const int m = tx->insn.dst[0].idx;
+ /* Scratch register for the partial sums. It has to be allocated here:
+ * leaving it uninitialized made every MAD below write to (and the final
+ * MOV read from) an indeterminate register. */
+ struct ureg_dst tmp = tx_scratch(tx);
+ /*
+ * Bump-env-matrix of stage m, read from float constant 8 + m:
+ * 00 is X
+ * 01 is Y
+ * 10 is Z
+ * 11 is W
+ */
+ nine_info_mark_const_f_used(tx->info, 8 + m);
+ m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
+ m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
+ m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
+ m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
+ /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage m) * src1.r */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
+ NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
+ /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage m) * src1.g */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
+ NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
+
+ /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage m) * src1.r
+ * (src0 is unswizzled; the Y writemask selects its Y channel) */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
+ NINE_APPLY_SWIZZLE(src1, X), src0);
+ /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage m) * src1.g */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
+ NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
+ /* Copy the result out only once both channels are computed, so dst is
+ * never read back or partially overwritten while src0/src1 are in use. */
+ ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
+
+ /* Checked by update_ps_constants_userbuf to upload the bump-env constants. */
+ tx->info->bumpenvmat_needed = 1;
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXLD)
@@ -2616,7 +2725,7 @@ struct sm1_op_info inst_table[] =
_OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
_OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
_OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
- _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
+ _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
_OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
_OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
_OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
@@ -3023,6 +3132,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
info->lconstf.data = NULL;
info->lconstf.ranges = NULL;
+ info->bumpenvmat_needed = 0;
+
for (i = 0; i < Elements(tx->regs.rL); ++i) {
tx->regs.rL[i] = ureg_dst_undef();
}
@@ -3233,6 +3344,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
info->const_int_slots > 0 ?
max_const_f + info->const_int_slots :
info->const_float_slots;
+
info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
for (s = 0; s < slot_max; s++)
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index ec256c1..f2b1e8b 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -68,6 +68,7 @@ struct nine_shader_info
unsigned const_bool_slots;
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
+ uint8_t bumpenvmat_needed;
};
static inline void
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 4bf5908..403cd23 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -586,6 +586,7 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
struct nine_state *state = &device->state;
struct pipe_context *pipe = device->pipe;
struct pipe_constant_buffer cb;
+ int i;
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = device->state.ps->const_used_size;
@@ -606,6 +607,14 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
state->changed.ps_const_b = 0;
}
+ /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
+ if (device->state.ps->bumpenvmat_needed) {
+ memcpy(device->state.ps_lconstf_temp, cb.user_buffer, cb.buffer_size);
+ memcpy(&device->state.ps_lconstf_temp[4 * 8], &device->state.bumpmap_vars, sizeof(device->state.bumpmap_vars));
+
+ cb.user_buffer = device->state.ps_lconstf_temp;
+ }
+
pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
if (device->state.changed.ps_const_f) {
@@ -1152,6 +1161,7 @@ nine_state_set_defaults(struct NineDevice9 *device, const D3DCAPS9 *caps,
}
state->ff.tex_stage[0][D3DTSS_COLOROP] = D3DTOP_MODULATE;
state->ff.tex_stage[0][D3DTSS_ALPHAOP] = D3DTOP_SELECTARG1;
+ memset(&state->bumpmap_vars, 0, sizeof(state->bumpmap_vars));
for (s = 0; s < Elements(state->samp); ++s) {
memcpy(&state->samp[s], nine_samp_state_defaults,
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index cac9af6..89a5fd9 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -148,6 +148,8 @@ struct nine_state
float *ps_const_f;
int ps_const_i[NINE_MAX_CONST_I][4];
BOOL ps_const_b[NINE_MAX_CONST_B];
+ float *ps_lconstf_temp;
+ uint32_t bumpmap_vars[48];
uint32_t ps_key;
struct {
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 3f176a3..010c4f4 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -72,6 +72,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
This->sampler_mask = info.sampler_mask;
This->rt_mask = info.rt_mask;
This->const_used_size = info.const_used_size;
+ This->bumpenvmat_needed = info.bumpenvmat_needed;
/* no constant relative addressing for ps */
assert(info.lconstf.data == NULL);
assert(info.lconstf.ranges == NULL);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index 6dad1d1..9715d90 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -41,6 +41,7 @@ struct NinePixelShader9
unsigned const_used_size; /* in bytes */
+ uint8_t bumpenvmat_needed;
uint16_t sampler_mask;
uint16_t sampler_mask_shadow;
uint8_t rt_mask;
--
2.1.0
More information about the mesa-dev
mailing list