Mesa (master): st/nine: Implement TEXBEM,TEXBEML and BEM
David Heidelberg
okias at kemper.freedesktop.org
Fri Feb 6 17:32:52 UTC 2015
Module: Mesa
Branch: master
Commit: 5289276bd4faedb67b3c42e6572fb3e0374aa6b6
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5289276bd4faedb67b3c42e6572fb3e0374aa6b6
Author: Tiziano Bacocco <tizbac2 at gmail.com>
Date: Fri Jan 30 20:10:38 2015 +0100
st/nine: Implement TEXBEM,TEXBEML and BEM
Signed-off-by: Tiziano Bacocco <tizbac2 at gmail.com>
---
src/gallium/state_trackers/nine/device9.c | 4 +-
src/gallium/state_trackers/nine/nine_shader.c | 122 ++++++++++++++++++++++--
src/gallium/state_trackers/nine/nine_shader.h | 1 +
src/gallium/state_trackers/nine/nine_state.c | 27 ++++++
src/gallium/state_trackers/nine/nine_state.h | 1 +
src/gallium/state_trackers/nine/pixelshader9.c | 1 +
src/gallium/state_trackers/nine/pixelshader9.h | 1 +
7 files changed, 149 insertions(+), 8 deletions(-)
diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 96061e0..feb0b8a 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -320,8 +320,9 @@ NineDevice9_ctor( struct NineDevice9 *This,
This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+ This->state.ps_bumpenvmap_temp = CALLOC(This->ps_const_size,1);
if (!This->state.vs_const_f || !This->state.ps_const_f ||
- !This->state.vs_lconstf_temp)
+ !This->state.vs_lconstf_temp || !This->state.ps_bumpenvmap_temp)
return E_OUTOFMEMORY;
if (strstr(pScreen->get_name(pScreen), "AMD") ||
@@ -442,6 +443,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
FREE(This->state.vs_const_f);
FREE(This->state.ps_const_f);
FREE(This->state.vs_lconstf_temp);
+ FREE(This->state.ps_bumpenvmap_temp);
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 8ce3530..3f670ab 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -2145,12 +2145,78 @@ DECL_SPECIAL(TEXKILL)
DECL_SPECIAL(TEXBEM)
{
- STUB(D3DERR_INVALIDCALL);
-}
+ struct ureg_program *ureg = tx->ureg; /* Handles both TEXBEM and TEXBEML:
+ * offsets stage m's texture coordinates by the bump-env matrix applied
+ * to t(n).xy, samples stage m at the result, and for TEXBEML also
+ * multiplies the sample by (t(n).z * BUMPENVLSCALE + BUMPENVLOFFSET).
+ * Always returns D3D_OK. */
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_dst tmp;
+ struct ureg_dst tmp2;
+ struct ureg_src sample;
+ struct ureg_src m00;
+ struct ureg_src m01;
+ struct ureg_src m10;
+ struct ureg_src m11;
+ struct ureg_src bumpenvlscale;
+ struct ureg_src bumpenvloffset;
+ const int m = tx->insn.dst[0].idx; /* destination register index: selects sampler, texcoord and constant slots */
+ const int n = tx->insn.src[0].idx; /* source register index: tS[n] supplies the perturbation values */
-DECL_SPECIAL(TEXBEML)
-{
- STUB(D3DERR_INVALIDCALL);
+ assert(tx->version.major == 1); /* this handler only expects version 1.x shaders */
+
+ sample = ureg_DECL_sampler(ureg, m);
+ tx->info->sampler_mask |= 1 << m; /* record that sampler m is used by this shader */
+
+ tx_texcoord_alloc(tx, m);
+
+ tmp = tx_scratch(tx);
+ tmp2 = tx_scratch(tx);
+ /*
+ * Bump-env-matrix:
+ * 00 is X
+ * 01 is Y
+ * 10 is Z
+ * 11 is W
+ *
+ * All four entries for stage m are read from constant slot 8 + m.
+ * NOTE(review): update_ps_constants_userbuf fills that slot as
+ * x=BUMPENVMAT00, y=BUMPENVMAT10, z=BUMPENVMAT01, w=BUMPENVMAT11,
+ * so m01/m10 below actually hold the transposed entries; confirm
+ * this is the intended pairing.
+ */
+ m00 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(X, X, X, X));
+ m01 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(Y, Y, Y, Y));
+ m10 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(Z, Z, Z, Z));
+ m11 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(W, W, W, W));
+
+ /* Scale and offset are packed two stages per vec4, starting at constant
+ * slot 8 + 8: X=scale0 Y=offset0 Z=scale1 W=offset1, and so on. Even m
+ * reads the XY pair, odd m reads the ZW pair of slot 16 + m / 2. */
+ if (m % 2 == 0) {
+ bumpenvlscale = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(X, X, X, X));
+ bumpenvloffset = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(Y, Y, Y, Y));
+ } else {
+ bumpenvlscale = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(Z, Z, Z, Z));
+ bumpenvloffset = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(W, W, W, W));
+ }
+
+ /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(X, X, X, X)), tx->regs.vT[m]);
+ /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X),
+ m10, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(X, X, X, X)));
+
+ /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(X, X, X, X)), tx->regs.vT[m]);
+ /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+ m11, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Y, Y, Y, Y)));
+
+ /* Now the perturbed texture coordinates are in tmp.xy */
+
+ if (tx->insn.opcode == D3DSIO_TEXBEM) {
+ ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); /* sample straight into dst */
+ } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
+ /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
+ ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); /* tmp is reused: coordinates in, sampled texel out */
+ ureg_MAD(ureg, tmp2, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Z, Z, Z, Z)), bumpenvlscale, bumpenvloffset); /* tmp2 = t(n).z * scale + offset */
+ ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
+ }
+
+
+
+ tx->info->bumpenvmat_needed = 1; /* makes nine_translate_shader reserve the extra bump-env constant slots */
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXREG2AR)
@@ -2429,7 +2495,42 @@ DECL_SPECIAL(TEXDEPTH)
DECL_SPECIAL(BEM)
{
- STUB(D3DERR_INVALIDCALL);
+ struct ureg_program *ureg = tx->ureg;
+ struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+ struct ureg_dst tmp;
+ struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+ struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+ struct ureg_src m00;
+ struct ureg_src m01;
+ struct ureg_src m10;
+ struct ureg_src m11;
+ /*
+ * Bump-env-matrix:
+ * 00 is X
+ * 01 is Y
+ * 10 is Z
+ * 11 is W
+ */
+ m00 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(X, X, X, X));
+ m01 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(Y, Y, Y, Y));
+ m10 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(Z, Z, Z, Z));
+ m11 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(W, W, W, W));
+ /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, ureg_swizzle(src1, NINE_SWIZZLE4(X, X, X, X)), ureg_swizzle(src0, NINE_SWIZZLE4(X, X, X, X)));
+ /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X),
+ m10, ureg_swizzle(src1, NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(X, X, X, X)));
+
+ /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, ureg_swizzle(src1, NINE_SWIZZLE4(X, X, X, X)), src0);
+ /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
+ ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+ m11, ureg_swizzle(src1, NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Y, Y, Y, Y)));
+ ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
+
+ tx->info->bumpenvmat_needed = 1;
+
+ return D3D_OK;
}
DECL_SPECIAL(TEXLD)
@@ -2624,7 +2725,7 @@ struct sm1_op_info inst_table[] =
_OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
_OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
_OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
- _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
+ _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
_OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
_OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
_OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
@@ -3031,6 +3132,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
info->lconstf.data = NULL;
info->lconstf.ranges = NULL;
+ info->bumpenvmat_needed = 0;
+
for (i = 0; i < Elements(tx->regs.rL); ++i) {
tx->regs.rL[i] = ureg_dst_undef();
}
@@ -3239,6 +3342,11 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
info->const_int_slots > 0 ?
device->max_vs_const_f + info->const_int_slots :
info->const_float_slots;
+ assert(IS_VS || tx->version.major > 1 || slot_max <= 8);
+
+ if (info->bumpenvmat_needed)
+ slot_max = 8 + 8 + 4; /* 8 for ps1_x + 8 for texbem + 4 for texbeml*/
+
info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
for (s = 0; s < slot_max; s++)
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index 56c5d99..54049d2 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -68,6 +68,7 @@ struct nine_shader_info
unsigned const_bool_slots;
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
+ uint8_t bumpenvmat_needed;
};
static INLINE void
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 495cc86..207f5e7 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -566,6 +566,7 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
struct nine_state *state = &device->state;
struct pipe_context *pipe = device->pipe;
struct pipe_constant_buffer cb;
+ int i;
cb.buffer = NULL;
cb.buffer_offset = 0;
cb.buffer_size = device->state.ps->const_used_size;
@@ -586,6 +587,32 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
state->changed.ps_const_b = 0;
}
+ /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
+ if (device->state.ps->byte_code.version >> 4 == 1 && device->state.ps->bumpenvmat_needed) { /* Version.major = 1 */
+
+ memcpy(device->state.ps_bumpenvmap_temp, cb.user_buffer, cb.buffer_size);
+
+ /* Set the bump env matrix */
+ for (i = 0; i < 8; i++) {
+ /* Index = 4 floats * 8 ps_1_x constants + 4 floats * texture stage + matrix entry */
+ /* As the comments in Wine's visual.c test say, the matrix is transposed */
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 0] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT00]);
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 1] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT10]);
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 2] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT01]);
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 3] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT11]);
+ }
+
+ /* Set the bumpenvl parameters */
+ for (i = 0; i < 4; i++) {
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 0] = *((float *)&device->state.ff.tex_stage[i * 2 + 0][D3DTSS_BUMPENVLSCALE]);
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 1] = *((float *)&device->state.ff.tex_stage[i * 2 + 0][D3DTSS_BUMPENVLOFFSET]);
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 2] = *((float *)&device->state.ff.tex_stage[i * 2 + 1][D3DTSS_BUMPENVLSCALE]);
+ device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 3] = *((float *)&device->state.ff.tex_stage[i * 2 + 1][D3DTSS_BUMPENVLOFFSET]);
+ }
+
+ cb.user_buffer = device->state.ps_bumpenvmap_temp;
+ }
+
pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
if (device->state.changed.ps_const_f) {
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 1916959..e0041f8 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -143,6 +143,7 @@ struct nine_state
int vs_const_i[NINE_MAX_CONST_I][4];
BOOL vs_const_b[NINE_MAX_CONST_B];
float *vs_lconstf_temp;
+ float *ps_bumpenvmap_temp;
uint32_t vs_key;
struct NinePixelShader9 *ps;
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 3f176a3..010c4f4 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -72,6 +72,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
This->sampler_mask = info.sampler_mask;
This->rt_mask = info.rt_mask;
This->const_used_size = info.const_used_size;
+ This->bumpenvmat_needed = info.bumpenvmat_needed;
/* no constant relative addressing for ps */
assert(info.lconstf.data == NULL);
assert(info.lconstf.ranges == NULL);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index 5e2219c..32be0cd 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -41,6 +41,7 @@ struct NinePixelShader9
unsigned const_used_size; /* in bytes */
+ uint8_t bumpenvmat_needed;
uint16_t sampler_mask;
uint16_t sampler_mask_shadow;
uint8_t rt_mask;
More information about the mesa-commit
mailing list