Mesa (master): st/nine: Implement TEXBEM,TEXBEML and BEM

David Heidelberg okias at kemper.freedesktop.org
Fri Feb 6 17:32:52 UTC 2015


Module: Mesa
Branch: master
Commit: 5289276bd4faedb67b3c42e6572fb3e0374aa6b6
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5289276bd4faedb67b3c42e6572fb3e0374aa6b6

Author: Tiziano Bacocco <tizbac2 at gmail.com>
Date:   Fri Jan 30 20:10:38 2015 +0100

st/nine: Implement TEXBEM,TEXBEML and BEM

Signed-off-by: Tiziano Bacocco <tizbac2 at gmail.com>

---

 src/gallium/state_trackers/nine/device9.c      |    4 +-
 src/gallium/state_trackers/nine/nine_shader.c  |  122 ++++++++++++++++++++++--
 src/gallium/state_trackers/nine/nine_shader.h  |    1 +
 src/gallium/state_trackers/nine/nine_state.c   |   27 ++++++
 src/gallium/state_trackers/nine/nine_state.h   |    1 +
 src/gallium/state_trackers/nine/pixelshader9.c |    1 +
 src/gallium/state_trackers/nine/pixelshader9.h |    1 +
 7 files changed, 149 insertions(+), 8 deletions(-)

diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c
index 96061e0..feb0b8a 100644
--- a/src/gallium/state_trackers/nine/device9.c
+++ b/src/gallium/state_trackers/nine/device9.c
@@ -320,8 +320,9 @@ NineDevice9_ctor( struct NineDevice9 *This,
         This->state.vs_const_f = CALLOC(This->vs_const_size, 1);
         This->state.ps_const_f = CALLOC(This->ps_const_size, 1);
         This->state.vs_lconstf_temp = CALLOC(This->vs_const_size,1);
+        This->state.ps_bumpenvmap_temp = CALLOC(This->ps_const_size,1);
         if (!This->state.vs_const_f || !This->state.ps_const_f ||
-            !This->state.vs_lconstf_temp)
+            !This->state.vs_lconstf_temp || !This->state.ps_bumpenvmap_temp)
             return E_OUTOFMEMORY;
 
         if (strstr(pScreen->get_name(pScreen), "AMD") ||
@@ -442,6 +443,7 @@ NineDevice9_dtor( struct NineDevice9 *This )
     FREE(This->state.vs_const_f);
     FREE(This->state.ps_const_f);
     FREE(This->state.vs_lconstf_temp);
+    FREE(This->state.ps_bumpenvmap_temp);
 
     if (This->swapchains) {
         for (i = 0; i < This->nswapchains; ++i)
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 8ce3530..3f670ab 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -2145,12 +2145,78 @@ DECL_SPECIAL(TEXKILL)
 
 DECL_SPECIAL(TEXBEM)
 {
-    STUB(D3DERR_INVALIDCALL);
-}
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_dst tmp;
+    struct ureg_dst tmp2;
+    struct ureg_src sample;
+    struct ureg_src m00;
+    struct ureg_src m01;
+    struct ureg_src m10;
+    struct ureg_src m11;
+    struct ureg_src bumpenvlscale;
+    struct ureg_src bumpenvloffset;
+    const int m = tx->insn.dst[0].idx; /* destination index; also selects the sampler, texcoord and constants */
+    const int n = tx->insn.src[0].idx; /* index of the source texture register tS[n] */
 
-DECL_SPECIAL(TEXBEML)
-{
-    STUB(D3DERR_INVALIDCALL);
+    assert(tx->version.major == 1); /* this handler expects a 1.x shader */
+
+    sample = ureg_DECL_sampler(ureg, m);
+    tx->info->sampler_mask |= 1 << m;
+
+    tx_texcoord_alloc(tx, m);
+
+    tmp = tx_scratch(tx);
+    tmp2 = tx_scratch(tx);
+    /*
+     * Bump-env-matrix, read from constant slot 8 + m:
+     * 00 is X
+     * 01 is Y
+     * 10 is Z
+     * 11 is W
+     */
+    m00 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(X, X, X, X));
+    m01 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(Y, Y, Y, Y));
+    m10 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(Z, Z, Z, Z));
+    m11 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + m), NINE_SWIZZLE4(W, W, W, W));
+
+    /* BUMPENVLSCALE/BUMPENVLOFFSET are packed two stages per constant (slot 8 + 8 + m / 2): even m reads X=scale, Y=offset; odd m reads Z=scale, W=offset */
+    if (m % 2 == 0) {
+        bumpenvlscale = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(X, X, X, X));
+        bumpenvloffset = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(Y, Y, Y, Y));
+    } else {
+        bumpenvlscale = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(Z, Z, Z, Z));
+        bumpenvloffset = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + 8 + m / 2), NINE_SWIZZLE4(W, W, W, W));
+    }
+
+    /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(X, X, X, X)), tx->regs.vT[m]);
+    /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X),
+             m10, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(X, X, X, X)));
+
+    /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(X, X, X, X)), tx->regs.vT[m]);
+    /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+             m11, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Y, Y, Y, Y)));
+
+    /* Now the perturbed texture coordinates (u', v') are in tmp.xy */
+
+    if (tx->insn.opcode == D3DSIO_TEXBEM) {
+        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+    } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
+        /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
+        ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
+        ureg_MAD(ureg, tmp2, ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Z, Z, Z, Z)), bumpenvlscale, bumpenvloffset);
+        ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
+    }
+
+
+
+    tx->info->bumpenvmat_needed = 1; /* record that this shader reads the bump-env constants */
+
+    return D3D_OK;
 }
 
 DECL_SPECIAL(TEXREG2AR)
@@ -2429,7 +2495,42 @@ DECL_SPECIAL(TEXDEPTH)
 
 DECL_SPECIAL(BEM)
 {
-    STUB(D3DERR_INVALIDCALL);
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
+    struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
+    struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
+    struct ureg_dst tmp = tx_scratch(tx); /* scratch for dest.rg; must be allocated before the MADs write and read it */
+    struct ureg_src m00;
+    struct ureg_src m01;
+    struct ureg_src m10;
+    struct ureg_src m11;
+    /*
+     * Bump-env-matrix, read from constant slot 8 + destination index:
+     * 00 is X
+     * 01 is Y
+     * 10 is Z
+     * 11 is W
+     */
+    m00 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(X, X, X, X));
+    m01 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(Y, Y, Y, Y));
+    m10 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(Z, Z, Z, Z));
+    m11 = ureg_swizzle(ureg_src_register(TGSI_FILE_CONSTANT, 8 + tx->insn.dst[0].idx), NINE_SWIZZLE4(W, W, W, W));
+    /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, ureg_swizzle(src1, NINE_SWIZZLE4(X, X, X, X)), ureg_swizzle(src0, NINE_SWIZZLE4(X, X, X, X)));
+    /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X),
+             m10, ureg_swizzle(src1, NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(X, X, X, X)));
+
+    /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, ureg_swizzle(src1, NINE_SWIZZLE4(X, X, X, X)), src0);
+    /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
+    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+             m11, ureg_swizzle(src1, NINE_SWIZZLE4(Y, Y, Y, Y)), ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Y, Y, Y, Y)));
+    ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); /* only .xy of the destination is written */
+
+    tx->info->bumpenvmat_needed = 1; /* record that this shader reads the bump-env constants */
+
+    return D3D_OK;
 }
 
 DECL_SPECIAL(TEXLD)
@@ -2624,7 +2725,7 @@ struct sm1_op_info inst_table[] =
     _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
     _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
     _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
-    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEML)),
+    _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
     _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
     _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
     _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
@@ -3031,6 +3132,8 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
     info->lconstf.data = NULL;
     info->lconstf.ranges = NULL;
 
+    info->bumpenvmat_needed = 0;
+
     for (i = 0; i < Elements(tx->regs.rL); ++i) {
         tx->regs.rL[i] = ureg_dst_undef();
     }
@@ -3239,6 +3342,11 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info)
                        info->const_int_slots > 0 ?
                            device->max_vs_const_f + info->const_int_slots :
                                info->const_float_slots;
+    assert(IS_VS || tx->version.major > 1 || slot_max <= 8);
+
+    if (info->bumpenvmat_needed)
+        slot_max = 8 + 8 + 4; /* 8 for ps1_x + 8 for texbem + 4 for texbeml*/
+
     info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
 
     for (s = 0; s < slot_max; s++)
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index 56c5d99..54049d2 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -68,6 +68,7 @@ struct nine_shader_info
     unsigned const_bool_slots;
 
     struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
+    uint8_t bumpenvmat_needed;
 };
 
 static INLINE void
diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c
index 495cc86..207f5e7 100644
--- a/src/gallium/state_trackers/nine/nine_state.c
+++ b/src/gallium/state_trackers/nine/nine_state.c
@@ -566,6 +566,7 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
     struct nine_state *state = &device->state;
     struct pipe_context *pipe = device->pipe;
     struct pipe_constant_buffer cb;
+    int i;
     cb.buffer = NULL;
     cb.buffer_offset = 0;
     cb.buffer_size = device->state.ps->const_used_size;
@@ -586,6 +587,32 @@ update_ps_constants_userbuf(struct NineDevice9 *device)
         state->changed.ps_const_b = 0;
     }
 
+    /* Upload special constants needed to implement PS1.x instructions like TEXBEM,TEXBEML and BEM */
+    if (device->state.ps->byte_code.version >> 4 == 1 && device->state.ps->bumpenvmat_needed) { /* Version.major = 1 */
+
+        memcpy(device->state.ps_bumpenvmap_temp, cb.user_buffer, cb.buffer_size);
+
+        /* Set the bump env matrix */
+        for (i = 0; i < 8; i++) {
+            /* 4floats*maxps1xconst+4floats*texstage+matpart */
+            /* The matrix as comments on wine visual.c test say, is transposed */
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 0] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT00]);
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 1] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT10]);
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 2] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT01]);
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * i + 3] = *((float *)&device->state.ff.tex_stage[i][D3DTSS_BUMPENVMAT11]);
+        }
+
+        /* Set the bumpenvl parameters */
+        for (i = 0; i < 4; i++) {
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 0] = *((float *)&device->state.ff.tex_stage[i * 2 + 0][D3DTSS_BUMPENVLSCALE]);
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 1] = *((float *)&device->state.ff.tex_stage[i * 2 + 0][D3DTSS_BUMPENVLOFFSET]);
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 2] = *((float *)&device->state.ff.tex_stage[i * 2 + 1][D3DTSS_BUMPENVLSCALE]);
+            device->state.ps_bumpenvmap_temp[4 * 8 + 4 * 8 + i * 4 + 3] = *((float *)&device->state.ff.tex_stage[i * 2 + 1][D3DTSS_BUMPENVLOFFSET]);
+        }
+
+        cb.user_buffer = device->state.ps_bumpenvmap_temp;
+    }
+
     pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
 
     if (device->state.changed.ps_const_f) {
diff --git a/src/gallium/state_trackers/nine/nine_state.h b/src/gallium/state_trackers/nine/nine_state.h
index 1916959..e0041f8 100644
--- a/src/gallium/state_trackers/nine/nine_state.h
+++ b/src/gallium/state_trackers/nine/nine_state.h
@@ -143,6 +143,7 @@ struct nine_state
     int    vs_const_i[NINE_MAX_CONST_I][4];
     BOOL   vs_const_b[NINE_MAX_CONST_B];
     float *vs_lconstf_temp;
+    float *ps_bumpenvmap_temp;
     uint32_t vs_key;
 
     struct NinePixelShader9 *ps;
diff --git a/src/gallium/state_trackers/nine/pixelshader9.c b/src/gallium/state_trackers/nine/pixelshader9.c
index 3f176a3..010c4f4 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.c
+++ b/src/gallium/state_trackers/nine/pixelshader9.c
@@ -72,6 +72,7 @@ NinePixelShader9_ctor( struct NinePixelShader9 *This,
     This->sampler_mask = info.sampler_mask;
     This->rt_mask = info.rt_mask;
     This->const_used_size = info.const_used_size;
+    This->bumpenvmat_needed = info.bumpenvmat_needed;
     /* no constant relative addressing for ps */
     assert(info.lconstf.data == NULL);
     assert(info.lconstf.ranges == NULL);
diff --git a/src/gallium/state_trackers/nine/pixelshader9.h b/src/gallium/state_trackers/nine/pixelshader9.h
index 5e2219c..32be0cd 100644
--- a/src/gallium/state_trackers/nine/pixelshader9.h
+++ b/src/gallium/state_trackers/nine/pixelshader9.h
@@ -41,6 +41,7 @@ struct NinePixelShader9
 
     unsigned const_used_size; /* in bytes */
 
+    uint8_t bumpenvmat_needed;
     uint16_t sampler_mask;
     uint16_t sampler_mask_shadow;
     uint8_t rt_mask;




More information about the mesa-commit mailing list