[Mesa-dev] [PATCH 27/53] st/nine: Rewrite LOOP implementation, and a0 aL handling

Axel Davy axel.davy at ens.fr
Wed Jan 7 08:36:37 PST 2015


Previous implementation didn't work well with nested loops.

Instead of using several address registers, put a0 and aL
into normal registers, and copy them to one address register when
we need to use them.

Wine tests loop_index_test() and nested_loop_test() now pass correctly.

Fixes r600g crash while loading Bioshock -
bug https://bugs.freedesktop.org/show_bug.cgi?id=85696

Tested-by: David Heidelberg <david at ixit.cz>
Signed-off-by: Axel Davy <axel.davy at ens.fr>

Cc: "10.4" <mesa-stable at lists.freedesktop.org>
---
 src/gallium/state_trackers/nine/nine_shader.c | 156 +++++++++++++++-----------
 1 file changed, 92 insertions(+), 64 deletions(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index 327dd2c..21b06ce 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -466,14 +466,14 @@ struct shader_translator
         struct ureg_src vFace;
         struct ureg_src s;
         struct ureg_dst p;
-        struct ureg_dst a;
+        struct ureg_dst address;
+        struct ureg_dst a0;
         struct ureg_dst tS[8]; /* texture stage registers */
         struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
         struct ureg_dst t[5]; /* scratch TEMPs */
         struct ureg_src vC[2]; /* PS color in */
         struct ureg_src vT[8]; /* PS texcoord in */
         struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
-        struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* loop ctr ADDR register */
     } regs;
     unsigned num_temp; /* Elements(regs.r) */
     unsigned num_scratch;
@@ -482,6 +482,7 @@ struct shader_translator
     unsigned cond_depth;
     unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
     unsigned cond_labels[NINE_MAX_COND_DEPTH];
+    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
 
     unsigned *inst_labels; /* LABEL op */
     unsigned num_inst_labels;
@@ -659,8 +660,10 @@ static INLINE void
 tx_addr_alloc(struct shader_translator *tx, INT idx)
 {
     assert(idx == 0);
-    if (ureg_dst_is_undef(tx->regs.a))
-        tx->regs.a = ureg_DECL_address(tx->ureg);
+    if (ureg_dst_is_undef(tx->regs.address))
+        tx->regs.address = ureg_DECL_address(tx->ureg);
+    if (ureg_dst_is_undef(tx->regs.a0))
+        tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
 }
 
 static INLINE void
@@ -702,7 +705,7 @@ tx_endloop(struct shader_translator *tx)
 }
 
 static struct ureg_dst
-tx_get_loopctr(struct shader_translator *tx)
+tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
 {
     const unsigned l = tx->loop_depth - 1;
 
@@ -712,26 +715,32 @@ tx_get_loopctr(struct shader_translator *tx)
         return ureg_dst_undef();
     }
 
-    if (ureg_dst_is_undef(tx->regs.aL[l]))
-    {
-        struct ureg_dst rreg = ureg_DECL_local_temporary(tx->ureg);
-        struct ureg_dst areg = ureg_DECL_address(tx->ureg);
-        unsigned c;
-
-        assert(l % 4 == 0);
-        for (c = l; c < (l + 4) && c < Elements(tx->regs.aL); ++c) {
-            tx->regs.rL[c] = ureg_writemask(rreg, 1 << (c & 3));
-            tx->regs.aL[c] = ureg_writemask(areg, 1 << (c & 3));
-        }
+    if (ureg_dst_is_undef(tx->regs.rL[l])) {
+        /* loop or rep ctr creation */
+        tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
+        tx->loop_or_rep[l] = loop_or_rep;
     }
+    /* loop - rep - endloop - endrep not allowed */
+    assert(tx->loop_or_rep[l] == loop_or_rep);
+
     return tx->regs.rL[l];
 }
-static struct ureg_dst
-tx_get_aL(struct shader_translator *tx)
+
+static struct ureg_src
+tx_get_loopal(struct shader_translator *tx)
 {
-    if (!ureg_dst_is_undef(tx_get_loopctr(tx)))
-        return tx->regs.aL[tx->loop_depth - 1];
-    return ureg_dst_undef();
+    int loop_level = tx->loop_depth - 1;
+
+    while (loop_level >= 0) {
+        /* handle loop - rep - endrep - endloop case */
+        if (tx->loop_or_rep[loop_level])
+            /* the value is in the loop counter y component (nine implementation) */
+            return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
+        loop_level--;
+    }
+
+    DBG("aL counter requested outside of loop\n");
+    return ureg_src_undef();
 }
 
 static INLINE unsigned *
@@ -782,8 +791,12 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
     case D3DSPR_ADDR:
         assert(!param->rel);
         if (IS_VS) {
-            tx_addr_alloc(tx, param->idx);
-            src = ureg_src(tx->regs.a);
+            assert(param->idx == 0);
+            /* the address register (vs only) must be
+             * assigned before use */
+            assert(!ureg_dst_is_undef(tx->regs.a0));
+            ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+            src = ureg_src(tx->regs.address);
         } else {
             if (tx->version.major < 2 && tx->version.minor < 4) {
                 /* no subroutines, so should be defined */
@@ -869,7 +882,13 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
         }
         break;
     case D3DSPR_LOOP:
-        src = tx_src_scalar(tx_get_aL(tx));
+        if (ureg_dst_is_undef(tx->regs.address))
+            tx->regs.address = ureg_DECL_address(ureg);
+        if (!tx->native_integers)
+            ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
+        else
+            ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
+        src = ureg_src(tx->regs.address);
         break;
     case D3DSPR_MISCTYPE:
         switch (param->idx) {
@@ -1008,7 +1027,7 @@ _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
             dst = ureg_dst(tx->regs.vT[param->idx]);
         } else {
             tx_addr_alloc(tx, param->idx);
-            dst = tx->regs.a;
+            dst = tx->regs.a0;
         }
         break;
     case D3DSPR_RASTOUT:
@@ -1427,9 +1446,17 @@ DECL_SPECIAL(CALLNZ)
 DECL_SPECIAL(MOV_vs1x)
 {
     if (tx->insn.dst[0].file == D3DSPR_ADDR) {
-        ureg_ARL(tx->ureg,
+        /* Implementation note: We don't write directly
+         * to the addr register, but to an intermediate
+         * float register.
+         * Contrary to the doc, when writing to ADDR here,
+         * the rounding is not to nearest, but to lowest
+         * (wine test).
+         * Since we use ARR next, subtract 0.5. */
+        ureg_SUB(tx->ureg,
                  tx_dst_param(tx, &tx->insn.dst[0]),
-                 tx_src_param(tx, &tx->insn.src[0]));
+                 tx_src_param(tx, &tx->insn.src[0]),
+                 ureg_imm1f(tx->ureg, 0.5f));
         return D3D_OK;
     }
     return NineTranslateInstruction_Generic(tx);
@@ -1440,46 +1467,35 @@ DECL_SPECIAL(LOOP)
     struct ureg_program *ureg = tx->ureg;
     unsigned *label;
     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
-    struct ureg_src iter = ureg_scalar(src, TGSI_SWIZZLE_X);
-    struct ureg_src init = ureg_scalar(src, TGSI_SWIZZLE_Y);
-    struct ureg_src step = ureg_scalar(src, TGSI_SWIZZLE_Z);
     struct ureg_dst ctr;
-    struct ureg_dst tmp = tx_scratch_scalar(tx);
+    struct ureg_dst tmp;
 
     label = tx_bgnloop(tx);
-    ctr = tx_get_loopctr(tx);
+    ctr = tx_get_loopctr(tx, TRUE);
 
-    ureg_MOV(tx->ureg, ctr, init);
-    ureg_BGNLOOP(tx->ureg, label);
-    if (tx->native_integers) {
-        /* we'll let the backend pull up that MAD ... */
-        ureg_UMAD(ureg, tmp, iter, step, init);
-        ureg_USEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
-#ifdef NINE_TGSI_LAZY_DEVS
-        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
-#endif
-    } else {
-        /* can't simply use SGE for precision because step might be negative */
-        ureg_MAD(ureg, tmp, iter, step, init);
-        ureg_SEQ(ureg, tmp, ureg_src(ctr), tx_src_scalar(tmp));
-#ifdef NINE_TGSI_LAZY_DEVS
+    /* src = (num_iterations, start value of aL, step for aL, 0) */
+    if (!tx->native_integers) {
+        ureg_MOV(ureg, ctr, src); /* ctr is floats */
+        /* subtract 0.5 from num_iterations. We'll decrement it, and stop when <= 0 */
+        ureg_ADD(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0), ureg_src(ctr), ureg_imm1f(ureg, -0.5f));
+
+        ureg_BGNLOOP(tx->ureg, label);
+
+        /* break if ctr.x <= 0 */
+        tmp = tx_scratch_scalar(tx);
+        ureg_SLE(ureg, tmp, ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 0.0f));
         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
-#endif
+    } else {
+        ureg_MOV(ureg, ctr, src); /* ctr is integers */
+        ureg_BGNLOOP(tx->ureg, label);
+        tmp = tx_scratch_scalar(tx);
+        /* stop when ctr.x <= 0 (tmp holds the comparison result) */
+        ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X));
+        ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
     }
-#ifdef NINE_TGSI_LAZY_DEVS
     ureg_BRK(ureg);
     tx_endcond(tx);
     ureg_ENDIF(ureg);
-#else
-    ureg_BREAKC(ureg, tx_src_scalar(tmp));
-#endif
-    if (tx->native_integers) {
-        ureg_UARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
-        ureg_UADD(ureg, ctr, tx_src_scalar(ctr), step);
-    } else {
-        ureg_ARL(ureg, tx_get_aL(tx), tx_src_scalar(ctr));
-        ureg_ADD(ureg, ctr, tx_src_scalar(ctr), step);
-    }
     return D3D_OK;
 }
 
@@ -1491,6 +1507,17 @@ DECL_SPECIAL(RET)
 
 DECL_SPECIAL(ENDLOOP)
 {
+    struct ureg_program *ureg = tx->ureg;
+    struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
+    if (!tx->native_integers) {
+        /* decrement ctr.x */
+        ureg_ADD(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0), ureg_src(ctr), ureg_imm1f(ureg, -1.0f));
+        /* ctr.y (aL) += step */
+        ureg_ADD(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_1), ureg_src(ctr), ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_Z));
+    } else {
+        ureg_UADD(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0), ureg_src(ctr), ureg_imm1i(ureg, -1.0));
+        ureg_UADD(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_1), ureg_src(ctr), ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_Z));
+    }
     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
     return D3D_OK;
 }
@@ -1540,7 +1567,7 @@ DECL_SPECIAL(REP)
         tx->native_integers ? ureg_imm1u(ureg, 0) : ureg_imm1f(ureg, 0.0f);
 
     label = tx_bgnloop(tx);
-    ctr = tx_get_loopctr(tx);
+    ctr = tx_get_loopctr(tx, FALSE);
 
     /* NOTE: rep must be constant, so we don't have to save the count */
     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
@@ -2341,8 +2368,9 @@ struct sm1_op_info inst_table[] =
     _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
     _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
     _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
-
-    _OPI(MOVA, ARR, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
+    /* we don't write to the address register, but a normal register (copied
+     * when needed to the address register), thus we don't use ARR */
+    _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
 
     _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
     _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
@@ -2759,11 +2787,11 @@ tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
     info->lconstf.data = NULL;
     info->lconstf.ranges = NULL;
 
-    for (i = 0; i < Elements(tx->regs.aL); ++i) {
-        tx->regs.aL[i] = ureg_dst_undef();
+    for (i = 0; i < Elements(tx->regs.rL); ++i) {
         tx->regs.rL[i] = ureg_dst_undef();
     }
-    tx->regs.a = ureg_dst_undef();
+    tx->regs.address = ureg_dst_undef();
+    tx->regs.a0 = ureg_dst_undef();
     tx->regs.p = ureg_dst_undef();
     tx->regs.oDepth = ureg_dst_undef();
     tx->regs.vPos = ureg_src_undef();
-- 
2.1.3



More information about the mesa-dev mailing list