[Mesa-dev] [PATCH 2/3] nv50, nvc0: add support for MUL_ZERO_WINS property

Ilia Mirkin imirkin at alum.mit.edu
Sun Jan 15 18:36:44 UTC 2017


The hardware multiply mode is simply keyed off the vertex shader's
MUL_ZERO_WINS property, as a vertex shader is guaranteed to be present
in any pipeline.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---

Note: this is untested on nv50. I started with very similar code for nvc0, though, which "worked" until I realized that the UNK1690 register changed completely on Kepler.

 .../drivers/nouveau/codegen/nv50_ir_driver.h       |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 30 ++++++++++++++++------
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_program.c    |  2 +-
 src/gallium/drivers/nouveau/nv50/nv50_program.h    |  2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.h     |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c        |  6 +++++
 7 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index a038801..65d0904 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -177,6 +177,7 @@ struct nv50_ir_prog_info
       uint8_t backFaceColor[2];  /* input/output indices of back face colour */
       uint8_t globalAccess;      /* 1 for read, 2 for wr, 3 for rw */
       bool fp64;                 /* program uses fp64 math */
+      bool mul_zero_wins;        /* program wants x*0 = 0 */
       bool nv50styleSurfaces;    /* generate gX[] access for raw buffers */
       uint16_t texBindBase;      /* base address for tex handles (nve4) */
       uint16_t fbtexBindBase;    /* base address for fbtex handle (nve4) */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 7433187..6bd0a36 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1164,6 +1164,9 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
    case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
       info->prop.fp.earlyFragTests = prop->u[0].Data;
       break;
+   case TGSI_PROPERTY_MUL_ZERO_WINS:
+      info->io.mul_zero_wins = prop->u[0].Data;
+      break;
    default:
       INFO("unhandled TGSI property %d\n", prop->Property.PropertyName);
       break;
@@ -2056,12 +2059,14 @@ Converter::buildDot(int dim)
    Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0);
    Value *dotp = getScratch();
 
-   mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
+   mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1)
+      ->dnz = info->io.mul_zero_wins;
 
    for (int c = 1; c < dim; ++c) {
       src0 = fetchSrc(0, c);
       src1 = fetchSrc(1, c);
-      mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp);
+      mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp)
+         ->dnz = info->io.mul_zero_wins;
    }
    return dotp;
 }
@@ -3031,6 +3036,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          src1 = fetchSrc(1, c);
          geni = mkOp2(op, dstTy, dst0[c], src0, src1);
          geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
+         if (op == OP_MUL && dstTy == TYPE_F32)
+            geni->dnz = info->io.mul_zero_wins;
       }
       break;
    case TGSI_OPCODE_MAD:
@@ -3041,7 +3048,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          src0 = fetchSrc(0, c);
          src1 = fetchSrc(1, c);
          src2 = fetchSrc(2, c);
-         mkOp3(op, dstTy, dst0[c], src0, src1, src2);
+         geni = mkOp3(op, dstTy, dst0[c], src0, src1, src2);
+         if (dstTy == TYPE_F32)
+            geni->dnz = info->io.mul_zero_wins;
       }
       break;
    case TGSI_OPCODE_MOV:
@@ -3140,7 +3149,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       if (dst0[1]) {
          mkOp1(OP_EX2, TYPE_F32, dst0[1], val1);
          mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]);
-         mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0);
+         mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0)
+            ->dnz = info->io.mul_zero_wins;
       }
       if (dst0[3])
          loadImm(dst0[3], 1.0f);
@@ -3173,7 +3183,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       if (dst0[1]) {
          src0 = fetchSrc(0, 1);
          src1 = fetchSrc(1, 1);
-         mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1);
+         mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1)
+            ->dnz = info->io.mul_zero_wins;
       }
       if (dst0[2])
          mkMov(dst0[2], fetchSrc(0, 2));
@@ -3186,7 +3197,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
          src1 = fetchSrc(1, c);
          src2 = fetchSrc(2, c);
          mkOp3(OP_MAD, TYPE_F32, dst0[c],
-               mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2);
+               mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2)
+            ->dnz = info->io.mul_zero_wins;
       }
       break;
    case TGSI_OPCODE_LIT:
@@ -3198,12 +3210,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
             val0 = getSSA();
             src0 = fetchSrc(1, (c + 1) % 3);
             src1 = fetchSrc(0, (c + 2) % 3);
-            mkOp2(OP_MUL, TYPE_F32, val0, src0, src1);
+            mkOp2(OP_MUL, TYPE_F32, val0, src0, src1)
+               ->dnz = info->io.mul_zero_wins;
             mkOp1(OP_NEG, TYPE_F32, val0, val0);
 
             src0 = fetchSrc(0, (c + 1) % 3);
             src1 = fetchSrc(1, (c + 2) % 3);
-            mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0);
+            mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0)
+               ->dnz = info->io.mul_zero_wins;
          } else {
             loadImm(dst0[c], 1.0f);
          }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 04b6af2..c996806 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1730,6 +1730,7 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
 
    add->op = toOp;
    add->subOp = src->getInsn()->subOp; // potentially mul-high
+   add->dnz = src->getInsn()->dnz;
    add->dType = src->getInsn()->dType; // sign matters for imad hi
    add->sType = src->getInsn()->sType;
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index ea5febd..76d06ae 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -380,7 +380,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
    prog->interps = info->bin.fixupData;
    prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1);
    prog->tls_space = info->bin.tlsSpace;
-
+   prog->mul_zero_wins = info->io.mul_zero_wins;
    prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
 
    prog->vp.clip_enable = (1 << info->io.clipDistances) - 1;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 009d41f..2b45b09 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -108,6 +108,8 @@ struct nv50_program {
       unsigned num_syms;
    } cp;
 
+   bool mul_zero_wins;
+
    void *fixups; /* relocation records */
    void *interps; /* interpolation records */
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
index 370d8f5..6f3b40c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -54,6 +54,7 @@ struct nv50_graph_state {
    uint8_t prim_size;
    uint16_t scissor;
    bool seamless_cube_map;
+   bool mul_zero_wins;
 };
 
 struct nv50_screen {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 4c46ed0..227038e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -824,6 +824,12 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0);
    }
 
+   if (nv50->vertprog->mul_zero_wins != nv50->state.mul_zero_wins) {
+      nv50->state.mul_zero_wins = nv50->vertprog->mul_zero_wins;
+      BEGIN_NV04(push, NV50_3D(UNK1690), 1);
+      PUSH_DATA (push, 0x00010000 * !!nv50->state.mul_zero_wins);
+   }
+
    if (nv50->vbo_fifo) {
       nv50_push_vbo(nv50, info);
       push->kick_notify = nv50_default_kick_notify;
-- 
2.10.2



More information about the mesa-dev mailing list