[Mesa-dev] [PATCH] nv50: fix alphatest for non-blendable formats

Ilia Mirkin imirkin at alum.mit.edu
Wed Jul 13 05:09:17 UTC 2016


The hardware can only do alphatest when using a blendable format. This
means that the various *16 norm formats didn't work with alphatest. It
appears that Talos Principle uses such formats, as well as alpha tests,
for some internal renders, which made them incorrect. This does not
appear to affect the final renders, but in a different game it easily
could.

The approach we take is that when alphatests are enabled and a
non-blendable format is used (which we anticipate is a small minority of
the time), we insert code into the shader to perform the comparison and
discard. Once inserted, that code lives in the shader forever, and each
time the alpha function changes we re-upload it with a fixed-up compare.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |  4 ++-
 .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp  | 26 +++++++++++++++++
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 23 ++++++++++++++-
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   |  2 ++
 .../drivers/nouveau/codegen/nv50_ir_target.cpp     |  6 ++--
 .../drivers/nouveau/codegen/nv50_ir_target.h       |  5 ++--
 src/gallium/drivers/nouveau/nv50/nv50_context.h    |  5 +++-
 src/gallium/drivers/nouveau/nv50/nv50_program.c    |  5 +++-
 src/gallium/drivers/nouveau/nv50/nv50_program.h    |  1 +
 .../drivers/nouveau/nv50/nv50_shader_state.c       | 33 ++++++++++++++++++++++
 src/gallium/drivers/nouveau/nv50/nv50_state.c      | 13 +++++++++
 src/gallium/drivers/nouveau/nv50/nv50_stateobj.h   |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  3 +-
 13 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index b611519..58a5d38 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -166,6 +166,7 @@ struct nv50_ir_prog_info
       uint8_t auxCBSlot;         /* driver constant buffer slot */
       uint16_t ucpBase;          /* base address for UCPs */
       uint16_t drawInfoBase;     /* base address for draw parameters */
+      uint16_t alphaRefBase;     /* base address for alpha test values */
       uint8_t pointSize;         /* output index for PointSize */
       uint8_t instanceId;        /* system value index of InstanceID */
       uint8_t vertexId;          /* system value index of VertexID */
@@ -206,7 +207,8 @@ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
 
 extern void
 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
-                     bool force_per_sample, bool flatshade);
+                     bool force_per_sample, bool flatshade,
+                     uint8_t alphatest);
 
 /* obtain code that will be shared among programs */
 extern void nv50_ir_get_target_library(uint32_t chipset,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 0fe399b..c6cc013 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -1265,6 +1265,28 @@ CodeEmitterNV50::emitISAD(const Instruction *i)
    }
 }
 
+static void
+alphatestSet(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   int loc = entry->loc;
+   int enc;
+
+   switch (data.alphatest) {
+   case PIPE_FUNC_NEVER: enc = 0x0;; break;
+   case PIPE_FUNC_LESS: enc = 0x1; break;
+   case PIPE_FUNC_EQUAL: enc = 0x2; break;
+   case PIPE_FUNC_LEQUAL: enc = 0x3; break;
+   case PIPE_FUNC_GREATER: enc = 0x4; break;
+   case PIPE_FUNC_NOTEQUAL: enc = 0x5; break;
+   case PIPE_FUNC_GEQUAL: enc = 0x6; break;
+   default:
+   case PIPE_FUNC_ALWAYS: enc = 0xf; break;
+   }
+
+   code[loc + 1] &= ~(0x1f << 14);
+   code[loc + 1] |= enc << 14;
+}
+
 void
 CodeEmitterNV50::emitSET(const Instruction *i)
 {
@@ -1294,6 +1316,10 @@ CodeEmitterNV50::emitSET(const Instruction *i)
    if (i->src(1).mod.abs()) code[1] |= 0x00080000;
 
    emitForm_MAD(i);
+
+   if (i->subOp == 1) {
+      addInterp(0, 0, alphatestSet);
+   }
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 825a44f..10eeed3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1006,7 +1006,7 @@ bool Source::scanSource()
 
    if (info->type == PIPE_SHADER_FRAGMENT) {
       info->prop.fp.writesDepth = scan.writes_z;
-      info->prop.fp.usesDiscard = scan.uses_kill;
+      info->prop.fp.usesDiscard = scan.uses_kill || info->io.alphaRefBase;
    } else
    if (info->type == PIPE_SHADER_GEOMETRY) {
       info->prop.gp.instanceCount = 1; // default value
@@ -3819,6 +3819,27 @@ void
 Converter::exportOutputs()
 {
    for (unsigned int i = 0; i < info->numOutputs; ++i) {
+      if (info->out[i].sn != TGSI_SEMANTIC_COLOR ||
+          info->out[i].si != 0 ||
+          !info->io.alphaRefBase)
+         continue;
+      const unsigned int c = 3;
+      if (!oData.exists(sub.cur->values, i, c))
+         continue;
+      Value *val = oData.load(sub.cur->values, i, c, NULL);
+      if (!val)
+         continue;
+
+      Symbol *ref = mkSymbol(FILE_MEMORY_CONST, info->io.auxCBSlot,
+                             TYPE_U32, info->io.alphaRefBase);
+      Value *pred = new_LValue(func, FILE_PREDICATE);
+      mkCmp(OP_SET, CC_TR, TYPE_U32, pred, TYPE_F32, val,
+            mkLoadv(TYPE_U32, ref, NULL))
+         ->subOp = 1;
+      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_NOT_P, pred);
+   }
+
+   for (unsigned int i = 0; i < info->numOutputs; ++i) {
       for (unsigned int c = 0; c < 4; ++c) {
          if (!oData.exists(sub.cur->values, i, c))
             continue;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 3213188..a7f4fcf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -196,6 +196,8 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
          return;
    if (insn->src(1).getFile() != FILE_GPR)
       return;
+   if (insn->subOp)
+      return;
 
    Instruction *i0 = insn->getSrc(0)->getInsn();
    Instruction *i1 = insn->getSrc(1)->getInsn();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 08aac00..7d7b315 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -506,14 +506,16 @@ nv50_ir_relocate_code(void *relocData, uint32_t *code,
 
 void
 nv50_ir_apply_fixups(void *fixupData, uint32_t *code,
-                     bool force_persample_interp, bool flatshade)
+                     bool force_persample_interp, bool flatshade,
+                     uint8_t alphatest)
 {
    nv50_ir::FixupInfo *info = reinterpret_cast<nv50_ir::FixupInfo *>(
       fixupData);
 
    // force_persample_interp: all non-flat -> per-sample
    // flatshade: all color -> flat
-   nv50_ir::FixupData data(force_persample_interp, flatshade);
+   // alphatest: PIPE_FUNC_* to use with alphatest
+   nv50_ir::FixupData data(force_persample_interp, flatshade, alphatest);
    for (unsigned i = 0; i < info->count; ++i)
       info->entry[i].apply(&info->entry[i], code, data);
 }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index 6bf1a5c..4a701f7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -59,10 +59,11 @@ struct RelocInfo
 };
 
 struct FixupData {
-   FixupData(bool force, bool flat) :
-      force_persample_interp(force), flatshade(flat) {}
+   FixupData(bool force, bool flat, uint8_t alphatest) :
+      force_persample_interp(force), flatshade(flat), alphatest(alphatest) {}
    bool force_persample_interp;
    bool flatshade;
+   uint8_t alphatest;
 };
 
 struct FixupEntry;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index cb94c8e..cca44f5 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -97,7 +97,10 @@
 /* Sample position pairs for the current output MS level */
 #define NV50_CB_AUX_SAMPLE_OFFSET 0x300
 #define NV50_CB_AUX_SAMPLE_OFFSET_SIZE (4 * 8 * 2)
-/* next spot: 0x340 */
+/* Alpha test ref value */
+#define NV50_CB_AUX_ALPHATEST_OFFSET 0x340
+#define NV50_CB_AUX_ALPHATEST_SIZE (4)
+/* next spot: 0x344 */
 /* 4 32-bit floats for the vertex runout, put at the end */
 #define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10)
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 4c2f853..0b22248 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -335,6 +335,8 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
    info->io.auxCBSlot = 15;
    info->io.ucpBase = NV50_CB_AUX_UCP_OFFSET;
    info->io.genUserClip = prog->vp.clpd_nr;
+   if (prog->fp.alphatest)
+      info->io.alphaRefBase = NV50_CB_AUX_ALPHATEST_OFFSET;
 
    info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
    info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
@@ -489,7 +491,8 @@ nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog)
    if (prog->interps)
       nv50_ir_apply_fixups(prog->interps, prog->code,
                            prog->fp.force_persample_interp,
-                           false /* flatshade */);
+                           false /* flatshade */,
+                           prog->fp.alphatest - 1);
 
    nv50_sifc_linear_u8(&nv50->base, nv50->screen->code,
                        (prog_type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base,
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 0a22e5b..fc9ada4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -90,6 +90,7 @@ struct nv50_program {
       uint32_t colors; /* 0x1904 */
       uint8_t has_samplemask;
       uint8_t force_persample_interp;
+      uint8_t alphatest;
    } fp;
 
    struct {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 2326394..e5c9a78 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -174,6 +174,39 @@ nv50_fragprog_validate(struct nv50_context *nv50)
    struct nv50_program *fp = nv50->fragprog;
    struct pipe_rasterizer_state *rast = &nv50->rast->pipe;
 
+   if (nv50->zsa && nv50->zsa->pipe.alpha.enabled) {
+      struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+      /* If we already have alphatest code, we have to keep updating
+       * it. Otherwise determine if it's needed based on whether RT0 is
+       * blendable and regenerate the shader with the discard.
+       */
+      if (fp->fp.alphatest ||
+          (fb->nr_cbufs > 0 && fb->cbufs[0] && fb->cbufs[0]->texture &&
+           !nv50->screen->base.base.is_format_supported(
+                 &nv50->screen->base.base,
+                 fb->cbufs[0]->format,
+                 fb->cbufs[0]->texture->target,
+                 fb->cbufs[0]->texture->nr_samples,
+                 PIPE_BIND_BLENDABLE))) {
+
+         if (!fp->fp.alphatest)
+            nv50_program_destroy(nv50, fp);
+         else if (fp->mem && fp->fp.alphatest != nv50->zsa->pipe.alpha.func + 1)
+            nouveau_heap_free(&fp->mem);
+
+         fp->fp.alphatest = nv50->zsa->pipe.alpha.func + 1;
+      }
+   } else if (fp->fp.alphatest && fp->fp.alphatest != PIPE_FUNC_ALWAYS + 1) {
+      /* Alpha test is disabled but we have a shader where it's filled
+       * in. Make sure to reset the function to 'always', otherwise it'll end
+       * up discarding fragments incorrectly.
+       */
+      if (fp->mem)
+         nouveau_heap_free(&fp->mem);
+
+      fp->fp.alphatest = PIPE_FUNC_ALWAYS + 1;
+   }
+
    if (fp->fp.force_persample_interp != rast->force_persample_interp) {
       /* Force the program to be reuploaded, which will trigger interp fixups
        * to get applied
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index a84c9e2..e1c66dc 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -416,6 +416,11 @@ nv50_zsa_state_create(struct pipe_context *pipe,
       SB_DATA    (so, 0);
    }
 
+   SB_BEGIN_3D(so, CB_ADDR, 1);
+   SB_DATA    (so, NV50_CB_AUX_ALPHATEST_OFFSET << (8 - 2) | NV50_CB_AUX);
+   SB_BEGIN_3D(so, CB_DATA(0), 1);
+   SB_DATA    (so, fui(cso->alpha.ref_value));
+
    assert(so->size <= ARRAY_SIZE(so->state));
    return (void *)so;
 }
@@ -424,9 +429,13 @@ static void
 nv50_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
 {
    struct nv50_context *nv50 = nv50_context(pipe);
+   struct nv50_zsa_stateobj *so = hwcso;
 
    nv50->zsa = hwcso;
    nv50->dirty_3d |= NV50_NEW_3D_ZSA;
+
+   if (so && so->pipe.alpha.enabled)
+      nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
 }
 
 static void
@@ -951,6 +960,10 @@ nv50_set_framebuffer_state(struct pipe_context *pipe,
    util_copy_framebuffer_state(&nv50->framebuffer, fb);
 
    nv50->dirty_3d |= NV50_NEW_3D_FRAMEBUFFER;
+
+   /* We need to potentially upload a modified program depending on format */
+   if (nv50->zsa && nv50->zsa->pipe.alpha.enabled)
+      nv50->dirty_3d |= NV50_NEW_3D_FRAGPROG;
 }
 
 static void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
index b8fa0f6..9598b04 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -31,7 +31,7 @@ struct nv50_rasterizer_stateobj {
 struct nv50_zsa_stateobj {
    struct pipe_depth_stencil_alpha_state pipe;
    int size;
-   uint32_t state[34];
+   uint32_t state[38];
 };
 
 struct nv50_constbuf {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 5b3caca..5fc2753 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -748,7 +748,8 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
    if (prog->fixups) {
       nv50_ir_apply_fixups(prog->fixups, prog->code,
                            prog->fp.force_persample_interp,
-                           prog->fp.flatshade);
+                           prog->fp.flatshade,
+                           0 /* alphatest */);
       for (int i = 0; i < 2; i++) {
          unsigned mask = prog->fp.color_interp[i] >> 4;
          unsigned interp = prog->fp.color_interp[i] & 3;
-- 
2.7.3



More information about the mesa-dev mailing list