[Mesa-dev] [PATCH 2/2] nvc0: fix gl_SampleMaskIn computation

Ilia Mirkin imirkin at alum.mit.edu
Sat May 7 20:55:23 UTC 2016


The SAMPLEMASK semantic should only return the bits set covered by the
current invocation. However we were always retrieving the covmask, which
returns the covered samples of the whole pixel.

When not doing per-sample invocation, this is precisely what we want.
However when doing per-sample invocation, we have to select the
sampleid'th bit and only return that. Furthermore, this means that we
have to have a 1:1 correlation for invocations and samples.

This fixes most

dEQP-GLES31.functional.shaders.sample_variables.sample_mask_in.*

tests. A few failures remain due to disagreements about nr_samples==1
logic as well as what happens with MSAA x2 RTs when the shading fraction
is 0.5.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |  2 ++
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 14 ++++++++++
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 15 +++++++++++
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 14 ++++++++++
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  7 +++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 31 +++++++++++++++++++---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |  1 +
 .../drivers/nouveau/codegen/nv50_ir_target.h       |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h    |  1 +
 .../drivers/nouveau/nvc0/nvc0_state_validate.c     | 11 ++++++--
 11 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 16dc1d1..1f7de51 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -146,6 +146,8 @@ struct nv50_ir_prog_info
          bool earlyFragTests;
          bool separateFragData;
          bool usesDiscard;
+         bool persampleInvocation;
+         bool usesSampleMaskIn;
       } fp;
       struct {
          uint32_t inputOffset; /* base address for user args */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 99f8ab7..2838ae2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1122,12 +1122,26 @@ CodeEmitterGK110::emitSLCT(const CmpInstruction *i)
    }
 }
 
+static void
+selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   int loc = entry->loc;
+   if (data.force_persample_interp)
+      code[loc + 1] |= 1 << 13;
+   else
+      code[loc + 1] &= ~(1 << 13);
+}
+
 void CodeEmitterGK110::emitSELP(const Instruction *i)
 {
    emitForm_21(i, 0x250, 0x050);
 
    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
       code[1] |= 1 << 13;
+
+   if (i->subOp == 1) {
+      addInterp(0, 0, selpFlip);
+   }
 }
 
 void CodeEmitterGK110::emitTEXBAR(const Instruction *i)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 9dc2e30..a43d7b1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -894,6 +894,16 @@ CodeEmitterGM107::emitI2I()
    emitGPR  (0x00, insn->def(0));
 }
 
+static void
+selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   int loc = entry->loc;
+   if (data.force_persample_interp)
+      code[loc + 1] |= 1 << 10;
+   else
+      code[loc + 1] &= ~(1 << 10);
+}
+
 void
 CodeEmitterGM107::emitSEL()
 {
@@ -915,9 +925,14 @@ CodeEmitterGM107::emitSEL()
       break;
    }
 
+   emitINV (0x2a, insn->src(2));
    emitPRED(0x27, insn->src(2));
    emitGPR (0x08, insn->src(0));
    emitGPR (0x00, insn->def(0));
+
+   if (insn->subOp == 1) {
+      addInterp(0, 0, selpFlip);
+   }
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 8819e3b..14f4be4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1177,12 +1177,26 @@ CodeEmitterNVC0::emitSLCT(const CmpInstruction *i)
       code[0] |= 1 << 5;
 }
 
+static void
+selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   int loc = entry->loc;
+   if (data.force_persample_interp)
+      code[loc + 1] |= 1 << 20;
+   else
+      code[loc + 1] &= ~(1 << 20);
+}
+
 void CodeEmitterNVC0::emitSELP(const Instruction *i)
 {
    emitForm_A(i, HEX64(20000000, 00000004));
 
    if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT))
       code[1] |= 1 << 20;
+
+   if (i->subOp == 1) {
+      addInterp(0, 0, selpFlip);
+   }
 }
 
 void CodeEmitterNVC0::emitTEXBAR(const Instruction *i)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 3708f37..8f5a502 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1268,6 +1268,13 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
       case TGSI_SEMANTIC_DRAWID:
          info->prop.vp.usesDrawParameters = true;
          break;
+      case TGSI_SEMANTIC_SAMPLEID:
+      case TGSI_SEMANTIC_SAMPLEPOS:
+         info->prop.fp.persampleInvocation = true;
+         break;
+      case TGSI_SEMANTIC_SAMPLEMASK:
+         info->prop.fp.usesSampleMaskIn = true;
+         break;
       default:
          break;
       }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 3bce962..1068c21 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -153,6 +153,7 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
 NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
    : rZero(NULL),
      carry(NULL),
+     pOne(NULL),
      needTexBar(prog->getTarget()->getChipset() >= 0xe0)
 {
 }
@@ -451,10 +452,12 @@ NVC0LegalizePostRA::visit(Function *fn)
       insertTextureBarriers(fn);
 
    rZero = new_LValue(fn, FILE_GPR);
+   pOne = new_LValue(fn, FILE_PREDICATE);
    carry = new_LValue(fn, FILE_FLAGS);
 
    rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
    carry->reg.data.id = 0;
+   pOne->reg.data.id = 7;
 
    return true;
 }
@@ -466,8 +469,15 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
       if (s == 2 && i->op == OP_SUCLAMP)
          continue;
       ImmediateValue *imm = i->getSrc(s)->asImm();
-      if (imm && imm->reg.data.u64 == 0)
-         i->setSrc(s, rZero);
+      if (imm) {
+         if (i->op == OP_SELP && s == 2) {
+            i->setSrc(s, pOne);
+            if (imm->reg.data.u64 == 0)
+               i->src(s).mod = i->src(s).mod ^ Modifier(NV50_IR_MOD_NOT);
+         } else if (imm->reg.data.u64 == 0) {
+            i->setSrc(s, rZero);
+         }
+      }
    }
 }
 
@@ -2204,10 +2214,25 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
                  off);
       break;
    }
-   case SV_SAMPLE_MASK:
+   case SV_SAMPLE_MASK: {
       ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
       ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK;
+      Instruction *sampleid =
+         bld.mkOp1(OP_PIXLD, TYPE_U32, bld.getSSA(), bld.mkImm(0));
+      sampleid->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
+      Value *masked =
+         bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ld->getDef(0),
+                    bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                               bld.loadImm(NULL, 1), sampleid->getDef(0)));
+      if (prog->driver->prop.fp.persampleInvocation) {
+         bld.mkMov(i->getDef(0), masked);
+      } else {
+         bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), ld->getDef(0), masked,
+                   bld.mkImm(0))
+            ->subOp = 1;
+      }
       break;
+   }
    case SV_BASEVERTEX:
    case SV_BASEINSTANCE:
    case SV_DRAWID:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 17883a9..c007e09 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -79,6 +79,7 @@ private:
 private:
    LValue *rZero;
    LValue *carry;
+   LValue *pOne;
    const bool needTexBar;
 };
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index 674bdc6..def62c1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -62,6 +62,7 @@ struct FixupData {
    FixupData(bool force, bool flat) :
       force_persample_interp(force), flatshade(flat) {}
    bool force_persample_interp;
+   bool persample_invoc;
    bool flatshade;
 };
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 944efa0..9db45c0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -464,6 +464,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
       fp->hdr[18] |= 0xf;
 
    fp->fp.early_z = info->prop.fp.earlyFragTests;
+   fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
 
    return 0;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index bd852e2..08af3c8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -48,6 +48,7 @@ struct nvc0_program {
       uint8_t early_z;
       uint8_t colors;
       uint8_t color_interp[2];
+      bool sample_mask_in;
       bool force_persample_interp;
       bool flatshade;
    } fp;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index e8d4172..e1de140 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -1,5 +1,6 @@
 
 #include "util/u_format.h"
+#include "util/u_framebuffer.h"
 #include "util/u_math.h"
 
 #include "nvc0/nvc0_context.h"
@@ -551,8 +552,14 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
    int samples;
 
    samples = util_next_power_of_two(nvc0->min_samples);
-   if (samples > 1)
+   if (samples > 1) {
+      // If we're using the incoming sample mask and doing sample shading, we
+      // have to do sample shading "to the max", otherwise there's no way to
+      // tell which sets of samples are covered by the current invocation.
+      if (nvc0->fragprog->fp.sample_mask_in)
+         samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
       samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
+   }
 
    IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples);
 }
@@ -708,6 +715,7 @@ validate_list_3d[] = {
     { nvc0_tevlprog_validate,      NVC0_NEW_3D_TEVLPROG },
     { nvc0_validate_tess_state,    NVC0_NEW_3D_TESSFACTOR },
     { nvc0_gmtyprog_validate,      NVC0_NEW_3D_GMTYPROG },
+    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES | NVC0_NEW_3D_FRAGPROG },
     { nvc0_fragprog_validate,      NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER },
     { nvc0_validate_derived_1,     NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA |
                                    NVC0_NEW_3D_RASTERIZER },
@@ -726,7 +734,6 @@ validate_list_3d[] = {
     { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
     { nvc0_idxbuf_validate,        NVC0_NEW_3D_IDXBUF },
     { nvc0_tfb_validate,           NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG },
-    { nvc0_validate_min_samples,   NVC0_NEW_3D_MIN_SAMPLES },
     { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
 };
 
-- 
2.7.3



More information about the mesa-dev mailing list