[Mesa-dev] [PATCH 7/7] nvc0: enable FBFETCH with a special slot for color buffer 0

Ilia Mirkin imirkin at alum.mit.edu
Mon Jan 2 06:01:38 UTC 2017


We don't need to support all the color buffers for advanced blend, just
cb0. For Fermi, we use the special binding slots so that we don't
overlap with user textures, while Kepler+ gets a dedicated position for
the fb handle in the driver constbuf.

This logic is only triggered when a framebuffer read (FBFETCH) is actually
present, so it should be a no-op most of the time.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---

Firstly, I'm not 100% sure that the default_tsc is needed. I had temporary
failures with sRGB decoding that went away on later runs, and I was unable
to reproduce them even by forcing sRGB conversion to be disabled in the
sampler. And TXF shouldn't need samplers. However, I remember a weird situation
where TXF was producing failures without a sampler bound.

Secondly, this still needs to be tested on Fermi. All the dEQP advanced blend
tests pass on Kepler, though.

 docs/features.txt                                  |  2 +-
 docs/relnotes/13.1.0.html                          |  1 +
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |  2 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 41 ++++++++++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 21 +++--
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  4 +
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |  6 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h    |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  6 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h     |  2 +
 .../drivers/nouveau/nvc0/nvc0_state_validate.c     | 92 +++++++++++++++++++++-
 11 files changed, 170 insertions(+), 8 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index c27d521..60cb68c 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -253,7 +253,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
 GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
 
   GL_EXT_color_buffer_float                             DONE (all drivers)
-  GL_KHR_blend_equation_advanced                        DONE (i965)
+  GL_KHR_blend_equation_advanced                        DONE (i965, nvc0)
   GL_KHR_debug                                          DONE (all drivers)
   GL_KHR_robustness                                     DONE (i965, nvc0, radeonsi)
   GL_KHR_texture_compression_astc_ldr                   DONE (i965/gen9+)
diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
index 4dce843..be2c206 100644
--- a/docs/relnotes/13.1.0.html
+++ b/docs/relnotes/13.1.0.html
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>GL_ARB_post_depth_coverage on i965/gen9+</li>
+<li>GL_KHR_blend_equation_advanced on nvc0</li>
 <li>GL_INTEL_conservative_rasterization on i965/gen9+</li>
 <li>GL_NV_image_formats on any driver supporting GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)</li>
 </ul>
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 9fdabcc..fbb692d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -146,6 +146,7 @@ struct nv50_ir_prog_info
          bool usesDiscard;
          bool persampleInvocation;
          bool usesSampleMaskIn;
+         bool readsFramebuffer;
       } fp;
       struct {
          uint32_t inputOffset; /* base address for user args */
@@ -179,6 +180,7 @@ struct nv50_ir_prog_info
       bool nv50styleSurfaces;    /* generate gX[] access for raw buffers */
       bool halfPixelCenter;      /* externally set half pixel center state */
       uint16_t texBindBase;      /* base address for tex handles (nve4) */
+      uint16_t fbtexBindBase;    /* base address for fbtex handle (nve4) */
       uint16_t suInfoBase;       /* base address for surface info (nve4) */
       uint16_t bufInfoBase;      /* base address for buffer info */
       uint16_t sampleInfoBase;   /* base address for sample positions */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index d4e29a0..84d04d1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1460,6 +1460,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
    if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
       info->numBarriers = 1;
 
+   if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
+      info->prop.fp.readsFramebuffer = true;
+
    if (insn.dstCount()) {
       Instruction::DstRegister dst = insn.getDst(0);
 
@@ -1575,6 +1578,7 @@ private:
    void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
    void handleTXF(Value *dst0[4], int R, int L_M);
    void handleTXQ(Value *dst0[4], enum TexQuery, int R);
+   void handleFBFETCH(Value *dst0[4]);
    void handleLIT(Value *dst0[4]);
    void handleUserClipPlanes();
 
@@ -2294,6 +2298,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M)
 }
 
 void
+Converter::handleFBFETCH(Value *dst[4]) // Emit a TXF at the current fragment's position/layer/sample for TGSI FBFETCH.
+{
+   TexInstruction *texi = new_TexInstruction(func, OP_TXF);
+   unsigned int c, d;
+
+   texi->tex.target = TEX_TARGET_2D_MS_ARRAY; // coordinates below are x, y, layer, sample
+   texi->tex.levelZero = 1; // NVC0LegalizeSSA::handleTEXLOD returns early when this is set
+   texi->tex.useOffsets = 0;
+
+   for (c = 0, d = 0; c < 4; ++c) { // pack defs densely; mask records which channels are wanted
+      if (dst[c]) {
+         texi->setDef(d++, dst[c]);
+         texi->tex.mask |= 1 << c;
+      }
+   }
+
+   Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
+   Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
+   Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
+   Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
+
+   mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z; // float position -> U32 texel coordinate, in place
+   mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
+   texi->setSrc(0, x);
+   texi->setSrc(1, y);
+   texi->setSrc(2, z);
+   texi->setSrc(3, ms);
+
+   texi->tex.r = texi->tex.s = -1; // no user binding; presumably seen as the 0xffff value tested in NVC0LoweringPass::handleTEX - confirm field width
+
+   bb->insertTail(texi);
+}
+
+void
 Converter::handleLIT(Value *dst0[4])
 {
    Value *val0 = NULL;
@@ -3336,6 +3374,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
       handleTXQ(dst0, TXQ_TYPE, 0);
       std::swap(dst0[0], dst0[2]);
       break;
+   case TGSI_OPCODE_FBFETCH:
+      handleFBFETCH(dst0);
+      break;
    case TGSI_OPCODE_F2I:
    case TGSI_OPCODE_F2U:
       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index fe18f47..0d1ca2d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -118,7 +118,7 @@ NVC0LegalizeSSA::handleFTZ(Instruction *i)
 void
 NVC0LegalizeSSA::handleTEXLOD(TexInstruction *i)
 {
-   if (i->tex.target.isMS())
+   if (i->tex.levelZero)
       return;
 
    ImmediateValue lod;
@@ -748,7 +748,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
          i->setIndirectR(hnd);
          i->setIndirectS(NULL);
       } else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
-         i->tex.r += prog->driver->io.texBindBase / 4;
+         if (i->tex.r == 0xffff)
+            i->tex.r = prog->driver->io.fbtexBindBase / 4;
+         else
+            i->tex.r += prog->driver->io.texBindBase / 4;
          i->tex.s  = 0; // only a single cX[] value possible here
       } else {
          Value *hnd = bld.getScratch();
@@ -804,6 +807,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
       Value *ticRel = i->getIndirectR();
       Value *tscRel = i->getIndirectS();
 
+      if (i->tex.r == 0xffff) {
+         i->tex.r = i->tex.s = 0x10;
+      }
+
       if (ticRel) {
          i->setSrc(i->tex.rIndirectSrc, NULL);
          if (i->tex.r)
@@ -2506,9 +2513,13 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
    default:
       if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
          vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
-      ld = bld.mkFetch(i->getDef(0), i->dType,
-                       FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
-      ld->perPatch = i->perPatch;
+      if (prog->getType() == Program::TYPE_FRAGMENT) {
+         bld.mkInterp(NV50_IR_INTERP_FLAT, i->getDef(0), addr, NULL);
+      } else {
+         ld = bld.mkFetch(i->getDef(0), i->dType,
+                          FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
+         ld->perPatch = i->perPatch;
+      }
       break;
    }
    bld.getBB()->remove(i);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index dfe68de..ad21509 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -120,6 +120,9 @@
 /* block/grid size, at 3 32-bits integers each, gridid and work_dim */
 #define NVC0_CB_AUX_GRID_INFO(i)    0x100 + (i) * 4 /* CP */
 #define NVC0_CB_AUX_GRID_SIZE       (8 * 4)
+/* FB texture handle */
+#define NVC0_CB_AUX_FB_TEX_INFO     0x100 /* FP */
+#define NVC0_CB_AUX_FB_TEX_SIZE     (4)
 /* 8 user clip planes, at 4 32-bits floats each */
 #define NVC0_CB_AUX_UCP_INFO        0x120
 #define NVC0_CB_AUX_UCP_SIZE        (PIPE_MAX_CLIP_PLANES * 4 * 4)
@@ -206,6 +209,7 @@ struct nvc0_context {
    unsigned num_samplers[6];
    uint32_t samplers_dirty[6];
    bool seamless_cube_map;
+   struct pipe_sampler_view *fbtexture;
 
    uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 50f8083..ccd859e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -486,6 +486,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
 
    fp->fp.early_z = info->prop.fp.earlyFragTests;
    fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
+   fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
+
+   /* Mark position xy and layer as read */
+   if (fp->fp.reads_framebuffer)
+      fp->hdr[5] |= 0x32000000;
 
    return 0;
 }
@@ -586,6 +591,7 @@ nvc0_program_translate(struct nvc0_program *prog,
    info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
    if (info->target >= NVISA_GK104_CHIPSET) {
       info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
+      info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
    }
 
    if (prog->type == PIPE_SHADER_COMPUTE) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index d33aa04..421ca19 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -49,6 +49,7 @@ struct nvc0_program {
       bool sample_mask_in;
       bool force_persample_interp;
       bool flatshade;
+      bool reads_framebuffer;
    } fp;
    struct {
       uint32_t tess_mode; /* ~0 if defined by the other stage */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index fe637af..ec7bfe0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -242,6 +242,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TGSI_VOTE:
    case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
    case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+   case PIPE_CAP_TGSI_FS_FBFETCH:
       return 1;
    case PIPE_CAP_COMPUTE:
       return (class_3d < GP100_3D_CLASS);
@@ -276,7 +277,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
    case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
    case PIPE_CAP_NATIVE_FENCE_FD:
-   case PIPE_CAP_TGSI_FS_FBFETCH:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -537,6 +537,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
    nouveau_heap_destroy(&screen->lib_code);
    nouveau_heap_destroy(&screen->text_heap);
 
+   FREE(screen->default_tsc);
    FREE(screen->tic.entries);
 
    nouveau_object_del(&screen->eng3d);
@@ -1228,6 +1229,9 @@ nvc0_screen_create(struct nouveau_device *dev)
    if (!nvc0_blitter_create(screen))
       goto fail;
 
+   screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry);
+   screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION;
+
    nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
 
    return &screen->base;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index aff0308..a6d4a2b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -81,6 +81,8 @@ struct nvc0_screen {
 
    struct nvc0_blitter *blitter;
 
+   struct nv50_tsc_entry *default_tsc;
+
    struct {
       void **entries;
       int next;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index f5060cc..35ae1b4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -605,7 +605,9 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
       // If we're using the incoming sample mask and doing sample shading, we
       // have to do sample shading "to the max", otherwise there's no way to
       // tell which sets of samples are covered by the current invocation.
-      if (nvc0->fragprog->fp.sample_mask_in)
+      // Similarly for reading the framebuffer.
+      if (nvc0->fragprog->fp.sample_mask_in ||
+          nvc0->fragprog->fp.reads_framebuffer)
          samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
       samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
    }
@@ -701,6 +703,92 @@ nvc0_validate_tess_state(struct nvc0_context *nvc0)
    PUSH_DATAp(push, nvc0->default_tess_inner, 2);
 }
 
+/* If the bound frag shader reads from the framebuffer, make sure that color
+ * buffer 0 is bound as a texture in the expected location. For Fermi, that's
+ * the special driver slot 16, while for Kepler it's a regular binding stored
+ * in the driver constbuf. Listed in validate_list_3d for FRAGPROG/FRAMEBUFFER.
+ */
+static void
+nvc0_validate_fbread(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   struct pipe_context *pipe = &nvc0->base.pipe;
+   struct pipe_sampler_view *old_view = nvc0->fbtexture; /* view stored by an earlier call, if any */
+   struct pipe_sampler_view *new_view = NULL;
+
+   if (nvc0->fragprog->fp.reads_framebuffer && /* NOTE(review): dereferences fragprog unconditionally - confirm it cannot be NULL here */
+       nvc0->framebuffer.nr_cbufs &&
+       nvc0->framebuffer.cbufs[0]) {
+      struct pipe_sampler_view tmpl; /* only the fields assigned below are initialized */
+      struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
+
+      tmpl.target = PIPE_TEXTURE_2D_ARRAY;
+      tmpl.format = sf->format;
+      tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level; /* expose only the surface's level */
+      tmpl.u.tex.first_layer = sf->u.tex.first_layer;
+      tmpl.u.tex.last_layer = sf->u.tex.last_layer;
+      tmpl.swizzle_r = PIPE_SWIZZLE_X;
+      tmpl.swizzle_g = PIPE_SWIZZLE_Y;
+      tmpl.swizzle_b = PIPE_SWIZZLE_Z;
+      tmpl.swizzle_a = PIPE_SWIZZLE_W;
+
+      /* Nothing to do if the existing view already matches this surface */
+      if (old_view && old_view->texture == sf->texture &&
+          old_view->format == sf->format &&
+          old_view->u.tex.first_level == sf->u.tex.level &&
+          old_view->u.tex.first_layer == sf->u.tex.first_layer &&
+          old_view->u.tex.last_layer == sf->u.tex.last_layer)
+         return;
+
+      new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
+   } else if (old_view == NULL) { /* nothing bound and nothing to bind */
+      return;
+   }
+
+   if (old_view)
+      pipe_sampler_view_reference(&nvc0->fbtexture, NULL); /* release the previous view */
+   nvc0->fbtexture = new_view; /* may be NULL when the shader no longer reads the fb */
+
+   if (screen->default_tsc->id < 0) { /* NOTE(review): default_tsc is CALLOC'd in nvc0_screen_create, so id starts at 0 unless set elsewhere - confirm this branch is reachable */
+      struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc);
+      tsc->id = nvc0_screen_tsc_alloc(screen, tsc);
+      nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32, /* 32-byte TSC entry at offset 65536 + id * 32 in txc */
+                           NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc);
+      screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32); /* set this id's bit in the lock bitmask */
+
+      IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
+      if (screen->base.class_3d < NVE4_3D_CLASS) { /* pre-NVE4 classes bind the sampler explicitly */
+         BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1);
+         PUSH_DATA (push, (tsc->id << 12) | 1);
+      }
+   }
+
+   if (new_view) {
+      struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
+      assert(tic->id < 0); /* a freshly created view is expected to have no TIC id yet */
+      tic->id = nvc0_screen_tic_alloc(screen, tic);
+      nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32, /* 32-byte TIC entry at offset id * 32 in txc */
+                           NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
+      screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32); /* set this id's bit in the lock bitmask */
+
+      if (screen->base.class_3d >= NVE4_3D_CLASS) { /* NVE4+: write the handle into the aux constbuf */
+         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); /* presumably index 4 is the fragment stage - confirm */
+         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
+         PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
+         PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id); /* handle word: tsc id shifted left 20, OR'd with tic id */
+      } else { /* older classes bind the view through BIND_TIC2(0) */
+         BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
+         PUSH_DATA (push, (tic->id << 9) | 1);
+      }
+
+      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
+   }
+}
+
 static void
 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
 {
@@ -782,6 +870,8 @@ validate_list_3d[] = {
     { nvc0_validate_textures,      NVC0_NEW_3D_TEXTURES },
     { nvc0_validate_samplers,      NVC0_NEW_3D_SAMPLERS },
     { nve4_set_tex_handles,        NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
+    { nvc0_validate_fbread,        NVC0_NEW_3D_FRAGPROG |
+                                   NVC0_NEW_3D_FRAMEBUFFER },
     { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
     { nvc0_validate_surfaces,      NVC0_NEW_3D_SURFACES },
     { nvc0_validate_buffers,       NVC0_NEW_3D_BUFFERS },
-- 
2.10.2



More information about the mesa-dev mailing list