[Mesa-dev] [PATCH v3 4/5] nvc0: add support for programmable sample locations

Rhys Perry pendingchaos02 at gmail.com
Fri Jun 1 15:08:21 UTC 2018


Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |   2 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |   7 +
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 102 ++++++++++++--
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |   2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  15 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |   3 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  33 ++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c      |  17 ++-
 .../drivers/nouveau/nvc0/nvc0_state_validate.c     | 152 +++++++++++++++++----
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c    |  12 ++
 10 files changed, 299 insertions(+), 46 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 3d0782f86b..7c835ceab8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol
 #define NVISA_GK104_CHIPSET    0xe0
 #define NVISA_GK20A_CHIPSET    0xea
 #define NVISA_GM107_CHIPSET    0x110
+#define NVISA_GM200_CHIPSET    0x120
 
 struct nv50_ir_prog_info
 {
@@ -145,6 +146,7 @@ struct nv50_ir_prog_info
          bool persampleInvocation;
          bool usesSampleMaskIn;
          bool readsFramebuffer;
+         bool readsSampleLocations;
       } fp;
       struct {
          uint32_t inputOffset; /* base address for user args */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 3c5bad05fe..d7844d7381 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn,
          info->out[src.getIndex(0)].oread = 1;
       }
    }
+   if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
+      if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
+         info->prop.fp.readsSampleLocations = true;
+   }
    if (src.getFile() != TGSI_FILE_INPUT)
       return;
 
@@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
    if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
       info->prop.fp.readsFramebuffer = true;
 
+   if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
+      info->prop.fp.readsSampleLocations = true;
+
    if (insn.dstCount()) {
       Instruction::DstRegister dst = insn.getDst(0);
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 29f674b451..5723847234 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
       ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
       break;
    case SV_SAMPLE_POS: {
-      Value *off = new_LValue(func, FILE_GPR);
-      ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
+      Value *sampleID = bld.getScratch();
+      ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0));
       ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
-      bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
-      bld.mkLoad(TYPE_F32,
-                 i->getDef(0),
-                 bld.mkSymbol(
-                       FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
-                       TYPE_U32, prog->driver->io.sampleInfoBase +
-                       4 * sym->reg.data.sv.index),
-                 off);
+      Value *offset = calculateSampleOffset(sampleID);
+
+      assert(prog->driver->prop.fp.readsSampleLocations);
+
+      if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+         bld.mkLoad(TYPE_F32,
+                    i->getDef(0),
+                    bld.mkSymbol(
+                          FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+                          TYPE_U32, prog->driver->io.sampleInfoBase),
+                    offset);
+         bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0),
+                   bld.mkImm(0x040c + sym->reg.data.sv.index * 16));
+         bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0));
+         bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f));
+      } else {
+         bld.mkLoad(TYPE_F32,
+                    i->getDef(0),
+                    bld.mkSymbol(
+                          FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+                          TYPE_U32, prog->driver->io.sampleInfoBase +
+                          4 * sym->reg.data.sv.index),
+                    offset);
+      }
       break;
    }
    case SV_SAMPLE_MASK: {
@@ -2832,6 +2848,69 @@ NVC0LoweringPass::handleOUT(Instruction *i)
    return true;
 }
 
+Value *
+NVC0LoweringPass::calculateSampleOffset(Value *sampleID)
+{
+   Value *offset = bld.getScratch();
+   if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+      // Sample location offsets (in bytes) are calculated like so:
+      // offset = (SV_POSITION.y % 4 * 2) + (SV_POSITION.x % 2)
+      // offset = offset * 32 + sampleID % 8 * 4;
+      // which is equivalent to:
+      // offset = (SV_POSITION.y & 0x3) << 6 + (SV_POSITION.x & 0x1) << 5;
+      // offset += sampleID << 2
+
+      // The second operand (src1) of the INSBF instructions are like so:
+      // 0xssll where ss is the size and ll is the offset.
+      // so: dest = src2 | (src0 & (1 << ss - 1)) << ll
+
+      // Add sample ID (offset = (sampleID & 0x7) << 2)
+      bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0));
+
+      Symbol *xSym = bld.mkSysVal(SV_POSITION, 0);
+      Symbol *ySym = bld.mkSysVal(SV_POSITION, 1);
+      Value *coord = bld.getScratch();
+
+      // Add X coordinate (offset |= (SV_POSITION.x & 0x1) << 5)
+      bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+                   targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL);
+      bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+         ->rnd = ROUND_ZI;
+      bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset);
+
+      // Add Y coordinate (offset |= (SV_POSITION.y & 0x3) << 6)
+      bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+                   targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL);
+      bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+         ->rnd = ROUND_ZI;
+      bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset);
+   } else {
+      bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3));
+   }
+   return offset;
+}
+
+// Handle programmable sample locations for GM20x+
+void
+NVC0LoweringPass::handlePIXLD(Instruction *i)
+{
+   if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET)
+      return;
+   if (targ->getChipset() < NVISA_GM200_CHIPSET)
+      return;
+
+   assert(prog->driver->prop.fp.readsSampleLocations);
+
+   bld.mkLoad(TYPE_F32,
+              i->getDef(0),
+              bld.mkSymbol(
+                    FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+                    TYPE_U32, prog->driver->io.sampleInfoBase),
+              calculateSampleOffset(i->getSrc(0)));
+
+   bld.getBB()->remove(i);
+}
+
 // Generate a binary predicate if an instruction is predicated by
 // e.g. an f32 value.
 void
@@ -2931,6 +3010,9 @@ NVC0LoweringPass::visit(Instruction *i)
    case OP_BUFQ:
       handleBUFQ(i);
       break;
+   case OP_PIXLD:
+      handlePIXLD(i);
+      break;
    default:
       break;
    }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 1b2b36d3cc..91771fbf7e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -116,6 +116,7 @@ protected:
    void handleSharedATOMNVE4(Instruction *);
    void handleLDST(Instruction *);
    bool handleBUFQ(Instruction *);
+   void handlePIXLD(Instruction *);
 
    void checkPredicate(Instruction *);
 
@@ -142,6 +143,7 @@ private:
    void processSurfaceCoordsNVE4(TexInstruction *);
    void processSurfaceCoordsNVC0(TexInstruction *);
    void convertSurfaceFormat(TexInstruction *);
+   Value *calculateSampleOffset(Value *sampleID);
 
 protected:
    Value *loadTexHandle(Value *ptr, unsigned int slot);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 0729c88dff..77237a3c0a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -62,6 +62,8 @@
 #define NVC0_NEW_3D_DRIVERCONST  (1 << 27)
 #define NVC0_NEW_3D_WINDOW_RECTS (1 << 28)
 
+#define NVC0_NEW_3D_SAMPLE_LOCATIONS (1 << 29)
+
 #define NVC0_NEW_CP_PROGRAM   (1 << 0)
 #define NVC0_NEW_CP_SURFACES  (1 << 1)
 #define NVC0_NEW_CP_TEXTURES  (1 << 2)
@@ -134,20 +136,21 @@
 #define NVC0_CB_AUX_UBO_SIZE        ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
 /* 8 sets of 32-bits integer pairs sample offsets */
 #define NVC0_CB_AUX_SAMPLE_INFO     0x1a0 /* FP */
-#define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 4 * 2)
+/* 256 bytes, though only 64 bytes used before GM200 */
+#define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 2 * 4 * 4)
 /* draw parameters (index bais, base instance, drawid) */
 #define NVC0_CB_AUX_DRAW_INFO       0x1a0 /* VP */
 /* 32 user buffers, at 4 32-bits integers each */
-#define NVC0_CB_AUX_BUF_INFO(i)     0x220 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_INFO(i)     0x2a0 + (i) * 4 * 4
 #define NVC0_CB_AUX_BUF_SIZE        (NVC0_MAX_BUFFERS * 4 * 4)
 /* 8 surfaces, at 16 32-bits integers each */
-#define NVC0_CB_AUX_SU_INFO(i)      0x420 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_INFO(i)      0x4a0 + (i) * 16 * 4
 #define NVC0_CB_AUX_SU_SIZE         (NVC0_MAX_IMAGES * 16 * 4)
 /* 1 64-bits address and 1 32-bits sequence */
-#define NVC0_CB_AUX_MP_INFO         0x620
+#define NVC0_CB_AUX_MP_INFO         0x6a0
 #define NVC0_CB_AUX_MP_SIZE         3 * 4
 /* 512 64-byte blocks for bindless image handles */
-#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
 #define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
 /* 4 32-bits floats for the vertex runout, put at the end */
 #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
@@ -229,6 +232,8 @@ struct nvc0_context {
    struct list_head img_head;
 
    struct pipe_framebuffer_state framebuffer;
+   bool sample_locations_enabled;
+   uint8_t sample_locations[2 * 4 * 8];
    struct pipe_blend_color blend_colour;
    struct pipe_stencil_ref stencil_ref;
    struct pipe_poly_stipple stipple;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb..57d98753f4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
          }
       }
    }
+   /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
+   if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
+      fp->hdr[5] |= 0x30000000;
 
    for (i = 0; i < info->numOutputs; ++i) {
       if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 45ac7dd626..53108cf44c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -269,6 +269,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
    case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
    case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
+   case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
       return class_3d >= GM200_3D_CLASS;
    case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
       return class_3d >= GP100_3D_CLASS;
@@ -317,7 +318,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_CONSTBUF0_FLAGS:
    case PIPE_CAP_PACKED_UNIFORMS:
    case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
-   case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
@@ -543,6 +543,36 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
 #undef RET
 }
 
+static void
+nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen,
+                                  unsigned sample_count,
+                                  unsigned *width, unsigned *height)
+{
+   switch (sample_count) {
+   case 0:
+   case 1:
+      /* this could be 4x4, but the GL state tracker makes it difficult to
+       * create a 1x MSAA texture and smaller grids save CB space */
+      *width = 2;
+      *height = 4;
+      break;
+   case 2:
+      *width = 2;
+      *height = 4;
+      break;
+   case 4:
+      *width = 2;
+      *height = 2;
+      break;
+   case 8:
+      *width = 1;
+      *height = 2;
+      break;
+   default:
+      assert(0);
+   }
+}
+
 static void
 nvc0_screen_destroy(struct pipe_screen *pscreen)
 {
@@ -869,6 +899,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    pscreen->get_param = nvc0_screen_get_param;
    pscreen->get_shader_param = nvc0_screen_get_shader_param;
    pscreen->get_paramf = nvc0_screen_get_paramf;
+   pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid;
    pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
    pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info;
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 37dbbe66c7..d9ee62523b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -854,7 +854,21 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
 
     util_copy_framebuffer_state(&nvc0->framebuffer, fb);
 
-    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS;
+}
+
+static void
+nvc0_set_sample_locations(struct pipe_context *pipe,
+                          size_t size, const uint8_t *locations)
+{
+    struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+    nvc0->sample_locations_enabled = size && locations;
+    if (size > sizeof(nvc0->sample_locations))
+       size = sizeof(nvc0->sample_locations);
+    memcpy(nvc0->sample_locations, locations, size);
+
+    nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_LOCATIONS;
 }
 
 static void
@@ -1407,6 +1421,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
    pipe->set_min_samples = nvc0_set_min_samples;
    pipe->set_constant_buffer = nvc0_set_constant_buffer;
    pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+   pipe->set_sample_locations = nvc0_set_sample_locations;
    pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
    pipe->set_scissor_states = nvc0_set_scissor_states;
    pipe->set_viewport_states = nvc0_set_viewport_states;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 8e2192d3de..cc18f41c4b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -71,13 +71,132 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
    PUSH_DATA (push, 0);      // base layer
 }
 
+static uint32_t
+gm200_encode_cb_sample_location(uint8_t x, uint8_t y)
+{
+   static const uint8_t lut[] = {
+      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
+   uint32_t result = 0;
+   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
+   result |= lut[x] << 8 | lut[y] << 24;
+   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
+   result |= x << 12 | y << 28;
+   return result;
+}
+
+static void
+gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   unsigned grid_width, grid_height, hw_grid_width;
+   uint8_t sample_locations[16][2];
+   unsigned cb[64];
+   unsigned i, pixel, pixel_y, pixel_x, sample;
+   uint32_t packed_locations[4] = {};
+
+   screen->base.base.get_sample_pixel_grid(
+      &screen->base.base, ms, &grid_width, &grid_height);
+
+   hw_grid_width = grid_width;
+   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
+      hw_grid_width = 4;
+
+   if (nvc0->sample_locations_enabled) {
+      uint8_t locations[2 * 4 * 8];
+      memcpy(locations, nvc0->sample_locations, sizeof(locations));
+      util_sample_locations_flip_y(
+         &screen->base.base, nvc0->framebuffer.height, ms, locations);
+
+      for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
+         for (sample = 0; sample < ms; sample++) {
+            unsigned pixel_x = pixel % hw_grid_width;
+            unsigned pixel_y = pixel / hw_grid_width;
+            unsigned wi = pixel * ms + sample;
+            unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
+            ri = ri * ms + sample;
+            sample_locations[wi][0] = locations[ri] & 0xf;
+            sample_locations[wi][1] = 16 - (locations[ri] >> 4);
+         }
+      }
+   } else {
+      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+      for (i = 0; i < 16; i++) {
+         sample_locations[i][0] = ptr[i % ms][0];
+         sample_locations[i][1] = ptr[i % ms][1];
+      }
+   }
+
+   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64);
+   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
+   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
+         for (sample = 0; sample < ms; sample++) {
+            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
+            unsigned read_index = pixel_y % grid_height * hw_grid_width;
+            uint8_t x, y;
+            read_index += pixel_x % grid_width;
+            read_index = read_index * ms + sample;
+            x = sample_locations[read_index][0];
+            y = sample_locations[read_index][1];
+            cb[write_index] = gm200_encode_cb_sample_location(x, y);
+         }
+      }
+   }
+   PUSH_DATAp(push, cb, 64);
+
+   for (i = 0; i < 16; i++) {
+      packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
+      packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
+   }
+
+   BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+   PUSH_DATAp(push, packed_locations, 4);
+}
+
+static void
+nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   unsigned i;
+
+   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
+   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
+   for (i = 0; i < ms; i++) {
+      float xy[2];
+      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
+      PUSH_DATAf(push, xy[0]);
+      PUSH_DATAf(push, xy[1]);
+   }
+}
+
+static void
+validate_sample_locations(struct nvc0_context *nvc0)
+{
+   unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer);
+
+   if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
+      gm200_validate_sample_locations(nvc0, ms);
+   else
+      nvc0_validate_sample_locations(nvc0, ms);
+}
+
 static void
 nvc0_validate_fb(struct nvc0_context *nvc0)
 {
    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
    struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
-   struct nvc0_screen *screen = nvc0->screen;
-   unsigned i, ms;
+   unsigned i;
    unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
    unsigned nr_cbufs = fb->nr_cbufs;
    bool serialize = false;
@@ -197,33 +316,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
    PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
    IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
 
-   ms = 1 << ms_mode;
-   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
-   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
-   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
-   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
-   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
-   for (i = 0; i < ms; i++) {
-      float xy[2];
-      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
-      PUSH_DATAf(push, xy[0]);
-      PUSH_DATAf(push, xy[1]);
-   }
-
-   if (screen->base.class_3d >= GM200_3D_CLASS) {
-      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
-      uint32_t val[4] = {};
-
-      for (i = 0; i < 16; i++) {
-         val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
-         val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
-      }
-
-      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
-      PUSH_DATAp(push, val, 4);
-   }
-
    if (serialize)
       IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
 
@@ -879,6 +971,8 @@ validate_list_3d[] = {
                                    NVC0_NEW_3D_TEVLPROG |
                                    NVC0_NEW_3D_GMTYPROG },
     { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
+    { validate_sample_locations,   NVC0_NEW_3D_SAMPLE_LOCATIONS |
+                                   NVC0_NEW_3D_FRAMEBUFFER},
 };
 
 bool
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 0f86c11b7f..39b1369758 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -754,6 +754,16 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers,
    }
 }
 
+static void
+gm200_evaluate_depth_buffer(struct pipe_context *pipe)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+   nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER);
+   IMMED_NVC0(push, SUBC_3D(0x11fc), 1);
+}
+
 
 /* =============================== BLIT CODE ===================================
  */
@@ -1720,4 +1730,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
    pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
    pipe->clear_texture = nv50_clear_texture;
    pipe->clear_buffer = nvc0_clear_buffer;
+   if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
+      pipe->evaluate_depth_buffer = gm200_evaluate_depth_buffer;
 }
-- 
2.14.3



More information about the mesa-dev mailing list