[Mesa-dev] [PATCH v2 4/5] nvc0: add support for programmable sample locations

Ilia Mirkin imirkin at alum.mit.edu
Mon May 28 20:05:39 UTC 2018


ARB_sample_locations has all this stuff about a resolve of some sort
when you switch around the locations. I don't see anything here about
that. Thoughts?

Also some more specific comments inline:

On Thu, May 10, 2018 at 12:28 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_driver.h       |   2 +
>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |   7 +
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      |  91 +++++++++--
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |   2 +
>  src/gallium/drivers/nouveau/nv50/nv50_miptree.c    |   1 +
>  src/gallium/drivers/nouveau/nv50/nv50_resource.h   |   1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  15 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c    |   1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c    |   3 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  33 +++-
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c      |  17 +-
>  .../drivers/nouveau/nvc0/nvc0_state_validate.c     | 174 +++++++++++++++++----
>  12 files changed, 301 insertions(+), 46 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 3d0782f86b..7c835ceab8 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol
>  #define NVISA_GK104_CHIPSET    0xe0
>  #define NVISA_GK20A_CHIPSET    0xea
>  #define NVISA_GM107_CHIPSET    0x110
> +#define NVISA_GM200_CHIPSET    0x120
>
>  struct nv50_ir_prog_info
>  {
> @@ -145,6 +146,7 @@ struct nv50_ir_prog_info
>           bool persampleInvocation;
>           bool usesSampleMaskIn;
>           bool readsFramebuffer;
> +         bool readsSampleLocations;
>        } fp;
>        struct {
>           uint32_t inputOffset; /* base address for user args */
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 3c5bad05fe..d7844d7381 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn,
>           info->out[src.getIndex(0)].oread = 1;
>        }
>     }
> +   if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
> +      if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
> +         info->prop.fp.readsSampleLocations = true;
> +   }
>     if (src.getFile() != TGSI_FILE_INPUT)
>        return;
>
> @@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
>     if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
>        info->prop.fp.readsFramebuffer = true;
>
> +   if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
> +      info->prop.fp.readsSampleLocations = true;
> +
>     if (insn.dstCount()) {
>        Instruction::DstRegister dst = insn.getDst(0);
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 29f674b451..5f5298777e 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>        ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
>        break;
>     case SV_SAMPLE_POS: {
> -      Value *off = new_LValue(func, FILE_GPR);
> -      ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
> +      Value *sampleID = bld.getScratch();
> +      ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0));
>        ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
> -      bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
> -      bld.mkLoad(TYPE_F32,
> -                 i->getDef(0),
> -                 bld.mkSymbol(
> -                       FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> -                       TYPE_U32, prog->driver->io.sampleInfoBase +
> -                       4 * sym->reg.data.sv.index),
> -                 off);
> +      Value *offset = calculateSampleOffset(sampleID);
> +
> +      assert(prog->driver->prop.fp.readsSampleLocations);
> +
> +      if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
> +         bld.mkLoad(TYPE_F32,
> +                    i->getDef(0),
> +                    bld.mkSymbol(
> +                          FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> +                          TYPE_U32, prog->driver->io.sampleInfoBase),
> +                    offset);
> +         bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0),
> +                   bld.mkImm(0x040c + sym->reg.data.sv.index * 16));
> +         bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0));
> +         bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f));
> +      } else {
> +         bld.mkLoad(TYPE_F32,
> +                    i->getDef(0),
> +                    bld.mkSymbol(
> +                          FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> +                          TYPE_U32, prog->driver->io.sampleInfoBase +
> +                          4 * sym->reg.data.sv.index),
> +                    offset);
> +      }
>        break;
>     }
>     case SV_SAMPLE_MASK: {
> @@ -2832,6 +2848,58 @@ NVC0LoweringPass::handleOUT(Instruction *i)
>     return true;
>  }
>
> +Value *
> +NVC0LoweringPass::calculateSampleOffset(Value *sampleID)
> +{
> +   Value *offset = bld.getScratch();
> +   if (targ->getChipset() >= NVISA_GM200_CHIPSET) {

This all needs a lot more comments. I think I sorta get what this is
doing (i.e. figuring out where it is in the grid), but all the bit
encoding is confusing. Having some comments here about what's in the
data, and for the forgetful, what each INSBF does, would be helpful.

> +      // Add sample ID
> +      bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0));
> +
> +      Symbol *xSym = bld.mkSysVal(SV_POSITION, 0);
> +      Symbol *ySym = bld.mkSysVal(SV_POSITION, 1);
> +      Value *coord = bld.getScratch();
> +
> +      // Add X coordinate
> +      bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
> +                   targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL);
> +      bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
> +         ->rnd = ROUND_ZI;
> +      bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset);
> +
> +      // Add Y coordinate
> +      bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
> +                   targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL);
> +      bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
> +         ->rnd = ROUND_ZI;
> +      bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset);
> +   } else {
> +      bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3));
> +   }
> +   return offset;
> +}
> +
> +// Handle programmable sample locations for GM20x+
> +void
> +NVC0LoweringPass::handlePIXLD(Instruction *i)
> +{
> +   if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET)
> +      return;
> +   if (targ->getChipset() < NVISA_GM200_CHIPSET)
> +      return;
> +
> +   assert(prog->driver->prop.fp.readsSampleLocations);
> +
> +   bld.mkLoad(TYPE_F32,
> +              i->getDef(0),
> +              bld.mkSymbol(
> +                    FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> +                    TYPE_U32, prog->driver->io.sampleInfoBase),
> +              calculateSampleOffset(i->getSrc(0)));
> +
> +   bld.getBB()->remove(i);
> +}
> +
>  // Generate a binary predicate if an instruction is predicated by
>  // e.g. an f32 value.
>  void
> @@ -2931,6 +2999,9 @@ NVC0LoweringPass::visit(Instruction *i)
>     case OP_BUFQ:
>        handleBUFQ(i);
>        break;
> +   case OP_PIXLD:
> +      handlePIXLD(i);
> +      break;
>     default:
>        break;
>     }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index 1b2b36d3cc..91771fbf7e 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -116,6 +116,7 @@ protected:
>     void handleSharedATOMNVE4(Instruction *);
>     void handleLDST(Instruction *);
>     bool handleBUFQ(Instruction *);
> +   void handlePIXLD(Instruction *);
>
>     void checkPredicate(Instruction *);
>
> @@ -142,6 +143,7 @@ private:
>     void processSurfaceCoordsNVE4(TexInstruction *);
>     void processSurfaceCoordsNVC0(TexInstruction *);
>     void convertSurfaceFormat(TexInstruction *);
> +   Value *calculateSampleOffset(Value *sampleID);
>
>  protected:
>     Value *loadTexHandle(Value *ptr, unsigned int slot);
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
> index f2e304fde6..ac76a9446b 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
> @@ -209,6 +209,7 @@ const struct u_resource_vtbl nv50_miptree_vtbl =
>  static inline bool
>  nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
>  {
> +   mt->multisampling = mt->base.base.nr_samples > 0;
>     switch (mt->base.base.nr_samples) {
>     case 8:
>        mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8;
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
> index 5d03925b0d..ba4fc0c64c 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
> @@ -54,6 +54,7 @@ struct nv50_miptree {
>     uint8_t ms_x;      /* log2 of number of samples in x/y dimension */
>     uint8_t ms_y;
>     uint8_t ms_mode;
> +   bool multisampling; /* true if nr_samples > 0 on creation */

Isn't ms_mode != 0 sufficient? I think we've killed MS=1 stuff at the
st/mesa level.

>  };
>
>  static inline struct nv50_miptree *
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 0729c88dff..77237a3c0a 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -62,6 +62,8 @@
>  #define NVC0_NEW_3D_DRIVERCONST  (1 << 27)
>  #define NVC0_NEW_3D_WINDOW_RECTS (1 << 28)
>
> +#define NVC0_NEW_3D_SAMPLE_LOCATIONS (1 << 29)
> +
>  #define NVC0_NEW_CP_PROGRAM   (1 << 0)
>  #define NVC0_NEW_CP_SURFACES  (1 << 1)
>  #define NVC0_NEW_CP_TEXTURES  (1 << 2)
> @@ -134,20 +136,21 @@
>  #define NVC0_CB_AUX_UBO_SIZE        ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
>  /* 8 sets of 32-bits integer pairs sample offsets */
>  #define NVC0_CB_AUX_SAMPLE_INFO     0x1a0 /* FP */
> -#define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 4 * 2)
> +/* 256 bytes, though only 64 bytes used before GM200 */
> +#define NVC0_CB_AUX_SAMPLE_SIZE     (8 * 2 * 4 * 4)
>  /* draw parameters (index bais, base instance, drawid) */
>  #define NVC0_CB_AUX_DRAW_INFO       0x1a0 /* VP */
>  /* 32 user buffers, at 4 32-bits integers each */
> -#define NVC0_CB_AUX_BUF_INFO(i)     0x220 + (i) * 4 * 4
> +#define NVC0_CB_AUX_BUF_INFO(i)     0x2a0 + (i) * 4 * 4
>  #define NVC0_CB_AUX_BUF_SIZE        (NVC0_MAX_BUFFERS * 4 * 4)
>  /* 8 surfaces, at 16 32-bits integers each */
> -#define NVC0_CB_AUX_SU_INFO(i)      0x420 + (i) * 16 * 4
> +#define NVC0_CB_AUX_SU_INFO(i)      0x4a0 + (i) * 16 * 4
>  #define NVC0_CB_AUX_SU_SIZE         (NVC0_MAX_IMAGES * 16 * 4)
>  /* 1 64-bits address and 1 32-bits sequence */
> -#define NVC0_CB_AUX_MP_INFO         0x620
> +#define NVC0_CB_AUX_MP_INFO         0x6a0
>  #define NVC0_CB_AUX_MP_SIZE         3 * 4
>  /* 512 64-byte blocks for bindless image handles */
> -#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
> +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
>  #define NVC0_CB_AUX_BINDLESS_SIZE   (NVE4_IMG_MAX_HANDLES * 16 * 4)
>  /* 4 32-bits floats for the vertex runout, put at the end */
>  #define NVC0_CB_AUX_RUNOUT_INFO     NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
> @@ -229,6 +232,8 @@ struct nvc0_context {
>     struct list_head img_head;
>
>     struct pipe_framebuffer_state framebuffer;
> +   bool sample_locations_enabled;
> +   uint8_t sample_locations[2 * 4 * 8];
>     struct pipe_blend_color blend_colour;
>     struct pipe_stencil_ref stencil_ref;
>     struct pipe_poly_stipple stipple;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
> index 7983c40308..4607d53576 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
> @@ -139,6 +139,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
>  static inline bool
>  nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
>  {
> +   mt->multisampling = mt->base.base.nr_samples > 0;
>     switch (mt->base.base.nr_samples) {
>     case 8:
>        mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 9520d984bb..57d98753f4 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
>           }
>        }
>     }
> +   /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
> +   if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
> +      fp->hdr[5] |= 0x30000000;
>
>     for (i = 0; i < info->numOutputs; ++i) {
>        if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index 45ac7dd626..53108cf44c 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -269,6 +269,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>     case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
>     case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
>     case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
> +   case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
>        return class_3d >= GM200_3D_CLASS;
>     case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
>        return class_3d >= GP100_3D_CLASS;
> @@ -317,7 +318,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>     case PIPE_CAP_CONSTBUF0_FLAGS:
>     case PIPE_CAP_PACKED_UNIFORMS:
>     case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
> -   case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
>        return 0;
>
>     case PIPE_CAP_VENDOR_ID:
> @@ -543,6 +543,36 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
>  #undef RET
>  }
>
> +static void
> +nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen,
> +                                  unsigned sample_count,
> +                                  unsigned *width, unsigned *height)
> +{
> +   switch (sample_count) {
> +   case 0:
> +   case 1:
> +      /* this could be 4x4, but the GL state tracker makes it difficult to
> +       * create a 1x MSAA texture and smaller grids save CB space */
> +      *width = 2;
> +      *height = 4;
> +      break;
> +   case 2:
> +      *width = 2;
> +      *height = 4;
> +      break;
> +   case 4:
> +      *width = 2;
> +      *height = 2;
> +      break;
> +   case 8:
> +      *width = 1;
> +      *height = 2;
> +      break;
> +   default:
> +      assert(0);
> +   }
> +}
> +
>  static void
>  nvc0_screen_destroy(struct pipe_screen *pscreen)
>  {
> @@ -869,6 +899,7 @@ nvc0_screen_create(struct nouveau_device *dev)
>     pscreen->get_param = nvc0_screen_get_param;
>     pscreen->get_shader_param = nvc0_screen_get_shader_param;
>     pscreen->get_paramf = nvc0_screen_get_paramf;
> +   pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid;
>     pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
>     pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info;
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index 37dbbe66c7..d9ee62523b 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -854,7 +854,21 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
>
>      util_copy_framebuffer_state(&nvc0->framebuffer, fb);
>
> -    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
> +    nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS;
> +}
> +
> +static void
> +nvc0_set_sample_locations(struct pipe_context *pipe,
> +                          size_t size, const uint8_t *locations)
> +{
> +    struct nvc0_context *nvc0 = nvc0_context(pipe);
> +
> +    nvc0->sample_locations_enabled = size && locations;
> +    if (size > sizeof(nvc0->sample_locations))
> +       size = sizeof(nvc0->sample_locations);
> +    memcpy(nvc0->sample_locations, locations, size);
> +
> +    nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_LOCATIONS;
>  }
>
>  static void
> @@ -1407,6 +1421,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
>     pipe->set_min_samples = nvc0_set_min_samples;
>     pipe->set_constant_buffer = nvc0_set_constant_buffer;
>     pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
> +   pipe->set_sample_locations = nvc0_set_sample_locations;
>     pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
>     pipe->set_scissor_states = nvc0_set_scissor_states;
>     pipe->set_viewport_states = nvc0_set_viewport_states;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index 8e2192d3de..ccfe814658 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -71,13 +71,154 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
>     PUSH_DATA (push, 0);      // base layer
>  }
>
> +static uint32_t
> +nv120_encode_cb_sample_location(uint8_t x, uint8_t y)

gm200_... (and same below)

> +{
> +   static const uint8_t lut[] = {
> +      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
> +      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
> +   uint32_t result = 0;
> +   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
> +   result |= lut[x] << 8 | lut[y] << 24;
> +   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
> +   result |= x << 12 | y << 28;
> +   return result;
> +}
> +
> +static void
> +nv120_validate_sample_locations(struct nvc0_context *nvc0,
> +                                unsigned ms, bool multisampling)
> +{
> +   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> +   struct nvc0_screen *screen = nvc0->screen;
> +   unsigned grid_width, grid_height, hw_grid_width;
> +   uint8_t sample_locations[16][2];
> +   unsigned cb[64];
> +   unsigned i, pixel, pixel_y, pixel_x, sample;
> +
> +   screen->base.base.get_sample_pixel_grid(
> +      &screen->base.base, ms, &grid_width, &grid_height);
> +
> +   hw_grid_width = grid_width;
> +   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
> +      hw_grid_width = 4;
> +
> +   if (!multisampling) {
> +      memset(sample_locations, 8, sizeof(sample_locations));
> +   } else if (nvc0->sample_locations_enabled) {
> +      uint8_t locations[2 * 4 * 8];
> +      memcpy(locations, nvc0->sample_locations, sizeof(locations));
> +      util_sample_locations_flip_y(
> +         &screen->base.base, nvc0->framebuffer.height, ms, locations);
> +
> +      for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
> +         for (sample = 0; sample < ms; sample++) {
> +            unsigned pixel_x = pixel % hw_grid_width;
> +            unsigned pixel_y = pixel / hw_grid_width;
> +            unsigned wi = pixel * ms + sample;
> +            unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
> +            ri = ri * ms + sample;
> +            sample_locations[wi][0] = locations[ri] & 0xf;
> +            sample_locations[wi][1] = 16 - (locations[ri] >> 4);
> +         }
> +      }
> +   } else {
> +      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
> +      for (i = 0; i < 16; i++) {
> +         sample_locations[i][0] = ptr[i % ms][0];
> +         sample_locations[i][1] = ptr[i % ms][1];
> +      }
> +   }
> +
> +   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> +   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> +   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> +   BEGIN_1IC0(push, NVC0_3D(CB_POS), 65);

1 + 64

> +   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
> +   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
> +      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
> +         for (sample = 0; sample < ms; sample++) {
> +            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
> +            unsigned read_index = pixel_y % grid_height * hw_grid_width;
> +            read_index += pixel_x % grid_width;
> +            read_index = read_index * ms + sample;
> +            uint8_t x = sample_locations[read_index][0];
> +            uint8_t y = sample_locations[read_index][1];
> +            cb[write_index] = nv120_encode_cb_sample_location(x, y);
> +         }
> +      }
> +   }
> +   PUSH_DATAp(push, cb, 64);
> +
> +   if (screen->base.class_3d >= GM200_3D_CLASS) {
> +      uint32_t val[4] = {};
> +
> +      for (i = 0; i < 16; i++) {
> +         val[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
> +         val[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
> +      }
> +
> +      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
> +      PUSH_DATAp(push, val, 4);
> +   }
> +}
> +
> +static void
> +nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
> +{
> +   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> +   struct nvc0_screen *screen = nvc0->screen;
> +   unsigned i;
> +
> +   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> +   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> +   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> +   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
> +   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
> +   for (i = 0; i < ms; i++) {
> +      float xy[2];
> +      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
> +      PUSH_DATAf(push, xy[0]);
> +      PUSH_DATAf(push, xy[1]);
> +   }
> +}
> +
> +static void
> +validate_sample_locations(struct nvc0_context *nvc0)
> +{
> +   struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
> +   unsigned ms = util_framebuffer_get_num_samples(fb);
> +   bool multisampling = false;
> +
> +   if (fb->zsbuf)
> +      multisampling = nv50_miptree(fb->zsbuf->texture)->multisampling;
> +   else
> +   if (fb->nr_cbufs) {
> +      unsigned i;
> +      for (i = 0; i < fb->nr_cbufs; ++i) {
> +         struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
> +
> +         if (sf && sf->base.texture->target != PIPE_BUFFER)
> +            multisampling = nv50_miptree(sf->base.texture)->multisampling;
> +      }
> +   } else
> +   if (fb->samples > 1)
> +      multisampling = true;
> +
> +   if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
> +      nv120_validate_sample_locations(nvc0, ms, multisampling);
> +   else
> +      nvc0_validate_sample_locations(nvc0, ms);
> +}
> +
>  static void
>  nvc0_validate_fb(struct nvc0_context *nvc0)
>  {
>     struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>     struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
> -   struct nvc0_screen *screen = nvc0->screen;
> -   unsigned i, ms;
> +   unsigned i;
>     unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
>     unsigned nr_cbufs = fb->nr_cbufs;
>     bool serialize = false;
> @@ -197,33 +338,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
>     PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
>     IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
>
> -   ms = 1 << ms_mode;
> -   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> -   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> -   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> -   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> -   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
> -   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
> -   for (i = 0; i < ms; i++) {
> -      float xy[2];
> -      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
> -      PUSH_DATAf(push, xy[0]);
> -      PUSH_DATAf(push, xy[1]);
> -   }
> -
> -   if (screen->base.class_3d >= GM200_3D_CLASS) {
> -      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
> -      uint32_t val[4] = {};
> -
> -      for (i = 0; i < 16; i++) {
> -         val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
> -         val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
> -      }
> -
> -      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
> -      PUSH_DATAp(push, val, 4);
> -   }
> -
>     if (serialize)
>        IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
>
> @@ -879,6 +993,8 @@ validate_list_3d[] = {
>                                     NVC0_NEW_3D_TEVLPROG |
>                                     NVC0_NEW_3D_GMTYPROG },
>      { nvc0_validate_driverconst,   NVC0_NEW_3D_DRIVERCONST },
> +    { validate_sample_locations,   NVC0_NEW_3D_SAMPLE_LOCATIONS |
> +                                   NVC0_NEW_3D_FRAMEBUFFER},
>  };
>
>  bool
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list