[Mesa-dev] [PATCH 4/5] nvc0: add support for programmable sample locations
Rhys Perry
pendingchaos02 at gmail.com
Fri May 4 12:09:12 UTC 2018
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
.../drivers/nouveau/codegen/nv50_ir_driver.h | 2 +
.../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 7 +
.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 91 ++++++++++--
.../nouveau/codegen/nv50_ir_lowering_nvc0.h | 2 +
src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 1 +
src/gallium/drivers/nouveau/nv50/nv50_resource.h | 1 +
src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 35 +++++
src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 12 +-
src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 1 +
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 3 +
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 3 +-
src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 11 ++
.../drivers/nouveau/nvc0/nvc0_state_validate.c | 155 +++++++++++++++++----
13 files changed, 283 insertions(+), 41 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 3d0782f86b..7c835ceab8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol
#define NVISA_GK104_CHIPSET 0xe0
#define NVISA_GK20A_CHIPSET 0xea
#define NVISA_GM107_CHIPSET 0x110
+#define NVISA_GM200_CHIPSET 0x120
struct nv50_ir_prog_info
{
@@ -145,6 +146,7 @@ struct nv50_ir_prog_info
bool persampleInvocation;
bool usesSampleMaskIn;
bool readsFramebuffer;
+ bool readsSampleLocations;
} fp;
struct {
uint32_t inputOffset; /* base address for user args */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 3c5bad05fe..d7844d7381 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn,
info->out[src.getIndex(0)].oread = 1;
}
}
+ if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
+ if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
+ info->prop.fp.readsSampleLocations = true;
+ }
if (src.getFile() != TGSI_FILE_INPUT)
return;
@@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
info->prop.fp.readsFramebuffer = true;
+ if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
+ info->prop.fp.readsSampleLocations = true;
+
if (insn.dstCount()) {
Instruction::DstRegister dst = insn.getDst(0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 29f674b451..f64439dbdc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
break;
case SV_SAMPLE_POS: {
- Value *off = new_LValue(func, FILE_GPR);
- ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
+ Value *sampleID = bld.getScratch();
+ ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0));
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
- bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
- bld.mkLoad(TYPE_F32,
- i->getDef(0),
- bld.mkSymbol(
- FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
- TYPE_U32, prog->driver->io.sampleInfoBase +
- 4 * sym->reg.data.sv.index),
- off);
+ Value *offset = calculateSampleOffset(sampleID);
+
+ assert(prog->driver->prop.fp.readsSampleLocations);
+
+ if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase),
+ offset);
+ bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0),
+ bld.mkImm(0x040c + sym->reg.data.sv.index * 16));
+ bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0));
+ bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f/16.0f));
+ } else {
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase +
+ 4 * sym->reg.data.sv.index),
+ offset);
+ }
break;
}
case SV_SAMPLE_MASK: {
@@ -2832,6 +2848,58 @@ NVC0LoweringPass::handleOUT(Instruction *i)
return true;
}
+// Build the offset used to index the sample-location table in the aux
+// constant buffer for the sample identified by sampleID.
+//
+// On chipsets >= NVISA_GM200_CHIPSET the offset is assembled from three
+// bit-fields: the sample ID plus the low bits of the fragment's X and Y
+// window coordinates. On older chipsets it is simply sampleID << 3.
+//
+// Returns a scratch value holding the offset.
+Value *
+NVC0LoweringPass::calculateSampleOffset(Value *sampleID)
+{
+ Value *offset = bld.getScratch();
+ if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
+ // Add sample ID
+ // NOTE(review): the INSBF immediate presumably encodes
+ // (width << 8) | bit position, i.e. 0x0302 = 3 bits at bit 2 - confirm.
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0));
+
+ Symbol *xSym = bld.mkSysVal(SV_POSITION, 0);
+ Symbol *ySym = bld.mkSysVal(SV_POSITION, 1);
+ Value *coord = bld.getScratch();
+
+ // Add X coordinate
+ // The interpolated float position is converted to U32 (rounding mode
+ // ROUND_ZI) before being inserted (immediate 0x0105).
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+ targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL);
+ bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+ ->rnd = ROUND_ZI;
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset);
+
+ // Add Y coordinate
+ // Same conversion as for X, inserted with immediate 0x0206.
+ bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
+ targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL);
+ bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
+ ->rnd = ROUND_ZI;
+ bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset);
+ } else {
+ // Pre-GM200: offset is sampleID shifted left by 3.
+ bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3));
+ }
+ return offset;
+}
+
+// Handle programmable sample locations for GM20x+
+//
+// Only PIXLD instructions with subOp NV50_IR_SUBOP_PIXLD_OFFSET on chipsets
+// >= NVISA_GM200_CHIPSET are rewritten; everything else is left untouched.
+// The instruction is replaced by a load from the aux constant buffer at
+// io.sampleInfoBase, indexed by calculateSampleOffset() of source 0, and
+// the original instruction is then removed from its basic block.
+void
+NVC0LoweringPass::handlePIXLD(Instruction *i)
+{
+ if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET)
+ return;
+ if (targ->getChipset() < NVISA_GM200_CHIPSET)
+ return;
+
+ // The program info must have been flagged as reading sample locations.
+ assert(prog->driver->prop.fp.readsSampleLocations);
+
+ // Write the loaded value into the original instruction's destination.
+ bld.mkLoad(TYPE_F32,
+ i->getDef(0),
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase),
+ calculateSampleOffset(i->getSrc(0)));
+
+ // The PIXLD itself is no longer needed once the load has been emitted.
+ bld.getBB()->remove(i);
+}
+
// Generate a binary predicate if an instruction is predicated by
// e.g. an f32 value.
void
@@ -2931,6 +2999,9 @@ NVC0LoweringPass::visit(Instruction *i)
case OP_BUFQ:
handleBUFQ(i);
break;
+ case OP_PIXLD:
+ handlePIXLD(i);
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 1b2b36d3cc..91771fbf7e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -116,6 +116,7 @@ protected:
void handleSharedATOMNVE4(Instruction *);
void handleLDST(Instruction *);
bool handleBUFQ(Instruction *);
+ void handlePIXLD(Instruction *);
void checkPredicate(Instruction *);
@@ -142,6 +143,7 @@ private:
void processSurfaceCoordsNVE4(TexInstruction *);
void processSurfaceCoordsNVC0(TexInstruction *);
void convertSurfaceFormat(TexInstruction *);
+ Value *calculateSampleOffset(Value *sampleID);
protected:
Value *loadTexHandle(Value *ptr, unsigned int slot);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index f2e304fde6..ac76a9446b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -209,6 +209,7 @@ const struct u_resource_vtbl nv50_miptree_vtbl =
static inline bool
nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
{
+ mt->multisampling = mt->base.base.nr_samples > 0;
switch (mt->base.base.nr_samples) {
case 8:
mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index 5d03925b0d..ba4fc0c64c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -54,6 +54,7 @@ struct nv50_miptree {
uint8_t ms_x; /* log2 of number of samples in x/y dimension */
uint8_t ms_y;
uint8_t ms_mode;
+ bool multisampling; /* true if nr_samples > 0 on creation */
};
static inline struct nv50_miptree *
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index 2e4490b8d9..7b97e8806d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -352,6 +352,10 @@ static void
nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
float *);
+static void
+nvc0_context_get_sample_pixel_grid(struct pipe_context *, unsigned,
+ unsigned *, unsigned *);
+
struct pipe_context *
nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
{
@@ -403,6 +407,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
pipe->texture_barrier = nvc0_texture_barrier;
pipe->memory_barrier = nvc0_memory_barrier;
pipe->get_sample_position = nvc0_context_get_sample_position;
+ pipe->get_sample_pixel_grid = nvc0_context_get_sample_pixel_grid;
pipe->emit_string_marker = nvc0_emit_string_marker;
nouveau_context_init(&nvc0->base);
@@ -566,3 +571,33 @@ nvc0_context_get_sample_position(struct pipe_context *pipe,
xy[0] = ptr[sample_index][0] * 0.0625f;
xy[1] = ptr[sample_index][1] * 0.0625f;
}
+
+/* Report the size, in pixels, of the grid over which sample locations may
+ * vary for a given sample count.
+ *
+ * sample_count: number of samples per pixel (0, 1, 2, 4 or 8).
+ * width/height: receive the grid dimensions; left untouched for an
+ *               unsupported sample count (which also trips an assert).
+ */
+static void
+nvc0_context_get_sample_pixel_grid(struct pipe_context *pipe,
+                                   unsigned sample_count,
+                                   unsigned *width, unsigned *height)
+{
+   unsigned grid_w, grid_h;
+
+   switch (sample_count) {
+   case 0:
+   case 1:
+      /* this could be 4x4, but the GL state tracker makes it difficult to
+       * create a 1x MSAA texture and smaller grids save CB space */
+      /* fallthrough: same 2x4 grid as 2x MSAA */
+   case 2:
+      grid_w = 2;
+      grid_h = 4;
+      break;
+   case 4:
+      grid_w = 2;
+      grid_h = 2;
+      break;
+   case 8:
+      grid_w = 1;
+      grid_h = 2;
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   *width = grid_w;
+   *height = grid_h;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 0729c88dff..5396bdfca9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -134,20 +134,21 @@
#define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
/* 8 sets of 32-bits integer pairs sample offsets */
#define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */
-#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
+/* 256 bytes, though only 64 bytes used before GM200 */
+#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4)
/* draw parameters (index bais, base instance, drawid) */
#define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */
/* 32 user buffers, at 4 32-bits integers each */
-#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4
+#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4
#define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
/* 8 surfaces, at 16 32-bits integers each */
-#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4
+#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4
#define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4)
/* 1 64-bits address and 1 32-bits sequence */
-#define NVC0_CB_AUX_MP_INFO 0x620
+#define NVC0_CB_AUX_MP_INFO 0x6a0
#define NVC0_CB_AUX_MP_SIZE 3 * 4
/* 512 64-byte blocks for bindless image handles */
-#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
+#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
#define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
/* 4 32-bits floats for the vertex runout, put at the end */
#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
@@ -229,6 +230,7 @@ struct nvc0_context {
struct list_head img_head;
struct pipe_framebuffer_state framebuffer;
+ struct pipe_sample_locations_state sample_locations;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
struct pipe_poly_stipple stipple;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 7983c40308..4607d53576 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -139,6 +139,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
static inline bool
nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
{
+ mt->multisampling = mt->base.base.nr_samples > 0;
switch (mt->base.base.nr_samples) {
case 8:
mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb..57d98753f4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
}
}
}
+ /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
+ if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
+ fp->hdr[5] |= 0x30000000;
for (i = 0; i < info->numOutputs; ++i) {
if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index c6b1aa3ca8..7c2c581103 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -264,6 +264,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_POST_DEPTH_COVERAGE:
+ case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
+ return class_3d >= GM200_3D_CLASS;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_TGSI_BALLOT:
case PIPE_CAP_BINDLESS_TEXTURE:
@@ -308,7 +310,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_FENCE_SIGNAL:
case PIPE_CAP_CONSTBUF0_FLAGS:
case PIPE_CAP_PACKED_UNIFORMS:
- case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
return 0;
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 3e204f09a4..0351c5644d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -843,6 +843,16 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
}
+/* pipe_context::set_sample_locations_state hook: keep a copy of the
+ * requested sample locations in the context and flag the framebuffer
+ * state dirty so that it is revalidated.
+ */
+static void
+nvc0_set_sample_locations_state(struct pipe_context *pipe,
+                                const struct pipe_sample_locations_state *locations)
+{
+   struct nvc0_context *const ctx = nvc0_context(pipe);
+
+   ctx->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
+   ctx->sample_locations = *locations;
+}
+
static void
nvc0_set_polygon_stipple(struct pipe_context *pipe,
const struct pipe_poly_stipple *stipple)
@@ -1393,6 +1403,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_min_samples = nvc0_set_min_samples;
pipe->set_constant_buffer = nvc0_set_constant_buffer;
pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+ pipe->set_sample_locations_state = nvc0_set_sample_locations_state;
pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
pipe->set_scissor_states = nvc0_set_scissor_states;
pipe->set_viewport_states = nvc0_set_viewport_states;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 8e2192d3de..fee612990b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -71,6 +71,126 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
PUSH_DATA (push, 0); // base layer
}
+/* Pack one sample location into the 32-bit word stored in the aux constant
+ * buffer.
+ *
+ * x, y: sample coordinates; both are used as indices into a 16-entry
+ *       table, so they must be < 16.
+ *
+ * Returns the word with the table-remapped values at bits 8 (x) and 24 (y)
+ * and the raw values at bits 12 (x) and 28 (y).
+ */
+static uint32_t
+nv120_encode_cb_sample_location(uint8_t x, uint8_t y)
+{
+   static const uint8_t lut[] = {
+      0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
+      0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
+   uint32_t result = 0;
+   /* Shift as uint32_t: the operands would otherwise be promoted to signed
+    * int, and y << 28 overflows int for y >= 8. */
+   /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
+   result |= (uint32_t)lut[x] << 8 | (uint32_t)lut[y] << 24;
+   /* fill in gaps with data in a representation for SV_SAMPLE_POS */
+   result |= (uint32_t)x << 12 | (uint32_t)y << 28;
+   return result;
+}
+
+/* Upload the sample-location table for GM200+.
+ *
+ * nvc0:          context whose pushbuf, screen and sample_locations state
+ *                are used.
+ * ms:            number of samples per pixel (1 << ms_mode in the caller).
+ * multisampling: false forces every location to the value 8 in both
+ *                coordinates.
+ *
+ * The locations come from the user-supplied state when it is enabled,
+ * otherwise from nvc0_get_sample_locations(). They are written to the aux
+ * constant buffer at NVC0_CB_AUX_SAMPLE_INFO as a 4x2-pixel, 8-slot-per-
+ * pixel table, and additionally packed into four words sent to method
+ * 0x11e0.
+ */
+static void
+nv120_validate_sample_locations(struct nvc0_context *nvc0,
+                                unsigned ms, bool multisampling)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   unsigned grid_width, grid_height, hw_grid_width;
+   uint8_t sample_locations[16][2];
+   /* Zero-initialised: only the first `ms` slots of each pixel are written
+    * below, but all 64 words are uploaded. */
+   unsigned cb[64] = {0};
+   unsigned i, pixel, pixel_y, pixel_x, sample;
+
+   nvc0->base.pipe.get_sample_pixel_grid(&nvc0->base.pipe, ms,
+                                         &grid_width, &grid_height);
+
+   hw_grid_width = grid_width;
+   if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
+      hw_grid_width = 4;
+
+   if (!multisampling) {
+      memset(sample_locations, 8, sizeof(sample_locations));
+   } else if (nvc0->sample_locations.enabled) {
+      struct pipe_sample_locations_state locs_state = nvc0->sample_locations;
+      util_sample_locations_flip_y(&nvc0->base.pipe, &locs_state, &nvc0->framebuffer);
+
+      for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
+         pixel_x = pixel % hw_grid_width;
+         pixel_y = pixel / hw_grid_width;
+         for (sample = 0; sample < ms; sample++) {
+            unsigned wi = pixel * ms + sample;
+            unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
+            ri = ri * ms + sample;
+            sample_locations[wi][0] = locs_state.locations[ri] & 0xf;
+            /* NOTE(review): this yields 16 when the high nibble is 0, which
+             * is outside the 0..15 range the 4-bit packing below and the
+             * encoder's 16-entry table expect - confirm this cannot occur
+             * after the Y flip. */
+            sample_locations[wi][1] = 16 - (locs_state.locations[ri] >> 4);
+         }
+      }
+   } else {
+      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+      for (i = 0; i < 16; i++) {
+         sample_locations[i][0] = ptr[i%ms][0];
+         sample_locations[i][1] = ptr[i%ms][1];
+      }
+   }
+
+   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   BEGIN_1IC0(push, NVC0_3D(CB_POS), 65);
+   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
+   for (pixel_y = 0; pixel_y < 4; pixel_y++) {
+      for (pixel_x = 0; pixel_x < 2; pixel_x++) {
+         for (sample = 0; sample < ms; sample++) {
+            unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
+            unsigned read_index = (pixel_y % grid_height * hw_grid_width + pixel_x % grid_width) * ms + sample;
+            uint8_t x = sample_locations[read_index][0];
+            uint8_t y = sample_locations[read_index][1];
+            cb[write_index] = nv120_encode_cb_sample_location(x, y);
+         }
+      }
+   }
+   PUSH_DATAp(push, cb, 64);
+
+   if (screen->base.class_3d >= GM200_3D_CLASS) {
+      uint32_t val[4] = {};
+
+      /* Pack the 16 locations as one byte each: x in the low nibble,
+       * y in the high nibble. */
+      for (i = 0; i < 16; i++) {
+         val[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
+         val[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
+      }
+
+      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+      PUSH_DATAp(push, val, 4);
+   }
+}
+
+/* Upload the fixed sample positions for pre-GM200 hardware.
+ *
+ * nvc0: context whose pushbuf and screen are used.
+ * ms:   number of samples per pixel.
+ *
+ * Writes `ms` (x, y) float pairs, as returned by get_sample_position(), to
+ * the aux constant buffer at NVC0_CB_AUX_SAMPLE_INFO. GM200+ is handled by
+ * nv120_validate_sample_locations(), so no 0x11e0 programming is done here.
+ */
+static void
+nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nvc0_screen *screen = nvc0->screen;
+   unsigned i;
+
+   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
+   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
+   /* one word for the CB position plus two floats per sample */
+   BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
+   PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
+   for (i = 0; i < ms; i++) {
+      float xy[2];
+      nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
+      PUSH_DATAf(push, xy[0]);
+      PUSH_DATAf(push, xy[1]);
+   }
+}
+
static void
nvc0_validate_fb(struct nvc0_context *nvc0)
{
@@ -81,6 +201,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
unsigned nr_cbufs = fb->nr_cbufs;
bool serialize = false;
+ bool multisampling = false;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB);
@@ -120,6 +241,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
PUSH_DATA(push, sf->base.u.tex.first_layer);
ms_mode = mt->ms_mode;
+ multisampling = mt->multisampling;
} else {
if (res->base.target == PIPE_BUFFER) {
PUSH_DATA(push, 262144);
@@ -170,6 +292,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
PUSH_DATA (push, sf->base.u.tex.first_layer);
ms_mode = mt->ms_mode;
+ multisampling = mt->multisampling;
if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
serialize = true;
@@ -188,8 +311,10 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
nvc0_fb_set_null_rt(push, 0, fb->layers);
- if (fb->samples > 1)
+ if (fb->samples > 1) {
ms_mode = ffs(fb->samples) - 1;
+ multisampling = true;
+ }
nr_cbufs = 1;
}
@@ -198,31 +323,11 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
ms = 1 << ms_mode;
- BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, NVC0_CB_AUX_SIZE);
- PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
- PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
- BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
- PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
- for (i = 0; i < ms; i++) {
- float xy[2];
- nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
- PUSH_DATAf(push, xy[0]);
- PUSH_DATAf(push, xy[1]);
- }
-
- if (screen->base.class_3d >= GM200_3D_CLASS) {
- const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
- uint32_t val[4] = {};
- for (i = 0; i < 16; i++) {
- val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
- val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
- }
-
- BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
- PUSH_DATAp(push, val, 4);
- }
+ if (screen->base.class_3d>=GM200_3D_CLASS)
+ nv120_validate_sample_locations(nvc0, ms, multisampling);
+ else
+ nvc0_validate_sample_locations(nvc0, ms);
if (serialize)
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
--
2.14.3
More information about the mesa-dev
mailing list