[Mesa-dev] [PATCH 3/3] nv50: add support for PIPE_CAP_SAMPLE_SHADING
Ilia Mirkin
imirkin at alum.mit.edu
Sun Mar 30 17:52:25 PDT 2014
---
This doesn't actually pass any tests yet (sample-id passes for texture
attachments, but not for renderbuffer attachments).
src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 +
.../drivers/nouveau/codegen/nv50_ir_driver.h | 1 +
.../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 8 ++++
.../nouveau/codegen/nv50_ir_lowering_nv50.cpp | 12 +++++
.../drivers/nouveau/codegen/nv50_ir_print.cpp | 1 +
.../nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +
src/gallium/drivers/nouveau/nv50/nv50_context.h | 7 ++-
src/gallium/drivers/nouveau/nv50/nv50_program.c | 5 +-
src/gallium/drivers/nouveau/nv50/nv50_program.h | 1 +
src/gallium/drivers/nouveau/nv50/nv50_screen.c | 3 +-
.../drivers/nouveau/nv50/nv50_shader_state.c | 11 +++++
src/gallium/drivers/nouveau/nv50/nv50_state.c | 9 ++++
.../drivers/nouveau/nv50/nv50_state_validate.c | 53 +++++++++++++++++++++-
13 files changed, 109 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 857980d..9030cc3 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -353,6 +353,7 @@ enum SVSemantic
SV_POINT_COORD,
SV_CLIP_DISTANCE,
SV_SAMPLE_INDEX,
+ SV_SAMPLE_POS,
SV_TESS_FACTOR,
SV_TESS_COORD,
SV_TID,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index f2f4ead..7d35239 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -187,6 +187,7 @@ struct nv50_ir_prog_info
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
uint16_t texBindBase; /* base address for tex handles (nve4) */
uint16_t suInfoBase; /* base address for surface info (nve4) */
+ uint16_t sampleInfoBase; /* base address for sample positions */
uint8_t msInfoCBSlot; /* cX[] used for multisample info */
uint16_t msInfoBase; /* base address for multisample info */
} io;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index ccddb9a..4f5665a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -345,6 +345,8 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
case TGSI_SEMANTIC_BLOCK_ID: return nv50_ir::SV_CTAID;
case TGSI_SEMANTIC_BLOCK_SIZE: return nv50_ir::SV_NTID;
case TGSI_SEMANTIC_THREAD_ID: return nv50_ir::SV_TID;
+ case TGSI_SEMANTIC_SAMPLEID: return nv50_ir::SV_SAMPLE_INDEX;
+ case TGSI_SEMANTIC_SAMPLEPOS: return nv50_ir::SV_SAMPLE_POS;
default:
assert(0);
return nv50_ir::SV_CLOCK;
@@ -954,6 +956,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
decl->Declaration.UsageMask << (si * 4);
info->io.genUserClip = -1;
break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->io.sampleMask = i;
+ break;
default:
break;
}
@@ -970,6 +975,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl)
case TGSI_SEMANTIC_VERTEXID:
info->io.vertexId = first;
break;
+ case TGSI_SEMANTIC_SAMPLEMASK:
+ info->io.sampleMask = first;
+ break;
default:
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 0908447..110c2b0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -1011,6 +1011,18 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
bld.mkMov(def, bld.mkImm(0));
}
break;
+ case SV_SAMPLE_POS: {
+ Value *off = new_LValue(func, FILE_ADDRESS);
+ bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, 0));
+ bld.mkOp2(OP_SHL, TYPE_U32, off, def, bld.mkImm(3));
+ bld.mkLoad(TYPE_F32,
+ def,
+ bld.mkSymbol(
+ FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
+ TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx),
+ off);
+ break;
+ }
default:
bld.mkFetch(i->getDef(0), i->dType,
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), NULL);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index ae42d03..e31a393 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -252,6 +252,7 @@ static const char *SemanticStr[SV_LAST + 1] =
"POINT_COORD",
"CLIP_DISTANCE",
"SAMPLE_INDEX",
+ "SAMPLE_POS",
"TESS_FACTOR",
"TESS_COORD",
"TID",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index de07646..0b2f27a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -249,6 +249,8 @@ TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const
return 0x2 + 2 * sym->reg.data.sv.index;
case SV_TID:
return 0;
+ case SV_SAMPLE_POS:
+ return 0; /* sample position is handled differently */
default:
return sysvalLocation[sym->reg.data.sv.sv];
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 32ca591..f47a030 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -49,6 +49,7 @@
#define NV50_NEW_TEXTURES (1 << 19)
#define NV50_NEW_SAMPLERS (1 << 20)
#define NV50_NEW_STRMOUT (1 << 21)
+#define NV50_NEW_SAMPLE_SHADING (1 << 22)
#define NV50_NEW_CONTEXT (1 << 31)
#define NV50_BIND_FB 0
@@ -83,7 +84,10 @@
/* For each MS level (4), 8 sets of 32-bit integer pairs sample offsets */
#define NV50_CB_AUX_MS_OFFSET 0x880
#define NV50_CB_AUX_MS_SIZE (4 * 8 * 4 * 2)
-/* next spot: 0x980 */
+/* Sample position pairs for the current output MS level */
+#define NV50_CB_AUX_SAMPLE_OFFSET 0x980
+#define NV50_CB_AUX_SAMPLE_OFFSET_SIZE (4 * 8 * 2)
+/* next spot: 0x9c0 */
/* 4 32-bit floats for the vertex runout, put at the end */
#define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10)
@@ -170,6 +174,7 @@ struct nv50_context {
struct pipe_clip_state clip;
unsigned sample_mask;
+ float sample_shading;
boolean vbo_push_hint;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 0e06125..3b5efbe 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -234,8 +234,10 @@ nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info)
prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4);
}
- if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) {
info->out[info->io.sampleMask].slot[0] = prog->max_out++;
+ prog->fp.has_samplemask = 1;
+ }
if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
info->out[info->io.fragDepth].slot[2] = prog->max_out++;
@@ -336,6 +338,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
info->io.resInfoCBSlot = 15;
info->io.suInfoBase = NV50_CB_AUX_TEX_MS_OFFSET;
+ info->io.sampleInfoBase = NV50_CB_AUX_SAMPLE_OFFSET;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = NV50_CB_AUX_MS_OFFSET;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 87b0679..625dd77 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -84,6 +84,7 @@ struct nv50_program {
uint32_t flags[2]; /* 0x19a8, 196c */
uint32_t interp; /* 0x1988 */
uint32_t colors; /* 0x1904 */
+ uint8_t has_samplemask;
} fp;
struct {
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 26f4162..18c2d53 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -195,12 +195,13 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_VS_LAYER:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
- case PIPE_CAP_SAMPLE_SHADING:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
return NV50_MAX_VIEWPORTS;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return (class_3d >= NVA3_3D_CLASS) ? 4 : 0;
+ case PIPE_CAP_SAMPLE_SHADING:
+ return class_3d >= NVA3_3D_CLASS;
default:
NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
return 0;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 28cff8b..c538d6a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -186,6 +186,17 @@ nv50_fragprog_validate(struct nv50_context *nv50)
PUSH_DATA (push, fp->fp.flags[1]);
BEGIN_NV04(push, NV50_3D(FP_START_ID), 1);
PUSH_DATA (push, fp->code_base);
+
+ if (nv50->screen->tesla->oclass >= NVA3_3D_CLASS) {
+ BEGIN_NV04(push, SUBC_3D(NVA3_3D_FP_MULTISAMPLE), 1);
+ if (nv50->sample_shading > 0)
+ PUSH_DATA(push,
+ NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE |
+ (NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK *
+ fp->fp.has_samplemask));
+ else
+ PUSH_DATA(push, 0);
+ }
}
void
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 647c01f..9ba0f3b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -864,6 +864,14 @@ nv50_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
nv50->dirty |= NV50_NEW_SAMPLE_MASK;
}
+static void
+nv50_set_sample_shading(struct pipe_context *pipe, float sample_shading)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+
+ nv50->sample_shading = sample_shading;
+ nv50->dirty |= NV50_NEW_SAMPLE_SHADING;
+}
static void
nv50_set_framebuffer_state(struct pipe_context *pipe,
@@ -1135,6 +1143,7 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->set_stencil_ref = nv50_set_stencil_ref;
pipe->set_clip_state = nv50_set_clip_state;
pipe->set_sample_mask = nv50_set_sample_mask;
+ pipe->set_sample_shading = nv50_set_sample_shading;
pipe->set_constant_buffer = nv50_set_constant_buffer;
pipe->set_framebuffer_state = nv50_set_framebuffer_state;
pipe->set_polygon_stipple = nv50_set_polygon_stipple;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 100d02d..8867928 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -125,6 +125,7 @@ nv50_validate_fb(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
PUSH_DATA (push, ms_mode);
+
/* Only need to initialize the first viewport, which is used for clears */
BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
PUSH_DATA (push, fb->width << 16);
@@ -359,6 +360,51 @@ nv50_validate_sample_mask(struct nv50_context *nv50)
}
static void
+nv50_validate_sample_shading(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+ int samples, ms = 0;
+ int i;
+
+ if (nv50->screen->tesla->oclass < NVA3_3D_CLASS)
+ return;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ struct nv50_miptree *mt;
+ if (!fb->cbufs[i])
+ continue;
+ mt = nv50_miptree(fb->cbufs[i]->texture);
+ ms = mt->ms_x + mt->ms_y;
+ break;
+ }
+ if (i == fb->nr_cbufs && fb->zsbuf) {
+ struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+ ms = mt->ms_x + mt->ms_y;
+ }
+
+ samples = util_next_power_of_two(ceil(nv50->sample_shading * (1 << ms)));
+ if (samples > 1)
+ samples |= NVA3_3D_SAMPLE_SHADING_ENABLE;
+
+ BEGIN_NV04(push, SUBC_3D(NVA3_3D_SAMPLE_SHADING), 1);
+ PUSH_DATA (push, samples);
+
+ if (nv50->sample_shading == 0.0 || !(nv50->dirty & NV50_NEW_FRAMEBUFFER))
+ return;
+
+ BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
+ PUSH_DATA (push, (NV50_CB_AUX_SAMPLE_OFFSET << (8 - 2)) | NV50_CB_AUX);
+ BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 2 << ms);
+ for (i = 0; i < (1 << ms); i++) {
+ float xy[2];
+ nv50->base.pipe.get_sample_position(&nv50->base.pipe, 1 << ms, i, xy);
+ PUSH_DATAf(push, xy[0]);
+ PUSH_DATAf(push, xy[1]);
+ }
+}
+
+static void
nv50_switch_pipe_context(struct nv50_context *ctx_to)
{
struct nv50_context *ctx_from = ctx_to->screen->cur_ctx;
@@ -414,7 +460,8 @@ static struct state_validate {
{ nv50_validate_viewport, NV50_NEW_VIEWPORT },
{ nv50_vertprog_validate, NV50_NEW_VERTPROG },
{ nv50_gmtyprog_validate, NV50_NEW_GMTYPROG },
- { nv50_fragprog_validate, NV50_NEW_FRAGPROG },
+ { nv50_fragprog_validate, NV50_NEW_FRAGPROG |
+ NV50_NEW_SAMPLE_SHADING },
{ nv50_fp_linkage_validate, NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
NV50_NEW_GMTYPROG | NV50_NEW_RASTERIZER },
{ nv50_gp_linkage_validate, NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
@@ -427,7 +474,9 @@ static struct state_validate {
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
- { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
+ { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+ { nv50_validate_sample_shading, NV50_NEW_SAMPLE_SHADING |
+ NV50_NEW_FRAMEBUFFER },
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
--
1.8.3.2
More information about the mesa-dev mailing list