[Mesa-dev] [PATCH 4/6] i965: Implement ARB_shader_stencil_export (SKL+)
Ben Widawsky
benjamin.widawsky at intel.com
Tue Oct 20 14:29:39 PDT 2015
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
src/mesa/drivers/dri/i965/brw_compiler.h | 1 +
src/mesa/drivers/dri/i965/brw_defines.h | 5 +++--
src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++++++
src/mesa/drivers/dri/i965/brw_fs.h | 2 ++
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 ++++++++
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 ++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 24 +++++++++++++++++++++---
src/mesa/drivers/dri/i965/gen8_ps_state.c | 5 +++++
8 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 11c485d..4a02ce4 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -334,6 +334,7 @@ struct brw_wm_prog_data {
} binding_table;
uint8_t computed_depth_mode;
+ bool computed_stencil;
bool early_fragment_tests;
bool no_8;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 215f454..c67728b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1316,8 +1316,9 @@ enum fb_write_logical_args {
FB_WRITE_SRC0_ALPHA = 2,
FB_WRITE_SRC_DEPTH = 3, /* gl_FragDepth */
FB_WRITE_DST_DEPTH = 4, /* GEN4-5: passthrough from thread */
- FB_WRITE_OMASK = 5, /* Sample Mask (gl_SampleMask) */
- FB_WRITE_COMPONENTS = 6, /* REQUIRED */
+ FB_WRITE_SRC_STENCIL = 5, /* gl_FragStencilRefARB */
+ FB_WRITE_OMASK = 6, /* Sample Mask (gl_SampleMask) */
+ FB_WRITE_COMPONENTS = 7, /* REQUIRED */
};
#ifdef __cplusplus
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e2e3761..560eb91 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3344,6 +3344,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
const fs_reg &src0_alpha = inst->src[FB_WRITE_SRC0_ALPHA];
const fs_reg &src_depth = inst->src[FB_WRITE_SRC_DEPTH];
const fs_reg &dst_depth = inst->src[FB_WRITE_DST_DEPTH];
+ const fs_reg &src_stencil = inst->src[FB_WRITE_SRC_STENCIL];
fs_reg sample_mask = inst->src[FB_WRITE_OMASK];
const unsigned components =
inst->src[FB_WRITE_COMPONENTS].fixed_hw_reg.dw1.ud;
@@ -3436,6 +3437,13 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
length++;
}
+ if (src_stencil.file != BAD_FILE) {
+ assert(devinfo->gen >= 9);
+ assert(bld.dispatch_width() != 16);
+ sources[length] = src_stencil;
+ length++;
+ }
+
fs_inst *load;
if (devinfo->gen >= 7) {
/* Send from the GRF */
@@ -4700,6 +4708,10 @@ fs_visitor::setup_payload_gen6()
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
source_depth_to_render_target = true;
}
+
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+ source_stencil_to_render_target = true;
+ }
}
void
@@ -5208,6 +5220,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
prog_data->uses_omask =
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data->computed_depth_mode = computed_depth_mode(shader);
+ prog_data->computed_stencil =
+ shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 171338d..4f59d4b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -328,6 +328,7 @@ public:
int *push_constant_loc;
fs_reg frag_depth;
+ fs_reg frag_stencil;
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
unsigned output_components[VARYING_SLOT_MAX];
@@ -367,6 +368,7 @@ public:
} payload;
bool source_depth_to_render_target;
+ bool source_stencil_to_render_target;
bool runtime_check_aads_emit;
fs_reg pixel_x;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 13c495c..1a893c9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -317,6 +317,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
brw_imm_ud(inst->target));
}
+ /* Set computes stencil to render target */
+ if (prog_data->computed_stencil) {
+ brw_OR(p,
+ vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
+ vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+ brw_imm_ud(0x1 << 14));
+ }
+
implied_header = brw_null_reg();
} else {
implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 792663f..086060a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -113,6 +113,8 @@ fs_visitor::nir_setup_outputs()
}
} else if (var->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = reg;
+ } else if (var->data.location == FRAG_RESULT_STENCIL) {
+ this->frag_stencil = reg;
} else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
this->sample_mask = reg;
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f825fed..647d14b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -696,7 +696,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
const fs_reg dst_depth = (payload.dest_depth_reg ?
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
fs_reg());
- fs_reg src_depth;
+ fs_reg src_depth, src_stencil;
if (source_depth_to_render_target) {
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@@ -705,10 +705,17 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
+ /* Hand over gl_FragStencilRefARB */
+ if (source_stencil_to_render_target) {
+ assert(nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL));
+ src_stencil = frag_stencil;
+ }
+
const fs_reg sources[] = {
- color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
- fs_reg(components)
+ color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
+ sample_mask, fs_reg(components)
};
+
fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
sources, ARRAY_SIZE(sources));
@@ -739,6 +746,16 @@ fs_visitor::emit_fb_writes()
no16("Missing support for simd16 depth writes on gen6\n");
}
+ if (source_stencil_to_render_target) {
+ /* From the 'Render Target Write message' section of the docs:
+ * "Output Stencil is not supported with SIMD16 Render Target Write
+ * Messages."
+ *
+ * FINISHME: split 16 into 2 8s
+ */
+ no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+ }
+
if (do_dual_src) {
const fs_builder abld = bld.annotate("FB dual-source write");
@@ -1105,6 +1122,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
memset(this->outputs, 0, sizeof(this->outputs));
memset(this->output_components, 0, sizeof(this->output_components));
this->source_depth_to_render_target = false;
+ this->source_stencil_to_render_target = false;
this->runtime_check_aads_emit = false;
this->first_non_payload_grf = 0;
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8f05074..10e433b 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
!brw_color_buffer_write_enabled(brw))
dw1 |= GEN8_PSX_SHADER_HAS_UAV;
+ if (prog_data->computed_stencil) {
+ assert(brw->gen >= 9);
+ dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
+ }
+
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
OUT_BATCH(dw1);
--
2.6.1
More information about the mesa-dev
mailing list