[Mesa-dev] [PATCH 4/6] i965: Implement ARB_shader_stencil_export (SKL+)

Ben Widawsky benjamin.widawsky at intel.com
Tue Oct 20 14:29:39 PDT 2015


Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_compiler.h       |  1 +
 src/mesa/drivers/dri/i965/brw_defines.h        |  5 +++--
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 14 ++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs.h             |  2 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  8 ++++++++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp       |  2 ++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 24 +++++++++++++++++++++---
 src/mesa/drivers/dri/i965/gen8_ps_state.c      |  5 +++++
 8 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 11c485d..4a02ce4 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -334,6 +334,7 @@ struct brw_wm_prog_data {
    } binding_table;
 
    uint8_t computed_depth_mode;
+   bool computed_stencil;
 
    bool early_fragment_tests;
    bool no_8;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 215f454..c67728b 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1316,8 +1316,9 @@ enum fb_write_logical_args {
    FB_WRITE_SRC0_ALPHA = 2,
    FB_WRITE_SRC_DEPTH = 3,   /* gl_FragDepth */
    FB_WRITE_DST_DEPTH = 4,   /* GEN4-5: passthrough from thread */
-   FB_WRITE_OMASK = 5,       /* Sample Mask (gl_SampleMask) */
-   FB_WRITE_COMPONENTS = 6,  /* REQUIRED */
+   FB_WRITE_SRC_STENCIL = 5, /* gl_FragStencilRefARB */
+   FB_WRITE_OMASK = 6,       /* Sample Mask (gl_SampleMask) */
+   FB_WRITE_COMPONENTS = 7,  /* REQUIRED */
 };
 
 #ifdef __cplusplus
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e2e3761..560eb91 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3344,6 +3344,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    const fs_reg &src0_alpha = inst->src[FB_WRITE_SRC0_ALPHA];
    const fs_reg &src_depth = inst->src[FB_WRITE_SRC_DEPTH];
    const fs_reg &dst_depth = inst->src[FB_WRITE_DST_DEPTH];
+   const fs_reg &src_stencil = inst->src[FB_WRITE_SRC_STENCIL];
    fs_reg sample_mask = inst->src[FB_WRITE_OMASK];
    const unsigned components =
       inst->src[FB_WRITE_COMPONENTS].fixed_hw_reg.dw1.ud;
@@ -3436,6 +3437,13 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       length++;
    }
 
+   if (src_stencil.file != BAD_FILE) {
+      assert(devinfo->gen >= 9);
+      assert(bld.dispatch_width() != 16);
+      sources[length] = src_stencil;
+      length++;
+   }
+
    fs_inst *load;
    if (devinfo->gen >= 7) {
       /* Send from the GRF */
@@ -4700,6 +4708,10 @@ fs_visitor::setup_payload_gen6()
    if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
       source_depth_to_render_target = true;
    }
+
+   if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+      source_stencil_to_render_target = true;
+   }
 }
 
 void
@@ -5208,6 +5220,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
    prog_data->uses_omask =
       shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
    prog_data->computed_depth_mode = computed_depth_mode(shader);
+   prog_data->computed_stencil =
+      shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
 
    prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 171338d..4f59d4b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -328,6 +328,7 @@ public:
    int *push_constant_loc;
 
    fs_reg frag_depth;
+   fs_reg frag_stencil;
    fs_reg sample_mask;
    fs_reg outputs[VARYING_SLOT_MAX];
    unsigned output_components[VARYING_SLOT_MAX];
@@ -367,6 +368,7 @@ public:
    } payload;
 
    bool source_depth_to_render_target;
+   bool source_stencil_to_render_target;
    bool runtime_check_aads_emit;
 
    fs_reg pixel_x;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 13c495c..1a893c9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -317,6 +317,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
 		    brw_imm_ud(inst->target));
 	 }
 
+         /* Set the computed-stencil-to-render-target bit (bit 14) */
+         if (prog_data->computed_stencil) {
+            brw_OR(p,
+                   vec1(retype(payload, BRW_REGISTER_TYPE_UD)),
+                   vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
+                   brw_imm_ud(0x1 << 14));
+         }
+
 	 implied_header = brw_null_reg();
       } else {
 	 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 792663f..086060a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -113,6 +113,8 @@ fs_visitor::nir_setup_outputs()
             }
          } else if (var->data.location == FRAG_RESULT_DEPTH) {
             this->frag_depth = reg;
+         } else if (var->data.location == FRAG_RESULT_STENCIL) {
+            this->frag_stencil = reg;
          } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
             this->sample_mask = reg;
          } else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f825fed..647d14b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -696,7 +696,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
    const fs_reg dst_depth = (payload.dest_depth_reg ?
                              fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) :
                              fs_reg());
-   fs_reg src_depth;
+   fs_reg src_depth, src_stencil;
 
    if (source_depth_to_render_target) {
       if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
@@ -705,10 +705,17 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
          src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
    }
 
+   /* Hand over gl_FragStencilRefARB */
+   if (source_stencil_to_render_target) {
+      assert(nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL));
+      src_stencil = frag_stencil;
+   }
+
    const fs_reg sources[] = {
-      color0, color1, src0_alpha, src_depth, dst_depth, sample_mask,
-      fs_reg(components)
+      color0, color1, src0_alpha, src_depth, dst_depth, src_stencil,
+      sample_mask, fs_reg(components)
    };
+
    fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(),
                              sources, ARRAY_SIZE(sources));
 
@@ -739,6 +746,16 @@ fs_visitor::emit_fb_writes()
       no16("Missing support for simd16 depth writes on gen6\n");
    }
 
+   if (source_stencil_to_render_target) {
+      /* From the 'Render Target Write message' section of the docs:
+       * "Output Stencil is not supported with SIMD16 Render Target Write
+       * Messages."
+       *
+       * FINISHME: split the SIMD16 write into two SIMD8 writes
+       */
+      no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n");
+   }
+
    if (do_dual_src) {
       const fs_builder abld = bld.annotate("FB dual-source write");
 
@@ -1105,6 +1122,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
    memset(this->outputs, 0, sizeof(this->outputs));
    memset(this->output_components, 0, sizeof(this->output_components));
    this->source_depth_to_render_target = false;
+   this->source_stencil_to_render_target = false;
    this->runtime_check_aads_emit = false;
    this->first_non_payload_grf = 0;
    this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 8f05074..10e433b 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw,
        !brw_color_buffer_write_enabled(brw))
       dw1 |= GEN8_PSX_SHADER_HAS_UAV;
 
+   if (prog_data->computed_stencil) {
+      assert(brw->gen >= 9);
+      dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL;
+   }
+
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
    OUT_BATCH(dw1);
-- 
2.6.1



More information about the mesa-dev mailing list