[Mesa-dev] [PATCH 5/6] Implement the proper packing for the stencil payload

Ben Widawsky benjamin.widawsky at intel.com
Tue Oct 20 14:29:40 PDT 2015


This patch is split out for review. It will be squashed before pushing.
---
 src/mesa/drivers/dri/i965/brw_defines.h        |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp           |  6 +++-
 src/mesa/drivers/dri/i965/brw_fs.h             |  2 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 44 ++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_shader.cpp       |  2 ++
 5 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index c67728b..2c5cd7a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -915,6 +915,7 @@ enum opcode {
     */
    FS_OPCODE_FB_WRITE_LOGICAL,
 
+   FS_OPCODE_PACK_STENCIL_REF,
    FS_OPCODE_BLORP_FB_WRITE,
    FS_OPCODE_REP_FB_WRITE,
    SHADER_OPCODE_RCP,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 560eb91..c962043 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3440,7 +3440,11 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    if (src_stencil.file != BAD_FILE) {
       assert(devinfo->gen >= 9);
       assert(bld.dispatch_width() != 16);
-      sources[length] = src_stencil;
+
+      sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
+      bld.exec_all().annotate("FB write OS")
+         .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
+               retype(src_stencil, BRW_REGISTER_TYPE_UB));
       length++;
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 4f59d4b..e2bc469 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -419,6 +419,8 @@ private:
    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
+   void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst,
+                                     struct brw_reg src);
    void generate_barrier(fs_inst *inst, struct brw_reg src);
    void generate_blorp_fb_write(fs_inst *inst);
    void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1a893c9..80cc6ad 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -415,6 +415,46 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
 }
 
 void
+fs_generator::generate_stencil_ref_packing(fs_inst *inst,
+                                           struct brw_reg dst,
+                                           struct brw_reg src)
+{
+   assert(dispatch_width == 8);
+   assert(devinfo->gen >= 9);
+
+   /* Stencil value updates are provided in 8 slots of 1 byte per slot.
+    * Presumably, in order to save memory bandwidth, the stencil reference
+    * values written from the FS need to be packed into 2 dwords (this makes
+    * sense because the stencil values are limited to 1 byte each and a SIMD8
+    * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.)
+    *
+    * The spec is confusing here because in the payload definition of MDP_RTW_S8
+    * (Message Data Payload for Render Target Writes with Stencil 8b) the
+    * stencil value seems to be dw4.0-dw4.7. However, if you look at the type of
+    * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the
+    * packed values specified above and diagrammed below:
+    *
+    *     31                             0
+    *     --------------------------------
+    * DW  |                              |
+    * 2-7 |            IGNORED           |
+    *     |                              |
+    *     --------------------------------
+    * DW1 | STC   | STC   | STC   | STC  |
+    *     | slot7 | slot6 | slot5 | slot4|
+    *     --------------------------------
+    * DW0 | STC   | STC   | STC   | STC  |
+    *     | slot3 | slot2 | slot1 | slot0|
+    *     --------------------------------
+    */
+
+   src.width = BRW_WIDTH_1;
+   src.hstride = BRW_HORIZONTAL_STRIDE_0;
+   src.vstride = BRW_VERTICAL_STRIDE_4;
+   brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), retype(src, BRW_REGISTER_TYPE_UB));
+}
+
+void
 fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src)
 {
    brw_barrier(p, src);
@@ -2153,6 +2193,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 	 generate_barrier(inst, src[0]);
 	 break;
 
+      case FS_OPCODE_PACK_STENCIL_REF:
+         generate_stencil_ref_packing(inst, dst, src[0]);
+         break;
+
       default:
          unreachable("Unsupported opcode");
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 2324b56..c38e34e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -290,6 +290,8 @@ brw_instruction_name(enum opcode op)
       return "fb_write";
    case FS_OPCODE_FB_WRITE_LOGICAL:
       return "fb_write_logical";
+   case FS_OPCODE_PACK_STENCIL_REF:
+      return "fb write output stencil";
    case FS_OPCODE_BLORP_FB_WRITE:
       return "blorp_fb_write";
    case FS_OPCODE_REP_FB_WRITE:
-- 
2.6.1



More information about the mesa-dev mailing list