[Mesa-dev] [PATCH 3/3] i965/fs: Implement FS_OPCODE_[UN]PACK_HALF_2x16_SPLIT[_XY] opcodes.

Kenneth Graunke kenneth at whitecape.org
Wed Jan 29 14:36:22 PST 2014


I'd neglected to port these to Broadwell.  Most of this code is copy
and pasted from Gen7, but instead of using F32TO16/F16TO32, we just
use MOV with HF register types.

Fixes fs-packHalf2x16 and fs-unpackHalf2x16 tests (both the ARB
extension and ES 3.0 variants).

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.h              |  7 +++
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 76 ++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9c5c13a..5c7f2ce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -673,6 +673,13 @@ private:
    void generate_set_simd4x2_offset(fs_inst *ir,
                                     struct brw_reg dst,
                                     struct brw_reg offset);
+   void generate_pack_half_2x16_split(fs_inst *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg x,
+                                      struct brw_reg y);
+   void generate_unpack_half_2x16_split(fs_inst *inst,
+                                        struct brw_reg dst,
+                                        struct brw_reg src);
    void generate_discard_jump(fs_inst *ir);
 
    void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index 6793ce0..43eaa35 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -582,6 +582,78 @@ gen8_fs_generator::generate_set_simd4x2_offset(fs_inst *ir,
    MOV_RAW(retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
 }
 
+/**
+ * Change the register's data type from UD to HF, doubling the strides in order
+ * to compensate for halving the data type width.
+ */
+static struct brw_reg
+ud_reg_to_hf(struct brw_reg r)
+{
+   assert(r.type == BRW_REGISTER_TYPE_UD);
+   r.type = BRW_REGISTER_TYPE_HF;
+
+   /* The BRW_*_STRIDE enums are defined so that incrementing the field
+    * doubles the real stride.
+    */
+   if (r.hstride != 0)
+      ++r.hstride;
+   if (r.vstride != 0)
+      ++r.vstride;
+
+   return r;
+}
+
+void
+gen8_fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg x,
+                                                 struct brw_reg y)
+{
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+   assert(x.type == BRW_REGISTER_TYPE_F);
+   assert(y.type == BRW_REGISTER_TYPE_F);
+
+   struct brw_reg dst_hf = ud_reg_to_hf(dst);
+
+   /* Give each 32-bit channel of dst the form below , where "." means
+    * unchanged.
+    *   0x....hhhh
+    */
+   MOV(dst_hf, y);
+
+   /* Now the form:
+    *   0xhhhh0000
+    */
+   SHL(dst, dst, brw_imm_ud(16u));
+
+   /* And, finally the form of packHalf2x16's output:
+    *   0xhhhhllll
+    */
+   MOV(dst_hf, x);
+}
+
+void
+gen8_fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
+                                                   struct brw_reg dst,
+                                                   struct brw_reg src)
+{
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+
+   struct brw_reg src_hf = ud_reg_to_hf(src);
+
+   /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
+    * For the Y case, we wish to access only the upper word; therefore
+    * a 16-bit subregister offset is needed.
+    */
+   assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
+          inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
+   if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
+      src_hf.subnr += 2;
+
+   MOV(dst, src_hf);
+}
+
 void
 gen8_fs_generator::generate_code(exec_list *instructions)
 {
@@ -965,12 +1037,12 @@ gen8_fs_generator::generate_code(exec_list *instructions)
          break;
 
       case FS_OPCODE_PACK_HALF_2x16_SPLIT:
-         assert(!"XXX: Missing Gen8 scalar support for PACK_HALF_2x16_SPLIT");
+         generate_pack_half_2x16_split(ir, dst, src[0], src[1]);
          break;
 
       case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
       case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
-         assert(!"XXX: Missing Gen8 scalar support for UNPACK_HALF_2x16_SPLIT");
+         generate_unpack_half_2x16_split(ir, dst, src[0]);
          break;
 
       case FS_OPCODE_PLACEHOLDER_HALT:
-- 
1.8.4.2



More information about the mesa-dev mailing list