[Mesa-dev] [PATCH v2 054/103] i965/vec4: translate 64-bit swizzles to 32-bit

Tue Oct 11 09:01:58 UTC 2016

The hardware can only operate with 32-bit swizzles, which is a rather
limiting restriction. However, the idea is not to expose this to the
optimization passes, which would be a mess to deal with. Instead, we let
the bulk of the vec4 backend ignore this fact and we fix the swizzles right
at codegen time.

At the moment the pass only needs to handle single value swizzles thanks to
the scalarization pass that runs before it.

Notice that this only works for X/Y swizzles. We will add support for Z/W
swizzles in the next patch, since they need a bit more work.

v2 (Sam):
  - Do not expand swizzle of 64-bit immediate values.

v3:
  - Do this after translation to hardware registers instead of doing it right
    before so we don't need the force_vstride0 flag (Curro).
  - Squashed patch that included FIXED_GRF in the list of register files that
    need this translation (Iago).
  - Remove swizzle assignments for VGRF and UNIFORM files in
    convert_to_hw_regs(), they will be set by apply_logical_swizzle() (Iago).

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias at igalia.com>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 49 +++++++++++++++++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_vec4.h   |  2 ++
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b15fcee..b37dd59 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1891,7 +1891,6 @@ vec4_visitor::convert_to_hw_regs()
             unsigned width = REG_SIZE / 2 / MAX2(4, type_size);
             reg = byte_offset(brw_vecn_grf(width, src.nr, 0), src.offset);
             reg.type = src.type;
-            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
             break;
@@ -1905,7 +1904,6 @@ vec4_visitor::convert_to_hw_regs()
                                      src.offset),
                          0, width, 1);
             reg.type = src.type;
-            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
 
@@ -1914,8 +1912,13 @@ vec4_visitor::convert_to_hw_regs()
             break;
          }
 
-         case ARF:
          case FIXED_GRF:
+            if (type_sz(src.type) == 8) {
+               reg = src.as_brw_reg();
+               break;
+            }
+            /* fallthrough */
+         case ARF:
          case IMM:
             continue;
 
@@ -1929,6 +1932,7 @@ vec4_visitor::convert_to_hw_regs()
             unreachable("not reached");
          }
 
+         apply_logical_swizzle(&reg, inst, i);
          src = reg;
       }
 
@@ -2226,6 +2230,45 @@ vec4_visitor::scalarize_df()
    return progress;
 }
 
+/* The align16 hardware can only do 32-bit swizzle channels, so we need to
+ * translate the logical 64-bit swizzle channels that we use in the Vec4 IR
+ * to 32-bit swizzle channels in hardware registers.
+ *
+ * @inst and @arg identify the original vec4 IR source operand we need to
+ * translate the swizzle for and @hw_reg is the hardware register where we
+ * will write the hardware swizzle to use.
+ *
+ * This pass assumes that Align16/DF instructions have been fully scalarized
+ * previously so there is just one 64-bit swizzle channel to deal with for any
+ * given Vec4 IR source.
+ */
+void
+vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
+                                    vec4_instruction *inst, int arg)
+{
+   src_reg reg = inst->src[arg];
+
+   if (reg.file == BAD_FILE || reg.file == BRW_IMMEDIATE_VALUE)
+      return;
+
+   /* If this is not a 64-bit operand or this is a scalar instruction we don't
+    * need to do anything about the swizzles.
+    */
+   if(type_sz(reg.type) < 8 || is_align1_df(inst)) {
+      hw_reg->swizzle = reg.swizzle;
+      return;
+   }
+
+   /* Otherwise we should have scalarized the instruction, so take the single
+    * 64-bit logical swizzle channel and translate it to 32-bit
+    */
+   assert(brw_is_single_value_swizzle(reg.swizzle));
+
+   unsigned swizzle = BRW_GET_SWZ(reg.swizzle, 0);
+   hw_reg->swizzle = BRW_SWIZZLE4(swizzle * 2, swizzle * 2 + 1,
+                                  swizzle * 2, swizzle * 2 + 1);
+}
+
 bool
 vec4_visitor::run()
 {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 03c7345..7e51c41 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -164,6 +164,8 @@ public:
 
    bool lower_simd_width();
    bool scalarize_df();
+   void apply_logical_swizzle(struct brw_reg *hw_reg,
+                              vec4_instruction *inst, int arg);
 
    vec4_instruction *emit(vec4_instruction *inst);
 
-- 
2.7.4