[Mesa-dev] [PATCH 12/17] intel/compiler/fs: Implement ddy without using align16 for Gen11+

Matt Turner mattst88 at gmail.com
Wed Feb 21 05:15:19 UTC 2018


The align16 access mode no longer exists on Gen11+. We previously generated
a single align16 ADD instruction to calculate DDY:

   add(8) g11<1>F  -g10<4>.xyxyF  g10<4>.zwzwF  { align16 1Q };

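Without align16, we now implement it as two align1 instructions, each at
half the original execution size: one writes the even destination channels
and the other writes the odd destination channels: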

   add(4) g11<2>F   -g10<4,2,0>F    g10.2<4,2,0>F  { align1 1N };
   add(4) g11.1<2>F -g10.1<4,2,0>F  g10.3<4,2,0>F  { align1 1N };
---
 src/intel/compiler/brw_fs_generator.cpp | 70 ++++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 013d2c820a0..ffc46972420 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1192,23 +1192,65 @@ fs_generator::generate_ddy(const fs_inst *inst,
 {
    if (inst->opcode == FS_OPCODE_DDY_FINE) {
       /* produce accurate derivatives */
-      struct brw_reg src0 = src;
-      struct brw_reg src1 = src;
+      if (devinfo->gen >= 11) {
+         struct brw_reg x = src;
+         struct brw_reg y = src;
+         struct brw_reg z = src;
+         struct brw_reg w = src;
+         struct brw_reg dst_e = dst;
+         struct brw_reg dst_o = dst;
+
+         x.vstride = BRW_VERTICAL_STRIDE_4;
+         y.vstride = BRW_VERTICAL_STRIDE_4;
+         z.vstride = BRW_VERTICAL_STRIDE_4;
+         w.vstride = BRW_VERTICAL_STRIDE_4;
+
+         x.width = BRW_WIDTH_2;
+         y.width = BRW_WIDTH_2;
+         z.width = BRW_WIDTH_2;
+         w.width = BRW_WIDTH_2;
+
+         x.hstride = BRW_HORIZONTAL_STRIDE_0;
+         y.hstride = BRW_HORIZONTAL_STRIDE_0;
+         z.hstride = BRW_HORIZONTAL_STRIDE_0;
+         w.hstride = BRW_HORIZONTAL_STRIDE_0;
+
+         x.subnr = 0 * sizeof(float);
+         y.subnr = 1 * sizeof(float);
+         z.subnr = 2 * sizeof(float);
+         w.subnr = 3 * sizeof(float);
+
+         dst_e.hstride = BRW_HORIZONTAL_STRIDE_2;
+         dst_o.hstride = BRW_HORIZONTAL_STRIDE_2;
+         dst_o.subnr = sizeof(float);
 
-      src0.swizzle = BRW_SWIZZLE_XYXY;
-      src0.vstride = BRW_VERTICAL_STRIDE_4;
-      src0.width   = BRW_WIDTH_4;
-      src0.hstride = BRW_HORIZONTAL_STRIDE_1;
+         brw_push_insn_state(p);
+         if (inst->exec_size == 8)
+            brw_set_default_exec_size(p, BRW_EXECUTE_4);
+         else
+            brw_set_default_exec_size(p, BRW_EXECUTE_8);
+         brw_ADD(p, dst_e, negate(x), z);
+         brw_ADD(p, dst_o, negate(y), w);
+         brw_pop_insn_state(p);
+      } else {
+         struct brw_reg src0 = src;
+         struct brw_reg src1 = src;
 
-      src1.swizzle = BRW_SWIZZLE_ZWZW;
-      src1.vstride = BRW_VERTICAL_STRIDE_4;
-      src1.width   = BRW_WIDTH_4;
-      src1.hstride = BRW_HORIZONTAL_STRIDE_1;
+         src0.swizzle = BRW_SWIZZLE_XYXY;
+         src0.vstride = BRW_VERTICAL_STRIDE_4;
+         src0.width   = BRW_WIDTH_4;
+         src0.hstride = BRW_HORIZONTAL_STRIDE_1;
 
-      brw_push_insn_state(p);
-      brw_set_default_access_mode(p, BRW_ALIGN_16);
-      brw_ADD(p, dst, negate(src0), src1);
-      brw_pop_insn_state(p);
+         src1.swizzle = BRW_SWIZZLE_ZWZW;
+         src1.vstride = BRW_VERTICAL_STRIDE_4;
+         src1.width   = BRW_WIDTH_4;
+         src1.hstride = BRW_HORIZONTAL_STRIDE_1;
+
+         brw_push_insn_state(p);
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         brw_ADD(p, dst, negate(src0), src1);
+         brw_pop_insn_state(p);
+      }
    } else {
       /* replicate the derivative at the top-left pixel to other pixels */
       struct brw_reg src0 = src;
-- 
2.16.1



More information about the mesa-dev mailing list