[Mesa-dev] [PATCH 09/12] i965/fs: make SIMD-splitting respect the original stride/offset

Connor Abbott cwabbott0 at gmail.com
Fri Aug 14 15:30:18 PDT 2015


In some cases, we need to emit ALU instructions with a certain stride
due to a HW limitation. When splitting that instruction, we need to
respect the original stride when creating the temporaries we load from
and store into. Otherwise, we'll reintroduce the problem we were trying
to work around.

Signed-off-by: Connor Abbott <connor.w.abbott at intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 812648f..386e9a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2370,12 +2370,14 @@ fs_visitor::opt_register_renaming()
 
       if (depth == 0 &&
           inst->dst.file == GRF &&
-          alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
+          alloc.sizes[inst->dst.reg] ==
+            inst->dst.component_size(inst->exec_size) &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
             remap[dst] = dst;
          } else {
-            remap[dst] = alloc.allocate(inst->exec_size / 8);
+            remap[dst] =
+               alloc.allocate(inst->dst.component_size(inst->exec_size));
             inst->dst.reg = remap[dst];
             progress = true;
          }
@@ -4334,6 +4336,8 @@ fs_visitor::lower_simd_width()
                    * temporary passed as source to the lowered instruction.
                    */
                   split_inst.src[j] = lbld.vgrf(inst->src[j].type, src_size);
+                  split_inst.src[j].subreg_offset = inst->src[j].subreg_offset;
+                  split_inst.src[j].stride = inst->src[j].stride;
                   emit_transpose(lbld.group(copy_width, 0),
                                  split_inst.src[j], &src, 1, src_size, n);
                }
@@ -4343,8 +4347,10 @@ fs_visitor::lower_simd_width()
                /* Allocate enough space to hold the result of the lowered
                 * instruction and fix up the number of registers written.
                 */
-               split_inst.dst = dsts[i] =
-                  lbld.vgrf(inst->dst.type, dst_size);
+               fs_reg dst = lbld.vgrf(inst->dst.type, dst_size);
+               dst.stride = inst->dst.stride;
+               dst.subreg_offset = inst->dst.subreg_offset;
+               split_inst.dst = dsts[i] = dst;
                split_inst.regs_written =
                   DIV_ROUND_UP(inst->regs_written * lower_width,
                                inst->exec_size);
-- 
2.4.3



More information about the mesa-dev mailing list