[Mesa-dev] [PATCH 08/23] i965/fs: Add support for specifying register horizontal strides.

Francisco Jerez currojerez at riseup.net
Mon Dec 2 11:31:13 PST 2013


---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 29 +++++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_fs.h                 |  3 +++
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  7 +++++-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp     |  8 +++---
 .../drivers/dri/i965/brw_fs_live_variables.cpp     |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 +--
 6 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0caae2d..e4cee33 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -380,6 +380,7 @@ fs_reg::init()
 {
    memset(this, 0, sizeof(*this));
    this->smear = -1;
+   stride = 1;
 }
 
 /** Generic unset register constructor. */
@@ -445,6 +446,7 @@ fs_reg::equals(const fs_reg &r) const
            memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
                   sizeof(fixed_hw_reg)) == 0 &&
            smear == r.smear &&
+           stride == r.stride &&
            imm.u == r.imm.u);
 }
 
@@ -456,6 +458,22 @@ fs_reg::retype(uint32_t type)
    return result;
 }
 
+fs_reg &
+fs_reg::apply_stride(unsigned stride)
+{
+   assert((this->stride * stride) <= 4 &&
+          is_power_of_two(stride) &&
+          file != HW_REG && file != IMM);
+   this->stride *= stride;
+   return *this;
+}
+
+bool
+fs_reg::is_contiguous() const
+{
+   return stride == 1;
+}
+
 bool
 fs_reg::is_valid_3src() const
 {
@@ -686,7 +704,7 @@ fs_inst::is_partial_write()
 {
    return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
            this->force_uncompressed ||
-           this->force_sechalf);
+           this->force_sechalf || !this->dst.is_contiguous());
 }
 
 int
@@ -2246,6 +2264,7 @@ fs_visitor::register_coalesce_2()
 	  inst->src[0].negate ||
 	  inst->src[0].abs ||
 	  inst->src[0].smear != -1 ||
+          !inst->src[0].is_contiguous() ||
 	  inst->dst.file != GRF ||
 	  inst->dst.type != inst->src[0].type ||
 	  virtual_grf_sizes[inst->src[0].reg] != 1) {
@@ -2338,6 +2357,7 @@ fs_visitor::register_coalesce()
       bool has_source_modifiers = (inst->src[0].abs ||
                                    inst->src[0].negate ||
                                    inst->src[0].smear != -1 ||
+                                   !inst->src[0].is_contiguous() ||
                                    inst->src[0].file == UNIFORM);
 
       /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
@@ -2422,7 +2442,9 @@ fs_visitor::register_coalesce()
                }
 	       new_src.negate ^= scan_inst->src[i].negate;
 	       new_src.sechalf = scan_inst->src[i].sechalf;
-               new_src.subreg_offset += scan_inst->src[i].subreg_offset;
+               new_src.subreg_offset +=
+                  scan_inst->src[i].subreg_offset * new_src.stride;
+               new_src.stride *= scan_inst->src[i].stride;
 	       scan_inst->src[i] = new_src;
 	    }
 	 }
@@ -2458,7 +2480,8 @@ fs_visitor::compute_to_mrf()
 	  inst->dst.file != MRF || inst->src[0].file != GRF ||
 	  inst->dst.type != inst->src[0].type ||
 	  inst->src[0].abs || inst->src[0].negate ||
-          inst->src[0].smear != -1 || inst->src[0].subreg_offset)
+          inst->src[0].smear != -1 || !inst->src[0].is_contiguous() ||
+          inst->src[0].subreg_offset)
 	 continue;
 
       /* Work out which hardware MRF registers are written by this
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 93a393d..b0ce812 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -77,13 +77,16 @@ public:
 
    bool equals(const fs_reg &r) const;
    bool is_valid_3src() const;
+   bool is_contiguous() const;
    fs_reg retype(uint32_t type);
+   fs_reg &apply_stride(unsigned stride);
 
    bool negate;
    bool abs;
    bool sechalf;
    int smear; /* -1, or a channel of the reg to smear to all channels. */
    int subreg_offset; /**< Offset in bytes from the start of the register. */
+   int stride; /**< Register region horizontal stride */
 
    fs_reg *reladdr;
 };
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index f3f44c6..2f2d6b6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -298,7 +298,11 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    bool has_source_modifiers = entry->src.abs || entry->src.negate;
 
    if ((has_source_modifiers || entry->src.file == UNIFORM ||
-        entry->src.smear != -1) && !can_do_source_mods(inst))
+        entry->src.smear != -1 || !entry->src.is_contiguous()) &&
+       !can_do_source_mods(inst))
+      return false;
+
+   if (entry->src.stride * inst->src[arg].stride > 4)
       return false;
 
    if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
@@ -310,6 +314,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    if (entry->src.smear != -1)
       inst->src[arg].smear = entry->src.smear;
    inst->src[arg].subreg_offset = entry->src.subreg_offset;
+   inst->src[arg].stride *= entry->src.stride;
 
    if (!inst->src[arg].abs) {
       inst->src[arg].abs = entry->src.abs;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index a0b6135..66321bd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -988,11 +988,13 @@ brw_reg_from_fs_reg(fs_reg *reg)
    switch (reg->file) {
    case GRF:
    case MRF:
-      if (reg->smear == -1) {
-	 brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
+      if (reg->stride == 0 || reg->smear >= 0) {
+         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
       } else {
-	 brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
+         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
+         brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
       }
+
       brw_reg = retype(brw_reg, reg->type);
       if (reg->sechalf)
 	 brw_reg = sechalf(brw_reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 21b2618..1a2d398 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -84,7 +84,7 @@ fs_live_variables::setup_one_read(bblock_t *block, fs_inst *inst,
     * would get stomped by the first decode as well.
     */
    int end_ip = ip;
-   if (v->dispatch_width == 16 && (reg.smear != -1 ||
+   if (v->dispatch_width == 16 && (reg.smear != -1 || reg.stride == 0 ||
                                    (v->pixel_x.reg == reg.reg ||
                                     v->pixel_y.reg == reg.reg))) {
       end_ip++;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 2d35909..3290004 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -591,7 +591,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
              * loading pull constants, so spilling them is unlikely to reduce
              * register pressure anyhow.
              */
-            if (inst->src[i].smear >= 0) {
+            if (inst->src[i].smear >= 0 || !inst->src[i].is_contiguous()) {
                no_spill[inst->src[i].reg] = true;
             }
 	 }
@@ -600,7 +600,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
       if (inst->dst.file == GRF) {
 	 spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
 
-         if (inst->dst.smear >= 0) {
+         if (inst->dst.smear >= 0 || !inst->dst.is_contiguous()) {
             no_spill[inst->dst.reg] = true;
          }
       }
-- 
1.8.3.4



More information about the mesa-dev mailing list