[Mesa-dev] [PATCH 08/23] i965/fs: Add support for specifying register horizontal strides.

Paul Berry stereotype441 at gmail.com
Thu Dec 19 14:09:56 PST 2013


On 2 December 2013 11:31, Francisco Jerez <currojerez at riseup.net> wrote:

> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp               | 29
> +++++++++++++++++++---
>  src/mesa/drivers/dri/i965/brw_fs.h                 |  3 +++
>  .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  7 +++++-
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp     |  8 +++---
>  .../drivers/dri/i965/brw_fs_live_variables.cpp     |  2 +-
>  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 +--
>  6 files changed, 43 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 0caae2d..e4cee33 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -380,6 +380,7 @@ fs_reg::init()
>  {
>     memset(this, 0, sizeof(*this));
>     this->smear = -1;
> +   stride = 1;
>  }
>
>  /** Generic unset register constructor. */
> @@ -445,6 +446,7 @@ fs_reg::equals(const fs_reg &r) const
>             memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
>                    sizeof(fixed_hw_reg)) == 0 &&
>             smear == r.smear &&
> +           stride == r.stride &&
>             imm.u == r.imm.u);
>  }
>
> @@ -456,6 +458,22 @@ fs_reg::retype(uint32_t type)
>     return result;
>  }
>
> +fs_reg &
> +fs_reg::apply_stride(unsigned stride)
> +{
> +   assert((this->stride * stride) <= 4 &&
> +          is_power_of_two(stride) &&
> +          file != HW_REG && file != IMM);
>

This function relies on the (IMO surprising) fact that is_power_of_two(0)
== true.  Can we add a comment to clarify that we're taking advantage of
this to allow a stride of 0?  Otherwise a reader might reasonably assume
that strides of 0 were not allowed.


> +   this->stride *= stride;
> +   return *this;
> +}
> +
> +bool
> +fs_reg::is_contiguous() const
> +{
> +   return stride == 1;
> +}
> +
>  bool
>  fs_reg::is_valid_3src() const
>  {
> @@ -686,7 +704,7 @@ fs_inst::is_partial_write()
>  {
>     return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
>             this->force_uncompressed ||
> -           this->force_sechalf);
> +           this->force_sechalf || !this->dst.is_contiguous());
>  }
>
>  int
> @@ -2246,6 +2264,7 @@ fs_visitor::register_coalesce_2()
>           inst->src[0].negate ||
>           inst->src[0].abs ||
>           inst->src[0].smear != -1 ||
> +          !inst->src[0].is_contiguous() ||
>           inst->dst.file != GRF ||
>           inst->dst.type != inst->src[0].type ||
>           virtual_grf_sizes[inst->src[0].reg] != 1) {
> @@ -2338,6 +2357,7 @@ fs_visitor::register_coalesce()
>        bool has_source_modifiers = (inst->src[0].abs ||
>                                     inst->src[0].negate ||
>                                     inst->src[0].smear != -1 ||
> +                                   !inst->src[0].is_contiguous() ||
>                                     inst->src[0].file == UNIFORM);
>
>        /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
> @@ -2422,7 +2442,9 @@ fs_visitor::register_coalesce()
>                 }
>                new_src.negate ^= scan_inst->src[i].negate;
>                new_src.sechalf = scan_inst->src[i].sechalf;
> -               new_src.subreg_offset += scan_inst->src[i].subreg_offset;
> +               new_src.subreg_offset +=
> +                  scan_inst->src[i].subreg_offset * new_src.stride;
> +               new_src.stride *= scan_inst->src[i].stride;
>                scan_inst->src[i] = new_src;
>             }
>          }
> @@ -2458,7 +2480,8 @@ fs_visitor::compute_to_mrf()
>           inst->dst.file != MRF || inst->src[0].file != GRF ||
>           inst->dst.type != inst->src[0].type ||
>           inst->src[0].abs || inst->src[0].negate ||
> -          inst->src[0].smear != -1 || inst->src[0].subreg_offset)
> +          inst->src[0].smear != -1 || !inst->src[0].is_contiguous() ||
> +          inst->src[0].subreg_offset)
>          continue;
>
>        /* Work out which hardware MRF registers are written by this
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 93a393d..b0ce812 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -77,13 +77,16 @@ public:
>
>     bool equals(const fs_reg &r) const;
>     bool is_valid_3src() const;
> +   bool is_contiguous() const;
>     fs_reg retype(uint32_t type);
> +   fs_reg &apply_stride(unsigned stride);
>
>     bool negate;
>     bool abs;
>     bool sechalf;
>     int smear; /* -1, or a channel of the reg to smear to all channels. */
>     int subreg_offset; /**< Offset in bytes from the start of the
> register. */
> +   int stride; /**< Register region horizontal stride */
>
>     fs_reg *reladdr;
>  };
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> index f3f44c6..2f2d6b6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> @@ -298,7 +298,11 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int
> arg, acp_entry *entry)
>     bool has_source_modifiers = entry->src.abs || entry->src.negate;
>
>     if ((has_source_modifiers || entry->src.file == UNIFORM ||
> -        entry->src.smear != -1) && !can_do_source_mods(inst))
> +        entry->src.smear != -1 || !entry->src.is_contiguous()) &&
> +       !can_do_source_mods(inst))
> +      return false;
> +
> +   if (entry->src.stride * inst->src[arg].stride > 4)
>        return false;
>
>     if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
> @@ -310,6 +314,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg,
> acp_entry *entry)
>     if (entry->src.smear != -1)
>        inst->src[arg].smear = entry->src.smear;
>     inst->src[arg].subreg_offset = entry->src.subreg_offset;
> +   inst->src[arg].stride *= entry->src.stride;
>
>     if (!inst->src[arg].abs) {
>        inst->src[arg].abs = entry->src.abs;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index a0b6135..66321bd 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -988,11 +988,13 @@ brw_reg_from_fs_reg(fs_reg *reg)
>     switch (reg->file) {
>     case GRF:
>     case MRF:
> -      if (reg->smear == -1) {
> -        brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
> +      if (reg->stride == 0 || reg->smear >= 0) {
> +         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg,
> reg->smear);
>        } else {
> -        brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg,
> reg->smear);
> +         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
> +         brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
>        }
> +
>        brw_reg = retype(brw_reg, reg->type);
>        if (reg->sechalf)
>          brw_reg = sechalf(brw_reg);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index 21b2618..1a2d398 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -84,7 +84,7 @@ fs_live_variables::setup_one_read(bblock_t *block,
> fs_inst *inst,
>      * would get stomped by the first decode as well.
>      */
>     int end_ip = ip;
> -   if (v->dispatch_width == 16 && (reg.smear != -1 ||
> +   if (v->dispatch_width == 16 && (reg.smear != -1 || reg.stride == 0 ||
>                                     (v->pixel_x.reg == reg.reg ||
>                                      v->pixel_y.reg == reg.reg))) {
>        end_ip++;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index 2d35909..3290004 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -591,7 +591,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
>               * loading pull constants, so spilling them is unlikely to
> reduce
>               * register pressure anyhow.
>               */
> -            if (inst->src[i].smear >= 0) {
> +            if (inst->src[i].smear >= 0 || !inst->src[i].is_contiguous())
> {
>                 no_spill[inst->src[i].reg] = true;
>              }
>          }
> @@ -600,7 +600,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
>        if (inst->dst.file == GRF) {
>          spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
>
> -         if (inst->dst.smear >= 0) {
> +         if (inst->dst.smear >= 0 || !inst->dst.is_contiguous()) {
>              no_spill[inst->dst.reg] = true;
>           }
>        }
>

In v2 of this patch, you add the following code to
fs_visitor::try_copy_propagate():

+   /* Bail if the result of composing both strides cannot be expressed
+    * as another stride.
+    */
+   if (entry->src.stride != 1 &&
+       (inst->src[arg].stride *
+        type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
       return false;

I don't understand.  It seems like this is trying to make sure that
(src_stride * src_type_sz) / entry_type_sz is an integer so we can later
use the factor (src_type_sz / entry_type_sz) as a multiplicative factor to
correct the stride without creating a fractional result.  But I don't see
us applying this correction factor anywhere.  Is there some code missing?
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131219/b97e28c4/attachment-0001.html>


More information about the mesa-dev mailing list