[Mesa-dev] [PATCH 08/23] i965/fs: Add support for specifying register horizontal strides.
Paul Berry
stereotype441 at gmail.com
Thu Dec 19 14:09:56 PST 2013
On 2 December 2013 11:31, Francisco Jerez <currojerez at riseup.net> wrote:
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 29
> +++++++++++++++++++---
> src/mesa/drivers/dri/i965/brw_fs.h | 3 +++
> .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 7 +++++-
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 +++---
> .../drivers/dri/i965/brw_fs_live_variables.cpp | 2 +-
> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +--
> 6 files changed, 43 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 0caae2d..e4cee33 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -380,6 +380,7 @@ fs_reg::init()
> {
> memset(this, 0, sizeof(*this));
> this->smear = -1;
> + stride = 1;
> }
>
> /** Generic unset register constructor. */
> @@ -445,6 +446,7 @@ fs_reg::equals(const fs_reg &r) const
> memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
> sizeof(fixed_hw_reg)) == 0 &&
> smear == r.smear &&
> + stride == r.stride &&
> imm.u == r.imm.u);
> }
>
> @@ -456,6 +458,22 @@ fs_reg::retype(uint32_t type)
> return result;
> }
>
> +fs_reg &
> +fs_reg::apply_stride(unsigned stride)
> +{
> + assert((this->stride * stride) <= 4 &&
> + is_power_of_two(stride) &&
> + file != HW_REG && file != IMM);
>
This function relies on the (IMO surprising) fact that is_power_of_two(0)
== true. Can we add a comment to clarify that we're taking advantage of
this to allow a stride of 0? Otherwise a reader might reasonably assume
that strides of 0 were not allowed.
> + this->stride *= stride;
> + return *this;
> +}
> +
> +bool
> +fs_reg::is_contiguous() const
> +{
> + return stride == 1;
> +}
> +
> bool
> fs_reg::is_valid_3src() const
> {
> @@ -686,7 +704,7 @@ fs_inst::is_partial_write()
> {
> return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
> this->force_uncompressed ||
> - this->force_sechalf);
> + this->force_sechalf || !this->dst.is_contiguous());
> }
>
> int
> @@ -2246,6 +2264,7 @@ fs_visitor::register_coalesce_2()
> inst->src[0].negate ||
> inst->src[0].abs ||
> inst->src[0].smear != -1 ||
> + !inst->src[0].is_contiguous() ||
> inst->dst.file != GRF ||
> inst->dst.type != inst->src[0].type ||
> virtual_grf_sizes[inst->src[0].reg] != 1) {
> @@ -2338,6 +2357,7 @@ fs_visitor::register_coalesce()
> bool has_source_modifiers = (inst->src[0].abs ||
> inst->src[0].negate ||
> inst->src[0].smear != -1 ||
> + !inst->src[0].is_contiguous() ||
> inst->src[0].file == UNIFORM);
>
> /* Found a move of a GRF to a GRF. Let's see if we can coalesce
> @@ -2422,7 +2442,9 @@ fs_visitor::register_coalesce()
> }
> new_src.negate ^= scan_inst->src[i].negate;
> new_src.sechalf = scan_inst->src[i].sechalf;
> - new_src.subreg_offset += scan_inst->src[i].subreg_offset;
> + new_src.subreg_offset +=
> + scan_inst->src[i].subreg_offset * new_src.stride;
> + new_src.stride *= scan_inst->src[i].stride;
> scan_inst->src[i] = new_src;
> }
> }
> @@ -2458,7 +2480,8 @@ fs_visitor::compute_to_mrf()
> inst->dst.file != MRF || inst->src[0].file != GRF ||
> inst->dst.type != inst->src[0].type ||
> inst->src[0].abs || inst->src[0].negate ||
> - inst->src[0].smear != -1 || inst->src[0].subreg_offset)
> + inst->src[0].smear != -1 || !inst->src[0].is_contiguous() ||
> + inst->src[0].subreg_offset)
> continue;
>
> /* Work out which hardware MRF registers are written by this
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 93a393d..b0ce812 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -77,13 +77,16 @@ public:
>
> bool equals(const fs_reg &r) const;
> bool is_valid_3src() const;
> + bool is_contiguous() const;
> fs_reg retype(uint32_t type);
> + fs_reg &apply_stride(unsigned stride);
>
> bool negate;
> bool abs;
> bool sechalf;
> int smear; /* -1, or a channel of the reg to smear to all channels. */
> int subreg_offset; /**< Offset in bytes from the start of the
> register. */
> + int stride; /**< Register region horizontal stride */
>
> fs_reg *reladdr;
> };
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> index f3f44c6..2f2d6b6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> @@ -298,7 +298,11 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int
> arg, acp_entry *entry)
> bool has_source_modifiers = entry->src.abs || entry->src.negate;
>
> if ((has_source_modifiers || entry->src.file == UNIFORM ||
> - entry->src.smear != -1) && !can_do_source_mods(inst))
> + entry->src.smear != -1 || !entry->src.is_contiguous()) &&
> + !can_do_source_mods(inst))
> + return false;
> +
> + if (entry->src.stride * inst->src[arg].stride > 4)
> return false;
>
> if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
> @@ -310,6 +314,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg,
> acp_entry *entry)
> if (entry->src.smear != -1)
> inst->src[arg].smear = entry->src.smear;
> inst->src[arg].subreg_offset = entry->src.subreg_offset;
> + inst->src[arg].stride *= entry->src.stride;
>
> if (!inst->src[arg].abs) {
> inst->src[arg].abs = entry->src.abs;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index a0b6135..66321bd 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -988,11 +988,13 @@ brw_reg_from_fs_reg(fs_reg *reg)
> switch (reg->file) {
> case GRF:
> case MRF:
> - if (reg->smear == -1) {
> - brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
> + if (reg->stride == 0 || reg->smear >= 0) {
> + brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg,
> reg->smear);
> } else {
> - brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg,
> reg->smear);
> + brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
> + brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
> }
> +
> brw_reg = retype(brw_reg, reg->type);
> if (reg->sechalf)
> brw_reg = sechalf(brw_reg);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index 21b2618..1a2d398 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -84,7 +84,7 @@ fs_live_variables::setup_one_read(bblock_t *block,
> fs_inst *inst,
> * would get stomped by the first decode as well.
> */
> int end_ip = ip;
> - if (v->dispatch_width == 16 && (reg.smear != -1 ||
> + if (v->dispatch_width == 16 && (reg.smear != -1 || reg.stride == 0 ||
> (v->pixel_x.reg == reg.reg ||
> v->pixel_y.reg == reg.reg))) {
> end_ip++;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index 2d35909..3290004 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -591,7 +591,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
> * loading pull constants, so spilling them is unlikely to
> reduce
> * register pressure anyhow.
> */
> - if (inst->src[i].smear >= 0) {
> + if (inst->src[i].smear >= 0 || !inst->src[i].is_contiguous())
> {
> no_spill[inst->src[i].reg] = true;
> }
> }
> @@ -600,7 +600,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
> if (inst->dst.file == GRF) {
> spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
>
> - if (inst->dst.smear >= 0) {
> + if (inst->dst.smear >= 0 || !inst->dst.is_contiguous()) {
> no_spill[inst->dst.reg] = true;
> }
> }
>
In v2 of this patch, you add the following code to
fs_visitor::try_copy_propagate():
+ /* Bail if the result of composing both strides cannot be expressed
+ * as another stride.
+ */
+ if (entry->src.stride != 1 &&
+ (inst->src[arg].stride *
+ type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
return false;
I don't understand. It seems like this is trying to make sure that
(src_stride * src_type_sz) / entry_type_sz is an integer so we can later
use the factor (src_type_sz / entry_type_sz) as a multiplicative factor to
correct the stride without creating a fractional result. But I don't see
us applying this correction factor anywhere. Is there some code missing?
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131219/b97e28c4/attachment-0001.html>
More information about the mesa-dev
mailing list