[Mesa-dev] [PATCH 1/7] i965: Introduce the BROADCAST pseudo-opcode.

Matt Turner mattst88 at gmail.com
Wed Apr 29 22:29:46 PDT 2015


On Fri, Feb 20, 2015 at 11:48 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> The BROADCAST instruction picks the channel from its first source
> given by an index passed in as second source.  This will be used in
> situations where all channels from the same SIMD thread have to agree
> on the value of something, e.g. a surface binding table index.
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h          |  6 ++
>  src/mesa/drivers/dri/i965/brw_eu.h               |  6 ++
>  src/mesa/drivers/dri/i965/brw_eu_emit.c          | 77 ++++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  4 ++
>  src/mesa/drivers/dri/i965/brw_shader.cpp         |  3 +
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  4 ++
>  6 files changed, 100 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 17c27dd..d4930e3 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -911,6 +911,12 @@ enum opcode {
>
>     SHADER_OPCODE_URB_WRITE_SIMD8,
>
> +   /**
> +    * Pick the channel from its first source register given by the index
> +    * specified as second source.  Useful for variable indexing of surfaces.
> +    */
> +   SHADER_OPCODE_BROADCAST,
> +
>     VEC4_OPCODE_MOV_BYTES,
>     VEC4_OPCODE_PACK_BYTES,
>     VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
> index a94ea42..2505480 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -413,6 +413,12 @@ brw_pixel_interpolator_query(struct brw_compile *p,
>                               unsigned msg_length,
>                               unsigned response_length);
>
> +void
> +brw_broadcast(struct brw_compile *p,
> +              struct brw_reg dst,
> +              struct brw_reg src,
> +              struct brw_reg idx);
> +
>  /***********************************************************************
>   * brw_eu_util.c:
>   */
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 1d6fd67..d7e3995 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -2854,6 +2854,83 @@ brw_pixel_interpolator_query(struct brw_compile *p,
>     brw_inst_set_pi_message_data(brw, insn, data);
>  }
>
> +void
> +brw_broadcast(struct brw_compile *p,
> +              struct brw_reg dst,
> +              struct brw_reg src,
> +              struct brw_reg idx)
> +{
> +   const struct brw_context *brw = p->brw;
> +   const bool align1 = (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1);

Unnecessary parentheses.

> +   brw_inst *inst;
> +
> +   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
> +          src.address_mode == BRW_ADDRESS_DIRECT);
> +
> +   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
> +       idx.file == BRW_IMMEDIATE_VALUE) {
> +      /* Trivial, the source is already uniform or the index is a constant.
> +       * We will typically not get here if the optimizer is doing its job, but
> +       * asserting would be mean.
> +       */
> +      const unsigned i = (idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0);

Unnecessary parentheses.

> +      brw_MOV(p, dst,
> +              (align1 ? stride(suboffset(src, i), 0, 1, 0) :
> +               stride(suboffset(src, 4 * i), 0, 4, 1)));
> +

Extra blank line.

> +   } else {
> +      if (align1) {
> +         const struct brw_reg addr =
> +            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
> +         const unsigned offset = src.nr * REG_SIZE + src.subnr;
> +         /* Limit in bytes of the signed indirect addressing immediate. */
> +         const unsigned limit = 512;
> +
> +         brw_push_insn_state(p);
> +         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> +         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> +
> +         /* Take into account the component size and horizontal stride. */
> +         assert(src.vstride == src.hstride + src.width);
> +         brw_SHL(p, addr, vec1(idx),
> +                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
> +                            src.hstride - 1));
> +
> +         /* We can only address up to limit bytes using the indirect
> +          * addressing immediate, account for the difference if the source
> +          * register is above this limit.
> +          */
> +         if (offset >= limit)
> +            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
> +
> +         brw_pop_insn_state(p);
> +
> +         /* Use indirect addressing to fetch the specified component. */
> +         brw_MOV(p, dst,
> +                 retype(brw_vec1_indirect(addr.subnr, offset % limit),
> +                        src.type));
> +

Extra blank line.


It might be good to put some of Ian's explanation of why this is needed
into the commit message. I had to go read the piglit tests before I
really understood it.


More information about the mesa-dev mailing list