[Mesa-dev] [PATCH 08/12] i965/fs: Implement lowering of logical texturing opcodes on Gen4.

Tue Jul 21 15:24:24 PDT 2015

I *think* this is sane.  I compared it against the current SIMD16 and
SIMD8 code and it looks like it does the right thing.  However, it's
not a direct translation and I don't know the gen4 texturing code, so
I can't really call it reviewed.

Acked-by: Jason Ekstrand <jason.ekstrand at intel.com>

Ken, could you give it a quick look?

On Sat, Jul 18, 2015 at 7:34 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> Unlike its Gen5 and Gen7 counterparts, this patch isn't a plain
> refactor of the previous Gen4 texturing code; it's more of a rewrite
> largely based on emit_texture_gen4_simd16().  The reason is that on
> the one hand the original emit_texture_gen4() code didn't seem easily
> fixable to be SIMD width-invariant and had plenty of clutter to
> support SIMD-width workarounds which are no longer required.  On the
> other hand emit_texture_gen4_simd16() was missing a number of
> SIMD8-only opcodes.  This should generalize both and roughly match
> their current behaviour where there is overlap.
>
> Incidentally, this will fix the following piglits on Gen4:
>
>     arb_shader_texture_lod.execution.arb_shader_texture_lod-texgrad
>     arb_shader_texture_lod.execution.tex-miplevel-selection *gradarb 2d
>     arb_shader_texture_lod.execution.tex-miplevel-selection *gradarb 3d
>     arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 2d
>     arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 2d_projvec4
>     arb_shader_texture_lod.execution.tex-miplevel-selection *projgradarb 3d
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 108 ++++++++++++++++++++++++++++++++++-
>  1 file changed, 107 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 5233ac3..043d9e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -3370,6 +3370,110 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
>  }
>
> +/**
> + * Lower a logical texturing instruction into a Gen4 sampler message.
> + *
> + * The message payload is assembled in MRFs starting at m1, in this order:
> + * one register reserved for the g0 header, the \p coord_components texture
> + * coordinates (zero-padded to three components for the message types that
> + * need it), the two gradient vectors (TXD only, taken from \p lod and
> + * \p lod2), the LOD or bias taken from \p lod (TXL, TXB, TXF and TXS only),
> + * and finally the shadow reference value \p shadow_c when it is present
> + * (i.e. its file is not BAD_FILE).
> + *
> + * \p inst is then rewritten in place: its opcode becomes \p op, src[0] is
> + * cleared, src[1] is set to \p sampler, and base_mrf, mlen and header_size
> + * are set to describe the payload that was just built.
> + */
>  static void
> +lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
> +                                const fs_reg &coordinate,
> +                                const fs_reg &shadow_c,
> +                                const fs_reg &lod, const fs_reg &lod2,
> +                                const fs_reg &sampler,
> +                                unsigned coord_components,
> +                                unsigned grad_components)
> +{
> +   const bool has_lod = (op == SHADER_OPCODE_TXL || op == FS_OPCODE_TXB ||
> +                         op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS);
> +   fs_reg msg_begin(MRF, 1, BRW_REGISTER_TYPE_F);
> +   fs_reg msg_end = msg_begin;
> +
> +   /* g0 header. */
> +   msg_end = offset(msg_end, bld.group(8, 0), 1);
> +
> +   for (unsigned i = 0; i < coord_components; i++)
> +      bld.MOV(retype(offset(msg_end, bld, i), coordinate.type),
> +              offset(coordinate, bld, i));
> +
> +   msg_end = offset(msg_end, bld, coord_components);
> +
> +   /* Messages other than SAMPLE and RESINFO in SIMD16 and TXD in SIMD8
> +    * require all three components to be present and zero if they are unused.
> +    */
> +   if (coord_components > 0 &&
> +       (has_lod || shadow_c.file != BAD_FILE ||
> +        (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
> +      /* msg_end already points just past the coordinates written above, so
> +       * the padding slots are indexed from zero relative to it rather than
> +       * from coord_components.  The assertion keeps the unsigned
> +       * subtraction below from wrapping around.
> +       */
> +      assert(coord_components <= 3);
> +
> +      for (unsigned i = 0; i < 3 - coord_components; i++)
> +         bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f));
> +
> +      msg_end = offset(msg_end, bld, 3 - coord_components);
> +   }
> +
> +   if (op == SHADER_OPCODE_TXD) {
> +      /* TXD unsupported in SIMD16 mode. */
> +      assert(bld.dispatch_width() == 8);
> +
> +      /* the slots for u and v are always present, but r is optional */
> +      if (coord_components < 2)
> +         msg_end = offset(msg_end, bld, 2 - coord_components);
> +
> +      /*  P   = u, v, r
> +       * dPdx = dudx, dvdx, drdx
> +       * dPdy = dudy, dvdy, drdy
> +       *
> +       * 1-arg: Does not exist.
> +       *
> +       * 2-arg: dudx   dvdx   dudy   dvdy
> +       *        dPdx.x dPdx.y dPdy.x dPdy.y
> +       *        m4     m5     m6     m7
> +       *
> +       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
> +       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
> +       *        m5     m6     m7     m8     m9     m10
> +       */
> +      for (unsigned i = 0; i < grad_components; i++)
> +         bld.MOV(offset(msg_end, bld, i), offset(lod, bld, i));
> +
> +      /* Each gradient vector occupies at least two slots. */
> +      msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
> +
> +      for (unsigned i = 0; i < grad_components; i++)
> +         bld.MOV(offset(msg_end, bld, i), offset(lod2, bld, i));
> +
> +      msg_end = offset(msg_end, bld, MAX2(grad_components, 2));
> +   }
> +
> +   if (has_lod) {
> +      /* Bias/LOD with shadow comparator is unsupported in SIMD16 -- *Without*
> +       * shadow comparator (including RESINFO) it's unsupported in SIMD8 mode.
> +       */
> +      assert(shadow_c.file != BAD_FILE ? bld.dispatch_width() == 8 :
> +             bld.dispatch_width() == 16);
> +
> +      /* TXF and TXS take the LOD as an unsigned integer, the rest as float. */
> +      const brw_reg_type type =
> +         (op == SHADER_OPCODE_TXF || op == SHADER_OPCODE_TXS ?
> +          BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
> +      bld.MOV(retype(msg_end, type), lod);
> +      msg_end = offset(msg_end, bld, 1);
> +   }
> +
> +   if (shadow_c.file != BAD_FILE) {
> +      if (op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8) {
> +         /* There's no plain shadow compare message, so we use shadow
> +          * compare with a bias of 0.0.
> +          */
> +         bld.MOV(msg_end, fs_reg(0.0f));
> +         msg_end = offset(msg_end, bld, 1);
> +      }
> +
> +      bld.MOV(msg_end, shadow_c);
> +      msg_end = offset(msg_end, bld, 1);
> +   }
> +
> +   /* Turn the logical instruction into the actual message send. */
> +   inst->opcode = op;
> +   inst->src[0] = reg_undef;
> +   inst->src[1] = sampler;
> +   inst->resize_sources(2);
> +   inst->base_mrf = msg_begin.reg;
> +   inst->mlen = msg_end.reg - msg_begin.reg;
> +   inst->header_size = 1;
> +}
> +
> +static void
>  lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
>                                  fs_reg coordinate,
>                                  const fs_reg &shadow_c,
> @@ -3708,7 +3812,9 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
>                                        sampler, offset_value,
>                                        coord_components, grad_components);
>     } else {
> -      assert(!"Not implemented");
> +      lower_sampler_logical_send_gen4(bld, inst, op, coordinate,
> +                                      shadow_c, lod, lod2, sampler,
> +                                      coord_components, grad_components);
>     }
>  }
>
> --
> 2.4.3
>