[Mesa-dev] [PATCH V3 04/11] i965/fs: Add support for ir_tg4

Kenneth Graunke kenneth at whitecape.org
Wed Sep 18 18:20:22 PDT 2013


On 09/15/2013 02:58 AM, Chris Forbes wrote:
> Lowers ir_tg4 (from textureGather and textureGatherOffset builtins) to
> SHADER_OPCODE_TG4.
> 
> The usual post-sampling swizzle workaround can't work for ir_tg4,
> so avoid doing that:
> 
> * For R/G/B/A swizzles use the hardware channel select (lives in the
>    same dword in the header as the texel offset), and then don't do
>    anything afterward in the shader.
> * For 0/1 swizzles blast the appropriate constant over all the output
>    channels instead of sampling.
> 
> V2: Avoid duplicating header enabling block
> V3: Avoid sampling at all, for degenerate swizzles.
> 
> Signed-off-by: Chris Forbes <chrisf at ijw.co.nz>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h           |  1 +
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 62 ++++++++++++++++++++++++++--
>  2 files changed, 60 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index cb4ac3b..f3a1eeb 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -241,6 +241,7 @@ public:
>     void visit(ir_emit_vertex *);
>     void visit(ir_end_primitive *);
>  
> +   uint32_t gather_channel(ir_texture *ir, int sampler);
>     void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);
>  
>     bool can_do_source_mods(fs_inst *inst);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index d935c7b..be7aed7 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1163,6 +1163,12 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>     case ir_lod:
>        inst = emit(SHADER_OPCODE_LOD, dst);
>        break;
> +   case ir_tg4:
> +      inst = emit(SHADER_OPCODE_TG4, dst);
> +      break;
> +   default:
> +      fail("unrecognized texture opcode");

Maybe assert rather than fail?  Not a huge deal either way.

> +      break;
>     }
>     inst->base_mrf = base_mrf;
>     inst->mlen = mlen;
> @@ -1187,9 +1193,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>     bool header_present = false;
>     int offsets[3];
>  
> -   if (ir->offset && ir->op != ir_txf) {
> -      /* The offsets set up by the ir_texture visitor are in the
> +   if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
> +      /* * The offsets set up by the ir_texture visitor are in the
>         * m1 header, so we can't go headerless.
> +       *
> +       * * ir4_tg4 needs to place its channel select in the header,
> +       * for interaction with ARB_texture_swizzle

Small typo: ir4_tg4 -> ir_tg4.

Not sure about the two double stars here.  (Guessing they're meant to be
bullets but it looks a little funny.)  Maybe just remove them...

>         */
>        header_present = true;
>        mlen++;
> @@ -1205,6 +1214,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>     switch (ir->op) {
>     case ir_tex:
>     case ir_lod:
> +   case ir_tg4:
>        break;
>     case ir_txb:
>        emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
> @@ -1319,6 +1329,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>     case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
>     case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
>     case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
> +   case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
>     }
>     inst->base_mrf = base_mrf;
>     inst->mlen = mlen;
> @@ -1446,6 +1457,24 @@ fs_visitor::visit(ir_texture *ir)
>      */
>     int texunit = fp->Base.SamplerUnits[sampler];
>  
> +   if (ir->op == ir_tg4) {
> +      /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
> +       * emitting anything other than setting up the constant result.
> +       */
> +      int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0);
> +      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
> +
> +         fs_reg res = fs_reg(this, glsl_type::vec4_type);
> +         this->result = res;
> +
> +         for (int i=0; i<4; i++) {
> +            emit(MOV(res, fs_reg(swiz == SWIZZLE_ZERO ? 0.0f : 1.0f)));
> +            res.reg_offset++;
> +         }
> +         return;
> +      }
> +   }
> +
>     /* Should be lowered by do_lower_texture_projection */
>     assert(!ir->projector);
>  
> @@ -1473,6 +1502,7 @@ fs_visitor::visit(ir_texture *ir)
>     switch (ir->op) {
>     case ir_tex:
>     case ir_lod:
> +   case ir_tg4:
>        break;
>     case ir_txb:
>        ir->lod_info.bias->accept(this);
> @@ -1495,6 +1525,8 @@ fs_visitor::visit(ir_texture *ir)
>        ir->lod_info.sample_index->accept(this);
>        sample_index = this->result;
>        break;
> +   default:
> +      assert(!"Unrecognized texture opcode");
>     };
>  
>     /* Writemasking doesn't eliminate channels on SIMD8 texture
> @@ -1519,6 +1551,9 @@ fs_visitor::visit(ir_texture *ir)
>     if (ir->offset != NULL && ir->op != ir_txf)
>        inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
>  
> +   if (ir->op == ir_tg4)
> +      inst->texture_offset |= gather_channel(ir, sampler) << 16; // M0.2:16-17
> +
>     inst->sampler = sampler;
>  
>     if (ir->shadow_comparitor)
> @@ -1539,6 +1574,24 @@ fs_visitor::visit(ir_texture *ir)
>  }
>  
>  /**
> + * Set up the gather channel based on the swizzle, for gather4.
> + */
> +uint32_t
> +fs_visitor::gather_channel(ir_texture *ir, int sampler)
> +{
> +   int swiz = GET_SWZ(c->key.tex.swizzles[sampler], 0 /* red */);
> +   switch (swiz) {
> +      case SWIZZLE_X: return 0;
> +      case SWIZZLE_Y: return 1;
> +      case SWIZZLE_Z: return 2;
> +      case SWIZZLE_W: return 3;
> +      default:
> +         /* zero, one swizzles */
> +         return 0;
> +   }

I'm not sure how useful this helper function is, given that it's the
identity function for SWIZZLE_X/Y/Z/W.  From prog_instruction.h:

#define SWIZZLE_X    0
#define SWIZZLE_Y    1
#define SWIZZLE_Z    2
#define SWIZZLE_W    3
#define SWIZZLE_ZERO 4   /**< For SWZ instruction only */
#define SWIZZLE_ONE  5   /**< For SWZ instruction only */

ZERO and ONE already require (and have) special handling, so you'll
never actually see them here.

So you could actually just write the caller as:

   if (ir->op == ir_tg4) {
      /* textureGather() normally pulls from the RED component, which
       * might be swizzled to another channel.
       */
      int gather_channel = GET_SWZ(c->key.tex.swizzles[sampler], 0);
      inst->texture_offset |= gather_channel << 16; /* M0.2:16-17 */
   }

*shrug*

By the way, we don't populate c->key.tex.swizzles[sampler] on Haswell,
so this probably won't work there.  The SURFACE_STATE Shader Channel
Select fields might take care of this by swizzling things all around
before you see it, so you could just skip this code.  But I'm not sure.
 If not, we'll have to populate them. :(

I'll have to run some experiments.

> +}
> +
> +/**
>   * Swizzle the result of a texture result.  This is necessary for
>   * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
>   */
> @@ -1547,7 +1600,10 @@ fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
>  {
>     this->result = orig_val;
>  
> -   if (ir->op == ir_txs || ir->op == ir_lod)
> +   /* txs,lod don't actually sample the texture, so swizzling the result
> +    * makes no sense.

This is a nice comment for the existing code, but it doesn't explain why
ir_tg4 doesn't need swizzling either.  Might be nice to add one.

> +    */
> +   if (ir->op == ir_txs || ir->op == ir_lod || ir->op == ir_tg4)
>        return;
>  
>     if (ir->type == glsl_type::float_type) {

This patch looks correct.  Sorry about all the nitpicking!


More information about the mesa-dev mailing list