[Mesa-dev] [PATCH 4/4] i965: Implement SIMD16 texturing on Gen4.

Jordan Justen jordan.l.justen at intel.com
Sat Apr 4 16:28:49 PDT 2015


On 2015-04-04 01:23:28, Kenneth Graunke wrote:
> This allows SIMD16 mode to work for a lot more programs.  Texturing is
> also more efficient in SIMD16 mode than SIMD8.  Several messages don't
> actually exist in SIMD8 mode, so we did SIMD16 messages and threw away
> half of the data.  Now we compute real data in both halves.
> 
> Also, the SIMD16 "sample" message doesn't require all three coordinate
> components to exist (like the SIMD8 one), so we can shorten the message
> lengths, cutting register usage a bit.
> 
> I chose to implement the visitor functionality in a separate function,
> since mixing true SIMD16 with SIMD8 code that uses SIMD16 fallbacks
> seemed like a mess.  The new code bails on a few cases where we'd
> have to do two SIMD8 messages - we just fall back to SIMD8 for now.
> 
> Improves performance in "Shadowrun: Dragonfall - Director's Cut" by
> about 20% on GM45 (measured with LIBGL_SHOW_FPS=1 while standing around
> in the first mission).
> 
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h             |  4 ++
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 28 ++++++++---
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 68 +++++++++++++++++++++++++-
>  3 files changed, 90 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 278a8ee..cfdbf55 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -271,6 +271,10 @@ public:
>                                fs_reg shadow_comp,
>                                fs_reg lod, fs_reg lod2, int grad_components,
>                                uint32_t sampler);
> +   fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
> +                                     fs_reg coordinate, int vector_elements,
> +                                     fs_reg shadow_c, fs_reg lod,
> +                                     uint32_t sampler);
>     fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
>                                fs_reg coordinate, int coord_components,
>                                fs_reg shadow_comp,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 40e51aa..2743297 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -622,16 +622,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
>          /* Note that G45 and older determines shadow compare and dispatch width
>           * from message length for most messages.
>           */
> -        assert(dispatch_width == 8);
> -        msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> -        if (inst->shadow_compare) {
> -           assert(inst->mlen == 6);
> -        } else {
> -           assert(inst->mlen <= 4);
> -        }
> +         if (dispatch_width == 8) {
> +            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> +            if (inst->shadow_compare) {
> +               assert(inst->mlen == 6);
> +            } else {
> +               assert(inst->mlen <= 4);
> +            }
> +         } else {
> +            if (inst->shadow_compare) {
> +               msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
> +               assert(inst->mlen == 9);
> +            } else {
> +               msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
> +               assert(inst->mlen <= 7 && inst->mlen % 2 == 1);
> +            }
> +         }
>          break;
>        case FS_OPCODE_TXB:
>          if (inst->shadow_compare) {
> +            assert(dispatch_width == 8);
>             assert(inst->mlen == 6);
>             msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
>          } else {
> @@ -642,6 +652,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
>          break;
>        case SHADER_OPCODE_TXL:
>          if (inst->shadow_compare) {
> +            assert(dispatch_width == 8);
>             assert(inst->mlen == 6);
>             msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
>          } else {
> @@ -652,11 +663,12 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
>          break;
>        case SHADER_OPCODE_TXD:
>          /* There is no sample_d_c message; comparisons are done manually */
> +         assert(dispatch_width == 8);
>          assert(inst->mlen == 7 || inst->mlen == 10);
>          msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
>          break;
>        case SHADER_OPCODE_TXF:
> -        assert(inst->mlen == 9);
> +         assert(inst->mlen <= 9 && inst->mlen % 2 == 1);
>          msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
>          simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
>          break;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 8c0ec33..25c424a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1435,8 +1435,6 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
>     bool simd16 = false;
>     fs_reg orig_dst;
>  
> -   no16("SIMD16 texturing on Gen4 not supported yet.");
> -
>     /* g0 header. */
>     mlen = 1;
>  
> @@ -1588,6 +1586,69 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
>     return inst;
>  }
>  
> +fs_inst *
> +fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
> +                                     fs_reg coordinate, int vector_elements,
> +                                     fs_reg shadow_c, fs_reg lod,
> +                                     uint32_t sampler)
> +{
> +   fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
> +   bool has_lod = op == ir_txl || op == ir_txb;
> +
> +   if (has_lod && shadow_c.file != BAD_FILE)
> +      no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
> +
> +   if (op == ir_txd)
> +      no16("textureGrad unsupported in SIMD16.");
> +
> +   /* Copy the coordinates. */
> +   for (int i = 0; i < vector_elements; i++) {
> +      emit(MOV(retype(offset(message, i), coordinate.type), coordinate));
> +      coordinate = offset(coordinate, 1);
> +   }
> +
> +   fs_reg msg_end = offset(message, vector_elements);
> +
> +   /* Messages other than sample and ld require all three components */
> +   if (has_lod || shadow_c.file != BAD_FILE) {
> +      for (int i = vector_elements; i < 3; i++) {
> +         emit(MOV(offset(message, i), fs_reg(0.0f)));
> +      }
> +   }
> +
> +   if (has_lod) {
> +      fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ?
> +                              BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);

From above, has_lod is defined as (op == ir_txl || op == ir_txb), so the
op == ir_txf check inside this if (has_lod) block should always be false, right?

Should has_lod also check for ir_txf?

Otherwise,
Series Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>

> +      emit(MOV(msg_lod, lod));
> +      msg_end = offset(msg_lod, 1);
> +   }
> +
> +   if (shadow_c.file != BAD_FILE) {
> +      fs_reg msg_ref = offset(message, 3 + has_lod);
> +      emit(MOV(msg_ref, shadow_c));
> +      msg_end = offset(msg_ref, 1);
> +   }
> +
> +   enum opcode opcode;
> +   switch (op) {
> +   case ir_tex: opcode = SHADER_OPCODE_TEX; break;
> +   case ir_txb: opcode = FS_OPCODE_TXB;     break;
> +   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
> +   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
> +   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
> +   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
> +   default: unreachable("not reached");
> +   }
> +
> +   fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
> +   inst->base_mrf = message.reg - 1;
> +   inst->mlen = msg_end.reg - inst->base_mrf;
> +   inst->header_present = true;
> +   inst->regs_written = 8;
> +
> +   return inst;
> +}
> +
>  /* gen5's sampler has slots for u, v, r, array index, then optional
>   * parameters like shadow comparitor or LOD bias.  If optional
>   * parameters aren't present, those base slots are optional and don't
> @@ -2150,6 +2211,9 @@ fs_visitor::emit_texture(ir_texture_opcode op,
>                                 shadow_c, lod, lod2, grad_components,
>                                 sample_index, sampler,
>                                 offset_value.file != BAD_FILE);
> +   } else if (dispatch_width == 16) {
> +      inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components,
> +                                      shadow_c, lod, sampler);
>     } else {
>        inst = emit_texture_gen4(op, dst, coordinate, coord_components,
>                                 shadow_c, lod, lod2, grad_components,
> -- 
> 2.1.2
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list