[Mesa-dev] [PATCH 3/3] i965/fs: Optimize float conversions of byte/word extract.

Iago Toral itoral at igalia.com
Thu Mar 3 15:21:33 UTC 2016


On Wed, 2016-03-02 at 15:45 -0800, Matt Turner wrote:
>    instructions in affected programs: 31535 -> 29966 (-4.98%)
>    helped: 23
> 
>    cycles in affected programs: 272648 -> 266022 (-2.43%)
>    helped: 14
>    HURT: 1
> 
> The patch decreases the number of instructions in the two Unigine
> programs by:
> 
>  #1721: 4374 -> 4155 instructions (-5.01%)
>  #1706: 3582 -> 3363 instructions (-6.11%)
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h       |  2 ++
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 ++++++++++++++++++++++++++++++++
>  2 files changed, 48 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 7446ca1..21c7813 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -272,6 +272,8 @@ public:
>     void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
>                       unsigned wr_mask);
>  
> +   bool optimize_extract_to_float(nir_alu_instr *instr,
> +                                  const fs_reg &result);
>     bool optimize_frontfacing_ternary(nir_alu_instr *instr,
>                                       const fs_reg &result);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index db20c71..04e9b8f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -500,6 +500,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
>     }
>  }
>  
> +/**
> + * Recognizes a parent instruction of nir_op_extract_* and changes the type to
> + * match instr.
> + */
> +bool
> +fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
> +                                      const fs_reg &result)
> +{
> +   if (!instr->src[0].src.is_ssa ||
> +       !instr->src[0].src.ssa->parent_instr)
> +      return false;
> +
> +   if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
> +      return false;
> +
> +   nir_alu_instr *src0 =
> +      nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
> +
> +   if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 &&
> +       src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
> +      return false;
> +
> +   nir_const_value *element = nir_src_as_const_value(src0->src[1].src);
> +   assert(element != NULL);
> +
> +   enum opcode extract_op;
> +   if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
> +      assert(element->u[0] <= 1);
> +      extract_op = SHADER_OPCODE_EXTRACT_WORD;
> +   } else {
> +      assert(element->u[0] <= 3);
> +      extract_op = SHADER_OPCODE_EXTRACT_BYTE;
> +   }
> +
> +   fs_reg op0 = get_nir_src(src0->src[0].src);
> +   op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]);
> +   op0 = offset(op0, bld, src0->src[0].swizzle[0]);
> +
> +   set_saturate(instr->dest.saturate,
> +                bld.emit(extract_op, result, op0, brw_imm_ud(element->u[0])));

So this relies on dead code elimination to remove the original extract
instruction once nothing else uses its result, right?

Series is:
Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

> +   return true;
> +}
> +
>  bool
>  fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
>                                           const fs_reg &result)
> @@ -671,6 +714,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
>     switch (instr->op) {
>     case nir_op_i2f:
>     case nir_op_u2f:
> +      if (optimize_extract_to_float(instr, result))
> +         return;
> +
>        inst = bld.MOV(result, op[0]);
>        inst->saturate = instr->dest.saturate;
>        break;




More information about the mesa-dev mailing list