[Mesa-dev] [PATCH 3/3] i965/fs: Optimize float conversions of byte/word extract.
Iago Toral
itoral at igalia.com
Thu Mar 3 15:21:33 UTC 2016
On Wed, 2016-03-02 at 15:45 -0800, Matt Turner wrote:
> instructions in affected programs: 31535 -> 29966 (-4.98%)
> helped: 23
>
> cycles in affected programs: 272648 -> 266022 (-2.43%)
> helped: 14
> HURT: 1
>
> The patch decreases the number of instructions in the two Unigine
> programs by:
>
> #1721: 4374 -> 4155 instructions (-5.01%)
> #1706: 3582 -> 3363 instructions (-6.11%)
> ---
> src/mesa/drivers/dri/i965/brw_fs.h | 2 ++
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 46 ++++++++++++++++++++++++++++++++
> 2 files changed, 48 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 7446ca1..21c7813 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -272,6 +272,8 @@ public:
> void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
> unsigned wr_mask);
>
> + bool optimize_extract_to_float(nir_alu_instr *instr,
> + const fs_reg &result);
> bool optimize_frontfacing_ternary(nir_alu_instr *instr,
> const fs_reg &result);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index db20c71..04e9b8f 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -500,6 +500,49 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
> }
> }
>
> +/**
> + * Recognizes a parent instruction of nir_op_extract_* and changes the type to
> + * match instr.
> + */
> +bool
> +fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
> + const fs_reg &result)
> +{
> + if (!instr->src[0].src.is_ssa ||
> + !instr->src[0].src.ssa->parent_instr)
> + return false;
> +
> + if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
> + return false;
> +
> + nir_alu_instr *src0 =
> + nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
> +
> + if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 &&
> + src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
> + return false;
> +
> + nir_const_value *element = nir_src_as_const_value(src0->src[1].src);
> + assert(element != NULL);
> +
> + enum opcode extract_op;
> + if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
> + assert(element->u[0] <= 1);
> + extract_op = SHADER_OPCODE_EXTRACT_WORD;
> + } else {
> + assert(element->u[0] <= 3);
> + extract_op = SHADER_OPCODE_EXTRACT_BYTE;
> + }
> +
> + fs_reg op0 = get_nir_src(src0->src[0].src);
> + op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]);
> + op0 = offset(op0, bld, src0->src[0].swizzle[0]);
> +
> + set_saturate(instr->dest.saturate,
> + bld.emit(extract_op, result, op0, brw_imm_ud(element->u[0])));
So this relies on dead code elimination to remove the original extract
opcode, right?
Series is:
Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>
> + return true;
> +}
> +
> bool
> fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
> const fs_reg &result)
> @@ -671,6 +714,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
> switch (instr->op) {
> case nir_op_i2f:
> case nir_op_u2f:
> + if (optimize_extract_to_float(instr, result))
> + return;
> +
> inst = bld.MOV(result, op[0]);
> inst->saturate = instr->dest.saturate;
> break;
More information about the mesa-dev mailing list