[Mesa-dev] [PATCH 3/3] intel/compiler: implement more algebraic optimizations

Ian Romanick idr at freedesktop.org
Wed Feb 27 16:49:12 UTC 2019


On 2/27/19 4:45 AM, Iago Toral Quiroga wrote:
> Now that we propagate constants to the first source of 2src instructions, we
> see more opportunities for constant folding in the backend.

Is all the benefit of the series from more constant folding?  Once upon
a time, I had a patch that added another call to
nir_opt_constant_folding after we call nir_opt_algebraic_late.  My
recollection is that it hurt vec4 shaders, but it helped scalar shaders
quite a bit.  How does doing that affect these results?

Hrm... I can collect that data.

> Shader-db results on KBL:
> 
> total instructions in shared programs: 14965607 -> 14855983 (-0.73%)
> instructions in affected programs: 3988102 -> 3878478 (-2.75%)
> helped: 14292
> HURT: 59
> 
> total cycles in shared programs: 344324295 -> 340656008 (-1.07%)
> cycles in affected programs: 247527740 -> 243859453 (-1.48%)
> helped: 14056
> HURT: 3314
> 
> total loops in shared programs: 4283 -> 4283 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
> 
> total spills in shared programs: 27812 -> 24350 (-12.45%)
> spills in affected programs: 24921 -> 21459 (-13.89%)
> helped: 345
> HURT: 19
> 
> total fills in shared programs: 24173 -> 22032 (-8.86%)
> fills in affected programs: 21124 -> 18983 (-10.14%)
> helped: 355
> HURT: 25
> 
> LOST:   0
> GAINED: 5
> ---
>  src/intel/compiler/brw_fs.cpp | 203 ++++++++++++++++++++++++++++++++--
>  1 file changed, 195 insertions(+), 8 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 2358acbeb59..b2b60237c82 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2583,9 +2583,55 @@ fs_visitor::opt_algebraic()
>           break;
>  
>        case BRW_OPCODE_MUL:
> -         if (inst->src[1].file != IMM)
> +         if (inst->src[0].file != IMM && inst->src[1].file != IMM)
>              continue;
>  
> +         /* Constant folding */
> +         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> +            assert(inst->src[0].type == inst->src[1].type);
> +            bool local_progress = true;
> +            switch (inst->src[0].type) {
> +            case BRW_REGISTER_TYPE_HF: {
> +               float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu);
> +               float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu);
> +               inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 * v2));
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_W: {
> +               int16_t v1 = inst->src[0].ud & 0xffffu;
> +               int16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = brw_imm_w(v1 * v2);
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_UW: {
> +               uint16_t v1 = inst->src[0].ud & 0xffffu;
> +               uint16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = brw_imm_uw(v1 * v2);
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_F:
> +               inst->src[0].f *= inst->src[1].f;
> +               break;
> +            case BRW_REGISTER_TYPE_D:
> +               inst->src[0].d *= inst->src[1].d;
> +               break;
> +            case BRW_REGISTER_TYPE_UD:
> +               inst->src[0].ud *= inst->src[1].ud;
> +               break;
> +            default:
> +               local_progress = false;
> +               break;
> +            };
> +
> +            if (local_progress) {
> +               inst->opcode = BRW_OPCODE_MOV;
> +               inst->src[1] = reg_undef;
> +               progress = true;
> +               break;
> +            }
> +         }
> +
> +
>           /* a * 1.0 = a */
>           if (inst->src[1].is_one()) {
>              inst->opcode = BRW_OPCODE_MOV;
> @@ -2594,6 +2640,14 @@ fs_visitor::opt_algebraic()
>              break;
>           }
>  
> +         if (inst->src[0].is_one()) {
> +            inst->opcode = BRW_OPCODE_MOV;
> +            inst->src[0] = inst->src[1];
> +            inst->src[1] = reg_undef;
> +            progress = true;
> +            break;
> +         }
> +
>           /* a * -1.0 = -a */
>           if (inst->src[1].is_negative_one()) {
>              inst->opcode = BRW_OPCODE_MOV;
> @@ -2603,27 +2657,160 @@ fs_visitor::opt_algebraic()
>              break;
>           }
>  
> -         if (inst->src[0].file == IMM) {
> -            assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
> +         if (inst->src[0].is_negative_one()) {
> +            inst->opcode = BRW_OPCODE_MOV;
> +            inst->src[0] = inst->src[1];
> +            inst->src[0].negate = !inst->src[1].negate;
> +            inst->src[1] = reg_undef;
> +            progress = true;
> +            break;
> +         }
> +
> +         /* a * 0 = 0 (this is not exact for floating point) */
> +         if (inst->src[1].is_zero() &&
> +             brw_reg_type_is_integer(inst->src[1].type)) {
> +            inst->opcode = BRW_OPCODE_MOV;
> +            inst->src[0] = inst->src[1];
> +            inst->src[1] = reg_undef;
> +            progress = true;
> +            break;
> +         }
> +
> +         if (inst->src[0].is_zero() &&
> +             brw_reg_type_is_integer(inst->src[0].type)) {
>              inst->opcode = BRW_OPCODE_MOV;
> -            inst->src[0].f *= inst->src[1].f;
>              inst->src[1] = reg_undef;
>              progress = true;
>              break;
>           }
>           break;
>        case BRW_OPCODE_ADD:
> -         if (inst->src[1].file != IMM)
> +         if (inst->src[0].file != IMM && inst->src[1].file != IMM)
>              continue;
>  
> -         if (inst->src[0].file == IMM) {
> -            assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
> +         /* Constant folding */
> +         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> +            assert(inst->src[0].type == inst->src[1].type);
> +            bool local_progress = true;
> +            switch (inst->src[0].type) {
> +            case BRW_REGISTER_TYPE_HF: {
> +               float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu);
> +               float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu);
> +               inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 + v2));
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_W: {
> +               int16_t v1 = inst->src[0].ud & 0xffffu;
> +               int16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = brw_imm_w(v1 + v2);
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_UW: {
> +               uint16_t v1 = inst->src[0].ud & 0xffffu;
> +               uint16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = brw_imm_uw(v1 + v2);
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_F:
> +               inst->src[0].f += inst->src[1].f;
> +               break;
> +            case BRW_REGISTER_TYPE_D:
> +               inst->src[0].d += inst->src[1].d;
> +               break;
> +            case BRW_REGISTER_TYPE_UD:
> +               inst->src[0].ud += inst->src[1].ud;
> +               break;
> +            default:
> +               local_progress = false;
> +               break;
> +            };
> +
> +            if (local_progress) {
> +               inst->opcode = BRW_OPCODE_MOV;
> +               inst->src[1] = reg_undef;
> +               progress = true;
> +               break;
> +            }
> +         }
> +
> +         /* a + 0 = a (this is not exact for floating point) */
> +         if (inst->src[1].is_zero() &&
> +             brw_reg_type_is_integer(inst->src[1].type)) {
>              inst->opcode = BRW_OPCODE_MOV;
> -            inst->src[0].f += inst->src[1].f;
>              inst->src[1] = reg_undef;
>              progress = true;
>              break;
>           }
> +
> +         if (inst->src[0].is_zero() &&
> +             brw_reg_type_is_integer(inst->src[0].type)) {
> +            inst->opcode = BRW_OPCODE_MOV;
> +            inst->src[0] = inst->src[1];
> +            inst->src[1] = reg_undef;
> +            progress = true;
> +            break;
> +         }
> +         break;
> +      case BRW_OPCODE_SHL:
> +         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> +            bool local_progress = true;
> +            switch (inst->src[0].type) {
> +            case BRW_REGISTER_TYPE_D:
> +            case BRW_REGISTER_TYPE_UD:
> +               inst->src[0].ud <<= inst->src[1].ud;
> +               break;
> +            case BRW_REGISTER_TYPE_W:
> +            case BRW_REGISTER_TYPE_UW: {
> +               uint16_t v1 = inst->src[0].ud & 0xffffu;
> +               uint16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = retype(brw_imm_uw(v1 << v2), inst->src[0].type);
> +               break;
> +            }
> +            default:
> +               local_progress = false;
> +               break;
> +            }
> +            if (local_progress) {
> +               inst->opcode = BRW_OPCODE_MOV;
> +               inst->src[1] = reg_undef;
> +               progress = true;
> +               break;
> +            }
> +         }
> +         break;
> +      case BRW_OPCODE_SHR:
> +         if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> +            bool local_progress = true;
> +            switch (inst->src[0].type) {
> +            case BRW_REGISTER_TYPE_D:
> +               inst->src[0].d >>= inst->src[1].ud;
> +               break;
> +            case BRW_REGISTER_TYPE_UD:
> +               inst->src[0].ud >>= inst->src[1].ud;
> +               break;
> +            case BRW_REGISTER_TYPE_W: {
> +               int16_t v1 = inst->src[0].ud & 0xffffu;
> +               uint16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = brw_imm_w(v1 >> v2);
> +               break;
> +            }
> +            case BRW_REGISTER_TYPE_UW: {
> +               uint16_t v1 = inst->src[0].ud & 0xffffu;
> +               uint16_t v2 = inst->src[1].ud & 0xffffu;
> +               inst->src[0] = brw_imm_uw(v1 >> v2);
> +               break;
> +            }
> +            default:
> +               local_progress = false;
> +               break;
> +            }
> +            if (local_progress) {
> +               inst->opcode = BRW_OPCODE_MOV;
> +               inst->src[1] = reg_undef;
> +               progress = true;
> +               break;
> +            }
> +         }
>           break;
>        case BRW_OPCODE_OR:
>           if (inst->src[0].equals(inst->src[1]) ||


More information about the mesa-dev mailing list