[Mesa-dev] [PATCH 3/3] intel/compiler: implement more algebraic optimizations
Ian Romanick
idr at freedesktop.org
Wed Feb 27 16:49:12 UTC 2019
On 2/27/19 4:45 AM, Iago Toral Quiroga wrote:
> Now that we propagate constants to the first source of 2src instructions we
> see more opportunities for constant folding in the backend.
Is all the benefit of the series from more constant folding? Once upon
a time, I had a patch that added another call to
nir_opt_constant_folding after we call nir_opt_algebraic_late. My
recollection is that it hurt vec4 shaders, but it helped scalar shaders
quite a bit. How does doing that affect these results?
Hrm... I can collect that data.
> Shader-db results on KBL:
>
> total instructions in shared programs: 14965607 -> 14855983 (-0.73%)
> instructions in affected programs: 3988102 -> 3878478 (-2.75%)
> helped: 14292
> HURT: 59
>
> total cycles in shared programs: 344324295 -> 340656008 (-1.07%)
> cycles in affected programs: 247527740 -> 243859453 (-1.48%)
> helped: 14056
> HURT: 3314
>
> total loops in shared programs: 4283 -> 4283 (0.00%)
> loops in affected programs: 0 -> 0
> helped: 0
> HURT: 0
>
> total spills in shared programs: 27812 -> 24350 (-12.45%)
> spills in affected programs: 24921 -> 21459 (-13.89%)
> helped: 345
> HURT: 19
>
> total fills in shared programs: 24173 -> 22032 (-8.86%)
> fills in affected programs: 21124 -> 18983 (-10.14%)
> helped: 355
> HURT: 25
>
> LOST: 0
> GAINED: 5
> ---
> src/intel/compiler/brw_fs.cpp | 203 ++++++++++++++++++++++++++++++++--
> 1 file changed, 195 insertions(+), 8 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 2358acbeb59..b2b60237c82 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -2583,9 +2583,55 @@ fs_visitor::opt_algebraic()
> break;
>
> case BRW_OPCODE_MUL:
> - if (inst->src[1].file != IMM)
> + if (inst->src[0].file != IMM && inst->src[1].file != IMM)
> continue;
>
> + /* Constant folding */
> + if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> + assert(inst->src[0].type == inst->src[1].type);
> + bool local_progress = true;
> + switch (inst->src[0].type) {
> + case BRW_REGISTER_TYPE_HF: {
> + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu);
> + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu);
> + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 * v2));
> + break;
> + }
> + case BRW_REGISTER_TYPE_W: {
> + int16_t v1 = inst->src[0].ud & 0xffffu;
> + int16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = brw_imm_w(v1 * v2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_UW: {
> + uint16_t v1 = inst->src[0].ud & 0xffffu;
> + uint16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = brw_imm_uw(v1 * v2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_F:
> + inst->src[0].f *= inst->src[1].f;
> + break;
> + case BRW_REGISTER_TYPE_D:
> + inst->src[0].d *= inst->src[1].d;
> + break;
> + case BRW_REGISTER_TYPE_UD:
> + inst->src[0].ud *= inst->src[1].ud;
> + break;
> + default:
> + local_progress = false;
> + break;
> + };
> +
> + if (local_progress) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> + }
> +
> +
> /* a * 1.0 = a */
> if (inst->src[1].is_one()) {
> inst->opcode = BRW_OPCODE_MOV;
> @@ -2594,6 +2640,14 @@ fs_visitor::opt_algebraic()
> break;
> }
>
> + if (inst->src[0].is_one()) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[0] = inst->src[1];
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> +
> /* a * -1.0 = -a */
> if (inst->src[1].is_negative_one()) {
> inst->opcode = BRW_OPCODE_MOV;
> @@ -2603,27 +2657,160 @@ fs_visitor::opt_algebraic()
> break;
> }
>
> - if (inst->src[0].file == IMM) {
> - assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
> + if (inst->src[0].is_negative_one()) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[0] = inst->src[1];
> + inst->src[0].negate = !inst->src[1].negate;
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> +
> + /* a * 0 = 0 (this is not exact for floating point) */
> + if (inst->src[1].is_zero() &&
> + brw_reg_type_is_integer(inst->src[1].type)) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[0] = inst->src[1];
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> +
> + if (inst->src[0].is_zero() &&
> + brw_reg_type_is_integer(inst->src[0].type)) {
> inst->opcode = BRW_OPCODE_MOV;
> - inst->src[0].f *= inst->src[1].f;
> inst->src[1] = reg_undef;
> progress = true;
> break;
> }
> break;
> case BRW_OPCODE_ADD:
> - if (inst->src[1].file != IMM)
> + if (inst->src[0].file != IMM && inst->src[1].file != IMM)
> continue;
>
> - if (inst->src[0].file == IMM) {
> - assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
> + /* Constant folding */
> + if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> + assert(inst->src[0].type == inst->src[1].type);
> + bool local_progress = true;
> + switch (inst->src[0].type) {
> + case BRW_REGISTER_TYPE_HF: {
> + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu);
> + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu);
> + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 + v2));
> + break;
> + }
> + case BRW_REGISTER_TYPE_W: {
> + int16_t v1 = inst->src[0].ud & 0xffffu;
> + int16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = brw_imm_w(v1 + v2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_UW: {
> + uint16_t v1 = inst->src[0].ud & 0xffffu;
> + uint16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = brw_imm_uw(v1 + v2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_F:
> + inst->src[0].f += inst->src[1].f;
> + break;
> + case BRW_REGISTER_TYPE_D:
> + inst->src[0].d += inst->src[1].d;
> + break;
> + case BRW_REGISTER_TYPE_UD:
> + inst->src[0].ud += inst->src[1].ud;
> + break;
> + default:
> + local_progress = false;
> + break;
> + };
> +
> + if (local_progress) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> + }
> +
> + /* a + 0 = a (this is not exact for floating point) */
> + if (inst->src[1].is_zero() &&
> + brw_reg_type_is_integer(inst->src[1].type)) {
> inst->opcode = BRW_OPCODE_MOV;
> - inst->src[0].f += inst->src[1].f;
> inst->src[1] = reg_undef;
> progress = true;
> break;
> }
> +
> + if (inst->src[0].is_zero() &&
> + brw_reg_type_is_integer(inst->src[0].type)) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[0] = inst->src[1];
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> + break;
> + case BRW_OPCODE_SHL:
> + if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> + bool local_progress = true;
> + switch (inst->src[0].type) {
> + case BRW_REGISTER_TYPE_D:
> + case BRW_REGISTER_TYPE_UD:
> + inst->src[0].ud <<= inst->src[1].ud;
> + break;
> + case BRW_REGISTER_TYPE_W:
> + case BRW_REGISTER_TYPE_UW: {
> + uint16_t v1 = inst->src[0].ud & 0xffffu;
> + uint16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = retype(brw_imm_uw(v1 << v2), inst->src[0].type);
> + break;
> + }
> + default:
> + local_progress = false;
> + break;
> + }
> + if (local_progress) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> + }
> + break;
> + case BRW_OPCODE_SHR:
> + if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
> + bool local_progress = true;
> + switch (inst->src[0].type) {
> + case BRW_REGISTER_TYPE_D:
> + inst->src[0].d >>= inst->src[1].ud;
> + break;
> + case BRW_REGISTER_TYPE_UD:
> + inst->src[0].ud >>= inst->src[1].ud;
> + break;
> + case BRW_REGISTER_TYPE_W: {
> + int16_t v1 = inst->src[0].ud & 0xffffu;
> + uint16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = brw_imm_w(v1 >> v2);
> + break;
> + }
> + case BRW_REGISTER_TYPE_UW: {
> + uint16_t v1 = inst->src[0].ud & 0xffffu;
> + uint16_t v2 = inst->src[1].ud & 0xffffu;
> + inst->src[0] = brw_imm_uw(v1 >> v2);
> + break;
> + }
> + default:
> + local_progress = false;
> + break;
> + }
> + if (local_progress) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> + }
> + }
> break;
> case BRW_OPCODE_OR:
> if (inst->src[0].equals(inst->src[1]) ||
More information about the mesa-dev mailing list