[Mesa-dev] [PATCH 2/6] glsl: Rebalance expression trees that are reduction operations.

Eric Anholt eric at anholt.net
Tue Mar 11 15:49:02 PDT 2014


Matt Turner <mattst88 at gmail.com> writes:

> The intention of this pass was to give us better instruction scheduling
> opportunities, but it unexpectedly reduced some instruction counts as
> well:
>
> total instructions in shared programs: 1666639 -> 1666073 (-0.03%)
> instructions in affected programs:     54612 -> 54046 (-1.04%)
> (and trades 4 SIMD16 programs in SS3)

Patches 1, 3, 4, 6 are:

Reviewed-by: Eric Anholt <eric at anholt.net>

I got lost on this one, though...

> diff --git a/src/glsl/opt_rebalance_tree.cpp b/src/glsl/opt_rebalance_tree.cpp
> new file mode 100644
> index 0000000..91aa999
> --- /dev/null
> +++ b/src/glsl/opt_rebalance_tree.cpp

> +/**
> + * \file opt_rebalance_tree.cpp
> + *
> + * Rebalances a reduction expression tree.
> + *
> + * For reduction operations (e.g., x + y + z + w) we generate an expression
> + * tree like
> + *
> + *        +
> + *       / \
> + *      +   w
> + *     / \
> + *    +   z
> + *   / \
> + *  x   y
> + *
> + * which we can rebalance into
> + *
> + *       +
> + *      / \
> + *     /   \
> + *    +     +
> + *   / \   / \
> + *  x   y z   w
> + *
> + * to get a better instruction scheduling.
> + *
> + * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
> + * and Bette L. Warren.
> + */
> +
> +#include "ir.h"
> +#include "ir_visitor.h"
> +#include "ir_rvalue_visitor.h"
> +#include "ir_optimization.h"
> +
> +/* The DSW algorithm generates a degenerate tree (really, a linked list) in
> + * tree_to_vine(). We'd rather not leave a binary expression with only one
> + * operand, so trivial modifications (the ternary operators below) are needed
> + * to ensure that we only rotate around the ir_expression nodes of the tree.
> + */

Why do we care about having NULL remainder.left briefly, if we're
definitely going to be rebalancing back?

> +static unsigned
> +tree_to_vine(ir_expression *root)
> +{
> +   unsigned size = 0;
> +   ir_rvalue *vine_tail = root;
> +   ir_rvalue *remainder = root->operands[1];
> +
> +   while (remainder != NULL) {
> +      ir_expression *remainder_left = remainder->as_expression() ?
> +         remainder->as_expression()->operands[0]->as_expression() : NULL;

A "remainder_expr = remainder->as_expression();" temp would have kept me
From misreading this function a couple of times.

> +
> +      if (remainder_left == NULL) {
> +         /* move vine_tail down one */
> +         vine_tail = remainder;
> +         remainder = remainder->as_expression() ?
> +            remainder->as_expression()->operands[1] : NULL;
> +         size++;
> +      } else {
> +         /* rotate */
> +         ir_expression *tempptr = remainder_left;
> +         remainder->as_expression()->operands[0] = tempptr->operands[1];
> +         tempptr->operands[1] = remainder;
> +         remainder = tempptr;
> +         vine_tail->as_expression()->operands[1] = tempptr;
> +      }
> +   }
> +
> +   return size;
> +}
> +static void
> +compression(ir_expression *root, unsigned count)
> +{
> +   ir_expression *scanner = root;
> +
> +   for (unsigned i = 0; i < count; i++) {
> +      ir_expression *child = scanner->operands[1]->as_expression();
> +      scanner->operands[1] = child->operands[1];
> +      scanner = scanner->operands[1]->as_expression();
> +      child->operands[1] = scanner->operands[0];
> +      scanner->operands[0] = child;
> +   }
> +}
> +
> +static void
> +vine_to_tree(ir_expression *root, unsigned size)
> +{
> +   int n = size - 1;
> +   for (int m = n / 2; m > 0; m = n / 2) {
> +      compression(root, m);
> +      n -= m + 1;
> +   }
> +}

These two functions need some comments.  I'm not sure what those needed
comments are.

> +
> +namespace {
> +
> +class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
> +public:
> +   ir_rebalance_visitor()
> +   {
> +      progress = false;
> +   }
> +
> +   void handle_rvalue(ir_rvalue **rvalue);
> +
> +   bool progress;
> +};
> +
> +struct is_reduction_data {
> +   ir_expression_operation operation;
> +   const glsl_type *type;
> +   unsigned num_expr;
> +   bool is_reduction;
> +   bool contains_constant;
> +};
> +
> +} /* anonymous namespace */
> +
> +static bool
> +is_reduction_operation(ir_expression_operation operation)
> +{
> +   switch (operation) {
> +   case ir_binop_add:
> +   case ir_binop_mul:
> +   case ir_binop_bit_and:
> +   case ir_binop_bit_xor:
> +   case ir_binop_bit_or:
> +   case ir_binop_logic_and:
> +   case ir_binop_logic_xor:
> +   case ir_binop_logic_or:
> +   case ir_binop_min:
> +   case ir_binop_max:
> +      return true;
> +   default:
> +      return false;
> +   }
> +}
> +
> +static void
> +is_reduction(ir_instruction *ir, void *data)
> +{
> +   struct is_reduction_data *ird = (struct is_reduction_data *)data;
> +   if (!ird->is_reduction)
> +      return;
> +
> +   /* We don't want to balance a tree that contains multiple constants, since
> +    * we'll be able to constant fold them if they're not in separate subtrees.
> +    */
> +   if (ir->as_constant()) {
> +      if (ird->contains_constant) {
> +         ird->is_reduction = false;
> +      }
> +      ird->contains_constant = true;
> +      return;
> +   }
> +
> +   ir_expression *expr = ir->as_expression();
> +   if (!expr)
> +      return;
> +
> +   /* Non-constant matrices might still contain constant vec4 that we can
> +    * constant fold once split up. Handling matrices will need some more
> +    * work.
> +    */
> +   if (expr->type->is_matrix()) {
> +      ird->is_reduction = false;
> +      return;
> +   }
> +
> +   if (ird->type != NULL && ird->type != expr->type) {
> +      ird->is_reduction = false;
> +      return;
> +   }
> +   ird->type = expr->type;
> +
> +   ird->num_expr++;
> +   if (is_reduction_operation(expr->operation)) {
> +      if (ird->operation != 0 && ird->operation != expr->operation)
> +         ird->is_reduction = false;
> +      ird->operation = expr->operation;
> +   } else {
> +      ird->is_reduction = false;
> +   }

It seems weird that this pass only rebalances when no expression in the
tree references the result of an expression of a different operation
(or, similarly, a different result type).  I think that produces a weird
tension for us where we sometimes don't want to pull things into bigger
trees, because it would break this pass.

> +static ir_rvalue *
> +handle_expression(ir_expression *expr)
> +{
> +   struct is_reduction_data ird;
> +   ird.operation = (ir_expression_operation)0;
> +   ird.type = NULL;
> +   ird.num_expr = 0;
> +   ird.is_reduction = true;
> +   ird.contains_constant = false;
> +
> +   visit_tree(expr, is_reduction, (void *)&ird);
> +
> +   if (ird.is_reduction && ird.num_expr > 2) {
> +      ir_constant z = ir_constant(0.0f);
> +      ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
> +
> +      unsigned size = tree_to_vine(&pseudo_root);
> +      vine_to_tree(&pseudo_root, size);
> +
> +      expr = pseudo_root.operands[1]->as_expression();
> +   }
> +   return expr;

A comment about why we need the pseudo_root here would be nice.

> +}
> +
> +void
> +ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> +   if (!*rvalue)
> +      return;
> +
> +   ir_expression *expr = (*rvalue)->as_expression();
> +   if (!expr || !is_reduction_operation(expr->operation))
> +      return;
> +
> +   ir_rvalue *new_rvalue = handle_expression(expr);
> +   if (new_rvalue == *rvalue)
> +      return;
> +
> +   *rvalue = new_rvalue;
> +   this->progress = true;
> +}

If progress is ever set to true, when what makes sure progress is false
in the next pass over the tree?  Is progress just not getting set to
true ever?
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 818 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140311/2da64327/attachment.pgp>


More information about the mesa-dev mailing list