[Mesa-dev] [PATCH 2/6] glsl: Rebalance expression trees that are reduction operations.
Eric Anholt
eric at anholt.net
Tue Mar 11 15:49:02 PDT 2014
Matt Turner <mattst88 at gmail.com> writes:
> The intention of this pass was to give us better instruction scheduling
> opportunities, but it unexpectedly reduced some instruction counts as
> well:
>
> total instructions in shared programs: 1666639 -> 1666073 (-0.03%)
> instructions in affected programs: 54612 -> 54046 (-1.04%)
> (and trades 4 SIMD16 programs in SS3)
Patches 1, 3, 4, 6 are:
Reviewed-by: Eric Anholt <eric at anholt.net>
I got lost on this one, though...
> diff --git a/src/glsl/opt_rebalance_tree.cpp b/src/glsl/opt_rebalance_tree.cpp
> new file mode 100644
> index 0000000..91aa999
> --- /dev/null
> +++ b/src/glsl/opt_rebalance_tree.cpp
> +/**
> + * \file opt_rebalance_tree.cpp
> + *
> + * Rebalances a reduction expression tree.
> + *
> + * For reduction operations (e.g., x + y + z + w) we generate an expression
> + * tree like
> + *
> + * +
> + * / \
> + * + w
> + * / \
> + * + z
> + * / \
> + * x y
> + *
> + * which we can rebalance into
> + *
> + * +
> + * / \
> + * / \
> + * + +
> + * / \ / \
> + * x y z w
> + *
> + * to get a better instruction scheduling.
> + *
> + * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
> + * and Bette L. Warren.
> + */
> +
> +#include "ir.h"
> +#include "ir_visitor.h"
> +#include "ir_rvalue_visitor.h"
> +#include "ir_optimization.h"
> +
> +/* The DSW algorithm generates a degenerate tree (really, a linked list) in
> + * tree_to_vine(). We'd rather not leave a binary expression with only one
> + * operand, so trivial modifications (the ternary operators below) are needed
> + * to ensure that we only rotate around the ir_expression nodes of the tree.
> + */
Why do we care about having NULL remainder.left briefly, if we're
definitely going to be rebalancing back?
> +static unsigned
> +tree_to_vine(ir_expression *root)
> +{
> + unsigned size = 0;
> + ir_rvalue *vine_tail = root;
> + ir_rvalue *remainder = root->operands[1];
> +
> + while (remainder != NULL) {
> + ir_expression *remainder_left = remainder->as_expression() ?
> + remainder->as_expression()->operands[0]->as_expression() : NULL;
A "remainder_expr = remainder->as_expression();" temp would have kept me
From misreading this function a couple of times.
> +
> + if (remainder_left == NULL) {
> + /* move vine_tail down one */
> + vine_tail = remainder;
> + remainder = remainder->as_expression() ?
> + remainder->as_expression()->operands[1] : NULL;
> + size++;
> + } else {
> + /* rotate */
> + ir_expression *tempptr = remainder_left;
> + remainder->as_expression()->operands[0] = tempptr->operands[1];
> + tempptr->operands[1] = remainder;
> + remainder = tempptr;
> + vine_tail->as_expression()->operands[1] = tempptr;
> + }
> + }
> +
> + return size;
> +}
> +static void
> +compression(ir_expression *root, unsigned count)
> +{
> + ir_expression *scanner = root;
> +
> + for (unsigned i = 0; i < count; i++) {
> + ir_expression *child = scanner->operands[1]->as_expression();
> + scanner->operands[1] = child->operands[1];
> + scanner = scanner->operands[1]->as_expression();
> + child->operands[1] = scanner->operands[0];
> + scanner->operands[0] = child;
> + }
> +}
> +
> +static void
> +vine_to_tree(ir_expression *root, unsigned size)
> +{
> + int n = size - 1;
> + for (int m = n / 2; m > 0; m = n / 2) {
> + compression(root, m);
> + n -= m + 1;
> + }
> +}
These two functions need some comments. I'm not sure what those needed
comments are.
> +
> +namespace {
> +
> +class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
> +public:
> + ir_rebalance_visitor()
> + {
> + progress = false;
> + }
> +
> + void handle_rvalue(ir_rvalue **rvalue);
> +
> + bool progress;
> +};
> +
> +struct is_reduction_data {
> + ir_expression_operation operation;
> + const glsl_type *type;
> + unsigned num_expr;
> + bool is_reduction;
> + bool contains_constant;
> +};
> +
> +} /* anonymous namespace */
> +
> +static bool
> +is_reduction_operation(ir_expression_operation operation)
> +{
> + switch (operation) {
> + case ir_binop_add:
> + case ir_binop_mul:
> + case ir_binop_bit_and:
> + case ir_binop_bit_xor:
> + case ir_binop_bit_or:
> + case ir_binop_logic_and:
> + case ir_binop_logic_xor:
> + case ir_binop_logic_or:
> + case ir_binop_min:
> + case ir_binop_max:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> +static void
> +is_reduction(ir_instruction *ir, void *data)
> +{
> + struct is_reduction_data *ird = (struct is_reduction_data *)data;
> + if (!ird->is_reduction)
> + return;
> +
> + /* We don't want to balance a tree that contains multiple constants, since
> + * we'll be able to constant fold them if they're not in separate subtrees.
> + */
> + if (ir->as_constant()) {
> + if (ird->contains_constant) {
> + ird->is_reduction = false;
> + }
> + ird->contains_constant = true;
> + return;
> + }
> +
> + ir_expression *expr = ir->as_expression();
> + if (!expr)
> + return;
> +
> + /* Non-constant matrices might still contain constant vec4 that we can
> + * constant fold once split up. Handling matrices will need some more
> + * work.
> + */
> + if (expr->type->is_matrix()) {
> + ird->is_reduction = false;
> + return;
> + }
> +
> + if (ird->type != NULL && ird->type != expr->type) {
> + ird->is_reduction = false;
> + return;
> + }
> + ird->type = expr->type;
> +
> + ird->num_expr++;
> + if (is_reduction_operation(expr->operation)) {
> + if (ird->operation != 0 && ird->operation != expr->operation)
> + ird->is_reduction = false;
> + ird->operation = expr->operation;
> + } else {
> + ird->is_reduction = false;
> + }
It seems weird that this pass only rebalances when no expression in the
tree references the result of an expression of a different operation
(or, similarly, a different result type). I think that produces a weird
tension for us where we sometimes don't want to pull things into bigger
trees, because it would break this pass.
> +static ir_rvalue *
> +handle_expression(ir_expression *expr)
> +{
> + struct is_reduction_data ird;
> + ird.operation = (ir_expression_operation)0;
> + ird.type = NULL;
> + ird.num_expr = 0;
> + ird.is_reduction = true;
> + ird.contains_constant = false;
> +
> + visit_tree(expr, is_reduction, (void *)&ird);
> +
> + if (ird.is_reduction && ird.num_expr > 2) {
> + ir_constant z = ir_constant(0.0f);
> + ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
> +
> + unsigned size = tree_to_vine(&pseudo_root);
> + vine_to_tree(&pseudo_root, size);
> +
> + expr = pseudo_root.operands[1]->as_expression();
> + }
> + return expr;
A comment about why we need the pseudo_root here would be nice.
> +}
> +
> +void
> +ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
> +{
> + if (!*rvalue)
> + return;
> +
> + ir_expression *expr = (*rvalue)->as_expression();
> + if (!expr || !is_reduction_operation(expr->operation))
> + return;
> +
> + ir_rvalue *new_rvalue = handle_expression(expr);
> + if (new_rvalue == *rvalue)
> + return;
> +
> + *rvalue = new_rvalue;
> + this->progress = true;
> +}
If progress is ever set to true, when what makes sure progress is false
in the next pass over the tree? Is progress just not getting set to
true ever?
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 818 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140311/2da64327/attachment.pgp>
More information about the mesa-dev
mailing list