/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file opt_rebalance_tree.cpp
 *
 * Rebalances a reduction expression tree.
 *
 * For reduction operations (e.g., x + y + z + w) we generate an expression
 * tree like
 *
 *        +
 *       / \
 *      +   w
 *     / \
 *    +   z
 *   / \
 *  x   y
 *
 * which we can rebalance into
 *
 *       +
 *      / \
 *     /   \
 *    +     +
 *   / \   / \
 *  x   y z   w
 *
 * to get a better instruction scheduling.
 *
 * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
 * and Bette L. Warren.
 *
 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
 * explanation of the of the tree_to_vine() (rightward rotation) and
 * vine_to_tree() (leftward rotation) algorithms.
 */

#include "ir.h"
#include "ir_visitor.h"
#include "ir_rvalue_visitor.h"
#include "ir_optimization.h"
#include "main/macros.h" /* for MAX2 */

/* The DSW algorithm generates a degenerate tree (really, a linked list) in
 * tree_to_vine(). We'd rather not leave a binary expression with only one
 * operand, so trivial modifications (the ternary operators below) are needed
 * to ensure that we only rotate around the ir_expression nodes of the tree.
 */
static unsigned
tree_to_vine(ir_expression *root)
{
   unsigned size = 0;
   ir_rvalue *vine_tail = root;
   ir_rvalue *remainder = root->operands[1];

   while (remainder != NULL) {
      ir_expression *remainder_temp = remainder->as_expression();
      ir_expression *remainder_left = remainder_temp ?
         remainder_temp->operands[0]->as_expression() : NULL;

      if (remainder_left == NULL) {
         /* move vine_tail down one */
         vine_tail = remainder;
         remainder = remainder->as_expression() ?
            ((ir_expression *)remainder)->operands[1] : NULL;
         size++;
      } else {
         /* rotate */
         ir_expression *tempptr = remainder_left;
         ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
         tempptr->operands[1] = remainder;
         remainder = tempptr;
         ((ir_expression *)vine_tail)->operands[1] = tempptr;
      }
   }

   return size;
}

static void
compression(ir_expression *root, unsigned count)
{
   ir_expression *scanner = root;

   for (unsigned i = 0; i < count; i++) {
      ir_expression *child = (ir_expression *)scanner->operands[1];
      scanner->operands[1] = child->operands[1];
      scanner = (ir_expression *)scanner->operands[1];
      child->operands[1] = scanner->operands[0];
      scanner->operands[0] = child;
   }
}

static void
vine_to_tree(ir_expression *root, unsigned size)
{
   int n = size - 1;
   for (int m = n / 2; m > 0; m = n / 2) {
      compression(root, m);
      n -= m + 1;
   }
}

namespace {

class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
public:
   ir_rebalance_visitor()
   {
      progress = false;
   }

   virtual ir_visitor_status visit_enter(ir_assignment *ir);

   void handle_rvalue(ir_rvalue **rvalue);

   bool progress;
};

struct is_reduction_data {
   ir_expression_operation operation;
   const glsl_type *type;
   unsigned num_expr;
   bool is_reduction;
   bool contains_constant;
};

} /* anonymous namespace */

ir_visitor_status
ir_rebalance_visitor::visit_enter(ir_assignment *ir)
{
   ir_variable *var = ir->lhs->variable_referenced();
   if (var->data.invariant || var->data.precise) {
      /* If we're assigning to an invariant variable, just bail.  Tree
       * rebalancing (reassociation) isn't precision-safe.
       */
      return visit_continue_with_parent;
   } else {
      return visit_continue;
   }
}

static bool
is_reduction_operation(ir_expression_operation operation)
{
   switch (operation) {
   case ir_binop_add:
   case ir_binop_mul:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
   case ir_binop_min:
   case ir_binop_max:
      return true;
   default:
      return false;
   }
}

/* Note that this function does not attempt to recognize that reduction trees
 * are already balanced.
 *
 * We return false from this function for a number of reasons other than an
 * expression tree not being a mathematical reduction. Namely,
 *
 *    - if the tree contains multiple constants that we may be able to combine.
 *    - if the tree contains matrices:
 *       - they might contain vec4's with many constant components that we can
 *         simplify after splitting.
 *       - applying the matrix chain ordering optimization is more than just
 *         balancing an expression tree.
 *    - if the tree contains operations on multiple types.
 *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
 *      would trick the visiting pass.
 */
static void
is_reduction(ir_instruction *ir, void *data)
{
   struct is_reduction_data *ird = (struct is_reduction_data *)data;
   if (!ird->is_reduction)
      return;

   /* We don't want to balance a tree that contains multiple constants, since
    * we'll be able to constant fold them if they're not in separate subtrees.
    */
   if (ir->as_constant()) {
      if (ird->contains_constant) {
         ird->is_reduction = false;
      }
      ird->contains_constant = true;
      return;
   }

   /* Array/record dereferences have subtrees that are not part of the expr
    * tree we're balancing. Skip trees containing them.
    */
   if (ir->ir_type == ir_type_dereference_array ||
       ir->ir_type == ir_type_dereference_record) {
      ird->is_reduction = false;
      return;
   }

   ir_expression *expr = ir->as_expression();
   if (!expr)
      return;

   /* Non-constant matrices might still contain constant vec4 that we can
    * constant fold once split up. Handling matrices will need some more
    * work.
    */
   if (expr->type->is_matrix() ||
       expr->operands[0]->type->is_matrix() ||
       (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
      ird->is_reduction = false;
      return;
   }

   if (ird->type != NULL && ird->type != expr->type) {
      ird->is_reduction = false;
      return;
   }
   ird->type = expr->type;

   ird->num_expr++;
   if (is_reduction_operation(expr->operation)) {
      if (ird->operation != 0 && ird->operation != expr->operation)
         ird->is_reduction = false;
      ird->operation = expr->operation;
   } else {
      ird->is_reduction = false;
   }
}

static ir_rvalue *
handle_expression(ir_expression *expr)
{
   struct is_reduction_data ird;
   ird.operation = (ir_expression_operation)0;
   ird.type = NULL;
   ird.num_expr = 0;
   ird.is_reduction = true;
   ird.contains_constant = false;

   visit_tree(expr, is_reduction, (void *)&ird);

   if (ird.is_reduction && ird.num_expr > 2) {
      ir_constant z = ir_constant(0.0f);
      ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);

      unsigned size = tree_to_vine(&pseudo_root);
      vine_to_tree(&pseudo_root, size);

      expr = (ir_expression *)pseudo_root.operands[1];
   }
   return expr;
}

static void
update_types(ir_instruction *ir, void *)
{
   ir_expression *expr = ir->as_expression();
   if (!expr)
      return;

   const glsl_type *const new_type =
      glsl_type::get_instance(expr->type->base_type,
                              MAX2(expr->operands[0]->type->vector_elements,
                                   expr->operands[1]->type->vector_elements),
                              1);
   assert(new_type != glsl_type::error_type);
   expr->type = new_type;
}

void
ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_expression *expr = (*rvalue)->as_expression();
   if (!expr || !is_reduction_operation(expr->operation))
      return;

   ir_rvalue *new_rvalue = handle_expression(expr);

   /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
    * or some other set of cases) new_rvalue will point to the same root as
    * before.
    *
    * Similarly, if the tree rooted at *rvalue was a reduction and was already
    * balanced, the algorithm will rearrange the tree but will ultimately
    * return an identical tree, so this check will handle that as well and
    * will not set progress = true.
    */
   if (new_rvalue == *rvalue)
      return;

   visit_tree(new_rvalue, NULL, NULL, update_types);

   *rvalue = new_rvalue;
   this->progress = true;
}

bool
do_rebalance_tree(exec_list *instructions)
{
   ir_rebalance_visitor v;

   v.run(instructions);

   return v.progress;
}