/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file opt_minmax.cpp
 *
 * Drop operands from an expression tree of only min/max operations if they
 * can be proven to not contribute to the final result.
 *
 * The algorithm is similar to alpha-beta pruning on a minmax search.
 */

#include "ir.h"
#include "ir_visitor.h"
#include "ir_rvalue_visitor.h"
#include "ir_optimization.h"
#include "ir_builder.h"
#include "program/prog_instruction.h"
#include "compiler/glsl_types.h"
#include "main/macros.h"

using namespace ir_builder;

namespace {

enum compare_components_result {
   LESS,
   LESS_OR_EQUAL,
   EQUAL,
   GREATER_OR_EQUAL,
   GREATER,
   MIXED
};

class minmax_range {
public:
   minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
   {
      this->low = low;
      this->high = high;
   }

   /* low is the lower limit of the range, high is the higher limit. NULL on
    * low means negative infinity (unlimited) and on high positive infinity
    * (unlimited). Because of the two interpretations of the value NULL,
    * arbitrary comparison between ir_constants is impossible.
    */
   ir_constant *low;
   ir_constant *high;
};

class ir_minmax_visitor : public ir_rvalue_enter_visitor {
public:
   ir_minmax_visitor()
      : progress(false)
   {
   }

   ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);

   void handle_rvalue(ir_rvalue **rvalue);

   bool progress;
};

/*
 * Returns LESS if all vector components of `a' are strictly lower than of `b',
 * GREATER if all vector components of `a' are strictly greater than of `b',
 * MIXED if some vector components of `a' are strictly lower than of `b' while
 * others are strictly greater, or EQUAL otherwise.
 */
static enum compare_components_result
compare_components(ir_constant *a, ir_constant *b)
{
   assert(a != NULL);
   assert(b != NULL);

   assert(a->type->base_type == b->type->base_type);

   unsigned a_inc = a->type->is_scalar() ? 0 : 1;
   unsigned b_inc = b->type->is_scalar() ? 0 : 1;
   unsigned components = MAX2(a->type->components(), b->type->components());

   bool foundless = false;
   bool foundgreater = false;
   bool foundequal = false;

   for (unsigned i = 0, c0 = 0, c1 = 0;
        i < components;
        c0 += a_inc, c1 += b_inc, ++i) {
      switch (a->type->base_type) {
      case GLSL_TYPE_UINT:
         if (a->value.u[c0] < b->value.u[c1])
            foundless = true;
         else if (a->value.u[c0] > b->value.u[c1])
            foundgreater = true;
         else
            foundequal = true;
         break;
      case GLSL_TYPE_INT:
         if (a->value.i[c0] < b->value.i[c1])
            foundless = true;
         else if (a->value.i[c0] > b->value.i[c1])
            foundgreater = true;
         else
            foundequal = true;
         break;
      case GLSL_TYPE_FLOAT:
         if (a->value.f[c0] < b->value.f[c1])
            foundless = true;
         else if (a->value.f[c0] > b->value.f[c1])
            foundgreater = true;
         else
            foundequal = true;
         break;
      case GLSL_TYPE_DOUBLE:
         if (a->value.d[c0] < b->value.d[c1])
            foundless = true;
         else if (a->value.d[c0] > b->value.d[c1])
            foundgreater = true;
         else
            foundequal = true;
         break;
      default:
         unreachable("not reached");
      }
   }

   if (foundless && foundgreater) {
      /* Some components are strictly lower, others are strictly greater */
      return MIXED;
   }

   if (foundequal) {
       /* It is not mixed, but it is not strictly lower or greater */
      if (foundless)
         return LESS_OR_EQUAL;
      if (foundgreater)
         return GREATER_OR_EQUAL;
      return EQUAL;
   }

   /* All components are strictly lower or strictly greater */
   return foundless ? LESS : GREATER;
}

static ir_constant *
combine_constant(bool ismin, ir_constant *a, ir_constant *b)
{
   void *mem_ctx = ralloc_parent(a);
   ir_constant *c = a->clone(mem_ctx, NULL);
   for (unsigned i = 0; i < c->type->components(); i++) {
      switch (c->type->base_type) {
      case GLSL_TYPE_UINT:
         if ((ismin && b->value.u[i] < c->value.u[i]) ||
             (!ismin && b->value.u[i] > c->value.u[i]))
            c->value.u[i] = b->value.u[i];
         break;
      case GLSL_TYPE_INT:
         if ((ismin && b->value.i[i] < c->value.i[i]) ||
             (!ismin && b->value.i[i] > c->value.i[i]))
            c->value.i[i] = b->value.i[i];
         break;
      case GLSL_TYPE_FLOAT:
         if ((ismin && b->value.f[i] < c->value.f[i]) ||
             (!ismin && b->value.f[i] > c->value.f[i]))
            c->value.f[i] = b->value.f[i];
         break;
      case GLSL_TYPE_DOUBLE:
         if ((ismin && b->value.d[i] < c->value.d[i]) ||
             (!ismin && b->value.d[i] > c->value.d[i]))
            c->value.d[i] = b->value.d[i];
         break;
      default:
         assert(!"not reached");
      }
   }
   return c;
}

static ir_constant *
smaller_constant(ir_constant *a, ir_constant *b)
{
   assert(a != NULL);
   assert(b != NULL);

   enum compare_components_result ret = compare_components(a, b);
   if (ret == MIXED)
      return combine_constant(true, a, b);
   else if (ret < EQUAL)
      return a;
   else
      return b;
}

static ir_constant *
larger_constant(ir_constant *a, ir_constant *b)
{
   assert(a != NULL);
   assert(b != NULL);

   enum compare_components_result ret = compare_components(a, b);
   if (ret == MIXED)
      return combine_constant(false, a, b);
   else if (ret < EQUAL)
      return b;
   else
      return a;
}

/* Combines two ranges by doing an element-wise min() / max() depending on the
 * operation.
 */
static minmax_range
combine_range(minmax_range r0, minmax_range r1, bool ismin)
{
   minmax_range ret;

   if (!r0.low) {
      ret.low = ismin ? r0.low : r1.low;
   } else if (!r1.low) {
      ret.low = ismin ? r1.low : r0.low;
   } else {
      ret.low = ismin ? smaller_constant(r0.low, r1.low) :
         larger_constant(r0.low, r1.low);
   }

   if (!r0.high) {
      ret.high = ismin ? r1.high : r0.high;
   } else if (!r1.high) {
      ret.high = ismin ? r0.high : r1.high;
   } else {
      ret.high = ismin ? smaller_constant(r0.high, r1.high) :
         larger_constant(r0.high, r1.high);
   }

   return ret;
}

/* Returns a range so that lower limit is the larger of the two lower limits,
 * and higher limit is the smaller of the two higher limits.
 */
static minmax_range
range_intersection(minmax_range r0, minmax_range r1)
{
   minmax_range ret;

   if (!r0.low)
      ret.low = r1.low;
   else if (!r1.low)
      ret.low = r0.low;
   else
      ret.low = larger_constant(r0.low, r1.low);

   if (!r0.high)
      ret.high = r1.high;
   else if (!r1.high)
      ret.high = r0.high;
   else
      ret.high = smaller_constant(r0.high, r1.high);

   return ret;
}

static minmax_range
get_range(ir_rvalue *rval)
{
   ir_expression *expr = rval->as_expression();
   if (expr && (expr->operation == ir_binop_min ||
                expr->operation == ir_binop_max)) {
      minmax_range r0 = get_range(expr->operands[0]);
      minmax_range r1 = get_range(expr->operands[1]);
      return combine_range(r0, r1, expr->operation == ir_binop_min);
   }

   ir_constant *c = rval->as_constant();
   if (c) {
      return minmax_range(c, c);
   }

   return minmax_range();
}

/**
 * Prunes a min/max expression considering the base range of the parent
 * min/max expression.
 *
 * @param baserange the range that the parents of this min/max expression
 * in the min/max tree will clamp its value to.
 */
ir_rvalue *
ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
{
   assert(expr->operation == ir_binop_min ||
          expr->operation == ir_binop_max);

   bool ismin = expr->operation == ir_binop_min;
   minmax_range limits[2];

   /* Recurse to get the ranges for each of the subtrees of this
    * expression. We need to do this as a separate step because we need to
    * know the ranges of each of the subtrees before we prune either one.
    * Consider something like this:
    *
    *        max
    *     /       \
    *    max     max
    *   /   \   /   \
    *  3    a   b    2
    *
    * We would like to prune away the max on the bottom-right, but to do so
    * we need to know the range of the expression on the left beforehand,
    * and there's no guarantee that we will visit either subtree in a
    * particular order.
    */
   for (unsigned i = 0; i < 2; ++i)
      limits[i] = get_range(expr->operands[i]);

   for (unsigned i = 0; i < 2; ++i) {
      bool is_redundant = false;

      enum compare_components_result cr = LESS;
      if (ismin) {
         /* If this operand will always be greater than the other one, it's
          * redundant.
          */
         if (limits[i].low && limits[1 - i].high) {
               cr = compare_components(limits[i].low, limits[1 - i].high);
            if (cr >= EQUAL && cr != MIXED)
               is_redundant = true;
         }
         /* If this operand is always greater than baserange, then even if
          * it's smaller than the other one it'll get clamped, so it's
          * redundant.
          */
         if (!is_redundant && limits[i].low && baserange.high) {
            cr = compare_components(limits[i].low, baserange.high);
            if (cr > EQUAL && cr != MIXED)
               is_redundant = true;
         }
      } else {
         /* If this operand will always be lower than the other one, it's
          * redundant.
          */
         if (limits[i].high && limits[1 - i].low) {
            cr = compare_components(limits[i].high, limits[1 - i].low);
            if (cr <= EQUAL)
               is_redundant = true;
         }
         /* If this operand is always lower than baserange, then even if
          * it's greater than the other one it'll get clamped, so it's
          * redundant.
          */
         if (!is_redundant && limits[i].high && baserange.low) {
            cr = compare_components(limits[i].high, baserange.low);
            if (cr < EQUAL)
               is_redundant = true;
         }
      }

      if (is_redundant) {
         progress = true;

         /* Recurse if necessary. */
         ir_expression *op_expr = expr->operands[1 - i]->as_expression();
         if (op_expr && (op_expr->operation == ir_binop_min ||
                         op_expr->operation == ir_binop_max)) {
            return prune_expression(op_expr, baserange);
         }

         return expr->operands[1 - i];
      } else if (cr == MIXED) {
         /* If we have mixed vector operands, we can try to resolve the minmax
          * expression by doing a component-wise minmax:
          *
          *             min                          min
          *           /    \                       /    \
          *         min     a       ===>        [1,1]    a
          *       /    \
          *    [1,3]   [3,1]
          *
          */
         ir_constant *a = expr->operands[0]->as_constant();
         ir_constant *b = expr->operands[1]->as_constant();
         if (a && b)
            return combine_constant(ismin, a, b);
      }
   }

   /* Now recurse to operands giving them the proper baserange. The baserange
    * to pass is the intersection of our baserange and the other operand's
    * limit with one of the ranges unlimited. If we can't compute a valid
    * intersection, we use the current baserange.
    */
   for (unsigned i = 0; i < 2; ++i) {
      ir_expression *op_expr = expr->operands[i]->as_expression();
      if (op_expr && (op_expr->operation == ir_binop_min ||
                      op_expr->operation == ir_binop_max)) {
         /* We can only compute a new baserange for this operand if we managed
          * to compute a valid range for the other operand.
          */
         if (ismin)
            limits[1 - i].low = NULL;
         else
            limits[1 - i].high = NULL;
         minmax_range base = range_intersection(limits[1 - i], baserange);
         expr->operands[i] = prune_expression(op_expr, base);
      }
   }

   /* If we got here we could not discard any of the operands of the minmax
    * expression, but we can still try to resolve the expression if both
    * operands are constant. We do this after the loop above, to make sure
    * that if our operands are minmax expressions we have tried to prune them
    * first (hopefully reducing them to constants).
    */
   ir_constant *a = expr->operands[0]->as_constant();
   ir_constant *b = expr->operands[1]->as_constant();
   if (a && b)
      return combine_constant(ismin, a, b);

   return expr;
}

static ir_rvalue *
swizzle_if_required(ir_expression *expr, ir_rvalue *rval)
{
   if (expr->type->is_vector() && rval->type->is_scalar()) {
      return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements);
   } else {
      return rval;
   }
}

void
ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_expression *expr = (*rvalue)->as_expression();
   if (!expr || (expr->operation != ir_binop_min &&
                 expr->operation != ir_binop_max))
      return;

   ir_rvalue *new_rvalue = prune_expression(expr, minmax_range());
   if (new_rvalue == *rvalue)
      return;

   /* If the expression type is a vector and the optimization leaves a scalar
    * as the result, we need to turn it into a vector.
    */
   *rvalue = swizzle_if_required(expr, new_rvalue);

   progress = true;
}

}

bool
do_minmax_prune(exec_list *instructions)
{
   ir_minmax_visitor v;

   visit_list_elements(&v, instructions);

   return v.progress;
}