// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/memory-optimizer.h"

#include "src/compiler/js-graph.h"
#include "src/compiler/linkage.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node-properties.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"

namespace v8 {
namespace internal {
namespace compiler {

MemoryOptimizer::MemoryOptimizer(JSGraph* jsgraph, Zone* zone)
    : jsgraph_(jsgraph),
      empty_state_(AllocationState::Empty(zone)),
      pending_(zone),
      tokens_(zone),
      zone_(zone) {}

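// Walks the effect chains of the graph starting from the start node,
// lowering allocations, field/element loads and stores while tracking an
// allocation state along each chain.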
void MemoryOptimizer::Optimize() {
  EnqueueUses(graph()->start(), empty_state());
  while (!tokens_.empty()) {
    Token const token = tokens_.front();
    tokens_.pop();
    VisitNode(token.node, token.state);
  }
  DCHECK(pending_.empty());
  DCHECK(tokens_.empty());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(nullptr) {
  node_ids_.insert(node->id());
}

MemoryOptimizer::AllocationGroup::AllocationGroup(Node* node,
                                                  PretenureFlag pretenure,
                                                  Node* size, Zone* zone)
    : node_ids_(zone), pretenure_(pretenure), size_(size) {
  node_ids_.insert(node->id());
}

void MemoryOptimizer::AllocationGroup::Add(Node* node) {
  node_ids_.insert(node->id());
}

bool MemoryOptimizer::AllocationGroup::Contains(Node* node) const {
  return node_ids_.find(node->id()) != node_ids_.end();
}

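// Empty allocation state: nothing is known about the most recent allocation.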
MemoryOptimizer::AllocationState::AllocationState()
    : group_(nullptr), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

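// Closed allocation state: {group} is still tracked (e.g. for write barrier
// elimination), but no further allocations can be folded into it.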
MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group)
    : group_(group), size_(std::numeric_limits<int>::max()), top_(nullptr) {}

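// Open allocation state: {group} has reserved {size} bytes so far and {top}
// computes the current allocation top, so subsequent allocations may be
// folded into the group.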
MemoryOptimizer::AllocationState::AllocationState(AllocationGroup* group,
                                                  int size, Node* top)
    : group_(group), size_(size), top_(top) {}

bool MemoryOptimizer::AllocationState::IsNewSpaceAllocation() const {
  return group() && group()->IsNewSpaceAllocation();
}

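// Dispatches on the opcode of the effectful {node}: memory operations are
// lowered, other known effectful operations just propagate the allocation
// state, and everything else must not have an effect output.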
void MemoryOptimizer::VisitNode(Node* node, AllocationState const* state) {
  DCHECK(!node->IsDead());
  DCHECK_LT(0, node->op()->EffectInputCount());
  switch (node->opcode()) {
    case IrOpcode::kAllocate:
      return VisitAllocate(node, state);
    case IrOpcode::kCall:
      return VisitCall(node, state);
    case IrOpcode::kLoadElement:
      return VisitLoadElement(node, state);
    case IrOpcode::kLoadField:
      return VisitLoadField(node, state);
    case IrOpcode::kStoreElement:
      return VisitStoreElement(node, state);
    case IrOpcode::kStoreField:
      return VisitStoreField(node, state);
    case IrOpcode::kCheckedLoad:
    case IrOpcode::kCheckedStore:
    case IrOpcode::kDeoptimizeIf:
    case IrOpcode::kDeoptimizeUnless:
    case IrOpcode::kIfException:
    case IrOpcode::kLoad:
    case IrOpcode::kStore:
    case IrOpcode::kRetain:
    case IrOpcode::kUnsafePointerAdd:
      return VisitOtherEffect(node, state);
    default:
      break;
  }
  DCHECK_EQ(0, node->op()->EffectOutputCount());
}

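// Lowers an Allocate node to an inline bump-pointer allocation with a
// runtime call fallback. Consecutive small allocations on the same effect
// chain are folded into a single allocation group: e.g. two allocations of
// 16 and 24 bytes share one 40 byte reservation and a single limit check,
// with the second object carved out of the already reserved space.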
void MemoryOptimizer::VisitAllocate(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kAllocate, node->opcode());
  Node* value;
  Node* size = node->InputAt(0);
  Node* effect = node->InputAt(1);
  Node* control = node->InputAt(2);
  PretenureFlag pretenure = PretenureFlagOf(node->op());

  // Propagate tenuring from outer allocations to inner allocations: when we
  // allocate an object in old space and store a newly allocated child object
  // into that pretenured object, the child object should also be pretenured
  // to old space.
  if (pretenure == TENURED) {
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 0) {
        Node* const child = user->InputAt(1);
        if (child->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(child->op()) == NOT_TENURED) {
          NodeProperties::ChangeOp(child, node->op());
          break;
        }
      }
    }
  } else {
    DCHECK_EQ(NOT_TENURED, pretenure);
    for (Edge const edge : node->use_edges()) {
      Node* const user = edge.from();
      if (user->opcode() == IrOpcode::kStoreField && edge.index() == 1) {
        Node* const parent = user->InputAt(0);
        if (parent->opcode() == IrOpcode::kAllocate &&
            PretenureFlagOf(parent->op()) == TENURED) {
          pretenure = TENURED;
          break;
        }
      }
    }
  }

  // Determine the top/limit addresses.
  Node* top_address = jsgraph()->ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_top_address(isolate())
          : ExternalReference::old_space_allocation_top_address(isolate()));
  Node* limit_address = jsgraph()->ExternalConstant(
      pretenure == NOT_TENURED
          ? ExternalReference::new_space_allocation_limit_address(isolate())
          : ExternalReference::old_space_allocation_limit_address(isolate()));

  // Check if we can fold this allocation into a previous allocation represented
  // by the incoming {state}.
  Int32Matcher m(size);
  if (m.HasValue() && m.Value() < kMaxRegularHeapObjectSize) {
    int32_t const object_size = m.Value();
    if (state->size() <= kMaxRegularHeapObjectSize - object_size &&
        state->group()->pretenure() == pretenure) {
      // We can fold this Allocate {node} into the allocation {group}
      // represented by the given {state}. Compute the upper bound for
      // the new {state}.
      int32_t const state_size = state->size() + object_size;

      // Update the reservation check to the actual maximum upper bound.
      AllocationGroup* const group = state->group();
      if (OpParameter<int32_t>(group->size()) < state_size) {
        NodeProperties::ChangeOp(group->size(),
                                 common()->Int32Constant(state_size));
      }

      // Update the allocation top with the new object allocation.
      // TODO(bmeurer): Defer writing back top as much as possible.
      Node* top = graph()->NewNode(machine()->IntAdd(), state->top(),
                                   jsgraph()->IntPtrConstant(object_size));
      effect = graph()->NewNode(
          machine()->Store(StoreRepresentation(
              MachineType::PointerRepresentation(), kNoWriteBarrier)),
          top_address, jsgraph()->IntPtrConstant(0), top, effect, control);

      // Compute the effective inner allocated address.
      value = graph()->NewNode(
          machine()->BitcastWordToTagged(),
          graph()->NewNode(machine()->IntAdd(), state->top(),
                           jsgraph()->IntPtrConstant(kHeapObjectTag)));

      // Extend the allocation {group}.
      group->Add(value);
      state = AllocationState::Open(group, state_size, top, zone());
    } else {
      // Set up a mutable reservation size node; it will be patched as we
      // fold additional allocations into this new group.
      Node* size = graph()->NewNode(common()->Int32Constant(object_size));

      // Load allocation top and limit.
      Node* top = effect =
          graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address,
                           jsgraph()->IntPtrConstant(0), effect, control);
      Node* limit = effect = graph()->NewNode(
          machine()->Load(MachineType::Pointer()), limit_address,
          jsgraph()->IntPtrConstant(0), effect, control);

      // Check if we need to collect garbage before we can start bump pointer
      // allocation (always done for folded allocations).
      Node* check = graph()->NewNode(
          machine()->UintLessThan(),
          graph()->NewNode(
              machine()->IntAdd(), top,
              machine()->Is64()
                  ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size)
                  : size),
          limit);
      Node* branch =
          graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control);

      Node* if_true = graph()->NewNode(common()->IfTrue(), branch);
      Node* etrue = effect;
      Node* vtrue = top;

      Node* if_false = graph()->NewNode(common()->IfFalse(), branch);
      Node* efalse = effect;
      Node* vfalse;
      {
        Node* target = pretenure == NOT_TENURED
                           ? jsgraph()->AllocateInNewSpaceStubConstant()
                           : jsgraph()->AllocateInOldSpaceStubConstant();
        if (!allocate_operator_.is_set()) {
          CallDescriptor* descriptor =
              Linkage::GetAllocateCallDescriptor(graph()->zone());
          allocate_operator_.set(common()->Call(descriptor));
        }
        vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target,
                                           size, efalse, if_false);
        vfalse = graph()->NewNode(machine()->IntSub(), vfalse,
                                  jsgraph()->IntPtrConstant(kHeapObjectTag));
      }

      control = graph()->NewNode(common()->Merge(2), if_true, if_false);
      effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control);
      value = graph()->NewNode(
          common()->Phi(MachineType::PointerRepresentation(), 2), vtrue, vfalse,
          control);

      // Compute the new top and write it back.
      top = graph()->NewNode(machine()->IntAdd(), value,
                             jsgraph()->IntPtrConstant(object_size));
      effect = graph()->NewNode(
          machine()->Store(StoreRepresentation(
              MachineType::PointerRepresentation(), kNoWriteBarrier)),
          top_address, jsgraph()->IntPtrConstant(0), top, effect, control);

      // Compute the initial object address.
      value = graph()->NewNode(
          machine()->BitcastWordToTagged(),
          graph()->NewNode(machine()->IntAdd(), value,
                           jsgraph()->IntPtrConstant(kHeapObjectTag)));

      // Start a new allocation group.
      AllocationGroup* group =
          new (zone()) AllocationGroup(value, pretenure, size, zone());
      state = AllocationState::Open(group, object_size, top, zone());
    }
  } else {
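    // The allocation size is not a known small constant, so the allocation
    // cannot be folded: emit a one-off bump-pointer allocation with a
    // runtime call fallback and close the resulting group.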
    // Load allocation top and limit.
    Node* top = effect =
        graph()->NewNode(machine()->Load(MachineType::Pointer()), top_address,
                         jsgraph()->IntPtrConstant(0), effect, control);
    Node* limit = effect =
        graph()->NewNode(machine()->Load(MachineType::Pointer()), limit_address,
                         jsgraph()->IntPtrConstant(0), effect, control);

    // Compute the new top.
    Node* new_top = graph()->NewNode(
        machine()->IntAdd(), top,
        machine()->Is64()
            ? graph()->NewNode(machine()->ChangeInt32ToInt64(), size)
            : size);

    // Check if we can do bump pointer allocation here.
    Node* check = graph()->NewNode(machine()->UintLessThan(), new_top, limit);
    Node* branch =
        graph()->NewNode(common()->Branch(BranchHint::kTrue), check, control);

    Node* if_true = graph()->NewNode(common()->IfTrue(), branch);
    Node* etrue = effect;
    Node* vtrue;
    {
      etrue = graph()->NewNode(
          machine()->Store(StoreRepresentation(
              MachineType::PointerRepresentation(), kNoWriteBarrier)),
          top_address, jsgraph()->IntPtrConstant(0), new_top, etrue, if_true);
      vtrue = graph()->NewNode(
          machine()->BitcastWordToTagged(),
          graph()->NewNode(machine()->IntAdd(), top,
                           jsgraph()->IntPtrConstant(kHeapObjectTag)));
    }

    Node* if_false = graph()->NewNode(common()->IfFalse(), branch);
    Node* efalse = effect;
    Node* vfalse;
    {
      Node* target = pretenure == NOT_TENURED
                         ? jsgraph()->AllocateInNewSpaceStubConstant()
                         : jsgraph()->AllocateInOldSpaceStubConstant();
      if (!allocate_operator_.is_set()) {
        CallDescriptor* descriptor =
            Linkage::GetAllocateCallDescriptor(graph()->zone());
        allocate_operator_.set(common()->Call(descriptor));
      }
      vfalse = efalse = graph()->NewNode(allocate_operator_.get(), target, size,
                                         efalse, if_false);
    }

    control = graph()->NewNode(common()->Merge(2), if_true, if_false);
    effect = graph()->NewNode(common()->EffectPhi(2), etrue, efalse, control);
    value = graph()->NewNode(
        common()->Phi(MachineRepresentation::kTaggedPointer, 2), vtrue, vfalse,
        control);

    // Create an unfoldable allocation group.
    AllocationGroup* group =
        new (zone()) AllocationGroup(value, pretenure, zone());
    state = AllocationState::Closed(group, zone());
  }

  // Replace all effect uses of {node} with the {effect}, enqueue the
  // effect uses for further processing, and replace all value uses of
  // {node} with the {value}.
  for (Edge edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
      edge.UpdateTo(effect);
    } else {
      DCHECK(NodeProperties::IsValueEdge(edge));
      edge.UpdateTo(value);
    }
  }

  // Kill the {node} to make sure we don't leave dangling dead uses.
  node->Kill();
}

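// Calls that are not marked kNoAllocate may allocate and thereby trigger a
// garbage collection, which invalidates any tracked allocation state.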
void MemoryOptimizer::VisitCall(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kCall, node->opcode());
  // If the call can allocate, we start with a fresh state.
  if (!(CallDescriptorOf(node->op())->flags() & CallDescriptor::kNoAllocate)) {
    state = empty_state();
  }
  EnqueueUses(node, state);
}

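// Lowers a LoadElement node to a raw machine Load with the element index
// turned into a byte offset.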
void MemoryOptimizer::VisitLoadElement(Node* node,
                                       AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* index = node->InputAt(1);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

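// Lowers a LoadField node to a raw machine Load at the untagged field
// offset.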
void MemoryOptimizer::VisitLoadField(Node* node, AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kLoadField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(node, machine()->Load(access.machine_type));
  EnqueueUses(node, state);
}

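// Lowers a StoreElement node to a raw machine Store, computing the byte
// offset and possibly eliding the write barrier.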
void MemoryOptimizer::VisitStoreElement(Node* node,
                                        AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreElement, node->opcode());
  ElementAccess const& access = ElementAccessOf(node->op());
  Node* object = node->InputAt(0);
  Node* index = node->InputAt(1);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  node->ReplaceInput(1, ComputeIndex(access, index));
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

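// Lowers a StoreField node to a raw machine Store at the untagged field
// offset, possibly eliding the write barrier.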
void MemoryOptimizer::VisitStoreField(Node* node,
                                      AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kStoreField, node->opcode());
  FieldAccess const& access = FieldAccessOf(node->op());
  Node* object = node->InputAt(0);
  WriteBarrierKind write_barrier_kind =
      ComputeWriteBarrierKind(object, state, access.write_barrier_kind);
  Node* offset = jsgraph()->IntPtrConstant(access.offset - access.tag());
  node->InsertInput(graph()->zone(), 1, offset);
  NodeProperties::ChangeOp(
      node, machine()->Store(StoreRepresentation(
                access.machine_type.representation(), write_barrier_kind)));
  EnqueueUses(node, state);
}

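// Other effectful nodes don't change the allocation state; just propagate
// it to their effect uses.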
void MemoryOptimizer::VisitOtherEffect(Node* node,
                                       AllocationState const* state) {
  EnqueueUses(node, state);
}

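// Turns an element {key} into a byte offset by scaling it with the element
// size and adding the (untagged) header size of the backing store.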
Node* MemoryOptimizer::ComputeIndex(ElementAccess const& access, Node* key) {
  Node* index;
  if (machine()->Is64()) {
    // On 64-bit platforms, we need to feed a Word64 index to the Load and
    // Store operators. Since neither LoadElement nor StoreElement does any
    // bounds checking itself, we can be sure that the {key} was already
    // checked and is in a valid range, so we can do the further address
    // computation on Word64 below, which ideally allows us to fuse the
    // address computation with the actual memory access operation on Intel
    // platforms.
    index = graph()->NewNode(machine()->ChangeUint32ToUint64(), key);
  } else {
    index = key;
  }
  int const element_size_shift =
      ElementSizeLog2Of(access.machine_type.representation());
  if (element_size_shift) {
    index = graph()->NewNode(machine()->WordShl(), index,
                             jsgraph()->IntPtrConstant(element_size_shift));
  }
  int const fixed_offset = access.header_size - access.tag();
  if (fixed_offset) {
    index = graph()->NewNode(machine()->IntAdd(), index,
                             jsgraph()->IntPtrConstant(fixed_offset));
  }
  return index;
}

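// If {object} was allocated in new space as part of the current allocation
// group, no GC can have happened since its allocation, so the store cannot
// need a write barrier.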
WriteBarrierKind MemoryOptimizer::ComputeWriteBarrierKind(
    Node* object, AllocationState const* state,
    WriteBarrierKind write_barrier_kind) {
  if (state->IsNewSpaceAllocation() && state->group()->Contains(object)) {
    write_barrier_kind = kNoWriteBarrier;
  }
  return write_barrier_kind;
}

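// Merges the allocation states reaching an EffectPhi into a single
// conservative state.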
MemoryOptimizer::AllocationState const* MemoryOptimizer::MergeStates(
    AllocationStates const& states) {
  // Check if all states are the same, or at least whether all allocation
  // states belong to the same allocation group.
  AllocationState const* state = states.front();
  AllocationGroup* group = state->group();
  for (size_t i = 1; i < states.size(); ++i) {
    if (states[i] != state) state = nullptr;
    if (states[i]->group() != group) group = nullptr;
  }
  if (state == nullptr) {
    if (group != nullptr) {
      // We cannot fold any more allocations into this group, but we can still
      // eliminate write barriers on stores to this group.
      // TODO(bmeurer): We could potentially just create a Phi here to merge
      // the various tops; but we need to pay special attention not to create
      // an unschedulable graph.
      state = AllocationState::Closed(group, zone());
    } else {
      // The states are from different allocation groups.
      state = empty_state();
    }
  }
  return state;
}

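// Collects the allocation states arriving at an EffectPhi. For a loop we
// only react to the initial entry and visit the loop body with the empty
// state; for a normal merge we wait until the states for all inputs are
// available and then merge them.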
void MemoryOptimizer::EnqueueMerge(Node* node, int index,
                                   AllocationState const* state) {
  DCHECK_EQ(IrOpcode::kEffectPhi, node->opcode());
  int const input_count = node->InputCount() - 1;
  DCHECK_LT(0, input_count);
  Node* const control = node->InputAt(input_count);
  if (control->opcode() == IrOpcode::kLoop) {
    // For loops we always start the loop body with an empty state.
    if (index == 0) EnqueueUses(node, empty_state());
  } else {
    DCHECK_EQ(IrOpcode::kMerge, control->opcode());
    // Check if we already know about this pending merge.
    NodeId const id = node->id();
    auto it = pending_.find(id);
    if (it == pending_.end()) {
      // Insert a new pending merge.
      it = pending_.insert(std::make_pair(id, AllocationStates(zone()))).first;
    }
    // Add the next input state.
    it->second.push_back(state);
    // Check if states for all inputs are available by now.
    if (it->second.size() == static_cast<size_t>(input_count)) {
      // All inputs to this effect merge are done, merge the states given all
      // input constraints, drop the pending merge and enqueue uses of the
      // EffectPhi {node}.
      state = MergeStates(it->second);
      EnqueueUses(node, state);
      pending_.erase(it);
    }
  }
}

void MemoryOptimizer::EnqueueUses(Node* node, AllocationState const* state) {
  for (Edge const edge : node->use_edges()) {
    if (NodeProperties::IsEffectEdge(edge)) {
      EnqueueUse(edge.from(), edge.index(), state);
    }
  }
}

void MemoryOptimizer::EnqueueUse(Node* node, int index,
                                 AllocationState const* state) {
  if (node->opcode() == IrOpcode::kEffectPhi) {
    // An EffectPhi represents a merge of different effect chains, which
    // needs special handling depending on whether the merge is part of a
    // loop or just a normal control join.
    EnqueueMerge(node, index, state);
  } else {
    Token token = {node, state};
    tokens_.push(token);
  }
}

Graph* MemoryOptimizer::graph() const { return jsgraph()->graph(); }

Isolate* MemoryOptimizer::isolate() const { return jsgraph()->isolate(); }

CommonOperatorBuilder* MemoryOptimizer::common() const {
  return jsgraph()->common();
}

MachineOperatorBuilder* MemoryOptimizer::machine() const {
  return jsgraph()->machine();
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8