//===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the LinearScan class, which performs the linear-scan
/// register allocation after liveness analysis has been performed.
///
//===----------------------------------------------------------------------===//

#include "IceRegAlloc.h"

#include "IceBitVector.h"
#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

#include "llvm/Support/Format.h"

namespace Ice {

namespace {

// Returns true if Var has any definitions within Item's live range.
// TODO(stichnot): Consider trimming the Definitions list similar to how the
// live ranges are trimmed, since all the overlapsDefs() tests are whether some
// variable's definitions overlap Cur, and trimming is with respect to
// Cur.start.
// Initial tests show no measurable performance difference, so we'll keep the
// code simple for now.
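//
// Illustrative example (the instruction numbers are hypothetical): if Var is
// defined at instructions 10 and 24 and Item's (trimmed) live range spans
// instructions 8 through 20, overlapsDefs() returns true because of the
// definition at 10; if Var were defined only at 24, it would return false.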
bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
  constexpr bool UseTrimmed = true;
  VariablesMetadata *VMetadata = Func->getVMetadata();
  if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
    if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
      return true;
  for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) {
    if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed))
      return true;
  }
  return false;
}

void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
                        const char *Reason) {
  if (!BuildDefs::dump())
    return;
  if (!Func->isVerbose(IceV_LinearScan))
    return;

  VariablesMetadata *VMetadata = Func->getVMetadata();
  Ostream &Str = Func->getContext()->getStrDump();
  Str << "Disabling Overlap due to " << Reason << " " << *Var
      << " LIVE=" << Var->getLiveRange() << " Defs=";
  if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
    Str << FirstDef->getNumber();
  const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
  for (size_t i = 0; i < Defs.size(); ++i) {
    Str << "," << Defs[i]->getNumber();
  }
  Str << "\n";
}

void dumpLiveRange(const Variable *Var, const Cfg *Func) {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrDump();
  Str << "R=";
  if (Var->hasRegTmp()) {
    Str << llvm::format("%2d", int(Var->getRegNumTmp()));
  } else {
    Str << "NA";
  }
  Str << "  V=";
  Var->dump(Func);
  Str << "  Range=" << Var->getLiveRange();
}

int32_t findMinWeightIndex(
    const SmallBitVector &RegMask,
    const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
  int MinWeightIndex = -1;
  for (RegNumT i : RegNumBVIter(RegMask)) {
    if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
      MinWeightIndex = i;
  }
  assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
  return MinWeightIndex;
}

} // end of anonymous namespace

LinearScan::LinearScan(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
      Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
      UseReserve(getFlags().getRegAllocReserve()) {}

// Prepare for full register allocation of all variables. We depend on liveness
// analysis to have calculated live ranges.
void LinearScan::initForGlobal() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = true;
  // For full register allocation, normally we want to enable FindOverlap
  // (meaning we look for opportunities for two overlapping live ranges to
  // safely share the same register). However, we disable it for phi-lowering
  // register allocation since no overlap opportunities should be available and
  // it's more expensive to look for opportunities.
  FindOverlap = (Kind != RAK_Phi);
  Unhandled.reserve(Vars.size());
  UnhandledPrecolored.reserve(Vars.size());
  // Gather the live ranges of all variables and add them to the Unhandled set.
  for (Variable *Var : Vars) {
    // Don't consider rematerializable variables.
    if (Var->isRematerializable())
      continue;
    // Explicitly don't consider zero-weight variables, which are meant to be
    // spill slots.
    if (Var->mustNotHaveReg())
      continue;
    // Don't bother if the variable has a null live range, which means it was
    // never referenced.
    if (Var->getLiveRange().isEmpty())
      continue;
    Var->untrimLiveRange();
    Unhandled.push_back(Var);
    if (Var->hasReg()) {
      Var->setRegNumTmp(Var->getRegNum());
      Var->setMustHaveReg();
      UnhandledPrecolored.push_back(Var);
    }
  }

  // Build the (ordered) list of FakeKill instruction numbers.
  Kills.clear();
  // Phi lowering should not be creating new call instructions, so there should
  // be no infinite-weight not-yet-colored live ranges that span a call
  // instruction, hence no need to construct the Kills list.
  if (Kind == RAK_Phi)
    return;
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &I : Node->getInsts()) {
      if (auto *Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
        if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
          Kills.push_back(I.getNumber());
      }
    }
  }
}

// Validate the integrity of the live ranges.  If there are any errors, it
// prints details and returns false.  On success, it returns true.
bool LinearScan::livenessValidateIntervals(
    const DefUseErrorList &DefsWithoutUses,
    const DefUseErrorList &UsesBeforeDefs,
    const CfgVector<InstNumberT> &LRBegin,
    const CfgVector<InstNumberT> &LREnd) const {
  if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
    return true;

  if (!BuildDefs::dump())
    return false;

  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrDump();
  for (SizeT VarNum : DefsWithoutUses) {
    Variable *Var = Vars[VarNum];
    Str << "LR def without use, instruction " << LRBegin[VarNum]
        << ", variable " << Var->getName() << "\n";
  }
  for (SizeT VarNum : UsesBeforeDefs) {
    Variable *Var = Vars[VarNum];
    Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable "
        << Var->getName() << "\n";
  }
  return false;
}

// Prepare for very simple register allocation of only infinite-weight Variables
// while respecting pre-colored Variables. Some properties we take advantage of:
//
// * Live ranges of interest consist of a single segment.
//
// * Live ranges of interest never span a call instruction.
//
// * Phi instructions are not considered because either phis have already been
//   lowered, or they don't contain any pre-colored or infinite-weight
//   Variables.
//
// * We don't need to renumber instructions before computing live ranges because
//   all the high-level ICE instructions are deleted prior to lowering, and the
//   low-level instructions are added in monotonically increasing order.
//
// * There are no opportunities for register preference or allowing overlap.
//
// Some properties we aren't (yet) taking advantage of:
//
// * Because live ranges are a single segment, the Inactive set will always be
//   empty, and the live range trimming operation is unnecessary.
//
// * Calculating overlap of single-segment live ranges could be optimized a bit.
void LinearScan::initForInfOnly() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = false;
  FindOverlap = false;
  SizeT NumVars = 0;

  // Iterate across all instructions and record the begin and end of the live
  // range for each variable that is pre-colored or infinite weight.
  CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
  CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
  DefUseErrorList DefsWithoutUses, UsesBeforeDefs;
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (Instr.isDeleted())
        continue;
      FOREACH_VAR_IN_INST(Var, Instr) {
        if (Var->getIgnoreLiveness())
          continue;
        if (Var->hasReg() || Var->mustHaveReg()) {
          SizeT VarNum = Var->getIndex();
          LREnd[VarNum] = Instr.getNumber();
          if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel)
            UsesBeforeDefs.push_back(VarNum);
        }
      }
      if (const Variable *Var = Instr.getDest()) {
        if (!Var->getIgnoreLiveness() &&
            (Var->hasReg() || Var->mustHaveReg())) {
          if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
            LRBegin[Var->getIndex()] = Instr.getNumber();
            ++NumVars;
          }
        }
      }
    }
  }

  Unhandled.reserve(NumVars);
  UnhandledPrecolored.reserve(NumVars);
  for (SizeT i = 0; i < Vars.size(); ++i) {
    Variable *Var = Vars[i];
    if (Var->isRematerializable())
      continue;
    if (LRBegin[i] != Inst::NumberSentinel) {
      if (LREnd[i] == Inst::NumberSentinel) {
        DefsWithoutUses.push_back(i);
        continue;
      }
      Unhandled.push_back(Var);
      Var->resetLiveRange();
      Var->addLiveRange(LRBegin[i], LREnd[i]);
      Var->untrimLiveRange();
      if (Var->hasReg()) {
        Var->setRegNumTmp(Var->getRegNum());
        Var->setMustHaveReg();
        UnhandledPrecolored.push_back(Var);
      }
      --NumVars;
    }
  }

  if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin,
                                 LREnd)) {
    llvm::report_fatal_error("initForInfOnly: Liveness error");
    return;
  }

  // This isn't actually a fatal condition, but it would be nice to know if we
  // somehow pre-calculated Unhandled's size wrong.
  assert(NumVars == 0);

  // Don't build up the list of Kills because we know that no infinite-weight
  // Variable has a live range spanning a call.
  Kills.clear();
}

void LinearScan::initForSecondChance() {
  TimerMarker T(TimerStack::TT_initUnhandled, Func);
  FindPreference = true;
  FindOverlap = true;
  const VarList &Vars = Func->getVariables();
  Unhandled.reserve(Vars.size());
  UnhandledPrecolored.reserve(Vars.size());
  for (Variable *Var : Vars) {
    if (Var->isRematerializable())
      continue;
    if (Var->hasReg()) {
      Var->untrimLiveRange();
      Var->setRegNumTmp(Var->getRegNum());
      Var->setMustHaveReg();
      UnhandledPrecolored.push_back(Var);
      Unhandled.push_back(Var);
    }
  }
  for (Variable *Var : Evicted) {
    Var->untrimLiveRange();
    Unhandled.push_back(Var);
  }
}

void LinearScan::init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars) {
  this->Kind = Kind;
  Unhandled.clear();
  UnhandledPrecolored.clear();
  Handled.clear();
  Inactive.clear();
  Active.clear();
  Vars.clear();
  Vars.reserve(Func->getVariables().size() - ExcludeVars.size());
  for (auto *Var : Func->getVariables()) {
    if (ExcludeVars.find(Var) == ExcludeVars.end())
      Vars.emplace_back(Var);
  }

  SizeT NumRegs = Target->getNumRegisters();
  RegAliases.resize(NumRegs);
  for (SizeT Reg = 0; Reg < NumRegs; ++Reg) {
    RegAliases[Reg] = &Target->getAliasesForRegister(RegNumT::fromInt(Reg));
  }

  switch (Kind) {
  case RAK_Unknown:
    llvm::report_fatal_error("Invalid RAK_Unknown");
    break;
  case RAK_Global:
  case RAK_Phi:
    initForGlobal();
    break;
  case RAK_InfOnly:
    initForInfOnly();
    break;
  case RAK_SecondChance:
    initForSecondChance();
    break;
  }

  Evicted.clear();

  auto CompareRanges = [](const Variable *L, const Variable *R) {
    InstNumberT Lstart = L->getLiveRange().getStart();
    InstNumberT Rstart = R->getLiveRange().getStart();
    if (Lstart == Rstart)
      return L->getIndex() < R->getIndex();
    return Lstart < Rstart;
  };
  // Do a reverse sort so that erasing elements (from the end) is fast.
  std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
  std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
            CompareRanges);

  Handled.reserve(Unhandled.size());
  Inactive.reserve(Unhandled.size());
  Active.reserve(Unhandled.size());
  Evicted.reserve(Unhandled.size());
}

// This is called when Cur must be allocated a register but no registers are
// available across Cur's live range. To handle this, we find a register that is
// not explicitly used during Cur's live range, spill that register to a stack
// location right before Cur's live range begins, and fill (reload) the register
// from the stack location right after Cur's live range ends.
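//
// As an illustrative sketch (the names "reg" and "spillLoc" are hypothetical),
// if Cur's live range is [Start, End] and register reg is chosen, the
// rewritten instruction stream looks roughly like:
//   reg = FakeDef       ; added before Start
//   spillLoc = reg      ; spill, added before Start
//   ...                 ; Cur's live range, with Cur temporarily in reg
//   reg = spillLoc      ; fill, added after End
//   FakeUse(reg)        ; added after End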
void LinearScan::addSpillFill(IterationState &Iter) {
  // Identify the actual instructions that begin and end Iter.Cur's live range.
  // Iterate through Iter.Cur's node's instruction list until we find the actual
  // instructions with instruction numbers corresponding to Iter.Cur's recorded
  // live range endpoints.  This sounds inefficient but shouldn't be a problem
  // in practice because:
  // (1) This function is almost never called in practice.
  // (2) Since this register over-subscription problem happens only for
  //     phi-lowered instructions, the number of instructions in the node is
  //     proportional to the number of phi instructions in the original node,
  //     which is never very large in practice.
  // (3) We still have to iterate through all instructions of Iter.Cur's live
  //     range to find all explicitly used registers (though the live range is
  //     usually only 2-3 instructions), so the main cost that could be avoided
  //     would be finding the instruction that begins Iter.Cur's live range.
  assert(!Iter.Cur->getLiveRange().isEmpty());
  InstNumberT Start = Iter.Cur->getLiveRange().getStart();
  InstNumberT End = Iter.Cur->getLiveRange().getEnd();
  CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
  assert(Node);
  InstList &Insts = Node->getInsts();
  InstList::iterator SpillPoint = Insts.end();
  InstList::iterator FillPoint = Insts.end();
  // Stop searching after we have found both the SpillPoint and the FillPoint.
  for (auto I = Insts.begin(), E = Insts.end();
       I != E && (SpillPoint == E || FillPoint == E); ++I) {
    if (I->getNumber() == Start)
      SpillPoint = I;
    if (I->getNumber() == End)
      FillPoint = I;
    if (SpillPoint != E) {
      // Remove from RegMask any physical registers referenced during Cur's live
      // range. Start looking after SpillPoint gets set, i.e. once Cur's live
      // range begins.
      FOREACH_VAR_IN_INST(Var, *I) {
        if (!Var->hasRegTmp())
          continue;
        const auto &Aliases = *RegAliases[Var->getRegNumTmp()];
        for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
          Iter.RegMask[RegAlias] = false;
        }
      }
    }
  }
  assert(SpillPoint != Insts.end());
  assert(FillPoint != Insts.end());
  ++FillPoint;
  // TODO(stichnot): Randomize instead of *.begin() which maps to find_first().
  const RegNumT RegNum = *RegNumBVIter(Iter.RegMask).begin();
  Iter.Cur->setRegNumTmp(RegNum);
  Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
  // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
  // is correctly identified as !isMultiBlock(), reducing stack frame size.
  Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
  // Add "reg=FakeDef;spill=reg" before SpillPoint
  Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
  Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
  // add "reg=spill;FakeUse(reg)" before FillPoint
  Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
  Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
}

void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
  for (SizeT I = Active.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Active[Index];
    Item->trimLiveRange(Cur->getLiveRange().getStart());
    bool Moved = false;
    if (Item->rangeEndsBefore(Cur)) {
      // Move Item from Active to Handled list.
      dumpLiveRangeTrace("Expiring     ", Item);
      moveItem(Active, Index, Handled);
      Moved = true;
    } else if (!Item->rangeOverlapsStart(Cur)) {
      // Move Item from Active to Inactive list.
      dumpLiveRangeTrace("Inactivating ", Item);
      moveItem(Active, Index, Inactive);
      Moved = true;
    }
    if (Moved) {
      // Decrement Item from RegUses[].
      assert(Item->hasRegTmp());
      const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
      for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
        --RegUses[RegAlias];
        assert(RegUses[RegAlias] >= 0);
      }
    }
  }
}

void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
  for (SizeT I = Inactive.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Inactive[Index];
    Item->trimLiveRange(Cur->getLiveRange().getStart());
    if (Item->rangeEndsBefore(Cur)) {
      // Move Item from Inactive to Handled list.
      dumpLiveRangeTrace("Expiring     ", Item);
      moveItem(Inactive, Index, Handled);
    } else if (Item->rangeOverlapsStart(Cur)) {
      // Move Item from Inactive to Active list.
      dumpLiveRangeTrace("Reactivating ", Item);
      moveItem(Inactive, Index, Active);
      // Increment Item in RegUses[].
      assert(Item->hasRegTmp());
      const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
      for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
        assert(RegUses[RegAlias] >= 0);
        ++RegUses[RegAlias];
      }
    }
  }
}

// Infer register preference and allowable overlap. Only form a preference when
// the current Variable has an unambiguous "first" definition. The preference is
// some source Variable of the defining instruction that either is assigned a
// register that is currently free, or that is assigned a register that is not
// free but overlap is allowed. Overlap is allowed when the Variable under
// consideration is single-definition, and its definition is a simple
// assignment, i.e., the register gets copied/aliased but is never modified.
// Furthermore, overlap is only allowed when the preferred Variable's
// definition instructions do not appear within the current Variable's live
// range.
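//
// Illustrative example (the names and register are hypothetical): for a
// single-definition assignment "B = A" where A currently holds register r1,
// B prefers r1. If r1 is free, the preference is taken outright; if r1 is
// still held by A but A has no definitions inside B's live range, overlap is
// allowed and B may share r1 with A.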
void LinearScan::findRegisterPreference(IterationState &Iter) {
  Iter.Prefer = nullptr;
  Iter.PreferReg = RegNumT();
  Iter.AllowOverlap = false;

  if (!FindPreference)
    return;

  VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur);
  if (DefInst == nullptr)
    return;

  assert(DefInst->getDest() == Iter.Cur);
  const bool IsSingleDefAssign =
      DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur);
  FOREACH_VAR_IN_INST(SrcVar, *DefInst) {
    // Only consider source variables that have (so far) been assigned a
    // register.
    if (!SrcVar->hasRegTmp())
      continue;

    // That register must be one in the RegMask set, e.g. don't try to prefer
    // the stack pointer as a result of the stacksave intrinsic.
    const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
    const int SrcReg = (Iter.RegMask & Aliases).find_first();
    if (SrcReg == -1)
      continue;

    if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) {
      // Don't bother trying to enable AllowOverlap if the register is already
      // free (hence the test on Iter.Free[SrcReg]).
      Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar);
    }
    if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
      Iter.Prefer = SrcVar;
      Iter.PreferReg = RegNumT::fromInt(SrcReg);
      // Stop looking for a preference after the first valid preference is
      // found.  One might think that we should look at all instruction
      // variables to find the best <Prefer,AllowOverlap> combination, but note
      // that AllowOverlap can only be true for a simple assignment statement
      // which can have only one source operand, so it's not possible for
      // AllowOverlap to be true beyond the first source operand.
      FOREACH_VAR_IN_INST_BREAK;
    }
  }
  if (BuildDefs::dump() && Verbose && Iter.Prefer) {
    Ostream &Str = Ctx->getStrDump();
    Str << "Initial Iter.Prefer=";
    Iter.Prefer->dump(Func);
    Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange()
        << " Overlap=" << Iter.AllowOverlap << "\n";
  }
}

// Remove registers from the Iter.Free[] list where an Inactive range overlaps
// with the current range.
void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
  for (const Variable *Item : Inactive) {
    if (!Item->rangeOverlaps(Iter.Cur))
      continue;
    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      // Don't assert(Iter.Free[RegAlias]) because in theory (though probably
      // never in practice) there could be two inactive variables that were
      // marked with AllowOverlap.
      Iter.Free[RegAlias] = false;
      Iter.FreeUnfiltered[RegAlias] = false;
      // Disable AllowOverlap if an Inactive variable that is not Prefer shares
      // Prefer's register and has a definition within Cur's live range.
      if (Iter.AllowOverlap && Item != Iter.Prefer &&
          RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
        Iter.AllowOverlap = false;
        dumpDisableOverlap(Func, Item, "Inactive");
      }
    }
  }
}

// Remove registers from the Iter.Free[] list where an Unhandled pre-colored
// range overlaps with the current range, and set those registers to infinite
// weight so that they aren't candidates for eviction.
// The Cur->rangeEndsBefore(Item) early-exit check turns a guaranteed O(N^2)
// algorithm into expected linear complexity: the pre-colored ranges are
// visited in increasing order of start point, so once Cur ends before Item
// begins, it also ends before every subsequent Item.
void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
  // TODO(stichnot): Partition UnhandledPrecolored according to register class,
  // to restrict the number of overlap comparisons needed.
  for (Variable *Item : reverse_range(UnhandledPrecolored)) {
    assert(Item->hasReg());
    if (Iter.Cur->rangeEndsBefore(Item))
      break;
    if (!Item->rangeOverlaps(Iter.Cur))
      continue;
    const auto &Aliases =
        *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
      Iter.Free[RegAlias] = false;
      Iter.FreeUnfiltered[RegAlias] = false;
      Iter.PrecoloredUnhandledMask[RegAlias] = true;
      // Disable Iter.AllowOverlap if the preferred register is one of these
      // pre-colored unhandled overlapping ranges.
      if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) {
        Iter.AllowOverlap = false;
        dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
      }
    }
  }
}

void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
  const auto RegNum = Cur->getRegNum();
  // RegNumTmp should have already been set above.
  assert(Cur->getRegNumTmp() == RegNum);
  dumpLiveRangeTrace("Precoloring  ", Cur);
  Active.push_back(Cur);
  const auto &Aliases = *RegAliases[RegNum];
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  assert(!UnhandledPrecolored.empty());
  assert(UnhandledPrecolored.back() == Cur);
  UnhandledPrecolored.pop_back();
}

void LinearScan::allocatePreferredRegister(IterationState &Iter) {
  Iter.Cur->setRegNumTmp(Iter.PreferReg);
  dumpLiveRangeTrace("Preferring   ", Iter.Cur);
  const auto &Aliases = *RegAliases[Iter.PreferReg];
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  Active.push_back(Iter.Cur);
}

void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
  const RegNumT RegNum =
      *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
  Iter.Cur->setRegNumTmp(RegNum);
  if (Filtered)
    dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
  else
    dumpLiveRangeTrace("Allocating X ", Iter.Cur);
  const auto &Aliases = *RegAliases[RegNum];
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  Active.push_back(Iter.Cur);
}

void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
  // Check Active ranges.
  for (const Variable *Item : Active) {
    assert(Item->rangeOverlaps(Iter.Cur));
    assert(Item->hasRegTmp());
    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    // We add the Item's weight to each alias/subregister to represent that,
    // should we decide to pick any of them, we would incur that many memory
    // accesses.
    RegWeight W = Item->getWeight(Func);
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      Iter.Weights[RegAlias].addWeight(W);
    }
  }
  // Same as above, but check Inactive ranges instead of Active.
  for (const Variable *Item : Inactive) {
    if (!Item->rangeOverlaps(Iter.Cur))
      continue;
    assert(Item->hasRegTmp());
    const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
    RegWeight W = Item->getWeight(Func);
    for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
      Iter.Weights[RegAlias].addWeight(W);
    }
  }

  // All the weights are now calculated. Find the lowest-weight register.
  int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);

  if (MinWeightIndex < 0 ||
      Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
    if (!Iter.Cur->mustHaveReg()) {
      // Iter.Cur doesn't have priority over any other live ranges, so don't
      // allocate any register to it, and move it to the Handled state.
      Handled.push_back(Iter.Cur);
      return;
    }
    if (Kind == RAK_Phi) {
      // Iter.Cur is infinite-weight but all physical registers are already
      // taken, so we need to force one to be temporarily available.
      addSpillFill(Iter);
      Handled.push_back(Iter.Cur);
      return;
    }
    // The remaining portion of the enclosing "if" block should only be
    // reachable if we are manually limiting physical registers for testing.
    if (UseReserve) {
      if (Iter.FreeUnfiltered.any()) {
        // There is some available physical register held in reserve, so use it.
        constexpr bool NotFiltered = false;
        allocateFreeRegister(Iter, NotFiltered);
        // Iter.Cur is now on the Active list.
        return;
      }
      // At this point, we need to find some reserve register that is already
      // assigned to a non-infinite-weight variable.  This could happen if some
      // variable was previously assigned an alias of such a register.
      MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
    }
    if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
      dumpLiveRangeTrace("Failing      ", Iter.Cur);
      Func->setError("Unable to find a physical register for an "
                     "infinite-weight live range "
                     "(consider using -reg-reserve): " +
                     Iter.Cur->getName());
      Handled.push_back(Iter.Cur);
      return;
    }
    // At this point, MinWeightIndex points to a valid reserve register to
    // reallocate to Iter.Cur, so drop into the eviction code.
  }

  // Evict all Active live ranges that are assigned register number
  // MinWeightIndex or one of its aliases.
  const auto &Aliases = *RegAliases[MinWeightIndex];
  for (SizeT I = Active.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Active[Index];
    const auto RegNum = Item->getRegNumTmp();
    if (Aliases[RegNum]) {
      dumpLiveRangeTrace("Evicting A   ", Item);
      const auto &Aliases = *RegAliases[RegNum];
      for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
        --RegUses[RegAlias];
        assert(RegUses[RegAlias] >= 0);
      }
      Item->setRegNumTmp(RegNumT());
      moveItem(Active, Index, Handled);
      Evicted.push_back(Item);
    }
  }
  // Do the same for Inactive.
  for (SizeT I = Inactive.size(); I > 0; --I) {
    const SizeT Index = I - 1;
    Variable *Item = Inactive[Index];
    // Note: The Item->rangeOverlaps(Cur) clause is not part of the description
    // of AssignMemLoc() in the original paper. But there doesn't seem to be any
    // need to evict an inactive live range that doesn't overlap with the live
    // range currently being considered. It's especially bad if we would end up
    // evicting an infinite-weight but currently-inactive live range. The most
    // common situation for this would be a scratch register kill set for call
    // instructions.
    if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
      dumpLiveRangeTrace("Evicting I   ", Item);
      Item->setRegNumTmp(RegNumT());
      moveItem(Inactive, Index, Handled);
      Evicted.push_back(Item);
    }
  }
  // Assign the register to Cur.
  Iter.Cur->setRegNumTmp(RegNumT::fromInt(MinWeightIndex));
  for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
    assert(RegUses[RegAlias] >= 0);
    ++RegUses[RegAlias];
  }
  Active.push_back(Iter.Cur);
  dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
}

void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull,
                                      const SmallBitVector &PreDefinedRegisters,
                                      bool Randomized) {
  const size_t NumRegisters = RegMaskFull.size();
  llvm::SmallVector<RegNumT, REGS_SIZE> Permutation(NumRegisters);
  if (Randomized) {
    // Create the salt for regalloc randomization by merging the function's
    // sequence number and the allocation Kind. Because regAlloc() is called
    // twice under O2, the second time with RAK_Phi, the Kind == RAK_Phi test
    // determines the lowest-order bit so that the two passes get different
    // salts.
    uint64_t Salt =
        (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
    Target->makeRandomRegisterPermutation(
        Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
  }

  // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
  // for each Variable.
  for (Variable *Item : Handled) {
    const auto RegNum = Item->getRegNumTmp();
    auto AssignedRegNum = RegNum;

    if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
      AssignedRegNum = Permutation[RegNum];
    }
    if (BuildDefs::dump() && Verbose) {
      Ostream &Str = Ctx->getStrDump();
      if (!Item->hasRegTmp()) {
        Str << "Not assigning ";
        Item->dump(Func);
        Str << "\n";
      } else {
        Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
                                                    : "Assigning ")
            << Target->getRegName(AssignedRegNum, Item->getType()) << "(r"
            << AssignedRegNum << ") to ";
        Item->dump(Func);
        Str << "\n";
      }
    }
    Item->setRegNum(AssignedRegNum);
  }
}

// Implements the linear-scan algorithm. Based on "Linear Scan Register
// Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
// Mössenböck and Michael Pfeiffer,
// ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
// modified to take affinity into account and allow two interfering variables to
// share the same register in certain cases.
//
// Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
// are assigned to Variable::RegNum for each Variable.
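//
// As a summary of the main loop below (describing, not adding, behavior):
// each iteration pops the Unhandled range with the smallest start point,
// moves Active ranges that have ended to Handled and Active ranges with a
// hole at the current point to Inactive, moves Inactive ranges that have
// resumed back to Active, recomputes the free registers from RegUses, and
// then tries, in order, a preferred register, any free register, and finally
// eviction of a lower-weight range.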
void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
  TimerMarker T(TimerStack::TT_linearScan, Func);
  assert(RegMaskFull.any()); // Sanity check
  if (Verbose)
    Ctx->lockStr();
  Func->resetCurrentNode();
  const size_t NumRegisters = RegMaskFull.size();
  SmallBitVector PreDefinedRegisters(NumRegisters);
  if (Randomized) {
    for (Variable *Var : UnhandledPrecolored) {
      PreDefinedRegisters[Var->getRegNum()] = true;
    }
  }

  // Build a LiveRange representing the Kills list.
  LiveRange KillsRange(Kills);
  KillsRange.untrim();

  // Reset the register use count.
  RegUses.resize(NumRegisters);
  std::fill(RegUses.begin(), RegUses.end(), 0);

  // Unhandled is already set to all ranges in increasing order of start points.
  assert(Active.empty());
  assert(Inactive.empty());
  assert(Handled.empty());
  const TargetLowering::RegSetMask RegsInclude =
      TargetLowering::RegSet_CallerSave;
  const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
  const SmallBitVector KillsMask =
      Target->getRegisterSet(RegsInclude, RegsExclude);

  // Allocate memory once outside the loop.
  IterationState Iter;
  Iter.Weights.reserve(NumRegisters);
  Iter.PrecoloredUnhandledMask.reserve(NumRegisters);

  while (!Unhandled.empty()) {
    Iter.Cur = Unhandled.back();
    Unhandled.pop_back();
    dumpLiveRangeTrace("\nConsidering  ", Iter.Cur);
    if (UseReserve)
      assert(Target->getAllRegistersForVariable(Iter.Cur).any());
    else
      assert(Target->getRegistersForVariable(Iter.Cur).any());
    Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
    Iter.RegMaskUnfiltered =
        RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
    KillsRange.trim(Iter.Cur->getLiveRange().getStart());

    // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
    // that register. Previously processed live ranges would have avoided that
    // register because it is pre-colored. Live ranges processed later won't
    // evict that register because this pre-colored range has infinite weight.
    if (Iter.Cur->hasReg()) {
      allocatePrecoloredRegister(Iter.Cur);
      continue;
    }

    handleActiveRangeExpiredOrInactive(Iter.Cur);
    handleInactiveRangeExpiredOrReactivated(Iter.Cur);

    // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
    Iter.Free = Iter.RegMask;
    Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
    for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
      if (RegUses[i] > 0) {
        Iter.Free[i] = false;
        Iter.FreeUnfiltered[i] = false;
      }
    }

    findRegisterPreference(Iter);
    filterFreeWithInactiveRanges(Iter);

    // Disable AllowOverlap if an Active variable that is not Prefer shares
    // Prefer's register and has a definition within Cur's live range.
    if (Iter.AllowOverlap) {
      const auto &Aliases = *RegAliases[Iter.PreferReg];
      for (const Variable *Item : Active) {
        const RegNumT RegNum = Item->getRegNumTmp();
        if (Item != Iter.Prefer && Aliases[RegNum] &&
            overlapsDefs(Func, Iter.Cur, Item)) {
          Iter.AllowOverlap = false;
          dumpDisableOverlap(Func, Item, "Active");
        }
      }
    }

    Iter.Weights.resize(Iter.RegMask.size());
    std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());

    Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
    Iter.PrecoloredUnhandledMask.reset();

    filterFreeWithPrecoloredRanges(Iter);

    // Remove scratch registers from the Iter.Free[] list, and mark their
    // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
    constexpr bool UseTrimmed = true;
    if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
      Iter.Free.reset(KillsMask);
      Iter.FreeUnfiltered.reset(KillsMask);
      for (RegNumT i : RegNumBVIter(KillsMask)) {
        Iter.Weights[i].setWeight(RegWeight::Inf);
        if (Iter.PreferReg == i)
          Iter.AllowOverlap = false;
      }
    }

    // Print info about physical register availability.
    if (BuildDefs::dump() && Verbose) {
      Ostream &Str = Ctx->getStrDump();
      for (RegNumT i : RegNumBVIter(Iter.RegMaskUnfiltered)) {
        Str << Target->getRegName(i, Iter.Cur->getType()) << "(U=" << RegUses[i]
            << ",F=" << Iter.Free[i] << ",P=" << Iter.PrecoloredUnhandledMask[i]
            << ") ";
      }
      Str << "\n";
    }

    if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
      // First choice: a preferred register that is either free or is allowed to
      // overlap with its linked variable.
      allocatePreferredRegister(Iter);
    } else if (Iter.Free.any()) {
      // Second choice: any free register.
      constexpr bool Filtered = true;
      allocateFreeRegister(Iter, Filtered);
    } else {
      // Fallback: there are no free registers, so we look for the lowest-weight
      // register and see if Cur has higher weight.
      handleNoFreeRegisters(Iter);
    }
    dump(Func);
  }

  // Move anything Active or Inactive to Handled for easier handling.
  Handled.insert(Handled.end(), Active.begin(), Active.end());
  Active.clear();
  Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
  Inactive.clear();
  dump(Func);

  assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);

  if (Verbose)
    Ctx->unlockStr();
}

// ======================== Dump routines ======================== //

void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
  if (!BuildDefs::dump())
    return;

  if (Verbose) {
    Ostream &Str = Ctx->getStrDump();
    Str << Label;
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

void LinearScan::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  if (!Verbose)
    return;
  Ostream &Str = Func->getContext()->getStrDump();
  Func->resetCurrentNode();
  Str << "**** Current regalloc state:\n";
  Str << "++++++ Handled:\n";
  for (const Variable *Item : Handled) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Unhandled:\n";
  for (const Variable *Item : reverse_range(Unhandled)) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Active:\n";
  for (const Variable *Item : Active) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
  Str << "++++++ Inactive:\n";
  for (const Variable *Item : Inactive) {
    dumpLiveRange(Item, Func);
    Str << "\n";
  }
}

} // end of namespace Ice