//===- subzero/src/IceTargetLowering.cpp - Basic lowering implementation --===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the skeleton of the TargetLowering class.
///
/// Specifically, this invokes the appropriate lowering method for a given
/// instruction kind and drives global register allocation. It also implements
/// the non-deleted instruction iteration in LoweringContext.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLowering.h"

#include "IceBitVector.h"
#include "IceCfg.h" // setError()
#include "IceCfgNode.h"
#include "IceGlobalContext.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRegAlloc.h"

#include <string>
#include <vector>

#define TARGET_LOWERING_CLASS_FOR(t) Target_##t

// We prevent target-specific implementation details from leaking outside their
// implementations by forbidding #include of target-specific header files
// anywhere outside their own files. To create target-specific objects
// (TargetLowering, TargetDataLowering, and TargetHeaderLowering) we use the
// following named constructors. For reference, each target Foo needs to
// implement the following named constructors and initializer:
//
// namespace Foo {
//   unique_ptr<Ice::TargetLowering> createTargetLowering(Ice::Cfg *);
//   unique_ptr<Ice::TargetDataLowering>
//       createTargetDataLowering(Ice::GlobalContext*);
//   unique_ptr<Ice::TargetHeaderLowering>
//       createTargetHeaderLowering(Ice::GlobalContext *);
//   void staticInit(::Ice::GlobalContext *);
// }
#define SUBZERO_TARGET(X)                                                      \
  namespace X {                                                                \
  std::unique_ptr<::Ice::TargetLowering>                                       \
  createTargetLowering(::Ice::Cfg *Func);                                      \
  std::unique_ptr<::Ice::TargetDataLowering>                                   \
  createTargetDataLowering(::Ice::GlobalContext *Ctx);                         \
  std::unique_ptr<::Ice::TargetHeaderLowering>                                 \
  createTargetHeaderLowering(::Ice::GlobalContext *Ctx);                       \
  void staticInit(::Ice::GlobalContext *Ctx);                                  \
  bool shouldBePooled(const ::Ice::Constant *C);                               \
  ::Ice::Type getPointerType();                                                \
  } // end of namespace X
#include "SZTargets.def"
#undef SUBZERO_TARGET

namespace Ice {

void LoweringContext::init(CfgNode *N) {
  Node = N;
  End = getNode()->getInsts().end();
  rewind();
  advanceForward(Next);
}

void LoweringContext::rewind() {
  Begin = getNode()->getInsts().begin();
  Cur = Begin;
  skipDeleted(Cur);
  Next = Cur;
  availabilityReset();
}

void LoweringContext::insert(Inst *Instr) {
  getNode()->getInsts().insert(Next, Instr);
  LastInserted = Instr;
}

void LoweringContext::skipDeleted(InstList::iterator &I) const {
  while (I != End && I->isDeleted())
    ++I;
}

void LoweringContext::advanceForward(InstList::iterator &I) const {
  if (I != End) {
    ++I;
    skipDeleted(I);
  }
}

Inst *LoweringContext::getLastInserted() const {
  assert(LastInserted);
  return LastInserted;
}

void LoweringContext::availabilityReset() {
  LastDest = nullptr;
  LastSrc = nullptr;
}

void LoweringContext::availabilityUpdate() {
  availabilityReset();
  Inst *Instr = LastInserted;
  if (Instr == nullptr)
    return;
  if (!Instr->isVarAssign())
    return;
  // Since isVarAssign() is true, the source operand must be a Variable.
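  // Record the simple assignment "LastDest = LastSrc" so that a subsequent
  // availabilityGet(LastDest) query can substitute LastSrc directly.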
  LastDest = Instr->getDest();
  LastSrc = llvm::cast<Variable>(Instr->getSrc(0));
}

Variable *LoweringContext::availabilityGet(Operand *Src) const {
  assert(Src);
  if (Src == LastDest)
    return LastSrc;
  return nullptr;
}

namespace {

void printRegisterSet(Ostream &Str, const SmallBitVector &Bitset,
                      std::function<std::string(RegNumT)> getRegName,
                      const std::string &LineIndentString) {
  constexpr size_t RegistersPerLine = 16;
  size_t Count = 0;
  for (RegNumT RegNum : RegNumBVIter(Bitset)) {
    if (Count == 0) {
      Str << LineIndentString;
    } else {
      Str << ",";
    }
    if (Count > 0 && Count % RegistersPerLine == 0)
      Str << "\n" << LineIndentString;
    ++Count;
    Str << getRegName(RegNum);
  }
  if (Count)
    Str << "\n";
}

// Splits "<class>:<reg>" into "<class>" plus "<reg>". If there is no <class>
// component, the result is "" plus "<reg>".
void splitToClassAndName(const std::string &RegName, std::string *SplitRegClass,
                         std::string *SplitRegName) {
  constexpr const char Separator[] = ":";
  constexpr size_t SeparatorWidth = llvm::array_lengthof(Separator) - 1;
  size_t Pos = RegName.find(Separator);
  if (Pos == std::string::npos) {
    *SplitRegClass = "";
    *SplitRegName = RegName;
  } else {
    *SplitRegClass = RegName.substr(0, Pos);
    *SplitRegName = RegName.substr(Pos + SeparatorWidth);
  }
}

LLVM_ATTRIBUTE_NORETURN void badTargetFatalError(TargetArch Target) {
  llvm::report_fatal_error("Unsupported target: " +
                           std::string(targetArchString(Target)));
}

} // end of anonymous namespace

void TargetLowering::filterTypeToRegisterSet(
    GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
    size_t TypeToRegisterSetSize,
    std::function<std::string(RegNumT)> getRegName,
    std::function<const char *(RegClass)> getRegClassName) {
  std::vector<SmallBitVector> UseSet(TypeToRegisterSetSize,
                                     SmallBitVector(NumRegs));
  std::vector<SmallBitVector> ExcludeSet(TypeToRegisterSetSize,
                                         SmallBitVector(NumRegs));

  std::unordered_map<std::string, RegNumT> RegNameToIndex;
  for (int32_t RegIndex = 0; RegIndex < NumRegs; ++RegIndex) {
    const auto RegNum = RegNumT::fromInt(RegIndex);
    RegNameToIndex[getRegName(RegNum)] = RegNum;
  }

  std::vector<std::string> BadRegNames;

  // The processRegList function iterates across the RegNames vector. Each
  // entry in the vector is a string of the form "<reg>" or "<class>:<reg>".
  // The register class and register number are computed, and the corresponding
  // bit is set in RegSet[][]. If "<class>:" is missing, then the bit is set
  // for all classes.
  auto processRegList = [&](const std::vector<std::string> &RegNames,
                            std::vector<SmallBitVector> &RegSet) {
    for (const std::string &RegClassAndName : RegNames) {
      std::string RClass;
      std::string RName;
      splitToClassAndName(RegClassAndName, &RClass, &RName);
      if (!RegNameToIndex.count(RName)) {
        BadRegNames.push_back(RName);
        continue;
      }
      const int32_t RegIndex = RegNameToIndex.at(RName);
      for (SizeT TypeIndex = 0; TypeIndex < TypeToRegisterSetSize;
           ++TypeIndex) {
        if (RClass.empty() ||
            RClass == getRegClassName(static_cast<RegClass>(TypeIndex))) {
          RegSet[TypeIndex][RegIndex] = TypeToRegisterSet[TypeIndex][RegIndex];
        }
      }
    }
  };

  processRegList(getFlags().getUseRestrictedRegisters(), UseSet);
  processRegList(getFlags().getExcludedRegisters(), ExcludeSet);

  if (!BadRegNames.empty()) {
    std::string Buffer;
    llvm::raw_string_ostream StrBuf(Buffer);
    StrBuf << "Unrecognized use/exclude registers:";
    for (const auto &RegName : BadRegNames)
      StrBuf << " " << RegName;
    llvm::report_fatal_error(StrBuf.str());
  }

  // Apply filters.
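  // For each register class, if the use list named any of its registers,
  // restrict the class to exactly those registers, then clear any registers
  // named in the exclude list.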
  for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
    SmallBitVector *TypeBitSet = &TypeToRegisterSet[TypeIndex];
    SmallBitVector *UseBitSet = &UseSet[TypeIndex];
    SmallBitVector *ExcludeBitSet = &ExcludeSet[TypeIndex];
    if (UseBitSet->any())
      *TypeBitSet = *UseBitSet;
    (*TypeBitSet).reset(*ExcludeBitSet);
  }

  // Display filtered register sets, if requested.
  if (BuildDefs::dump() && NumRegs &&
      (getFlags().getVerbose() & IceV_AvailableRegs)) {
    Ostream &Str = Ctx->getStrDump();
    const std::string Indent = "  ";
    const std::string IndentTwice = Indent + Indent;
    Str << "Registers available for register allocation:\n";
    for (size_t TypeIndex = 0; TypeIndex < TypeToRegisterSetSize; ++TypeIndex) {
      Str << Indent << getRegClassName(static_cast<RegClass>(TypeIndex))
          << ":\n";
      printRegisterSet(Str, TypeToRegisterSet[TypeIndex], getRegName,
                       IndentTwice);
    }
    Str << "\n";
  }
}

std::unique_ptr<TargetLowering>
TargetLowering::createLowering(TargetArch Target, Cfg *Func) {
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::createTargetLowering(Func);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

void TargetLowering::staticInit(GlobalContext *Ctx) {
  const TargetArch Target = getFlags().getTargetArch();
  // Call the specified target's static initializer.
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X): {                                         \
    static bool InitGuard##X = false;                                          \
    if (InitGuard##X) {                                                        \
      return;                                                                  \
    }                                                                          \
    InitGuard##X = true;                                                       \
    ::X::staticInit(Ctx);                                                      \
  } break;
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

bool TargetLowering::shouldBePooled(const Constant *C) {
  const TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    return false;
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::shouldBePooled(C);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

::Ice::Type TargetLowering::getPointerType() {
  const TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    return ::Ice::IceType_void;
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::getPointerType();
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetLowering::SandboxType
TargetLowering::determineSandboxTypeFromFlags(const ClFlags &Flags) {
  assert(!Flags.getUseSandboxing() || !Flags.getUseNonsfi());
  if (Flags.getUseNonsfi()) {
    return TargetLowering::ST_Nonsfi;
  }
  if (Flags.getUseSandboxing()) {
    return TargetLowering::ST_NaCl;
  }
  return TargetLowering::ST_None;
}

TargetLowering::TargetLowering(Cfg *Func)
    : Func(Func), Ctx(Func->getContext()),
      SandboxingType(determineSandboxTypeFromFlags(getFlags())) {}

TargetLowering::AutoBundle::AutoBundle(TargetLowering *Target,
                                       InstBundleLock::Option Option)
    : Target(Target), NeedSandboxing(getFlags().getUseSandboxing()) {
  assert(!Target->AutoBundling);
  Target->AutoBundling = true;
  if (NeedSandboxing) {
    Target->_bundle_lock(Option);
  }
}

TargetLowering::AutoBundle::~AutoBundle() {
  assert(Target->AutoBundling);
  Target->AutoBundling = false;
  if (NeedSandboxing) {
    Target->_bundle_unlock();
  }
}

void TargetLowering::genTargetHelperCalls() {
  TimerMarker T(TimerStack::TT_genHelpers, Func);
  Utils::BoolFlagSaver _(GeneratingTargetHelpers, true);
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext _(Context);
      genTargetHelperCallFor(iteratorToInst(Context.getCur()));
    }
  }
}

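// Applies the address-mode optimization appropriate for the kind of the
// current instruction (load, store, or a load/store subvector intrinsic),
// after giving the target a chance to handle other instruction kinds.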
void TargetLowering::doAddressOpt() {
  doAddressOptOther();
  if (llvm::isa<InstLoad>(*Context.getCur()))
    doAddressOptLoad();
  else if (llvm::isa<InstStore>(*Context.getCur()))
    doAddressOptStore();
  else if (auto *Intrinsic =
               llvm::dyn_cast<InstIntrinsicCall>(&*Context.getCur())) {
    if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::LoadSubVector)
      doAddressOptLoadSubVector();
    else if (Intrinsic->getIntrinsicInfo().ID == Intrinsics::StoreSubVector)
      doAddressOptStoreSubVector();
  }
  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::doNopInsertion(RandomNumberGenerator &RNG) {
  Inst *I = iteratorToInst(Context.getCur());
  bool ShouldSkip = llvm::isa<InstFakeUse>(I) || llvm::isa<InstFakeDef>(I) ||
                    llvm::isa<InstFakeKill>(I) || I->isRedundantAssign() ||
                    I->isDeleted();
  if (!ShouldSkip) {
    int Probability = getFlags().getNopProbabilityAsPercentage();
    for (int I = 0; I < getFlags().getMaxNopsPerInstruction(); ++I) {
      randomlyInsertNop(Probability / 100.0, RNG);
    }
  }
}

// Lowers a single instruction according to the information in Context, by
// checking the Context.Cur instruction kind and calling the appropriate
// lowering method. The lowering method should insert target instructions at
// the Cur.Next insertion point, and should not delete the Context.Cur
// instruction or advance Context.Cur.
//
// The lowering method may look ahead in the instruction stream as desired, and
// lower additional instructions in conjunction with the current one, for
// example fusing a compare and branch. If it does, it should advance
// Context.Cur to point to the next non-deleted instruction to process, and it
// should delete any additional instructions it consumes.
void TargetLowering::lower() {
  assert(!Context.atEnd());
  Inst *Instr = iteratorToInst(Context.getCur());
  Instr->deleteIfDead();
  if (!Instr->isDeleted() && !llvm::isa<InstFakeDef>(Instr) &&
      !llvm::isa<InstFakeUse>(Instr)) {
    // Mark the current instruction as deleted before lowering, otherwise the
    // Dest variable will likely get marked as non-SSA. See
    // Variable::setDefinition(). However, just pass through FakeDef and
    // FakeUse instructions that might have been inserted prior to lowering.
    Instr->setDeleted();
    switch (Instr->getKind()) {
    case Inst::Alloca:
      lowerAlloca(llvm::cast<InstAlloca>(Instr));
      break;
    case Inst::Arithmetic:
      lowerArithmetic(llvm::cast<InstArithmetic>(Instr));
      break;
    case Inst::Assign:
      lowerAssign(llvm::cast<InstAssign>(Instr));
      break;
    case Inst::Br:
      lowerBr(llvm::cast<InstBr>(Instr));
      break;
    case Inst::Breakpoint:
      lowerBreakpoint(llvm::cast<InstBreakpoint>(Instr));
      break;
    case Inst::Call:
      lowerCall(llvm::cast<InstCall>(Instr));
      break;
    case Inst::Cast:
      lowerCast(llvm::cast<InstCast>(Instr));
      break;
    case Inst::ExtractElement:
      lowerExtractElement(llvm::cast<InstExtractElement>(Instr));
      break;
    case Inst::Fcmp:
      lowerFcmp(llvm::cast<InstFcmp>(Instr));
      break;
    case Inst::Icmp:
      lowerIcmp(llvm::cast<InstIcmp>(Instr));
      break;
    case Inst::InsertElement:
      lowerInsertElement(llvm::cast<InstInsertElement>(Instr));
      break;
    case Inst::IntrinsicCall: {
      auto *Call = llvm::cast<InstIntrinsicCall>(Instr);
      if (Call->getIntrinsicInfo().ReturnsTwice)
        setCallsReturnsTwice(true);
      lowerIntrinsicCall(Call);
      break;
    }
    case Inst::Load:
      lowerLoad(llvm::cast<InstLoad>(Instr));
      break;
    case Inst::Phi:
      lowerPhi(llvm::cast<InstPhi>(Instr));
      break;
    case Inst::Ret:
      lowerRet(llvm::cast<InstRet>(Instr));
      break;
    case Inst::Select:
      lowerSelect(llvm::cast<InstSelect>(Instr));
      break;
    case Inst::ShuffleVector:
      lowerShuffleVector(llvm::cast<InstShuffleVector>(Instr));
      break;
    case Inst::Store:
      lowerStore(llvm::cast<InstStore>(Instr));
      break;
    case Inst::Switch:
      lowerSwitch(llvm::cast<InstSwitch>(Instr));
      break;
    case Inst::Unreachable:
      lowerUnreachable(llvm::cast<InstUnreachable>(Instr));
      break;
    default:
      lowerOther(Instr);
      break;
    }
    postLower();
  }
  Context.advanceCur();
  Context.advanceNext();
}

void TargetLowering::lowerInst(CfgNode *Node, InstList::iterator Next,
                               InstHighLevel *Instr) {
  // TODO(stichnot): Consider modifying the design/implementation to avoid
  // multiple init() calls when using lowerInst() to lower several instructions
  // in the same node.
  Context.init(Node);
  Context.setNext(Next);
  Context.insert(Instr);
  --Next;
  assert(iteratorToInst(Next) == Instr);
  Context.setCur(Next);
  lower();
}

void TargetLowering::lowerOther(const Inst *Instr) {
  (void)Instr;
  Func->setError("Can't lower unsupported instruction type");
}

// Drives register allocation, allowing all physical registers (except perhaps
// for the frame pointer) to be allocated. This set of registers could
// potentially be parameterized if we want to restrict registers e.g. for
// performance testing.
void TargetLowering::regAlloc(RegAllocKind Kind) {
  TimerMarker T(TimerStack::TT_regAlloc, Func);
  LinearScan LinearScan(Func);
  RegSetMask RegInclude = RegSet_None;
  RegSetMask RegExclude = RegSet_None;
  RegInclude |= RegSet_CallerSave;
  RegInclude |= RegSet_CalleeSave;
  if (hasFramePointer())
    RegExclude |= RegSet_FramePointer;
  SmallBitVector RegMask = getRegisterSet(RegInclude, RegExclude);
  bool Repeat = (Kind == RAK_Global && getFlags().getRepeatRegAlloc());
  CfgSet<Variable *> EmptySet;
  do {
    LinearScan.init(Kind, EmptySet);
    LinearScan.scan(RegMask, getFlags().getRandomizeRegisterAllocation());
    if (!LinearScan.hasEvictions())
      Repeat = false;
    Kind = RAK_SecondChance;
  } while (Repeat);
  // TODO(stichnot): Run the register allocator one more time to do stack slot
  // coalescing. The idea would be to initialize the Unhandled list with the
  // set of Variables that have no register and a non-empty live range, and
  // model an infinite number of registers. Maybe use the register aliasing
  // mechanism to get better packing of narrower slots.
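  // Optionally split the live ranges of multi-block variables that did not get
  // a register and rerun allocation on the pieces; see postRegallocSplitting()
  // below.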
  if (getFlags().getSplitGlobalVars())
    postRegallocSplitting(RegMask);
}

namespace {

CfgVector<Inst *> getInstructionsInRange(CfgNode *Node, InstNumberT Start,
                                         InstNumberT End) {
  CfgVector<Inst *> Result;
  bool Started = false;
  auto Process = [&](InstList &Insts) {
    for (auto &Instr : Insts) {
      if (Instr.isDeleted()) {
        continue;
      }
      if (Instr.getNumber() == Start) {
        Started = true;
      }
      if (Started) {
        Result.emplace_back(&Instr);
      }
      if (Instr.getNumber() == End) {
        break;
      }
    }
  };
  Process(Node->getPhis());
  Process(Node->getInsts());
  // TODO(manasijm): Investigate why checking >= End significantly changes
  // output. Should not happen when renumbering produces monotonically
  // increasing instruction numbers and live ranges begin and end on
  // non-deleted instructions.
  return Result;
}

} // end of anonymous namespace

void TargetLowering::postRegallocSplitting(const SmallBitVector &RegMask) {
  // Splits the live ranges of global (multi-block) variables and runs the
  // register allocator to find registers for as many of the new variables as
  // possible.
  // TODO(manasijm): Merge the small live ranges back into multi-block ones
  // when the variables get the same register. This will reduce the number of
  // new instructions inserted. This might involve a full dataflow analysis.
  // Also, modify the preference mechanism in the register allocator to match.

  TimerMarker _(TimerStack::TT_splitGlobalVars, Func);
  CfgSet<Variable *> SplitCandidates;

  // Find variables that do not have registers but are allowed to. Also skip
  // variables with single-segment live ranges, as they are not split further
  // in this function.
  for (Variable *Var : Func->getVariables()) {
    if (!Var->mustNotHaveReg() && !Var->hasReg()) {
      if (Var->getLiveRange().getNumSegments() > 1)
        SplitCandidates.insert(Var);
    }
  }
  if (SplitCandidates.empty())
    return;

  CfgSet<Variable *> ExtraVars;

  struct UseInfo {
    Variable *Replacing = nullptr;
    Inst *FirstUse = nullptr;
    Inst *LastDef = nullptr;
    SizeT UseCount = 0;
  };
  CfgUnorderedMap<Variable *, UseInfo> VarInfo;

  // Split the live ranges of the viable variables by node.
  // Compute metadata (UseInfo) for each of the resulting variables.
  for (auto *Var : SplitCandidates) {
    for (auto &Segment : Var->getLiveRange().getSegments()) {
      UseInfo Info;
      Info.Replacing = Var;
      auto *Node = Var->getLiveRange().getNodeForSegment(Segment.first);

      for (auto *Instr :
           getInstructionsInRange(Node, Segment.first, Segment.second)) {
        for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
          // It's safe to iterate over the top-level src operands rather than
          // using FOREACH_VAR_IN_INST(), because any variables inside e.g.
          // mem operands should already have registers.
          if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
            if (Var == Info.Replacing) {
              if (Info.FirstUse == nullptr && !llvm::isa<InstPhi>(Instr)) {
                Info.FirstUse = Instr;
              }
              Info.UseCount++;
            }
          }
        }
        if (Instr->getDest() == Info.Replacing && !llvm::isa<InstPhi>(Instr)) {
          Info.LastDef = Instr;
        }
      }

      static constexpr SizeT MinUseThreshold = 3;
      // Skip if the variable has fewer than `MinUseThreshold` uses in the
      // segment.
      if (Info.UseCount < MinUseThreshold)
        continue;

      if (!Info.FirstUse && !Info.LastDef) {
        continue;
      }

      LiveRange LR;
      LR.addSegment(Segment);
      Variable *NewVar = Func->makeVariable(Var->getType());
      NewVar->setLiveRange(LR);

      VarInfo[NewVar] = Info;
      ExtraVars.insert(NewVar);
    }
  }

  // Run the register allocator with all these new variables included.
  LinearScan RegAlloc(Func);
  RegAlloc.init(RAK_Global, SplitCandidates);
  RegAlloc.scan(RegMask, getFlags().getRandomizeRegisterAllocation());

  // Modify the Cfg to use the new variables that now have registers.
  for (auto *ExtraVar : ExtraVars) {
    if (!ExtraVar->hasReg()) {
      continue;
    }

    auto &Info = VarInfo[ExtraVar];

    assert(ExtraVar->getLiveRange().getSegments().size() == 1);
    auto Segment = ExtraVar->getLiveRange().getSegments()[0];

    auto *Node =
        Info.Replacing->getLiveRange().getNodeForSegment(Segment.first);

    auto RelevantInsts =
        getInstructionsInRange(Node, Segment.first, Segment.second);

    if (RelevantInsts.empty())
      continue;

    // Replace old variables.
    for (auto *Instr : RelevantInsts) {
      if (llvm::isa<InstPhi>(Instr))
        continue;
      // TODO(manasijm): Figure out how to safely enable replacing phi dest
      // variables. The issue is that we cannot insert low-level mov
      // instructions into the PhiList.
      for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
        // FOREACH_VAR_IN_INST() not needed. Same logic as above.
        if (auto *Var = llvm::dyn_cast<Variable>(Instr->getSrc(i))) {
          if (Var == Info.Replacing) {
            Instr->replaceSource(i, ExtraVar);
          }
        }
      }
      if (Instr->getDest() == Info.Replacing) {
        Instr->replaceDest(ExtraVar);
      }
    }

    assert(Info.FirstUse != Info.LastDef);
    assert(Info.FirstUse || Info.LastDef);

    // Insert spill code.
    if (Info.FirstUse != nullptr) {
      auto *NewInst =
          Func->getTarget()->createLoweredMove(ExtraVar, Info.Replacing);
      Node->getInsts().insert(instToIterator(Info.FirstUse), NewInst);
    }
    if (Info.LastDef != nullptr) {
      auto *NewInst =
          Func->getTarget()->createLoweredMove(Info.Replacing, ExtraVar);
      Node->getInsts().insertAfter(instToIterator(Info.LastDef), NewInst);
    }
  }
}

void TargetLowering::markRedefinitions() {
  // Find (non-SSA) instructions where the Dest variable appears in some source
  // operand, and set the IsDestRedefined flag to keep liveness analysis
  // consistent.
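  // For example, a lowered two-address instruction of the form "a = a + b"
  // both uses and redefines a.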
  for (auto Instr = Context.getCur(), E = Context.getNext(); Instr != E;
       ++Instr) {
    if (Instr->isDeleted())
      continue;
    Variable *Dest = Instr->getDest();
    if (Dest == nullptr)
      continue;
    FOREACH_VAR_IN_INST(Var, *Instr) {
      if (Var == Dest) {
        Instr->setDestRedefined();
        break;
      }
    }
  }
}

void TargetLowering::addFakeDefUses(const Inst *Instr) {
  FOREACH_VAR_IN_INST(Var, *Instr) {
    if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Var)) {
      Context.insert<InstFakeUse>(Var64->getLo());
      Context.insert<InstFakeUse>(Var64->getHi());
    } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Var)) {
      for (Variable *Var : VarVec->getContainers()) {
        Context.insert<InstFakeUse>(Var);
      }
    } else {
      Context.insert<InstFakeUse>(Var);
    }
  }
  Variable *Dest = Instr->getDest();
  if (Dest == nullptr)
    return;
  if (auto *Var64 = llvm::dyn_cast<Variable64On32>(Dest)) {
    Context.insert<InstFakeDef>(Var64->getLo());
    Context.insert<InstFakeDef>(Var64->getHi());
  } else if (auto *VarVec = llvm::dyn_cast<VariableVecOn32>(Dest)) {
    for (Variable *Var : VarVec->getContainers()) {
      Context.insert<InstFakeDef>(Var);
    }
  } else {
    Context.insert<InstFakeDef>(Dest);
  }
}

void TargetLowering::sortVarsByAlignment(VarList &Dest,
                                         const VarList &Source) const {
  Dest = Source;
  // Instead of std::sort, we could do a bucket sort with log2(alignment) as
  // the buckets, if performance is an issue.
  std::sort(Dest.begin(), Dest.end(),
            [this](const Variable *V1, const Variable *V2) {
              const size_t WidthV1 = typeWidthInBytesOnStack(V1->getType());
              const size_t WidthV2 = typeWidthInBytesOnStack(V2->getType());
              if (WidthV1 == WidthV2)
                return V1->getIndex() < V2->getIndex();
              return WidthV1 > WidthV2;
            });
}

void TargetLowering::getVarStackSlotParams(
    VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
    size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
    uint32_t *SpillAreaAlignmentBytes, uint32_t *LocalsSlotsAlignmentBytes,
    std::function<bool(Variable *)> TargetVarHook) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  BitVector IsVarReferenced(Func->getNumVariables());
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (Instr.isDeleted())
        continue;
      if (const Variable *Var = Instr.getDest())
        IsVarReferenced[Var->getIndex()] = true;
      FOREACH_VAR_IN_INST(Var, Instr) {
        IsVarReferenced[Var->getIndex()] = true;
      }
    }
  }

  // If SimpleCoalescing is false, each variable without a register gets its
  // own unique stack slot, which leads to large stack frames. If
  // SimpleCoalescing is true, then each "global" variable without a register
  // gets its own slot, but "local" variable slots are reused across basic
  // blocks. E.g., if A and B are local to block 1 and C is local to block 2,
  // then C may share a slot with A or B.
  //
  // We cannot coalesce stack slots if this function calls a "returns twice"
  // function. In that case, basic blocks may be revisited, and variables local
  // to those basic blocks are actually live until after the called function
  // returns a second time.
  const bool SimpleCoalescing = !callsReturnsTwice();

  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const VarList &Variables = Func->getVariables();
  VarList SpilledVariables;
  for (Variable *Var : Variables) {
    if (Var->hasReg()) {
      // Don't consider a rematerializable variable to be an actual register
      // use (specifically of the frame pointer). Otherwise, the prolog may
      // decide to save the frame pointer twice - once because of the explicit
      // need for a frame pointer, and once because of an active use of a
      // callee-save register.
      if (!Var->isRematerializable())
        RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument either does not need a stack slot (if passed in a register)
    // or already has one (if passed on the stack).
    if (Var->getIsArg()) {
      if (!Var->hasReg()) {
        assert(!Var->hasStackOffset());
        Var->setHasStackOffset();
      }
      continue;
    }
    // An unreferenced variable doesn't need a stack slot.
    if (!IsVarReferenced[Var->getIndex()])
      continue;
    // Let the target check the variable first; a target-specific variable may
    // end up sharing stack slots and therefore not need accounting here.
    if (TargetVarHook(Var))
      continue;
    assert(!Var->hasStackOffset());
    Var->setHasStackOffset();
    SpilledVariables.push_back(Var);
  }

  SortedSpilledVariables.reserve(SpilledVariables.size());
  sortVarsByAlignment(SortedSpilledVariables, SpilledVariables);

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    // We have sorted by alignment, so the first variable we encounter that is
    // located in each area determines the max alignment for the area.
    if (!*SpillAreaAlignmentBytes)
      *SpillAreaAlignmentBytes = Increment;
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        *GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > *SpillAreaSizeBytes)
          *SpillAreaSizeBytes = LocalsSize[NodeIndex];
        if (!*LocalsSlotsAlignmentBytes)
          *LocalsSlotsAlignmentBytes = Increment;
      }
    } else {
      *SpillAreaSizeBytes += Increment;
    }
  }

  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding.
  *SpillAreaSizeBytes += getFlags().getTestStackExtra();
}

void TargetLowering::alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                                          uint32_t SpillAreaAlignmentBytes,
                                          size_t GlobalsSize,
                                          uint32_t LocalsSlotsAlignmentBytes,
                                          uint32_t *SpillAreaPaddingBytes,
                                          uint32_t *LocalsSlotsPaddingBytes) {
  if (SpillAreaAlignmentBytes) {
    uint32_t PaddingStart = SpillAreaStartOffset;
    uint32_t SpillAreaStart =
        Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
    *SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
  }

  // If there are separate globals and locals areas, make sure the locals area
  // is aligned by padding the end of the globals area.
  if (LocalsSlotsAlignmentBytes) {
    uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
    GlobalsAndSubsequentPaddingSize =
        Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
    *LocalsSlotsPaddingBytes = GlobalsAndSubsequentPaddingSize - GlobalsSize;
  }
}

void TargetLowering::assignVarStackSlots(VarList &SortedSpilledVariables,
                                         size_t SpillAreaPaddingBytes,
                                         size_t SpillAreaSizeBytes,
                                         size_t GlobalsAndSubsequentPaddingSize,
                                         bool UsesFramePointer) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  // For testing legalization of large stack offsets on targets with limited
  // offset bits in instruction encodings, add some padding. This assumes that
  // SpillAreaSizeBytes has accounted for the extra test padding. When
  // UsesFramePointer is true, the offset depends on the padding, not just the
  // SpillAreaSizeBytes. On the other hand, when UsesFramePointer is false, the
  // offsets depend on the gap between SpillAreaSizeBytes and
  // SpillAreaPaddingBytes, so we don't increment that.
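  // Note that when UsesFramePointer is true, the offsets computed below are
  // negated (slots are addressed at negative offsets from the frame pointer);
  // otherwise they are measured from the bottom of the spill area.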
  size_t TestPadding = getFlags().getTestStackExtra();
  if (UsesFramePointer)
    SpillAreaPaddingBytes += TestPadding;
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();

  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    if (UsesFramePointer)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}

InstCall *TargetLowering::makeHelperCall(RuntimeHelper FuncID, Variable *Dest,
                                         SizeT MaxSrcs) {
  constexpr bool HasTailCall = false;
  Constant *CallTarget = Ctx->getRuntimeHelperFunc(FuncID);
  InstCall *Call =
      InstCall::create(Func, MaxSrcs, Dest, CallTarget, HasTailCall);
  return Call;
}

bool TargetLowering::shouldOptimizeMemIntrins() {
  return Func->getOptLevel() >= Opt_1 || getFlags().getForceMemIntrinOpt();
}

void TargetLowering::scalarizeArithmetic(InstArithmetic::OpKind Kind,
                                         Variable *Dest, Operand *Src0,
                                         Operand *Src1) {
  scalarizeInstruction(
      Dest,
      [this, Kind](Variable *Dest, Operand *Src0, Operand *Src1) {
        return Context.insert<InstArithmetic>(Kind, Dest, Src0, Src1);
      },
      Src0, Src1);
}

void TargetLowering::emitWithoutPrefix(const ConstantRelocatable *C,
                                       const char *Suffix) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const std::string &EmitStr = C->getEmitString();
  if (!EmitStr.empty()) {
    // C has a custom emit string, so we use it instead of the canonical
    // Name + Offset form.
    Str << EmitStr;
    return;
  }
  Str << C->getName() << Suffix;
  RelocOffsetT Offset = C->getOffset();
  if (Offset) {
    if (Offset > 0)
      Str << "+";
    Str << Offset;
  }
}

std::unique_ptr<TargetDataLowering>
TargetDataLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::createTargetDataLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetDataLowering::~TargetDataLowering() = default;

namespace {

// dataSectionSuffix decides whether to use SectionSuffix or VarName as the
// data section suffix. Essentially, when using separate data sections for
// globals, SectionSuffix is not necessary.
std::string dataSectionSuffix(const std::string &SectionSuffix,
                              const std::string &VarName,
                              const bool DataSections) {
  if (SectionSuffix.empty() && !DataSections) {
    return "";
  }

  if (DataSections) {
    // With data sections we don't need to use the SectionSuffix.
    return "." + VarName;
  }

  assert(!SectionSuffix.empty());
  return "." + SectionSuffix;
}

} // end of anonymous namespace

void TargetDataLowering::emitGlobal(const VariableDeclaration &Var,
                                    const std::string &SectionSuffix) {
  if (!BuildDefs::dump())
    return;

  // If external and not initialized, this must be a cross test. Don't generate
  // a declaration for such cases.
  const bool IsExternal = Var.isExternal() || getFlags().getDisableInternal();
  if (IsExternal && !Var.hasInitializer())
    return;

  Ostream &Str = Ctx->getStrEmit();
  const bool HasNonzeroInitializer = Var.hasNonzeroInitializer();
  const bool IsConstant = Var.getIsConstant();
  const SizeT Size = Var.getNumBytes();
  const std::string Name = Var.getName().toString();

  Str << "\t.type\t" << Name << ",%object\n";

  const bool UseDataSections = getFlags().getDataSections();
  const bool UseNonsfi = getFlags().getUseNonsfi();
  const std::string Suffix =
      dataSectionSuffix(SectionSuffix, Name, UseDataSections);
  if (IsConstant && UseNonsfi)
    Str << "\t.section\t.data.rel.ro" << Suffix << ",\"aw\",%progbits\n";
  else if (IsConstant)
    Str << "\t.section\t.rodata" << Suffix << ",\"a\",%progbits\n";
  else if (HasNonzeroInitializer)
    Str << "\t.section\t.data" << Suffix << ",\"aw\",%progbits\n";
  else
    Str << "\t.section\t.bss" << Suffix << ",\"aw\",%nobits\n";

  if (IsExternal)
    Str << "\t.globl\t" << Name << "\n";

  const uint32_t Align = Var.getAlignment();
  if (Align > 1) {
    assert(llvm::isPowerOf2_32(Align));
    // Use the .p2align directive, since the .align N directive can either
    // interpret N as bytes, or power of 2 bytes, depending on the target.
    Str << "\t.p2align\t" << llvm::Log2_32(Align) << "\n";
  }

  Str << Name << ":\n";

  if (HasNonzeroInitializer) {
    for (const auto *Init : Var.getInitializers()) {
      switch (Init->getKind()) {
      case VariableDeclaration::Initializer::DataInitializerKind: {
        const auto &Data =
            llvm::cast<VariableDeclaration::DataInitializer>(Init)
                ->getContents();
        for (SizeT i = 0; i < Init->getNumBytes(); ++i) {
          Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
        }
        break;
      }
      case VariableDeclaration::Initializer::ZeroInitializerKind:
        Str << "\t.zero\t" << Init->getNumBytes() << "\n";
        break;
      case VariableDeclaration::Initializer::RelocInitializerKind: {
        const auto *Reloc =
            llvm::cast<VariableDeclaration::RelocInitializer>(Init);
        Str << "\t" << getEmit32Directive() << "\t";
        Str << Reloc->getDeclaration()->getName();
        if (Reloc->hasFixup()) {
          // TODO(jpp): this is ARM32 specific.
          Str << "(GOTOFF)";
        }
        if (RelocOffsetT Offset = Reloc->getOffset()) {
          if (Offset >= 0 || (Offset == INT32_MIN))
            Str << " + " << Offset;
          else
            Str << " - " << -Offset;
        }
        Str << "\n";
        break;
      }
      }
    }
  } else {
    // NOTE: for non-constant zero initializers, this is BSS (no bits), so an
    // ELF writer would not write to the file, and only track virtual offsets,
    // but the .s writer still needs this .zero and cannot simply use the .size
    // to advance offsets.
    Str << "\t.zero\t" << Size << "\n";
  }

  Str << "\t.size\t" << Name << ", " << Size << "\n";
}

std::unique_ptr<TargetHeaderLowering>
TargetHeaderLowering::createLowering(GlobalContext *Ctx) {
  TargetArch Target = getFlags().getTargetArch();
  switch (Target) {
  default:
    badTargetFatalError(Target);
#define SUBZERO_TARGET(X)                                                      \
  case TARGET_LOWERING_CLASS_FOR(X):                                           \
    return ::X::createTargetHeaderLowering(Ctx);
#include "SZTargets.def"
#undef SUBZERO_TARGET
  }
}

TargetHeaderLowering::~TargetHeaderLowering() = default;

} // end of namespace Ice