//===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the CfgNode class, including the complexities of
/// instruction insertion and in-edge calculation.
///
//===----------------------------------------------------------------------===//

#include "IceCfgNode.h"

#include "IceAssembler.h"
#include "IceCfg.h"
#include "IceGlobalInits.h"
#include "IceInst.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceTargetLowering.h"

namespace Ice {

// Adds an instruction to either the Phi list or the regular instruction list.
// Validates that all Phis are added before all regular instructions.
void CfgNode::appendInst(Inst *Instr) {
  ++InstCountEstimate;

  if (BuildDefs::wasm()) {
    if (llvm::isa<InstSwitch>(Instr) || llvm::isa<InstBr>(Instr)) {
      for (auto *N : Instr->getTerminatorEdges()) {
        N->addInEdge(this);
        addOutEdge(N);
      }
    }
  }

  if (auto *Phi = llvm::dyn_cast<InstPhi>(Instr)) {
    if (!Insts.empty()) {
      Func->setError("Phi instruction added to the middle of a block");
      return;
    }
    Phis.push_back(Phi);
  } else {
    Insts.push_back(Instr);
  }
}

void CfgNode::replaceInEdge(CfgNode *Old, CfgNode *New) {
  for (SizeT i = 0; i < InEdges.size(); ++i) {
    if (InEdges[i] == Old) {
      InEdges[i] = New;
    }
  }
  for (auto &Inst : getPhis()) {
    auto &Phi = llvm::cast<InstPhi>(Inst);
    for (SizeT i = 0; i < Phi.getSrcSize(); ++i) {
      if (Phi.getLabel(i) == Old) {
        Phi.setLabel(i, New);
      }
    }
  }
}

namespace {
template <typename List> void removeDeletedAndRenumber(List *L, Cfg *Func) {
  const bool DoDelete =
      BuildDefs::minimal() || !getFlags().getKeepDeletedInsts();
  auto I = L->begin(), E = L->end(), Next = I;
  for (++Next; I != E; I = Next++) {
    if (DoDelete && I->isDeleted()) {
      L->remove(I);
    } else {
      I->renumber(Func);
    }
  }
}
} // end of anonymous namespace

void CfgNode::renumberInstructions() {
  InstNumberT FirstNumber = Func->getNextInstNumber();
  removeDeletedAndRenumber(&Phis, Func);
  removeDeletedAndRenumber(&Insts, Func);
  InstCountEstimate = Func->getNextInstNumber() - FirstNumber;
}

// When a node is created, the OutEdges are immediately known, but the InEdges
// have to be built up incrementally. After the CFG has been constructed, the
// computePredecessors() pass finalizes it by creating the InEdges list.
void CfgNode::computePredecessors() {
  for (CfgNode *Succ : OutEdges)
    Succ->InEdges.push_back(this);
}

void CfgNode::computeSuccessors() {
  OutEdges.clear();
  InEdges.clear();
  assert(!Insts.empty());
  OutEdges = Insts.rbegin()->getTerminatorEdges();
}

// Ensure each Phi instruction in the node is consistent with respect to
// control flow. For each predecessor, there must be a phi argument with that
// label. If a phi argument's label doesn't appear in the predecessor list
// (which can happen as a result of e.g. unreachable node elimination), its
// value is modified to be zero, to maintain consistency in liveness analysis.
// This allows us to remove some dead control flow without a major rework of
// the phi instructions. We don't check that phi arguments with the same label
// have the same value.
void CfgNode::enforcePhiConsistency() {
  for (Inst &Instr : Phis) {
    auto *Phi = llvm::cast<InstPhi>(&Instr);
    // We do a simple O(N^2) algorithm to check for consistency. Even so, it
    // shows up as only about 0.2% of the total translation time. But if
    // necessary, we could improve the complexity by using a hash table to
    // count how many times each node is referenced in the Phi instruction,
    // and how many times each node is referenced in the incoming edge list,
    // and compare the two for equality.
    for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
      CfgNode *Label = Phi->getLabel(i);
      bool Found = false;
      for (CfgNode *InNode : getInEdges()) {
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found) {
        // Predecessor was unreachable, so if (impossibly) the control flow
        // enters from that predecessor, the value should be zero.
        Phi->clearOperandForTarget(Label);
      }
    }
    for (CfgNode *InNode : getInEdges()) {
      bool Found = false;
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        CfgNode *Label = Phi->getLabel(i);
        if (InNode == Label) {
          Found = true;
          break;
        }
      }
      if (!Found)
        llvm::report_fatal_error("Phi error: missing label for incoming edge");
    }
  }
}
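
// For illustration (a hedged example, not from the original source): if
// unreachable-node elimination removed predecessor %dead but a phi here
// still names it,
//   %x = phi i32 [ %a, %live ], [ %b, %dead ]
// then enforcePhiConsistency() above replaces the stale operand's value with
// zero, keeping liveness analysis consistent:
//   %x = phi i32 [ %a, %live ], [ 0, %dead ]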

// This does part 1 of Phi lowering, by creating a new dest variable for each
// Phi instruction, replacing the Phi instruction's dest with that variable,
// and adding an explicit assignment of the old dest to the new dest. For
// example,
//   a=phi(...)
// changes to
//   "a_phi=phi(...); a=a_phi".
//
// This is in preparation for part 2 which deletes the Phi instructions and
// appends assignment instructions to predecessor blocks. Note that this
// transformation preserves SSA form.
void CfgNode::placePhiLoads() {
  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    Insts.insert(Insts.begin(), Phi->lower(Func));
  }
}
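
// A minimal sketch of parts 1 and 2 together (illustrative, with
// hypothetical labels), for a block B with predecessors P1 and P2:
//   Before:  B:  a = phi [b, P1], [c, P2]
//   Part 1:  B:  a_phi = phi [b, P1], [c, P2] ; a = a_phi
//   Part 2:  P1: a_phi = b
//            P2: a_phi = c
//            B:  a = a_phi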

// This does part 2 of Phi lowering. For each Phi instruction at each
// out-edge, create a corresponding assignment instruction, and add all the
// assignments near the end of this block. They need to be added before any
// branch instruction, and also if the block ends with a compare instruction
// followed by a branch instruction that we may want to fuse, it's better to
// insert the new assignments before the compare instruction. The
// tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions.
//
// Note that this transformation takes the Phi dest variables out of SSA form,
// as there may be assignments to the dest variable in multiple blocks.
void CfgNode::placePhiStores() {
  // Find the insertion point.
  InstList::iterator InsertionPoint = Insts.end();
  // Every block must end in a terminator instruction, and therefore must have
  // at least one instruction, so it's valid to decrement InsertionPoint (but
  // assert just in case).
  assert(InsertionPoint != Insts.begin());
  --InsertionPoint;
  // Confirm that InsertionPoint is a terminator instruction. Calling
  // getTerminatorEdges() on a non-terminator instruction will cause an
  // llvm_unreachable().
  (void)InsertionPoint->getTerminatorEdges();
  // SafeInsertionPoint is always immediately before the terminator
  // instruction. If the block ends in a compare and conditional branch, it's
  // better to place the Phi store before the compare so as not to interfere
  // with compare/branch fusing. However, if the compare instruction's dest
  // operand is the same as the new assignment statement's source operand,
  // this can't be done due to data dependences, so we need to fall back to
  // the SafeInsertionPoint. To illustrate:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %98 = icmp ne i8 %97, 0
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = phi i8 [ %97, %95 ], [ %110, %108 ]
  //   %101 = phi i1 [ %98, %95 ], [ %111, %108 ]
  // would be Phi-lowered as:
  //   ; <label>:95
  //   %97 = load i8* %96, align 1
  //   %100_phi = %97 ; can be at InsertionPoint
  //   %98 = icmp ne i8 %97, 0
  //   %101_phi = %98 ; must be at SafeInsertionPoint
  //   br i1 %98, label %99, label %2132
  //   ; <label>:99
  //   %100 = %100_phi
  //   %101 = %101_phi
  //
  // TODO(stichnot): It may be possible to bypass this whole
  // SafeInsertionPoint mechanism. If a source basic block ends in a
  // conditional branch:
  //   labelSource:
  //   ...
  //   br i1 %foo, label %labelTrue, label %labelFalse
  // and a branch target has a Phi involving the branch operand:
  //   labelTrue:
  //   %bar = phi i1 [ %foo, %labelSource ], ...
  // then we actually know the constant i1 value of the Phi operand:
  //   labelTrue:
  //   %bar = phi i1 [ true, %labelSource ], ...
  // It seems that this optimization should be done by clang or opt, but we
  // could also do it here.
  InstList::iterator SafeInsertionPoint = InsertionPoint;
  // Keep track of the dest variable of a compare instruction, so that we
  // insert the new instruction at the SafeInsertionPoint if the compare's
  // dest matches the Phi-lowered assignment's source.
  Variable *CmpInstDest = nullptr;
  // If the current insertion point is at a conditional branch instruction,
  // and the previous instruction is a compare instruction, then we move the
  // insertion point before the compare instruction so as not to interfere
  // with compare/branch fusing.
  if (auto *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) {
    if (!Branch->isUnconditional()) {
      if (InsertionPoint != Insts.begin()) {
        --InsertionPoint;
        if (llvm::isa<InstIcmp>(InsertionPoint) ||
            llvm::isa<InstFcmp>(InsertionPoint)) {
          CmpInstDest = InsertionPoint->getDest();
        } else {
          ++InsertionPoint;
        }
      }
    }
  }
  // Consider every out-edge.
  for (CfgNode *Succ : OutEdges) {
    // Consider every Phi instruction at the out-edge.
    for (Inst &I : Succ->Phis) {
      auto *Phi = llvm::dyn_cast<InstPhi>(&I);
      Operand *Operand = Phi->getOperandForTarget(this);
      assert(Operand);
      Variable *Dest = I.getDest();
      assert(Dest);
      auto *NewInst = InstAssign::create(Func, Dest, Operand);
      if (CmpInstDest == Operand)
        Insts.insert(SafeInsertionPoint, NewInst);
      else
        Insts.insert(InsertionPoint, NewInst);
    }
  }
}

// Deletes the phi instructions after the loads and stores are placed.
void CfgNode::deletePhis() {
  for (Inst &I : Phis)
    I.setDeleted();
}
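
// Edge-splitting sketch (illustrative; the "split_..." names follow the
// naming convention used below): for a block B with predecessors P1 and P2,
//   P1 ---> B            P1 ---> split_P1_B_0 ---> B
//   P2 ---> B     =>     P2 ---> split_P2_B_1 ---> B
// each predecessor edge gets its own node, so per-edge phi assignments can
// be inserted without affecting the other paths into B.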

// Splits the edge from Pred to this node by creating a new node and hooking
// up the in and out edges appropriately. (The EdgeIndex parameter is only
// used to make the new node's name unique when there are multiple edges
// between the same pair of nodes.) The new node's instruction list is
// initialized to the empty list, with no terminator instruction. There must
// not be multiple edges from Pred to this node, so all
// Inst::getTerminatorEdges implementations must not contain duplicates.
CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) {
  CfgNode *NewNode = Func->makeNode();
  // Depth is the minimum as it works if both are the same, but if one is
  // outside the loop and the other is inside, the new node should be placed
  // outside and not be executed multiple times within the loop.
  NewNode->setLoopNestDepth(
      std::min(getLoopNestDepth(), Pred->getLoopNestDepth()));
  if (BuildDefs::dump())
    NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" +
                     std::to_string(EdgeIndex));
  // The new node is added to the end of the node list, and will later need to
  // be sorted into a reasonable topological order.
  NewNode->setNeedsPlacement(true);
  // Repoint Pred's out-edge.
  bool Found = false;
  for (CfgNode *&I : Pred->OutEdges) {
    if (I == this) {
      I = NewNode;
      NewNode->InEdges.push_back(Pred);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint this node's in-edge.
  Found = false;
  for (CfgNode *&I : InEdges) {
    if (I == Pred) {
      I = NewNode;
      NewNode->OutEdges.push_back(this);
      Found = true;
      break;
    }
  }
  assert(Found);
  (void)Found;
  // Repoint all suitable branch instructions' target and return.
  Found = false;
  for (Inst &I : Pred->getInsts())
    if (!I.isDeleted() && I.repointEdges(this, NewNode))
      Found = true;
  assert(Found);
  (void)Found;
  return NewNode;
}

namespace {

// Helpers for advancedPhiLowering().

class PhiDesc {
  PhiDesc() = delete;
  PhiDesc(const PhiDesc &) = delete;
  PhiDesc &operator=(const PhiDesc &) = delete;

public:
  PhiDesc(InstPhi *Phi, Variable *Dest) : Phi(Phi), Dest(Dest) {}
  PhiDesc(PhiDesc &&) = default;
  InstPhi *Phi = nullptr;
  Variable *Dest = nullptr;
  Operand *Src = nullptr;
  bool Processed = false;
  size_t NumPred = 0; // number of entries whose Src is this Dest
  int32_t Weight = 0; // preference for topological order
};
using PhiDescList = llvm::SmallVector<PhiDesc, 32>;

// Always pick NumPred=0 over NumPred>0.
constexpr int32_t WeightNoPreds = 8;
// Prefer Src as a register because the register might free up.
constexpr int32_t WeightSrcIsReg = 4;
// Prefer Dest not as a register because the register stays free longer.
constexpr int32_t WeightDestNotReg = 2;
// Prefer NumPred=1 over NumPred>1. This is used as a tiebreaker when a
// dependency cycle must be broken so that hopefully only one temporary
// assignment has to be added to break the cycle.
constexpr int32_t WeightOnePred = 1;

bool sameVarOrReg(TargetLowering *Target, const Variable *Var1,
                  const Operand *Opnd) {
  if (Var1 == Opnd)
    return true;
  const auto *Var2 = llvm::dyn_cast<Variable>(Opnd);
  if (Var2 == nullptr)
    return false;

  // If either operand lacks a register, they cannot be the same.
  if (!Var1->hasReg())
    return false;
  if (!Var2->hasReg())
    return false;

  const auto RegNum1 = Var1->getRegNum();
  const auto RegNum2 = Var2->getRegNum();
  // Quick common-case check.
  if (RegNum1 == RegNum2)
    return true;

  assert(Target->getAliasesForRegister(RegNum1)[RegNum2] ==
         Target->getAliasesForRegister(RegNum2)[RegNum1]);
  return Target->getAliasesForRegister(RegNum1)[RegNum2];
}

// Update NumPred for all Phi assignments using Var as their Dest variable.
// Also update Weight if NumPred dropped from 2 to 1, or 1 to 0.
void updatePreds(PhiDescList &Desc, TargetLowering *Target, Variable *Var) {
  for (PhiDesc &Item : Desc) {
    if (!Item.Processed && sameVarOrReg(Target, Var, Item.Dest)) {
      --Item.NumPred;
      if (Item.NumPred == 1) {
        // If NumPred changed from 2 to 1, add in WeightOnePred.
        Item.Weight += WeightOnePred;
      } else if (Item.NumPred == 0) {
        // If NumPred changed from 1 to 0, subtract WeightOnePred and add in
        // WeightNoPreds.
        Item.Weight += (WeightNoPreds - WeightOnePred);
      }
    }
  }
}

} // end of anonymous namespace
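
// Worked weight example (illustrative, with hypothetical variables): for the
// parallel moves "A=B; B=C" with every operand on the stack, "A=B" has
// NumPred=0 (nothing reads A), so Weight = WeightNoPreds + WeightDestNotReg
// = 10, while "B=C" has NumPred=1 ("A=B" reads B), so Weight = WeightOnePred
// + WeightDestNotReg = 3. "A=B" is therefore scheduled first, and processing
// it drops "B=C" to NumPred=0 via updatePreds().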

// This is the "advanced" version of Phi lowering for a basic block, in
// contrast to the simple version that lowers through assignments involving
// temporaries.
//
// All Phi instructions in a basic block are conceptually executed in
// parallel. However, if we lower Phis early and commit to a sequential
// ordering, we may end up creating unnecessary interferences which lead to
// worse register allocation. Delaying Phi scheduling until after register
// allocation can help, unless there are no free registers for shuffling
// registers or stack slots and spilling becomes necessary.
//
// The advanced Phi lowering starts by finding a topological sort of the Phi
// instructions, where "A=B" comes before "B=C" due to the anti-dependence on
// B. Preexisting register assignments are considered in the topological sort.
// If a topological sort is not possible due to a cycle, the cycle is broken
// by introducing a non-parallel temporary. For example, a cycle arising from
// a permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else
// being equal, prefer to schedule assignments with register-allocated Src
// operands earlier, in case that register becomes free afterwards, and
// prefer to schedule assignments with register-allocated Dest variables
// later, to keep that register free for longer.
//
// Once the ordering is determined, the Cfg edge is split and the assignment
// list is lowered by the target lowering layer. Since the assignment lowering
// may create new infinite-weight temporaries, a follow-on register allocation
// pass will be needed. To prepare for this, liveness (including live range
// calculation) of the split nodes needs to be calculated, and liveness of the
// original node needs to be updated to "undo" the effects of the phi
// assignments. The specific placement of the new node within the Cfg node
// list is deferred until later, including after empty node contraction.
//
// After phi assignments are lowered across all blocks, another register
// allocation pass is run, focusing only on pre-colored and infinite-weight
// variables, similar to Om1 register allocation (except without the need to
// specially compute these variables' live ranges, since they have already
// been precisely calculated). The register allocator in this mode needs the
// ability to forcibly spill and reload registers in case none are naturally
// available.
void CfgNode::advancedPhiLowering() {
  if (getPhis().empty())
    return;

  PhiDescList Desc;

  for (Inst &I : Phis) {
    auto *Phi = llvm::dyn_cast<InstPhi>(&I);
    if (!Phi->isDeleted()) {
      Variable *Dest = Phi->getDest();
      Desc.emplace_back(Phi, Dest);
      // Undo the effect of the phi instruction on this node's live-in set by
      // marking the phi dest variable as live on entry.
      SizeT VarNum = Func->getLiveness()->getLiveIndex(Dest->getIndex());
      auto &LiveIn = Func->getLiveness()->getLiveIn(this);
      if (VarNum < LiveIn.size()) {
        assert(!LiveIn[VarNum]);
        LiveIn[VarNum] = true;
      }
      Phi->setDeleted();
    }
  }
  if (Desc.empty())
    return;

  TargetLowering *Target = Func->getTarget();
  SizeT InEdgeIndex = 0;
  for (CfgNode *Pred : InEdges) {
    CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++);
    SizeT Remaining = Desc.size();

    // First pass computes Src and initializes NumPred.
    for (PhiDesc &Item : Desc) {
      Variable *Dest = Item.Dest;
      Operand *Src = Item.Phi->getOperandForTarget(Pred);
      Item.Src = Src;
      Item.Processed = false;
      Item.NumPred = 0;
      // Cherry-pick any trivial assignments, so that they don't contribute
      // to the running complexity of the topological sort.
      if (sameVarOrReg(Target, Dest, Src)) {
        Item.Processed = true;
        --Remaining;
        if (Dest != Src)
          // If Dest and Src are syntactically the same, don't bother adding
          // the assignment, because in all respects it would be redundant,
          // and if Dest/Src are on the stack, the target lowering may
          // naively decide to lower it using a temporary register.
          Split->appendInst(InstAssign::create(Func, Dest, Src));
      }
    }
    // Second pass computes NumPred by comparing every pair of Phi
    // instructions.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      const Variable *Dest = Item.Dest;
      for (PhiDesc &Item2 : Desc) {
        if (Item2.Processed)
          continue;
        // There shouldn't be two different Phis with the same Dest variable
        // or register.
        assert((&Item == &Item2) || !sameVarOrReg(Target, Dest, Item2.Dest));
        if (sameVarOrReg(Target, Dest, Item2.Src))
          ++Item.NumPred;
      }
    }

    // Another pass to compute initial Weight values.
    for (PhiDesc &Item : Desc) {
      if (Item.Processed)
        continue;
      int32_t Weight = 0;
      if (Item.NumPred == 0)
        Weight += WeightNoPreds;
      if (Item.NumPred == 1)
        Weight += WeightOnePred;
      if (auto *Var = llvm::dyn_cast<Variable>(Item.Src))
        if (Var->hasReg())
          Weight += WeightSrcIsReg;
      if (!Item.Dest->hasReg())
        Weight += WeightDestNotReg;
      Item.Weight = Weight;
    }

    // Repeatedly choose and process the best candidate in the topological
    // sort, until no candidates remain. This implementation is O(N^2) where
    // N is the number of Phi instructions, but with a small constant factor
    // compared to a likely implementation of O(N) topological sort.
    for (; Remaining; --Remaining) {
      int32_t BestWeight = -1;
      PhiDesc *BestItem = nullptr;
      // Find the best candidate.
      for (PhiDesc &Item : Desc) {
        if (Item.Processed)
          continue;
        const int32_t Weight = Item.Weight;
        if (Weight > BestWeight) {
          BestItem = &Item;
          BestWeight = Weight;
        }
      }
      assert(BestWeight >= 0);
      Variable *Dest = BestItem->Dest;
      Operand *Src = BestItem->Src;
      assert(!sameVarOrReg(Target, Dest, Src));
      // Break a cycle by introducing a temporary.
      while (BestItem->NumPred > 0) {
        bool Found = false;
        // If the target instruction "A=B" is part of a cycle, find the "X=A"
        // assignment in the cycle because it will have to be rewritten as
        // "X=tmp".
        for (PhiDesc &Item : Desc) {
          if (Item.Processed)
            continue;
          Operand *OtherSrc = Item.Src;
          if (Item.NumPred && sameVarOrReg(Target, Dest, OtherSrc)) {
            SizeT VarNum = Func->getNumVariables();
            Variable *Tmp = Func->makeVariable(OtherSrc->getType());
            if (BuildDefs::dump())
              Tmp->setName(Func, "__split_" + std::to_string(VarNum));
            Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc));
            Item.Src = Tmp;
            updatePreds(Desc, Target, llvm::cast<Variable>(OtherSrc));
            Found = true;
            break;
          }
        }
        assert(Found);
        (void)Found;
      }
      // Now that a cycle (if any) has been broken, create the actual
      // assignment.
      Split->appendInst(InstAssign::create(Func, Dest, Src));
      if (auto *Var = llvm::dyn_cast<Variable>(Src))
        updatePreds(Desc, Target, Var);
      BestItem->Processed = true;
    }
    Split->appendInst(InstBr::create(Func, this));

    Split->genCode();

    Func->getVMetadata()->addNode(Split);
    // Validate to be safe. All items should be marked as processed, and have
    // no predecessors.
    if (BuildDefs::asserts()) {
      for (PhiDesc &Item : Desc) {
        (void)Item;
        assert(Item.Processed);
        assert(Item.NumPred == 0);
      }
    }
  }
}
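
// Cycle-breaking walkthrough (illustrative): for the swap "A=B; B=A", both
// items have NumPred==1. If "A=B" is chosen first, the while loop above
// finds the reader "B=A", emits "tmp=A", and rewrites the reader to
// "B=tmp"; the edge node then ends up with the sequence
//   tmp = A
//   A = B
//   B = tmp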

// Does address mode optimization. Passes each instruction to the
// TargetLowering object. If it returns a new instruction (representing the
// optimized address mode), then insert the new instruction and delete the
// old.
void CfgNode::doAddressOpt() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  while (!Context.atEnd()) {
    Target->doAddressOpt();
  }
}

void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  Context.init(this);
  Context.setInsertPoint(Context.getCur());
  // Do not insert nops into bundle-locked instruction sequences.
  bool PauseNopInsertion = false;
  while (!Context.atEnd()) {
    if (llvm::isa<InstBundleLock>(Context.getCur())) {
      PauseNopInsertion = true;
    } else if (llvm::isa<InstBundleUnlock>(Context.getCur())) {
      PauseNopInsertion = false;
    }
    if (!PauseNopInsertion)
      Target->doNopInsertion(RNG);
    // Ensure Cur=Next, so that the nops are inserted before the current
    // instruction rather than after.
    Context.advanceCur();
    Context.advanceNext();
  }
}

// Drives the target lowering. Passes the current instruction and the next
// non-deleted instruction for target lowering.
void CfgNode::genCode() {
  TargetLowering *Target = Func->getTarget();
  LoweringContext &Context = Target->getContext();
  // Lower the regular instructions.
  Context.init(this);
  Target->initNodeForLowering(this);
  while (!Context.atEnd()) {
    InstList::iterator Orig = Context.getCur();
    if (llvm::isa<InstRet>(*Orig))
      setHasReturn();
    Target->lower();
    // Ensure target lowering actually moved the cursor.
    assert(Context.getCur() != Orig);
  }
  Context.availabilityReset();
  // Do preliminary lowering of the Phi instructions.
  Target->prelowerPhis();
}

void CfgNode::livenessLightweight() {
  SizeT NumVars = Func->getNumVariables();
  LivenessBV Live(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    I.livenessLightweight(Func, Live);
  }
}
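
// For reference, a standard statement of the backward dataflow that the
// next method computes (not a comment from the original source):
//   LiveOut(B) = union over successors S of LiveIn(S), plus S's phi
//                operands that flow in from B
//   LiveIn(B)  = (LiveOut(B) - defs(B)) + uses(B)
// liveness() below returns true when LiveIn(B) changes, which causes the
// driver to revisit B's predecessors until a fixed point is reached.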

// Performs liveness analysis on the block. Returns true if the incoming
// liveness changed from before, false if it stayed the same. (If it changes,
// the node's predecessors need to be processed again.)
bool CfgNode::liveness(Liveness *Liveness) {
  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const SizeT NumGlobalVars = Liveness->getNumGlobalVars();
  LivenessBV &Live = Liveness->getScratchBV();
  Live.clear();

  LiveBeginEndMap *LiveBegin = nullptr;
  LiveBeginEndMap *LiveEnd = nullptr;
  // Mark the beginning and ending of each variable's live range with the
  // sentinel instruction number 0.
  if (Liveness->getMode() == Liveness_Intervals) {
    LiveBegin = Liveness->getLiveBegin(this);
    LiveEnd = Liveness->getLiveEnd(this);
    LiveBegin->clear();
    LiveEnd->clear();
    // Guess that the number of live ranges beginning is roughly the number
    // of instructions, and same for live ranges ending.
    LiveBegin->reserve(getInstCountEstimate());
    LiveEnd->reserve(getInstCountEstimate());
  }

  // Initialize Live to be the union of all successors' LiveIn.
  for (CfgNode *Succ : OutEdges) {
    const LivenessBV &LiveIn = Liveness->getLiveIn(Succ);
    assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
    Live |= LiveIn;
    // Mark corresponding argument of phis in successor as live.
    for (Inst &I : Succ->Phis) {
      if (I.isDeleted())
        continue;
      auto *Phi = llvm::cast<InstPhi>(&I);
      Phi->livenessPhiOperand(Live, this, Liveness);
    }
  }
  assert(Live.empty() || Live.size() == NumGlobalVars);
  Liveness->getLiveOut(this) = Live;

  // Expand Live so it can hold locals in addition to globals.
  Live.resize(NumVars);
  // Process regular instructions in reverse order.
  for (Inst &I : reverse_range(Insts)) {
    if (I.isDeleted())
      continue;
    I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd);
  }
  // Process phis in forward order so that we can override the instruction
  // number to be that of the earliest phi instruction in the block.
  SizeT NumNonDeadPhis = 0;
  InstNumberT FirstPhiNumber = Inst::NumberSentinel;
  for (Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    if (FirstPhiNumber == Inst::NumberSentinel)
      FirstPhiNumber = I.getNumber();
    if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd))
      ++NumNonDeadPhis;
  }

  // When using the sparse representation, after traversing the instructions
  // in the block, the Live bitvector should only contain set bits for global
  // variables upon block entry. We validate this by testing the upper bits
  // of the Live bitvector.
  if (Live.find_next(NumGlobalVars) != -1) {
    if (BuildDefs::dump()) {
      // This is a fatal liveness consistency error. Print some diagnostics
      // and abort.
      Ostream &Str = Func->getContext()->getStrDump();
      Func->resetCurrentNode();
      Str << "Invalid Live =";
      for (SizeT i = NumGlobalVars; i < Live.size(); ++i) {
        if (Live.test(i)) {
          Str << " ";
          Liveness->getVariable(i, this)->dump(Func);
        }
      }
      Str << "\n";
    }
    llvm::report_fatal_error("Fatal inconsistency in liveness analysis");
  }
  // Now truncate Live to prevent LiveIn from growing.
  Live.resize(NumGlobalVars);

  bool Changed = false;
  LivenessBV &LiveIn = Liveness->getLiveIn(this);
  assert(LiveIn.empty() || LiveIn.size() == NumGlobalVars);
  // Add in current LiveIn.
  Live |= LiveIn;
  // Check result, set LiveIn=Live.
  SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this);
  bool LiveInChanged = (Live != LiveIn);
  Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged);
  if (LiveInChanged)
    LiveIn = Live;
  PrevNumNonDeadPhis = NumNonDeadPhis;
  return Changed;
}

// Validate the integrity of the live ranges in this block. If there are any
// errors, it prints details and returns false. On success, it returns true.
bool CfgNode::livenessValidateIntervals(Liveness *Liveness) const {
  if (!BuildDefs::asserts())
    return true;

  // Verify there are no duplicates.
  auto ComparePair = [](const LiveBeginEndMapEntry &A,
                        const LiveBeginEndMapEntry &B) {
    return A.first == B.first;
  };
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  if (std::adjacent_find(MapBegin.begin(), MapBegin.end(), ComparePair) ==
          MapBegin.end() &&
      std::adjacent_find(MapEnd.begin(), MapEnd.end(), ComparePair) ==
          MapEnd.end())
    return true;

  // There is definitely a liveness error. All paths from here return false.
  if (!BuildDefs::dump())
    return false;

  // Print all the errors.
  if (BuildDefs::dump()) {
    GlobalContext *Ctx = Func->getContext();
    OstreamLocker L(Ctx);
    Ostream &Str = Ctx->getStrDump();
    if (Func->isVerbose()) {
      Str << "Live range errors in the following block:\n";
      dump(Func);
    }
    for (auto Start = MapBegin.begin();
         (Start = std::adjacent_find(Start, MapBegin.end(), ComparePair)) !=
         MapBegin.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR begin, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
    for (auto Start = MapEnd.begin();
         (Start = std::adjacent_find(Start, MapEnd.end(), ComparePair)) !=
         MapEnd.end();
         ++Start) {
      auto Next = Start + 1;
      Str << "Duplicate LR end, block " << getName() << ", instructions "
          << Start->second << " & " << Next->second << ", variable "
          << Liveness->getVariable(Start->first, this)->getName() << "\n";
    }
  }
  return false;
}
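
// Wraparound illustration for livenessAddIntervals() below (hypothetical
// instruction numbers): if, within one block, a variable's live range ends
// at instruction 12 (its last use) and a new range begins at instruction 20
// (a def that is live out), the sorted maps yield LE=12 and LB=20, so
// LB > LE and two subranges are added: [FirstInstNum, 12) and
// [20, LastInstNum + 1).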

// Once basic liveness is complete, compute actual live ranges. It is assumed
// that within a single basic block, a live range begins at most once and
// ends at most once. This is certainly true for pure SSA form. It is also
// true once phis are lowered, since each assignment to the phi-based
// temporary is in a different basic block, and there is a single read that
// ends the live range in the basic block that contained the actual phi
// instruction.
void CfgNode::livenessAddIntervals(Liveness *Liveness,
                                   InstNumberT FirstInstNum,
                                   InstNumberT LastInstNum) {
  TimerMarker T1(TimerStack::TT_liveRange, Func);

  const SizeT NumVars = Liveness->getNumVarsInNode(this);
  const LivenessBV &LiveIn = Liveness->getLiveIn(this);
  const LivenessBV &LiveOut = Liveness->getLiveOut(this);
  LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this);
  LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this);
  std::sort(MapBegin.begin(), MapBegin.end());
  std::sort(MapEnd.begin(), MapEnd.end());

  if (!livenessValidateIntervals(Liveness)) {
    llvm::report_fatal_error("livenessAddIntervals: Liveness error");
    return;
  }

  LivenessBV &LiveInAndOut = Liveness->getScratchBV();
  LiveInAndOut = LiveIn;
  LiveInAndOut &= LiveOut;

  // Iterate in parallel across the sorted MapBegin[] and MapEnd[].
  auto IBB = MapBegin.begin(), IEB = MapEnd.begin();
  auto IBE = MapBegin.end(), IEE = MapEnd.end();
  while (IBB != IBE || IEB != IEE) {
    SizeT i1 = IBB == IBE ? NumVars : IBB->first;
    SizeT i2 = IEB == IEE ? NumVars : IEB->first;
    SizeT i = std::min(i1, i2);
    // i1 is the Variable number of the next MapBegin entry, and i2 is the
    // Variable number of the next MapEnd entry. If i1==i2, then the
    // Variable's live range begins and ends in this block. If i1<i2, then
    // i1's live range begins at instruction IBB->second and extends through
    // the end of the block. If i1>i2, then i2's live range begins at the
    // first instruction of the block and ends at IEB->second. In any case,
    // we choose the lesser of i1 and i2 and proceed accordingly.
    InstNumberT LB = i == i1 ? IBB->second : FirstInstNum;
    InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1;

    Variable *Var = Liveness->getVariable(i, this);
    if (LB > LE) {
      Var->addLiveRange(FirstInstNum, LE, this);
      Var->addLiveRange(LB, LastInstNum + 1, this);
      // Assert that Var is a global variable by checking that its liveness
      // index is less than the number of globals. This ensures that the
      // LiveInAndOut[] access is valid.
      assert(i < Liveness->getNumGlobalVars());
      LiveInAndOut[i] = false;
    } else {
      Var->addLiveRange(LB, LE, this);
    }
    if (i == i1)
      ++IBB;
    if (i == i2)
      ++IEB;
  }
  // Process the variables that are live across the entire block.
  for (int i = LiveInAndOut.find_first(); i != -1;
       i = LiveInAndOut.find_next(i)) {
    Variable *Var = Liveness->getVariable(i, this);
    if (Liveness->getRangeMask(Var->getIndex()))
      Var->addLiveRange(FirstInstNum, LastInstNum + 1, this);
  }
}

// If this node contains only deleted instructions, and ends in an
// unconditional branch, contract the node by repointing all its in-edges to
// its successor.
void CfgNode::contractIfEmpty() {
  if (InEdges.empty())
    return;
  Inst *Branch = nullptr;
  for (Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isUnconditionalBranch())
      Branch = &I;
    else if (!I.isRedundantAssign())
      return;
  }
  // Make sure there is actually a successor to repoint in-edges to.
  if (OutEdges.empty())
    return;
  assert(hasSingleOutEdge());
  // Don't try to delete a self-loop.
  if (OutEdges[0] == this)
    return;
  // Make sure the node actually contains (ends with) an unconditional
  // branch.
  if (Branch == nullptr)
    return;

  Branch->setDeleted();
  CfgNode *Successor = OutEdges.front();
  // Repoint all this node's in-edges to this node's successor, unless this
  // node's successor is actually itself (in which case the statement
  // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the
  // iterator over this->InEdges).
  if (Successor != this) {
    for (CfgNode *Pred : InEdges) {
      for (CfgNode *&I : Pred->OutEdges) {
        if (I == this) {
          I = Successor;
          Successor->InEdges.push_back(Pred);
        }
      }
      for (Inst &I : Pred->getInsts()) {
        if (!I.isDeleted())
          I.repointEdges(this, Successor);
      }
    }

    // Remove the in-edge to the successor to allow node reordering to make
    // better decisions. For example, it's more helpful to place a node after
    // a reachable predecessor than an unreachable one (like the one we just
    // contracted).
    Successor->InEdges.erase(
        std::find(Successor->InEdges.begin(), Successor->InEdges.end(),
                  this));
  }
  InEdges.clear();
}
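
// Contraction illustration (hypothetical blocks): after lowering, a node
// whose only live content is an unconditional branch,
//   B1: br B2
// is bypassed by repointing every in-edge of B1 directly at B2; later node
// placement can then drop B1 entirely.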

void CfgNode::doBranchOpt(const CfgNode *NextNode) {
  TargetLowering *Target = Func->getTarget();
  // Find the first opportunity for branch optimization (which will be the
  // last instruction in the block) and stop. This is sufficient unless there
  // is some target lowering where we have the possibility of multiple
  // optimizations per block. Take care with switch lowering as there are
  // multiple unconditional branches and only the last can be deleted.
  for (Inst &I : reverse_range(Insts)) {
    if (!I.isDeleted()) {
      Target->doBranchOpt(&I, NextNode);
      return;
    }
  }
}

// ======================== Dump routines ======================== //

namespace {

// Helper functions for emit().

void emitRegisterUsage(Ostream &Str, const Cfg *Func, const CfgNode *Node,
                       bool IsLiveIn, CfgVector<SizeT> &LiveRegCount) {
  if (!BuildDefs::dump())
    return;
  Liveness *Liveness = Func->getLiveness();
  const LivenessBV *Live;
  const auto StackReg = Func->getTarget()->getStackReg();
  const auto FrameOrStackReg = Func->getTarget()->getFrameOrStackReg();
  if (IsLiveIn) {
    Live = &Liveness->getLiveIn(Node);
    Str << "\t\t\t\t/* LiveIn=";
  } else {
    Live = &Liveness->getLiveOut(Node);
    Str << "\t\t\t\t/* LiveOut=";
  }
  if (!Live->empty()) {
    CfgVector<Variable *> LiveRegs;
    for (SizeT i = 0; i < Live->size(); ++i) {
      if (!(*Live)[i])
        continue;
      Variable *Var = Liveness->getVariable(i, Node);
      if (!Var->hasReg())
        continue;
      const auto RegNum = Var->getRegNum();
      if (RegNum == StackReg || RegNum == FrameOrStackReg)
        continue;
      if (IsLiveIn)
        ++LiveRegCount[RegNum];
      LiveRegs.push_back(Var);
    }
    // Sort the variables by regnum so they are always printed in a familiar
    // order.
    std::sort(LiveRegs.begin(), LiveRegs.end(),
              [](const Variable *V1, const Variable *V2) {
                return unsigned(V1->getRegNum()) < unsigned(V2->getRegNum());
              });
    bool First = true;
    for (Variable *Var : LiveRegs) {
      if (!First)
        Str << ",";
      First = false;
      Var->emit(Func);
    }
  }
  Str << " */\n";
}

/// Returns true if some text was emitted - in which case the caller
/// definitely needs to emit a newline character.
bool emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr,
                         CfgVector<SizeT> &LiveRegCount) {
  bool Printed = false;
  if (!BuildDefs::dump())
    return Printed;
  Variable *Dest = Instr->getDest();
  // Normally we increment the live count for the dest register. But we
  // shouldn't if the instruction's IsDestRedefined flag is set, because this
  // means that the target lowering created this instruction as a non-SSA
  // assignment; i.e., a different, previous instruction started the dest
  // variable's live range.
  if (!Instr->isDestRedefined() && Dest && Dest->hasReg())
    ++LiveRegCount[Dest->getRegNum()];
  FOREACH_VAR_IN_INST(Var, *Instr) {
    bool ShouldReport = Instr->isLastUse(Var);
    if (ShouldReport && Var->hasReg()) {
      // Don't report end of live range until the live count reaches 0.
      SizeT NewCount = --LiveRegCount[Var->getRegNum()];
      if (NewCount)
        ShouldReport = false;
    }
    if (ShouldReport) {
      if (Printed)
        Str << ",";
      else
        Str << " \t/* END=";
      Var->emit(Func);
      Printed = true;
    }
  }
  if (Printed)
    Str << " */";
  return Printed;
}

void updateStats(Cfg *Func, const Inst *I) {
  if (!BuildDefs::dump())
    return;
  // Update emitted instruction count, plus fill/spill count for Variable
  // operands without a physical register.
  if (uint32_t Count = I->getEmitInstCount()) {
    Func->getContext()->statsUpdateEmitted(Count);
    if (Variable *Dest = I->getDest()) {
      if (!Dest->hasReg())
        Func->getContext()->statsUpdateFills();
    }
    for (SizeT S = 0; S < I->getSrcSize(); ++S) {
      if (auto *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) {
        if (!Src->hasReg())
          Func->getContext()->statsUpdateSpills();
      }
    }
  }
}

} // end of anonymous namespace
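
// When asm decoration is enabled (getDecorateAsm()), the emitted block is
// annotated roughly as follows (a hedged sketch; exact register names and
// spacing depend on the target):
//   MyBlock:
//       /* LiveIn=eax,esi */
//       /* preds=$entry */
//       ...
//       /* LiveOut=eax */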

void CfgNode::emit(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrEmit();
  Liveness *Liveness = Func->getLiveness();
  const bool DecorateAsm = Liveness && getFlags().getDecorateAsm();
  Str << getAsmName() << ":\n";
  // LiveRegCount keeps track of the number of currently live variables that
  // each register is assigned to. Normally that would be only 0 or 1, but
  // the register allocator's AllowOverlap inference allows it to be greater
  // than 1 for short periods.
  CfgVector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters());
  if (DecorateAsm) {
    constexpr bool IsLiveIn = true;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
    if (getInEdges().size()) {
      Str << "\t\t\t\t/* preds=";
      bool First = true;
      for (CfgNode *I : getInEdges()) {
        if (!First)
          Str << ",";
        First = false;
        Str << "$" << I->getName();
      }
      Str << " */\n";
    }
    if (getLoopNestDepth()) {
      Str << "\t\t\t\t/* loop depth=" << getLoopNestDepth() << " */\n";
    }
  }

  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emit(Func);
  }
  for (const Inst &I : Insts) {
    if (I.isDeleted())
      continue;
    if (I.isRedundantAssign()) {
      // Usually, redundant assignments end the live range of the src
      // variable and begin the live range of the dest variable, with no net
      // effect on the liveness of their register. However, if the register
      // allocator infers the AllowOverlap condition, then this may be a
      // redundant assignment that does not end the src variable's live
      // range, in which case the active variable count for that register
      // needs to be bumped. That normally would have happened as part of
      // emitLiveRangesEnded(), but that isn't called for redundant
      // assignments.
      Variable *Dest = I.getDest();
      if (DecorateAsm && Dest->hasReg()) {
        ++LiveRegCount[Dest->getRegNum()];
        if (I.isLastUse(I.getSrc(0)))
          --LiveRegCount[llvm::cast<Variable>(I.getSrc(0))->getRegNum()];
      }
      continue;
    }
    I.emit(Func);
    bool Printed = false;
    if (DecorateAsm)
      Printed = emitLiveRangesEnded(Str, Func, &I, LiveRegCount);
    if (Printed || llvm::isa<InstTarget>(&I))
      Str << "\n";
    updateStats(Func, &I);
  }
  if (DecorateAsm) {
    constexpr bool IsLiveIn = false;
    emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount);
  }
}

// Helper class for emitIAS().
namespace {
class BundleEmitHelper {
  BundleEmitHelper() = delete;
  BundleEmitHelper(const BundleEmitHelper &) = delete;
  BundleEmitHelper &operator=(const BundleEmitHelper &) = delete;

public:
  BundleEmitHelper(Assembler *Asm, const InstList &Insts)
      : Asm(Asm), End(Insts.end()), BundleLockStart(End),
        BundleSize(1 << Asm->getBundleAlignLog2Bytes()),
        BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {}
  // Check whether we're currently within a bundle_lock region.
  bool isInBundleLockRegion() const { return BundleLockStart != End; }
  // Check whether the current bundle_lock region has the align_to_end
  // option.
  bool isAlignToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_AlignToEnd;
  }
  bool isPadToEnd() const {
    assert(isInBundleLockRegion());
    return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() ==
           InstBundleLock::Opt_PadToEnd;
  }
  // Check whether the entire bundle_lock region falls within the same
  // bundle.
  bool isSameBundle() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre == SizeSnapshotPost ||
           (SizeSnapshotPre & BundleMaskHi) ==
               ((SizeSnapshotPost - 1) & BundleMaskHi);
  }
  // Get the bundle alignment of the first instruction of the bundle_lock
  // region.
  intptr_t getPreAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPre & BundleMaskLo;
  }
  // Get the bundle alignment of the first instruction past the bundle_lock
  // region.
  intptr_t getPostAlignment() const {
    assert(isInBundleLockRegion());
    return SizeSnapshotPost & BundleMaskLo;
  }
  // Get the iterator pointing to the bundle_lock instruction, e.g. to roll
  // back the instruction iteration to that point.
  InstList::const_iterator getBundleLockStart() const {
    assert(isInBundleLockRegion());
    return BundleLockStart;
  }
  // Set up bookkeeping when the bundle_lock instruction is first processed.
  void enterBundleLock(InstList::const_iterator I) {
    assert(!isInBundleLockRegion());
    BundleLockStart = I;
    SizeSnapshotPre = Asm->getBufferSize();
    Asm->setPreliminary(true);
    assert(isInBundleLockRegion());
  }
  // Update bookkeeping when the bundle_unlock instruction is processed.
  void enterBundleUnlock() {
    assert(isInBundleLockRegion());
    SizeSnapshotPost = Asm->getBufferSize();
  }
  // Update bookkeeping when we are completely finished with the bundle_lock
  // region.
  void leaveBundleLockRegion() { BundleLockStart = End; }
  // Check whether the instruction sequence fits within the current bundle,
  // and if not, add nop padding to the end of the current bundle.
  void padToNextBundle() {
    assert(isInBundleLockRegion());
    if (!isSameBundle()) {
      intptr_t PadToNextBundle = BundleSize - getPreAlignment();
      Asm->padWithNop(PadToNextBundle);
      SizeSnapshotPre += PadToNextBundle;
      SizeSnapshotPost += PadToNextBundle;
      assert((Asm->getBufferSize() & BundleMaskLo) == 0);
      assert(Asm->getBufferSize() == SizeSnapshotPre);
    }
  }
  // If align_to_end is specified, add padding such that the instruction
  // sequence ends precisely at a bundle boundary.
  void padForAlignToEnd() {
    assert(isInBundleLockRegion());
    if (isAlignToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // If pad_to_end is specified, add padding such that the first instruction
  // after the instruction sequence starts at a bundle boundary.
  void padForPadToEnd() {
    assert(isInBundleLockRegion());
    if (isPadToEnd()) {
      if (intptr_t Offset = getPostAlignment()) {
        Asm->padWithNop(BundleSize - Offset);
        SizeSnapshotPre = Asm->getBufferSize();
      }
    }
  }
  // Update bookkeeping when rolling back for the second pass.
  void rollback() {
    assert(isInBundleLockRegion());
    Asm->setBufferSize(SizeSnapshotPre);
    Asm->setPreliminary(false);
  }

private:
  Assembler *const Asm;
  // End is a sentinel value such that BundleLockStart==End implies that we
  // are not in a bundle_lock region.
  const InstList::const_iterator End;
  InstList::const_iterator BundleLockStart;
  const intptr_t BundleSize;
  // Masking with BundleMaskLo identifies an address's bundle offset.
  const intptr_t BundleMaskLo;
  // Masking with BundleMaskHi identifies an address's bundle.
  const intptr_t BundleMaskHi;
  intptr_t SizeSnapshotPre = 0;
  intptr_t SizeSnapshotPost = 0;
};

} // end of anonymous namespace
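
// Bundle arithmetic sketch (hypothetical numbers): with 32-byte bundles,
// BundleSize=32, BundleMaskLo=0x1f, and BundleMaskHi=~0x1f. A locked
// sequence that starts at buffer offset 0x3c (pre-alignment 0x1c) and ends
// at 0x44 straddles a bundle boundary, so padToNextBundle() emits
// 32 - 0x1c = 4 bytes of nops, moving the sequence start to the 0x40
// boundary.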

void CfgNode::emitIAS(Cfg *Func) const {
  Func->setCurrentNode(this);
  Assembler *Asm = Func->getAssembler<>();
  // TODO(stichnot): When sandboxing, defer binding the node label until just
  // before the first instruction is emitted, to reduce the chance that a
  // padding nop is a branch target.
  Asm->bindCfgNodeLabel(this);
  for (const Inst &I : Phis) {
    if (I.isDeleted())
      continue;
    // Emitting a Phi instruction should cause an error.
    I.emitIAS(Func);
  }

  // Do the simple emission if not sandboxed.
  if (!getFlags().getUseSandboxing()) {
    for (const Inst &I : Insts) {
      if (!I.isDeleted() && !I.isRedundantAssign()) {
        I.emitIAS(Func);
        updateStats(Func, &I);
      }
    }
    return;
  }

  // The remainder of the function handles emission with sandboxing. There
  // are explicit bundle_lock regions delimited by bundle_lock and
  // bundle_unlock instructions. All other instructions are treated as an
  // implicit one-instruction bundle_lock region. Emission is done twice for
  // each bundle_lock region. The first pass is a preliminary pass, after
  // which we can figure out what nop padding is needed, then roll back, and
  // make the final pass.
  //
  // Ideally, the first pass would be speculative and the second pass would
  // only be done if nop padding were needed, but the structure of the
  // integrated assembler makes it hard to roll back the state of label
  // bindings, label links, and relocation fixups. Instead, the first pass
  // just disables all mutation of that state.

  BundleEmitHelper Helper(Asm, Insts);
  InstList::const_iterator End = Insts.end();
  // Retrying indicates that we had to roll back to the bundle_lock
  // instruction to apply padding before the bundle_lock sequence.
  bool Retrying = false;
  for (InstList::const_iterator I = Insts.begin(); I != End; ++I) {
    if (I->isDeleted() || I->isRedundantAssign())
      continue;

    if (llvm::isa<InstBundleLock>(I)) {
      // Set up the initial bundle_lock state. This should not happen while
      // retrying, because the retry rolls back to the instruction following
      // the bundle_lock instruction.
      assert(!Retrying);
      Helper.enterBundleLock(I);
      continue;
    }

    if (llvm::isa<InstBundleUnlock>(I)) {
      Helper.enterBundleUnlock();
      if (Retrying) {
        // Make sure all instructions are in the same bundle.
        assert(Helper.isSameBundle());
        // If align_to_end is specified, make sure the next instruction
        // begins the bundle.
        assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0);
        Helper.padForPadToEnd();
        Helper.leaveBundleLockRegion();
        Retrying = false;
      } else {
        // This is the first pass, so roll back for the retry pass.
        Helper.rollback();
        // Pad to the next bundle if the instruction sequence crossed a
        // bundle boundary.
        Helper.padToNextBundle();
        // Insert additional padding to make AlignToEnd work.
        Helper.padForAlignToEnd();
        // Prepare for the retry pass after padding is done.
        Retrying = true;
        I = Helper.getBundleLockStart();
      }
      continue;
    }

    // I points to a non bundle_lock/bundle_unlock instruction.
    if (Helper.isInBundleLockRegion()) {
      I->emitIAS(Func);
      // Only update stats during the final pass.
      if (Retrying)
        updateStats(Func, iteratorToInst(I));
    } else {
      // Treat it as though there were an implicit bundle_lock and
      // bundle_unlock wrapping the instruction.
      Helper.enterBundleLock(I);
      I->emitIAS(Func);
      Helper.enterBundleUnlock();
      Helper.rollback();
      Helper.padToNextBundle();
      I->emitIAS(Func);
      updateStats(Func, iteratorToInst(I));
      Helper.leaveBundleLockRegion();
    }
  }

  // Don't allow bundle locking across basic blocks, to keep the backtracking
  // mechanism simple.
  assert(!Helper.isInBundleLockRegion());
  assert(!Retrying);
}

void CfgNode::dump(Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Func->setCurrentNode(this);
  Ostream &Str = Func->getContext()->getStrDump();
  Liveness *Liveness = Func->getLiveness();
  if (Func->isVerbose(IceV_Instructions) || Func->isVerbose(IceV_Loop))
    Str << getName() << ":\n";
  // Dump the loop nest depth.
  if (Func->isVerbose(IceV_Loop))
    Str << " // LoopNestDepth = " << getLoopNestDepth() << "\n";
  // Dump list of predecessor nodes.
  if (Func->isVerbose(IceV_Preds) && !InEdges.empty()) {
    Str << " // preds = ";
    bool First = true;
    for (CfgNode *I : InEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
  // Dump the live-in variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveIn(this).empty()) {
      const LivenessBV &LiveIn = Liveness->getLiveIn(this);
      Str << " // LiveIn:";
      for (SizeT i = 0; i < LiveIn.size(); ++i) {
        if (LiveIn[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump each instruction.
  if (Func->isVerbose(IceV_Instructions)) {
    for (const Inst &I : Phis)
      I.dumpDecorated(Func);
    for (const Inst &I : Insts)
      I.dumpDecorated(Func);
  }
  // Dump the live-out variables.
  if (Func->isVerbose(IceV_Liveness)) {
    if (Liveness != nullptr && !Liveness->getLiveOut(this).empty()) {
      const LivenessBV &LiveOut = Liveness->getLiveOut(this);
      Str << " // LiveOut:";
      for (SizeT i = 0; i < LiveOut.size(); ++i) {
        if (LiveOut[i]) {
          Variable *Var = Liveness->getVariable(i, this);
          Str << " %" << Var->getName();
          if (Func->isVerbose(IceV_RegOrigins) && Var->hasReg()) {
            Str << ":"
                << Func->getTarget()->getRegName(Var->getRegNum(),
                                                 Var->getType());
          }
        }
      }
      Str << "\n";
    }
  }
  // Dump list of successor nodes.
  if (Func->isVerbose(IceV_Succs)) {
    Str << " // succs = ";
    bool First = true;
    for (CfgNode *I : OutEdges) {
      if (!First)
        Str << ", ";
      First = false;
      Str << "%" << I->getName();
    }
    Str << "\n";
  }
}

void CfgNode::profileExecutionCount(VariableDeclaration *Var) {
  GlobalContext *Ctx = Func->getContext();
  GlobalString RMW_I64 = Ctx->getGlobalString("llvm.nacl.atomic.rmw.i64");

  bool BadIntrinsic = false;
  const Intrinsics::FullIntrinsicInfo *Info =
      Ctx->getIntrinsicsInfo().find(RMW_I64, BadIntrinsic);
  assert(!BadIntrinsic);
  assert(Info != nullptr);

  Operand *RMWI64Name = Ctx->getConstantExternSym(RMW_I64);
  constexpr RelocOffsetT Offset = 0;
  Constant *Counter = Ctx->getConstantSym(Offset, Var->getName());
  Constant *AtomicRMWOp = Ctx->getConstantInt32(Intrinsics::AtomicAdd);
  Constant *One = Ctx->getConstantInt64(1);
  Constant *OrderAcquireRelease =
      Ctx->getConstantInt32(Intrinsics::MemoryOrderAcquireRelease);

  auto *Instr = InstIntrinsicCall::create(
      Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info);
  Instr->addArg(AtomicRMWOp);
  Instr->addArg(Counter);
  Instr->addArg(One);
  Instr->addArg(OrderAcquireRelease);
  Insts.push_front(Instr);
}

void CfgNode::removeInEdge(CfgNode *In) {
  InEdges.erase(std::find(InEdges.begin(), InEdges.end(), In));
}
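
// Short-circuit illustration (hypothetical IR; the "_1"/"_2" names follow
// the renaming below): a block that computes a boolean Or and then branches
// on it,
//   b1:                           b1_1:
//     %a = ...                      %a = ...
//     %c = or i1 %a, %b     =>      br i1 %a, label %T, label %b1_2
//     br i1 %c, %T, %F            b1_2:
//                                   br i1 %b, label %T, label %F
// jumps straight to %T when %a is true, evaluating %b only when needed.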

CfgNode *CfgNode::shortCircuit() {
  auto *Func = getCfg();
  auto *Last = &getInsts().back();
  Variable *Condition = nullptr;
  InstBr *Br = nullptr;
  if ((Br = llvm::dyn_cast<InstBr>(Last))) {
    if (!Br->isUnconditional()) {
      Condition = llvm::dyn_cast<Variable>(Br->getCondition());
    }
  }
  if (Condition == nullptr)
    return nullptr;

  auto *JumpOnTrue = Br->getTargetTrue();
  auto *JumpOnFalse = Br->getTargetFalse();

  bool FoundOr = false;
  bool FoundAnd = false;

  InstArithmetic *TopLevelBoolOp = nullptr;

  for (auto &Inst : reverse_range(getInsts())) {
    if (Inst.isDeleted())
      continue;
    if (Inst.getDest() == Condition) {
      if (auto *Arith = llvm::dyn_cast<InstArithmetic>(&Inst)) {
        FoundOr = (Arith->getOp() == InstArithmetic::OpKind::Or);
        FoundAnd = (Arith->getOp() == InstArithmetic::OpKind::And);
        if (FoundOr || FoundAnd) {
          TopLevelBoolOp = Arith;
          break;
        }
      }
    }
  }

  if (!TopLevelBoolOp)
    return nullptr;

  auto IsOperand = [](Inst *Instr, Operand *Opr) -> bool {
    for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
      if (Instr->getSrc(i) == Opr)
        return true;
    }
    return false;
  };
  Inst *FirstOperandDef = nullptr;
  for (auto &Inst : getInsts()) {
    if (IsOperand(TopLevelBoolOp, Inst.getDest())) {
      FirstOperandDef = &Inst;
      break;
    }
  }

  if (FirstOperandDef == nullptr) {
    return nullptr;
  }

  // Check for side effects.
  auto It = Ice::instToIterator(FirstOperandDef);
  while (It != getInsts().end()) {
    if (It->isDeleted()) {
      ++It;
      continue;
    }
    if (llvm::isa<InstBr>(It) || llvm::isa<InstRet>(It)) {
      break;
    }
    auto *Dest = It->getDest();
    if (It->getDest() == nullptr || It->hasSideEffects() ||
        !Func->getVMetadata()->isSingleBlock(Dest)) {
      // Relying on short-circuit evaluation here:
      // getVMetadata()->isSingleBlock(Dest) would segfault if
      // It->getDest() == nullptr.
      return nullptr;
    }
    ++It;
  }

  auto *NewNode = Func->makeNode();
  NewNode->setLoopNestDepth(getLoopNestDepth());
  It = Ice::instToIterator(FirstOperandDef);
  ++It; // Have to split after the def.

  NewNode->getInsts().splice(NewNode->getInsts().begin(), getInsts(), It,
                             getInsts().end());

  if (BuildDefs::dump()) {
    NewNode->setName(getName().append("_2"));
    setName(getName().append("_1"));
  }

  // Point edges properly.
  NewNode->addInEdge(this);
  for (auto *Out : getOutEdges()) {
    NewNode->addOutEdge(Out);
    Out->addInEdge(NewNode);
  }
  removeAllOutEdges();
  addOutEdge(NewNode);

  // Manage Phi instructions of successors.
  for (auto *Succ : NewNode->getOutEdges()) {
    for (auto &Inst : Succ->getPhis()) {
      auto *Phi = llvm::cast<InstPhi>(&Inst);
      for (SizeT i = 0; i < Phi->getSrcSize(); ++i) {
        if (Phi->getLabel(i) == this) {
          Phi->addArgument(Phi->getSrc(i), NewNode);
        }
      }
    }
  }

  // Create the new Br instruction.
  InstBr *NewInst = nullptr;
  if (FoundOr) {
    addOutEdge(JumpOnTrue);
    JumpOnFalse->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), JumpOnTrue, NewNode);
  } else if (FoundAnd) {
    addOutEdge(JumpOnFalse);
    JumpOnTrue->removeInEdge(this);
    NewInst =
        InstBr::create(Func, FirstOperandDef->getDest(), NewNode, JumpOnFalse);
  } else {
    return nullptr;
  }

  assert(NewInst != nullptr);
  appendInst(NewInst);

  Operand *UnusedOperand = nullptr;
  assert(TopLevelBoolOp->getSrcSize() == 2);
  if (TopLevelBoolOp->getSrc(0) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(1);
  else if (TopLevelBoolOp->getSrc(1) == FirstOperandDef->getDest())
    UnusedOperand = TopLevelBoolOp->getSrc(0);
  assert(UnusedOperand);
  Br->replaceSource(0, UnusedOperand); // Index 0 has the condition of the Br.

  TopLevelBoolOp->setDeleted();
  return NewNode;
}

} // end of namespace Ice