//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a shrink wrapping variant of prolog/epilog insertion:
// - Spills and restores of callee-saved registers (CSRs) are placed in the
//   machine CFG to tightly surround their uses so that execution paths that
//   do not use CSRs do not pay the spill/restore penalty.
//
// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
//   loop the spills are placed in the loop preheader, and restores are
//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
//
// - Covering paths without CSR uses:
//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
//   the use info for the CSRs inside the region is propagated outward in the
//   CFG to ensure validity of the spill/restore placements. This decreases
//   the effectiveness of shrink wrapping but does not require edge splitting
//   in the machine CFG.
//
// This shrink wrapping implementation uses an iterative analysis to determine
// which basic blocks require spills and restores for CSRs.
//
// This pass uses MachineDominators and MachineLoopInfo. Loop information
// is used to prevent placement of callee-saved register spills/restores
// in the bodies of loops.
// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "shrink-wrap" #include "PrologEpilogInserter.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include <sstream> using namespace llvm; STATISTIC(numSRReduced, "Number of CSR spills+restores reduced."); // Shrink Wrapping: static cl::opt<bool> ShrinkWrapping("shrink-wrap", cl::desc("Shrink wrap callee-saved register spills/restores")); // Shrink wrap only the specified function, a debugging aid. static cl::opt<std::string> ShrinkWrapFunc("shrink-wrap-func", cl::Hidden, cl::desc("Shrink wrap the specified function"), cl::value_desc("funcname"), cl::init("")); // Debugging level for shrink wrapping. 
enum ShrinkWrapDebugLevel { None, BasicInfo, Iterations, Details }; static cl::opt<enum ShrinkWrapDebugLevel> ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden, cl::desc("Print shrink wrapping debugging information"), cl::values( clEnumVal(None , "disable debug output"), clEnumVal(BasicInfo , "print basic DF sets"), clEnumVal(Iterations, "print SR sets for each iteration"), clEnumVal(Details , "print all DF sets"), clEnumValEnd)); void PEI::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); if (ShrinkWrapping || ShrinkWrapFunc != "") { AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); } AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } //===----------------------------------------------------------------------===// // ShrinkWrapping implementation //===----------------------------------------------------------------------===// // Convienences for dealing with machine loops. MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { assert(LP && "Machine loop is NULL."); MachineBasicBlock* PHDR = LP->getLoopPreheader(); MachineLoop* PLP = LP->getParentLoop(); while (PLP) { PHDR = PLP->getLoopPreheader(); PLP = PLP->getParentLoop(); } return PHDR; } MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { if (LP == 0) return 0; MachineLoop* PLP = LP->getParentLoop(); while (PLP) { LP = PLP; PLP = PLP->getParentLoop(); } return LP; } bool PEI::isReturnBlock(MachineBasicBlock* MBB) { return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn()); } // Initialize shrink wrapping DFA sets, called before iterations. void PEI::clearAnticAvailSets() { AnticIn.clear(); AnticOut.clear(); AvailIn.clear(); AvailOut.clear(); } // Clear all sets constructed by shrink wrapping. 
void PEI::clearAllSets() { ReturnBlocks.clear(); clearAnticAvailSets(); UsedCSRegs.clear(); CSRUsed.clear(); TLLoops.clear(); CSRSave.clear(); CSRRestore.clear(); } // Initialize all shrink wrapping data. void PEI::initShrinkWrappingInfo() { clearAllSets(); EntryBlock = 0; #ifndef NDEBUG HasFastExitPath = false; #endif ShrinkWrapThisFunction = ShrinkWrapping; // DEBUG: enable or disable shrink wrapping for the current function // via --shrink-wrap-func=<funcname>. #ifndef NDEBUG if (ShrinkWrapFunc != "") { std::string MFName = MF->getFunction()->getNameStr(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif } /// placeCSRSpillsAndRestores - determine which MBBs of the function /// need save, restore code for callee-saved registers by doing a DF analysis /// similar to the one used in code motion (GVNPRE). This produces maps of MBBs /// to sets of registers (CSRs) for saves and restores. MachineLoopInfo /// is used to ensure that CSR save/restore code is not placed inside loops. /// This function computes the maps of MBBs -> CSRs to spill and restore /// in CSRSave, CSRRestore. /// /// If shrink wrapping is not being performed, place all spills in /// the entry block, all restores in return blocks. In this case, /// CSRSave has a single mapping, CSRRestore has mappings for each /// return block. /// void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { DEBUG(MF = &Fn); initShrinkWrappingInfo(); DEBUG(if (ShrinkWrapThisFunction) { dbgs() << "Place CSR spills/restores for " << MF->getFunction()->getName() << "\n"; }); if (calculateSets(Fn)) placeSpillsAndRestores(Fn); } /// calcAnticInOut - calculate the anticipated in/out reg sets /// for the given MBB by looking forward in the MCFG at MBB's /// successors. 
/// bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { bool changed = false; // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) SmallVector<MachineBasicBlock*, 4> successors; for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock* SUCC = *SI; if (SUCC != MBB) successors.push_back(SUCC); } unsigned i = 0, e = successors.size(); if (i != e) { CSRegSet prevAnticOut = AnticOut[MBB]; MachineBasicBlock* SUCC = successors[i]; AnticOut[MBB] = AnticIn[SUCC]; for (++i; i != e; ++i) { SUCC = successors[i]; AnticOut[MBB] &= AnticIn[SUCC]; } if (prevAnticOut != AnticOut[MBB]) changed = true; } // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); CSRegSet prevAnticIn = AnticIn[MBB]; AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; if (prevAnticIn != AnticIn[MBB]) changed = true; return changed; } /// calcAvailInOut - calculate the available in/out reg sets /// for the given MBB by looking backward in the MCFG at MBB's /// predecessors. 
/// bool PEI::calcAvailInOut(MachineBasicBlock* MBB) { bool changed = false; // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB)) SmallVector<MachineBasicBlock*, 4> predecessors; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock* PRED = *PI; if (PRED != MBB) predecessors.push_back(PRED); } unsigned i = 0, e = predecessors.size(); if (i != e) { CSRegSet prevAvailIn = AvailIn[MBB]; MachineBasicBlock* PRED = predecessors[i]; AvailIn[MBB] = AvailOut[PRED]; for (++i; i != e; ++i) { PRED = predecessors[i]; AvailIn[MBB] &= AvailOut[PRED]; } if (prevAvailIn != AvailIn[MBB]) changed = true; } // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]); CSRegSet prevAvailOut = AvailOut[MBB]; AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB]; if (prevAvailOut != AvailOut[MBB]) changed = true; return changed; } /// calculateAnticAvail - build the sets anticipated and available /// registers in the MCFG of the current function iteratively, /// doing a combined forward and backward analysis. /// void PEI::calculateAnticAvail(MachineFunction &Fn) { // Initialize data flow sets. clearAnticAvailSets(); // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. bool changed = true; unsigned iterations = 0; while (changed) { changed = false; ++iterations; for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; // Calculate anticipated in, out regs at MBB from // anticipated at successors of MBB. changed |= calcAnticInOut(MBB); // Calculate available in, out regs at MBB from // available at predecessors of MBB. 
changed |= calcAvailInOut(MBB); } } DEBUG({ if (ShrinkWrapDebugging >= Details) { dbgs() << "-----------------------------------------------------------\n" << " Antic/Avail Sets:\n" << "-----------------------------------------------------------\n" << "iterations = " << iterations << "\n" << "-----------------------------------------------------------\n" << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" << "-----------------------------------------------------------\n"; for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; dumpSets(MBB); } dbgs() << "-----------------------------------------------------------\n"; } }); } /// propagateUsesAroundLoop - copy used register info from MBB to all blocks /// of the loop given by LP and its parent loops. This prevents spills/restores /// from being placed in the bodies of loops. /// void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) { if (! MBB || !LP) return; std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks(); for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) { MachineBasicBlock* LBB = loopBlocks[i]; if (LBB == MBB) continue; if (CSRUsed[LBB].contains(CSRUsed[MBB])) continue; CSRUsed[LBB] |= CSRUsed[MBB]; } } /// calculateSets - collect the CSRs used in this function, compute /// the DF sets that describe the initial minimal regions in the /// Machine CFG around which CSR spills and restores must be placed. /// /// Additionally, this function decides if shrink wrapping should /// be disabled for the current function, checking the following: /// 1. the current function has more than 500 MBBs: heuristic limit /// on function size to reduce compile time impact of the current /// iterative algorithm. /// 2. all CSRs are used in the entry block. /// 3. all CSRs are used in all immediate successors of the entry block. /// 4. 
all CSRs are used in a subset of blocks, each of which dominates /// all return blocks. These blocks, taken as a subgraph of the MCFG, /// are equivalent to the entry block since all execution paths pass /// through them. /// bool PEI::calculateSets(MachineFunction &Fn) { // Sets used to compute spill, restore placement sets. const std::vector<CalleeSavedInfo> CSI = Fn.getFrameInfo()->getCalleeSavedInfo(); // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": uses no callee-saved registers\n"); return false; } // Save refs to entry and return blocks. EntryBlock = Fn.begin(); for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); MBB != E; ++MBB) if (isReturnBlock(MBB)) ReturnBlocks.push_back(MBB); // Determine if this function has fast exit paths. DEBUG(if (ShrinkWrapThisFunction) findFastExitPath()); // Limit shrink wrapping via the current iterative bit vector // implementation to functions with <= 500 MBBs. if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } // Return now if not shrink wrapping. if (! ShrinkWrapThisFunction) return false; // Collect set of used CSRs. for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { UsedCSRegs.set(inx); } // Walk instructions in all MBBs, create CSRUsed[] sets, choose // whether or not to shrink wrap this function. 
MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); bool allCSRUsesInEntryBlock = true; for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { unsigned Reg = CSI[inx].getReg(); // If instruction I reads or modifies Reg, add it to UsedCSRegs, // CSRUsed map for the current block. for (unsigned opInx = 0, opEnd = I->getNumOperands(); opInx != opEnd; ++opInx) { const MachineOperand &MO = I->getOperand(opInx); if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) continue; unsigned MOReg = MO.getReg(); if (!MOReg) continue; if (MOReg == Reg || (TargetRegisterInfo::isPhysicalRegister(MOReg) && TargetRegisterInfo::isPhysicalRegister(Reg) && TRI->isSubRegister(Reg, MOReg))) { // CSR Reg is defined/used in block MBB. CSRUsed[MBB].set(inx); // Check for uses in EntryBlock. if (MBB != EntryBlock) allCSRUsesInEntryBlock = false; } } } } if (CSRUsed[MBB].empty()) continue; // Propagate CSRUsed[MBB] in loops if (MachineLoop* LP = LI.getLoopFor(MBB)) { // Add top level loop to work list. MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); MachineLoop* PLP = getTopLevelLoopParent(LP); if (! HDR) { HDR = PLP->getHeader(); assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); HDR = *PI; } TLLoops[HDR] = PLP; // Push uses from inside loop to its parent loops, // or to all other MBBs in its loop. 
if (LP->getLoopDepth() > 1) { for (MachineLoop* PLP = LP->getParentLoop(); PLP; PLP = PLP->getParentLoop()) { propagateUsesAroundLoop(MBB, PLP); } } else { propagateUsesAroundLoop(MBB, LP); } } } if (allCSRUsesInEntryBlock) { DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { bool allCSRsUsedInEntryFanout = true; for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), SE = EntryBlock->succ_end(); SI != SE; ++SI) { MachineBasicBlock* SUCC = *SI; if (CSRUsed[SUCC] != UsedCSRegs) allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } } if (ShrinkWrapThisFunction) { // Check if MBB uses CSRs and dominates all exit nodes. // Such nodes are equiv. to the entry node w.r.t. // CSR uses: every path through the function must // pass through this node. If each CSR is used at least // once by these nodes, shrink wrapping is disabled. CSRegSet CSRUsedInChokePoints; for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) continue; bool dominatesExitNodes = true; for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) if (! DT.dominates(MBB, ReturnBlocks[ri])) { dominatesExitNodes = false; break; } if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() << ": all CSRs used in choke point(s) at " << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; break; } } } } // Return now if we have decided not to apply shrink wrapping // to the current function. if (! 
ShrinkWrapThisFunction) return false; DEBUG({ dbgs() << "ENABLED: " << Fn.getFunction()->getName(); if (HasFastExitPath) dbgs() << " (fast exit path)"; dbgs() << "\n"; if (ShrinkWrapDebugging >= BasicInfo) { dbgs() << "------------------------------" << "-----------------------------\n"; dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; if (ShrinkWrapDebugging >= Details) { dbgs() << "------------------------------" << "-----------------------------\n"; dumpAllUsed(); } } }); // Build initial DF sets to determine minimal regions in the // Machine CFG around which CSRs must be spilled and restored. calculateAnticAvail(Fn); return true; } /// addUsesForMEMERegion - add uses of CSRs spilled or restored in /// multi-entry, multi-exit (MEME) regions so spill and restore /// placement will not break code that enters or leaves a /// shrink-wrapped region by inducing spills with no matching /// restores or restores with no matching spills. A MEME region /// is a subgraph of the MCFG with multiple entry edges, multiple /// exit edges, or both. This code propagates use information /// through the MCFG until all paths requiring spills and restores /// _outside_ the computed minimal placement regions have been covered. /// bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, SmallVector<MachineBasicBlock*, 4>& blks) { if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { bool processThisBlock = false; for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock* SUCC = *SI; if (SUCC->pred_size() > 1) { processThisBlock = true; break; } } if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock* PRED = *PI; if (PRED->succ_size() > 1) { processThisBlock = true; break; } } } if (! 
processThisBlock) return false; } CSRegSet prop; if (!CSRSave[MBB].empty()) prop = CSRSave[MBB]; else if (!CSRRestore[MBB].empty()) prop = CSRRestore[MBB]; else prop = CSRUsed[MBB]; if (prop.empty()) return false; // Propagate selected bits to successors, predecessors of MBB. bool addedUses = false; for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock* SUCC = *SI; // Self-loop if (SUCC == MBB) continue; if (! CSRUsed[SUCC].contains(prop)) { CSRUsed[SUCC] |= prop; addedUses = true; blks.push_back(SUCC); DEBUG(if (ShrinkWrapDebugging >= Iterations) dbgs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "successor " << getBasicBlockName(SUCC) << "\n"); } } for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock* PRED = *PI; // Self-loop if (PRED == MBB) continue; if (! CSRUsed[PRED].contains(prop)) { CSRUsed[PRED] |= prop; addedUses = true; blks.push_back(PRED); DEBUG(if (ShrinkWrapDebugging >= Iterations) dbgs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "predecessor " << getBasicBlockName(PRED) << "\n"); } } return addedUses; } /// addUsesForTopLevelLoops - add uses for CSRs used inside top /// level loops to the exit blocks of those loops. /// bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) { bool addedUses = false; // Place restores for top level loops where needed. 
for (DenseMap<MachineBasicBlock*, MachineLoop*>::iterator I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { MachineBasicBlock* MBB = I->first; MachineLoop* LP = I->second; MachineBasicBlock* HDR = LP->getHeader(); SmallVector<MachineBasicBlock*, 4> exitBlocks; CSRegSet loopSpills; loopSpills = CSRSave[MBB]; if (CSRSave[MBB].empty()) { loopSpills = CSRUsed[HDR]; assert(!loopSpills.empty() && "No CSRs used in loop?"); } else if (CSRRestore[MBB].contains(CSRSave[MBB])) continue; LP->getExitBlocks(exitBlocks); assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { MachineBasicBlock* EXB = exitBlocks[i]; if (! CSRUsed[EXB].contains(loopSpills)) { CSRUsed[EXB] |= loopSpills; addedUses = true; DEBUG(if (ShrinkWrapDebugging >= Iterations) dbgs() << "LOOP " << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(loopSpills) << ")->" << getBasicBlockName(EXB) << "\n"); if (EXB->succ_size() > 1 || EXB->pred_size() > 1) blks.push_back(EXB); } } } return addedUses; } /// calcSpillPlacements - determine which CSRs should be spilled /// in MBB using AnticIn sets of MBB's predecessors, keeping track /// of changes to spilled reg sets. Add MBB to the set of blocks /// that need to be processed for propagating use info to cover /// multi-entry/exit regions. 
/// bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, SmallVector<MachineBasicBlock*, 4> &blks, CSRegBlockMap &prevSpills) { bool placedSpills = false; // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) CSRegSet anticInPreds; SmallVector<MachineBasicBlock*, 4> predecessors; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock* PRED = *PI; if (PRED != MBB) predecessors.push_back(PRED); } unsigned i = 0, e = predecessors.size(); if (i != e) { MachineBasicBlock* PRED = predecessors[i]; anticInPreds = UsedCSRegs - AnticIn[PRED]; for (++i; i != e; ++i) { PRED = predecessors[i]; anticInPreds &= (UsedCSRegs - AnticIn[PRED]); } } else { // Handle uses in entry blocks (which have no predecessors). // This is necessary because the DFA formulation assumes the // entry and (multiple) exit nodes cannot have CSR uses, which // is not the case in the real world. anticInPreds = UsedCSRegs; } // Compute spills required at MBB: CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; if (! CSRSave[MBB].empty()) { if (MBB == EntryBlock) { for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; } else { // Reset all regs spilled in MBB that are also spilled in EntryBlock. if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; } } } placedSpills = (CSRSave[MBB] != prevSpills[MBB]); prevSpills[MBB] = CSRSave[MBB]; // Remember this block for adding restores to successor // blocks for multi-entry region. if (placedSpills) blks.push_back(MBB); DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << "\n"); return placedSpills; } /// calcRestorePlacements - determine which CSRs should be restored /// in MBB using AvailOut sets of MBB's succcessors, keeping track /// of changes to restored reg sets. 
Add MBB to the set of blocks /// that need to be processed for propagating use info to cover /// multi-entry/exit regions. /// bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, SmallVector<MachineBasicBlock*, 4> &blks, CSRegBlockMap &prevRestores) { bool placedRestores = false; // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) CSRegSet availOutSucc; SmallVector<MachineBasicBlock*, 4> successors; for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock* SUCC = *SI; if (SUCC != MBB) successors.push_back(SUCC); } unsigned i = 0, e = successors.size(); if (i != e) { MachineBasicBlock* SUCC = successors[i]; availOutSucc = UsedCSRegs - AvailOut[SUCC]; for (++i; i != e; ++i) { SUCC = successors[i]; availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); } } else { if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { // Handle uses in return blocks (which have no successors). // This is necessary because the DFA formulation assumes the // entry and (multiple) exit nodes cannot have CSR uses, which // is not the case in the real world. availOutSucc = UsedCSRegs; } } // Compute restores required at MBB: CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; // Postprocess restore placements at MBB. // Remove the CSRs that are restored in the return blocks. // Lest this be confusing, note that: // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { if (! CSRSave[EntryBlock].empty()) CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; } placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); prevRestores[MBB] = CSRRestore[MBB]; // Remember this block for adding saves to predecessor // blocks for multi-entry region. if (placedRestores) blks.push_back(MBB); DEBUG(if (! 
CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); return placedRestores; } /// placeSpillsAndRestores - place spills and restores of CSRs /// used in MBBs in minimal regions that contain the uses. /// void PEI::placeSpillsAndRestores(MachineFunction &Fn) { CSRegBlockMap prevCSRSave; CSRegBlockMap prevCSRRestore; SmallVector<MachineBasicBlock*, 4> cvBlocks, ncvBlocks; bool changed = true; unsigned iterations = 0; // Iterate computation of spill and restore placements in the MCFG until: // 1. CSR use info has been fully propagated around the MCFG, and // 2. computation of CSRSave[], CSRRestore[] reach fixed points. while (changed) { changed = false; ++iterations; DEBUG(if (ShrinkWrapDebugging >= Iterations) dbgs() << "iter " << iterations << " --------------------------------------------------\n"); // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, // which determines the placements of spills and restores. // Keep track of changes to spills, restores in each iteration to // minimize the total iterations. bool SRChanged = false; for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; // Place spills for CSRs in MBB. SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); // Place restores for CSRs in MBB. SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); } // Add uses of CSRs used inside loops where needed. changed |= addUsesForTopLevelLoops(cvBlocks); // Add uses for CSRs spilled or restored at branch, join points. if (changed || SRChanged) { while (! cvBlocks.empty()) { MachineBasicBlock* MBB = cvBlocks.pop_back_val(); changed |= addUsesForMEMERegion(MBB, ncvBlocks); } if (! 
ncvBlocks.empty()) { cvBlocks = ncvBlocks; ncvBlocks.clear(); } } if (changed) { calculateAnticAvail(Fn); CSRSave.clear(); CSRRestore.clear(); } } // Check for effectiveness: // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] // Gives a measure of how many CSR spills have been moved from EntryBlock // to minimal regions enclosing their uses. CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); numSRReduced += numSRReducedThisFunc; DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { dbgs() << "-----------------------------------------------------------\n"; dbgs() << "total iterations = " << iterations << " ( " << Fn.getFunction()->getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; dbgs() << "-----------------------------------------------------------\n"; dumpSRSets(); dbgs() << "-----------------------------------------------------------\n"; if (numSRReducedThisFunc) verifySpillRestorePlacement(); }); } // Debugging methods. #ifndef NDEBUG /// findFastExitPath - debugging method used to detect functions /// with at least one path from the entry block to a return block /// directly or which has a very small number of edges. /// void PEI::findFastExitPath() { if (! EntryBlock) return; // Fina a path from EntryBlock to any return block that does not branch: // Entry // | ... // v | // B1<-----+ // | // v // Return for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), SE = EntryBlock->succ_end(); SI != SE; ++SI) { MachineBasicBlock* SUCC = *SI; // Assume positive, disprove existence of fast path. HasFastExitPath = true; // Check the immediate successors. 
if (isReturnBlock(SUCC)) { if (ShrinkWrapDebugging >= BasicInfo) dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << getBasicBlockName(SUCC) << "\n"; break; } // Traverse df from SUCC, look for a branch block. std::string exitPath = getBasicBlockName(SUCC); for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC), BE = df_end(SUCC); BI != BE; ++BI) { MachineBasicBlock* SBB = *BI; // Reject paths with branch nodes. if (SBB->succ_size() > 1) { HasFastExitPath = false; break; } exitPath += "->" + getBasicBlockName(SBB); } if (HasFastExitPath) { if (ShrinkWrapDebugging >= BasicInfo) dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << exitPath << "\n"; break; } } } /// verifySpillRestorePlacement - check the current spill/restore /// sets for safety. Attempt to find spills without restores or /// restores without spills. /// Spills: walk df from each MBB in spill set ensuring that /// all CSRs spilled at MMBB are restored on all paths /// from MBB to all exit blocks. /// Restores: walk idf from each MBB in restore set ensuring that /// all CSRs restored at MBB are spilled on all paths /// reaching MBB. 
/// void PEI::verifySpillRestorePlacement() { unsigned numReturnBlocks = 0; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; if (isReturnBlock(MBB) || MBB->succ_size() == 0) ++numReturnBlocks; } for (CSRegBlockMap::iterator BI = CSRSave.begin(), BE = CSRSave.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; CSRegSet spilled = BI->second; CSRegSet restored; if (spilled.empty()) continue; DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(spilled) << " RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); if (CSRRestore[MBB].intersects(spilled)) { restored |= (CSRRestore[MBB] & spilled); } // Walk depth first from MBB to find restores of all CSRs spilled at MBB: // we must find restores for all spills w/no intervening spills on all // paths from MBB to all return blocks. for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB), BE = df_end(MBB); BI != BE; ++BI) { MachineBasicBlock* SBB = *BI; if (SBB == MBB) continue; // Stop when we encounter spills of any CSRs spilled at MBB that // have not yet been seen to be restored. if (CSRSave[SBB].intersects(spilled) && !restored.contains(CSRSave[SBB] & spilled)) break; // Collect the CSRs spilled at MBB that are restored // at this DF successor of MBB. if (CSRRestore[SBB].intersects(spilled)) restored |= (CSRRestore[SBB] & spilled); // If we are at a retun block, check that the restores // we have seen so far exhaust the spills at MBB, then // reset the restores. if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); DEBUG(dbgs() << MF->getFunction()->getName() << ": " << stringifyCSRegSet(notRestored) << " spilled at " << getBasicBlockName(MBB) << " are never restored on path to return " << getBasicBlockName(SBB) << "\n"); } restored.clear(); } } } // Check restore placements. 
for (CSRegBlockMap::iterator BI = CSRRestore.begin(), BE = CSRRestore.end(); BI != BE; ++BI) { MachineBasicBlock* MBB = BI->first; CSRegSet restored = BI->second; CSRegSet spilled; if (restored.empty()) continue; DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << " RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(restored) << "\n"); if (CSRSave[MBB].intersects(restored)) { spilled |= (CSRSave[MBB] & restored); } // Walk inverse depth first from MBB to find spills of all // CSRs restored at MBB: for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB), BE = idf_end(MBB); BI != BE; ++BI) { MachineBasicBlock* PBB = *BI; if (PBB == MBB) continue; // Stop when we encounter restores of any CSRs restored at MBB that // have not yet been seen to be spilled. if (CSRRestore[PBB].intersects(restored) && !spilled.contains(CSRRestore[PBB] & restored)) break; // Collect the CSRs restored at MBB that are spilled // at this DF predecessor of MBB. if (CSRSave[PBB].intersects(restored)) spilled |= (CSRSave[PBB] & restored); } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); DEBUG(dbgs() << MF->getFunction()->getName() << ": " << stringifyCSRegSet(notSpilled) << " restored at " << getBasicBlockName(MBB) << " are never spilled\n"); } } } // Debugging print methods. 
std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { if (!MBB) return ""; if (MBB->getBasicBlock()) return MBB->getBasicBlock()->getNameStr(); std::ostringstream name; name << "_MBB_" << MBB->getNumber(); return name.str(); } std::string PEI::stringifyCSRegSet(const CSRegSet& s) { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); const std::vector<CalleeSavedInfo> CSI = MF->getFrameInfo()->getCalleeSavedInfo(); std::ostringstream srep; if (CSI.size() == 0) { srep << "[]"; return srep.str(); } srep << "["; CSRegSet::iterator I = s.begin(), E = s.end(); if (I != E) { unsigned reg = CSI[*I].getReg(); srep << TRI->getName(reg); for (++I; I != E; ++I) { reg = CSI[*I].getReg(); srep << ","; srep << TRI->getName(reg); } } srep << "]"; return srep.str(); } void PEI::dumpSet(const CSRegSet& s) { DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); } void PEI::dumpUsed(MachineBasicBlock* MBB) { DEBUG({ if (MBB) dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; }); } void PEI::dumpAllUsed() { for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; dumpUsed(MBB); } } void PEI::dumpSets(MachineBasicBlock* MBB) { DEBUG({ if (MBB) dbgs() << getBasicBlockName(MBB) << " | " << stringifyCSRegSet(CSRUsed[MBB]) << " | " << stringifyCSRegSet(AnticIn[MBB]) << " | " << stringifyCSRegSet(AnticOut[MBB]) << " | " << stringifyCSRegSet(AvailIn[MBB]) << " | " << stringifyCSRegSet(AvailOut[MBB]) << "\n"; }); } void PEI::dumpSets1(MachineBasicBlock* MBB) { DEBUG({ if (MBB) dbgs() << getBasicBlockName(MBB) << " | " << stringifyCSRegSet(CSRUsed[MBB]) << " | " << stringifyCSRegSet(AnticIn[MBB]) << " | " << stringifyCSRegSet(AnticOut[MBB]) << " | " << stringifyCSRegSet(AvailIn[MBB]) << " | " << stringifyCSRegSet(AvailOut[MBB]) << " | " << stringifyCSRegSet(CSRSave[MBB]) << " | " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; }); } void PEI::dumpAllSets() { 
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock* MBB = MBBI; dumpSets1(MBB); } } void PEI::dumpSRSets() { DEBUG({ for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); MBB != E; ++MBB) { if (!CSRSave[MBB].empty()) { dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]); if (CSRRestore[MBB].empty()) dbgs() << '\n'; } if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) dbgs() << " " << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; } }); } #endif