//===- subzero/src/IceGlobalContext.cpp - Global context defs ------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines aspects of the compilation that persist across multiple
/// functions.
///
//===----------------------------------------------------------------------===//

#include "IceGlobalContext.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IceRevision.h"
#include "IceTargetLowering.h"
#include "IceTimerTree.h"
#include "IceTypes.def"
#include "IceTypes.h"

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#endif // __clang__

#include "llvm/Support/Timer.h"

#ifdef __clang__
#pragma clang diagnostic pop
#endif // __clang__

#include <algorithm> // max()
#include <cstring>   // memcmp(), memcpy()

namespace std {
template <> struct hash<Ice::RelocatableTuple> {
  size_t operator()(const Ice::RelocatableTuple &Key) const {
    // Use the relocatable's name, plus the hash of a combination of the
    // number of OffsetExprs and the known, fixed offset for the reloc. We
    // left-shift the known offset by 5, trying to minimize the interaction
    // between the bits in OffsetExpr.size() and Key.Offset.
    return hash<Ice::SizeT>()(Key.Name.getID()) +
           hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5));
  }
};
} // end of namespace std

namespace Ice {

namespace {

// Define the key comparison function for the constant pool's unordered_map,
// but only for key types of interest: integer types, floating point types,
// and the special RelocatableTuple.
template <typename KeyType, class Enable = void> struct KeyCompare {};

template <typename KeyType>
struct KeyCompare<KeyType,
                  typename std::enable_if<
                      std::is_integral<KeyType>::value ||
                      std::is_same<KeyType, RelocatableTuple>::value>::type> {
  bool operator()(const KeyType &Value1, const KeyType &Value2) const {
    return Value1 == Value2;
  }
};
template <typename KeyType>
struct KeyCompare<KeyType, typename std::enable_if<
                               std::is_floating_point<KeyType>::value>::type> {
  bool operator()(const KeyType &Value1, const KeyType &Value2) const {
    return !memcmp(&Value1, &Value2, sizeof(KeyType));
  }
};

// Define a key comparison function for sorting the constant pool's values
// after they are dumped to a vector. This covers integer types, floating
// point types, and ConstantRelocatable values.
template <typename ValueType, class Enable = void> struct KeyCompareLess {};

template <typename ValueType>
struct KeyCompareLess<ValueType,
                      typename std::enable_if<std::is_floating_point<
                          typename ValueType::PrimType>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    using CompareType = uint64_t;
    static_assert(sizeof(typename ValueType::PrimType) <= sizeof(CompareType),
                  "Expected floating-point type of width 64-bit or less");
    typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
    typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
    // We avoid "V1 < V2" because of NaN.
    // We avoid "memcmp(&V1, &V2, sizeof(V1)) < 0", which depends on the
    // endianness of the host system running Subzero.
    // Instead, compare the results of bit-casting to uint64_t.
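    // (Why "V1 < V2" is unsafe here: every ordered comparison involving a
    // NaN evaluates to false, so it would not define the strict weak
    // ordering that std::sort requires.)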
    uint64_t I1 = 0, I2 = 0;
    memcpy(&I1, &V1, sizeof(V1));
    memcpy(&I2, &V2, sizeof(V2));
    return I1 < I2;
  }
};
template <typename ValueType>
struct KeyCompareLess<ValueType,
                      typename std::enable_if<std::is_integral<
                          typename ValueType::PrimType>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
    typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
    return V1 < V2;
  }
};
template <typename ValueType>
struct KeyCompareLess<
    ValueType, typename std::enable_if<
                   std::is_same<ValueType, ConstantRelocatable>::value>::type> {
  bool operator()(const Constant *Const1, const Constant *Const2) const {
    auto *V1 = llvm::cast<ValueType>(Const1);
    auto *V2 = llvm::cast<ValueType>(Const2);
    if (V1->getName() == V2->getName())
      return V1->getOffset() < V2->getOffset();
    return V1->getName() < V2->getName();
  }
};

// TypePool maps constants of type KeyType (e.g. float) to pointers to
// type ValueType (e.g. ConstantFloat).
template <Type Ty, typename KeyType, typename ValueType> class TypePool {
  TypePool(const TypePool &) = delete;
  TypePool &operator=(const TypePool &) = delete;

public:
  TypePool() = default;
  ValueType *getOrAdd(GlobalContext *Ctx, KeyType Key) {
    auto Iter = Pool.find(Key);
    if (Iter != Pool.end()) {
      Iter->second->updateLookupCount();
      return Iter->second;
    }
    auto *Result = ValueType::create(Ctx, Ty, Key);
    Pool[Key] = Result;
    Result->updateLookupCount();
    return Result;
  }
  ConstantList getConstantPool() const {
    ConstantList Constants;
    Constants.reserve(Pool.size());
    for (auto &I : Pool)
      Constants.push_back(I.second);
    // The sort (and its KeyCompareLess machinery) is not strictly necessary,
    // but is desirable for producing output that is deterministic across
    // unordered_map::iterator implementations.
    std::sort(Constants.begin(), Constants.end(), KeyCompareLess<ValueType>());
    return Constants;
  }
  size_t size() const { return Pool.size(); }

private:
  // Use the default hash function, and a custom key comparison function. The
  // key comparison function for floating point variables can't use the
  // default ==-based implementation because of special C++ semantics
  // regarding +0.0, -0.0, and NaN comparison. However, it's OK to use the
  // default hash for floating point values because KeyCompare is the final
  // source of truth - in the worst case a "false" collision must be resolved.
  using ContainerType =
      std::unordered_map<KeyType, ValueType *, std::hash<KeyType>,
                         KeyCompare<KeyType>>;
  ContainerType Pool;
};

// UndefPool maps ICE types to the corresponding ConstantUndef values.
class UndefPool {
  UndefPool(const UndefPool &) = delete;
  UndefPool &operator=(const UndefPool &) = delete;

public:
  UndefPool() : Pool(IceType_NUM) {}

  ConstantUndef *getOrAdd(GlobalContext *Ctx, Type Ty) {
    if (Pool[Ty] == nullptr)
      Pool[Ty] = ConstantUndef::create(Ctx, Ty);
    return Pool[Ty];
  }

private:
  std::vector<ConstantUndef *> Pool;
};

} // end of anonymous namespace

// The global constant pool bundles individual pools of each type of
// interest.
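// Usage sketch: repeated requests for the same value return the same pooled
// object, with its lookup count bumped on each hit. For example:
//   Constant *A = Ctx->getConstantFloat(1.5f); // first use: created, pooled
//   Constant *B = Ctx->getConstantFloat(1.5f); // second use: B == A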
class ConstantPool {
  ConstantPool(const ConstantPool &) = delete;
  ConstantPool &operator=(const ConstantPool &) = delete;

public:
  ConstantPool() = default;
  TypePool<IceType_f32, float, ConstantFloat> Floats;
  TypePool<IceType_f64, double, ConstantDouble> Doubles;
  TypePool<IceType_i1, int8_t, ConstantInteger32> Integers1;
  TypePool<IceType_i8, int8_t, ConstantInteger32> Integers8;
  TypePool<IceType_i16, int16_t, ConstantInteger32> Integers16;
  TypePool<IceType_i32, int32_t, ConstantInteger32> Integers32;
  TypePool<IceType_i64, int64_t, ConstantInteger64> Integers64;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> Relocatables;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable>
      ExternRelocatables;
  UndefPool Undefs;
};

void GlobalContext::waitForWorkerThreads() {
  if (WaitForWorkerThreadsCalled.exchange(true))
    return;
  optQueueNotifyEnd();
  for (std::thread &Worker : TranslationThreads) {
    Worker.join();
  }
  TranslationThreads.clear();

  // Only notify the emit queue to end after all the translation threads have
  // ended.
  emitQueueNotifyEnd();
  for (std::thread &Worker : EmitterThreads) {
    Worker.join();
  }
  EmitterThreads.clear();

  if (BuildDefs::timers()) {
    auto Timers = getTimers();
    for (ThreadContext *TLS : AllThreadContexts)
      Timers->mergeFrom(TLS->Timers);
  }
  if (BuildDefs::dump()) {
    // Do a separate loop over AllThreadContexts to avoid holding two locks
    // at once.
    auto Stats = getStatsCumulative();
    for (ThreadContext *TLS : AllThreadContexts)
      Stats->add(TLS->StatsCumulative);
  }
}

void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrDump();
  const std::string Name =
      (Func == nullptr ? "_FINAL_" : Func->getFunctionNameAndSize());
#define X(str, tag)                                                            \
  Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
  CODESTATS_TABLE
#undef X
  Str << "|" << Name << "|Spills+Fills|"
      << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
  Str << "|" << Name << "|Memory Usage |";
  if (const auto MemUsed = static_cast<size_t>(
          llvm::TimeRecord::getCurrentTime(false).getMemUsed())) {
    static constexpr size_t _1MB = 1024 * 1024;
    Str << (MemUsed / _1MB) << " MB";
  } else {
    Str << "(requires '-track-memory')";
  }
  Str << "\n";
  Str << "|" << Name << "|CPool Sizes ";
  {
    auto Pool = Ctx->getConstPool();
    Str << "|f32=" << Pool->Floats.size();
    Str << "|f64=" << Pool->Doubles.size();
    Str << "|i1=" << Pool->Integers1.size();
    Str << "|i8=" << Pool->Integers8.size();
    Str << "|i16=" << Pool->Integers16.size();
    Str << "|i32=" << Pool->Integers32.size();
    Str << "|i64=" << Pool->Integers64.size();
    Str << "|Rel=" << Pool->Relocatables.size();
    Str << "|ExtRel=" << Pool->ExternRelocatables.size();
  }
  Str << "\n";
  if (Func != nullptr) {
    Str << "|" << Name << "|Cfg Memory |" << Func->getTotalMemoryMB()
        << " MB\n";
    Str << "|" << Name << "|Liveness Memory |" << Func->getLivenessMemoryMB()
        << " MB\n";
  }
}

namespace {

// By default, wake up the main parser thread when the OptQ gets half empty.
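// (Illustrative numbers only: if MaxOptQSize were 1024, the wakeup threshold
// would be 512, unless more translation threads than that are configured;
// see the std::max() in the constructor's initializer list below.)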
static constexpr size_t DefaultOptQWakeupSize =
    GlobalContext::MaxOptQSize >> 1;

} // end of anonymous namespace

GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit,
                             Ostream *OsError, ELFStreamer *ELFStr)
    : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
      StrDump(OsDump), StrEmit(OsEmit), StrError(OsError),
      IntrinsicsInfo(this), ObjectWriter(),
      OptQWakeupSize(std::max(DefaultOptQWakeupSize,
                              size_t(getFlags().getNumTranslationThreads()))),
      OptQ(/*Sequential=*/getFlags().isSequential(),
           /*MaxSize=*/
           getFlags().isParseParallel()
               ? MaxOptQSize
               : getFlags().getNumTranslationThreads()),
      // EmitQ is allowed unlimited size.
      EmitQ(/*Sequential=*/getFlags().isSequential()),
      DataLowering(TargetDataLowering::createLowering(this)) {
  assert(OsDump && "OsDump is not defined for GlobalContext");
  assert(OsEmit && "OsEmit is not defined for GlobalContext");
  assert(OsError && "OsError is not defined for GlobalContext");
  // Make sure thread_local fields are properly initialized before any
  // accesses are made. Do this here instead of at the start of main() so
  // that all clients (e.g. unit tests) can benefit for free.
  GlobalContext::TlsInit();
  Cfg::TlsInit();
  Liveness::TlsInit();
  // Create a new ThreadContext for the current thread. No need to lock
  // AllThreadContexts at this point since no other threads have access yet
  // to this GlobalContext object.
  ThreadContext *MyTLS = new ThreadContext();
  AllThreadContexts.push_back(MyTLS);
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  // Pre-register built-in stack names.
  if (BuildDefs::timers()) {
    // TODO(stichnot): There needs to be a strong relationship between
    // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
    newTimerStackID("Total across all functions");
    newTimerStackID("Per-function summary");
  }
  Timers.initInto(MyTLS->Timers);
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
    break;
  case FT_Asm:
  case FT_Iasm:
    break;
  }

  // Cache common constants up front.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  ConstZeroForType[IceType_##tag] = getConstantZeroInternal(IceType_##tag);
  ICETYPE_TABLE;
#undef X
  ConstantTrue = getConstantInt1Internal(1);

  // Define runtime helper functions.
#define X(Tag, Name)                                                           \
  RuntimeHelperFunc[static_cast<size_t>(RuntimeHelper::H_##Tag)] =             \
      getConstantExternSym(getGlobalString(Name));
  RUNTIME_HELPER_FUNCTIONS_TABLE
#undef X

  TargetLowering::staticInit(this);

  if (getFlags().getEmitRevision()) {
    // Embed the Subzero revision into the compiled binary by creating a
    // special global variable initialized with the revision string.
    auto *Revision = VariableDeclaration::create(&Globals, true);
    Revision->setName(this, "__Sz_revision");
    Revision->setIsConstant(true);
    const char *RevisionString = getSubzeroRevision();
    Revision->addInitializer(VariableDeclaration::DataInitializer::create(
        &Globals, RevisionString, 1 + strlen(RevisionString)));
    Globals.push_back(Revision);
  }
}

void GlobalContext::translateFunctionsWrapper(ThreadContext *MyTLS) {
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  translateFunctions();
}

void GlobalContext::translateFunctions() {
  TimerMarker Timer(TimerStack::TT_translateFunctions, this);
  while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
    std::unique_ptr<EmitterWorkItem> Item;
    auto Func = OptItem->getParsedCfg();
    // Install Func in TLS for Cfg-specific container allocators.
    CfgLocalAllocatorScope _(Func.get());
    // Reset per-function stats being accumulated in TLS.
    resetStats();
    // Set verbose level to none if the current function does NOT match the
    // -verbose-focus command-line option.
    if (!getFlags().matchVerboseFocusOn(Func->getFunctionName(),
                                        Func->getSequenceNumber()))
      Func->setVerbose(IceV_None);
    // Disable translation if -notranslate is specified, or if the current
    // function doesn't match the -translate-only option. If translation is
    // disabled, just dump the high-level IR and continue.
    if (getFlags().getDisableTranslation() ||
        !getFlags().matchTranslateOnly(Func->getFunctionName(),
                                       Func->getSequenceNumber())) {
      Func->dump();
      // Add a dummy work item as a placeholder. This maintains sequence
      // numbers so that the emitter thread will emit subsequent functions.
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
      emitQueueBlockingPush(std::move(Item));
      continue; // Func goes out of scope and gets deleted
    }
    Func->translate();
    if (Func->hasError()) {
      getErrorStatus()->assign(EC_Translation);
      OstreamLocker L(this);
      getStrError() << "ICE translation error: " << Func->getFunctionName()
                    << ": " << Func->getError() << ": "
                    << Func->getFunctionNameAndSize() << "\n";
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
    } else {
      Func->getAssembler<>()->setInternal(Func->getInternal());
      switch (getFlags().getOutFileType()) {
      case FT_Elf:
      case FT_Iasm: {
        Func->emitIAS();
        // The Cfg has already emitted into the assembly buffer, so stats
        // have been fully collected into this thread's TLS. Dump them before
        // TLS is reset for the next Cfg.
        if (BuildDefs::dump())
          dumpStats(Func.get());
        auto Asm = Func->releaseAssembler();
        // Copy relevant fields into Asm before Func is deleted.
        Asm->setFunctionName(Func->getFunctionName());
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Asm));
        Item->setGlobalInits(Func->getGlobalInits());
      } break;
      case FT_Asm:
        // The Cfg has not been emitted yet, so stats are not ready to be
        // dumped.
        std::unique_ptr<VariableDeclarationList> GlobalInits =
            Func->getGlobalInits();
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Func));
        Item->setGlobalInits(std::move(GlobalInits));
        break;
      }
    }
    assert(Item != nullptr);
    emitQueueBlockingPush(std::move(Item));
    // The Cfg now gets deleted as Func goes out of scope.
  }
}

namespace {

// Ensure Pending is large enough that Pending[Index] is valid.
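// (For instance, with Pending->size() == 3 and Index == 5, the vector grows
// to six entries, the new slots null, so Pending[5] can be assigned safely.)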
void resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> *Pending,
                   uint32_t Index) {
  if (Index >= Pending->size())
    Utils::reserveAndResize(*Pending, Index + 1);
}

} // end of anonymous namespace

// static
void GlobalContext::TlsInit() { ICE_TLS_INIT_FIELD(TLS); }

void GlobalContext::emitFileHeader() {
  TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
  if (getFlags().getOutFileType() == FT_Elf) {
    getObjectWriter()->writeInitialELFHeader();
  } else {
    if (!BuildDefs::dump()) {
      getStrError() << "emitFileHeader for non-ELF";
      getErrorStatus()->assign(EC_Translation);
    }
    TargetHeaderLowering::createLowering(this)->lower();
  }
}

void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); }

void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); }

void GlobalContext::emitTargetRODataSections() {
  DataLowering->emitTargetRODataSections();
}

void GlobalContext::saveBlockInfoPtrs() {
  for (VariableDeclaration *Global : Globals) {
    if (Cfg::isProfileGlobal(*Global)) {
      ProfileBlockInfos.push_back(Global);
    }
  }
}

void GlobalContext::lowerGlobals(const std::string &SectionSuffix) {
  TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
  const bool DumpGlobalVariables =
      BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit) &&
      getFlags().matchVerboseFocusOn("", 0);
  if (DumpGlobalVariables) {
    OstreamLocker L(this);
    Ostream &Stream = getStrDump();
    for (const Ice::VariableDeclaration *Global : Globals) {
      Global->dump(Stream);
    }
  }
  if (getFlags().getDisableTranslation())
    return;

  saveBlockInfoPtrs();
  // If we need to shuffle the layout of global variables, shuffle them now.
  if (getFlags().getReorderGlobalVariables()) {
    // Create a random number generator for global variable reordering.
    RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                              RPE_GlobalVariableReordering);
    RandomShuffle(Globals.begin(), Globals.end(),
                  [&RNG](int N) { return (uint32_t)RNG.next(N); });
  }

  if (!BuildDefs::minimal() && Instrumentor)
    Instrumentor->instrumentGlobals(Globals);

  DataLowering->lowerGlobals(Globals, SectionSuffix);
  if (ProfileBlockInfos.empty() && DisposeGlobalVariablesAfterLowering) {
    Globals.clearAndPurge();
  } else {
    Globals.clear();
  }
}

void GlobalContext::lowerProfileData() {
  // ProfileBlockInfoVarDecl is initialized in the constructor, and will only
  // ever be non-nullptr after this method completes. The assertion is a
  // convoluted way of ensuring lowerProfileData is invoked a single time.
  assert(ProfileBlockInfoVarDecl == nullptr);
  auto GlobalVariablePool = getInitializerAllocator();
  ProfileBlockInfoVarDecl =
      VariableDeclaration::createExternal(GlobalVariablePool.get());
  ProfileBlockInfoVarDecl->setAlignment(typeWidthInBytes(IceType_i64));
  ProfileBlockInfoVarDecl->setIsConstant(true);

  // Note: if you change this symbol, make sure to update
  // runtime/szrt_profiler.c as well.
  ProfileBlockInfoVarDecl->setName(this, "__Sz_block_profile_info");

  for (const VariableDeclaration *PBI : ProfileBlockInfos) {
    if (Cfg::isProfileGlobal(*PBI)) {
      constexpr RelocOffsetT BlockExecutionCounterOffset = 0;
      ProfileBlockInfoVarDecl->addInitializer(
          VariableDeclaration::RelocInitializer::create(
              GlobalVariablePool.get(), PBI,
              {RelocOffset::create(this, BlockExecutionCounterOffset)}));
    }
  }

  // This adds a 64-bit sentinel entry to the end of our array. For 32-bit
  // architectures this will waste 4 bytes.
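  // (typeWidthInBytes(IceType_i64) == 8, so eight zero bytes are appended
  // regardless of the target's pointer size.)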
  const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64);
  ProfileBlockInfoVarDecl->addInitializer(
      VariableDeclaration::ZeroInitializer::create(GlobalVariablePool.get(),
                                                   Sizeof64BitNullPtr));
  Globals.push_back(ProfileBlockInfoVarDecl);
  constexpr char ProfileDataSection[] = "$sz_profiler$";
  lowerGlobals(ProfileDataSection);
}

void GlobalContext::emitterWrapper(ThreadContext *MyTLS) {
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  emitItems();
}

void GlobalContext::emitItems() {
  const bool Threaded = !getFlags().isSequential();
  // Pending is a vector containing the reassembled, ordered list of work
  // items. When we're ready for the next item, we first check whether it's
  // in the Pending list. If not, we take an item from the work queue, and if
  // it's not the item we're waiting for, we insert it into Pending and
  // repeat. The work item is deleted after it is processed.
  std::vector<std::unique_ptr<EmitterWorkItem>> Pending;
  uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
  uint32_t ShuffleStartIndex = DesiredSequenceNumber;
  uint32_t ShuffleEndIndex = DesiredSequenceNumber;
  bool EmitQueueEmpty = false;
  const uint32_t ShuffleWindowSize =
      std::max(1u, getFlags().getReorderFunctionsWindowSize());
  bool Shuffle = Threaded && getFlags().getReorderFunctions();
  // Create a random number generator for function reordering.
  RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                            RPE_FunctionReordering);

  while (!EmitQueueEmpty) {
    resizePending(&Pending, DesiredSequenceNumber);
    // See if Pending contains DesiredSequenceNumber.
    if (Pending[DesiredSequenceNumber] == nullptr) {
      // We need to fetch an EmitterWorkItem from the queue.
      auto RawItem = emitQueueBlockingPop();
      if (RawItem == nullptr) {
        // This is the notifier for an empty queue.
        EmitQueueEmpty = true;
      } else {
        // We got an EmitterWorkItem; add it to Pending.
        uint32_t ItemSeq = RawItem->getSequenceNumber();
        if (Threaded && ItemSeq != DesiredSequenceNumber) {
          // Not the desired one; add it to Pending but do not increase
          // DesiredSequenceNumber. Continue the loop; do not emit the item.
          resizePending(&Pending, ItemSeq);
          Pending[ItemSeq] = std::move(RawItem);
          continue;
        }
        // At this point ItemSeq == DesiredSequenceNumber, or !Threaded, in
        // which case any ItemSeq is acceptable.
        Pending[DesiredSequenceNumber] = std::move(RawItem);
      }
    }
    const auto *CurrentWorkItem = Pending[DesiredSequenceNumber].get();

    // We have the desired EmitterWorkItem or nullptr as the end notifier.
    // If the emitter queue is not empty, increase DesiredSequenceNumber and
    // ShuffleEndIndex.
    if (!EmitQueueEmpty) {
      DesiredSequenceNumber++;
      ShuffleEndIndex++;
    }

    if (Shuffle) {
      // Continue fetching EmitterWorkItems if function reordering is turned
      // on, the emit queue is not empty, the number of consecutive pending
      // items is smaller than the window size, and the current work item is
      // not of WI_GlobalInits kind. Emit WI_GlobalInits blocks first to
      // avoid holding an arbitrarily large GlobalDeclarationList.
      if (!EmitQueueEmpty &&
          ShuffleEndIndex - ShuffleStartIndex < ShuffleWindowSize &&
          CurrentWorkItem->getKind() != EmitterWorkItem::WI_GlobalInits)
        continue;

      // Emit the EmitterWorkItems from Pending[ShuffleStartIndex] to
      // Pending[ShuffleEndIndex]. If function reordering is turned on,
      // shuffle the pending items in that range first.
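      // (Illustrative example: with ShuffleWindowSize == 8, items 0..7 are
      // collected, shuffled as a block, emitted below, and then the window
      // advances to items 8..15. The range is half-open: both the shuffle
      // and the emit loop below stop before ShuffleEndIndex.)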
      RandomShuffle(Pending.begin() + ShuffleStartIndex,
                    Pending.begin() + ShuffleEndIndex,
                    [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
    }

    // Emit the items from ShuffleStartIndex to ShuffleEndIndex.
    for (uint32_t I = ShuffleStartIndex; I < ShuffleEndIndex; I++) {
      std::unique_ptr<EmitterWorkItem> Item = std::move(Pending[I]);

      switch (Item->getKind()) {
      case EmitterWorkItem::WI_Nop:
        break;
      case EmitterWorkItem::WI_GlobalInits: {
        accumulateGlobals(Item->getGlobalInits());
      } break;
      case EmitterWorkItem::WI_Asm: {
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        std::unique_ptr<Assembler> Asm = Item->getAsm();
        Asm->alignFunction();
        GlobalString Name = Asm->getFunctionName();
        switch (getFlags().getOutFileType()) {
        case FT_Elf:
          getObjectWriter()->writeFunctionCode(Name, Asm->getInternal(),
                                               Asm.get());
          break;
        case FT_Iasm: {
          OstreamLocker L(this);
          Cfg::emitTextHeader(Name, this, Asm.get());
          Asm->emitIASBytes(this);
        } break;
        case FT_Asm:
          llvm::report_fatal_error("Unexpected FT_Asm");
          break;
        }
      } break;
      case EmitterWorkItem::WI_Cfg: {
        if (!BuildDefs::dump())
          llvm::report_fatal_error("WI_Cfg work item created inappropriately");
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        assert(getFlags().getOutFileType() == FT_Asm);
        std::unique_ptr<Cfg> Func = Item->getCfg();
        // Unfortunately, we have to temporarily install the Cfg in TLS
        // because Variable::asType() uses the allocator to create the
        // differently-typed copy.
        CfgLocalAllocatorScope _(Func.get());
        Func->emit();
        dumpStats(Func.get());
      } break;
      }
    }
    // Update the start index for the next shuffling window.
    ShuffleStartIndex = ShuffleEndIndex;
  }

  // In case there is no code to be generated, invoke the conditional
  // lowerGlobals again -- this is a no-op if code has been emitted.
  lowerGlobalsIfNoCodeHasBeenSeen();
}

GlobalContext::~GlobalContext() {
  llvm::DeleteContainerPointers(AllThreadContexts);
  LockedPtr<DestructorArray> Dtors = getDestructors();
  // Destructors are invoked in reverse object construction order.
  for (const auto &Dtor : reverse_range(*Dtors))
    Dtor();
}

void GlobalContext::dumpStrings() {
  if (!getFlags().getDumpStrings())
    return;
  OstreamLocker _(this);
  Ostream &Str = getStrDump();
  Str << "GlobalContext strings:\n";
  getStrings()->dump(Str);
}

void GlobalContext::dumpConstantLookupCounts() {
  if (!BuildDefs::dump())
    return;
  const bool DumpCounts = (getFlags().getVerbose() & IceV_ConstPoolStats) &&
                          getFlags().matchVerboseFocusOn("", 0);
  if (!DumpCounts)
    return;

  OstreamLocker _(this);
  Ostream &Str = getStrDump();
  Str << "Constant pool use stats: count+value+type\n";
#define X(WhichPool)                                                           \
  for (auto *C : getConstPool()->WhichPool.getConstantPool()) {                \
    Str << C->getLookupCount() << " ";                                         \
    C->dump(Str);                                                              \
    Str << " " << C->getType() << "\n";                                        \
  }
  X(Integers1);
  X(Integers8);
  X(Integers16);
  X(Integers32);
  X(Integers64);
  X(Floats);
  X(Doubles);
  X(Relocatables);
  X(ExternRelocatables);
#undef X
}

// TODO(stichnot): Consider adding thread-local caches of constant pool
// entries to reduce contention.

// All locking is done by the getConstantInt[0-9]+() target function.
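// (Usage sketch: callers typically go through this typed dispatcher, e.g.
// "Ctx->getConstantInt(IceType_i32, 42)", which forwards to
// getConstantInt32(42).)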
Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
  switch (Ty) {
  case IceType_i1:
    return getConstantInt1(Value);
  case IceType_i8:
    return getConstantInt8(Value);
  case IceType_i16:
    return getConstantInt16(Value);
  case IceType_i32:
    return getConstantInt32(Value);
  case IceType_i64:
    return getConstantInt64(Value);
  default:
    llvm_unreachable("Bad integer type for getConstant");
  }
  return nullptr;
}

Constant *GlobalContext::getConstantInt1Internal(int8_t ConstantInt1) {
  ConstantInt1 &= INT8_C(1);
  return getConstPool()->Integers1.getOrAdd(this, ConstantInt1);
}

Constant *GlobalContext::getConstantInt8Internal(int8_t ConstantInt8) {
  return getConstPool()->Integers8.getOrAdd(this, ConstantInt8);
}

Constant *GlobalContext::getConstantInt16Internal(int16_t ConstantInt16) {
  return getConstPool()->Integers16.getOrAdd(this, ConstantInt16);
}

Constant *GlobalContext::getConstantInt32Internal(int32_t ConstantInt32) {
  return getConstPool()->Integers32.getOrAdd(this, ConstantInt32);
}

Constant *GlobalContext::getConstantInt64Internal(int64_t ConstantInt64) {
  return getConstPool()->Integers64.getOrAdd(this, ConstantInt64);
}

Constant *GlobalContext::getConstantFloat(float ConstantFloat) {
  return getConstPool()->Floats.getOrAdd(this, ConstantFloat);
}

Constant *GlobalContext::getConstantDouble(double ConstantDouble) {
  return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
}

Constant *GlobalContext::getConstantSymWithEmitString(
    const RelocOffsetT Offset, const RelocOffsetArray &OffsetExpr,
    GlobalString Name, const std::string &EmitString) {
  return getConstPool()->Relocatables.getOrAdd(
      this, RelocatableTuple(Offset, OffsetExpr, Name, EmitString));
}

Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
                                        GlobalString Name) {
  constexpr char EmptyEmitString[] = "";
  return getConstantSymWithEmitString(Offset, {}, Name, EmptyEmitString);
}

Constant *GlobalContext::getConstantExternSym(GlobalString Name) {
  constexpr RelocOffsetT Offset = 0;
  return getConstPool()->ExternRelocatables.getOrAdd(
      this, RelocatableTuple(Offset, {}, Name));
}

Constant *GlobalContext::getConstantUndef(Type Ty) {
  return getConstPool()->Undefs.getOrAdd(this, Ty);
}

Constant *GlobalContext::getConstantZero(Type Ty) {
  Constant *Zero = ConstZeroForType[Ty];
  if (Zero == nullptr)
    llvm::report_fatal_error("Unsupported constant type: " +
                             typeStdString(Ty));
  return Zero;
}

// All locking is done by the getConstant*() target function.
Constant *GlobalContext::getConstantZeroInternal(Type Ty) {
  switch (Ty) {
  case IceType_i1:
    return getConstantInt1Internal(0);
  case IceType_i8:
    return getConstantInt8Internal(0);
  case IceType_i16:
    return getConstantInt16Internal(0);
  case IceType_i32:
    return getConstantInt32Internal(0);
  case IceType_i64:
    return getConstantInt64Internal(0);
  case IceType_f32:
    return getConstantFloat(0);
  case IceType_f64:
    return getConstantDouble(0);
  default:
    return nullptr;
  }
}

ConstantList GlobalContext::getConstantPool(Type Ty) {
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return getConstPool()->Integers8.getConstantPool();
  case IceType_i16:
    return getConstPool()->Integers16.getConstantPool();
  case IceType_i32:
    return getConstPool()->Integers32.getConstantPool();
  case IceType_i64:
    return getConstPool()->Integers64.getConstantPool();
  case IceType_f32:
    return getConstPool()->Floats.getConstantPool();
  case IceType_f64:
    return getConstPool()->Doubles.getConstantPool();
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    llvm::report_fatal_error("Unsupported constant type: " +
                             typeStdString(Ty));
    break;
  case IceType_void:
  case IceType_NUM:
    break;
  }
  llvm_unreachable("Unknown type");
}

ConstantList GlobalContext::getConstantExternSyms() {
  return getConstPool()->ExternRelocatables.getConstantPool();
}

GlobalString GlobalContext::getGlobalString(const std::string &Name) {
  return GlobalString::createWithString(this, Name);
}

JumpTableDataList GlobalContext::getJumpTables() {
  JumpTableDataList JumpTables(*getJumpTableList());
  // Make order deterministic by sorting by function name, and then by ID of
  // the jump table within that function.
  std::sort(JumpTables.begin(), JumpTables.end(),
            [](const JumpTableData &A, const JumpTableData &B) {
              if (A.getFunctionName() != B.getFunctionName())
                return A.getFunctionName() < B.getFunctionName();
              return A.getId() < B.getId();
            });

  if (getFlags().getReorderPooledConstants()) {
    // If the reorder-pooled-constants option is set to true, we also shuffle
    // the jump tables before emitting them. Create a random number generator
    // for jump table reordering, considering jump tables as pooled
    // constants.
    RandomNumberGenerator RNG(getFlags().getRandomSeed(),
                              RPE_PooledConstantReordering);
    RandomShuffle(JumpTables.begin(), JumpTables.end(),
                  [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
  }
  return JumpTables;
}

void GlobalContext::addJumpTableData(JumpTableData JumpTable) {
  getJumpTableList()->emplace_back(std::move(JumpTable));
}

TimerStackIdT GlobalContext::newTimerStackID(const std::string &Name) {
  if (!BuildDefs::timers())
    return 0;
  auto Timers = getTimers();
  TimerStackIdT NewID = Timers->size();
  Timers->push_back(TimerStack(Name));
  return NewID;
}

TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
                                   const std::string &Name) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  return Timers->at(StackID).getTimerID(Name);
}

void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).push(ID);
}

void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).pop(ID);
}

void GlobalContext::resetTimer(TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).reset();
}

std::string GlobalContext::getTimerName(TimerStackIdT StackID) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  return Timers->at(StackID).getName();
}

void GlobalContext::setTimerName(TimerStackIdT StackID,
                                 const std::string &NewName) {
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(StackID < Timers->size());
  Timers->at(StackID).setName(NewName);
}

// Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
// interface to take and transfer ownership, but they internally store the
// raw Cfg pointer in the work queue. This allows e.g. future queue
// optimizations such as the use of atomics to modify queue elements.
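// (Hand-off sketch: ownership moves into the queue, so the caller's
// unique_ptr is empty after the call:
//   std::unique_ptr<OptWorkItem> Item = ...; // some concrete OptWorkItem
//   Ctx->optQueueBlockingPush(std::move(Item));
// Note also that in sequential mode the push itself drains the queue by
// invoking translateFunctions(), as seen below.)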
void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
  assert(Item);
  {
    TimerMarker _(TimerStack::TT_qTransPush, this);
    OptQ.blockingPush(std::move(Item));
  }
  if (getFlags().isSequential())
    translateFunctions();
}

std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
  TimerMarker _(TimerStack::TT_qTransPop, this);
  return OptQ.blockingPop(OptQWakeupSize);
}

void GlobalContext::emitQueueBlockingPush(
    std::unique_ptr<EmitterWorkItem> Item) {
  assert(Item);
  {
    TimerMarker _(TimerStack::TT_qEmitPush, this);
    EmitQ.blockingPush(std::move(Item));
  }
  if (getFlags().isSequential())
    emitItems();
}

std::unique_ptr<EmitterWorkItem> GlobalContext::emitQueueBlockingPop() {
  TimerMarker _(TimerStack::TT_qEmitPop, this);
  return EmitQ.blockingPop();
}

void GlobalContext::initParserThread() {
  ThreadContext *Tls = new ThreadContext();
  auto Timers = getTimers();
  Timers->initInto(Tls->Timers);
  AllThreadContexts.push_back(Tls);
  ICE_TLS_SET_FIELD(TLS, Tls);
}

void GlobalContext::startWorkerThreads() {
  size_t NumWorkers = getFlags().getNumTranslationThreads();
  auto Timers = getTimers();
  for (size_t i = 0; i < NumWorkers; ++i) {
    ThreadContext *WorkerTLS = new ThreadContext();
    Timers->initInto(WorkerTLS->Timers);
    AllThreadContexts.push_back(WorkerTLS);
    TranslationThreads.push_back(std::thread(
        &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
  }
  if (NumWorkers) {
    ThreadContext *WorkerTLS = new ThreadContext();
    Timers->initInto(WorkerTLS->Timers);
    AllThreadContexts.push_back(WorkerTLS);
    EmitterThreads.push_back(
        std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS));
  }
}

void GlobalContext::resetStats() {
  if (BuildDefs::dump())
    ICE_TLS_GET_FIELD(TLS)->StatsFunction.reset();
}

void GlobalContext::dumpStats(const Cfg *Func) {
  if (!getFlags().getDumpStats())
    return;
  if (Func == nullptr) {
    getStatsCumulative()->dump(Func, this);
  } else {
    ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Func, this);
  }
}

void GlobalContext::statsUpdateEmitted(uint32_t InstCount) {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_InstCount, InstCount);
  Tls->StatsCumulative.update(CodeStats::CS_InstCount, InstCount);
}

void GlobalContext::statsUpdateRegistersSaved(uint32_t Num) {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_RegsSaved, Num);
  Tls->StatsCumulative.update(CodeStats::CS_RegsSaved, Num);
}

void GlobalContext::statsUpdateFrameBytes(uint32_t Bytes) {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_FrameByte, Bytes);
  Tls->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes);
}

void GlobalContext::statsUpdateSpills() {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_NumSpills);
  Tls->StatsCumulative.update(CodeStats::CS_NumSpills);
}

void GlobalContext::statsUpdateFills() {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_NumFills);
  Tls->StatsCumulative.update(CodeStats::CS_NumFills);
}

void GlobalContext::statsUpdateRPImms() {
  if (!getFlags().getDumpStats())
    return;
  ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
  Tls->StatsFunction.update(CodeStats::CS_NumRPImms);
  Tls->StatsCumulative.update(CodeStats::CS_NumRPImms);
}

void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
  if (!BuildDefs::timers())
    return;
  auto Timers = getTimers();
  assert(Timers->size() > StackID);
  OstreamLocker L(this);
  Timers->at(StackID).dump(getStrDump(), DumpCumulative);
}

void GlobalContext::dumpLocalTimers(const std::string &TimerNameOverride,
                                    TimerStackIdT StackID,
                                    bool DumpCumulative) {
  if (!BuildDefs::timers())
    return;
  auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
  assert(Timers->size() > StackID);
  // Temporarily override the thread-local timer name with the given name.
  // Don't do it permanently, because the final timer merge at the end
  // expects the thread-local timer names to be the same as the global timer
  // name.
  auto OrigName = getTimerName(StackID);
  setTimerName(StackID, TimerNameOverride);
  {
    OstreamLocker _(this);
    Timers->at(StackID).dump(getStrDump(), DumpCumulative);
  }
  setTimerName(StackID, OrigName);
}

LockedPtr<StringPool>
GlobalStringPoolTraits::getStrings(const GlobalContext *PoolOwner) {
  return PoolOwner->getStrings();
}

TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
                                             const std::string &FuncName) {
  if (!BuildDefs::timers())
    return 0;
  if (!getFlags().getTimeEachFunction())
    return 0;
  return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
}

void TimerMarker::push() {
  switch (StackID) {
  case GlobalContext::TSK_Default:
    Active = getFlags().getSubzeroTimingEnabled() ||
             !getFlags().getTimingFocusOnString().empty();
    break;
  case GlobalContext::TSK_Funcs:
    Active = getFlags().getTimeEachFunction();
    break;
  default:
    break;
  }
  if (Active)
    Ctx->pushTimer(ID, StackID);
}

void TimerMarker::pushCfg(const Cfg *Func) {
  Ctx = Func->getContext();
  Active = Func->getFocusedTiming() || getFlags().getSubzeroTimingEnabled();
  if (Active)
    Ctx->pushTimer(ID, StackID);
}

ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);

} // end of namespace Ice