//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the Link Time Optimization library. This library is // intended to be used by linker to optimize code at link time. // //===----------------------------------------------------------------------===// #include "llvm/LTO/legacy/LTOModule.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include <system_error> using namespace llvm; using namespace llvm::object; LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef, llvm::TargetMachine *TM) : Mod(std::move(M)), MBRef(MBRef), _target(TM) { SymTab.addModule(Mod.get()); } LTOModule::~LTOModule() {} /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM /// bitcode. bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) { Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer( MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>")); return !errorToBool(BCData.takeError()); } bool LTOModule::isBitcodeFile(StringRef Path) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getFile(Path); if (!BufferOrErr) return false; Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer( BufferOrErr.get()->getMemBufferRef()); return !errorToBool(BCData.takeError()); } bool LTOModule::isThinLTO() { Expected<BitcodeLTOInfo> Result = getBitcodeLTOInfo(MBRef); if (!Result) { logAllUnhandledErrors(Result.takeError(), errs(), ""); return false; } return Result->IsThinLTO; } bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, StringRef TriplePrefix) { Expected<MemoryBufferRef> BCOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); if (errorToBool(BCOrErr.takeError())) return false; LLVMContext Context; ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*BCOrErr)); if (!TripleOrErr) return false; return StringRef(*TripleOrErr).startswith(TriplePrefix); } std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { Expected<MemoryBufferRef> BCOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); if (errorToBool(BCOrErr.takeError())) return ""; LLVMContext Context; ErrorOr<std::string> ProducerOrErr = expectedToErrorOrAndEmitErrors( Context, getBitcodeProducerString(*BCOrErr)); if (!ProducerOrErr) return ""; return *ProducerOrErr; } ErrorOr<std::unique_ptr<LTOModule>> LTOModule::createFromFile(LLVMContext &Context, StringRef path, const TargetOptions &options) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getFile(path); if (std::error_code EC = BufferOrErr.getError()) { Context.emitError(EC.message()); return EC; } std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get()); return makeLTOModule(Buffer->getMemBufferRef(), options, Context, /* ShouldBeLazy*/ false); } ErrorOr<std::unique_ptr<LTOModule>> LTOModule::createFromOpenFile(LLVMContext &Context, int fd, StringRef path, size_t size, const TargetOptions &options) { return createFromOpenFileSlice(Context, fd, path, size, 0, options); } ErrorOr<std::unique_ptr<LTOModule>> LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path, size_t map_size, off_t offset, const TargetOptions &options) { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); if (std::error_code EC = BufferOrErr.getError()) { Context.emitError(EC.message()); return EC; } std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get()); return makeLTOModule(Buffer->getMemBufferRef(), options, Context, /* ShouldBeLazy */ false); } ErrorOr<std::unique_ptr<LTOModule>> LTOModule::createFromBuffer(LLVMContext &Context, const void *mem, size_t length, const TargetOptions &options, StringRef path) { StringRef Data((const char *)mem, length); MemoryBufferRef Buffer(Data, path); return makeLTOModule(Buffer, options, Context, /* ShouldBeLazy */ false); } ErrorOr<std::unique_ptr<LTOModule>> LTOModule::createInLocalContext(std::unique_ptr<LLVMContext> Context, const void *mem, size_t length, const TargetOptions &options, StringRef path) { StringRef Data((const char *)mem, length); MemoryBufferRef Buffer(Data, path); // If we own a context, we know this is being used only for symbol extraction, // not linking. Be lazy in that case. ErrorOr<std::unique_ptr<LTOModule>> Ret = makeLTOModule(Buffer, options, *Context, /* ShouldBeLazy */ true); if (Ret) (*Ret)->OwnedContext = std::move(Context); return Ret; } static ErrorOr<std::unique_ptr<Module>> parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, bool ShouldBeLazy) { // Find the buffer. Expected<MemoryBufferRef> MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); if (Error E = MBOrErr.takeError()) { std::error_code EC = errorToErrorCode(std::move(E)); Context.emitError(EC.message()); return EC; } if (!ShouldBeLazy) { // Parse the full file. return expectedToErrorOrAndEmitErrors(Context, parseBitcodeFile(*MBOrErr, Context)); } // Parse lazily. return expectedToErrorOrAndEmitErrors( Context, getLazyBitcodeModule(*MBOrErr, Context, true /*ShouldLazyLoadMetadata*/)); } ErrorOr<std::unique_ptr<LTOModule>> LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, LLVMContext &Context, bool ShouldBeLazy) { ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFileImpl(Buffer, Context, ShouldBeLazy); if (std::error_code EC = MOrErr.getError()) return EC; std::unique_ptr<Module> &M = *MOrErr; std::string TripleStr = M->getTargetTriple(); if (TripleStr.empty()) TripleStr = sys::getDefaultTargetTriple(); llvm::Triple Triple(TripleStr); // find machine architecture for this module std::string errMsg; const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); if (!march) return make_error_code(object::object_error::arch_not_found); // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; Features.getDefaultSubtargetFeatures(Triple); std::string FeatureStr = Features.getString(); // Set a default CPU for Darwin triples. std::string CPU; if (Triple.isOSDarwin()) { if (Triple.getArch() == llvm::Triple::x86_64) CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; else if (Triple.getArch() == llvm::Triple::aarch64) CPU = "cyclone"; } TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None); std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(M), Buffer, target)); Ret->parseSymbols(); Ret->parseMetadata(); return std::move(Ret); } /// Create a MemoryBuffer from a memory range with an optional name. std::unique_ptr<MemoryBuffer> LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) { const char *startPtr = (const char*)mem; return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false); } /// objcClassNameFromExpression - Get string that the data pointer points to. bool LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) { Constant *op = ce->getOperand(0); if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) { Constant *cn = gvn->getInitializer(); if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) { if (ca->isCString()) { name = (".objc_class_name_" + ca->getAsCString()).str(); return true; } } } } return false; } /// addObjCClass - Parse i386/ppc ObjC class data structure. void LTOModule::addObjCClass(const GlobalVariable *clgv) { const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer()); if (!c) return; // second slot in __OBJC,__class is pointer to superclass name std::string superclassName; if (objcClassNameFromExpression(c->getOperand(1), superclassName)) { auto IterBool = _undefines.insert(std::make_pair(superclassName, NameAndAttributes())); if (IterBool.second) { NameAndAttributes &info = IterBool.first->second; info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = false; info.symbol = clgv; } } // third slot in __OBJC,__class is pointer to class name std::string className; if (objcClassNameFromExpression(c->getOperand(2), className)) { auto Iter = _defines.insert(className).first; NameAndAttributes info; info.name = Iter->first(); info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT; info.isFunction = false; info.symbol = clgv; _symbols.push_back(info); } } /// addObjCCategory - Parse i386/ppc ObjC category data structure. void LTOModule::addObjCCategory(const GlobalVariable *clgv) { const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer()); if (!c) return; // second slot in __OBJC,__category is pointer to target class name std::string targetclassName; if (!objcClassNameFromExpression(c->getOperand(1), targetclassName)) return; auto IterBool = _undefines.insert(std::make_pair(targetclassName, NameAndAttributes())); if (!IterBool.second) return; NameAndAttributes &info = IterBool.first->second; info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = false; info.symbol = clgv; } /// addObjCClassRef - Parse i386/ppc ObjC class list data structure. void LTOModule::addObjCClassRef(const GlobalVariable *clgv) { std::string targetclassName; if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) return; auto IterBool = _undefines.insert(std::make_pair(targetclassName, NameAndAttributes())); if (!IterBool.second) return; NameAndAttributes &info = IterBool.first->second; info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = false; info.symbol = clgv; } void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) { SmallString<64> Buffer; { raw_svector_ostream OS(Buffer); SymTab.printSymbolName(OS, Sym); Buffer.c_str(); } const GlobalValue *V = Sym.get<GlobalValue *>(); addDefinedDataSymbol(Buffer, V); } void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) { // Add to list of defined symbols. addDefinedSymbol(Name, v, false); if (!v->hasSection() /* || !isTargetDarwin */) return; // Special case i386/ppc ObjC data structures in magic sections: // The issue is that the old ObjC object format did some strange // contortions to avoid real linker symbols. For instance, the // ObjC class data structure is allocated statically in the executable // that defines that class. That data structures contains a pointer to // its superclass. But instead of just initializing that part of the // struct to the address of its superclass, and letting the static and // dynamic linkers do the rest, the runtime works by having that field // instead point to a C-string that is the name of the superclass. // At runtime the objc initialization updates that pointer and sets // it to point to the actual super class. As far as the linker // knows it is just a pointer to a string. But then someone wanted the // linker to issue errors at build time if the superclass was not found. // So they figured out a way in mach-o object format to use an absolute // symbols (.objc_class_name_Foo = 0) and a floating reference // (.reference .objc_class_name_Bar) to cause the linker into erroring when // a class was missing. // The following synthesizes the implicit .objc_* symbols for the linker // from the ObjC data structures generated by the front end. // special case if this data blob is an ObjC class definition if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) { StringRef Section = GV->getSection(); if (Section.startswith("__OBJC,__class,")) { addObjCClass(GV); } // special case if this data blob is an ObjC category definition else if (Section.startswith("__OBJC,__category,")) { addObjCCategory(GV); } // special case if this data blob is the list of referenced classes else if (Section.startswith("__OBJC,__cls_refs,")) { addObjCClassRef(GV); } } } void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) { SmallString<64> Buffer; { raw_svector_ostream OS(Buffer); SymTab.printSymbolName(OS, Sym); Buffer.c_str(); } const Function *F = cast<Function>(Sym.get<GlobalValue *>()); addDefinedFunctionSymbol(Buffer, F); } void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) { // add to list of defined symbols addDefinedSymbol(Name, F, true); } void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def, bool isFunction) { // set alignment part log2() can have rounding errors uint32_t align = def->getAlignment(); uint32_t attr = align ? countTrailingZeros(align) : 0; // set permissions part if (isFunction) { attr |= LTO_SYMBOL_PERMISSIONS_CODE; } else { const GlobalVariable *gv = dyn_cast<GlobalVariable>(def); if (gv && gv->isConstant()) attr |= LTO_SYMBOL_PERMISSIONS_RODATA; else attr |= LTO_SYMBOL_PERMISSIONS_DATA; } // set definition part if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) attr |= LTO_SYMBOL_DEFINITION_WEAK; else if (def->hasCommonLinkage()) attr |= LTO_SYMBOL_DEFINITION_TENTATIVE; else attr |= LTO_SYMBOL_DEFINITION_REGULAR; // set scope part if (def->hasLocalLinkage()) // Ignore visibility if linkage is local. attr |= LTO_SYMBOL_SCOPE_INTERNAL; else if (def->hasHiddenVisibility()) attr |= LTO_SYMBOL_SCOPE_HIDDEN; else if (def->hasProtectedVisibility()) attr |= LTO_SYMBOL_SCOPE_PROTECTED; else if (def->canBeOmittedFromSymbolTable()) attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN; else attr |= LTO_SYMBOL_SCOPE_DEFAULT; if (def->hasComdat()) attr |= LTO_SYMBOL_COMDAT; if (isa<GlobalAlias>(def)) attr |= LTO_SYMBOL_ALIAS; auto Iter = _defines.insert(Name).first; // fill information structure NameAndAttributes info; StringRef NameRef = Iter->first(); info.name = NameRef; assert(NameRef.data()[NameRef.size()] == '\0'); info.attributes = attr; info.isFunction = isFunction; info.symbol = def; // add to table of symbols _symbols.push_back(info); } /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the /// defined list. void LTOModule::addAsmGlobalSymbol(StringRef name, lto_symbol_attributes scope) { auto IterBool = _defines.insert(name); // only add new define if not already defined if (!IterBool.second) return; NameAndAttributes &info = _undefines[IterBool.first->first()]; if (info.symbol == nullptr) { // FIXME: This is trying to take care of module ASM like this: // // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0" // // but is gross and its mother dresses it funny. Have the ASM parser give us // more details for this type of situation so that we're not guessing so // much. // fill information structure info.name = IterBool.first->first(); info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope; info.isFunction = false; info.symbol = nullptr; // add to table of symbols _symbols.push_back(info); return; } if (info.isFunction) addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol)); else addDefinedDataSymbol(info.name, info.symbol); _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK; _symbols.back().attributes |= scope; } /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the /// undefined list. void LTOModule::addAsmGlobalSymbolUndef(StringRef name) { auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes())); _asm_undefines.push_back(IterBool.first->first()); // we already have the symbol if (!IterBool.second) return; uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED; attr |= LTO_SYMBOL_SCOPE_DEFAULT; NameAndAttributes &info = IterBool.first->second; info.name = IterBool.first->first(); info.attributes = attr; info.isFunction = false; info.symbol = nullptr; } /// Add a symbol which isn't defined just yet to a list to be resolved later. void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym, bool isFunc) { SmallString<64> name; { raw_svector_ostream OS(name); SymTab.printSymbolName(OS, Sym); name.c_str(); } auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes())); // we already have the symbol if (!IterBool.second) return; NameAndAttributes &info = IterBool.first->second; info.name = IterBool.first->first(); const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>(); if (decl->hasExternalWeakLinkage()) info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; else info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; info.isFunction = isFunc; info.symbol = decl; } void LTOModule::parseSymbols() { for (auto Sym : SymTab.symbols()) { auto *GV = Sym.dyn_cast<GlobalValue *>(); uint32_t Flags = SymTab.getSymbolFlags(Sym); if (Flags & object::BasicSymbolRef::SF_FormatSpecific) continue; bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined; if (!GV) { SmallString<64> Buffer; { raw_svector_ostream OS(Buffer); SymTab.printSymbolName(OS, Sym); Buffer.c_str(); } StringRef Name(Buffer); if (IsUndefined) addAsmGlobalSymbolUndef(Name); else if (Flags & object::BasicSymbolRef::SF_Global) addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT); else addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL); continue; } auto *F = dyn_cast<Function>(GV); if (IsUndefined) { addPotentialUndefinedSymbol(Sym, F != nullptr); continue; } if (F) { addDefinedFunctionSymbol(Sym); continue; } if (isa<GlobalVariable>(GV)) { addDefinedDataSymbol(Sym); continue; } assert(isa<GlobalAlias>(GV)); addDefinedDataSymbol(Sym); } // make symbols for all undefines for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(), e = _undefines.end(); u != e; ++u) { // If this symbol also has a definition, then don't make an undefine because // it is a tentative definition. if (_defines.count(u->getKey())) continue; NameAndAttributes info = u->getValue(); _symbols.push_back(info); } } /// parseMetadata - Parse metadata from the module void LTOModule::parseMetadata() { raw_string_ostream OS(LinkerOpts); // Linker Options if (NamedMDNode *LinkerOptions = getModule().getNamedMetadata("llvm.linker.options")) { for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { MDNode *MDOptions = LinkerOptions->getOperand(i); for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); OS << " " << MDOption->getString(); } } } // Globals - we only need to do this for COFF. const Triple TT(_target->getTargetTriple()); if (!TT.isOSBinFormatCOFF()) return; Mangler M; for (const NameAndAttributes &Sym : _symbols) { if (!Sym.symbol) continue; emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M); } // Add other interesting metadata here. }