//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/Object/MachOObject.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" using namespace llvm; using namespace llvm::object; /* Translation Utilities */ template<typename T> static void SwapValue(T &Value) { Value = sys::SwapByteOrder(Value); } template<typename T> static void SwapStruct(T &Value); template<typename T> static void ReadInMemoryStruct(const MachOObject &MOO, StringRef Buffer, uint64_t Base, InMemoryStruct<T> &Res) { typedef T struct_type; uint64_t Size = sizeof(struct_type); // Check that the buffer contains the expected data. if (Base + Size > Buffer.size()) { Res = 0; return; } // Check whether we can return a direct pointer. struct_type *Ptr = (struct_type *) (Buffer.data() + Base); if (!MOO.isSwappedEndian()) { Res = Ptr; return; } // Otherwise, copy the struct and translate the values. Res = *Ptr; SwapStruct(*Res); } /* *** */ MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_, bool Is64Bit_) : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_), IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()), HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) { // Load the common header. memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header)); if (IsSwappedEndian) { SwapValue(Header.Magic); SwapValue(Header.CPUType); SwapValue(Header.CPUSubtype); SwapValue(Header.FileType); SwapValue(Header.NumLoadCommands); SwapValue(Header.SizeOfLoadCommands); SwapValue(Header.Flags); } if (is64Bit()) { memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header), sizeof(Header64Ext)); if (IsSwappedEndian) { SwapValue(Header64Ext.Reserved); } } // Create the load command array if sane. if (getHeader().NumLoadCommands < (1 << 20)) LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands]; } MachOObject::~MachOObject() { delete [] LoadCommands; } MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer, std::string *ErrorStr) { // First, check the magic value and initialize the basic object info. bool IsLittleEndian = false, Is64Bit = false; StringRef Magic = Buffer->getBuffer().slice(0, 4); if (Magic == "\xFE\xED\xFA\xCE") { } else if (Magic == "\xCE\xFA\xED\xFE") { IsLittleEndian = true; } else if (Magic == "\xFE\xED\xFA\xCF") { Is64Bit = true; } else if (Magic == "\xCF\xFA\xED\xFE") { IsLittleEndian = true; Is64Bit = true; } else { if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)"; return 0; } // Ensure that the at least the full header is present. unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size; if (Buffer->getBufferSize() < HeaderSize) { if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)"; return 0; } OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian, Is64Bit)); // Check for bogus number of load commands. if (Object->getHeader().NumLoadCommands >= (1 << 20)) { if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)"; return 0; } if (ErrorStr) *ErrorStr = ""; return Object.take(); } StringRef MachOObject::getData(size_t Offset, size_t Size) const { return Buffer->getBuffer().substr(Offset,Size); } void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) { HasStringTable = true; StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset, SLC.StringTableSize); } const MachOObject::LoadCommandInfo & MachOObject::getLoadCommandInfo(unsigned Index) const { assert(Index < getHeader().NumLoadCommands && "Invalid index!"); // Load the command, if necessary. if (Index >= NumLoadedCommands) { uint64_t Offset; if (Index == 0) { Offset = getHeaderSize(); } else { const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1); Offset = Prev.Offset + Prev.Command.Size; } LoadCommandInfo &Info = LoadCommands[Index]; memcpy(&Info.Command, Buffer->getBuffer().data() + Offset, sizeof(macho::LoadCommand)); if (IsSwappedEndian) { SwapValue(Info.Command.Type); SwapValue(Info.Command.Size); } Info.Offset = Offset; NumLoadedCommands = Index + 1; } return LoadCommands[Index]; } template<> void SwapStruct(macho::SegmentLoadCommand &Value) { SwapValue(Value.Type); SwapValue(Value.Size); SwapValue(Value.VMAddress); SwapValue(Value.VMSize); SwapValue(Value.FileOffset); SwapValue(Value.FileSize); SwapValue(Value.MaxVMProtection); SwapValue(Value.InitialVMProtection); SwapValue(Value.NumSections); SwapValue(Value.Flags); } void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI, InMemoryStruct<macho::SegmentLoadCommand> &Res) const { ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); } template<> void SwapStruct(macho::Segment64LoadCommand &Value) { SwapValue(Value.Type); SwapValue(Value.Size); SwapValue(Value.VMAddress); SwapValue(Value.VMSize); SwapValue(Value.FileOffset); SwapValue(Value.FileSize); SwapValue(Value.MaxVMProtection); SwapValue(Value.InitialVMProtection); SwapValue(Value.NumSections); SwapValue(Value.Flags); } void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI, InMemoryStruct<macho::Segment64LoadCommand> &Res) const { ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); } template<> void SwapStruct(macho::SymtabLoadCommand &Value) { SwapValue(Value.Type); SwapValue(Value.Size); SwapValue(Value.SymbolTableOffset); SwapValue(Value.NumSymbolTableEntries); SwapValue(Value.StringTableOffset); SwapValue(Value.StringTableSize); } void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI, InMemoryStruct<macho::SymtabLoadCommand> &Res) const { ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); } template<> void SwapStruct(macho::DysymtabLoadCommand &Value) { SwapValue(Value.Type); SwapValue(Value.Size); SwapValue(Value.LocalSymbolsIndex); SwapValue(Value.NumLocalSymbols); SwapValue(Value.ExternalSymbolsIndex); SwapValue(Value.NumExternalSymbols); SwapValue(Value.UndefinedSymbolsIndex); SwapValue(Value.NumUndefinedSymbols); SwapValue(Value.TOCOffset); SwapValue(Value.NumTOCEntries); SwapValue(Value.ModuleTableOffset); SwapValue(Value.NumModuleTableEntries); SwapValue(Value.ReferenceSymbolTableOffset); SwapValue(Value.NumReferencedSymbolTableEntries); SwapValue(Value.IndirectSymbolTableOffset); SwapValue(Value.NumIndirectSymbolTableEntries); SwapValue(Value.ExternalRelocationTableOffset); SwapValue(Value.NumExternalRelocationTableEntries); SwapValue(Value.LocalRelocationTableOffset); SwapValue(Value.NumLocalRelocationTableEntries); } void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI, InMemoryStruct<macho::DysymtabLoadCommand> &Res) const { ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); } template<> void SwapStruct(macho::LinkeditDataLoadCommand &Value) { SwapValue(Value.Type); SwapValue(Value.Size); SwapValue(Value.DataOffset); SwapValue(Value.DataSize); } void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI, InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const { ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); } template<> void SwapStruct(macho::IndirectSymbolTableEntry &Value) { SwapValue(Value.Index); } void MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC, unsigned Index, InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const { uint64_t Offset = (DLC.IndirectSymbolTableOffset + Index * sizeof(macho::IndirectSymbolTableEntry)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } template<> void SwapStruct(macho::Section &Value) { SwapValue(Value.Address); SwapValue(Value.Size); SwapValue(Value.Offset); SwapValue(Value.Align); SwapValue(Value.RelocationTableOffset); SwapValue(Value.NumRelocationTableEntries); SwapValue(Value.Flags); SwapValue(Value.Reserved1); SwapValue(Value.Reserved2); } void MachOObject::ReadSection(const LoadCommandInfo &LCI, unsigned Index, InMemoryStruct<macho::Section> &Res) const { assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected load command info!"); uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) + Index * sizeof(macho::Section)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } template<> void SwapStruct(macho::Section64 &Value) { SwapValue(Value.Address); SwapValue(Value.Size); SwapValue(Value.Offset); SwapValue(Value.Align); SwapValue(Value.RelocationTableOffset); SwapValue(Value.NumRelocationTableEntries); SwapValue(Value.Flags); SwapValue(Value.Reserved1); SwapValue(Value.Reserved2); SwapValue(Value.Reserved3); } void MachOObject::ReadSection64(const LoadCommandInfo &LCI, unsigned Index, InMemoryStruct<macho::Section64> &Res) const { assert(LCI.Command.Type == macho::LCT_Segment64 && "Unexpected load command info!"); uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) + Index * sizeof(macho::Section64)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } template<> void SwapStruct(macho::RelocationEntry &Value) { SwapValue(Value.Word0); SwapValue(Value.Word1); } void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset, unsigned Index, InMemoryStruct<macho::RelocationEntry> &Res) const { uint64_t Offset = (RelocationTableOffset + Index * sizeof(macho::RelocationEntry)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } template<> void SwapStruct(macho::SymbolTableEntry &Value) { SwapValue(Value.StringIndex); SwapValue(Value.Flags); SwapValue(Value.Value); } void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset, unsigned Index, InMemoryStruct<macho::SymbolTableEntry> &Res) const { uint64_t Offset = (SymbolTableOffset + Index * sizeof(macho::SymbolTableEntry)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } template<> void SwapStruct(macho::Symbol64TableEntry &Value) { SwapValue(Value.StringIndex); SwapValue(Value.Flags); SwapValue(Value.Value); } void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, unsigned Index, InMemoryStruct<macho::Symbol64TableEntry> &Res) const { uint64_t Offset = (SymbolTableOffset + Index * sizeof(macho::Symbol64TableEntry)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } void MachOObject::ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const { const char *ptr = Buffer->getBufferStart() + Index; uint64_t data = 0; uint64_t delta = 0; uint32_t shift = 0; while (true) { assert(ptr < Buffer->getBufferEnd() && "index out of bounds"); assert(shift < 64 && "too big for uint64_t"); uint8_t byte = *ptr++; delta |= ((byte & 0x7F) << shift); shift += 7; if (byte < 0x80) { if (delta == 0) break; data += delta; Out.push_back(data); delta = 0; shift = 0; } } } /* ** */ // Object Dumping Facilities void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; } void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; } void MachOObject::printHeader(raw_ostream &O) const { O << "('cputype', " << Header.CPUType << ")\n"; O << "('cpusubtype', " << Header.CPUSubtype << ")\n"; O << "('filetype', " << Header.FileType << ")\n"; O << "('num_load_commands', " << Header.NumLoadCommands << ")\n"; O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n"; O << "('flag', " << Header.Flags << ")\n"; // Print extended header if 64-bit. if (is64Bit()) O << "('reserved', " << Header64Ext.Reserved << ")\n"; } void MachOObject::print(raw_ostream &O) const { O << "Header:\n"; printHeader(O); O << "Load Commands:\n"; O << "Buffer:\n"; }