//===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "FileAnalysis.h" #include "GraphBuilder.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" using Instr = llvm::cfi_verify::FileAnalysis::Instr; using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer; namespace llvm { namespace cfi_verify { bool IgnoreDWARFFlag; static cl::opt<bool, true> IgnoreDWARFArg( "ignore-dwarf", cl::desc( "Ignore all DWARF data. This relaxes the requirements for all " "statically linked libraries to have been compiled with '-g', but " "will result in false positives for 'CFI unprotected' instructions."), cl::location(IgnoreDWARFFlag), cl::init(false)); StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) { switch (Status) { case CFIProtectionStatus::PROTECTED: return "PROTECTED"; case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF: return "FAIL_NOT_INDIRECT_CF"; case CFIProtectionStatus::FAIL_ORPHANS: return "FAIL_ORPHANS"; case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH: return "FAIL_BAD_CONDITIONAL_BRANCH"; case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED: return "FAIL_REGISTER_CLOBBERED"; case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION: return "FAIL_INVALID_INSTRUCTION"; } llvm_unreachable("Attempted to stringify an unknown enum value."); } Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) { // Open the filename provided. Expected<object::OwningBinary<object::Binary>> BinaryOrErr = object::createBinary(Filename); if (!BinaryOrErr) return BinaryOrErr.takeError(); // Construct the object and allow it to take ownership of the binary. object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get()); FileAnalysis Analysis(std::move(Binary)); Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary()); if (!Analysis.Object) return make_error<UnsupportedDisassembly>("Failed to cast object"); switch (Analysis.Object->getArch()) { case Triple::x86: case Triple::x86_64: case Triple::aarch64: case Triple::aarch64_be: break; default: return make_error<UnsupportedDisassembly>("Unsupported architecture."); } Analysis.ObjectTriple = Analysis.Object->makeTriple(); Analysis.Features = Analysis.Object->getFeatures(); // Init the rest of the object. if (auto InitResponse = Analysis.initialiseDisassemblyMembers()) return std::move(InitResponse); if (auto SectionParseResponse = Analysis.parseCodeSections()) return std::move(SectionParseResponse); return std::move(Analysis); } FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary) : Binary(std::move(Binary)) {} FileAnalysis::FileAnalysis(const Triple &ObjectTriple, const SubtargetFeatures &Features) : ObjectTriple(ObjectTriple), Features(Features) {} const Instr * FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const { std::map<uint64_t, Instr>::const_iterator KV = Instructions.find(InstrMeta.VMAddress); if (KV == Instructions.end() || KV == Instructions.begin()) return nullptr; if (!(--KV)->second.Valid) return nullptr; return &KV->second; } const Instr * FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const { std::map<uint64_t, Instr>::const_iterator KV = Instructions.find(InstrMeta.VMAddress); if (KV == Instructions.end() || ++KV == Instructions.end()) return nullptr; if (!KV->second.Valid) return nullptr; return &KV->second; } bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const { for (const auto &Operand : InstrMeta.Instruction) { if (Operand.isReg()) return true; } return false; } const Instr *FileAnalysis::getInstruction(uint64_t Address) const { const auto &InstrKV = Instructions.find(Address); if (InstrKV == Instructions.end()) return nullptr; return &InstrKV->second; } const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const { const auto &InstrKV = Instructions.find(Address); assert(InstrKV != Instructions.end() && "Address doesn't exist."); return InstrKV->second; } bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const { const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); return InstrDesc.isTrap(); } bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const { if (!InstrMeta.Valid) return false; if (isCFITrap(InstrMeta)) return false; const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) return InstrDesc.isConditionalBranch(); return true; } const Instr * FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const { if (!InstrMeta.Valid) return nullptr; if (isCFITrap(InstrMeta)) return nullptr; const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode()); const Instr *NextMetaPtr; if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) { if (InstrDesc.isConditionalBranch()) return nullptr; uint64_t Target; if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress, InstrMeta.InstructionSize, Target)) return nullptr; NextMetaPtr = getInstruction(Target); } else { NextMetaPtr = getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize); } if (!NextMetaPtr || !NextMetaPtr->Valid) return nullptr; return NextMetaPtr; } std::set<const Instr *> FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const { std::set<const Instr *> CFCrossReferences; const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta); if (PrevInstruction && canFallThrough(*PrevInstruction)) CFCrossReferences.insert(PrevInstruction); const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress); if (TargetRefsKV == StaticBranchTargetings.end()) return CFCrossReferences; for (uint64_t SourceInstrAddress : TargetRefsKV->second) { const auto &SourceInstrKV = Instructions.find(SourceInstrAddress); if (SourceInstrKV == Instructions.end()) { errs() << "Failed to find source instruction at address " << format_hex(SourceInstrAddress, 2) << " for the cross-reference to instruction at address " << format_hex(InstrMeta.VMAddress, 2) << ".\n"; continue; } CFCrossReferences.insert(&SourceInstrKV->second); } return CFCrossReferences; } const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const { return IndirectInstructions; } const MCRegisterInfo *FileAnalysis::getRegisterInfo() const { return RegisterInfo.get(); } const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); } const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const { return MIA.get(); } Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) { assert(Symbolizer != nullptr && "Symbolizer is invalid."); return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address); } CFIProtectionStatus FileAnalysis::validateCFIProtection(const GraphResult &Graph) const { const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress); if (!InstrMetaPtr) return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION; const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode()); if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo)) return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; if (!usesRegisterOperand(*InstrMetaPtr)) return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF; if (!Graph.OrphanedNodes.empty()) return CFIProtectionStatus::FAIL_ORPHANS; for (const auto &BranchNode : Graph.ConditionalBranchNodes) { if (!BranchNode.CFIProtection) return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH; } if (indirectCFOperandClobber(Graph) != Graph.BaseAddress) return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED; return CFIProtectionStatus::PROTECTED; } uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const { assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty."); // Get the set of registers we must check to ensure they're not clobbered. const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress); DenseSet<unsigned> RegisterNumbers; for (const auto &Operand : IndirectCF.Instruction) { if (Operand.isReg()) RegisterNumbers.insert(Operand.getReg()); } assert(RegisterNumbers.size() && "Zero register operands on indirect CF."); // Now check all branches to indirect CFs and ensure no clobbering happens. for (const auto &Branch : Graph.ConditionalBranchNodes) { uint64_t Node; if (Branch.IndirectCFIsOnTargetPath) Node = Branch.Target; else Node = Branch.Fallthrough; // Some architectures (e.g., AArch64) cannot load in an indirect branch, so // we allow them one load. bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad(); // We walk backwards from the indirect CF. It is the last node returned by // Graph.flattenAddress, so we skip it since we already handled it. DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers; std::vector<uint64_t> Nodes = Graph.flattenAddress(Node); for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) { Node = *I; const Instr &NodeInstr = getInstructionOrDie(Node); const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode()); for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end(); RI != RE; ++RI) { unsigned RegNum = *RI; if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum, *RegisterInfo)) { if (!canLoad || !InstrDesc.mayLoad()) return Node; canLoad = false; CurRegisterNumbers.erase(RI); // Add the registers this load reads to those we check for clobbers. for (unsigned i = InstrDesc.getNumDefs(), e = InstrDesc.getNumOperands(); i != e; i++) { const auto Operand = NodeInstr.Instruction.getOperand(i); if (Operand.isReg()) CurRegisterNumbers.insert(Operand.getReg()); } break; } } } } return Graph.BaseAddress; } void FileAnalysis::printInstruction(const Instr &InstrMeta, raw_ostream &OS) const { Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get()); } Error FileAnalysis::initialiseDisassemblyMembers() { std::string TripleName = ObjectTriple.getTriple(); ArchName = ""; MCPU = ""; std::string ErrorString; Symbolizer.reset(new LLVMSymbolizer()); ObjectTarget = TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString); if (!ObjectTarget) return make_error<UnsupportedDisassembly>( (Twine("Couldn't find target \"") + ObjectTriple.getTriple() + "\", failed with error: " + ErrorString) .str()); RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName)); if (!RegisterInfo) return make_error<UnsupportedDisassembly>( "Failed to initialise RegisterInfo."); AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName)); if (!AsmInfo) return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo."); SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo( TripleName, MCPU, Features.getString())); if (!SubtargetInfo) return make_error<UnsupportedDisassembly>( "Failed to initialise SubtargetInfo."); MII.reset(ObjectTarget->createMCInstrInfo()); if (!MII) return make_error<UnsupportedDisassembly>("Failed to initialise MII."); Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI)); Disassembler.reset( ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context)); if (!Disassembler) return make_error<UnsupportedDisassembly>( "No disassembler available for target"); MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get())); Printer.reset(ObjectTarget->createMCInstPrinter( ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII, *RegisterInfo)); return Error::success(); } Error FileAnalysis::parseCodeSections() { if (!IgnoreDWARFFlag) { std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object); if (!DWARF) return make_error<StringError>("Could not create DWARF information.", inconvertibleErrorCode()); bool LineInfoValid = false; for (auto &Unit : DWARF->compile_units()) { const auto &LineTable = DWARF->getLineTableForUnit(Unit.get()); if (LineTable && !LineTable->Rows.empty()) { LineInfoValid = true; break; } } if (!LineInfoValid) return make_error<StringError>( "DWARF line information missing. Did you compile with '-g'?", inconvertibleErrorCode()); } for (const object::SectionRef &Section : Object->sections()) { // Ensure only executable sections get analysed. if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR)) continue; StringRef SectionContents; if (Section.getContents(SectionContents)) return make_error<StringError>("Failed to retrieve section contents", inconvertibleErrorCode()); ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(), Section.getSize()); parseSectionContents(SectionBytes, Section.getAddress()); } return Error::success(); } void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes, uint64_t SectionAddress) { assert(Symbolizer && "Symbolizer is uninitialised."); MCInst Instruction; Instr InstrMeta; uint64_t InstructionSize; for (uint64_t Byte = 0; Byte < SectionBytes.size();) { bool ValidInstruction = Disassembler->getInstruction(Instruction, InstructionSize, SectionBytes.drop_front(Byte), 0, nulls(), outs()) == MCDisassembler::Success; Byte += InstructionSize; uint64_t VMAddress = SectionAddress + Byte - InstructionSize; InstrMeta.Instruction = Instruction; InstrMeta.VMAddress = VMAddress; InstrMeta.InstructionSize = InstructionSize; InstrMeta.Valid = ValidInstruction; addInstruction(InstrMeta); if (!ValidInstruction) continue; // Skip additional parsing for instructions that do not affect the control // flow. const auto &InstrDesc = MII->get(Instruction.getOpcode()); if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) continue; uint64_t Target; if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) { // If the target can be evaluated, it's not indirect. StaticBranchTargetings[Target].push_back(VMAddress); continue; } if (!usesRegisterOperand(InstrMeta)) continue; if (InstrDesc.isReturn()) continue; // Check if this instruction exists in the range of the DWARF metadata. if (!IgnoreDWARFFlag) { auto LineInfo = Symbolizer->symbolizeCode(Object->getFileName(), VMAddress); if (!LineInfo) { handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) { errs() << "Symbolizer failed to get line: " << E.message() << "\n"; }); continue; } if (LineInfo->FileName == "<invalid>") continue; } IndirectInstructions.insert(VMAddress); } } void FileAnalysis::addInstruction(const Instr &Instruction) { const auto &KV = Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction)); if (!KV.second) { errs() << "Failed to add instruction at address " << format_hex(Instruction.VMAddress, 2) << ": Instruction at this address already exists.\n"; exit(EXIT_FAILURE); } } UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {} char UnsupportedDisassembly::ID; void UnsupportedDisassembly::log(raw_ostream &OS) const { OS << "Could not initialise disassembler: " << Text; } std::error_code UnsupportedDisassembly::convertToErrorCode() const { return std::error_code(); } } // namespace cfi_verify } // namespace llvm