//===------- ShadowCallStack.cpp - Shadow Call Stack pass -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // The ShadowCallStack pass instruments function prologs/epilogs to check that // the return address has not been corrupted during the execution of the // function. The return address is stored in a 'shadow call stack' addressed // using the %gs segment register. // //===----------------------------------------------------------------------===// #include "X86.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; namespace llvm { void initializeShadowCallStackPass(PassRegistry &); } namespace { class ShadowCallStack : public MachineFunctionPass { public: static char ID; ShadowCallStack() : MachineFunctionPass(ID) { initializeShadowCallStackPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &Fn) override; private: // Do not instrument leaf functions with this many or fewer instructions. The // shadow call stack instrumented prolog/epilog are slightly race-y reading // and checking the saved return address, so it is better to not instrument // functions that have fewer instructions than the instrumented prolog/epilog // race. static const size_t SkipLeafInstructions = 3; }; char ShadowCallStack::ID = 0; } // end anonymous namespace. static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII, MachineBasicBlock &MBB, const DebugLoc &DL); static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII, MachineBasicBlock &MBB, const DebugLoc &DL, MCPhysReg FreeRegister); static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock &TrapBB); static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock &TrapBB, MCPhysReg FreeRegister); // Generate a longer epilog that only uses r10 when a tailcall branches to r11. static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock &TrapBB); // Helper function to add ModR/M references for [Seg: Reg + Offset] memory // accesses static inline const MachineInstrBuilder & addSegmentedMem(const MachineInstrBuilder &MIB, MCPhysReg Seg, MCPhysReg Reg, int Offset = 0) { return MIB.addReg(Reg).addImm(1).addReg(0).addImm(Offset).addReg(Seg); } static void addProlog(MachineFunction &Fn, const TargetInstrInfo *TII, MachineBasicBlock &MBB, const DebugLoc &DL) { const MCPhysReg ReturnReg = X86::R10; const MCPhysReg OffsetReg = X86::R11; auto MBBI = MBB.begin(); // mov r10, [rsp] addDirectMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(ReturnReg), X86::RSP); // xor r11, r11 BuildMI(MBB, MBBI, DL, TII->get(X86::XOR64rr)) .addDef(OffsetReg) .addReg(OffsetReg, RegState::Undef) .addReg(OffsetReg, RegState::Undef); // add QWORD [gs:r11], 8 addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64mi8)), X86::GS, OffsetReg) .addImm(8); // mov r11, [gs:r11] addSegmentedMem( BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64rm)).addDef(OffsetReg), X86::GS, OffsetReg); // mov [gs:r11], r10 addSegmentedMem(BuildMI(MBB, MBBI, DL, TII->get(X86::MOV64mr)), X86::GS, OffsetReg) .addReg(ReturnReg); } static void addPrologLeaf(MachineFunction &Fn, const TargetInstrInfo *TII, MachineBasicBlock &MBB, const DebugLoc &DL, MCPhysReg FreeRegister) { // mov REG, [rsp] addDirectMem(BuildMI(MBB, MBB.begin(), DL, TII->get(X86::MOV64rm)) .addDef(FreeRegister), X86::RSP); } static void addEpilog(const TargetInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock &TrapBB) { const DebugLoc &DL = MI.getDebugLoc(); // xor r11, r11 BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr)) .addDef(X86::R11) .addReg(X86::R11, RegState::Undef) .addReg(X86::R11, RegState::Undef); // mov r10, [gs:r11] addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10), X86::GS, X86::R11); // mov r10, [gs:r10] addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10), X86::GS, X86::R10); // sub QWORD [gs:r11], 8 // This instruction should not be moved up to avoid a signal race. addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, X86::R11) .addImm(8); // cmp [rsp], r10 addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP) .addReg(X86::R10); // jne trap BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB); MBB.addSuccessor(&TrapBB); } static void addEpilogLeaf(const TargetInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock &TrapBB, MCPhysReg FreeRegister) { const DebugLoc &DL = MI.getDebugLoc(); // cmp [rsp], REG addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP) .addReg(FreeRegister); // jne trap BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB); MBB.addSuccessor(&TrapBB); } static void addEpilogOnlyR10(const TargetInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock &TrapBB) { const DebugLoc &DL = MI.getDebugLoc(); // xor r10, r10 BuildMI(MBB, MI, DL, TII->get(X86::XOR64rr)) .addDef(X86::R10) .addReg(X86::R10, RegState::Undef) .addReg(X86::R10, RegState::Undef); // mov r10, [gs:r10] addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10), X86::GS, X86::R10); // mov r10, [gs:r10] addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::MOV64rm)).addDef(X86::R10), X86::GS, X86::R10); // sub QWORD [gs:0], 8 // This instruction should not be moved up to avoid a signal race. addSegmentedMem(BuildMI(MBB, MI, DL, TII->get(X86::SUB64mi8)), X86::GS, 0) .addImm(8); // cmp [rsp], r10 addDirectMem(BuildMI(MBB, MI, DL, TII->get(X86::CMP64mr)), X86::RSP) .addReg(X86::R10); // jne trap BuildMI(MBB, MI, DL, TII->get(X86::JNE_1)).addMBB(&TrapBB); MBB.addSuccessor(&TrapBB); } bool ShadowCallStack::runOnMachineFunction(MachineFunction &Fn) { if (!Fn.getFunction().hasFnAttribute(Attribute::ShadowCallStack) || Fn.getFunction().hasFnAttribute(Attribute::Naked)) return false; if (Fn.empty() || !Fn.getRegInfo().tracksLiveness()) return false; // FIXME: Skip functions that have r10 or r11 live on entry (r10 can be live // on entry for parameters with the nest attribute.) if (Fn.front().isLiveIn(X86::R10) || Fn.front().isLiveIn(X86::R11)) return false; // FIXME: Skip functions with conditional and r10 tail calls for now. bool HasReturn = false; for (auto &MBB : Fn) { if (MBB.empty()) continue; const MachineInstr &MI = MBB.instr_back(); if (MI.isReturn()) HasReturn = true; if (MI.isReturn() && MI.isCall()) { if (MI.findRegisterUseOperand(X86::EFLAGS)) return false; // This should only be possible on Windows 64 (see GR64_TC versus // GR64_TCW64.) if (MI.findRegisterUseOperand(X86::R10) || MI.hasRegisterImplicitUseOperand(X86::R10)) return false; } } if (!HasReturn) return false; // For leaf functions: // 1. Do not instrument very short functions where it would not improve that // function's security. // 2. Detect if there is an unused caller-saved register we can reserve to // hold the return address instead of writing/reading it from the shadow // call stack. MCPhysReg LeafFuncRegister = X86::NoRegister; if (!Fn.getFrameInfo().adjustsStack()) { size_t InstructionCount = 0; std::bitset<X86::NUM_TARGET_REGS> UsedRegs; for (auto &MBB : Fn) { for (auto &LiveIn : MBB.liveins()) UsedRegs.set(LiveIn.PhysReg); for (auto &MI : MBB) { if (!MI.isDebugValue() && !MI.isCFIInstruction() && !MI.isLabel()) InstructionCount++; for (auto &Op : MI.operands()) if (Op.isReg() && Op.isDef()) UsedRegs.set(Op.getReg()); } } if (InstructionCount <= SkipLeafInstructions) return false; std::bitset<X86::NUM_TARGET_REGS> CalleeSavedRegs; const MCPhysReg *CSRegs = Fn.getRegInfo().getCalleeSavedRegs(); for (size_t i = 0; CSRegs[i]; i++) CalleeSavedRegs.set(CSRegs[i]); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); for (auto &Reg : X86::GR64_NOSPRegClass.getRegisters()) { // FIXME: Optimization opportunity: spill/restore a callee-saved register // if a caller-saved register is unavailable. if (CalleeSavedRegs.test(Reg)) continue; bool Used = false; for (MCSubRegIterator SR(Reg, TRI, true); SR.isValid(); ++SR) if ((Used = UsedRegs.test(*SR))) break; if (!Used) { LeafFuncRegister = Reg; break; } } } const bool LeafFuncOptimization = LeafFuncRegister != X86::NoRegister; if (LeafFuncOptimization) // Mark the leaf function register live-in for all MBBs except the entry MBB for (auto I = ++Fn.begin(), E = Fn.end(); I != E; ++I) I->addLiveIn(LeafFuncRegister); MachineBasicBlock &MBB = Fn.front(); const MachineBasicBlock *NonEmpty = MBB.empty() ? MBB.getFallThrough() : &MBB; const DebugLoc &DL = NonEmpty->front().getDebugLoc(); const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); if (LeafFuncOptimization) addPrologLeaf(Fn, TII, MBB, DL, LeafFuncRegister); else addProlog(Fn, TII, MBB, DL); MachineBasicBlock *Trap = nullptr; for (auto &MBB : Fn) { if (MBB.empty()) continue; MachineInstr &MI = MBB.instr_back(); if (MI.isReturn()) { if (!Trap) { Trap = Fn.CreateMachineBasicBlock(); BuildMI(Trap, MI.getDebugLoc(), TII->get(X86::TRAP)); Fn.push_back(Trap); } if (LeafFuncOptimization) addEpilogLeaf(TII, MBB, MI, *Trap, LeafFuncRegister); else if (MI.findRegisterUseOperand(X86::R11)) addEpilogOnlyR10(TII, MBB, MI, *Trap); else addEpilog(TII, MBB, MI, *Trap); } } return true; } INITIALIZE_PASS(ShadowCallStack, "shadow-call-stack", "Shadow Call Stack", false, false) FunctionPass *llvm::createShadowCallStackPass() { return new ShadowCallStack(); }