//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }
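
  // Illustrative sketch of shouldBePooled (above); the calls assume the
  // GlobalContext constant accessors, and are examples only: f32 constants
  // are always pooled, while f64 constants are pooled unless they are +0.0,
  // which is cheaper to materialize directly:
  //
  //   shouldBePooled(Ctx->getConstantFloat(1.5f));   // true
  //   shouldBePooled(Ctx->getConstantDouble(+0.0));  // false
  //   shouldBePooled(Ctx->getConstantDouble(-0.0));  // true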

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }

  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and
    // i16 are rounded up to 4 bytes.
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }
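
  // A sketch of what the i64 split implies for lowering (register names are
  // illustrative only): an i64 Variable becomes a Variable64On32 whose lo/hi
  // halves are allocated as independent 32-bit GPRs, so, e.g., a 64-bit add
  // is emitted as an adds/adc pair:
  //
  //   adds r0, r2, r4   ; lo halves; sets the carry flag
  //   adc  r1, r3, r5   ; hi halves; consumes the carry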

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;
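
    // Sketch of why two condition codes may be needed: lowering fcmp ueq
    // ("unordered or equal") sets the flags once, but the result is true
    // under either of two predicates, so the lowering can return
    // CondWhenTrue(CondARM32::EQ, CondARM32::VS):
    //
    //   vcmp s0, s1
    //   vmrs            ; copy FPSCR flags into APSR
    //   ; "true" iff EQ (equal) or VS (unordered)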

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;

  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///       ldrex tmp, [Addr]
  ///       StoreValue = Operation(tmp)
  ///       strexCond success, StoreValue, [Addr]
  ///       cmpCond success, #0
  ///       bne retry
  ///
  /// Operation needs to return the value that should be strex'd to Addr; it
  /// must not change the flags if Cond is not AL, and must not emit any
  /// instructions that could end up writing to memory. Operation also needs
  /// to handle fake-defing for i64 handling.
  void lowerLoadLinkedStoreExclusive(
      Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
      CondARM32::Cond Cond = CondARM32::AL);
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void randomlyInsertNop(float Probability,
                         RandomNumberGenerator &RNG) override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const override;
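
  // A sketch of what alignRegisterPow2 (declared above) emits; the register
  // name is illustrative. Rounding Reg down to a multiple of a power-of-2
  // Align is a single bit-clear of the low bits:
  //
  //   bic r0, r0, #(Align - 1)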

  // If a divide-by-zero check is needed, inserts a:
  //   test; branch .LSKIP; trap; .LSKIP: <continuation>.
  // If no check is needed nothing is inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                    Operand *Src1, ExtInstr ExtFunc, DivInstr DivFunc,
                    bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close
  // to assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
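
  // Typical use of these emitter helpers in the lowering code (a sketch;
  // Src0R and Src1RF stand for operands that have already been legalized):
  //
  //   Variable *T = makeReg(IceType_i32);
  //   _add(T, Src0R, Src1RF); // inserts "add T, Src0R, Src1RF"
  //   _mov(Dest, T);          // inserts "mov Dest, T"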
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for
  /// Dest's type (e.g., no immediates for vector loads, and no index
  /// registers for fp loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves
  /// between Core and VFP registers. This is not a panacea: you must obey the
  /// (weird, confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where
    // Dest is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the given index from a vector
  // register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }
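
  // E.g. (a sketch; the lane and registers are illustrative), extracting
  // lane 1 of a 4 x i32 vector held in q0 becomes a single element move:
  //
  //   vmov.32 r0, d0[1]   ; emitted by _extractelement(Dest, Src0, 1, Pred)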

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  //     mov T, <false value>
  //     CC <- eval(Boolean Expression)
  //     movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   bne %T
  //   tst C
  //   bne %T
  //   b %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   beq %F
  //   tst C
  //   beq %F
  //   b %T
  //
  // Arbitrarily long chains are short circuited, e.g.:
  //
  //   A = or  i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or  i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///     mov T, FalseValue
  ///     CC <- eval(Boolean)
  ///     movCC T, TrueValue
  ///     mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };
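
  // Sketch: during br i1 lowering, the intermediate targets are
  // LowerInt1BranchTarget-wrapped nodes or auxiliary labels; e.g., for
  // "A = or i1 B, C" the chain evaluation branches through them:
  //
  //   tst B, #1
  //   bne <TrueTarget>    ; short circuit: B alone decides the or
  //   tst C, #1
  //   bne <TrueTarget>
  //   b   <FalseTarget>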

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which types of arithmetic are allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only allow short
  /// circuiting a single high level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance
  /// (it might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit)
  /// label used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------
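
  // The following _movt/_movw helpers are typically used as a pair to
  // materialize a full 32-bit constant (a sketch; the register is
  // illustrative):
  //
  //   movw r0, #:lower16:Value   ; low halfword, zeroing the top bits
  //   movt r0, #:upper16:Value   ; high halfword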
  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable
  /// (with an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
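
  // Note on the flag-setting (*s) helpers above: the InstFakeUse of Dest
  // inserted after each flag-setting instruction keeps that instruction from
  // being dead-code eliminated when Dest itself is otherwise unused, since
  // later instructions may still consume the condition flags it sets.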
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value,
                         OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
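
  // Sketch of how the _ldrex/_strex helpers combine into the retry loop
  // emitted by lowerLoadLinkedStoreExclusive (registers and the add are
  // illustrative; see the doc comment on that method):
  //
  // .LRETRY:
  //   ldrex r0, [r2]        ; load-linked the current value
  //   add   r1, r0, r3      ; StoreValue = Operation(tmp)
  //   strex r4, r1, [r2]    ; attempt the store; r4 = 0 on success
  //   cmp   r4, #0
  //   bne   .LRETRY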
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables.
    // If either one of them is live, then they are both going to be live, and
    // thus assigned to different registers; if they are both dead, then DCE
    // will kick in and delete the following instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
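
  // E.g., a vector select can be built on _vbsl (bitwise select); vbsl keeps
  // Dest's bits where the mask is 1 and takes Src1's bits elsewhere (a
  // sketch with illustrative registers):
  //
  //   vbsl q0, q1, q2   ; q0 = (q0 & q1) | (~q0 & q2)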
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned
                               : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned
                               : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned
                               : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum number of bytes of
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a
  // Target-specific method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}
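
  // Sketch of the Nonsfi access pattern these helpers support (the exact
  // sequence is produced during lowering; registers are illustrative): a
  // global G is reached via its gotoff relocation relative to GotPtr:
  //
  //   movw ip, #:lower16:G(gotoff)
  //   movt ip, #:upper16:G(gotoff)
  //   add  r0, GotPtr, ip    ; r0 = &G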

  /// Loads the constant relocatable Name to Register. Then invoke Finish to
  /// finish the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the
  /// wrong value.
  //
  // Lowered sequence:
  //
  // Movw:
  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
  //     PC = fake-def
  // End:
  //     Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read.
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
  /// in the constructor/destructor during sandboxed code emission. Therefore,
  /// it is a bad idea to create an object of this type and "keep it around."
  /// The recommended usage is:
  ///
  ///   Sandboxer(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code, calls are always emitted at addresses 12 mod
    /// 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);
    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Ensures that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset
    // is fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in
    /// lieu of lea, which is not available on ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory,
    /// loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that
    /// is of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand,
    /// the method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns
    /// a memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;
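
  // PreservedGPRs/PreservedSRegs hold the callee-saved registers the
  // function actually uses; addProlog/addEpilog turn them into push/pop
  // sequences (a sketch, with an illustrative register set):
  //
  //   push {r4, r5, r11, lr}   ; prolog
  //   ...
  //   pop  {r4, r5, r11, lr}   ; epilog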

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the Procedure Call Standard for the ARM Architecture (AAPCS).
  ///
  /// Technically, arguments that can start with registers but extend beyond
  /// the available registers can be split between the registers and the
  /// stack. However, this is typically for passing GPR structs by value, and
  /// PNaCl transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg
    /// will be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is to
    /// integer types.
    bool argInVFP(Type Ty, RegNumT *Reg);

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << " ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is
      // multi-block. We conservatively initialize IsLiveOut to true, and set
      // it to false once we find the end of the live range for the variable
      // defined by this instruction. If liveness analysis is not performed
      // (e.g., in Om1 mode) IsLiveOut will never be set to false, and folding
      // will be disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };
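
    // For example (a sketch): given "%cmp = icmp slt i32 %a, %b" consumed
    // only by "br i1 %cmp, ...", recordProducers registers the icmp as the
    // producer of %cmp, so the branch lowering can fold the comparison
    // directly into the branch condition instead of materializing %cmp as
    // a 0/1 value.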
    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates whether TargetARM32::makeReg() can be
  // invoked without specifying a physical register. This is needed for
  // creating unbound temporaries during Ice -> ARM lowering, but before
  // register allocation. This is a safeguard ensuring that no unbound
  // temporaries are created during the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H