//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"
#include <utility>
namespace Ice {
namespace ARM32 {
// Class encapsulating ARM CPU features / instruction set.
class TargetARM32Features {
TargetARM32Features() = delete;
TargetARM32Features(const TargetARM32Features &) = delete;
TargetARM32Features &operator=(const TargetARM32Features &) = delete;
public:
explicit TargetARM32Features(const ClFlags &Flags);
enum ARM32InstructionSet {
Begin,
// Neon is the PNaCl baseline instruction set.
Neon = Begin,
HWDivArm, // HW divide in ARM mode (not just Thumb mode).
End
};
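// Instruction sets are cumulative: a target that supports a later
// enumerator also supports all earlier ones, which is why the simple <=
// comparison in hasFeature() below suffices.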
bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
private:
ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};
// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
TargetARM32() = delete;
TargetARM32(const TargetARM32 &) = delete;
TargetARM32 &operator=(const TargetARM32 &) = delete;
public:
static void staticInit(GlobalContext *Ctx);
static bool shouldBePooled(const Constant *C) {
if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
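// Note (assumption): +0.0 doubles are excluded presumably because they are
// cheaper to materialize directly (e.g., by zeroing a register) than to
// load from a constant pool.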
return !Utils::isPositiveZero(ConstDouble->getValue());
}
if (llvm::isa<ConstantFloat>(C))
return true;
return false;
}
static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }
static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
return makeUnique<TargetARM32>(Func);
}
std::unique_ptr<::Ice::Assembler> createAssembler() const override {
const bool IsNonsfi = SandboxingType == ST_Nonsfi;
return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
}
void initNodeForLowering(CfgNode *Node) override {
Computations.forgetProducers();
Computations.recordProducers(Node);
Computations.dump(Func);
}
void translateOm1() override;
void translateO2() override;
bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
Variable *getPhysicalRegister(RegNumT RegNum,
Type Ty = IceType_void) override;
const char *getRegName(RegNumT RegNum, Type Ty) const override;
SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const override;
const SmallBitVector &
getRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
switch (RC) {
default:
assert(RC < RC_Target);
return TypeToRegisterSet[RC];
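// RCARM32_QtoS is a target-specific register class (i.e., it is numbered
// >= RC_Target), so it is listed explicitly rather than hitting the assert
// in the default case.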
case RegARM32::RCARM32_QtoS:
return TypeToRegisterSet[RC];
}
}
const SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
RegClass RC = Var->getRegClass();
assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
return TypeToRegisterSetUnfiltered[RC];
}
const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
return RegisterAliases[Reg];
}
bool hasFramePointer() const override { return UsesFramePointer; }
void setHasFramePointer() override { UsesFramePointer = true; }
RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
RegNumT getFrameOrStackReg() const override {
return UsesFramePointer ? getFrameReg() : getStackReg();
}
RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }
size_t typeWidthInBytesOnStack(Type Ty) const override {
// Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
// are rounded up to 4 bytes.
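// For example, i16: (2 + 3) & ~3 == 4.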
return (typeWidthInBytes(Ty) + 3) & ~3;
}
uint32_t getStackAlignment() const override;
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
FixedAllocaSizeBytes = Size;
assert(llvm::isPowerOf2_32(Align));
FixedAllocaAlignBytes = Align;
PrologEmitsFixedAllocas = true;
}
int32_t getFrameFixedAllocaOffset() const override {
return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
}
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
}
// TODO(ascull): what size is best for ARM?
SizeT getMinJumpTableSize() const override { return 3; }
void emitJumpTable(const Cfg *Func,
const InstJumpTable *JumpTable) const override;
void emitVariable(const Variable *Var) const override;
void emit(const ConstantUndef *C) const final;
void emit(const ConstantInteger32 *C) const final;
void emit(const ConstantInteger64 *C) const final;
void emit(const ConstantFloat *C) const final;
void emit(const ConstantDouble *C) const final;
void emit(const ConstantRelocatable *C) const final;
void lowerArguments() override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
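/// loOperand() and hiOperand() return the lower and upper halves of a split
/// i64 operand (see shouldSplitToVariable64On32()).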
Operand *loOperand(Operand *Operand);
Operand *hiOperand(Operand *Operand);
void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t *InArgsSizeBytes);
bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
return CPUFeatures.hasFeature(I);
}
enum OperandLegalization {
Legal_Reg = 1 << 0, /// physical register, not stack location
Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
/// immediates, shifted registers, or modified fp imm.
Legal_Mem = 1 << 2, /// includes [r0, r1 lsl #2] as well as [sp, #12]
Legal_Rematerializable = 1 << 3,
Legal_Default = ~Legal_Rematerializable,
};
using LegalMask = uint32_t;
Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
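/// Legal masks may be or'd together when several operand forms are
/// acceptable. Purely for illustration (Src1 being an assumed local), a
/// lowering step that accepts either a register or a flexible operand2
/// could request:
///
/// \code
///   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
/// \endcode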
Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
RegNumT RegNum = RegNumT());
Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
assert(ShAmtImm < 32);
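// The & 0x1F below keeps release builds (where assert() is a no-op)
// well-defined for out-of-range shift amounts.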
return OperandARM32ShAmtImm::create(
Func,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
}
GlobalContext *getCtx() const { return Ctx; }
protected:
explicit TargetARM32(Cfg *Func);
void postLower() override;
enum SafeBoolChain {
SBC_No,
SBC_Yes,
};
void lowerAlloca(const InstAlloca *Instr) override;
SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
Operand *Src0, Operand *Src1);
void lowerArithmetic(const InstArithmetic *Instr) override;
void lowerAssign(const InstAssign *Instr) override;
void lowerBr(const InstBr *Instr) override;
void lowerCall(const InstCall *Instr) override;
void lowerCast(const InstCast *Instr) override;
void lowerExtractElement(const InstExtractElement *Instr) override;
/// CondWhenTrue is a helper type returned by every method in the lowering
/// that emits code to set the condition codes.
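/// Most conditions are expressible with a single condition code (WhenTrue0);
/// a few require a disjunction of two codes, in which case WhenTrue1 is also
/// set (otherwise it is kNone; see the constructor asserts below).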
class CondWhenTrue {
public:
explicit CondWhenTrue(CondARM32::Cond T0,
CondARM32::Cond T1 = CondARM32::kNone)
: WhenTrue0(T0), WhenTrue1(T1) {
assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
assert(T1 != T0 || T0 == CondARM32::kNone);
}
CondARM32::Cond WhenTrue0;
CondARM32::Cond WhenTrue1;
/// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
CondWhenTrue invert() const {
switch (WhenTrue0) {
default:
if (WhenTrue1 == CondARM32::kNone)
return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
InstARM32::getOppositeCondition(WhenTrue1));
case CondARM32::AL:
return CondWhenTrue(CondARM32::kNone);
case CondARM32::kNone:
return CondWhenTrue(CondARM32::AL);
}
}
};
CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
void lowerFcmp(const InstFcmp *Instr) override;
CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
Operand *Src0, Operand *Src1);
CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
Operand *Src1);
CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
void lowerIcmp(const InstIcmp *Instr) override;
/// Emits the basic sequence for load-linked/store-exclusive loops:
///
/// retry:
/// ldrex tmp, [Addr]
/// StoreValue = Operation(tmp)
/// strexCond success, StoreValue, [Addr]
/// cmpCond success, #0
/// bne retry
///
/// Operation must return the value to be strex'd to Addr; it must not change
/// the flags if Cond is not AL, and it must not emit any instructions that
/// could end up writing to memory. Operation also needs to handle the
/// fake-defs required for i64 values.
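///
/// For illustration only, an atomic add could be lowered roughly as follows
/// (a sketch; Addr and Val are assumed, already-legalized locals):
///
/// \code
///   lowerLoadLinkedStoreExclusive(IceType_i32, Addr, [&](Variable *Tmp) {
///     Variable *StoreValue = makeReg(IceType_i32);
///     _add(StoreValue, Tmp, Val);
///     return StoreValue;
///   });
/// \endcode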
void
lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
std::function<Variable *(Variable *)> Operation,
CondARM32::Cond Cond = CondARM32::AL);
void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
Operand *Val);
void lowerBreakpoint(const InstBreakpoint *Instr) override;
void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
void lowerInsertElement(const InstInsertElement *Instr) override;
void lowerLoad(const InstLoad *Instr) override;
void lowerPhi(const InstPhi *Instr) override;
void lowerRet(const InstRet *Instr) override;
void lowerSelect(const InstSelect *Instr) override;
void lowerShuffleVector(const InstShuffleVector *Instr) override;
void lowerStore(const InstStore *Instr) override;
void lowerSwitch(const InstSwitch *Instr) override;
void lowerUnreachable(const InstUnreachable *Instr) override;
void prelowerPhis() override;
uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
void genTargetHelperCallFor(Inst *Instr) override;
void doAddressOptLoad() override;
void doAddressOptStore() override;
void randomlyInsertNop(float Probability,
RandomNumberGenerator &RNG) override;
OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);
Variable64On32 *makeI64RegPair();
Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
static Type stackSlotType();
Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
void alignRegisterPow2(Variable *Reg, uint32_t Align,
RegNumT TmpRegNum = RegNumT());
/// Returns a vector in a register with the given constant entries.
Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
void
makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
const SmallBitVector &ExcludeRegisters,
uint64_t Salt) const override;
// If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
// .LSKIP: <continuation>. If no check is needed nothing is inserted.
void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
CondARM32::Cond);
using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
CondARM32::Cond);
void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);
void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);
// The following are helpers that insert lowered ARM32 instructions with
// minimal syntactic overhead, so that the lowering code can look as close to
// assembly as practical.
void _add(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
}
void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
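// The fake-use of Dest keeps the flag-setting instruction alive even when
// only the flags (and not Dest itself) are consumed downstream; the same
// pattern recurs in the other flag-setting helpers below.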
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
}
void _and(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
}
void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
}
void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
}
void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
CondARM32::Cond Condition) {
Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
}
void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
void _br(CfgNode *Target, CondARM32::Cond Condition) {
Context.insert<InstARM32Br>(Target, Condition);
}
void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
Context.insert<InstARM32Br>(Label, Condition);
}
void _cmn(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
}
void _cmp(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
}
void _clz(Variable *Dest, Variable *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Clz>(Dest, Src0, Pred);
}
void _dmb() { Context.insert<InstARM32Dmb>(); }
void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
}
/// _ldr, for all your memory to Variable data moves. It handles all types
/// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
/// type (e.g., no immediates for vector loads, and no index registers for fp
/// loads.)
void _ldr(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
}
InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
}
return Ldrex;
}
void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
}
void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
}
void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
}
void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
}
/// _mov, for all your Variable to Variable data movement needs. It handles
/// all types (integer, floating point, and vectors), as well as moves between
/// Core and VFP registers. This is not a panacea: you must obey the (weird,
/// confusing, non-uniform) rules for data moves in ARM.
void _mov(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
// _mov used to be unique in the sense that it would create a temporary
// automagically if Dest was nullptr. It won't do that anymore, so we keep
// an assert around just in case there is some untested code path where Dest
// is nullptr.
assert(Dest != nullptr);
assert(!llvm::isa<OperandARM32Mem>(Src0));
auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
if (Instr->isMultiDest()) {
// If Instr is multi-dest, then Dest must be a Variable64On32. We add a
// fake-def for Instr.DestHi here.
assert(llvm::isa<Variable64On32>(Dest));
Context.insert<InstFakeDef>(Instr->getDestHi());
}
}
void _mov_redefined(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
Instr->setDestRedefined();
if (Instr->isMultiDest()) {
// If Instr is multi-dest, then Dest must be a Variable64On32. We add a
// fake-def for Instr.DestHi here.
assert(llvm::isa<Variable64On32>(Dest));
Context.insert<InstFakeDef>(Instr->getDestHi());
}
}
void _nop() { Context.insert<InstARM32Nop>(); }
// Generates a vmov instruction to extract the element at the given index of
// a vector register.
void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
}
// Generates a vmov instruction to insert a value into the given index of a
// vector register.
void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
}
// --------------------------------------------------------------------------
// Begin bool folding machinery.
//
// There are three types of boolean lowerings handled by this target:
//
// 1) Boolean expressions leading to a boolean Variable definition
// ---------------------------------------------------------------
//
// Whenever a i1 Variable is live out (i.e., its live range extends beyond
// the defining basic block) we do not fold the operation. We instead
// materialize (i.e., compute) the variable normally, so that it can be used
// when needed. We also materialize i1 values that are not single use to
// avoid code duplication. These expressions are not short circuited.
//
// 2) Boolean expressions leading to a select
// ------------------------------------------
//
// These include boolean chains leading to a select instruction, as well as
// i1 Sexts. These boolean expressions are lowered to:
//
// mov T, <false value>
// CC <- eval(Boolean Expression)
// movCC T, <true value>
//
// For Sexts, <false value> is 0, and <true value> is -1.
//
// 3) Boolean expressions leading to a br i1
// -----------------------------------------
//
// These are the boolean chains leading to a branch. These chains are
// short-circuited, i.e.:
//
// A = or i1 B, C
// br i1 A, label %T, label %F
//
// becomes
//
// tst B
// jne %T
// tst C
// jne %T
// j %F
//
// and
//
// A = and i1 B, C
// br i1 A, label %T, label %F
//
// becomes
//
// tst B
// jeq %F
// tst C
// jeq %F
// j %T
//
// Arbitrarily long chains are short circuited, e.g.
//
// A = or i1 B, C
// D = and i1 A, E
// F = and i1 G, H
// I = or i1 D, F
// br i1 I, label %True, label %False
//
// becomes
//
// Label[A]:
// tst B, 1
// bne Label[D]
// tst C, 1
// beq Label[I]
// Label[D]:
// tst E, 1
// bne %True
// Label[I]:
// tst G, 1
// beq %False
// tst H, 1
// beq %False (bne %True)
/// lowerInt1 materializes Boolean to a Variable.
SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);
/// lowerInt1ForSelect generates the following instruction sequence:
///
/// mov T, FalseValue
/// CC <- eval(Boolean)
/// movCC T, TrueValue
/// mov Dest, T
///
/// It is used for lowering select i1, as well as i1 Sext.
void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
Operand *FalseValue);
/// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode, or
/// an InstARM32Label (but never both) so that, during br i1 lowering, we can
/// create auxiliary labels for short circuiting the condition evaluation.
class LowerInt1BranchTarget {
public:
explicit LowerInt1BranchTarget(CfgNode *const Target)
: NodeTarget(Target) {}
explicit LowerInt1BranchTarget(InstARM32Label *const Target)
: LabelTarget(Target) {}
/// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
/// is an exact copy of this one if Label is nullptr; otherwise, the returned
/// object will wrap Label instead.
LowerInt1BranchTarget
createForLabelOrDuplicate(InstARM32Label *Label) const {
if (Label != nullptr)
return LowerInt1BranchTarget(Label);
if (NodeTarget)
return LowerInt1BranchTarget(NodeTarget);
return LowerInt1BranchTarget(LabelTarget);
}
CfgNode *const NodeTarget = nullptr;
InstARM32Label *const LabelTarget = nullptr;
};
/// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
/// determining which kinds of arithmetic are allowed to be short circuited. This
/// is useful for lowering
///
/// t1 = and i1 A, B
/// t2 = and i1 t1, C
/// br i1 t2, label %True, label %False
///
/// to
///
/// tst A, 1
/// beq %False
/// tst B, 1
/// beq %False
/// tst C, 1
/// bne %True
/// b %False
///
/// Without this information, short circuiting could only be applied to a
/// single high-level instruction. For example:
///
/// t1 = or i1 A, B
/// t2 = and i1 t1, C
/// br i1 t2, label %True, label %False
///
/// cannot be lowered to
///
/// tst A, 1
/// bne %True
/// tst B, 1
/// bne %True
/// tst C, 1
/// bne %True
/// b %False
///
/// It needs to be lowered to
///
/// tst A, 1
/// bne Aux
/// tst B, 1
/// beq %False
/// Aux:
/// tst C, 1
/// bne %True
/// b %False
///
/// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
/// might.)
enum LowerInt1AllowShortCircuit {
SC_And = 1,
SC_Or = 2,
SC_All = SC_And | SC_Or,
};
/// ShortCircuitCondAndLabel wraps the condition codes that should be used
/// after a lowerInt1ForBranch returns to branch to the
/// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
/// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
/// used for short circuiting.
class ShortCircuitCondAndLabel {
public:
explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
InstARM32Label *L = nullptr)
: Cond(std::move(C)), ShortCircuitTarget(L) {}
const CondWhenTrue Cond;
InstARM32Label *const ShortCircuitTarget;
CondWhenTrue assertNoLabelAndReturnCond() const {
assert(ShortCircuitTarget == nullptr);
return Cond;
}
};
/// lowerInt1ForBranch expands Boolean, and returns the condition codes that
/// are to be used for branching to the branch's TrueTarget. It may return a
/// label that the expansion of Boolean used to short circuit the chain's
/// evaluation.
ShortCircuitCondAndLabel
lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
const LowerInt1BranchTarget &TargetFalse,
uint32_t ShortCircuitable);
// _br is a convenience wrapper that emits br instructions to Target.
void _br(const LowerInt1BranchTarget &BrTarget,
CondARM32::Cond Cond = CondARM32::AL) {
assert((BrTarget.NodeTarget == nullptr) !=
(BrTarget.LabelTarget == nullptr));
if (BrTarget.NodeTarget != nullptr)
_br(BrTarget.NodeTarget, Cond);
else
_br(BrTarget.LabelTarget, Cond);
}
// _br_short_circuit is used when lowering InstArithmetic::And and
// InstArithmetic::Or and a short circuit branch is needed.
void _br_short_circuit(const LowerInt1BranchTarget &Target,
const CondWhenTrue &Cond) {
if (Cond.WhenTrue1 != CondARM32::kNone) {
_br(Target, Cond.WhenTrue1);
}
if (Cond.WhenTrue0 != CondARM32::kNone) {
_br(Target, Cond.WhenTrue0);
}
}
// End of bool folding machinery
// --------------------------------------------------------------------------
/// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
/// an upper16 relocation).
void _movt(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Movt>(Dest, Src0, Pred);
}
void _movw(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Movw>(Dest, Src0, Pred);
}
void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
}
void _mvn(Variable *Dest, Operand *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
}
void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
}
void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
void _pop(const VarList &Dests) {
Context.insert<InstARM32Pop>(Dests);
// Mark dests as modified.
for (Variable *Dest : Dests)
Context.insert<InstFakeDef>(Dest);
}
void _rbit(Variable *Dest, Variable *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
}
void _rev(Variable *Dest, Variable *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Rev>(Dest, Src0, Pred);
}
void _ret(Variable *LR, Variable *Src0 = nullptr) {
Context.insert<InstARM32Ret>(LR, Src0);
}
void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
}
void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
}
void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
}
void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
}
/// _str, for all your Variable to memory transfers. Addr has the same
/// restrictions that it does in _ldr.
void _str(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Str>(Value, Addr, Pred);
}
InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
Context.insert<InstFakeUse>(Value64->getLo());
Context.insert<InstFakeUse>(Value64->getHi());
}
return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
}
void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
}
void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
constexpr bool SetFlags = true;
Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
if (SetFlags) {
Context.insert<InstFakeUse>(Dest);
}
}
void _sxt(Variable *Dest, Variable *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
}
void _tst(Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Tst>(Src0, Src1, Pred);
}
void _trap() { Context.insert<InstARM32Trap>(); }
void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
}
void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
// umull requires DestLo and DestHi to be assigned to different GPRs. The
// following lines create overlapping liveness ranges for both variables. If
// either one of them is live, then they are both going to be live, and thus
// assigned to different registers; if they are both dead, then DCE will
// kick in and delete the following three instructions.
Context.insert<InstFakeDef>(DestHi);
Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
Context.insert<InstFakeUse>(DestHi);
}
void _uxt(Variable *Dest, Variable *Src0,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
}
void _vabs(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vabs>(Dest, Src, Pred);
}
void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
}
void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vand>(Dest, Src0, Src1);
}
InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
}
void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
}
InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
}
InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
}
void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
}
void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
}
void _vcmp(Variable *Src0, Variable *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
}
void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
}
void _vdup(Variable *Dest, Variable *Src, int Idx) {
Context.insert<InstARM32Vdup>(Dest, Src, Idx);
}
void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Veor>(Dest, Src0, Src1);
}
void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
}
void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
}
void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vmrs>(Pred);
}
void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
}
void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
}
void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
}
void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
}
void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
}
void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
}
void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
}
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
}
void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
}
void _vmvn(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
}
void _vneg(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
->setSignType(InstARM32::FS_Signed);
}
void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
}
void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
}
void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
bool Saturating) {
Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
: InstARM32::FS_Signed)
: InstARM32::FS_None);
}
void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
}
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
}
void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
->setSignType(InstARM32::FS_Unsigned);
}
InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
ConstantInteger32 *Src1) {
return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
}
void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
}
void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
}
void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
}
void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
}
void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
}
// Iterates over the CFG and determines the maximum size, in bytes, of the
// outgoing stack arguments. This information is later used during
// addProlog() to pre-allocate the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call.
void findMaxStackOutArgsSize();
/// Returns true if the given Offset can be represented in a Load/Store Mem
/// Operand.
bool isLegalMemOffset(Type Ty, int32_t Offset) const;
void postLowerLegalization();
/// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
/// @{
void createGotPtr();
void insertGotPtrInitPlaceholder();
VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
void materializeGotAddr(CfgNode *Node);
Variable *GotPtr = nullptr;
// TODO(jpp): use CfgLocalAllocator.
/// @}
/// Manages the Gotoff relocations created during the function lowering. A
/// single Gotoff relocation is created for each global variable used by the
/// function being lowered.
/// @{
// TODO(jpp): if the same global G is used in different functions, then this
// method will emit one G(gotoff) relocation per function.
GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
CfgUnorderedSet<GlobalString> KnownGotoffs;
/// @}
/// Loads the constant relocatable Name into Register, then invokes Finish to
/// complete the relocatable lowering. Finish **must** use PC in its first
/// emitted instruction, or the relocatable in Register will contain the
/// wrong value.
//
// Lowered sequence:
//
// Movw:
// movw Register, #:lower16:Name - (End - Movw) - 8 .
// Movt:
// movt Register, #:upper16:Name - (End - Movt) - 8 .
// PC = fake-def
// End:
// Finish(PC)
//
// The -8 in movw/movt above is to account for the PC value that the first
// instruction emitted by Finish(PC) will read.
void
loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
std::function<void(Variable *PC)> Finish);
/// Sandboxer defines methods for ensuring that "dangerous" operations are
/// masked during sandboxed code emission. For regular, non-sandboxed code
/// emission, its methods are simple pass-through methods.
///
/// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
/// in the constructor/destructor during sandboxed code emission. Therefore,
/// it is a bad idea to create an object of this type and "keep it around."
/// The recommended usage is:
///
/// Sandboxer(this).<<operation>>(...);
///
/// This usage ensures that no other instructions are inadvertently added to
/// the bundle.
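///
/// For example (a sketch; Amount being an assumed local):
///
/// \code
///   Sandboxer(this).sub_sp(Amount); // bundled: sub sp, ...; bic sp, ...
/// \endcode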
class Sandboxer {
Sandboxer() = delete;
Sandboxer(const Sandboxer &) = delete;
Sandboxer &operator=(const Sandboxer &) = delete;
public:
explicit Sandboxer(
TargetARM32 *Target,
InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
~Sandboxer();
/// Increments sp:
///
/// add sp, sp, AddAmount
/// bic sp, sp, 0xc0000000
///
/// (for the rationale, see the ARM 32-bit Sandbox Specification.)
void add_sp(Operand *AddAmount);
/// Emits code to align sp to the specified alignment:
///
/// bic/and sp, sp, Alignment
/// bic sp, sp, 0xc0000000
void align_sp(size_t Alignment);
/// Emits a call instruction. If CallTarget is a Variable, it emits
///
/// bic CallTarget, CallTarget, 0xc000000f
/// bl CallTarget
///
/// Otherwise, it emits
///
/// bl CallTarget
///
/// Note: in sandboxed code, calls are always emitted at addresses that are
/// 12 mod 16.
InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);
/// Emits a load:
///
/// bic rBase, rBase, 0xc0000000
/// ldr rDest, [rBase, #Offset]
///
/// Exception: if rBase is r9 or sp, then the load is emitted as:
///
/// ldr rDest, [rBase, #Offset]
///
/// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
/// always valid.
void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
/// Emits a load exclusive:
///
/// bic rBase, rBase, 0xc0000000
/// ldrex rDest, [rBase]
///
/// Exception: if rBase is r9 or sp, then the load is emitted as:
///
/// ldrex rDest, [rBase]
///
/// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
/// always valid.
void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);
/// Resets sp to Src:
///
/// mov sp, Src
/// bic sp, sp, 0xc0000000
void reset_sp(Variable *Src);
/// Emits code to return from a function:
///
/// bic lr, lr, 0xc000000f
/// bx lr
void ret(Variable *RetAddr, Variable *RetValue);
/// Emits a store:
///
/// bic rBase, rBase, 0xc0000000
/// str rSrc, [rBase, #Offset]
///
/// Exception: if rBase is r9 or sp, then the store is emitted as:
///
/// str rSrc, [rBase, #Offset]
///
/// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
/// always valid.
void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);
/// Emits a store exclusive:
///
/// bic rBase, rBase, 0xc0000000
/// strex rDest, rSrc, [rBase]
///
/// Exception: if rBase is r9 or sp, then the store is emitted as:
///
/// strex rDest, rSrc, [rBase]
///
/// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
/// always valid.
void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
CondARM32::Cond Pred);
/// Decrements sp:
///
/// sub sp, sp, SubAmount
/// bic sp, sp, 0xc0000000
void sub_sp(Operand *SubAmount);
private:
TargetARM32 *const Target;
const InstBundleLock::Option BundleOption;
std::unique_ptr<AutoBundle> Bundler;
void createAutoBundle();
};
class PostLoweringLegalizer {
PostLoweringLegalizer() = delete;
PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;
public:
explicit PostLoweringLegalizer(TargetARM32 *Target)
: Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
Target->getFrameOrStackReg())) {}
void resetTempBaseIfClobberedBy(const Inst *Instr);
// Ensures that the TempBase register held by this legalizer (if any) is
// assigned to IP.
void assertNoTempOrAssignedToIP() const {
assert(TempBaseReg == nullptr ||
TempBaseReg->getRegNum() == Target->getReservedTmpReg());
}
// Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset is
// fixed up.
OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
bool AllowOffsets = true);
/// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
/// if its Source is a Rematerializable variable (this form is used in lieu
/// of lea, which is not available on ARM.)
///
/// Moves to memory become store instructions, and moves from memory, loads.
void legalizeMov(InstARM32Mov *Mov);
private:
/// Creates a new Base register centered around [Base, +/- Offset].
Variable *newBaseRegister(Variable *Base, int32_t Offset,
RegNumT ScratchRegNum);
/// Creates a new, legal OperandARM32Mem for accessing Base + Offset. The
/// returned mem operand is a legal operand for accessing memory that is of
/// type Ty.
///
/// If [Base, #Offset] is encodable, then the method returns a Mem operand
/// expressing it. Otherwise, if [TempBaseReg, #Offset - TempBaseOffset] is a
/// valid memory operand, the method returns that. Otherwise, a new base
/// register ip = Base + Offset is created, and the method returns a memory
/// operand expressing [ip, #0].
OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
bool AllowOffsets = true);
TargetARM32 *const Target;
Variable *const StackOrFrameReg;
Variable *TempBaseReg = nullptr;
int32_t TempBaseOffset = 0;
};
const bool NeedSandboxing;
TargetARM32Features CPUFeatures;
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array.
static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM];
VarList PreservedGPRs;
VarList PreservedSRegs;
/// Helper class that understands the Calling Convention and register
/// assignments. The first few integer type parameters can use r0-r3,
/// regardless of their position relative to the floating-point/vector
/// arguments in the argument list. Floating-point and vector arguments
/// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
/// see the ARM Architecture Procedure Calling Standards (AAPCS).
///
/// Technically, arguments that can start with registers but extend beyond the
/// available registers can be split between the registers and the stack.
/// However, this is typically for passing GPR structs by value, and PNaCl
/// transforms expand this out.
///
/// At (public) function entry, the stack must be 8-byte aligned.
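///
/// A sketch of the intended query pattern (Ty being an assumed local):
///
/// \code
///   CallingConv CC;
///   RegNumT Reg;
///   const bool InReg = (isScalarFloatingType(Ty) || isVectorType(Ty))
///                          ? CC.argInVFP(Ty, &Reg)
///                          : CC.argInGPR(Ty, &Reg);
///   // If InReg, the argument is passed in Reg; otherwise on the stack.
/// \endcode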
class CallingConv {
CallingConv(const CallingConv &) = delete;
CallingConv &operator=(const CallingConv &) = delete;
public:
CallingConv();
~CallingConv() = default;
/// argInGPR returns true if there is a GPR available for the requested
/// type, and false otherwise. If it returns true, Reg is set to the
/// appropriate register number. Note that, when Ty == IceType_i64, Reg will
/// be an I64 register pair.
bool argInGPR(Type Ty, RegNumT *Reg);
/// argInVFP is to floating-point/vector types what argInGPR is for integer
/// types.
bool argInVFP(Type Ty, RegNumT *Reg);
private:
void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
SmallBitVector GPRegsUsed;
CfgVector<RegNumT> GPRArgs;
CfgVector<RegNumT> I64Args;
void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
SmallBitVector VFPRegsUsed;
CfgVector<RegNumT> FP32Args;
CfgVector<RegNumT> FP64Args;
CfgVector<RegNumT> Vec128Args;
};
private:
ENABLE_MAKE_UNIQUE;
OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
Operand *Base);
void postambleCtpop64(const InstCall *Instr);
void preambleDivRem(const InstCall *Instr);
CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
ARM32HelpersPreamble;
CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
ARM32HelpersPostamble;
class ComputationTracker {
public:
ComputationTracker() = default;
~ComputationTracker() = default;
void forgetProducers() { KnownComputations.clear(); }
void recordProducers(CfgNode *Node);
const Inst *getProducerOf(const Operand *Opnd) const {
auto *Var = llvm::dyn_cast<Variable>(Opnd);
if (Var == nullptr) {
return nullptr;
}
auto Iter = KnownComputations.find(Var->getIndex());
if (Iter == KnownComputations.end()) {
return nullptr;
}
return Iter->second.Instr;
}
void dump(const Cfg *Func) const {
if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
return;
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "foldable producer:\n";
for (const auto &Computation : KnownComputations) {
Str << " ";
Computation.second.Instr->dump(Func);
Str << "\n";
}
Str << "\n";
}
private:
class ComputationEntry {
public:
ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
Inst *const Instr;
// Boolean folding is disabled for variables whose live range is multi
// block. We conservatively initialize IsLiveOut to true, and set it to
// false once we find the end of the live range for the variable defined
// by this instruction. If liveness analysis is not performed (e.g., in
// Om1 mode) IsLiveOut will never be set to false, and folding will be
// disabled.
bool IsLiveOut = true;
int32_t NumUses = 0;
Type ComputationType;
};
// ComputationMap maps a Variable number to a payload identifying which
// instruction defined it.
using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
ComputationMap KnownComputations;
};
ComputationTracker Computations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
// temporaries during Ice -> ARM lowering, but before register allocation.
// It is a safeguard ensuring that no unbound temporaries are created during
// the legalization post-passes.
bool AllowTemporaryWithNoReg = true;
// ForbidTemporaryWithoutReg is a RAII class that manages
// AllowTemporaryWithNoReg.
class ForbidTemporaryWithoutReg {
ForbidTemporaryWithoutReg() = delete;
ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
ForbidTemporaryWithoutReg &
operator=(const ForbidTemporaryWithoutReg &) = delete;
public:
explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
Target->AllowTemporaryWithNoReg = false;
}
~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
private:
TargetARM32 *const Target;
};
};
class TargetDataARM32 final : public TargetDataLowering {
TargetDataARM32() = delete;
TargetDataARM32(const TargetDataARM32 &) = delete;
TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;
public:
static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
}
void lowerGlobals(const VariableDeclarationList &Vars,
const std::string &SectionSuffix) override;
void lowerConstants() override;
void lowerJumpTables() override;
protected:
explicit TargetDataARM32(GlobalContext *Ctx);
private:
~TargetDataARM32() override = default;
};
class TargetHeaderARM32 final : public TargetHeaderLowering {
TargetHeaderARM32() = delete;
TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;
public:
static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
}
void lower() override;
protected:
explicit TargetHeaderARM32(GlobalContext *Ctx);
private:
~TargetHeaderARM32() = default;
TargetARM32Features CPUFeatures;
};
} // end of namespace ARM32
} // end of namespace Ice
#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H