/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_

#include <deque>
#include <utility>
#include <vector>

#include "base/arena_containers.h"
#include "base/logging.h"
#include "constants_arm.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/arm/assembler_arm.h"
#include "utils/array_ref.h"
#include "offsets.h"

namespace art {
namespace arm {

class Thumb2Assembler FINAL : public ArmAssembler {
 public:
  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
      : ArmAssembler(arena),
        can_relocate_branches_(can_relocate_branches),
        force_32bit_(false),
        it_cond_index_(kNoItCondition),
        next_condition_(AL),
        fixups_(arena->Adapter(kArenaAllocAssembler)),
        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
        literals_(arena->Adapter(kArenaAllocAssembler)),
        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0u),
        last_old_position_(0u),
        last_fixup_id_(0u) {
    cfi().DelayEmittingAdvancePCs();
  }

  virtual ~Thumb2Assembler() {
  }

  bool IsThumb() const OVERRIDE {
    return true;
  }

  bool IsForced32Bit() const {
    return force_32bit_;
  }

  bool CanRelocateBranches() const {
    return can_relocate_branches_;
  }

  void FinalizeCode() OVERRIDE;

  // Data-processing instructions.
  virtual void and_(Register rd, Register rn, const ShifterOperand& so,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void eor(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void sub(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void rsb(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void add(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void adc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void sbc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void rsc(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  virtual void orr(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void orn(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void mov(Register rd, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void bic(Register rd, Register rn, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void mvn(Register rd, const ShifterOperand& so,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  // Miscellaneous data-processing instructions.
  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void rbit(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void rev(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void rev16(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void revsh(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  // Multiply instructions.
  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void mla(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void mls(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void smull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;
  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;

  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;

  // Bit field extract instructions.
  void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
  void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;

  // Load/store instructions.
  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  // Load/store register dual instructions using registers `rd` and `rd` + 1.
  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  // Load/store register dual instructions using registers `rd` and `rd2`.
  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
  // does not require `rd` to be even, nor `rd2' to be equal to `rd` + 1.
  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
  void strd(Register rd, Register rd2, const Address& ad, Condition cond);


  void ldm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;
  void stm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
  void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);

  void ldrexd(Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;
  void strexd(Register rd, Register rt, Register rt2, Register rn, Condition cond = AL) OVERRIDE;

  // Miscellaneous instructions.
  void clrex(Condition cond = AL) OVERRIDE;
  void nop(Condition cond = AL) OVERRIDE;

  void bkpt(uint16_t imm16) OVERRIDE;
  void svc(uint32_t imm24) OVERRIDE;

  // If-then
  void it(Condition firstcond, ItState i1 = kItOmitted,
        ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;

  void cbz(Register rn, Label* target) OVERRIDE;
  void cbnz(Register rn, Label* target) OVERRIDE;

  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  // Returns false if the immediate cannot be encoded.
  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;

  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;

  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;

  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;

  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR

  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;

  // Branch instructions.
  void b(Label* label, Condition cond = AL);
  void bl(Label* label, Condition cond = AL);
  void blx(Label* label);
  void blx(Register rm, Condition cond = AL) OVERRIDE;
  void bx(Register rm, Condition cond = AL) OVERRIDE;

  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Asr(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Ror(Register rd, Register rm, uint32_t shift_imm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Rrx(Register rd, Register rm,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  virtual void Lsl(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Lsr(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Asr(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
  virtual void Ror(Register rd, Register rm, Register rn,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void Push(Register rd, Condition cond = AL) OVERRIDE;
  void Pop(Register rd, Condition cond = AL) OVERRIDE;

  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
  void PopList(RegList regs, Condition cond = AL) OVERRIDE;

  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;

  // Memory barriers.
  void dmb(DmbOptions flavor) OVERRIDE;

  // Get the final position of a label after local fixup based on the old position
  // recorded before FinalizeCode().
  uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE;

  using ArmAssembler::NewLiteral;  // Make the helper template visible.

  Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE;
  void LoadLiteral(Register rt, Literal* literal) OVERRIDE;
  void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE;
  void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE;
  void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE;

  // Add signed constant value to rd. May clobber IP.
  void AddConstant(Register rd, Register rn, int32_t value,
                   Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;

  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;

  // Load and Store. May clobber IP.
  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
  void MarkExceptionHandler(Label* label) OVERRIDE;
  void LoadFromOffset(LoadOperandType type,
                      Register reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void StoreToOffset(StoreOperandType type,
                     Register reg,
                     Register base,
                     int32_t offset,
                     Condition cond = AL) OVERRIDE;
  void LoadSFromOffset(SRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreSToOffset(SRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void LoadDFromOffset(DRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreDToOffset(DRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;

  bool ShifterOperandCanHold(Register rd,
                             Register rn,
                             Opcode opcode,
                             uint32_t immediate,
                             SetCc set_cc,
                             ShifterOperand* shifter_op) OVERRIDE;
  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.

  bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;


  static bool IsInstructionForExceptionHandling(uintptr_t pc);

  // Emit data (e.g. encoded instruction or immediate) to the.
  // instruction stream.
  void Emit32(int32_t value);     // Emit a 32 bit instruction in thumb format.
  void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
  void Bind(Label* label) OVERRIDE;

  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;

  // Force the assembler to generate 32 bit instructions.
  void Force32Bit() {
    force_32bit_ = true;
  }

  // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
  // will generate a fixup.
  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
  // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;

 private:
  typedef uint16_t FixupId;

  // Fixup: branches and literal pool references.
  //
  // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This
  // depends on both the type of branch and the offset to which it is branching. The 16-bit
  // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare
  // instruction and a 16- or 32-bit branch instruction. Load from a literal pool can also be
  // 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence
  // of instructions to make up for the limited range of load literal instructions (up to
  // 4KiB for the 32-bit variant). When generating code for these insns we don't know the
  // size before hand, so we assume it is the smallest available size and determine the final
  // code offsets and sizes and emit code in FinalizeCode().
  //
  // To handle this, we keep a record of every branch and literal pool load in the program.
  // The actual instruction encoding for these is delayed until we know the final size of
  // every instruction. When we bind a label to a branch we don't know the final location yet
  // as some preceding instructions may need to be expanded, so we record a non-final offset.
  // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
  // range. With each expansion, we need to update dependent Fixups, i.e. insntructios with
  // target on the other side of the expanded insn, as their offsets change and this may
  // trigger further expansion.
  //
  // All Fixups have a 'fixup id' which is a 16 bit unsigned number used to identify the
  // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
  // to it, using the fixup ids as links. The first link is stored in the label's position
  // (the label is linked but not bound), the following links are stored in the code buffer,
  // in the placeholder where we will eventually emit the actual code.

  class Fixup {
   public:
    // Branch type.
    enum Type : uint8_t {
      kConditional,               // B<cond>.
      kUnconditional,             // B.
      kUnconditionalLink,         // BL.
      kUnconditionalLinkX,        // BLX.
      kCompareAndBranchXZero,     // cbz/cbnz.
      kLoadLiteralNarrow,         // Load narrrow integer literal.
      kLoadLiteralWide,           // Load wide integer literal.
      kLoadLiteralAddr,           // Load address of literal (used for jump table).
      kLoadFPLiteralSingle,       // Load FP literal single.
      kLoadFPLiteralDouble,       // Load FP literal double.
    };

    // Calculated size of branch instruction based on type and offset.
    enum Size : uint8_t {
      // Branch variants.
      kBranch16Bit,
      kBranch32Bit,
      // NOTE: We don't support branches which would require multiple instructions, i.e.
      // conditinoal branches beyond +-1MiB and unconditional branches beyond +-16MiB.

      // CBZ/CBNZ variants.
      kCbxz16Bit,   // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
      kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
      kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.

      // Load integer literal variants.
      // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
      kLiteral1KiB,
      // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
      kLiteral4KiB,
      // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
      kLiteral64KiB,
      // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
      kLiteral1MiB,
      // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
      kLiteralFar,

      // Load literal base addr.
      // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
      kLiteralAddr1KiB,
      // ADR rX, label; 4KiB offset. 4 bytes.
      kLiteralAddr4KiB,
      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
      kLiteralAddr64KiB,
      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
      kLiteralAddrFar,

      // Load long or FP literal variants.
      // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
      kLongOrFPLiteral1KiB,
      // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
      kLongOrFPLiteral256KiB,
      // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
      kLongOrFPLiteralFar,
    };

    // Unresolved branch possibly with a condition.
    static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
                        Condition cond = AL) {
      DCHECK(type == kConditional || type == kUnconditional ||
             type == kUnconditionalLink || type == kUnconditionalLinkX);
      DCHECK(size == kBranch16Bit || size == kBranch32Bit);
      DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, type, size, location);
    }

    // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
    static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
      DCHECK(cond == EQ || cond == NE);
      return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
                   cond, kCompareAndBranchXZero, kCbxz16Bit, location);
    }

    // Load narrow literal.
    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
             size == kLiteral1MiB || size == kLiteralFar);
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralNarrow, size, location);
    }

    // Load wide literal.
    static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
                                 Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
             size == kLongOrFPLiteralFar);
      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
      return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralWide, size, location);
    }

    // Load FP single literal.
    static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
                   AL, kLoadFPLiteralSingle, size, location);
    }

    // Load FP double literal.
    static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
                                   Size size = kLongOrFPLiteral1KiB) {
      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
             size == kLongOrFPLiteralFar);
      return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
                   AL, kLoadFPLiteralDouble, size, location);
    }

    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
             size == kLiteralAddrFar);
      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
                   AL, kLoadLiteralAddr, size, location);
    }

    Type GetType() const {
      return type_;
    }

    bool IsLoadLiteral() const {
      return GetType() >= kLoadLiteralNarrow;
    }

    // Returns whether the Fixup can expand from the original size.
    bool CanExpand() const {
      switch (GetOriginalSize()) {
        case kBranch32Bit:
        case kCbxz48Bit:
        case kLiteralFar:
        case kLiteralAddrFar:
        case kLongOrFPLiteralFar:
          return false;
        default:
          return true;
      }
    }

    Size GetOriginalSize() const {
      return original_size_;
    }

    Size GetSize() const {
      return size_;
    }

    uint32_t GetOriginalSizeInBytes() const;

    uint32_t GetSizeInBytes() const;

    uint32_t GetLocation() const {
      return location_;
    }

    uint32_t GetAdjustment() const {
      return adjustment_;
    }

    // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
    static void PrepareDependents(Thumb2Assembler* assembler);

    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
                                                                           dependents_count_);
    }

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target) {
      DCHECK_EQ(target_, kUnresolved);
      DCHECK_NE(target, kUnresolved);
      target_ = target;
    }

    // Check if the current size is OK for current location_, target_ and adjustment_.
    // If not, increase the size. Return the size increase, 0 if unchanged.
    // If the target if after this Fixup, also add the difference to adjustment_,
    // so that we don't need to consider forward Fixups as their own dependencies.
    uint32_t AdjustSizeIfNeeded(uint32_t current_code_size);

    // Increase adjustments. This is called for dependents of a Fixup when its size changes.
    void IncreaseAdjustment(uint32_t increase) {
      adjustment_ += increase;
    }

    // Finalize the branch with an adjustment to the location. Both location and target are updated.
    void Finalize(uint32_t location_adjustment) {
      DCHECK_NE(target_, kUnresolved);
      location_ += location_adjustment;
      target_ += location_adjustment;
    }

    // Emit the branch instruction into the assembler buffer.  This does the
    // encoding into the thumb instruction.
    void Emit(AssemblerBuffer* buffer, uint32_t code_size) const;

   private:
    Fixup(Register rn, Register rt2, SRegister sd, DRegister dd,
          Condition cond, Type type, Size size, uint32_t location)
        : rn_(rn),
          rt2_(rt2),
          sd_(sd),
          dd_(dd),
          cond_(cond),
          type_(type),
          original_size_(size), size_(size),
          location_(location),
          target_(kUnresolved),
          adjustment_(0u),
          dependents_count_(0u),
          dependents_start_(0u) {
    }

    static size_t SizeInBytes(Size size);

    // The size of padding added before the literal pool.
    static size_t LiteralPoolPaddingSize(uint32_t current_code_size);

    // Returns the offset from the PC-using insn to the target.
    int32_t GetOffset(uint32_t current_code_size) const;

    size_t IncreaseSize(Size new_size);

    int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const;

    template <typename Function>
    static void ForExpandableDependencies(Thumb2Assembler* assembler, Function fn);

    static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.

    const Register rn_;   // Rn for cbnz/cbz, Rt for literal loads.
    Register rt2_;        // For kLoadLiteralWide.
    SRegister sd_;        // For kLoadFPLiteralSingle.
    DRegister dd_;        // For kLoadFPLiteralDouble.
    const Condition cond_;
    const Type type_;
    Size original_size_;
    Size size_;
    uint32_t location_;     // Offset into assembler buffer in bytes.
    uint32_t target_;       // Offset into assembler buffer in bytes.
    uint32_t adjustment_;   // The number of extra bytes inserted between location_ and target_.
    // Fixups that require adjustment when current size changes are stored in a single
    // array in the assembler and we store only the start index and count here.
    uint32_t dependents_count_;
    uint32_t dependents_start_;
  };

  // Emit a single 32 or 16 bit data processing instruction.
  void EmitDataProcessing(Condition cond,
                          Opcode opcode,
                          SetCc set_cc,
                          Register rn,
                          Register rd,
                          const ShifterOperand& so);

  // Emit a single 32 bit miscellaneous instruction.
  void Emit32Miscellaneous(uint8_t op1,
                           uint8_t op2,
                           uint32_t rest_encoding);

  // Emit reverse byte instructions: rev, rev16, revsh.
  void EmitReverseBytes(Register rd, Register rm, uint32_t op);

  // Emit a single 16 bit miscellaneous instruction.
  void Emit16Miscellaneous(uint32_t rest_encoding);

  // Must the instruction be 32 bits or can it possibly be encoded
  // in 16 bits?
  bool Is32BitDataProcessing(Condition cond,
                             Opcode opcode,
                             SetCc set_cc,
                             Register rn,
                             Register rd,
                             const ShifterOperand& so);

  // Emit a 32 bit data processing instruction.
  void Emit32BitDataProcessing(Condition cond,
                               Opcode opcode,
                               SetCc set_cc,
                               Register rn,
                               Register rd,
                               const ShifterOperand& so);

  // Emit a 16 bit data processing instruction.
  void Emit16BitDataProcessing(Condition cond,
                               Opcode opcode,
                               SetCc set_cc,
                               Register rn,
                               Register rd,
                               const ShifterOperand& so);

  void Emit16BitAddSub(Condition cond,
                       Opcode opcode,
                       SetCc set_cc,
                       Register rn,
                       Register rd,
                       const ShifterOperand& so);

  uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);

  void EmitLoadStore(Condition cond,
                     bool load,
                     bool byte,
                     bool half,
                     bool is_signed,
                     Register rd,
                     const Address& ad);

  void EmitMemOpAddressMode3(Condition cond,
                             int32_t mode,
                             Register rd,
                             const Address& ad);

  void EmitMultiMemOp(Condition cond,
                      BlockAddressMode am,
                      bool load,
                      Register base,
                      RegList regs);

  void EmitMulOp(Condition cond,
                 int32_t opcode,
                 Register rd,
                 Register rn,
                 Register rm,
                 Register rs);

  void EmitVFPsss(Condition cond,
                  int32_t opcode,
                  SRegister sd,
                  SRegister sn,
                  SRegister sm);

  void EmitVFPddd(Condition cond,
                  int32_t opcode,
                  DRegister dd,
                  DRegister dn,
                  DRegister dm);

  void EmitVFPsd(Condition cond,
                 int32_t opcode,
                 SRegister sd,
                 DRegister dm);

  void EmitVFPds(Condition cond,
                 int32_t opcode,
                 DRegister dd,
                 SRegister sm);

  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);

  void EmitBranch(Condition cond, Label* label, bool link, bool x);
  static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
  static int DecodeBranchOffset(int32_t inst);
  void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                 Condition cond = AL, SetCc set_cc = kCcDontCare);
  void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                 Condition cond = AL, SetCc set_cc = kCcDontCare);

  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                               int32_t offset,
                               /*out*/ int32_t* add_to_base,
                               /*out*/ int32_t* offset_for_load_store);
  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
                                Register temp,
                                Register base,
                                int32_t offset,
                                Condition cond);

  // Whether the assembler can relocate branches. If false, unresolved branches will be
  // emitted on 32bits.
  bool can_relocate_branches_;

  // Force the assembler to use 32 bit thumb2 instructions.
  bool force_32bit_;

  // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
  Condition it_conditions_[4];
  uint8_t it_cond_index_;
  Condition next_condition_;

  void SetItCondition(ItState s, Condition cond, uint8_t index);

  void CheckCondition(Condition cond) {
    CHECK_EQ(cond, next_condition_);

    // Move to the next condition if there is one.
    if (it_cond_index_ < 3) {
      ++it_cond_index_;
      next_condition_ = it_conditions_[it_cond_index_];
    } else {
      next_condition_ = AL;
    }
  }

  void CheckConditionLastIt(Condition cond) {
    if (it_cond_index_ < 3) {
      // Check that the next condition is AL.  This means that the
      // current condition is the last in the IT block.
      CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
    }
    CheckCondition(cond);
  }

  FixupId AddFixup(Fixup fixup) {
    FixupId fixup_id = static_cast<FixupId>(fixups_.size());
    fixups_.push_back(fixup);
    // For iterating using FixupId, we need the next id to be representable.
    DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size());
    return fixup_id;
  }

  Fixup* GetFixup(FixupId fixup_id) {
    DCHECK_LT(fixup_id, fixups_.size());
    return &fixups_[fixup_id];
  }

  void BindLabel(Label* label, uint32_t bound_pc);
  uint32_t BindLiterals();
  void BindJumpTables(uint32_t code_size);
  void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                           std::deque<FixupId>* fixups_to_recalculate);
  uint32_t AdjustFixups();
  void EmitFixups(uint32_t adjusted_code_size);
  void EmitLiterals();
  void EmitJumpTables();
  void PatchCFI();

  static int16_t BEncoding16(int32_t offset, Condition cond);
  static int32_t BEncoding32(int32_t offset, Condition cond);
  static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond);
  static int16_t CmpRnImm8Encoding16(Register rn, int32_t value);
  static int16_t AddRdnRmEncoding16(Register rdn, Register rm);
  static int32_t MovwEncoding32(Register rd, int32_t value);
  static int32_t MovtEncoding32(Register rd, int32_t value);
  static int32_t MovModImmEncoding32(Register rd, int32_t value);
  static int16_t LdrLitEncoding16(Register rt, int32_t offset);
  static int32_t LdrLitEncoding32(Register rt, int32_t offset);
  static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset);
  static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset);
  static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
  static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
  static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
  static int16_t AdrEncoding16(Register rd, int32_t offset);
  static int32_t AdrEncoding32(Register rd, int32_t offset);

  ArenaVector<Fixup> fixups_;
  ArenaVector<FixupId> fixup_dependents_;

  // Use std::deque<> for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for AdjustedPosition(), see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  FixupId last_fixup_id_;
};

}  // namespace arm
}  // namespace art

#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_