/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack_map_stream.h"
#include "string_reference.h"
#include "utils/label.h"
#include "utils/type_reference.h"
namespace art {
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);
// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
static constexpr ReadBarrierOption kCompilerReadBarrierOption =
kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
class Assembler;
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
class LinkerPatch;
class ParallelMoveResolver;
class CodeAllocator {
public:
CodeAllocator() {}
virtual ~CodeAllocator() {}
virtual uint8_t* Allocate(size_t size) = 0;
private:
DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};
class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
public:
explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
saved_core_stack_offsets_[i] = kRegisterNotSaved;
saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
}
}
virtual ~SlowPathCode() {}
virtual void EmitNativeCode(CodeGenerator* codegen) = 0;
// Save live core and floating-point caller-save registers and
// update the stack mask in `locations` for registers holding object
// references.
virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
// Restore live core and floating-point caller-save registers.
virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
bool IsCoreRegisterSaved(int reg) const {
return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
}
bool IsFpuRegisterSaved(int reg) const {
return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
}
uint32_t GetStackOffsetOfCoreRegister(int reg) const {
return saved_core_stack_offsets_[reg];
}
uint32_t GetStackOffsetOfFpuRegister(int reg) const {
return saved_fpu_stack_offsets_[reg];
}
virtual bool IsFatal() const { return false; }
virtual const char* GetDescription() const = 0;
Label* GetEntryLabel() { return &entry_label_; }
Label* GetExitLabel() { return &exit_label_; }
HInstruction* GetInstruction() const {
return instruction_;
}
uint32_t GetDexPc() const {
return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
}
protected:
static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
static constexpr uint32_t kRegisterNotSaved = -1;
// The instruction where this slow path is happening.
HInstruction* instruction_;
uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];
private:
Label entry_label_;
Label exit_label_;
DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
class InvokeDexCallingConventionVisitor {
public:
virtual Location GetNextLocation(Primitive::Type type) = 0;
virtual Location GetReturnLocation(Primitive::Type type) const = 0;
virtual Location GetMethodLocation() const = 0;
protected:
InvokeDexCallingConventionVisitor() {}
virtual ~InvokeDexCallingConventionVisitor() {}
// The current index for core registers.
uint32_t gp_index_ = 0u;
// The current index for floating-point registers.
uint32_t float_index_ = 0u;
// The current stack index.
uint32_t stack_index_ = 0u;
private:
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
class FieldAccessCallingConvention {
public:
virtual Location GetObjectLocation() const = 0;
virtual Location GetFieldIndexLocation() const = 0;
virtual Location GetReturnLocation(Primitive::Type type) const = 0;
virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0;
virtual Location GetFpuLocation(Primitive::Type type) const = 0;
virtual ~FieldAccessCallingConvention() {}
protected:
FieldAccessCallingConvention() {}
private:
DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};
class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
public:
// Compiles the graph to executable instructions.
void Compile(CodeAllocator* allocator);
static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
InstructionSet instruction_set,
const InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGenerator() {}
// Get the graph. This is the outermost graph, never the graph of a method being inlined.
HGraph* GetGraph() const { return graph_; }
HBasicBlock* GetNextBlockToEmit() const;
HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
// Note that this follows the current calling convention.
return GetFrameSize()
+ static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet())) // Art method
+ parameter->GetIndex() * kVRegSize;
}
virtual void Initialize() = 0;
virtual void Finalize(CodeAllocator* allocator);
virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
virtual void GenerateFrameEntry() = 0;
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
virtual void MoveConstant(Location destination, int32_t value) = 0;
virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0;
virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
virtual Assembler* GetAssembler() = 0;
virtual const Assembler& GetAssembler() const = 0;
virtual size_t GetWordSize() const = 0;
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
// Backends can override this as necessary. For most, no special alignment is required.
virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
uint32_t GetFrameSize() const { return frame_size_; }
void SetFrameSize(uint32_t size) { frame_size_ = size; }
uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
virtual void SetupBlockedRegisters() const = 0;
virtual void ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
}
static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
uint32_t mask = 0;
for (size_t i = 0, e = length; i < e; ++i) {
mask |= (1 << registers[i]);
}
return mask;
}
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const;
// Saves the register in the stack. Returns the size taken on stack.
virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Restores the register from the stack. Returns the size taken on stack.
virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0;
// Returns whether we should split long moves in parallel moves.
virtual bool ShouldSplitLongMoves() const { return false; }
size_t GetNumberOfCoreCalleeSaveRegisters() const {
return POPCOUNT(core_callee_save_mask_);
}
size_t GetNumberOfCoreCallerSaveRegisters() const {
DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
}
bool IsCoreCalleeSaveRegister(int reg) const {
return (core_callee_save_mask_ & (1 << reg)) != 0;
}
bool IsFloatingPointCalleeSaveRegister(int reg) const {
return (fpu_callee_save_mask_ & (1 << reg)) != 0;
}
uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
DCHECK(locations->OnlyCallsOnSlowPath() ||
(locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
!locations->HasCustomSlowPathCallingConvention()));
uint32_t live_registers = core_registers
? locations->GetLiveRegisters()->GetCoreRegisters()
: locations->GetLiveRegisters()->GetFloatingPointRegisters();
if (locations->HasCustomSlowPathCallingConvention()) {
// Save only the live registers that the custom calling convention wants us to save.
uint32_t caller_saves = core_registers
? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
: locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
return live_registers & caller_saves;
} else {
// Default ABI, we need to spill non-callee-save live registers.
uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
return live_registers & ~callee_saves;
}
}
size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
return POPCOUNT(GetSlowPathSpills(locations, core_registers));
}
size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
}
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
// Check whether we have already recorded mapping at this PC.
bool HasStackMapAtCurrentPc();
// Record extra stack maps if we support native debugging.
void MaybeRecordNativeDebugInfo(HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr);
bool CanMoveNullCheckToUser(HNullCheck* null_check);
void MaybeRecordImplicitNullCheck(HInstruction* instruction);
LocationSummary* CreateThrowingSlowPathLocations(
HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
void GenerateNullCheck(HNullCheck* null_check);
virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
// Records a stack map which the runtime might use to set catch phi values
// during exception delivery.
// TODO: Replace with a catch-entering instruction that records the environment.
void RecordCatchBlockInfo();
// TODO: Avoid creating the `std::unique_ptr` here.
void AddSlowPath(SlowPathCode* slow_path) {
slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
}
void BuildStackMaps(MemoryRegion stack_map_region,
MemoryRegion method_info_region,
const DexFile::CodeItem& code_item);
void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
size_t GetNumberOfJitRoots() const {
return jit_string_roots_.size() + jit_class_roots_.size();
}
// Fills the `literals` array with literals collected during code generation.
// Also emits literal patches.
void EmitJitRoots(uint8_t* code,
Handle<mirror::ObjectArray<mirror::Object>> roots,
const uint8_t* roots_data)
REQUIRES_SHARED(Locks::mutator_lock_);
bool IsLeafMethod() const {
return is_leaf_;
}
void MarkNotLeaf() {
is_leaf_ = false;
requires_current_method_ = true;
}
void SetRequiresCurrentMethod() {
requires_current_method_ = true;
}
bool RequiresCurrentMethod() const {
return requires_current_method_;
}
// Clears the spill slots taken by loop phis in the `LocationSummary` of the
// suspend check. This is called when the code generator generates code
// for the suspend check at the back edge (instead of where the suspend check
// is, which is the loop entry). At this point, the spill slots for the phis
// have not been written to.
void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const;
bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
// Helper that returns the pointer offset of an index in an object array.
// Note: this method assumes we always have the same pointer size, regardless
// of the architecture.
static size_t GetCacheOffset(uint32_t index);
// Pointer variant for ArtMethod and ArtField arrays.
size_t GetCachePointerOffset(uint32_t index);
// Helper that returns the offset of the array's length field.
// Note: Besides the normal arrays, we also use the HArrayLength for
// accessing the String's `count` field in String intrinsics.
static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
// Helper that returns the offset of the array's data.
// Note: Besides the normal arrays, we also use the HArrayGet for
// accessing the String's `value` field in String intrinsics.
static uint32_t GetArrayDataOffset(HArrayGet* array_get);
// Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`.
template <PointerSize pointer_size>
static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) {
// The entry point list defines 30 ReadBarrierMarkRegX entry points.
DCHECK_LT(reg, 30u);
// The ReadBarrierMarkRegX entry points are ordered by increasing
// register number in Thread::tls_Ptr_.quick_entrypoints.
return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value()
+ static_cast<size_t>(pointer_size) * reg;
}
void EmitParallelMoves(Location from1,
Location to1,
Primitive::Type type1,
Location from2,
Location to2,
Primitive::Type type2);
static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
// Check that null value is not represented as an integer constant.
DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant());
return type == Primitive::kPrimNot && !value->IsNullConstant();
}
// Performs checks pertaining to an InvokeRuntime call.
void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
SlowPathCode* slow_path);
// Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
SlowPathCode* slow_path);
void AddAllocatedRegister(Location location) {
allocated_registers_.Add(location);
}
bool HasAllocatedRegister(bool is_core, int reg) const {
return is_core
? allocated_registers_.ContainsCoreRegister(reg)
: allocated_registers_.ContainsFloatingPointRegister(reg);
}
void AllocateLocations(HInstruction* instruction);
// Tells whether the stack frame of the compiled method is
// considered "empty", that is either actually having a size of zero,
// or just containing the saved return address register.
bool HasEmptyFrame() const {
return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
}
static int32_t GetInt32ValueOf(HConstant* constant) {
if (constant->IsIntConstant()) {
return constant->AsIntConstant()->GetValue();
} else if (constant->IsNullConstant()) {
return 0;
} else {
DCHECK(constant->IsFloatConstant());
return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
}
}
static int64_t GetInt64ValueOf(HConstant* constant) {
if (constant->IsIntConstant()) {
return constant->AsIntConstant()->GetValue();
} else if (constant->IsNullConstant()) {
return 0;
} else if (constant->IsFloatConstant()) {
return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
} else if (constant->IsLongConstant()) {
return constant->AsLongConstant()->GetValue();
} else {
DCHECK(constant->IsDoubleConstant());
return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
}
}
size_t GetFirstRegisterSlotInSlowPath() const {
return first_register_slot_in_slow_path_;
}
uint32_t FrameEntrySpillSize() const {
return GetFpuSpillSize() + GetCoreSpillSize();
}
virtual ParallelMoveResolver* GetMoveResolver() = 0;
static void CreateCommonInvokeLocationSummary(
HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);
void CreateUnresolvedFieldLocationSummary(
HInstruction* field_access,
Primitive::Type field_type,
const FieldAccessCallingConvention& calling_convention);
void GenerateUnresolvedFieldAccess(
HInstruction* field_access,
Primitive::Type field_type,
uint32_t field_index,
uint32_t dex_pc,
const FieldAccessCallingConvention& calling_convention);
static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
Location runtime_type_index_location,
Location runtime_return_location);
void GenerateLoadClassRuntimeCall(HLoadClass* cls);
static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr) = 0;
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
virtual HLoadString::LoadKind GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) = 0;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) = 0;
static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBssEntry:
DCHECK(load->NeedsEnvironment());
return LocationSummary::kCallOnSlowPath;
case HLoadString::LoadKind::kDexCacheViaMethod:
DCHECK(load->NeedsEnvironment());
return LocationSummary::kCallOnMainOnly;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(!load->NeedsEnvironment());
return kEmitCompilerReadBarrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
break;
default:
DCHECK(!load->NeedsEnvironment());
return LocationSummary::kNoCall;
}
}
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
HInvokeStaticOrDirect* invoke) = 0;
// Generate a call to a static or direct method.
virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
// Generate a call to a virtual method.
virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
// Copy the result of a call into the given target.
virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
virtual void GenerateNop() = 0;
uint32_t GetReferenceSlowFlagOffset() const;
uint32_t GetReferenceDisableFlagOffset() const;
static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
protected:
// Patch info used for recording locations of required linker patches and their targets,
// i.e. target method, string, type or code identified by their dex file and index.
template <typename LabelType>
struct PatchInfo {
PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
: dex_file(target_dex_file), index(target_index) { }
const DexFile& dex_file;
uint32_t index;
LabelType label;
};
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
size_t number_of_fpu_registers,
size_t number_of_register_pairs,
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: frame_size_(0),
core_spill_mask_(0),
fpu_spill_mask_(0),
first_register_slot_in_slow_path_(0),
allocated_registers_(RegisterSet::Empty()),
blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers,
kArenaAllocCodeGenerator)),
blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
kArenaAllocCodeGenerator)),
number_of_core_registers_(number_of_core_registers),
number_of_fpu_registers_(number_of_fpu_registers),
number_of_register_pairs_(number_of_register_pairs),
core_callee_save_mask_(core_callee_save_mask),
fpu_callee_save_mask_(fpu_callee_save_mask),
stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()),
block_order_(nullptr),
jit_string_roots_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_roots_(TypeReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
disasm_info_(nullptr),
stats_(stats),
graph_(graph),
compiler_options_(compiler_options),
slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
current_slow_path_(nullptr),
current_block_index_(0),
is_leaf_(true),
requires_current_method_(false) {
slow_paths_.reserve(8);
}
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer.
uint32_t GetFpuSpillStart() const {
return GetFrameSize() - FrameEntrySpillSize();
}
uint32_t GetFpuSpillSize() const {
return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
}
uint32_t GetCoreSpillSize() const {
return POPCOUNT(core_spill_mask_) * GetWordSize();
}
virtual bool HasAllocatedCalleeSaveRegisters() const {
// We check the core registers against 1 because it always comprises the return PC.
return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
|| (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
}
bool CallPushesPC() const {
InstructionSet instruction_set = GetInstructionSet();
return instruction_set == kX86 || instruction_set == kX86_64;
}
// Arm64 has its own type for a label, so we need to templatize these methods
// to share the logic.
template <typename LabelType>
LabelType* CommonInitializeLabels() {
// We use raw array allocations instead of ArenaVector<> because Labels are
// non-constructible and non-movable and as such cannot be held in a vector.
size_t size = GetGraph()->GetBlocks().size();
LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
kArenaAllocCodeGenerator);
for (size_t i = 0; i != size; ++i) {
new(labels + i) LabelType();
}
return labels;
}
template <typename LabelType>
LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
block = FirstNonEmptyBlock(block);
return raw_pointer_to_labels_array + block->GetBlockId();
}
SlowPathCode* GetCurrentSlowPath() {
return current_slow_path_;
}
// Emit the patches assocatied with JIT roots. Only applies to JIT compiled code.
virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
const uint8_t* roots_data ATTRIBUTE_UNUSED) {
DCHECK_EQ(jit_string_roots_.size(), 0u);
DCHECK_EQ(jit_class_roots_.size(), 0u);
}
// Frame size required for this method.
uint32_t frame_size_;
uint32_t core_spill_mask_;
uint32_t fpu_spill_mask_;
uint32_t first_register_slot_in_slow_path_;
// Registers that were allocated during linear scan.
RegisterSet allocated_registers_;
// Arrays used when doing register allocation to know which
// registers we can allocate. `SetupBlockedRegisters` updates the
// arrays.
bool* const blocked_core_registers_;
bool* const blocked_fpu_registers_;
size_t number_of_core_registers_;
size_t number_of_fpu_registers_;
size_t number_of_register_pairs_;
const uint32_t core_callee_save_mask_;
const uint32_t fpu_callee_save_mask_;
StackMapStream stack_map_stream_;
// The order to use for code generation.
const ArenaVector<HBasicBlock*>* block_order_;
// Maps a StringReference (dex_file, string_index) to the index in the literal table.
// Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
// will compute all the indices.
ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_;
// Maps a ClassReference (dex_file, type_index) to the index in the literal table.
// Entries are intially added with a pointer in the handle zone, and `EmitJitRoots`
// will compute all the indices.
ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
DisassemblyInformation* disasm_info_;
private:
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
void BlockIfInRegister(Location location, bool is_out = false) const;
void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
OptimizingCompilerStats* stats_;
HGraph* const graph_;
const CompilerOptions& compiler_options_;
ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_;
// The current slow-path that we're generating code for.
SlowPathCode* current_slow_path_;
// The current block index in `block_order_` of the block
// we are generating code for.
size_t current_block_index_;
// Whether the method is a leaf method.
bool is_leaf_;
// Whether an instruction in the graph accesses the current method.
// TODO: Rename: this actually indicates that some instruction in the method
// needs the environment including a valid stack frame.
bool requires_current_method_;
friend class OptimizingCFITest;
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
template <typename C, typename F>
class CallingConvention {
public:
CallingConvention(const C* registers,
size_t number_of_registers,
const F* fpu_registers,
size_t number_of_fpu_registers,
PointerSize pointer_size)
: registers_(registers),
number_of_registers_(number_of_registers),
fpu_registers_(fpu_registers),
number_of_fpu_registers_(number_of_fpu_registers),
pointer_size_(pointer_size) {}
size_t GetNumberOfRegisters() const { return number_of_registers_; }
size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
C GetRegisterAt(size_t index) const {
DCHECK_LT(index, number_of_registers_);
return registers_[index];
}
F GetFpuRegisterAt(size_t index) const {
DCHECK_LT(index, number_of_fpu_registers_);
return fpu_registers_[index];
}
size_t GetStackOffsetOf(size_t index) const {
// We still reserve the space for parameters passed by registers.
// Add space for the method pointer.
return static_cast<size_t>(pointer_size_) + index * kVRegSize;
}
private:
const C* registers_;
const size_t number_of_registers_;
const F* fpu_registers_;
const size_t number_of_fpu_registers_;
const PointerSize pointer_size_;
DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
/**
* A templated class SlowPathGenerator with a templated method NewSlowPath()
* that can be used by any code generator to share equivalent slow-paths with
* the objective of reducing generated code size.
*
* InstructionType: instruction that requires SlowPathCodeType
* SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
*/
template <typename InstructionType>
class SlowPathGenerator {
static_assert(std::is_base_of<HInstruction, InstructionType>::value,
"InstructionType is not a subclass of art::HInstruction");
public:
SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
: graph_(graph),
codegen_(codegen),
slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
// Creates and adds a new slow-path, if needed, or returns existing one otherwise.
// Templating the method (rather than the whole class) on the slow-path type enables
// keeping this code at a generic, non architecture-specific place.
//
// NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
// To relax this requirement, we would need some RTTI on the stored slow-paths,
// or template the class as a whole on SlowPathType.
template <typename SlowPathCodeType>
SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
"SlowPathCodeType is not a subclass of art::SlowPathCode");
static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
"SlowPathCodeType is not constructible from InstructionType*");
// Iterate over potential candidates for sharing. Currently, only same-typed
// slow-paths with exactly the same dex-pc are viable candidates.
// TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
const uint32_t dex_pc = instruction->GetDexPc();
auto iter = slow_path_map_.find(dex_pc);
if (iter != slow_path_map_.end()) {
auto candidates = iter->second;
for (const auto& it : candidates) {
InstructionType* other_instruction = it.first;
SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
// Determine if the instructions allow for slow-path sharing.
if (HaveSameLiveRegisters(instruction, other_instruction) &&
HaveSameStackMap(instruction, other_instruction)) {
// Can share: reuse existing one.
return other_slow_path;
}
}
} else {
// First time this dex-pc is seen.
iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
}
// Cannot share: create and add new slow-path for this particular dex-pc.
SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
iter->second.emplace_back(std::make_pair(instruction, slow_path));
codegen_->AddSlowPath(slow_path);
return slow_path;
}
private:
// Tests if both instructions have same set of live physical registers. This ensures
// the slow-path has exactly the same preamble on saving these registers to stack.
bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
return (((live1->GetCoreRegisters() & core_spill) ==
(live2->GetCoreRegisters() & core_spill)) &&
((live1->GetFloatingPointRegisters() & fpu_spill) ==
(live2->GetFloatingPointRegisters() & fpu_spill)));
}
// Tests if both instructions have the same stack map. This ensures the interpreter
// will find exactly the same dex-registers at the same entries.
bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
DCHECK(i1->HasEnvironment());
DCHECK(i2->HasEnvironment());
// We conservatively test if the two instructions find exactly the same instructions
// and location in each dex-register. This guarantees they will have the same stack map.
HEnvironment* e1 = i1->GetEnvironment();
HEnvironment* e2 = i2->GetEnvironment();
if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
return false;
}
for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
!e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
return false;
}
}
return true;
}
HGraph* const graph_;
CodeGenerator* const codegen_;
// Map from dex-pc to vector of already existing instruction/slow-path pairs.
ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
class InstructionCodeGenerator : public HGraphVisitor {
public:
InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
: HGraphVisitor(graph),
deopt_slow_paths_(graph, codegen) {}
protected:
// Add slow-path generator for each instruction/slow-path combination that desires sharing.
// TODO: under current regime, only deopt sharing make sense; extend later.
SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_