/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "scheduler_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
namespace art {
namespace arm {
using helpers::Int32ConstantFrom;
using helpers::Uint64ConstantFrom;
void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
// HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
// so a bubble (kArmNopLatency) is added to represent the internal carry flag
// dependency inside these pairs.
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
HandleBinaryOperationLantencies(instr);
}
void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
HandleBinaryOperationLantencies(instr);
}
void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_latency_ = kArmMulFloatingPointLatency;
break;
default:
last_visited_latency_ = kArmMulIntegerLatency;
break;
}
}
void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
HandleBitwiseOperationLantencies(instr);
}
void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
HandleBitwiseOperationLantencies(instr);
}
void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
HandleBitwiseOperationLantencies(instr);
}
void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt32:
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kInt64: {
// HandleLongRotate
HInstruction* rhs = instr->GetRight();
if (rhs->IsConstant()) {
uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
if (rot != 0u) {
last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
} else {
last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
last_visited_latency_ = kArmBranchLatency;
}
break;
}
default:
LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
DataType::Type type = instr->GetResultType();
HInstruction* rhs = instr->GetRight();
switch (type) {
case DataType::Type::kInt32:
if (!rhs->IsConstant()) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
}
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kInt64:
if (!rhs->IsConstant()) {
last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
} else {
uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
if (shift_value == 1 || shift_value >= 32) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
}
}
last_visited_latency_ = kArmIntegerOpLatency;
break;
default:
LOG(FATAL) << "Unexpected operation type " << type;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
HandleShiftLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
HandleShiftLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
HandleShiftLatencies(instr);
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
switch (condition) {
case kCondEQ:
case kCondBE:
case kCondNE:
case kCondA:
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case kCondGE:
// Mvn
last_visited_internal_latency_ += kArmIntegerOpLatency;
FALLTHROUGH_INTENDED;
case kCondLT:
// Lsr
last_visited_latency_ = kArmIntegerOpLatency;
break;
case kCondAE:
// Trivially true.
// Mov
last_visited_latency_ = kArmIntegerOpLatency;
break;
case kCondB:
// Trivially false.
// Mov
last_visited_latency_ = kArmIntegerOpLatency;
break;
default:
LOG(FATAL) << "Unexpected condition " << condition;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition cond = condition->GetCondition();
HInstruction* right = condition->InputAt(1);
int64_t value = Uint64ConstantFrom(right);
// Comparisons against 0 are common enough, so codegen has special handling for them.
if (value == 0) {
switch (cond) {
case kCondNE:
case kCondA:
case kCondEQ:
case kCondBE:
// Orrs
last_visited_internal_latency_ += kArmIntegerOpLatency;
return;
case kCondLT:
case kCondGE:
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
return;
case kCondB:
case kCondAE:
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
return;
default:
break;
}
}
switch (cond) {
case kCondEQ:
case kCondNE:
case kCondB:
case kCondBE:
case kCondA:
case kCondAE: {
// Cmp, IT, Cmp
last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
break;
}
case kCondLE:
case kCondGT:
// Trivially true or false.
if (value == std::numeric_limits<int64_t>::max()) {
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
break;
}
FALLTHROUGH_INTENDED;
case kCondGE:
case kCondLT: {
// Cmp, Sbcs
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
break;
}
default:
LOG(FATAL) << "Unreachable";
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition cond = condition->GetCondition();
switch (cond) {
case kCondEQ:
case kCondNE:
case kCondB:
case kCondBE:
case kCondA:
case kCondAE: {
// Cmp, IT, Cmp
last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
break;
}
case kCondLE:
case kCondGT:
case kCondGE:
case kCondLT: {
// Cmp, Sbcs
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
break;
}
default:
LOG(FATAL) << "Unreachable";
UNREACHABLE();
}
}
// The GenerateTest series of function all counted as internal latency.
void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
const DataType::Type type = condition->GetLeft()->GetType();
if (type == DataType::Type::kInt64) {
condition->InputAt(1)->IsConstant()
? HandleGenerateLongTestConstant(condition)
: HandleGenerateLongTest(condition);
} else if (DataType::IsFloatingPointType(type)) {
// GenerateVcmp + Vmrs
last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
} else {
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
}
}
bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
HInstruction* right = condition->InputAt(1);
if (right->IsConstant()) {
IfCondition c = condition->GetCondition();
const uint64_t value = Uint64ConstantFrom(right);
if (c < kCondLT || c > kCondGE) {
if (value != 0) {
return false;
}
} else if (c == kCondLE || c == kCondGT) {
if (value < std::numeric_limits<int64_t>::max() &&
!codegen_->GetAssembler()->ShifterOperandCanHold(
SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
return false;
}
} else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
return false;
}
}
}
return true;
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
HandleGenerateTest(cond);
// Unlike codegen pass, we cannot check 'out' register IsLow() here,
// because scheduling is before liveness(location builder) and register allocator,
// so we can only choose to follow one path of codegen by assuming otu.IsLow() is true.
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition condition = cond->GetCondition();
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
if (condition == kCondNE) {
// Orrs, IT, Mov
last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
HandleGenerateConditionWithZero(condition);
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
last_visited_internal_latency_ += kArmBranchLatency;
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition condition = cond->GetCondition();
HInstruction* right = cond->InputAt(1);
if (right->IsConstant()) {
// Comparisons against 0 are common enough, so codegen has special handling for them.
if (Uint64ConstantFrom(right) == 0) {
switch (condition) {
case kCondNE:
case kCondA:
case kCondEQ:
case kCondBE:
// Orr
last_visited_internal_latency_ += kArmIntegerOpLatency;
HandleGenerateConditionWithZero(condition);
return;
case kCondLT:
case kCondGE:
FALLTHROUGH_INTENDED;
case kCondAE:
case kCondB:
HandleGenerateConditionWithZero(condition);
return;
case kCondLE:
case kCondGT:
default:
break;
}
}
}
if ((condition == kCondEQ || condition == kCondNE) &&
!CanGenerateTest(cond)) {
HandleGenerateEqualLong(cond);
return;
}
if (CanGenerateTest(cond)) {
HandleGenerateConditionGeneric(cond);
return;
}
HandleGenerateLongComparesAndJumps();
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmBranchLatency;;
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
const DataType::Type type = cond->GetLeft()->GetType();
DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
if (type == DataType::Type::kInt64) {
HandleGenerateConditionLong(cond);
return;
}
IfCondition condition = cond->GetCondition();
HInstruction* right = cond->InputAt(1);
int64_t value;
if (right->IsConstant()) {
value = Uint64ConstantFrom(right);
// Comparisons against 0 are common enough, so codegen has special handling for them.
if (value == 0) {
switch (condition) {
case kCondNE:
case kCondA:
case kCondEQ:
case kCondBE:
case kCondLT:
case kCondGE:
case kCondAE:
case kCondB:
HandleGenerateConditionWithZero(condition);
return;
case kCondLE:
case kCondGT:
default:
break;
}
}
}
if (condition == kCondEQ || condition == kCondNE) {
if (condition == kCondNE) {
// CMP, IT, MOV.ne
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
HandleGenerateConditionWithZero(condition);
}
return;
}
HandleGenerateConditionGeneric(cond);
}
void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
if (cond->IsEmittedAtUseSite()) {
last_visited_latency_ = 0;
return;
}
const DataType::Type type = cond->GetLeft()->GetType();
if (DataType::IsFloatingPointType(type)) {
HandleGenerateConditionGeneric(cond);
return;
}
DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
const IfCondition condition = cond->GetCondition();
if (type == DataType::Type::kBool &&
cond->GetRight()->GetType() == DataType::Type::kBool &&
(condition == kCondEQ || condition == kCondNE)) {
if (condition == kCondEQ) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
}
last_visited_latency_ = kArmIntegerOpLatency;
return;
}
HandleGenerateConditionIntegralOrNonPrimitive(cond);
}
void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
HandleCondition(instr);
}
void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
DataType::Type type = instr->InputAt(0)->GetType();
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
break;
case DataType::Type::kInt64:
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
break;
default:
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
break;
}
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
if (instruction->GetResultType() == DataType::Type::kInt32) {
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
}
void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
if (internal_latency) {
last_visited_internal_latency_ += kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmDataProcWithShifterOpLatency;
}
}
void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
const HInstruction::InstructionKind kind = instruction->GetInstrKind();
if (kind == HInstruction::kAdd) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else if (kind == HInstruction::kSub) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
HandleGenerateDataProcInstruction(/* internal_latency */ true);
HandleGenerateDataProcInstruction();
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
const uint32_t shift_value = instruction->GetShiftAmount();
const HInstruction::InstructionKind kind = instruction->GetInstrKind();
if (shift_value >= 32) {
// Different shift types actually generate similar code here,
// no need to differentiate shift types like the codegen pass does,
// which also avoids handling shift types from different ARM backends.
HandleGenerateDataProc(instruction);
} else {
DCHECK_GT(shift_value, 1U);
DCHECK_LT(shift_value, 32U);
if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
HandleGenerateDataProcInstruction(/* internal_latency */ true);
HandleGenerateDataProcInstruction(/* internal_latency */ true);
HandleGenerateDataProcInstruction();
} else {
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
HandleGenerateDataProc(instruction);
}
}
}
void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
if (instruction->GetType() == DataType::Type::kInt32) {
HandleGenerateDataProcInstruction();
} else {
DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
HandleGenerateDataProc(instruction);
} else {
HandleGenerateLongDataProc(instruction);
}
}
}
void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_internal_latency_ = kArmNopLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}
void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArmMulIntegerLatency;
}
void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
DataType::Type type = instruction->GetType();
const bool maybe_compressed_char_at =
mirror::kUseStringCompression && instruction->IsStringCharAt();
HInstruction* array_instr = instruction->GetArray();
bool has_intermediate_address = array_instr->IsIntermediateAddress();
HInstruction* index = instruction->InputAt(1);
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32: {
if (maybe_compressed_char_at) {
last_visited_internal_latency_ += kArmMemoryLoadLatency;
}
if (index->IsConstant()) {
if (maybe_compressed_char_at) {
last_visited_internal_latency_ +=
kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
last_visited_latency_ = kArmBranchLatency;
} else {
last_visited_latency_ += kArmMemoryLoadLatency;
}
} else {
if (has_intermediate_address) {
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
}
if (maybe_compressed_char_at) {
last_visited_internal_latency_ +=
kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
last_visited_latency_ = kArmBranchLatency;
} else {
last_visited_latency_ += kArmMemoryLoadLatency;
}
}
break;
}
case DataType::Type::kReference: {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
} else {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
if (has_intermediate_address) {
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
}
last_visited_internal_latency_ = kArmMemoryLoadLatency;
}
}
break;
}
case DataType::Type::kInt64: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
case DataType::Type::kFloat32: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
case DataType::Type::kFloat64: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
default:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
last_visited_latency_ = kArmMemoryLoadLatency;
if (mirror::kUseStringCompression && instruction->IsStringLength()) {
last_visited_internal_latency_ = kArmMemoryLoadLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
}
void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
HInstruction* index = instruction->InputAt(1);
DataType::Type value_type = instruction->GetComponentType();
HInstruction* array_instr = instruction->GetArray();
bool has_intermediate_address = array_instr->IsIntermediateAddress();
switch (value_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryStoreLatency;
} else {
if (has_intermediate_address) {
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
}
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
}
case DataType::Type::kReference: {
if (instruction->InputAt(2)->IsNullConstant()) {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryStoreLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryStoreLatency;
}
} else {
// Following the exact instructions of runtime type checks is too complicated,
// just giving it a simple slow latency.
last_visited_latency_ = kArmRuntimeTypeCheckLatency;
}
break;
}
case DataType::Type::kInt64: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
case DataType::Type::kFloat32: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
case DataType::Type::kFloat64: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
default:
LOG(FATAL) << "Unreachable type " << value_type;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
// Users do not use any data results.
last_visited_latency_ = 0;
}
void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
if (imm == 0) {
last_visited_internal_latency_ = 0;
last_visited_latency_ = 0;
} else if (imm == 1 || imm == -1) {
last_visited_latency_ = kArmIntegerOpLatency;
} else if (IsPowerOfTwo(AbsOrMin(imm))) {
last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
}
void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
DataType::Type type = instruction->GetResultType();
switch (type) {
case DataType::Type::kInt32: {
HInstruction* rhs = instruction->GetRight();
if (rhs->IsConstant()) {
int32_t imm = Int32ConstantFrom(rhs->AsConstant());
HandleDivRemConstantIntegralLatencies(imm);
} else {
last_visited_latency_ = kArmDivIntegerLatency;
}
break;
}
case DataType::Type::kFloat32:
last_visited_latency_ = kArmDivFloatLatency;
break;
case DataType::Type::kFloat64:
last_visited_latency_ = kArmDivDoubleLatency;
break;
default:
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
last_visited_internal_latency_ = kArmLoadStringInternalLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
if (instruction->IsStringAlloc()) {
last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
} else {
last_visited_internal_latency_ = kArmCallInternalLatency;
}
last_visited_latency_ = kArmCallLatency;
}
void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
DataType::Type type = instruction->GetResultType();
switch (type) {
case DataType::Type::kInt32: {
HInstruction* rhs = instruction->GetRight();
if (rhs->IsConstant()) {
int32_t imm = Int32ConstantFrom(rhs->AsConstant());
HandleDivRemConstantIntegralLatencies(imm);
} else {
last_visited_internal_latency_ = kArmDivIntegerLatency;
last_visited_latency_ = kArmMulIntegerLatency;
}
break;
}
default:
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
break;
}
}
void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
DCHECK(codegen_ != nullptr);
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
switch (field_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_latency_ = kArmMemoryLoadLatency;
break;
case DataType::Type::kReference:
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
case DataType::Type::kInt64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
case DataType::Type::kFloat32:
last_visited_latency_ = kArmMemoryLoadLatency;
break;
case DataType::Type::kFloat64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ =
kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
default:
last_visited_latency_ = kArmMemoryLoadLatency;
break;
}
if (is_volatile) {
last_visited_internal_latency_ += kArmMemoryBarrierLatency;
}
}
void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
DCHECK(codegen_ != nullptr);
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
switch (field_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
if (is_volatile) {
last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
last_visited_latency_ = kArmMemoryBarrierLatency;
} else {
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
case DataType::Type::kInt32:
case DataType::Type::kReference:
if (kPoisonHeapReferences && needs_write_barrier) {
last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
}
last_visited_latency_ = kArmMemoryStoreLatency;
break;
case DataType::Type::kInt64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ =
kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
case DataType::Type::kFloat32:
last_visited_latency_ = kArmMemoryStoreLatency;
break;
case DataType::Type::kFloat64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ = kArmIntegerOpLatency +
kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
default:
last_visited_latency_ = kArmMemoryStoreLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
HBasicBlock* block = instruction->GetBlock();
DCHECK((block->GetLoopInformation() != nullptr) ||
(block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
// Users do not use any data results.
last_visited_latency_ = 0;
}
void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
DataType::Type result_type = instr->GetResultType();
DataType::Type input_type = instr->GetInputType();
switch (result_type) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX
break;
case DataType::Type::kInt32:
switch (input_type) {
case DataType::Type::kInt64:
last_visited_latency_ = kArmIntegerOpLatency; // MOV
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
break;
case DataType::Type::kInt64:
switch (input_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
// MOV and extension
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
// invokes runtime
last_visited_internal_latency_ = kArmCallInternalLatency;
break;
default:
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
break;
case DataType::Type::kFloat32:
switch (input_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
case DataType::Type::kInt64:
// invokes runtime
last_visited_internal_latency_ = kArmCallInternalLatency;
break;
case DataType::Type::kFloat64:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
}
break;
case DataType::Type::kFloat64:
switch (input_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
case DataType::Type::kInt64:
last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
case DataType::Type::kFloat32:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
}
break;
default:
last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
break;
}
}
} // namespace arm
} // namespace art