/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "scheduler_arm.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

namespace art {
namespace arm {

using helpers::Int32ConstantFrom;
using helpers::Uint64ConstantFrom;

void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt64:
      // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
      // so a bubble (kArmNopLatency) is added to represent the internal carry flag
      // dependency inside these pairs.
      last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
  HandleBinaryOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
  HandleBinaryOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt64:
      last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmMulFloatingPointLatency;
      break;
    default:
      last_visited_latency_ = kArmMulIntegerLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt64:
      last_visited_internal_latency_ = kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
  HandleBitwiseOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
  HandleBitwiseOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
  HandleBitwiseOperationLantencies(instr);
}

void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
  switch (instr->GetResultType()) {
    case DataType::Type::kInt32:
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64: {
      // HandleLongRotate
      HInstruction* rhs = instr->GetRight();
      if (rhs->IsConstant()) {
        uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
        if (rot != 0u) {
          last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
        }
      } else {
        last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
        last_visited_latency_ = kArmBranchLatency;
      }
      break;
    }
    default:
      LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
  DataType::Type type = instr->GetResultType();
  HInstruction* rhs = instr->GetRight();
  switch (type) {
    case DataType::Type::kInt32:
      if (!rhs->IsConstant()) {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
      }
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64:
      if (!rhs->IsConstant()) {
        last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
      } else {
        uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
        if (shift_value == 1 || shift_value >= 32) {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
        } else {
          last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
        }
      }
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    default:
      LOG(FATAL) << "Unexpected operation type " << type;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
  HandleShiftLatencies(instr);
}

void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
  HandleShiftLatencies(instr);
}

void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
  HandleShiftLatencies(instr);
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
  switch (condition) {
    case kCondEQ:
    case kCondBE:
    case kCondNE:
    case kCondA:
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondGE:
      // Mvn
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      FALLTHROUGH_INTENDED;
    case kCondLT:
      // Lsr
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondAE:
      // Trivially true.
      // Mov
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    case kCondB:
      // Trivially false.
      // Mov
      last_visited_latency_ = kArmIntegerOpLatency;
      break;
    default:
      LOG(FATAL) << "Unexpected condition " << condition;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition cond = condition->GetCondition();
  HInstruction* right = condition->InputAt(1);
  int64_t value = Uint64ConstantFrom(right);

  // Comparisons against 0 are common enough, so codegen has special handling for them.
  if (value == 0) {
    switch (cond) {
      case kCondNE:
      case kCondA:
      case kCondEQ:
      case kCondBE:
        // Orrs
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondLT:
      case kCondGE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      case kCondB:
      case kCondAE:
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        return;
      default:
        break;
    }
  }

  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Cmp, IT, Cmp
      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
      break;
    }
    case kCondLE:
    case kCondGT:
      // Trivially true or false.
      if (value == std::numeric_limits<int64_t>::max()) {
        // Cmp
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        break;
      }
      FALLTHROUGH_INTENDED;
    case kCondGE:
    case kCondLT: {
      // Cmp, Sbcs
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
  DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition cond = condition->GetCondition();
  switch (cond) {
    case kCondEQ:
    case kCondNE:
    case kCondB:
    case kCondBE:
    case kCondA:
    case kCondAE: {
      // Cmp, IT, Cmp
      last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
      break;
    }
    case kCondLE:
    case kCondGT:
    case kCondGE:
    case kCondLT: {
      // Cmp, Sbcs
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      break;
    }
    default:
      LOG(FATAL) << "Unreachable";
      UNREACHABLE();
  }
}

// The GenerateTest family of functions is all counted as internal latency.
void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
  const DataType::Type type = condition->GetLeft()->GetType();

  if (type == DataType::Type::kInt64) {
    condition->InputAt(1)->IsConstant()
        ? HandleGenerateLongTestConstant(condition)
        : HandleGenerateLongTest(condition);
  } else if (DataType::IsFloatingPointType(type)) {
    // GenerateVcmp + Vmrs
    last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
  } else {
    // Cmp
    last_visited_internal_latency_ += kArmIntegerOpLatency;
  }
}

bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
  if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
    HInstruction* right = condition->InputAt(1);

    if (right->IsConstant()) {
      IfCondition c = condition->GetCondition();
      const uint64_t value = Uint64ConstantFrom(right);
      if (c < kCondLT || c > kCondGE) {
        if (value != 0) {
          return false;
        }
      } else if (c == kCondLE || c == kCondGT) {
        if (value < std::numeric_limits<int64_t>::max() &&
            !codegen_->GetAssembler()->ShifterOperandCanHold(
                SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
          return false;
        }
      } else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
                     SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
        return false;
      }
    }
  }
  return true;
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
  HandleGenerateTest(cond);

  // Unlike the codegen pass, we cannot check whether the 'out' register IsLow() here,
  // because scheduling runs before liveness analysis (the location builder) and the register
  // allocator, so we can only follow one codegen path by assuming out.IsLow() is true.
  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition condition = cond->GetCondition();

  last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;

  if (condition == kCondNE) {
    // Orrs, IT, Mov
    last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ += kArmIntegerOpLatency;
    HandleGenerateConditionWithZero(condition);
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
  last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
  last_visited_internal_latency_ += kArmBranchLatency;
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
  DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);

  IfCondition condition = cond->GetCondition();
  HInstruction* right = cond->InputAt(1);

  if (right->IsConstant()) {
    // Comparisons against 0 are common enough, so codegen has special handling for them.
    if (Uint64ConstantFrom(right) == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
        case kCondEQ:
        case kCondBE:
          // Orr
          last_visited_internal_latency_ += kArmIntegerOpLatency;
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLT:
        case kCondGE:
          FALLTHROUGH_INTENDED;
        case kCondAE:
        case kCondB:
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLE:
        case kCondGT:
        default:
          break;
      }
    }
  }

  if ((condition == kCondEQ || condition == kCondNE) && !CanGenerateTest(cond)) {
    HandleGenerateEqualLong(cond);
    return;
  }

  if (CanGenerateTest(cond)) {
    HandleGenerateConditionGeneric(cond);
    return;
  }

  HandleGenerateLongComparesAndJumps();

  last_visited_internal_latency_ += kArmIntegerOpLatency;
  last_visited_latency_ = kArmBranchLatency;
}

void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
  const DataType::Type type = cond->GetLeft()->GetType();

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  if (type == DataType::Type::kInt64) {
    HandleGenerateConditionLong(cond);
    return;
  }

  IfCondition condition = cond->GetCondition();
  HInstruction* right = cond->InputAt(1);
  int64_t value;

  if (right->IsConstant()) {
    value = Uint64ConstantFrom(right);

    // Comparisons against 0 are common enough, so codegen has special handling for them.
    if (value == 0) {
      switch (condition) {
        case kCondNE:
        case kCondA:
        case kCondEQ:
        case kCondBE:
        case kCondLT:
        case kCondGE:
        case kCondAE:
        case kCondB:
          HandleGenerateConditionWithZero(condition);
          return;
        case kCondLE:
        case kCondGT:
        default:
          break;
      }
    }
  }

  if (condition == kCondEQ || condition == kCondNE) {
    if (condition == kCondNE) {
      // CMP, IT, MOV.ne
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      last_visited_latency_ = kArmIntegerOpLatency;
    } else {
      last_visited_internal_latency_ += kArmIntegerOpLatency;
      HandleGenerateConditionWithZero(condition);
    }
    return;
  }

  HandleGenerateConditionGeneric(cond);
}

void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
  if (cond->IsEmittedAtUseSite()) {
    last_visited_latency_ = 0;
    return;
  }

  const DataType::Type type = cond->GetLeft()->GetType();

  if (DataType::IsFloatingPointType(type)) {
    HandleGenerateConditionGeneric(cond);
    return;
  }

  DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;

  const IfCondition condition = cond->GetCondition();

  if (type == DataType::Type::kBool &&
      cond->GetRight()->GetType() == DataType::Type::kBool &&
      (condition == kCondEQ || condition == kCondNE)) {
    if (condition == kCondEQ) {
      last_visited_internal_latency_ = kArmIntegerOpLatency;
    }
    last_visited_latency_ = kArmIntegerOpLatency;
    return;
  }

  HandleGenerateConditionIntegralOrNonPrimitive(cond);
}

void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
  HandleCondition(instr);
}

void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
  DataType::Type type = instr->InputAt(0)->GetType();
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
      break;
    case DataType::Type::kInt64:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
      break;
    default:
      last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
      break;
  }
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
  if (instruction->GetResultType() == DataType::Type::kInt32) {
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ = kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
  if (internal_latency) {
    last_visited_internal_latency_ += kArmIntegerOpLatency;
  } else {
    last_visited_latency_ = kArmDataProcWithShifterOpLatency;
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();
  if (kind == HInstruction::kAdd) {
    last_visited_internal_latency_ = kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else if (kind == HInstruction::kSub) {
    last_visited_internal_latency_ = kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    HandleGenerateDataProcInstruction(/* internal_latency */ true);
    HandleGenerateDataProcInstruction();
  }
}

void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
  DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
  DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));

  const uint32_t shift_value = instruction->GetShiftAmount();
  const HInstruction::InstructionKind kind = instruction->GetInstrKind();

  if (shift_value >= 32) {
    // Different shift types actually generate similar code here,
    // no need to differentiate shift types like the codegen pass does,
    // which also avoids handling shift types from different ARM backends.
    HandleGenerateDataProc(instruction);
  } else {
    DCHECK_GT(shift_value, 1U);
    DCHECK_LT(shift_value, 32U);

    if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
      HandleGenerateDataProcInstruction(/* internal_latency */ true);
      HandleGenerateDataProcInstruction(/* internal_latency */ true);
      HandleGenerateDataProcInstruction();
    } else {
      last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
      HandleGenerateDataProc(instruction);
    }
  }
}

void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
  const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();

  if (instruction->GetType() == DataType::Type::kInt32) {
    HandleGenerateDataProcInstruction();
  } else {
    DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
    if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
      HandleGenerateDataProc(instruction);
    } else {
      HandleGenerateLongDataProc(instruction);
    }
  }
}

void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
  // Although the code generated is a simple `add` instruction, we found through empirical results
  // that spacing it from its use in memory accesses was beneficial.
  last_visited_internal_latency_ = kArmNopLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
    HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}

void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
  last_visited_latency_ = kArmMulIntegerLatency;
}

void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
  DataType::Type type = instruction->GetType();
  const bool maybe_compressed_char_at =
      mirror::kUseStringCompression && instruction->IsStringCharAt();
  HInstruction* array_instr = instruction->GetArray();
  bool has_intermediate_address = array_instr->IsIntermediateAddress();
  HInstruction* index = instruction->InputAt(1);

  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      if (maybe_compressed_char_at) {
        last_visited_internal_latency_ += kArmMemoryLoadLatency;
      }
      if (index->IsConstant()) {
        if (maybe_compressed_char_at) {
          last_visited_internal_latency_ +=
              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
          last_visited_latency_ = kArmBranchLatency;
        } else {
          last_visited_latency_ += kArmMemoryLoadLatency;
        }
      } else {
        if (has_intermediate_address) {
        } else {
          last_visited_internal_latency_ += kArmIntegerOpLatency;
        }
        if (maybe_compressed_char_at) {
          last_visited_internal_latency_ +=
              kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
          last_visited_latency_ = kArmBranchLatency;
        } else {
          last_visited_latency_ += kArmMemoryLoadLatency;
        }
      }
      break;
    }
    case DataType::Type::kReference: {
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
      } else {
        if (index->IsConstant()) {
          last_visited_latency_ = kArmMemoryLoadLatency;
        } else {
          if (has_intermediate_address) {
          } else {
            last_visited_internal_latency_ += kArmIntegerOpLatency;
          }
          last_visited_latency_ = kArmMemoryLoadLatency;
        }
      }
      break;
    }
    case DataType::Type::kInt64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }
    case DataType::Type::kFloat32: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }
    case DataType::Type::kFloat64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ += kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }
    default:
      LOG(FATAL) << "Unreachable type " << type;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
  last_visited_latency_ = kArmMemoryLoadLatency;
  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
    last_visited_internal_latency_ = kArmMemoryLoadLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}

void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
  HInstruction* index = instruction->InputAt(1);
  DataType::Type value_type = instruction->GetComponentType();
  HInstruction* array_instr = instruction->GetArray();
  bool has_intermediate_address = array_instr->IsIntermediateAddress();

  switch (value_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryStoreLatency;
      } else {
        if (has_intermediate_address) {
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
        }
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;
    }
    case DataType::Type::kReference: {
      if (instruction->InputAt(2)->IsNullConstant()) {
        if (index->IsConstant()) {
          last_visited_latency_ = kArmMemoryStoreLatency;
        } else {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmMemoryStoreLatency;
        }
      } else {
        // Following the exact instructions of runtime type checks is too complicated,
        // just giving it a simple slow latency.
        last_visited_latency_ = kArmRuntimeTypeCheckLatency;
      }
      break;
    }
    case DataType::Type::kInt64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }
    case DataType::Type::kFloat32: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }
    case DataType::Type::kFloat64: {
      if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    }
    default:
      LOG(FATAL) << "Unreachable type " << value_type;
      UNREACHABLE();
  }
}

void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmIntegerOpLatency;
  // Users do not use any data results.
  last_visited_latency_ = 0;
}

void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
  if (imm == 0) {
    last_visited_internal_latency_ = 0;
    last_visited_latency_ = 0;
  } else if (imm == 1 || imm == -1) {
    last_visited_latency_ = kArmIntegerOpLatency;
  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
    last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  } else {
    last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
    last_visited_latency_ = kArmIntegerOpLatency;
  }
}

void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
  DataType::Type type = instruction->GetResultType();
  switch (type) {
    case DataType::Type::kInt32: {
      HInstruction* rhs = instruction->GetRight();
      if (rhs->IsConstant()) {
        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
        HandleDivRemConstantIntegralLatencies(imm);
      } else {
        last_visited_latency_ = kArmDivIntegerLatency;
      }
      break;
    }
    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmDivFloatLatency;
      break;
    case DataType::Type::kFloat64:
      last_visited_latency_ = kArmDivDoubleLatency;
      break;
    default:
      last_visited_internal_latency_ = kArmCallInternalLatency;
      last_visited_latency_ = kArmCallLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmCallInternalLatency;
  last_visited_latency_ = kArmIntegerOpLatency;
}

void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmCallInternalLatency;
  last_visited_latency_ = kArmCallLatency;
}

void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmLoadStringInternalLatency;
  last_visited_latency_ = kArmMemoryLoadLatency;
}

void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
  last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
  last_visited_latency_ = kArmCallLatency;
}

void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
  if (instruction->IsStringAlloc()) {
    last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
  } else {
    last_visited_internal_latency_ = kArmCallInternalLatency;
  }
  last_visited_latency_ = kArmCallLatency;
}

void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
  DataType::Type type = instruction->GetResultType();
  switch (type) {
    case DataType::Type::kInt32: {
      HInstruction* rhs = instruction->GetRight();
      if (rhs->IsConstant()) {
        int32_t imm = Int32ConstantFrom(rhs->AsConstant());
        HandleDivRemConstantIntegralLatencies(imm);
      } else {
        last_visited_internal_latency_ = kArmDivIntegerLatency;
        last_visited_latency_ = kArmMulIntegerLatency;
      }
      break;
    }
    default:
      last_visited_internal_latency_ = kArmCallInternalLatency;
      last_visited_latency_ = kArmCallLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;
    case DataType::Type::kReference:
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryLoadLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;
    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ =
            kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryLoadLatency;
      }
      break;
    default:
      last_visited_latency_ = kArmMemoryLoadLatency;
      break;
  }

  if (is_volatile) {
    last_visited_internal_latency_ += kArmMemoryBarrierLatency;
  }
}

void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
                                                          const FieldInfo& field_info) {
  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
  DCHECK(codegen_ != nullptr);
  bool is_volatile = field_info.IsVolatile();
  DataType::Type field_type = field_info.GetFieldType();
  bool needs_write_barrier =
      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();

  switch (field_type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (is_volatile) {
        last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmMemoryBarrierLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kReference:
      if (kPoisonHeapReferences && needs_write_barrier) {
        last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
      }
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;
    case DataType::Type::kInt64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ =
            kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;
    case DataType::Type::kFloat32:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;
    case DataType::Type::kFloat64:
      if (is_volatile && !atomic_ldrd_strd) {
        last_visited_internal_latency_ =
            kArmIntegerOpLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency +
            kArmMemoryStoreLatency;
        last_visited_latency_ = kArmIntegerOpLatency;
      } else {
        last_visited_latency_ = kArmMemoryStoreLatency;
      }
      break;
    default:
      last_visited_latency_ = kArmMemoryStoreLatency;
      break;
  }
}

void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
  HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
  HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}

void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
  HBasicBlock* block = instruction->GetBlock();
  DCHECK((block->GetLoopInformation() != nullptr) ||
         (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
  // Users do not use any data results.
  last_visited_latency_ = 0;
}

void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
  DataType::Type result_type = instr->GetResultType();
  DataType::Type input_type = instr->GetInputType();

  switch (result_type) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      last_visited_latency_ = kArmIntegerOpLatency;  // SBFX or UBFX
      break;

    case DataType::Type::kInt32:
      switch (input_type) {
        case DataType::Type::kInt64:
          last_visited_latency_ = kArmIntegerOpLatency;  // MOV
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kInt64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          // MOV and extension
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
          // Invokes runtime.
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        default:
          last_visited_internal_latency_ = kArmIntegerOpLatency;
          last_visited_latency_ = kArmIntegerOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat32:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          // Invokes runtime.
          last_visited_internal_latency_ = kArmCallInternalLatency;
          break;
        case DataType::Type::kFloat64:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    case DataType::Type::kFloat64:
      switch (input_type) {
        case DataType::Type::kBool:
        case DataType::Type::kUint8:
        case DataType::Type::kInt8:
        case DataType::Type::kUint16:
        case DataType::Type::kInt16:
        case DataType::Type::kInt32:
          last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kInt64:
          last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        case DataType::Type::kFloat32:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
        default:
          last_visited_latency_ = kArmFloatingPointOpLatency;
          break;
      }
      break;

    default:
      last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
      break;
  }
}

}  // namespace arm
}  // namespace art