/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm_vixl.h"
#include "mirror/array-inl.h"

namespace vixl32 = vixl::aarch32;
using namespace vixl32;  // NOLINT(build/namespaces)

namespace art {
namespace arm {

using helpers::DRegisterFrom;
using helpers::Int64ConstantFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::RegisterFrom;

#define __ GetVIXLAssembler()->

void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vdup(Untyped8, dst, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vdup(Untyped16, dst, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister src = DRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmov(OutputRegister(instruction), DRegisterLane(src, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister src = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Vpadd(DataTypeValue::I32, dst, src, src);
          break;
        case HVecReduce::kMin:
          __ Vpmin(DataTypeValue::S32, dst, src, src);
          break;
        case HVecReduce::kMax:
          __ Vpmax(DataTypeValue::S32, dst, src, src);
          break;
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister src = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vneg(DataTypeValue::S8, dst, src);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vneg(DataTypeValue::S16, dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vneg(DataTypeValue::S32, dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister src = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vabs(DataTypeValue::S8, dst, src);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vabs(DataTypeValue::S16, dst, src);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vabs(DataTypeValue::S32, dst, src);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister src = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vmov(I8, dst, 1);
      __ Veor(dst, dst, src);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      __ Vmvn(I8, dst, src);  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vadd(I8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vadd(I16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vadd(I32, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vqadd(DataTypeValue::U8, dst, lhs, rhs);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vqadd(DataTypeValue::S8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vqadd(DataTypeValue::U16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vqadd(DataTypeValue::S16, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Vrhadd(DataTypeValue::U8, dst, lhs, rhs)
          : __ Vhadd(DataTypeValue::U8, dst, lhs, rhs);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Vrhadd(DataTypeValue::S8, dst, lhs, rhs)
          : __ Vhadd(DataTypeValue::S8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Vrhadd(DataTypeValue::U16, dst, lhs, rhs)
          : __ Vhadd(DataTypeValue::U16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Vrhadd(DataTypeValue::S16, dst, lhs, rhs)
          : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vsub(I8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vsub(I16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vsub(I32, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vqsub(DataTypeValue::U8, dst, lhs, rhs);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vqsub(DataTypeValue::S8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vqsub(DataTypeValue::U16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vqsub(DataTypeValue::S16, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vmul(I8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vmul(I16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmul(I32, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vmin(DataTypeValue::U8, dst, lhs, rhs);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vmin(DataTypeValue::S8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vmin(DataTypeValue::U16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vmin(DataTypeValue::S16, dst, lhs, rhs);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmin(DataTypeValue::U32, dst, lhs, rhs);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmin(DataTypeValue::S32, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vmax(DataTypeValue::U8, dst, lhs, rhs);
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vmax(DataTypeValue::S8, dst, lhs, rhs);
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vmax(DataTypeValue::U16, dst, lhs, rhs);
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vmax(DataTypeValue::S16, dst, lhs, rhs);
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmax(DataTypeValue::U32, dst, lhs, rhs);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmax(DataTypeValue::S32, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by VAND (immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      __ Vand(I8, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      __ Vorr(I8, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      __ Veor(I8, dst, lhs, rhs);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vshl(I8, dst, lhs, value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vshl(I16, dst, lhs, value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vshl(I32, dst, lhs, value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vshr(DataTypeValue::S8, dst, lhs, value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vshr(DataTypeValue::S16, dst, lhs, value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vshr(DataTypeValue::S32, dst, lhs, value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister dst = DRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Vshr(DataTypeValue::U8, dst, lhs, value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Vshr(DataTypeValue::U16, dst, lhs, value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vshr(DataTypeValue::U32, dst, lhs, value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister dst = DRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Vmov(I32, dst, 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void LocationsBuilderARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::DRegister acc = DRegisterFrom(locations->InAt(0));
  vixl32::DRegister left = DRegisterFrom(locations->InAt(1));
  vixl32::DRegister right = DRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
  switch (a->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          UseScratchRegisterScope temps(GetVIXLAssembler());
          vixl32::DRegister tmp = temps.AcquireD();
          __ Vsub(DataTypeValue::I32, tmp, left, right);
          __ Vabs(DataTypeValue::S32, tmp, tmp);
          __ Vadd(DataTypeValue::I32, acc, acc, tmp);
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

// Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word
// size equals to 4).
static bool IsWordAligned(HVecMemoryOperation* instruction) {
  return instruction->GetAlignment().IsAlignedAt(4u);
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector memory operations. Returns the memory operand and,
// if used, sets the output parameter scratch to a temporary register used in this operand,
// so that the client can release it right after the memory operand use.
MemOperand InstructionCodeGeneratorARMVIXL::VecAddress(
        HVecMemoryOperation* instruction,
        UseScratchRegisterScope* temps_scope,
        /*out*/ vixl32::Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::Register base = InputRegisterAt(instruction, 0);

  Location index = locations->InAt(1);
  size_t size = DataType::Size(instruction->GetPackedType());
  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    offset += Int64ConstantFrom(index) << shift;
    return MemOperand(base, offset);
  } else {
    *scratch = temps_scope->Acquire();
    __ Add(*scratch, base, Operand(RegisterFrom(index), ShiftType::LSL, shift));

    return MemOperand(*scratch, offset);
  }
}

AlignedMemOperand InstructionCodeGeneratorARMVIXL::VecAddressUnaligned(
        HVecMemoryOperation* instruction,
        UseScratchRegisterScope* temps_scope,
        /*out*/ vixl32::Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  vixl32::Register base = InputRegisterAt(instruction, 0);

  Location index = locations->InAt(1);
  size_t size = DataType::Size(instruction->GetPackedType());
  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    offset += Int64ConstantFrom(index) << shift;
    __ Add(*scratch, base, offset);
  } else {
    *scratch = temps_scope->Acquire();
    __ Add(*scratch, base, offset);
    __ Add(*scratch, *scratch, Operand(RegisterFrom(index), ShiftType::LSL, shift));
  }
  return AlignedMemOperand(*scratch, kNoAlignment);
}

void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
  vixl32::DRegister reg = OutputDRegister(instruction);
  UseScratchRegisterScope temps(GetVIXLAssembler());
  vixl32::Register scratch;

  DCHECK(instruction->GetPackedType() != DataType::Type::kUint16 || !instruction->IsStringCharAt());

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (IsWordAligned(instruction)) {
        __ Vldr(reg, VecAddress(instruction, &temps, &scratch));
      } else {
        __ Vld1(Untyped8,
            NeonRegisterList(reg, kMultipleLanes),
            VecAddressUnaligned(instruction, &temps, &scratch));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (IsWordAligned(instruction)) {
        __ Vldr(reg, VecAddress(instruction, &temps, &scratch));
      } else {
        __ Vld1(Untyped16,
            NeonRegisterList(reg, kMultipleLanes),
            VecAddressUnaligned(instruction, &temps, &scratch));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (IsWordAligned(instruction)) {
        __ Vldr(reg, VecAddress(instruction, &temps, &scratch));
      } else {
        __ Vld1(Untyped32,
            NeonRegisterList(reg, kMultipleLanes),
            VecAddressUnaligned(instruction, &temps, &scratch));
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
  vixl32::DRegister reg = InputDRegisterAt(instruction, 2);
  UseScratchRegisterScope temps(GetVIXLAssembler());
  vixl32::Register scratch;
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (IsWordAligned(instruction)) {
        __ Vstr(reg, VecAddress(instruction, &temps, &scratch));
      } else {
        __ Vst1(Untyped8,
                NeonRegisterList(reg, kMultipleLanes),
                VecAddressUnaligned(instruction, &temps, &scratch));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (IsWordAligned(instruction)) {
        __ Vstr(reg, VecAddress(instruction, &temps, &scratch));
      } else {
        __ Vst1(Untyped16,
                NeonRegisterList(reg, kMultipleLanes),
                VecAddressUnaligned(instruction, &temps, &scratch));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (IsWordAligned(instruction)) {
        __ Vstr(reg, VecAddress(instruction, &temps, &scratch));
      } else {
        __ Vst1(Untyped32,
                NeonRegisterList(reg, kMultipleLanes),
                VecAddressUnaligned(instruction, &temps, &scratch));
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

#undef __

}  // namespace arm
}  // namespace art