/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "SVDF.h"

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "HalInterfaces.h"

#include "Tracing.h"

namespace android {
namespace nn {

// Binds this evaluator to the operands of `operation`.
//
// Caches pointers to the input, feature-weights, time-weights, bias and
// state-in operands, reads the scalar rank and fused-activation parameters,
// and caches pointers to the state-out and output operands. No tensor data is
// touched here.
SVDF::SVDF(const Operation& operation, std::vector<RunTimeOperandInfo>& operands) {
    NNTRACE_TRANS("SVDF::SVDF");
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    params_.rank_ = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    params_.activation_ = static_cast<TfLiteFusedActivation>(
            getScalarData<int>(*GetInput(operation, operands, kActivationParam)));

    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);
}

// Validates the operand configuration and computes the output shapes.
//
// Checks performed:
//   - 6 or 7 inputs carry values and there are exactly 2 outputs;
//   - rank is strictly positive and divides num_filters
//     (dimension 0 of weights_feature);
//   - dimension 1 of input equals dimension 1 of weights_feature;
//   - dimension 0 of weights_time equals num_filters;
//   - if a bias is supplied, its dimension 0 equals num_units
//     (num_filters / rank).
//
// On success, *stateShape is set to {batch_size, memory_size * num_filters}
// and *outputShape to {batch_size, num_units}; both inherit type, offset and
// scale from the input tensor. Returns true on success.
// NOTE(review): the NN_CHECK* macros are defined elsewhere; they are assumed
// to return false from this function when a check fails.
bool SVDF::Prepare(const Operation& operation, std::vector<RunTimeOperandInfo>& operands,
                   Shape* stateShape, Shape* outputShape) {
    NNTRACE_TRANS("SVDF::Prepare");
    // Check we have all the inputs and outputs we need.
    const int num_inputs = NumInputsWithValues(operation, operands);

    NN_CHECK(num_inputs == 6 || num_inputs == 7);
    NN_CHECK_EQ(NumOutputs(operation), 2);

    const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
    const RunTimeOperandInfo* weights_feature =
            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
    const RunTimeOperandInfo* weights_time =
            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

    // Check all the parameters of tensor match within themselves and match the
    // input configuration.
    const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    // rank is used as a divisor below (and again in EvalFloat32); reject zero
    // or negative values up front instead of dividing by them.
    NN_CHECK(rank > 0);
    const uint32_t batch_size = SizeOfDimension(input, 0);
    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
    NN_CHECK_EQ(num_filters % rank, 0);
    const uint32_t num_units = num_filters / rank;
    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

    // The bias is optional; only validate it when it was supplied.
    const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
    if (!IsNullInput(bias)) {
        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
    }

    // Resize state.
    const Shape& inputShape = input->shape();
    stateShape->type = inputShape.type;
    stateShape->dimensions = {batch_size, memory_size * num_filters};
    stateShape->offset = inputShape.offset;
    stateShape->scale = inputShape.scale;

    // Resize output.
    outputShape->type = inputShape.type;
    outputShape->dimensions = {batch_size, num_units};
    outputShape->offset = inputShape.offset;
    outputShape->scale = inputShape.scale;

    return true;
}

// Runs the operation on the operands captured by the constructor.
//
// Dispatches on the input tensor's element type:
//   - TENSOR_FLOAT16: every tensor is widened into a temporary float32 vector,
//     EvalFloat32() runs on the temporaries, and the output and output state
//     are narrowed back into the float16 operand buffers.
//   - TENSOR_FLOAT32: EvalFloat32() runs directly on the operand buffers.
//   - anything else: logs an error and returns false.
//
// Returns true when one of the supported paths was executed.
bool SVDF::Eval() {
    NNTRACE_TRANS("SVDF::Eval");
    switch (input_->type) {
        case OperandType::TENSOR_FLOAT16: {
            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer),
                                    &inputDataFloat32);
            std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                    &inputStateDataFloat32);
            // The bias is optional: the temporary is always allocated, but it
            // is only filled when a bias operand was supplied.
            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
            if (!IsNullInput(bias_)) {
                convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                        &biasDataFloat32);
            }
            std::vector<float> weightsFeatureDataFloat32(
                    getNumberOfElements(weights_feature_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                    &weightsFeatureDataFloat32);
            std::vector<float> weightsTimeDataFloat32(
                    getNumberOfElements(weights_time_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                    &weightsTimeDataFloat32);
            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
            std::vector<float> outputStateDataFloat32(
                    getNumberOfElements(state_out_->shape()));

            EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                        biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                        weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                        outputStateDataFloat32.data());

            convertFloat32ToFloat16(outputDataFloat32,
                                    reinterpret_cast<_Float16*>(output_->buffer));
            convertFloat32ToFloat16(outputStateDataFloat32,
                                    reinterpret_cast<_Float16*>(state_out_->buffer));
            break;
        }
        case OperandType::TENSOR_FLOAT32: {
            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                        reinterpret_cast<float*>(state_in_->buffer),
                        reinterpret_cast<float*>(bias_->buffer),
                        reinterpret_cast<float*>(weights_feature_->buffer),
                        reinterpret_cast<float*>(weights_time_->buffer),
                        reinterpret_cast<float*>(output_->buffer),
                        reinterpret_cast<float*>(state_out_->buffer));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
            return false;
        }
    }
    return true;
}

// Float32 kernel shared by both data-type paths of Eval().
//
// All pointers refer to float32 data. Tensor extents are taken from the
// operands cached in the constructor (input_, weights_feature_,
// weights_time_), not from the pointer arguments. The state buffers are
// addressed as batch_size blocks of num_filters rows, each row memory_size
// floats long.
//
//   inputData          input activations
//   inputStateData     incoming state; copied into outputStateData first
//   biasData           bias values; only read when bias_ is not a null input
//   weightsFeatureData feature weights
//   weightsTimeData    time weights
//   outputData         receives batch_size * num_units results
//   outputStateData    receives the updated state
//
// rank must be positive; Prepare() enforces this.
void SVDF::EvalFloat32(const float* inputData, const float* inputStateData,
                       const float* biasData, const float* weightsFeatureData,
                       const float* weightsTimeData, float* outputData,
                       float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

    // Start from the incoming state; everything below updates the copy.
    memcpy(outputStateData, inputStateData,
           sizeof(float) * batch_size * memory_size * num_filters);

    // Compute conv1d(inputs, weights_feature).
    // Clear the last slot of every filter row first, since the helper below
    // accumulates into its destination (per its name).
    for (int b = 0; b < batch_size; b++) {
        float* state_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int c = 0; c < num_filters; c++) {
            float* state_ptr = state_ptr_batch + c * memory_size;
            state_ptr[memory_size - 1] = 0.0;
        }
    }
    // The state right most column is used to save current cycle activation.
    // This is achieved by starting at outputStateData[memory_size - 1] and
    // having the stride equal to memory_size.
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            weightsFeatureData, num_filters, input_size, inputData, batch_size,
            &outputStateData[memory_size - 1], memory_size);

    // Compute matmul(state, weights_time).
    // For every batch, the state rows and weights_time are passed to the
    // dot-product helper, which writes num_filters results with stride 1 into
    // that batch's slice of the scratch buffer.
    // The scratch buffer is heap-backed: a variable-length array is not
    // standard C++ and would put a tensor-sized buffer on the stack.
    std::vector<float> scratch(static_cast<size_t>(batch_size) * num_filters);
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters,
                scratch_ptr_batch, /*result_stride=*/1);
    }

    // Initialize output with bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAssign(biasData, num_units, batch_size,
                                                      outputData);
    } else {
        tflite::tensor_utils::ZeroVector(outputData, batch_size * num_units);
    }

    // Reduction sum: fold each batch's num_filters scratch values down to
    // num_units outputs, `rank` values per output.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::ReductionSumVector(scratch_ptr_batch, output_ptr_batch,
                                                 num_units, rank);
    }

    // Apply activation (in place).
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = outputData + b * num_units;
        tflite::tensor_utils::ApplyActivationToVector(output_ptr_batch, num_units,
                                                      params_.activation_,
                                                      output_ptr_batch);
    }

    // Left shift the state: every filter row is shifted by one position with
    // VectorShiftLeft, using 0 as the shift-in value.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int f = 0; f < num_filters; f++) {
            tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch, memory_size,
                                                  /*shift_value=*/0.0);
            state_out_ptr_batch += memory_size;
        }
    }
}

}  // namespace nn
}  // namespace android