/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "SVDF.h"

#include <vector>

#include "CpuExecutor.h"
#include "HalInterfaces.h"
namespace android {
namespace nn {
namespace {

// Reinterprets the operand's raw `buffer` pointer as a mutable pointer to T.
// No size or type validation is performed here.
template <class T>
inline T* GetBuffer(RunTimeOperandInfo* operand) {
    auto* raw = operand->buffer;
    return reinterpret_cast<T*>(raw);
}

// Read-only overload: reinterprets the operand's raw `buffer` pointer as a
// pointer to const T.
template <class T>
inline const T* GetBuffer(const RunTimeOperandInfo* operand) {
    auto* raw = operand->buffer;
    return reinterpret_cast<const T*>(raw);
}

}  // anonymous namespace
// Binds the operation's operands to this SVDF instance: looks up the five
// input tensors, reads the two scalar parameters (rank and fused activation),
// and looks up the two output tensors. Nothing is validated here.
SVDF::SVDF(const Operation& operation,
           std::vector<RunTimeOperandInfo>& operands) {
    // Tensor inputs.
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    // Scalar parameters, both stored in the operands as int.
    const auto* rank_operand = GetInput(operation, operands, kRankParam);
    params_.rank_ = getScalarData<int>(*rank_operand);

    const auto* activation_operand =
            GetInput(operation, operands, kActivationParam);
    const int activation_code = getScalarData<int>(*activation_operand);
    params_.activation_ = static_cast<TfLiteFusedActivation>(activation_code);

    // Tensor outputs.
    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);
}
// Validates the operands of an SVDF operation and computes the shapes of its
// two outputs.
//
// Checks performed:
//   - 6 or 7 inputs carry values and there are exactly 2 outputs;
//   - rank is strictly positive and evenly divides num_filters
//     (dimension 0 of weights_feature);
//   - memory_size (dimension 1 of weights_time) is non-zero;
//   - input dimension 1 matches weights_feature dimension 1;
//   - weights_time dimension 0 equals num_filters;
//   - when a bias is present, its dimension 0 equals num_units
//     (num_filters / rank).
//
// On success:
//   *stateShape  is set to { batch_size, memory_size * num_filters }
//   *outputShape is set to { batch_size, num_units }
// with type, offset and scale copied from the input operand's shape.
//
// Returns true when every check passes. A failing NN_CHECK / NN_CHECK_EQ is
// expected to make the function return false (macro defined outside this
// file -- confirm against its definition).
bool SVDF::Prepare(const Operation &operation,
                   std::vector<RunTimeOperandInfo> &operands,
                   Shape *stateShape,
                   Shape *outputShape) {
    // Check we have all the inputs and outputs we need.
    const int num_inputs = NumInputsWithValues(operation, operands);
    NN_CHECK(num_inputs == 6 || num_inputs == 7);
    NN_CHECK_EQ(NumOutputs(operation), 2);

    const RunTimeOperandInfo *input =
            GetInput(operation, operands, SVDF::kInputTensor);
    const RunTimeOperandInfo *weights_feature =
            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
    const RunTimeOperandInfo *weights_time =
            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

    // Check all the parameters of tensor match within themselves and match the
    // input configuration.
    const int rank_param =
            getScalarData<int>(*GetInput(operation, operands, kRankParam));
    // rank is used as a divisor below (and again in Eval). Reject zero, which
    // would divide by zero, and negative values, which would silently wrap to
    // a huge unsigned number when mixed with the uint32_t dimensions.
    NN_CHECK(rank_param > 0);
    const uint32_t rank = static_cast<uint32_t>(rank_param);

    const uint32_t batch_size = SizeOfDimension(input, 0);
    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
    NN_CHECK_EQ(num_filters % rank, 0);
    const uint32_t num_units = num_filters / rank;

    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
    // Eval writes to index memory_size - 1 of every state row, so an empty
    // memory would make it write outside the state buffer.
    NN_CHECK(memory_size > 0);

    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

    // The bias is optional; only validate it when it is actually supplied.
    const RunTimeOperandInfo *bias =
            GetInput(operation, operands, kBiasTensor);
    if (!IsNullInput(bias)) {
        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
    }

    // Resize state.
    const Shape &inputShape = input->shape();
    stateShape->type = inputShape.type;
    stateShape->dimensions = { batch_size, memory_size * num_filters };
    stateShape->offset = inputShape.offset;
    stateShape->scale = inputShape.scale;

    // Resize output.
    outputShape->type = inputShape.type;
    outputShape->dimensions = { batch_size, num_units };
    outputShape->offset = inputShape.offset;
    outputShape->scale = inputShape.scale;

    return true;
}
// Runs one SVDF step on the float buffers of the operands bound in the
// constructor.
//
// The state is treated as batch_size * num_filters rows of memory_size floats.
// The step:
//   1. copies state_in into state_out;
//   2. clears the last slot of every state row and accumulates
//      weights_feature x input into it (the current activation);
//   3. takes the dot product of every state row with the matching
//      weights_time row into a scratch buffer;
//   4. initializes the output with the bias (or zeros when no bias is given),
//      adds the rank-wise reduction of the scratch buffer, and applies the
//      fused activation;
//   5. shifts every state row left by one slot for the next invocation.
//
// Returns true on success, or false without touching any buffer when rank or
// memory_size is not strictly positive.
bool SVDF::Eval() {
    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int memory_size = SizeOfDimension(weights_time_, 1);

    // rank is a divisor and memory_size - 1 is used as an index into every
    // state row; bail out instead of dividing by zero or writing in front of
    // the state buffer.
    if (rank <= 0 || memory_size <= 0) {
        return false;
    }
    const int num_units = num_filters / rank;
    // Number of floats of state belonging to one batch entry.
    const int state_batch_stride = memory_size * num_filters;

    float* const state_out = GetBuffer<float>(state_out_);
    float* const output = GetBuffer<float>(output_);

    memcpy(state_out, GetBuffer<float>(state_in_),
           sizeof(float) * batch_size * memory_size * num_filters);

    // Clear the last slot of every state row: the multiply below accumulates
    // into it, so it must start from zero.
    for (int b = 0; b < batch_size; b++) {
        float* state_ptr_batch = state_out + b * state_batch_stride;
        for (int c = 0; c < num_filters; c++) {
            float* state_ptr = state_ptr_batch + c * memory_size;
            state_ptr[memory_size - 1] = 0.0f;
        }
    }

    // Compute conv1d(inputs, weights_feature).
    // The right most column of the state is used to save the current cycle
    // activation. This is achieved by starting at state_out[memory_size - 1]
    // and having the result stride equal to memory_size.
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            GetBuffer<float>(weights_feature_), num_filters, input_size,
            GetBuffer<float>(input_), batch_size,
            &state_out[memory_size - 1], memory_size);

    // Compute matmul(state, weights_time).
    // One dot product per (batch, filter) pair is stored contiguously in a
    // temporary buffer. It lives on the heap rather than in a variable-length
    // stack array, which is not standard C++ and whose size is driven by the
    // model's dimensions.
    std::vector<float> scratch(static_cast<size_t>(batch_size) * num_filters);
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = state_out + b * state_batch_stride;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                GetBuffer<float>(weights_time_), state_out_ptr_batch,
                memory_size, num_filters,
                scratch_ptr_batch, /*result_stride=*/1);
    }

    // Initialize output with bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAssign(
                GetBuffer<float>(bias_), num_units, batch_size, output);
    } else {
        tflite::tensor_utils::ZeroVector(output, batch_size * num_units);
    }

    // Reduction sum: fold the per-filter results into per-unit outputs.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = output + b * num_units;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::ReductionSumVector(
                scratch_ptr_batch, output_ptr_batch, num_units, rank);
    }

    // Apply activation in place.
    for (int b = 0; b < batch_size; b++) {
        float* output_ptr_batch = output + b * num_units;
        tflite::tensor_utils::ApplyActivationToVector(
                output_ptr_batch, num_units,
                params_.activation_, output_ptr_batch);
    }

    // Shift every state row left by one slot, filling the freed last slot
    // with zero.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = state_out + b * state_batch_stride;
        for (int f = 0; f < num_filters; f++) {
            tflite::tensor_utils::VectorShiftLeft(state_out_ptr_batch,
                                                  memory_size,
                                                  /*shift_value=*/0.0f);
            state_out_ptr_batch += memory_size;
        }
    }
    return true;
}
} // namespace nn
} // namespace android