/*
* Copyright (C) 2019 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "Operations"
#include "OperationResolver.h"
#include "RNN.h"
namespace android {
namespace nn {
namespace bidirectional_sequence_rnn {
constexpr uint32_t kNumInputs = 15;
constexpr uint32_t kInputTensor = 0;
// Forward cell tensors
constexpr uint32_t kFwWeightsTensor = 1;
constexpr uint32_t kFwRecurrentWeightsTensor = 2;
constexpr uint32_t kFwBiasTensor = 3;
constexpr uint32_t kFwHiddenStateTensor = 4;
// Backward cell tensors
constexpr uint32_t kBwWeightsTensor = 5;
constexpr uint32_t kBwRecurrentWeightsTensor = 6;
constexpr uint32_t kBwBiasTensor = 7;
constexpr uint32_t kBwHiddenStateTensor = 8;
// Auxiliary inputs
constexpr uint32_t kAuxInputTensor = 9; // optional
constexpr uint32_t kFwAuxWeightsTensor = 10; // optional
constexpr uint32_t kBwAuxWeightsTensor = 11; // optional
// Cell parameters
constexpr uint32_t kActivationParam = 12;
constexpr uint32_t kTimeMajorParam = 13;
constexpr uint32_t kMergeOutputsParam = 14;
constexpr uint32_t kFwOutputTensor = 0;
constexpr uint32_t kBwOutputTensor = 1; // Only if mergeOutputs parameter is false
namespace {
template <typename T>
void transposeFirstTwoDims(const T* input, const Shape& inputShape, T* output) {
const uint32_t firstDimSize = getSizeOfDimension(inputShape, 0);
const uint32_t secondDimSize = getSizeOfDimension(inputShape, 1);
const uint32_t inputSize = getSizeOfDimension(inputShape, 2);
for (int f = 0; f < firstDimSize; ++f) {
for (int s = 0; s < secondDimSize; ++s) {
for (int i = 0; i < inputSize; ++i) {
const uint32_t inputIndex = f * secondDimSize * inputSize + s * inputSize + i;
const uint32_t outputIndex = s * firstDimSize * inputSize + f * inputSize + i;
output[outputIndex] = input[inputIndex];
}
}
}
}
Shape removeFirstDim(const Shape& input) {
Shape output = input;
output.dimensions.resize(input.dimensions.size() - 1);
for (int i = 0; i < input.dimensions.size() - 1; ++i) {
output.dimensions[i] = input.dimensions[i + 1];
}
return output;
}
template <typename T>
bool executeTyped(IOperationExecutionContext* context) {
const T* input = context->getInputBuffer<T>(kInputTensor);
Shape inputShape = context->getInputShape(kInputTensor);
const T* fwWeights = context->getInputBuffer<T>(kFwWeightsTensor);
Shape fwWeightsShape = context->getInputShape(kFwWeightsTensor);
const T* fwRecurrentWeights = context->getInputBuffer<T>(kFwRecurrentWeightsTensor);
Shape fwRecurrentWeightsShape = context->getInputShape(kFwRecurrentWeightsTensor);
const T* fwBias = context->getInputBuffer<T>(kFwBiasTensor);
const T* fwHiddenState = context->getInputBuffer<T>(kFwHiddenStateTensor);
const T* bwWeights = context->getInputBuffer<T>(kBwWeightsTensor);
Shape bwWeightsShape = context->getInputShape(kBwWeightsTensor);
const T* bwRecurrentWeights = context->getInputBuffer<T>(kBwRecurrentWeightsTensor);
Shape bwRecurrentWeightsShape = context->getInputShape(kBwRecurrentWeightsTensor);
const T* bwBias = context->getInputBuffer<T>(kBwBiasTensor);
const T* bwHiddenState = context->getInputBuffer<T>(kBwHiddenStateTensor);
const T* auxInput = nullptr;
const T* fwAuxWeights = nullptr;
const T* bwAuxWeights = nullptr;
const bool hasAuxInputs = !context->isOmittedInput(kAuxInputTensor);
if (hasAuxInputs) {
auxInput = context->getInputBuffer<T>(kAuxInputTensor);
fwAuxWeights = context->getInputBuffer<T>(kFwAuxWeightsTensor);
bwAuxWeights = context->getInputBuffer<T>(kBwAuxWeightsTensor);
}
Shape auxInputShape = context->getInputShape(kAuxInputTensor);
Shape fwAuxWeightsShape = context->getInputShape(kFwAuxWeightsTensor);
Shape bwAuxWeightsShape = context->getInputShape(kBwAuxWeightsTensor);
int32_t activation = context->getInputValue<int32_t>(kActivationParam);
int32_t timeMajor = context->getInputValue<bool>(kTimeMajorParam);
int32_t mergeOutputs = context->getInputValue<bool>(kMergeOutputsParam);
T* fwOutput = context->getOutputBuffer<T>(kFwOutputTensor);
Shape fwOutputShape = context->getOutputShape(kFwOutputTensor);
T* bwOutput = nullptr;
Shape bwOutputShape;
if (!mergeOutputs) {
bwOutputShape = context->getOutputShape(kBwOutputTensor);
bwOutput = context->getOutputBuffer<T>(kBwOutputTensor);
}
// If the input tensors are not in time major format, we transpose the first
// two dimensions, and set input and output pointers to temporary vectors
// which are transposed back after the RNN is applied.
std::vector<T> inputTransposed;
std::vector<T> auxInputTransposed;
std::vector<T> fwOutputTransposed;
std::vector<T> bwOutputTransposed;
if (!timeMajor) {
// First, resize temporary buffers to accommodate for transposed tensors.
inputTransposed.resize(getNumberOfElements(inputShape));
if (hasAuxInputs) {
auxInputTransposed.resize(getNumberOfElements(auxInputShape));
}
fwOutputTransposed.resize(getNumberOfElements(fwOutputShape));
if (!mergeOutputs) {
bwOutputTransposed.resize(getNumberOfElements(bwOutputShape));
}
// Transpose the input tensors.
transposeFirstTwoDims(input, inputShape, inputTransposed.data());
if (hasAuxInputs) {
transposeFirstTwoDims(auxInput, auxInputShape, auxInputTransposed.data());
}
// Change input and output pointers to the temporary buffers.
input = inputTransposed.data();
if (hasAuxInputs) {
auxInput = auxInputTransposed.data();
}
fwOutput = fwOutputTransposed.data();
if (!mergeOutputs) {
bwOutput = bwOutputTransposed.data();
}
// Swap the first two dimensions in the Shapes to reflect the
// transposition.
std::swap(inputShape.dimensions[0], inputShape.dimensions[1]);
if (hasAuxInputs) {
std::swap(auxInputShape.dimensions[0], auxInputShape.dimensions[1]);
}
std::swap(fwOutputShape.dimensions[0], fwOutputShape.dimensions[1]);
if (!mergeOutputs) {
std::swap(bwOutputShape.dimensions[0], bwOutputShape.dimensions[1]);
}
}
const uint32_t maxTime = getSizeOfDimension(inputShape, 0);
const uint32_t batchSize = getSizeOfDimension(inputShape, 1);
const uint32_t inputSize = getSizeOfDimension(inputShape, 2);
uint32_t auxInputSize = 0;
if (hasAuxInputs) {
auxInputSize = getSizeOfDimension(auxInputShape, 2);
}
const uint32_t fwNumUnits = getSizeOfDimension(fwWeightsShape, 0);
const uint32_t bwNumUnits = getSizeOfDimension(bwWeightsShape, 0);
Shape fixedTimeInputShape = removeFirstDim(inputShape);
Shape fixedTimeAuxInputShape = auxInputShape;
if (hasAuxInputs) {
fixedTimeAuxInputShape = removeFirstDim(auxInputShape);
}
// Create an additional buffer to store a hidden state between steps.
std::vector<T> tempHiddenState(batchSize * fwNumUnits);
// Forward pass
for (int i = 0; i < maxTime; ++i) {
const T* inputBatchPtr = input + i * batchSize * inputSize;
const T* auxInputBatchPtr = nullptr;
if (hasAuxInputs) {
auxInputBatchPtr = auxInput + i * batchSize * auxInputSize;
}
const uint32_t fwOutputBatchStride = mergeOutputs ? (fwNumUnits + bwNumUnits) : fwNumUnits;
T* fwOutputBatchPtr = fwOutput + i * batchSize * fwOutputBatchStride;
RNN::RNNStep<T>(inputBatchPtr, fixedTimeInputShape, auxInputBatchPtr,
fixedTimeAuxInputShape, fwHiddenState, fwBias, fwWeights, fwWeightsShape,
fwAuxWeights, fwAuxWeightsShape, fwRecurrentWeights,
fwRecurrentWeightsShape, activation, fwOutputBatchStride,
/*outputBatchOffset=*/0, fwOutputBatchPtr, tempHiddenState.data());
fwHiddenState = tempHiddenState.data();
}
tempHiddenState.resize(batchSize * bwNumUnits);
// Backward pass
for (int i = maxTime - 1; i >= 0; --i) {
const T* inputBatchPtr = input + i * batchSize * inputSize;
const T* auxInputBatchPtr = nullptr;
if (hasAuxInputs) {
auxInputBatchPtr = auxInput + i * batchSize * auxInputSize;
}
T* bwOutputBatchPtr;
uint32_t bwOutputBatchOffset = 0;
uint32_t bwOutputBatchStride;
if (mergeOutputs) {
bwOutputBatchStride = fwNumUnits + bwNumUnits;
bwOutputBatchOffset = fwNumUnits;
bwOutputBatchPtr = fwOutput + i * batchSize * bwOutputBatchStride;
} else {
bwOutputBatchStride = bwNumUnits;
bwOutputBatchPtr = bwOutput + i * batchSize * bwOutputBatchStride;
}
RNN::RNNStep<T>(inputBatchPtr, fixedTimeInputShape, auxInputBatchPtr,
fixedTimeAuxInputShape, bwHiddenState, bwBias, bwWeights, bwWeightsShape,
bwAuxWeights, bwAuxWeightsShape, bwRecurrentWeights,
bwRecurrentWeightsShape, activation, bwOutputBatchStride,
bwOutputBatchOffset, bwOutputBatchPtr, tempHiddenState.data());
bwHiddenState = tempHiddenState.data();
}
// If the inputs were in batch major format, transpose data in temporary
// buffers and write to the output(s).
if (!timeMajor) {
transposeFirstTwoDims(fwOutputTransposed.data(), fwOutputShape,
context->getOutputBuffer<T>(kFwOutputTensor));
if (!mergeOutputs) {
transposeFirstTwoDims(bwOutputTransposed.data(), bwOutputShape,
context->getOutputBuffer<T>(kBwOutputTensor));
}
}
return true;
}
} // namespace
bool validate(const IOperationValidationContext* context) {
NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
// Exact number is dependent on the mergeOutputs parameter and checked
// during preparation.
NN_RET_CHECK(context->getNumOutputs() == 1 || context->getNumOutputs() == 2);
OperandType inputType = context->getInputType(kInputTensor);
if (inputType != OperandType::TENSOR_FLOAT16 && inputType != OperandType::TENSOR_FLOAT32) {
LOG(ERROR) << "Unsupported input operand type for UNIDIRECTIONAL_SEQUENCE_RNN op: "
<< toString(inputType);
return false;
}
NN_RET_CHECK(validateInputTypes(
context, {inputType, inputType, inputType, inputType, inputType, inputType, inputType,
inputType, inputType, inputType, inputType, inputType, OperandType::INT32,
OperandType::BOOL, OperandType::BOOL}));
if (context->getNumOutputs() == 1) {
NN_RET_CHECK(validateOutputTypes(context, {inputType}));
} else {
NN_RET_CHECK(validateOutputTypes(context, {inputType, inputType}));
}
return validateHalVersion(context, HalVersion::V1_2);
}
bool prepare(IOperationExecutionContext* context) {
int32_t mergeOutputs = context->getInputValue<bool>(kMergeOutputsParam);
if (mergeOutputs) {
NN_RET_CHECK_EQ(context->getNumOutputs(), 1);
} else {
NN_RET_CHECK_EQ(context->getNumOutputs(), 2);
}
// Check that none of the required inputs are omitted.
const std::vector<int> requiredInputs = {
kInputTensor, kFwWeightsTensor, kFwRecurrentWeightsTensor, kFwBiasTensor,
kFwHiddenStateTensor, kBwWeightsTensor, kBwRecurrentWeightsTensor, kBwBiasTensor,
kBwHiddenStateTensor, kActivationParam, kTimeMajorParam, kMergeOutputsParam,
};
for (const int requiredInput : requiredInputs) {
NN_RET_CHECK(!context->isOmittedInput(requiredInput))
<< "required input " << requiredInput << " is omitted";
}
Shape input = context->getInputShape(kInputTensor);
Shape fwWeights = context->getInputShape(kFwWeightsTensor);
Shape fwRecurrentWeights = context->getInputShape(kFwRecurrentWeightsTensor);
Shape fwBias = context->getInputShape(kFwBiasTensor);
Shape fwHiddenState = context->getInputShape(kFwHiddenStateTensor);
Shape bwWeights = context->getInputShape(kBwWeightsTensor);
Shape bwRecurrentWeights = context->getInputShape(kBwRecurrentWeightsTensor);
Shape bwBias = context->getInputShape(kBwBiasTensor);
Shape bwHiddenState = context->getInputShape(kBwHiddenStateTensor);
Shape auxInput = context->getInputShape(kAuxInputTensor);
Shape fwAuxWeights = context->getInputShape(kFwAuxWeightsTensor);
Shape bwAuxWeights = context->getInputShape(kBwAuxWeightsTensor);
const bool auxInputsAllOrNone = (context->isOmittedInput(kAuxInputTensor) &&
context->isOmittedInput(kFwAuxWeightsTensor) &&
context->isOmittedInput(kBwAuxWeightsTensor)) ||
(!context->isOmittedInput(kAuxInputTensor) &&
!context->isOmittedInput(kFwAuxWeightsTensor) &&
!context->isOmittedInput(kBwAuxWeightsTensor));
NN_RET_CHECK(auxInputsAllOrNone);
const bool hasAuxInputs = !context->isOmittedInput(kAuxInputTensor);
int32_t timeMajor = context->getInputValue<bool>(kTimeMajorParam);
const uint32_t batchSize =
timeMajor ? getSizeOfDimension(input, 1) : getSizeOfDimension(input, 0);
const uint32_t maxTime =
timeMajor ? getSizeOfDimension(input, 0) : getSizeOfDimension(input, 1);
const uint32_t fwNumUnits = getSizeOfDimension(fwWeights, 0);
const uint32_t bwNumUnits = getSizeOfDimension(bwWeights, 0);
const uint32_t inputSize = getSizeOfDimension(input, 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(input), 3);
NN_RET_CHECK_EQ(getNumberOfDimensions(fwWeights), 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(fwRecurrentWeights), 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(fwBias), 1);
NN_RET_CHECK_EQ(getNumberOfDimensions(fwHiddenState), 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(bwWeights), 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(bwRecurrentWeights), 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(bwBias), 1);
NN_RET_CHECK_EQ(getNumberOfDimensions(bwHiddenState), 2);
NN_RET_CHECK_EQ(inputSize, getSizeOfDimension(fwWeights, 1));
NN_RET_CHECK_EQ(fwNumUnits, getSizeOfDimension(fwBias, 0));
NN_RET_CHECK_EQ(fwNumUnits, getSizeOfDimension(fwRecurrentWeights, 0));
NN_RET_CHECK_EQ(fwNumUnits, getSizeOfDimension(fwRecurrentWeights, 1));
NN_RET_CHECK_EQ(batchSize, getSizeOfDimension(fwHiddenState, 0));
NN_RET_CHECK_EQ(fwNumUnits, getSizeOfDimension(fwHiddenState, 1));
NN_RET_CHECK_EQ(inputSize, getSizeOfDimension(bwWeights, 1));
NN_RET_CHECK_EQ(bwNumUnits, getSizeOfDimension(bwBias, 0));
NN_RET_CHECK_EQ(bwNumUnits, getSizeOfDimension(bwRecurrentWeights, 0));
NN_RET_CHECK_EQ(bwNumUnits, getSizeOfDimension(bwRecurrentWeights, 1));
NN_RET_CHECK_EQ(batchSize, getSizeOfDimension(bwHiddenState, 0));
NN_RET_CHECK_EQ(bwNumUnits, getSizeOfDimension(bwHiddenState, 1));
if (hasAuxInputs) {
NN_RET_CHECK_EQ(getNumberOfDimensions(auxInput), 3);
NN_RET_CHECK_EQ(getNumberOfDimensions(fwAuxWeights), 2);
NN_RET_CHECK_EQ(getNumberOfDimensions(bwAuxWeights), 2);
NN_RET_CHECK_EQ(getSizeOfDimension(auxInput, 0), getSizeOfDimension(input, 0));
NN_RET_CHECK_EQ(getSizeOfDimension(auxInput, 1), getSizeOfDimension(input, 1));
NN_RET_CHECK_EQ(getSizeOfDimension(fwAuxWeights, 0), fwNumUnits);
NN_RET_CHECK_EQ(getSizeOfDimension(fwAuxWeights, 1), getSizeOfDimension(auxInput, 2));
NN_RET_CHECK_EQ(getSizeOfDimension(bwAuxWeights, 0), bwNumUnits);
NN_RET_CHECK_EQ(getSizeOfDimension(bwAuxWeights, 1), getSizeOfDimension(auxInput, 2));
}
Shape fwOutput = context->getOutputShape(kFwOutputTensor);
fwOutput.dimensions.resize(3);
fwOutput.dimensions[0] = timeMajor ? maxTime : batchSize;
fwOutput.dimensions[1] = timeMajor ? batchSize : maxTime;
fwOutput.dimensions[2] = mergeOutputs ? fwNumUnits + bwNumUnits : fwNumUnits;
NN_RET_CHECK(context->setOutputShape(kFwOutputTensor, fwOutput));
if (!mergeOutputs) {
Shape bwOutput = context->getOutputShape(kBwOutputTensor);
bwOutput.dimensions.resize(3);
bwOutput.dimensions[0] = timeMajor ? maxTime : batchSize;
bwOutput.dimensions[1] = timeMajor ? batchSize : maxTime;
bwOutput.dimensions[2] = bwNumUnits;
NN_RET_CHECK(context->setOutputShape(kBwOutputTensor, bwOutput));
}
return true;
}
bool execute(IOperationExecutionContext* context) {
if (context->getInputType(kInputTensor) == OperandType::TENSOR_FLOAT16) {
executeTyped<_Float16>(context);
} else {
executeTyped<float>(context);
}
return true;
}
} // namespace bidirectional_sequence_rnn
NN_REGISTER_OPERATION(BIDIRECTIONAL_SEQUENCE_RNN, "BIDIRECTIONAL_SEQUENCE_RNN",
bidirectional_sequence_rnn::validate, bidirectional_sequence_rnn::prepare,
bidirectional_sequence_rnn::execute, .allowOmittedOperand = true);
} // namespace nn
} // namespace android