/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "QuantizedLSTM.h"

#include "NeuralNetworksWrapper.h"

#include "gmock/gmock-matchers.h"
#include "gtest/gtest.h"

namespace android {
namespace nn {
namespace wrapper {

namespace {

// Type, shape, and quantization parameters for a single model operand.
struct OperandTypeParams {
    Type type;
    std::vector<uint32_t> shape;
    float scale;
    int32_t zeroPoint;

    OperandTypeParams(Type type, std::vector<uint32_t> shape, float scale, int32_t zeroPoint)
        : type(type), shape(shape), scale(scale), zeroPoint(zeroPoint) {}
};

}  // namespace

using ::testing::Each;
using ::testing::ElementsAreArray;
using ::testing::FloatNear;
using ::testing::Matcher;

class QuantizedLSTMOpModel {
   public:
    QuantizedLSTMOpModel(const std::vector<OperandTypeParams>& inputOperandTypeParams) {
        std::vector<uint32_t> inputs;

        for (int i = 0; i < NUM_INPUTS; ++i) {
            const auto& curOTP = inputOperandTypeParams[i];
            OperandType curType(curOTP.type, curOTP.shape, curOTP.scale, curOTP.zeroPoint);
            inputs.push_back(model_.addOperand(&curType));
        }

        // The input tensor has shape {numBatches, inputSize}, so the input
        // size is the second dimension, not the first.
        const uint32_t numBatches = inputOperandTypeParams[0].shape[0];
        inputSize_ = inputOperandTypeParams[0].shape[1];
        const uint32_t outputSize =
                inputOperandTypeParams[QuantizedLSTMCell::kPrevCellStateTensor].shape[1];
        outputSize_ = outputSize;

        std::vector<uint32_t> outputs;
        OperandType cellStateOutOperandType(Type::TENSOR_QUANT16_SYMM, {numBatches, outputSize},
                                            1. / 2048., 0);
        outputs.push_back(model_.addOperand(&cellStateOutOperandType));
        OperandType outputOperandType(Type::TENSOR_QUANT8_ASYMM, {numBatches, outputSize},
                                      1. / 128., 128);
        outputs.push_back(model_.addOperand(&outputOperandType));

        model_.addOperation(ANEURALNETWORKS_QUANTIZED_16BIT_LSTM, inputs, outputs);
        model_.identifyInputsAndOutputs(inputs, outputs);

        initializeInputData(inputOperandTypeParams[QuantizedLSTMCell::kInputTensor], &input_);
        initializeInputData(inputOperandTypeParams[QuantizedLSTMCell::kPrevOutputTensor],
                            &prevOutput_);
        initializeInputData(inputOperandTypeParams[QuantizedLSTMCell::kPrevCellStateTensor],
                            &prevCellState_);

        cellStateOut_.resize(numBatches * outputSize, 0);
        output_.resize(numBatches * outputSize, 0);

        model_.finish();
    }
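
    // Note on the quantization scheme (a reading aid, not part of the original
    // test): for a TENSOR_QUANT8_ASYMM operand, realValue = scale *
    // (quantizedValue - zeroPoint). With the output scale 1 / 128 and zero
    // point 128 chosen above, the uint8 range [0, 255] maps onto
    // [-1.0, 127/128], which covers the tanh-bounded range of an LSTM output.
    // Likewise, the TENSOR_QUANT16_SYMM cell state with scale 1 / 2048 and
    // zero point 0 maps int16 onto roughly [-16.0, 16.0).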

    void invoke() {
        ASSERT_TRUE(model_.isValid());

        Compilation compilation(&model_);
        ASSERT_EQ(compilation.finish(), Result::NO_ERROR);
        Execution execution(&compilation);

        // Set all the inputs.
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kInputTensor, input_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kInputToInputWeightsTensor,
                                 inputToInputWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kInputToForgetWeightsTensor,
                                 inputToForgetWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kInputToCellWeightsTensor,
                                 inputToCellWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kInputToOutputWeightsTensor,
                                 inputToOutputWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kRecurrentToInputWeightsTensor,
                                 recurrentToInputWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kRecurrentToForgetWeightsTensor,
                                 recurrentToForgetWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kRecurrentToCellWeightsTensor,
                                 recurrentToCellWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kRecurrentToOutputWeightsTensor,
                                 recurrentToOutputWeights_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kInputGateBiasTensor,
                                 inputGateBias_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kForgetGateBiasTensor,
                                 forgetGateBias_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kCellGateBiasTensor, cellGateBias_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kOutputGateBiasTensor,
                                 outputGateBias_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kPrevCellStateTensor,
                                 prevCellState_),
                  Result::NO_ERROR);
        ASSERT_EQ(setInputTensor(&execution, QuantizedLSTMCell::kPrevOutputTensor, prevOutput_),
                  Result::NO_ERROR);
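
        // The QuantizedLSTMCell::k*Tensor constants used above are positional
        // indices into the model's input list, so they only line up because
        // the constructor added the operands in exactly that order.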

        // Set all the outputs.
        ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kCellStateOutTensor,
                                  &cellStateOut_),
                  Result::NO_ERROR);
        ASSERT_EQ(setOutputTensor(&execution, QuantizedLSTMCell::kOutputTensor, &output_),
                  Result::NO_ERROR);

        ASSERT_EQ(execution.compute(), Result::NO_ERROR);

        // Put state outputs into inputs for the next step.
        prevOutput_ = output_;
        prevCellState_ = cellStateOut_;
    }

    int inputSize() { return inputSize_; }

    int outputSize() { return outputSize_; }

    void setInput(const std::vector<uint8_t>& input) { input_ = input; }

    void setWeightsAndBiases(std::vector<uint8_t> inputToInputWeights,
                             std::vector<uint8_t> inputToForgetWeights,
                             std::vector<uint8_t> inputToCellWeights,
                             std::vector<uint8_t> inputToOutputWeights,
                             std::vector<uint8_t> recurrentToInputWeights,
                             std::vector<uint8_t> recurrentToForgetWeights,
                             std::vector<uint8_t> recurrentToCellWeights,
                             std::vector<uint8_t> recurrentToOutputWeights,
                             std::vector<int32_t> inputGateBias,
                             std::vector<int32_t> forgetGateBias,
                             std::vector<int32_t> cellGateBias,  //
                             std::vector<int32_t> outputGateBias) {
        inputToInputWeights_ = inputToInputWeights;
        inputToForgetWeights_ = inputToForgetWeights;
        inputToCellWeights_ = inputToCellWeights;
        inputToOutputWeights_ = inputToOutputWeights;
        recurrentToInputWeights_ = recurrentToInputWeights;
        recurrentToForgetWeights_ = recurrentToForgetWeights;
        recurrentToCellWeights_ = recurrentToCellWeights;
        recurrentToOutputWeights_ = recurrentToOutputWeights;
        inputGateBias_ = inputGateBias;
        forgetGateBias_ = forgetGateBias;
        cellGateBias_ = cellGateBias;
        outputGateBias_ = outputGateBias;
    }

    // Fills *vec with the operand's zero point, i.e. with quantized zeros.
    template <typename T>
    void initializeInputData(OperandTypeParams params, std::vector<T>* vec) {
        int size = 1;
        for (int d : params.shape) {
            size *= d;
        }
        vec->clear();
        vec->resize(size, params.zeroPoint);
    }

    std::vector<uint8_t> getOutput() { return output_; }

   private:
    static constexpr int NUM_INPUTS = 15;
    static constexpr int NUM_OUTPUTS = 2;

    Model model_;
    // Inputs
    std::vector<uint8_t> input_;
    std::vector<uint8_t> inputToInputWeights_;
    std::vector<uint8_t> inputToForgetWeights_;
    std::vector<uint8_t> inputToCellWeights_;
    std::vector<uint8_t> inputToOutputWeights_;
    std::vector<uint8_t> recurrentToInputWeights_;
    std::vector<uint8_t> recurrentToForgetWeights_;
    std::vector<uint8_t> recurrentToCellWeights_;
    std::vector<uint8_t> recurrentToOutputWeights_;
    std::vector<int32_t> inputGateBias_;
    std::vector<int32_t> forgetGateBias_;
    std::vector<int32_t> cellGateBias_;
    std::vector<int32_t> outputGateBias_;
    std::vector<int16_t> prevCellState_;
    std::vector<uint8_t> prevOutput_;
    // Outputs
    std::vector<int16_t> cellStateOut_;
    std::vector<uint8_t> output_;

    int inputSize_;
    int outputSize_;

    template <typename T>
    Result setInputTensor(Execution* execution, int tensor, const std::vector<T>& data) {
        return execution->setInput(tensor, data.data(), sizeof(T) * data.size());
    }
    template <typename T>
    Result setOutputTensor(Execution* execution, int tensor, std::vector<T>* data) {
        return execution->setOutput(tensor, data->data(), sizeof(T) * data->size());
    }
};
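
// The fixture below replays a multi-step sequence through the single-step
// cell. Inputs and golden outputs are stored batch-major, as
// numBatches x (sequenceLength * tensorSize), so each step gathers one
// tensorSize-long slice per batch before invoking the model. For example,
// with inputSize = 2 the batch row {154, 166, 166, 179, 141, 141} unpacks
// into three steps: {154, 166}, {166, 179}, {141, 141}.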

class QuantizedLstmTest : public ::testing::Test {
   protected:
    void VerifyGoldens(const std::vector<std::vector<uint8_t>>& input,
                       const std::vector<std::vector<uint8_t>>& output,
                       QuantizedLSTMOpModel* lstm) {
        const int numBatches = input.size();
        EXPECT_GT(numBatches, 0);
        const int inputSize = lstm->inputSize();
        EXPECT_GT(inputSize, 0);
        const int inputSequenceSize = input[0].size() / inputSize;
        EXPECT_GT(inputSequenceSize, 0);
        for (int i = 0; i < inputSequenceSize; ++i) {
            // Gather step i of every batch into one {numBatches, inputSize} input.
            std::vector<uint8_t> inputStep;
            for (int b = 0; b < numBatches; ++b) {
                const uint8_t* batchStart = input[b].data() + i * inputSize;
                const uint8_t* batchEnd = batchStart + inputSize;
                inputStep.insert(inputStep.end(), batchStart, batchEnd);
            }
            lstm->setInput(inputStep);
            lstm->invoke();

            const int outputSize = lstm->outputSize();
            std::vector<float> expected;
            for (int b = 0; b < numBatches; ++b) {
                const uint8_t* goldenBatchStart = output[b].data() + i * outputSize;
                const uint8_t* goldenBatchEnd = goldenBatchStart + outputSize;
                expected.insert(expected.end(), goldenBatchStart, goldenBatchEnd);
            }
            EXPECT_THAT(lstm->getOutput(), ElementsAreArray(expected));
        }
    }
};

// Inputs and weights in this test are random; the test only checks that the
// outputs are equal to the outputs obtained by running the TF Lite version of
// the quantized LSTM on the same inputs.
TEST_F(QuantizedLstmTest, BasicQuantizedLstmTest) {
    const int numBatches = 2;
    const int inputSize = 2;
    const int outputSize = 4;

    const float weightsScale = 0.00408021;
    const int weightsZeroPoint = 100;

    QuantizedLSTMOpModel lstm({
            // input
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {numBatches, inputSize}, 1. / 128., 128),
            // inputToInputWeights
            // inputToForgetWeights
            // inputToCellWeights
            // inputToOutputWeights
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, inputSize}, weightsScale,
                              weightsZeroPoint),
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, inputSize}, weightsScale,
                              weightsZeroPoint),
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, inputSize}, weightsScale,
                              weightsZeroPoint),
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, inputSize}, weightsScale,
                              weightsZeroPoint),
            // recurrentToInputWeights
            // recurrentToForgetWeights
            // recurrentToCellWeights
            // recurrentToOutputWeights
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, outputSize}, weightsScale,
                              weightsZeroPoint),
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, outputSize}, weightsScale,
                              weightsZeroPoint),
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, outputSize}, weightsScale,
                              weightsZeroPoint),
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {outputSize, outputSize}, weightsScale,
                              weightsZeroPoint),
            // inputGateBias
            // forgetGateBias
            // cellGateBias
            // outputGateBias
            OperandTypeParams(Type::TENSOR_INT32, {outputSize}, weightsScale / 128., 0),
            OperandTypeParams(Type::TENSOR_INT32, {outputSize}, weightsScale / 128., 0),
            OperandTypeParams(Type::TENSOR_INT32, {outputSize}, weightsScale / 128., 0),
            OperandTypeParams(Type::TENSOR_INT32, {outputSize}, weightsScale / 128., 0),
            // prevCellState
            OperandTypeParams(Type::TENSOR_QUANT16_SYMM, {numBatches, outputSize}, 1. / 2048., 0),
            // prevOutput
            OperandTypeParams(Type::TENSOR_QUANT8_ASYMM, {numBatches, outputSize}, 1. / 128., 128),
    });
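
    // The weight values below are raw uint8 under the scheme above: e.g. the
    // value 146 with weightsScale ~= 0.00408 and zero point 100 represents
    // roughly 0.00408 * (146 - 100) ~= 0.188. The biases are int32 with scale
    // weightsScale / 128, i.e. inputScale * weightsScale, matching the scale
    // of the quantized input-times-weights accumulator they are added to.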

    lstm.setWeightsAndBiases(
            // inputToInputWeights
            {146, 250, 235, 171, 10, 218, 171, 108},
            // inputToForgetWeights
            {24, 50, 132, 179, 158, 110, 3, 169},
            // inputToCellWeights
            {133, 34, 29, 49, 206, 109, 54, 183},
            // inputToOutputWeights
            {195, 187, 11, 99, 109, 10, 218, 48},
            // recurrentToInputWeights
            {254, 206, 77, 168, 71, 20, 215, 6, 223, 7, 118, 225, 59, 130, 174, 26},
            // recurrentToForgetWeights
            {137, 240, 103, 52, 68, 51, 237, 112, 0, 220, 89, 23, 69, 4, 207, 253},
            // recurrentToCellWeights
            {172, 60, 205, 65, 14, 0, 140, 168, 240, 223, 133, 56, 142, 64, 246, 216},
            // recurrentToOutputWeights
            {106, 214, 67, 23, 59, 158, 45, 3, 119, 132, 49, 205, 129, 218, 11, 98},
            // inputGateBias
            {-7876, 13488, -726, 32839},
            // forgetGateBias
            {9206, -46884, -11693, -38724},
            // cellGateBias
            {39481, 48624, 48976, -21419},
            // outputGateBias
            {-58999, -17050, -41852, -40538});

    // LSTM input is stored as numBatches x (sequenceLength x inputSize) vector.
    std::vector<std::vector<uint8_t>> lstmInput;
    // clang-format off
    lstmInput = {{154, 166,
                  166, 179,
                  141, 141},
                 {100, 200,
                   50, 150,
                  111, 222}};
    // clang-format on

    // LSTM output is stored as numBatches x (sequenceLength x outputSize) vector.
    std::vector<std::vector<uint8_t>> lstmGoldenOutput;
    // clang-format off
    lstmGoldenOutput = {{136, 150, 140, 115,
                         140, 151, 146, 112,
                         139, 153, 146, 114},
                        {135, 152, 138, 112,
                         136, 156, 142, 112,
                         141, 154, 146, 108}};
    // clang-format on
    VerifyGoldens(lstmInput, lstmGoldenOutput, &lstm);
}

}  // namespace wrapper
}  // namespace nn
}  // namespace android