/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "Operations.h" #include "CpuOperationUtils.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" namespace android { namespace nn { // If possible we will use this static buffer for the tensor. static constexpr size_t kStaticBufferSize = 1605632; static char static_scratch_buffer[kStaticBufferSize]; // executionMutex is used to protect concurrent access of the static_scratch_buffer // and other non-threadsafe resources like gemmlowp::GemmContext. // std::mutex is safe for pthreads on Android. static std::mutex executionMutex; #define ANDROID_NN_CONV_PARAMETERS(Type) \ uint32_t height = getSizeOfDimension(inputShape, 1); \ uint32_t width = getSizeOfDimension(inputShape, 2); \ uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \ uint32_t filterWidth = getSizeOfDimension(filterShape, 2); \ uint32_t outHeight = getSizeOfDimension(outputShape, 1); \ uint32_t outWidth = getSizeOfDimension(outputShape, 2); \ uint32_t inDepth = getSizeOfDimension(inputShape, 3); \ \ uint32_t paddingHeight = (uint32_t)padding_top; \ uint32_t paddingWidth = (uint32_t)padding_left; \ \ tflite::Dims<4> im2colDim; \ im2colDim.sizes[3] = (int)getSizeOfDimension(outputShape, 0); \ im2colDim.sizes[2] = (int)getSizeOfDimension(outputShape, 1); \ im2colDim.sizes[1] = (int)getSizeOfDimension(outputShape, 2); \ im2colDim.sizes[0] = (int)inDepth * filterHeight * filterWidth; \ \ im2colDim.strides[0] = 1; \ for (int i=1; i<4; i++) { \ im2colDim.strides[i] = im2colDim.strides[i-1] * im2colDim.sizes[i-1]; \ } \ \ Type* im2colData = nullptr; \ uint64_t im2colByteSize = sizeof(Type); \ std::unique_ptr<Type[]> im2colGuard; \ for (int i=0; i<4; i++) { \ im2colByteSize *= im2colDim.sizes[i]; \ } \ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ if (im2colByteSize >= 0x7fffffff) { \ LOG(ERROR) << "Conv size is too large, not enough memory"; \ return false; \ } \ if (im2colByteSize <= kStaticBufferSize) { \ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ } else { \ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ if (im2colData == nullptr) { \ LOG(ERROR) << "Conv size is too large, not enough memory"; \ return false; \ } \ im2colGuard.reset(im2colData); \ } bool convFloat32(const float* inputData, const Shape& inputShape, const float* filterData, const Shape& filterShape, const float* biasData, const Shape& biasShape, int32_t padding_left, int32_t padding_right, int32_t padding_top, int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t activation, float* outputData, const Shape& outputShape) { ANDROID_NN_CONV_PARAMETERS(float) float output_activation_min, output_activation_max; CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max); // Prevent concurrent executions that may access the scratch buffer. std::unique_lock<std::mutex> lock(executionMutex); tflite::optimized_ops::Conv( inputData, convertShapeToDims(inputShape), filterData, convertShapeToDims(filterShape), biasData, convertShapeToDims(biasShape), stride_width, stride_height, paddingWidth, paddingHeight, output_activation_min, output_activation_max, outputData, convertShapeToDims(outputShape), im2colData, im2colDim); return true; } bool convQuant8(const uint8_t* inputData, const Shape& inputShape, const uint8_t* filterData, const Shape& filterShape, const int32_t* biasData, const Shape& biasShape, int32_t padding_left, int32_t padding_right, int32_t padding_top, int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t activation, uint8_t* outputData, const Shape& outputShape) { ANDROID_NN_CONV_PARAMETERS(uint8_t) int32_t inputOffset = -inputShape.offset; int32_t filterOffset = -filterShape.offset; int32_t outputOffset = outputShape.offset; float real_multiplier = 0.0; int32_t output_multiplier = 0; int32_t output_shift = 0; int32_t output_activation_min = 0; int32_t output_activation_max = 0; if (!GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape, &real_multiplier) || !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)){ return false; } CalculateActivationRangeUint8(activation, outputShape, &output_activation_min, &output_activation_max); static gemmlowp::GemmContext gemm_context; // Prevent concurrent executions that may access the scratch buffer and // gemm_context. std::unique_lock<std::mutex> lock(executionMutex); // Alow gemmlowp automatically decide how many threads to use. gemm_context.set_max_num_threads(0); tflite::optimized_ops::Conv( inputData, convertShapeToDims(inputShape), inputOffset, filterData, convertShapeToDims(filterShape), filterOffset, biasData, convertShapeToDims(biasShape), stride_width, stride_height, paddingWidth, paddingHeight, outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max, outputData, convertShapeToDims(outputShape), im2colData, im2colDim, &gemm_context); return true; } #undef ANDROID_NN_CONV_PARAMETERS } // namespace nn } // namespace android