/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
#include "helloneon-intrinsics.h"
#include <arm_neon.h>

/* this source file should only be compiled by Android.mk when targeting
 * the armeabi-v7a ABI, and should be built in NEON mode
 */

/*
 * 16-bit FIR filter implemented with NEON intrinsics.
 *
 * For every nn in [0, width) this computes
 *
 *     sum        = SUM over mm in [0, kernelSize) of
 *                      kernel[mm] * input[nn - kernelSize/2 + mm]
 *     output[nn] = (short)((sum + 0x8000) >> 16)
 *
 * i.e. the 32-bit accumulator is rounded (0x8000 is half of 1 << 16) and
 * its upper 16 bits are stored.
 *
 * output     - receives 'width' samples.
 * input      - sample buffer. Because the window is centred on nn, elements
 *              input[-kernelSize/2] .. input[width - 1 - kernelSize/2 +
 *              kernelSize - 1] are read; the caller must make all of them
 *              valid (so 'input' has to point *into* a larger buffer).
 * kernel     - 'kernelSize' filter coefficients.
 * width      - number of output samples; nothing is written if <= 0.
 * kernelSize - number of taps; if <= 0 every output sample is 0.
 *
 * The accumulator is a plain 32-bit int; no saturation is performed.
 */
void
fir_filter_neon_intrinsics(short *output, const short* input, const short* kernel, int width, int kernelSize)
{
#if 1
    const int offset     = -kernelSize/2;
    /* Number of complete groups of four taps handled by the vector loop. */
    const int vec_blocks = kernelSize/4;
    /* First tap that is left over for the scalar loop.  For kernelSize >= 0
     * this equals kernelSize - (kernelSize & 3); for a negative kernelSize
     * it is >= kernelSize, so the scalar loop is skipped instead of
     * indexing the arrays with negative tap numbers. */
    const int tail_start = vec_blocks*4;
    int nn;

    for (nn = 0; nn < width; nn++) {
        /* Start of the input window for this output sample. */
        const int   base    = nn + offset;
        int32x4_t   sum_vec = vdupq_n_s32(0);
        int         sum     = 0;
        int         mm;

        /* Four taps per iteration: widening 16x16 -> 32 bit
         * multiply-accumulate into four independent partial sums. */
        for (mm = 0; mm < vec_blocks; mm++) {
            int16x4_t kernel_vec = vld1_s16(kernel + mm*4);
            int16x4_t input_vec  = vld1_s16(input + (base + mm*4));
            sum_vec = vmlal_s16(sum_vec, kernel_vec, input_vec);
        }

        /* Horizontal add of the four partial sums. */
        sum += vgetq_lane_s32(sum_vec, 0);
        sum += vgetq_lane_s32(sum_vec, 1);
        sum += vgetq_lane_s32(sum_vec, 2);
        sum += vgetq_lane_s32(sum_vec, 3);

        /* Remaining 0..3 taps when kernelSize is not a multiple of four. */
        for (mm = tail_start; mm < kernelSize; mm++) {
            sum += kernel[mm] * input[base + mm];
        }

        output[nn] = (short)((sum + 0x8000) >> 16);
    }
#else /* for comparison purposes only */
    int nn, offset = -kernelSize/2;

    for (nn = 0; nn < width; nn++) {
        int sum = 0;
        int mm;
        for (mm = 0; mm < kernelSize; mm++) {
            sum += kernel[mm]*input[nn+offset+mm];
        }
        output[nn] = (short)((sum + 0x8000) >> 16);
    }
#endif
}