// Copyright 2015, ARM Limited
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "examples.h"
// Macro to compute the number of elements in an array. The count is derived
// from sizeof, so the argument must be a real array, not a pointer to one.
#define ARRAY_SIZE(Array) (sizeof(Array) / sizeof((Array)[0]))
// Number of `byte` elements in the code buffer that main() hands to the
// MacroAssembler.
#define BUF_SIZE (4096)
// Shorthand that lets code generators write `__ Instr(...)` in place of
// `masm->Instr(...)`. It expands to `masm->`, so a pointer named `masm` must
// be in scope wherever it is used.
#define __ masm->
/*
* This example adds two vectors with 1-byte elements using NEON instructions,
* and returns the results in the first vector.
*/
void GenerateAdd2Vectors(MacroAssembler* masm) {
  // Emits the body of a function with the following signature:
  //
  //   void add2_vectors(uint8_t *vecA, const uint8_t *vecB, unsigned size)
  //
  // Registers on entry:
  //   x0 <- vecA (pointer); the sums are written back through this pointer
  //   x1 <- vecB (pointer)
  //   w2 <- size (number of elements still to process)
  //
  // Both pointers are advanced and w2 is decremented as elements are consumed.
  Label vector_loop, scalar_loop, done;

  // ---- Phase 1: process 16 elements per iteration. ----
  __ Bind(&vector_loop);

  // With fewer than 16 elements left, switch to the element-wise loop.
  __ Cmp(w2, 16);
  __ B(lo, &scalar_loop);
  __ Sub(w2, w2, 16);

  // Load a chunk from each vector. x0 is deliberately not advanced by its
  // load: the store below writes to the same address and post-increments it.
  __ Ld1(v0.V16B(), MemOperand(x0));
  __ Ld1(v1.V16B(), MemOperand(x1, 16, PostIndex));
  __ Add(v0.V16B(), v0.V16B(), v1.V16B());
  __ St1(v0.V16B(), MemOperand(x0, 16, PostIndex));

  __ B(&vector_loop);

  // ---- Phase 2: process the leftover elements one at a time. ----
  __ Bind(&scalar_loop);

  // Nothing left to add: leave the function.
  __ Cbz(w2, &done);
  __ Sub(w2, w2, 1);

  // Same load/add/store pattern as above, on a single byte (w5 and w6 are used
  // as scratch registers).
  __ Ldrb(w5, MemOperand(x0));
  __ Ldrb(w6, MemOperand(x1, 1, PostIndex));
  __ Add(w5, w5, w6);
  __ Strb(w5, MemOperand(x0, 1, PostIndex));

  __ B(&scalar_loop);

  __ Bind(&done);
  __ Ret();
}
// Prints the first `num` elements of `vec` to stdout as decimal numbers,
// separated by ", " and wrapped in "( " ... " )", followed by a newline.
// An empty vector (num == 0) prints only the wrapper.
void PrintVector(const uint8_t *vec, unsigned num) {
  printf("( ");
  if (num != 0) {
    // Every element except the last is followed by a separator.
    const uint8_t* const last = vec + (num - 1);
    for (const uint8_t* cur = vec; cur != last; ++cur) {
      printf("%d, ", *cur);
    }
    printf("%d", *last);
  }
  printf(" )\n");
}
#ifndef TEST_EXAMPLES
int main(void) {
// Create and initialize the assembler.
byte assm_buf[BUF_SIZE];
MacroAssembler masm(assm_buf, BUF_SIZE);
// Generate native code for the example function.
Label add2_vectors;
masm.Bind(&add2_vectors);
GenerateAdd2Vectors(&masm);
masm.FinalizeCode();
// Initialize input data for the example function.
uint8_t vecA[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20};
uint8_t vecB[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31,
32, 33, 34, 35, 36};
uint8_t vecC[ARRAY_SIZE(vecA)];
// Check whether the number of elements in both vectors match.
assert(ARRAY_SIZE(vecA) == ARRAY_SIZE(vecB));
// Compute the result in C.
for (unsigned i = 0; i < ARRAY_SIZE(vecA); i++) {
vecC[i] = vecA[i] + vecB[i];
}
#ifdef USE_SIMULATOR
uintptr_t vecA_addr = reinterpret_cast<uintptr_t>(vecA);
uintptr_t vecB_addr = reinterpret_cast<uintptr_t>(vecB);
// Configure register environment in the simulator.
Decoder decoder;
Simulator simulator(&decoder);
simulator.set_xreg(0, vecA_addr);
simulator.set_xreg(1, vecB_addr);
simulator.set_xreg(2, ARRAY_SIZE(vecA));
PrintVector(vecA, ARRAY_SIZE(vecA));
printf(" +\n");
PrintVector(vecB, ARRAY_SIZE(vecB));
// Run the example function in the simulator.
simulator.RunFrom(masm.GetLabelAddress<Instruction*>(&add2_vectors));
printf(" =\n");
PrintVector(vecA, ARRAY_SIZE(vecA));
#else
// Placeholder to run test natively.
printf("Running tests natively is not supported yet.\n");
return 0;
#endif // USE_SIMULATOR
// Check that the computed value in NEON matches the C version.
for (unsigned i = 0; i < ARRAY_SIZE(vecA); i++) {
assert(vecC[i] == vecA[i]);
}
return 0;
}
#endif // TEST_EXAMPLES