/*
* Copyright (C) 2007 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <sched.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/mman.h>
// Tuning constants shared by the benchmarks in this file. The "#if 0" branch
// is a disabled alternate set of values; the "#else" branch is the one that
// is compiled.
//  - DCACHE_SIZE:  threshold used to choose between the "cached" and
//                  "uncached" speed estimates and to size the cache
//                  preload/flush passes (presumably the data cache size in
//                  bytes -- confirm for the target CPU).
//  - CPU_FREQ_EST: estimated CPU frequency; cpufreq_test() multiplies it by
//                  1000000 for its loop count and reports its result in MHz.
//  - BRANCH_CYCLE: cycles charged to the loop branch by cpufreq_test().
#if 0
const int DCACHE_SIZE = 8*1024;
const int CPU_FREQ_EST = 195;
const int BRANCH_CYCLE = 3;
#else
const int DCACHE_SIZE = 32*1024;
const int CPU_FREQ_EST = 384;
const int BRANCH_CYCLE = 2;
#endif
/* Time value expressed in nanoseconds. */
typedef long long nsecs_t;

/*
 * Reads CLOCK_MONOTONIC and returns it as a nanosecond count.
 *
 * The timespec is zeroed before the call and the result of clock_gettime()
 * is not checked, so a failed read yields 0.
 */
static nsecs_t system_time()
{
    struct timespec now;
    now.tv_nsec = 0;
    now.tv_sec = 0;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const nsecs_t whole_seconds = (nsecs_t)now.tv_sec;
    return whole_seconds * 1000000000LL + now.tv_nsec;
}
/*
 * Measures how long an empty do/while countdown of `count` iterations takes,
 * in nanoseconds. The benchmarks subtract this value from their timings so
 * that only the work inside the loop body is reported.
 *
 * Declared noinline so the loop is always compiled as a separate function.
 * NOTE: the loop is a do/while, so it runs at least once; count == 0 would
 * wrap around instead of returning immediately. Callers in this file always
 * pass a positive repeat count.
 */
nsecs_t loop_overhead(size_t count) __attribute__((noinline));
nsecs_t loop_overhead(size_t count)
{
// Start with the negated start time; adding the end time yields the delta.
nsecs_t overhead = -system_time();
do {
// Empty asm with a "memory" clobber: a compiler barrier that keeps the
// otherwise empty loop from being optimized away.
asm volatile ("":::"memory");
} while (--count);
overhead += system_time();
return overhead;
}
/*
 * Touches one byte every 32 bytes in [addr, addr + s).
 *
 * The reads go through a volatile pointer so the compiler has to perform
 * them; the values read are discarded. Nothing is written.
 */
static void preload(volatile char* addr, size_t s)
{
    const size_t stride = 32;
    size_t offset = 0;
    while (offset < s) {
        char touched = addr[offset];
        (void)touched;
        offset += stride;
    }
}
static void usage(char* p) {
printf( "Usage: %s <test> <options>\n"
"<test> is one of the following:\n"
" cpufreq\n"
" memcpy [perf [fast] | test]\n"
" memset [perf | test]\n"
" memcmp [perf | test]\n"
" strlen [perf | test]\n"
" malloc [fill]\n"
" madvise\n"
" resampler\n"
" crash\n"
" stack (stack smasher)\n"
" crawl\n"
, p);
}
// Entry points of the individual tests, dispatched by name from main().
// main() passes argc-1 / argv+1, so argv[0] is the test name and argv[1..]
// are the test's own options. A non-zero return value makes main() print the
// usage text.
int cpufreq_test(int argc, char** argv);
int memcpy_test(int argc, char** argv);
int memset_test(int argc, char** argv);
int memcmp_test(int argc, char** argv);
int strlen_test(int argc, char** argv);
int malloc_test(int argc, char** argv);
int madvise_test(int argc, char** argv);
int crash_test(int argc, char** argv);
int stack_smasher_test(int argc, char** argv);
int crawl_test(int argc, char** argv);
#if 0
#pragma mark -
#pragma mark main
#endif
/*
 * Program entry point: looks up argv[1] in the table of known tests and runs
 * the matching one with the argument vector shifted by one.
 *
 * The usage text is printed when no test name is given, when the name is
 * unknown, or when the selected test returns non-zero. The process exit
 * status is always 0.
 */
int main(int argc, char** argv)
{
    typedef int (*test_entry_t)(int, char**);
    static const struct {
        const char* name;
        test_entry_t run;
    } TESTS[] = {
        { "cpufreq", cpufreq_test },
        { "memcpy",  memcpy_test },
        { "memset",  memset_test },
        { "memcmp",  memcmp_test },
        { "strlen",  strlen_test },
        { "malloc",  malloc_test },
        { "madvise", madvise_test },
        { "crash",   crash_test },
        { "stack",   stack_smasher_test },
        { "crawl",   crawl_test },
    };
    const size_t test_count = sizeof(TESTS) / sizeof(TESTS[0]);

    if (argc == 1) {
        usage(argv[0]);
        return 0;
    }

    // Stays -1 (=> usage) unless a table entry matches and reports success.
    int status = -1;
    for (size_t idx = 0; idx < test_count; idx++) {
        if (strcmp(argv[1], TESTS[idx].name) == 0) {
            status = TESTS[idx].run(argc - 1, argv + 1);
            break;
        }
    }

    if (status != 0) {
        usage(argv[0]);
    }
    return 0;
}
#if 0
#pragma mark -
#pragma mark memcpy
#endif
// Single-case validators used by the "test" modes of memcpy_test() and
// memset_test(). Each returns the number of problems it detected (0 == OK).
int validate_memcpy(char* s, char* d, size_t size);
int validate_memset(char* s, char c, size_t size);
/*
 * memcpy() benchmark ("perf", the default) and validation ("test") driver.
 *
 * argv[1]: optional, "perf" or "test"; anything else returns -1 so that
 *          main() prints the usage text.
 * argv[2]: optional, "fast" (perf mode only) restricts the run to a handful
 *          of representative sizes.
 *
 * Returns 0 once a mode has run, including when the working buffers cannot
 * be allocated (a non-zero value is reserved for "bad arguments").
 */
int memcpy_test(int argc, char** argv)
{
    int option = 0;
    if (argc >= 2) {
        if (!strcmp(argv[1], "perf")) option = 0;
        else if (!strcmp(argv[1], "test")) option = 1;
        else return -1;
    }

    const int MAX_SIZE = 1024*1024; // 1MB
    // Rough throughput guesses (bytes/s) derived from CPU_FREQ_EST; they are
    // only used to pick a repeat count for each measurement.
    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024;
    const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024;
    // The perf loop reads from (src rounded up to 32) + 28 + 2, i.e. up to
    // 61 bytes past src, for MAX_SIZE bytes. Pad by 64 so that the largest
    // copy stays inside the allocation.
    const size_t BUF_SIZE = MAX_SIZE + 64;

    char* src = (char*)malloc(BUF_SIZE);
    char* dst = (char*)malloc(BUF_SIZE);
    if (src == NULL || dst == NULL) {
        printf("memcpy test: unable to allocate %lu bytes\n", (unsigned long)BUF_SIZE);
        fflush(stdout);
        free(dst);
        free(src);
        return 0;
    }
    memset(src, 0, BUF_SIZE);
    memset(dst, 0, BUF_SIZE);

    if (option == 0) {
        const bool fast = (argc>=3 && !strcmp(argv[2], "fast"));
        printf("memcpy() performance test is running, please wait...\n");
        fflush(stdout);
        usleep(10000);
        setpriority(PRIO_PROCESS, 0, -20);

        static const int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
        const size_t FAST_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);

        struct result_t { int size; float res; };
        // The full (non-fast) sweep produces 44 sizes x 2 measurements.
        enum { MAX_RESULTS = 128 };
        result_t results[MAX_RESULTS];
        int nbr = 0;
        int size = 0;

        for (int i=0 ; nbr + 2 <= MAX_RESULTS ; i++) {
            if (!fast) {
                // Fine steps for small copies, then coarser, then doubling.
                if (size<128) size += 8;
                else if (size<1024) size += 128;
                else if (size<16384) size += 1024;
                else size <<= 1;
            } else {
                if (size_t(i) >= FAST_COUNT)
                    break;
                size = FAST_SIZES[i];
            }
            if (size > MAX_SIZE) {
                break;
            }

            // Aim for roughly half a second per measurement.
            const int REPEAT = (((size < DCACHE_SIZE) ?
                    (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
            const nsecs_t overhead = loop_overhead(REPEAT);

            // Deliberately awkward alignments: 32-byte aligned base plus 4
            // for the destination and plus 28 (then 30) for the source.
            char* ddd = (char*)((long(dst+31)&~31) + 4);
            char* sss = (char*)((long(src+31)&~31) + 28);

            for (int offset=0 ; offset<=2 ; offset +=2 ) {
                memcpy(dst, src, size); // just make sure to load the caches I/D
                nsecs_t t = -system_time();
                int count = REPEAT;
                do {
                    memcpy(ddd, sss+offset, size);
                } while (--count);
                t += system_time() - overhead;
                // Guard against a zero/negative elapsed time after the
                // overhead correction (would divide by zero).
                const float throughput = (t > 0) ?
                        (size*1000000000.0f*REPEAT) / (1024*1024*t) : 0.0f;
                results[nbr].size = size;
                results[nbr].res = throughput;
                nbr++;
            }
        }

        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
        for (int i=0 ; i<nbr ; i+=2) {
            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
        }
    } else if (option == 1) {
        printf("memcpy() validation test is running, please wait...\n");
        fflush(stdout);

        // Random source data that never contains 0x55, the canary value
        // validate_memcpy() pre-fills the destination with.
        char* curr = (char*)src;
        for (int i=0 ; i<MAX_SIZE ; i++) {
            char c = rand();
            *curr++ = c != 0x55 ? c : 0xAA;
        }

        char* s = src + 1024;
        char* d = dst + 1024;
        int nb = 0;
        // Every size below 4096 with every source/destination misalignment
        // below 32; stop at the first failing combination.
        for (int size=0 ; size<4096 && !nb ; size++) {
            nb += validate_memcpy(s, d, size);
            for (int o=1 ; o<32 && !nb ; o++) {
                nb += validate_memcpy(s+o, d, size);
                nb += validate_memcpy(s, d+o, size);
                nb += validate_memcpy(s+o, d+o, size);
            }
        }
        if (nb) printf("%d error(s) found\n", nb);
        else printf("success!\n");
    }

    fflush(stdout);
    free(dst);
    free(src);
    return 0;
}
/*
 * Validates one memcpy() call copying `size` bytes from `s` to `d`.
 *
 * The destination, together with 4 guard bytes on each side, is pre-filled
 * with 0x55. After the copy the function checks that the destination matches
 * the source and that the guard bytes are intact.
 *
 * Requirements: d[-4 .. size+3] must be writable and s[0 .. size-1] readable;
 * the two ranges must not overlap.
 *
 * Returns the number of problems detected (0 on success).
 */
int validate_memcpy(char* s, char* d, size_t size)
{
    int nberr = 0;

    memset(d-4, 0x55, size+8);
    // Copy source -> destination. (The arguments used to be swapped, which
    // overwrote the source with the 0x55 fill and made the comparison below
    // pass unconditionally.)
    memcpy(d, s, size);

    if (memcmp(s,d,size)) {
        printf("*** memcpy(%p,%p,%lu) destination != source\n",
                (void*)d, (void*)s, (unsigned long)size);
        nberr++;
    }

    int tail_intact = 1;
    int head_intact = 1;
    for (int k=0 ; k<4 ; k++) {
        if (d[size+k] != 0x55) tail_intact = 0;
        if (d[-1-k] != 0x55) head_intact = 0;
    }
    if (!tail_intact) {
        printf("*** memcpy(%p,%p,%lu) clobbered past end of destination!\n",
                (void*)d, (void*)s, (unsigned long)size);
        nberr++;
    }
    if (!head_intact) {
        printf("*** memcpy(%p,%p,%lu) clobbered before start of destination!\n",
                (void*)d, (void*)s, (unsigned long)size);
        nberr++;
    }
    return nberr;
}
#if 0
#pragma mark -
#pragma mark memset
#endif
/*
 * memset() benchmark ("perf", the default) and validation ("test") driver.
 *
 * argv[1]: optional, "perf" or "test"; anything else returns -1 so that
 *          main() prints the usage text.
 *
 * Returns 0 once a mode has run, including when the working buffer cannot
 * be allocated (a non-zero value is reserved for "bad arguments").
 */
int memset_test(int argc, char** argv)
{
    int option = 0;
    if (argc >= 2) {
        if (!strcmp(argv[1], "perf")) option = 0;
        else if (!strcmp(argv[1], "test")) option = 1;
        else return -1;
    }

    const int MAX_SIZE = 1024*1024; // 1MB
    // Rough throughput guesses (bytes/s) derived from CPU_FREQ_EST; they are
    // only used to pick a repeat count for each measurement.
    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024;
    const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024;
    const size_t BUF_SIZE = MAX_SIZE+4+8;

    char* dst = (char*)malloc(BUF_SIZE);
    if (dst == NULL) {
        printf("memset test: unable to allocate %lu bytes\n", (unsigned long)BUF_SIZE);
        fflush(stdout);
        return 0;
    }
    // Give the buffer defined contents before preload() reads from it.
    memset(dst, 0, BUF_SIZE);

    if (option == 0) {
        printf("memset() performance test is running, please wait...\n");
        fflush(stdout);
        usleep(10000);
        setpriority(PRIO_PROCESS, 0, -20);

        static const int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
        struct result_t { int size; float res; };
        result_t results[FAST_SIZES_COUNT*2];
        int nbr = 0;

        for (size_t i=0 ; i<FAST_SIZES_COUNT ; i++) {
            const int size = FAST_SIZES[i];
            if (size > MAX_SIZE) {
                break;
            }
            // Repeat count sized from the speed estimate (about one second
            // of work if the estimate is accurate).
            const int REPEAT = (((size < DCACHE_SIZE) ?
                    (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
            const nsecs_t overhead = loop_overhead(REPEAT);

            // Two measurements per size: first after touching 4x the cache
            // size (to evict the target), then after touching the target.
            for (int j=0 ; j<2 ; j++) {
                if (j==0) preload(dst, DCACHE_SIZE*4); // flush D
                else preload(dst, size); // load D
                nsecs_t t = -system_time();
                size_t count = REPEAT;
                do {
                    memset(dst, 0, size);
                } while (--count);
                t += system_time() - overhead;
                // Guard against a zero/negative elapsed time after the
                // overhead correction (would divide by zero).
                const float throughput = (t > 0) ?
                        (size*1000000000.0f*REPEAT) / (1024*1024*t) : 0.0f;
                results[nbr].size = size;
                results[nbr].res = throughput;
                nbr++;
            }
        }

        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
        for (int i=0 ; i<nbr ; i+=2) {
            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
        }
    } else if (option == 1) {
        printf("memset() validation test is running, please wait...\n");
        fflush(stdout);

        char* d = dst + 1024;
        int nb = 0;
        // `o` doubles as the fill value and as the destination misalignment.
        for (int o=1 ; o<32 ; o++) {
            for (int size=0 ; size<4096 && !nb ; size++) {
                nb += validate_memset(d, char(o), size);
                nb += validate_memset(d+o, char(o), size);
            }
        }
        if (nb) printf("%d error(s) found\n", nb);
        else printf("success!\n");
    }

    fflush(stdout);
    free(dst);
    return 0;
}
/*
 * Validates one memset(d, c, size) call.
 *
 * The target range is first filled with 0xaa, and guard bytes holding 0x55
 * are placed at d[-1], d[size] and d[size+1]. After the memset the guards
 * must be intact and every byte of the range must equal `c`.
 *
 * The guard at d[size] is what catches a one-byte overrun; guarding only
 * d[size+1] (as was done previously) let such an overrun go unnoticed.
 *
 * Requirements: d[-1 .. size+1] must be writable.
 *
 * Returns the number of problems detected (0 on success).
 */
int validate_memset(char* d, char c, size_t size)
{
    int nberr = 0;
    const unsigned int shown = (unsigned int)(unsigned char)c;

    for (size_t i=0 ; i<size ; i++) {
        d[i] = (char)0xaa;
    }
    d[-1] = 0x55;
    d[size] = 0x55;
    d[size+1] = 0x55;

    memset(d, c, size);

    if (d[size]!=0x55 || d[size+1]!=0x55) {
        printf("*** memset(%p,%02x,%lu) clobbered past end of destination!\n",
                (void*)d, shown, (unsigned long)size);
        nberr++;
    }
    if (d[-1]!=0x55) {
        printf("*** memset(%p,%02x,%lu) clobbered before start of destination!\n",
                (void*)d, shown, (unsigned long)size);
        nberr++;
    }
    for (size_t i=0 ; i<size ; i++) {
        if (d[i] != c) {
            // Report only the first mismatching offset.
            printf("*** memset(%p,%02x,%lu) failed at offset %lu\n",
                    (void*)d, shown, (unsigned long)size, (unsigned long)i);
            nberr++;
            break;
        }
    }
    return nberr;
}
#if 0
#pragma mark -
#pragma mark memcmp
#endif
/*
 * Straightforward byte-by-byte reference implementation of memcmp().
 * Returns the difference between the first pair of differing bytes (compared
 * as unsigned char), or 0 when the first n bytes are equal.
 */
static int ref_memcmp(const void *s1, const void *s2, size_t n)
{
    const unsigned char *c1 = (const unsigned char *)s1, *c2 = (const unsigned char *)s2;
    int d = 0;
    while ( n-- ) {
        d = (int)*c1++ - (int)*c2++;
        if ( d )
            break;
    }
    return d;
}

/* Reduces a comparison result to -1, 0 or +1. */
static int memcmp_sign(int v)
{
    return (v > 0) - (v < 0);
}

/*
 * Checks the library memcmp() against ref_memcmp() for one input.
 *
 * Only the sign of the result is specified for memcmp(), so the two results
 * are compared by sign rather than by exact value; an implementation that
 * returns e.g. -1/+1 instead of the byte difference is still correct.
 *
 * Returns 1 (after printing a diagnostic) on disagreement, 0 otherwise.
 */
int validate_memcmp(const char* s, const char* d, size_t size)
{
    const int expected = ref_memcmp(s, d, size);
    const int actual = memcmp(s, d, size);
    if (memcmp_sign(expected) != memcmp_sign(actual)) {
        printf("*** memcmp(%p,%p,%lu) failed %d should be %d\n",
                (const void*)s, (const void*)d, (unsigned long)size, actual, expected);
        return 1;
    }
    return 0;
}
/*
 * memcmp() benchmark ("perf", the default) and validation ("test") driver.
 *
 * argv[1]: optional, "perf" or "test"; anything else returns -1 so that
 *          main() prints the usage text.
 *
 * Returns 0 once a mode has run, including when the working buffers cannot
 * be allocated (a non-zero value is reserved for "bad arguments").
 */
int memcmp_test(int argc, char** argv)
{
    int option = 0;
    if (argc >= 2) {
        if (!strcmp(argv[1], "perf")) option = 0;
        else if (!strcmp(argv[1], "test")) option = 1;
        else return -1;
    }

    const int MAX_SIZE = 1024*1024; // 1MB
    // Rough throughput guesses (bytes/s) derived from CPU_FREQ_EST; they are
    // only used to pick a repeat count for each measurement.
    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024;
    const int UNCACHED_SPEED_EST = (CPU_FREQ_EST/4)*1024*1024;
    // The perf loop reads from (src rounded up to 32) + 28 + 2, i.e. up to
    // 61 bytes past src, for MAX_SIZE bytes. Pad by 64 so that the largest
    // comparison stays inside the allocation.
    const size_t BUF_SIZE = MAX_SIZE + 64;

    char* src = (char*)malloc(BUF_SIZE);
    char* dst = (char*)malloc(BUF_SIZE);
    if (src == NULL || dst == NULL) {
        printf("memcmp test: unable to allocate %lu bytes\n", (unsigned long)BUF_SIZE);
        fflush(stdout);
        free(dst);
        free(src);
        return 0;
    }
    // Defined contents, as memcpy_test() does; the perf mode copies and
    // compares these bytes without writing them first.
    memset(src, 0, BUF_SIZE);
    memset(dst, 0, BUF_SIZE);

    if (option == 0) {
        printf("memcmp() performance test is running, please wait...\n");
        fflush(stdout);
        usleep(10000);
        setpriority(PRIO_PROCESS, 0, -20);

        static const int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
        struct result_t { int size; float res; };
        // Two measurements (source offsets 0 and 2) per size.
        result_t results[FAST_SIZES_COUNT*2];
        int nbr = 0;

        for (size_t i=0 ; i<FAST_SIZES_COUNT ; i++) {
            const int size = FAST_SIZES[i];
            if (size > MAX_SIZE) {
                break;
            }
            // Aim for roughly half a second per measurement.
            const int REPEAT = (((size < DCACHE_SIZE) ?
                    (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size) / 2;
            const nsecs_t overhead = loop_overhead(REPEAT);

            // Deliberately awkward alignments: 32-byte aligned base plus 4
            // for one operand and plus 28 (then 30) for the other.
            char* ddd = (char*)((long(dst+31)&~31) + 4);
            char* sss = (char*)((long(src+31)&~31) + 28);

            for (int offset=0 ; offset<=2 ; offset +=2 ) {
                // Make both operands equal (so memcmp scans all `size`
                // bytes) and warm the caches.
                memcpy(ddd, sss+offset, size);
                nsecs_t t = -system_time();
                int count = REPEAT;
                int c = memcmp(ddd, sss+offset, size);
                do {
                    c = memcmp(ddd, sss+offset, size);
                    // Consume the result so the call cannot be elided.
                    asm volatile (""::"r"(c):"memory");
                } while (--count);
                t += system_time() - overhead;
                // Guard against a zero/negative elapsed time after the
                // overhead correction (would divide by zero).
                const float throughput = (t > 0) ?
                        (size*1000000000.0f*REPEAT) / (1024*1024*t) : 0.0f;
                results[nbr].size = size;
                results[nbr].res = throughput;
                nbr++;
            }
        }

        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (nc)");
        for (int i=0 ; i<nbr ; i+=2) {
            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
        }
    } else {
        printf("memcmp() validation test is running, please wait...\n");
        fflush(stdout);

        const char* const s = (const char*)src + 1024;
        const char* const d = (const char*)dst + 1024;
        int nb = 0;
        for (int j=0 ; j<32 ; j++) {
            // Identical random contents in both buffers...
            char *curr0 = (char*)src;
            char *curr1 = (char*)dst;
            for (int i=0 ; i<MAX_SIZE ; i++) {
                char c = rand();
                *curr0++ = c;
                *curr1++ = c;
            }
            // ...then, except on the first pass, flip one source byte so the
            // buffers first differ j bytes into the compared window.
            if (j) {
                src[1024 + j] ^= 0xFF;
            }
            for (int size=0 ; size<32 && !nb ; size++) {
                for (int o=0 ; o<4 ; o++) {
                    nb += validate_memcmp(s+o, d+o, size);
                }
                for (int o=0 ; o<4 ; o++) {
                    nb += validate_memcmp(s, d+o, size);
                }
            }
        }
        if (nb) printf("%d error(s) found\n", nb);
        else printf("success!\n");
    }

    fflush(stdout);
    free(dst);
    free(src);
    return 0;
}
#if 0
#pragma mark -
#pragma mark strlen
#endif
/*
 * strlen() benchmark ("perf", the default) and validation ("test") driver.
 *
 * argv[1]: optional, "perf" or "test"; anything else returns -1 so that
 *          main() prints the usage text.
 *
 * The "test" mode is advertised by the usage text but previously did nothing;
 * it now checks strlen() for every length below 4096 at every start
 * misalignment below 32, reporting in the same format as the other
 * validation tests.
 *
 * Returns 0 once a mode has run, including when the working buffer cannot
 * be allocated (a non-zero value is reserved for "bad arguments").
 */
int strlen_test(int argc, char** argv)
{
    int option = 0;
    if (argc >= 2) {
        if (!strcmp(argv[1], "perf")) option = 0;
        else if (!strcmp(argv[1], "test")) option = 1;
        else return -1;
    }

    const int MAX_SIZE = 1024*1024; // 1MB
    // Rough throughput guesses (bytes/s) derived from CPU_FREQ_EST; they are
    // only used to pick a repeat count for each measurement.
    const int CACHED_SPEED_EST = CPU_FREQ_EST*1024*1024;
    const int UNCACHED_SPEED_EST = CPU_FREQ_EST*1024*1024;

    char* str = (char*)calloc(MAX_SIZE+4+8, 1);
    if (str == NULL) {
        printf("strlen test: unable to allocate %lu bytes\n", (unsigned long)(MAX_SIZE+4+8));
        fflush(stdout);
        return 0;
    }

    if (option == 0) {
        printf("strlen() performance test is running, please wait...\n");
        fflush(stdout);
        usleep(10000);
        setpriority(PRIO_PROCESS, 0, -20);

        static const int FAST_SIZES[] = { 1024, DCACHE_SIZE/2, DCACHE_SIZE, DCACHE_SIZE*2, MAX_SIZE };
        const size_t FAST_SIZES_COUNT = sizeof(FAST_SIZES)/sizeof(FAST_SIZES[0]);
        struct result_t { int size; float res; };
        result_t results[FAST_SIZES_COUNT*2];
        int nbr = 0;

        for (size_t i=0 ; i<FAST_SIZES_COUNT ; i++) {
            const int size = FAST_SIZES[i];
            if (size > MAX_SIZE) {
                break;
            }
            // Repeat count sized from the speed estimate (about one second
            // of work if the estimate is accurate).
            const int REPEAT = (((size < DCACHE_SIZE) ?
                    (CACHED_SPEED_EST) : (UNCACHED_SPEED_EST)) / size);
            const nsecs_t overhead = loop_overhead(REPEAT);

            // Two measurements per size: first after touching 4x the cache
            // size (to evict the string), then after touching the string.
            for (int j=0 ; j<2 ; j++) {
                // String of size-1 characters; terminate it explicitly
                // rather than relying on the calloc() zero fill.
                memset(str, 'A', size-1);
                str[size-1] = '\0';
                if (j==0) preload(str, DCACHE_SIZE*4); // flush D
                else preload(str, size); // load D
                nsecs_t t = -system_time();
                size_t count = REPEAT;
                int c=0;
                do {
                    c = strlen(str);
                    // Consume the result so the call cannot be elided.
                    asm volatile (""::"r"(c):"memory");
                } while (--count);
                t += system_time() - overhead;
                // Guard against a zero/negative elapsed time after the
                // overhead correction (would divide by zero).
                const float throughput = (t > 0) ?
                        (size*1000000000.0f*REPEAT) / (1024*1024*t) : 0.0f;
                results[nbr].size = size;
                results[nbr].res = throughput;
                nbr++;
            }
        }

        printf("%9s %9s %9s\n", "size", "MB/s", "MB/s (cached)");
        for (int i=0 ; i<nbr ; i+=2) {
            printf("%9d %9ld %9ld\n", results[i].size, (long)results[i].res, (long)results[i+1].res);
        }
    } else {
        printf("strlen() validation test is running, please wait...\n");
        fflush(stdout);

        char* const base = str + 1024;
        int nb = 0;
        for (int o=0 ; o<32 && !nb ; o++) {
            char* const s = base + o;
            for (size_t len=0 ; len<4096 && !nb ; len++) {
                memset(s, 'A', len);
                s[len] = '\0';
                const size_t got = strlen(s);
                if (got != len) {
                    printf("*** strlen(%p) returned %lu, should be %lu\n",
                            (void*)s, (unsigned long)got, (unsigned long)len);
                    nb++;
                }
            }
        }
        if (nb) printf("%d error(s) found\n", nb);
        else printf("success!\n");
    }

    fflush(stdout);
    free(str);
    return 0;
}
#if 0
#pragma mark -
#pragma mark malloc
#endif
/*
 * Allocates memory with malloc() until no allocation of any size succeeds,
 * printing every attempt and the running total.
 *
 * argv[1]: optional "fill"; when given, every successful allocation is
 *          zero-filled so that its pages are actually touched.
 *
 * The request starts at 1 GB, is halved after a failure and grows by 50%
 * after a success. (The growth step used to be written `size + size>>1`,
 * which parses as `(size + size) >> 1` and therefore left the size
 * unchanged.)
 *
 * The allocations are intentionally never freed: exhausting memory is the
 * purpose of the test. Always returns 0.
 */
int malloc_test(int argc, char** argv)
{
    const bool fill = (argc>=2 && !strcmp(argv[1], "fill"));
    const size_t MB = 1024*1024;
    size_t total = 0;
    size_t size = 0x40000000;

    while (size) {
        void* addr = malloc(size);
        if (addr == NULL) {
            printf("size = %9zu failed\n", size);
            size >>= 1;
            continue;
        }

        total += size;
        printf("size = %9zu, addr = %p (total = %9zu (%zu MB))\n",
                size, addr, total, total / MB);
        if (fill) {
            printf("filling...\n");
            fflush(stdout);
            memset(addr, 0, size);
        }

        // Grow by 50%, unless that would overflow size_t.
        const size_t step = size >> 1;
        if (size <= (size_t)-1 - step) {
            size += step;
        }
    }

    printf("done. allocated %zu MB\n", total / MB);
    return 0;
}
#if 0
#pragma mark -
#pragma mark madvise
#endif
/*
 * Exercises madvise(MADV_DONTNEED) on anonymous private mappings, once with
 * a 4096-byte mapping and once with a 48 MB mapping.
 *
 * For each size: map a region, fill it with 0x55, advise DONTNEED, and report
 * whether the first word then reads back as zero, as the original pattern,
 * or as something else. A second region of the same size is then mapped,
 * filled and unmapped before the first one is touched again and unmapped.
 *
 * Fixes over the previous version:
 *  - a failed mmap() is no longer written to;
 *  - the "pages are still there" check compares against a full-width long of
 *    0x55 bytes instead of 0x55555555, which never matched where long is
 *    64 bits wide;
 *  - a failing madvise() is reported;
 *  - sizes are printed in KB, since 4096 bytes used to be shown as "0 MB".
 *
 * argc/argv are unused. Always returns 0.
 */
int madvise_test(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    for (int i=0 ; i<2 ; i++) {
        const size_t size = i==0 ? 4096 : 48*1024*1024; // 48 MB
        const unsigned long size_kb = (unsigned long)(size / 1024);

        printf("Allocating %lu KB... ", size_kb); fflush(stdout);
        void* addr1 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        printf("%p (%s)\n", addr1, addr1==MAP_FAILED ? "failed" : "OK"); fflush(stdout);
        if (addr1 == MAP_FAILED) {
            continue;
        }

        printf("touching %p...\n", addr1); fflush(stdout);
        memset(addr1, 0x55, size);

        printf("advising DONTNEED...\n"); fflush(stdout);
        if (madvise(addr1, size, MADV_DONTNEED) != 0) {
            printf("madvise failed\n");
        }

        printf("reading back %p...\n", addr1); fflush(stdout);
        long first_word;
        long untouched_word;
        memcpy(&first_word, addr1, sizeof(first_word));
        memset(&untouched_word, 0x55, sizeof(untouched_word));
        if (first_word == 0) {
            printf("madvise freed some pages\n");
        } else if (first_word == untouched_word) {
            printf("pages are still there\n");
        } else {
            printf("getting garbage back\n");
        }

        printf("Allocating %lu KB... ", size_kb); fflush(stdout);
        void* addr2 = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        printf("%p (%s)\n", addr2, addr2==MAP_FAILED ? "failed" : "OK"); fflush(stdout);
        if (addr2 != MAP_FAILED) {
            printf("touching %p...\n", addr2); fflush(stdout);
            memset(addr2, 0xAA, size);
            printf("unmap %p ...\n", addr2); fflush(stdout);
            munmap(addr2, size);
        }

        printf("touching %p...\n", addr1); fflush(stdout);
        memset(addr1, 0x55, size);
        printf("unmap %p ...\n", addr1); fflush(stdout);
        munmap(addr1, size);
    }

    printf("Done\n"); fflush(stdout);
    return 0;
}
#if 0
#pragma mark -
#pragma mark cpufreq
#endif
/*
 * Prints "<name> resolution: N ns" for the given clock, or
 * "<name> not supported" when clock_getres() fails.
 */
static void print_clock_resolution(const char* name, clockid_t clock)
{
    struct timespec res;
    res.tv_sec = 0;
    res.tv_nsec = 0;
    // clock_getres() returns 0 on success.
    if (clock_getres(clock, &res) == 0)
        printf("%s resolution: %ld ns\n", name, (long)res.tv_nsec);
    else
        printf("%s not supported\n", name);
}

/*
 * Prints the resolution of the available clocks, then estimates the CPU
 * frequency by timing a countdown loop of known length.
 *
 * The estimate assumes each loop iteration costs 1 + BRANCH_CYCLE cycles.
 * argc/argv are unused. Always returns 0.
 */
int cpufreq_test(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    print_clock_resolution("CLOCK_REALTIME", CLOCK_REALTIME);
    print_clock_resolution("CLOCK_MONOTONIC", CLOCK_MONOTONIC);
    print_clock_resolution("CLOCK_PROCESS_CPUTIME_ID", CLOCK_PROCESS_CPUTIME_ID);
    print_clock_resolution("CLOCK_THREAD_CPUTIME_ID", CLOCK_THREAD_CPUTIME_ID);

    // The *_HR clock ids are not provided by every C library; report them
    // as unsupported when the macro is absent. (Previously the success test
    // was inverted, so a supported clock was reported as "not supported".)
#ifdef CLOCK_REALTIME_HR
    print_clock_resolution("CLOCK_REALTIME_HR", CLOCK_REALTIME_HR);
#else
    printf("CLOCK_REALTIME_HR not supported\n");
#endif
#ifdef CLOCK_MONOTONIC_HR
    print_clock_resolution("CLOCK_MONOTONIC_HR", CLOCK_MONOTONIC_HR);
#else
    printf("CLOCK_MONOTONIC_HR not supported\n");
#endif

    printf("\nEstimating the CPU frequency, please wait...\n");
    fflush(stdout);
    usleep(10000);
    setpriority(PRIO_PROCESS, 0, -20);

    // One cycle for the decrement plus BRANCH_CYCLE cycles for the branch.
    const int LOOP_CYCLES = 1+BRANCH_CYCLE;
    // CPU_FREQ_EST million iterations, i.e. about LOOP_CYCLES seconds when
    // the frequency estimate is accurate.
    const size_t REPEAT = CPU_FREQ_EST*1000000;

    size_t count = REPEAT;
    nsecs_t t = system_time();
    do {
        // Compiler barrier: keeps the empty loop from being optimized away.
        asm volatile ("":::"memory");
    } while (--count);
    t = system_time() - t;

    // cycles per nanosecond * 1000 == MHz; avoid dividing by a zero or
    // negative elapsed time.
    const float freq = (t > 0) ? (1000.0f*float(REPEAT)*LOOP_CYCLES) / t : 0;
    printf("this CPU frequency: %ld MHz\n", long(freq+0.5f));
    return 0;
}
#if 0
#pragma mark -
#pragma mark crash_test
#endif
/*
 * Deliberately crashes the process (ARM only: the body is ARM assembly).
 *
 * r0..r3 are loaded with the constants 0..3 and a word is then loaded from
 * the address held in r0, i.e. address 0, which is expected to fault.
 * NOTE(review): the distinct register values are presumably there to be
 * recognizable in the resulting crash dump -- confirm.
 *
 * argc/argv are unused. The return statement is only reached if the load
 * does not fault.
 */
int crash_test(int argc, char** argv)
{
printf("about to crash...\n");
asm volatile(
"mov r0, #0 \n"
"mov r1, #1 \n"
"mov r2, #2 \n"
"mov r3, #3 \n"
// Load from the address in r0 (0): the faulting access.
"ldr r12, [r0] \n"
);
return 0;
}
/*
 * Deliberately corrupts the stack: a `long long` zero is stored through a
 * pointer to the local `int dummy`, so the store covers more bytes than the
 * variable occupies wherever long long is wider than int.
 *
 * This is intentional undefined behaviour; what (if anything) happens next
 * depends on the compiler's stack layout and protection settings.
 * argc/argv are unused.
 */
int stack_smasher_test(int argc, char** argv)
{
int dummy = 0;
printf("corrupting our stack...\n");
// volatile keeps the oversized store from being optimized away.
*(volatile long long*)&dummy = 0;
return 0;
}
// --------------------------------------------------------------------
// Call chain used by crawl_test(). All functions have C linkage. The
// arm_function_* bodies follow below; thumb_function_1/2 are only declared
// here and must be provided by another translation unit.
extern "C" void thumb_function_1(int*p);
extern "C" void thumb_function_2(int*p);
extern "C" void arm_function_3(int*p);
extern "C" void arm_function_2(int*p);
extern "C" void arm_function_1(int*p);
/*
 * Link in the crawl call chain: forwards to thumb_function_2() with the
 * address of a fresh local. The incoming pointer is not used.
 */
void arm_function_3(int*p) {
    (void)p;
    int frame_local = 0;
    thumb_function_2(&frame_local);
}
/*
 * Link in the crawl call chain: forwards to thumb_function_1() with the
 * address of a fresh local. The incoming pointer is not used.
 */
void arm_function_2(int*p) {
    (void)p;
    int frame_local = 0;
    thumb_function_1(&frame_local);
}
/*
 * First link in the crawl call chain: forwards to arm_function_2() with the
 * address of a fresh local. The incoming pointer is not used.
 */
void arm_function_1(int*p) {
    (void)p;
    int frame_local = 0;
    arm_function_2(&frame_local);
}
/*
 * Starts the crawl call chain by calling arm_function_1() with the address
 * of a local. argc/argv are unused. Returns 0 once the chain returns.
 */
int crawl_test(int argc, char** argv)
{
    (void)argc;
    (void)argv;
    int frame_local = 0;
    arm_function_1(&frame_local);
    return 0;
}