#include <stdio.h>
#include <vector>
#include <pthread.h>
#include <malloc.h>
#include <algorithm>
using namespace std;
// Number of worker threads that standalone_malloc_test() starts.
const size_t kNumThreds = 16;
// Upper bound (exclusive) of each worker's loop counter: 2^23.
const size_t kNumIters = static_cast<size_t>(1) << 23;
// Emits an empty volatile inline-asm statement that takes `arg` as a register
// input and declares a "memory" clobber. No instructions are generated; the
// statement exists so the compiler cannot optimize across the call site.
inline void break_optimization(void *arg) {
  __asm__ __volatile__(
      ""            // empty instruction template
      :             // no output operands
      : "r"(arg)    // arg supplied as a register input
      : "memory");  // memory clobber
}
// Worker body for one stress-test thread.
//
// Runs iterations i = 1 .. kNumIters - 1. Each iteration either allocates a
// char array with new[] or frees a previously allocated one, while keeping
// byte counters of what was requested and what was released.
//
// t: the thread's index, carried by value inside the void* (it is never
//    dereferenced). Only the thread with index 0 prints progress lines and
//    the final summary to stderr.
// Returns 0 (a null pointer) in all cases.
__attribute__((noinline))
static void *MallocThread(void *t) {
  size_t total_malloced = 0, total_freed = 0;
  size_t max_in_use = 0;
  const size_t tid = reinterpret_cast<size_t>(t);
  // Currently live blocks, each paired with its requested size in bytes.
  std::vector<std::pair<char *, size_t> > allocated;
  // At most kNumIters - 1 entries are ever pushed, so reserving up front
  // keeps this bookkeeping vector from reallocating inside the loop.
  allocated.reserve(kNumIters);
  for (size_t i = 1; i < kNumIters; i++) {
    // Progress report at every quarter of the run (thread 0 only).
    // size_t values are printed with %zu; %ld would mismatch the type.
    if ((i % (kNumIters / 4)) == 0 && tid == 0)
      fprintf(stderr, " T[%zu] iter %zu\n", tid, i);
    bool allocate = (i % 5) <= 2;  // 60% malloc, 40% free
    if (i > kNumIters / 4)
      allocate = i % 2;  // then switch to 50% malloc, 50% free
    if (allocate) {
      // Sizes cycle through 1..200 bytes; iterations divisible by 10001
      // scale the size by 4096 to produce an occasional large block.
      size_t size = 1 + (i % 200);
      if ((i % 10001) == 0)
        size *= 4096;
      total_malloced += size;
      char *x = new char[size];
      // Touch the first, last and middle bytes of the new block.
      x[0] = x[size - 1] = x[size / 2] = 0;
      allocated.push_back(std::make_pair(x, size));
      max_in_use = std::max(max_in_use, total_malloced - total_freed);
    } else {
      if (allocated.empty()) continue;
      // Pick a victim by index, then remove it in O(1) by swapping it with
      // the last entry and popping.
      const size_t slot = i % allocated.size();
      char *p = allocated[slot].first;
      p[0] = 0;  // emulate last user touch of the block
      const size_t size = allocated[slot].second;
      total_freed += size;
      std::swap(allocated[slot], allocated.back());
      allocated.pop_back();
      delete [] p;
    }
  }
  // Summary in MiB (byte counters shifted right by 20), thread 0 only.
  if (tid == 0)
    fprintf(stderr, " T[%zu] total_malloced: %zuM in use %zuM max %zuM\n",
            tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
            max_in_use >> 20);
  // Release every block that is still live.
  for (size_t i = 0; i < allocated.size(); i++)
    delete [] allocated[i].first;
  return 0;
}
// Compile-time recursive call chain: DeepStack<N>::run calls
// DeepStack<N - 1>::run. Each level is marked noinline and brackets the
// nested call with break_optimization(0), presumably so that every level
// keeps its own stack frame and the innermost call runs under a deep stack.
template <int kDepth>
struct DeepStack {
  // Calls the next-lower level with the same argument, discards its result,
  // and returns 0.
  __attribute__((noinline))
  static void *run(void *t) {
    break_optimization(0);
    DeepStack<kDepth - 1>::run(t);
    break_optimization(0);
    return 0;
  }
};
// Depth-0 specialization: ends the DeepStack recursion by running the actual
// thread body.
template <>
struct DeepStack<0> {
  // Invokes MallocThread(t), ignores what it returns, and returns 0.
  static void *run(void *t) {
    (void)MallocThread(t);
    return 0;
  }
};
// Build with -Dstandalone_malloc_test=main to make it a separate program.
//
// Starts kNumThreds threads, each entering DeepStack<200>::run with its index
// passed by value in the void* argument, waits for all of them, and then
// calls malloc_stats().
//
// Returns 0 when every thread was created and joined successfully, 1
// otherwise. If pthread_create fails, no further threads are started and only
// the threads that did start are joined, so an uninitialized pthread_t is
// never handed to pthread_join.
int standalone_malloc_test() {
  pthread_t t[kNumThreds];
  size_t num_started = 0;
  int status = 0;
  for (size_t i = 0; i < kNumThreds; i++) {
    const int err = pthread_create(&t[i], 0, DeepStack<200>::run,
                                   reinterpret_cast<void *>(i));
    if (err != 0) {
      fprintf(stderr, "pthread_create failed for thread %zu: error %d\n",
              i, err);
      status = 1;
      break;
    }
    num_started = i + 1;
  }
  for (size_t i = 0; i < num_started; i++) {
    const int err = pthread_join(t[i], 0);
    if (err != 0) {
      fprintf(stderr, "pthread_join failed for thread %zu: error %d\n",
              i, err);
      status = 1;
    }
  }
  malloc_stats();
  return status;
}