// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/base_switches.h"
#include "base/bind.h"
#include "base/command_line.h"
#include "base/memory/scoped_vector.h"
#include "base/synchronization/condition_variable.h"
#include "base/synchronization/lock.h"
#include "base/synchronization/waitable_event.h"
#include "base/threading/thread.h"
#include "base/time/time.h"
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_test.h"

#if defined(OS_POSIX)
#include <pthread.h>
#endif

namespace base {

namespace {

const int kNumRuns = 100000;

// Base class for a threading perf-test. This sets up some threads for the
// test and measures the wall-clock time in addition to the cpu-time spent on
// each thread.
class ThreadPerfTest : public testing::Test {
public:
ThreadPerfTest()
: done_(false, false) {
// Disable the task profiler as it adds significant cost!
CommandLine::Init(0, NULL);
CommandLine::ForCurrentProcess()->AppendSwitchASCII(
switches::kProfilerTiming,
switches::kProfilerTimingDisabledValue);
}

// To be implemented by each test. Subclasses must use threads_ such that
// their cpu-time can be measured. Tests must return from PingPong() _and_
// call FinishMeasurement() from any thread to complete the test.
virtual void Init() {}
virtual void PingPong(int hops) = 0;
virtual void Reset() {}
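// Runs on the target thread: captures that thread's cpu-time into |ticks|
// and signals |done| so the caller can stop waiting.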
void TimeOnThread(base::TimeTicks* ticks, base::WaitableEvent* done) {
*ticks = base::TimeTicks::ThreadNow();
done->Signal();
}
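// Returns the cpu-time consumed so far by |thread| by posting TimeOnThread()
// to it and blocking until the sample has been taken.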
base::TimeTicks ThreadNow(base::Thread* thread) {
base::WaitableEvent done(false, false);
base::TimeTicks ticks;
thread->message_loop_proxy()->PostTask(
FROM_HERE,
base::Bind(&ThreadPerfTest::TimeOnThread,
base::Unretained(this),
&ticks,
&done));
done.Wait();
return ticks;
}
void RunPingPongTest(const std::string& name, unsigned num_threads) {
// Create threads and collect starting cpu-time for each thread.
std::vector<base::TimeTicks> thread_starts;
while (threads_.size() < num_threads) {
threads_.push_back(new base::Thread("PingPonger"));
threads_.back()->Start();
if (base::TimeTicks::IsThreadNowSupported())
thread_starts.push_back(ThreadNow(threads_.back()));
}
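// Initialize the test, run the ping-pong and block until a subclass calls
// FinishMeasurement().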
Init();
base::TimeTicks start = base::TimeTicks::HighResNow();
PingPong(kNumRuns);
done_.Wait();
base::TimeTicks end = base::TimeTicks::HighResNow();
// Gather the cpu-time spent on each thread. This does one extra task per
// thread, but that should be in the noise given enough runs.
base::TimeDelta thread_time;
while (threads_.size()) {
if (base::TimeTicks::IsThreadNowSupported()) {
thread_time += ThreadNow(threads_.back()) - thread_starts.back();
thread_starts.pop_back();
}
threads_.pop_back();
}
Reset();
double num_runs = static_cast<double>(kNumRuns);
double us_per_task_clock = (end - start).InMicroseconds() / num_runs;
double us_per_task_cpu = thread_time.InMicroseconds() / num_runs;
// Clock time per task.
perf_test::PrintResult(
"task", "", name + "_time ", us_per_task_clock, "us/hop", true);
// Total utilization across threads if available (likely higher).
if (base::TimeTicks::IsThreadNowSupported()) {
perf_test::PrintResult(
"task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true);
}
}
protected:
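// Called by subclasses (from any thread) once the ping-pong has completed;
// unblocks RunPingPongTest().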
void FinishMeasurement() { done_.Signal(); }
ScopedVector<base::Thread> threads_;
private:
base::WaitableEvent done_;
};

// Class to test task performance by posting empty tasks back and forth.
class TaskPerfTest : public ThreadPerfTest {
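// Round-robins over threads_ so that, with more than one thread, consecutive
// hops land on different threads.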
base::Thread* NextThread(int count) {
return threads_[count % threads_.size()];
}
virtual void PingPong(int hops) OVERRIDE {
if (!hops) {
FinishMeasurement();
return;
}
NextThread(hops)->message_loop_proxy()->PostTask(
FROM_HERE,
base::Bind(
&ThreadPerfTest::PingPong, base::Unretained(this), hops - 1));
}
};

// This tries to test the 'best-case' as well as the 'worst-case' task posting
// performance. The best case keeps one thread alive such that it never yields,
// while the worst case forces a context switch for every task. Four threads
// are used to ensure the threads do yield (with just two it might be possible
// for both threads to stay awake if they can signal each other fast enough).
TEST_F(TaskPerfTest, TaskPingPong) {
RunPingPongTest("1_Task_Threads", 1);
RunPingPongTest("4_Task_Threads", 4);
}

// Same as above, but adds task observers to test their perf impact.
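// The observer callbacks are empty, so only the observer dispatch overhead
// shows up in the measurement.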
class MessageLoopObserver : public base::MessageLoop::TaskObserver {
public:
virtual void WillProcessTask(const base::PendingTask& pending_task) OVERRIDE {
}
virtual void DidProcessTask(const base::PendingTask& pending_task) OVERRIDE {
}
};

MessageLoopObserver message_loop_observer;

class TaskObserverPerfTest : public TaskPerfTest {
public:
virtual void Init() OVERRIDE {
TaskPerfTest::Init();
for (size_t i = 0; i < threads_.size(); i++) {
threads_[i]->message_loop()->AddTaskObserver(&message_loop_observer);
}
}
};

TEST_F(TaskObserverPerfTest, TaskPingPong) {
RunPingPongTest("1_Task_Threads_With_Observer", 1);
RunPingPongTest("4_Task_Threads_With_Observer", 4);
}

// Class to test our WaitableEvent performance by signaling back and forth.
// The event type is templated so we can also compare with other
// implementations.
template <typename WaitableEventType>
class EventPerfTest : public ThreadPerfTest {
public:
virtual void Init() OVERRIDE {
for (size_t i = 0; i < threads_.size(); i++)
events_.push_back(new WaitableEventType(false, false));
}
virtual void Reset() OVERRIDE { events_.clear(); }
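// Each thread waits on its own event, claims a hop by decrementing the
// shared counter, and then signals the next thread's event, forming a ring
// of forced context switches.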
void WaitAndSignalOnThread(size_t event) {
size_t next_event = (event + 1) % events_.size();
int my_hops = 0;
do {
events_[event]->Wait();
my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal.
events_[next_event]->Signal();
} while (my_hops > 0);
// Once we are done, all threads will still signal their neighbor as hops
// passes zero, so every waiter wakes up and exits its loop. We only signal
// completion once, on the thread that reaches exactly zero.
if (!my_hops)
FinishMeasurement();
}
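// Arms one waiter task per thread, then starts the chain by signaling the
// first event.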
virtual void PingPong(int hops) OVERRIDE {
remaining_hops_ = hops;
for (size_t i = 0; i < threads_.size(); i++) {
threads_[i]->message_loop_proxy()->PostTask(
FROM_HERE,
base::Bind(&EventPerfTest::WaitAndSignalOnThread,
base::Unretained(this),
i));
}
// Kick off the Signal ping-ponging.
events_.front()->Signal();
}
int remaining_hops_;
ScopedVector<WaitableEventType> events_;
};

// Similar to the task posting test, this tests the same ping-pong pattern
// using WaitableEvents. We only test four threads (the worst case), but we
// might want to craft a way to test the best case (where the thread doesn't
// end up blocking because the event is already signaled).
typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest;
TEST_F(WaitableEventPerfTest, EventPingPong) {
RunPingPongTest("4_WaitableEvent_Threads", 4);
}

// Build a minimal event using ConditionVariable.
class ConditionVariableEvent {
public:
ConditionVariableEvent(bool manual_reset, bool initially_signaled)
: cond_(&lock_), signaled_(false) {
DCHECK(!manual_reset);
DCHECK(!initially_signaled);
}
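// Set the flag under the lock, but signal outside it so the woken thread
// does not immediately block on |lock_|.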
void Signal() {
{
base::AutoLock scoped_lock(lock_);
signaled_ = true;
}
cond_.Signal();
}
void Wait() {
base::AutoLock scoped_lock(lock_);
while (!signaled_)
cond_.Wait();
signaled_ = false;
}
private:
base::Lock lock_;
base::ConditionVariable cond_;
bool signaled_;
};

// This is meant to test the absolute minimal context switching time
// using our own base synchronization code.
typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
TEST_F(ConditionVariablePerfTest, EventPingPong) {
RunPingPongTest("4_ConditionVariable_Threads", 4);
}

#if defined(OS_POSIX)
// Absolutely 100% minimal POSIX waitable event. If there is a better/faster
// way to force a context switch, we should use that instead.
class PthreadEvent {
public:
PthreadEvent(bool manual_reset, bool initially_signaled) {
DCHECK(!manual_reset);
DCHECK(!initially_signaled);
pthread_mutex_init(&mutex_, 0);
pthread_cond_init(&cond_, 0);
signaled_ = false;
}
~PthreadEvent() {
pthread_cond_destroy(&cond_);
pthread_mutex_destroy(&mutex_);
}
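// Set the flag while holding the mutex, then signal after unlocking so the
// waiter can re-acquire the mutex immediately when it wakes.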
void Signal() {
pthread_mutex_lock(&mutex_);
signaled_ = true;
pthread_mutex_unlock(&mutex_);
pthread_cond_signal(&cond_);
}
void Wait() {
pthread_mutex_lock(&mutex_);
while (!signaled_)
pthread_cond_wait(&cond_, &mutex_);
signaled_ = false;
pthread_mutex_unlock(&mutex_);
}
private:
bool signaled_;
pthread_mutex_t mutex_;
pthread_cond_t cond_;
};

// This is meant to test the absolute minimal context switching time.
// If there is any faster way to do this we should substitute it in.
typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
TEST_F(PthreadEventPerfTest, EventPingPong) {
RunPingPongTest("4_PthreadCondVar_Threads", 4);
}
#endif

} // namespace

} // namespace base