// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
#define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
#include <fcntl.h>
#include <sys/queue.h>
#include <ext/hash_map> // it is annoying that gcc does this. oh well.
#include <ext/hash_set>
#include <map>
#include <string>
#include <utility>
#include <set>
#include <vector>
// #define EPOLL_SERVER_EVENT_TRACING 1
//
// Defining EPOLL_SERVER_EVENT_TRACING
// causes code to exist which didn't before.
// This code tracks each event generated by the epollserver,
// as well as providing a per-fd-registered summary of
// events. Note that enabling this code vastly slows
// down operations, and uses substantially more
// memory. For these reasons, it should only be enabled when doing
// developer debugging at his/her workstation.
//
// A structure called 'EventRecorder' will exist when
// the macro is defined. See the EventRecorder class interface
// within the EpollServer class for more details.
#ifdef EPOLL_SERVER_EVENT_TRACING
#include <iostream>
#include "base/logging.h"
#endif
#include "base/basictypes.h"
#include "base/scoped_ptr.h"
#include <sys/epoll.h>
namespace net {
class EpollServer;
class EpollAlarmCallbackInterface;
class ReadPipeCallback;
class WatchDogToken;
// Describes a single event handed to EpollCallbackInterface::OnEvent().
struct EpollEvent {
  // Args:
  //   events        - the mask of incoming events; stored in in_events.
  //   is_epoll_wait - accepted by the constructor but not stored anywhere in
  //                   this struct.
  EpollEvent(int events, bool is_epoll_wait) {
    in_events = events;
    out_ready_mask = 0;
  }

  // Incoming events.
  int in_events;

  // The new event mask for the ready list (0 means don't get on the ready
  // list). This field is always initialized to 0 when the event is passed
  // to OnEvent.
  int out_ready_mask;
};
// Callbacks which go into EpollServers are expected to derive from this class.
class EpollCallbackInterface {
// Abstract interface: every method below other than the constructor and
// destructor is pure virtual and must be implemented by subclasses.
public:
// Summary:
// Called when the callback is registered into a EpollServer.
// Args:
// eps - the poll server into which this callback was registered
// fd - the file descriptor which was registered
// event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
// which was registered (and will initially be used
// in the epoll() calls)
virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0;
// Summary:
// Called when the event_mask is modified (for a file-descriptor)
// Args:
// fd - the file descriptor which was registered
// event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
// which is now current (and will be used
// in subsequent epoll() calls)
virtual void OnModification(int fd, int event_mask) = 0;
// Summary:
// Called whenever an event occurs on the file-descriptor.
// This is where the bulk of processing is expected to occur.
// Args:
// fd - the file descriptor which was registered
// event - a struct that contains the incoming event mask (in_events,
// composed of EPOLLIN, EPOLLOUT, etc) and an output field
// (out_ready_mask) for OnEvent to inform the EpollServer whether
// to put this fd on the ready list.
// NOTE(review): EpollEvent's constructor takes an is_epoll_wait
// flag, but the struct as declared in this file does not store
// it, so OnEvent cannot read it from 'event'.
virtual void OnEvent(int fd, EpollEvent* event) = 0;
// Summary:
// Called when the file-descriptor is unregistered from the poll-server.
// Args:
// fd - the file descriptor which was registered, and as of this call, is
// now unregistered.
// replaced - If true, this callback is being replaced by another, otherwise
// it is simply being removed.
virtual void OnUnregistration(int fd, bool replaced) = 0;
// Summary:
// Called when the epoll server is shutting down. This is different from
// OnUnregistration because the subclass may want to clean up memory.
// This is called in lieu of OnUnregistration.
// Args:
// eps - presumably the epoll server which is shutting down (TODO confirm
// against the EpollServer implementation).
// fd - the file descriptor which was registered.
virtual void OnShutdown(EpollServer* eps, int fd) = 0;
// Virtual so that subclasses may be destroyed through a pointer to this
// interface.
virtual ~EpollCallbackInterface() {}
protected:
// Protected: only subclasses can construct this interface.
EpollCallbackInterface() {}
};
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
class EpollServer {
public:
typedef EpollAlarmCallbackInterface AlarmCB;
typedef EpollCallbackInterface CB;
typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap;
typedef TimeToAlarmCBMap::iterator AlarmRegToken;
// Summary:
// Constructor:
// By default, we don't wait any amount of time for events, and
// we suggest to the epoll-system that we're going to use on-the-order
// of 1024 FDs.
EpollServer();
////////////////////////////////////////
// Destructor
virtual ~EpollServer();
////////////////////////////////////////
// Summary
// Register a callback to be called whenever an event contained
// in the set of events included in event_mask occurs on the
// file-descriptor 'fd'
//
// Note that only one callback is allowed to be registered for
// any specific file-descriptor.
//
// If a callback is registered for a file-descriptor which has already
// been registered, then the previous callback is unregistered with
// the 'replaced' flag set to true. I.e. the previous callback's
// OnUnregistration() function is called like so:
// OnUnregistration(fd, true);
//
// The epoll server does NOT take on ownership of the callback: the callback
// creator is responsible for managing that memory.
//
// Args:
// fd - a valid file-descriptor
// cb - an instance of a subclass of EpollCallbackInterface
// event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
// the events for which the callback would like to be
// called.
virtual void RegisterFD(int fd, CB* cb, int event_mask);
////////////////////////////////////////
// Summary:
// A shortcut for RegisterFD which sets things up such that the
// callback is called when 'fd' is available for writing.
// Args:
// fd - a valid file-descriptor
// cb - an instance of a subclass of EpollCallbackInterface
virtual void RegisterFDForWrite(int fd, CB* cb);
////////////////////////////////////////
// Summary:
// A shortcut for RegisterFD which sets things up such that the
// callback is called when 'fd' is available for reading or writing.
// Args:
// fd - a valid file-descriptor
// cb - an instance of a subclass of EpollCallbackInterface
virtual void RegisterFDForReadWrite(int fd, CB* cb);
////////////////////////////////////////
// Summary:
// A shortcut for RegisterFD which sets things up such that the
// callback is called when 'fd' is available for reading.
// Args:
// fd - a valid file-descriptor
// cb - an instance of a subclass of EpollCallbackInterface
virtual void RegisterFDForRead(int fd, CB* cb);
////////////////////////////////////////
// Summary:
// Removes the FD and the associated callback from the pollserver.
// If the callback is registered with other FDs, they will continue
// to be processed using the callback without modification.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the file-descriptor which should no-longer be monitored.
virtual void UnregisterFD(int fd);
////////////////////////////////////////
// Summary:
// Modifies the event mask for the file-descriptor, replacing
// the old event_mask with the new one specified here.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the fd whose event mask should be modified.
// event_mask - the new event mask.
virtual void ModifyCallback(int fd, int event_mask);
////////////////////////////////////////
// Summary:
// Modifies the event mask for the file-descriptor such that we
// no longer request events when 'fd' is readable.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the fd whose event mask should be modified.
virtual void StopRead(int fd);
////////////////////////////////////////
// Summary:
// Modifies the event mask for the file-descriptor such that we
// request events when 'fd' is readable.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the fd whose event mask should be modified.
virtual void StartRead(int fd);
////////////////////////////////////////
// Summary:
// Modifies the event mask for the file-descriptor such that we
// no longer request events when 'fd' is writable.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the fd whose event mask should be modified.
virtual void StopWrite(int fd);
////////////////////////////////////////
// Summary:
// Modifies the event mask for the file-descriptor such that we
// request events when 'fd' is writable.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the fd whose event mask should be modified.
virtual void StartWrite(int fd);
////////////////////////////////////////
// Summary:
// Looks up the callback associated with the file-descriptor 'fd'.
// If a callback is associated with this file-descriptor, then
// its OnEvent() method is called with the file-descriptor 'fd',
// and event_mask 'event_mask'
//
// If no callback is registered for this file-descriptor, nothing
// will happen as a result of this call.
//
// This function is used internally by the EpollServer, but is
// available publicly so that events might be 'faked'. Calling
// this function with an fd and event_mask is equivalent (as far
// as the callback is concerned) to having a real event generated
// by epoll (except, of course, that read(), etc won't necessarily
// be able to read anything)
// Args:
// fd - the file-descriptor on which an event has occurred.
// event_mask - a bitmask representing the events which have occurred
// on/for this fd. This bitmask is composed of
// POLLIN, POLLOUT, etc.
//
void HandleEvent(int fd, int event_mask);
// Summary:
// Call this when you want the pollserver to
// wait for events and execute the callbacks associated with
// the file-descriptors on which those events have occurred.
// Depending on the value of timeout_in_us_, this may or may
// not return immediately. Please reference the set_timeout_in_us()
// function for the specific behaviour.
virtual void WaitForEventsAndExecuteCallbacks();
// Summary:
// When an fd is registered to use edge trigger notification, the ready
// list can be used to simulate level trigger semantics. Edge trigger
// registration doesn't send an initial event, and only rising edge (going
// from blocked to unblocked) events are sent. A callback can put itself on
// the ready list by calling SetFDReady() after calling RegisterFD(). The
// OnEvent method of all callbacks associated with the fds on the ready
// list will be called immediately after processing the events returned by
// epoll_wait(). The fd is removed from the ready list before the
// callback's OnEvent() method is invoked. To stay on the ready list, the
// OnEvent() (or some function in that call chain) must call SetFDReady
// again. When a fd is unregistered using UnregisterFD(), the fd is
// automatically removed from the ready list.
//
// When the callback for a edge triggered fd hits the falling edge (about
// to block, either because of it got an EAGAIN, or had a short read/write
// operation), it should remove itself from the ready list using
// SetFDNotReady() (since OnEvent cannot distinguish between invocation
// from the ready list vs from a normal epoll event). All four ready list
// methods are safe to be called within the context of the callbacks.
//
// Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
// that are registered with the EpollServer will be put on the ready list.
// SetFDReady() and SetFDNotReady() will do nothing if the EpollServer
// doesn't know about the fd passed in.
//
// Since the ready list cannot reliably determine proper set of events
// which should be sent to the callback, SetFDReady() requests the caller
// to provide the ready list with the event mask, which will be used later
// when OnEvent() is invoked by the ready list. Hence, the event_mask
// passed to SetFDReady() does not affect the actual epoll registration of
// the fd with the kernel. If a fd is already put on the ready list, and
// SetFDReady() is called again for that fd with a different event_mask,
// the event_mask will be updated.
virtual void SetFDReady(int fd, int events_to_fake);
virtual void SetFDNotReady(int fd);
// Summary:
// IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
// debugging tools and for writing unit tests.
// IsFDReady() returns whether a fd is in the ready list.
// ReadyListSize() returns the number of fds on the ready list.
// VerifyReadyList() checks the consistency of internal data structure. It
// will CHECK if it finds an error.
virtual bool IsFDReady(int fd) const;
// Returns the value of ready_list_size_ converted to size_t.
size_t ReadyListSize() const {
  return static_cast<size_t>(ready_list_size_);
}
void VerifyReadyList() const;
////////////////////////////////////////
// Summary:
// Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
// If the callback returns a positive number from its OnAlarm() function,
// then the callback will be re-registered at that time, else the alarm
// owner is responsible for freeing up memory.
//
// Important: A given AlarmCB* cannot be registered again if it is already
// registered. If a user wants to register a callback again it should first
// unregister the previous callback before calling RegisterAlarm again.
// Args:
// timeout_time_in_us - the absolute time at which the alarm should go off
// ac - the alarm which will be called.
virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac);
// Summary:
// Registers an alarm 'ac' to go off at time: (ApproximateNowInUsec() +
// delta_in_us). While this is somewhat less accurate (see the description
// for ApproximateNowInUsec() to see how 'approximate'), the error is never
// worse than the amount of time it takes to process all events in one
// WaitForEvents. As with 'RegisterAlarm()', if the callback returns a
// positive number from its OnAlarm() function, then the callback will be
// re-registered at that time, else the alarm owner is responsible for
// freeing up memory.
// Note that this function is purely a convenience. The
// same thing may be accomplished by using RegisterAlarm with
// ApproximateNowInUsec() directly.
//
// Important: A given AlarmCB* cannot be registered again if it is already
// registered. If a user wants to register a callback again it should first
// unregister the previous callback before calling RegisterAlarm again.
// Args:
// delta_in_us - the delta in microseconds from ApproximateNowInUsec() at
// which point the alarm should go off.
// ac - the alarm which will be called.
void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) {
  // Turn the relative delay into the absolute time RegisterAlarm() takes.
  const int64 alarm_time_in_us = ApproximateNowInUsec() + delta_in_us;
  RegisterAlarm(alarm_time_in_us, ac);
}
////////////////////////////////////////
// Summary:
// Unregister the alarm referred to by iterator_token; Callers should
// be warned that a token may have become already invalid when OnAlarm()
// is called, was unregistered, or OnShutdown was called on that alarm.
// Args:
// iterator_token - iterator to the alarm callback to unregister.
virtual void UnregisterAlarm(
const EpollServer::AlarmRegToken& iterator_token);
////////////////////////////////////////
// Summary:
// returns the number of file-descriptors registered in this EpollServer.
// Returns:
// number of FDs registered (discounting the internal pipe used for Wake)
virtual int NumFDsRegistered() const;
// Summary:
// Force the epoll server to wake up (by writing to an internal pipe).
virtual void Wake();
// Summary:
// Wrapper around WallTimer's NowInUsec. We do this so that we can test
// EpollServer without using the system clock (and can avoid the flakiness
// that would ensue)
// Returns:
// the current time as number of microseconds since the Unix epoch.
virtual int64 NowInUsec() const;
// Summary:
// Since calling NowInUsec() many thousands of times per
// WaitForEventsAndExecuteCallbacks function call is, to say the least,
// inefficient, we allow users to use an approximate time instead. The
// time returned from this function is as accurate as NowInUsec() when
// WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
// callstack.
// However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
// this function returns the time at which the
// WaitForEventsAndExecuteCallbacks function started to process events or
// alarms.
//
// Essentially, this function makes available a fast and mostly accurate
// mechanism for getting the time for any function handling an event or
// alarm. When functions which are not handling callbacks or alarms call
// this function, they get the slow and "absolutely" accurate time.
//
// Users should be encouraged to use this function.
// Returns:
// the "approximate" current time as number of microseconds since the Unix
// epoch.
virtual int64 ApproximateNowInUsec() const {
  // recorded_now_in_us_ == 0 means no time has been recorded, so ask
  // NowInUsec(); otherwise hand back the recorded time unchanged.
  return (recorded_now_in_us_ == 0) ? this->NowInUsec()
                                    : recorded_now_in_us_;
}
static std::string EventMaskToString(int event_mask);
// Summary:
// Logs the state of the epoll server with LOG(ERROR).
void LogStateOnCrash();
// Summary:
// Set the timeout to the value specified.
// If the timeout is set to a negative number,
// WaitForEventsAndExecuteCallbacks() will only return when an event has
// occurred
// If the timeout is set to zero,
// WaitForEventsAndExecuteCallbacks() will return immediately
// If the timeout is set to a positive number,
// WaitForEventsAndExecuteCallbacks() will return when an event has
// occurred, or when timeout_in_us microseconds has elapsed, whichever
// is first.
// Args:
// timeout_in_us - value specified depending on behaviour desired.
// See above.
void set_timeout_in_us(int64 timeout_in_us) {
  // Plain store into the member; the value is not validated here.
  this->timeout_in_us_ = timeout_in_us;
}
////////////////////////////////////////
// Summary:
// Accessor for the current value of timeout_in_us.
int timeout_in_us() const { return timeout_in_us_; }
// Summary:
// Returns true when the EpollServer() is being destroyed.
bool in_shutdown() const { return in_shutdown_; }
// Returns true if 'alarm' is present in all_alarms_.
bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const {
  const AlarmCBMap::const_iterator position = all_alarms_.find(alarm);
  return position != all_alarms_.end();
}
// Summary:
// A function for implementing the ready list. It invokes OnEvent for each
// of the fd in the ready list, and takes care of adding them back to the
// ready list if the callback requests it (by checking that out_ready_mask
// is non-zero).
void CallReadyListCallbacks();
// Granularity at which time moves when considering what alarms are on.
// See function: DoRoundingOnNow() on exact usage.
static const int kMinimumEffectiveAlarmQuantum;
protected:
// These have to be in the .h file so that we can override them in tests.
// Returns the result of fcntl(fd, F_GETFL, 0).
virtual inline int GetFlags(int fd) {
  const int status_flags = fcntl(fd, F_GETFL, 0);
  return status_flags;
}
// Calls fcntl(fd, F_SETFL, ...) with 'flags' plus O_NONBLOCK and returns
// fcntl's result. Unlike GetFlags(), this function is not declared virtual.
inline int SetFlags(int fd, int flags) {
  const int nonblocking_flags = flags | O_NONBLOCK;
  return fcntl(fd, F_SETFL, nonblocking_flags);
}
virtual void SetNonblocking (int fd);
// This exists here so that we can override this function in unittests
// in order to make effective mock EpollServer objects.
// Forwards all four arguments to epoll_wait() and returns its result
// unchanged.
virtual int epoll_wait_impl(int epfd,
                            struct epoll_event* events,
                            int max_events,
                            int timeout_in_ms) {
  const int wait_result =
      epoll_wait(epfd, events, max_events, timeout_in_ms);
  return wait_result;
}
// this struct is used internally, and is never used by anything external
// to this class. Some of its members are declared mutable to get around the
// restriction imposed by hash_set. Since hash_set knows nothing about the
// objects it stores, it has to assume that every bit of the object is used
// in the hash function and equal_to comparison. Thus hash_set::iterator is a
// const iterator. In this case, the only thing that must stay constant is
// fd. Everything else are just along for the ride and changing them doesn't
// compromise the hash_set integrity.
struct CBAndEventMask {
  // Default state: no callback, fd of -1, all masks zero, not in use, and
  // both list-entry links NULL.
  CBAndEventMask()
      : cb(NULL), fd(-1), event_mask(0), events_asserted(0),
        events_to_fake(0), in_use(false) {
    entry.le_prev = NULL;
    entry.le_next = NULL;
  }

  // Stores the given callback, event mask and fd; the remaining fields start
  // out exactly as in the default constructor.
  CBAndEventMask(EpollCallbackInterface* cb,
                 int event_mask,
                 int fd)
      : cb(cb),
        fd(fd),
        event_mask(event_mask),
        events_asserted(0),
        events_to_fake(0),
        in_use(false) {
    entry.le_prev = NULL;
    entry.le_next = NULL;
  }

  // Equality operator required by hash_set. Only the fd takes part in the
  // comparison. It is a member rather than a free function because
  // CBAndEventMask is a protected type that will never be a base class.
  bool operator==(const CBAndEventMask& cb_and_mask) const {
    return this->fd == cb_and_mask.fd;
  }

  // The callback. It will be set to NULL if the fd is unregistered inside
  // the callchain of OnEvent.
  mutable EpollCallbackInterface* cb;

  // Linkage for the <sys/queue.h> lists this object can be placed on.
  mutable LIST_ENTRY(CBAndEventMask) entry;

  // File descriptor registered with the epoll server. This is the only
  // non-mutable field; it is what the hash and equality are based on.
  int fd;

  // The event_mask currently registered for this callback.
  mutable int event_mask;

  // The event_mask that was returned by epoll.
  mutable int events_asserted;

  // The event_mask the ready list uses when it calls OnEvent.
  mutable int events_to_fake;

  // Toggled around calls to OnEvent so that UnregisterFD knows not to erase
  // the iterator while HandleEvent is still using it.
  mutable bool in_use;
};
// Custom hash function to be used by hash_set.
struct CBAndEventMaskHash {
  // The hash value is simply the entry's fd converted to size_t.
  size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
    const int descriptor = cb_and_eventmask.fd;
    return static_cast<size_t>(descriptor);
  }
};
typedef __gnu_cxx::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap;
// the following four functions are OS-specific, and are likely
// to be changed in a subclass if the poll/select method is changed
// from epoll.
// Summary:
// Deletes a file-descriptor from the set of FDs that should be
// monitored with epoll.
// Note that this only deals with modifying data relating -directly-
// with the epoll call-- it does not modify any data within the
// epoll_server.
// Args:
// fd - the file descriptor to-be-removed from the monitoring set
virtual void DelFD(int fd) const;
////////////////////////////////////////
// Summary:
// Adds a file-descriptor to the set of FDs that should be
// monitored with epoll.
// Note that this only deals with modifying data relating -directly-
// with the epoll call.
// Args:
// fd - the file descriptor to-be-added to the monitoring set
// event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
// OR'd together) which will be associated with this
// FD initially.
virtual void AddFD(int fd, int event_mask) const;
////////////////////////////////////////
// Summary:
// Modifies a file-descriptor in the set of FDs that should be
// monitored with epoll.
// Note that this only deals with modifying data relating -directly-
// with the epoll call.
// Args:
// fd - the file descriptor to-be-added to the monitoring set
// event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
// OR'd together) which will be associated with this
// FD after this call.
virtual void ModFD(int fd, int event_mask) const;
////////////////////////////////////////
// Summary:
// Modified the event mask associated with an FD in the set of
// data needed by epoll.
// Events are removed before they are added, thus, if ~0 is put
// in 'remove_event', whatever is put in 'add_event' will be
// the new event mask.
// If the file-descriptor specified is not registered in the
// epoll_server, then nothing happens as a result of this call.
// Args:
// fd - the file descriptor whose event mask is to be modified
// remove_event - the events which are to be removed from the current
// event_mask
// add_event - the events which are to be added to the current event_mask
//
//
virtual void ModifyFD(int fd, int remove_event, int add_event);
////////////////////////////////////////
// Summary:
// Waits for events, and calls HandleEvents() for each
// fd, event pair discovered to possibly have an event.
// Note that a callback (B) may get a spurious event if
// another callback (A) has closed a file-descriptor N, and
// the callback (B) has a newly opened file-descriptor, which
// also happens to be N.
virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
struct epoll_event events[],
int events_size);
// Summary:
// An internal function for implementing the ready list. It adds a fd's
// CBAndEventMask to the ready list. If the fd is already on the ready
// list, it is a no-op.
void AddToReadyList(CBAndEventMask* cb_and_mask);
// Summary:
// An internal function for implementing the ready list. It remove a fd's
// CBAndEventMask from the ready list. If the fd is not on the ready list,
// it is a no-op.
void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
// Summary:
// Calls any pending alarms that should go off and reregisters them if they
// were recurring.
virtual void CallAndReregisterAlarmEvents();
// The file-descriptor created for epolling
int epoll_fd_;
// The mapping of file-descriptor to CBAndEventMasks
FDToCBMap cb_map_;
// Custom hash function to be used by hash_set.
struct AlarmCBHash {
  // The hash value is the pointer's value reinterpreted as a size_t.
  size_t operator()(AlarmCB*const& p) const {
    const size_t pointer_bits = reinterpret_cast<size_t>(p);
    return pointer_bits;
  }
};
// TODO(sushantj): Having this hash_set is avoidable. We currently have it
// only so that we can enforce stringent checks that a caller can not register
// the same alarm twice. One option is to have an implementation in which
// this hash_set is used only in the debug mode.
typedef __gnu_cxx::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap;
AlarmCBMap all_alarms_;
TimeToAlarmCBMap alarm_map_;
// The amount of time in microseconds that we'll wait before returning
// from the WaitForEventsAndExecuteCallbacks() function.
// If this is positive, wait that many microseconds.
// If this is negative, wait forever, or for the first event that occurs
// If this is zero, never wait for an event.
int64 timeout_in_us_;
// This is nonzero only after the invocation of epoll_wait_impl within
// WaitForEventsAndCallHandleEvents and before the function
// WaitForEventsAndExecuteCallbacks returns. At all other times, this is
// zero. This enables us to have relatively accurate time returned from the
// ApproximateNowInUsec() function. See that function for more details.
int64 recorded_now_in_us_;
// This is used to implement CallAndReregisterAlarmEvents. This stores
// all alarms that were reregistered because OnAlarm() returned a
// value > 0 and the time at which they should be executed is less than
// the current time. By storing such alarms in this map we ensure
// that while calling CallAndReregisterAlarmEvents we do not call
// OnAlarm on any alarm in this set. This ensures that we do not
// go in an infinite loop.
AlarmCBMap alarms_reregistered_and_should_be_skipped_;
LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
int ready_list_size_;
// TODO(alyssar): make this into something that scales up.
static const int events_size_ = 256;
struct epoll_event events_[256];
// This controls the granularity for alarms.
// See function CallAndReregisterAlarmEvents()
// TODO(sushantj): Add test for this.
int64 DoRoundingOnNow(int64 now_in_us) const;
#ifdef EPOLL_SERVER_EVENT_TRACING
struct EventRecorder {
public:
EventRecorder() : num_records_(0), record_threshold_(10000) {}
~EventRecorder() {
Clear();
}
// When a number of events equals the record threshold,
// the collected data summary for all FDs will be written
// to LOG(INFO). Note that this does not include the
// individual events (if you'reinterested in those, you'll
// have to get at them programmatically).
// After any such flushing to LOG(INFO) all events will
// be cleared.
// Note that the definition of an 'event' is a bit 'hazy',
// as it includes the 'Unregistration' event, and perhaps
// others.
void set_record_threshold(int64 new_threshold) {
record_threshold_ = new_threshold;
}
void Clear() {
for (int i = 0; i < debug_events_.size(); ++i) {
delete debug_events_[i];
}
debug_events_.clear();
unregistered_fds_.clear();
event_counts_.clear();
}
void MaybeRecordAndClear() {
++num_records_;
if ((num_records_ > record_threshold_) &&
(record_threshold_ > 0)) {
LOG(INFO) << "\n" << *this;
num_records_ = 0;
Clear();
}
}
void RecordFDMaskEvent(int fd, int mask, const char* function) {
FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
debug_events_.push_back(fdmo);
MaybeRecordAndClear();
}
void RecordEpollWaitEvent(int timeout_in_ms,
int num_events_generated) {
EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms,
num_events_generated);
debug_events_.push_back(ewo);
MaybeRecordAndClear();
}
void RecordEpollEvent(int fd, int event_mask) {
Events& events_for_fd = event_counts_[fd];
events_for_fd.AssignFromMask(event_mask);
MaybeRecordAndClear();
}
friend ostream& operator<<(ostream& os, const EventRecorder& er) {
for (int i = 0; i < er.unregistered_fds_.size(); ++i) {
os << "fd: " << er.unregistered_fds_[i] << "\n";
os << er.unregistered_fds_[i];
}
for (EventCountsMap::const_iterator i = er.event_counts_.begin();
i != er.event_counts_.end();
++i) {
os << "fd: " << i->first << "\n";
os << i->second;
}
for (int i = 0; i < er.debug_events_.size(); ++i) {
os << *(er.debug_events_[i]) << "\n";
}
return os;
}
void RecordUnregistration(int fd) {
EventCountsMap::iterator i = event_counts_.find(fd);
if (i != event_counts_.end()) {
unregistered_fds_.push_back(i->second);
event_counts_.erase(i);
}
MaybeRecordAndClear();
}
protected:
class DebugOutput {
public:
friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) {
debug_output.OutputToStream(os);
return os;
}
virtual void OutputToStream(ostream* os) const = 0;
virtual ~DebugOutput() {}
};
class FDMaskOutput : public DebugOutput {
public:
FDMaskOutput(int fd, int mask, const char* function) :
fd_(fd), mask_(mask), function_(function) {}
virtual void OutputToStream(ostream* os) const {
(*os) << "func: " << function_
<< "\tfd: " << fd_;
if (mask_ != 0) {
(*os) << "\tmask: " << EventMaskToString(mask_);
}
}
int fd_;
int mask_;
const char* function_;
};
class EpollWaitOutput : public DebugOutput {
public:
EpollWaitOutput(int timeout_in_ms,
int num_events_generated) :
timeout_in_ms_(timeout_in_ms),
num_events_generated_(num_events_generated) {}
virtual void OutputToStream(ostream* os) const {
(*os) << "timeout_in_ms: " << timeout_in_ms_
<< "\tnum_events_generated: " << num_events_generated_;
}
protected:
int timeout_in_ms_;
int num_events_generated_;
};
// Per-flag tally of epoll events. There is one counter per EPOLL* flag; each
// counter holds how many of the masks handed to AssignFromMask() had that
// flag set.
struct Events {
  // Starts with every counter at zero.
  Events()
      : epoll_in(0),
        epoll_pri(0),
        epoll_out(0),
        epoll_rdnorm(0),
        epoll_rdband(0),
        epoll_wrnorm(0),
        epoll_wrband(0),
        epoll_msg(0),
        epoll_err(0),
        epoll_hup(0),
        epoll_oneshot(0),
        epoll_et(0) {}

  // Despite the name, this accumulates rather than assigns: every counter
  // whose EPOLL* bit is present in |event_mask| is bumped by one, and all
  // other counters are left untouched.
  void AssignFromMask(int event_mask) {
    epoll_in += ((event_mask & EPOLLIN) ? 1u : 0u);
    epoll_pri += ((event_mask & EPOLLPRI) ? 1u : 0u);
    epoll_out += ((event_mask & EPOLLOUT) ? 1u : 0u);
    epoll_rdnorm += ((event_mask & EPOLLRDNORM) ? 1u : 0u);
    epoll_rdband += ((event_mask & EPOLLRDBAND) ? 1u : 0u);
    epoll_wrnorm += ((event_mask & EPOLLWRNORM) ? 1u : 0u);
    epoll_wrband += ((event_mask & EPOLLWRBAND) ? 1u : 0u);
    epoll_msg += ((event_mask & EPOLLMSG) ? 1u : 0u);
    epoll_err += ((event_mask & EPOLLERR) ? 1u : 0u);
    epoll_hup += ((event_mask & EPOLLHUP) ? 1u : 0u);
    epoll_oneshot += ((event_mask & EPOLLONESHOT) ? 1u : 0u);
    epoll_et += ((event_mask & EPOLLET) ? 1u : 0u);
  }

  // Prints one "\t NAME: count\n" line for each counter that is non-zero, in
  // declaration order. Counters that are still zero produce no output.
  friend ostream& operator<<(ostream& os, const Events& ev) {
    const struct {
      const char* prefix;
      unsigned int count;
    } rows[] = {
      {"\t EPOLLIN: ", ev.epoll_in},
      {"\t EPOLLPRI: ", ev.epoll_pri},
      {"\t EPOLLOUT: ", ev.epoll_out},
      {"\t EPOLLRDNORM: ", ev.epoll_rdnorm},
      {"\t EPOLLRDBAND: ", ev.epoll_rdband},
      {"\t EPOLLWRNORM: ", ev.epoll_wrnorm},
      {"\t EPOLLWRBAND: ", ev.epoll_wrband},
      {"\t EPOLLMSG: ", ev.epoll_msg},
      {"\t EPOLLERR: ", ev.epoll_err},
      {"\t EPOLLHUP: ", ev.epoll_hup},
      {"\t EPOLLONESHOT: ", ev.epoll_oneshot},
      {"\t EPOLLET: ", ev.epoll_et},
    };
    const unsigned int num_rows = sizeof(rows) / sizeof(rows[0]);
    for (unsigned int i = 0; i < num_rows; ++i) {
      if (rows[i].count == 0) {
        continue;
      }
      os << rows[i].prefix << rows[i].count << "\n";
    }
    return os;
  }

  // One counter per epoll flag.
  unsigned int epoll_in;
  unsigned int epoll_pri;
  unsigned int epoll_out;
  unsigned int epoll_rdnorm;
  unsigned int epoll_rdband;
  unsigned int epoll_wrnorm;
  unsigned int epoll_wrband;
  unsigned int epoll_msg;
  unsigned int epoll_err;
  unsigned int epoll_hup;
  unsigned int epoll_oneshot;
  unsigned int epoll_et;
};
// Recorded debug entries, held as raw DebugOutput pointers.
// NOTE(review): where these pointers are allocated and freed is not visible
// in this section -- confirm ownership before changing how they are stored.
std::vector<DebugOutput*> debug_events_;
// A list of Events tallies; presumably the tallies of fds that have since
// been unregistered (inferred from the name -- confirm against the recorder's
// methods).
std::vector<Events> unregistered_fds_;
// Maps an int key to its Events tally. The key is presumably the file
// descriptor, matching the per-fd summary described at the top of this file
// -- confirm.
typedef __gnu_cxx::hash_map<int, Events> EventCountsMap;
EventCountsMap event_counts_;
// NOTE(review): presumably the number of records currently held and the
// limit that count is compared against; the code using them is outside this
// section -- confirm.
int64 num_records_;
int64 record_threshold_;
};
void ClearEventRecords() {
event_recorder_.Clear();
}
// Streams event_recorder_ to |*os| using the recorder's operator<<.
// |os| is dereferenced unconditionally, so it must not be NULL.
void WriteEventRecords(ostream* os) const {
  ostream& out = *os;
  out << event_recorder_;
}
mutable EventRecorder event_recorder_;
#endif
private:
// Helper functions used in the destructor.
void CleanupFDToCBMap();
void CleanupTimeToAlarmCBMap();
// The callback registered to the fds below. As the purpose of their
// registration is to wake the epoll server it just clears the pipe and
// returns.
scoped_ptr<ReadPipeCallback> wake_cb_;
// A pipe owned by the epoll server. The server will be registered to listen
// on read_fd_ and can be woken by Wake() which writes to write_fd_.
int read_fd_;
int write_fd_;
// Recursion guard for WaitForEventsAndExecuteCallbacks. The flag is checked
// at the top of that function. If it is already true, the function either
// logs to ERROR and returns without doing work, or aborts the program (in
// DEBUG mode). Otherwise the function sets the flag to true, does its work,
// and sets it back to false when done. This catches unwanted recursion.
bool in_wait_for_events_and_execute_callbacks_;
// True while the EpollServer is being destroyed.
bool in_shutdown_;
DISALLOW_COPY_AND_ASSIGN(EpollServer);
};
// Interface for alarm callbacks. Every notification below is pure virtual,
// so a concrete alarm must implement all four of them.
class EpollAlarmCallbackInterface {
public:
// Summary:
// Called when an alarm times out. Invalidates an AlarmRegToken.
// WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
// delete it, as the reference is no longer valid.
// Returns:
// the unix time (in microseconds) at which this alarm should be signaled
// again, or 0 if the alarm should be removed.
virtual int64 OnAlarm() = 0;
// Summary:
// Called when an alarm is registered. Invalidates an AlarmRegToken.
// Args:
// token: the iterator to the alarm registered in the alarm map.
// WARNING: this token becomes invalid when the alarm fires, is
// unregistered, or OnShutdown is called on that alarm.
// eps: the epoll server the alarm is registered with.
virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
EpollServer* eps) = 0;
// Summary:
// Called when an alarm is unregistered.
// WARNING: It is not valid to unregister a callback and then use the token
// that was saved to refer to the callback.
virtual void OnUnregistration() = 0;
// Summary:
// Called when the epoll server is shutting down.
// Invalidates the AlarmRegToken that was given when this alarm was
// registered.
// Args:
// eps: the epoll server that is shutting down.
virtual void OnShutdown(EpollServer* eps) = 0;
// Virtual so that an implementation can be destroyed through a pointer to
// this interface.
virtual ~EpollAlarmCallbackInterface() {}
protected:
// Protected: only derived classes can construct the interface.
EpollAlarmCallbackInterface() {}
};
// A simple alarm which unregisters itself on destruction.
//
// PLEASE NOTE:
// Any class overriding these functions must either call the parent class's
// implementation, or otherwise make sure that the 'registered_' boolean and
// the token, 'token_', are updated appropriately.
class EpollAlarm : public EpollAlarmCallbackInterface {
public:
// The constructor and destructor are defined outside this header section.
EpollAlarm();
virtual ~EpollAlarm();
// Marks the alarm as unregistered and returns 0. The return value may be
// safely ignored by subclasses.
virtual int64 OnAlarm();
// Marks the alarm as registered, and stores the token.
virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
EpollServer* eps);
// Marks the alarm as unregistered.
virtual void OnUnregistration();
// Marks the alarm as unregistered.
virtual void OnShutdown(EpollServer* eps);
// If the alarm was registered, unregister it.
void UnregisterIfRegistered();
// Returns the current value of the registered_ flag.
bool registered() const { return registered_; }
// Returns the stored EpollServer pointer (eps_) as a pointer-to-const.
const EpollServer* eps() const { return eps_; }
private:
// The registration token; per the comment on OnRegistration, this is where
// the token handed to that call is stored.
EpollServer::AlarmRegToken token_;
// Presumably the server passed to OnRegistration -- the implementation is
// not visible here, confirm.
EpollServer* eps_;
// Whether the alarm is currently considered registered; see the
// OnAlarm/OnRegistration/OnUnregistration/OnShutdown comments above.
bool registered_;
};
} // namespace net
#endif // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_