// // Copyright (C) 2013 The Android Open Source Project // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "shill/traffic_monitor.h" #include <base/bind.h> #include <base/strings/stringprintf.h> #include <netinet/in.h> #include "shill/device.h" #include "shill/device_info.h" #include "shill/event_dispatcher.h" #include "shill/logging.h" #include "shill/socket_info_reader.h" using base::StringPrintf; using std::string; using std::vector; namespace shill { namespace Logging { static auto kModuleLogScope = ScopeLogger::kLink; static string ObjectID(Device* d) { return d->link_name(); } } // static const uint16_t TrafficMonitor::kDnsPort = 53; const int64_t TrafficMonitor::kDnsTimedOutThresholdSeconds = 15; const int TrafficMonitor::kMinimumFailedSamplesToTrigger = 2; const int64_t TrafficMonitor::kSamplingIntervalMilliseconds = 5000; TrafficMonitor::TrafficMonitor(const DeviceRefPtr& device, EventDispatcher* dispatcher) : device_(device), dispatcher_(dispatcher), socket_info_reader_(new SocketInfoReader), accummulated_congested_tx_queues_samples_(0), connection_info_reader_(new ConnectionInfoReader), accummulated_dns_failures_samples_(0) { } TrafficMonitor::~TrafficMonitor() { Stop(); } void TrafficMonitor::Start() { SLOG(device_.get(), 2) << __func__; Stop(); sample_traffic_callback_.Reset(base::Bind(&TrafficMonitor::SampleTraffic, base::Unretained(this))); dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(), kSamplingIntervalMilliseconds); } void TrafficMonitor::Stop() { SLOG(device_.get(), 2) << __func__; sample_traffic_callback_.Cancel(); ResetCongestedTxQueuesStats(); ResetDnsFailingStats(); } void TrafficMonitor::ResetCongestedTxQueuesStats() { accummulated_congested_tx_queues_samples_ = 0; } void TrafficMonitor::ResetCongestedTxQueuesStatsWithLogging() { SLOG(device_.get(), 2) << __func__ << ": Tx-queues decongested"; ResetCongestedTxQueuesStats(); } void TrafficMonitor::BuildIPPortToTxQueueLength( const vector<SocketInfo>& socket_infos, IPPortToTxQueueLengthMap* tx_queue_lengths) { SLOG(device_.get(), 3) << __func__; string device_ip_address = device_->ipconfig()->properties().address; for (const auto& info : socket_infos) { SLOG(device_.get(), 4) << "SocketInfo(IP=" << info.local_ip_address().ToString() << ", TX=" << info.transmit_queue_value() << ", State=" << info.connection_state() << ", TimerState=" << info.timer_state(); if (info.local_ip_address().ToString() != device_ip_address || info.transmit_queue_value() == 0 || info.connection_state() != SocketInfo::kConnectionStateEstablished || (info.timer_state() != SocketInfo::kTimerStateRetransmitTimerPending && info.timer_state() != SocketInfo::kTimerStateZeroWindowProbeTimerPending)) { SLOG(device_.get(), 4) << "Connection Filtered."; continue; } SLOG(device_.get(), 3) << "Monitoring connection: TX=" << info.transmit_queue_value() << " TimerState=" << info.timer_state(); string local_ip_port = StringPrintf("%s:%d", info.local_ip_address().ToString().c_str(), info.local_port()); (*tx_queue_lengths)[local_ip_port] = info.transmit_queue_value(); } } bool TrafficMonitor::IsCongestedTxQueues() { SLOG(device_.get(), 4) << __func__; vector<SocketInfo> socket_infos; if (!socket_info_reader_->LoadTcpSocketInfo(&socket_infos) || socket_infos.empty()) { SLOG(device_.get(), 3) << __func__ << ": Empty socket info"; ResetCongestedTxQueuesStatsWithLogging(); return false; } bool congested_tx_queues = true; IPPortToTxQueueLengthMap curr_tx_queue_lengths; BuildIPPortToTxQueueLength(socket_infos, &curr_tx_queue_lengths); if (curr_tx_queue_lengths.empty()) { SLOG(device_.get(), 3) << __func__ << ": No interesting socket info"; ResetCongestedTxQueuesStatsWithLogging(); } else { for (const auto& length_entry : old_tx_queue_lengths_) { IPPortToTxQueueLengthMap::iterator curr_tx_queue_it = curr_tx_queue_lengths.find(length_entry.first); if (curr_tx_queue_it == curr_tx_queue_lengths.end() || curr_tx_queue_it->second < length_entry.second) { congested_tx_queues = false; // TODO(armansito): If we had a false positive earlier, we may // want to correct it here by invoking a "connection back to normal // callback", so that the OutOfCredits property can be set to // false. break; } } if (congested_tx_queues) { ++accummulated_congested_tx_queues_samples_; SLOG(device_.get(), 2) << __func__ << ": Congested tx-queues detected (" << accummulated_congested_tx_queues_samples_ << ")"; } } old_tx_queue_lengths_ = curr_tx_queue_lengths; return congested_tx_queues; } void TrafficMonitor::ResetDnsFailingStats() { accummulated_dns_failures_samples_ = 0; } void TrafficMonitor::ResetDnsFailingStatsWithLogging() { SLOG(device_.get(), 2) << __func__ << ": DNS queries restored"; ResetDnsFailingStats(); } bool TrafficMonitor::IsDnsFailing() { SLOG(device_.get(), 4) << __func__; vector<ConnectionInfo> connection_infos; if (!connection_info_reader_->LoadConnectionInfo(&connection_infos) || connection_infos.empty()) { SLOG(device_.get(), 3) << __func__ << ": Empty connection info"; } else { // The time-to-expire counter is used to determine when a DNS request // has timed out. This counter is the number of seconds remaining until // the entry is removed from the system IP connection tracker. The // default time is 30 seconds. This is too long of a wait. Instead, we // want to time out at |kDnsTimedOutThresholdSeconds|. Unfortunately, // we cannot simply look for entries less than // |kDnsTimedOutThresholdSeconds| because we will count the entry // multiple times once its time-to-expire is less than // |kDnsTimedOutThresholdSeconds|. To ensure that we only count an // entry once, we look for entries in this time window between // |kDnsTimedOutThresholdSeconds| and |kDnsTimedOutLowerThresholdSeconds|. const int64_t kDnsTimedOutLowerThresholdSeconds = kDnsTimedOutThresholdSeconds - kSamplingIntervalMilliseconds / 1000; string device_ip_address = device_->ipconfig()->properties().address; for (const auto& info : connection_infos) { if (info.protocol() != IPPROTO_UDP || info.time_to_expire_seconds() > kDnsTimedOutThresholdSeconds || info.time_to_expire_seconds() <= kDnsTimedOutLowerThresholdSeconds || !info.is_unreplied() || info.original_source_ip_address().ToString() != device_ip_address || info.original_destination_port() != kDnsPort) continue; ++accummulated_dns_failures_samples_; SLOG(device_.get(), 2) << __func__ << ": DNS failures detected (" << accummulated_dns_failures_samples_ << ")"; return true; } } ResetDnsFailingStatsWithLogging(); return false; } void TrafficMonitor::SampleTraffic() { SLOG(device_.get(), 3) << __func__; // Schedule the sample callback first, so it is possible for the network // problem callback to stop the traffic monitor. dispatcher_->PostDelayedTask(sample_traffic_callback_.callback(), kSamplingIntervalMilliseconds); if (IsCongestedTxQueues() && accummulated_congested_tx_queues_samples_ == kMinimumFailedSamplesToTrigger) { LOG(WARNING) << "Congested tx queues detected, out-of-credits?"; network_problem_detected_callback_.Run(kNetworkProblemCongestedTxQueue); } else if (IsDnsFailing() && accummulated_dns_failures_samples_ == kMinimumFailedSamplesToTrigger) { LOG(WARNING) << "DNS queries failing, out-of-credits?"; network_problem_detected_callback_.Run(kNetworkProblemDNSFailure); } } } // namespace shill