/* Copyright (C) 2007-2008 The Android Open Source Project
**
** This software is licensed under the terms of the GNU General Public
** License version 2, as published by the Free Software Foundation, and
** may be copied, distributed, and modified under those terms.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
*/
#include "shaper.h"
#include "qemu-common.h"
#include "qemu-timer.h"
#include <stdlib.h>

#define  SHAPER_CLOCK        rt_clock
#define  SHAPER_CLOCK_UNIT   1000.

/* Returns non-zero if the frame is an IPv4 packet whose source and
 * destination addresses are both in 10.x.x.x. Such packets bypass the
 * shaper and the delay logic below.
 *
 * 'data' points to the start of the Ethernet frame, 'size' is its length
 * in bytes. Frames shorter than 40 bytes are never considered internal.
 */
static int
_packet_is_internal( const uint8_t*  data, size_t  size )
{
    /* must have room for Mac + IP header; compare sizes rather than
     * pointers so that no out-of-range pointer is ever formed */
    if (size < 40)
        return 0;

    /* ethertype must be 0x0800 (IP) */
    if (data[12] != 0x08 || data[13] != 0x00 )
        return 0;

    /* must have valid IP header */
    data += 14;
    if ((data[0] >> 4) != 4 || (data[0] & 15) < 5)
        return 0;

    /* internal if both source and dest addresses are in 10.x.x.x */
    return ( data[12] == 10 && data[16] == 10);
}

/* here's how we implement network shaping. we want to limit the network
 * rate to a given constant MAX_RATE expressed as bits/second. this means
 * that it takes 1/MAX_RATE seconds to send a single bit, and count*8/MAX_RATE
 * seconds to send 'count' bytes.
 *
 * we're going to implement a scheme where, when we send a packet of
 * 'count' bytes, no other packet will go through in the same direction for
 * at least 'count*8/MAX_RATE' seconds. any successive packet that is "sent"
 * in this interval is placed in a queue, associated to a timer
 *
 * there are different (queue/timer/rate) values for the input and output
 * direction of the user vlan.
 */
typedef struct QueuedPacketRec_ {
    int64_t                    expiration;  /* SHAPER_CLOCK time at which to send */
    struct QueuedPacketRec_*   next;
    size_t                     size;        /* number of bytes in 'data' */
    void*                      opaque;      /* passed back to the send function */
    void*                      data;        /* packet bytes (copied or borrowed) */
} QueuedPacketRec, *QueuedPacket;

/* Allocates a new queued packet.
 *
 * If 'do_copy' is non-zero, the packet bytes are copied into storage that
 * immediately follows the record in the same allocation. Otherwise the
 * record only keeps the caller's 'data' pointer, which must then remain
 * valid until the packet is sent.
 */
static QueuedPacket
queued_packet_create( const void*  data,
                      size_t       size,
                      void*        opaque,
                      int          do_copy )
{
    QueuedPacket  packet;
    size_t        packet_size = sizeof(*packet);

    if (do_copy)
        packet_size += size;

    packet = qemu_malloc(packet_size);
    packet->next       = NULL;
    packet->expiration = 0;
    packet->size       = (size_t)size;
    packet->opaque     = opaque;

    if (do_copy) {
        /* the payload lives right after the record */
        packet->data = (void*)(packet+1);
        memcpy( (char*)packet->data, (char*)data, packet->size );
    } else {
        packet->data = (void*)data;
    }
    return packet;
}

/* Releases a queued packet (and its inline payload copy, if any).
 * Accepts NULL. */
static void
queued_packet_free( QueuedPacket  packet )
{
    if (packet) {
        qemu_free( packet );
    }
}

typedef struct NetShaperRec_ {
    QueuedPacket   packets;   /* list of queued packets, ordered by expiration date */
    int            num_packets;
    int            active;    /* is this shaper active ? */
    int64_t        block_until;  /* no packet goes through before this time; -1 means "don't block" */
    double         max_rate;  /* max rate expressed in bits/second */
    double         inv_rate;  /* clock units (ms) needed to send one byte */
    QEMUTimer*     timer;     /* QEMU timer */

    int                do_copy;   /* copy packet bytes when queuing ? */
    NetShaperSendFunc  send_func;
} NetShaperRec;

/* Destroys a shaper: drops (without sending) all queued packets, then
 * releases the timer and the shaper itself. Accepts NULL. */
void
netshaper_destroy( NetShaper  shaper )
{
    if (shaper) {
        shaper->active = 0;

        while (shaper->packets) {
            QueuedPacket  packet = shaper->packets;
            shaper->packets = packet->next;
            packet->next    = NULL;
            queued_packet_free(packet);
        }

        qemu_del_timer(shaper->timer);
        qemu_free_timer(shaper->timer);
        shaper->timer = NULL;
        qemu_free(shaper);
    }
}

/* this function is called when the shaper's timer expires */
static void
netshaper_expires( NetShaper  shaper )
{
    QueuedPacket  packet;

    /* send every packet at the head of the queue that is due */
    while ((packet = shaper->packets) != NULL) {
        int64_t   now = qemu_get_clock( SHAPER_CLOCK );

        if (packet->expiration > now)
            break;

        shaper->packets = packet->next;
        shaper->send_func( packet->data, packet->size, packet->opaque );
        queued_packet_free(packet);
        shaper->num_packets--;
    }

    /* reprogram timer if needed */
    if (shaper->packets) {
        shaper->block_until = shaper->packets->expiration;
        qemu_mod_timer( shaper->timer, shaper->block_until );
    } else {
        shaper->block_until = -1;
    }
}

/* Creates a new, inactive shaper.
 *
 * 'do_copy' tells whether packets that must be queued are copied
 * (non-zero) or referenced through the caller's pointer (zero).
 * 'send_func' is called to actually transmit a packet.
 *
 * The shaper stays inactive until netshaper_set_rate() is called
 * with a rate greater than 1.
 */
NetShaper
netshaper_create( int                do_copy,
                  NetShaperSendFunc  send_func )
{
    NetShaper  shaper = qemu_malloc(sizeof(*shaper));

    shaper->active      = 0;
    shaper->packets     = NULL;
    shaper->num_packets = 0;
    shaper->timer       = qemu_new_timer( SHAPER_CLOCK,
                                          (QEMUTimerCB*) netshaper_expires,
                                          shaper );
    /* remember the copy policy: netshaper_send_aux() reads it when it has
     * to queue a packet, so it must not be left uninitialized */
    shaper->do_copy     = do_copy;
    shaper->send_func   = send_func;
    shaper->max_rate    = 1e6;
    shaper->inv_rate    = 0.;

    shaper->block_until = -1; /* magic value, means to not block */

    return shaper;
}

/* Changes the maximum rate of the shaper. All packets currently queued
 * are sent immediately. A 'rate' that is not greater than 1 disables
 * the shaper. */
void
netshaper_set_rate( NetShaper  shaper,
                    double     rate )
{
    /* send all current packets when changing the rate */
    while (shaper->packets) {
        QueuedPacket  packet = shaper->packets;
        shaper->packets = packet->next;
        shaper->send_func(packet->data, packet->size, packet->opaque);
        queued_packet_free(packet);
    }
    shaper->num_packets = 0;

    shaper->max_rate = rate;
    if (rate > 1.) {
        /* qemu_get_clock returns time in ms for the real-time clock,
         * so this is the number of ms needed to send one byte */
        shaper->inv_rate = (8.*SHAPER_CLOCK_UNIT)/rate;
        shaper->active   = 1;
    } else {
        shaper->active = 0;
    }

    shaper->block_until = -1;
}

/* Sends a packet through the shaper.
 *
 * The packet is passed straight to the send function when the shaper is
 * inactive, when the packet is internal (see _packet_is_internal), or
 * when the shaper is not currently blocked. Otherwise it is queued and
 * will be sent by the timer callback once the current block expires.
 */
void
netshaper_send_aux( NetShaper  shaper,
                    void*      data,
                    size_t     size,
                    void*      opaque )
{
    int64_t   now;

    if (!shaper->active || _packet_is_internal(data, size)) {
        shaper->send_func( data, size, opaque );
        return;
    }

    now = qemu_get_clock( SHAPER_CLOCK );
    if (now >= shaper->block_until) {
        /* not blocked: send now, then block for the packet's duration */
        shaper->send_func( data, size, opaque );
        shaper->block_until = now + size*shaper->inv_rate;
        //fprintf(stderr, "NETSHAPER: block for %.2fms\n", (shaper->block_until - now)*1.0 );
        return;
    }

    /* create new packet, add it to the queue */
    {
        QueuedPacket   packet;

        packet = queued_packet_create( data, size, opaque, shaper->do_copy );

        packet->expiration = shaper->block_until;

        {
            QueuedPacket  *pnode, node;

            /* insert after all packets with an earlier or equal
             * expiration, to keep the list ordered */
            pnode = &shaper->packets;
            for (;;) {
                node = *pnode;
                if (node == NULL || node->expiration > packet->expiration )
                    break;
                pnode = &node->next;
            }
            packet->next = *pnode;
            *pnode       = packet;

            /* the new packet became the head: (re)arm the timer for it */
            if (packet == shaper->packets)
                qemu_mod_timer( shaper->timer, packet->expiration );
        }
        shaper->num_packets += 1;
    }
    shaper->block_until += size*shaper->inv_rate;
    //fprintf(stderr, "NETSHAPER: block2 for %.2fms\n", (shaper->block_until - now)*1.0 );
}

/* Same as netshaper_send_aux() with a NULL opaque value. */
void
netshaper_send( NetShaper  shaper,
                void*      data,
                size_t     size )
{
    netshaper_send_aux(shaper, data, size, NULL);
}

/* Returns 1 if a packet sent now would go through immediately,
 * 0 if it would have to be queued. */
int
netshaper_can_send( NetShaper  shaper )
{
    int64_t  now;

    if (!shaper->active || shaper->block_until < 0)
        return 1;

    if (shaper->packets)
        return 0;

    now = qemu_get_clock( SHAPER_CLOCK );
    return (now >= shaper->block_until);
}

/* this type is used to model a session connection/state
 * if session->packet is != NULL, then the connection is delayed
 */
typedef struct SessionRec_ {
    int64_t               expiration;  /* SHAPER_CLOCK time at which to send 'packet' */
    struct SessionRec_*   next;
    unsigned              src_ip;
    unsigned              dst_ip;
    unsigned short        src_port;
    unsigned short        dst_port;
    uint8_t               protocol;
    QueuedPacket          packet;      /* delayed SYN packet, or NULL once sent */
} SessionRec, *Session;

#define  _PROTOCOL_TCP   6
#define  _PROTOCOL_UDP   17

/* Releases a session and its pending packet, if any. Accepts NULL. */
static void
session_free( Session  session )
{
    if (session) {
        if (session->packet) {
            queued_packet_free(session->packet);
            session->packet = NULL;
        }
        qemu_free( session );
    }
}

#if 0  /* useful for debugging */
static const char*
session_to_string( Session  session )
{
    static char  temp[256];
    const char*  format = (session->protocol == _PROTOCOL_TCP) ? "TCP" : "UDP";
    sprintf( temp, "%s[%d.%d.%d.%d:%d / %d.%d.%d.%d:%d]", format,
             (session->src_ip >> 24) & 255, (session->src_ip >> 16) & 255,
             (session->src_ip >> 8) & 255, (session->src_ip) & 255, session->src_port,
             (session->dst_ip >> 24) & 255, (session->dst_ip >> 16) & 255,
             (session->dst_ip >> 8) & 255, (session->dst_ip) & 255, session->dst_port);

    return temp;
}
#endif

/* Parses an Ethernet frame and extracts its connection identity.
 *
 * On return, if the frame is an IPv4 TCP or UDP packet, 'info' holds the
 * protocol, the source/destination addresses and (when the frame is long
 * enough) the source/destination ports.
 *
 * Returns the low 5 bits of the TCP flags byte (FIN=0x01, SYN=0x02,
 * RST=0x04, PSH=0x08, ACK=0x10) for a TCP packet, and 0 for anything
 * else, including malformed or truncated frames. UDP datagrams have no
 * flags byte, so 0 is returned for them: reading that offset would
 * interpret payload bytes as SYN/FIN/RST.
 *
 * The last 4 bytes of the frame are never examined.
 */
int
_packet_SYN_flags( const void*  _data, size_t   size, Session  info )
{
    const uint8_t*  data = (const uint8_t*)_data;
    size_t          avail;    /* bytes after the MAC header, minus the 4 trailing bytes */
    size_t          ip_hlen;  /* IP header length in bytes */

    /* enough room for an Ethernet MAC packet ? */
    if (size < 14 + 4)
        return 0;

    /* is it an IP packet ? */
    if (data[12] != 0x8 || data[13] != 0)
        return 0;

    data += 14;
    avail = size - 14 - 4;

    if (avail < 20)
        return 0;

    /* IP version must be 4, and the header length in words at least 5 */
    if ((data[0] & 0xF) < 5 || (data[0] >> 4) != 4)
        return 0;

    /* time-to-live must be > 0 */
    if (data[8] == 0)
        return 0;

    /* must be TCP or UDP packet */
    if (data[9] != _PROTOCOL_TCP && data[9] != _PROTOCOL_UDP)
        return 0;

    info->protocol = data[9];
    /* convert each byte to unsigned before shifting: shifting a promoted
     * (signed) int into the sign bit is undefined */
    info->src_ip   = ((unsigned)data[12] << 24) | ((unsigned)data[13] << 16) |
                     ((unsigned)data[14] << 8)  |  (unsigned)data[15];
    info->dst_ip   = ((unsigned)data[16] << 24) | ((unsigned)data[17] << 16) |
                     ((unsigned)data[18] << 8)  |  (unsigned)data[19];

    /* skip the IP header; at least 20 more bytes are required after it */
    ip_hlen = 4*(size_t)(data[0] & 15);
    if (ip_hlen + 20 > avail)
        return 0;

    data += ip_hlen;

    info->src_port = (unsigned short)((data[0] << 8) | data[1]);
    info->dst_port = (unsigned short)((data[2] << 8) | data[3]);

    /* only TCP carries a flags byte at offset 13 */
    if (info->protocol != _PROTOCOL_TCP)
        return 0;

    return (data[13] & 0x1f);
}

typedef struct NetDelayRec_
{
    Session     sessions;      /* list of known sessions */
    int         num_sessions;
    QEMUTimer*  timer;
    int         active;        /* is connection delaying enabled ? */
    int         min_ms;        /* minimum latency */
    int         max_ms;        /* maximum latency */
    NetShaperSendFunc  send_func;
} NetDelayRec;

/* Looks up the session matching 'info' (addresses, ports and protocol).
 *
 * Returns the address of the link that points to the matching session,
 * so that the caller can unlink it. If there is no match, the returned
 * link is the list's terminating one and '*result' is NULL.
 */
static Session*
netdelay_lookup_session( NetDelay  delay, Session  info )
{
    Session*  pnode = &delay->sessions;
    Session   node;

    for (;;) {
        node = *pnode;
        if (node == NULL)
            break;

        if (node->src_ip == info->src_ip &&
            node->dst_ip == info->dst_ip &&
            node->src_port == info->src_port &&
            node->dst_port == info->dst_port &&
            node->protocol == info->protocol )
            break;

        pnode = &node->next;
    }
    return pnode;
}

/* called by the delay's timer on expiration */
static void
netdelay_expires( NetDelay  delay )
{
    Session  session;
    int64_t  now = qemu_get_clock( SHAPER_CLOCK );
    int      rearm = 0;
    int64_t  rearm_time = 0;

    for (session = delay->sessions; session != NULL; session = session->next)
    {
        QueuedPacket  packet = session->packet;

        /* nothing pending for this session */
        if (packet == NULL)
            continue;

        if (session->expiration <= now) {
            /* send the SYN packet now */
            //fprintf(stderr, "NetDelay:RST: sending creation for %s\n", session_to_string(session) );
            delay->send_func( packet->data, packet->size, packet->opaque );
            session->packet = NULL;
            queued_packet_free( packet );
        } else {
            /* track the earliest expiration among still-pending sessions */
            if (!rearm) {
                rearm      = 1;
                rearm_time = session->expiration;
            }
            else if ( session->expiration < rearm_time )
                rearm_time = session->expiration;
        }
    }

    if (rearm)
        qemu_mod_timer( delay->timer, rearm_time );
}

/* Creates a new, inactive connection delayer. 'send_func' is called to
 * actually transmit a packet. Use netdelay_set_latency() to enable it. */
NetDelay
netdelay_create( NetShaperSendFunc  send_func )
{
    NetDelay  delay = qemu_malloc(sizeof(*delay));

    delay->sessions     = NULL;
    delay->num_sessions = 0;
    delay->timer        = qemu_new_timer( SHAPER_CLOCK,
                                          (QEMUTimerCB*) netdelay_expires,
                                          delay );
    delay->active    = 0;
    delay->min_ms    = 0;
    delay->max_ms    = 0;
    delay->send_func = send_func;

    return delay;
}

/* Changes the latency range. All known sessions are forgotten and any
 * pending packet is sent immediately. Delaying is enabled only when
 * 0 < min_ms <= max_ms. */
void
netdelay_set_latency( NetDelay  delay, int  min_ms, int  max_ms )
{
    /* when changing the latency, accept all sessions */
    while (delay->sessions) {
        Session  session = delay->sessions;
        delay->sessions = session->next;
        session->next = NULL;
        if (session->packet) {
            QueuedPacket  packet = session->packet;
            delay->send_func( packet->data, packet->size, packet->opaque );
        }
        /* also frees the pending packet, if any */
        session_free(session);
        delay->num_sessions--;
    }

    delay->min_ms = min_ms;
    delay->max_ms = max_ms;
    delay->active = (min_ms <= max_ms) && min_ms > 0;
}

/* Same as netdelay_send_aux() with a NULL opaque value. */
void
netdelay_send( NetDelay  delay, const void*  data, size_t  size )
{
    netdelay_send_aux(delay, data, size, NULL);
}

/* Sends a packet through the connection delayer.
 *
 * When the delayer is active and the packet is not internal:
 *  - a packet with FIN or RST set drops the matching session, if any,
 *    and is then sent normally;
 *  - a SYN (without ACK) for an unknown connection creates a session and
 *    is held back (a copy is kept) for a latency in [min_ms, max_ms);
 *  - a SYN re-transmission for a session that is still held back is
 *    silently discarded.
 * Every other packet is passed straight to the send function.
 */
void
netdelay_send_aux( NetDelay  delay, const void*  data, size_t  size, void* opaque )
{
    if (delay->active && !_packet_is_internal(data, size)) {
        SessionRec  info[1];
        int         flags;

        flags = _packet_SYN_flags( data, size, info );
        if ((flags & 0x05) != 0)
        {  /* FIN or RST: drop connection */
            Session*  lookup  = netdelay_lookup_session( delay, info );
            Session   session = *lookup;
            if (session != NULL) {
                //fprintf(stderr, "NetDelay:RST: dropping %s\n", session_to_string(info) );

                *lookup = session->next;
                session_free( session );
                delay->num_sessions -= 1;
            }
        }
        else if ((flags & 0x12) == 0x02)
        {
            /* SYN: create connection */
            Session*  lookup  = netdelay_lookup_session( delay, info );
            Session   session = *lookup;

            if (session != NULL) {
                if (session->packet != NULL) {
                    /* this is a SYN re-transmission, since we didn't
                     * send the original SYN packet yet, just eat this one
                     */
                    //fprintf(stderr, "NetDelay:RST: swallow SYN re-send for %s\n", session_to_string(info) );
                    return;
                }
            } else {
                /* establish a new session slightly in the future */
                int  latency = delay->min_ms;
                int  range   = delay->max_ms - delay->min_ms;

                if (range > 0)
                    latency += rand() % range;

                //fprintf(stderr, "NetDelay:RST: delay creation for %s\n", session_to_string(info) );
                session = qemu_malloc( sizeof(*session) );

                session->next   = delay->sessions;
                delay->sessions = session;
                delay->num_sessions += 1;

                session->expiration = qemu_get_clock( SHAPER_CLOCK ) + latency;

                session->src_ip   = info->src_ip;
                session->dst_ip   = info->dst_ip;
                session->src_port = info->src_port;
                session->dst_port = info->dst_port;
                session->protocol = info->protocol;

                /* always copy: the packet outlives this call */
                session->packet = queued_packet_create( data, size, opaque, 1 );

                /* sends anything already due and (re)arms the timer for
                 * the earliest pending session */
                netdelay_expires(delay);
                return;
            }
        }
    }

    delay->send_func( (void*)data, size, opaque );
}

/* Destroys a connection delayer: drops (without sending) all sessions and
 * their pending packets, then releases the timer and the delayer itself.
 * Accepts NULL. */
void
netdelay_destroy( NetDelay  delay )
{
    if (delay) {
        while (delay->sessions) {
            Session  session = delay->sessions;
            delay->sessions = session->next;
            session_free(session);
            delay->num_sessions -= 1;
        }
        delay->active = 0;

        /* cancel and release the timer, as netshaper_destroy() does:
         * otherwise it leaks, and a pending timer could still fire with
         * a pointer to the freed delayer */
        qemu_del_timer(delay->timer);
        qemu_free_timer(delay->timer);
        delay->timer = NULL;

        qemu_free( delay );
    }
}